diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..d72fd520b --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.pdf binary diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 65cbde37b..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: -assignees: - ---- - -_**Please provide all mandatory information!**_ - -## Describe the bug (mandatory) -A clear and concise description of what the bug is. - -## To Reproduce (mandatory) -Explain the steps to reproduce the behavior, For example, include a minimal code snippet, example files, etc. - -For problems when building or installing PyMuPDF, give the full output of the build/install command so that, for example, all pip/compiler/linker errors/warnings can be seen. - -## Expected behavior (optional) -Describe what you expected to happen (if not obvious). - -## Screenshots (optional) -If applicable, add screenshots to help explain your problem. - -## Your configuration (mandatory) - - Operating system, potentially version and bitness - - Python version, bitness - - PyMuPDF version, installation method (**wheel** or **generated** from source). - -For example, the output of `print(sys.version, "\n", sys.platform, "\n", fitz.__doc__)` would be sufficient (for the first two bullets). - -## Additional context (optional) -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..7fdb600ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,93 @@ +name: Bug Report +description: Create a bug report for PyMuPDF + +# We omit `title: "..."` so that the field defaults to blank. If we set it to +# empty string, Github seems to reject this .yml file. + +body: + + - type: textarea + id: description + attributes: + label: Description of the bug + description: | + A clear and concise description of the bug. + + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: How to reproduce the bug + + # Should not word-wrap this description here. + description: | + * Explain the steps required to reproduce the bug. + * Include required code snippets, example files, etc. + * Describe what you expected to happen (if not obvious). + * If applicable, add screenshots to help explain the problem. + * Include any other information that could be relevant, for example information about the Python environment. + + For problems when building or installing PyMuPDF: + * Give the **exact** build/install commands that were run. + * Give the **complete** output from these commands. + + validations: + required: true + +# - type: markdown +# attributes: +# value: | +# # The information below is required. + + - type: dropdown + id: version + attributes: + label: PyMuPDF version + options: + - 1.26.6 + - 1.26.5 + - 1.26.4 + - 1.26.3 + - 1.26.1 + - 1.26.0 + - 1.25.x or earlier + - Built from source + description: | + * For example from `pymupdf.pymupdf_version`. + * We generally only look at bugs in the most recent release of PyMuPDF. + validations: + required: true + + - type: dropdown + id: os_name + attributes: + label: Operating system + #multiple: true + options: + - + - Windows + - Linux + - MacOS + - OpenBSD + - Other + validations: + required: true + + - type: dropdown + id: python_version + attributes: + label: Python version + #multiple: true + # Need quotes around `3.10` otherwise it is treated as a number and shows as `3.1`. + options: + - + - "3.14" + - "3.13" + - "3.12" + - "3.11" + - "3.10" + - "3.9" + validations: + required: true diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 8761c4520..40d29767f 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -3,14 +3,55 @@ name: Build wheels on: workflow_dispatch: inputs: - sdist: + + flavours: + description: 'If set, we build separate PyMuPDF and PyMuPDFb wheels.' type: boolean - wheels: + default: false + + sdist: type: boolean + default: true + wheels_linux_aarch64: type: boolean - wheels_macos_arm64: + default: true + + wheels_linux_auto: + type: boolean + default: true + + wheels_linux_pyodide: + type: boolean + default: false + + wheels_windows_auto: + type: boolean + default: true + + wheels_macos_auto: type: boolean + default: true + + wheels_cps: + description: 'wheels_cps: sets $CIBW_BUILD, E.g. "cp310* cp311*".' + type: string + + PYMUPDF_SETUP_MUPDF_BUILD: + description: 'Value for PYMUPDF_SETUP_MUPDF_BUILD, e.g.: git:--branch master https://github.com/ArtifexSoftware/mupdf.git' + type: string + default: '-' + + #PYMUPDF_SETUP_MUPDF_BUILD_TYPE: + # description: 'Value for PYMUPDF_SETUP_MUPDF_BUILD, e.g. debug.' + # type: string + # default: '-' + # We can't currently have more than 10 inputs + + PYMUPDF_SETUP_PY_LIMITED_API: + description: 'If not "0", we build a single wheel for each platform.' + type: string + default: '' jobs: @@ -18,36 +59,65 @@ jobs: if: ${{ inputs.sdist }} name: Build sdist runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v2 - #with: - # fetch-depth: 0 # Optional, use if you use setuptools_scm - # submodules: true # Optional, use if you have submodules + - uses: actions/checkout@v4 - name: Build sdist - run: pipx run build --sdist + env: + inputs_wheels_default: 0 + inputs_sdist: 1 + inputs_flavours: ${{inputs.flavours}} + inputs_PYMUPDF_SETUP_MUPDF_BUILD: ${{inputs.PYMUPDF_SETUP_MUPDF_BUILD}} + run: + python scripts/gh_release.py - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: + name: sdist-${{ matrix.os }} path: dist/*.tar.gz build_wheels: - if: ${{ inputs.wheels }} + #if: ${{ inputs.wheels }} name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-2019, macos-latest] + # 2024-05-08: Need to specify macos-13/14 to get x86_64/arm64. + os: [ubuntu-latest, windows-2019, macos-13, macos-14] + # Avoid cancelling of all cibuildwheel runs after a single failure. + fail-fast: false + steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + # Get Python 3.12 x32 and x64 on Windows. (As of 2023-10-12 these are not + # always available by default.) + # + - name: Install Python 3.12 x32 on Windows. + if: runner.os == 'Windows' + uses: actions/setup-python@v5 + with: + python-version: '3.12' + architecture: x86 + - name: Install Python 3.12 x64 on Windows. + if: runner.os == 'Windows' + uses: actions/setup-python@v5 + with: + python-version: '3.12' # Get Python for running cibuildwheel. This also ensures that 'python' # works on MacOS, where it seems only 'python3' is available by default. # - - uses: actions/setup-python@v2 + # Note that it seem to be important on MacOS not to specify a + # Python version here with `python-version: '3.12'` - this makes + # `python-config3` return settings for Python-3.12, instead of for + # whatever Python is being used by cibuildwheel. + # + - uses: actions/setup-python@v5 # On Linux, get qemu so we can build for aarch64. # @@ -57,46 +127,36 @@ jobs: with: platforms: all - # Get cibuildwheel. - # - - name: Build wheels - uses: pypa/cibuildwheel@v2.11.2 - - # Set extra cibuildwheel options using environmental variables. + - name: gh_release + # Doesn't seem to be a way to passing inputs.* on command + # line, so we set environment instead. E.g. see: + # https://github.com/orgs/community/discussions/27088 # env: - # These exclusions are copied from PyMuPDF-1.19. - # - CIBW_SKIP: "pp* *i686 *-musllinux_* cp36*" + inputs_flavours: ${{inputs.flavours}} + inputs_sdist: ${{inputs.sdist}} - # On Linux and MacOS, tell cibuildwheel to build archs depending on - # inputs.wheels_linux_aarch64 and inputs.wheels_macos_arm64. - # - # https://github.amrom.workers.devmunity/t/possible-to-use-conditional-in-the-env-section-of-a-job/135170 - # Note that it seems that there must not be a space after the ':' in the following, i.e. - # ok: {"false":"auto", "true":"auto aarch64"} - # bad: {"false": "auto", "true": "auto aarch64"} - # - # This is useful: https://yamlchecker.com/ - # - CIBW_ARCHS_LINUX: ${{ fromJSON('{"false":"auto", "true":"auto aarch64"}')[inputs.wheels_linux_aarch64] }} - CIBW_ARCHS_MACOS: ${{ fromJSON('{"false":"auto", "true":"auto arm64"}')[inputs.wheels_macos_arm64] }} - - # For testing, build for single python version. - # - #CIBW_BUILD: "cp311*" - - # Get cibuildwheel to run pytest with each wheel. - # - # Setting verbosity here sometimes seems to result in SEGV's when - # running pytest. - # - CIBW_TEST_REQUIRES: "fontTools pytest" - CIBW_TEST_COMMAND: "pytest -s {project}/tests" - CIBW_BUILD_VERBOSITY: 3 + inputs_wheels_linux_aarch64: ${{inputs.wheels_linux_aarch64}} + inputs_wheels_linux_auto: ${{inputs.wheels_linux_auto}} + inputs_wheels_linux_pyodide: ${{inputs.wheels_linux_pyodide}} + #inputs_wheels_macos_arm64: ${{inputs.wheels_macos_arm64}} + inputs_wheels_macos_auto: ${{inputs.wheels_macos_auto}} + inputs_wheels_windows_auto: ${{inputs.wheels_windows_auto}} + + inputs_PYMUPDF_SETUP_MUPDF_BUILD: ${{inputs.PYMUPDF_SETUP_MUPDF_BUILD}} + #inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE: ${{inputs.PYMUPDF_SETUP_MUPDF_BUILD_TYPE}} + + inputs_wheels_cps: ${{inputs.wheels_cps}} + + PYMUPDF_SETUP_PY_LIMITED_API: ${{inputs.PYMUPDF_SETUP_PY_LIMITED_API}} + + run: + python scripts/gh_release.py + # Upload generated wheels, to be accessible from github Actions page. # - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: + name: wheels-${{ matrix.os }} path: ./wheelhouse/*.whl diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 412e498f1..1ff2e9591 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -1,7 +1,5 @@ name: "CLA Assistant" on: - issue_comment: - types: [created] pull_request_target: types: [opened,closed,synchronize] @@ -12,7 +10,7 @@ jobs: - name: "CLA Assistant" if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' # Beta Release - uses: contributor-assistant/github-action@v2.2.0 + uses: contributor-assistant/github-action@v2.4.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # the below token should have repo scope and must be manually added by you in the repository's secret @@ -21,10 +19,10 @@ jobs: path-to-signatures: 'signatures/version1/cla.json' path-to-document: 'https://artifex.com/documents/Artifex%20Contributor%20License%20Agreement.pdf' # branch should not be protected - branch: 'main' + branch: 'CLA' allowlist: - # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken + # the following are the optional inputs - If the optional inputs are not given, then default values will be taken #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' diff --git a/.github/workflows/test-valgrind.yml b/.github/workflows/test-valgrind.yml index 3806a1e71..244fcb95a 100644 --- a/.github/workflows/test-valgrind.yml +++ b/.github/workflows/test-valgrind.yml @@ -1,99 +1,27 @@ name: Test valgrind on: - schedule: - - cron: '13 5 * * *' workflow_dispatch: + schedule: + - cron: '13 6 * * *' jobs: - test_valgrind: - name: Test valgrind - runs-on: ${{ matrix.os }} + valgrind: + name: valgrind + runs-on: ubuntu-latest strategy: matrix: - os: [ubuntu-latest] - - # Avoid cancelling of all runs after a single failure. + args: [ + '', + '-m "git:--branch master https://github.com/ArtifexSoftware/mupdf"', + '-m "git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf"', + ] fail-fast: false - + steps: - - - uses: actions/checkout@v2 - - - uses: actions/setup-python@v2 - with: - # python-3.11 seems to generate valgrind errors e.g. 'Use of - # uninitialised value of size 8' in Py_INCREF. - # - # python-3.9 works. - # python-3.10 works. - python-version: '3.10' - - - name: Test valgrind - - run: | - import os - import subprocess - import sys - - def log(text): - print(f'test-valgrind.yml: {text}') - sys.stdout.flush() - - def run(command, env_extra=None): - env = None - if env_extra: - env = os.environ.copy() - env.update(env_extra) - log(f'Adding environment:') - for n, v in env_extra.items(): - log(f' {n}: {v!r}') - log(f'Running: {command}') - sys.stdout.flush() - subprocess.run(command, check=1, shell=1, env=env) - - # Change into parent directory (we will originally be inside the - # PyMuPDF checkout), otherwise there's potential confusion caused - # by the `fitz/` directory not being the installed `fitz` module. - # - log('Changing into parent directory of checkout.') - leaf = os.path.basename(os.getcwd()) - log(f'{os.getcwd()=}') - os.chdir('..') - log(f'{os.getcwd()=}') - - log('Installing valgrind.') - run(f'sudo apt install valgrind') - - log('Creating venv.') - run(f'{sys.executable} -m venv pylocal') - - log('Install required python packages.') - run(f'./pylocal/bin/python -m pip install -U pip') - run(f'./pylocal/bin/python -m pip install pytest fontTools') - - log('Installing PyMuPDF.') - if 0: - # Useful for quick testing - use pypi.org package instead of - # building locally. - run(f'./pylocal/bin/python -m pip install pymupdf') - else: - run( - f'./pylocal/bin/python -m pip install -vv ./{leaf}', - env_extra=dict( - PYMUPDF_SETUP_MUPDF_TGZ='', - PYMUPDF_SETUP_MUPDF_BUILD='git:--recursive --depth 1 --shallow-submodules --branch master https://github.com/ArtifexSoftware/mupdf.git', - PYMUPDF_SETUP_MUPDF_BUILD_TYPE='debug', - ), - ) - - log('Running PyMuPDF tests under valgrind.') - # We ignore memory leaks. - run(f'valgrind --error-exitcode=100 --errors-for-leak-kinds=none --fullpath-after= ./pylocal/bin/python -m pytest -s -vv {leaf}', - env_extra=dict( - PYTHONMALLOC='malloc', - ), - ) - - shell: python + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: valgrind + run: + python scripts/test.py ${{matrix.args}} -P 1 -T valgrind build test diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 214df8390..c7b59c722 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,51 +1,64 @@ -name: Test +# Run scripts/test.py directly on multiple Github servers. Instead of +# specifying individual inputs, we support a single string input which is used +# for the command line directly. +# +# This ensures we behave exactly like scripts/test.py, without confusion caused +# by having to translate between differing APIs. + +name: Tests on: - schedule: - - cron: '13 5 * * *' + #schedule: + # - cron: '47 4 * * *' + #pull_request: + # branches: [main] workflow_dispatch: + inputs: + args: + type: string + default: '' + description: 'Arguments to pass to scripts/test.py' jobs: test: - # Build+test current PyMuPDF git. - # name: Test runs-on: ${{ matrix.os }} strategy: matrix: - # 2023-05-09: Builds on Windows-latest do not work because our wdev.py - # (used to build MuPDF) picks up a later VS than setuptools (when - # building SWIG-generated PyMuPDF code). - # - os: [ubuntu-latest, windows-2019, macos-latest] + os: [ubuntu-latest, windows-2022, macos-13, macos-14] - # Avoid cancelling of all cibuildwheel runs after a single failure. + # Avoid cancelling of all runs after a single failure. fail-fast: false steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 - # Set up cibuildwheel. - # - - name: cibuildwheel - uses: pypa/cibuildwheel@v2.11.2 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + # https://github.com/pypa/cibuildwheel/issues/2114 + # https://cibuildwheel.pypa.io/en/stable/faq/#emulation + # + - name: Set up QEMU + if: runner.os == 'Linux' && runner.arch == 'X64' + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: test env: - # Build will use the default hard-coded mupdf URL. - - # Build on single cpu. - CIBW_ARCHS_LINUX: x86_64 - - # Build for single python version. - CIBW_BUILD: "cp311*" - - # Don't build for unsupported platforms or win32. - CIBW_SKIP: "pp* *i686 *-musllinux_* cp36* *win32*" - - # Get cibuildwheel to run pytest with each wheel. - CIBW_TEST_REQUIRES: "fontTools pytest" - CIBW_TEST_COMMAND: "pytest -s {project}/tests" - CIBW_BUILD_VERBOSITY: 3 + PYMUPDF_test_args: ${{inputs.args}} + run: + python scripts/test.py -a PYMUPDF_test_args + + # Upload generated wheels, to be accessible from github Actions page. + # + - uses: actions/upload-artifact@v4 + with: + path: | + wheelhouse/pymupdf*.whl + wheelhouse/pymupdf*.tar.gz + name: artifact-${{ matrix.os }} diff --git a/.github/workflows/test_multiple.yml b/.github/workflows/test_multiple.yml new file mode 100644 index 000000000..54f09444b --- /dev/null +++ b/.github/workflows/test_multiple.yml @@ -0,0 +1,39 @@ +# Run scripts/test.py on multiple OS's (Windows, Linux, MacOS x64, MacOS arm64) +# and with multiple specifications of MuPDF (PyMuPDF's hard-coded default, +# master branch, release branch). + +name: multiple + +on: + workflow_dispatch: + inputs: + args: + type: string + default: '' + description: 'Additional arguments to scripts/test.py' + schedule: + - cron: '13 6 * * *' + +jobs: + + multiple: + name: multiple + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-2022, macos-13, macos-14] + args: [ + '', + '-m "git:--branch master https://github.com/ArtifexSoftware/mupdf"', + '-m "git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf"', + ] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: multiple + env: + PYMUPDF_test_args: ${{inputs.args}} + run: + python scripts/test.py ${{matrix.args}} wheel test -a PYMUPDF_test_args diff --git a/.github/workflows/test_mupdf-master-branch.yml b/.github/workflows/test_mupdf-master-branch.yml deleted file mode 100644 index 0bb7f5a78..000000000 --- a/.github/workflows/test_mupdf-master-branch.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Test mupdf master branch - -on: - schedule: - - cron: '13 6 * * *' - workflow_dispatch: - -jobs: - - test_mupdf_master_branch: - # Build+test current PyMuPDF git with mupdf git master branch. - # - name: Test mupdf master branch - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-2019, macos-latest] - - # Avoid cancelling of all cibuildwheel runs after a single failure. - fail-fast: false - - steps: - - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - # Set up cibuildwheel. - # - - name: cibuildwheel - uses: pypa/cibuildwheel@v2.11.2 - - env: - # PYMUPDF_SETUP_MUPDF_TGZ="": don't embed mupdf in sdist - no need - # because the build stage gets MuPDF using `git clone ...`. - # - # PYMUPDF_SETUP_MUPDF_BUILD="git:...": build with mupdf from a `git - # clone` command, selecting the current master branch. - # - CIBW_ENVIRONMENT: PYMUPDF_SETUP_MUPDF_TGZ="" PYMUPDF_SETUP_MUPDF_BUILD="git:--recursive --depth 1 --shallow-submodules --branch master https://github.com/ArtifexSoftware/mupdf.git" - - # Build on single cpu. - CIBW_ARCHS_LINUX: x86_64 - - # Build for single python version. - CIBW_BUILD: "cp311*" - - # Don't build for unsupported platforms. - CIBW_SKIP: "pp* *i686 *-musllinux_* cp36*" - - # Get cibuildwheel to run pytest with each wheel. - CIBW_TEST_REQUIRES: "fontTools pytest" - CIBW_TEST_COMMAND: "pytest -s {project}/tests" - CIBW_BUILD_VERBOSITY: 3 diff --git a/.github/workflows/test_mupdf-release-branch.yml b/.github/workflows/test_mupdf-release-branch.yml deleted file mode 100644 index 9f87834ad..000000000 --- a/.github/workflows/test_mupdf-release-branch.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Test mupdf release branch - -on: - schedule: - - cron: '20 6 * * *' - workflow_dispatch: - -jobs: - - test_mupdf_release_branch: - # Build+test current PyMuPDF git with mupdf git release branch. - # - name: Test mupdf release branch - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-2019, macos-latest] - - # Avoid cancelling of all cibuildwheel runs after a single failure. - fail-fast: false - - steps: - - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - # Set up cibuildwheel. - # - - name: cibuildwheel - uses: pypa/cibuildwheel@v2.11.2 - - env: - # PYMUPDF_SETUP_MUPDF_TGZ="": don't embed mupdf in sdist - no need - # because the build stage gets MuPDF using `git clone ...`. - # - # PYMUPDF_SETUP_MUPDF_BUILD="git:...": build with mupdf from a `git - # clone` command, selecting the current release branch. - # - CIBW_ENVIRONMENT: PYMUPDF_SETUP_MUPDF_TGZ="" PYMUPDF_SETUP_MUPDF_BUILD="git:--recursive --depth 1 --shallow-submodules --branch 1.22.x https://github.com/ArtifexSoftware/mupdf.git" - - # Build on single cpu. - CIBW_ARCHS_LINUX: x86_64 - - # Build for single python version. - CIBW_BUILD: "cp311*" - - # Don't build for unsupported platforms. - CIBW_SKIP: "pp* *i686 *-musllinux_* cp36*" - - # Get cibuildwheel to run pytest with each wheel. - CIBW_TEST_REQUIRES: "fontTools pytest" - CIBW_TEST_COMMAND: "pytest -s {project}/tests" - CIBW_BUILD_VERBOSITY: 3 diff --git a/.github/workflows/test_pyodide.yml b/.github/workflows/test_pyodide.yml new file mode 100644 index 000000000..bc1e823ef --- /dev/null +++ b/.github/workflows/test_pyodide.yml @@ -0,0 +1,41 @@ +name: Test pyodide + +# Build and test pyodide wheels using cibuildwheel. + +on: + workflow_dispatch: + + schedule: + - cron: '13 5 * * *' + +jobs: + + pyodide: + name: pyodide + runs-on: ubuntu-latest + strategy: + matrix: + # 2025-09-05: We don't test with default mupdf because mupdf-1.26.7 + # does not have the required pyodide rpath changes. + args: [ + # '', + '-m "git:--branch master https://github.com/ArtifexSoftware/mupdf"', + '-m "git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf"', + ] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: pyodide + run: + python scripts/test.py ${{matrix.args}} --cibw-pyodide 1 cibw + + # We do not use upload-artifact@v4 because it fails due to us creating + # identically-named wheels. + #- uses: actions/upload-artifact@v4 + # with: + # path: ./wheelhouse/*.whl diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index a1fe5bb6a..e07aadf35 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -1,51 +1,46 @@ -name: Test quick +name: test_quick on: pull_request: branches: [main] workflow_dispatch: + inputs: + args: + type: string + default: '' + description: 'Additional arguments to scripts/test.py' jobs: - test_quick: - name: Test quick + master: + name: mupdf master runs-on: ${{ matrix.os }} strategy: matrix: - # We test on just Ubuntu, with hard-coded MuPDF, MuPDF master, and current MuPDF branch. - # os: [ubuntu-latest] - environment: [ - '', - 'PYMUPDF_SETUP_MUPDF_TGZ="" PYMUPDF_SETUP_MUPDF_BUILD="git:--recursive --depth 1 --shallow-submodules --branch master https://github.com/ArtifexSoftware/mupdf.git"', - 'PYMUPDF_SETUP_MUPDF_TGZ="" PYMUPDF_SETUP_MUPDF_BUILD="git:--recursive --depth 1 --shallow-submodules --branch 1.22.x https://github.com/ArtifexSoftware/mupdf.git"', - ] - - # Avoid cancelling of all cibuildwheel runs after a single failure. fail-fast: false - steps: - - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - - name: cibuildwheel - uses: pypa/cibuildwheel@v2.11.2 - + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: mupdf master env: - CIBW_ENVIRONMENT: ${{matrix.environment}} - - # Build on single cpu. - CIBW_ARCHS_LINUX: x86_64 - - # Build for single python version. - CIBW_BUILD: "cp311*" - - # Don't build for unsupported platforms or win32. - CIBW_SKIP: "pp* *i686 *-musllinux_* cp36* *win32*" + PYMUPDF_test_args: ${{inputs.args}} + run: + python scripts/test.py build test -m 'git:--branch master https://github.com/ArtifexSoftware/mupdf.git' -a PYMUPDF_test_args - # Get cibuildwheel to run pytest with each wheel. - CIBW_TEST_REQUIRES: "fontTools pytest" - CIBW_TEST_COMMAND: "pytest -s {project}/tests" - CIBW_BUILD_VERBOSITY: 3 + release: + name: mupdf release + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + fail-fast: false + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: mupdf release + env: + PYMUPDF_test_args: ${{inputs.args}} + run: + python scripts/test.py build test -m 'git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf.git' -a PYMUPDF_test_args diff --git a/.github/workflows/test_sysinstall.yml b/.github/workflows/test_sysinstall.yml new file mode 100644 index 000000000..a913de748 --- /dev/null +++ b/.github/workflows/test_sysinstall.yml @@ -0,0 +1,50 @@ +name: Test sysinstall + +on: + schedule: + - cron: '13 4 * * *' + workflow_dispatch: + inputs: + args: + description: 'Extra args for scripts/sysinstall.py.' + +jobs: + + sysinstall: + name: Test sysinstall + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + + steps: + + - uses: actions/checkout@v4 + + # It seems to be important not to install a custom python here, + # because `sudo` (which we need to use when installing to /usr/local + # etc) always ends up running the default python, even if we set + # $PATH etc. So for example we can end up with mupdf files and + # pymupdf files being installed into .../python3.11/site-packages and + # .../python3.10/site-packages, and tests all fail to import pymupdf. + # + #- uses: actions/setup-python@v5 + #with: + # # 3.12 doesn't have setuptools. As of 2024-01-03, MuPDF build requires setuptools before it + # # sees `--venv` and defers to a venv, so we currently have to force use of python 3.11. + # python-version: '3.11' + + + - name: sysinstall_venv + env: + PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST: ${{inputs.args}} + run: + # Use venv. + python3 scripts/sysinstall.py --mupdf-git '--branch master https://github.com/ArtifexSoftware/mupdf.git' + + - name: sysinstall_sudo + env: + PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST: ${{inputs.args}} + run: + # Do not use a venv, instead install required packages with sudo. + python3 scripts/sysinstall.py --mupdf-git '--branch master https://github.com/ArtifexSoftware/mupdf.git' --pip sudo --root / diff --git a/.python-version b/.python-version new file mode 100644 index 000000000..e4fba2183 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7e016e478..d20a78bb1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -20,8 +20,7 @@ sphinx: configuration: docs/conf.py # If using Sphinx, optionally build your docs in additional formats such as PDF -formats: - - pdf +formats: all # Optionally declare the Python requirements required to build your docs python: diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2470e2f22..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include fitz/*.i -include fitz/_config.h -include mupdf.tgz -recursive-include tests * -global-exclude __pycache__/* diff --git a/README.md b/README.md index e7d20d791..b9f99949d 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,60 @@ -# PyMuPDF 1.22.5 +# PyMuPDF -![logo](https://artifex.com/images/logos/py-mupdf-github-icon.png) +**PyMuPDF** is a high performance **Python** library for data extraction, analysis, conversion & manipulation of [PDF (and other) documents](https://pymupdf.readthedocs.io/en/latest/the-basics.html#supported-file-types). +# Community +Join us on **Discord** here: [#pymupdf](https://discord.gg/TSpYGBW4eq) -Release date: June 21, 2023 -On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![Downloads](https://static.pepy.tech/personalized-badge/pymupdf?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads)](https://pepy.tech/project/pymupdf) +# Installation -# Author -[Artifex](mailto:support@artifex.com), based on code by [Jorj X. McKie](mailto:jorj.x.mckie@outlook.de) and [Ruikai Liu](mailto:lrk700@gmail.com). +**PyMuPDF** requires **Python 3.9 or later**, install using **pip** with: -# Introduction +`pip install PyMuPDF` -PyMuPDF adds Python bindings and abstractions to [MuPDF](https://mupdf.com/), a lightweight PDF, XPS, and eBook viewer, renderer, and toolkit. Both PyMuPDF and MuPDF are maintained and developed by Artifex Software, Inc. +There are **no mandatory** external dependencies. However, some [optional features](#pymupdf-optional-features) become available only if additional packages are installed. -MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB, MOBI and FB2 (eBooks) formats, and it is known for its top performance and exceptional rendering quality. +You can also try without installing by visiting [PyMuPDF.io](https://pymupdf.io/#examples). -With PyMuPDF you can access files with extensions like `.pdf`, `.xps`, `.oxps`, `.cbz`, `.fb2`, `.mobi` or `.epub`. In addition, about 10 popular image formats can also be handled like documents: `.png`, `.jpg`, `.bmp`, `.tiff`, `.svg` etc. # Usage -For all supported document types (i.e. **_including images_**) you can -* Decrypt the document. -* Access meta information, links and bookmarks. -* Render pages in raster formats (PNG and some others), or the vector format SVG. -* Search for text. -* Extract text and images. -* Convert to other formats: PDF, (X)HTML, XML, JSON, text. -* Do OCR (Optical Character Recognition) if Tesseract is installed. - -> To some degree, PyMuPDF can also be used as an [image converter](https://github.com/pymupdf/PyMuPDF/wiki/How-to-Convert-Images): it can read a range of input formats and can produce **Portable Network Graphics (PNG)**, **Portable Anymaps** (**PNM**, etc.), **Portable Arbitrary Maps (PAM)**, **Adobe PostScript** and **Adobe Photoshop** documents, making the use of other graphics packages obselete in these cases. But interfacing with e.g. PIL/Pillow for image input and output is easy as well. - -For **PDF documents,** there exists a plethora of additional features: they can be created, joined or split up. Pages can be inserted, deleted, re-arranged or modified in many ways (including annotations and form fields). -* Images and fonts can be extracted or inserted. - > You may want to have a look at [this](https://github.com/pymupdf/PyMuPDF-Utilities/blob/master/examples/edit-images/edit.py) cool GUI example script, which lets you **_insert, delete, replace_** or **_re-position_** images under your visual control. +Basic usage is as follows: - > If [fontTools](https://pypi.org/project/fonttools/) is installed, subsets can be built for eligible fonts based on their usage in the document. Especially for new PDFs, this can lead to significant file size reductions. -* Embedded files are fully supported. -* PDFs can be reformatted to support double-sided printing, posterizing, applying logos or watermarks -* Password protection is fully supported: decryption, encryption, encryption method selection, permission level and user / owner password setting. -* Support of the **PDF Optional Content** concept for images, text and drawings. -* Low-level PDF structures can be accessed and modified. -* **Command line module** ``"python -m fitz ..."``. A versatile utility with the following features +```python +import pymupdf # imports the pymupdf library +doc = pymupdf.open("example.pdf") # open a document +for page in doc: # iterate the document pages + text = page.get_text() # get plain text encoded as UTF-8 - - **encryption / decryption / optimization** - - creation of **sub-documents** - - document **joining** - - **image / font extraction** - - full support of **embedded files** - - **_layout-preserving text extraction_** (all documents) - - -Have a look at the basic [demos](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/demo), the [examples](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/examples) (which contain complete, working programs), and [notebooks](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/jupyter-notebooks). +``` # Documentation -Documentation is written using Sphinx and is available online. It is currently a combination of a reference guide and user manual. - -* You can view it online at [Read the Docs](https://pymupdf.readthedocs.io). This site also provides download options for PDF. -* For a **quick start** look at the [tutorial](https://pymupdf.readthedocs.io/en/latest/tutorial.html) and the [recipes](https://pymupdf.readthedocs.io/en/latest/faq.html) chapters. +Full documentation can be found on [pymupdf.readthedocs.io](https://pymupdf.readthedocs.io). -The latest changelog can be viewed [here](https://pymupdf.readthedocs.io/en/latest/changes.html). -# Installation - -PyMuPDF **requires Python 3.7 or later**. +# Optional Features -For versions 3.7 and up, Python wheels exist for **Windows** (32bit and 64bit), **Linux** (64bit, Intel and ARM) and **Mac OSX** (64bit, Intel only), so it can be installed from [PyPI](https://pypi.org/search/?q=pymupdf) in the usual way. To ensure pip support for the latest wheel platform tags, we strongly recommend to always upgrade pip first. +* [fontTools](https://pypi.org/project/fonttools/) for creating font subsets. +* [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) contains some nice fonts for your text output. +* [Tesseract-OCR](https://github.com/tesseract-ocr/tesseract) for optical character recognition in images and document pages. - python -m pip install --upgrade pip - python -m pip install --upgrade pymupdf -There are **no mandatory** external dependencies. However, some **optional features** become available only if additional packages are installed: -* [Pillow](https://pypi.org/project/Pillow/) for using pillow image output directly from PyMuPDF -* [fontTools](https://pypi.org/project/fonttools/) for creating font subsets. -* [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) contains some nice fonts for your text output. -* [Tesseract-OCR](https://github.com/tesseract-ocr/tesseract) for optical character recognition in images and document pages. Tesseract is separate software, not a Python package. To enable OCR functions in PyMuPDF, the system environment variable `"TESSDATA_PREFIX"` must be defined and contain the `tessdata` folder name of the Tesseract installation location. +# About +**PyMuPDF** adds **Python** bindings and abstractions to [MuPDF](https://mupdf.com/), a lightweight **PDF**, **XPS**, and **eBook** viewer, renderer, and toolkit. Both **PyMuPDF** and **MuPDF** are maintained and developed by [Artifex Software, Inc](https://artifex.com). -Older wheels - also with support for older Python versions - can be found [here](https://github.com/pymupdf/PyMuPDF-Optional-Material/tree/master/wheels-upto-Py3.5) and on PyPI. +**PyMuPDF** was originally written by [Jorj X. McKie](mailto:jorj.x.mckie@outlook.de). -> **Note:** If `pip` cannot find a wheel that is compatible with your platform, it will automatically build and install from source using the PyMuPDF sdist; this requires only that SWIG is installed on your system. # License and Copyright -PyMuPDF and MuPDF are available under both, open-source AGPL and commercial license agreements. +**PyMuPDF** is available under [open-source AGPL](https://www.gnu.org/licenses/agpl-3.0.html) and commercial license agreements. If you determine you cannot meet the requirements of the **AGPL**, please contact [Artifex](https://artifex.com/contact/pymupdf-inquiry.php) for more information regarding a commercial license. -Please read the full text of the [AGPL license agreement](https://www.gnu.org/licenses/agpl-3.0.html) (which is also included here in file COPYING) to ensure that your use case complies with the guidelines of this license. If you determine you cannot meet the requirements of the AGPL, please contact [Artifex](https://artifex.com/contact/pymupdf-inquiry.php) for more information regarding a commercial license. -Artifex is the exclusive commercial licensing agent for MuPDF. -Artifex, the Artifex logo, MuPDF, and the MuPDF logo are registered trademarks of Artifex Software Inc. PyMuPDF and the PyMuPDF logo are trademarks of Artifex Software, Inc. © 2022 Artifex Software, Inc. All rights reserved. -# Contact -Please use the [Discussions](https://github.com/pymupdf/PyMuPDF/discussions) menu for questions, comments, or asking for help, and submit issues [here](https://github.com/pymupdf/PyMuPDF/issues). diff --git a/READMEb.md b/READMEb.md new file mode 100644 index 000000000..3e1900038 --- /dev/null +++ b/READMEb.md @@ -0,0 +1,7 @@ +# PyMuPDFb + +This wheel contains [MuPDF](https://mupdf.readthedocs.io/) shared libraries for +use by [PyMuPDF](https://pymupdf.readthedocs.io/). + +This wheel is shared by PyMuPDF wheels that are specific to different Python +versions, significantly reducing the total size of a release. diff --git a/READMEd.md b/READMEd.md new file mode 100644 index 000000000..e37c5c0ea --- /dev/null +++ b/READMEd.md @@ -0,0 +1,4 @@ +# PyMuPDFd + +This wheel contains [MuPDF](https://mupdf.readthedocs.io/) build-time files +that were used to build [PyMuPDF](https://pymupdf.readthedocs.io/). diff --git a/changes.txt b/changes.txt index 2b893912f..ff33fb6a0 100644 --- a/changes.txt +++ b/changes.txt @@ -2,6 +2,1080 @@ Change Log ========== +**Changes in version 1.26.6** + +* Fixed issues: + + * **Fixed** `4699 `_: cannot find ExtGState resource + * **Fixed** `4712 `_: Crash with "corrupted double-linked list" + * **Fixed** `4742 `_: 'Rect' object has no attribute 'get_area' + * **Fixed** `4746 `_: Document.__init__() got an unexpected keyword argument 'encoding' + + +**Changes in version 1.26.5** (2025-10-10) + +* Use MuPDF-1.26.10. + +* Fixed issues: + + * **Fixed** `2883 `_: Improve the Python type annotations for fitz_new + * **Fixed** `4507 `_: Bugs in pyodide + * **Fixed** `4613 `_: Thai and number blocks are not auto-scaled and get wrong hyphen when using in insert_htmlbox + * **Fixed** `4700 `_: pymupdf.open() processes .zip file without raising + * **Fixed** `4716 `_: Problems with unreadable characters + +* Other: + + * Supported Python versions are now 3.9-3.14. + * We now define all class methods explicitly instead of with dynamic assignment; this improves type hints. + * Removed `pymupdf.utils.Shape` class, was duplicate of `pymupdf.Shape`. + * Allow use of cibuildwheel to build and test on Pyodide. + * Fixed various Pyodide bugs. + * In documentation, added section about Linux wheels and glibc compatibility. + * Improved documentation of pymupdf.open()'s arg. + * Retrospectively mark `4544 `_ as fixed in 1.26.4. + + +**Changes in version 1.26.4 (2025-08-25)** + +* Use MuPDF-1.26.7. + +* Fixed issues: + + * **Fixed** `3806 `_: pdf to image rendering ignore optional content offs + * **Fixed** `4388 `_: Incorrect PixMap from page due to cached data from other PDF + * **Fixed** `4457 `_: Wrong characters displayed after font subsetting (w/ native method) + * **Fixed** `4462 `_: delete_pages() does not accept a single int + * **Fixed** `4533 `_: Open PDF error segmentation fault + * **Fixed** `4544 `_: About pdf_clip_page + * **Fixed** `4565 `_: MacOS uses Tesseract and not Tesseract-OCR + * **Fixed** `4571 `_: Broken merged pdfs. + * **Fixed** `4590 `_: TypeError in utils.py scrub(): annot.update_file(buffer=...) is invalid + * **Fixed** `4614 `_: Intercept bad widgets when inserting to another PDF + * **Fixed** `4639 `_: pymupdf.mupdf.FzErrorGeneric: code=1: Director error: : 'JM_new_bbox_device_Device' object has no attribute 'layer_name' + +* Other: + + * Check that #4392 `Segfault when running with pytest and -Werror` is fixed if PyMuPDF is built with swig>=4.4. + * Add `Page.clip_to_rect()`. + * Improved search for Tesseract data. + * Retrospectively mark #4496 as fixed in 1.26.1. + * Retrospectively mark #4503 as fixed in 1.26.3. + * Added experimental support for Graal. + + +**Changes in version 1.26.3 (2025-07-02)** + +* Use MuPDF-1.26.3. + +* Fixed issues: + + * **Fixed** `4462 `_: delete_pages() does not accept a single int + * **Fixed** `4503 `_: Undetected character styles + * **Fixed** `4527 `_: Rect.intersects() is much slower than necessary + * **Fixed** `4564 `_: Possible encoding issue in PDF metadata + * **Fixed** `4575 `_: Bug with IRect contains method + +* Other: + + * Class Shape is now available as pymupdf.Shape. + * Added table cell markdown support. + + +**Changes in version 1.26.2** + +[Skipped.] + + +**Changes in version 1.26.1 (2025-06-11)** + +* Use MuPDF-1.26.2. + +* Fixed issues: + + * **Fixed** `4520 `_: show_pdf_page does not like empty pages created by new_page + * **Fixed** `4524 `_: fitz.get_text ignores 'pages' kwarg + * **Fixed** `4412 `_: Regression? Spurious error? in insert_pdf in v1.25.4 + * **Fixed** `4496 `_: pymupdf4llm with pymupdfpro + +* Other: + + * Partial fix for `4503 `_: Undetected character styles + * New method `Document.rewrite_images()`, useful for reducing file size, changing image formats, or converting color spaces. + * `Page.get_text()`: restrict positional args to match docs. + * Removed bogus definition of class `Shape`. + * Removed release date from module, docs and changelog. + * `pymupdf.pymupdf_date` and `pymupdf.VersionDate` are now both None. + * They will be removed in a future release. + + +**Changes in version 1.26.0 (2025-05-22)** + +* Use MuPDF-1.26.1. + +* Fixed issues: + + * **Fixed** `4324 `_: cluster_drawings() fails to cluster horizontal and vertical thin lines + * **Fixed** `4363 `_: Trouble with searching + * **Fixed** `4404 `_: IndexError in page.get_links() + * **Fixed** `4412 `_: Regression? Spurious error? in insert_pdf in v1.25.4 + * **Fixed** `4423 `_: pymupdf.mupdf.FzErrorFormat: code=7: cannot find object in xref error encountered after version 1.25.3 + * **Fixed** `4435 `_: get_pixmap method stuck on one page + * **Fixed** `4439 `_: New Xml class from data does not work - bug in code + * **Fixed** `4445 `_: Broken XREF table incorrectly repaired + * **Fixed** `4447 `_: Stroke color of annotations cannot be correctly set + * **Fixed** `4479 `_: set_layer_ui_config() toggles all layers rather than just one + * **Fixed** `4505 `_: Follow Widget flag values up its parent structure + +* Other: + + * Partial fixed for `4457 `_: Wrong characters displayed after font subsetting (w/ native method) + * Support image stamp annotations. + * Support recoloring pages. + * Added example of using Django's file storage API to open files with pymupdf. + * Clarified FreeText annotation color options. + We now raise an exception if an attempt is made to set attributes that can not be supported. + * Fixed potential segv in Pixmap.is_unicolor(). + * Added runtime assert that that PyMuPDF and MuPDF were built with compatible + NDEBUG settings (related to `4390 `_). + * Simplified handling of filename/filetype when opening documents. + * Removed PDF linearization support. + * Calls to `Document.save()` with `linear` set to true will now raise an exception. + * See https://artifex.com/blog/mupdf-removes-linearisation for more information. + +**Changes in version 1.25.5 (2025-03-31)** + +* Fixed issues: + + * **Fixed** `4372 `_: Text insertion fails due to missing /Resources object + * **Fixed** `4400 `_: Infinite loop in fill_textbox + * **Fixed** `4403 `_: Unable to get_text() - layer/clip nesting too deep + * **Fixed** `4415 `_: PDF page is mirrored, origin is at bottom-left + +* Other: + + * Use MuPDF-1.25.6. + * Fixed MuPDF SEGV on MacOS with particular fonts. + * Fixed `Annot.get_textpage()`'s `clip` arg. + * Fixed Python-3.14 (pre-release) build error. + + +**Changes in version 1.25.4 (2025-03-14)** + +* Use MuPDF-1.25.5. + +* Fixed issues: + + * **Fixed** `4079 `_: Unexpected result for apply_redactions() + * **Fixed** `4224 `_: MuPDF error: format error: negative code in 1d faxd + * **Fixed** `4303 `_: page.get_image_info() returns outdated cached results after replacing image + * **Fixed** `4309 `_: FzErrorFormat Error When Deleting First Page + * **Fixed** `4336 `_: Major Performance Regression: pix.color_count is 150x slower in version 1.25.3 compared to 1.23.8 + * **Fixed** `4341 `_: Invalid label retrieval when /Kids is an array of multiple /Nums + +* Other: + + * Fixed handling of duplicate widget names when joining PDFs (PR #4347). + * Improved Pyodide build. + * Avoid SWIG-related build errors with Python-3.13 by disabling PY_LIMITED_API. + + +**Changes in version 1.25.3 (2025-02-06)** + +* Use MuPDF-1.25.4. + +* Fixed issues: + + * **Fixed** `4139 `_: Text color numbers change between 1.24.14 and 1.25.0 + * **Fixed** `4141 `_: Some insertion methods fails for pages without a /Resources object + * **Fixed** `4180 `_: Search problems + * **Fixed** `4182 `_: Text coordinate extraction error + * **Fixed** `4245 `_: Highlighting issue distorted on recent versions + * **Fixed** `4254 `_: add_freetext_annot is drawing text outside the annotation box + +* Other: + + * In annotations: + * Added support for subtype FreeTextCallout. + * Added support for rich text. + * Added miter_limit arg to insert_text*() to allow suppression of spikes caused by long miters. + * Add Widget Support to `Document.insert_pdf()`. + * Add `bibi` to span dicts. + * Add `synthetic' to char dict. + * Fixed Pyodide builds. + + +**Changes in version 1.25.2 (2025-01-17)** + +* Fixed issues: + + * **Fixed** `4055 `_: "Yes" for all checkboxes does not work for all PDF rendering engines. + * **Fixed** `4155 `_: samples_mv is unsafe + * **Fixed** `4162 `_: Got AttributeError, when tried to add Signature field + * **Fixed** `4186 `_: Incorrect handling of JPEG with color space CMYK image extraction + * **Fixed** `4195 `_: Pixmaps that are inverted and have an alpha channel are not rendered properly + * **Fixed** `4225 `_: pixmap.pil_save() fails due to colorspace definition + * **Fixed** `4232 `_: Incorrect Font style and Size + +* Other: + + * Use Python's built-in glyphname <> unicode conversion. + * Improve speed of pixmap color inversion. + * Add new `char_flags` member to span dictionary, for example allows detection of invisible text. + * Detect image masks in TextPage output. + * Added `Pixmap.pil_image()`. + + +**Changes in version 1.25.1 (2024-12-11)** + +* Use MuPDF-1.25.2. + +* Fixed issues: + + * **Fixed** `4125 `_: memory leak while convert Pixmap's colorspace + * **Fixed** `4034 `_: Possible regression in pdf cleaning during save. + + +**Changes in version 1.25.0 (2024-12-05)** + +* Use MuPDF-1.25.1. + +* Fixed issues: + + * **Fixed** `4026 `_: page.get_text('blocks') output two piece of very similar text with different bbox + * **Fixed** `4004 `_: Segmentation Fault When Updating PDF Form Field Value + * **Fixed** `3887 `_: Subset Fonts problem using Fallback Font + * **Fixed** `3886 `_: Another issue with destroying PDF when inserting html + * **Fixed** `3751 `_: apply_redactions causes part of the page content to be hidden / transparent + + +.. codespell:ignore-begin + +**Changes in version 1.24.14 (2024-11-19)** + +* Use MuPDF-1.24.11. + +* Fixed issues: + + * **Fixed** `3448 `_: get_pixmap function removes the table and leaves just the content behind + * **Fixed** `3758 `_: Got "malloc(): unaligned tcache chunk detected Aborted (core dumped)" while using add_redact_annot/apply_redactions + * **Fixed** `3813 `_: Stories: Ordered list count broken with nested unordered list + * **Fixed** `3933 `_: font.valid_codepoints() - malfunction + * **Fixed** `4018 `_: PyMuPDF hangs when iterating over zero page PDF pages backwards + * **Fixed** `4043 `_: fullcopypage bug + * **Fixed** `4047 `_: Segmentation Fault in add_redact_annot + * **Fixed** `4050 `_: Content of dict returned by doc.embfile_info() does not fit to documentation + +* Other: + + * Ensure that words from `Page.get_text()` never contain RTL/LTR char mixtures. + * Fix building with system MuPDF. + * Add dot product for points and vectors. + + +**Changes in version 1.24.13 (2024-10-29)** + +* Fixed issues: + + * **Fixed** `3848 `_: Piximap program crash + * **Fixed** `3950 `_: Unable to consistently extract field labels from PDFs + * **Fixed** `3981 `_: PyMuPDF 1.24.12 with pyinstaller throws error. + * **Fixed** `3994 `_: pix.color_topusage raise Segmentation fault (core dumped) + + +**Changes in version 1.24.12 (2024-10-21)** + +* Fixed issues: + + * **Fixed** `3914 `_: Ability to print MuPDF errors to logging instead of stdout + * **Fixed** `3916 `_: insert_htmlbox error: int too large to convert to float + * **Fixed** `3950 `_: Unable to consistently extract field labels from PDFs + +* Supported Python versions are now 3.9-3.13. + + * Dropped support for Python-3.8 because end-of-life. + * Added support for Python-3.13 because now released. + * See: https://devguide.python.org/versions/ + + +**Changes in version 1.24.11 (2024-10-03)** + +* Use MuPDF-1.24.10. + +* Fixed issues: + + * **Fixed** `3624 `_: Pdf file transform to image have a black block + * **Fixed** `3859 `_: doc.need_appearances() fails with "AttributeError: module 'pymupdf.mupdf' has no attribute 'PDF_TRUE' " + * **Fixed** `3863 `_: apply_redactions() does not work as expected + * **Fixed** `3905 `_: open stream can raise a FzErrorFormat error instead of FileDataError + +* Wheels now use the Python Stable ABI: + + * There is one PyMuPDF wheel for each platform. + * Each wheel works with all supported Python versions. + * Each wheel is built using the oldest supported Python version (currently 3.8). + * There is no PyMuPDFb wheel. + +* Other: + + * Improvements to get_text_words() with sort=True. + * Tests now always get the latest versions of required Python packages. + * Removed dependency on setuptools. + * Added item to PyMuPDF-1.24.10 changes below - fix of #3630. + + +**Changes in version 1.24.10 (2024-09-02)** + +* Use MuPDF-1.24.9. + +* Fixed issues: + + * **Fixed** `3450 `_: get_pixmap function takes too long to process + * **Fixed** `3569 `_: Invalid OCGs not ignored by SVG image creation + * **Fixed** `3603 `_: ObjStm compression and PDF linearization doesn't work together + * **Fixed** `3650 `_: Linebreak inserted between each letter + * **Fixed** `3661 `_: Update Document to check the /XYZ len + * **Fixed** `3698 `_: documentation issue - old code in the annotations documentation + * **Fixed** `3705 `_: Document.select() behaves weirdly in some particular kind of pdf files + * **Fixed** `3706 `_: extend Document.__getitem__ type annotation to reflect that the method also accepts slices + * **Fixed** `3727 `_: Method get_pixmap() make the program exit without any exceptions or messages + * **Fixed** `3767 `_: Cannot get Tessdata with Tesseract-OCR 5 + * **Fixed** `3773 `_: Link.set_border gives TypeError: '<' not supported between instances of 'NoneType' and 'int' + * **Fixed** `3774 `_: fitz.__version__` does not work anymore + * **Fixed** `3789 `_: ValueError: not enough values to unpack (expected 3, got 2) is thrown when call insert_pdf + * **Fixed** `3820 `_: class improves namedDest handling + + * **Fixed** `3630 `_: page.apply_redactions gives unwanted black rectangle + +* Other: + + * Object streams and linearization cannot be used together; attempting to do + so will raise an exception. (#3603) + * Fixed handling of non-existing /Contents object. + + +**Changes in version 1.24.9 (2024-07-24)** + +* Use MuPDF-1.24.8. + + +**Changes in version 1.24.8 (2024-07-22)** + +* Fixed issues: + + * **Fixed** `3636 `_: API documentation for the open function is not obvious to find. + * **Fixed** `3654 `_: docx parsing was broken in 1.24.7 + * **Fixed** `3677 `_: Unable to extract subset font name using the newer versions of PyMuPDF : 1.24.6 and 1.24.7. + * **Fixed** `3687 `_: Page.get_text results in AssertionError for epub files + +Other: + +* Fixed various spelling mistakes spotted by codespell. +* Improved how we modify MuPDF's default configuration on Windows. +* Make text search to work with ligatures. + + +**Changes in version 1.24.7 (2024-06-26)** + +* Fixed issues: + + * **Fixed** `3615 `_: Document.pagemode or Document.pagelayout crashes for epub files + * **Fixed** `3616 `_: not last version reported + + +**Changes in version 1.24.6 (2024-06-25)** + +* Use MuPDF-1.24.4 + +* Fixed issues: + + * **Fixed** `3599 `_: Story.fit_width() has a weird line + * **Fixed** `3594 `_: Garbled extraction for Amazon Sustainability Report + * **Fixed** `3591 `_: 'width' in Page.get_drawings() returns width equal as 0 + * **Fixed** `3561 `_: ZeroDivisionError: float division by zero with page.apply_redactions() + * **Fixed** `3559 `_: SegFault 11 when empty H1 H2 H3 H4 etc element is used in insert_htmlbox + * **Fixed** `3539 `_: Add dotted gridline detection to table recognition + * **Fixed** `3519 `_: get_toc(simple=False) AttributeError: 'Outline' object has no attribute 'rect' + * **Fixed** `3510 `_: page.get_label() gets wrong label on the first page of doc + * **Fixed** `3494 `_: 1.24.2/1.24.3: spurious characters introduced when using subset_fonts and insert_pdf + * **Fixed** `3470 `_: subset_fonts error exit without exception/warning + * **Fixed** `3400 `_: set_toc alters link coordinates for some rotated pages on pymupdf 1.24.2 + * **Fixed** `3347 `_: Incorrect links to points on pages having different heights + * **Fixed** `3237 `_: Set_metadata() does not work + * **Fixed** `3493 `_: Isolate PyMuPDF from other libraries; issues when PyMuPDF is loaded with other libraries like GdkPixbuf + +* Other: + + * Fixed concurrent use of PyMuPDF caused by use of constant temporary filenames. + + * Add musllinux x86_64 wheels to release. + + * Added clearer version information: + + * `pymupdf.pymupdf_version`. + * `pymupdf.mupdf_version`. + * `pymupdf.pymupdf_date`. + + +**Changes in version 1.24.5 (2024-05-30)** + +* Fixed issues: + + * **Fixed** `3479 `_: regression: fill_textbox: IndexError: pop from empty list + * **Fixed** `3488 `_: set_toc method error + +* Other: + + * Some more fixes to use MuPDF floating formatting. + * Removed/disabled some unnecessary diagnostics. + * Fixed utils.do_links() crash. + * Experimental new functions `pymupdf.apply_pages()` and `pymupdf.get_text()`. + * Addresses wrong label generation for label styles "a" and "A". + + +**Changes in version 1.24.4 (2024-05-16)** + + * **Fixed** `3418 `_: Re-introduced bug, text align add_redact_annot + * **Fixed** `3472 `_: insert_pdf gives SystemError + +* Other: + + * Fixed sysinstall test failing to remove all of prior installation before + new install. + * Fixed `utils.do_links()` crash. + * Correct `TextPage` creation Code. + * Unified various diagnostics. + * Fix bug in `page_merge()`. + + +**Changes in version 1.24.3 (2024-05-09)** + +* + The Python module is now called `pymupdf`. `fitz` is still supported for + backwards compatibility. + +* Use MuPDF-1.24.2. + +* Fixed issues: + + * **Fixed** `3357 `_: PyMuPDF==1.24.0 will hanging when using page.get_text("text") + * **Fixed** `3376 `_: Redacting results are not as expected in 1.24.x. + * **Fixed** `3379 `_: Documentation mismatch for get_text_blocks return value order. + * **Fixed** `3381 `_: Contents stream contains floats in scientific notation + * **Fixed** `3402 `_: Cannot add Widgets containing inter-field-calculation JavaScript + * **Fixed** `3414 `_: missing attribute set_dpi() + * **Fixed** `3430 `_: page.get_text() cause process freeze with certain pdf on v1.24.2 + +* Other: + + * New/modified methods: + + * `Page.remove_rotation()`: new, set page rotation to zero while keeping appearance. + + * Fixed some problems when checking for PDF properties. + * Fixed pip builds from sdist + (see discussion `3360 `_: + Alpine linux docker build failing "No matching distribution found for pymupdfb==1.24.1"). + + +**Changes in version 1.24.2 (2024-04-17)** + +* Removed obsolete classic implementation from releases + (previously available as module `fitz_old`). + +* Fixed issues: + + * **Fixed** `3331 `_: Document.pages() is incorrectly type-hinted + * **Fixed** `3354 `_: PyMuPDF==1.24.1: AttributeError: property 'metadata' of 'Document' object has no setter + +* Other: + + * New/modified methods: + + * `Document.bake()`: new, make annotations / fields permanent content. + * `Page.cluster_drawings()`: new, identifies drawing items + (i.e. vector graphics or line-art) + that belong together based on their geometrical vicinity. + * `Page.apply_redactions()`: added new parameter `text`. + * `Document.subset_fonts()`: use MuPDF's `pdf_subset_fonts()` instead of PyMuPDF code. + + * The `Document` class now supports page numbers specified as slices. + * Avoid causing MuPDF warnings. + + +**Changes in version 1.24.1 (2024-04-02)** + +* Fixed issues: + + * **Fixed** `3278 `_: apply_redactions moves some unredacted text + * **Fixed** `3301 `_: Be more permissive when classifying links as kind LINK_URI + * **Fixed** `3306 `_: Text containing capital 'ET' not appearing as annotation + +* Other: + + * Use MuPDF-1.24.1. + * Support ObjStm Compression. + Methods `Document.save()`, `Document.ez_save()` and `Document.write()` + now support new parameters `use_objstm`, compression_effort` and + `preserve_metadata`. + + +**Changes in version 1.24.0 (2024-03-21)** + +* Fixed issues: + + * **Fixed** `3281 `_: Preparing metadata (pyproject.toml) did not run successfully + * **Fixed** `3279 `_: PyMuPDF no longer builds in Alpine Linux + * **Fixed** `3257 `_: apply_redactions() deleting text outside of annoted box + * **Fixed** `3216 `_: AttributeError: 'Annot' object has no attribute '__del__' + * **Fixed** `3207 `_: get_drawings's items is missing line from h path operator + * **Fixed** `3201 `_: Memory leaks when merging PDFs + * **Fixed** `3197 `_: page.get_text() returns hexadecimal text for some characters + * **Fixed** `3196 `_: Remove text not working in 1.23.25 version vs 1.20.2 + * **Fixed** `3172 `_: PDF's 45º lines dissapearing in png conversion + * **Fixed** `3135 `_: Do not log warnings to stdout + * **Fixed** `3125 `_: get_pixmap method stuck on one page and runs forever + * **Fixed** `2964 `_: There is an issue with the image generated by the page.get_pixmap() function + +* Other: + + * Use MuPDF-1.24.0. + * Add support for redacting vector graphics. + * Several fixes for table module + + * Add new method for outputting the table as a markdown string. + + * Address errors in computing the table header object: + + We now allow None as the cell value, because this will be resolved where + needed (e.g. in the pandas DataFrame). + + We previously tried to enforce rect-like tuples in all header cell + bboxes, however this fails for tables with all-None columns. This fix + enables this and constructs an empty string in the corresponding cell + string. + + We now correctly include start / stop points of lines in the bbox of the + clustered graphic. We previously joined the line's rectangle - which had + no effect because this is always empty. + + * Improved exception text if we fail to open document. + * Fixed build with new libclang 18. + + +**Changes in version 1.23.26 (2024-02-29)** + +* Fixed issues: + + * **Fixed** `3199 `_: Add entry_points to setuptools configuration to provide command-line console scripts + * **Fixed** `3209 `_: Empty vertices in ink annotation + +* Other: + + * Improvements to table detection: + + * Improved check for empty tables, fixes bugs when determining table headers. + * Improved computation of enveloping vector graphic rectangles. + * Ignore more meaningless "pseudo" tables + + * Install command-line 'pymupdf' command that runs fitz/__main__.py. + * Don't overwrite MuPDF's config.h when building on non-Windows. + * Fix `Story` constructor's `archive` arg to match docs - now accepts a single `Archive` constructor arg. + * Do not include MuPDF source in sdist; will be downloaded automatically when building. + + +**Changes in version 1.23.25 (2024-02-20)** + +* Fixed issues: + + * **Fixed** `3182 `_: Pixmap.invert_irect argument type error + * **Fixed** `3186 `_: extractText() extracts broken text from pdf + * **Fixed** `3191 `_: Error on .find_tables() + +* Other: + + * When building, be able to specify python-config directly, with environment + variable `PIPCL_PYTHON_CONFIG`. + + +**Changes in version 1.23.24 (2024-02-19)** + +* Fixed issues: + + * **Fixed** `3148 `_: Table extraction - vertical text not handled correctly + * **Fixed** `3179 `_: Table Detection: Incorrect Separation of Vector Graphics Clusters + * **Fixed** `3180 `_: Cannot show optional content group: AttributeError: module 'fitz.mupdf' has no attribute 'pdf_array_push_drop' + +* Other: + + * Be able to test system install using `sudo pip install` instead of a venv. + + +**Changes in version 1.23.23 (2024-02-18)** + +* Fixed issues: + + * **Fixed** `3126 `_: Initialising Archive with a pathlib.Path fails. + * **Fixed** `3131 `_: Calling the next attribute of an Annot raises a "No attribute .parent" warning + * **Fixed** `3134 `_: Using an IRect as clip parameter in Page.get_pixmap no longer works since 1.23.9 + * **Fixed** `3140 `_: PDF document stays in use after closing + * **Fixed** `3150 `_: doc.select() hangs on this doc. + * **Fixed** `3163 `_: AssertionError on using fitz.IRect + * **Fixed** `3177 `_: fitz.Pixmap(None, pix) Unrecognised args for constructing Pixmap + +* Other: + + * + Improved `Document.select() by using new MuPDF function + `pdf_rearrange_pages()`. This is a more complete (and faster) + implementation of what needs to be done here in that not only pages will + be rearranged, but also consequential changes will be made to the table + of contents, links to removed pages and affected entries in the Optional + Content definitions. + * `TextWriter.appendv()`: added `small_caps` arg. + * Fixed some valgrind errors with MuPDF master. + * Fixed `Document.insert_image()` when build with MuPDF master. + + +**Changes in version 1.23.22 (2024-02-12)** + +* Fixed issues: + + * **Fixed** `3143 `_: Difference in decoding of OCGs names between doc.get_ocgs() and page.get_drawings() + + * **Fixed** `3139 `_: Pixmap resizing needs positional arg "clip" - even if None. + +* Other: + + * Removed the use of MuPDF function `fz_image_size()` from PyMuPDF. + + +**Changes in version 1.23.21 (2024-02-01)** + +* Fixed issues: + +* Other: + + * Fixed bug in set_xml_metadata(), PR `3112 https://github.com/pymupdf/PyMuPDF/pull/3112>`_: Fix pdf_add_stream metadata error + * Fixed lack of `.parent` member in `TextPage` from `Annot.get_textpage()`. + * Fixed bug in `Page.add_widget()`. + + +**Changes in version 1.23.20 (2024-01-29)** + +* Bug fixes: + + * **Fixed** `3100 `_: Wrong internal property accessed in get_xml_metadata + +* Other: + + * Significantly improved speed of `Document.get_toc()`. + + +**Changes in version 1.23.19 (2024-01-25)** + +* Bug fixes: + + * **Fixed** `3087 `_: Exception in insert_image with mask specified + * **Fixed** `3094 `_: TypeError: '<' not supported between instances of 'FzLocation' and 'int' in doc.delete_pages + +* Other: + + * When finding tables: + + * Allow addition of user-defined "virtual" vector graphics when finding tables. + * Confirm that the enveloping bboxes of vector graphics are inside the clip rectangle. + * Avoid slow finding of rectangle intersections. + + * Added `Font.bbox` property. + + +**Changes in version 1.23.18 (2024-01-23)** + +* Bug fixes: + + * **Fixed** `3081 `_: doc.close() not closing the document + +* Other: + + * Reduced size of sdist to fit on pypi.org (by reducing size of two test files). + * Fix `Annot.file_info()` if no `Desc` item. + + +**Changes in version 1.23.17 (2024-01-22)** + +* Bug fixes: + + * **Fixed** `3062 `_: page_rotation_reset does not return page to original rotation + * **Fixed** `3070 `_: update_link(): AttributeError: 'Page' object has no attribute 'super' + +* Other: + + * Fixed bug in `Page.links()` (PR #3075). + * Fixed bug in `Page.get_bboxlog()` with layers. + * Add support for timeouts in scripts/ and tests/run_compound.py. + + +**Changes in version 1.23.16 (2024-01-18)** + +* Bug fixes: + + * **Fixed** `3058 `_: Pixmap created from CMYK JPEG delivers RGB format + +* Other: + + * In table detection strategy "lines_strict", exclude fill-only vector graphics. + * Fixed sysinstall test failure. + * In documentation, update feature matrix with item about text writing. + + +**Changes in version 1.23.15 (2024-01-16)** + +* Bug fixes: + + * **Fixed** `3050 `_: python3.9 pix.set_pixel has something wrong in c.append( ord(i)) + +* Other: + + * Improved docs for Page.find_tables(). + + +**Changes in version 1.23.14 (2024-01-15)** + +* Bug fixes: + + * **Fixed** `3038 `_: JM_pixmap_from_display_list > Assertion Error : Checking for wrong type + * **Fixed** `3039 `_: Issue with doc.close() not closing the document in PyMuPDF + +* Other: + + * Ensure valid "re" rectangles in `Page.get_drawings()` with derotated pages. + + +**Changes in version 1.23.13 (2024-01-15)** + +* Bug fixes: + + * **Fixed** `2979 `_: list index out of range in to_pandas() + * **Fixed** `3001 `_: Calling find_tables() on one document alters the bounding boxes of a subsequent document + +* Other: + + * Fixed `Rect.height` and `Rect.width` to never return negative values. + * Fixed `TextPage.extractIMGINFO()`'s returned `dictkey_yres` value. + + +**Changes in version 1.23.12 (2024-01-12)** + +* * **Fixed** `3027 `_: Page.get_text throws Attribute Error for 'parent' + + +**Changes in version 1.23.11 (2024-01-12)** + +* Fixed some Pixmap construction bugs. +* Fixed Pixmap.yres(). + + +**Changes in version 1.23.10 (2024-01-12)** + +* Bug fixes: + + * **Fixed** `3020 `_: Can't resize a PixMap + +* Other: + + * Fixed Page.delete_image(). + + +**Changes in version 1.23.9 (2024-01-11)** + +* Default to new "rebased" implementation. + + * The old "classic" implementation is available with `import fitz_old as fitz`. + * For more information about why we are changing to the rebased implementation, + see: https://github.com/pymupdf/PyMuPDF/discussions/2680 + +* Use MuPDF-1.23.9. + +* Bug fixes (rebased implementation only): + + * **Fixed** `2911 `_: Page.derotation_matrix returns a tuple instead of a Matrix with rebased implementation + * **Fixed** `2919 `_: Rebased version: KeyError in resolve_names when merging pdfs + * **Fixed** `2922 `_: New feature that allows inserting named-destination links doesn't work + * **Fixed** `2943 `_: ZeroDivisionError: float division by zero when use apply_redactions() + * **Fixed** `2950 `_: Shelling out to pip during tests is problematic + * **Fixed** `2954 `_: Replacement unicode character in text extraction + * **Fixed** `2957 `_: apply_redactions() moving text + * **Fixed** `2961 `_: Passing a string as a page number raises IndexError instead of TypeError. + * **Fixed** `2969 `_: annot.next throws AttributeError + * **Fixed** `2978 `_: 1.23.9rc1: module 'fitz.mupdf' has no attribute 'fz_copy_pixmap_rect' + + * **Fixed** `2907 `_: segfault trying to call clean_contents on certain pdfs with python 3.12 + * **Fixed** `2905 `_: SystemError: returned a result with an exception set + * **Fixed** `2742 `_: Segmentation Fault when inserting three (but not two) copies of the same source page into one destination page + +* Other: + + * Add optional setting of opacity to `Page.insert_htmlbox()`. + * Fixed issue with add_redact_annot() mentioned in #2934. + * Fixed `Page.rotation()` to return 0 for non-PDF documents instead of raising an exception. + * Fixed internal quad detection to cope with any Python sequence. + * Fixed rebased `fitz.pymupdf_version_tuple` - was previously set to mupdf version. + * Improved support for Linux system installs, including adding regular testing on Github. + * Add missing `flake8` to `scripts/gh_release.py:test_packages`. + * Use newly public functions in MuPDF-1.23.8. + * Improved `scripts/test.py` to help investigation of MuPDF issues. + + +**Changes in version 1.23.8 (2023-12-19)** + +* Bug fixes (rebased implementation only): + + * **Fixed** `2634 `_: get_toc and set_toc do not behave consistently for rotated pages + * **Fixed** `2861 `_: AttributeError in getLinkDict during PDF Merge + * **Fixed** `2871 `_: KeyError in getLinkDict during PDF merge + * **Fixed** `2886 `_: Error in Skeleton for Named Link Destinations + +* Bug fixes (rebased and classic implementations): + + * **Fixed** `2885 `_: pymupdf find tables too slow + +* Other: + + * Rebased implementation: + + * `Page.insert_htmlbox()`: new, much more powerful alternative to `Page.insert_textbox()` or `TextWriter.fill_textbox()`, using `Story`. + * `Story.fit*()`: new methods for fitting a Story into an expanded rect. + * `Story.write_with_links()`: add support for external links. + * `Document.language()`: fixed to use MuPDF's new `mupdf.fz_string_from_text_language2()`. + * `Document.subset_fonts()` - fixed. + * Fixed internal `Archive._add_treeitem()` method. + * Fixed `fitz_new.__doc__` to contain PyMuPDF and Python version information, and OS name. + * Removed use of `(*args, **kwargs)` in API, we now specify keyword args explicitly. + * Work with new MuPDF Python exception classes. + + * Fixed bug where `button_states()` returns None when `/AP` points to an indirect object. + * Fixed pillow test to not ignore all errors, and install pillow when testing. + * Added test for `fitz.css_for_pymupdf_font()` (uses package `pymupdf-fonts`). + * Simplified Github Actions test specifications. + * Updated `tests/README.md`. + + +**Changes in version 1.23.7 (2023-11-30)** + +* Bug fixes in rebased implementation, not fixed in classic implementation: + + * **Fixed** `2232 `_: Geometry helper classes should support keyword arguments + * **Fixed** `2788 `_: Problem with get_toc in pymupdf 1.23.6 + * **Fixed** `2791 `_: Experiencing small memory leak in save() + +* Bug fixes (rebased and classic implementations): + + * **Fixed** `2736 `_: Failure when set cropbox with mediabox negative value + * **Fixed** `2749 `_: RuntimeError: cycle in structure tree + * **Fixed** `2753 `_: Story.write_with_links will ignore everything after the first "page break" in the HTML. + * **Fixed** `2812 `_: find_tables on landscape page generates reversed text + * **Fixed** `2829 `_: [cannot create /Annot for kind] is still printed despite #2345 is closed. + * **Fixed** `2841 `_: Unexpected KeyError when using scrub with fitz_new + +* Use MuPDF-1.23.7. + +* Other: + + * Rebased implementation: + + * Added flake8 code checking to test suite, and made various fixes. + * Disable diagnostics during Document constructor to match classic implementation. + + * Additional fix to `2553 `_: Invalid characters in versions >= 1.22 + * Fixed `MuPDF Bug 707324 `_: Story: HTML table row background color repeated incorrectly + * Added `scripts/test.py`, for simple build+test of PyMuPDF git checkout. + * Added `fitz.pymupdf_version_tuple`, e.g. `(1, 23, 6)`. + * Restored mistakenly-reverted fix for `2345 `_: Turn off print statements in utils.py + * Include any trailing `... repeated times...` text in warnings returned by `mupdf_warnings()` (rebased only). + + + +**Changes in version 1.23.6 (2023-11-06)** + +* Bug fixes: + + * **Fixed** `2553 `_: Invalid characters in versions >= 1.22 + * **Fixed** `2608 `_: Incorrect utf32 text extraction (high & low surrogates are split) + * **Fixed** `2710 `_: page.rect and text location wrong / differing from older version + * **Fixed** `2774 `_: wrong encoding for "\?" character when sort=True + * **Fixed** `2775 `_: fitz_new does not work with python3.10 or earlier + * **Fixed** `2777 `_: With fitz_new, wrong type for Page.mediabox + +* Other: + + * Use MuPDF-1.23.5. + * Added Document.resolve_names() (rebased implementation only). + + +**Changes in version 1.23.5 (2023-10-11)** + +* Bug fixes: + + * **Fixed** `2341 `_: Handling negative values in the zoom section for LINK_GOTO in linkDest + * **Fixed** `2522 `_: Typo in set_layer() - NameError: name 'f' is not defined + * **Fixed** `2548 `_: Fitz freezes on some PDFs when calling the fitz.Page.get_text_blocks method. + * **Fixed** `2596 `_: save(garbage=3) breaks get_pixmap() with side effect + * **Fixed** `2635 `_: "clean=True" makes objects invisible in the pdf + * **Fixed** `2637 `_: Page.insert_textbox incorrectly handles the last word if it starts a new line + * **Fixed** `2699 `_: extract paragraph with below table + * **Fixed** `2703 `_: Wrong fontsize calculation in corner cases ("page.get_texttrace()") + * **Fixed** `2710 `_: page.rect and text location wrong / differing from older version + * **Fixed** `2723 `_: When will a Python 3.12 wheel be available? + * **Fixed** `2730 `_: persistent get_text() formatting + +* Other: + + * Use MuPDF-1.23.4. + * Fix optimisation flags with system installs. + * Fixed the problem that the clip parameter does not take effect during table recognition + * Support Pillow mode "RGBa" + * Support extra word delimiters + * Support checking valid PDF name objects + + +**Changes in version 1.23.4 (2023-09-26)** + +* Improved build instructions. +* Fixed Tesseract in rebased implementation. +* Improvements to build/install with system MuPDF. +* Fixed Pyodide builds. +* Fixed rebased bug in _insert_image(). + +* Bug fixes: + + * **Fixed** `2556 `_: Segmentation fault at caling get_cdrawings(extended=True) + * **Fixed** `2637 `_: Page.insert_textbox incorrectly handles the last word if it starts a new line + * **Fixed** `2683 `_: Windows sdist build failure - non-quoting of path and using UNIX which command + * **Fixed** `2691 `_: Page.get_textpage_ocr() bug in rebased fitz_new version + * **Fixed** `2692 `_: Page.get_pixmap(clip=Rect()) bug in rebased fitz_new version + + +**Changes in version 1.23.3 (2023-08-31)** + +* Fixed use of Tesseract for OCR. + + +**Changes in version 1.23.2 (2023-08-28)** + +* **Fixed** `#2613 `_: release 1.23.0 not MacOS-arm64 compatible + + +**Changes in version 1.23.1 (2023-08-24)** + +* Updated README and package summary description. + +* + Fixed a problem on some Linux installations with Python-3.10 + (and possibly earlier versions) where `import fitz` failed with + `ImportError: libcrypt.so.2: cannot open shared object file: No such + file or directory`. + +* + Fixed `incompatible architecture` error on MacOS arm64. + +* + Fixed installation warning from Poetry about missing entry in wheels' + RECORD files. + + +**Changes in version 1.23.0 (2023-08-22)** + +* Add method `find_tables()` to the `Page` object. + + This allows locating tables on any supported document page, and + extracting table content by cell. + +* New "rebased" implementation of PyMuPDF. + + The rebased implementation is available as Python module + `fitz_new`. It can be used as a drop-in replacement with `import + fitz_new as fitz`. + +* + Python-independent MuPDF libraries are now in a second wheel called + `PyMuPDFb` that will be automatically installed by pip. + + This is to save space on pypi.org - a full release only needs one + `PyMuPDFb` wheel for each OS. + +* Bug fixes: + + * **Fixed** `#2542 `_: fitz.utils.scrub AttributeError Annot object has no attribute fileUpd inside + * **Fixed** `#2533 `_: get_texttrace returned a incorrect character bbox + * **Fixed** `#2537 `_: Validation when setting a grouped RadioButton throws a RuntimeError: path to 'V' has indirects + +* Other changes: + + * Dropped support for Python-3.7. + + * Fix for wrong page / annot `/Contents` cleaning. + + We need to set `pdf_filter_options::no_update` to zero. + + * Added new function get_tessdata(). + + * Cope with problem `/Annot` arrays. + + When copying page annotations in method Document.insert_pdf we + previously did not check the validity of members of the `/Annots` + array. For faulty members (like null or non-dictionary items) this + could cause unnecessary exceptions. This fix implements more checks + and skips such array items. + + * Additional annotation type checks. + + We did not previously check for annotation type when getting / + setting annotation border properties. This is now checked in + accordance with MuPDF. + + * Increase fault tolerance. + + Avoid exceptions in method `insert_pdf()` when source pages contains + invalid items in the `/Annots` array. + + * Return empty border dict for applicable annots. + + We previously were returning a non-empty border dictionary even for + non-applicable annotation types. We now return the empty dictionary + `{}` in these cases. This requires some corresponding changes in the + annotation `.update()` method, namely for dashes and border width. + + * Restrict `set_rect` to applicable annot types. + + We were insufficiently excluding non-applicable annotation types + from `set_rect()` method. We now let MuPDF catch unsupported + annotations and return `False` in these cases. + + * Wrong fontsize computation in `page.get_texttrace()`. + + When computing the font size we were using the final text + transformation matrix, where we should have taken `span->trm` + instead. This is corrected here. + + * Updates to cope with changes to latest MuPDF. + + `pdf_lookup_anchor()` has been removed. + + * Update fill_textbox to better respect rect.width + + The function norm_words in fill_textbox had a bug in its last + loop, appending n+1 characters when actually measuring width of n + characters. It led to a bug in fill_texbox when you tried to write + a single word mostly composed of "wide" letters (M,m, W, w...), + causing the written text to exceed the given rect. + + The fix was just to replace n+1 by n. + + * Add `script_focus` and `script_blur` options to widget. + + + **Changes in version 1.22.5 (2023-06-21)** * This release uses ``MuPDF-1.22.2``. @@ -16,6 +1090,8 @@ Change Log * **Fixed** `#2450 `_: Empty fill color and fill opacity for paths with fill and stroke operations with 1.22.* * **Fixed** `#2462 `_: Error at "get_drawing(extended=True )" * **Fixed** `#2468 `_: Decode error when trying to get drawings + * **Fixed** `#2710 `_: page.rect and text location wrong / differing from older version + * **Fixed** `#2723 `_: When will a Python 3.12 wheel be available? * New features: @@ -1731,3 +2807,5 @@ Changes in version 1.9.1 compared to version 1.8.0 are the following: * Incremental saves for changes are possible now using the call pattern *doc.save(doc.name, incremental=True)*. * A PDF's metadata can now be deleted, set or changed by document method *set_metadata()*. Supports incremental saves. * A PDF's bookmarks (or table of contents) can now be deleted, set or changed with the entries of a list using document method *set_toc(list)*. Supports incremental saves. + +.. codespell:ignore-end diff --git a/docs/404.rst b/docs/404.rst new file mode 100644 index 000000000..cf66a997a --- /dev/null +++ b/docs/404.rst @@ -0,0 +1,12 @@ +.. include:: header-404.rst + +404! +====== + + +**This page is not available.** + + +Please use the menu or search to find what you are looking for. + +.. include:: footer.rst diff --git a/docs/README.md b/docs/README.md index 3690e8477..438469d34 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,6 +2,7 @@ Welcome to the PyMuPDF documentation. This documentation relies on [Sphinx](https://www.sphinx-doc.org/en/master/) to publish HTML docs from markdown files written with [restructured text](https://en.wikipedia.org/wiki/ReStructuredText) (RST). + ## Sphinx version This README assumes you have [Sphinx v5.0.2 installed](https://www.sphinx-doc.org/en/master/usage/installation.html) on your system. @@ -11,7 +12,13 @@ This README assumes you have [Sphinx v5.0.2 installed](https://www.sphinx-doc.or Within `docs` update the associated restructured text (`.rst`) files. These files represent the corresponding document pages. +### Conventions + +- Code parameters and referenced code objects should be referenced within backticks, not italics, double backtick is better for safety +- When referencing names of some of our products surround with | , e.g. |PyMuPDF| , not PyMuPDF, see `header.rst` for products names listing +- When hyperlinking, avoid inline hyperlinks and try to references link from common location at page bottom, also avoid the use of "here" or "click here" as this provides little information about the link content. e.g. +"`Click here ` for our Story class". Should be re-written to something more like "Find out more `on our Story class `" ## Building HTML documentation @@ -33,19 +40,41 @@ This then creates the HTML documentation within `build/html`. > Use: `sphinx-build -a -b html . build/html` to build all, including the assets in `_static` (important if you have updated CSS). -## Building PDF documentation +### Using Sphinx Autobuild + +A better way of building the documentation if you are actively working on updates is to run: + +`sphinx-autobuild . _build/html` + +This will serve the docs on a localhost and auto-update the pages live as you make edits. + +### Building the Japanese documentation + +- From the "docs" location run: + +`sphinx-build -a -b html -D language=ja . _build/html/ja` +- Updating, after changes on the `main` branch and a sync with the main `en` .rst files, from the "docs" location, do: + +`sphinx-build -b gettext . _build/gettext` + +then: + +`sphinx-intl update -p _build/gettext -l ja` + +This will update the corresponding `po` files for further edits. Then check these files for "#, fuzzy" entries as the new stuff might exist there and requires editing. + + +## Building PDF documentation + - First ensure you have [rst2pdf](https://pypi.org/project/rst2pdf/) installed: - `python -m pip install rst2pdf` - - Then run: - -`sphinx-build -b pdf source build/pdf` +`sphinx-build -b pdf . build/pdf` This will then generate a single PDF for all of the documentation within `build/pdf`. diff --git a/docs/_static/PyMuPDF.ico b/docs/_static/PyMuPDF.ico index 38e08e010..de307f0f9 100644 Binary files a/docs/_static/PyMuPDF.ico and b/docs/_static/PyMuPDF.ico differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css index d19337506..8162d076c 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,12 +1,3 @@ -/* icon sizing in the left menu */ -/*.icon-home { - width: 100px; - height: auto; -} -*/ - - - /* main document page: ensures pages fit to the available width and height */ .wy-nav-content { min-width: 100%; @@ -33,6 +24,7 @@ background-color: #fbff68; border-radius: 4px; border: 1px solid #222; + color:#000; } .discordLink { @@ -116,6 +108,12 @@ button.cta a { color:#fff !important; } +.footer-version { + font-weight: bold; + font-size: 12px; + color: #999; +} + /*** Furo theme overrides ***/ /* This is to do with hiding the Furo link text and the "Made with" text */ @@ -139,7 +137,7 @@ button.cta a { } .sidebar-logo { - width: 60px; + width: auto; height: 60px; } @@ -148,16 +146,13 @@ button.cta a { padding: 0; } -.sidebar-container .sidebar-search-container { - display: none; -} .sidebar-search-container.top { /*position:sticky; top:10px;*/ border-radius: 20px; border: solid #333 1px; - background-color: #fff; + /*background-color: #fff;*/ } .sidebar-search-container.top .sidebar-search { @@ -165,9 +160,8 @@ button.cta a { border-bottom: 0 !important; } -/* really important ! */ -.sidebar-search { - color: #000 !important; +.sidebar-drawer .sidebar-search-container { + width: 95%; } .toc-drawer .toc-title { @@ -178,7 +172,7 @@ button.cta a { :target>h1:first-of-type, span:target~h1:first-of-type { background-color: #007aff !important; color: #fff !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; } @@ -188,15 +182,15 @@ button.cta a { span:target~h2:first-of-type, span:target~h3:first-of-type, span:target~h4:first-of-type, span:target~h5:first-of-type, span:target~h6:first-of-type { background-color: transparent !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; text-decoration: underline; } -/* Dark mode colors */ -@media (prefers-color-scheme: dark) { - - - +/* small screens */ +@media all and (max-width : 550px) { + .discordLink img { + display: none; + } } diff --git a/docs/_static/forum-logo-wink.png b/docs/_static/forum-logo-wink.png new file mode 100644 index 000000000..20bc16b70 Binary files /dev/null and b/docs/_static/forum-logo-wink.png differ diff --git a/docs/_static/forum-logo.gif b/docs/_static/forum-logo.gif new file mode 100644 index 000000000..ba9ebd146 Binary files /dev/null and b/docs/_static/forum-logo.gif differ diff --git a/docs/_static/pymupdf-console.html b/docs/_static/pymupdf-console.html deleted file mode 100644 index 4decadd0f..000000000 --- a/docs/_static/pymupdf-console.html +++ /dev/null @@ -1,280 +0,0 @@ - - - - - - - - - - - -
- - - \ No newline at end of file diff --git a/docs/_static/pymupdf-sidebar-logo-dark.png b/docs/_static/pymupdf-sidebar-logo-dark.png deleted file mode 100644 index eec2b1e77..000000000 Binary files a/docs/_static/pymupdf-sidebar-logo-dark.png and /dev/null differ diff --git a/docs/_static/pymupdf-sidebar-logo-light.png b/docs/_static/pymupdf-sidebar-logo-light.png deleted file mode 100644 index 2efa7a2e4..000000000 Binary files a/docs/_static/pymupdf-sidebar-logo-light.png and /dev/null differ diff --git a/docs/_static/sidebar-logo-dark.svg b/docs/_static/sidebar-logo-dark.svg new file mode 100644 index 000000000..53f9ebf64 --- /dev/null +++ b/docs/_static/sidebar-logo-dark.svg @@ -0,0 +1,54 @@ + + + diff --git a/docs/_static/sidebar-logo-light.svg b/docs/_static/sidebar-logo-light.svg new file mode 100644 index 000000000..53f9ebf64 --- /dev/null +++ b/docs/_static/sidebar-logo-light.svg @@ -0,0 +1,54 @@ + + + diff --git a/docs/about-feature-matrix.rst b/docs/about-feature-matrix.rst index 666cc1d3d..793ea633f 100644 --- a/docs/about-feature-matrix.rst +++ b/docs/about-feature-matrix.rst @@ -35,6 +35,26 @@ :width: 0 :height: 0 +.. image:: images/icons/icon-txt.svg + :width: 0 + :height: 0 + +.. image:: images/icons/icon-docx.svg + :width: 0 + :height: 0 + +.. image:: images/icons/icon-pptx.svg + :width: 0 + :height: 0 + +.. image:: images/icons/icon-xlsx.svg + :width: 0 + :height: 0 + +.. image:: images/icons/icon-hangul.svg + :width: 0 + :height: 0 + .. raw:: html @@ -46,9 +66,10 @@ #feature-matrix th { border: 1px #999 solid; - padding: 10px; + padding: 10px 2px; background-color: #007aff; color: white; + text-align: center; } #feature-matrix tr { @@ -57,7 +78,8 @@ #feature-matrix td { border: 1px #999 solid; - padding: 10px; + padding: 10px 2px; + text-align: center; } #feature-matrix tr td.yes { @@ -134,19 +156,48 @@ background-size: 40px 40px; } + #feature-matrix .icon.txt { + background: url("_images/icon-txt.svg") 0 0 transparent no-repeat; + background-size: 40px 40px; + } + + #feature-matrix .icon.docx { + background: url("_images/icon-docx.svg") 0 0 transparent no-repeat; + background-size: 40px 40px; + } + + #feature-matrix .icon.pptx { + background: url("_images/icon-pptx.svg") 0 0 transparent no-repeat; + background-size: 40px 40px; + } + + #feature-matrix .icon.xlsx { + background: url("_images/icon-xlsx.svg") 0 0 transparent no-repeat; + background-size: 40px 40px; + } + + #feature-matrix .icon.hangul { + background: url("_images/icon-hangul.svg") 0 0 transparent no-repeat; + background-size: 40px 40px; + } + + - - - - - + + + + + + - - + + + + + - - - + + + + - - - - - + + + + + + - - + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + - + + - + + - + - + + - + + - + + - + + - - - - - + + + + + + - - + + + - + + - + + - + - + + - + + - - + + + + - - - + + + + - - + + - + +
FeaturePyMuPDFpikepdfPyPDF2pdfrwFeaturePyMuPDFpikepdfPyPDF2pdfrwpdfplumber / pdfminer
Supports Multiple Document Formats
Supports Multiple Document Formats PDF XPS @@ -155,7 +206,17 @@ FB2 CBZ SVG - Image + TXT + Image +
+ DOCX + XLSX + PPTX + HWPX + See note +
+ PDF PDF @@ -169,174 +230,396 @@
ImplementationC and PythonC++ and PythonImplementationPython and CPython and C++Python Python Python
Render Document PagesAll document typesNo renderingNo renderingNo renderingRender Document PagesAll document typesNo renderingNo renderingNo renderingNo rendering
Extract TextAll document typesWrite Text to PDF Page +
+ See: + Page.insert_htmlbox +
or:
+ Page.insert_textbox +
or:
+ TextWriter + +
PDF only
Extract Vector GraphicsAll document typesSupports CJK characters
Extract TextAll document typesPDF onlyPDF only
Extract Text as Markdown (.md)All document types
Extract TablesAll document typesPDF only
Extract Vector GraphicsAll document types Limited
Draw Vector Graphics (PDF)Draw Vector Graphics (PDF)
Based on Existing, Mature LibraryBased on Existing, Mature Library MuPDF QPDF
Automatic Repair of Damaged PDFsAutomatic Repair of Damaged PDFs
Encrypted PDFsEncrypted PDFs LimitedLimited Limited
Linerarized PDFsLinerarized PDFs
Incremental UpdatesIncremental Updates
Integrates with Jupyter and IPython NotebooksIntegrates with Jupyter and IPython Notebooks
Joining / Merging PDF with other Document TypesAll document typesPDF only PDF only PDF only Joining / Merging PDF with other Document TypesAll document typesPDF only PDF only PDF only PDF only
OCR API for Seamless Integration with TesseractAll document typesOCR API for Seamless Integration with TesseractAll document types
Integrated Checkpoint / Restart Feature (PDF)Integrated Checkpoint / Restart Feature (PDF)
PDF Optional ContentPDF Optional Content
PDF Embedded FilesPDF Embedded Files LimitedLimited Limited
PDF RedactionsPDF Redactions
PDF AnnotationsFullPDF AnnotationsFullLimited Limited
PDF Form FieldsCreate, read, updatePDF Form FieldsCreate, read, updateLimited, no creation Limited, no creation
PDF Page LabelsPDF Page Labels Read-only
Support Font Sub-SettingSupport Font Sub-Setting
+ + +
+ +
diff --git a/docs/about-performance.rst b/docs/about-performance.rst index 3f8171d5c..d89ed1f1b 100644 --- a/docs/about-performance.rst +++ b/docs/about-performance.rst @@ -166,9 +166,9 @@
-
Copying

This refers to opening a document and then saving it to a new file. This test measures the speed of reading a PDF and re-writing as a new PDF. This process is also at the core of functions like merging / joining multiple documents. The numbers below therefore apply to PDF joining and merging.

+
Copying

This refers to opening a document and then saving it to a new file. This test measures the speed of reading a PDF and re-writing as a new PDF. This process is also at the core of functions like merging / joining multiple documents. The numbers below therefore apply to PDF joining and merging.

-

The results for all 7,031 pages are:

+

The results for all 7,031 pages are:

@@ -183,7 +183,7 @@
400
300
200
-
100

seconds
+
100

seconds
@@ -210,19 +210,19 @@
-
fastest
+
fastest
-
slowest
+
slowest

-
Text Extraction

This refers to extracting simple, plain text from every page of the document and storing it in a text file.

+
Text Extraction

This refers to extracting simple, plain text from every page of the document and storing it in a text file.

-

The results for all 7,031 pages are:

+

The results for all 7,031 pages are:

@@ -235,7 +235,7 @@
400
300
200
-
100

seconds
+
100

seconds
@@ -261,10 +261,10 @@
-
fastest
+
fastest
-
slowest
+
slowest
@@ -273,9 +273,9 @@
-
Rendering

This refers to making an image (like PNG) from every page of a document at a given DPI resolution. This feature is the basis for displaying a document in a GUI window.

+
Rendering

This refers to making an image (like PNG) from every page of a document at a given DPI resolution. This feature is the basis for displaying a document in a GUI window.

-

The results for all 7,031 pages are:

+

The results for all 7,031 pages are:

@@ -290,7 +290,7 @@
800
600
400
-
200

seconds
+
200

seconds
@@ -314,9 +314,9 @@
-
fastest
+
fastest
-
slowest
+
slowest
@@ -324,5 +324,67 @@
+ + + + diff --git a/docs/about.rst b/docs/about.rst index 14fe690c1..13e22fb4f 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -12,15 +12,53 @@ Features Comparison .. _About_Feature_Matrix: -Feature matrix +Feature Matrix ~~~~~~~~~~~~~~~~~~~ -The following table illustrates how :title:`PyMuPDF` compares with other typical solutions. +The following table illustrates how |PyMuPDF| compares with other typical solutions. .. include:: about-feature-matrix.rst +---- + +.. image:: images/icons/icon-docx.svg + :width: 40 + :height: 40 + +.. image:: images/icons/icon-xlsx.svg + :width: 40 + :height: 40 + +.. image:: images/icons/icon-pptx.svg + :width: 40 + :height: 40 + + +.. image:: images/icons/icon-hangul.svg + :width: 40 + :height: 40 + + + +.. note:: + + A note about **Office** document types (DOCX, XLXS, PPTX) and **Hangul** documents (HWPX). These documents can be loaded into |PyMuPDF| and you will receive a :ref:`Document ` object. + + There are some caveats: + + + - we convert the input to **HTML** to layout the content. + - because of this the original page separation has gone. + + When saving out the result any faithful representation of the original layout cannot be expected. + + Therefore input files are mostly in a form that's useful for text extraction. + + +---- + .. _About_Performance: Performance @@ -28,7 +66,7 @@ Performance -To benchmark :title:`PyMuPDF` performance against a range of tasks a test suite with a fixed set of :ref:`8 PDFs with a total of 7,031 pages` containing text & images is used to obtain performance timings. +To benchmark |PyMuPDF| performance against a range of tasks a test suite with a fixed set of :ref:`8 PDFs with a total of 7,031 pages` containing text & images is used to obtain performance timings. Here are current results, grouped by task: @@ -49,13 +87,24 @@ License and Copyright -:title:`PyMuPDF` and :title:`MuPDF` are now available under both, open-source :title:`AGPL` and commercial license agreements. Please read the full text of the :title:`AGPL` license agreement, available in the distribution material (file COPYING) and `here `_, to ensure that your use case complies with the guidelines of the license. If you determine you cannot meet the requirements of the :title:`AGPL`, please contact `Artifex `_ for more information regarding a commercial license. +|PyMuPDF| and |MuPDF| are now available under both, open-source |AGPL| and commercial license agreements. Please read the full text of the |AGPL| license agreement, available in the distribution material (file COPYING) and `on the GNU license page `_, to ensure that your use case complies with the guidelines of the license. If you determine you cannot meet the requirements of the |AGPL|, please contact `Artifex `_ for more information regarding a commercial license. .. raw:: html - +

+ + + + :title:`Artifex` is the exclusive commercial licensing agent for :title:`MuPDF`. :title:`Artifex`, the :title:`Artifex` logo, :title:`MuPDF`, and the :title:`MuPDF` logo are registered trademarks of :title:`Artifex Software Inc.` diff --git a/docs/algebra.rst b/docs/algebra.rst index e7e01396c..9fcde7812 100644 --- a/docs/algebra.rst +++ b/docs/algebra.rst @@ -19,18 +19,18 @@ General Remarks ----------------- 1. Operators can be either **binary** (i.e. involving two objects) or **unary**. -2. The resulting type of **binary** operations is either a **new object of the left operand's class** or a bool. +2. The resulting type of **binary** operations is either a **new object of the left operand's class,** a bool or (for dot products) a float. 3. The result of **unary** operations is either a **new object** of the same class, a bool or a float. -4. The binary operators *+, -, *, /* are defined for all classes. They *roughly* do what you would expect -- **except, that the second operand ...** +4. The binary operators `+, -, *, /` are defined for all classes. They *roughly* do what you would expect -- **except, that the second operand ...** - may always be a number which then performs the operation on every component of the first one, - may always be a numeric sequence of the same length (2, 4 or 6) -- we call such sequences :data:`point_like`, :data:`rect_like`, :data:`quad_like` or :data:`matrix_like`, respectively. -5. Rectangles support additional binary operations: **intersection** (operator *"&"*), **union** (operator *"|"*) and **containment** checking. +5. Rectangles support **additional binary** operations: **intersection** (operator `"&"`), **union** (operator `"|"`) and **containment** checking. -6. Binary operators fully support in-place operations, so expressions like `a /= b` are valid if b is numeric or "a_like". +6. Binary operators fully support in-place operations. So if "°" is a binary operator then the expression `a °= b` is always valid and the same as `a = a ° b`. Therefore, be careful and do **not** do `p1 *= p2` for two points, because thereafter "p1" is a **float**. Unary Operations @@ -50,17 +50,21 @@ Oper. Result Binary Operations ------------------ -For every geometry object "a" and every number "b", the operations "a ° b" and "a °= b" are always defined for the operators *+, -, *, /*. The respective operation is simply executed for each component of "a". If the **second operand is not a number**, then the following is defined: +These are expressions like `a ° b` where "°" is any of the operators `+, -, *, /`. Also binary operations are expressions of the form `a == b` and `b in a`. -========= ======================================================================= +If "b" is a number, then the respective operation is executed for each component of "a". Otherwise, if "b" is **not a number,** then the following happens: + + +========= =========================================================================== Oper. Result -========= ======================================================================= +========= =========================================================================== a+b, a-b component-wise execution, "b" must be "a-like". -a*m, a/m "a" can be a point, rectangle or matrix, but "m" must be +a*m, a/m "a" can be a point, rectangle or matrix and "m" is a :data:`matrix_like`. *"a/m"* is treated as *"a*~m"* (see note below for non-invertible matrices). If "a" is a **point** or a **rectangle**, then *"a.transform(m)"* is executed. If "a" is a matrix, then matrix concatenation takes place. +a*b returns the **vector dot product** for a point "a" and point-like "b". a&b **intersection rectangle:** "a" must be a rectangle and "b" :data:`rect_like`. Delivers the **largest rectangle** contained in both operands. @@ -70,15 +74,15 @@ a|b **union rectangle:** "a" must be a rectangle, and "b" may be b in a if "b" is a number, then `b in tuple(a)` is returned. If "b" is :data:`point_like`, :data:`rect_like` or :data:`quad_like`, then "a" must be a rectangle, and `a.contains(b)` is returned. -a == b *True* if *bool(a-b)* is *False* ("b" may be "a-like"). -========= ======================================================================= +a == b ``True`` if *bool(a-b)* is ``False`` ("b" may be "a-like"). +========= =========================================================================== .. note:: Please note an important difference to usual arithmetic: - Matrix multiplication is **not commutative**, i.e. in general we have `m*n != n*m` for two matrices. Also, there are non-zero matrices which have no inverse, for example `m = Matrix(1, 0, 1, 0, 1, 0)`. If you try to divide by any of these, you will receive a `ZeroDivisionError` exception using operator *"/"*, e.g. for the expression `fitz.Identity / m`. But if you formulate `fitz.Identity * ~m`, the result will be `fitz.Matrix()` (the null matrix). + Matrix multiplication is **not commutative**, i.e. in general we have `m*n != n*m` for two matrices. Also, there are non-zero matrices which have no inverse, for example `m = Matrix(1, 0, 1, 0, 1, 0)`. If you try to divide by any of these, you will receive a `ZeroDivisionError` exception using operator *"/"*, e.g. for the expression `pymupdf.Identity / m`. But if you formulate `pymupdf.Identity * ~m`, the result will be `pymupdf.Matrix()` (the null matrix). - Admittedly, this represents an inconsistency, and we are considering to remove it. For the time being, you can choose to avoid an exception and check whether ~m is the null matrix, or accept a potential *ZeroDivisionError* by using `fitz.Identity / m`. + Admittedly, this represents an inconsistency, and we are considering to remove it. For the time being, you can choose to avoid an exception and check whether ~m is the null matrix, or accept a potential *ZeroDivisionError* by using `pymupdf.Identity / m`. .. note:: @@ -95,9 +99,9 @@ Manipulation with numbers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For the usual arithmetic operations, numbers are always allowed as second operand. In addition, you can formulate `"x in OBJ"`, where x is a number. It is implemented as `"x in tuple(OBJ)"`:: - >>> fitz.Rect(1, 2, 3, 4) + 5 - fitz.Rect(6.0, 7.0, 8.0, 9.0) - >>> 3 in fitz.Rect(1, 2, 3, 4) + >>> pymupdf.Rect(1, 2, 3, 4) + 5 + pymupdf.Rect(6.0, 7.0, 8.0, 9.0) + >>> 3 in pymupdf.Rect(1, 2, 3, 4) True >>> @@ -111,13 +115,36 @@ The following will create the upper left quarter of a document page rectangle:: The following will deliver the **middle point of a line** that connects two points **p1** and **p2**:: - >>> p1 = fitz.Point(1, 2) - >>> p2 = fitz.Point(4711, 3141) + >>> p1 = pymupdf.Point(1, 2) + >>> p2 = pymupdf.Point(4711, 3141) >>> mp = (p1 + p2) / 2 >>> mp Point(2356.0, 1571.5) >>> +Compute the **vector dot product** of two points. You can compute the **cosine of angles** and check orthogonality. + + >>> p1 = pymupdf.Point(1, 0) + >>> p2 = pymupdf.Point(1, 1) + >>> dot = p1 * p2 + >>> dot + 1.0 + + >>> # compute the cosine of the angle between p1 and p2: + >>> cosine = dot / (abs(p1) * abs(p2)) + >>> cosine # cosine of 45 degrees + 0.7071067811865475 + + >>> math.cos(mat.radians(45)) # verify: + 0.7071067811865476 + + >>> # check orhogonality + >>> p3 = pymupdf.Point(0, 1) + >>> # p1 and p3 are orthogonal so, as expected: + >>> p1 * p3 + 0.0 + + Manipulation with "like" Objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -134,15 +161,15 @@ The second operand of a binary operation can always be "like" the left operand. To shift a rectangle for 5 pixels to the right, do this:: - >>> fitz.Rect(100, 100, 200, 200) + (5, 0, 5, 0) # add 5 to the x coordinates + >>> pymupdf.Rect(100, 100, 200, 200) + (5, 0, 5, 0) # add 5 to the x coordinates Rect(105.0, 100.0, 205.0, 200.0) >>> Points, rectangles and matrices can be *transformed* with matrices. In PyMuPDF, we treat this like a **"multiplication"** (or resp. **"division"**), where the second operand may be "like" a matrix. Division in this context means "multiplication with the inverted matrix":: - >>> m = fitz.Matrix(1, 2, 3, 4, 5, 6) - >>> n = fitz.Matrix(6, 5, 4, 3, 2, 1) - >>> p = fitz.Point(1, 2) + >>> m = pymupdf.Matrix(1, 2, 3, 4, 5, 6) + >>> n = pymupdf.Matrix(6, 5, 4, 3, 2, 1) + >>> p = pymupdf.Point(1, 2) >>> p * m Point(12.0, 16.0) >>> p * (1, 2, 3, 4, 5, 6) @@ -161,18 +188,18 @@ Points, rectangles and matrices can be *transformed* with matrices. In PyMuPDF, Matrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0) >>> >>> # look at this non-invertible matrix: - >>> m = fitz.Matrix(1, 0, 1, 0, 1, 0) + >>> m = pymupdf.Matrix(1, 0, 1, 0, 1, 0) >>> ~m Matrix(0.0, 0.0, 0.0, 0.0, 0.0, 0.0) >>> # we try dividing by it in two ways: - >>> p = fitz.Point(1, 2) + >>> p = pymupdf.Point(1, 2) >>> p * ~m # this delivers point (0, 0): Point(0.0, 0.0) >>> p / m # but this is an exception: Traceback (most recent call last): File "", line 1, in p / m - File "... /site-packages/fitz/fitz.py", line 869, in __truediv__ + File "... /site-packages/fitz/pymupdf.py", line 869, in __truediv__ raise ZeroDivisionError("matrix not invertible") ZeroDivisionError: matrix not invertible >>> @@ -194,7 +221,7 @@ Here is an example for creating the smallest rectangle enclosing given points:: >>> >>> # now create a rectangle containing all these 100 points >>> # start with an empty rectangle - >>> r = fitz.Rect(points[0], points[0]) + >>> r = pymupdf.Rect(points[0], points[0]) >>> for p in points[1:]: # and include remaining points one by one r |= p >>> r # here is the to be expected result: diff --git a/docs/annot.rst b/docs/annot.rst index 48ba7cb4d..35a5a6b04 100644 --- a/docs/annot.rst +++ b/docs/annot.rst @@ -5,22 +5,26 @@ ================ Annot ================ -**This class is supported for PDF documents only.** -Quote from the :ref:`AdobeManual`: "An annotation associates an object such as a note, sound, or movie with a location on a page of a PDF document, or provides a way to interact with the user by means of the mouse and keyboard." +|pdf_only_class| + +Quote from the :ref:`AdobeManual`: + + *"An annotation associates an object such as a note, sound, or movie with a location on a page of a PDF document, or provides a way to interact with the user by means of the mouse and keyboard."* There is a parent-child relationship between an annotation and its page. If the page object becomes unusable (closed document, any document structure change, etc.), then so does every of its existing annotation objects -- an exception is raised saying that the object is "orphaned", whenever an annotation property or method is accessed. ================================== ============================================================== **Attribute** **Short Description** ================================== ============================================================== -:meth:`Annot.delete_responses` delete all responding annotions +:meth:`Annot.delete_responses` delete all responding annotations :meth:`Annot.get_file` get attached file content :meth:`Annot.get_oc` get :data:`xref` of an :data:`OCG` / :data:`OCMD` :meth:`Annot.get_pixmap` image of the annotation as a pixmap :meth:`Annot.get_sound` get the sound of an audio annotation :meth:`Annot.get_text` extract annotation text :meth:`Annot.get_textbox` extract annotation text +:meth:`Annot.get_textpage` create a TextPage for the annotation :meth:`Annot.set_border` set annotation's border properties :meth:`Annot.set_blendmode` set annotation's blend mode :meth:`Annot.set_colors` set annotation's colors @@ -66,7 +70,7 @@ There is a parent-child relationship between an annotation and its page. If the pair: alpha; Annot.get_pixmap pair: dpi; Annot.get_pixmap - .. method:: get_pixmap(matrix=fitz.Identity, dpi=None, colorspace=fitz.csRGB, alpha=False) + .. method:: get_pixmap(matrix=pymupdf.Identity, dpi=None, colorspace=pymupdf.csRGB, alpha=False) * Changed in v1.19.2: added support of dpi parameter. @@ -76,14 +80,18 @@ There is a parent-child relationship between an annotation and its page. If the :arg int dpi: (new in v1.19.2) desired resolution in dots per inch. If not `None`, the matrix parameter is ignored. - :arg colorspace: a colorspace to be used for image creation. Default is *fitz.csRGB*. + :arg colorspace: a colorspace to be used for image creation. Default is ``pymupdf.csRGB``. :type colorspace: :ref:`Colorspace` - :arg bool alpha: whether to include transparency information. Default is *False*. + :arg bool alpha: whether to include transparency information. Default is ``False``. :rtype: :ref:`Pixmap` - .. note:: If the annotation has just been created or modified, you should reload the page first via *page = doc.reload_page(page)*. + .. note:: + + * If the annotation has just been created or modified, you should :meth:`Document.reload_page` the page first via `page = doc.reload_page(page)`. + + * The pixmap will have *"premultiplied"* pixels if `alpha=True`. To learn about some background, e.g. look for "Premultiplied alpha" `in this online glossary `_. .. index:: @@ -129,11 +137,27 @@ There is a parent-child relationship between an annotation and its page. If the :arg rect-like rect: the area to consider, defaults to :attr:`Annot.rect`. + .. method:: get_textpage(clip=None, flags=3) + + Create a :ref:`TextPage` for the annotation. + + :arg int flags: indicator bits controlling the content available for subsequent text extractions and searches -- see the parameter of :meth:`Annot.get_text`. + + :arg rect-like clip: restrict extracted text to this area. + + :returns: :ref:`TextPage` + + |history_begin| + + * v1.25.5: fixed `clip` arg. + + |history_end| + .. method:: set_info(info=None, content=None, title=None, creationDate=None, modDate=None, subject=None) * Changed in version 1.16.10 - Changes annotation properties. These include dates, contents, subject and author (title). Changes for *name* and *id* will be ignored. The update happens selectively: To leave a property unchanged, set it to *None*. To delete existing data, use an empty string. + Changes annotation properties. These include dates, contents, subject and author (title). Changes for *name* and *id* will be ignored. The update happens selectively: To leave a property unchanged, set it to ``None``. To delete existing data, use an empty string. :arg dict info: a dictionary compatible with the *info* property (see below). All entries must be strings. If this argument is not a dictionary, the other arguments are used instead -- else they are ignored. :arg str content: *(new in v1.16.10)* see description in :attr:`info`. @@ -148,8 +172,7 @@ There is a parent-child relationship between an annotation and its page. If the .. note:: - * While 'FreeText', 'Line', 'PolyLine', and 'Polygon' annotations can have these properties, (Py-) MuPDF does not support line ends for 'FreeText', because the call-out variant of it is not supported. - * *(Changed in v1.16.16)* Some symbols have an interior area (diamonds, circles, squares, etc.). By default, these areas are filled with the fill color of the annotation. If this is *None*, then white is chosen. The *fill_color* argument of :meth:`Annot.update` can now be used to override this and give line end symbols their own fill color. + * Some symbols have an interior area (diamonds, circles, squares, etc.). These areas are filled with the fill color or the stroke color, depending on the annotation type. :arg int start: The symbol number for the first point. :arg int end: The symbol number for the last point. @@ -216,7 +239,7 @@ There is a parent-child relationship between an annotation and its page. If the The annotation's blend mode. See :ref:`AdobeManual`, page 324 for explanations. :rtype: str - :returns: the blend mode or *None*. + :returns: the blend mode or ``None``. .. method:: set_blendmode(blendmode) @@ -236,7 +259,7 @@ There is a parent-child relationship between an annotation and its page. If the :arg str name: the new name. - .. caution:: If you set the name of a 'Stamp' annotation, then this will **not change** the rectangle, nor will the text be layouted in any way. If you choose a standard text from :ref:`StampIcons` (the **exact** name piece after `"STAMP_"`), you should receive the original layout. An **arbitrary text** will not be changed to upper case, but be written in font "Times-Bold" as is, horizontally centered in **one line** and be shortened to fit. To get your text fully displayed, its length using fontsize 20 must not exceed 190 pixels. So please make sure that the following inequality is true: `fitz.get_text_length(text, fontname="tibo", fontsize=20) <= 190`. + .. caution:: If you set the name of a 'Stamp' annotation, then this will **not change** the rectangle, nor will the text be layouted in any way. If you choose a standard text from :ref:`StampIcons` (the **exact** name piece after `"STAMP_"`), you should receive the original layout. An **arbitrary text** will not be changed to upper case, but be written in font "Times-Bold" as is, horizontally centered in **one line** and be shortened to fit. To get your text fully displayed, its length using :data:`fontsize` 20 must not exceed 190 points. So please make sure that the following inequality is true: `pymupdf.get_text_length(text, fontname="tibo", fontsize=20) <= 190`. .. method:: set_rect(rect) @@ -283,15 +306,13 @@ There is a parent-child relationship between an annotation and its page. If the .. method:: set_colors(colors=None, stroke=None, fill=None) - * Changed in version 1.16.9: Allow colors to be directly set. These parameters are used if *colors* is not a dictionary. - - Changes the "stroke" and "fill" colors for supported annotation types -- not all annotations accept both. + Changes the "stroke" and "fill" colors for supported annotation types -- not all annotation types accept both. **Do not use this method at all for FreeText annotations** because it has its special conventions to deal with up to three colors (border, fill, text). :arg dict colors: a dictionary containing color specifications. For accepted dictionary keys and values see below. The most practical way should be to first make a copy of the *colors* property and then modify this dictionary as required. :arg sequence stroke: see above. :arg sequence fill: see above. - *Changed in v1.18.5:* To completely remove a color specification, use an empty sequence like `[]`. If you specify `None`, an existing specification will not be changed. + To completely remove a color specification, use an empty sequence like `[]`. If you specify `None`, an existing specification will not be changed. .. method:: delete_responses() @@ -327,20 +348,26 @@ There is a parent-child relationship between an annotation and its page. If the Color specifications may be made in the usual format used in PuMuPDF as sequences of floats ranging from 0.0 to 1.0 (including both). The sequence length must be 1, 3 or 4 (supporting GRAY, RGB and CMYK colorspaces respectively). For GRAY, just a float is also acceptable. :arg float opacity: *(new in v1.16.14)* **valid for all annotation types:** change or set the annotation's transparency. Valid values are *0 <= opacity < 1*. + :arg str blend_mode: *(new in v1.16.14)* **valid for all annotation types:** change or set the annotation's blend mode. For valid values see :ref:`BlendModes`. - :arg float fontsize: change font size of the text. 'FreeText' annotations only. - :arg sequence,float text_color: change the text color. 'FreeText' annotations only. - :arg sequence,float border_color: change the border color. 'FreeText' annotations only. + + :arg float fontsize: change :data:`fontsize` of the text. 'FreeText' annotations only. + + :arg sequence,float text_color: change the text color. 'FreeText' annotations only. This has the same effect as ``border_color``. Note that the text color of rich-text annotations cannot be changed at all because it is set by HTML / CSS syntax and part of the text itself. + + :arg sequence,float border_color: change the border color. 'FreeText' annotations only. This has the same effect as ``text_color``. + :arg sequence,float fill_color: the fill color. - * 'Line', 'Polyline', 'Polygon' annotations: use it to give applicable line end symbols a fill color other than that of the annotation *(changed in v1.16.16)*. + :arg bool cross_out: *(new in v1.17.2)* add two diagonal lines to the annotation rectangle. 'Redact' annotations only. If not desired, ``False`` must be specified even if the annotation was created with ``False``. - :arg bool cross_out: *(new in v1.17.2)* add two diagonal lines to the annotation rectangle. 'Redact' annotations only. If not desired, *False* must be specified even if the annotation was created with *False*. :arg int rotate: new rotation value. Default (-1) means no change. Supports 'FreeText' and several other annotation types (see :meth:`Annot.set_rotation`), [#f1]_. Only choose 0, 90, 180, or 270 degrees for 'FreeText'. Otherwise any integer is acceptable. :rtype: bool - .. note:: Using this method inside a :meth:`Page.annots` loop is **not recommended!** This is because most annotation updates require the owning page to be reloaded -- which cannot be done inside this loop. Please use the example coding pattern given in the documentation of this generator. + This method is the only way to change the colors of a FreeText annotation. You cannot use :meth:`Annot.set_colors` for this purpose. But be aware that for rich-text annotations, the text color is never changed. The text color is set by the ``text_color`` entry of the ``info`` dictionary. This is a limitation of |MuPDF| and not a bug. + + .. caution:: Using this method inside a :meth:`Page.annots` loop is **not recommended!** This is because most annotation updates require the owning page to be reloaded -- which cannot be done inside this loop. Please use the example coding pattern given in the documentation of this generator. .. attribute:: file_info @@ -348,7 +375,7 @@ There is a parent-child relationship between an annotation and its page. If the Basic information of the annot's attached file. :rtype: dict - :returns: a dictionary with keys *filename*, *ufilename*, *desc* (description), *size* (uncompressed file size), *length* (compressed length) for FileAttachment annot types, else *None*. + :returns: a dictionary with keys *filename*, *ufilename*, *desc* (description), *size* (uncompressed file size), *length* (compressed length) for FileAttachment annot types, else ``None``. .. method:: get_file() @@ -459,7 +486,7 @@ There is a parent-child relationship between an annotation and its page. If the .. attribute:: line_ends - A pair of integers specifying start and end symbol of annotations types 'FreeText', 'Line', 'PolyLine', and 'Polygon'. *None* if not applicable. For possible values and descriptions in this list, see the :ref:`AdobeManual`, table 1.76 on page 400. + A pair of integers specifying start and end symbol of annotations types 'FreeText', 'Line', 'PolyLine', and 'Polygon'. ``None`` if not applicable. For possible values and descriptions in this list, see the :ref:`AdobeManual`, table 1.76 on page 400. :rtype: tuple @@ -478,7 +505,10 @@ There is a parent-child relationship between an annotation and its page. If the .. attribute:: colors - dictionary of two lists of floats in range *0 <= float <= 1* specifying the "stroke" and the interior ("fill") colors. The stroke color is used for borders and everything that is actively painted or written ("stroked"). The fill color is used for the interior of objects like line ends, circles and squares. The lengths of these lists implicitly determine the colorspaces used: 1 = GRAY, 3 = RGB, 4 = CMYK. So "[1.0, 0.0, 0.0]" stands for RGB color red. Both lists can be empty if no color is specified. + dictionary of two lists of floats in range *0 <= float <= 1* specifying the "stroke" and the interior ("fill") colors. The stroke color is used for borders and everything that is actively painted or written ("stroked"). The fill color is used for the interior of objects like line ends, circles and squares. The lengths of these lists implicitly determine the colorspaces used: 1 = GRAY, 3 = RGB, 4 = CMYK. So "[1.0, 0.0, 0.0]" stands for RGB color red. Both lists can be empty if no color is specified. Be aware about some potentially unexpected cases: + + * The color of Highlight annotations is a **stroke** color, contrary to intuition. + * The color if FreeText annotations is a **stroke** color, but appears as the color that fills the rectangle and any applicable line end symbols. Text color and border color cannot be accessed at all. :rtype: dict @@ -532,7 +562,7 @@ There is a parent-child relationship between an annotation and its page. If the * *style* -- 1-byte border style: **"S"** (Solid) = solid line surrounding the annotation, **"D"** (Dashed) = dashed line surrounding the annotation, the dash pattern is specified by the *dashes* entry, **"B"** (Beveled) = a simulated embossed rectangle that appears to be raised above the surface of the page, **"I"** (Inset) = a simulated engraved rectangle that appears to be recessed below the surface of the page, **"U"** (Underline) = a single line along the bottom of the annotation rectangle. - * *clouds* -- an integer indicating a "cloudy" border, where `n` is an integer `-1 <= n <= 2`. A value `n = 0` indicates a straight line (no clouds), 1 means small and 2 means large semi-circles, mimicking the cloudy appearance. If -1, then no specification is present. + * *clouds* -- an integer indicating a "cloudy" border, where ``n`` is an integer `-1 <= n <= 2`. A value `n = 0` indicates a straight line (no clouds), 1 means small and 2 means large semi-circles, mimicking the cloudy appearance. If -1, then no specification is present. :rtype: dict @@ -550,7 +580,7 @@ Example -------- Change the graphical image of an annotation. Also update the "author" and the text to be shown in the popup window:: - doc = fitz.open("circle-in.pdf") + doc = pymupdf.open("circle-in.pdf") page = doc[0] # page 0 annot = page.first_annot # get the annotation annot.set_border(dashes=[3]) # set dashes to "3 on, 3 off ..." diff --git a/docs/app1.rst b/docs/app1.rst index e6ba768de..f3b0c7169 100644 --- a/docs/app1.rst +++ b/docs/app1.rst @@ -94,7 +94,7 @@ Example output:: HTML ~~~~ -:meth:`TextPage.extractHTML` (or *Page.get_text("html")* output fully reflects the structure of the page's *TextPage* -- much like DICT / JSON below. This includes images, font information and text positions. If wrapped in HTML header and trailer code, it can readily be displayed by an internet browser. Our above example:: +:meth:`TextPage.extractHTML` (or *Page.get_text("html")* output fully reflects the structure of the page's ``TextPage`` -- much like DICT / JSON below. This includes images, font information and text positions. If wrapped in HTML header and trailer code, it can readily be displayed by an internet browser. Our above example:: >>> for line in page.get_text("html").splitlines(): print(line) @@ -159,7 +159,7 @@ To address the font issue, you can use a simple utility script to scan through t DICT (or JSON) ~~~~~~~~~~~~~~~~ -:meth:`TextPage.extractDICT` (or *Page.get_text("dict", sort=False)*) output fully reflects the structure of a *TextPage* and provides image content and position detail (*bbox* -- boundary boxes in pixel units) for every block, line and span. Images are stored as *bytes* for DICT output and base64 encoded strings for JSON output. +:meth:`TextPage.extractDICT` (or *Page.get_text("dict", sort=False)*) output fully reflects the structure of a ``TextPage`` and provides image content and position detail (*bbox* -- boundary boxes in pixel units) for every block, line and span. Images are stored as *bytes* for DICT output and base64 encoded strings for JSON output. For a visualization of the dictionary structure have a look at :ref:`textpagedict`. @@ -266,7 +266,7 @@ XHTML Text Extraction Flags Defaults ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* New in version 1.16.2: Method :meth:`Page.get_text` supports a keyword parameter *flags* *(int)* to control the amount and the quality of extracted data. The following table shows the defaults settings (flags parameter omitted or None) for each extraction variant. If you specify flags with a value other than *None*, be aware that you must set **all desired** options. A description of the respective bit settings can be found in :ref:`TextPreserve`. +* New in version 1.16.2: Method :meth:`Page.get_text` supports a keyword parameter *flags* *(int)* to control the amount and the quality of extracted data. The following table shows the defaults settings (flags parameter omitted or None) for each extraction variant. If you specify flags with a value other than ``None``, be aware that you must set **all desired** options. A description of the respective bit settings can be found in :ref:`TextPreserve`. * New in v1.19.6: The default combinations in the following table are now available as Python constants: :data:`TEXTFLAGS_TEXT`, :data:`TEXTFLAGS_WORDS`, :data:`TEXTFLAGS_BLOCKS`, :data:`TEXTFLAGS_DICT`, :data:`TEXTFLAGS_RAWDICT`, :data:`TEXTFLAGS_HTML`, :data:`TEXTFLAGS_XHTML`, :data:`TEXTFLAGS_XML`, and :data:`TEXTFLAGS_SEARCH`. You can now easily modify a default flag, e.g. @@ -283,16 +283,17 @@ Text Extraction Flags Defaults `flags = TEXTFLAGS_SEARCH & ~TEXT_DEHYPHENATE` -=================== ==== ==== ===== === ==== ======= ===== ====== ====== -Indicator text html xhtml xml dict rawdict words blocks search -=================== ==== ==== ===== === ==== ======= ===== ====== ====== -preserve ligatures 1 1 1 1 1 1 1 1 1 -preserve whitespace 1 1 1 1 1 1 1 1 1 -preserve images n/a 1 1 n/a 1 1 n/a 0 0 -inhibit spaces 0 0 0 0 0 0 0 0 0 -dehyphenate 0 0 0 0 0 0 0 0 1 -clip to mediabox 1 1 1 1 1 1 1 1 1 -=================== ==== ==== ===== === ==== ======= ===== ====== ====== +========================= ==== ==== ===== === ==== ======= ===== ====== ====== +Indicator text html xhtml xml dict rawdict words blocks search +========================= ==== ==== ===== === ==== ======= ===== ====== ====== +preserve ligatures 1 1 1 1 1 1 1 1 0 +preserve whitespace 1 1 1 1 1 1 1 1 1 +preserve images n/a 1 1 n/a 1 1 n/a 0 0 +inhibit spaces 0 0 0 0 0 0 0 0 0 +dehyphenate 0 0 0 0 0 0 0 0 1 +clip to mediabox 1 1 1 1 1 1 1 1 1 +use CID instead of U+FFFD 1 1 1 1 1 1 1 1 0 +========================= ==== ==== ===== === ==== ======= ===== ====== ====== * **search** refers to the text search function. * **"json"** is handled exactly like **"dict"** and is hence left out. @@ -300,7 +301,7 @@ clip to mediabox 1 1 1 1 1 1 1 1 1 * An "n/a" specification means a value of 0 and setting this bit never has any effect on the output (but an adverse effect on performance). * If you are not interested in images when using an output variant which includes them by default, then by all means set the respective bit off: You will experience a better performance and much lower space requirements. -To show the effect of *TEXT_INHIBIT_SPACES* have a look at this example:: +To show the effect of `TEXT_INHIBIT_SPACES` have a look at this example:: >>> print(page.get_text("text")) H a l l o ! @@ -309,7 +310,7 @@ To show the effect of *TEXT_INHIBIT_SPACES* have a look at this example:: i n E n g l i s h . . . l e t ' s s e e w h a t h a p p e n s . - >>> print(page.get_text("text", flags=fitz.TEXT_INHIBIT_SPACES)) + >>> print(page.get_text("text", flags=pymupdf.TEXT_INHIBIT_SPACES)) Hallo! More text is following diff --git a/docs/app2.rst b/docs/app2.rst index 30a0849ed..fafdb6777 100644 --- a/docs/app2.rst +++ b/docs/app2.rst @@ -32,6 +32,6 @@ PyMuPDF Support ------------------ We continue to support the full old API with respect to embedded files -- with only minor, cosmetic changes. -There even also is a new function, which delivers a list of all names under which embedded data are resgistered in a PDF, :meth:`Document.embfile_names`. +There even also is a new function, which delivers a list of all names under which embedded data are registered in a PDF, :meth:`Document.embfile_names`. .. include:: footer.rst diff --git a/docs/app3.rst b/docs/app3.rst index cbaa36510..086f21a47 100644 --- a/docs/app3.rst +++ b/docs/app3.rst @@ -20,15 +20,15 @@ The transformation matrix contains information about how an image was transforme The relationship between image dimension and its bbox on a page is the following: 1. Using the original image's width and height, - - define the image rectangle `imgrect = fitz.Rect(0, 0, width, height)` - - define the "shrink matrix" `shrink = fitz.Matrix(1/width, 0, 0, 1/height, 0, 0)`. + - define the image rectangle `imgrect = pymupdf.Rect(0, 0, width, height)` + - define the "shrink matrix" `shrink = pymupdf.Matrix(1/width, 0, 0, 1/height, 0, 0)`. -2. Transforming the image rectangle with its shrink matrix, will result in the unit rectangle: `imgrect * shrink = fitz.Rect(0, 0, 1, 1)`. +2. Transforming the image rectangle with its shrink matrix, will result in the unit rectangle: `imgrect * shrink = pymupdf.Rect(0, 0, 1, 1)`. 3. Using the image **transformation matrix** "transform", the following steps will compute the bbox:: - imgrect = fitz.Rect(0, 0, width, height) - shrink = fitz.Matrix(1/width, 0, 0, 1/height, 0, 0) + imgrect = pymupdf.Rect(0, 0, width, height) + shrink = pymupdf.Matrix(1/width, 0, 0, 1/height, 0, 0) bbox = imgrect * shrink * transform 4. Inspecting the matrix product `shrink * transform` will reveal all information about what happened to the image rectangle to make it fit into the bbox on the page: rotation, scaling of its sides and translation of its origin. Let us look at an example: @@ -39,8 +39,8 @@ The relationship between image dimension and its bbox on a page is the following >>> #------------------------------------------------ >>> # define image shrink matrix and rectangle >>> #------------------------------------------------ - >>> shrink = fitz.Matrix(1 / 439, 0, 0, 1 / 501, 0, 0) - >>> imgrect = fitz.Rect(0, 0, 439, 501) + >>> shrink = pymupdf.Matrix(1 / 439, 0, 0, 1 / 501, 0, 0) + >>> imgrect = pymupdf.Rect(0, 0, 439, 501) >>> #------------------------------------------------ >>> # determine image bbox and transformation matrix: >>> #------------------------------------------------ @@ -59,7 +59,7 @@ The relationship between image dimension and its bbox on a page is the following >>> # the above shows: >>> # image sides are scaled by same factor ~0.4, >>> # and the image is rotated by 90 degrees clockwise - >>> # compare this with fitz.Matrix(-90) * 0.4 + >>> # compare this with pymupdf.Matrix(-90) * 0.4 >>> #------------------------------------------------ @@ -71,7 +71,7 @@ PDF Base 14 Fonts --------------------- The following 14 builtin font names **must be supported by every PDF viewer** application. They are available as a dictionary, which maps their full names amd their abbreviations in lower case to the full font basename. Wherever a **fontname** must be provided in PyMuPDF, any **key or value** from the dictionary may be used:: - In [2]: fitz.Base14_fontdict + In [2]: pymupdf.Base14_fontdict Out[2]: {'courier': 'Courier', 'courier-oblique': 'Courier-Oblique', @@ -113,12 +113,12 @@ To see how these fonts can be used -- including the **CJK built-in** fonts -- lo Adobe PDF References --------------------------- -This PDF Reference manual published by Adobe is frequently quoted throughout this documentation. It can be viewed and downloaded from `here `_. +This PDF Reference manual published by Adobe is frequently quoted throughout this documentation. It can be viewed and downloaded from `opensource.adobe.com `_. -.. note:: For a long time, an older version was also available under `this `_ link. It seems to be taken off of the web site in October 2021. Earlier (pre 1.19.*) versions of the PyMuPDF documentation used to refer to this document. We have undertaken an effort to replace referrals to the current specification above. ------------ + .. _SequenceTypes: Using Python Sequences as Arguments in PyMuPDF @@ -137,16 +137,16 @@ For example, specifying a sequence `"s"` in any of the following ways will make it usable in the following example expressions: -* `fitz.Point(s)` -* `fitz.Point(x, y) + s` +* `pymupdf.Point(s)` +* `pymupdf.Point(x, y) + s` * `doc.select(s)` Similarly with all geometry objects :ref:`Rect`, :ref:`IRect`, :ref:`Matrix` and :ref:`Point`. Because all PyMuPDF geometry classes themselves are special cases of sequences, they (with the exception of :ref:`Quad` -- see below) can be freely used where numerical sequences can be used, e.g. as arguments for functions like *list()*, *tuple()*, *array.array()* or *numpy.array()*. Look at the following snippet to see this work. ->>> import fitz, array, numpy as np ->>> m = fitz.Matrix(1, 2, 3, 4, 5, 6) +>>> import pymupdf, array, numpy as np +>>> m = pymupdf.Matrix(1, 2, 3, 4, 5, 6) >>> >>> list(m) [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] @@ -170,11 +170,11 @@ Ensuring Consistency of Important Objects in PyMuPDF ------------------------------------------------------------ PyMuPDF is a Python binding for the C library MuPDF. While a lot of effort has been invested by MuPDF's creators to approximate some sort of an object-oriented behavior, they certainly could not overcome basic shortcomings of the C language in that respect. -Python on the other hand implements the OO-model in a very clean way. The interface code between PyMuPDF and MuPDF consists of two basic files: *fitz.py* and *fitz_wrap.c*. They are created by the excellent SWIG tool for each new version. +Python on the other hand implements the OO-model in a very clean way. The interface code between PyMuPDF and MuPDF consists of two basic files: *pymupdf.py* and *fitz_wrap.c*. They are created by the excellent SWIG tool for each new version. -When you use one of PyMuPDF's objects or methods, this will result in execution of some code in *fitz.py*, which in turn will call some C code compiled with *fitz_wrap.c*. +When you use one of PyMuPDF's objects or methods, this will result in execution of some code in *pymupdf.py*, which in turn will call some C code compiled with *fitz_wrap.c*. -Because SWIG goes a long way to keep the Python and the C level in sync, everything works fine, if a certain set of rules is being strictly followed. For example: **never access** a :ref:`Page` object, after you have closed (or deleted or set to *None*) the owning :ref:`Document`. Or, less obvious: **never access** a page or any of its children (links or annotations) after you have executed one of the document methods *select()*, *delete_page()*, *insert_page()* ... and more. +Because SWIG goes a long way to keep the Python and the C level in sync, everything works fine, if a certain set of rules is being strictly followed. For example: **never access** a :ref:`Page` object, after you have closed (or deleted or set to ``None``) the owning :ref:`Document`. Or, less obvious: **never access** a page or any of its children (links or annotations) after you have executed one of the document methods *select()*, *delete_page()*, *insert_page()* ... and more. But just no longer accessing invalidated objects is actually not enough: They should rather be actively deleted entirely, to also free C-level resources (meaning allocated memory). @@ -186,7 +186,7 @@ The required logic has therefore been built into PyMuPDF itself in the following 1. If a page "loses" its owning document or is being deleted itself, all of its currently existing annotations and links will be made unusable in Python, and their C-level counterparts will be deleted and deallocated. -2. If a document is closed (or deleted or set to *None*) or if its structure has changed, then similarly all currently existing pages and their children will be made unusable, and corresponding C-level deletions will take place. "Structure changes" include methods like *select()*, *delePage()*, *insert_page()*, *insert_pdf()* and so on: all of these will result in a cascade of object deletions. +2. If a document is closed (or deleted or set to ``None``) or if its structure has changed, then similarly all currently existing pages and their children will be made unusable, and corresponding C-level deletions will take place. "Structure changes" include methods like *select()*, *delePage()*, *insert_page()*, *insert_pdf()* and so on: all of these will result in a cascade of object deletions. The programmer will normally not realize any of this. If he, however, tries to access invalidated objects, exceptions will be raised. @@ -213,11 +213,11 @@ RuntimeError: orphaned object: parent is None This shows the cascading effect: ->>> doc = fitz.open("some.pdf") +>>> doc = pymupdf.open("some.pdf") >>> page = doc[n] >>> annot = page.first_annot >>> page.rect -fitz.Rect(0.0, 0.0, 595.0, 842.0) +pymupdf.Rect(0.0, 0.0, 595.0, 842.0) >>> annot.type [5, 'Circle'] >>> del doc # or doc = None or doc.close() @@ -294,11 +294,131 @@ This design approach ensures that: .. _RedirectMessages: -Redirecting Error and Warning Messages +Diagnostics +---------------------------- + +.. _Messages: + +|PyMuPDF| messages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|PyMuPDF| has a Message system for showing text diagnostics. + +By default messages are written to `sys.stdout`. This can be controlled in +two ways: + +* + Set environment variable `PYMUPDF_MESSAGE` before |PyMuPDF| is imported. + +* + Call `set_messages()`: + + +|MuPDF| errors and warnings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|MuPDF| generates text errors and warnings. + +* + These errors and warnings are appended to an internal list, accessible with + `Tools.mupdf_warnings()`. Also see `Tools.reset_mupdf_warnings()`. + +* + By default these errors and warnings are also sent to the |PyMuPDF| message + system. + + * This can be controlled with `mupdf_display_errors()` and + `mupdf_display_warnings()`. + + * + These messages are prefixed with `MuPDF error:` and `MuPDF warning:` + respectively. + +Some |MuPDF| errors may lead to Python exceptions. + +Example output for a **recoverable error**. We are opening a damaged PDF, but MuPDF is able to repair it and gives us a little information on what happened. Then we illustrate how to find out whether the document can later be saved incrementally. Checking the :attr:`Document.is_dirty` attribute at this point also indicates that during `pymupdf.open` the document had to be repaired: + +>>> import pymupdf +>>> doc = pymupdf.open("damaged-file.pdf") # leads to a sys.stderr message: +mupdf: cannot find startxref +>>> print(pymupdf.TOOLS.mupdf_warnings()) # check if there is more info: +cannot find startxref +trying to repair broken xref +repairing PDF document +object missing 'endobj' token +>>> doc.can_save_incrementally() # this is to be expected: +False +>>> # the following indicates whether there are updates so far +>>> # this is the case because of the repair actions: +>>> doc.is_dirty +True +>>> # the document has nevertheless been created: +>>> doc +pymupdf.Document('damaged-file.pdf') +>>> # we now know that any save must occur to a new file + +Example output for an **unrecoverable error**: + +>>> import pymupdf +>>> doc = pymupdf.open("does-not-exist.pdf") +mupdf: cannot open does-not-exist.pdf: No such file or directory +Traceback (most recent call last): + File "", line 1, in + doc = pymupdf.open("does-not-exist.pdf") + File "C:\Users\Jorj\AppData\Local\Programs\Python\Python37\lib\site-packages\fitz\pymupdf.py", line 2200, in __init__ + _pymupdf.Document_swiginit(self, _pymupdf.new_Document(filename, stream, filetype, rect, width, height, fontsize)) +RuntimeError: cannot open does-not-exist.pdf: No such file or directory +>>> + + + +.. _Coordinates: + +Coordinates -------------------------------------------- -Since MuPDF version 1.16 error and warning messages can be redirected via an official plugin. -PyMuPDF will put error messages to `sys.stderr` prefixed with the string "mupdf:". Warnings are internally stored and can be accessed via *fitz.TOOLS.mupdf_warnings()*. There also is a function to empty this store. + +This is one of the most frequently used terms in this documentation. A **coordinate** generally means a pair of numbers `(x, y)` referring to some location, like a corner of a rectangle (:ref:`Rect`), a :ref:`Point` and so forth. The two values usually are floats, but there a objects like images which only allow them to be integers. + +To actually *find* a coordinate's location, we also need to know the *reference* point for ``x`` and ``y`` - in other words, we must know where location `(0, 0)` is positioned. Once `(0, 0)` (the "origin") is known, we speak of a "coordinate system". + +Several coordinate systems exist in document processing. For instance, the coordinate systems of a PDF page and the image created from it are **different**. We therefore need ways to *transform* coordinates from one system to another (and also back occasionally). This is the task of a :ref:`Matrix`. It is a mathematical function which works much like a factor that can be "multiplied" with a point or rectangle to give us the corresponding point / rectangle in another coordinate system. The inverse of a transformation matrix can be used to revert the transformation. Much like multiplying by some factor, say 3, can be reverted by dividing the result by 3 (or multiplying it with 1/3). + +Coordinates and Images +~~~~~~~~~~~~~~~~~~~~~~~ + +Images have a coordinate system with integer coordinates. Origin `(0, 0)` is the top-left point. ``x`` values must be in `range(width)`, and ``y`` values in `range(height)`. Therefore, ``y`` values *increase* if we go *downwards*. For every image, there is only a **finite number** of coordinates, namely `width * height`. A location in an image is also called a "pixel". + +- How **large** an image will be (in centimeters or inches) when e.g. printed, depends on additional information: the "resolution". This is measured in **DPI** (dots per inch, or pixels per inch). To find the printed size of some image, we therefore must divide its width and its height by the corresponding DPI values (there may separate ones for width and for height) and will get the respective number of inches. + + +Origin Point, Point Size and Y-Axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In |PDF|, the origin `(0, 0)` of a page is located at its **bottom-left point**. In |MuPDF|, the origin `(0, 0)` of a page is located at its **top-left point**. + + +.. image:: images/img-coordinate-space.png + +Coordinates are float numbers and measured in **points**, where: + +- **one point equals 1/72 inches**. + +Typical document page sizes are **ISO A4** and **Letter**. A **Letter** page has a size of **8.5 x 11 inches**, corresponding to **612 x 792 points**. In the |PDF| coordinate system, the top-left point of a **Letter** page hence has the coordinate `(0, 792)` as **the y-axis points upwards**. Now we know our document size the |MuPDF| coordinate system for the bottom right would be coordinate `(612, 792)` (and for |PDF| this coordinate would then be `(612,0)`). + +- Theoretically, there are **infinitely many** coordinate positions on a |PDF| page. In practice however, at most the first 5 decimal places are sufficient for a reasonable precision. + + +- In |MuPDF|, multiple document formats are supported - |PDF| just being one among **over a dozen others**. Images are also supported as documents in |MuPDF| (therefore having one page usually). This is one of the reasons why |MuPDF| uses a coordinate system with the origin `(0, 0)` being the **top-left** point of any document page. **The y-axis points downwards**, like with images. Coordinates in |MuPDF| in any case are floats, like in |PDF|. + +- A rectangle `Rect(0, 0, 100, 100)` for instance in |MuPDF| (and thus |PyMuPDF|) therefore is a square with edges of length 100 points (= 1.39 inches or 3.53 centimeters). Its top-left corner is the origin. To switch between the two coordinate systems |PDF| to |MuPDF|, every :ref:`Page` object has a :attr:`Page.transformation_matrix`. Its inverse can be used to compute a rectangle's PDF coordinates. In this way we can conveniently find that `Rect(0, 0, 100, 100)` in |MuPDF| is the same as `Rect(0, 692, 100, 792)` in |PDF|. See this code snippet:: + + >>> page = doc.new_page(width=612, height=792) # make new Letter page + >>> ptm = page.transformation_matrix + >>> # the inverse matrix of ptm is ~ptm + >>> pymupdf.Rect(0, 0, 100, 100) * ~ptm + Rect(0.0, 692.0, 100.0, 792.0) + .. rubric:: Footnotes @@ -308,4 +428,7 @@ PyMuPDF will put error messages to `sys.stderr` prefixed with the string "mupdf: 1. The target PDF is not new / empty: grafting does not check for resources that already existed (e.g. images, fonts) in the target document before opening it. 2. Using :meth:`Page.show_pdf_page` for more than one source document: each grafting occurs **within one source** PDF only, not across multiple. So if e.g. the same image exists in pages from different source PDFs, then this will not be detected until garbage collection. + + + .. include:: footer.rst diff --git a/docs/app4.rst b/docs/app4.rst index 7d290376f..26037fc98 100644 --- a/docs/app4.rst +++ b/docs/app4.rst @@ -19,9 +19,9 @@ The following three sections deal with different performance aspects: * :ref:`Document Copying` - This includes opening and parsing :title:`PDFs`, then writing them to an output file. Because the same basic activities are also used for joining (merging) :title:`PDFs`, the results also apply to these use cases. * :ref:`Text Extraction` - This extracts plain text from :title:`PDFs` and writes it to an output text file. -* :ref:`Page Rendering` - This converts :title:`PDF` pages to image files looking identical to the pages. This ability is the basic prerequisite for using a tool in :title:`Python GUI` scripts to scroll through documents. We have chosen a medium-quality (resolution 150 DPI) version. +* :ref:`Page Rendering` - This converts |PDF| pages to image files looking identical to the pages. This ability is the basic prerequisite for using a tool in :title:`Python GUI` scripts to scroll through documents. We have chosen a medium-quality (resolution 150 DPI) version. -Please note that in all cases the actual speed in dealing with :title:`PDF` structures is not directly measured: instead, the timings also include the durations of writing files to the operating system's file system. This cannot be avoided because tools other than :title:`PyMuPDF` do not offer the option to e.g., separate the image **creation** step from the following step, which **writes** the image into a file. +Please note that in all cases the actual speed in dealing with |PDF| structures is not directly measured: instead, the timings also include the durations of writing files to the operating system's file system. This cannot be avoided because tools other than |PyMuPDF| do not offer the option to e.g., separate the image **creation** step from the following step, which **writes** the image into a file. So all timings documented include a common, OS-oriented base effort. Therefore, performance **differences per tool are actually larger** than the numbers suggest. @@ -55,7 +55,7 @@ A set of eight files is used for the performance testing. With each file we have - **KB/page** - **Textsize/page** - **Notes** - * - `adobe.pdf`_ + * - `adobe.pdf` - 32,472,771 - 1,310 - 794 @@ -63,7 +63,7 @@ A set of eight files is used for the performance testing. With each file we have - 24 - 1,942 - linearized, many links / bookmarks - * - `artifex-website.pdf`_ + * - `artifex-website.pdf` - 31,570,732 - 47 - 46 @@ -71,7 +71,7 @@ A set of eight files is used for the performance testing. With each file we have - 656 - 3,538 - graphics oriented - * - `db-systems.pdf`_ + * - `db-systems.pdf` - 29,326,355 - 1,241 - 0 @@ -79,7 +79,7 @@ A set of eight files is used for the performance testing. With each file we have - 23 - 2,142 - - * - `fontforge.pdf`_ + * - `fontforge.pdf` - 8,222,384 - 214 - 31 @@ -87,7 +87,7 @@ A set of eight files is used for the performance testing. With each file we have - 38 - 1,058 - mix of text & graphics - * - `pandas.pdf`_ + * - `pandas.pdf` - 10,585,962 - 3,071 - 536 @@ -95,7 +95,7 @@ A set of eight files is used for the performance testing. With each file we have - 3 - 1,539 - many pages - * - `pymupdf.pdf`_ + * - `pymupdf.pdf` - 6,805,176 - 478 - 276 @@ -103,7 +103,7 @@ A set of eight files is used for the performance testing. With each file we have - 14 - 1,937 - text oriented - * - `pythonbook.pdf`_ + * - `pythonbook.pdf` - 9,983,856 - 669 - 198 @@ -111,7 +111,7 @@ A set of eight files is used for the performance testing. With each file we have - 15 - 1,929 - - * - `sample-50-MB-pdf-file.pdf`_ + * - `sample-50-MB-pdf-file.pdf` - 52,521,850 - 1 - 0 @@ -130,7 +130,7 @@ A set of eight files is used for the performance testing. With each file we have Tools used ------------- -In each section, the same fixed set of :title:`PDF` files is being processed by a set of tools. The set of tools used per performance aspect however varies, depending on the supported tool features. +In each section, the same fixed set of |PDF| files is being processed by a set of tools. The set of tools used per performance aspect however varies, depending on the supported tool features. All tools are either platform independent, or at least can run on both, :title:`Windows` and :title:`Unix` / :title:`Linux`. @@ -140,20 +140,20 @@ All tools are either platform independent, or at least can run on both, :title:` * - **Tool** - **Description** - * - :title:`PyMuPDF` + * - |PyMuPDF| - The tool of this manual. * - PDFrw_ - A pure :title:`Python` tool, being used by :title:`rst2pdf`, has interface to :title:`ReportLab`. * - PyPDF2_ - A pure :title:`Python` tool with a large function set. * - PDFMiner_ - - A pure :title:`Python` to extract text and other data from :title:`PDF`. + - A pure :title:`Python` to extract text and other data from |PDF|. * - XPDF_ - A command line utility with multiple functions. * - PikePDF_ - A :title:`Python` package similar to :title:`PDFrw`, but based on :title:`C++` library :title:`QPDF`. * - PDF2JPG_ - - A :title:`Python` package specialized on rendering :title:`PDF` pages to :title:`JPG` images. + - A :title:`Python` package specialized on rendering |PDF| pages to :title:`JPG` images. @@ -163,18 +163,18 @@ All tools are either platform independent, or at least can run on both, :title:` Copying / Joining / Merging ---------------------------------- -How fast is a :title:`PDF` file read and its content parsed for further processing? The sheer parsing performance cannot directly be compared, because batch utilities always execute a requested task completely, in one go, front to end. :title:`PDFrw` too, has a *lazy* strategy for parsing, meaning it only parses those parts of a document that are required in any moment. +How fast is a |PDF| file read and its content parsed for further processing? The sheer parsing performance cannot directly be compared, because batch utilities always execute a requested task completely, in one go, front to end. :title:`PDFrw` too, has a *lazy* strategy for parsing, meaning it only parses those parts of a document that are required in any moment. -To find an answer to the question, we therefore measure the time to copy a :title:`PDF` file to an output file with each tool, and do nothing else. +To find an answer to the question, we therefore measure the time to copy a |PDF| file to an output file with each tool, and do nothing else. These are the :title:`Python` commands for how each tool is used: -:title:`PyMuPDF` +|PyMuPDF| .. code-block:: python - import fitz - doc = fitz.open("input.pdf") + import pymupdf + doc = pymupdf.open("input.pdf") doc.save("output.pdf") :title:`PDFrw` @@ -209,13 +209,13 @@ These are the :title:`Python` commands for how each tool is used: **Observations** -These are our run time findings in **seconds** along with a base rate summary compared to :title:`PyMuPDF`: +These are our run time findings in **seconds** along with a base rate summary compared to |PyMuPDF|: .. list-table:: :header-rows: 1 * - **Name** - - **PyMuPDF** + - |PyMuPDF| - **PDFrw** - **PikePDF** - **PyPDF2** @@ -287,13 +287,13 @@ The following table shows plain text extraction durations. All tools have been u **Observations** -These are our run time findings in **seconds** along with a base rate summary compared to :title:`PyMuPDF`: +These are our run time findings in **seconds** along with a base rate summary compared to |PyMuPDF|: .. list-table:: :header-rows: 1 * - **Name** - - **PyMuPDF** + - |PyMuPDF| - **XPDF** - **PyPDF2** - **PDFMiner** @@ -359,20 +359,20 @@ These are our run time findings in **seconds** along with a base rate summary co Page Rendering -------------------------- -We have tested rendering speed of :title:`PyMuPDF` against :title:`pdf2jpg` and :title:`XPDF` at a resolution of 150 DPI, +We have tested rendering speed of |PyMuPDF| against :title:`pdf2jpg` and :title:`XPDF` at a resolution of 150 DPI, These are the :title:`Python` commands for how each tool is used: -:title:`PyMuPDF` +|PyMuPDF| .. code-block:: python def ProcessFile(datei): print "processing:", datei - doc=fitz.open(datei) - for p in fitz.Pages(doc): + doc=pymupdf.open(datei) + for p in pymupdf.Pages(doc): pix = p.get_pixmap(dpi=150) pix.save("t-%s.png" % p.number) pix = None @@ -398,14 +398,14 @@ These are the :title:`Python` commands for how each tool is used: **Observations** -These are our run time findings in **seconds** along with a base rate summary compared to :title:`PyMuPDF`: +These are our run time findings in **seconds** along with a base rate summary compared to |PyMuPDF|: .. list-table:: :header-rows: 1 * - **Name** - - **PyMuPDF** + - |PyMuPDF| - **XPDF** - **PDF2JPG** * - adobe.pdf diff --git a/docs/archive-class.rst b/docs/archive-class.rst index 7efc2f557..ab3c0b7c5 100644 --- a/docs/archive-class.rst +++ b/docs/archive-class.rst @@ -42,7 +42,7 @@ In PyMuPDF, archives are currently only used by :ref:`Story` objects to specify * a Python binary object (`bytes`, `bytearray`, `io.BytesIO`): this will add a single-member sub-archive. In this case, the `path` parameter is **mandatory** and should be the member name under which this item can be found / retrieved. - * a tuple `(data, name)`: This will add a single-member sub-archive with the member name `name`. `data` may be a Python binary object or a local file name (in which case its binary file content is used). Use this format if you need to specify `path`. + * a tuple `(data, name)`: This will add a single-member sub-archive with the member name ``name``. ``data`` may be a Python binary object or a local file name (in which case its binary file content is used). Use this format if you need to specify `path`. * a Python sequence: This is a convenience format to specify any combination of the above. @@ -56,7 +56,7 @@ In PyMuPDF, archives are currently only used by :ref:`Story` objects to specify .. method:: add(content [,path]) - Append a sub-archive. The meaning of the parameters are exactly the same as explained above. Of course, parametrer `content` is not optional here. + Append a sub-archive. The meaning of the parameters are exactly the same as explained above. Of course, parameter `content` is not optional here. .. method:: has_entry(name) @@ -85,12 +85,12 @@ In PyMuPDF, archives are currently only used by :ref:`Story` objects to specify **Example:** >>> from pprint import pprint - >>> import fitz + >>> import pymupdf >>> dir1 = "fitz-32" # a folder name >>> dir2 = "fitz-64" # a folder name >>> img = ("nur-ruhig.jpg", "img") # an image file >>> members = (dir1, img, dir2) # we want to append these in one go - >>> arch = fitz.Archive() + >>> arch = pymupdf.Archive() >>> arch.add(members, path="mypath") >>> pprint(arch.entry_list) [{'entries': ['310', '37', '38', '39'], 'fmt': 'dir', 'path': 'mypath'}, diff --git a/docs/colors.rst b/docs/colors.rst index 1cd019b5f..b8e6dee56 100644 --- a/docs/colors.rst +++ b/docs/colors.rst @@ -16,19 +16,19 @@ Function *getColor()* As the color database may not be needed very often, one additional import statement seems acceptable to get access to it:: >>> # "getColor" is the only method you really need - >>> from fitz.utils import getColor + >>> from pymupdf.utils import getColor >>> getColor("aliceblue") (0.9411764705882353, 0.9725490196078431, 1.0) >>> # >>> # to get a list of all existing names - >>> from fitz.utils import getColorList + >>> from pymupdf.utils import getColorList >>> cl = getColorList() >>> cl ['ALICEBLUE', 'ANTIQUEWHITE', 'ANTIQUEWHITE1', 'ANTIQUEWHITE2', 'ANTIQUEWHITE3', 'ANTIQUEWHITE4', 'AQUAMARINE', 'AQUAMARINE1'] ... >>> # >>> # to see the full integer color coding - >>> from fitz.utils import getColorInfoList + >>> from pymupdf.utils import getColorInfoList >>> il = getColorInfoList() >>> il [('ALICEBLUE', 240, 248, 255), ('ANTIQUEWHITE', 250, 235, 215), diff --git a/docs/colorspace.rst b/docs/colorspace.rst index 2e4e3e37e..77cf3e109 100644 --- a/docs/colorspace.rst +++ b/docs/colorspace.rst @@ -21,13 +21,13 @@ Represents the color space of a :ref:`Pixmap`. .. attribute:: name - The name identifying the colorspace. Example: *fitz.csCMYK.name = 'DeviceCMYK'*. + The name identifying the colorspace. Example: *pymupdf.csCMYK.name = 'DeviceCMYK'*. :type: str .. attribute:: n - The number of bytes required to define the color of one pixel. Example: *fitz.csCMYK.n == 4*. + The number of bytes required to define the color of one pixel. Example: *pymupdf.csCMYK.n == 4*. :type: int @@ -36,8 +36,8 @@ Represents the color space of a :ref:`Pixmap`. For saving some typing effort, there exist predefined colorspace objects for the three available cases. - * :data:`csRGB` = *fitz.Colorspace(fitz.CS_RGB)* - * :data:`csGRAY` = *fitz.Colorspace(fitz.CS_GRAY)* - * :data:`csCMYK` = *fitz.Colorspace(fitz.CS_CMYK)* + * :data:`csRGB` = *pymupdf.Colorspace(pymupdf.CS_RGB)* + * :data:`csGRAY` = *pymupdf.Colorspace(pymupdf.CS_GRAY)* + * :data:`csCMYK` = *pymupdf.Colorspace(pymupdf.CS_CMYK)* .. include:: footer.rst \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index ee6162063..353c3464b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. # extensions = ["sphinx.ext.autodoc", "sphinx.ext.coverage", "sphinx.ext.ifconfig"] -extensions = [] +extensions = ['sphinx_copybutton','notfound.extension','sphinxcontrib.googleanalytics'] # rst2pdf is not available on OpenBSD. if hasattr(os, "uname") and os.uname()[0] != "OpenBSD": extensions.append("rst2pdf.pdfbuilder") @@ -30,12 +30,17 @@ # The suffix of source filenames. source_suffix = ".rst" +# from: pip install sphinxcontrib-googleanalytics +googleanalytics_id = "G-JZTN4VTL9M" + # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. root_doc = "index" +rst_epilog = '' + # General information about the project. project = "PyMuPDF" thisday = datetime.date.today() @@ -46,19 +51,28 @@ # built documents. # # The full version, including alpha/beta/rc tags. -_path = os.path.abspath(f'{__file__}/../../fitz/version.i') -with open(_path) as f: - for line in f: - match = re.search('VersionBind = "([0-9][.][0-9]+[.][0-9])"', line) - if match: - release = match.group(1) - print(f'{__file__}: setting version from {_path}: {release}') - break - else: - raise Exception(f'Failed to find `VersionBind = ...` in {_path}') - -# The short X.Y version -version = release + +# PyMuPDF version is set in setup.py, so we import it here. +sys.path.insert(0, os.path.abspath(f'{__file__}/../..')) +try: + import setup +finally: + del sys.path[0] +version = setup.version_p +del setup # Necessary otherwise sphinx seems to do `setup()`. + +# Supported Python versions are set in scripts.test.py. +sys.path.insert(0, os.path.abspath(f'{__file__}/../../scripts')) +try: + import test +finally: + del sys.path[0] +python_versions_minor = test.python_versions_minor +del test +python_versions_list = [f'3.{i}' for i in python_versions_minor] +python_versions = ', '.join(python_versions_list[:-1]) + f' and {python_versions_list[-1]}' +# Make `|python_versions|` available in .rst files. +rst_epilog += f'.. |python_versions| replace:: {python_versions}\n' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -98,6 +112,15 @@ # If true, keep warnings as "system message" paragraphs in the built documents. keep_warnings = False +# Localization vars + +gettext_uuid = True + +gettext_compact = False + +locale_dirs = ["locales"] + + # -- Options for HTML output ---------------------------------------------- @@ -146,8 +169,8 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] html_theme_options = { - "light_logo": "pymupdf-sidebar-logo-dark.png", - "dark_logo": "pymupdf-sidebar-logo-light.png", + "light_logo": "sidebar-logo-dark.svg", + "dark_logo": "sidebar-logo-light.svg", } # A list of CSS files. The entry must be a filename string or a tuple containing diff --git a/docs/converting-files.rst b/docs/converting-files.rst new file mode 100644 index 000000000..d27da3679 --- /dev/null +++ b/docs/converting-files.rst @@ -0,0 +1,97 @@ +.. include:: header.rst + +.. _ConvertingFiles: + +============================== +Converting Files +============================== + + + +Files to PDF +~~~~~~~~~~~~~~~~~~ + +:ref:`Document types supported by PyMuPDF` can easily be converted to |PDF| by using the :meth:`Document.convert_to_pdf` method. This method returns a buffer of data which can then be utilized by |PyMuPDF| to create a new |PDF|. + + + +**Example** + +.. code-block:: python + + import pymupdf + + xps = pymupdf.open("input.xps") + pdfbytes = xps.convert_to_pdf() + pdf = pymupdf.open("pdf", pdfbytes) + pdf.save("output.pdf") + + + +PDF to SVG +~~~~~~~~~~~~~~~~~~ + +Technically, as SVG files cannot be multipage, we must export each page as an SVG. + +To get an SVG representation of a page use the :meth:`Page.get_svg_image` method. + +**Example** + +.. code-block:: python + + import pymupdf + + doc = pymupdf.open("input.pdf") + page = doc[0] + + # Convert page to SVG + svg_content = page.get_svg_image() + + # Save to file + with open("output.svg", "w", encoding="utf-8") as f: + f.write(svg_content) + + doc.close() + + +PDF to Markdown +~~~~~~~~~~~~~~~~~ + +By utlilizing the :doc:`PyMuPDF4LLM API ` we are able to convert PDF to a Markdown representation. + +**Example** + +.. code-block:: python + + import pymupdf4llm + import pathlib + + md_text = pymupdf4llm.to_markdown("test.pdf") + print(md_text) + + pathlib.Path("4llm-output.md").write_bytes(md_text.encode()) + + +PDF to DOCX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the pdf2docx_ library which uses |PyMuPDF| to provide document conversion from |PDF| to **DOCX** format. + + + +**Example** + +.. code-block:: python + + from pdf2docx import Converter + + pdf_file = 'input.pdf' + docx_file = 'output.docx' + + # convert pdf to docx + cv = Converter(pdf_file) + cv.convert(docx_file) # all pages by default + cv.close() + + +.. include:: footer.rst diff --git a/docs/coop_low.rst b/docs/coop_low.rst index bd9cfd5f4..f898b247e 100644 --- a/docs/coop_low.rst +++ b/docs/coop_low.rst @@ -62,13 +62,13 @@ TextPage ~~~~~~~~~ If you do not need images extracted alongside the text of a page, you can set the following option: ->>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE +>>> flags = pymupdf.TEXT_PRESERVE_LIGATURES | pymupdf.TEXT_PRESERVE_WHITESPACE >>> tp = dl.get_textpage(flags) This will save ca. 25% overall execution time for the HTML, XHTML and JSON text extractions and **hugely** reduce the amount of storage (both, memory and disk space) if the document is graphics oriented. If you however do need images, use a value of 7 for flags: ->>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_IMAGES +>>> flags = pymupdf.TEXT_PRESERVE_LIGATURES | pymupdf.TEXT_PRESERVE_WHITESPACE | pymupdf.TEXT_PRESERVE_IMAGES .. include:: footer.rst diff --git a/docs/deprecated.rst b/docs/deprecated.rst deleted file mode 100644 index adba701bf..000000000 --- a/docs/deprecated.rst +++ /dev/null @@ -1,217 +0,0 @@ -.. Deprecated Names: - -* :index:`_isWrapped` -- :attr:`Page.is_wrapped` -* :index:`addCaretAnnot` -- :meth:`Page.add_caret_annot` -* :index:`addCircleAnnot` -- :meth:`Page.add_circle_annot` -* :index:`addFileAnnot` -- :meth:`Page.add_file_annot` -* :index:`addFreetextAnnot` -- :meth:`Page.add_freetext_annot` -* :index:`addHighlightAnnot` -- :meth:`Page.add_highlight_annot` -* :index:`addInkAnnot` -- :meth:`Page.add_ink_annot` -* :index:`addLineAnnot` -- :meth:`Page.add_line_annot` -* :index:`addPolygonAnnot` -- :meth:`Page.add_polygon_annot` -* :index:`addPolylineAnnot` -- :meth:`Page.add_polyline_annot` -* :index:`addRectAnnot` -- :meth:`Page.add_rect_annot` -* :index:`addRedactAnnot` -- :meth:`Page.add_redact_annot` -* :index:`addSquigglyAnnot` -- :meth:`Page.add_squiggly_annot` -* :index:`addStampAnnot` -- :meth:`Page.add_stamp_annot` -* :index:`addStrikeoutAnnot` -- :meth:`Page.add_strikeout_annot` -* :index:`addTextAnnot` -- :meth:`Page.add_text_annot` -* :index:`addUnderlineAnnot` -- :meth:`Page.add_underline_annot` -* :index:`addWidget` -- :meth:`Page.add_widget` -* :index:`chapterCount` -- :attr:`Document.chapter_count` -* :index:`chapterPageCount` -- :meth:`Document.chapter_page_count` -* :index:`cleanContents` -- :meth:`Page.clean_contents` -* :index:`clearWith` -- :meth:`Pixmap.clear_with` -* :index:`convertToPDF` -- :meth:`Document.convert_to_pdf` -* :index:`copyPage` -- :meth:`Document.copy_page` -* :index:`copyPixmap` -- :meth:`Pixmap.copy` -* :index:`CropBox` -- :attr:`Page.cropbox` -* :index:`CropBoxPosition` -- :attr:`Page.cropbox_position` -* :index:`deleteAnnot` -- :meth:`Page.delete_annot` -* :index:`deleteLink` -- :meth:`Page.delete_link` -* :index:`deletePage` -- :meth:`Document.delete_page` -* :index:`deletePageRange` -- :meth:`Document.delete_pages` -* :index:`deleteWidget` -- :meth:`Page.delete_widget` -* :index:`derotationMatrix` -- :attr:`Page.derotation_matrix` -* :index:`drawBezier` -- :meth:`Page.draw_bezier` -* :index:`drawBezier` -- :meth:`Shape.draw_bezier` -* :index:`drawCircle` -- :meth:`Page.draw_circle` -* :index:`drawCircle` -- :meth:`Shape.draw_circle` -* :index:`drawCurve` -- :meth:`Page.draw_curve` -* :index:`drawCurve` -- :meth:`Shape.draw_curve` -* :index:`drawLine` -- :meth:`Page.draw_line` -* :index:`drawLine` -- :meth:`Shape.draw_line` -* :index:`drawOval` -- :meth:`Page.draw_oval` -* :index:`drawOval` -- :meth:`Shape.draw_oval` -* :index:`drawPolyline` -- :meth:`Page.draw_polyline` -* :index:`drawPolyline` -- :meth:`Shape.draw_polyline` -* :index:`drawQuad` -- :meth:`Page.draw_quad` -* :index:`drawQuad` -- :meth:`Shape.draw_quad` -* :index:`drawRect` -- :meth:`Page.draw_rect` -* :index:`drawRect` -- :meth:`Shape.draw_rect` -* :index:`drawSector` -- :meth:`Page.draw_sector` -* :index:`drawSector` -- :meth:`Shape.draw_sector` -* :index:`drawSquiggle` -- :meth:`Page.draw_squiggle` -* :index:`drawSquiggle` -- :meth:`Shape.draw_squiggle` -* :index:`drawZigzag` -- :meth:`Page.draw_zigzag` -* :index:`drawZigzag` -- :meth:`Shape.draw_zigzag` -* :index:`embeddedFileAdd` -- :meth:`Document.embfile_add` -* :index:`embeddedFileCount` -- :meth:`Document.embfile_count` -* :index:`embeddedFileDel` -- :meth:`Document.embfile_del` -* :index:`embeddedFileGet` -- :meth:`Document.embfile_get` -* :index:`embeddedFileInfo` -- :meth:`Document.embfile_info` -* :index:`embeddedFileNames` -- :meth:`Document.embfile_names` -* :index:`embeddedFileUpd` -- :meth:`Document.embfile_upd` -* :index:`extractFont` -- :meth:`Document.extract_font` -* :index:`extractImage` -- :meth:`Document.extract_image` -* :index:`fileGet` -- :meth:`Annot.get_file` -* :index:`fileUpd` -- :meth:`Annot.update_file` -* :index:`fillTextbox` -- :meth:`TextWriter.fill_textbox` -* :index:`findBookmark` -- :meth:`Document.find_bookmark` -* :index:`firstAnnot` -- :attr:`Page.first_annot` -* :index:`firstLink` -- :attr:`Page.first_link` -* :index:`firstWidget` -- :attr:`Page.first_widget` -* :index:`fullcopyPage` -- :meth:`Document.fullcopy_page` -* :index:`gammaWith` -- :meth:`Pixmap.gamma_with` -* :index:`getArea` -- :meth:`Rect.get_area` -* :index:`getArea` -- :meth:`IRect.get_area` -* :index:`getCharWidths` -- :meth:`Document.get_char_widths` -* :index:`getContents` -- :meth:`Page.get_contents` -* :index:`getDisplayList` -- :meth:`Page.get_displaylist` -* :index:`getDrawings` -- :meth:`Page.get_drawings` -* :index:`getFontList` -- :meth:`Page.get_fonts` -* :index:`getImageBbox` -- :meth:`Page.get_image_bbox` -* :index:`getImageData` -- :meth:`Pixmap.tobytes` -* :index:`getImageList` -- :meth:`Page.get_images` -* :index:`getLinks` -- :meth:`Page.get_links` -* :index:`getOCGs` -- :meth:`Document.get_ocgs` -* :index:`getPageFontList` -- :meth:`Document.get_page_fonts` -* :index:`getPageImageList` -- :meth:`Document.get_page_images` -* :index:`getPagePixmap` -- :meth:`Document.get_page_pixmap` -* :index:`getPageText` -- :meth:`Document.get_page_text` -* :index:`getPageXObjectList` -- :meth:`Document.get_page_xobjects` -* :index:`getPDFnow` -- :meth:`get_pdf_now` -* :index:`getPDFstr` -- :meth:`get_pdf_str` -* :index:`getPixmap` -- :meth:`Page.get_pixmap` -* :index:`getPixmap` -- :meth:`Annot.get_pixmap` -* :index:`getPixmap` -- :meth:`DisplayList.get_pixmap` -* :index:`getPNGData` -- :meth:`Pixmap.tobytes` -* :index:`getPNGdata` -- :meth:`Pixmap.tobytes` -* :index:`getRectArea` -- :meth:`Rect.get_area` -* :index:`getRectArea` -- :meth:`IRect.get_area` -* :index:`getSigFlags` -- :meth:`Document.get_sigflags` -* :index:`getSVGimage` -- :meth:`Page.get_svg_image` -* :index:`getText` -- :meth:`Page.get_text` -* :index:`getText` -- :meth:`Annot.get_text` -* :index:`getTextBlocks` -- :meth:`Page.get_text_blocks` -* :index:`getTextbox` -- :meth:`Page.get_textbox` -* :index:`getTextbox` -- :meth:`Annot.get_textbox` -* :index:`getTextLength` -- :meth:`get_text_length` -* :index:`getTextPage` -- :meth:`Page.get_textpage` -* :index:`getTextPage` -- :meth:`Annot.get_textpage` -* :index:`getTextPage` -- :meth:`DisplayList.get_textpage` -* :index:`getTextWords` -- :meth:`Page.get_text_words` -* :index:`getToC` -- :meth:`Document.get_toc` -* :index:`getXmlMetadata` -- :meth:`Document.get_xml_metadata` -* :index:`ImageProperties` -- :meth:`image_properties` -* :index:`includePoint` -- :meth:`Rect.include_point` -* :index:`includePoint` -- :meth:`IRect.include_point` -* :index:`includeRect` -- :meth:`Rect.include_rect` -* :index:`includeRect` -- :meth:`IRect.include_rect` -* :index:`insertFont` -- :meth:`Page.insert_font` -* :index:`insertImage` -- :meth:`Page.insert_image` -* :index:`insertLink` -- :meth:`Page.insert_link` -* :index:`insertPage` -- :meth:`Document.insert_page` -* :index:`insertPDF` -- :meth:`Document.insert_pdf` -* :index:`insertText` -- :meth:`Page.insert_text` -* :index:`insertText` -- :meth:`Shape.insert_text` -* :index:`insertTextbox` -- :meth:`Page.insert_textbox` -* :index:`insertTextbox` -- :meth:`Shape.insert_textbox` -* :index:`invertIRect` -- :meth:`Pixmap.invert_irect` -* :index:`isConvex` -- :attr:`Quad.is_convex` -* :index:`isDirty` -- :attr:`Document.is_dirty` -* :index:`isEmpty` -- :attr:`Rect.is_empty` -* :index:`isEmpty` -- :attr:`IRect.is_empty` -* :index:`isEmpty` -- :attr:`Quad.is_empty` -* :index:`isFormPDF` -- :attr:`Document.is_form_pdf` -* :index:`isInfinite` -- :attr:`Rect.is_infinite` -* :index:`isInfinite` -- :attr:`IRect.is_infinite` -* :index:`isPDF` -- :attr:`Document.is_pdf` -* :index:`isRectangular` -- :attr:`Quad.is_rectangular` -* :index:`isRectilinear` -- :attr:`Matrix.is_rectilinear` -* :index:`isReflowable` -- :attr:`Document.is_reflowable` -* :index:`isRepaired` -- :attr:`Document.is_repaired` -* :index:`isStream` -- :meth:`Document.is_stream` -* :index:`lastLocation` -- :attr:`Document.last_location` -* :index:`lineEnds` -- :attr:`Annot.line_ends` -* :index:`loadAnnot` -- :meth:`Page.load_annot` -* :index:`loadLinks` -- :meth:`Page.load_links` -* :index:`loadPage` -- :meth:`Document.load_page` -* :index:`makeBookmark` -- :meth:`Document.make_bookmark` -* :index:`MediaBox` -- :attr:`Page.mediabox` -* :index:`MediaBoxSize` -- :attr:`Page.mediabox_size` -* :index:`metadataXML` -- :meth:`Document.xref_xml_metadata` -* :index:`movePage` -- :meth:`Document.move_page` -* :index:`needsPass` -- :attr:`Document.needs_pass` -* :index:`newPage` -- :meth:`Document.new_page` -* :index:`newShape` -- :meth:`Page.new_shape` -* :index:`nextLocation` -- :meth:`Document.next_location` -* :index:`pageCount` -- :attr:`Document.page_count` -* :index:`pageCropBox` -- :meth:`Document.page_cropbox` -* :index:`pageXref` -- :meth:`Document.page_xref` -* :index:`PaperRect` -- :meth:`paper_rect` -* :index:`PaperSize` -- :meth:`paper_size` -* :index:`paperSizes` -- :attr:`paper_sizes` -* :index:`PDFCatalog` -- :meth:`Document.pdf_catalog` -* :index:`PDFTrailer` -- :meth:`Document.pdf_trailer` -* :index:`pillowData` -- :meth:`Pixmap.pil_tobytes` -* :index:`pillowWrite` -- :meth:`Pixmap.pil_save` -* :index:`planishLine` -- :meth:`planish_line` -* :index:`preRotate` -- :meth:`Matrix.prerotate` -* :index:`preScale` -- :meth:`Matrix.prescale` -* :index:`preShear` -- :meth:`Matrix.preshear` -* :index:`preTranslate` -- :meth:`Matrix.pretranslate` -* :index:`previousLocation` -- :meth:`Document.prev_location` -* :index:`readContents` -- :meth:`Page.read_contents` -* :index:`resolveLink` -- :meth:`Document.resolve_link` -* :index:`rotationMatrix` -- :attr:`Page.rotation_matrix` -* :index:`searchFor` -- :meth:`Page.search_for` -* :index:`searchPageFor` -- :meth:`Document.search_page_for` -* :index:`setAlpha` -- :meth:`Pixmap.set_alpha` -* :index:`setBlendMode` -- :meth:`Annot.set_blendmode` -* :index:`setBorder` -- :meth:`Annot.set_border` -* :index:`setColors` -- :meth:`Annot.set_colors` -* :index:`setCropBox` -- :meth:`Page.set_cropbox` -* :index:`setFlags` -- :meth:`Annot.set_flags` -* :index:`setInfo` -- :meth:`Annot.set_info` -* :index:`setLanguage` -- :meth:`Document.set_language` -* :index:`setLineEnds` -- :meth:`Annot.set_line_ends` -* :index:`setMediaBox` -- :meth:`Page.set_mediabox` -* :index:`setMetadata` -- :meth:`Document.set_metadata` -* :index:`setName` -- :meth:`Annot.set_name` -* :index:`setOC` -- :meth:`Annot.set_oc` -* :index:`setOpacity` -- :meth:`Annot.set_opacity` -* :index:`setOrigin` -- :meth:`Pixmap.set_origin` -* :index:`setPixel` -- :meth:`Pixmap.set_pixel` -* :index:`setRect` -- :meth:`Annot.set_rect` -* :index:`setRect` -- :meth:`Pixmap.set_rect` -* :index:`setResolution` -- :meth:`Pixmap.set_dpi` -* :index:`setRotation` -- :meth:`Page.set_rotation` -* :index:`setToC` -- :meth:`Document.set_toc` -* :index:`setXmlMetadata` -- :meth:`Document.set_xml_metadata` -* :index:`showPDFpage` -- :meth:`Page.show_pdf_page` -* :index:`soundGet` -- :meth:`Annot.get_sound` -* :index:`tintWith` -- :meth:`Pixmap.tint_with` -* :index:`transformationMatrix` -- :attr:`Page.transformation_matrix` -* :index:`updateLink` -- :meth:`Page.update_link` -* :index:`updateObject` -- :meth:`Document.update_object` -* :index:`updateStream` -- :meth:`Document.update_stream` -* :index:`wrapContents` -- :meth:`Page.wrap_contents` -* :index:`writeImage` -- :meth:`Pixmap.save` -* :index:`writePNG` -- :meth:`Pixmap.save` -* :index:`writeText` -- :meth:`Page.write_text` -* :index:`writeText` -- :meth:`TextWriter.write_text` -* :index:`xrefLength` -- :meth:`Document.xref_length` -* :index:`xrefObject` -- :meth:`Document.xref_object` -* :index:`xrefStream` -- :meth:`Document.xref_stream` -* :index:`xrefStreamRaw` -- :meth:`Document.xref_stream_raw` diff --git a/docs/device.rst b/docs/device.rst index 3f264162a..35f2d8c78 100644 --- a/docs/device.rst +++ b/docs/device.rst @@ -16,19 +16,19 @@ The different format handlers (pdf, xps, etc.) interpret pages to a "device". De Constructor for either a pixel map or a display list device. - :arg object: either a *Pixmap* or a *DisplayList*. + :arg object: either a ``Pixmap`` or a ``DisplayList``. :type object: :ref:`Pixmap` or :ref:`DisplayList` - :arg clip: An optional `IRect` for *Pixmap* devices to restrict rendering to a certain area of the page. If the complete page is required, specify *None*. For display list devices, this parameter must be omitted. + :arg clip: An optional `IRect` for ``Pixmap`` devices to restrict rendering to a certain area of the page. If the complete page is required, specify ``None``. For display list devices, this parameter must be omitted. :type clip: :ref:`IRect` .. method:: __init__(self, textpage, flags=0) Constructor for a text page device. - :arg textpage: *TextPage* object + :arg textpage: ``TextPage`` object :type textpage: :ref:`TextPage` - :arg int flags: control the way how text is parsed into the text page. Currently 3 options can be coded into this parameter, see :ref:`TextPreserve`. To set these options use something like *flags=0 | TEXT_PRESERVE_LIGATURES | ...*. + :arg int flags: control the way how text is parsed into the text page. Currently 3 options can be coded into this parameter, see :ref:`TextPreserve`. To set these options use something like `flags=0 | TEXT_PRESERVE_LIGATURES | ...`. .. include:: footer.rst diff --git a/docs/displaylist.rst b/docs/displaylist.rst index 8fcce8320..a96391c9d 100644 --- a/docs/displaylist.rst +++ b/docs/displaylist.rst @@ -37,7 +37,7 @@ A display list is populated with objects from a page, usually by executing :meth :arg mediabox: The page's rectangle. :type mediabox: :ref:`Rect` - :rtype: *DisplayList* + :rtype: ``DisplayList`` .. method:: run(device, matrix, area) @@ -60,7 +60,7 @@ A display list is populated with objects from a page, usually by executing :meth pair: clip; DisplayList.get_pixmap pair: alpha; DisplayList.get_pixmap - .. method:: get_pixmap(matrix=fitz.Identity, colorspace=fitz.csRGB, alpha=0, clip=None) + .. method:: get_pixmap(matrix=pymupdf.Identity, colorspace=pymupdf.csRGB, alpha=0, clip=None) Run the display list through a draw device and return a pixmap. diff --git a/docs/document-writer-class.rst b/docs/document-writer-class.rst index 515d50ce6..a2ee3d008 100644 --- a/docs/document-writer-class.rst +++ b/docs/document-writer-class.rst @@ -6,11 +6,14 @@ DocumentWriter ================ +|pdf_only_class| + + * New in v1.21.0 -This class represents a utility which can output various :ref:`document types supported by MuPDF`. +This class represents a utility which can output various :ref:`document types supported by PyMuPDF`. -In PyMuPDF only used for outputting PDF documents whose pages are populated by :ref:`Story` DOMs. +In |PyMuPDF| only used for outputting PDF documents whose pages are populated by :ref:`Story` DOMs. Using DocumentWriter_ also for other document types might happen in the future. diff --git a/docs/document.rst b/docs/document.rst index 1f8801e49..352347da9 100644 --- a/docs/document.rst +++ b/docs/document.rst @@ -10,7 +10,7 @@ Document This class represents a document. It can be constructed from a file or from memory. -There exists the alias *open* for this class, i.e. `fitz.Document(...)` and `fitz.open(...)` do exactly the same thing. +There exists the alias *open* for this class, i.e. `pymupdf.Document(...)` and `pymupdf.open(...)` do exactly the same thing. For details on **embedded files** refer to Appendix 3. @@ -30,6 +30,7 @@ For details on **embedded files** refer to Appendix 3. :meth:`Document.add_layer` PDF only: make new optional content configuration :meth:`Document.add_ocg` PDF only: add new optional content group :meth:`Document.authenticate` gain access to an encrypted document +:meth:`Document.bake` PDF only: make annotations / fields permanent content :meth:`Document.can_save_incrementally` check if incremental save is possible :meth:`Document.chapter_page_count` number of pages in chapter :meth:`Document.close` close the document @@ -95,7 +96,10 @@ For details on **embedded files** refer to Appendix 3. :meth:`Document.pdf_catalog` PDF only: :data:`xref` of catalog (root) :meth:`Document.pdf_trailer` PDF only: trailer source :meth:`Document.prev_location` return (chapter, pno) of preceding page +:meth:`Document.rewrite_images` PDF only: rewrite / extra compression for images +:meth:`Document.recolor` PDF only: execute :meth:`Page.recolor` for all pages :meth:`Document.reload_page` PDF only: provide a new copy of a page +:meth:`Document.resolve_names` PDF only: Convert destination names into a Python dict :meth:`Document.save` PDF only: save the document :meth:`Document.saveIncr` PDF only: save the document incrementally :meth:`Document.scrub` PDF only: remove sensitive data @@ -150,1853 +154,2022 @@ For details on **embedded files** refer to Appendix 3. .. class:: Document - .. index:: - pair: filename; open - pair: stream; open - pair: filetype; open - pair: rect; open - pair: width; open - pair: height; open - pair: fontsize; open - pair: open; Document - pair: filename; Document - pair: stream; Document - pair: filetype; Document - pair: rect; Document - pair: fontsize; Document + .. index:: + pair: filename; open + pair: stream; open + pair: filetype; open + pair: rect; open + pair: width; open + pair: height; open + pair: fontsize; open + pair: open; Document + pair: filename; Document + pair: stream; Document + pair: filetype; Document + pair: rect; Document + pair: fontsize; Document - .. method:: __init__(self, filename=None, stream=None, *, filetype=None, rect=None, width=0, height=0, fontsize=11) + .. method:: __init__(self, filename=None, stream=None, *, filetype=None, rect=None, width=0, height=0, fontsize=11) - * Changed in v1.14.13: support `io.BytesIO` for memory documents. - * Changed in v1.19.6: Clearer, shorter and more consistent exception messages. File type "pdf" is always assumed if not specified. Empty files and memory areas will always lead to exceptions. + Create a ``Document`` object. - Creates a *Document* object. + * With default parameters, a **new empty PDF** document will be created. + * If ``stream`` is given, then the document is created from memory. + * If ``stream`` is `None`, then a document is created from the file given by ``filename``. - * With default parameters, a **new empty PDF** document will be created. - * If *stream* is given, then the document is created from memory and, if not a PDF, either *filename* or *filetype* must indicate its type. - * If *stream* is `None`, then a document is created from the file given by *filename*. Its type is inferred from the extension. This can be overruled by *filetype.* + :arg str,pathlib filename: A UTF-8 string or ``pathlib.Path`` object containing a file path. The document type is always determined from the file content. The ``filetype`` parameter is ignored, except when content inspection was unsuccessful. This is regularly the case for plain text types like "txt", "html", "xml" etc. with a wrong or missing file extension. - :arg str,pathlib filename: A UTF-8 string or *pathlib* object containing a file path. The document type is inferred from the filename extension. If not present or not matching :ref:`a supported type`, a PDF document is assumed. For memory documents, this argument may be used instead of `filetype`, see below. + :arg bytes,bytearray,BytesIO stream: A memory area containing file data. The document type is always detected from the data content. The ``filetype`` parameter is ignored, except when content inspection was unsuccessful. This is regularly the case for plain text types like "txt", "html", "xml" etc. - :arg bytes,bytearray,BytesIO stream: A memory area containing a supported document. If not a PDF, its type **must** be specified by either `filename` or `filetype`. + :arg str filetype: A string specifying the type of document. This is only ever needed when file content inspection fails. Text types like "txt", "html", "xml" etc. cannot be disambiguated by their content. When such files are provided in memory or being provided with the wrong file extension, this parameter **must** be used. - :arg str filetype: A string specifying the type of document. This may be anything looking like a filename (e.g. "x.pdf"), in which case MuPDF uses the extension to determine the type, or a mime type like *application/pdf*. Just using strings like "pdf" or ".pdf" will also work. May be omitted for PDF documents, otherwise must match :ref:`a supported document type`. + :arg rect_like rect: a rectangle specifying the desired page size. This parameter is only meaningful for documents with a variable page layout ("reflowable" documents), like e-books or HTML, and ignored otherwise. If specified, it must be a non-empty, finite rectangle with top-left coordinates (0, 0). Together with parameter :data:`fontsize`, each page will be accordingly laid out and hence also determine the number of pages. - :arg rect_like rect: a rectangle specifying the desired page size. This parameter is only meaningful for documents with a variable page layout ("reflowable" documents), like e-books or HTML, and ignored otherwise. If specified, it must be a non-empty, finite rectangle with top-left coordinates (0, 0). Together with parameter *fontsize*, each page will be accordingly laid out and hence also determine the number of pages. + :arg float width: may used together with ``height`` as an alternative to ``rect`` to specify layout information. - :arg float width: may used together with *height* as an alternative to *rect* to specify layout information. + :arg float height: may used together with ``width`` as an alternative to ``rect`` to specify layout information. - :arg float height: may used together with *width* as an alternative to *rect* to specify layout information. + :arg float fontsize: the default :data:`fontsize` for reflowable document types. This parameter is ignored if none of the parameters ``rect`` or ``width`` and ``height`` are specified. Will be used to calculate the page layout. - :arg float fontsize: the default fontsize for reflowable document types. This parameter is ignored if none of the parameters *rect* or *width* and *height* are specified. Will be used to calculate the page layout. + :raises TypeError: if the *type* of any parameter does not conform. + :raises FileNotFoundError: if the file / path cannot be found. Re-implemented as subclass of `RuntimeError`. + :raises EmptyFileError: if the file / path is empty or the `bytes` object in memory has zero length. A subclass of `FileDataError` and `RuntimeError`. + :raises ValueError: if an unknown file type is explicitly specified. + :raises FileDataError: if the document has an invalid structure for the given type -- or is no file at all (but e.g. a folder). A subclass of `RuntimeError`. - :raises TypeError: if the *type* of any parameter does not conform. - :raises FileNotFoundError: if the file / path cannot be found. Re-implemented as subclass of `RuntimeError`. - :raises EmptyFileError: if the file / path is empty or the `bytes` object in memory has zero length. A subclass of `FileDataError` and `RuntimeError`. - :raises ValueError: if an unknown file type is explicitly specified. - :raises FileDataError: if the document has an invalid structure for the given type -- or is no file at all (but e.g. a folder). A subclass of `RuntimeError`. + :return: A document object. If the document cannot be created, an exception is raised in the above sequence. Note that PyMuPDF-specific exceptions, `FileNotFoundError`, `EmptyFileError` and `FileDataError` are intercepted if you check for `RuntimeError`. - :return: A document object. If the document cannot be created, an exception is raised in the above sequence. Note that PyMuPDF-specific exceptions, `FileNotFoundError`, `EmptyFileError` and `FileDataError` are intercepted if you check for `RuntimeError`. + In case of problems you can see more detail in the internal messages store: `print(pymupdf.TOOLS.mupdf_warnings())` (which will be emptied by this call, but you can also prevent this -- consult :meth:`Tools.mupdf_warnings`). - In case of problems you can see more detail in the internal messages store: `print(fitz.TOOLS.mupdf_warnings())` (which will be emptied by this call, but you can also prevent this -- consult :meth:`Tools.mupdf_warnings`). + Overview of possible forms, note: `open` is a synonym of `Document`:: - .. note:: Not all document types are checked for valid formats already at open time. Raster images for example will raise exceptions only later, when trying to access the content. Other types (notably with non-binary content) may also be opened (and sometimes **accessed**) successfully -- sometimes even when having invalid content for the format: + >>> # from a file + >>> doc = pymupdf.open("some.xps") + >>> # handle wrong extension + >>> doc = pymupdf.open("some.file", filetype="xps") # assert expected type + >>> doc = pymupdf.open("some.file", filetype="txt") # treat as plain text + >>> + >>> # from memory + >>> doc = pymupdf.open(stream=mem_area) # works for any supported type + >>> doc = pymupdf.open(stream=unknown-type, filetype="txt") # treat as plain text + >>> + >>> # new empty PDF + >>> doc = pymupdf.open() + >>> doc = pymupdf.open(None) + >>> doc = pymupdf.open("") + + .. note:: Raster images with a wrong (but supported) file extension **are no problem**. MuPDF will determine the correct image type when file **content** is actually accessed and will process it without complaint. + + The Document class can be also be used as a **context manager**. Exiting the content manager will close the document automatically. + + >>> import pymupdf + >>> with pymupdf.open(...) as doc: + for page in doc: print("page %i" % page.number) + page 0 + page 1 + page 2 + page 3 + >>> doc.is_closed + True + >>> - * HTM, HTML, XHTML: **always** opened, `metadata["format"]` is "HTML5", resp. "XHTML". - * XML, FB2: **always** opened, `metadata["format"]` is "FictionBook2". - Overview of possible forms, note: `open` is a synonym of `Document`:: + .. method:: get_oc(xref) - >>> # from a file - >>> doc = fitz.open("some.xps") - >>> # handle wrong extension - >>> doc = fitz.open("some.file", filetype="xps") - >>> - >>> # from memory, filetype is required if not a PDF - >>> doc = fitz.open("xps", mem_area) - >>> doc = fitz.open(None, mem_area, "xps") - >>> doc = fitz.open(stream=mem_area, filetype="xps") - >>> - >>> # new empty PDF - >>> doc = fitz.open() - >>> doc = fitz.open(None) - >>> doc = fitz.open("") + * New in v1.18.4 - .. note:: Raster images with a wrong (but supported) file extension **are no problem**. MuPDF will determine the correct image type when file **content** is actually accessed and will process it without complaint. So `fitz.open("file.jpg")` will work even for a PNG image. - - The Document class can be also be used as a **context manager**. On exit, the document will automatically be closed. + Return the cross reference number of an :data:`OCG` or :data:`OCMD` attached to an image or form xobject. - >>> import fitz - >>> with fitz.open(...) as doc: - for page in doc: print("page %i" % page.number) - page 0 - page 1 - page 2 - page 3 - >>> doc.is_closed - True - >>> + :arg int xref: the :data:`xref` of an image or form xobject. Valid such cross reference numbers are returned by :meth:`Document.get_page_images`, resp. :meth:`Document.get_page_xobjects`. For invalid numbers, an exception is raised. + :rtype: int + :returns: the cross reference number of an optional contents object or zero if there is none. + .. method:: set_oc(xref, ocxref) - .. method:: get_oc(xref) + * New in v1.18.4 - * New in v1.18.4 + If :data:`xref` represents an image or form xobject, set or remove the cross reference number *ocxref* of an optional contents object. - Return the cross reference number of an :data:`OCG` or :data:`OCMD` attached to an image or form xobject. + :arg int xref: the :data:`xref` of an image or form xobject [#f5]_. Valid such cross reference numbers are returned by :meth:`Document.get_page_images`, resp. :meth:`Document.get_page_xobjects`. For invalid numbers, an exception is raised. + :arg int ocxref: the :data:`xref` number of an :data:`OCG` / :data:`OCMD`. If not zero, an invalid reference raises an exception. If zero, any OC reference is removed. - :arg int xref: the :data:`xref` of an image or form xobject. Valid such cross reference numbers are returned by :meth:`Document.get_page_images`, resp. :meth:`Document.get_page_xobjects`. For invalid numbers, an exception is raised. - :rtype: int - :returns: the cross reference number of an optional contents object or zero if there is none. - .. method:: set_oc(xref, ocxref) + .. method:: get_layers() - * New in v1.18.4 + * New in v1.18.3 - If *xref* represents an image or form xobject, set or remove the cross reference number *ocxref* of an optional contents object. + Show optional layer configurations. There always is a standard one, which is not included in the response. - :arg int xref: the :data:`xref` of an image or form xobject [#f5]_. Valid such cross reference numbers are returned by :meth:`Document.get_page_images`, resp. :meth:`Document.get_page_xobjects`. For invalid numbers, an exception is raised. - :arg int ocxref: the :data:`xref` number of an :data:`OCG` / :data:`OCMD`. If not zero, an invalid reference raises an exception. If zero, any OC reference is removed. + >>> for item in doc.get_layers(): print(item) + {'number': 0, 'name': 'my-config', 'creator': ''} + >>> # use 'number' as config identifier in add_ocg + .. method:: add_layer(name, creator=None, on=None) - .. method:: get_layers() + * New in v1.18.3 - * New in v1.18.3 + Add an optional content configuration. Layers serve as a collection of ON / OFF states for optional content groups and allow fast visibility switches between different views on the same document. - Show optional layer configurations. There always is a standard one, which is not included in the response. + :arg str name: arbitrary name. + :arg str creator: (optional) creating software. + :arg sequ on: a sequence of OCG :data:`xref` numbers which should be set to ON when this layer gets activated. All OCGs not listed here will be set to OFF. - >>> for item in doc.get_layers(): print(item) - {'number': 0, 'name': 'my-config', 'creator': ''} - >>> # use 'number' as config identifier in add_ocg - .. method:: add_layer(name, creator=None, on=None) + .. method:: switch_layer(number, as_default=False) - * New in v1.18.3 + * New in v1.18.3 - Add an optional content configuration. Layers serve as a collection of ON / OFF states for optional content groups and allow fast visibility switches between different views on the same document. + Switch to a document view as defined by the optional layer's configuration number. This is temporary, except if established as default. - :arg str name: arbitrary name. - :arg str creator: (optional) creating software. - :arg sequ on: a sequence of OCG :data:`xref` numbers which should be set to ON when this layer gets activated. All OCGs not listed here will be set to OFF. + :arg int number: config number as returned by :meth:`Document.layer_configs`. + :arg bool as_default: make this the default configuration. + Activates the ON / OFF states of OCGs as defined in the identified layer. If ``as_default=True``, then additionally all layers, including the standard one, are merged and the result is written back to the standard layer, and **all optional layers are deleted**. - .. method:: switch_layer(number, as_default=False) - * New in v1.18.3 + .. method:: add_ocg(name, config=-1, on=True, intent="View", usage="Artwork") - Switch to a document view as defined by the optional layer's configuration number. This is temporary, except if established as default. + * New in v1.18.3 - :arg int number: config number as returned by :meth:`Document.layer_configs`. - :arg bool as_default: make this the default configuration. + Add an optional content group. An OCG is the most important unit of information to determine object visibility. For a PDF, in order to be regarded as having optional content, at least one OCG must exist. - Activates the ON / OFF states of OCGs as defined in the identified layer. If *as_default=True*, then additionally all layers, including the standard one, are merged and the result is written back to the standard layer, and **all optional layers are deleted**. + :arg str name: arbitrary name. Will show up in supporting PDF viewers. + :arg int config: layer configuration number. Default -1 is the standard configuration. + :arg bool on: standard visibility status for objects pointing to this OCG. + :arg str,list intent: a string or list of strings declaring the visibility intents. There are two PDF standard values to choose from: "View" and "Design". Default is "View". Correct **spelling is important**. + :arg str usage: another influencer for OCG visibility. This will become part of the OCG's `/Usage` key. There are two PDF standard values to choose from: "Artwork" and "Technical". Default is "Artwork". Please only change when required. + :returns: :data:`xref` of the created OCG. Use as entry for `oc` parameter in supporting objects. - .. method:: add_ocg(name, config=-1, on=True, intent="View", usage="Artwork") + .. note:: Multiple OCGs with identical parameters may be created. This will not cause problems. Garbage option 3 of :meth:`Document.save` will get rid of any duplicates. - * New in v1.18.3 - Add an optional content group. An OCG is the most important unit of information to determine object visibility. For a PDF, in order to be regarded as having optional content, at least one OCG must exist. + .. method:: set_ocmd(xref=0, ocgs=None, policy="AnyOn", ve=None) - :arg str name: arbitrary name. Will show up in supporting PDF viewers. - :arg int config: layer configuration number. Default -1 is the standard configuration. - :arg bool on: standard visibility status for objects pointing to this OCG. - :arg str,list intent: a string or list of strings declaring the visibility intents. There are two PDF standard values to choose from: "View" and "Design". Default is "View". Correct **spelling is important**. - :arg str usage: another influencer for OCG visibility. This will become part of the OCG's `/Usage` key. There are two PDF standard values to choose from: "Artwork" and "Technical". Default is "Artwork". Please only change when required. + * New in v1.18.4 - :returns: :data:`xref` of the created OCG. Use as entry for `oc` parameter in supporting objects. + Create or update an :data:`OCMD`, **Optional Content Membership Dictionary.** - .. note:: Multiple OCGs with identical parameters may be created. This will not cause problems. Garbage option 3 of :meth:`Document.save` will get rid of any duplicates. + :arg int xref: :data:`xref` of the OCMD to be updated, or 0 for a new OCMD. + :arg list ocgs: a sequence of :data:`xref` numbers of existing :data:`OCG` PDF objects. + :arg str policy: one of "AnyOn" (default), "AnyOff", "AllOn", "AllOff" (mixed or lower case). + :arg list ve: a "visibility expression". This is a list of arbitrarily nested other lists -- see explanation below. Use as an alternative to the combination *ocgs* / *policy* if you need to formulate more complex conditions. + :rtype: int + :returns: :data:`xref` of the OCMD. Use as `oc=xref` parameter in supporting objects, and respectively in :meth:`Document.set_oc` or :meth:`Annot.set_oc`. + .. note:: - .. method:: set_ocmd(xref=0, ocgs=None, policy="AnyOn", ve=None) + Like an OCG, an OCMD has a visibility state ON or OFF, and it can be used like an OCG. In contrast to an OCG, the OCMD state is determined by evaluating the state of one or more OCGs via special forms of **boolean expressions.** If the expression evaluates to true, the OCMD state is ON and OFF for false. - * New in v1.18.4 + There are two ways to formulate OCMD visibility: - Create or update an :data:`OCMD`, **Optional Content Membership Dictionary.** + 1. Use the combination of *ocgs* and *policy*: The *policy* value is interpreted as follows: - :arg int xref: :data:`xref` of the OCMD to be updated, or 0 for a new OCMD. - :arg list ocgs: a sequence of :data:`xref` numbers of existing :data:`OCG` PDF objects. - :arg str policy: one of "AnyOn" (default), "AnyOff", "AllOn", "AllOff" (mixed or lower case). - :arg list ve: a "visibility expression". This is a list of arbitrarily nested other lists -- see explanation below. Use as an alternative to the combination *ocgs* / *policy* if you need to formulate more complex conditions. - :rtype: int - :returns: :data:`xref` of the OCMD. Use as `oc=xref` parameter in supporting objects, and respectively in :meth:`Document.set_oc` or :meth:`Annot.set_oc`. + - AnyOn -- (default) true if at least one OCG is ON. + - AnyOff -- true if at least one OCG is OFF. + - AllOn -- true if all OCGs are ON. + - AllOff -- true if all OCGs are OFF. - .. note:: + Suppose you want two PDF objects be displayed exactly one at a time (if one is ON, then the other one must be OFF): + + Solution: use an **OCG** for object 1 and an **OCMD** for object 2. Create the OCMD via `set_ocmd(ocgs=[xref], policy="AllOff")`, with the :data:`xref` of the OCG. - Like an OCG, an OCMD has a visibility state ON or OFF, and it can be used like an OCG. In contrast to an OCG, the OCMD state is determined by evaluating the state of one or more OCGs via special forms of **boolean expressions.** If the expression evaluates to true, the OCMD state is ON and OFF for false. + 2. Use the **visibility expression** *ve*: This is a list of two or more items. The **first item** is a logical keyword: one of the strings **"and"**, **"or"**, or **"not"**. The **second** and all subsequent items must either be an integer or another list. An integer must be the :data:`xref` number of an OCG. A list must again have at least two items starting with one of the boolean keywords. This syntax is a bit awkward, but quite powerful: - There are two ways to formulate OCMD visibility: + - Each list must start with a logical keyword. + - If the keyword is a **"not"**, then the list must have exactly two items. If it is **"and"** or **"or"**, any number of other items may follow. + - Items following the logical keyword may be either integers or again a list. An *integer* must be the xref of an OCG. A *list* must conform to the previous rules. - 1. Use the combination of *ocgs* and *policy*: The *policy* value is interpreted as follows: + **Examples:** - - AnyOn -- (default) true if at least one OCG is ON. - - AnyOff -- true if at least one OCG is OFF. - - AllOn -- true if all OCGs are ON. - - AllOff -- true if all OCGs are OFF. + - `set_ocmd(ve=["or", 4, ["not", 5], ["and", 6, 7]])`. This delivers ON if the following is true: **"4 is ON, or 5 is OFF, or 6 and 7 are both ON"**. + - `set_ocmd(ve=["not", xref])`. This has the same effect as the OCMD example created under 1. - Suppose you want two PDF objects be displayed exactly one at a time (if one is ON, then the other one must be OFF): + For more details and examples see page 224 of :ref:`AdobeManual`. Also do have a look at example scripts `here `_. - Solution: use an **OCG** for object 1 and an **OCMD** for object 2. Create the OCMD via `set_ocmd(ocgs=[xref], policy="AllOff")`, with the :data:`xref` of the OCG. + Visibility expressions, `/VE`, are part of PDF specification version 1.6. So not all PDF viewers / readers may already support this feature and hence will react in some standard way for those cases. - 2. Use the **visibility expression** *ve*: This is a list of two or more items. The **first item** is a logical keyword: one of the strings **"and"**, **"or"**, or **"not"**. The **second** and all subsequent items must either be an integer or another list. An integer must be the :data:`xref` number of an OCG. A list must again have at least two items starting with one of the boolean keywords. This syntax is a bit awkward, but quite powerful: - - Each list must start with a logical keyword. - - If the keyword is a **"not"**, then the list must have exactly two items. If it is **"and"** or **"or"**, any number of other items may follow. - - Items following the logical keyword may be either integers or again a list. An *integer* must be the xref of an OCG. A *list* must conform to the previous rules. + .. method:: get_ocmd(xref) - **Examples:** + * New in v1.18.4 - - `set_ocmd(ve=["or", 4, ["not", 5], ["and", 6, 7]])`. This delivers ON if the following is true: **"4 is ON, or 5 is OFF, or 6 and 7 are both ON"**. - - `set_ocmd(ve=["not", xref])`. This has the same effect as the OCMD example created under 1. + Retrieve the definition of an :data:`OCMD`. - For more details and examples see page 224 of :ref:`AdobeManual`. Also do have a look at example scripts `here `_. + :arg int xref: the :data:`xref` of the OCMD. + :rtype: dict + :returns: a dictionary with the keys :data:`xref`, *ocgs*, *policy* and *ve*. - Visibility expressions, `/VE`, are part of PDF specification version 1.6. So not all PDF viewers / readers may already support this feature and hence will react in some standard way for those cases. + .. method:: get_layer(config=-1) - .. method:: get_ocmd(xref) + * New in v1.18.3 - * New in v1.18.4 + List of optional content groups by status in the specified configuration. This is a dictionary with lists of cross reference numbers for OCGs that occur in the arrays `/ON`, `/OFF` or in some radio button group (`/RBGroups`). - Retrieve the definition of an :data:`OCMD`. + :arg int config: the configuration layer (default is the standard config layer). - :arg int xref: the :data:`xref` of the OCMD. - :rtype: dict - :returns: a dictionary with the keys *xref*, *ocgs*, *policy* and *ve*. + >>> pprint(doc.get_layer()) + {'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} + >>> + .. method:: set_layer(config, *, on=None, off=None, basestate=None, rbgroups=None, locked=None) - .. method:: get_layer(config=-1) + * New in v1.18.3 - * New in v1.18.3 + * Changed in v1.22.5: Support list of *locked* OCGs. - List of optional content groups by status in the specified configuration. This is a dictionary with lists of cross reference numbers for OCGs that occur in the arrays `/ON`, `/OFF` or in some radio button group (`/RBGroups`). + Mass status changes of optional content groups. **Permanently** sets the status of OCGs. - :arg int config: the configuration layer (default is the standard config layer). + :arg int config: desired configuration layer, choose -1 for the default one. + :arg list on: list of :data:`xref` of OCGs to set ON. Replaces previous values. An empty list will cause no OCG being set to ON anymore. Should be specified if `basestate="ON"` is used. + :arg list off: list of :data:`xref` of OCGs to set OFF. Replaces previous values. An empty list will cause no OCG being set to OFF anymore. Should be specified if `basestate="OFF"` is used. + :arg str basestate: state of OCGs that are not mentioned in *on* or *off*. Possible values are "ON", "OFF" or "Unchanged". Upper / lower case possible. + :arg list rbgroups: a list of lists. Replaces previous values. Each sublist should contain two or more OCG xrefs. OCGs in the same sublist are handled like buttons in a radio button group: setting one to ON automatically sets all other group members to OFF. + :arg list locked: a list of OCG xref number that cannot be changed by the user interface. + Values `None` will not change the corresponding PDF array. + + >>> doc.set_layer(-1, basestate="OFF") # only changes the base state >>> pprint(doc.get_layer()) - {'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} + {'basestate': 'OFF', 'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} + + + .. method:: get_ocgs() + + * New in v1.18.3 + + Details of all optional content groups. This is a dictionary of dictionaries like this (key is the OCG's :data:`xref`): + + >>> pprint(doc.get_ocgs()) + {13: {'on': True, + 'intent': ['View', 'Design'], + 'name': 'Circle', + 'usage': 'Artwork'}, + 14: {'on': True, + 'intent': ['View', 'Design'], + 'name': 'Square', + 'usage': 'Artwork'}, + 15: {'on': False, 'intent': ['View'], 'name': 'Square', 'usage': 'Artwork'}} >>> - .. method:: set_layer(config, *, on=None, off=None, basestate=None, rbgroups=None, locked=None) + .. method:: layer_ui_configs() - * New in v1.18.3 + * New in v1.18.3 - * Changed in v1.22.5: Support list of *locked* OCGs. + Show the visibility status of optional content that is modifiable by the user interface of supporting PDF viewers. - Mass status changes of optional content groups. **Permanently** sets the status of OCGs. + * Only reports items contained in the currently selected layer configuration. - :arg int config: desired configuration layer, choose -1 for the default one. - :arg list on: list of :data:`xref` of OCGs to set ON. Replaces previous values. An empty list will cause no OCG being set to ON anymore. Should be specified if `basestate="ON"` is used. - :arg list off: list of :data:`xref` of OCGs to set OFF. Replaces previous values. An empty list will cause no OCG being set to OFF anymore. Should be specified if `basestate="OFF"` is used. - :arg str basestate: state of OCGs that are not mentioned in *on* or *off*. Possible values are "ON", "OFF" or "Unchanged". Upper / lower case possible. - :arg list rbgroups: a list of lists. Replaces previous values. Each sublist should contain two or more OCG xrefs. OCGs in the same sublist are handled like buttons in a radio button group: setting one to ON automatically sets all other group members to OFF. - :arg list locked: a list of OCG xref number that cannot be changed by the user interface. + * The meaning of the dictionary keys is as follows: + - *depth:* item's nesting level in the `/Order` array + - *locked:* true if cannot be changed via user interfaces + - *number:* running sequence number + - *on:* item state + - *text:* text string or name field of the originating OCG + - *type:* one of "label" (set by a text string), "checkbox" (set by a single OCG) or "radiobox" (set by a set of connected OCGs) - Values `None` will not change the corresponding PDF array. + .. method:: set_layer_ui_config(number, action=0) - >>> doc.set_layer(-1, basestate="OFF") # only changes the base state - >>> pprint(doc.get_layer()) - {'basestate': 'OFF', 'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} + * New in v1.18.3 + Modify OC visibility status of content groups. This is analog to what supporting PDF viewers would offer. - .. method:: get_ocgs() + Please note that visibility is **not** a property stored with the OCG. It is not even information necessarily present in the PDF document at all. Instead, the current visibility is **temporarily** set using the user interface of some supporting PDF consumer software. The same type of functionality is offered by this method. - * New in v1.18.3 + To make **permanent** changes, use :meth:`Document.set_layer`. - Details of all optional content groups. This is a dictionary of dictionaries like this (key is the OCG's :data:`xref`): + :arg int,str number: either the sequence number of the item in list :meth:`Document.layer_configs` or the "text" of one of these items. + :arg int action: `PDF_OC_ON` = set on (default), `PDF_OC_TOGGLE` = toggle on/off, `PDF_OC_OFF` = set off. - >>> pprint(doc.get_ocgs()) - {13: {'on': True, - 'intent': ['View', 'Design'], - 'name': 'Circle', - 'usage': 'Artwork'}, - 14: {'on': True, - 'intent': ['View', 'Design'], - 'name': 'Square', - 'usage': 'Artwork'}, - 15: {'on': False, 'intent': ['View'], 'name': 'Square', 'usage': 'Artwork'}} - >>> - .. method:: layer_ui_configs() + .. method:: authenticate(password) - * New in v1.18.3 + Decrypts the document with the string *password*. If successful, document data can be accessed. For PDF documents, the "owner" and the "user" have different privileges, and hence different passwords may exist for these authorization levels. The method will automatically establish the appropriate (owner or user) access rights for the provided password. - Show the visibility status of optional content that is modifiable by the user interface of supporting PDF viewers. + :arg str password: owner or user password. - * Only reports items contained in the currently selected layer configuration. + :rtype: int + :returns: a positive value if successful, zero otherwise (the string does not match either password). If positive, the indicator :attr:`Document.is_encrypted` is set to ``False``. **Positive** return codes carry the following information detail: - * The meaning of the dictionary keys is as follows: - - *depth:* item's nesting level in the `/Order` array - - *locked:* true if cannot be changed via user interfaces - - *number:* running sequence number - - *on:* item state - - *text:* text string or name field of the originating OCG - - *type:* one of "label" (set by a text string), "checkbox" (set by a single OCG) or "radiobox" (set by a set of connected OCGs) + * 1 => authenticated, but the PDF has neither owner nor user passwords. + * 2 => authenticated with the **user** password. + * 4 => authenticated with the **owner** password. + * 6 => authenticated and both passwords are equal -- probably a rare situation. - .. method:: set_layer_ui_config(number, action=0) + .. note:: - * New in v1.18.3 + The document may be protected by an owner, but **not** by a user password. Detect this situation via `doc.authenticate("") == 2`. This allows opening and reading the document without authentication, but, depending on the :attr:`Document.permissions` value, other actions may be prohibited. PyMuPDF (like MuPDF) in this case **ignores those restrictions**. So, -- in contrast to any PDF viewers -- you can for example extract text and add or modify content, even if the respective permission flags `PDF_PERM_COPY`, `PDF_PERM_MODIFY`, `PDF_PERM_ANNOTATE`, etc. are set off! It is your responsibility building a legally compliant application where applicable. - Modify OC visibility status of content groups. This is analog to what supporting PDF viewers would offer. + .. method:: get_page_numbers(label, only_one=False) - Please note that visibility is **not** a property stored with the OCG. It is not even information necessarily present in the PDF document at all. Instead, the current visibility is **temporarily** set using the user interface of some supporting PDF consumer software. The same type of functionality is offered by this method. + * New in v 1.18.6 - To make **permanent** changes, use :meth:`Document.set_layer`. + PDF only: Return a list of page numbers that have the specified label -- note that labels may not be unique in a PDF. This implies a sequential search through **all page numbers** to compare their labels. - :arg int,str number: either the sequence number of the item in list :meth:`Document.layer_configs` or the "text" of one of these items. - :arg int action: `PDF_OC_ON` = set on (default), `PDF_OC_TOGGLE` = toggle on/off, `PDF_OC_OFF` = set off. + .. note:: Implementation detail -- pages are **not loaded** for this purpose. + :arg str label: the label to look for, e.g. "vii" (Roman number 7). + :arg bool only_one: stop after first hit. Useful e.g. if labelling is known to be unique, or there are many pages, etc. The default will check every page number. + :rtype: list + :returns: list of page numbers that have this label. Empty if none found, no labels defined, etc. - .. method:: authenticate(password) - Decrypts the document with the string *password*. If successful, document data can be accessed. For PDF documents, the "owner" and the "user" have different privileges, and hence different passwords may exist for these authorization levels. The method will automatically establish the appropriate (owner or user) access rights for the provided password. + .. method:: get_page_labels() - :arg str password: owner or user password. + * New in v1.18.7 - :rtype: int - :returns: a positive value if successful, zero otherwise (the string does not match either password). If positive, the indicator :attr:`Document.is_encrypted` is set to *False*. **Positive** return codes carry the following information detail: + PDF only: Extract the list of page label definitions. Typically used for modifications before feeding it into :meth:`Document.set_page_labels`. - * 1 => authenticated, but the PDF has neither owner nor user passwords. - * 2 => authenticated with the **user** password. - * 4 => authenticated with the **owner** password. - * 6 => authenticated and both passwords are equal -- probably a rare situation. + :returns: a list of dictionaries as defined in :meth:`Document.set_page_labels`. - .. note:: + .. method:: set_page_labels(labels) - The document may be protected by an owner, but **not** by a user password. Detect this situation via `doc.authenticate("") == 2`. This allows opening and reading the document without authentication, but, depending on the :attr:`Document.permissions` value, other actions may be prohibited. PyMuPDF (like MuPDF) in this case **ignores those restrictions**. So, -- in contrast to any PDF viewers -- you can for example extract text and add or modify content, even if the respective permission flags `PDF_PERM_COPY`, `PDF_PERM_MODIFY`, `PDF_PERM_ANNOTATE`, etc. are set off! It is your responsibility building a legally compliant application where applicable. + * New in v1.18.6 - .. method:: get_page_numbers(label, only_one=False) + PDF only: Add or update the page label definitions of the PDF. - * New in v 1.18.6 + :arg list labels: a list of dictionaries. Each dictionary defines a label building rule and a 0-based "start" page number. That start page is the first for which the label definition is valid. Each dictionary has up to 4 items and looks like `{'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}` and has the following items. - PDF only: Return a list of page numbers that have the specified label -- note that labels may not be unique in a PDF. This implies a sequential search through **all page numbers** to compare their labels. + - `startpage`: (int) the first page number (0-based) to apply the label rule. This key **must be present**. The rule is applied to all subsequent pages until either end of document or superseded by the rule with the next larger page number. + - `prefix`: (str) an arbitrary string to start the label with, e.g. "A-". Default is "". + - `style`: (str) the numbering style. Available are "D" (decimal), "r"/"R" (Roman numbers, lower / upper case), and "a"/"A" (lower / upper case alphabetical numbering: "a" through "z", then "aa" through "zz", etc.). Default is "". If "", no numbering will take place and the pages in that range will receive the same label consisting of the `prefix` value. If prefix is also omitted, then the label will be "". + - `firstpagenum`: (int) start numbering with this value. Default is 1, smaller values are ignored. - .. note:: Implementation detail -- pages are **not loaded** for this purpose. + For example:: - :arg str label: the label to look for, e.g. "vii" (Roman number 7). - :arg bool only_one: stop after first hit. Useful e.g. if labelling is known to be unique, or there are many pages, etc. The default will check every page number. - :rtype: list - :returns: list of page numbers that have this label. Empty if none found, no labels defined, etc. + [{'startpage': 6, 'prefix': 'A-', 'style': 'D', 'firstpagenum': 10}, + {'startpage': 10, 'prefix': '', 'style': 'D', 'firstpagenum': 1}] + will generate the labels "A-10", "A-11", "A-12", "A-13", "1", "2", "3", ... for pages 6, 7 and so on until end of document. Pages 0 through 5 will have the label "". - .. method:: get_page_labels() - * New in v1.18.7 + .. method:: make_bookmark(loc) - PDF only: Extract the list of page label definitions. Typically used for modifications before feeding it into :meth:`Document.set_page_labels`. + * New in v.1.17.3 - :returns: a list of dictionaries as defined in :meth:`Document.set_page_labels`. + Return a page pointer in a reflowable document. After re-layouting the document, the result of this method can be used to find the new location of the page. - .. method:: set_page_labels(labels) + .. note:: Do not confuse with items of a table of contents, TOC. - * New in v1.18.6 + :arg list,tuple loc: page location. Must be a valid *(chapter, pno)*. - PDF only: Add or update the page label definitions of the PDF. + :rtype: pointer + :returns: a long integer in pointer format. To be used for finding the new location of the page after re-layouting the document. Do not touch or re-assign. - :arg list labels: a list of dictionaries. Each dictionary defines a label building rule and a 0-based "start" page number. That start page is the first for which the label definition is valid. Each dictionary has up to 4 items and looks like `{'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}` and has the following items. - - `startpage`: (int) the first page number (0-based) to apply the label rule. This key **must be present**. The rule is applied to all subsequent pages until either end of document or superseded by the rule with the next larger page number. - - `prefix`: (str) an arbitrary string to start the label with, e.g. "A-". Default is "". - - `style`: (str) the numbering style. Available are "D" (decimal), "r"/"R" (Roman numbers, lower / upper case), and "a"/"A" (lower / upper case alphabetical numbering: "a" through "z", then "aa" through "zz", etc.). Default is "". If "", no numbering will take place and the pages in that range will receive the same label consisting of the `prefix` value. If prefix is also omitted, then the label will be "". - - `firstpagenum`: (int) start numbering with this value. Default is 1, smaller values are ignored. + .. method:: find_bookmark(bookmark) - For example:: + * New in v.1.17.3 - [{'startpage': 6, 'prefix': 'A-', 'style': 'D', 'firstpagenum': 10}, - {'startpage': 10, 'prefix': '', 'style': 'D', 'firstpagenum': 1}] + Return the new page location after re-layouting the document. - will generate the labels "A-10", "A-11", "A-12", "A-13", "1", "2", "3", ... for pages 6, 7 and so on until end of document. Pages 0 through 5 will have the label "". + :arg pointer bookmark: created by :meth:`Document.make_bookmark`. + :rtype: tuple + :returns: the new (chapter, pno) of the page. - .. method:: make_bookmark(loc) - * New in v.1.17.3 - - Return a page pointer in a reflowable document. After re-layouting the document, the result of this method can be used to find the new location of the page. + .. method:: chapter_page_count(chapter) - .. note:: Do not confuse with items of a table of contents, TOC. + * New in v.1.17.0 - :arg list,tuple loc: page location. Must be a valid *(chapter, pno)*. + Return the number of pages of a chapter. - :rtype: pointer - :returns: a long integer in pointer format. To be used for finding the new location of the page after re-layouting the document. Do not touch or re-assign. + :arg int chapter: the 0-based chapter number. + :rtype: int + :returns: number of pages in chapter. Relevant only for document types with chapter support (EPUB currently). - .. method:: find_bookmark(bookmark) - * New in v.1.17.3 - - Return the new page location after re-layouting the document. + .. method:: next_location(page_id) - :arg pointer bookmark: created by :meth:`Document.make_bookmark`. + * New in v.1.17.0 - :rtype: tuple - :returns: the new (chapter, pno) of the page. + Return the location of the following page. + :arg tuple page_id: the current page id. This must be a tuple *(chapter, pno)* identifying an existing page. - .. method:: chapter_page_count(chapter) + :returns: The tuple of the following page, i.e. either *(chapter, pno + 1)* or *(chapter + 1, 0)*, **or** the empty tuple *()* if the argument was the last page. Relevant only for document types with chapter support (EPUB currently). - * New in v.1.17.0 - - Return the number of pages of a chapter. - :arg int chapter: the 0-based chapter number. + .. method:: prev_location(page_id) - :rtype: int - :returns: number of pages in chapter. Relevant only for document types with chapter support (EPUB currently). + * New in v.1.17.0 + Return the locator of the preceding page. - .. method:: next_location(page_id) + :arg tuple page_id: the current page id. This must be a tuple *(chapter, pno)* identifying an existing page. - * New in v.1.17.0 + :returns: The tuple of the preceding page, i.e. either *(chapter, pno - 1)* or the last page of the preceding chapter, **or** the empty tuple *()* if the argument was the first page. Relevant only for document types with chapter support (EPUB currently). - Return the location of the following page. - :arg tuple page_id: the current page id. This must be a tuple *(chapter, pno)* identifying an existing page. + .. method:: load_page(page_id=0) - :returns: The tuple of the following page, i.e. either *(chapter, pno + 1)* or *(chapter + 1, 0)*, **or** the empty tuple *()* if the argument was the last page. Relevant only for document types with chapter support (EPUB currently). + * Changed in v1.17.0: For document types supporting a so-called "chapter structure" (like EPUB), pages can also be loaded via the combination of chapter number and relative page number, instead of the absolute page number. This should **significantly speed up access** for large documents. + Create a :ref:`Page` object for further processing (like rendering, text searching, etc.). - .. method:: prev_location(page_id) + :arg int,tuple page_id: *(Changed in v1.17.0)* - * New in v.1.17.0 - - Return the locator of the preceding page. + Either a 0-based page number, or a tuple *(chapter, pno)*. For an **integer**, any `-∞ < page_id < page_count` is acceptable. While page_id is negative, :attr:`page_count` will be added to it. For example: to load the last page, you can use *doc.load_page(-1)*. After this you have page.number = doc.page_count - 1. - :arg tuple page_id: the current page id. This must be a tuple *(chapter, pno)* identifying an existing page. + For a tuple, *chapter* must be in range :attr:`Document.chapter_count`, and *pno* must be in range :meth:`Document.chapter_page_count` of that chapter. Both values are 0-based. Using this notation, :attr:`Page.number` will equal the given tuple. Relevant only for document types with chapter support (EPUB currently). - :returns: The tuple of the preceding page, i.e. either *(chapter, pno - 1)* or the last page of the preceding chapter, **or** the empty tuple *()* if the argument was the first page. Relevant only for document types with chapter support (EPUB currently). + :rtype: :ref:`Page` + .. note:: - .. method:: load_page(page_id=0) + Documents also follow the Python sequence protocol with page numbers as indices: *doc.load_page(n) == doc[n]*. - * Changed in v1.17.0: For document types supporting a so-called "chapter structure" (like EPUB), pages can also be loaded via the combination of chapter number and relative page number, instead of the absolute page number. This should **significantly speed up access** for large documents. + For **absolute page numbers** only, expressions like *"for page in doc: ..."* and *"for page in reversed(doc): ..."* will successively yield the document's pages. Refer to :meth:`Document.pages` which allows processing pages as with slicing. - Create a :ref:`Page` object for further processing (like rendering, text searching, etc.). + You can also use index notation with the new chapter-based page identification: use *page = doc[(5, 2)]* to load the third page of the sixth chapter. - :arg int,tuple page_id: *(Changed in v1.17.0)* + To maintain a consistent API, for document types not supporting a chapter structure (like PDFs), :attr:`Document.chapter_count` is 1, and pages can also be loaded via tuples *(0, pno)*. See this [#f3]_ footnote for comments on performance improvements. - Either a 0-based page number, or a tuple *(chapter, pno)*. For an **integer**, any `-∞ < page_id < page_count` is acceptable. While page_id is negative, :attr:`page_count` will be added to it. For example: to load the last page, you can use *doc.load_page(-1)*. After this you have page.number = doc.page_count - 1. - For a tuple, *chapter* must be in range :attr:`Document.chapter_count`, and *pno* must be in range :meth:`Document.chapter_page_count` of that chapter. Both values are 0-based. Using this notation, :attr:`Page.number` will equal the given tuple. Relevant only for document types with chapter support (EPUB currently). + .. method:: rewrite_images(dpi_threshold=None, dpi_target=0, quality=0, lossy=True, lossless=True, bitonal=True, color=True, gray=True, set_to_gray=False, options=None) - :rtype: :ref:`Page` + PDF only: Walk through all images and rewrite them according to the specified parameters. This is useful for reducing file size, changing image formats, or converting color spaces. - .. note:: + The typical usage is extra compression of images for significantly reducing the file size of the PDF. When setting quality and the dpi parameters to positive values and accepting defaults for the rest, the following will happen: - Documents also follow the Python sequence protocol with page numbers as indices: *doc.load_page(n) == doc[n]*. + * Lossy and lossless images will be rewritten as JPEG images (FZ_RECOMPRESS_JPEG) as far as technically possible. - For **absolute page numbers** only, expressions like *"for page in doc: ..."* and *"for page in reversed(doc): ..."* will successively yield the document's pages. Refer to :meth:`Document.pages` which allows processing pages as with slicing. + * Bitonal (monochrome) images will be rewritten in FAX format (FZ_RECOMPRESS_FAX). - You can also use index notation with the new chapter-based page identification: use *page = doc[(5, 2)]* to load the third page of the sixth chapter. + * Subsampling method is **FZ_SUBSAMPLE_AVERAGE** (see below). - To maintain a consistent API, for document types not supporting a chapter structure (like PDFs), :attr:`Document.chapter_count` is 1, and pages can also be loaded via tuples *(0, pno)*. See this [#f3]_ footnote for comments on performance improvements. + :arg int dpi_target: target DPI value for the resampled images. Ignored if `dpi_threshold` is `None`, otherwise must be less than `dpi_threshold` and positive. - .. method:: reload_page(page) + :arg int dpi_threshold: If None (the default) no resampling takes place. Otherwise images with a DPI value larger than this will be resampled to `dpi_target` (which must be less than `dpi_threshold`). - * New in v1.16.10 + :arg int quality: desired target JPEG quality, a value between 0 and 100. 0 means no quality change, 100 means best quality. - PDF only: Provide a new copy of a page after finishing and updating all pending changes. + :arg bool lossy: include lossy image types (e.g. JPEG). - :arg page: page object. - :type page: :ref:`Page` + :arg bool lossless: include lossless image types (e.g. PNG). - :rtype: :ref:`Page` + :arg bool bitonal: include black-and-white images (e.g. FAX). - :returns: a new copy of the same page. All pending updates (e.g. to annotations or widgets) will be finalized and a fresh copy of the page will be loaded. + :arg bool color: include colored images. - .. note:: In a typical use case, a page :ref:`Pixmap` should be taken after annotations / widgets have been added or changed. To force all those changes being reflected in the page structure, this method re-instates a fresh copy while keeping the object hierarchy "document -> page -> annotations/widgets" intact. + :arg bool gray: include grayscale images. + :arg bool set_to_gray: if True, the PDF will be converted to grayscale by executing :meth:`Document.recolor` before all image processing. Please note that this will also change text and vector graphics to grayscale -- not just the images. - .. method:: page_cropbox(pno) + :arg dict options: This parameter is intended for expert users. Except ``set_to_gray``, all other parameters are ignored. It must be an object prepared in the following way: ``options = pymupdf.mupdf.PdfImageRewriterOptions()``. Then attributes of this object can be set to achieve fine-grained control. Following are the adjustable attributes of the ``options`` object and their default (do nothing) values. - * New in v1.17.7 + :: + + options.bitonal_image_recompress_method = FZ_RECOMPRESS_NEVER + options.bitonal_image_recompress_quality = None + options.bitonal_image_subsample_method = FZ_SUBSAMPLE_AVERAGE + options.bitonal_image_subsample_threshold = 0 + options.bitonal_image_subsample_to = 0 + options.color_lossless_image_recompress_method = FZ_RECOMPRESS_NEVER + options.color_lossless_image_recompress_quality = None + options.color_lossless_image_subsample_method = FZ_SUBSAMPLE_AVERAGE + options.color_lossless_image_subsample_threshold = 0 + options.color_lossless_image_subsample_to = 0 + options.color_lossy_image_recompress_method = FZ_RECOMPRESS_NEVER + options.color_lossy_image_recompress_quality = None + options.color_lossy_image_subsample_method = FZ_SUBSAMPLE_AVERAGE + options.color_lossy_image_subsample_threshold = 0 + options.color_lossy_image_subsample_to = 0 + options.gray_lossless_image_recompress_method = FZ_RECOMPRESS_NEVER + options.gray_lossless_image_recompress_quality = None + options.gray_lossless_image_subsample_method = FZ_SUBSAMPLE_AVERAGE + options.gray_lossless_image_subsample_threshold = 0 + options.gray_lossless_image_subsample_to = 0 + options.gray_lossy_image_recompress_method = FZ_RECOMPRESS_NEVER + options.gray_lossy_image_recompress_quality = None + options.gray_lossy_image_subsample_method = FZ_SUBSAMPLE_AVERAGE + options.gray_lossy_image_subsample_threshold = 0 + options.gray_lossy_image_subsample_to = 0 - PDF only: Return the unrotated page rectangle -- **without loading the page** (via :meth:`Document.load_page`). This is meant for internal purpose requiring best possible performance. + The ``*_recompress_method`` attributes may be one of the values **FZ_RECOMPRESS_NEVER (0), FZ_RECOMPRESS_SAME (1), FZ_RECOMPRESS_LOSSLESS (2), FZ_RECOMPRESS_JPEG (3), FZ_RECOMPRESS_J2K (4), FZ_RECOMPRESS_FAX (5)**. Value FZ_RECOMPRESS_NEVER will skip this image type altogether and FZ_RECOMPRESS_SAME will not change the type. The other values will execute type conversions (as far as technically possible). + + The ``*_quality`` values are strings of integers from "0" to "100" or ``None``. + + The ``*_subsample_method`` attributes are either **FZ_SUBSAMPLE_AVERAGE (0)** or **FZ_SUBSAMPLE_BICUBIC (1)** and refer to how a pixel value is derived from its neighboring pixels during subsampling. For some background see `this Wikipedia article about bicubic interpolation `_. + + Attributes ``*_subsample_threshold`` excludes images from subsampling which have a lower DPI. Participating images will be subsampled to the DPI values given by the ``*_subsample_to`` values. Values of 0 mean that no subsampling will take place. + + The ``*_subsample_threshold`` values should be chosen notably larger than the ``*_subsample_to`` values to ensure that there are enough size savings. After all, every subsampling inevitably incurs quality losses. + + An example for a good choice is ``threshold=100`` and ``to=72``. - :arg int pno: 0-based page number. - :returns: :ref:`Rect` of the page like :meth:`Page.rect`, but ignoring any rotation. + .. method:: recolor(components=1) - .. method:: page_xref(pno) + PDF only: Change the color component counts for all object types text, images and vector graphics for all pages. - * New in v1.17.7 + :arg int components: desired color space indicated by the number of color components: 1 = DeviceGRAY, 3 = DeviceRGB, 4 = DeviceCMYK. - PDF only: Return the :data:`xref` of the page -- **without loading the page** (via :meth:`Document.load_page`). This is meant for internal purpose requiring best possible performance. + The typical use case is 1 (DeviceGRAY) which converts the PDF to grayscale. - :arg int pno: 0-based page number. - :returns: :data:`xref` of the page like :attr:`Page.xref`. + .. method:: reload_page(page) - .. method:: pages(start=None, [stop=None, [step=None]]) + * New in v1.16.10 - * New in v1.16.4 + PDF only: Provide a new copy of a page after finishing and updating all pending changes. - A generator for a range of pages. Parameters have the same meaning as in the built-in function *range()*. Intended for expressions of the form *"for page in doc.pages(start, stop, step): ..."*. + :arg page: page object. + :type page: :ref:`Page` - :arg int start: start iteration with this page number. Default is zero, allowed values are `-∞ < start < page_count`. While this is negative, :attr:`page_count` is added **before** starting the iteration. - :arg int stop: stop iteration at this page number. Default is :attr:`page_count`, possible are `-∞ < stop <= page_count`. Larger values are **silently replaced** by the default. Negative values will cyclically emit the pages in reversed order. As with the built-in *range()*, this is the first page **not** returned. - :arg int step: stepping value. Defaults are 1 if start < stop and -1 if start > stop. Zero is not allowed. + :rtype: :ref:`Page` - :returns: a generator iterator over the document's pages. Some examples: + :returns: a new copy of the same page. All pending updates (e.g. to annotations or widgets) will be finalized and a fresh copy of the page will be loaded. - * "doc.pages()" emits all pages. - * "doc.pages(4, 9, 2)" emits pages 4, 6, 8. - * "doc.pages(0, None, 2)" emits all pages with even numbers. - * "doc.pages(-2)" emits the last two pages. - * "doc.pages(-1, -1)" emits all pages in reversed order. - * "doc.pages(-1, -10)" always emits 10 pages in reversed order, starting with the last page -- **repeatedly** if the document has less than 10 pages. So for a 4-page document the following page numbers are emitted: 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3. + .. note:: In a typical use case, a page :ref:`Pixmap` should be taken after annotations / widgets have been added or changed. To force all those changes being reflected in the page structure, this method re-instates a fresh copy while keeping the object hierarchy "document -> page -> annotations/widgets" intact. - .. index:: - pair: from_page; Document.convert_to_pdf - pair: to_page; Document.convert_to_pdf - pair: rotate; Document.convert_to_pdf - .. method:: convert_to_pdf(from_page=-1, to_page=-1, rotate=0) + .. method:: resolve_names() - Create a PDF version of the current document and write it to memory. **All document types** are supported. The parameters have the same meaning as in :meth:`insert_pdf`. In essence, you can restrict the conversion to a page subset, specify page rotation, and revert page sequence. + PDF only: Convert destination names into a Python dict. - :arg int from_page: first page to copy (0-based). Default is first page. + :returns: + A dictionary with the following layout: - :arg int to_page: last page to copy (0-based). Default is last page. + * *key*: (str) the name. + * *value*: (dict) with the following layout: + * "page": target page number (0-based). If no page number found -1. + * "to": (x, y) target point on page. Currently in PDF coordinates, + i.e. point (0,0) is the bottom-left of the page. + * "zoom": (float) the zoom factor. + * "dest": (str) only present if the target location on the page has + not been provided as "/XYZ" or if no page number was found. + Examples:: - :arg int rotate: rotation angle. Default is 0 (no rotation). Should be *n * 90* with an integer n (not checked). + { + '__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0}, + '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}, + } - :rtype: bytes - :returns: a Python *bytes* object containing a PDF file image. It is created by internally using `tobytes(garbage=4, deflate=True)`. See :meth:`tobytes`. You can output it directly to disk or open it as a PDF. Here are some examples:: + or:: - >>> # convert an XPS file to PDF - >>> xps = fitz.open("some.xps") - >>> pdfbytes = xps.convert_to_pdf() - >>> - >>> # either do this --> - >>> pdf = fitz.open("pdf", pdfbytes) - >>> pdf.save("some.pdf") - >>> - >>> # or this --> - >>> pdfout = open("some.pdf", "wb") - >>> pdfout.tobytes(pdfbytes) - >>> pdfout.close() + { + '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, + ... + } - >>> # copy image files to PDF pages - >>> # each page will have image dimensions - >>> doc = fitz.open() # new PDF - >>> imglist = [ ... image file names ...] # e.g. a directory listing - >>> for img in imglist: - imgdoc=fitz.open(img) # open image as a document - pdfbytes=imgdoc.convert_to_pdf() # make a 1-page PDF of it - imgpdf=fitz.open("pdf", pdfbytes) - doc.insert_pdf(imgpdf) # insert the image PDF - >>> doc.save("allmyimages.pdf") + All names found in the catalog under keys "/Dests" and "/Names/Dests" are + included. - .. note:: The method uses the same logic as the *mutool convert* CLI. This works very well in most cases -- however, beware of the following limitations. + * New in v1.23.6 - * Image files: perfect, no issues detected. However, image transparency is ignored. If you need that (like for a watermark), use :meth:`Page.insert_image` instead. Otherwise, this method is recommended for its much better performance. - * XPS: appearance very good. Links work fine, outlines (bookmarks) are lost, but can easily be recovered [#f2]_. - * EPUB, CBZ, FB2: similar to XPS. - * SVG: medium. Roughly comparable to `svglib `_. - .. method:: get_toc(simple=True) + .. method:: page_cropbox(pno) - Creates a table of contents (TOC) out of the document's outline chain. + * New in v1.17.7 - :arg bool simple: Indicates whether a simple or a detailed TOC is required. If *False*, each item of the list also contains a dictionary with :ref:`linkDest` details for each outline entry. + PDF only: Return the unrotated page rectangle -- **without loading the page** (via :meth:`Document.load_page`). This is meant for internal purpose requiring best possible performance. - :rtype: list + :arg int pno: 0-based page number. - :returns: a list of lists. Each entry has the form *[lvl, title, page, dest]*. Its entries have the following meanings: + :returns: :ref:`Rect` of the page like :meth:`Page.rect`, but ignoring any rotation. - * *lvl* -- hierarchy level (positive *int*). The first entry is always 1. Entries in a row are either **equal**, **increase** by 1, or **decrease** by any number. - * *title* -- title (*str*) - * *page* -- 1-based page number (*int*). If `-1` either no destination or outside document. - * *dest* -- (*dict*) included only if *simple=False*. Contains details of the TOC item as follows: + .. method:: page_xref(pno) - - kind: destination kind, see :ref:`linkDest Kinds`. - - file: filename if kind is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`. - - page: target page, 0-based, :data:`LINK_GOTOR` or :data:`LINK_GOTO` only. - - to: position on target page (:ref:`Point`). - - zoom: (float) zoom factor on target page. - - xref: :data:`xref` of the item (0 if no PDF). - - color: item color in PDF RGB format `(red, green, blue)`, or omitted (always omitted if no PDF). - - bold: true if bold item text or omitted. PDF only. - - italic: true if italic item text, or omitted. PDF only. - - collapse: true if sub-items are folded, or omitted. PDF only. + * New in v1.17.7 + PDF only: Return the :data:`xref` of the page -- **without loading the page** (via :meth:`Document.load_page`). This is meant for internal purpose requiring best possible performance. - .. method:: xref_get_keys(xref) + :arg int pno: 0-based page number. - * New in v1.18.7 + :returns: :data:`xref` of the page like :attr:`Page.xref`. - PDF only: Return the PDF dictionary keys of the :data:`dictionary` object provided by its xref number. + .. method:: pages(start=None, [stop=None, [step=None]]) - :arg int xref: the :data:`xref`. *(Changed in v1.18.10)* Use `-1` to access the special dictionary "PDF trailer". + * New in v1.16.4 - :returns: a tuple of dictionary keys present in object :data:`xref`. Examples: + A generator for a range of pages. Parameters have the same meaning as in the built-in function *range()*. Intended for expressions of the form *"for page in doc.pages(start, stop, step): ..."*. - >>> from pprint import pprint - >>> import fitz - >>> doc=fitz.open("pymupdf.pdf") - >>> xref = doc.page_xref(0) # xref of page 0 - >>> pprint(doc.xref_get_keys(xref)) # primary level keys of a page - ('Type', 'Contents', 'Resources', 'MediaBox', 'Parent') - >>> pprint(doc.xref_get_keys(-1)) # primary level keys of the trailer - ('Type', 'Index', 'Size', 'W', 'Root', 'Info', 'ID', 'Length', 'Filter') - >>> + :arg int start: start iteration with this page number. Default is zero, allowed values are `-∞ < start < page_count`. While this is negative, :attr:`page_count` is added **before** starting the iteration. + :arg int stop: stop iteration at this page number. Default is :attr:`page_count`, possible are `-∞ < stop <= page_count`. Larger values are **silently replaced** by the default. Negative values will cyclically emit the pages in reversed order. As with the built-in *range()*, this is the first page **not** returned. + :arg int step: stepping value. Defaults are 1 if start < stop and -1 if start > stop. Zero is not allowed. + + :returns: a generator iterator over the document's pages. Some examples: + + * "doc.pages()" emits all pages. + * "doc.pages(4, 9, 2)" emits pages 4, 6, 8. + * "doc.pages(0, None, 2)" emits all pages with even numbers. + * "doc.pages(-2)" emits the last two pages. + * "doc.pages(-1, -1)" emits all pages in reversed order. + * "doc.pages(-1, -10)" always emits 10 pages in reversed order, starting with the last page -- **repeatedly** if the document has less than 10 pages. So for a 4-page document the following page numbers are emitted: 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3. + + .. index:: + pair: from_page; Document.convert_to_pdf + pair: to_page; Document.convert_to_pdf + pair: rotate; Document.convert_to_pdf + + .. method:: convert_to_pdf(from_page=-1, to_page=-1, rotate=0) + + Create a PDF version of the current document and write it to memory. **All document types** are supported. The parameters have the same meaning as in :meth:`insert_pdf`. In essence, you can restrict the conversion to a page subset, specify page rotation, and revert page sequence. + :arg int from_page: first page to copy (0-based). Default is first page. - .. method:: xref_get_key(xref, key) - - * New in v1.18.7 - - PDF only: Return type and value of a PDF dictionary key of a :data:`dictionary` object given by its xref. - - :arg int xref: the :data:`xref`. *Changed in v1.18.10:* Use `-1` to access the special dictionary "PDF trailer". - - :arg str key: the desired PDF key. Must **exactly** match (case-sensitive) one of the keys contained in :meth:`Document.xref_get_keys`. - - :rtype: tuple - - :returns: A tuple (type, value) of strings, where type is one of "xref", "array", "dict", "int", "float", "null", "bool", "name", "string" or "unknown" (should not occur). Independent of "type", the value of the key is **always** formatted as a string -- see the following example -- and (almost always) a faithful reflection of what is stored in the PDF. In most cases, the format of the value string also gives a clue about the key type: - - * A "name" always starts with a "/" slash. - * An "xref" always ends with " 0 R". - * An "array" is always enclosed in "[...]" brackets. - * A "dict" is always enclosed in "<<...>>" brackets. - * A "bool", resp. "null" always equal either "true", "false", resp. "null". - * "float" and "int" are represented by their string format -- and are thus not always distinguishable. - * A "string" is converted to UTF-8 and may therefore deviate from what is stored in the PDF. For example, the PDF key "Author" may have a value of "" in the file, but the method will return `('string', 'Jorj X. McKie')`. - - >>> for key in doc.xref_get_keys(xref): - print(key, "=" , doc.xref_get_key(xref, key)) - Type = ('name', '/Page') - Contents = ('xref', '1297 0 R') - Resources = ('xref', '1296 0 R') - MediaBox = ('array', '[0 0 612 792]') - Parent = ('xref', '1301 0 R') - >>> # - >>> # Now same thing for the PDF trailer. - >>> # It has no xref, so -1 must be used instead. - >>> # - >>> for key in doc.xref_get_keys(-1): - print(key, "=", doc.xref_get_key(-1, key)) - Type = ('name', '/XRef') - Index = ('array', '[0 8802]') - Size = ('int', '8802') - W = ('array', '[1 3 1]') - Root = ('xref', '8799 0 R') - Info = ('xref', '8800 0 R') - ID = ('array', '[]') - Length = ('int', '21111') - Filter = ('name', '/FlateDecode') + :arg int to_page: last page to copy (0-based). Default is last page. + + :arg int rotate: rotation angle. Default is 0 (no rotation). Should be *n * 90* with an integer n (not checked). + + :rtype: bytes + :returns: a Python *bytes* object containing a PDF file image. It is created by internally using `tobytes(garbage=4, deflate=True)`. See :meth:`tobytes`. You can output it directly to disk or open it as a PDF. Here are some examples:: + + >>> # convert an XPS file to PDF + >>> xps = pymupdf.open("some.xps") + >>> pdfbytes = xps.convert_to_pdf() >>> + >>> # either do this --> + >>> pdf = pymupdf.open("pdf", pdfbytes) + >>> pdf.save("some.pdf") + >>> + >>> # or this --> + >>> pdfout = open("some.pdf", "wb") + >>> pdfout.tobytes(pdfbytes) + >>> pdfout.close() + + >>> # copy image files to PDF pages + >>> # each page will have image dimensions + >>> doc = pymupdf.open() # new PDF + >>> imglist = [ ... image file names ...] # e.g. a directory listing + >>> for img in imglist: + imgdoc=pymupdf.open(img) # open image as a document + pdfbytes=imgdoc.convert_to_pdf() # make a 1-page PDF of it + imgpdf=pymupdf.open("pdf", pdfbytes) + doc.insert_pdf(imgpdf) # insert the image PDF + >>> doc.save("allmyimages.pdf") + .. note:: The method uses the same logic as the *mutool convert* CLI. This works very well in most cases -- however, beware of the following limitations. - .. method:: xref_set_key(xref, key, value) + * Image files: perfect, no issues detected. However, image transparency is ignored. If you need that (like for a watermark), use :meth:`Page.insert_image` instead. Otherwise, this method is recommended for its much better performance. + * XPS: appearance very good. Links work fine, outlines (bookmarks) are lost, but can easily be recovered [#f2]_. + * EPUB, CBZ, FB2: similar to XPS. + * SVG: medium. Roughly comparable to `svglib `_. - * New in v1.18.7, changed in v 1.18.13 - * Changed in v1.19.4: remove a key "physically" if set to "null". + .. method:: get_toc(simple=True) - PDF only: Set (add, update, delete) the value of a PDF key for the :data:`dictionary` object given by its xref. - - .. caution:: This is an expert function: if you do not know what you are doing, there is a high risk to render (parts of) the PDF unusable. Please do consult :ref:`AdobeManual` about object specification formats (page 18) and the structure of special dictionary types like page objects. + Creates a table of contents (TOC) out of the document's outline chain. - :arg int xref: the :data:`xref`. *Changed in v1.18.13:* To update the PDF trailer, specify -1. - :arg str key: the desired PDF key (without leading "/"). Must not be empty. Any valid PDF key -- whether already present in the object (which will be overwritten) -- or new. It is possible to use PDF path notation like `"Resources/ExtGState"` -- which sets the value for key `"/ExtGState"` as a sub-object of `"/Resources"`. - :arg str value: the value for the key. It must be a non-empty string and, depending on the desired PDF object type, the following rules must be observed. There is some syntax checking, but **no type checking** and no checking if it makes sense PDF-wise, i.e. **no semantics checking**. Upper / lower case is important! + :arg bool simple: Indicates whether a simple or a detailed TOC is required. If ``False``, each item of the list also contains a dictionary with :ref:`linkDest` details for each outline entry. - * **xref** -- must be provided as `"nnn 0 R"` with a valid :data:`xref` number nnn of the PDF. The suffix "`0 R`" is required to be recognizable as an xref by PDF applications. - * **array** -- a string like `"[a b c d e f]"`. The brackets are required. Array items must be separated by at least one space (not commas like in Python). An empty array `"[]"` is possible and *equivalent* to removing the key. Array items may be any PDF objects, like dictionaries, xrefs, other arrays, etc. Like in Python, array items may be of different types. - * **dict** -- a string like `"<< ... >>"`. The brackets are required and must enclose a valid PDF dictionary definition. The empty dictionary `"<<>>"` is possible and *equivalent* to removing the key. - * **int** -- an integer formatted **as a string**. - * **float** -- a float formatted **as a string**. Scientific notation (with exponents) is **not allowed by PDF**. - * **null** -- the string `"null"`. This is the PDF equivalent to Python's `None` and causes the key to be ignored -- however not necessarily removed, resp. removed on saves with garbage collection. *Changed in v1.19.4:* If the key is no path hierarchy (i.e. contains no slash "/"), then it will be completely removed. - * **bool** -- one of the strings `"true"` or `"false"`. - * **name** -- a valid PDF name with a leading slash like this: `"/PageLayout"`. See page 16 of the :ref:`AdobeManual`. - * **string** -- a valid PDF string. **All PDF strings must be enclosed by brackets**. Denote the empty string as `"()"`. Depending on its content, the possible brackets are - - - "(...)" for ASCII-only text. Reserved PDF characters must be backslash-escaped and non-ASCII characters must be provided as 3-digit backslash-escaped octals -- including leading zeros. Example: 12 = 0x0C must be encoded as `\014`. - - "<...>" for hex-encoded text. Every character must be represented by two hex-digits (lower or upper case). - - - If in doubt, we **strongly recommend** to use :meth:`get_pdf_str`! This function automatically generates the right brackets, escapes, and overall format. It will for example do conversions like these: + :rtype: list - >>> # because of the € symbol, the following yields UTF-16BE BOM - >>> fitz.get_pdf_str("Pay in $ or €.") - '' - >>> # escapes for brackets and non-ASCII - >>> fitz.get_pdf_str("Prices in EUR (USD also accepted). Areas are in m².") - '(Prices in EUR \\(USD also accepted\\). Areas are in m\\262.)' + :returns: a list of lists. Each entry has the form *[lvl, title, page, dest]*. Its entries have the following meanings: + * *lvl* -- hierarchy level (positive *int*). The first entry is always 1. Entries in a row are either **equal**, **increase** by 1, or **decrease** by any number. + * *title* -- title (*str*) + * *page* -- 1-based source page number (*int*). `-1` if no destination or outside document. + * *dest* -- (*dict*) included only if *simple=False*. Contains details of the TOC item as follows: - .. method:: get_page_pixmap(pno: int, *, matrix: matrix_like = Identity, dpi=None, colorspace: Colorspace = csRGB, clip: rect_like = None, alpha: bool = False, annots: bool = True) + - kind: destination kind, see :ref:`linkDest Kinds`. + - file: filename if kind is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`. + - page: target page, 0-based, :data:`LINK_GOTOR` or :data:`LINK_GOTO` only. + - to: position on target page (:ref:`Point`). + - zoom: (float) zoom factor on target page. + - xref: :data:`xref` of the item (0 if no PDF). + - color: item color in PDF RGB format `(red, green, blue)`, or omitted (always omitted if no PDF). + - bold: true if bold item text or omitted. PDF only. + - italic: true if italic item text, or omitted. PDF only. + - collapse: true if sub-items are folded, or omitted. PDF only. + - nameddest: target name if kind=4. PDF only. (New in 1.23.7.) - Creates a pixmap from page *pno* (zero-based). Invokes :meth:`Page.get_pixmap`. - All parameters except `pno` are *keyword-only.* + .. method:: xref_get_keys(xref) - :arg int pno: page number, 0-based in `-∞ < pno < page_count`. + * New in v1.18.7 - :rtype: :ref:`Pixmap` + PDF only: Return the PDF dictionary keys of the :data:`dictionary` object provided by its xref number. - .. method:: get_page_xobjects(pno) + :arg int xref: the :data:`xref`. *(Changed in v1.18.10)* Use `-1` to access the special dictionary "PDF trailer". - * New in v1.16.13 - * Changed in v1.18.11 + :returns: a tuple of dictionary keys present in object :data:`xref`. Examples: - PDF only: Return a list of all XObjects referenced by a page. + >>> from pprint import pprint + >>> import pymupdf + >>> doc=pymupdf.open("pymupdf.pdf") + >>> xref = doc.page_xref(0) # xref of page 0 + >>> pprint(doc.xref_get_keys(xref)) # primary level keys of a page + ('Type', 'Contents', 'Resources', 'MediaBox', 'Parent') + >>> pprint(doc.xref_get_keys(-1)) # primary level keys of the trailer + ('Type', 'Index', 'Size', 'W', 'Root', 'Info', 'ID', 'Length', 'Filter') + >>> - :arg int pno: page number, 0-based, `-∞ < pno < page_count`. - :rtype: list - :returns: a list of (non-image) XObjects. These objects typically represent pages *embedded* (not copied) from other PDFs. For example, :meth:`Page.show_pdf_page` will create this type of object. An item of this list has the following layout: `(xref, name, invoker, bbox)`, where + .. method:: xref_get_key(xref, key) + + * New in v1.18.7 + + PDF only: Return type and value of a PDF dictionary key of a :data:`dictionary` object given by its xref. + + :arg int xref: the :data:`xref`. *Changed in v1.18.10:* Use `-1` to access the special dictionary "PDF trailer". + + :arg str key: the desired PDF key. Must **exactly** match (case-sensitive) one of the keys contained in :meth:`Document.xref_get_keys`. + + :rtype: tuple + + :returns: A tuple (type, value) of strings, where type is one of "xref", "array", "dict", "int", "float", "null", "bool", "name", "string" or "unknown" (should not occur). Independent of "type", the value of the key is **always** formatted as a string -- see the following example -- and (almost always) a faithful reflection of what is stored in the PDF. In most cases, the format of the value string also gives a clue about the key type: + + * A "name" always starts with a "/" slash. + * An "xref" always ends with " 0 R". + * An "array" is always enclosed in "[...]" brackets. + * A "dict" is always enclosed in "<<...>>" brackets. + * A "bool", resp. "null" always equal either "true", "false", resp. "null". + * "float" and "int" are represented by their string format -- and are thus not always distinguishable. + * A "string" is converted to UTF-8 and may therefore deviate from what is stored in the PDF. For example, the PDF key "Author" may have a value of "" in the file, but the method will return `('string', 'Jorj X. McKie')`. + + >>> for key in doc.xref_get_keys(xref): + print(key, "=" , doc.xref_get_key(xref, key)) + Type = ('name', '/Page') + Contents = ('xref', '1297 0 R') + Resources = ('xref', '1296 0 R') + MediaBox = ('array', '[0 0 612 792]') + Parent = ('xref', '1301 0 R') + >>> # + >>> # Now same thing for the PDF trailer. + >>> # It has no xref, so -1 must be used instead. + >>> # + >>> for key in doc.xref_get_keys(-1): + print(key, "=", doc.xref_get_key(-1, key)) + Type = ('name', '/XRef') + Index = ('array', '[0 8802]') + Size = ('int', '8802') + W = ('array', '[1 3 1]') + Root = ('xref', '8799 0 R') + Info = ('xref', '8800 0 R') + ID = ('array', '[]') + Length = ('int', '21111') + Filter = ('name', '/FlateDecode') + >>> - * **xref** (*int*) is the XObject's :data:`xref`. - * **name** (*str*) is the symbolic name to reference the XObject. - * **invoker** (*int*) the :data:`xref` of the invoking XObject or zero if the page directly invokes it. - * **bbox** (:ref:`Rect`) the boundary box of the XObject's location on the page **in untransformed coordinates**. To get actual, non-rotated page coordinates, multiply with the page's transformation matrix :attr:`Page.transformation_matrix`. *Changed in v.18.11:* the bbox is now formatted as :ref:`Rect`. + .. method:: xref_set_key(xref, key, value) - .. method:: get_page_images(pno, full=False) + * New in v1.18.7, changed in v 1.18.13 + * Changed in v1.19.4: remove a key "physically" if set to "null". - PDF only: Return a list of all images (directly or indirectly) referenced by the page. + PDF only: Set (add, update, delete) the value of a PDF key for the :data:`dictionary` object given by its xref. - :arg int pno: page number, 0-based, `-∞ < pno < page_count`. - :arg bool full: whether to also include the referencer's :data:`xref` (which is zero if this is the page). + .. caution:: This is an expert function: if you do not know what you are doing, there is a high risk to render (parts of) the PDF unusable. Please do consult :ref:`AdobeManual` about object specification formats (page 18) and the structure of special dictionary types like page objects. - :rtype: list + :arg int xref: the :data:`xref`. *Changed in v1.18.13:* To update the PDF trailer, specify -1. + :arg str key: the desired PDF key (without leading "/"). Must not be empty. Any valid PDF key -- whether already present in the object (which will be overwritten) -- or new. It is possible to use PDF path notation like `"Resources/ExtGState"` -- which sets the value for key `"/ExtGState"` as a sub-object of `"/Resources"`. + :arg str value: the value for the key. It must be a non-empty string and, depending on the desired PDF object type, the following rules must be observed. There is some syntax checking, but **no type checking** and no checking if it makes sense PDF-wise, i.e. **no semantics checking**. Upper / lower case is important! - :returns: a list of images **referenced** by this page. Each item looks like + * *:data:`xref`* -- must be provided as `"nnn 0 R"` with a valid :data:`xref` number nnn of the PDF. The suffix "`0 R`" is required to be recognizable as an xref by PDF applications. + * **array** -- a string like `"[a b c d e f]"`. The brackets are required. Array items must be separated by at least one space (not commas like in Python). An empty array `"[]"` is possible and *equivalent* to removing the key. Array items may be any PDF objects, like dictionaries, xrefs, other arrays, etc. Like in Python, array items may be of different types. + * **dict** -- a string like `"<< ... >>"`. The brackets are required and must enclose a valid PDF dictionary definition. The empty dictionary `"<<>>"` is possible and *equivalent* to removing the key. + * **int** -- an integer formatted **as a string**. + * **float** -- a float formatted **as a string**. Scientific notation (with exponents) is **not allowed by PDF**. + * **null** -- the string `"null"`. This is the PDF equivalent to Python's `None` and causes the key to be ignored -- however not necessarily removed, resp. removed on saves with garbage collection. *Changed in v1.19.4:* If the key is no path hierarchy (i.e. contains no slash "/"), then it will be completely removed. + * **bool** -- one of the strings `"true"` or `"false"`. + * **name** -- a valid PDF name with a leading slash like this: `"/PageLayout"`. See page 16 of the :ref:`AdobeManual`. + * **string** -- a valid PDF string. **All PDF strings must be enclosed by brackets**. Denote the empty string as `"()"`. Depending on its content, the possible brackets are - `(xref, smask, width, height, bpc, colorspace, alt. colorspace, name, filter, referencer)` + - "(...)" for ASCII-only text. Reserved PDF characters must be backslash-escaped and non-ASCII characters must be provided as 3-digit backslash-escaped octals -- including leading zeros. Example: 12 = 0x0C must be encoded as `\014`. + - "<...>" for hex-encoded text. Every character must be represented by two hex-digits (lower or upper case). - Where + - If in doubt, we **strongly recommend** to use :meth:`get_pdf_str`! This function automatically generates the right brackets, escapes, and overall format. It will for example do conversions like these: - * **xref** (*int*) is the image object number - * **smask** (*int*) is the object number of its soft-mask image - * **width** and **height** (*ints*) are the image dimensions - * **bpc** (*int*) denotes the number of bits per component (normally 8) - * **colorspace** (*str*) a string naming the colorspace (like **DeviceRGB**) - * **alt. colorspace** (*str*) is any alternate colorspace depending on the value of **colorspace** - * **name** (*str*) is the symbolic name by which the image is referenced - * **filter** (*str*) is the decode filter of the image (:ref:`AdobeManual`, pp. 22). - * **referencer** (*int*) the :data:`xref` of the referencer. Zero if directly referenced by the page. Only present if *full=True*. + >>> # because of the € symbol, the following yields UTF-16BE BOM + >>> pymupdf.get_pdf_str("Pay in $ or €.") + '' + >>> # escapes for brackets and non-ASCII + >>> pymupdf.get_pdf_str("Prices in EUR (USD also accepted). Areas are in m².") + '(Prices in EUR \\(USD also accepted\\). Areas are in m\\262.)' - .. note:: In general, this is not the list of images that are **actually displayed**. This method only parses several PDF objects to collect references to embedded images. It does not analyse the page's :data:`contents`, where all the actual image display commands are defined. To get this information, please use :meth:`Page.get_image_info`. Also have a look at the discussion in section :ref:`textpagedict`. + .. method:: get_page_pixmap(pno: int, *, matrix: matrix_like = Identity, dpi=None, colorspace: Colorspace = csRGB, clip: rect_like = None, alpha: bool = False, annots: bool = True) - .. method:: get_page_fonts(pno, full=False) + Creates a pixmap from page *pno* (zero-based). Invokes :meth:`Page.get_pixmap`. - PDF only: Return a list of all fonts (directly or indirectly) referenced by the page. + All parameters except `pno` are *keyword-only.* - :arg int pno: page number, 0-based, `-∞ < pno < page_count`. - :arg bool full: whether to also include the referencer's :data:`xref`. If *True*, the returned items are one entry longer. Use this option if you need to know, whether the page directly references the font. In this case the last entry is 0. If the font is referenced by an `/XObject` of the page, you will find its :data:`xref` here. + :arg int pno: page number, 0-based in `-∞ < pno < page_count`. - :rtype: list + :rtype: :ref:`Pixmap` - :returns: a list of fonts referenced by this page. Each entry looks like + .. method:: get_page_xobjects(pno) - **(xref, ext, type, basefont, name, encoding, referencer)**, + * New in v1.16.13 + * Changed in v1.18.11 - where + PDF only: Return a list of all XObjects referenced by a page. - * **xref** (*int*) is the font object number (may be zero if the PDF uses one of the builtin fonts directly) - * **ext** (*str*) font file extension (e.g. "ttf", see :ref:`FontExtensions`) - * **type** (*str*) is the font type (like "Type1" or "TrueType" etc.) - * **basefont** (*str*) is the base font name, - * **name** (*str*) is the symbolic name, by which the font is referenced - * **encoding** (*str*) the font's character encoding if different from its built-in encoding (:ref:`AdobeManual`, p. 254): - * **referencer** (*int* optional) the :data:`xref` of the referencer. Zero if directly referenced by the page, otherwise the xref of an XObject. Only present if *full=True*. + :arg int pno: page number, 0-based, `-∞ < pno < page_count`. - Example:: + :rtype: list + :returns: a list of (non-image) XObjects. These objects typically represent pages *embedded* (not copied) from other PDFs. For example, :meth:`Page.show_pdf_page` will create this type of object. An item of this list has the following layout: `(xref, name, invoker, bbox)`, where - >>> pprint(doc.get_page_fonts(0, full=False)) - [(12, 'ttf', 'TrueType', 'FNUUTH+Calibri-Bold', 'R8', ''), - (13, 'ttf', 'TrueType', 'DOKBTG+Calibri', 'R10', ''), - (14, 'ttf', 'TrueType', 'NOHSJV+Calibri-Light', 'R12', ''), - (15, 'ttf', 'TrueType', 'NZNDCL+CourierNewPSMT', 'R14', ''), - (16, 'ttf', 'Type0', 'MNCSJY+SymbolMT', 'R17', 'Identity-H'), - (17, 'cff', 'Type1', 'UAEUYH+Helvetica', 'R20', 'WinAnsiEncoding'), - (18, 'ttf', 'Type0', 'ECPLRU+Calibri', 'R23', 'Identity-H'), - (19, 'ttf', 'Type0', 'TONAYT+CourierNewPSMT', 'R27', 'Identity-H')] + * *:data:`xref`* (*int*) is the XObject's :data:`xref`. + * **name** (*str*) is the symbolic name to reference the XObject. + * **invoker** (*int*) the :data:`xref` of the invoking XObject or zero if the page directly invokes it. + * **bbox** (:ref:`Rect`) the boundary box of the XObject's location on the page **in untransformed coordinates**. To get actual, non-rotated page coordinates, multiply with the page's transformation matrix :attr:`Page.transformation_matrix`. *Changed in v.18.11:* the bbox is now formatted as :ref:`Rect`. - .. note:: - * This list has no duplicate entries: the combination of :data:`xref`, *name* and *referencer* is unique. - * In general, this is a superset of the fonts actually in use by this page. The PDF creator may e.g. have specified some global list, of which each page only makes partial use. - .. method:: get_page_text(pno, output="text", flags=3, textpage=None, sort=False) + .. method:: get_page_images(pno, full=False) - Extracts the text of a page given its page number *pno* (zero-based). Invokes :meth:`Page.get_text`. + PDF only: Return a list of all images (directly or indirectly) referenced by the page. - :arg int pno: page number, 0-based, any value `-∞ < pno < page_count`. + :arg int pno: page number, 0-based, `-∞ < pno < page_count`. + :arg bool full: whether to also include the referencer's :data:`xref` (which is zero if this is the page). - For other parameter refer to the page method. + :rtype: list - :rtype: str + :returns: a list of images **referenced** by this page. Each item looks like: - .. index:: - pair: fontsize; Document.layout - pair: rect; Document.layout - pair: width; Document.layout - pair: height; Document.layout + `(xref, smask, width, height, bpc, colorspace, alt_colorspace, name, filter, referencer)` - .. method:: layout(rect=None, width=0, height=0, fontsize=11) + * ``xref`` (*int*) is the image object number + * ``smask`` (*int*) is the object number of its soft-mask image + * ``width`` (*int*) is the image width + * ``height`` (*int*) is the image height + * ``bpc`` (*int*) denotes the number of bits per component (normally 8) + * ``colorspace`` (*str*) a string naming the colorspace (like **DeviceRGB**) + * ``alt_colorspace`` (*str*) is any alternate colorspace depending on the value of **colorspace** + * ``name`` (*str*) is the symbolic name by which the image is referenced + * ``filter`` (*str*) is the decode filter of the image (:ref:`AdobeManual`, pp. 22). + * ``referencer`` (*int*) the :data:`xref` of the referencer. Zero if directly referenced by the page. Only present if *full=True*. - Re-paginate ("reflow") the document based on the given page dimension and fontsize. This only affects some document types like e-books and HTML. Ignored if not supported. Supported documents have *True* in property :attr:`is_reflowable`. + .. note:: In general, this is not the list of images that are **actually displayed**. This method only parses several PDF objects to collect references to embedded images. It does not analyse the page's :data:`contents`, where all the actual image display commands are defined. To get this information, please use :meth:`Page.get_image_info`. Also have a look at the discussion in section :ref:`textpagedict`. - :arg rect_like rect: desired page size. Must be finite, not empty and start at point (0, 0). - :arg float width: use it together with *height* as alternative to *rect*. - :arg float height: use it together with *width* as alternative to *rect*. - :arg float fontsize: the desired default fontsize. - .. method:: select(s) + .. method:: get_page_fonts(pno, full=False) - PDF only: Keeps only those pages of the document whose numbers occur in the list. Empty sequences or elements outside `range(doc.page_count)` will cause a *ValueError*. For more details see remarks at the bottom or this chapter. + PDF only: Return a list of all fonts (directly or indirectly) referenced by the page object definition. - :arg sequence s: The sequence (see :ref:`SequenceTypes`) of page numbers (zero-based) to be included. Pages not in the sequence will be deleted (from memory) and become unavailable until the document is reopened. **Page numbers can occur multiple times and in any order:** the resulting document will reflect the sequence exactly as specified. + :arg int pno: page number, 0-based, `-∞ < pno < page_count`. + :arg bool full: whether to also include the referencer's :data:`xref`. If ``True``, the returned items are one entry longer. Use this option if you need to know, whether the page directly references the font. In this case the last entry is 0. If the font is referenced by an `/XObject` of the page, you will find its :data:`xref` here. - .. note:: + :rtype: list + + :returns: a list of fonts referenced by the object definition of the page. Each entry looks like: - * Page numbers in the sequence need not be unique nor be in any particular order. This makes the method a versatile utility to e.g. select only the even or the odd pages or meeting some other criteria and so forth. + `(xref, ext, type, basefont, name, encoding, referencer)` - * On a technical level, the method will always create a new :data:`pagetree`. + * ``xref`` (*int*) is the font object number (may be zero if the PDF uses one of the builtin fonts directly) + * ``ext`` (*str*) font file extension (e.g. "ttf", see :ref:`FontExtensions`) + * ``type`` (*str*) is the font type (like "Type1" or "TrueType" etc.) + * ``basefont`` (*str*) is the base font name, + * ``name`` (*str*) is the symbolic name, by which the font is referenced + * ``encoding`` (*str*) the font's character encoding if different from its built-in encoding (:ref:`AdobeManual`, p. 254): + * ``referencer`` (*int* optional) the :data:`xref` of the referencer. Zero if directly referenced by the page, otherwise the xref of an XObject. Only present if *full=True*. - * When dealing with only a few pages, methods :meth:`copy_page`, :meth:`move_page`, :meth:`delete_page` are easier to use. In fact, they are also **much faster** -- by at least one order of magnitude when the document has many pages. + Example:: + >>> pprint(doc.get_page_fonts(0, full=False)) + [(12, 'ttf', 'TrueType', 'FNUUTH+Calibri-Bold', 'R8', ''), + (13, 'ttf', 'TrueType', 'DOKBTG+Calibri', 'R10', ''), + (14, 'ttf', 'TrueType', 'NOHSJV+Calibri-Light', 'R12', ''), + (15, 'ttf', 'TrueType', 'NZNDCL+CourierNewPSMT', 'R14', ''), + (16, 'ttf', 'Type0', 'MNCSJY+SymbolMT', 'R17', 'Identity-H'), + (17, 'cff', 'Type1', 'UAEUYH+Helvetica', 'R20', 'WinAnsiEncoding'), + (18, 'ttf', 'Type0', 'ECPLRU+Calibri', 'R23', 'Identity-H'), + (19, 'ttf', 'Type0', 'TONAYT+CourierNewPSMT', 'R27', 'Identity-H')] - .. method:: set_metadata(m) + .. note:: + * This list has no duplicate entries: the combination of :data:`xref`, *name* and *referencer* is unique. + * In general, this is a true superset of the fonts actually in use by this page. The PDF creator may e.g. have specified some global list, of which each page make only partial use. + * Be aware that font names returned by some variants of :meth:`Page.get_text` (respectively :ref:`TextPage` methods) need not (exactly) equal the base font name shown here. Reasons for any differences include: - PDF only: Sets or updates the metadata of the document as specified in *m*, a Python dictionary. + - This method always shows any subset prefixes (the pattern ``ABCDEF+``), whereas text extractions do not do this by default. + - Text extractions use the base library to access the font name, which has a length cap of 31 bytes and generally interrogates the font file binary to access the name. Method ``get_page_fonts()`` however looks at the PDF definition source. + - Text extractions work for all supported document types in exactly the same way -- not just for PDFs. Consequently they do not contain PDF-specifics. - :arg dict m: A dictionary with the same keys as *metadata* (see below). All keys are optional. A PDF's format and encryption method cannot be set or changed and will be ignored. If any value should not contain data, do not specify its key or set the value to `None`. If you use *{}* all metadata information will be cleared to the string *"none"*. If you want to selectively change only some values, modify a copy of *doc.metadata* and use it as the argument. Arbitrary unicode values are possible if specified as UTF-8-encoded. + .. method:: get_page_text(pno, output="text", flags=3, textpage=None, sort=False) - *(Changed in v1.18.4)* Empty values or "none" are no longer written, but completely omitted. + Extracts the text of a page given its page number *pno* (zero-based). Invokes :meth:`Page.get_text`. - .. method:: get_xml_metadata() + :arg int pno: page number, 0-based, any value `-∞ < pno < page_count`. - PDF only: Get the document XML metadata. + For other parameter refer to the page method. - :rtype: str - :returns: XML metadata of the document. Empty string if not present or not a PDF. + :rtype: str - .. method:: set_xml_metadata(xml) + .. index:: + pair: fontsize; Document.layout + pair: rect; Document.layout + pair: width; Document.layout + pair: height; Document.layout - PDF only: Sets or updates XML metadata of the document. + .. method:: layout(rect=None, width=0, height=0, fontsize=11) - :arg str xml: the new XML metadata. Should be XML syntax, however no checking is done by this method and any string is accepted. + Re-paginate ("reflow") the document based on the given page dimension and fontsize. This only affects some document types like e-books and HTML. Ignored if not supported. Supported documents have ``True`` in property :attr:`is_reflowable`. + :arg rect_like rect: desired page size. Must be finite, not empty and start at point (0, 0). + :arg float width: use it together with ``height`` as alternative to ``rect``. + :arg float height: use it together with ``width`` as alternative to ``rect``. + :arg float fontsize: the desired default fontsize. - .. method:: set_pagelayout(value) + .. method:: select(s) - * New in v1.22.2 + PDF only: Keeps only those pages of the document whose numbers occur in the list. Empty sequences or elements outside `range(doc.page_count)` will cause a *ValueError*. For more details see remarks at the bottom or this chapter. - PDF only: Set the `/PageLayout`. + :arg sequence s: The sequence (see :ref:`SequenceTypes`) of page numbers (zero-based) to be included. Pages not in the sequence will be deleted (from memory) and become unavailable until the document is reopened. **Page numbers can occur multiple times and in any order:** the resulting document will reflect the sequence exactly as specified. - :arg str value: one of the strings "SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight". Lower case is supported. + .. note:: + * Page numbers in the sequence need not be unique nor be in any particular order. This makes the method a versatile utility to e.g. select only the even or the odd pages or meeting some other criteria and so forth. - .. method:: set_pagemode(value) + * On a technical level, the method will always create a new :data:`pagetree`. - * New in v1.22.2 + * When dealing with only a few pages, methods :meth:`copy_page`, :meth:`move_page`, :meth:`delete_page` are easier to use. In fact, they are also **much faster** -- by at least one order of magnitude when the document has many pages. - PDF only: Set the `/PageMode`. - :arg str value: one of the strings "UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments". Lower case is supported. + .. method:: set_metadata(m) + PDF only: Sets or updates the metadata of the document as specified in *m*, a Python dictionary. - .. method:: set_markinfo(value) + :arg dict m: A dictionary with the same keys as *metadata* (see below). All keys are optional. A PDF's format and encryption method cannot be set or changed and will be ignored. If any value should not contain data, do not specify its key or set the value to `None`. If you use *{}* all metadata information will be cleared to the string *"none"*. If you want to selectively change only some values, modify a copy of *doc.metadata* and use it as the argument. Arbitrary unicode values are possible if specified as UTF-8-encoded. - * New in v1.22.2 + *(Changed in v1.18.4)* Empty values or "none" are no longer written, but completely omitted. - PDF only: Set the `/MarkInfo` values. + .. method:: get_xml_metadata() - :arg dict value: a dictionary like this one: `{"Marked": False, "UserProperties": False, "Suspects": False}`. This dictionary contains information about the usage of Tagged PDF conventions. For details please see the `PDF specifications `_. + PDF only: Get the document XML metadata. + :rtype: str + :returns: XML metadata of the document. Empty string if not present or not a PDF. - .. method:: set_toc(toc, collapse=1) + .. method:: set_xml_metadata(xml) - PDF only: Replaces the **complete current outline** tree (table of contents) with the one provided as the argument. After successful execution, the new outline tree can be accessed as usual via :meth:`Document.get_toc` or via :attr:`Document.outline`. Like with other output-oriented methods, changes become permanent only via :meth:`save` (incremental save supported). Internally, this method consists of the following two steps. For a demonstration see example below. + PDF only: Sets or updates XML metadata of the document. - - Step 1 deletes all existing bookmarks. + :arg str xml: the new XML metadata. Should be XML syntax, however no checking is done by this method and any string is accepted. - - Step 2 creates a new TOC from the entries contained in *toc*. - :arg sequence toc: + .. method:: set_pagelayout(value) - A list / tuple with **all bookmark entries** that should form the new table of contents. Output variants of :meth:`get_toc` are acceptable. To completely remove the table of contents specify an empty sequence or None. Each item must be a list with the following format. + * New in v1.22.2 - * [lvl, title, page [, dest]] where + PDF only: Set the `/PageLayout`. - - **lvl** is the hierarchy level (int > 0) of the item, which **must be 1** for the first item and at most 1 larger than the previous one. + :arg str value: one of the strings "SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight". Lower case is supported. - - **title** (str) is the title to be displayed. It is assumed to be UTF-8-encoded (relevant for multibyte code points only). - - **page** (int) is the target page number **(attention: 1-based)**. Must be in valid range if positive. Set it to -1 if there is no target, or the target is external. + .. method:: set_pagemode(value) - - **dest** (optional) is a dictionary or a number. If a number, it will be interpreted as the desired height (in points) this entry should point to on the page. Use a dictionary (like the one given as output by `get_toc(False)`) for a detailed control of the bookmark's properties, see :meth:`Document.get_toc` for a description. + * New in v1.22.2 - :arg int collapse: *(new in v1.16.9)* controls the hierarchy level beyond which outline entries should initially show up collapsed. The default 1 will hence only display level 1, higher levels must be unfolded using the PDF viewer. To unfold everything, specify either a large integer, 0 or None. + PDF only: Set the `/PageMode`. - :rtype: int - :returns: the number of inserted, resp. deleted items. + :arg str value: one of the strings "UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments". Lower case is supported. - .. method:: outline_xref(idx) - * New in v1.17.7 + .. method:: set_markinfo(value) - PDF only: Return the :data:`xref` of the outline item. This is mainly used for internal purposes. + * New in v1.22.2 - arg int idx: index of the item in list :meth:`Document.get_toc`. + PDF only: Set the `/MarkInfo` values. - :returns: :data:`xref`. + :arg dict value: a dictionary like this one: `{"Marked": False, "UserProperties": False, "Suspects": False}`. This dictionary contains information about the usage of Tagged PDF conventions. For details please see the `PDF specifications `_. - .. method:: del_toc_item(idx) - * New in v1.17.7 - * Changed in v1.18.14: no longer remove the item's text, but show it grayed-out. + .. method:: set_toc(toc, collapse=1) - PDF only: Remove this TOC item. This is a high-speed method, which **disables** the respective item, but leaves the overall TOC structure intact. Physically, the item still exists in the TOC tree, but is shown grayed-out and will no longer point to any destination. + PDF only: Replaces the **complete current outline** tree (table of contents) with the one provided as the argument. After successful execution, the new outline tree can be accessed as usual via :meth:`Document.get_toc` or via :attr:`Document.outline`. Like with other output-oriented methods, changes become permanent only via :meth:`save` (incremental save supported). Internally, this method consists of the following two steps. For a demonstration see example below. - This also implies that you can reassign the item to a new destination using :meth:`Document.set_toc_item`, when required. + - Step 1 deletes all existing bookmarks. - :arg int idx: the index of the item in list :meth:`Document.get_toc`. + - Step 2 creates a new TOC from the entries contained in *toc*. + :arg sequence toc: - .. method:: set_toc_item(idx, dest_dict=None, kind=None, pno=None, uri=None, title=None, to=None, filename=None, zoom=0) + A list / tuple with **all bookmark entries** that should form the new table of contents. Output variants of :meth:`get_toc` are acceptable. To completely remove the table of contents specify an empty sequence or None. Each item must be a list with the following format. - * New in v1.17.7 - * Changed in v1.18.6 + * [lvl, title, page [, dest]] where - PDF only: Changes the TOC item identified by its index. Change the item **title**, **destination**, **appearance** (color, bold, italic) or collapsing sub-items -- or to remove the item altogether. + - **lvl** is the hierarchy level (int > 0) of the item, which **must be 1** for the first item and at most 1 larger than the previous one. - Use this method if you need specific changes for selected entries only and want to avoid replacing the complete TOC. This is beneficial especially when dealing with large table of contents. + - **title** (str) is the title to be displayed. It is assumed to be UTF-8-encoded (relevant for multibyte code points only). - :arg int idx: the index of the entry in the list created by :meth:`Document.get_toc`. - :arg dict dest_dict: the new destination. A dictionary like the last entry of an item in `doc.get_toc(False)`. Using this as a template is recommended. When given, **all other parameters are ignored** -- except title. - :arg int kind: the link kind, see :ref:`linkDest Kinds`. If :data:`LINK_NONE`, then all remaining parameter will be ignored, and the TOC item will be removed -- same as :meth:`Document.del_toc_item`. If None, then only the title is modified and the remaining parameters are ignored. All other values will lead to making a new destination dictionary using the subsequent arguments. - :arg int pno: the 1-based page number, i.e. a value 1 <= pno <= doc.page_count. Required for LINK_GOTO. - :arg str uri: the URL text. Required for LINK_URI. - :arg str title: the desired new title. None if no change. - :arg point_like to: (optional) points to a coordinate on the target page. Relevant for LINK_GOTO. If omitted, a point near the page's top is chosen. - :arg str filename: required for LINK_GOTOR and LINK_LAUNCH. - :arg float zoom: use this zoom factor when showing the target page. + - **page** (int) is the target page number **(attention: 1-based)**. Must be in valid range if positive. Set it to -1 if there is no target, or the target is external. - **Example use:** Change the TOC of the SWIG manual to achieve this: + - **dest** (optional) is a dictionary or a number. If a number, it will be interpreted as the desired height (in points) this entry should point to on the page. Use a dictionary (like the one given as output by `get_toc(False)`) for a detailed control of the bookmark's properties, see :meth:`Document.get_toc` for a description. - Collapse everything below top level and show the chapter on Python support in red, bold and italic:: + :arg int collapse: *(new in v1.16.9)* controls the hierarchy level beyond which outline entries should initially show up collapsed. The default 1 will hence only display level 1, higher levels must be unfolded using the PDF viewer. To unfold everything, specify either a large integer, 0 or None. - >>> import fitz - >>> doc=fitz.open("SWIGDocumentation.pdf") - >>> toc = doc.get_toc(False) # we need the detailed TOC - >>> # list of level 1 indices and their titles - >>> lvl1 = [(i, item[1]) for i, item in enumerate(toc) if item[0] == 1] - >>> for i, title in lvl1: - d = toc[i][3] # get the destination dict - d["collapse"] = True # collapse items underneath - if "Python" in title: # show the 'Python' chapter - d["color"] = (1, 0, 0) # in red, - d["bold"] = True # bold and - d["italic"] = True # italic - doc.set_toc_item(i, dest_dict=d) # update this toc item - >>> doc.save("NEWSWIG.pdf",garbage=3,deflate=True) + :rtype: int + :returns: the number of inserted, resp. deleted items. - In the previous example, we have changed only 42 of the 1240 TOC items of the file. + Changed in v1.23.8: Destination 'to' coordinates should now be in the + same coordinate system as those returned by `get_toc()` (internally they + are now transformed with `page.cropbox` and `page.rotation_matrix`). So + for example `set_toc(get_toc())` now gives unchanged destination 'to' + coordinates. - .. method:: can_save_incrementally() + .. method:: outline_xref(idx) - * New in v1.16.0 + * New in v1.17.7 - Check whether the document can be saved incrementally. Use it to choose the right option without encountering exceptions. + PDF only: Return the :data:`xref` of the outline item. This is mainly used for internal purposes. - .. method:: scrub(attached_files=True, clean_pages=True, embedded_files=True, hidden_text=True, javascript=True, metadata=True, redactions=True, redact_images=0, remove_links=True, reset_fields=True, reset_responses=True, thumbnails=True, xml_metadata=True) + :arg int idx: index of the item in list :meth:`Document.get_toc`. - * New in v1.16.14 - - PDF only: Remove potentially sensitive data from the PDF. This function is inspired by the similar "Sanitize" function in Adobe Acrobat products. The process is configurable by a number of options. + :returns: :data:`xref`. - :arg bool attached_files: Search for 'FileAttachment' annotations and remove the file content. - :arg bool clean_pages: Remove any comments from page painting sources. If this option is set to *False*, then this is also done for *hidden_text* and *redactions*. - :arg bool embedded_files: Remove embedded files. - :arg bool hidden_text: Remove OCRed text and invisible text [#f7]_. - :arg bool javascript: Remove JavaScript sources. - :arg bool metadata: Remove PDF standard metadata. - :arg bool redactions: Apply redaction annotations. - :arg int redact_images: how to handle images if applying redactions. One of 0 (ignore), 1 (blank out overlaps) or 2 (remove). - :arg bool remove_links: Remove all links. - :arg bool reset_fields: Reset all form fields to their defaults. - :arg bool reset_responses: Remove all responses from all annotations. - :arg bool thumbnails: Remove thumbnail images from pages. - :arg bool xml_metadata: Remove XML metadata. + .. method:: del_toc_item(idx) + * New in v1.17.7 + * Changed in v1.18.14: no longer remove the item's text, but show it grayed-out. - .. method:: save(outfile, garbage=0, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, incremental=False, ascii=False, expand=0, linear=False, pretty=False, no_new_id=False, encryption=PDF_ENCRYPT_NONE, permissions=-1, owner_pw=None, user_pw=None) + PDF only: Remove this TOC item. This is a high-speed method, which **disables** the respective item, but leaves the overall TOC structure intact. Physically, the item still exists in the TOC tree, but is shown grayed-out and will no longer point to any destination. - * Changed in v1.18.7 - * Changed in v1.19.0 + This also implies that you can reassign the item to a new destination using :meth:`Document.set_toc_item`, when required. - PDF only: Saves the document in its **current state**. + :arg int idx: the index of the item in list :meth:`Document.get_toc`. - :arg str,Path,fp outfile: The file path, `pathlib.Path` or file object to save to. A file object must have been created before via `open(...)` or `io.BytesIO()`. Choosing `io.BytesIO()` is similar to :meth:`Document.tobytes` below, which equals the `getvalue()` output of an internally created `io.BytesIO()`. - :arg int garbage: Do garbage collection. Positive values exclude "incremental". + .. method:: set_toc_item(idx, dest_dict=None, kind=None, pno=None, uri=None, title=None, to=None, filename=None, zoom=0) - * 0 = none - * 1 = remove unused (unreferenced) objects. - * 2 = in addition to 1, compact the :data:`xref` table. - * 3 = in addition to 2, merge duplicate objects. - * 4 = in addition to 3, check :data:`stream` objects for duplication. This may be slow because such data are typically large. + * New in v1.17.7 + * Changed in v1.18.6 - :arg bool clean: Clean and sanitize content streams [#f1]_. Corresponds to "mutool clean -sc". + PDF only: Changes the TOC item identified by its index. Change the item **title**, **destination**, **appearance** (color, bold, italic) or collapsing sub-items -- or to remove the item altogether. - :arg bool deflate: Deflate (compress) uncompressed streams. - :arg bool deflate_images: *(new in v1.18.3)* Deflate (compress) uncompressed image streams [#f4]_. - :arg bool deflate_fonts: *(new in v1.18.3)* Deflate (compress) uncompressed fontfile streams [#f4]_. + Use this method if you need specific changes for selected entries only and want to avoid replacing the complete TOC. This is beneficial especially when dealing with large table of contents. - :arg bool incremental: Only save changes to the PDF. Excludes "garbage" and "linear". Can only be used if *outfile* is a string or a `pathlib.Path` and equal to :attr:`Document.name`. Cannot be used for files that are decrypted or repaired and also in some other cases. To be sure, check :meth:`Document.can_save_incrementally`. If this is false, saving to a new file is required. + :arg int idx: the index of the entry in the list created by :meth:`Document.get_toc`. + :arg dict dest_dict: the new destination. A dictionary like the last entry of an item in `doc.get_toc(False)`. Using this as a template is recommended. When given, **all other parameters are ignored** -- except title. + :arg int kind: the link kind, see :ref:`linkDest Kinds`. If :data:`LINK_NONE`, then all remaining parameter will be ignored, and the TOC item will be removed -- same as :meth:`Document.del_toc_item`. If None, then only the title is modified and the remaining parameters are ignored. All other values will lead to making a new destination dictionary using the subsequent arguments. + :arg int pno: the 1-based page number, i.e. a value 1 <= pno <= doc.page_count. Required for LINK_GOTO. + :arg str uri: the URL text. Required for LINK_URI. + :arg str title: the desired new title. None if no change. + :arg point_like to: (optional) points to a coordinate on the target page. Relevant for LINK_GOTO. If omitted, a point near the page's top is chosen. + :arg str filename: required for LINK_GOTOR and LINK_LAUNCH. + :arg float zoom: use this zoom factor when showing the target page. - :arg bool ascii: convert binary data to ASCII. + **Example use:** Change the TOC of the SWIG manual to achieve this: - :arg int expand: Decompress objects. Generates versions that can be better read by some other programs and will lead to larger files. + Collapse everything below top level and show the chapter on Python support in red, bold and italic:: - * 0 = none - * 1 = images - * 2 = fonts - * 255 = all + >>> import pymupdf + >>> doc=pymupdf.open("SWIGDocumentation.pdf") + >>> toc = doc.get_toc(False) # we need the detailed TOC + >>> # list of level 1 indices and their titles + >>> lvl1 = [(i, item[1]) for i, item in enumerate(toc) if item[0] == 1] + >>> for i, title in lvl1: + d = toc[i][3] # get the destination dict + d["collapse"] = True # collapse items underneath + if "Python" in title: # show the 'Python' chapter + d["color"] = (1, 0, 0) # in red, + d["bold"] = True # bold and + d["italic"] = True # italic + doc.set_toc_item(i, dest_dict=d) # update this toc item + >>> doc.save("NEWSWIG.pdf",garbage=3,deflate=True) - :arg bool linear: Save a linearised version of the document. This option creates a file format for improved performance for Internet access. Excludes "incremental". + In the previous example, we have changed only 42 of the 1240 TOC items of the file. - :arg bool pretty: Prettify the document source for better readability. PDF objects will be reformatted to look like the default output of :meth:`Document.xref_object`. + .. method:: bake(*, annots=True, widgets=True) - :arg bool no_new_id: Suppress the update of the file's `/ID` field. If the file happens to have no such field at all, also suppress creation of a new one. Default is `False`, so every save will lead to an updated file identification. + PDF only: Convert annotations and / or widgets to become permanent parts of the pages. The PDF **will be changed** by this method. If `widgets` is `True`, the document will also no longer be a "Form PDF". + + All pages will look the same, but will no longer have annotations, respectively fields. The visible parts will be converted to standard text, vector graphics or images as required. - :arg int permissions: *(new in v1.16.0)* Set the desired permission levels. See :ref:`PermissionCodes` for possible values. Default is granting all. + The method may thus be a viable **alternative for PDF-to-PDF conversions** using :meth:`Document.convert_to_pdf`. - :arg int encryption: *(new in v1.16.0)* set the desired encryption method. See :ref:`EncryptionMethods` for possible values. + Please consider that annotations are complex objects and may consist of more data "underneath" their visual appearance. Examples are "Text" and "FileAttachment" annotations. When "baking in" annotations / widgets with this method, all this underlying information (attached files, comments, associated PopUp annotations, etc.) will be lost and be removed on next garbage collection. - :arg str owner_pw: *(new in v1.16.0)* set the document's owner password. *(Changed in v1.18.3)* If not provided, the user password is taken if provided. The string length must not exceed 40 characters. + Use this feature for instance for :meth:`Page.show_pdf_page` (which supports neither annotations nor widgets) when the source pages should look exactly the same in the target. - :arg str user_pw: *(new in v1.16.0)* set the document's user password. The string length must not exceed 40 characters. - .. note:: The method does not check, whether a file of that name already exists, will hence not ask for confirmation, and overwrite the file. It is your responsibility as a programmer to handle this. + :arg bool annots: convert annotations. + :arg bool widgets: convert fields / widgets. After execution, the document will no longer be a "Form PDF". - .. method:: ez_save(*args, **kwargs) - * New in v1.18.11 + .. method:: can_save_incrementally() - PDF only: The same as :meth:`Document.save` but with the changed defaults `deflate=True, garbage=3`. + * New in v1.16.0 - .. method:: saveIncr() + Check whether the document can be saved incrementally. Use it to choose the right option without encountering exceptions. - PDF only: saves the document incrementally. This is a convenience abbreviation for *doc.save(doc.name, incremental=True, encryption=PDF_ENCRYPT_KEEP)*. + .. method:: scrub(attached_files=True, clean_pages=True, embedded_files=True, hidden_text=True, javascript=True, metadata=True, redactions=True, redact_images=0, remove_links=True, reset_fields=True, reset_responses=True, thumbnails=True, xml_metadata=True) - .. note:: + * New in v1.16.14 + + PDF only: Remove potentially sensitive data from the PDF. This function is inspired by the similar "Sanitize" function in Adobe Acrobat products. The process is configurable by a number of options. + + :arg bool attached_files: Search for 'FileAttachment' annotations and remove the file content. + :arg bool clean_pages: Remove any comments from page painting sources. If this option is set to ``False``, then this is also done for *hidden_text* and *redactions*. + :arg bool embedded_files: Remove embedded files. + :arg bool hidden_text: Remove OCRed text and invisible text [#f7]_. + :arg bool javascript: Remove JavaScript sources. + :arg bool metadata: Remove PDF standard metadata. + :arg bool redactions: Apply redaction annotations. + :arg int redact_images: how to handle images if applying redactions. One of 0 (ignore), 1 (blank out overlaps) or 2 (remove). + :arg bool remove_links: Remove all links. + :arg bool reset_fields: Reset all form fields to their defaults. + :arg bool reset_responses: Remove all responses from all annotations. + :arg bool thumbnails: Remove thumbnail images from pages. + :arg bool xml_metadata: Remove XML metadata. - Saving incrementally may be required if the document contains verified signatures which would be invalidated by saving to a new file. + .. method:: save(outfile, garbage=0, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, incremental=False, ascii=False, expand=0, linear=False, pretty=False, no_new_id=False, encryption=PDF_ENCRYPT_NONE, permissions=-1, owner_pw=None, user_pw=None, use_objstms=0) - .. method:: tobytes(garbage=0, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, ascii=False, expand=0, linear=False, pretty=False, no_new_id=False, encryption=PDF_ENCRYPT_NONE, permissions=-1, owner_pw=None, user_pw=None) + * Changed in v1.18.7 + * Changed in v1.19.0 + * Changed in v1.24.1 - * Changed in v1.18.7 - * Changed in v1.19.0 + PDF only: Saves the document in its **current state**. - PDF only: Writes the **current content of the document** to a bytes object instead of to a file. Obviously, you should be wary about memory requirements. The meanings of the parameters exactly equal those in :meth:`save`. Chapter :ref:`FAQ` contains an example for using this method as a pre-processor to `pdfrw `_. + :arg str,Path,fp outfile: The file path, `pathlib.Path` or file object to save to. A file object must have been created before via `open(...)` or `io.BytesIO()`. Choosing `io.BytesIO()` is similar to :meth:`Document.tobytes` below, which equals the `getvalue()` output of an internally created `io.BytesIO()`. - *(Changed in v1.16.0)* for extended encryption support. + :arg int garbage: Do garbage collection. Positive values exclude "incremental". - :rtype: bytes - :returns: a bytes object containing the complete document. + * 0 = none + * 1 = remove unused (unreferenced) objects. + * 2 = in addition to 1, compact the :data:`xref` table. + * 3 = in addition to 2, merge duplicate objects. + * 4 = in addition to 3, check :data:`stream` objects for duplication. This may be slow because such data are typically large. - .. method:: search_page_for(pno, text, quads=False) + :arg bool clean: Clean and sanitize content streams [#f1]_. Corresponds to "mutool clean -sc". - Search for "text" on page number "pno". Works exactly like the corresponding :meth:`Page.search_for`. Any integer `-∞ < pno < page_count` is acceptable. + :arg bool deflate: Deflate (compress) uncompressed streams. + :arg bool deflate_images: *(new in v1.18.3)* Deflate (compress) uncompressed image streams [#f4]_. + :arg bool deflate_fonts: *(new in v1.18.3)* Deflate (compress) uncompressed fontfile streams [#f4]_. - .. index:: - pair: append; Document.insert_pdf - pair: join; Document.insert_pdf - pair: merge; Document.insert_pdf - pair: from_page; Document.insert_pdf - pair: to_page; Document.insert_pdf - pair: start_at; Document.insert_pdf - pair: rotate; Document.insert_pdf - pair: links; Document.insert_pdf - pair: annots; Document.insert_pdf - pair: show_progress; Document.insert_pdf + :arg bool incremental: Only save changes to the PDF. Excludes "garbage" and "linear". Can only be used if *outfile* is a string or a `pathlib.Path` and equal to :attr:`Document.name`. Cannot be used for files that are decrypted or repaired and also in some other cases. To be sure, check :meth:`Document.can_save_incrementally`. If this is false, saving to a new file is required. - .. method:: insert_pdf(docsrc, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, show_progress=0, final=1) + :arg bool ascii: convert binary data to ASCII. - * Changed in v1.19.3 - as a fix to issue `#537 `_, form fields are always excluded. + :arg int expand: Decompress objects. Generates versions that can be better read by some other programs and will lead to larger files. - PDF only: Copy the page range **[from_page, to_page]** (including both) of PDF document *docsrc* into the current one. Inserts will start with page number *start_at*. Value -1 indicates default values. All pages thus copied will be rotated as specified. Links and annotations can be excluded in the target, see below. All page numbers are 0-based. + * 0 = none + * 1 = images + * 2 = fonts + * 255 = all - :arg docsrc: An opened PDF *Document* which must not be the current document. However, it may refer to the same underlying file. - :type docsrc: *Document* + :arg bool linear: Save a linearised version of the document. This option creates a file format for improved performance for Internet access. Excludes "incremental" and "use_objstms". - :arg int from_page: First page number in *docsrc*. Default is zero. + :arg bool pretty: Prettify the document source for better readability. PDF objects will be reformatted to look like the default output of :meth:`Document.xref_object`. - :arg int to_page: Last page number in *docsrc* to copy. Defaults to last page. + :arg bool no_new_id: Suppress the update of the file's `/ID` field. If the file happens to have no such field at all, also suppress creation of a new one. Default is `False`, so every save will lead to an updated file identification. - :arg int start_at: First copied page, will become page number *start_at* in the target. Default -1 appends the page range to the end. If zero, the page range will be inserted before current first page. + :arg int permissions: *(new in v1.16.0)* Set the desired permission levels. See :ref:`PermissionCodes` for possible values. Default is granting all. - :arg int rotate: All copied pages will be rotated by the provided value (degrees, integer multiple of 90). + :arg int encryption: *(new in v1.16.0)* set the desired encryption method. See :ref:`EncryptionMethods` for possible values. - :arg bool links: Choose whether (internal and external) links should be included in the copy. Default is *True*. Internal links to outside the copied page range are **always excluded**. - :arg bool annots: *(new in v1.16.1)* choose whether annotations should be included in the copy. *(Fixed in v1.19.3)* Form fields can never be copied. - :arg int show_progress: *(new in v1.17.7)* specify an interval size greater zero to see progress messages on `sys.stdout`. After each interval, a message like `Inserted 30 of 47 pages.` will be printed. - :arg int final: *(new in v1.18.0)* controls whether the list of already copied objects should be **dropped** after this method, default *True*. Set it to 0 except for the last one of multiple insertions from the same source PDF. This saves target file size and speeds up execution considerably. + :arg str owner_pw: *(new in v1.16.0)* set the document's owner password. *(Changed in v1.18.3)* If not provided, the user password is taken if provided. The string length must not exceed 40 characters. + + :arg str user_pw: *(new in v1.16.0)* set the document's user password. The string length must not exceed 40 characters. + + :arg int use_objstms: *(new in v1.24.0)* compression option that converts eligible PDF object definitions to information that is stored in some other object's :data:`stream` data. Depending on the `deflate` parameter value, the converted object definitions will be compressed -- which can lead to very significant file size reductions. + + .. warning:: The method does not check, whether a file of that name already exists, will hence not ask for confirmation, and overwrite the file. It is your responsibility as a programmer to handle this. .. note:: - 1. If *from_page > to_page*, pages will be **copied in reverse order**. If *0 <= from_page == to_page*, then one page will be copied. + **File size reduction** - 2. *docsrc* TOC entries **will not be copied**. It is easy however, to recover a table of contents for the resulting document. Look at the examples below and at program `join.py `_ in the *examples* directory: it can join PDF documents and at the same time piece together respective parts of the tables of contents. + 1. Use the save options like `garbage=3|4, deflate=True, use_objstms=True|1`. Do not touch the default values `expand=False|0, clean=False|0, incremental=False|0, linear=False|0`. + This is a "lossless" file size reduction. There is a convenience version of this method with these values set by default, :meth:`Document.ez_save` -- please see below. + 2. "Lossy" file size reduction in essence must give up something with respect to images, like (a) remove all images (b) replace images by their grayscale versions (c) reduce image resolutions. Find examples in the `PyMuPDF Utilities "replace-image" folder `_. - .. index:: - pair: append; Document.insert_file - pair: join; Document.insert_file - pair: merge; Document.insert_file - pair: from_page; Document.insert_file - pair: to_page; Document.insert_file - pair: start_at; Document.insert_file - pair: rotate; Document.insert_file - pair: links; Document.insert_file - pair: annots; Document.insert_file - pair: show_progress; Document.insert_file + .. method:: ez_save(*args, **kwargs) - .. method:: insert_file(infile, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, show_progress=0, final=1) + * New in v1.18.11 - * New in v1.22.0 + PDF only: The same as :meth:`Document.save` but with changed defaults `deflate=True, garbage=3, use_objstms=1`. - PDF only: Add an arbitrary supported document to the current PDF. Opens "infile" as a document, converts it to a PDF and then invokes :meth:`Document.insert_pdf`. Parameters are the same as for that method. Among other things, this features an easy way to append images as full pages to an output PDF. + .. method:: saveIncr() - :arg multiple infile: the input document to insert. May be a filename specification as is valid for creating a :ref:`Document` or a :ref:`Pixmap`. + PDF only: saves the document incrementally. This is a convenience abbreviation for ``doc.save(doc.name, incremental=True, encryption=PDF_ENCRYPT_KEEP)``. + .. note:: - .. index:: - pair: width; Document.new_page - pair: height; Document.new_page + Saving incrementally may be required if the document contains verified signatures which would be invalidated by saving to a new file. - .. method:: new_page(pno=-1, width=595, height=842) - PDF only: Insert an empty page. + .. method:: tobytes(garbage=0, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, ascii=False, expand=0, linear=False, pretty=False, no_new_id=False, encryption=PDF_ENCRYPT_NONE, permissions=-1, owner_pw=None, user_pw=None, use_objstms=0) - :arg int pno: page number in front of which the new page should be inserted. Must be in *1 < pno <= page_count*. Special values -1 and *doc.page_count* insert **after** the last page. + * Changed in v1.18.7 + * Changed in v1.19.0 + * Changed in v1.24.1 - :arg float width: page width. - :arg float height: page height. + PDF only: Writes the **current content of the document** to a bytes object instead of to a file. Obviously, you should be wary about memory requirements. The meanings of the parameters exactly equal those in :meth:`save`. Chapter :ref:`FAQ` contains an example for using this method as a pre-processor to `pdfrw `_. - :rtype: :ref:`Page` - :returns: the created page object. + *(Changed in v1.16.0)* for extended encryption support. - .. index:: - pair: fontsize; Document.insert_page - pair: width; Document.insert_page - pair: height; Document.insert_page - pair: fontname; Document.insert_page - pair: fontfile; Document.insert_page - pair: color; Document.insert_page + :rtype: bytes + :returns: a bytes object containing the complete document. - .. method:: insert_page(pno, text=None, fontsize=11, width=595, height=842, fontname="helv", fontfile=None, color=None) + .. method:: search_page_for(pno, text, quads=False) - PDF only: Insert a new page and insert some text. Convenience function which combines :meth:`Document.new_page` and (parts of) :meth:`Page.insert_text`. + Search for "text" on page number "pno". Works exactly like the corresponding :meth:`Page.search_for`. Any integer `-∞ < pno < page_count` is acceptable. - :arg int pno: page number (0-based) **in front of which** to insert. Must be in `range(-1, doc.page_count + 1)`. Special values -1 and `doc.page_count` insert **after** the last page. + .. index:: + pair: append; Document.insert_pdf + pair: join; Document.insert_pdf + pair: merge; Document.insert_pdf + pair: from_page; Document.insert_pdf + pair: to_page; Document.insert_pdf + pair: start_at; Document.insert_pdf + pair: rotate; Document.insert_pdf + pair: links; Document.insert_pdf + pair: annots; Document.insert_pdf + pair: widgets; Document.insert_pdf + pair: join_duplicates; Document.insert_pdf + pair: show_progress; Document.insert_pdf - Changed in v1.14.12 - This is now a positional parameter + .. method:: insert_pdf(docsrc, *, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, widgets=True, join_duplicates=False, show_progress=0, final=1) - For the other parameters, please consult the aforementioned methods. + PDF only: Copy the page range **[from_page, to_page]** (including both) of PDF document *docsrc* into the current one. Inserts will start with page number *start_at*. Value -1 indicates default values. All pages thus copied will be rotated as specified. Links, annotations and widgets can be excluded in the target, see below. All page numbers are 0-based. - :rtype: int - :returns: the result of :meth:`Page.insert_text` (number of successfully inserted lines). + :arg docsrc: An opened PDF *Document* which must not be the current document. However, it may refer to the same underlying file. + :type docsrc: *Document* - .. method:: delete_page(pno=-1) + :arg int from_page: First page number in *docsrc*. Default is zero. - PDF only: Delete a page given by its 0-based number in `-∞ < pno < page_count - 1`. + :arg int to_page: Last page number in *docsrc* to copy. Defaults to last page. - * Changed in v1.18.14: support Python's `del` statement. + :arg int start_at: First copied page, will become page number *start_at* in the target. Default -1 appends the page range to the end. If zero, the page range will be inserted before current first page. - :arg int pno: the page to be deleted. Negative number count backwards from the end of the document (like with indices). Default is the last page. + :arg int rotate: All copied pages will be rotated by the provided value (degrees, integer multiple of 90). - .. method:: delete_pages(*args, **kwds) + :arg bool links: Choose whether (internal and external) links should be included in the copy. Default is `True`. *Named* links (:data:`LINK_NAMED`) and internal links to outside the copied page range are **always excluded**. + + :arg bool annots: choose whether annotations should be included in the copy. + + :arg bool widgets: choose whether annotations should be included in the copy. If `True` and at least one of the source pages contains form fields, the target PDF will be turned into a Form PDF (if not already being one). + + :arg bool join_duplicates: *(New in version 1.25.5)* Choose how to handle duplicate root field names in the source pages. This parameter is ignored if `widgets=False`. + + Default is ``False`` which will add unifying strings to the name of those source root fields which have a duplicate in the target. For instance, if "name" already occurs in the target, the source widget's name will be changed to "name [text]" with a suitably chosen string "text". - * Changed in v1.18.13: more flexibility specifying pages to delete. - * Changed in v1.18.14: support Python's `del` statement. + If ``True``, root fields with duplicate names in source and target will be converted to so-called "Kids" of a "Parent" object (which lists all kid widgets in a PDF array). This will effectively turn those kids into instances of the "same" widget: if e.g. one of the kids is changed, then all its instances will automatically inherit this change -- no matter on which page they happen to be displayed. + + :arg int show_progress: *(new in v1.17.7)* specify an interval size greater zero to see progress messages on `sys.stdout`. After each interval, a message like `Inserted 30 of 47 pages.` will be printed. + + :arg int final: *(new in v1.18.0)* controls whether the list of already copied objects should be **dropped** after this method, default ``True``. Set it to 0 except for the last one of multiple insertions from the same source PDF. This saves target file size and speeds up execution considerably. - PDF only: Delete multiple pages given as 0-based numbers. + .. note:: - **Format 1:** Use keywords. Represents the old format. A contiguous range of pages is removed. - * "from_page": first page to delete. Zero if omitted. - * "to_page": last page to delete. Last page in document if omitted. Must not be less then "from_page". + 1. This is a page-based method. Document-level information of source documents is therefore mostly ignored. Examples include Optional Content, Embedded Files, `StructureElem`, table of contents, page labels, metadata, named destinations (and other named entries) and some more. - **Format 2:** Two page numbers as positional parameters. Handled like Format 1. + 2. If `from_page > to_page`, pages will be **copied in reverse order**. If `0 <= from_page == to_page`, then one page will be copied. - **Format 3:** One positional integer parameter. Equivalent to :meth:`Page.delete_page`. + 3. `docsrc` TOC entries **will not be copied**. It is easy however, to recover a table of contents for the resulting document. Look at the examples below and at program `join.py `_ in the *examples* directory: it can join PDF documents and at the same time piece together respective parts of the tables of contents. - **Format 4:** One positional parameter of type *list*, *tuple* or *range()* of page numbers. The items of this sequence may be in any order and may contain duplicates. - **Format 5:** *(New in v1.18.14)* Using the Python `del` statement and index / slice notation is now possible. + .. index:: + pair: append; Document.insert_file + pair: join; Document.insert_file + pair: merge; Document.insert_file + pair: from_page; Document.insert_file + pair: to_page; Document.insert_file + pair: start_at; Document.insert_file + pair: rotate; Document.insert_file + pair: links; Document.insert_file + pair: annots; Document.insert_file + pair: show_progress; Document.insert_file - .. note:: + .. method:: insert_file(infile, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, show_progress=0, final=1) - *(Changed in v1.14.17, optimized in v1.17.7)* In an effort to maintain a valid PDF structure, this method and :meth:`delete_page` will also deactivate items in the table of contents which point to deleted pages. "Deactivation" here means, that the bookmark will point to nowhere and the title will be shown grayed-out by supporting PDF viewers. The overall TOC structure is left intact. + * New in v1.22.0 - It will also remove any **links on remaining pages** which point to a deleted one. This action may have an extended response time for documents with many pages. + PDF only: Add an arbitrary supported document to the current PDF. Opens "infile" as a document, converts it to a PDF and then invokes :meth:`Document.insert_pdf`. Parameters are the same as for that method. Among other things, this features an easy way to append images as full pages to an output PDF. - Following examples will all delete pages 500 through 519: - - * `doc.delete_pages(500, 519)` - * `doc.delete_pages(from_page=500, to_page=519)` - * `doc.delete_pages((500, 501, 502, ... , 519))` - * `doc.delete_pages(range(500, 520))` - * `del doc[500:520]` - * `del doc[(500, 501, 502, ... , 519)]` - * `del doc[range(500, 520)]` + :arg multiple infile: the input document to insert. May be a filename specification as is valid for creating a :ref:`Document` or a :ref:`Pixmap`. - For the :ref:`AdobeManual` the above takes about 0.6 seconds, because the remaining 1290 pages must be cleaned from invalid links. - In general, the performance of this method is dependent on the number of remaining pages -- **not** on the number of deleted pages: in the above example, **deleting all pages except** those 20, will need much less time. + .. index:: + pair: width; Document.new_page + pair: height; Document.new_page + .. method:: new_page(pno=-1, width=595, height=842) - .. method:: copy_page(pno, to=-1) + PDF only: Insert an empty page. - PDF only: Copy a page reference within the document. + :arg int pno: page number index (zero-indexed) at which to insert page. Special values -1 and *doc.page_count* insert **after** the last page. - :arg int pno: the page to be copied. Must be in range `0 <= pno < page_count`. + :arg float width: page width. + :arg float height: page height. - :arg int to: the page number in front of which to copy. The default inserts **after** the last page. + :rtype: :ref:`Page` + :returns: the created page object. Be aware that the page numbers of pages after the inserted one will have changed after method execution. For the same reason, **all existing page objects will be invalidated.** Using them will lead to exceptions. - .. note:: Only a new **reference** to the page object will be created -- not a new page object, all copied pages will have identical attribute values, including the :attr:`Page.xref`. This implies that any changes to one of these copies will appear on all of them. + .. index:: + pair: fontsize; Document.insert_page + pair: width; Document.insert_page + pair: height; Document.insert_page + pair: fontname; Document.insert_page + pair: fontfile; Document.insert_page + pair: color; Document.insert_page - .. method:: fullcopy_page(pno, to=-1) + .. method:: insert_page(pno, text=None, fontsize=11, width=595, height=842, fontname="helv", fontfile=None, color=None) - * New in v1.14.17 + PDF only: Insert a new page and insert some text. Convenience function which combines :meth:`Document.new_page` and (parts of) :meth:`Page.insert_text`. - PDF only: Make a full copy (duplicate) of a page. + :arg int pno: page number index (zero-indexed) at which to insert page. Special values -1 and `doc.page_count` insert **after** the last page. - :arg int pno: the page to be duplicated. Must be in range `0 <= pno < page_count`. + Changed in v1.14.12 + This is now a positional parameter - :arg int to: the page number in front of which to copy. The default inserts **after** the last page. + For the other parameters, please consult the aforementioned methods. - .. note:: + :rtype: int + :returns: the result of :meth:`Page.insert_text` (number of successfully inserted lines). - * In contrast to :meth:`copy_page`, this method creates a new page object (with a new :data:`xref`), which can be changed independently from the original. + .. method:: delete_page(pno=-1) - * Any Popup and "IRT" ("in response to") annotations are **not copied** to avoid potentially incorrect situations. + PDF only: Delete a page given by its 0-based number in `-∞ < pno < page_count`. - .. method:: move_page(pno, to=-1) + * Changed in v1.18.14: support Python's `del` statement. - PDF only: Move (copy and then delete original) a page within the document. + :arg int pno: the page to be deleted. Negative number count backwards from the end of the document (like with indices). Default is the last page. - :arg int pno: the page to be moved. Must be in range `0 <= pno < page_count`. + .. method:: delete_pages(*args, **kwds) - :arg int to: the page number in front of which to insert the moved page. The default moves **after** the last page. + * Changed in v1.18.13: more flexibility specifying pages to delete. + * Changed in v1.18.14: support Python's `del` statement. + PDF only: Delete multiple pages given as 0-based numbers. - .. method:: need_appearances(value=None) + **Format 1:** Use keywords. Represents the old format. A contiguous range of pages is removed. + * "from_page": first page to delete. Zero if omitted. + * "to_page": last page to delete. Last page in document if omitted. Must not be less then "from_page". - * New in v1.17.4 + **Format 2:** Two page numbers as positional parameters. Handled like Format 1. - PDF only: Get or set the */NeedAppearances* property of Form PDFs. Quote: *"(Optional) A flag specifying whether to construct appearance streams and appearance dictionaries for all widget annotations in the document ... Default value: false."* This may help controlling the behavior of some readers / viewers. + **Format 3:** One positional integer parameter. Equivalent to :meth:`Page.delete_page`. - :arg bool value: set the property to this value. If omitted or `None`, inquire the current value. + **Format 4:** One positional parameter of type *list*, *tuple* or *range()* of page numbers. The items of this sequence may be in any order and may contain duplicates. - :rtype: bool - :returns: - * None: not a Form PDF, or property not defined. - * True / False: the value of the property (either just set or existing for inquiries). Has no effect if no Form PDF. + **Format 5:** *(New in v1.18.14)* Using the Python `del` statement and index / slice notation is now possible. + .. note:: + *(Changed in v1.14.17, optimized in v1.17.7)* In an effort to maintain a valid PDF structure, this method and :meth:`delete_page` will also deactivate items in the table of contents which point to deleted pages. "Deactivation" here means, that the bookmark will point to nowhere and the title will be shown grayed-out by supporting PDF viewers. The overall TOC structure is left intact. - .. method:: get_sigflags() + It will also remove any **links on remaining pages** which point to a deleted one. This action may have an extended response time for documents with many pages. - PDF only: Return whether the document contains signature fields. This is an optional PDF property: if not present (return value -1), no conclusions can be drawn -- the PDF creator may just not have bothered using it. + Following examples will all delete pages 500 through 519: - :rtype: int - :returns: - * -1: not a Form PDF / no signature fields recorded / no *SigFlags* found. - * 1: at least one signature field exists. - * 3: contains signatures that may be invalidated if the file is saved (written) in a way that alters its previous contents, as opposed to an incremental update. + * `doc.delete_pages(500, 519)` + * `doc.delete_pages(from_page=500, to_page=519)` + * `doc.delete_pages((500, 501, 502, ... , 519))` + * `doc.delete_pages(range(500, 520))` + * `del doc[500:520]` + * `del doc[(500, 501, 502, ... , 519)]` + * `del doc[range(500, 520)]` - .. index:: - pair: filename; Document.embfile_add - pair: ufilename; Document.embfile_add - pair: desc; Document.embfile_add + For the :ref:`AdobeManual` the above takes about 0.6 seconds, because the remaining 1290 pages must be cleaned from invalid links. - .. method:: embfile_add(name, buffer, filename=None, ufilename=None, desc=None) + In general, the performance of this method is dependent on the number of remaining pages -- **not** on the number of deleted pages: in the above example, **deleting all pages except** those 20, will need much less time. - * Changed in v1.14.16: The sequence of positional parameters "name" and "buffer" has been changed to comply with the call pattern of other functions. - PDF only: Embed a new file. All string parameters except the name may be unicode (in previous versions, only ASCII worked correctly). File contents will be compressed (where beneficial). + .. method:: copy_page(pno, to=-1) - :arg str name: entry identifier, **must not already exist**. - :arg bytes,bytearray,BytesIO buffer: file contents. + PDF only: Copy a page reference within the document. - *(Changed in v1.14.13)* *io.BytesIO* is now also supported. + :arg int pno: the page to be copied. Must be in range `0 <= pno < page_count`. - :arg str filename: optional filename. Documentation only, will be set to *name* if `None`. - :arg str ufilename: optional unicode filename. Documentation only, will be set to *filename* if `None`. - :arg str desc: optional description. Documentation only, will be set to *name* if `None`. + :arg int to: the page number in front of which to copy. The default inserts **after** the last page. - :rtype: int - :returns: *(Changed in v1.18.13)* The method now returns the :data:`xref` of the inserted file. In addition, the file object now will be automatically given the PDF keys `/CreationDate` and `/ModDate` based on the current date-time. + .. note:: Only a new **reference** to the page object will be created -- not a new page object, all copied pages will have identical attribute values, including the :attr:`Page.xref`. This implies that any changes to one of these copies will appear on all of them. + .. method:: fullcopy_page(pno, to=-1) - .. method:: embfile_count() + * New in v1.14.17 - * Changed in v1.14.16: This is now a method. In previous versions, this was a property. + PDF only: Make a full copy (duplicate) of a page. - PDF only: Return the number of embedded files. + :arg int pno: the page to be duplicated. Must be in range `0 <= pno < page_count`. - .. method:: embfile_get(item) + :arg int to: the page number in front of which to copy. The default inserts **after** the last page. - PDF only: Retrieve the content of embedded file by its entry number or name. If the document is not a PDF, or entry cannot be found, an exception is raised. + .. note:: - :arg int,str item: index or name of entry. An integer must be in `range(embfile_count())`. + * In contrast to :meth:`copy_page`, this method creates a new page object (with a new :data:`xref`), which can be changed independently from the original. - :rtype: bytes + * Any Popup and "IRT" ("in response to") annotations are **not copied** to avoid potentially incorrect situations. - .. method:: embfile_del(item) + .. method:: move_page(pno, to=-1) - * Changed in v1.14.16: Items can now be deleted by index, too. + PDF only: Move (copy and then delete original) a page within the document. - PDF only: Remove an entry from `/EmbeddedFiles`. As always, physical deletion of the embedded file content (and file space regain) will occur only when the document is saved to a new file with a suitable garbage option. + :arg int pno: the page to be moved. Must be in range `0 <= pno < page_count`. - :arg int/str item: index or name of entry. + :arg int to: the page number in front of which to insert the moved page. The default moves **after** the last page. - .. warning:: When specifying an entry name, this function will only **delete the first item** with that name. Be aware that PDFs not created with PyMuPDF may contain duplicate names. So you may want to take appropriate precautions. - .. method:: embfile_info(item) + .. method:: need_appearances(value=None) - * Changed in v1.18.13 + * New in v1.17.4 - PDF only: Retrieve information of an embedded file given by its number or by its name. + PDF only: Get or set the */NeedAppearances* property of Form PDFs. Quote: *"(Optional) A flag specifying whether to construct appearance streams and appearance dictionaries for all widget annotations in the document ... Default value: false."* This may help controlling the behavior of some readers / viewers. - :arg int/str item: index or name of entry. An integer must be in `range(embfile_count())`. + :arg bool value: set the property to this value. If omitted or `None`, inquire the current value. - :rtype: dict - :returns: a dictionary with the following keys: + :rtype: bool + :returns: + * None: not a Form PDF, or property not defined. + * True / False: the value of the property (either just set or existing for inquiries). Has no effect if no Form PDF. - * *name* -- (*str*) name under which this entry is stored - * *filename* -- (*str*) filename - * *ufilename* -- (*unicode*) filename - * *desc* -- (*str*) description - * *size* -- (*int*) original file size - * *length* -- (*int*) compressed file length - * *creationDate* -- *(New in v1.18.13)* (*str*) date-time of item creation in PDF format - * *modDate* -- *(New in v1.18.13)* (*str*) date-time of last change in PDF format - * *collection* -- *(New in v1.18.13)* (*int*) :data:`xref` of the associated PDF portfolio item if any, else zero. - * *checksum* -- *(New in v1.18.13)* (*str*) a hashcode of the stored file content as a hexadecimal string. Should be MD5 according to PDF specifications, but be prepared to see other hashing algorithms. - .. method:: embfile_names() - * New in v1.14.16 + .. method:: get_sigflags() - PDF only: Return a list of embedded file names. The sequence of the names equals the physical sequence in the document. + PDF only: Return whether the document contains signature fields. This is an optional PDF property: if not present (return value -1), no conclusions can be drawn -- the PDF creator may just not have bothered using it. - :rtype: list + :rtype: int + :returns: + * -1: not a Form PDF / no signature fields recorded / no *SigFlags* found. + * 1: at least one signature field exists. + * 3: contains signatures that may be invalidated if the file is saved (written) in a way that alters its previous contents, as opposed to an incremental update. - .. index:: - pair: filename; Document.embfile_upd - pair: ufilename; Document.embfile_upd - pair: desc; Document.embfile_upd + .. index:: + pair: filename; Document.embfile_add + pair: ufilename; Document.embfile_add + pair: desc; Document.embfile_add - .. method:: embfile_upd(item, buffer=None, filename=None, ufilename=None, desc=None) + .. method:: embfile_add(name, buffer, filename=None, ufilename=None, desc=None) - PDF only: Change an embedded file given its entry number or name. All parameters are optional. Letting them default leads to a no-operation. + * Changed in v1.14.16: The sequence of positional parameters "name" and "buffer" has been changed to comply with the call pattern of other functions. - :arg int/str item: index or name of entry. An integer must be in `range(embfile_count())`. - :arg bytes,bytearray,BytesIO buffer: the new file content. + PDF only: Embed a new file. All string parameters except the name may be unicode (in previous versions, only ASCII worked correctly). File contents will be compressed (where beneficial). - *(Changed in v1.14.13)* *io.BytesIO* is now also supported. + :arg str name: entry identifier, **must not already exist**. + :arg bytes,bytearray,BytesIO buffer: file contents. - :arg str filename: the new filename. - :arg str ufilename: the new unicode filename. - :arg str desc: the new description. + *(Changed in v1.14.13)* *io.BytesIO* is now also supported. - *(Changed in v1.18.13)* The method now returns the :data:`xref` of the file object. + :arg str filename: optional filename. Documentation only, will be set to *name* if `None`. + :arg str ufilename: optional unicode filename. Documentation only, will be set to *filename* if `None`. + :arg str desc: optional description. Documentation only, will be set to *name* if `None`. - :rtype: int - :returns: xref of the file object. Automatically, its `/ModDate` PDF key will be updated with the current date-time. + :rtype: int + :returns: *(Changed in v1.18.13)* The method now returns the :data:`xref` of the inserted file. In addition, the file object now will be automatically given the PDF keys `/CreationDate` and `/ModDate` based on the current date-time. - .. method:: close() + .. method:: embfile_count() - Release objects and space allocations associated with the document. If created from a file, also closes *filename* (releasing control to the OS). Explicitly closing a document is equivalent to deleting it, `del doc`, or assigning it to something else like `doc = None`. + * Changed in v1.14.16: This is now a method. In previous versions, this was a property. - .. method:: xref_object(xref, compressed=False, ascii=False) + PDF only: Return the number of embedded files. - * New in v1.16.8 - * Changed in v1.18.10 + .. method:: embfile_get(item) - PDF only: Return the definition source of a PDF object. + PDF only: Retrieve the content of embedded file by its entry number or name. If the document is not a PDF, or entry cannot be found, an exception is raised. - :arg int xref: the object's :data`xref`. *Changed in v1.18.10:* A value of -1 returns the PDF trailer source. - :arg bool compressed: whether to generate a compact output with no line breaks or spaces. - :arg bool ascii: whether to ASCII-encode binary data. + :arg int,str item: index or name of entry. An integer must be in `range(embfile_count())`. - :rtype: str - :returns: The object definition source. + :rtype: bytes - .. method:: pdf_catalog() + .. method:: embfile_del(item) - * New in v1.16.8 + * Changed in v1.14.16: Items can now be deleted by index, too. - PDF only: Return the :data:`xref` number of the PDF catalog (or root) object. Use that number with :meth:`Document.xref_object` to see its source. + PDF only: Remove an entry from `/EmbeddedFiles`. As always, physical deletion of the embedded file content (and file space regain) will occur only when the document is saved to a new file with a suitable garbage option. + :arg int/str item: index or name of entry. - .. method:: pdf_trailer(compressed=False) + .. warning:: When specifying an entry name, this function will only **delete the first item** with that name. Be aware that PDFs not created with PyMuPDF may contain duplicate names. So you may want to take appropriate precautions. - * New in v1.16.8 + .. method:: embfile_info(item) - PDF only: Return the trailer source of the PDF, which is usually located at the PDF file's end. This is :meth:`Document.xref_object` with an *xref* argument of -1. + * Changed in v1.18.13 + PDF only: Retrieve information of an embedded file given by its number or by its name. - .. method:: xref_stream(xref) + :arg int/str item: index or name of entry. An integer must be in `range(embfile_count())`. - * New in v1.16.8 + :rtype: dict + :returns: a dictionary with the following keys: - PDF only: Return the **decompressed** contents of the :data:`xref` stream object. + * ``name`` -- (*str*) name under which this entry is stored + * ``filename`` -- (*str*) filename + * ``ufilename`` -- (*unicode*) filename + * ``description`` -- (*str*) description + * ``size`` -- (*int*) original file size + * ``length`` -- (*int*) compressed file length + * ``creationDate`` -- (*str*) date-time of item creation in PDF format + * ``modDate`` -- (*str*) date-time of last change in PDF format + * ``collection`` -- (*int*) :data:`xref` of the associated PDF portfolio item if any, else zero. + * ``checksum`` -- (*str*) a hashcode of the stored file content as a hexadecimal string. Should be MD5 according to PDF specifications, but be prepared to see other hashing algorithms. - :arg int xref: :data:`xref` number. + .. method:: embfile_names() - :rtype: bytes - :returns: the (decompressed) stream of the object. + PDF only: Return a list of embedded file names. The sequence of the names equals the physical sequence in the document. - .. method:: xref_stream_raw(xref) + :rtype: list - * New in v1.16.8 + .. index:: + pair: filename; Document.embfile_upd + pair: ufilename; Document.embfile_upd + pair: desc; Document.embfile_upd - PDF only: Return the **unmodified** (esp. **not decompressed**) contents of the :data:`xref` stream object. Otherwise equal to :meth:`Document.xref_stream`. + .. method:: embfile_upd(item, buffer=None, filename=None, ufilename=None, desc=None) - :rtype: bytes - :returns: the (original, unmodified) stream of the object. + PDF only: Change an embedded file given its entry number or name. All parameters are optional. Letting them default leads to a no-operation. - .. method:: update_object(xref, obj_str, page=None) + :arg int/str item: index or name of entry. An integer must be in `range(embfile_count())`. + :arg bytes,bytearray,BytesIO buffer: the new file content. - * New in v1.16.8 + *(Changed in v1.14.13)* *io.BytesIO* is now also supported. - PDF only: Replace object definition of :data:`xref` with the provided string. The xref may also be new, in which case this instruction completes the object definition. If a page object is also given, its links and annotations will be reloaded afterwards. + :arg str filename: the new filename. + :arg str ufilename: the new unicode filename. + :arg str desc: the new description. - :arg int xref: :data:`xref` number. + *(Changed in v1.18.13)* The method now returns the :data:`xref` of the file object. - :arg str obj_str: a string containing a valid PDF object definition. + :rtype: int + :returns: xref of the file object. Automatically, its `/ModDate` PDF key will be updated with the current date-time. - :arg page: a page object. If provided, indicates, that annotations of this page should be refreshed (reloaded) to reflect changes incurred with links and / or annotations. - :type page: :ref:`Page` - :rtype: int - :returns: zero if successful, otherwise an exception will be raised. + .. method:: close() + Release objects and space allocations associated with the document. If created from a file, also closes *filename* (releasing control to the OS). Explicitly closing a document is equivalent to deleting it, `del doc`, or assigning it to something else like `doc = None`. - .. method:: update_stream(xref, data, new=False, compress=True) + .. method:: xref_object(xref, compressed=False, ascii=False) - * New in v.1.16.8 - * Changed in v1.19.2: added parameter "compress" - * Changed in v1.19.6: deprecated parameter "new". Now confirms that the object is a PDF dictionary object. + * New in v1.16.8 + * Changed in v1.18.10 - Replace the stream of an object identified by *xref*, which must be a PDF dictionary. If the object is no :data:`stream`, it will be turned into one. The function automatically performs a compress operation ("deflate") where beneficial. + PDF only: Return the definition source of a PDF object. - :arg int xref: :data:`xref` number. + :arg int xref: the object's :data:`xref`. *Changed in v1.18.10:* A value of `-1` returns the PDF trailer source. + :arg bool compressed: whether to generate a compact output with no line breaks or spaces. + :arg bool ascii: whether to ASCII-encode binary data. - :arg bytes|bytearray|BytesIO stream: the new content of the stream. + :rtype: str + :returns: The object definition source. - *(Changed in v1.14.13:)* *io.BytesIO* objects are now also supported. + .. method:: pdf_catalog() - :arg bool new: *deprecated* and ignored. Will be removed some time after v1.20.0. - :arg bool compress: whether to compress the inserted stream. If `True` (default), the stream will be inserted using `/FlateDecode` compression (if beneficial), otherwise the stream will inserted as is. + * New in v1.16.8 - :raises ValueError: if *xref* does not represent a PDF :data:`dict`. An empty dictionary `<<>>` is accepted. So if you just created the xref and want to give it a stream, first execute `doc.update_object(xref, "<<>>")`, and then insert the stream data with this method. + PDF only: Return the :data:`xref` number of the PDF catalog (or root) object. Use that number with :meth:`Document.xref_object` to see its source. - The method is primarily (but not exclusively) intended to manipulate streams containing PDF operator syntax (see pp. 643 of the :ref:`AdobeManual`) as it is the case for e.g. page content streams. - If you update a contents stream, consider using save parameter *clean=True* to ensure consistency between PDF operator source and the object structure. + .. method:: pdf_trailer(compressed=False) - Example: Let us assume that you no longer want a certain image appear on a page. This can be achieved by deleting the respective reference in its contents source(s) -- and indeed: the image will be gone after reloading the page. But the page's :data:`resources` object would still show the image as being referenced by the page. This save option will clean up any such mismatches. + * New in v1.16.8 + PDF only: Return the trailer source of the PDF, which is usually located at the PDF file's end. This is :meth:`Document.xref_object` with an :data:`xref` argument of -1. - .. method:: Document.xref_copy(source, target, *, keep=None) - * New in v1.19.5 + .. method:: xref_stream(xref) - PDF Only: Make *target* xref an exact copy of *source*. If *source* is a :data:`stream`, then these data are also copied. + * New in v1.16.8 - :arg int source: the source :data:`xref`. It must be an existing **dictionary** object. - :arg int target: the target xref. Must be an existing **dictionary** object. If the xref has just been created, make sure to initialize it as a PDF dictionary with the minimum specification `<<>>`. - :arg list keep: an optional list of top-level keys in *target*, that should not be removed in preparation of the copy process. + PDF only: Return the **decompressed** contents of the :data:`xref` stream object. - .. note:: + :arg int xref: :data:`xref` number. - * This method has much in common with Python's *dict* method `copy()`. - * Both xref numbers must represent existing dictionaries. - * Before data is copied from *source*, all *target* dictionary keys are deleted. You can specify exceptions from this in the *keep* list. If *source* however has a same-named key, its value will still replace the target. - * If *source* is a :data:`stream` object, then these data will also be copied over, and *target* will be converted to a stream object. - * A typical use case is to replace or remove an existing image without using redaction annotations. Example scripts can be seen `here `_. + :rtype: bytes + :returns: the (decompressed) stream of the object. - .. method:: Document.extract_image(xref) + .. method:: xref_stream_raw(xref) - PDF Only: Extract data and meta information of an image stored in the document. The output can directly be used to be stored as an image file, as input for PIL, :ref:`Pixmap` creation, etc. This method avoids using pixmaps wherever possible to present the image in its original format (e.g. as JPEG). + * New in v1.16.8 - :arg int xref: :data:`xref` of an image object. If this is not in `range(1, doc.xref_length())`, or the object is no image or other errors occur, `None` is returned and no exception is raised. + PDF only: Return the **unmodified** (esp. **not decompressed**) contents of the :data:`xref` stream object. Otherwise equal to :meth:`Document.xref_stream`. - :rtype: dict - :returns: a dictionary with the following keys + :rtype: bytes + :returns: the (original, unmodified) stream of the object. - * *ext* (*str*) image type (e.g. *'jpeg'*), usable as image file extension - * *smask* (*int*) :data:`xref` number of a stencil (/SMask) image or zero - * *width* (*int*) image width - * *height* (*int*) image height - * *colorspace* (*int*) the image's *colorspace.n* number. - * *cs-name* (*str*) the image's *colorspace.name*. - * *xres* (*int*) resolution in x direction. Please also see :data:`resolution`. - * *yres* (*int*) resolution in y direction. Please also see :data:`resolution`. - * *image* (*bytes*) image data, usable as image file content + .. method:: update_object(xref, obj_str, page=None) - >>> d = doc.extract_image(1373) - >>> d - {'ext': 'png', 'smask': 2934, 'width': 5, 'height': 629, 'colorspace': 3, 'xres': 96, - 'yres': 96, 'cs-name': 'DeviceRGB', - 'image': b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x05\ ...'} - >>> imgout = open(f"image.{d['ext']}", "wb") - >>> imgout.write(d["image"]) - 102 - >>> imgout.close() + * New in v1.16.8 - .. note:: There is a functional overlap with *pix = fitz.Pixmap(doc, xref)*, followed by a *pix.tobytes()*. Main differences are that extract_image, **(1)** does not always deliver PNG image formats, **(2)** is **very** much faster with non-PNG images, **(3)** usually results in much less disk storage for extracted images, **(4)** returns `None` in error cases (generates no exception). Look at the following example images within the same PDF. + PDF only: Replace object definition of :data:`xref` with the provided string. The xref may also be new, in which case this instruction completes the object definition. If a page object is also given, its links and annotations will be reloaded afterwards. - * xref 1268 is a PNG -- Comparable execution time and identical output:: + :arg int xref: :data:`xref` number. - In [23]: %timeit pix = fitz.Pixmap(doc, 1268);pix.tobytes() - 10.8 ms ± 52.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) - In [24]: len(pix.tobytes()) - Out[24]: 21462 + :arg str obj_str: a string containing a valid PDF object definition. - In [25]: %timeit img = doc.extract_image(1268) - 10.8 ms ± 86 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) - In [26]: len(img["image"]) - Out[26]: 21462 + :arg page: a page object. If provided, indicates, that annotations of this page should be refreshed (reloaded) to reflect changes incurred with links and / or annotations. + :type page: :ref:`Page` - * xref 1186 is a JPEG -- :meth:`Document.extract_image` is **many times faster** and produces a **much smaller** output (2.48 MB vs. 0.35 MB):: + :rtype: int + :returns: zero if successful, otherwise an exception will be raised. - In [27]: %timeit pix = fitz.Pixmap(doc, 1186);pix.tobytes() - 341 ms ± 2.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) - In [28]: len(pix.tobytes()) - Out[28]: 2599433 - In [29]: %timeit img = doc.extract_image(1186) - 15.7 µs ± 116 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each) - In [30]: len(img["image"]) - Out[30]: 371177 + .. method:: update_stream(xref, data, new=False, compress=True) + * New in v.1.16.8 + * Changed in v1.19.2: added parameter "compress" + * Changed in v1.19.6: deprecated parameter "new". Now confirms that the object is a PDF dictionary object. - .. method:: Document.extract_font(xref, info_only=False, named=None) + Replace the stream of an object identified by :data:`xref`, which must be a PDF dictionary. If the object is no :data:`stream`, it will be turned into one. The function automatically performs a compress operation ("deflate") where beneficial. - * Changed in v1.19.4: return a dictionary if `named == True`. + :arg int xref: :data:`xref` number. - PDF Only: Return an embedded font file's data and appropriate file extension. This can be used to store the font as an external file. The method does not throw exceptions (other than via checking for PDF and valid :data:`xref`). + :arg bytes|bytearray|BytesIO stream: the new content of the stream. - :arg int xref: PDF object number of the font to extract. - :arg bool info_only: only return font information, not the buffer. To be used for information-only purposes, avoids allocation of large buffer areas. - :arg bool named: If true, a dictionary with the following keys is returned: 'name' (font base name), 'ext' (font file extension), 'type' (font type), 'content' (font file content). + *(Changed in v1.14.13:)* *io.BytesIO* objects are now also supported. - :rtype: tuple,dict - :returns: a tuple `(basename, ext, type, content)`, where *ext* is a 3-byte suggested file extension (*str*), *basename* is the font's name (*str*), *type* is the font's type (e.g. "Type1") and *content* is a bytes object containing the font file's content (or *b""*). For possible extension values and their meaning see :ref:`FontExtensions`. Return details on error: + :arg bool new: *deprecated* and ignored. Will be removed some time after v1.20.0. + :arg bool compress: whether to compress the inserted stream. If `True` (default), the stream will be inserted using `/FlateDecode` compression (if beneficial), otherwise the stream will inserted as is. - * `("", "", "", b"")` -- invalid xref or xref is not a (valid) font object. - * `(basename, "n/a", "Type1", b"")` -- *basename* is not embedded and thus cannot be extracted. This is the case for e.g. the :ref:`Base-14-Fonts` and Type 3 fonts. + :raises ValueError: if :data:`xref` does not represent a PDF :data:`dict`. An empty dictionary ``<<>>`` is accepted. So if you just created the xref and want to give it a stream, first execute `doc.update_object(xref, "<<>>")`, and then insert the stream data with this method. - Example: + The method is primarily (but not exclusively) intended to manipulate streams containing PDF operator syntax (see pp. 643 of the :ref:`AdobeManual`) as it is the case for e.g. page content streams. - >>> # store font as an external file - >>> name, ext, _, content = doc.extract_font(4711) - >>> # assuming content is not None: - >>> ofile = open(name + "." + ext, "wb") - >>> ofile.write(content) - >>> ofile.close() + If you update a contents stream, consider using save parameter *clean=True* to ensure consistency between PDF operator source and the object structure. - .. warning:: The basename is returned unchanged from the PDF. So it may contain characters (such as blanks) which may disqualify it as a filename for your operating system. Take appropriate action. + Example: Let us assume that you no longer want a certain image appear on a page. This can be achieved by deleting the respective reference in its contents source(s) -- and indeed: the image will be gone after reloading the page. But the page's :data:`resources` object would still show the image as being referenced by the page. This save option will clean up any such mismatches. - .. note:: - * The returned *basename* in general is **not** the original file name, but it probably has some similarity. - * If parameter `named == True`, a dictionary with the following keys is returned: `{'name': 'T1', 'ext': 'n/a', 'type': 'Type3', 'content': b''}`. + + .. method:: Document.xref_copy(source, target, *, keep=None) + + * New in v1.19.5 + + PDF Only: Make ``target`` xref an exact copy of ``source``. If ``source`` is a :data:`stream`, then this data is also copied. + + :arg int source: the source :data:`xref`. It must be an existing **dictionary** object. + :arg int target: the target xref. Must be an existing **dictionary** object. If the xref has just been created, make sure to initialize it as a PDF dictionary with the minimum specification ``<<>>``. + :arg list keep: an optional list of top-level keys in ``target``, that should not be removed in preparation of the copy process. + + .. note:: + + * This method has much in common with Python's *dict* method `copy()`. + * Both xref numbers must represent existing dictionaries. + * Before data is copied from *source*, all *target* dictionary keys are deleted. You can specify exceptions from this in the ``keep`` list. If *source* however has a same-named key, its value will still replace the target. + * If ``source`` is a :data:`stream` object, then these data will also be copied over, and ``target`` will be converted to a stream object. + * A typical use case is to replace or remove an existing image without using redaction annotations. Example scripts can be seen `in this PyMuPDF Utilities example `_. + + .. method:: Document.extract_image(xref) + + PDF Only: Extract data and meta information of an image stored in the document. The output can directly be used to be stored as an image file, as input for PIL, :ref:`Pixmap` creation, etc. This method avoids using pixmaps wherever possible to present the image in its original format (e.g. as JPEG). + + :arg int xref: :data:`xref` of an image object. If this is not in `range(1, doc.xref_length())`, or the object is no image or other errors occur, `None` is returned and no exception is raised. + + :rtype: dict + :returns: a dictionary with the following keys + + * *ext* (*str*) image type (e.g. *'jpeg'*), usable as image file extension + * *smask* (*int*) :data:`xref` number of a stencil (/SMask) image or zero + * ``width`` (*int*) image width + * ``height`` (*int*) image height + * *colorspace* (*int*) the image's *colorspace.n* number. + * *cs-name* (*str*) the image's *colorspace.name*. + * *xres* (*int*) resolution in x direction. Please also see :data:`resolution`. + * *yres* (*int*) resolution in y direction. Please also see :data:`resolution`. + * *image* (*bytes*) image data, usable as image file content + + >>> d = doc.extract_image(1373) + >>> d + {'ext': 'png', 'smask': 2934, 'width': 5, 'height': 629, 'colorspace': 3, 'xres': 96, + 'yres': 96, 'cs-name': 'DeviceRGB', + 'image': b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x05\ ...'} + >>> imgout = open(f"image.{d['ext']}", "wb") + >>> imgout.write(d["image"]) + 102 + >>> imgout.close() + + .. note:: There is a functional overlap with *pix = pymupdf.Pixmap(doc, xref)*, followed by a *pix.tobytes()*. Main differences are that extract_image, **(1)** does not always deliver PNG image formats, **(2)** is **very** much faster with non-PNG images, **(3)** usually results in much less disk storage for extracted images, **(4)** returns `None` in error cases (generates no exception). Look at the following example images within the same PDF. + + * xref 1268 is a PNG -- Comparable execution time and identical output:: + + In [23]: %timeit pix = pymupdf.Pixmap(doc, 1268);pix.tobytes() + 10.8 ms ± 52.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + In [24]: len(pix.tobytes()) + Out[24]: 21462 + + In [25]: %timeit img = doc.extract_image(1268) + 10.8 ms ± 86 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + In [26]: len(img["image"]) + Out[26]: 21462 + + * xref 1186 is a JPEG -- :meth:`Document.extract_image` is **many times faster** and produces a **much smaller** output (2.48 MB vs. 0.35 MB):: + + In [27]: %timeit pix = pymupdf.Pixmap(doc, 1186);pix.tobytes() + 341 ms ± 2.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + In [28]: len(pix.tobytes()) + Out[28]: 2599433 + + In [29]: %timeit img = doc.extract_image(1186) + 15.7 µs ± 116 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each) + In [30]: len(img["image"]) + Out[30]: 371177 + + + .. method:: Document.extract_font(xref, info_only=False, named=None) + + * Changed in v1.19.4: return a dictionary if `named == True`. + + PDF Only: Return an embedded font file's data and appropriate file extension. This can be used to store the font as an external file. The method does not throw exceptions (other than via checking for PDF and valid :data:`xref`). + + :arg int xref: PDF object number of the font to extract. + :arg bool info_only: only return font information, not the buffer. To be used for information-only purposes, avoids allocation of large buffer areas. + :arg bool named: If true, a dictionary with the following keys is returned: 'name' (font base name), 'ext' (font file extension), 'type' (font type), 'content' (font file content). + + :rtype: tuple,dict + :returns: a tuple `(basename, ext, type, content)`, where *ext* is a 3-byte suggested file extension (*str*), *basename* is the font's name (*str*), *type* is the font's type (e.g. "Type1") and *content* is a bytes object containing the font file's content (or *b""*). For possible extension values and their meaning see :ref:`FontExtensions`. Return details on error: + + * `("", "", "", b"")` -- invalid xref or xref is not a (valid) font object. + * `(basename, "n/a", "Type1", b"")` -- *basename* is not embedded and thus cannot be extracted. This is the case for e.g. the :ref:`Base-14-Fonts` and Type 3 fonts. + + Example: + + >>> # store font as an external file + >>> name, ext, _, content = doc.extract_font(4711) + >>> # assuming content is not None: + >>> ofile = open(name + "." + ext, "wb") + >>> ofile.write(content) + >>> ofile.close() + + .. warning:: The basename is returned unchanged from the PDF. So it may contain characters (such as blanks) which may disqualify it as a filename for your operating system. Take appropriate action. + + .. note:: + * The returned *basename* in general is **not** the original file name, but it probably has some similarity. + * If parameter `named == True`, a dictionary with the following keys is returned: `{'name': 'T1', 'ext': 'n/a', 'type': 'Type3', 'content': b''}`. + + + .. method:: xref_xml_metadata() + + * New in v1.16.8 + + PDF only: Return the :data:`xref` of the document's XML metadata. - .. method:: xref_xml_metadata() + .. method:: has_links() - * New in v1.16.8 + .. method:: has_annots() - PDF only: Return the :data:`xref` of the document's XML metadata. + * New in v1.18.7 + PDF only: Check whether there are links, resp. annotations anywhere in the document. - .. method:: has_links() + :returns: ``True`` / ``False``. As opposed to fields, which are also stored in a central place of a PDF document, the existence of links / annotations can only be detected by parsing each page. These methods are tuned to do this efficiently and will immediately return, if the answer is ``True`` for a page. For PDFs with many thousand pages however, an answer may take some time [#f6]_ if no link, resp. no annotation is found. - .. method:: has_annots() - * New in v1.18.7 + .. method:: subset_fonts(verbose=False, fallback=False) - PDF only: Check whether there are links, resp. annotations anywhere in the document. + PDF only: Investigate eligible fonts for their use by text in the document. If a font is supported and a size reduction is possible, that font is replaced by a version with a subset of its characters. - :returns: *True* / *False*. As opposed to fields, which are also stored in a central place of a PDF document, the existence of links / annotations can only be detected by parsing each page. These methods are tuned to do this efficiently and will immediately return, if the answer is *True* for a page. For PDFs with many thousand pages however, an answer may take some time [#f6]_ if no link, resp. no annotation is found. + Use this method immediately before saving the document. + :arg bool verbose: write various progress information to sysout. This currently only has an effect if `fallback` is `True`. + :arg bool fallback: if `True` use the deprecated algorithm that makes use of package `fontTools `_ (which hence must be installed). If using the recommended value `False` (default), MuPDF's native function is used -- which is **very much faster** and can subset a broader range of font types. Package fontTools is not required then. - .. method:: subset_fonts() + The greatest benefit can be achieved when creating new PDFs using large fonts like is typical for Asian scripts. When using the :ref:`Story` class or method :meth:`Page.insert_htmlbox`, multiple fonts may automatically be included -- without the programmer becoming aware of it. + + In all these cases, the set of actually used unicodes mostly is very small compared to the number of glyphs available in the used fonts. Using this method can easily reduce the embedded font binaries by two orders of magnitude -- from several megabytes down to a low two-digit kilobyte amount. - * New in v1.18.7, changed in v1.18.9 + Creating font subsets leaves behind a large number of large, now unused PDF objects ("ghosts"). Therefore, make sure to compress and garbage-collect when saving the file. We recommend to use :meth:`Document.ez_save`. - PDF only: Investigate eligible fonts for their use by text in the document. If a font is supported and a size reduction is possible, that font is replaced by a version with a character subset. + |history_begin| - Use this method immediately before saving the document. The following features and restrictions apply for the time being: + * New in v1.18.7 + * Changed in v1.18.9 + * Changed in v1.24.2 use native function of MuPDF. - * Package `fontTools `_ **must be installed**. It is required for creating the font subsets. If not installed, the method raises an `ImportError` exception. - * Supported font types only include embedded OTF, TTF and WOFF that are **not already subsets**. - * **Changed in v1.18.9:** A subset font directly replaces its original -- text remains untouched and **is not rewritten.** It thus should retain all its properties, like spacing, hiddenness, control by Optional Content, etc. + |history_end| - The greatest benefit can be achieved when creating new PDFs using large fonts like is typical for Asian scripts. In these cases, the set of actually used unicodes mostly is small compared to the number of glyphs in the font. Using this feature can easily reduce the embedded font binary by two orders of magnitude -- from several megabytes to a low two-digit kilobyte amount. + .. method:: journal_enable() - .. method:: journal_enable() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Enable journalling. Use this before you start logging operations. - PDF only: Enable journalling. Use this before you start logging operations. + .. method:: journal_start_op(name) - .. method:: journal_start_op(name) + * New in v1.19.0 - * New in v1.19.0 + PDF only: Start journalling an *"operation"* identified by a string "name". Updates will fail for a journal-enabled PDF, if no operation has been started. - PDF only: Start journalling an *"operation"* identified by a string "name". Updates will fail for a journal-enabled PDF, if no operation has been started. + .. method:: journal_stop_op() - .. method:: journal_stop_op() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Stop the current operation. The updates between start and stop of an operation belong to the same unit of work and will be undone / redone together. - PDF only: Stop the current operation. The updates between start and stop of an operation belong to the same unit of work and will be undone / redone together. + .. method:: journal_position() - .. method:: journal_position() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Return the numbers of the current operation and the total operation count. - PDF only: Return the numbers of the current operation and the total operation count. + :returns: a tuple `(step, steps)` containing the current operation number and the total number of operations in the journal. If **step** is 0, we are at the top of the journal. If **step** equals **steps**, we are at the bottom. Updating the PDF with anything other than undo or redo will automatically remove all journal entries after the current one and the new update will become the new last entry in the journal. The updates corresponding to the removed journal entries will be permanently lost. - :returns: a tuple `(step, steps)` containing the current operation number and the total number of operations in the journal. If **step** is 0, we are at the top of the journal. If **step** equals **steps**, we are at the bottom. Updating the PDF with anything other than undo or redo will automatically remove all journal entries after the current one and the new update will become the new last entry in the journal. The updates corresponding to the removed journal entries will be permanently lost. + .. method:: journal_op_name(step) - .. method:: journal_op_name(step) + * New in v1.19.0 - * New in v1.19.0 + PDF only: Return the name of operation number *step.* - PDF only: Return the name of operation number *step.* + .. method:: journal_can_do() - .. method:: journal_can_do() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Show whether forward ("redo") and / or backward ("undo") executions are possible from the current journal position. - PDF only: Show whether forward ("redo") and / or backward ("undo") executions are possible from the current journal position. + :returns: a dictionary `{"undo": bool, "redo": bool}`. The respective method is available if its value is `True`. - :returns: a dictionary `{"undo": bool, "redo": bool}`. The respective method is available if its value is `True`. + .. method:: journal_undo() - .. method:: journal_undo() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Revert (undo) the current step in the journal. This moves towards the journal's top. - PDF only: Revert (undo) the current step in the journal. This moves towards the journal's top. + .. method:: journal_redo() - .. method:: journal_redo() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Re-apply (redo) the current step in the journal. This moves towards the journal's bottom. - PDF only: Re-apply (redo) the current step in the journal. This moves towards the journal's bottom. + .. method:: journal_save(filename) - .. method:: journal_save(filename) + * New in v1.19.0 - * New in v1.19.0 + PDF only: Save the journal to a file. - PDF only: Save the journal to a file. + :arg str,fp filename: either a filename as string or a file object opened as "wb" (or an `io.BytesIO()` object). - :arg str,fp filename: either a filename as string or a file object opened as "wb" (or an `io.BytesIO()` object). + .. method:: journal_load(filename) - .. method:: journal_load(filename) + * New in v1.19.0 - * New in v1.19.0 + PDF only: Load journal from a file. Enables journalling for the document. If journalling is already enabled, an exception is raised. - PDF only: Load journal from a file. Enables journalling for the document. If journalling is already enabled, an exception is raised. + :arg str,fp filename: the filename (str) of the journal or a file object opened as "rb" (or an `io.BytesIO()` object). - :arg str,fp filename: the filename (str) of the journal or a file object opened as "rb" (or an `io.BytesIO()` object). + .. method:: save_snapshot() - .. method:: save_snapshot() + * New in v1.19.0 - * New in v1.19.0 + PDF only: Saves a "snapshot" of the document. This is a PDF document with a special, incremental-save format compatible with journalling -- therefore no save options are available. Saving a snapshot is not possible for new documents. - PDF only: Saves a "snapshot" of the document. This is a PDF document with a special, incremental-save format compatible with journalling -- therefore no save options are available. Saving a snapshot is not possible for new documents. + This is a normal PDF document with no usage restrictions whatsoever. If it is not being changed in any way, it can be used together with its journal to undo / redo operations or continue updating. - This is a normal PDF document with no usage restrictions whatsoever. If it is not being changed in any way, it can be used together with its journal to undo / redo operations or continue updating. + .. attribute:: outline - .. attribute:: outline + Contains the first :ref:`Outline` entry of the document (or `None`). Can be used as a starting point to walk through all outline items. Accessing this property for encrypted, not authenticated documents will raise an *AttributeError*. - Contains the first :ref:`Outline` entry of the document (or `None`). Can be used as a starting point to walk through all outline items. Accessing this property for encrypted, not authenticated documents will raise an *AttributeError*. + :type: :ref:`Outline` - :type: :ref:`Outline` + .. attribute:: is_closed - .. attribute:: is_closed + ``False`` if document is still open. If closed, most other attributes and methods will have been deleted / disabled. In addition, :ref:`Page` objects referring to this document (i.e. created with :meth:`Document.load_page`) and their dependent objects will no longer be usable. For reference purposes, :attr:`Document.name` still exists and will contain the filename of the original document (if applicable). - *False* if document is still open. If closed, most other attributes and methods will have been deleted / disabled. In addition, :ref:`Page` objects referring to this document (i.e. created with :meth:`Document.load_page`) and their dependent objects will no longer be usable. For reference purposes, :attr:`Document.name` still exists and will contain the filename of the original document (if applicable). + :type: bool - :type: bool + .. attribute:: is_dirty - .. attribute:: is_dirty + ``True`` if this is a PDF document and contains unsaved changes, else ``False``. - *True* if this is a PDF document and contains unsaved changes, else *False*. + :type: bool - :type: bool + .. attribute:: is_pdf - .. attribute:: is_pdf + ``True`` if this is a PDF document, else ``False``. - *True* if this is a PDF document, else *False*. + :type: bool - :type: bool + .. attribute:: is_form_pdf - .. attribute:: is_form_pdf + ``False`` if this is not a PDF or has no form fields, otherwise the number of root form fields (fields with no ancestors). - *False* if this is not a PDF or has no form fields, otherwise the number of root form fields (fields with no ancestors). + *(Changed in v1.16.4)* Returns the total number of (root) form fields. - *(Changed in v1.16.4)* Returns the total number of (root) form fields. + :type: bool,int - :type: bool,int + .. attribute:: is_reflowable - .. attribute:: is_reflowable + ``True`` if document has a variable page layout (like e-books or HTML). In this case you can set the desired page dimensions during document creation (open) or via method :meth:`layout`. - *True* if document has a variable page layout (like e-books or HTML). In this case you can set the desired page dimensions during document creation (open) or via method :meth:`layout`. + :type: bool - :type: bool + .. attribute:: is_repaired - .. attribute:: is_repaired + * New in v1.18.2 - * New in v1.18.2 + ``True`` if PDF has been repaired during open (because of major structure issues). Always ``False`` for non-PDF documents. If true, more details have been stored in `TOOLS.mupdf_warnings()`, and :meth:`Document.can_save_incrementally` will return ``False``. - *True* if PDF has been repaired during open (because of major structure issues). Always *False* for non-PDF documents. If true, more details have been stored in `TOOLS.mupdf_warnings()`, and :meth:`Document.can_save_incrementally` will return *False*. + :type: bool - :type: bool + .. attribute:: is_fast_webaccess - .. attribute:: is_fast_webaccess + * New in v1.22.2 - * New in v1.22.2 + ``True`` if PDF is in linearized format. ``False`` for non-PDF documents. - *True* if PDF is in linearized format. *False* for non-PDF documents. + :type: bool - :type: bool + .. attribute:: markinfo - .. attribute:: markinfo + * New in v1.22.2 - * New in v1.22.2 + A dictionary indicating the `/MarkInfo` value. If not specified, the empty dictionary is returned. If not a PDF, `None` is returned. - A dictionary indicating the `/MarkInfo` value. If not specified, the empty dictionary is returned. If not a PDF, `None` is returned. + :type: dict - :type: dict + .. attribute:: pagemode - .. attribute:: pagemode + * New in v1.22.2 - * New in v1.22.2 + A string containing the `/PageMode` value. If not specified, the default "UseNone" is returned. If not a PDF, `None` is returned. - A string containing the `/PageMode` value. If not specified, the default "UseNone" is returned. If not a PDF, `None` is returned. + :type: str - :type: str + .. attribute:: pagelayout - .. attribute:: pagelayout + * New in v1.22.2 - * New in v1.22.2 + A string containing the `/PageLayout` value. If not specified, the default "SinglePage" is returned. If not a PDF, `None` is returned. - A string containing the `/PageLayout` value. If not specified, the default "SinglePage" is returned. If not a PDF, `None` is returned. + :type: str - :type: str + .. attribute:: version_count - .. attribute:: version_count + * New in v1.22.2 - * New in v1.22.2 + An integer counting the number of versions present in the document. Zero if not a PDF, otherwise the number of incremental saves plus one. - An integer counting the number of versions present in the document. Zero if not a PDF, otherwise the number of incremental saves plus one. + :type: int - :type: int + .. attribute:: needs_pass - .. attribute:: needs_pass + Indicates whether the document is password-protected against access. This indicator remains unchanged -- **even after the document has been authenticated**. Precludes incremental saves if true. - Indicates whether the document is password-protected against access. This indicator remains unchanged -- **even after the document has been authenticated**. Precludes incremental saves if true. + :type: bool - :type: bool + .. attribute:: is_encrypted - .. attribute:: is_encrypted + This indicator initially equals :attr:`Document.needs_pass`. After successful authentication, it is set to ``False`` to reflect the situation. - This indicator initially equals :attr:`Document.needs_pass`. After successful authentication, it is set to *False* to reflect the situation. + :type: bool - :type: bool + .. attribute:: permissions - .. attribute:: permissions + * Changed in v1.16.0: This is now an integer comprised of bit indicators. Was a dictionary previously. - * Changed in v1.16.0: This is now an integer comprised of bit indicators. Was a dictionary previously. + Contains the permissions to access the document. This is an integer containing bool values in respective bit positions. For example, if *doc.permissions & pymupdf.PDF_PERM_MODIFY > 0*, you may change the document. See :ref:`PermissionCodes` for details. - Contains the permissions to access the document. This is an integer containing bool values in respective bit positions. For example, if *doc.permissions & fitz.PDF_PERM_MODIFY > 0*, you may change the document. See :ref:`PermissionCodes` for details. + :type: int - :type: int + .. attribute:: metadata - .. attribute:: metadata + Contains the document's meta data as a Python dictionary or `None` (if *is_encrypted=True* and *needPass=True*). Keys are *format*, *encryption*, *title*, *author*, *subject*, *keywords*, *creator*, *producer*, *creationDate*, *modDate*, *trapped*. All item values are strings or `None`. - Contains the document's meta data as a Python dictionary or `None` (if *is_encrypted=True* and *needPass=True*). Keys are *format*, *encryption*, *title*, *author*, *subject*, *keywords*, *creator*, *producer*, *creationDate*, *modDate*, *trapped*. All item values are strings or `None`. + Except *format* and *encryption*, for PDF documents, the key names correspond in an obvious way to the PDF keys */Creator*, */Producer*, */CreationDate*, */ModDate*, */Title*, */Author*, */Subject*, */Trapped* and */Keywords* respectively. - Except *format* and *encryption*, for PDF documents, the key names correspond in an obvious way to the PDF keys */Creator*, */Producer*, */CreationDate*, */ModDate*, */Title*, */Author*, */Subject*, */Trapped* and */Keywords* respectively. + - *format* contains the document format (e.g. 'PDF-1.6', 'XPS', 'EPUB'). - - *format* contains the document format (e.g. 'PDF-1.6', 'XPS', 'EPUB'). + - *encryption* either contains `None` (no encryption), or a string naming an encryption method (e.g. *'Standard V4 R4 128-bit RC4'*). Note that an encryption method may be specified **even if** *needs_pass=False*. In such cases not all permissions will probably have been granted. Check :attr:`Document.permissions` for details. - - *encryption* either contains `None` (no encryption), or a string naming an encryption method (e.g. *'Standard V4 R4 128-bit RC4'*). Note that an encryption method may be specified **even if** *needs_pass=False*. In such cases not all permissions will probably have been granted. Check :attr:`Document.permissions` for details. + - If the date fields contain valid data (which need not be the case at all!), they are strings in the PDF-specific timestamp format "D:", where - - If the date fields contain valid data (which need not be the case at all!), they are strings in the PDF-specific timestamp format "D:", where + - is the 12 character ISO timestamp *YYYYMMDDhhmmss* (*YYYY* - year, *MM* - month, *DD* - day, *hh* - hour, *mm* - minute, *ss* - second), and - - is the 12 character ISO timestamp *YYYYMMDDhhmmss* (*YYYY* - year, *MM* - month, *DD* - day, *hh* - hour, *mm* - minute, *ss* - second), and + - is a time zone value (time interval relative to GMT) containing a sign ('+' or '-'), the hour (*hh*), and the minute (*'mm'*, note the apostrophes!). - - is a time zone value (time interval relative to GMT) containing a sign ('+' or '-'), the hour (*hh*), and the minute (*'mm'*, note the apostrophes!). + - A Paraguayan value might hence look like *D:20150415131602-04'00'*, which corresponds to the timestamp April 15, 2015, at 1:16:02 pm local time Asuncion. - - A Paraguayan value might hence look like *D:20150415131602-04'00'*, which corresponds to the timestamp April 15, 2015, at 1:16:02 pm local time Asuncion. + :type: dict - :type: dict + .. Attribute:: name - .. Attribute:: name + Contains the *filename* or *filetype* value with which *Document* was created. - Contains the *filename* or *filetype* value with which *Document* was created. + :type: str - :type: str + .. Attribute:: page_count - .. Attribute:: page_count + Contains the number of pages of the document. May return 0 for documents with no pages. Function `len(doc)` will also deliver this result. - Contains the number of pages of the document. May return 0 for documents with no pages. Function `len(doc)` will also deliver this result. + :type: int - :type: int + .. Attribute:: chapter_count - .. Attribute:: chapter_count + * New in v1.17.0 - * New in v1.17.0 - - Contains the number of chapters in the document. Always at least 1. Relevant only for document types with chapter support (EPUB currently). Other documents will return 1. + Contains the number of chapters in the document. Always at least 1. Relevant only for document types with chapter support (EPUB currently). Other documents will return 1. - :type: int + :type: int - .. Attribute:: last_location + .. Attribute:: last_location - * New in v1.17.0 + * New in v1.17.0 - Contains (chapter, pno) of the document's last page. Relevant only for document types with chapter support (EPUB currently). Other documents will return `(0, page_count - 1)` and `(0, -1)` if it has no pages. + Contains (chapter, pno) of the document's last page. Relevant only for document types with chapter support (EPUB currently). Other documents will return `(0, page_count - 1)` and `(0, -1)` if it has no pages. - :type: int + :type: int - .. Attribute:: FormFonts + .. Attribute:: FormFonts - A list of form field font names defined in the */AcroForm* object. `None` if not a PDF. + A list of form field font names defined in the */AcroForm* object. `None` if not a PDF. - :type: list + :type: list .. NOTE:: For methods that change the structure of a PDF (:meth:`insert_pdf`, :meth:`select`, :meth:`copy_page`, :meth:`delete_page` and others), be aware that objects or properties in your program may have been invalidated or orphaned. Examples are :ref:`Page` objects and their children (links, annotations, widgets), variables holding old page counts, tables of content and the like. Remember to keep such variables up to date or delete orphaned objects. Also refer to :ref:`ReferenialIntegrity`. @@ -2004,8 +2177,8 @@ For details on **embedded files** refer to Appendix 3. ------------------------------- Clear metadata information. If you do this out of privacy / data protection concerns, make sure you save the document as a new file with *garbage > 0*. Only then the old */Info* object will also be physically removed from the file. In this case, you may also want to clear any XML metadata inserted by several PDF editors: ->>> import fitz ->>> doc=fitz.open("pymupdf.pdf") +>>> import pymupdf +>>> doc=pymupdf.open("pymupdf.pdf") >>> doc.metadata # look at what we currently have {'producer': 'rst2pdf, reportlab', 'format': 'PDF 1.4', 'encryption': None, 'author': 'Jorj X. McKie', 'modDate': "D:20160611145816-04'00'", 'keywords': 'PDF, XPS, EPUB, CBZ', @@ -2016,15 +2189,15 @@ Clear metadata information. If you do this out of privacy / data protection conc {'producer': 'none', 'format': 'PDF 1.4', 'encryption': None, 'author': 'none', 'modDate': 'none', 'keywords': 'none', 'title': 'none', 'creationDate': 'none', 'creator': 'none', 'subject': 'none'} ->>> doc._delXmlMetadata() # clear any XML metadata +>>> doc.del_xml_metadata() # clear any XML metadata >>> doc.save("anonymous.pdf", garbage = 4) # save anonymized doc :meth:`set_toc` Demonstration ---------------------------------- This shows how to modify or add a table of contents. Also have a look at `import.py `_ and `export.py `_ in the examples directory. ->>> import fitz ->>> doc = fitz.open("test.pdf") +>>> import pymupdf +>>> doc = pymupdf.open("test.pdf") >>> toc = doc.get_toc() >>> for t in toc: print(t) # show what we have [1, 'The PyMuPDF Documentation', 1] @@ -2044,8 +2217,8 @@ This shows how to modify or add a table of contents. Also have a look at `import ---------------------------- **(1) Concatenate two documents including their TOCs:** ->>> doc1 = fitz.open("file1.pdf") # must be a PDF ->>> doc2 = fitz.open("file2.pdf") # must be a PDF +>>> doc1 = pymupdf.open("file1.pdf") # must be a PDF +>>> doc2 = pymupdf.open("file2.pdf") # must be a PDF >>> pages1 = len(doc1) # save doc1's page count >>> toc1 = doc1.get_toc(False) # save TOC 1 >>> toc2 = doc2.get_toc(False) # save TOC 2 @@ -2075,11 +2248,11 @@ Other Examples imglist = doc.get_page_images(i) for img in imglist: xref = img[0] # xref number - pix = fitz.Pixmap(doc, xref) # make pixmap from image + pix = pymupdf.Pixmap(doc, xref) # make pixmap from image if pix.n - pix.alpha < 4: # can be saved as PNG pix.save("p%s-%s.png" % (i, xref)) else: # CMYK: must convert first - pix0 = fitz.Pixmap(fitz.csRGB, pix) + pix0 = pymupdf.Pixmap(pymupdf.csRGB, pix) pix0.save("p%s-%s.png" % (i, xref)) pix0 = None # free Pixmap resources pix = None # free Pixmap resources @@ -2100,7 +2273,7 @@ Other Examples .. [#f5] Examples for "Form XObjects" are created by :meth:`Page.show_pdf_page`. -.. [#f6] For a *False* the **complete document** must be scanned. Both methods **do not load pages,** but only scan object definitions. This makes them at least 10 times faster than application-level loops (where total response time roughly equals the time for loading all pages). For the :ref:`AdobeManual` (756 pages) and the Pandas documentation (over 3070 pages) -- both have no annotations -- the method needs about 11 ms for the answer *False*. So response times will probably become significant only well beyond this order of magnitude. +.. [#f6] For a ``False`` the **complete document** must be scanned. Both methods **do not load pages,** but only scan object definitions. This makes them at least 10 times faster than application-level loops (where total response time roughly equals the time for loading all pages). For the :ref:`AdobeManual` (756 pages) and the Pandas documentation (over 3070 pages) -- both have no annotations -- the method needs about 11 ms for the answer ``False``. So response times will probably become significant only well beyond this order of magnitude. .. [#f7] This only works under certain conditions. For example, if there is normal text covered by some image on top of it, then this is undetectable and the respective text is **not** removed. Similar is true for white text on white background, and so on. diff --git a/docs/font.rst b/docs/font.rst index 355db87d7..875213c17 100644 --- a/docs/font.rst +++ b/docs/font.rst @@ -8,9 +8,9 @@ Font * New in v1.16.18 -This class represents a font as defined in MuPDF (*fz_font_s* structure). It is required for the new class :ref:`TextWriter` and the new :meth:`Page.write_text`. Currently, it has no connection to how fonts are used in methods :meth:`Page.insert_text` or :meth:`Page.insert_textbox`, respectively. +This class represents a font as defined in |MuPDF| (``fz_font_s`` structure). It is required for the new class :ref:`TextWriter` and the new :meth:`Page.write_text`. Currently, it has no connection to how fonts are used in methods :meth:`Page.insert_text` or :meth:`Page.insert_textbox`, respectively. -A Font object also contains useful general information, like the font bbox, the number of defined glyphs, glyph names or the bbox of a single glyph. +A ``Font`` object also contains useful general information, like the font bbox, the number of defined glyphs, glyph names or the ``bbox`` of a single glyph. ==================================== ============================================ @@ -31,7 +31,10 @@ A Font object also contains useful general information, like the font bbox, the :attr:`~Font.flags` Collection of font properties :attr:`~Font.glyph_count` Number of supported glyphs :attr:`~Font.name` Name of font -:attr:`~Font.is_writable` Font usable with :ref:`TextWriter` +:attr:`~Font.is_bold` `True` if bold +:attr:`~Font.is_monospaced` `True` if mono-spaced +:attr:`~Font.is_serif` `True` if serif, `False` if sans-serif +:attr:`~Font.is_italic` `True` if italic ==================================== ============================================ @@ -71,7 +74,7 @@ A Font object also contains useful general information, like the font bbox, the :arg bool is_italic: look for an italic font. :arg bool is_serif: look for a serifed font. - :returns: a MuPDF font if successful. This is the overall sequence of checks to determine an appropriate font: + :returns: a |MuPDF| font if successful. This is the overall sequence of checks to determine an appropriate font: =========== ============================================================ Argument Action @@ -135,9 +138,9 @@ A Font object also contains useful general information, like the font bbox, the .. method:: has_glyph(chr, language=None, script=0, fallback=False) - Check whether the unicode *chr* exists in the font or (option) some fallback font. May be used to check whether any "TOFU" symbols will appear on output. + Check whether the unicode ``chr`` exists in the font or (option) some fallback font. May be used to check whether any "TOFU" symbols will appear on output. - :arg int chr: the unicode of the character (i.e. *ord()*). + :arg int chr: the unicode of the character (i.e. ``ord()``). :arg str language: the language -- currently unused. :arg int script: the UCDN script number. :arg bool fallback: *(new in v1.17.5)* perform an extended search in fallback fonts or restrict to current font (default). @@ -149,10 +152,10 @@ A Font object also contains useful general information, like the font bbox, the Return an array of unicodes supported by this font. - :returns: an *array.array* [#f2]_ of length at most :attr:`Font.glyph_count`. I.e. *chr()* of every item in this array has a glyph in the font without using fallbacks. This is an example display of the supported glyphs: + :returns: an ``array.array`` [#f2]_ of length at most :attr:`Font.glyph_count`. I.e. ``chr()`` of every item in this array has a glyph in the font without using fallbacks. This is an example display of the supported glyphs: - >>> import fitz - >>> font = fitz.Font("math") + >>> import pymupdf + >>> font = pymupdf.Font("math") >>> vuc = font.valid_codepoints() >>> for i in vuc: print("%04X %s (%s)" % (i, chr(i), font.unicode_to_glyph_name(i))) @@ -188,8 +191,8 @@ A Font object also contains useful general information, like the font bbox, the Calculate the "width" of the character's glyph (visual representation). - :arg int chr: the unicode number of the character. Use *ord()*, not the character itself. Again, this should normally work even if a character is not supported by that font, because fallback fonts will be checked where necessary. - :arg int wmode: write mode, 0 = horizontal, 1 = vertical. + :arg int chr: the unicode number of the character. Use ``ord()``, not the character itself. Again, this should normally work even if a character is not supported by that font, because fallback fonts will be checked where necessary. + :arg int wmode: write mode, ``0`` = horizontal, ``1`` = vertical. The other parameters are not in use currently. @@ -203,7 +206,7 @@ A Font object also contains useful general information, like the font bbox, the :returns: The unicode integer, or 65533 = 0xFFFD if the name is unknown. Examples: `font.glyph_name_to_unicode("Sigma") = 931`, `font.glyph_name_to_unicode("sigma") = 963`. Refer to the `Adobe Glyph List `_ publication for a list of glyph names and their unicode numbers. Example: - >>> font = fitz.Font("helv") + >>> font = pymupdf.Font("helv") >>> font.has_glyph(font.glyph_name_to_unicode("infinity")) True @@ -213,9 +216,9 @@ A Font object also contains useful general information, like the font bbox, the .. method:: glyph_bbox(chr, language=None, script=0) - The glyph rectangle relative to fontsize 1. + The glyph rectangle relative to :data:`fontsize` 1. - :arg int chr: *ord()* of the character. + :arg int chr: ``ord()`` of the character. :returns: a :ref:`Rect`. @@ -224,11 +227,11 @@ A Font object also contains useful general information, like the font bbox, the Show the name of the character's glyph. - :arg int ch: the unicode number of the character. Use *ord()*, not the character itself. + :arg int ch: the unicode number of the character. Use ``ord()``, not the character itself. :returns: a string representing the glyph's name. E.g. `font.glyph_name(ord("#")) = "numbersign"`. For an invalid code ".notfound" is returned. - .. note:: *(Changed in v1.18.0)* This method and :meth:`Font.glyph_name_to_unicode` no longer depend on a font and instead retrieve information from the **Adobe Glyph List**. Also available as `fitz.unicode_to_glyph_name()` and resp. `fitz.glyph_name_to_unicode()`. + .. note:: *(Changed in v1.18.0)* This method and :meth:`Font.glyph_name_to_unicode` no longer depend on a font and instead retrieve information from the **Adobe Glyph List**. Also available as `pymupdf.unicode_to_glyph_name()` and resp. `pymupdf.glyph_name_to_unicode()`. .. index:: pair: text_length, fontsize @@ -241,7 +244,7 @@ A Font object also contains useful general information, like the font bbox, the :arg str text: a text string, UTF-8 encoded. - :arg float fontsize: the fontsize. + :arg float fontsize: the :data:`fontsize`. :rtype: float @@ -265,17 +268,17 @@ A Font object also contains useful general information, like the font bbox, the :arg str text: a text string, UTF-8 encoded. - :arg float fontsize: the fontsize. + :arg float fontsize: the :data:`fontsize`. :rtype: tuple :returns: the lengths in points of the characters of a string when stored in the PDF. It works like :meth:`Font.text_length` broken down to single characters. This is a high speed method, used e.g. in :meth:`TextWriter.fill_textbox`. The following is true (allowing rounding errors): `font.text_length(text) == sum(font.char_lengths(text))`. - >>> font = fitz.Font("helv") + >>> font = pymupdf.Font("helv") >>> text = "PyMuPDF" >>> font.text_length(text) 50.115999937057495 - >>> fitz.get_text_length(text, fontname="helv") + >>> pymupdf.get_text_length(text, fontname="helv") 50.115999937057495 >>> sum(font.char_lengths(text)) 50.115999937057495 @@ -337,7 +340,7 @@ A Font object also contains useful general information, like the font bbox, the * New in v1.18.0 - The ascender value of the font, see `here `_ for details. Please note that there is a difference to the strict definition: our value includes everything above the baseline -- not just the height difference between upper case "A" and and lower case "a". + The ascender value of the font, see `ascender typography `_ for details. Please note that there is a difference to the strict definition: our value includes everything above the baseline -- not just the height difference between upper case "A" and and lower case "a". :rtype: float @@ -345,22 +348,26 @@ A Font object also contains useful general information, like the font bbox, the * New in v1.18.0 - The descender value of the font, see `here `_ for details. This value always is negative and is the portion that some glyphs descend below the base line, for example "g" or "y". As a consequence, the value `ascender - descender` is the total height, that every glyph of the font fits into. This is true at least for most fonts -- as always, there are exceptions, especially for calligraphic fonts, etc. + The descender value of the font, see `descender typography `_ for details. This value always is negative and is the portion that some glyphs descend below the base line, for example "g" or "y". As a consequence, the value `ascender - descender` is the total height, that every glyph of the font fits into. This is true at least for most fonts -- as always, there are exceptions, especially for calligraphic fonts, etc. :rtype: float - .. attribute:: is_writable + .. attribute:: is_bold - * New in v1.18.0 + .. attribute:: is_italic + + .. attribute:: is_monospaced + + .. attribute:: is_serif - Indicates whether this font can be used with :ref:`TextWriter`. + A number of attributes with obvious meanings. Reflect some values of the :attr:`Font.flags` dictionary. :rtype: bool .. rubric:: Footnotes -.. [#f1] MuPDF does not support all fontfiles with this feature and will raise exceptions like *"mupdf: FT_New_Memory_Face((null)): unknown file format"*, if it encounters issues. The :ref:`TextWriter` methods check :attr:`Font.is_writable`. +.. [#f1] MuPDF does not support all fontfiles with this feature and will raise exceptions like *"mupdf: FT_New_Memory_Face((null)): unknown file format"*, if it encounters issues. -.. [#f2] The built-in module *array* has been chosen for its speed and its compact representation of values. +.. [#f2] The built-in Python module `array` has been chosen for its speed and low memory requirement. .. include:: footer.rst diff --git a/docs/footer.rst b/docs/footer.rst index ee36058e1..7e652a942 100644 --- a/docs/footer.rst +++ b/docs/footer.rst @@ -1,38 +1,53 @@ -.. raw:: html - - - ---- .. raw:: html +

This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information.

+ -

This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information.

+.. rst-class:: footer-version + + This documentation covers all versions up to |version|. + + +.. External Links: -.. note - this ensures that the Sphinx build system will pull in the image (as it is referenced in an RST file) to _images, - we don't want to display it via rst markup due to limitations (hence width:0), however we do want it available for our raw HTML - which we use in header.rst. +.. _pdf2docx: https://pdf2docx.readthedocs.io/en/latest/ +.. _pdf2docx extract tables method: https://pdf2docx.readthedocs.io/en/latest/quickstart.table.html -.. image:: images/discord-mark-blue.svg - :alt: Discord logo - :width: 0 - :height: 0 - :target: https://discord.gg/TSpYGBW4eq diff --git a/docs/functions.rst b/docs/functions.rst index a7bbc156e..6f467ba11 100644 --- a/docs/functions.rst +++ b/docs/functions.rst @@ -34,7 +34,7 @@ Yet others are handy, general-purpose utilities. :meth:`EMPTY_RECT` return the (standard) empty / invalid rectangle :meth:`get_pdf_now` return the current timestamp in PDF format :meth:`get_pdf_str` return PDF-compatible string -:meth:`get_text_length` return string length for a given font & fontsize +:meth:`get_text_length` return string length for a given font & :data:`fontsize` :meth:`glyph_name_to_unicode` return unicode from a glyph name :meth:`image_profile` return a dictionary of basic image properties :meth:`INFINITE_IRECT` return the (only existing) infinite rectangle @@ -60,14 +60,16 @@ Yet others are handy, general-purpose utilities. :meth:`recover_char_quad` compute the quad of a char ("rawdict") :meth:`recover_line_quad` compute the quad of a subset of line spans :meth:`recover_quad` compute the quad of a span ("dict", "rawdict") -:meth:`recover_quad` return the quad for a text span ("dict" / "rawdict") :meth:`recover_span_quad` compute the quad of a subset of span characters +:meth:`set_messages` set destination of |PyMuPDF| messages. :meth:`sRGB_to_pdf` return PDF RGB color tuple from an sRGB integer :meth:`sRGB_to_rgb` return (R, G, B) color tuple from an sRGB integer :meth:`unicode_to_glyph_name` return glyph name from a unicode :meth:`get_tessdata` locates the language support of the Tesseract-OCR installation +:meth:`colors_pdf_dict` return dict of color names. +:meth:`colors_wx_list` return list of color names. :attr:`fitz_fontdescriptors` dictionary of available supplement fonts -:attr:`TESSDATA_PREFIX` a copy of `os.environ["TESSDATA_PREFIX"]` +:attr:`PYMUPDF_MESSAGE` destination of |PyMuPDF| messages. :attr:`pdfcolor` dictionary of almost 500 RGB colors in PDF format. ==================================== ============================================================== @@ -82,7 +84,7 @@ Yet others are handy, general-purpose utilities. :arg str s: any format name from above in upper or lower case, like *"A4"* or *"letter-l"*. :rtype: tuple - :returns: *(width, height)* of the paper format. For an unknown format *(-1, -1)* is returned. Examples: *fitz.paper_size("A4")* returns *(595, 842)* and *fitz.paper_size("letter-l")* delivers *(792, 612)*. + :returns: *(width, height)* of the paper format. For an unknown format *(-1, -1)* is returned. Examples: *pymupdf.paper_size("A4")* returns *(595, 842)* and *pymupdf.paper_size("letter-l")* delivers *(792, 612)*. ----- @@ -93,13 +95,48 @@ Yet others are handy, general-purpose utilities. :arg str s: any format name supported by :meth:`paper_size`. :rtype: :ref:`Rect` - :returns: *fitz.Rect(0, 0, width, height)* with *width, height=fitz.paper_size(s)*. + :returns: *pymupdf.Rect(0, 0, width, height)* with *width, height=pymupdf.paper_size(s)*. - >>> import fitz - >>> fitz.paper_rect("letter-l") - fitz.Rect(0.0, 0.0, 792.0, 612.0) + >>> import pymupdf + >>> pymupdf.paper_rect("letter-l") + pymupdf.Rect(0.0, 0.0, 792.0, 612.0) >>> +----- + + .. method:: set_messages(*, text=None, fd=None, stream=None, path=None, path_append=None, pylogging=None, pylogging_logger=None, pylogging_level=None, pylogging_name=None, ) + + Sets destination of |PyMuPDF| messages to a file descriptor, + a file, an existing stream or `Python's logging system + `_. + + Usually one would only set one arg, or one or more `pylogging*` args. + + :arg str text: + A text specification of destination; for details see description of + environmental variable `PYMUPDF_MESSAGE`. + :arg int fd: + Write to file descriptor. + :arg stream: + Write to existing stream, which must have methods `.write(text)` and + `.flush()`. + :arg str path: + Write to a file. + :arg str path_append: + Append to a file. + :arg pylogging: + Write to Python's `logging` system. + :arg logging.Logger pylogging_logger: + Write to Python's `logging` system using specified Logger. + :arg int pylogging_level: + Write to Python's `logging` system using specified level. + :arg str pylogging_name: + Write to Python's `logging` system using specified logger name. + Only used if `pylogging_logger` is ``None``. Default is `pymupdf`. + + If any `pylogging*` arg is not ``None``, we write to `Python's logging system + `_. + ----- .. method:: sRGB_to_pdf(srgb) @@ -135,7 +172,7 @@ Yet others are handy, general-purpose utilities. :arg str name: the name of some glyph. The function is based on the `Adobe Glyph List `_. :rtype: int - :returns: the unicode. Invalid *name* entries return `0xfffd (65533)`. + :returns: the unicode. Invalid ``name`` entries return `0xfffd (65533)`. .. note:: A similar functionality is provided by package `fontTools `_ in its *agl* sub-package. @@ -150,7 +187,7 @@ Yet others are handy, general-purpose utilities. :arg int ch: the unicode given by e.g. `ord("ß")`. The function is based on the `Adobe Glyph List `_. :rtype: str - :returns: the glyph name. E.g. `fitz.unicode_to_glyph_name(ord("Ä"))` returns `'Adieresis'`. + :returns: the glyph name. E.g. `pymupdf.unicode_to_glyph_name(ord("Ä"))` returns `'Adieresis'`. .. note:: A similar functionality is provided by package `fontTools `_: in its *agl* sub-package. @@ -205,7 +242,7 @@ Yet others are handy, general-purpose utilities. For example to replace the "sans-serif" HTML standard (i.e. Helvetica) with the above "notos", execute the following. Whenever "sans-serif" is used (whether explicitly or implicitly), the Noto Sans fonts will be selected. - `CSS = fitz.css_for_pymupdf_font("notos", name="sans-serif", archive=...)` + `CSS = pymupdf.css_for_pymupdf_font("notos", name="sans-serif", archive=...)` Expects and returns the CSS source, with the new CSS definitions appended. @@ -215,27 +252,14 @@ Yet others are handy, general-purpose utilities. :arg str name: the name under which the "fontcode" fonts should be found. If omitted, "fontcode" will be used. :rtype: str - :returns: Modified CSS, with appended `@font-face` statements for each font variant of fontcode. Fontbuffers associated with "fontcode" will have been added to 'archive'. The function will automatically find up to 4 font variants. All pymupdf-fonts (that are no special purpose like math or music, etc.) have regular, bold, italic and bold-italic variants. To see currently available font codes check `fitz.fitz_fontdescriptors.keys()`. This will show something like `dict_keys(['cascadia', 'cascadiai', 'cascadiab', 'cascadiabi', 'figbo', 'figo', 'figbi', 'figit', 'fimbo', 'fimo', 'spacembo', 'spacembi', 'spacemit', 'spacemo', 'math', 'music', 'symbol1', 'symbol2', 'notosbo', 'notosbi', 'notosit', 'notos', 'ubuntu', 'ubuntubo', 'ubuntubi', 'ubuntuit', 'ubuntm', 'ubuntmbo', 'ubuntmbi', 'ubuntmit'])`. + :returns: Modified CSS, with appended `@font-face` statements for each font variant of fontcode. Fontbuffers associated with "fontcode" will have been added to 'archive'. The function will automatically find up to 4 font variants. All pymupdf-fonts (that are no special purpose like math or music, etc.) have regular, bold, italic and bold-italic variants. To see currently available font codes check `pymupdf.fitz_fontdescriptors.keys()`. This will show something like `dict_keys(['cascadia', 'cascadiai', 'cascadiab', 'cascadiabi', 'figbo', 'figo', 'figbi', 'figit', 'fimbo', 'fimo', 'spacembo', 'spacembi', 'spacemit', 'spacemo', 'math', 'music', 'symbol1', 'symbol2', 'notosbo', 'notosbi', 'notosit', 'notos', 'ubuntu', 'ubuntubo', 'ubuntubi', 'ubuntuit', 'ubuntm', 'ubuntmbo', 'ubuntmbi', 'ubuntmit'])`. Here is a complete snippet for using the "Noto Sans" font instead of "Helvetica":: - arch = fitz.Archive() - CSS = fitz.css_for_pymupdf_font("notos", name="sans-serif", archive=arch) - story = fitz.Story(user_css=CSS, archive=arch) - - ------ - - .. method:: recover_quad(line_dir, span) - - *New in v1.18.9* + arch = pymupdf.Archive() + CSS = pymupdf.css_for_pymupdf_font("notos", name="sans-serif", archive=arch) + story = pymupdf.Story(user_css=CSS, archive=arch) - Convenience function returning the quadrilateral enveloping the text of a text span, as returned by :meth:`Page.get_text` using the "dict" or "rawdict" options. - - :arg tuple line_dict: the value `line["dir"]` of the span's line. - :arg dict span: the span sub-dictionary. - - :returns: the quadrilateral of the span's text. ----- @@ -245,12 +269,12 @@ Yet others are handy, general-purpose utilities. *New in v1.17.4* - Convenience function to split a rectangle into sub-rectangles. Returns a list of *rows* lists, each containing *cols* :ref:`Rect` items. Each sub-rectangle can then be addressed by its row and column index. + Convenience function to split a rectangle into sub-rectangles of equal size. Returns a list of `rows` lists, each containing `cols` :ref:`Rect` items. Each sub-rectangle can then be addressed by its row and column index. :arg rect_like rect: the rectangle to split. :arg int cols: the desired number of columns. :arg int rows: the desired number of rows. - :returns: a list of :ref:`Rect` objects of equal size, whose union equals *rect*. Here is the layout of a 3x4 table created by `cell = fitz.make_table(rect, cols=4, rows=3)`: + :returns: a list of :ref:`Rect` objects of equal size, whose union equals *rect*. Here is the layout of a 3x4 table created by `cell = pymupdf.make_table(rect, cols=4, rows=3)`: .. image:: images/img-make-table.* :scale: 60 @@ -270,11 +294,11 @@ Yet others are handy, general-purpose utilities. :rtype: :ref:`Matrix` :returns: a matrix which combines a rotation and a translation:: - >>> p1 = fitz.Point(1, 1) - >>> p2 = fitz.Point(4, 5) + >>> p1 = pymupdf.Point(1, 1) + >>> p2 = pymupdf.Point(4, 5) >>> abs(p2 - p1) # distance of points 5.0 - >>> m = fitz.planish_line(p1, p2) + >>> m = pymupdf.planish_line(p1, p2) >>> p1 * m Point(0.0, 0.0) >>> p2 * m @@ -302,11 +326,11 @@ Yet others are handy, general-purpose utilities. A dictionary of usable fonts from repository `pymupdf-fonts `_. Items are keyed by their reserved fontname and provide information like this:: - In [2]: fitz.fitz_fontdescriptors.keys() + In [2]: pymupdf.fitz_fontdescriptors.keys() Out[2]: dict_keys(['figbo', 'figo', 'figbi', 'figit', 'fimbo', 'fimo', 'spacembo', 'spacembi', 'spacemit', 'spacemo', 'math', 'music', 'symbol1', 'symbol2']) - In [3]: fitz.fitz_fontdescriptors["fimo"] + In [3]: pymupdf.fitz_fontdescriptors["fimo"] Out[3]: {'name': 'Fira Mono Regular', 'size': 125712, @@ -318,19 +342,43 @@ Yet others are handy, general-purpose utilities. If `pymupdf-fonts` is not installed, the dictionary is empty. - The dictionary keys can be used to define a :ref:`Font` via e.g. `font = fitz.Font("fimo")` -- just like you can do it with the builtin fonts "Helvetica" and friends. - ------ - - .. attribute:: TESSDATA_PREFIX - - * New in v1.19.4 - - Copy of `os.environ["TESSDATA_PREFIX"]` for convenient checking whether there is integrated Tesseract OCR support. + The dictionary keys can be used to define a :ref:`Font` via e.g. `font = pymupdf.Font("fimo")` -- just like you can do it with the builtin fonts "Helvetica" and friends. + +----- + + .. attribute:: PYMUPDF_MESSAGE + + If in `os.environ` when |PyMuPDF| is imported, sets destination of + |PyMuPDF| messages. Otherwise messages are sent to `sys.stdout`. + + * + If the value starts with `fd:`, the remaining text should be an integer + file descriptor to which messages are written. + + * For example `PYMUPDF_MESSAGE=fd:2` will send messages to stderr. + * + If the value starts with `path:`, the remaining text is the path of a + file to which messages are written. If the file already exists, it is + truncated. + * + If the value starts with `path+:`, the remaining text is the path of + file to which messages are written. If the file already exists, we + append output. + + * + If the value starts with `logging:`, messages are written to `Python's + logging system `_. The + remaining text can contain comma-separated name=value items: + + * `level=` sets the logging level. + * `name=` sets the logger name (default is `pymupdf`). + + Other items are ignored. + + * Other prefixes will cause an error. + + Also see `set_messages()`. - If this attribute is `None`, Tesseract-OCR is either not installed, or the environment variable is not set to point to Tesseract's language support folder. - - .. note:: This variable is now checked before OCR functions are tried. This prevents verbose messages from MuPDF. ----- @@ -338,13 +386,13 @@ Yet others are handy, general-purpose utilities. * New in v1.19.6 - Contains about 500 RGB colors in PDF format with the color name as key. To see what is there, you can obviously look at `fitz.pdfcolor.keys()`. + Contains about 500 RGB colors in PDF format with the color name as key. To see what is there, you can obviously look at `pymupdf.pdfcolor.keys()`. Examples: - * `fitz.pdfcolor["red"] = (1.0, 0.0, 0.0)` - * `fitz.pdfcolor["skyblue"] = (0.5294117647058824, 0.807843137254902, 0.9215686274509803)` - * `fitz.pdfcolor["wheat"] = (0.9607843137254902, 0.8705882352941177, 0.7019607843137254)` + * `pymupdf.pdfcolor["red"] = (1.0, 0.0, 0.0)` + * `pymupdf.pdfcolor["skyblue"] = (0.5294117647058824, 0.807843137254902, 0.9215686274509803)` + * `pymupdf.pdfcolor["wheat"] = (0.9607843137254902, 0.8705882352941177, 0.7019607843137254)` ----- @@ -361,11 +409,11 @@ Yet others are handy, general-purpose utilities. * New in version 1.14.7 - Calculate the length of text on output with a given **builtin** font, fontsize and encoding. + Calculate the length of text on output with a given **builtin** font, :data:`fontsize` and encoding. :arg str text: the text string. - :arg str fontname: the fontname. Must be one of either the :ref:`Base-14-Fonts` or the CJK fonts, identified by their "reserved" fontnames (see table in :meth:`Page.insert_font`). - :arg float fontsize: the fontsize. + :arg str fontname: the font name. Must be one of either the :ref:`Base-14-Fonts` or the CJK fonts, identified by their "reserved" fontnames (see table in :meth:`Page.insert_font`). + :arg float fontsize: the :data:`fontsize`. :arg int encoding: the encoding to use. Besides 0 = Latin, 1 = Greek and 2 = Cyrillic (Russian) are available. Relevant for Base-14 fonts "Helvetica", "Courier" and "Times" and their variants only. Make sure to use the same value as in the corresponding text insertion. :rtype: float :returns: the length in points the string will have (e.g. when used in :meth:`Page.insert_text`). @@ -374,7 +422,7 @@ Yet others are handy, general-purpose utilities. .. note:: The :ref:`Font` class offers a similar method, :meth:`Font.text_length`, which supports Base-14 fonts and any font with a character map (CMap, Type 0 fonts). - .. warning:: If you use this function to determine the required rectangle width for the (:ref:`Page` or :ref:`Shape`) *insert_textbox* methods, be aware that they calculate on a **by-character level**. Because of rounding effects, this will mostly lead to a slightly larger number: *sum([fitz.get_text_length(c) for c in text]) > fitz.get_text_length(text)*. So either (1) do the same, or (2) use something like *fitz.get_text_length(text + "'")* for your calculation. + .. warning:: If you use this function to determine the required rectangle width for the (:ref:`Page` or :ref:`Shape`) *insert_textbox* methods, be aware that they calculate on a **by-character level**. Because of rounding effects, this will mostly lead to a slightly larger number: *sum([pymupdf.get_text_length(c) for c in text]) > pymupdf.get_text_length(text)*. So either (1) do the same, or (2) use something like *pymupdf.get_text_length(text + "'")* for your calculation. ----- @@ -403,7 +451,7 @@ Yet others are handy, general-purpose utilities. :returns: No exception is ever raised. In case of an error, `None` is returned. Otherwise, there are the following items:: - In [2]: fitz.image_profile(open("nur-ruhig.jpg", "rb").read()) + In [2]: pymupdf.image_profile(open("nur-ruhig.jpg", "rb").read()) Out[2]: {'width': 439, 'height': 501, @@ -432,7 +480,7 @@ Yet others are handy, general-purpose utilities. .. note:: * For some "exotic" images (FAX encodings, RAW formats and the like), this method will not work. You can however still work with such images in PyMuPDF, e.g. by using :meth:`Document.extract_image` or create pixmaps via `Pixmap(doc, xref)`. These methods will automatically convert exotic images to the PNG format before returning results. - * You can also get the properties of images embedded in a PDF, via their :data:`xref`. In this case make sure to extract the raw stream: `fitz.image_profile(doc.xref_stream_raw(xref))`. + * You can also get the properties of images embedded in a PDF, via their :data:`xref`. In this case make sure to extract the raw stream: `pymupdf.image_profile(doc.xref_stream_raw(xref))`. * Images as returned by the image blocks of :meth:`Page.get_text` using "dict" or "rawdict" options are also supported. @@ -559,7 +607,7 @@ Yet others are handy, general-purpose utilities. 1. Information above tagged with "(1)" has the same meaning and value as explained in :ref:`TextPage`. - - Please note that the font `flags` value will never contain a *superscript* flag bit: the detection of superscripts is done within MuPDF :ref:`TextPage` code -- it is not a property of any font. + - Please note that the font ``flags`` value will never contain a *superscript* flag bit: the detection of superscripts is done within MuPDF :ref:`TextPage` code -- it is not a property of any font. - Also note, that the text *color* is encoded as the usual tuple of floats 0 <= f <= 1 -- not in sRGB format. Depending on `span["type"]`, interpret this as fill color or stroke color. 2. There are 3 text span types: @@ -568,7 +616,7 @@ Yet others are handy, general-purpose utilities. - 1: Stroked text -- equivalent to `1 Tr`, only the character borders are shown. - 3: Ignored text -- equivalent to `3 Tr` (hidden text). - 3. Line width in this context is important only for processing `span["type"] != 0`: it determines the thickness of the character's border line. This value may not be provided at all with the text data. In this case, a value of 5% of the fontsize (`span["size"] * 0,05`) is generated. Often, an "artificial" bold text in PDF is created by `2 Tr`. There is no equivalent span type for this case. Instead, respective text is represented by two consecutive spans -- which are identical in every aspect, except for their types, which are 0, resp 1. It is your responsibility to handle this type of situation - in :meth:`Page.get_text`, MuPDF is doing this for you. + 3. Line width in this context is important only for processing `span["type"] != 0`: it determines the thickness of the character's border line. This value may not be provided at all with the text data. In this case, a value of 5% of the :data:`fontsize` (`span["size"] * 0,05`) is generated. Often, an "artificial" bold text in PDF is created by `2 Tr`. There is no equivalent span type for this case. Instead, respective text is represented by two consecutive spans -- which are identical in every aspect, except for their types, which are 0, resp 1. It is your responsibility to handle this type of situation - in :meth:`Page.get_text`, MuPDF is doing this for you. 4. For data compactness, the character's unicode is provided here. Use built-in function `chr()` for the character itself. 5. The alpha / opacity value of the span's text, `0 <= opacity <= 1`, 0 is invisible text, 1 (100%) is intransparent. Depending on `span["type"]`, interpret this value as *fill* opacity or, resp. *stroke* opacity. 6. *(Changed in v1.19.0)* This value is equal or close to `char["bbox"]` of "rawdict". In particular, the bbox **height** value is always computed as if **"small glyph heights"** had been requested. @@ -603,7 +651,7 @@ Yet others are handy, general-purpose utilities. So you may want to replace the two example tuples above by the following single one: `(0xFB01, glyph, (x, y), (x0, y0, x1, y1))` (there is usually no need to lookup the correct glyph id for 0xFB01 in the resp. font, but you may execute `font.has_glyph(0xFB01)` and use its return value). - * **Changed in v1.19.3:** Similar to other text extraction methods, the character and span bboxes envelop the character quads. To recover the quads, follow the same methods :meth:`recover_quad`, :meth:`recover_char_quad` or :meth:´recover_span_quad` as explained in :ref:`textpagedict`. Use either `None` or `span["dir"]` for the writing direction. + * **Changed in v1.19.3:** Similar to other text extraction methods, the character and span bboxes envelop the character quads. To recover the quads, follow the same methods :meth:`recover_quad`, :meth:`recover_char_quad` or :meth:`recover_span_quad` as explained in :ref:`textpagedict`. Use either `None` or `span["dir"]` for the writing direction. * **Changed in v1.21.1:** If applicable, the name of the OCG is shown in `"layer"`. @@ -611,15 +659,17 @@ Yet others are handy, general-purpose utilities. .. method:: Page.wrap_contents() - Put string pair "q" / "Q" before, resp. after a page's */Contents* object(s) to ensure that any "geometry" changes are **local** only. + Ensures that the page's so-called graphics state is balanced and new content can be inserted correctly. + + In versions 1.24.1+ of PyMuPDF the method was improved and is being executed automatically as required, so you should no longer need to concern yourself with it. - Use this method as an alternative, minimalist version of :meth:`Page.clean_contents`. Its advantage is a small footprint in terms of processing time and impact on the data size of incremental saves. Multiple executions of this method are no problem and have no functional impact: `b"q q contents Q Q"` is treated like `b"q contents Q"`. + We discourage using :meth:`Page.clean_contents` to achieve this. ----- .. attribute:: Page.is_wrapped - Indicate whether :meth:`Page.wrap_contents` may be required for object insertions in standard PDF geometry. Note that this is a quick, basic check only: a value of *False* may still be a false alarm. But nevertheless executing :meth:`Page.wrap_contents` will have no negative side effects. + Indicate whether the page's so-called graphic state is balanced. If `False`, :meth:`Page.wrap_contents` should be executed if new content is inserted (only relevant in `overlay=True` mode). In newer versions (1.24.1+), this check and corresponding adjustments are automatically executed -- you therefore should not be concerned about this anymore. :rtype: bool @@ -633,7 +683,7 @@ Yet others are handy, general-purpose utilities. ----- - .. method:: Page.get_text_words(flags=None) + .. method:: Page.get_text_words(flags=None, delimiters=None) Deprecated wrapper for :meth:`TextPage.extractWORDS`. Use :meth:`Page.get_text` with the "words" option instead. @@ -652,7 +702,7 @@ Yet others are handy, general-purpose utilities. .. method:: Page.get_contents() - PDF only: Retrieve a list of :data:`xref` of :data:`contents` objects of a page. May be empty or contain multiple integers. If the page is cleaned (:meth:`Page.clean_contents`), it will be one entry at most. The "source" of each `/Contents` object can be individually read by :meth:`Document.xref_stream` using an item of this list. Method :meth:`Page.read_contents` in contrast walks through this list and concatenates the corresponding sources into one `bytes` object. + PDF only: Retrieve a list of :data:`xref` of :data:`contents` objects of a page. May be empty or contain multiple integers. If the page is cleaned (:meth:`Page.clean_contents`), it will be no more than one entry. The "source" of each `/Contents` object can be individually read by :meth:`Document.xref_stream` using an item of this list. Method :meth:`Page.read_contents` in contrast walks through this list and concatenates the corresponding sources into one `bytes` object. :rtype: list[int] @@ -676,6 +726,8 @@ Yet others are handy, general-purpose utilities. .. warning:: This is a complex function which may generate large amounts of new data and render old data unused. It is **not recommended** using it together with the **incremental save** option. Also note that the resulting singleton new */Contents* object is **uncompressed**. So you should save to a **new file** using options *"deflate=True, garbage=3"*. + Do not any longer use this method to ensure correct insertions on PDF pages. Since PyMuPDF version 1.24.2 this is taken care of automatically. + ----- .. method:: Page.read_contents() @@ -703,7 +755,7 @@ Yet others are handy, general-purpose utilities. :arg int limit: limits the number of returned entries. The default of 256 is enforced for all fonts that only support 1-byte characters, so-called "simple fonts" (checked by this method). All :ref:`Base-14-Fonts` are simple fonts. :rtype: list - :returns: a list of *limit* tuples. Each character *c* has an entry *(g, w)* in this list with an index of *ord(c)*. Entry *g* (integer) of the tuple is the glyph id of the character, and float *w* is its normalized width. The actual width for some fontsize can be calculated as *w * fontsize*. For simple fonts, the *g* entry can always be safely ignored. In all other cases *g* is the basis for graphically representing *c*. + :returns: a list of *limit* tuples. Each character *c* has an entry *(g, w)* in this list with an index of *ord(c)*. Entry *g* (integer) of the tuple is the glyph id of the character, and float *w* is its normalized width. The actual width for some :data:`fontsize` can be calculated as *w * fontsize*. For simple fonts, the *g* entry can always be safely ignored. In all other cases *g* is the basis for graphically representing *c*. This function calculates the pixel width of a string called *text*:: @@ -719,11 +771,11 @@ Yet others are handy, general-purpose utilities. * New in version 1.14.14 - PDF only: Check whether the object represented by :data:`xref` is a :data:`stream` type. Return is *False* if not a PDF or if the number is outside the valid xref range. + PDF only: Check whether the object represented by :data:`xref` is a :data:`stream` type. Return is ``False`` if not a PDF or if the number is outside the valid xref range. :arg int xref: :data:`xref` number. - :returns: *True* if the object definition is followed by data wrapped in keyword pair *stream*, *endstream*. + :returns: ``True`` if the object definition is followed by data wrapped in keyword pair *stream*, *endstream*. ----- @@ -772,7 +824,7 @@ Yet others are handy, general-purpose utilities. :arg tuple line_dir: `line["dir"]` of the owning line. Use `None` for a span from :meth:`Page.get_texttrace`. :arg dict span: the span. - :arg list chars: the characters to consider. If omitted, identical to :meth:`recoer_span`. If given, the selected extraction option must be "rawdict". + :arg list chars: the characters to consider. If given, the selected extraction option must be "rawdict". :returns: the :ref:`Quad` of the selected characters, usable for text marker annotations ('Highlight', etc.). ----- @@ -787,13 +839,22 @@ Yet others are handy, general-purpose utilities. ----- - .. method:: get_tessdata() + .. method:: get_tessdata(tessdata=None) + + Detect Tesseract language support folder. - Return the name of Tesseract's language support folder. Use this function if the environment variable `TESSDATA_PREFIX` has not been set. + This function is used to enable OCR via Tesseract even if the language + support folder is not specified directly or in environment variable + TESSDATA_PREFIX. - :returns: `os.getenv("TESSDATA_PREFIX")` if not `None`. Otherwise, if Tesseract-OCR is installed, locate the name of `tessdata`. If no installation is found, return `False`. + * If is set we return it directly. + + * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set. + + * Otherwise we search for a Tesseract installation and return its language + support folder. - The folder name can be used as parameter `tessdata` in methods :meth:`Page.get_textpage_ocr`, :meth:`Pixmap.pdfocr_save` and :meth:`Pixmap.pdfocr_tobytes`. + * Otherwise we raise an exception. ----- @@ -815,4 +876,17 @@ Yet others are handy, general-purpose utilities. Return the "standard" empty and invalid rectangle `Rect(2147483520.0, 2147483520.0, -2147483648.0, -2147483648.0)` resp. quad. Its top-left and bottom-right point values are reversed compared to the infinite rectangle. It will e.g. be used to indicate empty bboxes in `page.get_text("dict")` dictionaries. There are however infinitely many empty or invalid rectangles. +----- + + .. method:: colors_pdf_dict() + + Returns a dict mapping lower-case color name to `(red, green, blue)` + tuple, and `red`, `green`, `blue` are floats in range 0..1. + + .. method:: colors_wx_list() + + Returns a list of `(colorname, red, green, blue)` tuples, where + `colorname` is upper case and `red`, `green`, `blue` are integers in + range 0..255. + .. include:: footer.rst diff --git a/docs/glossary.rst b/docs/glossary.rst index 46d8b6030..057cf448c 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -6,6 +6,10 @@ Glossary ============== +.. data:: coordinate + + This is an essential general mathematical / geometrical term for understanding this documentation. Please see this section for a more detailed discussion: :ref:`Coordinates`. + .. data:: matrix_like A Python sequence of 6 numbers. @@ -136,7 +140,12 @@ Glossary .. data:: xref - Abbreviation for cross-reference number: this is an integer unique identification for objects in a PDF. There exists a cross-reference table (which may physically consist of several separate segments) in each PDF, which stores the relative position of each object for quick lookup. The cross-reference table is one entry longer than the number of existing object: item zero is reserved and must not be used in any way. Many PyMuPDF classes have an *xref* attribute (which is zero for non-PDFs), and one can find out the total number of objects in a PDF via :meth:`Document.xref_length` *- 1*. + Abbreviation for cross-reference number: this is an integer unique identification for objects in a PDF. There exists a cross-reference table (which may physically consist of several separate segments) in each PDF, which stores the relative position of each object for quick lookup. The cross-reference table is one entry longer than the number of existing object: item zero is reserved and must not be used in any way. Many PyMuPDF classes have an :data:`xref` attribute (which is zero for non-PDFs), and one can find out the total number of objects in a PDF via :meth:`Document.xref_length` *- 1*. + + +.. data:: fontsize + + When referring to font size this metric is measured in points where 1 inch = 72 points. .. data:: resolution diff --git a/docs/header-404.rst b/docs/header-404.rst new file mode 100644 index 000000000..8202cb898 --- /dev/null +++ b/docs/header-404.rst @@ -0,0 +1,16 @@ +.. meta:: + :author: Artifex + :description: PyMuPDF is a high-performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents. + :keywords: PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, PDF Splitting, PDF Creation, Pyodide, PyScript + + +.. raw:: html + + +
+ +
diff --git a/docs/header.rst b/docs/header.rst index 98c0062e1..d44778e8c 100644 --- a/docs/header.rst +++ b/docs/header.rst @@ -1,52 +1,176 @@ +.. meta:: + :author: Artifex + :description: PyMuPDF is a high-performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents. + :keywords: PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, PDF Splitting, PDF Creation, Pyodide, PyScript + + +.. |history_begin| raw:: html + +
+ Show/hide history + +.. |history_end| raw:: html + +
+ +.. |pdf_only_class| raw:: html + +
This class is for PDF only.
+ + +.. names of common things + +.. |PyMuPDF| raw:: html + + PyMuPDF + +.. |PyMuPDF Pro| raw:: html + + PyMuPDF Pro + +.. |PDF| raw:: html + + PDF + +.. |PyMuPDF4LLM| raw:: html + + PyMuPDF4LLM + +.. |Markdown| raw:: html + + Markdown + +.. |MuPDF| raw:: html + + MuPDF + +.. |PDF| raw:: html + + PDF + +.. |AGPL| raw:: html + + AGPL + .. raw:: html - + + @media all and (max-width : 375px) { + #button-select-en , #button-select-ja { + font-size: 11px; + } + } - + - +
+ + +
diff --git a/docs/how-to-open-a-file.rst b/docs/how-to-open-a-file.rst new file mode 100644 index 000000000..d40fb2cf9 --- /dev/null +++ b/docs/how-to-open-a-file.rst @@ -0,0 +1,196 @@ +.. include:: header.rst + +.. _HowToOpenAFile: + +============================== +Opening Files +============================== + + + + +.. _Supported_File_Types: + + +Supported File Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +| + +PyMuPDF +""""""""" + +|PyMuPDF| can open files other than just |PDF|. + +The following file types are supported: + +.. include:: supported-files-table.rst + + +---- + + +PyMuPDF Pro +""""""""""""""" + +|PyMuPDF Pro| can open Office files. + +The following file types are supported: + +.. list-table:: + :header-rows: 1 + + * - **DOC/DOCX** + - **XLS/XLSX** + - **PPT/PPTX** + - **HWP/HWPX** + * - .. image:: images/icons/icon-docx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-xlsx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-pptx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-hangul.svg + :width: 40 + :height: 40 + + + +How to Open a File +~~~~~~~~~~~~~~~~~~~~~ + +To open a file, do the following: + +.. code-block:: python + + doc = pymupdf.open("a.pdf") + + +.. note:: The above creates a :ref:`Document`. The instruction `doc = pymupdf.Document("a.pdf")` does exactly the same. So, `open` is just a convenient alias and you can find its full API documented in that chapter. + + +File Recognizer: Opening with :index:`a Wrong File Extension ` +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +If you have a document with a wrong file extension for its type, do not worry: it will still be opened correctly, thanks to the integrated file "content recognizer". + +This component looks at the actual data in the file using a number of heuristics -- independent of the file extension. This of course is also true for file names **without** an extension. + +Here is a list of details about how the file content recognizer works: + +* When opening from a file name, use the ``filetype`` parameter if your file format cannot be determined by content inspection. This is for instance the case for all text files: "txt", "html", "xml" or source files. If the file extension is missing or wrong or the file resides in memory, the ``filetype`` must be used. File formats that can successfully be recognized will be opened even without or wrong extensions, and the ``filetype`` paraneter will be ignored. + +* Files based on text content do not contain unambiguously recognizable internal structures. This is true for source files (Python, C, etc.) but also HTML, XML and so on. Here, the file extensions and the ``filetype`` parameter continue to play a role and are used to create a "Tex" / "HTML" / ... document. Correspondingly, text files with other / no extensions, can successfully be opened using ``filetype``. + +---------- + + +Opening Remote Files +~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +For remote files on a server (i.e. non-local files), you will need to *stream* the file data to |PyMuPDF|. + +For example use the `requests `_ library as follows: + +.. code-block:: python + + import pymupdf + import requests + + r = requests.get('https://mupdf.com/docs/mupdf_explored.pdf') + data = r.content + doc = pymupdf.Document(stream=data) + + +Opening Files from Cloud Services +"""""""""""""""""""""""""""""""""""""" + +For further examples which deal with files held on typical cloud services please see these `Cloud Interactions code snippets `_. + + + +---------- + + +Opening Django Files +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Django implements a `File Storage API `_ to store files. The default is the `FileSystemStorage `_, but the `django-storages `_ library provides a number of other storage backends. + +You can open the file, move the contents into memory, then pass the contents to |PyMuPDF| as a stream. + +.. code-block:: python + + import pymupdf + from django.core.files.storage import default_storage + + from .models import MyModel + + obj = MyModel.objects.get(id=1) + with default_storage.open(obj.file.name) as f: + data = f.read() + + doc = pymupdf.Document(stream=data) + +Please note that if the file you open is large, you may run out of memory. + +The File Storage API works well if you're using different storage backends in different environments. If you're only using the `FileSystemStorage`, you can simply use the `obj.file.name` to open the file directly with |PyMuPDF| as shown in an earlier example. + + +---------- + + + +Opening Files as Text +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +|PyMuPDF| has the capability to open any plain text file as a document. In order to do this you should provide the `filetype` parameter for the `pymupdf.open` function as `"txt"`. + +.. code-block:: python + + doc = pymupdf.open("my_program.py", filetype="txt") + + +In this way you are able to open a variety of file types and perform the typical **non-PDF** specific features like text searching, text extracting and page rendering. Obviously, once you have rendered your `txt` content, then saving as |PDF| or merging with other |PDF| files is no problem. + + +Examples +"""""""""""""""""" + + +Opening a `C#` file +........................... + + +.. code-block:: python + + doc = pymupdf.open("MyClass.cs", filetype="txt") + + +Opening an ``XML`` file +........................... + +.. code-block:: python + + doc = pymupdf.open("my_data.xml", filetype="txt") + + +Opening a `JSON` file +........................... + +.. code-block:: python + + doc = pymupdf.open("more_of_my_data.json", filetype="txt") + + +And so on! + +As you can imagine many text based file formats can be *very simply opened* and *interpreted* by |PyMuPDF|. This can make data analysis and extraction for a wide range of previously unavailable files possible. + + +.. include:: footer.rst diff --git a/docs/identity.rst b/docs/identity.rst index 7e87ade68..ae9de70a9 100644 --- a/docs/identity.rst +++ b/docs/identity.rst @@ -6,15 +6,15 @@ Identity ============ -Identity is a :ref:`Matrix` that performs no action -- to be used whenever the syntax requires a matrix, but no actual transformation should take place. It has the form *fitz.Matrix(1, 0, 0, 1, 0, 0)*. +Identity is a :ref:`Matrix` that performs no action -- to be used whenever the syntax requires a matrix, but no actual transformation should take place. It has the form *pymupdf.Matrix(1, 0, 0, 1, 0, 0)*. Identity is a constant, an "immutable" object. So, all of its matrix properties are read-only and its methods are disabled. If you need a **mutable** identity matrix as a starting point, use one of the following statements:: - >>> m = fitz.Matrix(1, 0, 0, 1, 0, 0) # specify the values - >>> m = fitz.Matrix(1, 1) # use scaling by factor 1 - >>> m = fitz.Matrix(0) # use rotation by zero degrees - >>> m = fitz.Matrix(fitz.Identity) # make a copy of Identity + >>> m = pymupdf.Matrix(1, 0, 0, 1, 0, 0) # specify the values + >>> m = pymupdf.Matrix(1, 1) # use scaling by factor 1 + >>> m = pymupdf.Matrix(0) # use rotation by zero degrees + >>> m = pymupdf.Matrix(pymupdf.Identity) # make a copy of Identity .. include:: footer.rst diff --git a/docs/images/icons/icon-docx.svg b/docs/images/icons/icon-docx.svg new file mode 100644 index 000000000..dfc9105e8 --- /dev/null +++ b/docs/images/icons/icon-docx.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + diff --git a/docs/images/icons/icon-hangul.svg b/docs/images/icons/icon-hangul.svg new file mode 100644 index 000000000..58c8f8725 --- /dev/null +++ b/docs/images/icons/icon-hangul.svg @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + diff --git a/docs/images/icons/icon-pptx.svg b/docs/images/icons/icon-pptx.svg new file mode 100644 index 000000000..b61433426 --- /dev/null +++ b/docs/images/icons/icon-pptx.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + diff --git a/docs/images/icons/icon-txt.svg b/docs/images/icons/icon-txt.svg new file mode 100644 index 000000000..357627612 --- /dev/null +++ b/docs/images/icons/icon-txt.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/icons/icon-xlsx.svg b/docs/images/icons/icon-xlsx.svg new file mode 100644 index 000000000..55aa7c261 --- /dev/null +++ b/docs/images/icons/icon-xlsx.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + diff --git a/docs/images/img-coordinate-space.png b/docs/images/img-coordinate-space.png new file mode 100644 index 000000000..ef3bcd5c9 Binary files /dev/null and b/docs/images/img-coordinate-space.png differ diff --git a/docs/images/img-encoding.jpg b/docs/images/img-encoding.jpg deleted file mode 100644 index 02ce10581..000000000 Binary files a/docs/images/img-encoding.jpg and /dev/null differ diff --git a/docs/images/img-findtables.jpg b/docs/images/img-findtables.jpg new file mode 100644 index 000000000..acbaf9845 Binary files /dev/null and b/docs/images/img-findtables.jpg differ diff --git a/docs/images/img-freetext.jpg b/docs/images/img-freetext1.jpg similarity index 100% rename from docs/images/img-freetext.jpg rename to docs/images/img-freetext1.jpg diff --git a/docs/images/img-freetext2.jpg b/docs/images/img-freetext2.jpg new file mode 100644 index 000000000..36fe4f47c Binary files /dev/null and b/docs/images/img-freetext2.jpg differ diff --git a/docs/images/img-htmlbox1.png b/docs/images/img-htmlbox1.png new file mode 100644 index 000000000..6d5f798ee Binary files /dev/null and b/docs/images/img-htmlbox1.png differ diff --git a/docs/images/img-htmlbox2.png b/docs/images/img-htmlbox2.png new file mode 100644 index 000000000..7d46a493f Binary files /dev/null and b/docs/images/img-htmlbox2.png differ diff --git a/docs/images/img-htmlbox3.png b/docs/images/img-htmlbox3.png new file mode 100644 index 000000000..539fe376b Binary files /dev/null and b/docs/images/img-htmlbox3.png differ diff --git a/docs/images/img-htmlbox4.png b/docs/images/img-htmlbox4.png new file mode 100644 index 000000000..633339061 Binary files /dev/null and b/docs/images/img-htmlbox4.png differ diff --git a/docs/images/img-htmlbox5.png b/docs/images/img-htmlbox5.png new file mode 100644 index 000000000..69702f560 Binary files /dev/null and b/docs/images/img-htmlbox5.png differ diff --git a/docs/images/img-matrix-9.png b/docs/images/img-matrix-9.png new file mode 100644 index 000000000..ee6be7e2d Binary files /dev/null and b/docs/images/img-matrix-9.png differ diff --git a/docs/images/img-rotate.png b/docs/images/img-rotate.png index dcfcf5de2..22c19f85e 100644 Binary files a/docs/images/img-rotate.png and b/docs/images/img-rotate.png differ diff --git a/docs/images/img-textbox.jpg b/docs/images/img-textbox.jpg deleted file mode 100644 index 361772423..000000000 Binary files a/docs/images/img-textbox.jpg and /dev/null differ diff --git a/docs/images/pymupdf-logo.png b/docs/images/pymupdf-logo.png index 500be0230..2ea48ca60 100644 Binary files a/docs/images/pymupdf-logo.png and b/docs/images/pymupdf-logo.png differ diff --git a/docs/images/pymupdf-sidebar-logo.png b/docs/images/pymupdf-sidebar-logo.png deleted file mode 100644 index 5a1e37729..000000000 Binary files a/docs/images/pymupdf-sidebar-logo.png and /dev/null differ diff --git a/docs/images/spikes-no.png b/docs/images/spikes-no.png new file mode 100644 index 000000000..ac26eeedb Binary files /dev/null and b/docs/images/spikes-no.png differ diff --git a/docs/images/spikes-yes.png b/docs/images/spikes-yes.png new file mode 100644 index 000000000..ac68dad5e Binary files /dev/null and b/docs/images/spikes-yes.png differ diff --git a/docs/index.rst b/docs/index.rst index 75ca5178f..c61404358 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,21 +2,31 @@ .. This is the TOC in the sidebar! +.. raw:: html -Welcome to :title:`PyMuPDF` + + +Welcome to |PyMuPDF| ================================ -.. - .. image:: images/pymupdf-logo.png - :align: left - :scale: 10% +|PyMuPDF| is a high-performance **Python** library for data extraction, analysis, conversion & manipulation of |PDF| (and other) documents. +|PyMuPDF| is hosted on `GitHub `_ and registered on `PyPI `_. -:title:`PyMuPDF` is an enhanced :title:`Python` binding for `MuPDF `_ -- a lightweight :title:`PDF`, :title:`XPS`, and :title:`E-book` viewer, renderer, and toolkit, which is maintained and developed by :title:`Artifex Software, Inc`. -:title:`PyMuPDF` is hosted on `GitHub `_ and registered on `PyPI `_. +---- -| +This documentation covers all versions up to |version|. ---- @@ -25,6 +35,8 @@ Welcome to :title:`PyMuPDF` :maxdepth: 1 about.rst + pymupdf4llm/index.rst + pymupdf-pro.rst .. toctree:: @@ -34,6 +46,8 @@ Welcome to :title:`PyMuPDF` installation.rst the-basics.rst tutorial.rst + rag.rst + resources.rst @@ -68,16 +82,8 @@ Welcome to :title:`PyMuPDF` changes.rst znames.rst - - -Find out about PyMuPDF Utilities -------------------------------------------------- - -The :title:`GitHub` repository `PyMuPDF-Utilities `_ contains a full range of examples, demonstrations and use cases. - - - - +| +---- diff --git a/docs/installation.rst b/docs/installation.rst index f31bdb606..c0056ae29 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -6,21 +6,33 @@ Installation ============= Requirements ---------------- +--------------------------------------------------------- All the examples below assume that you are running inside a Python virtual environment. See: https://docs.python.org/3/library/venv.html for details. +We also assume that `pip` is up to date. + +For example: + +* Windows:: -For example:: + py -m venv pymupdf-venv + .\pymupdf-venv\Scripts\activate + python -m pip install --upgrade pip + +* Linux, MacOS:: python -m venv pymupdf-venv . pymupdf-venv/bin/activate + python -m pip install --upgrade pip -PyMuPDF should be installed using pip with:: +Installation +--------------------------------------------------------- + +|PyMuPDF| should be installed using pip with:: - python -m pip install --upgrade pip - python -m pip install --upgrade pymupdf + pip install --upgrade pymupdf This will install from a Python wheel if one is available for your platform. @@ -31,127 +43,208 @@ Installation when a suitable wheel is not available If a suitable Python wheel is not available, pip will automatically build from source using a Python sdist. -**This requires C/C++ development tools and SWIG to be installed**: - -* On Unix-style systems such as Linux, OpenBSD and FreeBSD, - use the system package manager to install SWIG. - - * For example on Debian Linux, do: `sudo apt install swig` +**This requires C/C++ development tools to be installed**: * On Windows: - * Install Visual Studio 2019. If not installed in a standard location, set + * + Install Visual Studio 2019. If not installed in a standard location, set environmental variable `PYMUPDF_SETUP_DEVENV` to the location of the `devenv.com` binary. - - * Having other installed versions of Visual Studio, for example Visual - Studio 2022, can cause problems because one can end up with MuPDF and - PyMuPDF code being compiled with different compiler versions. - - * Install SWIG by following the instructions at: - https://swig.org/Doc4.0/Windows.html#Windows_installation -* On MacOS, install MacPorts using the instructions at: - https://www.macports.org/install.php + * + Having other installed versions of Visual Studio, for example Visual Studio + 2022, can cause problems because one can end up with MuPDF and PyMuPDF code + being compiled with different compiler versions. - * Then install SWIG with: `sudo port install swig` - * You may also need: `sudo port install swig-python` +The build will automatically download and build MuPDF. -As of `PyMuPDF-1.20.0`, the required MuPDF source code is already in the -sdist and is automatically built into PyMuPDF. +.. _problems-after-installation: -Notes +Problems after installation --------------------------------------------------------- -Wheels are available for Windows (32-bit Intel, 64-bit Intel), Linux (64-bit Intel, 64-bit ARM) and Mac OSX (64-bit Intel, 64-bit ARM), Python versions 3.7 and up. +* On Windows, Python error:: -Wheels are not available for Python installed with `Chocolatey -`_ on Windows. Instead install Python -using the Windows installer from the python.org website, see: -http://www.python.org/downloads + ImportError: DLL load failed while importing _extra -PyMuPDF does not support Python versions prior to 3.7. Older wheels can be found in `this `_ repository and on `PyPI `_. -Please note that we generally follow the official Python release schedules. For Python versions dropping out of official support this means, that generation of wheels will also be ceased for them. + This has been occasionally seen if `MSVCP140.dll` is missing, and appears + to be caused by a bug in some versions (2015-2017) of `Microsoft Visual C++ + Redistributables`. -There are no **mandatory** external dependencies. However, some optional feature are available only if additional components are installed: + It is recommended to search for `MSVCP140.dll` in https://msdn.com + to find instructions for how to reinstall it. For example + https://learn.microsoft.com/cpp/windows/latest-supported-vc-redist has + permalinks to the latest supported versions. -* `Pillow `_ is required for :meth:`Pixmap.pil_save` and :meth:`Pixmap.pil_tobytes`. -* `fontTools `_ is required for :meth:`Document.subset_fonts`. -* `pymupdf-fonts `_ is a collection of nice fonts to be used for text output methods. -* `Tesseract-OCR `_ for optical character recognition in images and document pages. Tesseract is separate software, not a Python package. To enable OCR functions in PyMuPDF, the software must be installed and the system environment variable `"TESSDATA_PREFIX"` must be defined and contain the `tessdata` folder name of the Tesseract installation location. See below. + See https://github.com/pymupdf/PyMuPDF/issues/2678 for more details. -.. note:: You can install these additional components at any time -- before or after installing PyMuPDF. PyMuPDF will detect their presence during import or when the respective functions are being used. +* + Python error:: + ModuleNotFoundError: No module named 'frontend' + + This can happen if PyMuPDF's legacy name `fitz` is used (for example `import + fitz` instead of `import pymupdf`), and an unrelated Python package called + `fitz` (https://pypi.org/project/fitz/) is installed. -Installation from source without using an sdist ---------------------------------------------------------- + The fitz package appears to be no longer maintained (the latest release is + from 2017), but unfortunately it does not seem possible to remove it from + pypi.org. It does not even work on its own, as well as breaking the use of + PyMuPDF's legacy name. -* First get a PyMuPDF source tree: + There are a few ways to avoid this problem: - * Clone the git repository at https://github.com/pymupdf/PyMuPDF, - for example:: + * + Use `import pymupdf` instead of `import fitz`, and update one's code to + match. - git clone https://github.com/pymupdf/PyMuPDF.git + * Or uninstall the `fitz` package and reinstall PyMuPDF:: + + pip uninstall fitz + pip install --force-reinstall pymupdf - * Or download and extract a `.zip` or `.tar.gz` source release from - https://github.com/pymupdf/PyMuPDF/releases. + * Or use `import pymupdf as fitz`. However this has not been well tested. -* Install C/C++ development tools and SWIG as described above. +* With Jupyter labs on Apple Silicon (arm64), Python error:: -* Build and install PyMuPDF:: + ImportError: /opt/conda/lib/python3.11/site-packages/pymupdf/libmupdf.so.24.4: undefined symbol: fz_pclm_write_options_usage - cd PyMuPDF && python setup.py install + This appears to be a problem in Jupyter labs; see: + https://github.com/pymupdf/PyMuPDF/issues/3643#issuecomment-2210588778. + +* On Windows, Python error:: + + ImportError: dynamic module does not define module export function (PyInit__extra) - This will automatically download a specific hard-coded MuPDF source release, - and build it into PyMuPDF. + This was reported 2025-03-26 in https://github.com/pymupdf/PyMuPDF/issues/4405. -.. note:: When running Python scripts that use PyMuPDF, make sure that the - current directory is not the `PyMuPDF/` directory. + The fix appears to be to install the latest `VC_redist.x64.exe`. - Otherwise, confusingly, Python will attempt to import `fitz` from the local - `fitz/` directory, which will fail because it only contains source files. +Notes +--------------------------------------------------------- -Running tests +* + Wheels are available for the following platforms: + + * Windows 32-bit Intel. + * Windows 64-bit Intel. + * Linux 64-bit Intel. + * Linux 64-bit ARM. + * MacOS 64-bit Intel. + * MacOS 64-bit ARM. + + Details: + + * We release a single wheel for each of the above platforms. + + * + Each wheel uses the Python Stable ABI of the current oldest supported + Python version (currently 3.9), and so works with all later Python + versions, including new Python releases. + + * + Wheels are tested on all Python versions currently marked as "Supported" + on https://devguide.python.org/versions/, currently |python_versions|. + +* + Wheels are not available for Python installed with `Chocolatey + `_ on Windows. Instead install Python + using the Windows installer from the python.org website, see: + http://www.python.org/downloads + +* + Wheels are not available for Linux-aarch64 with `Musl libc + `_ (For example `Alpine Linux + `_ on aarch64), and building from source is known + to fail. + +* There are no **mandatory** external dependencies. However, some optional feature are available only if additional components are installed: + + * `Pillow `_ is required for :meth:`Pixmap.pil_save` and :meth:`Pixmap.pil_tobytes`. + * `fontTools `_ is required for :meth:`Document.subset_fonts`. + * `pymupdf-fonts `_ is a collection of nice fonts to be used for text output methods. + * + `Tesseract-OCR `_ for optical + character recognition in images and document pages. Tesseract is separate + software, not a Python package. To enable OCR functions in PyMuPDF, + Tesseract must be installed and the `tessdata` folder name specified; see + below. + + .. note:: You can install these additional components at any time -- before or after installing PyMuPDF. PyMuPDF will detect their presence during import or when the respective functions are being used. + + +Build and install from a local PyMuPDF source tree --------------------------------------------------------- -Having a PyMuPDF tree available allows one to run PyMuPDF's `pytest` test -suite:: +Initial setup: - pip install pytest fontTools - pytest PyMuPDF/tests +* Install C/C++ development tools as described above. +* Enter a Python venv and update pip, as described above. +* Get a PyMuPDF source tree: -Building and testing with git checkouts of PyMuPDF and MuPDF ------------------------------------------------------------------------------------------------------------------- + * Clone the PyMuPDF git repository:: -Things to do: + git clone https://github.com/pymupdf/PyMuPDF.git -* Install C/C++ development tools and SWIG as described above. -* Get PyMuPDF. -* Get MuPDF. -* Create a Python virtual environment. -* Build PyMuPDF with environmental variable `PYMUPDF_SETUP_MUPDF_BUILD` set - to the path of the local MuPDF checkout. -* Run PyMuPDF tests. + * + Or download and extract a `.zip` or `.tar.gz` source release from + https://github.com/pymupdf/PyMuPDF/releases. -For example:: +Then one can build PyMuPDF in two ways: - git clone -b 1.22 https://github.com/pymupdf/PyMuPDF.git - git clone -b 1.22.x --recursive https://ghostscript.com:/home/git/mupdf.git - python -m venv pymupdf-venv - . pymupdf-venv/bin/activate - cd PyMuPDF - PYMUPDF_SETUP_MUPDF_BUILD=../mupdf python setup.py install - cd .. - pip install pytest fontTools - pytest PyMuPDF +* Build and install PyMuPDF with default MuPDF version:: + + cd PyMuPDF && pip install . + + This will automatically download a specific hard-coded MuPDF source + release, and build it into PyMuPDF. + +* Or build and install PyMuPDF using a local MuPDF source tree: + + * Clone the MuPDF git repository:: + git clone --recursive https://git.ghostscript.com/mupdf.git -Using a non-default MuPDF + * + Build PyMuPDF, specifying the location of the local MuPDF tree with the + environmental variables `PYMUPDF_SETUP_MUPDF_BUILD`:: + + cd PyMuPDF && PYMUPDF_SETUP_MUPDF_BUILD=../mupdf pip install . + +Also, one can build for different Python versions in the same PyMuPDF tree: + +* + PyMuPDF will build for the version of Python that is being used to run + `pip`. To run `pip` with a specific Python version, use `python -m pip` + instead of `pip`. + + So for example on Windows one can build different versions with:: + + cd PyMuPDF && py -3.9 -m pip install . + + or:: + + cd PyMuPDF && py -3.10-32 -m pip install . + + +Running tests --------------------------------------------------------- +Having a PyMuPDF tree available allows one to run PyMuPDF's `pytest` test +suite:: + + pip install pytest fontTools + pytest PyMuPDF/tests + + + +Notes about using a non-default MuPDF +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Using a non-default build of MuPDF by setting environmental variable `PYMUPDF_SETUP_MUPDF_BUILD` can cause various things to go wrong and so is not generally supported: @@ -171,27 +264,70 @@ not generally supported: command line. +Official PyMuPDF Linux wheels may not install on older Linux systems +-------------------------------------------------------------------- + +Releases of PyMuPDF are incompatible with older Linux systems. + +For example as of 2025-09-03, `pip install pymupdf` does not work on some AWS +Lambda systems - see https://github.com/pymupdf/PyMuPDF/discussions/4631. + +This is because official PyMuPDF Linux wheels are built with a version of +glibc determined by the current Python manylinux environment. These wheels are +incompatible with Linux systems that have an older glibc. + +The official Python manylinux environment is updated periodically to use newer +glibc versions, so new releases of PyMuPDF become increasingly incompatible +with older Linux systems. + +There is nothing that can be done about this, other than updating older Linux +systems, or building PyMuPDF locally from source. + +For more details, please see: `Python Packaging Authority `_. + + +Packaging +--------- + +See :doc:`packaging`. + + +Using with Pyodide +------------------ + +See :doc:`pyodide`. + + +.. _installation_ocr: + Enabling Integrated OCR Support --------------------------------------------------------- If you do not intend to use this feature, skip this step. Otherwise, it is required for both installation paths: **from wheels and from sources.** -PyMuPDF will already contain all the logic to support OCR functions. But it additionally does need Tesseract's language support data, so installation of Tesseract-OCR is still required. +PyMuPDF will already contain all the logic to support OCR functions. But it additionally does need `Tesseract’s language support data `_. -The language support folder location must be communicated either via storing it in the environment variable `"TESSDATA_PREFIX"`, or as a parameter in the applicable functions. +If not specified explicitly, PyMuPDF will attempt to find the installed +Tesseract's tessdata, but this should probably not be relied upon. -So for a working OCR functionality, make sure to complete this checklist: +Otherwise PyMuPDF requires that Tesseract's language support folder is +specified explicitly either in PyMuPDF OCR functions' `tessdata` arguments or +`os.environ["TESSDATA_PREFIX"]`. -1. Install Tesseract. +So for a working OCR functionality, make sure to complete this checklist: -2. Locate Tesseract's language support folder. Typically you will find it here: - - Windows: `C:/Program Files/Tesseract-OCR/tessdata` - - Unix systems: `/usr/share/tesseract-ocr/4.00/tessdata` +1. Locate Tesseract's language support folder. Typically you will find it here: -3. Set the environment variable `TESSDATA_PREFIX` - - Windows: `setx TESSDATA_PREFIX "C:/Program Files/Tesseract-OCR/tessdata"` - - Unix systems: `declare -x TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata` + * Windows: `C:/Program Files/Tesseract-OCR/tessdata` + * Unix systems: `/usr/share/tesseract-ocr/4.00/tessdata` -.. note:: On Windows systems, this must happen outside Python -- before starting your script. Just manipulating `os.environ` will not work! +2. Specify the language support folder when calling PyMuPDF OCR functions: + + * Set the `tessdata` argument. + * Or set `os.environ["TESSDATA_PREFIX"]` from within Python. + * Or set environment variable `TESSDATA_PREFIX` before running Python, for example: + + * Windows: `setx TESSDATA_PREFIX "C:/Program Files/Tesseract-OCR/tessdata"` + * Unix systems: `declare -x TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata` .. include:: footer.rst diff --git a/docs/intro.rst b/docs/intro.rst deleted file mode 100644 index 4d5ae70b3..000000000 --- a/docs/intro.rst +++ /dev/null @@ -1,64 +0,0 @@ -.. include:: header.rst - -Introduction -============== - -.. image:: images/pymupdf-logo.png - :align: center - :scale: 10% - -.. - Don't delete the bar symbol - it forces a line break beneath the image - which is required. - -| - -**PyMuPDF** is a Python binding for `MuPDF `_ -- a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which is maintained and developed by Artifex Software, Inc - -MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB, MOBI and FB2 (e-books) formats, and it is known for its top performance and high rendering quality. - -MuPDF stands out among all similar products for its top rendering capability and unsurpassed processing speed. At the same time, its "light weight" makes it an excellent choice for platforms where resources are typically limited, like smartphones. - -Check this out yourself and compare the various free PDF-viewers. In terms of speed and rendering quality `SumatraPDF `_ ranges at the top (apart from MuPDF's own standalone viewer) -- since it has changed its library basis to MuPDF! - -With PyMuPDF you can access files with extensions like “.pdf”, “.xps”, “.oxps”, “.cbz”, “.fb2”, ".mobi" or “.epub”. In addition, about 10 popular image formats can also be opened and handled like documents. - -PyMuPDF provides access to many important functions of MuPDF from within a Python environment, and we are continuously seeking to expand this function set. - -PyMuPDF runs and has been tested on Mac, Linux and Windows for Python versions 3.7 [#f1]_ and up. Other platforms should work too, as long as MuPDF and Python support them. - -PyMuPDF is hosted on `GitHub `_ and registered on `PyPI `_. - -For MS Windows, Mac OSX and Linux Python wheels are available -- please see the installation chapter. - -The GitHub repository `PyMuPDF-Utilities `_ contains a full range of examples, demonstrations and use cases. - -Note on the Name *fitz* --------------------------- -The top level Python import name for this library is **"fitz"**. This has historical reasons: - -The original rendering library for MuPDF was called *Libart*. - -*"After Artifex Software acquired the MuPDF project, the development focus shifted on writing a new modern graphics library called "Fitz". Fitz was originally intended as an R&D project to replace the aging Ghostscript graphics library, but has instead become the rendering engine powering MuPDF."* (Quoted from `Wikipedia `_). - -So PyMuPDF **cannot coexist** with packages named "fitz" in the same Python environment. - -License and Copyright ----------------------- -In order to comply with MuPDF’s dual licensing model, PyMuPDF has entered into an agreement with Artifex who has the right to sublicense PyMuPDF to third parties. - -PyMuPDF and MuPDF are now available under both, open-source AGPL and commercial license agreements. Please read the full text of the AGPL license agreement, available in the distribution material (file COPYING) and `here `_, to ensure that your use case complies with the guidelines of the license. If you determine you cannot meet the requirements of the AGPL, please contact `Artifex `_ for more information regarding a commercial license. - -Artifex is the exclusive commercial licensing agent for MuPDF. - -Artifex, the Artifex logo, MuPDF, and the MuPDF logo are registered trademarks of Artifex Software Inc. © 2022 Artifex Software, Inc. All rights reserved. - -.. include:: version.rst - ------ - -.. rubric:: Footnotes - - -.. [#f1] PyMuPDF generally only supports Python versions that are still maintained by the Python Software Foundation. Once a Python version is being retired, PyMuPDF support will also be ended. This means that wheels for a retired Python platform will no longer be provided, and that Python language features may be used that did not exist in the retired Python version. - -.. include:: footer.rst diff --git a/docs/irect.rst b/docs/irect.rst index c61115589..4d1fc55bd 100644 --- a/docs/irect.rst +++ b/docs/irect.rst @@ -72,16 +72,16 @@ IRect is a rectangular bounding box, very similar to :ref:`Rect`, except that al .. method:: contains(x) - Checks whether *x* is contained in the rectangle. It may be :data:`rect_like`, :data:`point_like` or a number. If *x* is an empty rectangle, this is always true. Conversely, if the rectangle is empty this is always *False*, if *x* is not an empty rectangle and not a number. If *x* is a number, it will be checked to be one of the four components. *x in irect* and *irect.contains(x)* are equivalent. + Checks whether *x* is contained in the rectangle. It may be :data:`rect_like`, :data:`point_like` or a number. If *x* is an empty rectangle, this is always true. Conversely, if the rectangle is empty this is always ``False``, if *x* is not an empty rectangle and not a number. If *x* is a number, it will be checked to be one of the four components. *x in irect* and *irect.contains(x)* are equivalent. :arg x: the object to check. - :type x: :ref:`IRect` or :ref:`Rect` or :ref:`Point` or int + :type x: :ref:`IRect` or :ref:`Rect` or :ref:`Point` or `int`. :rtype: bool .. method:: intersects(r) - Checks whether the rectangle and the :data:`rect_like` "r" contain a common non-empty :ref:`IRect`. This will always be *False* if either is infinite or empty. + Checks whether the rectangle and the :data:`rect_like` "r" contain a common non-empty :ref:`IRect`. This will always be ``False`` if either is infinite or empty. :arg rect_like r: the rectangle to check. @@ -200,13 +200,13 @@ IRect is a rectangular bounding box, very similar to :ref:`Rect`, except that al .. attribute:: is_infinite - *True* if rectangle is infinite, *False* otherwise. + ``True`` if rectangle is infinite, ``False`` otherwise. :type: bool .. attribute:: is_empty - *True* if rectangle is empty, *False* otherwise. + ``True`` if rectangle is empty, ``False`` otherwise. :type: bool diff --git a/docs/link.rst b/docs/link.rst index 6159db88a..e1761d5f9 100644 --- a/docs/link.rst +++ b/docs/link.rst @@ -18,10 +18,10 @@ There is a parent-child relationship between a link and its page. If the page ob :attr:`Link.border` border characteristics :attr:`Link.colors` border line color :attr:`Link.dest` points to destination details -:attr:`Link.is_external` external destination? +:attr:`Link.is_external` checks if the link is an external destination :attr:`Link.flags` link annotation flags :attr:`Link.next` points to next link -:attr:`Link.rect` clickable area in untransformed coordinates. +:attr:`Link.rect` clickable area in untransformed coordinates :attr:`Link.uri` link destination :attr:`Link.xref` :data:`xref` number of the entry ========================= ============================================ @@ -69,13 +69,13 @@ There is a parent-child relationship between a link and its page. If the page ob .. attribute:: colors - Meaningful for PDF only: A dictionary of two tuples of floats in range `0 <= float <= 1` specifying the *stroke* and the interior (*fill*) colors. If not a PDF, *None* is returned. As mentioned above, the fill color is always `None` for links. The stroke color is used for the border of the link rectangle. The length of the tuple implicitly determines the colorspace: 1 = GRAY, 3 = RGB, 4 = CMYK. So `(1.0, 0.0, 0.0)` stands for RGB color red. The value of each float *f* is mapped to the integer value *i* in range 0 to 255 via the computation *f = i / 255*. + Meaningful for PDF only: A dictionary of two tuples of floats in range `0 <= float <= 1` specifying the *stroke* and the interior (*fill*) colors. If not a PDF, ``None`` is returned. As mentioned above, the fill color is always `None` for links. The stroke color is used for the border of the link rectangle. The length of the tuple implicitly determines the colorspace: 1 = GRAY, 3 = RGB, 4 = CMYK. So `(1.0, 0.0, 0.0)` stands for RGB color red. The value of each float *f* is mapped to the integer value *i* in range 0 to 255 via the computation *f = i / 255*. :rtype: dict .. attribute:: border - Meaningful for PDF only: A dictionary containing border characteristics. It will be *None* for non-PDFs and an empty dictionary if no border information exists. The following keys can occur: + Meaningful for PDF only: A dictionary containing border characteristics. It will be ``None`` for non-PDFs and an empty dictionary if no border information exists. The following keys can occur: * *width* -- a float indicating the border thickness in points. The value is -1.0 if no width is specified. @@ -91,7 +91,7 @@ There is a parent-child relationship between a link and its page. If the page ob :type: :ref:`Rect` - .. attribute:: isExternal + .. attribute:: is_external A bool specifying whether the link target is outside of the current document. @@ -99,7 +99,22 @@ There is a parent-child relationship between a link and its page. If the page ob .. attribute:: uri - A string specifying the link target. The meaning of this property should be evaluated in conjunction with property *isExternal*. The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_2_LNK_1'* for page number 2 (1-based) in an XPS document. + A string specifying the link target. The meaning of this property should + be evaluated in conjunction with property `is_external`: + + * + `is_external` is true: `uri` points to some target outside the current + PDF, which may be an internet resource (`uri` starts with ``http://`` or + similar), another file (`uri` starts with "file:" or "file://") or some + other service like an e-mail address (`uri` starts with ``mailto:``). + + * + `is_external` is false: `uri` will be `None` or point to an + internal location. In case of PDF documents, this should either be + *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named + location. The format varies for other document types, for example + "../FixedDoc.fdoc#PG_2_LNK_1" for page number 2 (1-based) in an XPS + document. :type: str @@ -111,7 +126,7 @@ There is a parent-child relationship between a link and its page. If the page ob .. attribute:: next - The next link or *None*. + The next link or ``None``. :type: *Link* diff --git a/docs/locales/ja/.readthedocs.yaml b/docs/locales/ja/.readthedocs.yaml new file mode 100644 index 000000000..2941cb293 --- /dev/null +++ b/docs/locales/ja/.readthedocs.yaml @@ -0,0 +1,25 @@ +# .readthedocs.yaml +# Note: We use this dedicated yaml inside the locales/ja folder as RTD was having problems building a PDF +# This yaml is the same as the main one - it just removes the PDF build option + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.9" + # You can also specify other tool versions: + # nodejs: "16" + # rust: "1.55" + # golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/locales/ja/LC_MESSAGES/404.mo b/docs/locales/ja/LC_MESSAGES/404.mo new file mode 100644 index 000000000..939d7612e Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/404.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/404.po b/docs/locales/ja/LC_MESSAGES/404.po new file mode 100644 index 000000000..d87a5d25b --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/404.po @@ -0,0 +1,55 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header-404.rst:-1 7d1db678351e48a589812b0fc43a3c6e +msgid "Artifex" +msgstr "" + +#: ../../header-404.rst:-1 1240b3bfde8e4c75af72a7add2aa6ece +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "" + +#: ../../header-404.rst:-1 e9db42018d9749a3a1b1fcd7d2ffbad7 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "" + +#: ../../404.rst:4 47500cd0e1e4472092d58a3f1eb0de05 +msgid "404!" +msgstr "" + +#: ../../404.rst:7 00f23d61373848a99a4874c470e1479a +msgid "**This page is not available.**" +msgstr "**このページは利用できません。**" + +#: ../../404.rst:10 108326f9c443411e998790ba1e98be35 +msgid "Please use the menu or search to find what you are looking for." +msgstr "メニューまたは検索を使用して、お探しのものを見つけてください。" + +#: ../../footer.rst:60 eab1290efc024ba1b73c0d32ce382104 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/about-feature-matrix.mo b/docs/locales/ja/LC_MESSAGES/about-feature-matrix.mo new file mode 100644 index 000000000..48abd9ab2 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/about-feature-matrix.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/about-feature-matrix.po b/docs/locales/ja/LC_MESSAGES/about-feature-matrix.po new file mode 100644 index 000000000..d2adae1de --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/about-feature-matrix.po @@ -0,0 +1,21 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2023-08-16 14:20+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + diff --git a/docs/locales/ja/LC_MESSAGES/about-performance.mo b/docs/locales/ja/LC_MESSAGES/about-performance.mo new file mode 100644 index 000000000..48abd9ab2 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/about-performance.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/about-performance.po b/docs/locales/ja/LC_MESSAGES/about-performance.po new file mode 100644 index 000000000..d2adae1de --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/about-performance.po @@ -0,0 +1,21 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2023-08-16 14:20+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + diff --git a/docs/locales/ja/LC_MESSAGES/about.mo b/docs/locales/ja/LC_MESSAGES/about.mo new file mode 100644 index 000000000..c90b81e39 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/about.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/about.po b/docs/locales/ja/LC_MESSAGES/about.po new file mode 100644 index 000000000..26d2ca396 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/about.po @@ -0,0 +1,222 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 5d238527c8e04fd5914f93988259bb02 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 3a68f07f8c764256a00c938c461be585 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 5af89f9406714567bc9b29c2e8724cdf +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../about.rst:10 24b91e30bb5842a783ab4c2dd528a96e +msgid "Features Comparison" +msgstr "機能の比較" + +#: ../../about.rst:16 57b8145e5e9549d3821d11e7559032da +msgid "Feature Matrix" +msgstr "機能比較表" + +#: ../../about.rst:18 e847545ce066499184063dec33a7ea41 +msgid "" +"The following table illustrates how |PyMuPDF| compares with other typical" +" solutions." +msgstr "以下の表は、|PyMuPDF| が他の典型的な解決策と比較した場合の違いを示しています。" + +#: ../../about.rst:47 12835a57de7d46edb91ab07d15e05149 +msgid "" +"A note about **Office** document types (DOCX, XLXS, PPTX) and **Hangul** " +"documents (HWPX). These documents can be loaded into |PyMuPDF| and you " +"will receive a :ref:`Document ` object." +msgstr "" + +#: ../../about.rst:49 f5c04dbbbd644019b42ccb6ed85023bc +msgid "There are some caveats:" +msgstr "" + +#: ../../about.rst:52 d325d0602b4840538d3af13f9ec4ce4d +msgid "we convert the input to **HTML** to layout the content." +msgstr "" + +#: ../../about.rst:53 b4b91b8b878140bca6308694c93edcad +msgid "because of this the original page separation has gone." +msgstr "" + +#: ../../about.rst:55 a54824efe7304d39ad9cfdfdbb3e60d1 +msgid "" +"When saving out the result any faithful representation of the original " +"layout cannot be expected." +msgstr "" + +#: ../../about.rst:57 2d37135544024d269b3719fba49d1f61 +msgid "" +"Therefore input files are mostly in a form that's useful for text " +"extraction." +msgstr "" + +#: ../../about.rst:65 f6793685c5514f36b0ce8ce27739aaa9 +msgid "Performance" +msgstr "パフォーマンス" + +#: ../../about.rst:69 5389aefbda2f4d8f99a537554c49d9e6 +msgid "" +"To benchmark |PyMuPDF| performance against a range of tasks a test suite " +"with a fixed set of :ref:`8 PDFs with a total of 7,031 " +"pages` containing text & images is used to obtain " +"performance timings." +msgstr "" +":ref:`8つのPDFファイル(合計7,031ページ)` " +"にテキストと画像が含まれている固定されたセットのテストスイートを使用して、|PyMuPDF| " +"のパフォーマンスをさまざまなタスクに対してベンチマークします。" + +#: ../../about.rst:72 a54635ae0fec493eaa26d5e43a746700 +msgid "Here are current results, grouped by task:" +msgstr "以下は、タスクごとにグループ化された現在の結果です:" + +#: ../../about.rst:81 3bcf7e6697dd4a299d5fc7bb0ce5828b +msgid "" +"For more detail regarding the methodology for these performance timings " +"see: :ref:`Performance Comparison Methodology`." +msgstr "これらのパフォーマンスのタイミングに関する方法の詳細については、:ref:`パフォーマンス比較方法` を参照してください。" + +#: ../../about.rst:86 4ada17d39d3d4a5581189a0db14b0791 +msgid "License and Copyright" +msgstr "ライセンスと著作権" + +#: ../../about.rst:90 d2b1eb94b7a9461b9b9bd6171afc1f0c +msgid "" +"|PyMuPDF| and |MuPDF| are now available under both, open-source " +"|AGPL| and commercial license agreements. Please read the full " +"text of the |AGPL| license agreement, available in the " +"distribution material (file COPYING) and `on the GNU license page " +"`_, to ensure that your use " +"case complies with the guidelines of the license. If you determine you " +"cannot meet the requirements of the |AGPL|, please contact " +"`Artifex `_ for more information " +"regarding a commercial license." +msgstr "" +"PyMuPDFとMuPDFは現在、オープンソースのAGPLと商用ライセンス契約の両方で提供されています。ライセンスのガイドラインに従うことを確認するため、配布資料(COPYINGファイル)と" +" `ここ `_ " +"にあるAGPLライセンス契約の全文をお読みください。AGPLの要件を満たせないと判断された場合は、商用ライセンスに関する詳細情報については、 " +"`Artifex `_ にお問い合わせください。" + +#: ../../about.rst:108 2dc4abc678434142a4c2d84996a9a606 +msgid "" +":title:`Artifex` is the exclusive commercial licensing agent for " +":title:`MuPDF`." +msgstr ":title:`Artifex` Artifexは、:title:`MuPDF` の独占的な商業ライセンスエージェントです。" + +#: ../../about.rst:110 f7805b9d0ffa4880a589062048e09c27 +msgid "" +":title:`Artifex`, the :title:`Artifex` logo, :title:`MuPDF`, and the " +":title:`MuPDF` logo are registered trademarks of :title:`Artifex Software" +" Inc.`" +msgstr "" +":title:`Artifex` 、:title:`Artifex` のロゴ、:title:`MuPDF` " +"、およびMuPDFのロゴは、:title:`Artifex Software Inc.` の登録商標です。" + +#: ../../version.rst:3 2f2f403fc96a411687d859e4bfbabdc9 +msgid "" +"This documentation covers **PyMuPDF v1.25.5** features as of **2025-03-31" +" 00:00:01**." +msgstr "" + +#: ../../version.rst:5 ed936de961bc470d8e4b0f8c4693effb +msgid "" +"The major and minor versions of |PyMuPDF| and |MuPDF| will always be the " +"same. Only the third qualifier (patch level) may deviate from that of " +"|MuPDF|." +msgstr "" + +#: ../../version.rst:7 54e62d11a1ba4032809ce7da1c4bf38d +msgid "" +"Typically PyMuPDF is released more frequently than MuPDF so it will often" +" be the case that the patch level of PyMuPDF will be greater than the " +"embedded MuPDF." +msgstr "" + +#: ../../version.rst:11 cc2b0e17f25a45278b71bb561fe3ec99 +msgid "For example PyMuPDF-1.24.5 contains MuPDF-1.24.2." +msgstr "" + +#: ../../version.rst:13 b8d1b658b77e49a681dbfe3cb80f1612 +msgid "Also see `pymupdf_version` and `mupdf_version`." +msgstr "" + +#: ../../footer.rst:60 b59252015bda410cabe17ba393eb9bfa +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF " +#~ "v1.23.0rc1** features as of **2023-08-10 " +#~ "00:00:01**." +#~ msgstr "" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.4**" +#~ " features as of **2023-09-26 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.5**" +#~ " features as of **2023-10-11 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.8**" +#~ " features as of **2023-12-19 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.26**" +#~ " features as of **2024-02-29 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.24.2**" +#~ " features as of **2024-04-17 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.24.10**" +#~ " features as of **2024-09-02 00:00:01**." +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/algebra.mo b/docs/locales/ja/LC_MESSAGES/algebra.mo new file mode 100644 index 000000000..30cb275bc Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/algebra.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/algebra.po b/docs/locales/ja/LC_MESSAGES/algebra.po new file mode 100644 index 000000000..5a4a00987 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/algebra.po @@ -0,0 +1,476 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 a244f13b9c764d21b6a4e35ef573b3fc +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 9bd00aa1c6e2464c98edbdfe23c4f225 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 0bb8ce2fa7b8439cb16f9ec725fae345 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../algebra.rst:6 a46fc8868909429d85fea1440fd28881 +msgid "Operator Algebra for Geometry Objects" +msgstr "幾何学オブジェクトのためのオペレーター代数" + +#: ../../algebra.rst:10 f2868bfbb77d48739b417240e859be75 +msgid "" +"Instances of classes :ref:`Point`, :ref:`IRect`, :ref:`Rect`, :ref:`Quad`" +" and :ref:`Matrix` are collectively also called \"geometry\" objects." +msgstr "" +"クラス :ref:`Point` 、 :ref:`IRect` 、 :ref:`Rect` 、 :ref:`Quad` 、および " +":ref:`Matrix` のインスタンスは、集合的に「幾何学オブジェクト」とも呼ばれます。" + +#: ../../algebra.rst:12 7c7e99aaa08449cc9a3e4a9c1891053e +msgid "" +"They all are special cases of Python sequences, see :ref:`SequenceTypes` " +"for more background." +msgstr "これらはすべてPythonシーケンスの特殊なケースであり、詳細については、PyMuPDFでのPythonシーケンスを引数として使用するを参照してください。" + +#: ../../algebra.rst:14 b6a45b1271a0423d9097d983f99dac54 +msgid "" +"We have defined operators for these classes that allow dealing with them " +"(almost) like ordinary numbers in terms of addition, subtraction, " +"multiplication, division, and some others." +msgstr "これらのクラスには、加算、減算、乗算、除算など、通常の数値とほぼ同じように取り扱うための演算子が定義されています。" + +#: ../../algebra.rst:16 b84d121c948844b78d31027bcf1cc8bb +msgid "This chapter is a synopsis of what is possible." +msgstr "この章では、可能な操作の要約を説明します。" + +#: ../../algebra.rst:19 8da4401664bf45689416d5a624615e21 +msgid "General Remarks" +msgstr "一般的な注意事項" + +#: ../../algebra.rst:20 b27e0413604140b6a4e3d9dd5604397e +msgid "" +"Operators can be either **binary** (i.e. involving two objects) or " +"**unary**." +msgstr "演算子は2つのオブジェクトを含むバイナリ演算子または単項演算子になります。" + +#: ../../algebra.rst:22 ff83ad4ef4924a928f7fe7c91f007de7 + +msgid "" +"The resulting type of **binary** operations is either a **new object of " +"the left operand's class,** a bool or (for dot products) a float." +msgstr "バイナリ演算の結果の型は、左オペランドのクラスの新しいオブジェクトまたはbool型です。" + +#: ../../algebra.rst:24 b6935b28e0494757bdefacd86e1f3a36 +msgid "" +"The result of **unary** operations is either a **new object** of the same" +" class, a bool or a float." +msgstr "単項演算の結果は、同じクラスの新しいオブジェクト、bool型、またはfloat型のいずれかです。" + +#: ../../algebra.rst:26 a1336f1afec74044ada260ccd870436a + +msgid "" +"The binary operators `+, -, *, /` are defined for all classes. They " +"*roughly* do what you would expect -- **except, that the second operand " +"...**" +msgstr "" +"バイナリ演算子 ``+`` 、``-`` 、``*`` 、 ``/`` " +"はすべてのクラスで定義されています。これらはおおよその期待通りの動作をしますが、2番目のオペランドは...(省略されました)。" + +#: ../../algebra.rst:28 e18cb846582247cb975a9ddc6340a73e +msgid "" +"may always be a number which then performs the operation on every " +"component of the first one," +msgstr "第2のオペランドは常に数値であり、それにより最初のオペランドのすべての要素に対して操作を行います。" + +#: ../../algebra.rst:29 e60ed0a7fa554df0a16910687c2c88be +msgid "" +"may always be a numeric sequence of the same length (2, 4 or 6) -- we " +"call such sequences :data:`point_like`, :data:`rect_like`, " +":data:`quad_like` or :data:`matrix_like`, respectively." +msgstr "" +"第2のオペランドは常に同じ長さの数値のシーケンスであることができます(2つ、4つ、または6つ) - それぞれ :data:`point_like`" +" 、 :data:`rect_like` 、 :data:`quad_like` 、または :data:`matrix_like` と呼びます。" + +#: ../../algebra.rst:31 d93daa3887f04faead0f46a53b4c2a9a + +msgid "" +"Rectangles support **additional binary** operations: **intersection** " +"(operator `\"&\"`), **union** (operator `\"|\"`) and **containment** " +"checking." +msgstr "矩形は追加のバイナリ演算をサポートしています:交差(演算子“&”)、結合(演算子“|”)、および包含チェック。" + +#: ../../algebra.rst:33 03d276c145154e428f81b4e854a12723 +msgid "" +"Binary operators fully support in-place operations. So if \"°\" is a " +"binary operator then the expression `a °= b` is always valid and the same" +" as `a = a ° b`. Therefore, be careful and do **not** do `p1 *= p2` for " +"two points, because thereafter \"p1\" is a **float**." +msgstr "バイナリ演算子は「インプレース演算」を完全にサポートしています。つまり、"°"がバイナリ演算子である場合、式 ``a °= b`` は常に有効であり、 ``a = a ° b`` と同じです。したがって、2つの点に対して ``p1 *= p2`` を行わないように注意してください。なぜなら、その後「p1」は浮動小数点数になってしまうからです。" + +#: ../../algebra.rst:37 b197551ac91841fba96a5406efd0e076 +msgid "Unary Operations" +msgstr "単項演算" + +#: ../../algebra.rst:40 ../../algebra.rst:59 299776843ce14340b3ffec69dbf18e83 +#: 9ef10cc27f674df49def7dbf4b132848 +msgid "Oper." +msgstr "演算" + +#: ../../algebra.rst:40 ../../algebra.rst:59 0723d28bc56b478c85107cedb40b5793 +#: 7bff453d3e3548e4910efaf0d7e280d4 +msgid "Result" +msgstr "結果" + +#: ../../algebra.rst:42 d7ac030d071642528ff3bf57fcc11bd3 +msgid "bool(OBJ)" +msgstr "" + +#: ../../algebra.rst:42 0d27ce4c315a43758b0c7f988ac2baca +msgid "is false exactly if all components of OBJ are zero" +msgstr "OBJのすべての成分がゼロの場合にのみfalse" + +#: ../../algebra.rst:43 3a98a962f2cb4317ad6986bbdec24e65 +msgid "abs(OBJ)" +msgstr "" + +#: ../../algebra.rst:43 6c1997e211ee4d939dd044c813508abb +msgid "the rectangle area -- equal to norm(OBJ) for the other types" +msgstr "他のタイプに対してnorm(OBJ)と等しい長方形の面積" + +#: ../../algebra.rst:44 94bcec1a0e3d4e96be8a96074d02378d +msgid "norm(OBJ)" +msgstr "" + +#: ../../algebra.rst:44 e0b98f2206cf4032b715abe02f8be8cd +msgid "square root of the component squares (Euclidean norm)" +msgstr "成分の二乗の平方根(ユークリッドノルム)" + +#: ../../algebra.rst:45 00744445072349b8b253bc092cc7bb87 +msgid "+OBJ" +msgstr "" + +#: ../../algebra.rst:45 0b56037ce3e04d528042a1a6098bb2f9 +msgid "new copy of OBJ" +msgstr "OBJの新しいコピー" + +#: ../../algebra.rst:46 0c1dfe4fe3344199b8b76d70f4f7d7f3 +msgid "-OBJ" +msgstr "" + +#: ../../algebra.rst:46 ef82c60af2dc4d238eba6b421e3047d9 +msgid "new copy of OBJ with negated components" +msgstr "成分が反転されたOBJの新しいコピー" + +#: ../../algebra.rst:47 ee6061816bb3437fa7e09c6d431dc788 +msgid "~m" +msgstr "" + +#: ../../algebra.rst:47 1e8a0c10344348238fc0f64c83edd642 +msgid "inverse of matrix \"m\", or the null matrix if not invertible" +msgstr "行列 \"m\" の逆行列、または逆行列が存在しない場合は零行列" + +#: ../../algebra.rst:52 398158a768f84b89a520021ea6d0a25f +msgid "Binary Operations" +msgstr "二項演算" + +#: ../../algebra.rst:53 01209ef26eeb4f6284e42a5165ad12aa +msgid "" +"These are expressions like `a ° b` where \"°\" is any of the operators " +"`+, -, *, /`. Also binary operations are expressions of the form `a == b`" +" and `b in a`." +msgstr "これらは ``a ° b`` のような式であり、ここで「°」は ``+``, ``-``, ``*``, ``/`` などの演算子のいずれかです。また、バイナリ演算には ``a == b`` や ``b in a`` のような形式の式も含まれます。" + +#: ../../algebra.rst:55 347337c08b6e49ba8133b876c65de703 +msgid "" +"If \"b\" is a number, then the respective operation is executed for each " +"component of \"a\". Otherwise, if \"b\" is **not a number,** then the " +"following happens:" +msgstr "もし「b」が数値である場合、それぞれの演算は「a」の各コンポーネントに対して実行されます。一方、「b」が数値ではない場合、以下のことが起こります:" + +#: ../../algebra.rst:61 882647f50a3548ada51850492e59336c +msgid "a+b, a-b" +msgstr "" + +#: ../../algebra.rst:61 529c2b3a1c8f440f8afd4fba9ce0396e +msgid "component-wise execution, \"b\" must be \"a-like\"." +msgstr "成分ごとの実行。\"b\" は \"a\" と同様の要素数である必要があります。" + +#: ../../algebra.rst:62 2606cc3b045c4835abe58d011464d50a +msgid "a*m, a/m" +msgstr "" + +#: ../../algebra.rst:62 7d3849829e3b4508827663e012314f30 + +msgid "" +"\"a\" can be a point, rectangle or matrix and \"m\" is a " +":data:`matrix_like`. *\"a/m\"* is treated as *\"a*~m\"* (see note below " +"for non-invertible matrices). If \"a\" is a **point** or a **rectangle**," +" then *\"a.transform(m)\"* is executed. If \"a\" is a matrix, then matrix" +" concatenation takes place." +msgstr "" +"\"a\" はポイント、矩形、または行列になりますが、\"m\" は :data:`matrix_like` でなければなりません。\"a/m\"" +" は \"a*〜m\" として処理されます(非逆行列の場合は以下の注記を参照)。\"a\" " +"がポイントまたは矩形の場合、\"a.transform(m)\" が実行されます。\"a\" が行列の場合、行列の連結が行われます。" + +#: ../../algebra.rst:67 b98d2bd8c76145a093239f6b2583ff78 +msgid "a*b" +msgstr "" + +#: ../../algebra.rst:67 55f9b0922451482484ef7fe2110fefb7 +msgid "returns the **vector dot product** for a point \"a\" and point-like \"b\"." +msgstr "点「a」と点のような「b」に対して、ベクトルの内積を返します。" + +#: ../../algebra.rst:68 2c02e64b04dd47a89a57341e5557c6f4 +msgid "a&b" +msgstr "" + +#: ../../algebra.rst:68 b1ebf38a599942378b28ecc93be28dbc +msgid "" +"**intersection rectangle:** \"a\" must be a rectangle and \"b\" " +":data:`rect_like`. Delivers the **largest rectangle** contained in both " +"operands." +msgstr "" +"共通の長方形: \"a\" は長方形であり、\"b\" は :data:`rect_like` " +"形式である必要があります。両方のオペランドに含まれる最大の長方形を返します。" + +#: ../../algebra.rst:71 6b94ffc704eb47269f5a3961a01cc914 +msgid "a|b" +msgstr "" + +#: ../../algebra.rst:71 f10408473ce74344941deb9adedaea81 +msgid "" +"**union rectangle:** \"a\" must be a rectangle, and \"b\" may be " +":data:`point_like` or :data:`rect_like`. Delivers the **smallest " +"rectangle** containing both operands." +msgstr "" +"合併した長方形: \"a\" は長方形であり、\"b\" は 点のような :data:`point_like` :data:`rect_like`" +" 形式である必要があります。両方のオペランドを含む最小の長方形を返します。" + +#: ../../algebra.rst:74 68ba14dfcb0944fb8f12d32ccd74acfc +msgid "b in a" +msgstr "" + +#: ../../algebra.rst:74 20c6156b787d407bba8ac0eb66d0003f +msgid "" +"if \"b\" is a number, then `b in tuple(a)` is returned. If \"b\" is " +":data:`point_like`, :data:`rect_like` or :data:`quad_like`, then \"a\" " +"must be a rectangle, and `a.contains(b)` is returned." +msgstr "" +"もし \"b\" が数値である場合、 `b in tuple(a)` が返されます。もし \"b\" が :data:`point_like` " +"形式、 :data:`rect_like` 形式、または :data:`quad_like` 形式である場合、\"a\" " +"は長方形である必要があり、`a.contains(b)` が返されます。" + +#: ../../algebra.rst:77 14c642acfd6a4cc09dd70b7edfc9479d +msgid "a == b" +msgstr "" + +#: ../../algebra.rst:77 b5f52a2ccb364df6aafabfc00b804bfd +msgid "``True`` if *bool(a-b)* is ``False`` (\"b\" may be \"a-like\")." +msgstr "bool(a-b) が `False` であれば `True` を返します(\"b\"は\"a-like\"である可能性があります)。" + +#: ../../algebra.rst:81 91cf0864d56a40f89a084004444ece8e +msgid "Please note an important difference to usual arithmetic:" +msgstr "以下は通常の算術との重要な違いに注意してください:" + +#: ../../algebra.rst:83 c2257dafe2e54f93bdf6e878a27abb1e +msgid "" +"Matrix multiplication is **not commutative**, i.e. in general we have " +"`m*n != n*m` for two matrices. Also, there are non-zero matrices which " +"have no inverse, for example `m = Matrix(1, 0, 1, 0, 1, 0)`. If you try " +"to divide by any of these, you will receive a `ZeroDivisionError` " +"exception using operator *\"/\"*, e.g. for the expression " +"`pymupdf.Identity / m`. But if you formulate `pymupdf.Identity * ~m`, the" +" result will be `pymupdf.Matrix()` (the null matrix)." +msgstr "" +"行列の乗算は可換ではありません。つまり、一般に2つの行列に対して `m*n != n*m` " +"が成り立ちます。また、逆行列を持たないゼロでない行列も存在します。例えば、`m = Matrix(1, 0, 1, 0, 1, 0)` " +"のような行列があります。これらの行列で除算しようとすると、演算子 *\"/\"* を使用して `pymupdf.Identity / m` " +"のような式で `ZeroDivisionError` 例外が発生します。しかし、`pymupdf.Identity * ~m` " +"のように記述すると、結果は `pymupdf.Matrix()` (零行列)となります。" + +#: ../../algebra.rst:85 0e187d91aa3543128b18a63bdf168871 +msgid "" +"Admittedly, this represents an inconsistency, and we are considering to " +"remove it. For the time being, you can choose to avoid an exception and " +"check whether ~m is the null matrix, or accept a potential " +"*ZeroDivisionError* by using `pymupdf.Identity / m`." +msgstr "" +"認めるところがあるかもしれませんが、これは矛盾を示しており、私たちはこれを取り除くことを検討しています。当面の間は、例外を回避し、`~m` " +"が零行列であるかどうかをチェックするか、`pymupdf.Identity / m` を使用して `ZeroDivisionError` " +"の可能性を受け入れることができます。" + +#: ../../algebra.rst:89 de603159545946e8bdc68fe78cd85024 +msgid "" +"With these conventions, all the usual algebra rules apply. For example, " +"arbitrarily using brackets **(among objects of the same class!)** is " +"possible: if r1, r2 are rectangles and m1, m2 are matrices, you can do " +"this `(r1 + r2) * m1 * m2`." +msgstr "" +"これらの規則に従うと、すべての通常の代数のルールが適用されます。例えば、任意の括弧を使うことができます(同じクラスのオブジェクトの間で!):もし " +"`r1`, `r2` が長方形であり、`m1`, `m2` が行列であれば、次のようにできます: `(r1 + r2) * m1 * m2` 。" + +#: ../../algebra.rst:90 2e9035f1be214ad5993be5461f301e62 +msgid "" +"For all objects of the same class, `a + b + c == (a + b) + c == a + (b + " +"c)` is true." +msgstr "同じクラスのオブジェクトに対して、 `a + b + c == (a + b) + c == a + (b + c)` が成り立ちます。" + +#: ../../algebra.rst:91 695f00e56afc43e8bbde8536925b9ffc +msgid "" +"For matrices in addition the following is true: `(m1 + m2) * m3 == m1 * " +"m3 + m2 * m3` (distributivity property)." +msgstr "行列の加法については、次のような性質が成り立ちます: `(m1 + m2) * m3 == m1 * m3 + m2 * m3` (分配律)。" + +#: ../../algebra.rst:92 85174e316c684b489921150e95d1aed3 +msgid "" +"**But the sequence of applying matrices is important:** If r is a " +"rectangle and m1, m2 are matrices, then -- **caution!:**" +msgstr "ただし、行列の適用順序が重要です:もし `r` が長方形であり、 `m1` , `m2` が行列である場合、注意してください!次のような場合:" + +#: ../../algebra.rst:93 8db6b272b39a4051b95fd6474751c244 +msgid "`r * m1 * m2 == (r * m1) * m2 != r * (m1 * m2)`" +msgstr "" + +#: ../../algebra.rst:96 5a5c13a7f2b542b59628658c7a3217f1 +msgid "Some Examples" +msgstr "いくつかの例" + +#: ../../algebra.rst:99 b685ff344b354dd6b03324c45389e661 +msgid "Manipulation with numbers" +msgstr "数値の操作" + +#: ../../algebra.rst:100 92463489f90f4556b3d5a7b306e1e7ac +msgid "" +"For the usual arithmetic operations, numbers are always allowed as second" +" operand. In addition, you can formulate `\"x in OBJ\"`, where x is a " +"number. It is implemented as `\"x in tuple(OBJ)\"`::" +msgstr "" +"通常の算術演算では、数値は常に第二オペランドとして使用できます。さらに、`\"x in OBJ\"` " +"のように式を記述することもできます。ここで、xは数値です。これは `\"x in tuple(OBJ)\"` として実装されています::" + +#: ../../algebra.rst:108 d3192c1a63bb460ba17fe04f2de5b3ff +msgid "" +"The following will create the upper left quarter of a document page " +"rectangle::" +msgstr "以下は、ドキュメントページの四角形の左上の四分の一を作成します::" + +#: ../../algebra.rst:116 9f9d44b5091a4a65844a451e8d7c92c4 +msgid "" +"The following will deliver the **middle point of a line** that connects " +"two points **p1** and **p2**::" +msgstr "以下は、点 `p1` と点 `p2` を結ぶ直線の中点を求めます::" + +#: ../../algebra.rst:125 2908e3f62351463a817217bed9ee7101 +msgid "" +"Compute the **vector dot product** of two points. You can compute the " +"**cosine of angles** and check orthogonality." +msgstr "2つの点のベクトル内積を計算します。これにより、角度のコサインを計算したり、直交性を確認したりすることができます。" + +#: ../../algebra.rst:149 05936628739f4f5392d49359e0f8860d +msgid "Manipulation with \"like\" Objects" +msgstr "「Like」オブジェクトを用いた操作" + +#: ../../algebra.rst:151 64ff208b10184910a7052e2d2fde5560 +msgid "" +"The second operand of a binary operation can always be \"like\" the left " +"operand. \"Like\" in this context means \"a sequence of numbers of the " +"same length\". With the above examples::" +msgstr "2項演算の第2オペランドは常に左オペランドと「like」することができます。「Like」とは、この文脈では「同じ長さの数列」という意味です。上記の例を用いて説明します。" + +#: ../../algebra.rst:162 7626451d4f89418ea6f21c0194766c1e +msgid "To shift a rectangle for 5 pixels to the right, do this::" +msgstr "長方形を右に5ピクセルシフトさせるには、次のようにします::" + +#: ../../algebra.rst:168 ffbacdbea5aa4828ab455e4e9da44bf2 +msgid "" +"Points, rectangles and matrices can be *transformed* with matrices. In " +"PyMuPDF, we treat this like a **\"multiplication\"** (or resp. " +"**\"division\"**), where the second operand may be \"like\" a matrix. " +"Division in this context means \"multiplication with the inverted " +"matrix\"::" +msgstr "点、長方形、および行列は行列で変換できます。PyMuPDFでは、これを「乗算」(または「除算」とも)として扱います。ここで、第2オペランドは行列と「like」することができます。「除算」とは、この文脈では「逆行列との乗算」を意味します。" + +#: ../../algebra.rst:208 f6063ded1f794fdfa330d05aab12b59a +msgid "As a specialty, rectangles support additional binary operations:" +msgstr "特に、長方形は追加の2項演算をサポートしています:" + +#: ../../algebra.rst:210 8ddb30b3a1e6414cb4ab801649852302 +msgid "**intersection** -- the common area of rectangle-likes, operator *\"&\"*" +msgstr "**交差** -- 長方形や類似の長方形の共通領域を示す演算子 *\"&\"*" + +#: ../../algebra.rst:211 44b3d1c0634a46f1ae3506fe5d9c9046 +msgid "" +"**inclusion** -- enlarge to include a point-like or rect-like, operator " +"*\"|\"*" +msgstr "**含有** -- 点状または長方形状を含むように拡大する演算子 *\"|\"*" + +#: ../../algebra.rst:212 462847c9ff494bef9387ce0aff093bfd +msgid "**containment** check -- whether a point-like or rect-like is inside" +msgstr "**含有確認** 点状または長方形状が内部にあるかどうかを確認します" + +#: ../../algebra.rst:214 8733fac916f74438a2cecf7b54a9c218 +msgid "" +"Here is an example for creating the smallest rectangle enclosing given " +"points::" +msgstr "以下は、与えられた点を囲む最小の長方形を作成する例です::" + +#: ../../footer.rst:60 1533c6c3a8344ce0b45e9762c0145328 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "Binary operators fully support in-place" +#~ " operations, so expressions like `a " +#~ "/= b` are valid if b is " +#~ "numeric or \"a_like\"." +#~ msgstr "" +#~ "バイナリ演算子は、すべての場所で完全にサポートされているため、``b`` が数値または「a_like」の場合、 " +#~ "``a /= b`` のような式が有効です" + +#~ msgid "" +#~ "For every geometry object \"a\" and " +#~ "every number \"b\", the operations \"a" +#~ " ° b\" and \"a °= b\" are " +#~ "always defined for the operators *+, " +#~ "-, *, /*. The respective operation " +#~ "is simply executed for each component" +#~ " of \"a\". If the **second operand" +#~ " is not a number**, then the " +#~ "following is defined:" +#~ msgstr "" +#~ "あらゆるジオメトリオブジェクト\"a\"とあらゆる数値 \"b\" に対して、演算子 ``+``," +#~ " ``-`` , ``*`` , ``/`` については常に " +#~ "\"a ° b\" と \"a °= b\" " +#~ "の操作が定義されています。それぞれの操作は、\"a\" " +#~ "の各成分に対して単純に実行されます。もし第二オペランドが数値でない場合、次のように定義されます:" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/annot.mo b/docs/locales/ja/LC_MESSAGES/annot.mo new file mode 100644 index 000000000..ea0995b90 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/annot.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/annot.po b/docs/locales/ja/LC_MESSAGES/annot.po new file mode 100644 index 000000000..f01dc55f2 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/annot.po @@ -0,0 +1,1660 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 ff7a088e26e441d29ec4e3aca594d728 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 18977871afd44db590c7a3b2bb53f215 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 1b2c3384886c41a0827a54c74b8505d9 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../annot.rst:7 5000811e2d7e4bf0b31e898f6141c30a +msgid "Annot" +msgstr "Annot (注釈)" + +#: ../../annot.rst:9 6572aa4344904647b2861639064ec562 +msgid "|pdf_only_class|" +msgstr "PDFのみ。" + +#: ../../annot.rst:11 58f32778ecd843849cffbac30da6580a +msgid "" +"Quote from the :ref:`AdobeManual`:" +msgstr "" +":ref:`AdobeManual` からの引用: " + +msgid "" +"*\"An annotation associates an object " +"such as a note, sound, or movie with a location on a page of a PDF " +"document, or provides a way to interact with the user by means of the " +"mouse and keyboard.\"*" +msgstr "" +"「注釈は、ノート、音声、動画などのオブジェクトをPDFドキュメントのページ上の位置に関連付けるか、マウスとキーボードを介してユーザーと対話する手段を提供します。」" + + + +#: ../../annot.rst:13 a1ef865437ed4e5b8622fb4c309b374d +msgid "" +"There is a parent-child relationship between an annotation and its page. " +"If the page object becomes unusable (closed document, any document " +"structure change, etc.), then so does every of its existing annotation " +"objects -- an exception is raised saying that the object is \"orphaned\"," +" whenever an annotation property or method is accessed." +msgstr "注釈とそのページとの間には親子関係があります。ページオブジェクトが使用できなくなる場合(閉じたドキュメント、文書構造の変更など)、そのページに存在するすべての注釈オブジェクトも同様に使用できなくなります。注釈のプロパティやメソッドにアクセスされるたびに、オブジェクトが「孤立した」という例外が発生します。" + +#: ../../annot.rst:16 c126f4b877914becad42b5b9654e9e4e +msgid "**Attribute**" +msgstr "**アトリビュート** " + +#: ../../annot.rst:16 3fa27aa576664040b2fb04bfaf95f590 +msgid "**Short Description**" +msgstr "**短い説明** " + +#: ../../annot.rst:18 ea087de99b44415f897643e65decb123 +msgid ":meth:`Annot.delete_responses`" +msgstr "" + +#: ../../annot.rst:18 7f486bb052864bc2a47e8dfe9f73e9e8 +msgid "delete all responding annotations" +msgstr "すべての応答アノテーションを削除します" + +#: ../../annot.rst:19 7b62bdd30e3e486b88c51a3324a55249 +msgid ":meth:`Annot.get_file`" +msgstr "" + +#: ../../annot.rst:19 ad45a655b150485fb69b9ddefe467eee +msgid "get attached file content" +msgstr "添付ファイルの内容を取得します" + +#: ../../annot.rst:20 c938adec44fd42ebbdfd6b8ab384b6dc +msgid ":meth:`Annot.get_oc`" +msgstr "" + +#: ../../annot.rst:20 ede08aa7b8a9490fb82654ba462331fe +msgid "get :data:`xref` of an :data:`OCG` / :data:`OCMD`" +msgstr ":data:`xref` を :data:`OCG` または :data:`OCMD` に取得します。" + +#: ../../annot.rst:21 d2693fb2e1ba408bb519d0c17278ac67 +msgid ":meth:`Annot.get_pixmap`" +msgstr "" + +#: ../../annot.rst:21 80fd13f75a364a16a5170ad843148df0 +msgid "image of the annotation as a pixmap" +msgstr "アノテーションの画像をピクマップとして取得します" + +#: ../../annot.rst:22 cff57bdb24884692afe25a6a3850650d +msgid ":meth:`Annot.get_sound`" +msgstr "" + +#: ../../annot.rst:22 6994a4c09b9b405597ab42a1bec163f6 +msgid "get the sound of an audio annotation" +msgstr "オーディオアノテーションの音声を取得します" + +#: ../../annot.rst:23 58ec6233ee8c46cd9b32b2d4537d1c4e +msgid ":meth:`Annot.get_text`" +msgstr "" + +#: ../../annot.rst:23 ../../annot.rst:24 7991062612ee456d9d02ac909cb903ab +#: b049c9c1cfe14a53841638c277c3dc9d +msgid "extract annotation text" +msgstr "アノテーションテキストを抽出します" + +#: ../../annot.rst:24 3293e6c0727e4c7fbb897607e9abda13 +msgid ":meth:`Annot.get_textbox`" +msgstr "" + +#: ../../annot.rst:25 c7107ff6b91244629a578499c2693f97 +msgid ":meth:`Annot.get_textpage`" +msgstr "" + +#: ../../annot.rst:25 f960592668bb4d61a14db7770f245ff7 +msgid "create a TextPage for the annotation" +msgstr "アノテーションのためのポップアップを作成します" + +#: ../../annot.rst:26 08f9508de1fc426b8b0dad3648e4a0ab +msgid ":meth:`Annot.set_border`" +msgstr "" + +#: ../../annot.rst:26 7f667fc22c864184a53baf1b41f2f422 +msgid "set annotation's border properties" +msgstr "アノテーションの境界線のプロパティを設定します" + +#: ../../annot.rst:27 d24ba1cc1ce34a149bcbf60b2434df4e +msgid ":meth:`Annot.set_blendmode`" +msgstr "" + +#: ../../annot.rst:27 922e48bd5c9c4d01bbcbdfa4a956b427 +msgid "set annotation's blend mode" +msgstr "アノテーションのブレンドモードを設定します" + +#: ../../annot.rst:28 50bf516a0b764d70a67a2b30e60aeacb +msgid ":meth:`Annot.set_colors`" +msgstr "" + +#: ../../annot.rst:28 34603b17e7e24106b41fdcf554ba1360 +msgid "set annotation's colors" +msgstr "アノテーションの色を設定します" + +#: ../../annot.rst:29 ../../annot.rst:342 1c824645834845a0ae5154e55b3cdbf4 +#: cb309c0b399f41ffa927d9853ab58985 +msgid ":meth:`Annot.set_flags`" +msgstr "" + +#: ../../annot.rst:29 163cd8ded8aa4fe89173df3c1b0d8d6b +msgid "set annotation's flags field" +msgstr "アノテーションのフラグフィールドを設定します" + +#: ../../annot.rst:30 ac5223c7a43f49589c2d85e8b1380675 +msgid ":meth:`Annot.set_irt_xref`" +msgstr "" + +#: ../../annot.rst:30 6acda0a7b8404cf7ae6381140fd376ab +msgid "define the annotation to being \"In Response To\"" +msgstr "アノテーションを「応答対象」として定義します" + +#: ../../annot.rst:31 71e625c5f2854e86acf0da6bdbecafc6 +msgid ":meth:`Annot.set_name`" +msgstr "" + +#: ../../annot.rst:31 93e021065f1249bca197aba645a1ae23 +msgid "set annotation's name field" +msgstr "アノテーションの名前フィールドを設定します" + +#: ../../annot.rst:32 ../../annot.rst:343 d35d1936521f4902a7308fb2833b06f8 +#: dd1fc298be724240b0bbc96ae65e503d +msgid ":meth:`Annot.set_oc`" +msgstr "" + +#: ../../annot.rst:32 375a7e1792f34e01a55dd16d3f73d808 +msgid "set :data:`xref` to an :data:`OCG` / :data:`OCMD`" +msgstr ":data:`xref` を :data:`OCG` または :data:`OCMD` に設定します。" + +#: ../../annot.rst:33 c8b80eb861434f07bd261aa9435ac037 +msgid ":meth:`Annot.set_opacity`" +msgstr "" + +#: ../../annot.rst:33 0798272be98b4e6284d7e419c7f6d4d6 +msgid "change transparency" +msgstr "透明度を変更します" + +#: ../../annot.rst:34 11786c94965b4da98e8467611d3aacc5 +msgid ":meth:`Annot.set_open`" +msgstr "" + +#: ../../annot.rst:34 84272bd5f9ba495f958777194709def4 +msgid "open / close annotation or its Popup" +msgstr "アノテーションまたはそのポップアップを開く/閉じる" + +#: ../../annot.rst:35 cf0b1c55f79c449299f2c02061edc6ed +msgid ":meth:`Annot.set_popup`" +msgstr "" + +#: ../../annot.rst:35 85d219e184364d30a1dc9a4764ea4c7d +msgid "create a Popup for the annotation" +msgstr "アノテーションのためのポップアップを作成します" + +#: ../../annot.rst:36 ../../annot.rst:341 5d07938cf9484386bbf4154a889a6a30 +#: 8015d3625a67455f88892d3ed0e7cda7 +msgid ":meth:`Annot.set_rect`" +msgstr "" + +#: ../../annot.rst:36 871ad36e30794a4cb1ef6a47d85fd43b +msgid "change annotation rectangle" +msgstr "アノテーションの長方形を変更します" + +#: ../../annot.rst:37 a1ad49c8d1884520aac479a12df9f827 +msgid ":meth:`Annot.set_rotation`" +msgstr "" + +#: ../../annot.rst:37 cacca8eff36344cc8374cc89c957483e +msgid "change rotation" +msgstr "回転を変更します" + +#: ../../annot.rst:38 ../../annot.rst:344 6485afe67e3442258d9d3698bfc4b536 +#: 7557e579699f4dd79e2202a136f61f23 +msgid ":meth:`Annot.update_file`" +msgstr "" + +#: ../../annot.rst:38 817586391bcb49fb9b9ee6f69cde7868 +msgid "update attached file content" +msgstr "添付ファイルの内容を更新します" + +#: ../../annot.rst:39 2ab6962c0ead4a75b45dd6cf2abaa51d +msgid ":meth:`Annot.update`" +msgstr "" + +#: ../../annot.rst:39 51fc956198004f8d9a16bc9ac938de48 +msgid "apply accumulated annot changes" +msgstr "蓄積されたアノテーションの変更を適用します" + +#: ../../annot.rst:40 0ae5ddfde6a840b3bb5570b800f1d653 +msgid ":attr:`Annot.blendmode`" +msgstr "" + +#: ../../annot.rst:40 c3b350ed72634609bf89ec671eb98821 +msgid "annotation BlendMode" +msgstr "アノテーションのブレンドモード" + +#: ../../annot.rst:41 7c0ad77081df47acb4fb5fc50e2adc57 +msgid ":attr:`Annot.border`" +msgstr "" + +#: ../../annot.rst:41 524c0edf6aaa466ebcdf8d43118eebd6 +msgid "border details" +msgstr "境界線の詳細" + +#: ../../annot.rst:42 391ec574189d4e5d99dcc5de5d15b441 +msgid ":attr:`Annot.colors`" +msgstr "" + +#: ../../annot.rst:42 491d801bcbdf44f0a1cc60e0e0b43785 +msgid "border / background and fill colors" +msgstr "境界線/背景および塗りつぶしの色" + +#: ../../annot.rst:43 70881f5fbe3f444ca69c132b8525a8a4 +msgid ":attr:`Annot.file_info`" +msgstr "" + +#: ../../annot.rst:43 ed4a1667427941a0a42c016cdb472ecc +msgid "get attached file information" +msgstr "添付ファイル情報を取得します" + +#: ../../annot.rst:44 fc587e8b72b24093ba42f0200439612f +msgid ":attr:`Annot.flags`" +msgstr "" + +#: ../../annot.rst:44 412be177d1f04bb7aa7efbae8a26b2b9 +msgid "annotation flags" +msgstr "アノテーションフラグ" + +#: ../../annot.rst:45 e1730d3ddaf94a5ba8a40ae46d1ebd27 +msgid ":attr:`Annot.has_popup`" +msgstr "" + +#: ../../annot.rst:45 d60eec1bd2e24dda97956603fa503999 +msgid "whether annotation has a Popup" +msgstr "アノテーションにポップアップがあるかどうか" + +#: ../../annot.rst:46 c2bf5bf4df6844c48801c20befcd9327 +msgid ":attr:`Annot.irt_xref`" +msgstr "" + +#: ../../annot.rst:46 7448ababb2e746a394c74eb1d1d51424 +msgid "annotation to which this one responds" +msgstr "このアノテーションへの応答としてのアノテーション" + +#: ../../annot.rst:47 722b5d2dc3fa466d87a301aa2009ce91 +msgid ":attr:`Annot.info`" +msgstr "" + +#: ../../annot.rst:47 0f31b4b67f6c4f3580312e80d84264b1 +msgid "various information" +msgstr "さまざまな情報" + +#: ../../annot.rst:48 b80dffed8e3748148d2ccc8227450602 +msgid ":attr:`Annot.is_open`" +msgstr "" + +#: ../../annot.rst:48 ddfc282227b548df89328272f9ef3b46 +msgid "whether annotation or its Popup is open" +msgstr "アノテーションまたはそのポップアップが開いているかどうか" + +#: ../../annot.rst:49 2023405da922439f980b6e1c5fa916af +msgid ":attr:`Annot.line_ends`" +msgstr "" + +#: ../../annot.rst:49 96322f741c9c4c3cad018601e21b70f4 +msgid "start / end appearance of line-type annotations" +msgstr "線タイプのアノテーションの始点/終点の外観" + +#: ../../annot.rst:50 6440c9f13ce742aeb84a3ff8c0a58ba6 +msgid ":attr:`Annot.next`" +msgstr "" + +#: ../../annot.rst:50 c697cfff2b7d432aa56de4e2f3d84bf5 +msgid "link to the next annotation" +msgstr "次のアノテーションへのリンク" + +#: ../../annot.rst:51 69859d25e053434eb9f58f06a5e84215 +msgid ":attr:`Annot.opacity`" +msgstr "" + +#: ../../annot.rst:51 2346e2f7904f4f4f89b78e3d4cc5191a +msgid "the annot's transparency" +msgstr "アノテーションの透明度" + +#: ../../annot.rst:52 dc09b3803b4b40708d90ee360746806e +msgid ":attr:`Annot.parent`" +msgstr "" + +#: ../../annot.rst:52 d4300a6a5817452a904ca1424d707494 +msgid "page object of the annotation" +msgstr "アノテーションのページオブジェクト" + +#: ../../annot.rst:53 4a1c85bcc643456eb893652c94e1ffb1 +msgid ":attr:`Annot.popup_rect`" +msgstr "" + +#: ../../annot.rst:53 310a3a9c999e45aa8530e9e0406ae7a1 +msgid "rectangle of the annotation's Popup" +msgstr "アノテーションのポップアップの長方形" + +#: ../../annot.rst:54 29e2ed85a69c4705aa62eb078926195e +msgid ":attr:`Annot.popup_xref`" +msgstr "" + +#: ../../annot.rst:54 a58383e6b647405dae30968ba753daff +msgid "the PDF :data:`xref` number of the annotation's Popup" +msgstr "アノテーションのポップアップのPDF :data:`xref` 番号" + +#: ../../annot.rst:55 54f3ad9d61224189875abeaa829739fc +msgid ":attr:`Annot.rect`" +msgstr "" + +#: ../../annot.rst:55 ed12e42c92214db1a837f18c33ec864e +msgid "rectangle containing the annotation" +msgstr "アノテーションを含む長方形" + +#: ../../annot.rst:56 b322c2136f6641d6b3700eb40d06354c +msgid ":attr:`Annot.type`" +msgstr "" + +#: ../../annot.rst:56 d05081e51bda43e1aed2a07fecee14ae +msgid "type of the annotation" +msgstr "アノテーションのタイプ" + +#: ../../annot.rst:57 7760d9f11dc1455bbce39fef1d726122 +msgid ":attr:`Annot.vertices`" +msgstr "" + +#: ../../annot.rst:57 b65ac6b16aef48679a1d885ce92fd12c +msgid "point coordinates of Polygons, PolyLines, etc." +msgstr "ポリゴン、ポリラインなどの点の座標" + +#: ../../annot.rst:58 324031e7bc8344b7891eea42262c24eb +msgid ":attr:`Annot.xref`" +msgstr "" + +#: ../../annot.rst:58 45eb2c6f94d241b9a99b179f77e1c730 +msgid "the PDF :data:`xref` number" +msgstr "PDF :data:`xref` 番号" + +#: ../../annot.rst:61 47b2c93409ed420491e9d1165df4fceb +msgid "**Class API**" +msgstr " **APIクラス** " + +#: ../../annot.rst:73 86260e9a3eee40d8be9f41ac0a98d586 +msgid "Changed in v1.19.2: added support of dpi parameter." +msgstr "v1.19.2で変更: dpiパラメータのサポートが追加されました。" + +#: ../../annot.rst:75 880909430980416ea32acec47fd74620 +msgid "" +"Creates a pixmap from the annotation as it appears on the page in " +"untransformed coordinates. The pixmap's :ref:`IRect` equals " +"*Annot.rect.irect* (see below). **All parameters are keyword only.**" +msgstr "" +"変換されていない座標でページ上に表示される注釈からピクスマップを作成します。ピクスマップの :ref:`IRect` は " +"*Annot.rect.irect* と同じです(以下を参照)。 **すべてのパラメータはキーワード専用です。** " + +#: ../../annot.rst 089ffb55a5c54d31b2a099695ee8e8b5 +#: 0e4c6fa44dde49ac847bd4cc76f84095 2d7cda1dee384169b88e5f69f469a86b +#: 31ec6dab43204aea9cc8515ad1bcab55 345b4a70f79d44b0ad9a4fe11ae6749d +#: 4df572c9469347c7b2d092efd5937020 57f3253f02da4f71af4c62f36a91b1f0 +#: 6311f05f830242038f6be5d788bd8b3a 83f0ce983b3149f19f1937976f26c38c +#: 8a15651d997b433694fb86a9a61cf34e 9812334b860241feb0f05f5542163c29 +#: 9a80f3d4399746328022f9a04fb35393 a3f5c2dfd7d8447da39f78b071ecec3d +#: ac371b0005864d5f867373b568c23cff bedb0b087bb84f0a8c0728a11f6879c9 +#: d115b2f26341443f9931ddf8b3474e58 d2a145a6bb0a425aaf5dba567f435051 +#: d99a889618da40e8a0fc713671dcf33e e38967643d4246a199e2b38e2a034778 +#: ecb79981326049eab38b7dd94daff1dd +msgid "Parameters" +msgstr "パラメータ" + +#: ../../annot.rst:77 e417565017cd45dbb90662a4eb077f40 +msgid "a matrix to be used for image creation. Default is :ref:`Identity`." +msgstr "画像の作成に使用される行列。デフォルトは :ref:`Identity` です。" + +#: ../../annot.rst:79 b405de362c454c6e98060e7acc458ee5 +msgid "" +"(new in v1.19.2) desired resolution in dots per inch. If not `None`, the " +"matrix parameter is ignored." +msgstr "(v1.19.2で新規追加)インチあたりのドット数で指定された解像度。 `None` でない場合、matrixパラメータは無視されます。" + +#: ../../annot.rst:81 b77081c57af64088978ad2f41b248bd4 +msgid "a colorspace to be used for image creation. Default is ``pymupdf.csRGB``." +msgstr "画像の作成に使用されるカラースペース。デフォルトは ``pymupdf.csRGB`` です。" + +#: ../../annot.rst:84 2dcd533cb5c24787bf3ef2ea667576c1 +msgid "whether to include transparency information. Default is ``False``." +msgstr "透明情報を含めるかどうか。デフォルトは ``False`` です。" + +#: ../../annot.rst 03bdb651fd5d43e0b50daa68b3f29301 +#: 07b682c9b1494fd680a932f123bea564 0d44e48248b94319939438ec421594ba +#: 10ea7432521545c1b51851cb404aaa0a 1ddb2fc5b5524df78da3cbde901f4e28 +#: 23944f91176a44299ff38be1a9b2fcf0 2be3544d8d6f4593bc5ad0d1d8327da7 +#: 361c2326f3c041f6bd559b8d3528012d 3891e2a8472640c3be4bdedf20c6cc9f +#: 49ff50d3391a465a93c0558311214b07 72e96380ba7442b789c04b2cef259e6b +#: 776268181a164c45856b7f26c6d91330 797c9a492b094ed58f03d179a0c36352 +#: 7e96eff6b04d4d18ad9b50324bff1890 8e3aba14e0704da497c753ab9da937ce +#: 9cdc041b2f43450fb713e2a0b8c9caba ae746abd2c394decb52eef9ad542d93f +#: b4cfe4c19ed249038ce618565b456e1c b5b693248d7b4439a7ce3b2316508ee1 +#: c9b331263c504d9798802d4c76edf0f3 d112cfe7ae6f4091aba2610588a0ce1d +#: dcac44df5c924b6d9b805a49b1754ba4 eb59a0c6a52e4d15ad132925deb11e2e +#: f55053f9e847488593d336020468cebb +msgid "Return type" +msgstr "戻り値の型" + +#: ../../annot.rst:86 8bc98996e43b49e98902ab152c1681ae +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../annot.rst:90 0b7db7e99f7746abad597233349afdaf +msgid "" +"If the annotation has just been created or modified, you should " +":meth:`Document.reload_page` the page first via `page = " +"doc.reload_page(page)`." +msgstr "" +"注釈が作成または変更されたばかりの場合、まず ``page = doc.reload_page(page)`` " +"を使用してページを再読み込みする必要があります。" + +#: ../../annot.rst:92 672b6b5650554da988247bd501528456 +msgid "" +"The pixmap will have *\"premultiplied\"* pixels if `alpha=True`. To learn" +" about some background, e.g. look for \"Premultiplied alpha\" `in this online glossary " +"`_." +msgstr "" + +#: ../../annot.rst:110 ../../annot.rst:131 2c14274b38ca40928460b67999b1c1ec +#: 5079bb8a71b44da3b5ef92fff930f701 +msgid "New in 1.18.0" +msgstr "1.18.0 で新たに追加" + +#: ../../annot.rst:112 328e54dec81c4a1dbe69080e8e98eb9c +msgid "" +"Retrieves the content of the annotation in a variety of formats -- much " +"like the same method for :ref:`Page`.. This currently only delivers " +"relevant data for annotation types 'FreeText' and 'Stamp'. Other types " +"return an empty string (or equivalent objects)." +msgstr "" +"さまざまなフォーマットで注釈の内容を取得します - これは :ref:`Page` " +"の同じメソッドのように動作します。現在、これはアノテーションタイプ「FreeText」と「Stamp」に関連するデータのみを提供します。他の種類は空の文字列(または同等のオブジェクト)を返します。" + +#: ../../annot.rst:114 58feb90344fd49c8b4824a369da850b4 +msgid "" +"(positional only) the desired format - one of the following values. " +"Please note that this method works exactly like the same-named method of " +":ref:`Page`. * \"text\" -- :meth:`TextPage.extractTEXT`, default * " +"\"blocks\" -- :meth:`TextPage.extractBLOCKS` * \"words\" -- " +":meth:`TextPage.extractWORDS` * \"html\" -- :meth:`TextPage.extractHTML` " +"* \"xhtml\" -- :meth:`TextPage.extractXHTML` * \"xml\" -- " +":meth:`TextPage.extractXML` * \"dict\" -- :meth:`TextPage.extractDICT` * " +"\"json\" -- :meth:`TextPage.extractJSON` * \"rawdict\" -- " +":meth:`TextPage.extractRAWDICT`" +msgstr "" + +#: ../../annot.rst:114 c9e62c7b73724f1c8e6235f8f6cbda04 +msgid "" +"(positional only) the desired format - one of the following values. " +"Please note that this method works exactly like the same-named method of " +":ref:`Page`." +msgstr "" +"(位置指定のみ) 望ましいフォーマット - 以下のいずれかの値の1つです。このメソッドは、:ref:`Page` " +"の同名のメソッドとまったく同じように動作することに注意してください。" + +#: ../../annot.rst:116 41033f33feec4d6cbb8e872e49834e35 +msgid "\"text\" -- :meth:`TextPage.extractTEXT`, default" +msgstr "\"text\" -- :meth:`TextPage.extractTEXT`、デフォルト" + +#: ../../annot.rst:117 0f63370b1d564689a30b933603e4c3e6 +msgid "\"blocks\" -- :meth:`TextPage.extractBLOCKS`" +msgstr "" + +#: ../../annot.rst:118 913527d283fe4c76926fe0d4e4eaaf65 +msgid "\"words\" -- :meth:`TextPage.extractWORDS`" +msgstr "" + +#: ../../annot.rst:119 dfeba93112b743e9b6b22423e828ae9c +msgid "\"html\" -- :meth:`TextPage.extractHTML`" +msgstr "" + +#: ../../annot.rst:120 80912195e04d40f9baf509203c257b1f +msgid "\"xhtml\" -- :meth:`TextPage.extractXHTML`" +msgstr "" + +#: ../../annot.rst:121 cd4deb7f93034a8bbfd0bd74e2f12450 +msgid "\"xml\" -- :meth:`TextPage.extractXML`" +msgstr "" + +#: ../../annot.rst:122 b286687300f54486ae11f97ee845c847 +msgid "\"dict\" -- :meth:`TextPage.extractDICT`" +msgstr "" + +#: ../../annot.rst:123 9dc28b03f1ea41e8b66cf1bb75ede2af +msgid "\"json\" -- :meth:`TextPage.extractJSON`" +msgstr "" + +#: ../../annot.rst:124 38208eb8ae134ee1a76f75088217b67d +msgid "\"rawdict\" -- :meth:`TextPage.extractRAWDICT`" +msgstr "" + +#: ../../annot.rst:126 09f1c69a2a314fe4b0c7a41518666c60 +msgid "" +"(keyword only) restrict the extraction to this area. Should hardly ever " +"be required, defaults to :attr:`Annot.rect`." +msgstr "(キーワードのみ) このエリアに抽出を制限します。ほとんど必要ない場合がほとんどで、デフォルトは :attr:`Annot.rect` です。" + +#: ../../annot.rst:127 10c8554551bc4032abd2670e94cdb2b8 +msgid "" +"(keyword only) control the amount of data returned. Defaults to simple " +"text extraction." +msgstr "(キーワードのみ) 返されるデータの量を制御します。単純なテキスト抽出がデフォルトです。" + +#: ../../annot.rst:133 da44ee14361e4946b9f738c42752dbf1 +msgid "" +"Return the annotation text. Mostly (except line breaks) equal to " +":meth:`Annot.get_text` with the \"text\" option." +msgstr "注釈のテキストを返します。主に(改行を除く)「text」オプションを使用した :meth:`Annot.get_text` と同等です" + +#: ../../annot.rst:135 775ed8af3b514f7cb93fca3075fb7b66 +msgid "the area to consider, defaults to :attr:`Annot.rect`." +msgstr "rect (rect-like) – 考慮する領域、デフォルトは :attr:`Annot.rect` です。" + +#: ../../annot.rst:140 a37b251418e1438eb6557034157f78d6 +#, fuzzy +msgid "Create a :ref:`TextPage` for the annotation." +msgstr "アノテーションのためのポップアップを作成します" + +#: ../../annot.rst:142 853a6807eb2e4838b3a76e058d318a16 +msgid "" +"indicator bits controlling the content available for subsequent text " +"extractions and searches -- see the parameter of :meth:`Annot.get_text`." +msgstr "後続のテキスト抽出や検索に利用可能なコンテンツを制御する指標ビット -- :meth:`Annot.get_text` のパラメータを参照してください。" + +#: ../../annot.rst:144 a0916e69962b40559ccefc18b36fe7ab +msgid "restrict extracted text to this area." +msgstr "抽出されるテキストをこのエリアに制限します。" + +#: ../../annot.rst 10c25cc74cf14a4487da85267fd798e1 +#: 23ab237bc68240aab0c5942ffd105dbc 491acd8d88934a05b86aa6b039da0c4b +#: 61ef90c5224d40d2a863f618127ffcbd 67a45cfcd15047fea621454ad26174e8 +#: 944c95687ff74ea6ad35abc0e299761a c03ec076d8fa47a889fcd76c059f864c +msgid "Returns" +msgstr "戻り値" + +#: ../../annot.rst:146 8ffa753f1b664b66bd4b57101af0dcac +msgid ":ref:`TextPage`" +msgstr "" + +#: ../../annot.rst:148 a9f9910300164ed1b6a9d9d35e97ccc7 +msgid "|history_begin|" +msgstr "" + +#: ../../annot.rst:150 faba4699392d4b29a960fcfd7e0f9e79 +msgid "v1.25.5: fixed `clip` arg." +msgstr "" + +#: ../../annot.rst:152 55dcf062ed404c9289bb09e898b26e1a +msgid "|history_end|" +msgstr "" + +#: ../../annot.rst:156 2a88cb05dd094eca83c54413f0cf5ce8 +msgid "Changed in version 1.16.10" +msgstr "バージョン 1.16.10 で変更" + +#: ../../annot.rst:158 3d7812355216454aaffbae4bba32aae5 +msgid "" +"Changes annotation properties. These include dates, contents, subject and" +" author (title). Changes for *name* and *id* will be ignored. The update " +"happens selectively: To leave a property unchanged, set it to ``None``. " +"To delete existing data, use an empty string." +msgstr "" +"注釈のプロパティを変更します。これには日付、内容、題名、および著者(タイトル)が含まれます。*名前* と *ID* " +"の変更は無視されます。更新は選択的に行われます:プロパティを変更しない場合は、それを ``None`` " +"に設定します。既存のデータを削除するには、空の文字列を使用します。" + +#: ../../annot.rst:160 99d8112ddc2a49fdacf5c1ba5e3c4e53 +msgid "" +"a dictionary compatible with the *info* property (see below). All entries" +" must be strings. If this argument is not a dictionary, the other " +"arguments are used instead -- else they are ignored." +msgstr "" +"*info* " +"プロパティと互換性のある辞書(以下参照)。すべてのエントリは文字列である必要があります。この引数が辞書でない場合、他の引数が代わりに使用されます。それ以外の場合、無視されます。" + +#: ../../annot.rst:161 ../../annot.rst:162 ../../annot.rst:165 +#: 298f8bd243c74fd8815ee77d8616eb88 4f949b0c27064158a50619fef16f97c1 +#: d48ad8dfb0234e2f8a3fb95edc52b5ec +msgid "*(new in v1.16.10)* see description in :attr:`info`." +msgstr "*(v1.16.10 で新規追加)* :attr:`info` の説明を参照してください。" + +#: ../../annot.rst:163 b37754ca35e947d187046764f489d377 +msgid "" +"*(new in v1.16.10)* date of annot creation. If given, should be in PDF " +"datetime format." +msgstr "*(v1.16.10 で新規追加)* 注釈の作成日。指定する場合、PDF 日時形式である必要があります。" + +#: ../../annot.rst:164 4f5cc39695f54dd1ba9374889dd6ab3a +msgid "" +"*(new in v1.16.10)* date of last modification. If given, should be in PDF" +" datetime format." +msgstr "*(v1.16.10 で新規追加)* 最終変更日。指定する場合、PDF 日時形式である必要があります。" + +#: ../../annot.rst:169 1e74b028b8224bbf9d84b29aa7a1f114 +msgid "" +"Sets an annotation's line ending styles. Each of these annotation types " +"is defined by a list of points which are connected by lines. The symbol " +"identified by *start* is attached to the first point, and *end* to the " +"last point of this list. For unsupported annotation types, a no-operation" +" with a warning message results." +msgstr "" +"注釈の線終端スタイルを設定します。これらの注釈タイプの各々は、線で接続された点のリストによって定義されています。*start* " +"で指定されたシンボルは、最初の点に、*end* " +"はこのリストの最後の点に取り付けられます。サポートされていない注釈タイプの場合、警告メッセージとともに無操作となります。" + +#: ../../annot.rst:173 f51503de04174f5892b72d2795bc07b6 +msgid "" +"While 'FreeText', 'Line', 'PolyLine', and 'Polygon' annotations can have " +"these properties, (Py-) MuPDF does not support line ends for 'FreeText', " +"because the call-out variant of it is not supported." +msgstr "" +"「FreeText」、「Line」、「PolyLine」、および「Polygon」注釈はこれらのプロパティを持つことができますが、(Py-)MuPDF" +" は「FreeText」の線終端をサポートしていません。なぜなら、そのコールアウトバリアントはサポートされていないからです。" + +#: ../../annot.rst:174 a700a60706ca4e569abfa259b0b9fb63 +msgid "" +"*(Changed in v1.16.16)* Some symbols have an interior area (diamonds, " +"circles, squares, etc.). By default, these areas are filled with the fill" +" color of the annotation. If this is ``None``, then white is chosen. The " +"*fill_color* argument of :meth:`Annot.update` can now be used to override" +" this and give line end symbols their own fill color." +msgstr "" +"*(v1.16.16 で変更)* " +"一部のシンボルには内部領域(ダイヤモンド、円、正方形など)があります。デフォルトでは、これらの領域は注釈の塗りつぶし色で塗りつぶされます。これが " +"``None`` の場合、白色が選択されます。:meth:`Annot.update` の *fill_color* " +"引数を使用して、線終端シンボルに独自の塗りつぶし色を設定することができるようになりました。" + +#: ../../annot.rst:176 db8246f575e5488d96aa74a3a2514029 +msgid "The symbol number for the first point." +msgstr "最初の点のシンボル番号。" + +#: ../../annot.rst:177 3cc0f00120ce499a934058c5ccaf4de9 +msgid "The symbol number for the last point." +msgstr "最後の点のシンボル番号。" + +#: ../../annot.rst:181 9b8aa01ea6a148488442f0b9f54fd48b +msgid "" +"Set the annotation's visibility using PDF optional content mechanisms. " +"This visibility is controlled by the user interface of supporting PDF " +"viewers. It is independent from other attributes like " +":attr:`Annot.flags`." +msgstr "" +"PDFのオプションコンテンツメカニズムを使用して、注釈の表示/非表示を設定します。この表示は、サポートするPDFビューアのユーザーインターフェースによって制御されます。:attr:`Annot.flags`" +" のような他の属性とは独立しています。" + +#: ../../annot.rst:183 b8604fe16b22458ea3a6c3ebed57b912 +msgid "" +"the :data:`xref` of an optional contents group (OCG or OCMD). Any " +"previous xref will be overwritten. If zero, a previous entry will be " +"removed. An exception occurs if the xref is not zero and does not point " +"to a valid PDF object." +msgstr "" +"オプションコンテンツグループ(OCGまたはOCMD)のxref。以前の :data:`xref` " +"は上書きされます。ゼロの場合、以前のエントリが削除されます。xrefがゼロでない場合かつ有効なPDFオブジェクトを指していない場合、例外が発生します。" + +#: ../../annot.rst:185 552f6fa51ec3485693b27f15c1ad9e28 +msgid "This does **not require executing** :meth:`Annot.update` to take effect." +msgstr "これは :meth:`Annot.update` の **実行を必要としません。** " + +#: ../../annot.rst:189 883041291cbb4087af3abbc8d2db4d3e +msgid "" +"Return the :data:`xref` of an optional content object, or zero if there " +"is none." +msgstr "オプションコンテンツオブジェクトの :data:`xref` 、または存在しない場合はゼロを返します。" + +#: ../../annot.rst:191 da9806aa82134c699ec1298c08405974 +msgid "zero or the xref of an OCG (or OCMD)." +msgstr "ゼロまたはOCG(またはOCMD)のxref。" + +#: ../../annot.rst:196 0232d0f9f89c4b9caf445de434761a86 +msgid "New in v1.19.3" +msgstr "v1.19.3 で新規追加" + +#: ../../annot.rst:198 bd8f5da402b344e7a57080ae00f8f8ea +msgid "Set annotation to be \"In Response To\" another one." +msgstr "注釈を別の注釈への「応答として」設定します。" + +#: ../../annot.rst:200 29a518aa3ac14b4fb96189039d52a919 +msgid "" +"The :data:`xref` of another annotation. .. note:: Must refer to an " +"existing annotation on this page. Setting this property requires no " +"subsequent `update()`." +msgstr "" + +#: ../../annot.rst:200 044062a21f8a4ffba1e8c12a4dbfc7da +msgid "The :data:`xref` of another annotation." +msgstr "別の注釈の :data:`xref` 。" + +#: ../../annot.rst:202 056ca100a6c744158b0f80b623e231b8 +msgid "" +"Must refer to an existing annotation on this page. Setting this property " +"requires no subsequent `update()`." +msgstr "このプロパティを設定するには、このページの既存の注釈を参照する必要があります。このプロパティを設定する際、後続の `update()` は不要です。" + +#: ../../annot.rst:207 ../../annot.rst:216 ../../annot.rst:236 +#: 4f1bdf826df24156ae246c89b4778e32 712a3a2f72694e64a884ee7a5034ee28 +#: a6e6686854de4108a873d9d33f171c1c +msgid "New in v1.18.4" +msgstr "v1.18.4 で新たに追加" + +#: ../../annot.rst:209 1f213b949eb642e28f99585ec1a407d4 +msgid "" +"Set the annotation's Popup annotation to open or closed -- **or** the " +"annotation itself, if its type is 'Text' (\"sticky note\")." +msgstr "注釈のポップアップ注釈を開いた状態または閉じた状態に設定します – *または* その注釈自体、そのタイプが 'Text'(「付箋」)の場合。" + +#: ../../annot.rst:211 92f2770d0bff4361a606a0310fa7bbef +msgid "the desired open state." +msgstr "望ましい開いた状態。" + +#: ../../annot.rst:218 5b8c653d8a8843e8babb5fceb37261ea +msgid "" +"Create a Popup annotation for the annotation and specify its rectangle. " +"If the Popup already exists, only its rectangle is updated." +msgstr "注釈のためのポップアップ注釈を作成し、その矩形を指定します。ポップアップが既に存在する場合、その矩形のみが更新されます。" + +#: ../../annot.rst:220 7909b167cec241d99b7c2d3413fe1fbe +msgid "the desired rectangle." +msgstr "望ましい矩形。" + +#: ../../annot.rst:226 0026e882e4004b07b13c1e01e1fc77e4 +msgid "" +"Set the annotation's transparency. Opacity can also be set in " +":meth:`Annot.update`." +msgstr "注釈の透明度を設定します。透明度は :meth:`Annot.update` でも設定できます。" + +#: ../../annot.rst:228 5405316104d84f4abcecbc698904a721 +msgid "" +"a float in range *[0, 1]*. Any value outside is assumed to be 1. E.g. a " +"value of 0.5 sets the transparency to 50%." +msgstr "*[0、1]* の範囲内の浮動小数点数。範囲外の値は 1 と見なされます。例:0.5 の値は透明度を 50% に設定します。" + +#: ../../annot.rst:230 9a3c8b905c7949bbb3e5acb4a19981b6 +msgid "Three overlapping 'Circle' annotations with each opacity set to 0.5:" +msgstr "透明度がそれぞれ 0.5 に設定された3つの重なる「Circle」注釈:" + +#: ../../annot.rst:238 95cea4131fbd4be4ac5581cfa874b1e4 +msgid "" +"The annotation's blend mode. See :ref:`AdobeManual`, page 324 for " +"explanations." +msgstr "注釈のブレンドモード。詳細については :ref:`AdobeManual` 、ページ324を参照してください。" + +#: ../../annot.rst:241 2ff0fa0dc5fe4ec8a25c8bc7b2d4fcef +msgid "the blend mode or ``None``." +msgstr "ブレンドモードまたは ``None`` 。" + +#: ../../annot.rst:246 e1a9e6a9cfe74f7ab8cc8cffb93c59bc +msgid "New in v1.16.14" +msgstr "v1.16.14 で新たに追加" + +#: ../../annot.rst:248 e7ba898f2d6a4a198a520db5457bbad2 +msgid "" +"Set the annotation's blend mode. See :ref:`AdobeManual`, page 324 for " +"explanations. The blend mode can also be set in :meth:`Annot.update`." +msgstr "" +"注釈のブレンドモードを設定します。詳細については :ref:`AdobeManual` 、ページ324を参照してください。ブレンドモードは " +":meth:`Annot.update` でも設定できます。" + +#: ../../annot.rst:250 ddb5fbe792ab4eae98a254729382b924 +msgid "" +"set the blend mode. Use :meth:`Annot.update` to reflect this in the " +"visual appearance. For predefined values see :ref:`BlendModes`. Use " +"`PDF_BM_Normal` to **remove** a blend mode." +msgstr "" +"ブレンドモードを設定します。視覚的な外観に反映するには :meth:`Annot.update` " +"を使用します。事前定義の値についてはPDF標準のブレンドモードを参照してください。ブレンドモードを **削除する** には " +"`PDF_BM_Normal` を使用します。" + +#: ../../annot.rst:255 0cd5afc2c9a14bbfa787d0023b2604f0 +msgid "New in version 1.16.0" +msgstr "バージョン1.16.0で新たに追加" + +#: ../../annot.rst:257 64b51d5037e246d29be36ea82f374e63 +msgid "" +"Change the name field of any annotation type. For 'FileAttachment' and " +"'Text' annotations, this is the icon name, for 'Stamp' annotations the " +"text in the stamp. The visual result (if any) depends on your PDF viewer." +" See also :ref:`mupdficons`." +msgstr "" +"すべての注釈タイプの名前フィールドを変更します。「FileAttachment」と「Text」注釈の場合、これはアイコンの名前であり、「Stamp」注釈の場合はスタンプのテキストです。視覚的な結果(ある場合)は、PDFビューアに依存します。:ref:`mupdficons`" +" も参照してください。" + +#: ../../annot.rst:259 ea1dcd069e554ea7882c4893488c637c +msgid "the new name." +msgstr "新しい名前。" + +#: ../../annot.rst:261 01e8667a8ae34ab1a9f4fe7311f976b3 +msgid "" +"If you set the name of a 'Stamp' annotation, then this will **not " +"change** the rectangle, nor will the text be layouted in any way. If you " +"choose a standard text from :ref:`StampIcons` (the **exact** name piece " +"after `\"STAMP_\"`), you should receive the original layout. An " +"**arbitrary text** will not be changed to upper case, but be written in " +"font \"Times-Bold\" as is, horizontally centered in **one line** and be " +"shortened to fit. To get your text fully displayed, its length using " +":data:`fontsize` 20 must not exceed 190 points. So please make sure that " +"the following inequality is true: `pymupdf.get_text_length(text, " +"fontname=\"tibo\", fontsize=20) <= 190`." +msgstr "" +"「Stamp」注釈の名前を設定した場合、これは矩形を **変更せず** 、テキストもレイアウトされません。:ref:`StampIcons` " +"から標準のテキストを選択すると( `\"STAMP_\"` の後の **正確な** 名前部分)、元のレイアウトが表示されるはずです。**任意** " +"のテキストは大文字に変更されず、「Times-Bold」フォントで水平中央揃えで **1行** " +"に表示され、収まるように短縮されます。テキストを完全に表示するには、フォントサイズ20を使用してのテキストの長さが190ピクセルを超えてはいけません。したがって、以下の不等式が成り立つことを確認してください:" +" `pymupdf.get_text_length(text, fontname=\"tibo\", fontsize=20) <= 190` 。" + +#: ../../annot.rst:265 86ab16b4319d42389d6edde9d42dc4f0 +msgid "" +"Change the rectangle of an annotation. The annotation can be moved around" +" and both sides of the rectangle can be independently scaled. However, " +"the annotation appearance will never get rotated, flipped or sheared. " +"This method only affects certain annotation types [#f2]_ and will lead to" +" a message on Python's `sys.stderr` in other cases. No exception will be " +"raised, but `False` will be returned." +msgstr "" +"注釈の矩形を変更します。注釈は移動し、矩形の両側を独立してスケーリングできます。ただし、注釈の外観は回転、反転、またはせん断されることはありません。このメソッドは特定の注釈タイプにのみ影響を与えます" +" [#f2]_ 、それ以外の場合はPythonの `sys.stderr` にメッセージが表示されます。例外は発生しませんが、`False` " +"が返されます。" + +#: ../../annot.rst:267 a9a53793892b498e943440f97163a7d6 +msgid "" +"the new rectangle of the annotation (finite and not empty). E.g. using a " +"value of *annot.rect + (5, 5, 5, 5)* will shift the annot position 5 " +"pixels to the right and downwards." +msgstr "" +"rect (rect_like) – 注釈の新しい矩形(有限で空ではない)です。たとえば、*annot.rect + (5, 5, 5, 5)* " +"の値を使用すると、注釈を右に5ピクセル、下に5ピクセル移動させます。" + +#: ../../annot.rst:269 6898411fb0da40fa867760423a53258f +msgid "You **need not** invoke :meth:`Annot.update` for activation of the effect." +msgstr "効果の有効化には :meth:`Annot.update` を呼び出す **必要はありません** 。" + +#: ../../annot.rst:274 e67ffff21f5f4efaa7d4c8275c30a8fe +msgid "" +"Set the rotation of an annotation. This rotates the annotation rectangle " +"around its center point. Then a **new annotation rectangle** is " +"calculated from the resulting quad." +msgstr "注釈の回転を設定します。これにより、注釈の矩形はその中心点を中心に回転します。その後、結果の四角形から **新しい注釈の矩形** が計算されます。" + +#: ../../annot.rst:276 308edadc7a93427ea4c587eeedf6a4cb +msgid "" +"rotation angle in degrees. Arbitrary values are possible, but will be " +"clamped to the interval `[0, 360)`." +msgstr "度数法での回転角度。任意の値が可能ですが、区間 `[0, 360)` にクランプされます。" + +#: ../../annot.rst:279 f81088d72d42429a81b58cbe0635cf58 +msgid "You **must invoke** :meth:`Annot.update` to activate the effect." +msgstr "効果を有効にするには、:meth:`Annot.update` を **呼び出す必要があります** 。" + +#: ../../annot.rst:280 cb61e0b1ff9649bc874eb5669b39fda1 +msgid "" +"For PDF_ANNOT_FREE_TEXT, only one of the values 0, 90, 180 and 270 is " +"possible and will **rotate the text** inside the current rectangle (which" +" remains unchanged). Other values are silently ignored and replaced by 0." +msgstr "" +"PDF_ANNOT_FREE_TEXTの場合、値0、90、180、270のいずれかの値のみが可能で、現在の矩形内の **テキストを回転させます**" +" (矩形自体は変更されません)。その他の値は静かに無視され、0に置き換えられます。" + +#: ../../annot.rst:281 2b42b8dba5254e0b9d8f787fc6ce8197 +msgid "" +"Otherwise, only the following :ref:`AnnotationTypes` can be rotated: " +"'Square', 'Circle', 'Caret', 'Text', 'FileAttachment', 'Ink', 'Line', " +"'Polyline', 'Polygon', and 'Stamp'. For all others the method is a no-op." +msgstr "" +"それ以外の場合、次の :ref:`AnnotationTypes` " +"のみが回転できます:'Square'、'Circle'、'Caret'、'Text'、'FileAttachment'、'Ink'、'Line'、'Polyline'、'Polygon'、および'Stamp'。それ以外のすべての場合、このメソッドは無効です。" + +#: ../../annot.rst:286 9b7d7c0af36c4997a1b2014f00fd693a +msgid "" +"Changed in version 1.16.9: Allow specification without using a " +"dictionary. The direct parameters are used if *border* is not a " +"dictionary." +msgstr "バージョン1.16.9で変更: 辞書を使用せずに指定を許可。*border* が辞書でない場合、直接のパラメータが使用されます。" + +#: ../../annot.rst:288 6a3bcf3fd55e40f0906bfc9c73321125 +msgid "Changed in version 1.22.5: Support of the \"cloudy\" border effect." +msgstr "バージョン1.22.5で変更: \"cloudy\"ボーダーエフェクトのサポート。" + +#: ../../annot.rst:290 766f20871ac840ad82e2a534a5d7428c +msgid "" +"PDF only: Change border width, dashing, style and cloud effect. See the " +":attr:`Annot.border` attribute for more details." +msgstr "" +"PDFのみ: ボーダーの幅、点線、スタイル、およびクラウド効果を変更します。詳細については :attr:`Annot.border` " +"属性を参照してください。" + +#: ../../annot.rst:293 5412a3c876dc445b8991e6bfe6206c97 +msgid "" +"a dictionary as returned by the :attr:`border` property, with keys " +"*\"width\"* (*float*), *\"style\"* (*str*), *\"dashes\"* (*sequence*) " +"and *clouds* (*int*). Omitted keys will leave the resp. property " +"unchanged. Set the border argument to `None` (the default) to use the " +"other arguments." +msgstr "" +":attr:`border` プロパティから返されるような辞書で、キーに " +"\"width\"(float)、\"style\"(str)、\"dashes\"(シーケンス)、および " +"\"clouds\"(int)を持っています。省略されたキーはそれぞれのプロパティを変更しないままにします。他の引数を使用する場合、border引数を" +" `None` (デフォルト)に設定します。" + +#: ../../annot.rst:295 a1a029503d4f45a6bfc58610e6294894 +msgid "A non-negative value will change the border line width." +msgstr "非負の値はボーダーの線幅を変更します。" + +#: ../../annot.rst:296 3133c2f7605a4781b259ff26dab31bcf +msgid "A value other than `None` will change this border property." +msgstr "`None` 以外の値はこのボーダープロパティを変更します。" + +#: ../../annot.rst:297 1243de5fc0aa4006bdde153c4e9f164c +msgid "" +"All items of the sequence must be integers, otherwise the parameter is " +"ignored. To remove dashing use: `dashes=[]`. If dashes is a non-empty " +"sequence, \"style\" will automatically be set to \"D\" (dashed)." +msgstr "" +"シーケンスのすべてのアイテムは整数である必要があり、それ以外の場合、パラメータは無視されます。点線を削除するには、`dashes=[]` " +"を使用します。dashesが空でないシーケンスの場合、\"style\"は自動的に \"D\"(点線)に設定されます。" + +#: ../../annot.rst:298 03237562991b48e080aec6fb53dfc4aa +msgid "" +"A value >= 0 will change this property. Use `clouds=0` to remove the " +"cloudy appearance completely. Only annotation types 'Square', 'Circle', " +"and 'Polygon' are supported with this property." +msgstr "" +"0以上の値はこのプロパティを変更します。クラウディな外観を完全に削除するには、`clouds=0` " +"を使用します。このプロパティはアノテーションタイプ 'Square'、'Circle'、および 'Polygon' のみでサポートされています。" + +#: ../../annot.rst:302 4f67c0516f2e43d2a6432673d5bf3c8f +msgid "Changes the annotation flags. Use the `|` operator to combine several." +msgstr "注釈のフラグを変更します。複数のフラグを組み合わせるには `|` 演算子を使用します。" + +#: ../../annot.rst:304 9837a34f7e344f5db3646bd8c2935c08 +msgid "an integer specifying the required flags." +msgstr "必要なフラグを指定する整数。" + +#: ../../annot.rst:308 717edac376fa4c2781212ccf4f1300fb +msgid "" +"Changed in version 1.16.9: Allow colors to be directly set. These " +"parameters are used if *colors* is not a dictionary." +msgstr "バージョン1.16.9で変更: 色を直接設定できるようにしました。これらのパラメータは、*colors* が辞書でない場合に使用されます。" + +#: ../../annot.rst:310 d388c00f532d4aecafa4def64bd6c525 +msgid "" +"Changes the \"stroke\" and \"fill\" colors for supported annotation types -- not all annotation types accept both. **Do not use this method at all for FreeText annotations** because it has its special conventions to deal with up to three colors (border, fill, text)." +msgstr "サポートされている注釈タイプに対して「ストローク」と「塗りつぶし」の色を変更します – すべての注釈が両方を受け入れるわけではありません。FreeText注釈にはこのメソッドを一切使用しないでください。 +このタイプの注釈は、境界線、塗りつぶし、テキストの最大3つの色を扱うための特別な規則があるためです。" + +#: ../../annot.rst:312 6de264581c6f4f85a50cbe3ab18a7397 +msgid "" +"a dictionary containing color specifications. For accepted dictionary " +"keys and values see below. The most practical way should be to first make" +" a copy of the *colors* property and then modify this dictionary as " +"required." +msgstr "" +"色の仕様を含む辞書。受け入れられる辞書のキーと値については以下を参照してください。最も実用的な方法は、まず ** " +"プロパティのコピーを作成し、その後必要に応じてこの辞書を変更することです。" + +#: ../../annot.rst:313 ../../annot.rst:314 36d2b5665df049e29f372ec83d1b2090 +#: f841d09bf133425690dda9c556072aa1 +msgid "see above." +msgstr "上記を参照してください。" + +#: ../../annot.rst:316 13a268dfb4a44682b64f2e122fe775e3 +msgid "" +"To completely remove a color specification, use an empty sequence like `[]`. If you specify `None`, an existing specification will not be changed." +msgstr "" +"色の仕様を完全に削除するには、`[]` のような空のシーケンスを使用してください。`None` " +"を指定した場合、既存の仕様は変更されません。" + +#: ../../annot.rst:321 941d573346c9417a9e600b67f4c4cdfb +msgid "New in version 1.16.12" +msgstr "バージョン1.16.12で新規追加" + +#: ../../annot.rst:323 b371873f742e46ad9b68e6d4496f6e92 +msgid "" +"Delete annotations referring to this one. This includes any 'Popup' " +"annotations and all annotations responding to it." +msgstr "この注釈を参照する注釈を削除します。これには「ポップアップ」注釈とそれに応答するすべての注釈が含まれます。" + +#: ../../annot.rst:337 826c48ae25be4dc5a9aa146fc165e55f +msgid "" +"Synchronize the appearance of an annotation with its properties after " +"relevant changes." +msgstr "関連する変更後に注釈の外観をプロパティに同期させます。" + +#: ../../annot.rst:339 a2fd472b5bf84f209bce5c88b39a1804 +msgid "You can safely **omit** this method **only** for the following changes:" +msgstr "以下の変更に関して **のみ** 、このメソッドを安全に **省略できます** :" + +#: ../../annot.rst:345 883f006d1b2a459bbf7d268dc47bd9eb +msgid ":meth:`Annot.set_info` (except any changes to *\"content\"*)" +msgstr ":meth:`Annot.set_info` (*「content」* 以外の変更を除く)" + +#: ../../annot.rst:347 0e9409972acc4017918ec20f51e46ce4 +msgid "" +"All arguments are optional. *(Changed in v1.16.14)* Blend mode and " +"opacity are applicable to **all annotation types**. The other arguments " +"are mostly special use, as described below." +msgstr "" +"すべての引数はオプションです。 *(v1.16.14で変更)* ブレンドモードと不透明度は **すべての注釈タイプ** に適用されます。 " +"他の引数は主に特殊な用途として、以下で説明されています。" + +#: ../../annot.rst:349 f2acfb9d00cd4a1f8886ea7b59416839 +msgid "" +"Color specifications may be made in the usual format used in PuMuPDF as " +"sequences of floats ranging from 0.0 to 1.0 (including both). The " +"sequence length must be 1, 3 or 4 (supporting GRAY, RGB and CMYK " +"colorspaces respectively). For GRAY, just a float is also acceptable." +msgstr "カラーの仕様は、通常のPuMuPDFで使用される形式で指定できます。0.0から1.0までの浮動小数点数のシーケンスです(両方を含む)。シーケンスの長さは1、3、または4である必要があります(GRAY、RGB、およびCMYKのカラースペースをサポートしています)。GRAYの場合、浮動小数点数だけでも受け入れられます。" + +#: ../../annot.rst:351 c579a7f52a8c4efba4049c2f1011da6d +msgid "" +"*(new in v1.16.14)* **valid for all annotation types:** change or set the" +" annotation's transparency. Valid values are *0 <= opacity < 1*." +msgstr "" +"*(v1.16.14で新規)* **すべての注釈タイプに対して有効:** 注釈の透明度を変更または設定します。有効な値は *0 <= " +"opacity < 1です* 。" + +#: ../../annot.rst:352 a81a6b9db111497fb78b5b3ceb27e377 +msgid "" +"*(new in v1.16.14)* **valid for all annotation types:** change or set the" +" annotation's blend mode. For valid values see :ref:`BlendModes`." +msgstr "" +"*(v1.16.14で新規)* **すべての注釈タイプに対して有効:** " +"注釈のブレンドモードを変更または設定します。有効な値についてはPDF標準のブレンドモードを参照してください。" + +#: ../../annot.rst:353 ae8e841246674f4ab53acaa575475903 +msgid "change :data:`fontsize` of the text. 'FreeText' annotations only." +msgstr "テキストのフォントサイズを変更します。 'FreeText' 注釈のみです。" + +#: ../../annot.rst:354 26cc7f290781487ea2c8260b485b91f9 +msgid "change the text color. 'FreeText' annotations only. This has the same effect as ``border_color``. Note that the text color of rich-text annotations cannot be changed at all because it is set by HTML / CSS syntax and part of the text itself." +msgstr "テキストの色を変更します。 'FreeText' 注釈のみです。" + +#: ../../annot.rst:355 b98848bcd4c84b94b4f2339e929facdb +msgid "change the border color. 'FreeText' annotations only. This has the same effect as ``text_color``." +msgstr "境界線の色を変更します。 'FreeText' 注釈のみです。" + +#: ../../annot.rst:356 ea1d7e13b3fc45dcaeaa3bd564ccba38 +msgid "" +"the fill color. * 'Line', 'Polyline', 'Polygon' annotations: use it to " +"give applicable line end symbols a fill color other than that of the " +"annotation *(changed in v1.16.16)*." +msgstr "" + +#: ../../annot.rst:356 70408a2e0957432fafc595f63a72dac3 +msgid "the fill color." +msgstr "塗りつぶしの色です。" + +#: ../../annot.rst:358 56698a0491524cf1839cfd2e29a272a8 +msgid "" +"'Line', 'Polyline', 'Polygon' annotations: use it to give applicable line" +" end symbols a fill color other than that of the annotation *(changed in " +"v1.16.16)*." +msgstr "" +"'Line'、'Polyline'、'Polygon' 注釈:適用可能な線の端のシンボルに注釈の色以外の塗りつぶしの色を与えるために使用します " +"*(v1.16.16で変更)* 。" + +#: ../../annot.rst:360 98b738882d314e1ca9e47acfb29b9c4c +msgid "" +"*(new in v1.17.2)* add two diagonal lines to the annotation rectangle. " +"'Redact' annotations only. If not desired, ``False`` must be specified " +"even if the annotation was created with ``False``." +msgstr "" +"*(v1.17.2で新規)* 注釈の矩形に2本の対角線を追加します。 'Redact' 注釈のみです。希望しない場合、注釈が``False`` " +"で作成された場合でも ``False`` を指定する必要があります。" + +#: ../../annot.rst:361 317ef91a944a47d2bcd414dfa48b595b +msgid "" +"new rotation value. Default (-1) means no change. Supports 'FreeText' and" +" several other annotation types (see :meth:`Annot.set_rotation`), [#f1]_." +" Only choose 0, 90, 180, or 270 degrees for 'FreeText'. Otherwise any " +"integer is acceptable." +msgstr "" +"新しい回転値。デフォルト値(-1)は変更なしを意味します。 'FreeText' " +"およびいくつかの他の注釈タイプをサポートします:meth:`Annot.set_rotation` を参照) [#f1]_ 。 " +"'FreeText' の場合、0、90、180、または270度を選択してください。それ以外の場合、任意の整数が受け入れられます。" + +#: ../../annot.rst:365 5d8619257f4e40e5be96294c5adbc57f +msgid "" +"Using this method inside a :meth:`Page.annots` loop is **not " +"recommended!** This is because most annotation updates require the owning" +" page to be reloaded -- which cannot be done inside this loop. Please use" +" the example coding pattern given in the documentation of this generator." +msgstr "" +":meth:`Page.annots` ループ内でこのメソッドを使用することは **お勧めしません!** " +"これは、ほとんどの注釈の更新には所有ページの再読み込みが必要であり、このループ内で行うことはできないためです。このジェネレータのドキュメントで提供されている例のコーディングパターンを使用してください。" + +#: ../../annot.rst:368 +msgid "This method is the only way to change the colors of a FreeText annotation. You cannot use :meth:`Annot.set_colors` for this purpose. But be aware that for rich-text annotations, the text color is never changed. The text color is set by the ``text_color`` entry of the ``info`` dictionary. This is a limitation of |MuPDF| and not a bug." +msgstr "このメソッドは、FreeText注釈の色を変更する唯一の方法です。この目的には、 :meth:`Annot.set_colors()` を使用することはできません。 ただし、リッチテキスト注釈の場合は注意が必要です。テキストの色は変更されず、 ``info`` 辞書の ``text_color`` エントリによって設定されます。 これは |MuPDF| の制限であり、不具合ではありません。" + +#: ../../annot.rst:370 ff9bc4bf0cdd4acc976c156f5a315306 +msgid "Basic information of the annot's attached file." +msgstr "アノットが添付されたファイルの基本情報。" + +#: ../../annot.rst:373 c32d33e3cdca401092ffe639bf885cc5 +msgid "" +"a dictionary with keys *filename*, *ufilename*, *desc* (description), " +"*size* (uncompressed file size), *length* (compressed length) for " +"FileAttachment annot types, else ``None``." +msgstr "" +"FileAttachmentアノットタイプの場合、*filename* (ファイル名)、*ufilename* " +"(Unicodeファイル名)、*desc* (説明)、*size* (非圧縮ファイルサイズ)、*length* " +"(圧縮長さ)のキーを持つ辞書。それ以外の場合は ``None`` 。" + +#: ../../annot.rst:377 4ac1fb9aefc542cf945016ead24f219e +msgid "Returns attached file content." +msgstr "添付ファイルの内容を返します。" + +#: ../../annot.rst:380 4f235876a19742f5b965f2ae9491d61f +msgid "the content of the attached file." +msgstr "添付ファイルの内容。" + +#: ../../annot.rst:390 be5cd657efdd473d928763bba0df5b4e +msgid "" +"Updates the content of an attached file. All arguments are optional. No " +"arguments lead to a no-op." +msgstr "添付ファイルの内容を更新します。すべての引数はオプションです。引数が指定されない場合、操作は行われません。" + +#: ../../annot.rst:392 a0ad11a9b58947a286714487e1c2087e +msgid "" +"the new file content. Omit to only change meta-information. *(Changed in" +" version 1.14.13)* *io.BytesIO* is now also supported." +msgstr "" + +#: ../../annot.rst:392 804bc0250bb544798eca747f4537b547 +msgid "the new file content. Omit to only change meta-information." +msgstr "新しいファイルの内容。メタ情報のみ変更する場合は省略します。" + +#: ../../annot.rst:394 11834fed4d684da79f297310c1d94791 +msgid "*(Changed in version 1.14.13)* *io.BytesIO* is now also supported." +msgstr "*(バージョン1.14.13で変更)io.BytesIO* もサポートされるようになりました。" + +#: ../../annot.rst:396 7985a4acc30f428ca3928e1fd5bb9186 +msgid "new filename to associate with the file." +msgstr "ファイルに関連付ける新しいファイル名。" + +#: ../../annot.rst:398 c51d1187235645a19eb686c1ec3a154d +msgid "new unicode filename to associate with the file." +msgstr "ファイルに関連付ける新しいUnicodeファイル名。" + +#: ../../annot.rst:400 9312c3798d7e49c7b0526c1498516ca0 +msgid "new description of the file content." +msgstr "ファイル内容の新しい説明。" + +#: ../../annot.rst:404 8242ad3e8f6b4974832e88f31eac0d67 +msgid "Return the embedded sound of an audio annotation." +msgstr "オーディオアノテーションの埋め込まれた音声を返します。" + +#: ../../annot.rst:407 38833c4151ab4eb1a89dd8bceb570228 +msgid "" +"the sound audio file and accompanying properties. These are the possible " +"dictionary keys, of which only \"rate\" and \"stream\" are always " +"present. =========== " +"======================================================= Key " +"Description =========== " +"======================================================= rate " +"(float, requ.) samples per second channels (int, opt.) number of sound" +" channels bps (int, opt.) bits per sample value per channel " +"encoding (str, opt.) encoding format: Raw, Signed, muLaw, ALaw " +"compression (str, opt.) name of compression filter stream (bytes, " +"requ.) the sound file content =========== " +"=======================================================" +msgstr "" + +#: ../../annot.rst:407 79ae636e3b544cd6b621add147e138b5 +msgid "" +"the sound audio file and accompanying properties. These are the possible " +"dictionary keys, of which only \"rate\" and \"stream\" are always " +"present." +msgstr "音声オーディオファイルと関連するプロパティ。これらは可能な辞書キーで、常に「rate」と「stream」が存在します。" + +#: ../../annot.rst:410 0b4fc6b85ef1479090143f2663f4c999 +msgid "Key" +msgstr "キー" + +#: ../../annot.rst:410 1c994b43bf0d44c0820ed3b48815120a +msgid "Description" +msgstr "説明" + +#: ../../annot.rst:412 10203784bab44814ac64618438deabc8 +msgid "rate" +msgstr "" + +#: ../../annot.rst:412 a983a057a13d497d8f91e9f59e5eac09 +msgid "(float, requ.) samples per second" +msgstr "(float, 必須) 1秒あたりのサンプル数" + +#: ../../annot.rst:413 3c97dcc4efdc4791a046bca23054cb1b +msgid "channels" +msgstr "" + +#: ../../annot.rst:413 758eb73453cc466fbe0b3e537183b810 +msgid "(int, opt.) number of sound channels" +msgstr "(int, オプション) サウンドチャンネルの数" + +#: ../../annot.rst:414 618e9b1f317145ab9b2b4f65a918ecbb +msgid "bps" +msgstr "" + +#: ../../annot.rst:414 5273044bf12f464b986e788dfcb075bc +msgid "(int, opt.) bits per sample value per channel" +msgstr "(int, オプション) チャンネルごとのサンプル値のビット数" + +#: ../../annot.rst:415 09e58c52e10d4eec90e2798ea701c1b7 +msgid "encoding" +msgstr "" + +#: ../../annot.rst:415 9a81a8db10c54364aea6ec3b8e317efe +msgid "(str, opt.) encoding format: Raw, Signed, muLaw, ALaw" +msgstr "(str, オプション) エンコーディング形式: Raw, Signed, muLaw, ALaw" + +#: ../../annot.rst:416 e8c893531a1348178d13e91dfd70d89b +msgid "compression" +msgstr "" + +#: ../../annot.rst:416 ce945113b1be4736bc132b8adfb4aac1 +msgid "(str, opt.) name of compression filter" +msgstr "(str, オプション) 圧縮フィルタの名前" + +#: ../../annot.rst:417 9e1d46ec4ad34aa7bec8e3020fc5b66c +msgid "stream" +msgstr "" + +#: ../../annot.rst:417 4de00f39b9834412b3cb30f1a730c6c2 +msgid "(bytes, requ.) the sound file content" +msgstr "(bytes, 必須) サウンドファイルの内容" + +#: ../../annot.rst:423 dad1a748ee124360ac7a5a31d4f5c0a3 +msgid "" +"The annotation's transparency. If set, it is a value in range *[0, 1]*. " +"The PDF default is 1. However, in an effort to tell the difference, we " +"return *-1.0* if not set." +msgstr "" +"アノテーションの透明度。設定されている場合、範囲 *[0, 1]* の値です。PDFのデフォルトは1ですが、違いを示すために設定されていない場合は" +" *-1.0* を返します。" + +#: ../../annot.rst:429 3ad7eb5c46b5403bb06e364a56cacbfd +msgid "The owning page object of the annotation." +msgstr "アノテーションの所属ページオブジェクト。" + +#: ../../annot.rst:431 1b5914cbce69437181992042c380a420 +msgid ":ref:`Page`" +msgstr "" + +#: ../../annot.rst:435 3e199076c06343159be68b9b9ac5854c +msgid "The annot rotation." +msgstr "アノテーションの回転角度。" + +#: ../../annot.rst:438 f064f446736f4f3ca1bdda570466d81d +msgid "" +"a value [-1, 359]. If rotation is not at all, -1 is returned (and implies" +" a rotation angle of 0). Other possible values are normalized to some " +"value value 0 <= angle < 360." +msgstr "" +"値は[-1, 359]です。回転が全くない場合、-1が返されます(回転角度0を意味します)。その他の可能な値は、一定の値0 <= angle < " +"360に正規化されます。" + +#: ../../annot.rst:442 1f476437e6b3410f9645674ced6eeb13 +msgid "The rectangle containing the annotation." +msgstr "アノテーションを含む長方形領域。" + +#: ../../annot.rst:444 ../../annot.rst:541 0103de8760f04f8b89b8e02781091781 +#: 86935ea548d34e7faf9b648b84bb5422 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../annot.rst:448 419bed750a274cb29401beaa0ab8594e +msgid "The next annotation on this page or None." +msgstr "このページ上の次のアノテーションまたはNone。" + +#: ../../annot.rst:450 571c7db54b9c4cc895a02cc3323aa52b +msgid "*Annot*" +msgstr "" + +#: ../../annot.rst:454 3478938110934003bb0b27db9dd1d0f6 +msgid "" +"A number and one or two strings describing the annotation type, like " +"**[2, 'FreeText', 'FreeTextCallout']**. The second string entry is " +"optional and may be empty. See the appendix :ref:`AnnotationTypes` for a " +"list of possible values and their meanings." +msgstr "" +"アノテーションタイプ を説明する数値と1つまたは2つの文字列、例: **[2, 'FreeText', 'FreeTextCallout']** " +"。第2の文字列エントリはオプションで、空であるかもしれません。可能な値とその意味については、付録の :ref:`AnnotationTypes` " +"のリストを参照してください。" + +#: ../../annot.rst:460 d18d81d2c0e74065996a20b62876188b +msgid "" +"A dictionary containing various information. All fields are optional " +"strings. For information items not provided, an empty string is returned." +msgstr "異なる情報を含む辞書。すべてのフィールドはオプションの文字列です。提供されない情報項目については、空の文字列が返されます。" + +#: ../../annot.rst:462 1cdd4502a1294093bfc18ad103a2792b +msgid "" +"*name* -- e.g. for 'Stamp' annotations it will contain the stamp text " +"like \"Sold\" or \"Experimental\", for other annot types you will see the" +" name of the annot's icon here (\"PushPin\" for FileAttachment)." +msgstr "" +"*name* – たとえば、 'Stamp' 注釈の場合、 'Sold' または 'Experimental' " +"のようなスタンプテキストが含まれ、他の注釈のタイプでは注釈のアイコンの名前がここに表示されます(FileAttachment の場合は " +"'PushPin')。" + +#: ../../annot.rst:464 d8988903bd564a26815802c44906eb95 +msgid "" +"*content* -- a string containing the text for type *Text* and *FreeText* " +"annotations. Commonly used for filling the text field of annotation pop-" +"up windows." +msgstr "" +"*content* – *テキスト* タイプと *FreeText* " +"注釈のテキストを含む文字列。注釈のポップアップウィンドウのテキストフィールドを埋めるために一般的に使用されます。" + +#: ../../annot.rst:466 b2ba350b46154e08a3e4b2ac58f36f9c +msgid "" +"*title* -- a string containing the title of the annotation pop-up window." +" By convention, this is used for the **annotation author**." +msgstr "*title* – 注釈のポップアップウィンドウのタイトルを含む文字列。通常、これは **注釈の著者** に使用されます。" + +#: ../../annot.rst:468 56e95e9ee6f64bfe83d55681934676a0 +msgid "*creationDate* -- creation timestamp." +msgstr "*creationDate* – 作成タイムスタンプ。" + +#: ../../annot.rst:469 94225a86dd7e41ce95cf8ec26e18ceb5 +msgid "*modDate* -- last modified timestamp." +msgstr "*modDate* – 最終変更タイムスタンプ。" + +#: ../../annot.rst:470 3a3948aa631a4dc1bc04385e2dca9396 +msgid "*subject* -- subject." +msgstr "*subject* – 主題。" + +#: ../../annot.rst:471 0bc8e45b8d36435684bfd646d1f1e5b4 +msgid "" +"*id* -- *(new in version 1.16.10)* a unique identification of the " +"annotation. This is taken from PDF key */NM*. Annotations added by " +"PyMuPDF will have a unique name, which appears here." +msgstr "" +"*id* – (バージョン1.16.10で新規追加)注釈の一意の識別子。これはPDFキー */ NM* " +"から取得されます。PyMuPDFによって追加された注釈には一意の名前があり、ここに表示されます" + +#: ../../annot.rst:478 a128a345685340cfa13587bc266a1624 +msgid "" +"An integer whose low order bits contain flags for how the annotation " +"should be presented." +msgstr "注釈の表示方法を示すフラグを含む低位ビットを持つ整数。" + +#: ../../annot.rst:484 a55e047cd36041628d5919c2b49276d0 +msgid "" +"A pair of integers specifying start and end symbol of annotations types " +"'FreeText', 'Line', 'PolyLine', and 'Polygon'. ``None`` if not " +"applicable. For possible values and descriptions in this list, see the " +":ref:`AdobeManual`, table 1.76 on page 400." +msgstr "" +"'FreeText'、'Line'、'PolyLine'、および'Polygon'の注釈タイプの開始および終了シンボルを指定する2つの整数のペア。該当しない場合は" +" *なし* 。このリストでの可能な値と説明については、:ref:`AdobeManual` のページ400の表1.76を参照してください。" + +#: ../../annot.rst:490 13761897f939405a97d8a3498f0040d7 +msgid "" +"A list containing a variable number of point (\"vertices\") coordinates " +"(each given by a pair of floats) for various types of annotations:" +msgstr "さまざまな種類の注釈に対する、可変数の点(\"頂点\")座標(各々が浮動小数点数のペアで指定される)を含むリスト:" + +#: ../../annot.rst:492 9b008a64e3ec4e7c8eea518475d9ac74 +msgid "'Line' -- the starting and ending coordinates (2 float pairs)." +msgstr "'Line' – 開始座標と終了座標(2つの浮動小数点数のペア)。" + +#: ../../annot.rst:493 e58aa9b8392c4e12a2e4df235a8ad9e5 +msgid "" +"'FreeText' -- 2 or 3 float pairs designating the starting, the (optional)" +" knee point, and the ending coordinates." +msgstr "'FreeText' – 開始座標、(オプションの)曲線点、および終了座標を指定する2または3つの浮動小数点数のペア。" + +#: ../../annot.rst:494 7bfa92a879014f19b320c26616f6da2a +msgid "" +"'PolyLine' / 'Polygon' -- the coordinates of the edges connected by line " +"pieces (n float pairs for n points)." +msgstr "'PolyLine' / 'Polygon' – 線分で接続されたエッジの座標(nポイントのためのn個の浮動小数点数のペア)。" + +#: ../../annot.rst:495 e2e1fefa75324b12af2a2e152477d2b4 +msgid "" +"text markup annotations -- 4 float pairs specifying the *QuadPoints* of " +"the marked text span (see :ref:`AdobeManual`, page 403)." +msgstr "" +"テキストのマークアップ注釈 – マークされたテキストスパンのQuadPointsを指定する4つの浮動小数点数のペア(Adobe " +"PDFリファレンス、ページ403を参照)。" + +#: ../../annot.rst:496 edec46c715024d748ae4737d4f9d8966 +msgid "" +"'Ink' -- list of one to many sublists of vertex coordinates. Each such " +"sublist represents a separate line in the drawing." +msgstr "'Ink' – 頂点座標の1つから多数のサブリストのリスト。各サブリストは、描画内の別々の線を表します。" + +#: ../../annot.rst:503 bd59b247d3c44bf38a70ffe1720bc49c +msgid "" +"dictionary of two lists of floats in range *0 <= float <= 1* specifying " +"the \"stroke\" and the interior (\"fill\") colors. The stroke color is " +"used for borders and everything that is actively painted or written " +"(\"stroked\"). The fill color is used for the interior of objects like " +"line ends, circles and squares. The lengths of these lists implicitly " +"determine the colorspaces used: 1 = GRAY, 3 = RGB, 4 = CMYK. So \"[1.0, " +"0.0, 0.0]\" stands for RGB color red. Both lists can be empty if no color" +" is specified." +msgstr "" +"*0 <= 浮動小数点数 <= 1の範囲内* で指定された \"ストローク\" " +"および内部(\"塗りつぶし\")カラーの2つの浮動小数点数のリストから成る辞書。ストロークカラーは、境界線やアクティブに塗装されたり書かれたりするすべてに使用されます。塗りつぶしカラーは、線の端、円、正方形などのオブジェクトの内部に使用されます。これらのリストの長さは、暗黙的に使用されるカラースペースを決定します:1" +" = GRAY、3 = RGB、4 = CMYK。したがって、\"[1.0, 0.0, 0.0]\" " +"はRGBカラーの赤を表します。どちらのリストも指定されていない場合、空にすることができます。" + +#: ../../annot.rst:509 c49c07a1c4fd499da22862b729bd0114 +msgid "The PDF :data:`xref`." +msgstr "PDFの :data:`xref` 。" + +#: ../../annot.rst:515 229d9683a092484caea218d27e8a82ab +msgid "" +"The PDF :data:`xref` of an annotation to which this one responds. Return " +"zero if this is no response annotation." +msgstr "この注釈が応答する注釈のPDF :data:`xref` 。これが応答注釈でない場合はゼロを返します。" + +#: ../../annot.rst:521 22cd8346d0a041539ab2b885daeb29c3 +msgid "" +"The PDF :data:`xref` of the associated Popup annotation. Zero if non-" +"existent." +msgstr "関連するポップアップ注釈のPDF :data:`xref` 。存在しない場合はゼロ。" + +#: ../../annot.rst:527 a0ed476461bc407ea14ba28c1b792565 +msgid "Whether the annotation has a Popup annotation." +msgstr "注釈にポップアップ注釈があるかどうか。" + +#: ../../annot.rst:533 e7941e1065454f0f8b62f6df310fb8e0 +msgid "" +"Whether the annotation's Popup is open -- **or** the annotation itself " +"('Text' annotations only)." +msgstr "注釈のポップアップが開いているかどうか - **または** 注釈自体('テキスト'注釈のみ)。" + +#: ../../annot.rst:539 943360bdb1974c80a2eb12cba6b49595 +msgid "" +"The rectangle of the associated Popup annotation. Infinite rectangle if " +"non-existent." +msgstr "関連するポップアップ注釈の矩形。存在しない場合は無限の矩形。" + +#: ../../annot.rst:545 b0adc4587d03481aa617e7c63a5c89d5 +msgid "" +"A tuple of four floats representing the `/RD` entry of the annotation. " +"The four numbers describe the numerical differences (left, top, -right, " +"-bottom) between two rectangles: the :attr:`rect` of the annotation and a" +" rectangle contained within that rectangle. If the entry is missing, this" +" property is `(0, 0, 0, 0)`. If the annotation border is a normal, " +"straight line, these numbers are typically border width divided by 2. If " +"the annotation has a \"cloudy\" border, you will see the breadth of the " +"cloud semi-circles here. In general, the numbers need not be identical. " +"To compute the inner rectangle do `a.rect + a.rect_delta`." +msgstr "" +"注釈の `/RD` " +"エントリを表す4つの浮動小数点数のタプル。これらの4つの数値は、2つの矩形間の数値の差(左、上、-右、-下)を説明しています:注釈の " +":attr:`rect` とその矩形内に含まれる矩形。エントリが存在しない場合、このプロパティは. `(0、0、0、0)` " +"です。注釈の境界線が通常のまっすぐな線である場合、これらの数値は通常、境界線の幅を2で割ったものです。注釈に「雲状」の境界線がある場合、ここで雲の半円の幅が表示されます。一般的に、これらの数値は同一である必要はありません。内側の矩形を計算するには、`a.rect" +" + a.rect_delta` を使用します。" + +#: ../../annot.rst:549 ac7cd9df53604c578d2e0e038b4f2aa3 +msgid "" +"A dictionary containing border characteristics. Empty if no border " +"information exists. The following keys may be present:" +msgstr "境界線の特性を含む辞書。境界線情報が存在しない場合は空です。次のキーが存在する可能性があります:" + +#: ../../annot.rst:551 e32ec3cdef384d7390414599188bf214 +msgid "" +"*width* -- a float indicating the border thickness in points. The value " +"is -1.0 if no width is specified." +msgstr "*width* – ポイントでの境界線の太さを示す浮動小数点数。幅が指定されていない場合、値は -1.0 です。" + +#: ../../annot.rst:553 3c4027a4271343f794a2c09a46c35ffe +msgid "" +"*dashes* -- a sequence of integers specifying a line dashing pattern. " +"*[]* means no dashes, *[n]* means equal on-off lengths of *n* points, " +"longer lists will be interpreted as specifying alternating on-off length " +"values. See the :ref:`AdobeManual` page 126 for more details." +msgstr "" +"*dashes* – ラインダッシングパターンを指定する整数のシーケンス。*[]* はダッシュなし、*[n]* は *n* " +"ポイントの等しいオンオフの長さを意味し、より長いリストは交互のオンオフ長さ値を指定して解釈されます。詳細については、:ref:`AdobeManual`" +" のページ126を参照してください。" + +#: ../../annot.rst:555 6f0beca4771340bc950824e0586004f7 +msgid "" +"*style* -- 1-byte border style: **\"S\"** (Solid) = solid line " +"surrounding the annotation, **\"D\"** (Dashed) = dashed line surrounding " +"the annotation, the dash pattern is specified by the *dashes* entry, " +"**\"B\"** (Beveled) = a simulated embossed rectangle that appears to be " +"raised above the surface of the page, **\"I\"** (Inset) = a simulated " +"engraved rectangle that appears to be recessed below the surface of the " +"page, **\"U\"** (Underline) = a single line along the bottom of the " +"annotation rectangle." +msgstr "" +"*style* – 1バイトの境界線スタイル: **\"S\"** (Solid)= 注釈を囲む実線、 **\"D\"** (Dashed)= " +"注釈を囲む破線、破線パターンは *dashes* エントリによって指定され、 **“B”** (Beveled)= " +"ページの表面よりも上に浮かぶように見える模擬的な浮き出し矩形、 **\"I\"** (Inset)= " +"ページの表面よりも下に凹んだように見える模擬的な浮き彫りの矩形、 **\"U\"** (Underline)= 注釈矩形の底部に沿った単一の線。" + +#: ../../annot.rst:557 43804865211f4957b4ab00f844766472 + +msgid "" +"*clouds* -- an integer indicating a \"cloudy\" border, where ``n`` is an " +"integer `-1 <= n <= 2`. A value `n = 0` indicates a straight line (no " +"clouds), 1 means small and 2 means large semi-circles, mimicking the " +"cloudy appearance. If -1, then no specification is present." +msgstr "" +"*clouds* – “雲状”の境界を示す整数。``n`` は整数 `-1 <= n <= 2` を指します。値 `n = 0` " +"は直線(雲なし)を示し、1 は小さな半円、2 は大きな半円を模倣した雲の外観を示します。-1 の場合、仕様が存在しないことを示します。" + +#: ../../annot.rst:565 ad29546a4c774e9696b2fb1753e2bed6 +msgid "Annotation Icons in MuPDF" +msgstr "MuPDFの注釈アイコン" + +#: ../../annot.rst:566 e01c8a73c9a44cf8839c89752851d01c +msgid "" +"This is a list of icons referenceable by name for annotation types 'Text'" +" and 'FileAttachment'. You can use them via the *icon* parameter when " +"adding an annotation, or use the as argument in :meth:`Annot.set_name`. " +"It is left to your discretion which item to choose when -- no mechanism " +"will keep you from using e.g. the \"Speaker\" icon for a " +"'FileAttachment'." +msgstr "" +"これは「Text」および「FileAttachment」注釈タイプの名前で参照可能なアイコンのリストです。注釈を追加する際に*icon* " +"パラメータを使用したり、:meth:`Annot.set_name` " +"の引数として使用したりできます。どのアイテムを選択するかはあなたの裁量に任されています - " +"たとえば、「Speaker」アイコンを「FileAttachment」に使用することを防ぐメカニズムは存在しません。" + +#: ../../annot.rst:572 7c089224f63344fcba8697097a564015 +msgid "Example" +msgstr "例" + +#: ../../annot.rst:573 583a7a5d57f44851b86fd376a6aec42b +msgid "" +"Change the graphical image of an annotation. Also update the \"author\" " +"and the text to be shown in the popup window::" +msgstr "注釈のグラフィカルな画像を変更します。また、ポップアップウィンドウに表示される「著者」とテキストを更新します。" + +#: ../../annot.rst:596 d29c75fc8898424e806cde474df35881 +msgid "" +"This is how the circle annotation looks like before and after the change " +"(pop-up windows displayed using Nitro PDF viewer):" +msgstr "これが、変更前と変更後のサークル注釈の見た目です(Nitro PDFビューアを使用して表示されるポップアップウィンドウ):" + +#: ../../annot.rst:598 fb7ea614d50d49a3a8491368e2e82516 +msgid "|circle|" +msgstr "" + +#: ../../annot.rst:600 a6222a025a02454abe58230b96b2a2a8 +#: fa94e2075bf7415597643866c18ab1b4 +msgid "circle" +msgstr "" + +#: ../../annot.rst:604 00c8769e28fc422fb7202705ebb74eeb +msgid "Footnotes" +msgstr "脚注" + +#: ../../annot.rst:605 5372e13a7237496289e4410fa7074f92 +msgid "" +"Rotating an annotation also changes its rectangle. Depending on how the " +"annotation was defined, the original rectangle is **cannot be " +"reconstructed** by setting the rotation value to zero again and will be " +"lost." +msgstr "" +"注釈を回転させると、その矩形も変更されます。注釈がどのように定義されたかによって、元の矩形は回転値を再びゼロに設定しても **再構築できず** " +"、失われます。" + +#: ../../annot.rst:607 812e239885c44b8c95d47d70dab6a2d6 +msgid "" +"Only the following annotation types support method " +":meth:`Annot.set_rect`: Text, FreeText, Square, Circle, Redact, Stamp, " +"Caret, FileAttachment, Sound, and Movie." +msgstr "" +":meth:`Annot.set_rect` " +"をサポートする注釈タイプは、次のものだけです:Text、FreeText、Square、Circle、Redact、Stamp、Caret、FileAttachment、Sound、およびMovie。" + +#: ../../footer.rst:60 3e6e795dbb954a1f90e29e5c581fe2a0 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "**This class is supported for PDF documents only.**" +#~ msgstr "**このクラスはPDFドキュメントのみに対応しています。** " + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/app1.mo b/docs/locales/ja/LC_MESSAGES/app1.mo new file mode 100644 index 000000000..70a0c18e4 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/app1.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/app1.po b/docs/locales/ja/LC_MESSAGES/app1.po new file mode 100644 index 000000000..3d5da3f93 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/app1.po @@ -0,0 +1,705 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 fdfbfd4ec6524865abf05d4f02f40bd4 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 4ef893e3d65549bdb97761ade9d2fbe0 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 93b3b77eb5fb4d609385d37e1b52f560 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../app1.rst:7 32522e45284f47b9a2baab2001f554fc +msgid "Appendix 1: Details on Text Extraction" +msgstr "付録1: テキスト抽出の詳細" + +#: ../../app1.rst:8 7a46450d4e694b47ba30ff347aa1295a +msgid "" +"This chapter provides background on the text extraction methods of " +"PyMuPDF." +msgstr "この章では、PyMuPDFのテキスト抽出メソッドに関する背景情報を提供します。" + +#: ../../app1.rst:10 433c42d12e1e466887a62caef1d48c04 +msgid "Information of interest are" +msgstr "興味のある情報は以下です" + +#: ../../app1.rst:12 855bc392f61942508bcfcfee3ff9ad79 +msgid "what do they provide?" +msgstr "彼らは何を提供するのか?" + +#: ../../app1.rst:13 7bfcfb2ef4424ff1bfff9c532da582fc +msgid "what do they imply (processing time / data sizes)?" +msgstr "それらは何を意味するのか(処理時間 / データサイズ)?" + +#: ../../app1.rst:16 f054be020fee4f7dbe78d82ec1f9a215 +msgid "General structure of a TextPage" +msgstr "TextPageの一般的な構造" + +#: ../../app1.rst:17 c38170b0abfd4137afa0635a2f080530 +msgid "" +":ref:`TextPage` is one of (Py-) MuPDF's classes. It is normally created " +"(and destroyed again) behind the curtain, when :ref:`Page` text " +"extraction methods are used, but it is also available directly and can be" +" used as a persistent object. Other than its name suggests, images may " +"optionally also be part of a text page::" +msgstr "" +":ref:`TextPage` は(Py-)MuPDFのクラスの一つです。通常、:ref:`Page` " +"のテキスト抽出メソッドが使用されるときにカーテンの後ろで作成され(および破棄され)ますが、直接利用することもでき、永続オブジェクトとして使用することができます。その名前が示すよりも、テキストページにはオプションで画像も含まれる場合があります:" + +#: ../../app1.rst:27 fa5133b907134789b7b5bcf5427df499 +msgid "A **text page** consists of blocks (= roughly paragraphs)." +msgstr "**テキストページ** は、ブロック(おおよそ段落)で構成されています。" + +#: ../../app1.rst:29 8a84fe5ed0fa4b028ce78c0283f42e05 +msgid "A **block** consists of either lines and their characters, or an image." +msgstr "**ブロック** は、行とその文字、または画像のいずれかから成り立っています。" + +#: ../../app1.rst:31 b618a5ef31c6418385e67337d450a352 +msgid "A **line** consists of spans." +msgstr "**行** は、スパンから成り立っています。" + +#: ../../app1.rst:33 10807e13bcf148d1901a347693b84092 +msgid "" +"A **span** consists of adjacent characters with identical font " +"properties: name, size, flags and color." +msgstr "**スパン** は、同一のフォントプロパティ(名前、サイズ、フラグ、色)を持つ隣接する文字から成り立っています。" + +#: ../../app1.rst:36 94770e1286bd45faa80dd24ab08a2b98 +msgid "Plain Text" +msgstr "プレーンテキスト" + +#: ../../app1.rst:38 4e59da8e7882467d9e3fc99a17ebcd20 +msgid "" +"Function :meth:`TextPage.extractText` (or *Page.get_text(\"text\")*) " +"extracts a page's plain **text in original order** as specified by the " +"creator of the document." +msgstr "" +"関数 :meth:`TextPage.extractText` (または *Page.get_text(\"text\")* " +")は、ドキュメントの作成者によって指定された元の順序で、ページのプレーンテキストを抽出します。" + +#: ../../app1.rst:40 43eb12dcbd3741a39565376bce991deb +msgid "An example output::" +msgstr "例の出力:" + +#: ../../app1.rst:45 9b508c1639d948d0bb34f53480a454de +msgid "" +"The output may not equal an accustomed \"natural\" reading order. " +"However, you can request a reordering following the scheme \"top-left to " +"bottom-right\" by executing `page.get_text(\"text\", sort=True)`." +msgstr "" +"出力は通常の「自然な」読み順と一致しない場合があります。ただし、`page.get_text(\"text\", sort=True)` " +"を実行することで、「左上から右下」のスキームに従った並べ替えを要求することができます。" + +#: ../../app1.rst:49 ../../app1.rst:337 01218363ec8b4f25b01be035183f2bad +#: ea3b77edca944acd87dfae64354fc218 +msgid "BLOCKS" +msgstr "ブロック" + +#: ../../app1.rst:51 84c30a08712041efbd0d51b5d1f3b869 +msgid "" +"Function :meth:`TextPage.extractBLOCKS` (or *Page.get_text(\"blocks\")*) " +"extracts a page's text blocks as a list of items like::" +msgstr "" +"関数 :meth:`TextPage.extractBLOCKS` (または `Page.get_text(\"blocks\")` " +")は、ページのテキストブロックを以下のような項目のリストとして抽出します:" + +#: ../../app1.rst:55 eca4a5b7ac1e429d922a4caaf3b1508a +msgid "" +"Where the first 4 items are the float coordinates of the block's bbox. " +"The lines within each block are concatenated by a new-line character." +msgstr "最初の4つの項目は、ブロックのバウンディングボックスの浮動小数点座標です。各ブロック内の行は改行文字で連結されます。" + +#: ../../app1.rst:57 a2ec0b47ebce4d5a924b99a85953372a +msgid "" +"This is a high-speed method, which by default also extracts image meta " +"information: Each image appears as a block with one text line, which " +"contains meta information. The image itself is not shown." +msgstr "これは高速なメソッドであり、デフォルトでは画像のメタ情報も抽出されます。各画像はメタ情報を含む1行のテキスト行で表されるブロックとして表示されます。画像そのものは表示されません。" + +#: ../../app1.rst:59 c7b5a7b9a23e458a96b54934b3ed775f +msgid "" +"As with simple text output above, the `sort` argument can be used as well" +" to obtain a reading order." +msgstr "前述の単純なテキスト出力と同様に、`sort` 引数を使用して読み順を取得することもできます。" + +#: ../../app1.rst:61 ../../app1.rst:79 a78d72a8fc184086a8d50d1cf5a9ef1a +#: e2343d105e624aeb882debda02386431 +msgid "Example output::" +msgstr "例の出力:" + +#: ../../app1.rst:69 ../../app1.rst:338 acff28e048b34b4ba229b68304cfb76b +#: cddad20f52224312b51a7f0c5053ac5a +msgid "WORDS" +msgstr "単語" + +#: ../../app1.rst:71 89a704f3ac384146b0b14fac38aa5a65 +msgid "" +"Function :meth:`TextPage.extractWORDS` (or *Page.get_text(\"words\")*) " +"extracts a page's text **words** as a list of items like::" +msgstr "" +"関数 :meth:`TextPage.extractWORDS` (または *Page.get_text(\"words\")* " +")は、ページのテキスト単語を以下のような項目のリストとして抽出します:" + +#: ../../app1.rst:75 f050c877e9a940cdbec7cc265b1bb992 +msgid "" +"Where the first 4 items are the float coordinates of the words's bbox. " +"The last three integers provide some more information on the word's " +"whereabouts." +msgstr "最初の4つの項目は、単語のバウンディングボックスの浮動小数点座標です。最後の3つの整数は、単語の位置に関する追加情報を提供します" + +#: ../../app1.rst:77 0f54ed7a09db4671bf357a69f4eaf0cf +msgid "" +"This is a high-speed method. As with the previous methods, argument " +"`sort=True` will reorder the words." +msgstr "これは高速なメソッドです。前のメソッドと同様に、引数 `sort=True` を使用すると単語が再並べ替えされます。" + +#: ../../app1.rst:95 ../../app1.rst:341 9c256b41e353481a8f9d1120383e185d +#: b01eed8bd2214d029f98ed8e402c64ec +msgid "HTML" +msgstr "" + +#: ../../app1.rst:97 ae629776c0304c299e0e8d208a5c09fa +msgid "" +":meth:`TextPage.extractHTML` (or *Page.get_text(\"html\")* output fully " +"reflects the structure of the page's ``TextPage`` -- much like DICT / " +"JSON below. This includes images, font information and text positions. If" +" wrapped in HTML header and trailer code, it can readily be displayed by " +"an internet browser. Our above example::" +msgstr "" +":meth:`TextPage.extractHTML` (または *Page.get_text(\"html\")* の出力は、ページの " +"`TextPage` の構造を完全に反映します。これは、以下のDICT / " +"JSONのようなものです。これには画像、フォント情報、テキスト位置が含まれます。HTMLヘッダーとトレイラーコードで囲むと、インターネットブラウザで簡単に表示できます。上記の例:" + +#: ../../app1.rst:113 7390b832da484722ac5b9460920b0c9b +msgid "Controlling Quality of HTML Output" +msgstr "HTML出力の品質の制御" + +#: ../../app1.rst:114 cda88645b63f404c8de487595a91b015 +msgid "" +"While HTML output has improved a lot in MuPDF v1.12.0, it is not yet bug-" +"free: we have found problems in the areas **font support** and **image " +"positioning**." +msgstr "" +"MuPDF v1.12.0でHTML出力はかなり改善されましたが、まだバグがないわけではありません。**フォントサポート** や**画像の配置**" +" に関する問題が見つかっています。" + +#: ../../app1.rst:116 8e67f6c0b40547eb8c013174c0840a60 +msgid "" +"HTML text contains references to the fonts used of the original document." +" If these are not known to the browser (a fat chance!), it will replace " +"them with others; the results will probably look awkward. This issue " +"varies greatly by browser -- on my Windows machine, MS Edge worked just " +"fine, whereas Firefox looked horrible." +msgstr "" +"HTMLテキストには元のドキュメントで使用されたフォントへの参照が含まれています。もしブラウザがそれらを認識できない場合(少ない確率ですが)、他のフォントで置き換えられ、結果が奇妙に見えるかもしれません。この問題はブラウザによって大きく異なります。Windowsマシンでは、MS" +" Edgeはうまく動作するかもしれませんが、Firefoxはひどく見えるかもしれません。" + +#: ../../app1.rst:118 a84f551c6b3c4c0daaeda98b73eff629 +msgid "" +"For PDFs with a complex structure, images may not be positioned and / or " +"sized correctly. This seems to be the case for rotated pages and pages, " +"where the various possible page bbox variants do not coincide (e.g. " +"*MediaBox != CropBox*). We do not know yet, how to address this -- we " +"filed a bug at MuPDF's site." +msgstr "" +"複雑な構造を持つPDFの場合、画像の位置やサイズが正しく配置されないことがあります。これは回転したページや、さまざまなページbboxのバリアントが一致しない場合に起こる可能性があります(たとえば、*MediaBox" +" != CropBox* )。これに対処する方法はまだわかっていませんが、MuPDFのサイトにバグを報告しました。" + +#: ../../app1.rst:120 ede45259306f4b25a8a69c2642ee7790 +msgid "" +"To address the font issue, you can use a simple utility script to scan " +"through the HTML file and replace font references. Here is a little " +"example that replaces all fonts with one of the :ref:`Base-14-Fonts`: " +"serifed fonts will become \"Times\", non-serifed \"Helvetica\" and " +"monospaced will become \"Courier\". Their respective variations for " +"\"bold\", \"italic\", etc. are hopefully done correctly by your browser::" +msgstr "フォントの問題に対処するために、HTMLファイルをスキャンし、フォントの参照を置換するシンプルなユーティリティスクリプトを使用できます。以下は、すべてのフォントをPDFのベース14フォントの一つに置き換える例です:セリフフォントは「Times」になり、セリフのないフォントは「Helvetica」になり、等幅フォントは「Courier」になります。太字、斜体などの各バリエーションは、おそらくブラウザによって正しく処理されるでしょう。" + +#: ../../app1.rst:160 604f24c1899b445a8e5f147bbea58c6b +msgid "DICT (or JSON)" +msgstr "DICT(またはJSON)" + +#: ../../app1.rst:162 2b4124eaa6184e5abeb4751eb27b7d28 +msgid "" +":meth:`TextPage.extractDICT` (or *Page.get_text(\"dict\", sort=False)*) " +"output fully reflects the structure of a ``TextPage`` and provides image " +"content and position detail (*bbox* -- boundary boxes in pixel units) for" +" every block, line and span. Images are stored as *bytes* for DICT output" +" and base64 encoded strings for JSON output." +msgstr "" +":meth:`TextPage.extractDICT` (または *Page.get_text(\"dict\", sort=False)* " +")の出力は、 ``TextPage`` の構造を完全に反映し、各ブロック、行、スパンのために画像の内容と位置の詳細( *bbox* – " +"ピクセル単位の境界ボックス)を提供します。画像はDICT出力では *バイト* " +"として格納され、JSON出力ではbase64エンコードされた文字列として格納されます。" + +#: ../../app1.rst:164 c899504e3d93462ebf94a0ae253c284d +msgid "" +"For a visualization of the dictionary structure have a look at " +":ref:`textpagedict`." +msgstr "辞書の構造の可視化については、辞書出力の構造をご覧ください。" + +#: ../../app1.rst:166 a85a77c0a97a4b999a9608db09a62055 +msgid "Here is how this looks like::" +msgstr "以下がその様子です:" + +#: ../../app1.rst:192 a39d5bea096643a481b871a235198902 +msgid "RAWDICT (or RAWJSON)" +msgstr "RAWDICT(またはRAWJSON)" + +#: ../../app1.rst:193 e30e534d09664d99a59c74a2819edd1f +msgid "" +":meth:`TextPage.extractRAWDICT` (or *Page.get_text(\"rawdict\", " +"sort=False)*) is an **information superset of DICT** and takes the detail" +" level one step deeper. It looks exactly like the above, except that the " +"*\"text\"* items (*string*) in the spans are replaced by the list " +"*\"chars\"*. Each *\"chars\"* entry is a character *dict*. For example, " +"here is what you would see in place of item *\"text\": \"Text in black " +"color.\"* above::" +msgstr "" +":meth:`TextPage.extractRAWDICT` (または *Page.get_text(\"rawdict\", " +"sort=False)* )は、**DICTの情報のスーパーセット** " +"であり、詳細レベルを一段階深くします。これは上記のように見えますが、スパン内の *「text」* アイテム(文字列)は *「chars」* " +"というリストに置き換えられます。各 *「chars」* エントリは文字の *dict* です。例えば、 *「Text in black " +"color.」* の代わりに以下のような項目が表示されます:" + +#: ../../app1.rst:224 ../../app1.rst:339 1019709da94c447c87b21a8889ff73c6 +#: 2e6cf3faa00c40b99808a48b7890f736 +msgid "XML" +msgstr "" + +#: ../../app1.rst:226 3e2cf1ef340e453c9ee25d4f7a082534 +msgid "" +"The :meth:`TextPage.extractXML` (or *Page.get_text(\"xml\")*) version " +"extracts text (no images) with the detail level of RAWDICT::" +msgstr "" +":meth:`TextPage.extractXML` (または *Page.get_text(\"xml\")* " +"バージョンは、RAWDICTの詳細レベルでテキスト(画像なし)を抽出します:" + +#: ../../app1.rst:255 b27a0017f5ac4d09a62ef96d7158aa58 +msgid "" +"We have successfully tested `lxml `_ to " +"interpret this output." +msgstr "この出力を解釈するためにlxmlを使用して正常にテストしました。" + +#: ../../app1.rst:258 ../../app1.rst:340 769cf57a03fb4d8ebc77e12339dfe126 +#: dfb6a028240f4f25ace3456d80f1e379 +msgid "XHTML" +msgstr "" + +#: ../../app1.rst:259 9489175cb6d1431fab9e8388a1b5ee8d +msgid "" +":meth:`TextPage.extractXHTML` (or *Page.get_text(\"xhtml\")*) is a " +"variation of TEXT but in HTML format, containing the bare text and images" +" (\"semantic\" output)::" +msgstr "" +":meth:`TextPage.extractXHTML` (または *Page.get_text(\"xhtml\")* " +"は、テキストと画像を含むHTML形式のTEXTのバリエーションです(「セマンティック」出力):" + +#: ../../app1.rst:268 e1dc4d16be0744a9a7a2b424f7a34574 +msgid "Text Extraction Flags Defaults" +msgstr "テキスト抽出フラグのデフォルト値" + +#: ../../app1.rst:269 d6b62f1e66934d9391f12416f05cf4c3 +msgid "" +"New in version 1.16.2: Method :meth:`Page.get_text` supports a keyword " +"parameter *flags* *(int)* to control the amount and the quality of " +"extracted data. The following table shows the defaults settings (flags " +"parameter omitted or None) for each extraction variant. If you specify " +"flags with a value other than ``None``, be aware that you must set **all " +"desired** options. A description of the respective bit settings can be " +"found in :ref:`TextPreserve`." +msgstr "" +"バージョン1.16.2で新しく追加されたメソッド :meth:`Page.get_text` " +"は、抽出されるデータの量と品質を制御するためのキーワードパラメータ `flags` " +"(整数)をサポートしています。以下の表は、各抽出バリエーションのデフォルト設定( `flags` " +"パラメータが省略されたかNoneの場合)を示しています。 `None` 以外の値でflagsを指定する場合は、**すべての必要なオプション** " +"を設定する必要があることに注意してください。各ビット設定の説明は「テキスト抽出フラグ」で確認できます。" + +#: ../../app1.rst:271 e8bfb1e18c8a40ef9482158b1b48b799 +msgid "" +"New in v1.19.6: The default combinations in the following table are now " +"available as Python constants: :data:`TEXTFLAGS_TEXT`, " +":data:`TEXTFLAGS_WORDS`, :data:`TEXTFLAGS_BLOCKS`, " +":data:`TEXTFLAGS_DICT`, :data:`TEXTFLAGS_RAWDICT`, " +":data:`TEXTFLAGS_HTML`, :data:`TEXTFLAGS_XHTML`, :data:`TEXTFLAGS_XML`, " +"and :data:`TEXTFLAGS_SEARCH`. You can now easily modify a default flag, " +"e.g." +msgstr "" +"バージョン1.19.6で新しく追加された変更:次の表のデフォルトの組み合わせは、Pythonの定数として利用可能です: " +":data:`TEXTFLAGS_TEXT` 、 :data:`TEXTFLAGS_WORDS` 、 " +":data:`TEXTFLAGS_BLOCKS` 、 :data:`TEXTFLAGS_DICT` 、 " +":data:`TEXTFLAGS_RAWDICT` 、 :data:`TEXTFLAGS_HTML` 、 " +":data:`TEXTFLAGS_XHTML` 、 :data:`TEXTFLAGS_XML` 、 " +":data:`TEXTFLAGS_SEARCH` 。これにより、デフォルトのフラグを簡単に変更できます。例えば、" + +#: ../../app1.rst:273 6b7d5e078dee4d8faefcc18f13641d84 +msgid "**include** images in a \"blocks\" output:" +msgstr "「blocks」出力に画像を **含める** 場合:" + +#: ../../app1.rst:275 920d23e221534056b885ac4d2aa40959 +msgid "`flags = TEXTFLAGS_BLOCKS | TEXT_PRESERVE_IMAGES`" +msgstr "" + +#: ../../app1.rst:277 442e0b5271e84503b89c3efda021a625 +msgid "**exclude** images from a \"dict\" output:" +msgstr "「dict」出力から画像を **除外する** 場合:" + +#: ../../app1.rst:279 8cc83a12e452488bbb0828a5b22c007b +msgid "`flags = TEXTFLAGS_DICT & ~TEXT_PRESERVE_IMAGES`" +msgstr "" + +#: ../../app1.rst:281 883ad6fe76334652937ecd82fe8393a8 +msgid "set **dehyphenation off** in text searches:" +msgstr "テキスト検索での **ハイフネーション** をオフに設定する:" + +#: ../../app1.rst:283 46170567ecc240c88108dc6a14ac7982 +msgid "`flags = TEXTFLAGS_SEARCH & ~TEXT_DEHYPHENATE`" +msgstr "" + +#: ../../app1.rst:287 82de7610a8c84cdaa15aafa49960bf9a +msgid "Indicator" +msgstr "指標" + +#: ../../app1.rst:287 3892913f9254485a94780c276805766c +msgid "text" +msgstr "" + +#: ../../app1.rst:287 37c756db16324c458f86f66f086d1941 +msgid "html" +msgstr "" + +#: ../../app1.rst:287 5063d3e2784149f188ffae7dcee78a89 +msgid "xhtml" +msgstr "" + +#: ../../app1.rst:287 0f75631f443a4653a3622a073d27d5a5 +msgid "xml" +msgstr "" + +#: ../../app1.rst:287 5a83675663804474931ef4cde1e02911 +msgid "dict" +msgstr "" + +#: ../../app1.rst:287 78bc13d9ae104947a657ed2b864c5af2 +msgid "rawdict" +msgstr "" + +#: ../../app1.rst:287 c2b7a5a2bcd040c48f4832612cda9e17 +msgid "words" +msgstr "" + +#: ../../app1.rst:287 e0b980adf7324d1cbf814983a363fe72 +msgid "blocks" +msgstr "" + +#: ../../app1.rst:287 dd0b10b8c86e4f088602b83a1b03aa0d +msgid "search" +msgstr "" + +#: ../../app1.rst:289 9782eb815e0b457292e05170f64c6ada +msgid "preserve ligatures" +msgstr "連結を保持" + +#: ../../app1.rst:289 ../../app1.rst:290 ../../app1.rst:291 ../../app1.rst:293 +#: ../../app1.rst:294 ../../app1.rst:295 1e983a6cb6d44aceb3853c9a83ba5aaf +#: 20b8ab4098ee4007bac8fd0a40bfdbb3 233a030d06984ee593bb98e31c636824 +#: 2b27ec6f790744eb9312d79895d0f846 3b76c164a3bd43af8ddd602b9fb58732 +#: 3b9f9e75e6cc47f79c07151895da4ffd 47460e3c55b04dc4b8ace07d8270df1c +#: 4bdb19637b214e6491ff869852694c6a 567e1e72445749069d46f1f2eff63e0f +#: 59a08f97de8d4245a3e54d4fb26b0291 5c07f51342ce4bba9fc7be3cda6af8df +#: 5d843f7031c14f799bd56886af16313f 6f96176fe534473abd3d0a3b10886daf +#: 775cb0f5f029464d8b6b265a6bd9fbcb 7f8c04030eb2428a8b6e3e4aa1434875 +#: 808f822d5cb846f8bf7e619119fb9318 92acb46014f34d0e94841db5fa7aee7f +#: 93a5eb0806364fc79a03a21a7e1aae61 98b5b944f37445a483864fd0c3c1acc4 +#: 99ab73e65de34fa382f9740422544e20 a2e3210d3b93435d8ae28371d790b824 +#: a3d6d493a3b043b98c7cb1f8282d9733 a8ca4f38d35b49a399948cd811aa950a +#: b718c1a0f6744595bea22305cb934b23 bbbec0ca7da34f32aab5fff3961bc220 +#: bf51c38c6b3b4c3aa9e4a844f049a028 c89bbe13e12949ddb0baa34db3c1e2dc +#: cee800543a0646cca1e99c244818c547 d21544b4657c404689ea4a1442e7b770 +#: d540a50f31844489b476bc3b2eccf0bf df435d6f1108468683ceacd21cfd87c9 +#: e0d49fa814f943faac85b4230fc44880 ed76c68df9d6403bb3828c2fa2b236f3 +#: ef74eb0f02c24d33becc781ce950b651 f20c32d4888f4d029ee7074ede80e23a +#: f5ce82fd58e648bd8434116561a19697 f6559ff8a2154209bc3e256c2035e51a +#: fbd7fda4af014d3882244ca311699849 fc0e3ef690de498a853cfa1bbf8aa702 +msgid "1" +msgstr "" + +#: ../../app1.rst:289 ../../app1.rst:291 ../../app1.rst:292 ../../app1.rst:293 +#: ../../app1.rst:295 06a775ff9e024c69b2b20b72b87e2f39 +#: 0dcfeff6364f486b85e82098cd35dad9 251e9b0cc0c1420fae3dd91f99f24d7f +#: 3698b2725d17402fb0a6d21a9f8528a6 46a337a4a68e49d7a800dd9701077648 +#: 4ba8146f24be48239504f8a299500f64 4dda4b0faa7a41b4bd93757bfa5e48fc +#: 52d30ea082894e04b0ccd40e6bafc7a5 5994e677f5c94c30828758452ab35e2c +#: 6063573b774c440dabdd54208c398123 62ed92eea3bd495b99addd70a3fd7823 +#: 6d98e245478f48c590bdc7e8f8585eda 709d6cddbcef491aa2d2d653bb6ecb16 +#: 86e73f4e80894ea5b04fa62a9ca34165 9f9ddfd3cdab40c58298f9300c4e6ad0 +#: a8f2711acc19466397e196137cd3d96a bc6bd4d438e8412289b2d55436bd3341 +#: dbb1a8bf2fa344d9916b1ec4ebf075db ef81628962544a8ebf82a15aac960331 +#: f18b0d81b3da4760894d1814700e43c4 fe2557a2e37b4bf29c6cd13888e1ba55 +msgid "0" +msgstr "" + +#: ../../app1.rst:290 1beaf7102f1945d8bbb04d397b9227f1 +msgid "preserve whitespace" +msgstr "空白を保持" + +#: ../../app1.rst:291 4691c88b06074b38896de74a86973a95 +msgid "preserve images" +msgstr "画像を保持" + +#: ../../app1.rst:291 b2eacea067054a2c89b01026f348d200 +#: cde51036a4774390b6b3b86bad72cebf d34b28c25d6a4b8eb72ae032050e8ef2 +msgid "n/a" +msgstr "" + +#: ../../app1.rst:292 6280ef433e7648fd83936d5d9fabf2e3 +msgid "inhibit spaces" +msgstr "スペースの抑制" + +#: ../../app1.rst:293 ffbb4fff9bc14f4b89c7138073298de3 +msgid "dehyphenate" +msgstr "ハイフネーション解除" + +#: ../../app1.rst:294 1c61f64ea3c046aa98ef330907277d28 +msgid "clip to mediabox" +msgstr "メディアボックスにクリップ" + +#: ../../app1.rst:295 46e9e87165ad4603ad27fca406580f85 +msgid "use CID instead of U+FFFD" +msgstr "" + +#: ../../app1.rst:298 d03c9f5449f345a3b73490298a0485f9 +msgid "**search** refers to the text search function." +msgstr "**検索** はテキスト検索機能を指します。" + +#: ../../app1.rst:299 e69a3f6563a349f7ac389cad653d8f9f +msgid "**\"json\"** is handled exactly like **\"dict\"** and is hence left out." +msgstr "**「json」** は **「dict」** とまったく同様に処理されるため、省略されています。" + +#: ../../app1.rst:300 00f7c04e64ee45e09727561d7a20c052 +msgid "" +"**\"rawjson\"** is handled exactly like **\"rawdict\"** and is hence left" +" out." +msgstr "**「rawjson」** は **「rawdict」** とまったく同様に処理されるため、省略されています。" + +#: ../../app1.rst:301 6b8b4b0d4438489eb25ff507f4d3b024 +msgid "" +"An \"n/a\" specification means a value of 0 and setting this bit never " +"has any effect on the output (but an adverse effect on performance)." +msgstr "「n/a」の指定は値が0であり、このビットを設定しても出力に影響を与えることはありません(ただしパフォーマンスに悪影響を及ぼす可能性があります)。" + +#: ../../app1.rst:302 9e71d45dfa8a432a85b4c21b50596e46 +msgid "" +"If you are not interested in images when using an output variant which " +"includes them by default, then by all means set the respective bit off: " +"You will experience a better performance and much lower space " +"requirements." +msgstr "画像を含む出力バリアントを使用する際に画像に興味がない場合、必ず該当するビットをオフに設定してください。これにより、パフォーマンスが向上し、スペース要件が大幅に削減されます。" + +#: ../../app1.rst:304 4babe75226f64adeb13494c473d77bda +msgid "To show the effect of `TEXT_INHIBIT_SPACES` have a look at this example::" +msgstr "`TEXT_INHIBIT_SPACES` の効果を示すために、この例をご覧ください:" + +#: ../../app1.rst:324 791593433363447884bf96a0f495a7ed +msgid "Performance" +msgstr "パフォーマンス" + +#: ../../app1.rst:325 0da8da4ddf0640e08849ffc214bbc392 +msgid "" +"The text extraction methods differ significantly both: in terms of " +"information they supply, and in terms of resource requirements and " +"runtimes. Generally, more information of course means, that more " +"processing is required and a higher data volume is generated." +msgstr "テキスト抽出メソッドは、情報の提供方法とリソース要件、実行時間の両方で大きく異なります。一般的に、情報が多いほど処理が必要であり、より多くのデータが生成されることを意味します。" + +#: ../../app1.rst:327 20586902aec24daab2b5f3efb0591035 +#, python-format +msgid "" +"Especially images have a **very significant** impact. Make sure to " +"exclude them (via the *flags* parameter) whenever you do not need them. " +"To process the below mentioned 2'700 total pages with default flags " +"settings required 160 seconds across all extraction methods. When all " +"images where excluded, less than 50% of that time (77 seconds) were " +"needed." +msgstr "" +"特に画像は **非常に大きな** " +"影響を持ちます。必要のない場合は、必ず画像を除外する(フラグパラメータを使用)ようにしてください。以下で言及されている2,700ページの総ページ数をデフォルトのフラグ設定で処理するには、全ての抽出メソッドで160秒が必要でした。画像をすべて除外した場合、その時間の50%未満(77秒)が必要でした。" + +#: ../../app1.rst:329 2e8a95a956e644ac8d4696dce6c61cc2 +msgid "" +"To begin with, all methods are **very fast** in relation to other " +"products out there in the market. In terms of processing speed, we are " +"not aware of a faster (free) tool. Even the most detailed method, " +"RAWDICT, processes all 1'310 pages of the :ref:`AdobeManual` in less than" +" 5 seconds (simple text needs less than 2 seconds here)." +msgstr "" +"まず始めに、すべてのメソッドは市場にある他の製品と比べて **非常に高速** " +"です。処理速度の観点から、より速い(無料の)ツールは私たちの知る限り存在しません。最も詳細なメソッドであるRAWDICTでも、 " +":ref:`AdobeManual` リファレンスの1,310ページを5秒未満で処理できます(ここでは簡単なテキストは2秒未満で処理されます)。" + +#: ../../app1.rst:331 b478d8651f884657b5f2968d78bde08e +msgid "" +"The following table shows average relative speeds (\"RSpeed\", baseline " +"1.00 is TEXT), taken across ca. 1400 text-heavy and 1300 image-heavy " +"pages." +msgstr "以下の表は、約1400ページのテキストが多く、約1300ページが画像が多いページでの平均相対速度(ベースライン1.00はTEXT)を示しています。" + +#: ../../app1.rst:334 e4cd1e92a16243b9bdc20d79c041f5ab +msgid "Method" +msgstr "メソッド" + +#: ../../app1.rst:334 e7cdf152a1914df4b50d9e29af7c6414 +msgid "RSpeed" +msgstr "平均相対" + +#: ../../app1.rst:334 d842d0f8ae3a48d0a969867502970a0c +msgid "Comments" +msgstr "コメント" + +#: ../../app1.rst:334 48d44a0956cd43a1b911aeb4417696ea +msgid "no images" +msgstr "画像なし" + +#: ../../app1.rst:336 52d85a2f5b704adc8c686c36a9f92fe2 +msgid "TEXT" +msgstr "" + +#: ../../app1.rst:336 ../../app1.rst:337 ../../app1.rst:340 +#: 3bcb90f59d8f4da58255a7a86093aa07 59f27de687b642ddb52e91f17533cc5b +#: a0b2dcf2b2ef4bd7ad4cff46bb823559 db1033e32d794e808303b3da34f587ac +#: f1cf54ba71c34f339ccf48d538136125 +msgid "1.00" +msgstr "" + +#: ../../app1.rst:336 6dabc232af8f4901aea1c358a0ae9564 +msgid "no images, **plain** text, line breaks" +msgstr "画像なし、 **プレーン** テキスト、改行" + +#: ../../app1.rst:337 23cec37141ab47548b08ce6a51c47532 +msgid "image bboxes (only), **block** level text with bboxes, line breaks" +msgstr "画像のバウンディングボックス(のみ)、 **ブロック** レベルのテキストとバウンディングボックス、改行" + +#: ../../app1.rst:338 04eaefca5e1f41eda6d2831fd36c1a5d +#: 4f0d944bd46143d298a7a366f9b35d36 +msgid "1.02" +msgstr "" + +#: ../../app1.rst:338 f4c82b98aa294a4ba829c5ba6dd0b736 +msgid "no images, **word** level text with bboxes" +msgstr "画像なし、 **ワード** レベルのテキストとバウンディングボックス" + +#: ../../app1.rst:339 6b2dcc5f62e242a3bc9579407a6568a1 +#: f95901f3f681472a947f2f30ee664a42 +msgid "2.72" +msgstr "" + +#: ../../app1.rst:339 8a17afd3b00a478db3c1a02c3174167f +msgid "no images, **char** level text, layout and font details" +msgstr "画像なし、**文字** レベルのテキスト、レイアウトとフォントの詳細" + +#: ../../app1.rst:340 0f0de8a5134b47348c6bb22b8c2a813a +msgid "3.32" +msgstr "" + +#: ../../app1.rst:340 fa9159767f484518983dcfa06467eead +msgid "**base64** images, **span** level text, no layout info" +msgstr "**base64** 画像、 **スパン** レベルのテキスト、レイアウト情報なし" + +#: ../../app1.rst:341 efb4a91cc528491c86864634ab1c8962 +msgid "3.54" +msgstr "" + +#: ../../app1.rst:341 612c89df3d5046f399cd591cbecfa4d1 +msgid "**base64** images, **span** level text, layout and font details" +msgstr "**base64画像** 、 **スパン** レベルのテキスト、レイアウトとフォントの詳細" + +#: ../../app1.rst:341 82007c0df4b04de7b342be7f608efe6d +msgid "1.01" +msgstr "" + +#: ../../app1.rst:342 22c93672d1754b45979767b416822fc9 +msgid "DICT" +msgstr "" + +#: ../../app1.rst:342 73dbc0957638492db3cb33c44afc4ff0 +msgid "3.93" +msgstr "" + +#: ../../app1.rst:342 081b9112703a4d6f8b31780c1f8d4708 +msgid "**binary** images, **span** level text, layout and font details" +msgstr "**バイナリ** 画像、 **スパン** レベルのテキスト、レイアウトとフォントの詳細" + +#: ../../app1.rst:342 c0b41f51a7214a12a3efec0ecbba1e5f +msgid "1.04" +msgstr "" + +#: ../../app1.rst:343 5de542c1e80e424294d32a859e17ca43 +msgid "RAWDICT" +msgstr "" + +#: ../../app1.rst:343 8bec4991b902406e931341dbb186a6d0 +msgid "4.50" +msgstr "" + +#: ../../app1.rst:343 c4f7dd03bce440c6a1820660104e7a31 +msgid "**binary** images, **char** level text, layout and font details" +msgstr "**バイナリ** 画像、**文字** レベルのテキスト、レイアウトとフォントの詳細" + +#: ../../app1.rst:343 71321cf4b722412abdf5f6eaa793ec1d +msgid "1.68" +msgstr "" + +#: ../../app1.rst:346 e12d6cb6be1b4701a2b213d3f27afa27 +#, python-format +msgid "" +"As mentioned: when excluding image extraction (last column), the relative" +" speeds are changing drastically: except RAWDICT and XML, the other " +"methods are almost equally fast, and RAWDICT requires 40% less execution " +"time than the **now slowest XML**." +msgstr "" +"前述のように、画像の抽出を除外する場合(最後の列)、相対速度は大きく変わります。RAWDICTとXMLを除いて、他のメソッドはほぼ同じ速さであり、RAWDICTは" +" **今では遅いXML** よりも40%少ない実行時間を必要とします。" + +#: ../../app1.rst:348 6750dde161a54ad6adb3ada9203f0253 +msgid "Look at chapter **Appendix 1** for more performance information." +msgstr "もっとパフォーマンス情報については、 **付録1章** をご覧ください。" + +#: ../../footer.rst:60 05804a3786b14f7fa5098ddfaf65f873 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/app2.mo b/docs/locales/ja/LC_MESSAGES/app2.mo new file mode 100644 index 000000000..64600705d Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/app2.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/app2.po b/docs/locales/ja/LC_MESSAGES/app2.po new file mode 100644 index 000000000..b766aaa96 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/app2.po @@ -0,0 +1,156 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 ce178b1ad1a74ee4b4c40ab17fe257ae +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 419c2281e2e246658e42483c0a1fc0d6 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 ebb04a69cb76400eb6074b4c9c39ec1f +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../app2.rst:7 64d6bda691354b77987e85bd9a04296b +msgid "Appendix 2: Considerations on Embedded Files" +msgstr "付録2:埋め込みファイルに関する考慮事項" + +#: ../../app2.rst:8 d15d38307aa649f983ce5b15f6c6a3fd +msgid "" +"This chapter provides some background on embedded files support in " +"PyMuPDF." +msgstr "この章では、PyMuPDFにおける埋め込みファイルのサポートに関する背景情報を提供します。" + +#: ../../app2.rst:11 20abd6d046f14fb19306663331d645c0 +msgid "General" +msgstr "一般" + +#: ../../app2.rst:12 7dd965f6d81847bb95992e2db8036f26 +msgid "" +"Starting with version 1.4, PDF supports embedding arbitrary files as part" +" (\"Embedded File Streams\") of a PDF document file (see chapter \"7.11.4" +" Embedded File Streams\", pp. 103 of the :ref:`AdobeManual`)." +msgstr "" +"バージョン1.4から、PDFはPDFドキュメントファイルの一部として任意のファイルを埋め込むことができるようになりました(「7.11.4 " +"埋め込みファイルストリーム」章を参照、 :ref:`AdobeManual` リファレンスの103ページ)。" + +#: ../../app2.rst:15 9673011500be45a5b2780b4f29fcb427 +msgid "" +"In many aspects, this is comparable to concepts also found in ZIP files " +"or the OLE technique in MS Windows. PDF embedded files do, however, *not*" +" support directory structures as does the ZIP format. An embedded file " +"can in turn contain embedded files itself." +msgstr "" +"多くの側面で、これはZIPファイルやMS " +"WindowsのOLE技術でも見られる概念に類似しています。ただし、PDFの埋め込みファイルはZIP形式とは異なり、ディレクトリ構造をサポート " +"*しません* 。埋め込みファイル自体もさらに埋め込みファイルを含むことができます。" + +#: ../../app2.rst:17 21d8ec87be9d4021aec6fa647434f686 +msgid "" +"Advantages of this concept are that embedded files are under the PDF " +"umbrella, benefitting from its permissions / password protection and " +"integrity aspects: all data, which a PDF may reference or even may be " +"dependent on, can be bundled into it and so form a single, consistent " +"unit of information." +msgstr "このコンセプトの利点は、埋め込みファイルがPDFの枠組みに含まれ、その権限/パスワード保護および整合性の側面を活用できることです。PDFが参照するデータや依存する可能性があるデータはすべて、PDFにまとめて1つの一貫した情報ユニットを形成することができます。" + +#: ../../app2.rst:19 27f0645490d24afd94260b80f51190e2 +msgid "" +"In addition to embedded files, PDF 1.7 adds *collections* to its support " +"range. This is an advanced way of storing and presenting meta information" +" (i.e. arbitrary and extensible properties) of embedded files." +msgstr "" +"埋め込みファイルに加えて、PDF 1.7は *コレクション* " +"をサポート範囲に追加しました。これは、埋め込みファイルのメタ情報(任意で拡張可能なプロパティ)を格納し、表示する高度な方法です" + +#: ../../app2.rst:22 6461638e9a0c4f6d8dd9404f02cae88c +msgid "MuPDF Support" +msgstr "MuPDFのサポート" + +#: ../../app2.rst:23 7a4bd68162ca408082e0fe0981edfb0e +msgid "" +"After adding initial support for collections (portfolios) and " +"*/EmbeddedFiles* in MuPDF version 1.11, this support was dropped again in" +" version 1.15." +msgstr "" +"MuPDFバージョン1.11でコレクション(ポートフォリオ)と */EmbeddedFiles* " +"への初期サポートを追加した後、このサポートはバージョン1.15で再び削除されました。" + +#: ../../app2.rst:25 5d971b234a074542a7fe67243445baac +msgid "" +"As a consequence, the cli utility *mutool* no longer offers access to " +"embedded files." +msgstr "その結果、cliユーティリティ *mutool* ではもはや埋め込みファイルにアクセスできなくなりました。" + +#: ../../app2.rst:27 121edfcf69c34685bbfd37adf5eed763 +msgid "" +"PyMuPDF -- having implemented an */EmbeddedFiles* API in response in its " +"version 1.11.0 -- was therefore forced to change gears starting with its " +"version 1.16.0 (we never published a MuPDF v1.15.x compatible PyMuPDF)." +msgstr "" +"PyMuPDFは、バージョン1.11.0で */EmbeddedFiles* " +"APIを実装したため、バージョン1.16.0からギアを変更せざるを得なくなりました(MuPDF " +"v1.15.x互換のPyMuPDFは公開されませんでした)。" + +#: ../../app2.rst:29 179951424a4d4e85899ade12ee77d71b +msgid "" +"We are now maintaining our own code basis supporting embedded files. This" +" code makes use of basic MuPDF dictionary and array functions only." +msgstr "私たちは現在、埋め込みファイルをサポートする独自のコードベースを維持しています。このコードは、基本的なMuPDFの辞書と配列の機能のみを使用しています。" + +#: ../../app2.rst:32 9f89c97ca1ff44a59f6b127c6a772547 +msgid "PyMuPDF Support" +msgstr "PyMuPDFのサポート" + +#: ../../app2.rst:33 cdda1b955d134a22825d992b8d121028 +msgid "" +"We continue to support the full old API with respect to embedded files --" +" with only minor, cosmetic changes." +msgstr "私たちは、埋め込みファイルに関する古いAPIを、わずかな見た目の変更のみを行いながら引き続きサポートしています。" + +#: ../../app2.rst:35 77890c822c424653bfeba8e464bc73a5 +msgid "" +"There even also is a new function, which delivers a list of all names " +"under which embedded data are registered in a PDF, " +":meth:`Document.embfile_names`." +msgstr "" +"また、PDF内の埋め込みデータが登録されているすべての名前のリストを返す新しい関数も存在します。:meth:`Document.embfile_names`" +" です。" + +#: ../../footer.rst:60 0a64be666c834c3aaf01bfaa636d73ad +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/app3.mo b/docs/locales/ja/LC_MESSAGES/app3.mo new file mode 100644 index 000000000..912c04854 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/app3.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/app3.po b/docs/locales/ja/LC_MESSAGES/app3.po new file mode 100644 index 000000000..dd74c6948 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/app3.po @@ -0,0 +1,895 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 60f6be1d195a47dfa71e36c01046a6f2 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 a4c0fceff01a49b8af3eeac93d6dad96 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 98d12c2cb9034b46a9688c6079f6ba7d +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../app3.rst:7 cca161444d5f424ea46010c939dda2bf +msgid "Appendix 3: Assorted Technical Information" +msgstr "付録3:さまざまな技術情報" + +#: ../../app3.rst:8 13009e428fa54f0f8b0fd4f28fd0135d +msgid "" +"This section deals with various technical topics, that are not " +"necessarily related to each other." +msgstr "このセクションでは、必ずしも関連しないさまざまな技術的なトピックについて取り扱います。" + +#: ../../app3.rst:15 13e1d68350c14d2296f3b76f13faaaab +msgid "Image Transformation Matrix" +msgstr "画像変換行列" + +#: ../../app3.rst:16 848f582117934433a927f0dbb689c00c +msgid "" +"Starting with version 1.18.11, the image transformation matrix is " +"returned by some methods for text and image extraction: " +":meth:`Page.get_text` and :meth:`Page.get_image_bbox`." +msgstr "" +"バージョン1.18.11から、テキストと画像の抽出に関する一部のメソッドで画像変換行列が返されます::meth:`Page.get_text` " +"および :meth:`Page.get_image_bbox` 。" + +#: ../../app3.rst:18 0654c7ad36c640a3b0cab3d7cd9c247d +msgid "" +"The transformation matrix contains information about how an image was " +"transformed to fit into the rectangle (its \"boundary box\" = \"bbox\") " +"on some document page. By inspecting the image's bbox on the page and " +"this matrix, one can determine for example, whether and how the image is " +"displayed scaled or rotated on a page." +msgstr "変換行列には、画像が文書ページ上の矩形(「境界ボックス」=「bbox」)に適合するためにどのように変換されたかに関する情報が含まれています。ページ上の画像のbboxとこの行列を検査することにより、例えば画像がページ上で拡大縮小または回転して表示されるかどうか、そしてどのように表示されるかを判断することができます。" + +#: ../../app3.rst:20 20f8cdadb1584a6ab210701b4ad8f4c5 +msgid "" +"The relationship between image dimension and its bbox on a page is the " +"following:" +msgstr "画像の寸法とページ上のbboxとの関係は次のとおりです:" + +#: ../../app3.rst:24 64e0b4b249684b7881f7532d0a28a921 +msgid "Using the original image's width and height," +msgstr "元の画像の幅と高さを使用して、" + +#: ../../app3.rst:23 9ec9e8f5100a4c169aa6fb31af4f7ee7 +msgid "define the image rectangle `imgrect = pymupdf.Rect(0, 0, width, height)`" +msgstr "画像の矩形を `imgrect = pymupdf.Rect(0, 0, width, height)` と定義します。" + +#: ../../app3.rst:24 9849a7a8eaf843a48b675921279bce2d +msgid "" +"define the \"shrink matrix\" `shrink = pymupdf.Matrix(1/width, 0, 0, " +"1/height, 0, 0)`." +msgstr "「縮小行列」を `shrink = pymupdf.Matrix(1/width, 0, 0,1/height, 0, 0)` と定義します。" + +#: ../../app3.rst:26 69793febeda4444a95c8d6b2e8b73251 +msgid "" +"Transforming the image rectangle with its shrink matrix, will result in " +"the unit rectangle: `imgrect * shrink = pymupdf.Rect(0, 0, 1, 1)`." +msgstr "" +"画像矩形を縮小行列で変換すると、単位矩形が得られます: `imgrect * shrink = pymupdf.Rect(0, 0, 1, 1)`" +" 。" + +#: ../../app3.rst:28 e1934c547348415db72046991c64653a +msgid "" +"Using the image **transformation matrix** \"transform\", the following " +"steps will compute the bbox::" +msgstr "画像 **変換行列** 「transform」を使用して、次の手順でbboxを計算します:" + +#: ../../app3.rst:34 0e8eccf66ebc4a85873f62a2bd80c6d1 +msgid "" +"Inspecting the matrix product `shrink * transform` will reveal all " +"information about what happened to the image rectangle to make it fit " +"into the bbox on the page: rotation, scaling of its sides and translation" +" of its origin. Let us look at an example:" +msgstr "" +"行列の積 `shrink * transform` " +"を検査することで、画像矩形がページ上のbboxに適合させるために何が起こったかに関するすべての情報が明らかになります。回転、辺のスケーリング、および原点の移動です。例を見てみましょう:" + +#: ../../app3.rst:71 b14abce2fb5d425297600773378533df +msgid "PDF Base 14 Fonts" +msgstr "PDFベース14フォント" + +#: ../../app3.rst:72 0ae7c8fe1e22485bbc633ff8d3a9bc46 +msgid "" +"The following 14 builtin font names **must be supported by every PDF " +"viewer** application. They are available as a dictionary, which maps " +"their full names amd their abbreviations in lower case to the full font " +"basename. Wherever a **fontname** must be provided in PyMuPDF, any **key " +"or value** from the dictionary may be used::" +msgstr "" +"以下の14の組み込みフォント名は、**すべてのPDFビューアアプリケーションでサポートされる必要があります** " +"。これらは辞書として利用可能で、それぞれのフルネームとその略称を小文字で完全な **フォントベース名** " +"にマッピングします。PyMuPDFでフォント名を提供する必要がある場合、辞書からの任意の **キーまたは値** を使用できます:" + +#: ../../app3.rst:105 3e6f6703b8da46708e3fc8c2384de523 +msgid "" +"In contrast to their obligation, not all PDF viewers support these fonts " +"correctly and completely -- this is especially true for Symbol and " +"ZapfDingbats. Also, the glyph (visual) images will be specific to every " +"reader." +msgstr "義務とは対照的に、すべてのPDFビューアがこれらのフォントを正確かつ完全にサポートしているわけではありません。特にSymbolとZapfDingbatsについては、これが特に当てはまります。また、グリフ(視覚的な)イメージは、それぞれの閲覧者に固有のものになります。" + +#: ../../app3.rst:107 a14f874dae6142fca8307f5dc4d5ea59 +msgid "" +"To see how these fonts can be used -- including the **CJK built-in** " +"fonts -- look at the table in :meth:`Page.insert_font`." +msgstr "" +"これらのフォントがどのように使用されるか( **CJK組み込みフォント** も含めて)、:meth:`Page.insert_font` " +"のテーブルをご覧ください。" + +#: ../../app3.rst:114 e74c51f5d9244a07ba5ff41d7d1f7eae +msgid "Adobe PDF References" +msgstr "Adobe PDFリファレンス" + +#: ../../app3.rst:116 f437d3bd70b8474d9c453efcf4fde51e +msgid "" +"This PDF Reference manual published by Adobe is frequently quoted " +"throughout this documentation. It can be viewed and downloaded from `opensource.adobe.com" +" `_." +msgstr "" +"Adobeによって公開されたこのPDFリファレンスマニュアルは、このドキュメンテーション全体で頻繁に引用されています。 `こちら " +"`_ から閲覧およびダウンロードが可能です。" + +#: ../../app3.rst:118 62c01b0557a14537b1af278fbf1e9d7d +msgid "" +"For a long time, an older version was also available under `this " +"`_" +" link. It seems to be taken off of the web site in October 2021. Earlier " +"(pre 1.19.*) versions of the PyMuPDF documentation used to refer to this " +"document. We have undertaken an effort to replace referrals to the " +"current specification above." +msgstr "" +"長い間、古いバージョンも `この " +"`_" +" " +"リンクで利用可能でしたが、2021年10月にウェブサイトから取り下げられたようです。以前(1.19.*より前)のPyMuPDFドキュメンテーションはこの文書を参照していました。私たちは、上記の現行仕様への参照を置き換える取り組みを行っています。" + +#: ../../app3.rst:125 ff92f05294a745e2935ab9010d0cece0 +msgid "Using Python Sequences as Arguments in PyMuPDF" +msgstr "PythonシーケンスをPyMuPDFで引数として使用する場合" + +#: ../../app3.rst:126 457903724c494cda86653c06537c9dc2 +msgid "" +"When PyMuPDF objects and methods require a Python **list** of numerical " +"values, other Python **sequence types** are also allowed. Python classes " +"are said to implement the **sequence protocol**, if they have a " +"`__getitem__()` method." +msgstr "" +"PyMuPDFのオブジェクトとメソッドが数値の値のPython **リスト** を必要とする場合、他のPython **シーケンス型** " +"も許可されています。Pythonのクラスは、`__getitem__()` メソッドを持つ場合、**シーケンスプロトコル** " +"を実装していると言われています。" + +#: ../../app3.rst:128 509b0d726a2a421b8b96fc497e097ef9 +msgid "" +"This basically means, you can interchangeably use Python *list* or " +"*tuple* or even *array.array*, *numpy.array* and *bytearray* types in " +"these cases." +msgstr "" +"基本的には、これらの場合にPythonの *リスト* や *タプル* 、*array.array* 、*numpy.array* " +"、*bytearray* 型を互換性を持って使用できることを意味しています。" + +#: ../../app3.rst:130 d7a8445cad7a4ff4915c3b219e40a525 +msgid "For example, specifying a sequence `\"s\"` in any of the following ways" +msgstr "例えば、次のいずれかの方法でシーケンス `\"s\"` を指定すると" + +#: ../../app3.rst:132 be280649629141ceaa48ab75c52aac03 +msgid "`s = [1, 2]` -- a list" +msgstr "`s = [1, 2]` – リスト" + +#: ../../app3.rst:133 37f22b1ed77f40f3a6f9d8062d8e3529 +msgid "`s = (1, 2)` -- a tuple" +msgstr "`s = (1, 2)` – タプル" + +#: ../../app3.rst:134 c0ff25efcfc84116b5686e2a4d499d2a +msgid "`s = array.array(\"i\", (1, 2))` -- an array.array" +msgstr "`s = array.array(\"i\", (1, 2))` – array.array" + +#: ../../app3.rst:135 dcc0cf4fb2d44b299145d5420fc5fe4d +msgid "`s = numpy.array((1, 2))` -- a numpy array" +msgstr "`s = numpy.array((1, 2))` – numpy配列" + +#: ../../app3.rst:136 07b6af8c8b6c4d01b9053e873478532d +msgid "`s = bytearray((1, 2))` -- a bytearray" +msgstr "`s = bytearray((1, 2))` – bytearray" + +#: ../../app3.rst:138 734fab1b094844babc00897aa6e59617 +msgid "will make it usable in the following example expressions:" +msgstr "これによって、以下の例の式で使用可能になります:" + +#: ../../app3.rst:140 d44ca9b7519149f6a404444a778d68ff +msgid "`pymupdf.Point(s)`" +msgstr "" + +#: ../../app3.rst:141 ee6a0b7d41b8429fbae8f23229104cf2 +msgid "`pymupdf.Point(x, y) + s`" +msgstr "" + +#: ../../app3.rst:142 54ccac4c6a6f44ffb1d176c095907782 +msgid "`doc.select(s)`" +msgstr "" + +#: ../../app3.rst:144 dae021bd0ba3466daa809ae6c1c994c2 +msgid "" +"Similarly with all geometry objects :ref:`Rect`, :ref:`IRect`, " +":ref:`Matrix` and :ref:`Point`." +msgstr "" +"同様に、すべてのジオメトリオブジェクト :ref:`Rect` 、:ref:`IRect` 、:ref:`Matrix` 、 " +":ref:`Point` も同様です。" + +#: ../../app3.rst:146 5c26e587a0964c99927a3ca8dc6f2704 +msgid "" +"Because all PyMuPDF geometry classes themselves are special cases of " +"sequences, they (with the exception of :ref:`Quad` -- see below) can be " +"freely used where numerical sequences can be used, e.g. as arguments for " +"functions like *list()*, *tuple()*, *array.array()* or *numpy.array()*. " +"Look at the following snippet to see this work." +msgstr "" +"なぜなら、すべてのPyMuPDFジオメトリクラス自体がシーケンスの特殊なケースであるため、( :ref:`Quad` " +"を除く)数値のシーケンスが使用可能な場所で自由に使用できるからです。例えば、 *list()* 、 *tuple()* 、 " +"*array.array()* " + +#: ../../app3.rst:163 1b2a7bd2869c448db2997c66e5c8ede9 +msgid ":ref:`Quad` is a Python sequence object as well and has a length of 4. Its items however are :data:`point_like` -- not numbers. Therefore, the above remarks do not apply." +msgstr "" +":ref:`Quad` もPythonのシーケンスオブジェクトであり、長さが4です。ただし、そのアイテムは数値ではなく " +":data:`point_like` です。したがって、上記の注釈は適用されません。" + +#: ../../app3.rst:170 94e21a5bc8ee4477a04441d5c30d724d +msgid "Ensuring Consistency of Important Objects in PyMuPDF" +msgstr "重要なオブジェクトの整合性を確保するためのPyMuPDF" + +#: ../../app3.rst:171 88e6f720f7994790a9e0537ae0a79c24 +msgid "" +"PyMuPDF is a Python binding for the C library MuPDF. While a lot of " +"effort has been invested by MuPDF's creators to approximate some sort of " +"an object-oriented behavior, they certainly could not overcome basic " +"shortcomings of the C language in that respect." +msgstr "PyMuPDFは、CライブラリMuPDFのPythonバインディングです。MuPDFの開発者たちは、ある種のオブジェクト指向の振る舞いを模倣するために多くの努力をしてきましたが、C言語の基本的な制約を克服することはできませんでした。" + +#: ../../app3.rst:173 cba001279ead48629806d8dd14a397a7 +msgid "" +"Python on the other hand implements the OO-model in a very clean way. The" +" interface code between PyMuPDF and MuPDF consists of two basic files: " +"*pymupdf.py* and *fitz_wrap.c*. They are created by the excellent SWIG " +"tool for each new version." +msgstr "" +"一方で、Pythonは非常にクリーンな方法でOOモデルを実装しています。PyMuPDFとMuPDFの間のインターフェースコードは、基本的に2つのファイルから構成されています:" +" *pymupdf.py* と *fitz_wrap.c* 。これらは、新バージョンごとに優れたSWIGツールによって作成されます。" + +#: ../../app3.rst:175 97bdc84ef0d442ddb2f51fed23efdce2 +msgid "" +"When you use one of PyMuPDF's objects or methods, this will result in " +"execution of some code in *pymupdf.py*, which in turn will call some C " +"code compiled with *fitz_wrap.c*." +msgstr "" +"PyMuPDFのオブジェクトやメソッドのいずれかを使用すると、これにより *pymupdf.py* でいくつかのコードが実行され、それがさらに " +"*fitz_wrap.c* でコンパイルされたCコードを呼び出します。" + +#: ../../app3.rst:177 4f2a76ae2d334cbaa03fad1952397145 +msgid "" +"Because SWIG goes a long way to keep the Python and the C level in sync, " +"everything works fine, if a certain set of rules is being strictly " +"followed. For example: **never access** a :ref:`Page` object, after you " +"have closed (or deleted or set to ``None``) the owning :ref:`Document`. " +"Or, less obvious: **never access** a page or any of its children (links " +"or annotations) after you have executed one of the document methods " +"*select()*, *delete_page()*, *insert_page()* ... and more." +msgstr "" +"SWIGはPythonとCレベルを同期させるために大いに役立っているため、一定のルールが厳密に守られる限り、すべてが正常に動作します。例えば、所有している" +" :ref:`ドキュメント` を閉じたり(または削除したり、Noneに設定したり)した後に :ref:`Page` オブジェクトに " +"**アクセスしないでください** 。また、より明確な例では、 *select()* 、*delete_page()* 、 " +"*insert_page()* などのドキュメントメソッドを実行した後にページやその子要素(リンクや注釈など)に **アクセスしないでください**" +" 。" + +#: ../../app3.rst:179 1ca68a43b1a54a1ca23b33b1a82350cf +msgid "" +"But just no longer accessing invalidated objects is actually not enough: " +"They should rather be actively deleted entirely, to also free C-level " +"resources (meaning allocated memory)." +msgstr "ただし、無効なオブジェクトへのアクセスをやめるだけでは実際には十分ではありません。これらのオブジェクトは、Cレベルのリソース(割り当てられたメモリ)も解放するために、完全に削除されるべきです。" + +#: ../../app3.rst:181 8f76f7db96944107970598e7bedd8e4e +msgid "" +"The reason for these rules lies in the fact that there is a hierarchical " +"2-level one-to-many relationship between a document and its pages and " +"also between a page and its links / annotations. To maintain a consistent" +" situation, any of the above actions must lead to a complete reset -- in " +"**Python and, synchronously, in C**." +msgstr "" +"これらのルールの理由は、ドキュメントとそのページ、およびページとそのリンク/注釈の間に階層的な2段階の1対多の関係があるためです。一貫した状況を維持するために、上記のアクションのいずれもが" +" **PythonとCの両方で** 完全なリセットを引き起こさなければなりません。" + +#: ../../app3.rst:183 1de141a55ff04b70aaaf88a5e4986383 +msgid "SWIG cannot know about this and consequently does not do it." +msgstr "SWIGはこれを知ることはできないため、それを実行しません。" + +#: ../../app3.rst:185 ae8c62aa153947348c48a7365666a43f +msgid "" +"The required logic has therefore been built into PyMuPDF itself in the " +"following way." +msgstr "必要な論理はしたがって、PyMuPDF自体に以下のように組み込まれています。" + +#: ../../app3.rst:187 e8321451c76f4ae4ba926dd0c87b7277 +msgid "" +"If a page \"loses\" its owning document or is being deleted itself, all " +"of its currently existing annotations and links will be made unusable in " +"Python, and their C-level counterparts will be deleted and deallocated." +msgstr "ページが所有するドキュメントを失ったり、それ自体が削除されると、現在存在するすべての注釈とリンクはPythonで使用できなくなり、それらのCレベルの対応部分が削除されて解放されます。" + +#: ../../app3.rst:189 96f99f420a1c4ef185b7e412c245ad07 +msgid "" +"If a document is closed (or deleted or set to ``None``) or if its " +"structure has changed, then similarly all currently existing pages and " +"their children will be made unusable, and corresponding C-level deletions" +" will take place. \"Structure changes\" include methods like *select()*, " +"*delePage()*, *insert_page()*, *insert_pdf()* and so on: all of these " +"will result in a cascade of object deletions." +msgstr "" +"ドキュメントが閉じられたり(または削除されたり、 ``None`` " +"に設定されたり)したり、構造が変更されたりすると、同様に現在存在するすべてのページとその子要素は使用できなくなり、対応するCレベルの削除が行われます。「構造の変更」とは、" +" *select()* 、 *delete_page()* 、 *insert_page()* 、 *insert_pdf()* " +"などのメソッドを含みます。これらのすべてはオブジェクトの削除の連鎖を引き起こします。" + +#: ../../app3.rst:191 bcc9663628be4e6aba0eda43e8714ec4 +msgid "" +"The programmer will normally not realize any of this. If he, however, " +"tries to access invalidated objects, exceptions will be raised." +msgstr "プログラマーは通常、これらのいずれも気づかないでしょう。ただし、無効なオブジェクトにアクセスしようとすると、例外が発生します。" + +#: ../../app3.rst:193 2bae3cd4fe2b4fbbb03c916169069d20 +msgid "" +"Invalidated objects cannot be directly deleted as with Python statements " +"like *del page* or *page = None*, etc. Instead, their *__del__* method " +"must be invoked." +msgstr "" +"無効なオブジェクトは、 *del page* または *page = None* " +"などのPythonステートメントで直接削除することはできません。代わりに、その *__del__* メソッドを呼び出す必要があります。" + +#: ../../app3.rst:195 ce99d9952b6246ab8d51aed6697384d0 +msgid "" +"All pages, links and annotations have the property *parent*, which points" +" to the owning object. This is the property that can be checked on the " +"application level: if *obj.parent == None* then the object's parent is " +"gone, and any reference to its properties or methods will raise an " +"exception informing about this \"orphaned\" state." +msgstr "" +"すべてのページ、リンク、注釈には、所有するオブジェクトを指す *親* " +"プロパティがあります。これはアプリケーションレベルでチェックできるプロパティです: *obj.parent == None* " +"ならば、そのオブジェクトの親は存在せず、そのプロパティやメソッドへの参照は例外を発生させてこの「孤立」した状態について通知します。" + +#: ../../app3.rst:197 85bda416e78f4e74ab6528e5a1a5049f +msgid "A sample session:" +msgstr "サンプルセッション:" + +#: ../../app3.rst:214 17bb59c9cda1416c9b5d94fa4c8a6e85 +msgid "This shows the cascading effect:" +msgstr "これは連鎖効果を示しています。" + +#: ../../app3.rst:231 2bcef2f15dab4059900aa81e56f44ec8 +msgid "" +"Objects outside the above relationship are not included in this " +"mechanism. If you e.g. created a table of contents by *toc = " +"doc.get_toc()*, and later close or change the document, then this cannot " +"and does not change variable *toc* in any way. It is your responsibility " +"to refresh such variables as required." +msgstr "" +"上記の関係外のオブジェクトは、このメカニズムに含まれていません。たとえば、 `toc = doc.get_toc()` " +"のように目次を作成し、後で文書を閉じたり変更したりする場合、これは変数 toc " +"をどのようにも変更しません。必要に応じてそのような変数を更新する責任はあなたにあります。" + +#: ../../app3.rst:238 14e8b8a4d4a542eb81903ececa2ef219 +msgid "Design of Method :meth:`Page.show_pdf_page`" +msgstr "メソッド :meth:`Page.show_pdf_page` の設計" + +#: ../../app3.rst:241 5e29628b2cf546ffaea96853a2799ead +msgid "Purpose and Capabilities" +msgstr "目的と機能" + +#: ../../app3.rst:243 1f05906717f343c4a5b20953e30cd42d +msgid "" +"The method displays an image of a (\"source\") page of another PDF " +"document within a specified rectangle of the current (\"containing\", " +"\"target\") page." +msgstr "このメソッドは、現在の(「含まれる」、「ターゲット」)ページの指定された矩形内に別のPDF文書の(「ソース」)ページの画像を表示します。" + +#: ../../app3.rst:245 610835e36fb248bb9aaec3c84f56d479 +msgid "" +"**In contrast** to :meth:`Page.insert_image`, this display is vector-" +"based and hence remains accurate across zooming levels." +msgstr ":meth:`Page.insert_image` **とは異なり** 、この表示はベクターベースであり、ズームレベルを超えて正確に保たれます。" + +#: ../../app3.rst:246 d9f42a6ad2df465282bc7f547af785a1 +msgid "" +"**Just like** :meth:`Page.insert_image`, the size of the display is " +"adjusted to the given rectangle." +msgstr ":meth:`Page.insert_image` **と同様に** 、表示のサイズは指定された矩形に調整されます。" + +#: ../../app3.rst:248 c04d0bb6dc87492d861387305afd04a9 +msgid "The following variations of the display are currently supported:" +msgstr "現在、次のバリエーションの表示がサポートされています:" + +#: ../../app3.rst:250 7793c6362929447eb6d633a4972e90cf +msgid "" +"Bool parameter `\"keep_proportion\"` controls whether to maintain the " +"aspect ratio (default) or not." +msgstr "Bool パラメーター `\"keep_proportion\"` はアスペクト比を保持するかどうかを制御します(デフォルト)。" + +#: ../../app3.rst:251 f04a25c8fef448039df5ba3f241c77ad +msgid "" +"Rectangle parameter `\"clip\"` restricts the visible part of the source " +"page rectangle. Default is the full page." +msgstr "矩形パラメーター `\"clip\"` はソースページの矩形の可視部分を制限します。デフォルトはフルページです。" + +#: ../../app3.rst:252 bd5c3926320048e0b41f2646d25bd84f +msgid "" +"float `\"rotation\"` rotates the display by an arbitrary angle (degrees)." +" If the angle is not an integer multiple of 90, only 2 of the 4 corners " +"may be positioned on the target border if also `\"keep_proportion\"` is " +"true." +msgstr "" +"float `\"rotation\"` " +"は表示を任意の角度(度)で回転させます。角度が90の倍数でない場合、`\"keep_proportion\"` も true " +"の場合、ターゲットの境界に4つのうち2つのコーナーのみが配置される場合があります。" + +#: ../../app3.rst:253 bc7eb9db1e2a4bbd909ab1c051a53490 +msgid "" +"Bool parameter `\"overlay\"` controls whether to put the image on top " +"(foreground, default) of current page content or not (background)." +msgstr "" +"Bool パラメーター `\"overlay\"` " +"は、画像を現在のページコンテンツの上(前景、デフォルト)に配置するか、そうでないか(背景)を制御します。" + +#: ../../app3.rst:255 ed8c5c5838af4734971f7bcbc99ece4b +msgid "Use cases include (but are not limited to) the following:" +msgstr "使用例は以下のようなものがありますが、これに限定されません:" + +#: ../../app3.rst:257 f4e19bbd27d64e8daed6a71bd1586047 +msgid "" +"\"Stamp\" a series of pages of the current document with the same image, " +"like a company logo or a watermark." +msgstr "現在の文書の複数のページに同じ画像(企業のロゴや透かし)を「スタンプ」する。" + +#: ../../app3.rst:258 f14819856e0a47a8828aff5410df1a9e +msgid "" +"Combine arbitrary input pages into one output page to support “booklet” " +"or double-sided printing (known as \"4-up\", \"n-up\")." +msgstr "任意の入力ページを1つの出力ページに組み合わせ、\"ブックレット\"や両面印刷をサポートする(「4-up」、「n-up」としても知られています)。" + +#: ../../app3.rst:259 e3c34f8c6c5c4f4a80b1d05f8d857c73 +msgid "" +"Split up (large) input pages into several arbitrary pieces. This is also " +"called “posterization”, because you e.g. can split an A4 page " +"horizontally and vertically, print the 4 pieces enlarged to separate A4 " +"pages, and end up with an A2 version of your original page." +msgstr "(大きな)入力ページをいくつかの任意のピースに分割する。これは「ポスタリゼーション」とも呼ばれ、たとえばA4ページを水平および垂直に分割し、4つのピースを別々のA4ページに拡大印刷して、元のページのA2バージョンを作成することができます。" + +#: ../../app3.rst:262 758ca9db46c44e75be2fb1cf2c44ea91 +msgid "Technical Implementation" +msgstr "テクニカル実装" + +#: ../../app3.rst:264 ecec59169e68440ea430229d4b285d11 +msgid "" +"This is done using PDF **\"Form XObjects\"**, see section 8.10 on page " +"217 of :ref:`AdobeManual`. On execution of a :meth:`Page.show_pdf_page`, " +"the following things happen:" +msgstr "" +"これはPDF **「フォームXObject」** を使用して行われます。 :ref:`AdobeManual` リファレンス " +"の217ページ、セクション8.10を参照してください。 :meth:`Page.show_pdf_page` " +"が実行されると、次のことが起こります。" + +#: ../../app3.rst:266 4cf6aa869dc647639227684417fea492 +msgid "" +"The :data:`resources` and :data:`contents` objects of source page in " +"source document are copied over to the target document, jointly creating " +"a new **Form XObject** with the following properties. The PDF " +":data:`xref` number of this object is returned by the method." +msgstr "" +"ソースドキュメント内のソースページの :data:`resources` と :data:`contents` " +"オブジェクトは、ターゲットドキュメントにコピーされ、共同で新しい **フォームXObject** が作成されます。このオブジェクトのPDF " +":data:`xref` 番号がメソッドによって返されます。" + +#: ../../app3.rst:268 6a4e90e689f34c1b9227d77cc202848e +msgid "`/BBox` equals `/Mediabox` of the source page" +msgstr "`/BBox` はソースページの `/Mediabox` に等しいです。" + +#: ../../app3.rst:269 a10cd43bfe234839887ebf03e6f9f60d +msgid "`/Matrix` equals the identity matrix." +msgstr "`/Matrix` は単位行列と等しいです。" + +#: ../../app3.rst:270 14c5d12da8294828b0c1ce38065dbce8 +msgid "" +"`/Resources` equals that of the source page. This involves a “deep-copy” " +"of hierarchically nested other objects (including fonts, images, etc.). " +"The complexity involved here is covered by MuPDF's grafting [#f1]_ " +"technique functions." +msgstr "" +"`/Resources` " +"はソースページのものに等しいです。これには、階層的にネストされた他のオブジェクト(フォント、画像など)の「ディープコピー」が含まれます。ここでの複雑さは、MuPDFのグラフティング[1]技術関数によってカバーされています。" + +#: ../../app3.rst:271 aeef902e98f74b2c934243459b384b22 +msgid "" +"This is a stream object type, and its stream is an exact copy of the " +"combined data of the source page's :data:`contents` objects." +msgstr "" +"これはストリームオブジェクトタイプであり、そのストリームはソースページの :data:`contents` " +"オブジェクトの結合データの正確なコピーです。" + +#: ../../app3.rst:273 9221ae2797b044898b9ccc33853b1d18 +msgid "" +"This Form XObject is only executed once per shown source page. Subsequent" +" displays of the same source page will skip this step and only create " +"\"pointer\" Form XObjects (done in next step) to this object." +msgstr "このフォームXObjectは、表示されるソースページごとに1回だけ実行されます。同じソースページの後続の表示では、このステップはスキップされ、このオブジェクトへの「ポインター」フォームXObject(次のステップで行われる)のみが作成されます。" + +#: ../../app3.rst:275 226210faaf584ae1824537a7c5a2a6b2 +msgid "" +"A second **Form XObject** is then created which the target page uses to " +"invoke the display. This object has the following properties:" +msgstr "" +"次に、ターゲットページが表示を呼び出すために使用する2番目の **フォームXObject** " +"が作成されます。このオブジェクトには次のような特性があります。" + +#: ../../app3.rst:277 41fc749a67944e429e2d3bb946515aec +msgid "`/BBox` equals the `/CropBox` of the source page (or `\"clip\"`)." +msgstr "`/BBox` はソースページの `/CropBox`(または `\"クリップ\"` )に等しいです。" + +#: ../../app3.rst:278 eec5f4bf421146d6a8a42621e6e510ac +msgid "`/Matrix` represents the mapping of `/BBox` to the target rectangle." +msgstr "`/Matrix` は `/BBox` からターゲット矩形へのマッピングを表します。" + +#: ../../app3.rst:279 d62e8a586ddf4f09af38ebddd1230061 +msgid "" +"`/XObject` references the previous Form XObject via the fixed name " +"`fullpage`." +msgstr "`/XObject` は、以前のフォームXObjectを固定された名前 `fullpage` を介して参照します。" + +#: ../../app3.rst:280 7d246f2a490343a3ab683bc628df56b9 +msgid "" +"The stream of this object contains exactly one fixed statement: " +"`/fullpage Do`." +msgstr "このオブジェクトのストリームには、正確に1つの固定されたステートメントが含まれています: `/fullpage Do` 。" + +#: ../../app3.rst:281 1ccfb2d1358b478492c3700a4538218e +msgid "" +"If the method's `\"oc\"` argument is given, its value is assigned to this" +" Form XObject as `/OC`." +msgstr "メソッドの `「oc」` 引数が指定された場合、その値はこのフォームXObjectに `/OC` として割り当てられます。" + +#: ../../app3.rst:283 7bf4dc3eb07941deb6a5193e0a042d23 +msgid "" +"The :data:`resources` and :data:`contents` objects of the target page are" +" now modified as follows." +msgstr "ターゲットページの :data:`リソース` と :data:`コンテンツ` オブジェクトは以下のように変更されます。" + +#: ../../app3.rst:285 aad7990a3a3d45639b13246d835a0c65 +msgid "" +"Add an entry to the `/XObject` dictionary of `/Resources` with the name " +"`fzFrm` (with n chosen such that this entry is unique on the page)." +msgstr "" +"`/Resources` の `/XObject` 辞書に、 ` fzFrm ` " +"という名前のエントリをページ内で一意であるように追加します(nはこのエントリがページで一意であるように選択されます)。" + +#: ../../app3.rst:286 b82b302f499f410b9e350f36bbfdda59 +msgid "" +"Depending on `\"overlay\"`, prepend or append a new object to the page's " +"`/Contents` array, containing the statement `q /fzFrm Do Q`." +msgstr "" +"`\"overlay\"` に応じて、ページの `/ Contents` 配列に新しいオブジェクトを前または後に追加し、ステートメント `q / " +"fzFrm Do Q` を含めます。" + +#: ../../app3.rst:288 b071c7d341434d26bdeed45392697a34 +msgid "This design approach ensures that:" +msgstr "この設計アプローチは次を保証します:" + +#: ../../app3.rst:290 63f00a3e1dd04f6781934140e874c7a7 +msgid "" +"The (potentially large) source page is only copied once to the target " +"PDF. Only small \"pointer\" Form XObjects objects are created per each " +"target page to show the source page." +msgstr "(潜在的に大きな)ソースページは、ターゲットPDFに1度だけコピーされます。各ターゲットページごとに、ソースページを表示するための小さな「ポインター」フォームXObjectオブジェクトが作成されます。" + +#: ../../app3.rst:291 b41e1ced86c94cf8a082e74176de111f +msgid "" +"Each referring target page can have its own `\"oc\"` parameter to control" +" the source page's visibility individually." +msgstr "参照する各ターゲットページは、ソースページの表示を個別に制御するための独自の `「oc」` パラメータを持つことができます。" + +#: ../../app3.rst:298 76a4f77bd1e14da79d353bd8260eed70 +msgid "Diagnostics" +msgstr "" + +#: ../../app3.rst:303 6fc791083a8748a1859e411cc8953c59 +msgid "|PyMuPDF| messages" +msgstr "" + +#: ../../app3.rst:305 c601036e9eab4c8ebbeef49f120f5f4c +msgid "|PyMuPDF| has a Message system for showing text diagnostics." +msgstr "" + +#: ../../app3.rst:307 8b50daec304d440da53e24b598f0bf34 +msgid "" +"By default messages are written to `sys.stdout`. This can be controlled " +"in two ways:" +msgstr "" + +#: ../../app3.rst:311 0254e3834f4642e18301ef987f609bae +msgid "Set environment variable `PYMUPDF_MESSAGE` before |PyMuPDF| is imported." +msgstr "" + +#: ../../app3.rst:314 70440a09782b4278ba14d7c690dd6b5e +msgid "Call `set_messages()`:" +msgstr "" + +#: ../../app3.rst:318 5a458f1861c547ccb584f969867943b7 +msgid "MuPDF errors and warnings" +msgstr "" + +#: ../../app3.rst:320 c1737a8bacca4f438f61217eaacc4fe9 +msgid "MuPDF generates text errors and warnings." +msgstr "" + +#: ../../app3.rst:323 13acc645814b4b3c886c1b149f0d8f16 +msgid "" +"These errors and warnings are appended to an internal list, accessible " +"with `Tools.mupdf_warnings()`. Also see `Tools.reset_mupdf_warnings()`." +msgstr "" + +#: ../../app3.rst:327 492bd13e7c93436488a4721a9822f908 +msgid "" +"By default these errors and warnings are also sent to the |PyMuPDF| " +"message system." +msgstr "" + +#: ../../app3.rst:330 a61e2baba2164b7f92e239307e36c6aa +msgid "" +"This can be controlled with `mupdf_display_errors()` and " +"`mupdf_display_warnings()`." +msgstr "" + +#: ../../app3.rst:334 e6ff12f897e24b62989474208bea60a4 +msgid "" +"These messages are prefixed with `MuPDF error:` and `MuPDF warning:` " +"respectively." +msgstr "" + +#: ../../app3.rst:337 8d7c2c04a7ca425c8569ca5c0aabdba3 +msgid "Some MuPDF errors may lead to Python exceptions." +msgstr "" + +#: ../../app3.rst:339 5a00219693a446a4963089abe7ad80bd +msgid "" +"Example output for a **recoverable error**. We are opening a damaged PDF," +" but MuPDF is able to repair it and gives us a little information on what" +" happened. Then we illustrate how to find out whether the document can " +"later be saved incrementally. Checking the :attr:`Document.is_dirty` " +"attribute at this point also indicates that during `pymupdf.open` the " +"document had to be repaired:" +msgstr "" + +#: ../../app3.rst:360 c64e229b9b34446f899be35325d09c35 +msgid "Example output for an **unrecoverable error**:" +msgstr "" + +#: ../../app3.rst:378 77eea1b26e664197b0bb4bcaf68a0f7b + +msgid "Coordinates" +msgstr "脚注" + +#: ../../app3.rst:381 dc043632f05e4719bd2eaddd7df7acaf +msgid "" +"This is one of the most frequently used terms in this documentation. A " +"**coordinate** generally means a pair of numbers `(x, y)` referring to " +"some location, like a corner of a rectangle (:ref:`Rect`), a :ref:`Point`" +" and so forth. The two values usually are floats, but there a objects " +"like images which only allow them to be integers." +msgstr "" + +#: ../../app3.rst:383 56f1c800efe94faca41120039a3bdcf6 +msgid "" +"To actually *find* a coordinate's location, we also need to know the " +"*reference* point for ``x`` and ``y`` - in other words, we must know " +"where location `(0, 0)` is positioned. Once `(0, 0)` (the \"origin\") is " +"known, we speak of a \"coordinate system\"." +msgstr "" + +#: ../../app3.rst:385 680606534deb4bbeb64f63e5e7974fde +msgid "" +"Several coordinate systems exist in document processing. For instance, " +"the coordinate systems of a PDF page and the image created from it are " +"**different**. We therefore need ways to *transform* coordinates from one" +" system to another (and also back occasionally). This is the task of a " +":ref:`Matrix`. It is a mathematical function which works much like a " +"factor that can be \"multiplied\" with a point or rectangle to give us " +"the corresponding point / rectangle in another coordinate system. The " +"inverse of a transformation matrix can be used to revert the " +"transformation. Much like multiplying by some factor, say 3, can be " +"reverted by dividing the result by 3 (or multiplying it with 1/3)." +msgstr "" + +#: ../../app3.rst:388 70a2643cf5834d0f8bd9a519bb39e5c8 +msgid "Coordinates and Images" +msgstr "" + +#: ../../app3.rst:390 d674cd731bb3481a93ffb0323a2cd62b +msgid "" +"Images have a coordinate system with integer coordinates. Origin `(0, 0)`" +" is the top-left point. ``x`` values must be in `range(width)`, and ``y``" +" values in `range(height)`. Therefore, ``y`` values *increase* if we go " +"*downwards*. For every image, there is only a **finite number** of " +"coordinates, namely `width * height`. A location in an image is also " +"called a \"pixel\"." +msgstr "" + +#: ../../app3.rst:392 57ae46c4d2954c38903fba3ab34553f7 +msgid "" +"How **large** an image will be (in centimeters or inches) when e.g. " +"printed, depends on additional information: the \"resolution\". This is " +"measured in **DPI** (dots per inch, or pixels per inch). To find the " +"printed size of some image, we therefore must divide its width and its " +"height by the corresponding DPI values (there may separate ones for width" +" and for height) and will get the respective number of inches." +msgstr "" + +#: ../../app3.rst:396 5e0511df1f0a483c8fe7410639ec7301 +msgid "Origin Point, Point Size and Y-Axis" +msgstr "" + +#: ../../app3.rst:398 2594cf765c404df0baaeb3e9e5522bd2 +msgid "" +"In |PDF|, the origin `(0, 0)` of a page is located at its **bottom-left " +"point**. In |MuPDF|, the origin `(0, 0)` of a page is located at its " +"**top-left point**." +msgstr "" + +#: ../../app3.rst:403 88d467234422454ea94c59e3bd6b8b84 +msgid "Coordinates are float numbers and measured in **points**, where:" +msgstr "" + +#: ../../app3.rst:405 3900c8d8ab694352a7d5ba3d88cc7cf6 +msgid "**one point equals 1/72 inches**." +msgstr "" + +#: ../../app3.rst:407 5887ebc05faa46d582e5cd548d983d0c +msgid "" +"Typical document page sizes are **ISO A4** and **Letter**. A **Letter** " +"page has a size of **8.5 x 11 inches**, corresponding to **612 x 792 " +"points**. In the |PDF| coordinate system, the top-left point of a " +"**Letter** page hence has the coordinate `(0, 792)` as **the y-axis " +"points upwards**. Now we know our document size the |MuPDF| coordinate " +"system for the bottom right would be coordinate `(612, 792)` (and for " +"|PDF| this coordinate would then be `(612,0)`)." +msgstr "" + +#: ../../app3.rst:409 5d8680c9d78d4c46bbabf6025dbcd498 +msgid "" +"Theoretically, there are **infinitely many** coordinate positions on a " +"|PDF| page. In practice however, at most the first 5 decimal places are " +"sufficient for a reasonable precision." +msgstr "" + +#: ../../app3.rst:412 6976bdd2380c4367b9f3f0a94c4e1a2b +msgid "" +"In |MuPDF|, multiple document formats are supported - |PDF| just being " +"one among **over a dozen others**. Images are also supported as documents" +" in |MuPDF| (therefore having one page usually). This is one of the " +"reasons why |MuPDF| uses a coordinate system with the origin `(0, 0)` " +"being the **top-left** point of any document page. **The y-axis points " +"downwards**, like with images. Coordinates in |MuPDF| in any case are " +"floats, like in |PDF|." +msgstr "" + +#: ../../app3.rst:414 cb098c0514b6405b819a098b185365a8 +msgid "" +"A rectangle `Rect(0, 0, 100, 100)` for instance in |MuPDF| (and thus " +"|PyMuPDF|) therefore is a square with edges of length 100 points (= 1.39 " +"inches or 3.53 centimeters). Its top-left corner is the origin. To switch" +" between the two coordinate systems |PDF| to |MuPDF|, every :ref:`Page` " +"object has a :attr:`Page.transformation_matrix`. Its inverse can be used " +"to compute a rectangle's PDF coordinates. In this way we can conveniently" +" find that `Rect(0, 0, 100, 100)` in |MuPDF| is the same as `Rect(0, 692," +" 100, 792)` in |PDF|. See this code snippet::" +msgstr "" + +#: ../../app3.rst:425 ce9f7604adb041b5a42884b1fbcf2458 +msgid "Footnotes" +msgstr "脚注" + +#: ../../app3.rst:426 6710c6c4a0cb4c8f95a2db0a2d0154a1 +msgid "" +"MuPDF supports \"deep-copying\" objects between PDF documents. To avoid " +"duplicate data in the target, it uses so-called \"graftmaps\", like a " +"form of scratchpad: for each object to be copied, its :data:`xref` number" +" is looked up in the graftmap. If found, copying is skipped. Otherwise, " +"the new :data:`xref` is recorded and the copy takes place. PyMuPDF makes " +"use of this technique in two places so far: :meth:`Document.insert_pdf` " +"and :meth:`Page.show_pdf_page`. This process is fast and very efficient, " +"because it prevents multiple copies of typically large and frequently " +"referenced data, like images and fonts. However, you may still want to " +"consider using garbage collection (option 4) in any of the following " +"cases:" +msgstr "" +"MuPDFはPDFドキュメント間でオブジェクトを「ディープコピー」することをサポートしています。対象の中で重複するデータを避けるために、「グラフトマップ」と呼ばれる仕組みを使用します。これはスクラッチパッドのようなもので、コピーされる各オブジェクトについて、その" +" :data:`xref` 番号をグラフトマップで調べます。もし見つかれば、コピーはスキップされます。それ以外の場合は、新しい " +":data:`xref` が記録され、コピーが行われます。PyMuPDFは、:meth:`Document.insert_pdf` と " +":meth:`Page.show_pdf_page` " +"の2つの場所でこの技術を使用しています。このプロセスは高速で非常に効率的です。なぜなら、通常大きなデータや頻繁に参照されるデータ(画像やフォントなど)の複数のコピーを防ぐためです。ただし、次のいずれかの場合にはガベージコレクション(オプション4)の使用を検討することをお勧めします:" + +#: ../../app3.rst:428 461400f147fb42298ea228749fce0741 +msgid "" +"The target PDF is not new / empty: grafting does not check for resources " +"that already existed (e.g. images, fonts) in the target document before " +"opening it." +msgstr "対象のPDFが新しい/空でない場合:グラフティングは、対象ドキュメント内で既に存在しているリソース(例:画像、フォント)をチェックしません。" + +#: ../../app3.rst:429 180ae4a257254392a4e34e44abe0f57c +msgid "" +"Using :meth:`Page.show_pdf_page` for more than one source document: each " +"grafting occurs **within one source** PDF only, not across multiple. So " +"if e.g. the same image exists in pages from different source PDFs, then " +"this will not be detected until garbage collection." +msgstr "" +"複数のソースドキュメントで :meth:`Page.show_pdf_page` を使用する場合:グラフティングは **1つのソース** " +"PDF内でのみ発生し、複数のソースPDF間では発生しません。したがって、同じ画像が異なるソースPDFのページに存在する場合、これはガベージコレクションまで検出されません" + +#: ../../footer.rst:60 2489ee1d71db47c19985990e4efb9382 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Redirecting Error and Warning Messages" +#~ msgstr "エラーと警告メッセージのリダイレクト" + +#~ msgid "" +#~ "Since MuPDF version 1.16 error and " +#~ "warning messages can be redirected via" +#~ " an official plugin." +#~ msgstr "MuPDFバージョン1.16以降、エラーと警告メッセージは公式プラグインを介してリダイレクトできます。" + +#~ msgid "" +#~ "PyMuPDF will put error messages to " +#~ "`sys.stderr` prefixed with the string " +#~ "\"mupdf:\". Warnings are internally stored " +#~ "and can be accessed via " +#~ "*pymupdf.TOOLS.mupdf_warnings()*. There also is " +#~ "a function to empty this store." +#~ msgstr "" +#~ "PyMuPDFはエラーメッセージを、先頭に文字列「mupdf:」を付けて `sys.stderr` " +#~ "に表示します。警告は内部で保存され、*pymupdf.TOOLS.mupdf_warnings()* " +#~ "を通じてアクセスできます。また、この保存領域を空にするための関数も存在します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/app4.mo b/docs/locales/ja/LC_MESSAGES/app4.mo new file mode 100644 index 000000000..f77060e73 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/app4.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/app4.po b/docs/locales/ja/LC_MESSAGES/app4.po new file mode 100644 index 000000000..fefc37692 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/app4.po @@ -0,0 +1,1143 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 b65281ab54ae48fe8a1bcd0250c11271 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d0836161a9da4e78816c23129af0bc7c +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 eafd58bedac04f25b65821d0bfc7ddc2 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../app4.rst:14 e875c5ebdbc3422d99a76e5b307d8488 +msgid "Appendix 4: Performance Comparison Methodology" +msgstr "付録4:性能比較方法" + +#: ../../app4.rst:16 0f8302b49495485b86460818f44f4dc7 +msgid "" +"This article documents the approach to measure :title:`PyMuPDF's` " +"performance and the tools and example files used to do comparisons." +msgstr "この記事では、|PyMuPDF| の性能を測定するアプローチと、比較を行うために使用されるツールとサンプルファイルについて説明します。" + +#: ../../app4.rst:18 6c84be39d4424fd4babfb03d8db54dd3 +msgid "The following three sections deal with different performance aspects:" +msgstr "以下の3つのセクションでは、異なる性能の側面に取り組んでいます:" + +#: ../../app4.rst:20 e000313e5c5c42028877968fe5acc812 +msgid "" +":ref:`Document Copying` - This includes opening and parsing" +" :title:`PDFs`, then writing them to an output file. Because the same " +"basic activities are also used for joining (merging) :title:`PDFs`, the " +"results also apply to these use cases." +msgstr "" +":ref:`ドキュメントのコピー` - これには |PDF| " +"ファイルの開閉と解析、そしてそれらを出力ファイルに書き込む作業が含まれます。同じ基本的なアクティビティは、|PDF| " +"ファイルの結合(マージ)にも使用されるため、結果はこれらのユースケースにも適用されます。" + +#: ../../app4.rst:21 7ad2ccb2be0e437abb94eb2fcc7abed3 +msgid "" +":ref:`Text Extraction` - This extracts plain text " +"from :title:`PDFs` and writes it to an output text file." +msgstr "" +":ref:`テキスト抽出` - これにより |PDF| " +"ファイルから平文テキストが抽出され、テキストファイルに書き込まれます。" + +#: ../../app4.rst:22 174dc9345ebd4759a0a7ad41fd107693 +msgid "" +":ref:`Page Rendering` - This converts |PDF| pages to" +" image files looking identical to the pages. This ability is the basic " +"prerequisite for using a tool in :title:`Python GUI` scripts to scroll " +"through documents. We have chosen a medium-quality (resolution 150 DPI) " +"version." +msgstr "" +":ref:`ページレンダリング` - " +"これによりPDFページがページと同じような見た目の画像ファイルに変換されます。この機能は、:title:`Python GUI` " +"スクリプトでドキュメントをスクロールするための基本的な前提条件です。中画質版(解像度150 DPI)を選択しました。" + +#: ../../app4.rst:24 f509ac94d6d440c4bcd83cf9d7f17a91 +msgid "" +"Please note that in all cases the actual speed in dealing with |PDF| " +"structures is not directly measured: instead, the timings also include " +"the durations of writing files to the operating system's file system. " +"This cannot be avoided because tools other than |PyMuPDF| do not offer " +"the option to e.g., separate the image **creation** step from the " +"following step, which **writes** the image into a file." +msgstr "" +"|PDF| " +"構造の処理速度そのものを直接測定するのではなく、すべての場合において、タイミングにはファイルをオペレーティングシステムのファイルシステムに書き込む時間も含まれることに注意してください。これは回避できない要因です。なぜなら、|PyMuPDF|" +" 以外のツールでは、例えばイメージの **作成** ステップと、イメージをファイルに **書き込む** " +"後続のステップを分離するオプションが提供されていないためです。" + +#: ../../app4.rst:26 3e62d5606ac74cd4a8f0ed624824cf23 +msgid "" +"So all timings documented include a common, OS-oriented base effort. " +"Therefore, performance **differences per tool are actually larger** than " +"the numbers suggest." +msgstr "" +"したがって、すべての記録されたタイミングには共通のOS指向の基本的な努力が含まれています。したがって、**ツールごとの性能の違いは、数字が示す以上に実際には大きいです**" +" 。" + +#: ../../app4.rst:33 16b81a9d40ac477cb3545a8d137dc0c6 +msgid "Files used" +msgstr "使用されるファイル" + +#: ../../app4.rst:35 575ba43a14f4487a82561a8b1ff49e3b +msgid "" +"A set of eight files is used for the performance testing. With each file " +"we have the following information:" +msgstr "性能テストには、8つのファイルセットが使用されます。各ファイルには、次の情報があります:" + +#: ../../app4.rst:37 025950c5125d45a2904b55403d4c42db +msgid "**Name** of the file and download **link**." +msgstr "ファイル **名** とダウンロード **リンク**。" + +#: ../../app4.rst:38 28053451a91c46ecb551604c6f0bb5ed +msgid "**Size** in bytes." +msgstr "バイト単位の **サイズ**。" + +#: ../../app4.rst:39 e80d08e617524a9f9a4e8714b118b62a +msgid "Total number of **pages** in file." +msgstr "ファイル内の総 **ページ** 数。" + +#: ../../app4.rst:40 96a719d117644267a368982b482e4556 +msgid "Total number of bookmarks (**Table of Contents** entries)." +msgstr "ブックマーク( **目次** エントリー)の総数。" + +#: ../../app4.rst:41 21dcfb30e5464a5ca3ff95401f654ba1 +msgid "Total number of **links**." +msgstr "**リンク** の総数。" + +#: ../../app4.rst:42 272c50da2cbb42a68c1af1a6f20912e6 +msgid "**KB size** per page." +msgstr "ページごとの **KBサイズ** 。" + +#: ../../app4.rst:43 25798cb6bcab4e729fd943d321bf2d1c +msgid "" +"**Textsize per page** is the amount text in the whole file in KB, divided" +" by the number of pages." +msgstr "**ページごとのテキストサイズ** は、ファイル全体のテキスト量をKBで割ったものです。" + +#: ../../app4.rst:44 8772653d01c44b08a0b6d348c16e2dee +msgid "Any **notes** to generally describe the type of file." +msgstr "ファイルのタイプを一般的に説明するための **メモ** 。" + +#: ../../app4.rst:50 ../../app4.rst:217 ../../app4.rst:295 ../../app4.rst:407 +#: 3a7f7d366c9d4cd68933b3aa52e47c36 4e932ab9bfe544f69f78419d7c3e77d2 +#: c09fd426aca746048611fb93deb100e6 fef672efd3ec4a55a97ed6f722884100 +msgid "**Name**" +msgstr "**ファイル名** " + +#: ../../app4.rst:51 b0d124f56c87433db55a5f7b2d41cd7d +msgid "**Size (bytes)**" +msgstr "**サイズ(バイト)** " + +#: ../../app4.rst:52 d362b0c636e74244a325f72e9a9087e8 +msgid "**Pages**" +msgstr "**ページ数** " + +#: ../../app4.rst:53 c3abbdf31f0b4a53aaec94f1bac2d08e +msgid "**TOC size**" +msgstr "**目次サイズ** " + +#: ../../app4.rst:54 f20cbe59815944cab6401c135434edfe +msgid "**Links**" +msgstr "**リンク数** " + +#: ../../app4.rst:55 12ea07a140a043f7ac87d72a38bc120c +msgid "**KB/page**" +msgstr "**KB/ページ** " + +#: ../../app4.rst:56 9b9cb4615b2343088d290a2784145225 +msgid "**Textsize/page**" +msgstr "**テキストサイズ/ページ** " + +#: ../../app4.rst:57 439c3fd84f444720b730ea5b6fcbdb3d +msgid "**Notes**" +msgstr "**メモ** " + +#: ../../app4.rst:58 d4339a4e1e534a7aa58c7ac24f8e7a8e +msgid "`adobe.pdf`_" +msgstr "" + +#: ../../app4.rst:59 5590dd3c46464c09982ab3ed961c145f +msgid "32,472,771" +msgstr "" + +#: ../../app4.rst:60 6e4f5d3e15a84754a874b75ca12be644 +msgid "1,310" +msgstr "" + +#: ../../app4.rst:61 df8f51d9677f40bba42c33c044215698 +msgid "794" +msgstr "" + +#: ../../app4.rst:62 16e15d894e844594b9a2d07a288423a5 +msgid "32,096" +msgstr "" + +#: ../../app4.rst:63 8a98b81dba6249eab5d2944c50b72d4a +msgid "24" +msgstr "" + +#: ../../app4.rst:64 bd2a9d8a806540dbbb1296a17631c958 +msgid "1,942" +msgstr "" + +#: ../../app4.rst:65 59a50531261d44a28f76c8aa543ecaa6 +msgid "linearized, many links / bookmarks" +msgstr "線形化、多くのリンク/ブックマーク" + +#: ../../app4.rst:66 918a002a29be4fe7baebe58c7040cc78 +msgid "`artifex-website.pdf`_" +msgstr "" + +#: ../../app4.rst:67 ca287cee43264f9084bb82bc73abce35 +msgid "31,570,732" +msgstr "" + +#: ../../app4.rst:68 a9346a34d4374b8fb980e49b6cee10f4 +msgid "47" +msgstr "" + +#: ../../app4.rst:69 727136fb21b34d80a7052875d82f44bb +msgid "46" +msgstr "" + +#: ../../app4.rst:70 2e69102a3d8c457e842d25c89204c3d2 +msgid "2,035" +msgstr "" + +#: ../../app4.rst:71 a171d97ad167415991ac238932d6a9f5 +msgid "656" +msgstr "" + +#: ../../app4.rst:72 4541105ef1a445d4b8c49a35ecd7dde3 +msgid "3,538" +msgstr "" + +#: ../../app4.rst:73 ef59301c5b9d4e0c83d6057c216fcc34 +msgid "graphics oriented" +msgstr "グラフィックス志向" + +#: ../../app4.rst:74 d446e251c6c84a5882e0dc0f16c66f13 +msgid "`db-systems.pdf`_" +msgstr "" + +#: ../../app4.rst:75 c4c7b19fb04943539a856abebb039a26 +msgid "29,326,355" +msgstr "" + +#: ../../app4.rst:76 c8493e97e0894a68b17fb5ca43bb8cb4 +msgid "1,241" +msgstr "" + +#: ../../app4.rst:77 ../../app4.rst:78 ../../app4.rst:117 ../../app4.rst:118 +#: 469b9e8ab80f4fbca62f66c6408aac56 8701883ca58149b49edd938b79c60632 +#: 907210a5050c446c8573c0cbd9305e49 a393131c2bb84aa0ab0134c342843a10 +msgid "0" +msgstr "" + +#: ../../app4.rst:79 0e86ba770bd340adb24cd0f1c14bab71 +msgid "23" +msgstr "" + +#: ../../app4.rst:80 e5801056aea44dfcac5a1abaf16e96cc +msgid "2,142" +msgstr "" + +#: ../../app4.rst:82 b97519331469472d8522f2bfe1653363 +msgid "`fontforge.pdf`_" +msgstr "" + +#: ../../app4.rst:83 b9d13fcb288d4b76af8964f8a77cecdb +msgid "8,222,384" +msgstr "" + +#: ../../app4.rst:84 17b5576ab80f412f8addb5a765efc53b +msgid "214" +msgstr "" + +#: ../../app4.rst:85 2dcd5ffb91694173815fd5f71f79eb29 +msgid "31" +msgstr "" + +#: ../../app4.rst:86 594a6a6536504058bf95efddf6f6241d +msgid "242" +msgstr "" + +#: ../../app4.rst:87 63ea1c4d3ee945c1ba5b17686c4089d5 +msgid "38" +msgstr "" + +#: ../../app4.rst:88 4d250accb71d41558e2255b397be8194 +msgid "1,058" +msgstr "" + +#: ../../app4.rst:89 570063202acb4eb7b7faa8d4a517aba4 +msgid "mix of text & graphics" +msgstr "テキストとグラフィックスのミックス" + +#: ../../app4.rst:90 c7fa7da4992f4a548139415feb963ae8 +msgid "`pandas.pdf`_" +msgstr "" + +#: ../../app4.rst:91 695d9ae1a2ae47a296647ae03b5d6b4e +msgid "10,585,962" +msgstr "" + +#: ../../app4.rst:92 7d45c29e31804c6280a247e8ffdb66a3 +msgid "3,071" +msgstr "" + +#: ../../app4.rst:93 df3b7957e23941dc8bd3a5c1ccb34ebe +msgid "536" +msgstr "" + +#: ../../app4.rst:94 eecc97d4b5154ef4ae351b54d39f3e1a +msgid "16,554" +msgstr "" + +#: ../../app4.rst:95 4810f249456f4515bbf7ca801b43a8e7 +msgid "3" +msgstr "" + +#: ../../app4.rst:96 df8c5b5bfd4d4565997129cdf635cba2 +msgid "1,539" +msgstr "" + +#: ../../app4.rst:97 a786d6f5903541f9bcaed7139c8357fd +msgid "many pages" +msgstr "多くのページ" + +#: ../../app4.rst:98 1240a135b2f64163a1bea7936c79fde2 +msgid "`pymupdf.pdf`_" +msgstr "" + +#: ../../app4.rst:99 0ce0c9c9ed33422c80ce3dd06f7fe3b4 +msgid "6,805,176" +msgstr "" + +#: ../../app4.rst:100 8c8a3312f9fe4963bc455f248bea71a2 +msgid "478" +msgstr "" + +#: ../../app4.rst:101 87be3ce662e94922abd94b769e71ecf9 +msgid "276" +msgstr "" + +#: ../../app4.rst:102 80b0bc3f3ed84a4aa8793b4af2cd4956 +msgid "5,277" +msgstr "" + +#: ../../app4.rst:103 577fd220421f40a0a43d0cf490f79d2c +msgid "14" +msgstr "" + +#: ../../app4.rst:104 fb6b22b5018c47f2a7f9e88029c8985d +msgid "1,937" +msgstr "" + +#: ../../app4.rst:105 f80fcdc04c5f4e36b7d4c1a846e0f246 +msgid "text oriented" +msgstr "テキスト志向" + +#: ../../app4.rst:106 790f8fe0e5744003b596a599ff91e410 +msgid "`pythonbook.pdf`_" +msgstr "" + +#: ../../app4.rst:107 dcba3c0f2197467dbbcd6ddd003531cf +msgid "9,983,856" +msgstr "" + +#: ../../app4.rst:108 872d5e20259a40c5b3ed0940f9b0290a +msgid "669" +msgstr "" + +#: ../../app4.rst:109 91f57c0fd1ac43648f867600ac2c1ebd +msgid "198" +msgstr "" + +#: ../../app4.rst:110 56b5755eb40943a09dda0732419266ce +msgid "1,953" +msgstr "" + +#: ../../app4.rst:111 9616347f3bf1430c8eae364c8ef574bc +msgid "15" +msgstr "" + +#: ../../app4.rst:112 1ea93df464024ab6b87a27ae372ef096 +msgid "1,929" +msgstr "" + +#: ../../app4.rst:114 d9920f2bcf9840d3b873e5d0cc977671 +msgid "`sample-50-MB-pdf-file.pdf`_" +msgstr "" + +#: ../../app4.rst:115 b18d527f1aca48b2883a9475d7db6b3b +msgid "52,521,850" +msgstr "" + +#: ../../app4.rst:116 742fbc8385714d6ca4512df1cf504d7b +msgid "1" +msgstr "" + +#: ../../app4.rst:119 2a418619503d4d36b8657e4ac2867973 +msgid "51,291" +msgstr "" + +#: ../../app4.rst:120 357f7e11d9dd4340976e7d66497e2f49 +msgid "23,860" +msgstr "" + +#: ../../app4.rst:121 fbaa663f47ee4cc6aff794ab32f0b7d6 +msgid "single page, graphics oriented, large file size" +msgstr "単一ページ、グラフィックス志向、大きなファイルサイズ" + +#: ../../app4.rst:127 c16b155f0d724ad2acf5477dbc58284d +msgid "" +"**adobe.pdf** and **pymupdf.pdf** are clearly text oriented, **artifex-" +"website.pdf** and **sample-50-MB-pdf-file.pdf** are graphics oriented. " +"Other files are a mix of both." +msgstr "" +"**adobe.pdf** と **pymupdf.pdf** は明らかにテキスト志向です。 **artifex-website.pdf** と " +"**sample-50-MB-pdf-file.pdf** はグラフィックス志向です。その他のファイルは両方のミックスです。" + +#: ../../app4.rst:131 cae4cecbbc174e919e8030a349897d8b +msgid "Tools used" +msgstr "使用されるツール" + +#: ../../app4.rst:133 eae2a9f6d309449b857fbfa679cb2331 +msgid "" +"In each section, the same fixed set of |PDF| files is being processed by " +"a set of tools. The set of tools used per performance aspect however " +"varies, depending on the supported tool features." +msgstr "" +"各セクションでは、同じ固定されたセットの |PDF| " +"ファイルが一連のツールによって処理されます。ただし、性能の側面ごとに使用されるツールのセットは、サポートされるツールの機能に応じて異なります。" + +#: ../../app4.rst:135 08a0f8900a2447e783f29f4dd30d0418 +msgid "" +"All tools are either platform independent, or at least can run on both, " +":title:`Windows` and :title:`Unix` / :title:`Linux`." +msgstr "" +"すべてのツールは、プラットフォームに依存しないか、少なくとも :title:`Windows` と :title:`Unix` / " +":title:`Linux` の両方で実行できます。" + +#: ../../app4.rst:141 cc45988d1c5e48dea6a914496770536c +msgid "**Tool**" +msgstr "ツール" + +#: ../../app4.rst:142 7085f0539d914435bd0d32e5a4cb95f6 +msgid "**Description**" +msgstr "説明" + +#: ../../app4.rst:143 ../../app4.rst:172 ../../app4.rst:218 ../../app4.rst:296 +#: ../../app4.rst:368 ../../app4.rst:408 3c40ad65813f477ca2de31da49502eed +#: 798aac00deeb417eb59f68e4df844cba 80d4513349494a9faa8aece1ed1361b4 +#: aa4f955d930b450ea437268a7f2cf87a cdc949e0918d4811a4c5752a67c7ee65 +#: ea31fb90cdc74c078ebc2d8c098c488a +msgid "|PyMuPDF|" +msgstr "" + +#: ../../app4.rst:144 efd6c72c23f040599c7f47ec248dccf5 +msgid "The tool of this manual." +msgstr "このマニュアルのツールです。" + +#: ../../app4.rst:145 f67f53847c2d4c378e7b2fb84da3b4a6 +msgid "PDFrw_" +msgstr "" + +#: ../../app4.rst:146 7d5c52a641bd4e44ba33ed6bc0890066 +msgid "" +"A pure :title:`Python` tool, being used by :title:`rst2pdf`, has " +"interface to :title:`ReportLab`." +msgstr "" +":title:`rst2pdf` で使用される純粋な :title:`Python` " +"ツールで、ReportLabとのインターフェースを持っています。" + +#: ../../app4.rst:147 5d1ea91abf13423085cf728fd811c571 +msgid "PyPDF2_" +msgstr "" + +#: ../../app4.rst:148 56cbc1cdc6bf430598c52ff85f2d7884 +msgid "A pure :title:`Python` tool with a large function set." +msgstr "多くの機能を備えた純粋な :title:`Python` ツールです。" + +#: ../../app4.rst:149 3ef4edf361f7465fa759bdf99abb55b4 +msgid "PDFMiner_" +msgstr "" + +#: ../../app4.rst:150 e3c71e821e5a4715818a9b9af2bd9cd0 +msgid "A pure :title:`Python` to extract text and other data from |PDF|." +msgstr "|PDF| からテキストやその他のデータを抽出するための純粋な :title:`Python` ツールです。" + +#: ../../app4.rst:151 cabaa299d5c2474f91c5401f19cb5b85 +msgid "XPDF_" +msgstr "" + +#: ../../app4.rst:152 386ca2bb4cdf40508bc69d8b02f4e6a0 +msgid "A command line utility with multiple functions." +msgstr "複数の機能を備えたコマンドラインユーティリティです。" + +#: ../../app4.rst:153 f947112ae9044c999fa5f5be7d6f1aee +msgid "PikePDF_" +msgstr "" + +#: ../../app4.rst:154 7b8d4ac2f3c940b299dad00d2492eb7c +msgid "" +"A :title:`Python` package similar to :title:`PDFrw`, but based on " +":title:`C++` library :title:`QPDF`." +msgstr "" +":title:`C++` ライブラリ :title:`QPDF` に基づいた :title:`Python` パッケージで、 " +":title:`PDFrw` に類似しています。" + +#: ../../app4.rst:155 4435c7075dd5458bac29fae9a9dfc6ab +msgid "PDF2JPG_" +msgstr "" + +#: ../../app4.rst:156 26a923f0cb1d4874b83a9973a4cf7521 +msgid "" +"A :title:`Python` package specialized on rendering |PDF| pages to " +":title:`JPG` images." +msgstr "|PDF| ページを :title:`JPG` 画像にレンダリングすることに特化した :title:`Python` パッケージです。" + +#: ../../app4.rst:164 ab71856b012d4951a4dd1cfe45e2c8b1 +msgid "Copying / Joining / Merging" +msgstr "コピー / 結合 / マージ" + +#: ../../app4.rst:166 cc77bf393e184b30864374b448d7eca9 +msgid "" +"How fast is a |PDF| file read and its content parsed for further " +"processing? The sheer parsing performance cannot directly be compared, " +"because batch utilities always execute a requested task completely, in " +"one go, front to end. :title:`PDFrw` too, has a *lazy* strategy for " +"parsing, meaning it only parses those parts of a document that are " +"required in any moment." +msgstr "" +"|PDF| " +"ファイルの読み取りおよびそのコンテンツの解析は、どれだけ高速に行われるのでしょうか?純粋な解析の性能を直接比較することはできません。なぜなら、バッチユーティリティは常に要求されたタスクを一度に完全に実行するからです。" +" :title:`PDFrw` も解析の際には *レイジー* な戦略を採用しており、必要な瞬間に必要な部分のみを解析します。" + +#: ../../app4.rst:168 d34888daf2f24ed08d5e5baa7df81caf +msgid "" +"To find an answer to the question, we therefore measure the time to copy " +"a |PDF| file to an output file with each tool, and do nothing else." +msgstr "したがって、この質問に答えるために、各ツールで |PDF| ファイルを出力ファイルにコピーする時間を計測し、それ以外の操作は行いません。" + +#: ../../app4.rst:170 ../../app4.rst:365 39ee5d59922f43f6a4632ac84e87a16c +#: 98012a5b4e2d4381807c7c384ae505ed +msgid "These are the :title:`Python` commands for how each tool is used:" +msgstr "各ツールの使用方法に関する :title:`Python` コマンドは以下の通りです:" + +#: ../../app4.rst:180 098415bfb0c84fd698d9dd8767503706 +msgid ":title:`PDFrw`" +msgstr "" + +#: ../../app4.rst:190 bb3be02428f749d0bb3d7f2a2c02f4e6 +msgid ":title:`PikePDF`" +msgstr "" + +#: ../../app4.rst:198 a7ff02af2cbc46a982f6a95712187623 +msgid ":title:`PyPDF2`" +msgstr "" + +#: ../../app4.rst:210 ../../app4.rst:288 ../../app4.rst:399 +#: 000ff0e8f68b48bf855c210b76e56070 0e9f42a5643a440fae93b73bf99f1986 +#: 2bd5f4833ba3402db744ab7377b6e00c +msgid "**Observations**" +msgstr "**観察結果**" + +#: ../../app4.rst:212 ../../app4.rst:290 ../../app4.rst:401 +#: 31ae5e61007c44c190595a23966bb19d 807225688a854c0280044fff4f169db1 +#: ea572448a1104daaa2cc30a51bd5c530 +msgid "" +"These are our run time findings in **seconds** along with a base rate " +"summary compared to |PyMuPDF|:" +msgstr "以下は、実行時間の結果( **秒単位**)と、 |PyMuPDF| との比較における基本レートの要約です:" + +#: ../../app4.rst:219 087eaa06d5254f66ae93934c8c7f977c +msgid "**PDFrw**" +msgstr "" + +#: ../../app4.rst:220 fe97f76c5ff74774ab450a54bd366ab7 +msgid "**PikePDF**" +msgstr "" + +#: ../../app4.rst:221 ../../app4.rst:298 12504bf36e0e498a8a28bdc8a6642f2a +#: d7098a58a32240789d0013891ec1887d +msgid "**PyPDF2**" +msgstr "" + +#: ../../app4.rst:222 ../../app4.rst:300 ../../app4.rst:411 +#: 0579bf77dff54eafb32b534d82d727d4 172966661215461eb37399d216f46858 +#: e0f9034aa1734071a0c2b5d7940283fa +msgid "adobe.pdf" +msgstr "" + +#: ../../app4.rst:223 33ccfd6a08d84bdaa980894ad37141dd +msgid "1.75" +msgstr "" + +#: ../../app4.rst:224 d5bba5be203247a9b5af5890023bff83 +msgid "5.15" +msgstr "" + +#: ../../app4.rst:225 bb7f98c73d5d4cd38989b8b32ba68f14 +msgid "22.37" +msgstr "" + +#: ../../app4.rst:226 017bd19c7ef44a88b67a56301a916f92 +msgid "374.05" +msgstr "" + +#: ../../app4.rst:227 ../../app4.rst:305 ../../app4.rst:415 +#: 4083c98e040a4fb08a63a15984eddca2 84f4b14e5b0d426e997567b51d864c58 +#: c6ad8c14d5d64caca80a2b8f3381c0f9 +msgid "artifex-website.pdf" +msgstr "" + +#: ../../app4.rst:228 446b60c823134f8cb7a0963b5882f5d2 +msgid "0.26" +msgstr "" + +#: ../../app4.rst:229 ../../app4.rst:243 b094c7cdf2a6446c8a3a643d1cd5f6e7 +#: f771a5135e6b4b969e355ff296c38e5c +msgid "0.38" +msgstr "" + +#: ../../app4.rst:230 558139fa94304bfba8e9455825d290f2 +msgid "1.41" +msgstr "" + +#: ../../app4.rst:231 3fa9a0e4d22642a0a9118b44433c1f84 +msgid "2.81" +msgstr "" + +#: ../../app4.rst:232 ../../app4.rst:310 ../../app4.rst:419 +#: 0b636048d8e5485192740128dde856e2 1c1feb7c9759462191f279d0b351413c +#: c78432807979474ab55c794f1f70844c +msgid "db-systems.pdf" +msgstr "" + +#: ../../app4.rst:233 0f96d2adb69b40f1ae61b3abf24c4996 +msgid "0.15" +msgstr "" + +#: ../../app4.rst:234 6d7a157a40ae4db5aa1e6fec05e121e1 +msgid "0.8" +msgstr "" + +#: ../../app4.rst:235 57df5bda2616424b810b890b91d052f1 +msgid "1.68" +msgstr "" + +#: ../../app4.rst:236 8f52d19199904d0fa372857a413a20fe +msgid "2.46" +msgstr "" + +#: ../../app4.rst:237 ../../app4.rst:315 ../../app4.rst:423 +#: 014fb850a0ba4288b0a98f06d22913df 203bf305798d4baca89428d6acd63ca1 +#: 47c5c20d621042cb937b7d8077b810d5 +msgid "fontforge.pdf" +msgstr "" + +#: ../../app4.rst:238 d5a4d52e01344a58b77fed8bc4aa6cc3 +msgid "0.09" +msgstr "" + +#: ../../app4.rst:239 07ad8cb575224bd0a77c8f73498da2dd +msgid "0.14" +msgstr "" + +#: ../../app4.rst:240 2782db1a31d642eb8ee71aa12d0a07fb +msgid "0.28" +msgstr "" + +#: ../../app4.rst:241 ../../app4.rst:308 9730467f6988422984fbaa469746b258 +#: fa2e4397421748668c97bae4cf7cfecd +msgid "1.1" +msgstr "" + +#: ../../app4.rst:242 ../../app4.rst:320 ../../app4.rst:427 +#: 84964178ddc641d9ad9b042dc0859a4d aa8101aab6b346d2a8b1bcd13e098da1 +#: e6d6998730614fcd8fb1633dbd0acab1 +msgid "pandas.pdf" +msgstr "" + +#: ../../app4.rst:244 cb4fe72df12d4edd984f17c82aef0aa2 +msgid "2.21" +msgstr "" + +#: ../../app4.rst:245 2cb7408ebc3a42cb875be0b5d5f7b44e +msgid "2.73" +msgstr "" + +#: ../../app4.rst:246 671b631294b64a6ba1526e4c16fb3c96 +msgid "70.3" +msgstr "" + +#: ../../app4.rst:247 ../../app4.rst:325 ../../app4.rst:431 +#: 408b3e0d9fe04dc89a0270114047810e 8f397007025a4db6ae4c9d7e8cecbce7 +#: bb8942ae4b494cb9ba13152658b9d55c +msgid "pymupdf.pdf" +msgstr "" + +#: ../../app4.rst:248 17d86881be2843b3b98599f55c03956e +msgid "0.11" +msgstr "" + +#: ../../app4.rst:249 1b5371380cce431b907e18a035c69c16 +msgid "0.56" +msgstr "" + +#: ../../app4.rst:250 ada41a74da3547fe853c66b22b06602c +msgid "0.83" +msgstr "" + +#: ../../app4.rst:251 f8ac8502e0864a779969183a5f2bdcae +msgid "6.05" +msgstr "" + +#: ../../app4.rst:252 ../../app4.rst:330 ../../app4.rst:435 +#: 23f158ffa8d0494c98401845f612953a 5e5934902a7d42a5a59bf81197dd75db +#: feba334429eb4cab84e66c248a94a9d2 +msgid "pythonbook.pdf" +msgstr "" + +#: ../../app4.rst:253 d18ff9da68324e21a5601b7b701ceb98 +msgid "0.19" +msgstr "" + +#: ../../app4.rst:254 5a674cedef4b4b2eae9a1fc8a4d351bf +msgid "1.2" +msgstr "" + +#: ../../app4.rst:255 8180e14731504f1fa81a41548f784f56 +msgid "1.34" +msgstr "" + +#: ../../app4.rst:256 b0c26ff6981f4d9cab541d3f1ed8aa51 +msgid "37.19" +msgstr "" + +#: ../../app4.rst:257 ../../app4.rst:335 ../../app4.rst:439 +#: 5009b04279b94732b74b775e625fe7f0 9bcbd9860e1f41aea886358cf29f58f4 +#: b89f45f5198e4479b7ff10753175bf62 +msgid "sample-50-MB-pdf-file.pdf" +msgstr "" + +#: ../../app4.rst:258 93dbe783461f45249a5769552aa1a29d +msgid "0.12" +msgstr "" + +#: ../../app4.rst:259 5959fcd2a9eb4f6abf11091dbca739cf +msgid "0.1" +msgstr "" + +#: ../../app4.rst:260 ffe5c7ee78244a7ab937b9e4f69b048b +msgid "2.93" +msgstr "" + +#: ../../app4.rst:261 4a2af1db49284e5e9a51d346bf69e7a5 +msgid "0.08" +msgstr "" + +#: ../../app4.rst:262 ../../app4.rst:340 ../../app4.rst:443 +#: 0d75e663cff74ea7bbe177733e9bf035 80bd2b30f2884e87afda2a1c1a838ced +#: fa3773c82e14451f82f2da1a00a73021 +msgid "**Total**" +msgstr "**合計** " + +#: ../../app4.rst:263 a72960766ecb4cadb4dd01b0c0673c5b +msgid "**3.05**" +msgstr "" + +#: ../../app4.rst:264 d801960d9b034bddac657801dba14227 +msgid "**10.54**" +msgstr "" + +#: ../../app4.rst:265 a1fda8841ebe42c3ab1952d319a16253 +msgid "**33.57**" +msgstr "" + +#: ../../app4.rst:266 b48e9c691850419c8f12c4279a11abea +msgid "**494.04**" +msgstr "" + +#: ../../app4.rst:272 ../../app4.rst:350 ../../app4.rst:451 +#: 0b46b231245b4f61a0d1767d82afc825 ab7fcc058b4a4068abbb243821783349 +#: fac28cdc461b45e3a48b2c1656519c5f +msgid "**Rate compared to PyMuPDF**" +msgstr "**PyMuPDFに対する比率** " + +#: ../../app4.rst:273 ../../app4.rst:351 ../../app4.rst:452 +#: 0a28b89f605a4780b1783dd5f2924e73 ef4c56771d1e4b5e8f6f23cfa41c9d2d +#: f380c4b084454cef8f5b7ac7b43dc7a9 +msgid ":green-color:`1.0`" +msgstr "" + +#: ../../app4.rst:274 2593873cdfa64fbd9d0bbb573c8150aa +msgid ":orange-color:`3.5`" +msgstr "" + +#: ../../app4.rst:275 cdab03a80afe4d4dad11da5acdd86300 +msgid ":orange-color:`11.0`" +msgstr "" + +#: ../../app4.rst:276 1dce58c46b2443a0a84790ccf0ad2467 +msgid ":red-color:`162`" +msgstr "" + +#: ../../app4.rst:283 1c4d51591dae4ca394f9b073c9091d5f +msgid "Text Extraction" +msgstr "テキスト抽出" + +#: ../../app4.rst:285 334c3aa502504d3b898bbad1e7821cb4 +msgid "" +"The following table shows plain text extraction durations. All tools have" +" been used with their most basic functionality - i.e. no layout re-" +"arrangements, etc." +msgstr "" +"以下の表は、プレーンテキストの抽出にかかる時間を示しています。すべてのツールは、基本的な機能のみを使用しています - " +"レイアウトの再配置などはありません。" + +#: ../../app4.rst:297 ../../app4.rst:409 295f0feafbe34e729724d6cf4b3fc701 +#: e4108f33a0fa482dbc8a47e11a38f042 +msgid "**XPDF**" +msgstr "" + +#: ../../app4.rst:299 e6077890cead4338a8cf65b0d0b8e2b0 +msgid "**PDFMiner**" +msgstr "" + +#: ../../app4.rst:301 e41e7a1dc96e4aecb746deda8f9ee02e +msgid "2.01" +msgstr "" + +#: ../../app4.rst:302 7e91cb2e541f4112bea1a21ef0a0aec5 +msgid "6.19" +msgstr "" + +#: ../../app4.rst:303 e69b13a658b54cecaabb6dcbbdf84ebe +msgid "22.2" +msgstr "" + +#: ../../app4.rst:304 ef36308cc32944ca857d2e9cae5241a4 +msgid "49.15" +msgstr "" + +#: ../../app4.rst:306 8427d1895767420b8db08fac9011f8c8 +msgid "0.18" +msgstr "" + +#: ../../app4.rst:307 b8317699623a427382ebaee1ad5471f2 +msgid "0.3" +msgstr "" + +#: ../../app4.rst:309 0e798465fa7f4c3199db25b7a90ed51f +msgid "4.06" +msgstr "" + +#: ../../app4.rst:311 edecce9ed3144f17af063e167071243a +msgid "1.57" +msgstr "" + +#: ../../app4.rst:312 2d95636039ea426f9af05c8dfa8dad01 +msgid "4.26" +msgstr "" + +#: ../../app4.rst:313 16f29fb00e814fd5887126e2daaa2812 +msgid "25.75" +msgstr "" + +#: ../../app4.rst:314 70feafb46d0d4f50a3851a1eab533e12 +msgid "42.19" +msgstr "" + +#: ../../app4.rst:316 4d97900eec554c03b3388b7c098658d8 +msgid "0.24" +msgstr "" + +#: ../../app4.rst:317 1d74bd9d01854bad89885673bd068cfc +msgid "0.47" +msgstr "" + +#: ../../app4.rst:318 b479325345d643709a60c3c14c691d25 +msgid "2.69" +msgstr "" + +#: ../../app4.rst:319 ecd2657831b5452891c8a9d1fbf8ab16 +msgid "4.2" +msgstr "" + +#: ../../app4.rst:321 cbaf2aceaa4a4993b9d800eb26779434 +msgid "2.41" +msgstr "" + +#: ../../app4.rst:322 eea9e2a95efd4c1dafdc9ecff46be723 +msgid "10.54" +msgstr "" + +#: ../../app4.rst:323 ed5fdd8b39db431698e4a400489da733 +msgid "25.38" +msgstr "" + +#: ../../app4.rst:324 2dfb2ea581c848e99b5c1807d5055b53 +msgid "76.56" +msgstr "" + +#: ../../app4.rst:326 9a3ad03cca2148deafd533a76ce0d720 +msgid "0.49" +msgstr "" + +#: ../../app4.rst:327 b35253abb7ce482abf42d1de8d8125a2 +msgid "2.34" +msgstr "" + +#: ../../app4.rst:328 80f91c94a7ca41f58445fc0257332c35 +msgid "6.44" +msgstr "" + +#: ../../app4.rst:329 d86f8d1eea1c4b82ac1f3cd16673122d +msgid "13.55" +msgstr "" + +#: ../../app4.rst:331 55eab9a8dfdc47029124d6d18d78344d +msgid "0.84" +msgstr "" + +#: ../../app4.rst:332 4f1b94156e834ae5a9ec9d4b34757ccf +msgid "2.88" +msgstr "" + +#: ../../app4.rst:333 96cd7243be2e4820958bdeeb2dd2508a +msgid "9.28" +msgstr "" + +#: ../../app4.rst:334 c0c4f35e622143f0a33db035fd36fd10 +msgid "24.27" +msgstr "" + +#: ../../app4.rst:336 e3984686022e4f68931cdc516f2d8aa5 +msgid "0.27" +msgstr "" + +#: ../../app4.rst:337 daebf37f3def48e4a97906466d04eddb +msgid "0.44" +msgstr "" + +#: ../../app4.rst:338 6672b13d4e13495cabfea568b0bf8f0c +msgid "8.8" +msgstr "" + +#: ../../app4.rst:339 d973f08abf1448c2b1600095618c8455 +msgid "13.29" +msgstr "" + +#: ../../app4.rst:341 4bef96a664b64a38ad41dfccc95f8c4a +msgid "**8.01**" +msgstr "" + +#: ../../app4.rst:342 f06c580572554b3497daa397db37622b +msgid "**27.42**" +msgstr "" + +#: ../../app4.rst:343 befe94d6c0fe4af58b97aa9e5229d296 +msgid "**101.64**" +msgstr "" + +#: ../../app4.rst:344 41585c7e46094055ab1ba5e8ff0f41eb +msgid "**227.27**" +msgstr "" + +#: ../../app4.rst:352 c0f856a911ad45f08009409761dfd331 +msgid ":orange-color:`3.42`" +msgstr "" + +#: ../../app4.rst:353 c17aad6698c14585aa7d33aea9a24e65 +msgid ":orange-color:`12.69`" +msgstr "" + +#: ../../app4.rst:354 02e40dd6a6304c36bdff04c962495c02 +msgid ":red-color:`28.37`" +msgstr "" + +#: ../../app4.rst:360 9e9d8efb70054aa68772026b10852763 +msgid "Page Rendering" +msgstr "ページのレンダリング" + +#: ../../app4.rst:362 61af2c85f2f646fe9110aa48420274d6 +msgid "" +"We have tested rendering speed of |PyMuPDF| against :title:`pdf2jpg` and " +":title:`XPDF` at a resolution of 150 DPI," +msgstr "" +"私たちは、解像度150 DPIで |PyMuPDF| のレンダリング速度を :title:`pdf2jpg` と :title:`XPDF` " +"と比較しました。" + +#: ../../app4.rst:382 95c8b368baa24d13ac96dcdea58459e3 +msgid ":title:`XPDF`" +msgstr "" + +#: ../../app4.rst:389 dccffc19be6c4fde8b7ff341c14f8476 +msgid ":title:`PDF2JPG`" +msgstr "" + +#: ../../app4.rst:410 9ef3379555224426b70f7ce9d999824e +msgid "**PDF2JPG**" +msgstr "" + +#: ../../app4.rst:412 d6f81b8351394333877861849ea2eca9 +msgid "51.33" +msgstr "" + +#: ../../app4.rst:413 8faab5bff0384035a7634264e2035744 +msgid "98.16" +msgstr "" + +#: ../../app4.rst:414 1b6d590a314d48cb9512951b0d2aca92 +msgid "75.71" +msgstr "" + +#: ../../app4.rst:416 7ed949dea1fc49fba14c7290f5f0410a +msgid "26.35" +msgstr "" + +#: ../../app4.rst:417 16f978c1e7584cf589151174cbd3e0c6 +msgid "51.28" +msgstr "" + +#: ../../app4.rst:418 97e3a62c6e9147379314df4e4b8c0752 +msgid "54.11" +msgstr "" + +#: ../../app4.rst:420 9796145e23124e0d94d9fd4bc453c764 +msgid "84.59" +msgstr "" + +#: ../../app4.rst:421 80fad20e9b2840e5890ffdda456c5cd4 +msgid "143.16" +msgstr "" + +#: ../../app4.rst:422 b13a7b03f5cf4e6181341a9d8943657f +msgid "405.22" +msgstr "" + +#: ../../app4.rst:424 b0865450a8b54d689e7c4c18b70dba48 +msgid "12.23" +msgstr "" + +#: ../../app4.rst:425 0e3ee45be04d49df904ad0b9b84cc59e +msgid "22.18" +msgstr "" + +#: ../../app4.rst:426 fb92cb655ccc4ef2b616e48f86f03592 +msgid "20.14" +msgstr "" + +#: ../../app4.rst:428 76327bf519c9498dadb7762d2d613b85 +msgid "138.74" +msgstr "" + +#: ../../app4.rst:429 87eac25439c1466ba9f8037334de82c1 +msgid "241.67" +msgstr "" + +#: ../../app4.rst:430 921ffdd160e1440bac4018a5913dc5a0 +msgid "202.06" +msgstr "" + +#: ../../app4.rst:432 7f7143de4eb24a3a99c917ad8ed3f872 +msgid "22.35" +msgstr "" + +#: ../../app4.rst:433 4f2027532127445691955867748ad040 +msgid "39.11" +msgstr "" + +#: ../../app4.rst:434 487d6c2a3364452a8bf0bc9cfcc7cfe8 +msgid "33.38" +msgstr "" + +#: ../../app4.rst:436 1e241237259441cab94fca061a9cd505 +msgid "30.44" +msgstr "" + +#: ../../app4.rst:437 c71f5db7561c4511bf550bbdb402e815 +msgid "49.12" +msgstr "" + +#: ../../app4.rst:438 c8143a7a48224a7f8ea8912623bd116c +msgid "55.68" +msgstr "" + +#: ../../app4.rst:440 bfd54d2725ca41f58c2ed566e663fcd8 +msgid "1.01" +msgstr "" + +#: ../../app4.rst:441 5db9427489fd453c82a8e02a9ccdc3e9 +msgid "1.32" +msgstr "" + +#: ../../app4.rst:442 4fe3a8898e4a404d8e0b776f3258d834 +msgid "5.22" +msgstr "" + +#: ../../app4.rst:444 80cbb4bda94a413bb2e12bd12b290317 +msgid "**367.04**" +msgstr "" + +#: ../../app4.rst:445 25f73ae8ec0b4d19b88ff761311afa17 +msgid "**646**" +msgstr "" + +#: ../../app4.rst:446 2b18d417b7504b0ca812fb6dc4d81e3f +msgid "**851.52**" +msgstr "" + +#: ../../app4.rst:453 2529c3f932184badb6e059b2d241020c +msgid ":orange-color:`1.76`" +msgstr "" + +#: ../../app4.rst:454 f918a4ed3d7f4d64be43baa58c5aeaa3 +msgid ":red-color:`2.32`" +msgstr "" + +#: ../../footer.rst:60 c341e8f540ee456ab7d1314671ca2dd5 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/archive-class.mo b/docs/locales/ja/LC_MESSAGES/archive-class.mo new file mode 100644 index 000000000..5aef0ec15 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/archive-class.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/archive-class.po b/docs/locales/ja/LC_MESSAGES/archive-class.po new file mode 100644 index 000000000..543ee432a --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/archive-class.po @@ -0,0 +1,296 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 d617abc2bac44b79b8895c5a477d435e +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 11676cd8a070416581b0f70844041acd +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 e1542b64861144078ef1fec53974ec47 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../archive-class.rst:7 2f0bf46e9ac54fd68b23bc9210c74150 +msgid "Archive" +msgstr "Archive (アーカイブ)" + +#: ../../archive-class.rst:9 46f4ce49429542a1b693c99eadca160d +msgid "New in v1.21.0" +msgstr "v1.21.0での新機能" + +#: ../../archive-class.rst:11 90ae46c1e95142ec8216ccb598850553 +msgid "" +"This class represents a generalization of file folders and container " +"files like ZIP and TAR archives. Archives allow accessing arbitrary " +"collections of file folders, ZIP / TAR files and single binary data " +"elements as if they all were part of one hierarchical tree of folders." +msgstr "このクラスは、ファイルフォルダーやZIPやTARアーカイブのようなコンテナファイルの一般化を表します。アーカイブは、ファイルフォルダー、ZIP/TARファイル、および単一のバイナリデータ要素の任意のコレクションにアクセスできるようにし、すべてが1つの階層的なフォルダーツリーの一部であるかのようにします。" + +#: ../../archive-class.rst:13 3fd435f3261d48d4a0a727f172c9ddb6 +msgid "" +"In PyMuPDF, archives are currently only used by :ref:`Story` objects to " +"specify where to look for fonts, images and other resources." +msgstr "" +"PyMuPDFでは、アーカイブは現在、フォント、画像、およびその他のリソースを検索する場所を指定するために :ref:`Story` " +"オブジェクトによってのみ使用されています。" + +#: ../../archive-class.rst:16 1829b6e552a94dc5a3c61784652f1d73 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../archive-class.rst:16 2bf68b939c1d44f7be1ca6d449c27e53 +msgid "**Short Description**" +msgstr "**簡単な説明**" + +#: ../../archive-class.rst:18 5b94b5437aa84ff9a245301a956f8d18 +msgid ":meth:`Archive.add`" +msgstr "" + +#: ../../archive-class.rst:18 8c9d95f042814c25a324f3ce9ec79170 +msgid "add new data to the archive" +msgstr "アーカイブに新しいデータを追加する" + +#: ../../archive-class.rst:19 d5b520ec1d214317ac63ec60b509eb43 +msgid ":meth:`Archive.has_entry`" +msgstr "" + +#: ../../archive-class.rst:19 054f56f20f6d4fa8a033a1dd80bd3cb6 +msgid "check if given name is a member" +msgstr "名前で指定されたデータを読む" + +#: ../../archive-class.rst:20 dc2f8062523044d19ec994b1828e3195 +msgid ":meth:`Archive.read_entry`" +msgstr "" + +#: ../../archive-class.rst:20 e680d9fb1dd44a7f85a61af9853e4e6d +msgid "read the data given by the name" +msgstr "名前で指定されたデータを読む" + +#: ../../archive-class.rst:21 e41ca685715b4dcbb45e53b209029663 +msgid ":attr:`Archive.entry_list`" +msgstr "" + +#: ../../archive-class.rst:21 f31453b71aeb4ade88edbebb982ff071 +msgid "list[dict] of archive items" +msgstr "アーカイブアイテムのリスト[辞書]" + +#: ../../archive-class.rst:24 98357f302ae84859bd401682492995fa +msgid "**Class API**" +msgstr "クラス API" + +#: ../../archive-class.rst:30 422de49b171f4d18afbb8516afbc9095 +msgid "Creates a new archive. Without parameters, an empty archive is created." +msgstr "新しいアーカイブを作成します。パラメーターが指定されない場合、空のアーカイブが作成されます。" + +#: ../../archive-class.rst:32 ae5782f0b4a54bc3be2c009daab1a7ca +msgid "If provided, `content` may be one of the following:" +msgstr "提供される場合、`content` は次のいずれかであることができます:" + +#: ../../archive-class.rst:34 fbf461b873ef4da5b1fda4e484d46cbf +msgid "another Archive: the archive is being made a sub-archive of the new one." +msgstr "別の Archive: アーカイブは新しいアーカイブのサブアーカイブになります。" + +#: ../../archive-class.rst:36 6ac58a1199014e0a932f80579a01fff4 +msgid "" +"a string: this must be the name of a local folder or file. `pathlib.Path`" +" objects are also supported." +msgstr "文字列: これはローカルフォルダまたはファイルの名前である必要があります。`pathlib.Path` オブジェクトもサポートされています。" + +#: ../../archive-class.rst:38 63b7f15541554ca28bb94dacd2e33b99 +msgid "" +"A **folder** will be converted to a sub-archive, so its files (and any " +"sub-folders) can be accessed by their names." +msgstr "**フォルダ** はサブアーカイブに変換され、そのファイル(およびサブフォルダ)は名前でアクセスできます。" + +#: ../../archive-class.rst:39 547200864ba043b295790af8b0b2a6cf +msgid "" +"A **file** will be read with mode `\"rb\"` and these binary data (a " +"`bytes` object) be treated as a single-member sub-archive. In this case, " +"the `path` parameter is **mandatory** and should be the member name under" +" which this item can be found / retrieved." +msgstr "" +"**ファイル** はモード `\"rb\"` で読み取られ、これらのバイナリデータ( `bytes` " +"オブジェクト)は単一のメンバーサブアーカイブとして扱われます。この場合、`path` パラメーターは **必須** " +"で、このアイテムが見つかる/取得できるメンバー名である必要があります。" + +#: ../../archive-class.rst:41 26fc387cc92a49b18be5fbb6e276303a +msgid "" +"a `zipfile.ZipFile` or `tarfile.TarFile` object: Will be added as a sub-" +"archive." +msgstr "`zipfile.ZipFile` または `tarfile.TarFile` オブジェクト: サブアーカイブとして追加されます。" + +#: ../../archive-class.rst:43 a95ece3a221d4379bbdf468f3d625687 +msgid "" +"a Python binary object (`bytes`, `bytearray`, `io.BytesIO`): this will " +"add a single-member sub-archive. In this case, the `path` parameter is " +"**mandatory** and should be the member name under which this item can be " +"found / retrieved." +msgstr "" +"Python バイナリオブジェクト( `bytes`、`bytearray` 、`io.BytesIO` ): " +"これは単一のメンバーサブアーカイブを追加します。この場合、`path` パラメーターは **必須** " +"で、このアイテムが見つかる/取得できるメンバー名である必要があります。" + +#: ../../archive-class.rst:45 6cefb5b8a5ae4538b432a671e5624634 + +msgid "" +"a tuple `(data, name)`: This will add a single-member sub-archive with " +"the member name ``name``. ``data`` may be a Python binary object or a " +"local file name (in which case its binary file content is used). Use this" +" format if you need to specify `path`." +msgstr "" +"タプル `(data, name)` : これはメンバー名 ``name`` を持つ単一のメンバーサブアーカイブを追加します。 ``data`` " +"はPythonバイナリオブジェクトまたはローカルファイル名である可能性があります(その場合、バイナリファイルのコンテンツが使用されます)。`path`" +" を指定する必要がある場合は、このフォーマットを使用してください。" + +#: ../../archive-class.rst:47 8916743a3d2e482e8fac00263e7c6839 +msgid "" +"a Python sequence: This is a convenience format to specify any " +"combination of the above." +msgstr "Pythonシーケンス: これは上記のいずれかの組み合わせを指定するための便益フォーマットです。" + +#: ../../archive-class.rst:49 ea2b6c8e209e48e7a488431fc4d5dc5b +msgid "If provided, `path` must be a string." +msgstr "提供される場合、`path` は文字列である必要があります。" + +#: ../../archive-class.rst:51 ce6b5084838e47998c1c0930a1c90a85 +msgid "" +"If `content` is either binary data or a file name, this parameter is " +"mandatory and must be the name under which the data can be found." +msgstr "`content` がバイナリデータまたはファイル名の場合、このパラメーターは必須で、データが見つかる名前である必要があります。" + +#: ../../archive-class.rst:53 bfdfb9696f9d4267b9a0abcd4944814d +msgid "" +"Otherwise this parameter is optional. It can be used to simulate a folder" +" name or a mount point, under which this sub-archive's elements can be " +"found. For example this specification `Archive((data, \"name\"), " +"\"path\")` means that `data` will be found using the element name " +"`\"path/name\"`. Similar is true for other sub-archives: to retrieve " +"members of a ZIP sub-archive, their names must be prefixed with " +"`\"path/\"`. The main purpose of this parameter probably is to " +"differentiate between duplicate names." +msgstr "" +"それ以外の場合、このパラメーターはオプションです。これは、このサブアーカイブの要素が見つかるマウントポイントまたはフォルダ名をシミュレートするために使用できます。たとえば、この仕様" +" `Archive((data, \"name\"), \"path\")` は、データが要素名 \"`\"path/name\"` " +"で見つかります。他のサブアーカイブについても同様です:ZIPサブアーカイブのメンバーを取得するには、その名前に `\"path/\"` " +"を接頭辞として追加する必要があります。このパラメーターの主な目的は、重複する名前を区別することである可能性があります。" + +#: ../../archive-class.rst:55 61677063f42641209e56a228534c33f2 +msgid "" +"If duplicate entry names exist in the archive, always the last entry with" +" that name will be found / retrieved. During archive creation, or " +"appending more data to an archive (see :meth:`Archive.add`) no check for " +"duplicates will be made. Use the `path` parameter to prevent this from " +"happening." +msgstr "" +"アーカイブ内に重複するエントリ名が存在する場合、常にその名前の最後のエントリが見つかり/取得されます。アーカイブの作成中、またはアーカイブにさらにデータを追加する際" +" :meth:`Archive.add` を参照)、重複をチェックしません。この問題を防ぐために path パラメーターを使用してください。" + +#: ../../archive-class.rst:59 dc6f307620b443e59c60965c461c7642 +msgid "" +"Append a sub-archive. The meaning of the parameters are exactly the same " +"as explained above. Of course, parameter `content` is not optional here." +msgstr "サブアーカイブを追加します。パラメータの意味は上記とまったく同じです。もちろん、ここではパラメータ `content` はオプションではありません。" + +#: ../../archive-class.rst:63 e1ad4f204f754654a6584c25842135fb +msgid "Checks whether an entry exists in any of the sub-archives." +msgstr "エントリがサブアーカイブのいずれかに存在するかどうかを確認します。" + +#: ../../archive-class.rst 6748d18e7f6946d6b1497df192130d43 +#: feda328ffa8445dea3f5c2af2f98fdad +msgid "Parameters" +msgstr "パラメータ:" + +#: ../../archive-class.rst:65 ../../archive-class.rst:73 +#: 0458d0fa598349cf8a25c5752c8f840b 1806b51351774f19944fb27526851e14 +msgid "" +"The fully qualified name of the entry. So must include any `path` prefix " +"under which the entry's sub-archive has been added." +msgstr "エントリの完全修飾名。エントリのサブアーカイブが追加されたパスのプレフィックスを含む必要があります。" + +#: ../../archive-class.rst 0fb6926638df4e5d88376ea0eea6219d +#: b0c94ca65df04318aad6ff6b1d22633e +msgid "Returns" +msgstr "戻り値:" + +#: ../../archive-class.rst:67 484ce7feed2141d4b486ae9b49c1c29e +msgid "`True` or `False`." +msgstr "`True` または `False` 。" + +#: ../../archive-class.rst:71 affe45e0783c46f782832cb77fb3eb49 +msgid "Retrieve the data of an entry." +msgstr "エントリのデータを取得します。" + +#: ../../archive-class.rst:75 dc88278b5cee4df6a0867e93b0e234f2 +msgid "" +"The binary data (`bytes`) of the entry. If not found, an exception is " +"raised." +msgstr "エントリのバイナリデータ( `bytes` )です。見つからない場合は例外が発生します。" + +#: ../../archive-class.rst:79 3d32676328d34e37939a65792096baf5 +msgid "" +"A list of the archive's sub-archives. Each list item is a dictionary with" +" the following keys:" +msgstr "アーカイブのサブアーカイブのリストです。各リストアイテムは、次のキーを持つ辞書です:" + +#: ../../archive-class.rst:81 f4a44efa6e934e2ab4d5b59c1abb3212 +msgid "`entries` -- a list of (top-level) entry names in this sub-archive." +msgstr "`entries` - このサブアーカイブ内の(トップレベルの)エントリ名のリスト。" + +#: ../../archive-class.rst:82 5aefdd53977e4a2aa8c559d7bac700f6 +msgid "" +"`fmt` -- the format of the sub-archive. This is one of the strings " +"\"dir\" (file folder), \"zip\" (ZIP archive), \"tar\" (TAR archive), or " +"\"tree\" for single binary entries or file content." +msgstr "" +"`fmt` - サブアーカイブの形式。これは文字列 " +"\"dir\"(ファイルフォルダ)、\"zip\"(ZIPアーカイブ)、\"tar\"(TARアーカイブ)、または単一のバイナリエントリまたはファイルコンテンツの場合は" +" \"tree\" のいずれかです。" + +#: ../../archive-class.rst:83 ef04652561cc41bca0c8d60f067bb118 +msgid "" +"`path` -- the value of the `path` parameter under which this sub-archive " +"was added." +msgstr "`path` - このサブアーカイブが追加されたパラメータの値です。" + +#: ../../archive-class.rst:85 ddd26ce7aa1e42c09e3a7daa19769a5a +msgid "**Example:**" +msgstr "**例:** " + +#: ../../footer.rst:60 9a53a999980d4a2da538998c8bb29234 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/changes.mo b/docs/locales/ja/LC_MESSAGES/changes.mo new file mode 100644 index 000000000..42dda8e96 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/changes.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/changes.po b/docs/locales/ja/LC_MESSAGES/changes.po new file mode 100644 index 000000000..14487956a --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/changes.po @@ -0,0 +1,8389 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 e6ef528c5fdf45fea6c7bb8dc84b7a73 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 fa3067ffc93b4ec490d385c7c51db147 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 65b8350063cd4becbb6318725958b4a2 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../../changes.txt:2 6d40d72e343d4d098b4a6606bb398d57 +msgid "Change Log" +msgstr "" + +#: ../../../changes.txt:5 1f939ea110594a84b498a674a00e8ef9 +msgid "**Changes in version 1.25.5 (2025-03-31)**" +msgstr "" + +#: ../../../changes.txt:7 ../../../changes.txt:26 ../../../changes.txt:46 +#: ../../../changes.txt:69 ../../../changes.txt:92 ../../../changes.txt:102 +#: ../../../changes.txt:117 ../../../changes.txt:137 ../../../changes.txt:147 +#: ../../../changes.txt:164 ../../../changes.txt:190 ../../../changes.txt:223 +#: ../../../changes.txt:239 ../../../changes.txt:249 ../../../changes.txt:281 +#: ../../../changes.txt:318 ../../../changes.txt:345 ../../../changes.txt:367 +#: ../../../changes.txt:384 ../../../changes.txt:427 ../../../changes.txt:448 +#: ../../../changes.txt:462 ../../../changes.txt:475 ../../../changes.txt:501 +#: ../../../changes.txt:514 0d7ab8cafe384b21a13e9fa4e94fa25e +#: 22e0ba5e60b24940b98a79eb72236dc6 24b601119509480e85e64d5086770935 +#: 3eb1033ee4a748dc995302a89d3428ea 4a4d816e7f774fd6b4391e300089a64a +#: 54335ad1c1a347fb987701adba326e16 6f873b93c9b84cbbb35a8250ad726318 +#: 73562614ba9c4136a6f770b401f4a0a5 844916a474fe4777953bba7042c5c62e +#: 871494ee32db43f1a660fb2befb0faa3 87c42d84426b4b9d8b2e3e78b4015f20 +#: 8bc6fd3cd5d54d6a9b7432052d2e8ea1 90f7c06af142401485d2f81351f22a0e +#: 9b32e1eaaa574591be694ef7cdf4c9f3 b5507a0845b24c7dbd900eb8bcf3e28e +#: b61fa9157fd74900880c7ed69cef5779 bdd6dc046ff04ed59321e524d6f5bc8f +#: be3dbe1555564086a67190c4805b6b15 c5d00869cb3a4b0eb5529257897bc166 +#: c6bcf8cffbaf41e2bcfb5158822a6133 ca5934716f614b169002912c2851bef0 +#: ce828cf6c0b84569836352afcadd244e e5441c44488943f9916be452eecab44e +#: ec6cd4cb63fd47059d2ebb14184acbd7 f278fe4ecee043e5845ebfda88e9ed17 +msgid "Fixed issues:" +msgstr "" + +#: ../../../changes.txt:9 6c676547ecbd4885ac4f07306a041c75 +msgid "" +"**Fixed** `4372 `_: Text " +"insertion fails due to missing /Resources object" +msgstr "" + +#: ../../../changes.txt:10 233a37e829224f618adad63d88800d90 +msgid "" +"**Fixed** `4400 `_: " +"Infinite loop in fill_textbox" +msgstr "" + +#: ../../../changes.txt:11 8f504d94ef6e4fc6adbc0beff80e915a +msgid "" +"**Fixed** `4403 `_: " +"Unable to get_text() - layer/clip nesting too deep" +msgstr "" + +#: ../../../changes.txt:12 530b0f7079ee4851b59003ede7babd33 +msgid "" +"**Fixed** `4415 `_: PDF " +"page is mirrored, origin is at bottom-left" +msgstr "" + +#: ../../../changes.txt:14 ../../../changes.txt:35 ../../../changes.txt:55 +#: ../../../changes.txt:79 ../../../changes.txt:128 ../../../changes.txt:178 +#: ../../../changes.txt:209 ../../../changes.txt:230 ../../../changes.txt:266 +#: ../../../changes.txt:286 ../../../changes.txt:300 ../../../changes.txt:328 +#: ../../../changes.txt:350 ../../../changes.txt:373 ../../../changes.txt:399 +#: ../../../changes.txt:432 ../../../changes.txt:454 ../../../changes.txt:468 +#: ../../../changes.txt:485 ../../../changes.txt:507 ../../../changes.txt:516 +#: ../../../changes.txt:529 ../../../changes.txt:541 ../../../changes.txt:558 +#: ../../../changes.txt:571 ../../../changes.txt:584 ../../../changes.txt:597 +#: ../../../changes.txt:609 ../../../changes.txt:621 ../../../changes.txt:644 +#: ../../../changes.txt:676 ../../../changes.txt:702 ../../../changes.txt:742 +#: ../../../changes.txt:769 ../../../changes.txt:791 ../../../changes.txt:1043 +#: ../../../changes.txt:1177 022b6bfa6afb493cbf48260d9587ef96 +#: 0686f711812848d792aac1363acffadd 1320d7afdd2f4419b2a541a5032c7019 +#: 1e86d34c248d4a459adb4f4bcde3cf88 21907423a81745e08b59769313e1b134 +#: 2de90b23ccfa49398e439c6df69ccdf0 2f2b4a73362249b097a23bbea1c9e692 +#: 3825f4e0159e47049c342df98533b1f7 38a7babda0e1488cb4d62919735a1089 +#: 410661ce41c247e99b61ff558d4087db 440026bb912d40eca64ba81b279fa8de +#: 4505918a333e43a59cf4f2c12b2c1df0 4cb975e037494000a70d5e23e9642828 +#: 5e4b4e3e0687431a8d6f844e2ab7a1fc 5ed077d0f32743e7a73f2d39158998df +#: 639102a5dfea4fe7b81479fbaf9ba12e 653df2dc9c604eda9969fcdcfafbe289 +#: 76f2a0c606ce446181f504edbe5332f5 78ddcaab7357478691e827a12330fe01 +#: 7d7db1e8ab8e488ea73621d71a975688 7ff57395a6fa4591bbabdec956c6f742 +#: 81f75b2052e1487ea21a8a811ccb3275 9bd7098628404d3d9071c6223e0ea81b +#: af2a2168396b46fbb7bb533450a15609 bc33b4ea237e4ee897dfa9dfcafa449e +#: c13270d0f6ce4852b9aa500219eb7630 c1404ec3199e4ea192eb052c92b03495 +#: c3864e0acf1d4b5ea2d2b0344ee17e2a c3ceabd966f343469282be9ea1058a55 +#: c589e958bed544369dc0cf396e983ab5 c66a98e3ba5f4e44a100c1f020d68f07 +#: e1bad465c75f4ccebd3299b3cc9b7e44 e5f1e56358304f56a04354365ab60793 +#: e687ffd8f9d94081aec0299f6cb9e94c e7f33c9c6dd7438686123488c4852680 +#: ee02ea89392a4ad18c6b5079bf2f028a fd720a2cef374f37a052f6addee367c9 +msgid "Other:" +msgstr "" + +#: ../../../changes.txt:16 92abd73bd337417a94bbeacc151f90c4 +msgid "Use MuPDF-1.25.6." +msgstr "" + +#: ../../../changes.txt:17 098902c27c924f22b0da5268a65944ce +msgid "Fixed MuPDF SEGV on MacOS with particular fonts." +msgstr "" + +#: ../../../changes.txt:18 c8b2c415a5394403986202b6202f8128 +msgid "Fixed `Annot.get_textpage()`'s `clip` arg." +msgstr "" + +#: ../../../changes.txt:19 a89acf34e5f5400597c796f531beb4ec +msgid "Fixed Python-3.14 (pre-release) build error." +msgstr "" + +#: ../../../changes.txt:22 6a71249e0cd44311bc8ee2a5c1280cf3 +msgid "**Changes in version 1.25.4 (2025-03-14)**" +msgstr "" + +#: ../../../changes.txt:24 510cb2fe09864b5d8e031e145c6bf4dc +msgid "Use MuPDF-1.25.5." +msgstr "" + +#: ../../../changes.txt:28 f86cdbdb5f5e4d31a44799e36a0f8362 +msgid "" +"**Fixed** `4079 `_: " +"Unexpected result for apply_redactions()" +msgstr "" + +#: ../../../changes.txt:29 a6f9fbcdc8c84431b4595594cedff2cd +msgid "" +"**Fixed** `4224 `_: MuPDF" +" error: format error: negative code in 1d faxd" +msgstr "" + +#: ../../../changes.txt:30 1b0923c703684308a15af3d37ac4bfe7 +msgid "" +"**Fixed** `4303 `_: " +"page.get_image_info() returns outdated cached results after replacing " +"image" +msgstr "" + +#: ../../../changes.txt:31 4505fdd4497b4e8aa586994665507661 +msgid "" +"**Fixed** `4309 `_: " +"FzErrorFormat Error When Deleting First Page" +msgstr "" + +#: ../../../changes.txt:32 321213d8c9fe43e28107a432957d7f91 +msgid "" +"**Fixed** `4336 `_: Major" +" Performance Regression: pix.color_count is 150x slower in version 1.25.3" +" compared to 1.23.8" +msgstr "" + +#: ../../../changes.txt:33 7245eb8a03214b09bd3fc8fc27f88d96 +msgid "" +"**Fixed** `4341 `_: " +"Invalid label retrieval when /Kids is an array of multiple /Nums" +msgstr "" + +#: ../../../changes.txt:37 22b0e9deda174e80abda0d228909524c +msgid "Fixed handling of duplicate widget names when joining PDFs (PR #4347)." +msgstr "" + +#: ../../../changes.txt:38 3dad4548088c4a59af94fb80501679d5 +msgid "Improved Pyodide build." +msgstr "" + +#: ../../../changes.txt:39 cbb70c3a10364399b4b23d5af63a2824 +msgid "" +"Avoid SWIG-related build errors with Python-3.13 by disabling " +"PY_LIMITED_API." +msgstr "" + +#: ../../../changes.txt:42 79fec98c80a3475faef419d32b8e7aac +msgid "**Changes in version 1.25.3 (2025-02-06)**" +msgstr "" + +#: ../../../changes.txt:44 3906d5e93f9c420ea6362c8745d97a7a +msgid "Use MuPDF-1.25.4." +msgstr "" + +#: ../../../changes.txt:48 fd3dc6e30f1247caac716513dcf14fb5 +msgid "" +"**Fixed** `4139 `_: Text " +"color numbers change between 1.24.14 and 1.25.0" +msgstr "" + +#: ../../../changes.txt:49 be1304c92780409db80b52f432ddf998 +msgid "" +"**Fixed** `4141 `_: Some " +"insertion methods fails for pages without a /Resources object" +msgstr "" + +#: ../../../changes.txt:50 ddd02a63b7c84fa98adb923edd5188a5 +msgid "" +"**Fixed** `4180 `_: " +"Search problems" +msgstr "" + +#: ../../../changes.txt:51 1b417bfe27f84b139a26a15af365bd0c +msgid "" +"**Fixed** `4182 `_: Text " +"coordinate extraction error" +msgstr "" + +#: ../../../changes.txt:52 01c3c01de5aa44e495395a3cac8fe621 +msgid "" +"**Fixed** `4245 `_: " +"Highlighting issue distorted on recent versions" +msgstr "" + +#: ../../../changes.txt:53 608e056859f64b2caa8788ea94fedabd +msgid "" +"**Fixed** `4254 `_: " +"add_freetext_annot is drawing text outside the annotation box" +msgstr "" + +#: ../../../changes.txt:57 b75ad06fa2424331b3fc542b1713a09a +msgid "" +"In annotations: * Added support for subtype FreeTextCallout. * Added " +"support for rich text." +msgstr "" + +#: ../../../changes.txt:60 3be875b6e2324d438c977d9c46348f70 +msgid "" +"Added miter_limit arg to insert_text*() to allow suppression of spikes " +"caused by long miters." +msgstr "" + +#: ../../../changes.txt:61 994387965fff4603aa6a829344ad3ca4 +msgid "Add Widget Support to `Document.insert_pdf()`." +msgstr "" + +#: ../../../changes.txt:62 5f029e712cfa434bb0a9a9e5856f5f4f +msgid "Add `bibi` to span dicts." +msgstr "" + +#: ../../../changes.txt:63 8cf302d1d9a94f3ca18ccea31c3a99b2 +msgid "Add `synthetic' to char dict." +msgstr "" + +#: ../../../changes.txt:64 ../../../changes.txt:806 +#: 7f730e0ab55e4a848c89fd1269c9067e 91984e278f7b4898966c0d68a2b344f0 +msgid "Fixed Pyodide builds." +msgstr "" + +#: ../../../changes.txt:67 2197b1f6183f4787a98624f68d0c90b3 +msgid "**Changes in version 1.25.2 (2025-01-17)**" +msgstr "" + +#: ../../../changes.txt:71 bcd9d6d9c30e4f9089697c05f37952d4 +msgid "" +"**Fixed** `4055 `_: " +"\"Yes\" for all checkboxes does not work for all PDF rendering engines." +msgstr "" + +#: ../../../changes.txt:72 2dd19144b42b4b6da2fc49b5060d585a +msgid "" +"**Fixed** `4155 `_: " +"samples_mv is unsafe" +msgstr "" + +#: ../../../changes.txt:73 5b3668fc1e3b4af59d74879ef82706e5 +msgid "" +"**Fixed** `4162 `_: Got " +"AttributeError, when tried to add Signature field" +msgstr "" + +#: ../../../changes.txt:74 e7a9224374f442179d949e8359c86467 +msgid "" +"**Fixed** `4186 `_: " +"Incorrect handling of JPEG with color space CMYK image extraction" +msgstr "" + +#: ../../../changes.txt:75 085153beee204ec486180c680f272516 +msgid "" +"**Fixed** `4195 `_: " +"Pixmaps that are inverted and have an alpha channel are not rendered " +"properly" +msgstr "" + +#: ../../../changes.txt:76 fe083792a7954be0991c57c125174ae9 +msgid "" +"**Fixed** `4225 `_: " +"pixmap.pil_save() fails due to colorspace definition" +msgstr "" + +#: ../../../changes.txt:77 b6569feb175f41d78cec167d8de2b240 +msgid "" +"**Fixed** `4232 `_: " +"Incorrect Font style and Size" +msgstr "" + +#: ../../../changes.txt:81 9913d7fc8e4348108de8a630c084f1f8 +msgid "Use Python's built-in glyphname <> unicode conversion." +msgstr "" + +#: ../../../changes.txt:82 94d69880bac14607af7c4c9471bc3cd9 +msgid "Improve speed of pixmap color inversion." +msgstr "" + +#: ../../../changes.txt:83 e993a109d5574b8fb5a26c54cc55bc88 +msgid "" +"Add new `char_flags` member to span dictionary, for example allows " +"detection of invisible text." +msgstr "" + +#: ../../../changes.txt:84 00118ce8f3ee4885b763ddaad90b9d62 +msgid "Detect image masks in TextPage output." +msgstr "" + +#: ../../../changes.txt:85 7f4fbdd15be64328b08e0cbeaaa5d614 +msgid "Added `Pixmap.pil_image()`." +msgstr "" + +#: ../../../changes.txt:88 e7e078580e7d47d286bfbac687ebc119 +msgid "**Changes in version 1.25.1 (2024-12-11)**" +msgstr "" + +#: ../../../changes.txt:90 10ebd7cd55b34d178984a28f4b740ec3 +msgid "Use MuPDF-1.25.2." +msgstr "" + +#: ../../../changes.txt:94 65e54c9d1ca04480ac40be5bb8035e18 +msgid "" +"**Fixed** `4125 `_: " +"memory leak while convert Pixmap's colorspace" +msgstr "" + +#: ../../../changes.txt:95 21b47126d40247068762123f765da080 +msgid "" +"**Fixed** `4034 `_: " +"Possible regression in pdf cleaning during save." +msgstr "" + +#: ../../../changes.txt:98 f166abb249fc4ac6860cc8a35dab1995 +msgid "**Changes in version 1.25.0 (2024-12-05)**" +msgstr "" + +#: ../../../changes.txt:100 431f75b52aa24575abf3fd54665b0e69 +msgid "Use MuPDF-1.25.1." +msgstr "" + +#: ../../../changes.txt:104 e496b4dcb26f4a23b3caa3c6b878580f +msgid "" +"**Fixed** `4026 `_: " +"page.get_text('blocks') output two piece of very similar text with " +"different bbox" +msgstr "" + +#: ../../../changes.txt:105 4650742dd04a4029a85b61a77a51fe19 +msgid "" +"**Fixed** `4004 `_: " +"Segmentation Fault When Updating PDF Form Field Value" +msgstr "" + +#: ../../../changes.txt:106 78df31cddbd64c11b359cc8feab8bff7 +msgid "" +"**Fixed** `3887 `_: " +"Subset Fonts problem using Fallback Font" +msgstr "" + +#: ../../../changes.txt:107 6adac36cb18a48abb9273fe6cab6eed7 +msgid "" +"**Fixed** `3886 `_: " +"Another issue with destroying PDF when inserting html" +msgstr "" + +#: ../../../changes.txt:108 fdc420491fc74691ba2f5e6f46ae1076 +msgid "" +"**Fixed** `3751 `_: " +"apply_redactions causes part of the page content to be hidden / " +"transparent" +msgstr "" + +#: ../../../changes.txt:113 befad62f34a24b3f8b7f8a09e7044da3 +msgid "**Changes in version 1.24.14 (2024-11-19)**" +msgstr "" + +#: ../../../changes.txt:115 404ba5c29a1649b7b4ce67ac70e08854 +msgid "Use MuPDF-1.24.11." +msgstr "" + +#: ../../../changes.txt:119 2cbdee80702348d190fc8ba1fd7d7432 +msgid "" +"**Fixed** `3448 `_: " +"get_pixmap function removes the table and leaves just the content behind" +msgstr "" + +#: ../../../changes.txt:120 1626f95053674bd09bdb7fdc429a328b +msgid "" +"**Fixed** `3758 `_: Got " +"\"malloc(): unaligned tcache chunk detected Aborted (core dumped)\" while" +" using add_redact_annot/apply_redactions" +msgstr "" + +#: ../../../changes.txt:121 1058fbbf9e324668b5c01e9dc1475d52 +msgid "" +"**Fixed** `3813 `_: " +"Stories: Ordered list count broken with nested unordered list" +msgstr "" + +#: ../../../changes.txt:122 3fa9af56bd864ddcb15a94e032ca8959 +msgid "" +"**Fixed** `3933 `_: " +"font.valid_codepoints() - malfunction" +msgstr "" + +#: ../../../changes.txt:123 3c813ccc9f7d49c48f3b7989e75cc9cc +msgid "" +"**Fixed** `4018 `_: " +"PyMuPDF hangs when iterating over zero page PDF pages backwards" +msgstr "" + +#: ../../../changes.txt:124 6655246a1ef440979b582c17892554ac +msgid "" +"**Fixed** `4043 `_: " +"fullcopypage bug" +msgstr "" + +#: ../../../changes.txt:125 c411234000614f55965c36781983b6e9 +msgid "" +"**Fixed** `4047 `_: " +"Segmentation Fault in add_redact_annot" +msgstr "" + +#: ../../../changes.txt:126 479cdee401e140458f3465045bd11688 +msgid "" +"**Fixed** `4050 `_: " +"Content of dict returned by doc.embfile_info() does not fit to " +"documentation" +msgstr "" + +#: ../../../changes.txt:130 5ab00d77297a4c758e2d1720c2435d8d +msgid "" +"Ensure that words from `Page.get_text()` never contain RTL/LTR char " +"mixtures." +msgstr "" + +#: ../../../changes.txt:131 81cf6c6cdbee49b0a3f1c41680450647 +msgid "Fix building with system MuPDF." +msgstr "" + +#: ../../../changes.txt:132 2a5a72ed4d554f3cab847f2da8f12bd7 +msgid "Add dot product for points and vectors." +msgstr "" + +#: ../../../changes.txt:135 fdd277e9a55844cea500c5df64dc77d8 +msgid "**Changes in version 1.24.13 (2024-10-29)**" +msgstr "" + +#: ../../../changes.txt:139 e40f2063f2f74ba7a9b50836c92c3c35 +msgid "" +"**Fixed** `3848 `_: " +"Piximap program crash" +msgstr "" + +#: ../../../changes.txt:140 ../../../changes.txt:151 +#: 1cecbbf45b6448d8b35c9733469fb800 cbe153d5efc34453adf93d3d712be058 +msgid "" +"**Fixed** `3950 `_: " +"Unable to consistently extract field labels from PDFs" +msgstr "" + +#: ../../../changes.txt:141 c82866a736b142f7ace9f54bb02e43f1 +msgid "" +"**Fixed** `3981 `_: " +"PyMuPDF 1.24.12 with pyinstaller throws error." +msgstr "" + +#: ../../../changes.txt:142 3d60c900807443f398fb9f5a24ccfdd3 +msgid "" +"**Fixed** `3994 `_: " +"pix.color_topusage raise Segmentation fault (core dumped)" +msgstr "" + +#: ../../../changes.txt:145 3283ce76daf74ef989e550c533f30f80 +msgid "**Changes in version 1.24.12 (2024-10-21)**" +msgstr "" + +#: ../../../changes.txt:149 f6541b76bb2342c98a3ad12f058bb33e +msgid "" +"**Fixed** `3914 `_: " +"Ability to print MuPDF errors to logging instead of stdout" +msgstr "" + +#: ../../../changes.txt:150 4c8a8c254557474288408bdcdf340a80 +msgid "" +"**Fixed** `3916 `_: " +"insert_htmlbox error: int too large to convert to float" +msgstr "" + +#: ../../../changes.txt:153 64d0512b1bb640839c19174dd7575212 +msgid "Supported Python versions are now 3.9-3.13." +msgstr "" + +#: ../../../changes.txt:155 3f67810537984f01a9fab7268f77d6c4 +msgid "Dropped support for Python-3.8 because end-of-life." +msgstr "" + +#: ../../../changes.txt:156 391ae1d264e6466394afae9b34ed5e7e +msgid "Added support for Python-3.13 because now released." +msgstr "" + +#: ../../../changes.txt:157 eaa7296b536f41708ce11d20fb58f937 +msgid "See: https://devguide.python.org/versions/" +msgstr "" + +#: ../../../changes.txt:160 e4a68838097c42d5a09860102beae49b +msgid "**Changes in version 1.24.11 (2024-10-03)**" +msgstr "" + +#: ../../../changes.txt:162 a0e3c6145d4f48bf8b84de559955b88d +msgid "Use MuPDF-1.24.10." +msgstr "" + +#: ../../../changes.txt:166 3b0f700925b14e7eb9253cece173ef21 +msgid "" +"**Fixed** `3624 `_: Pdf " +"file transform to image have a black block" +msgstr "" + +#: ../../../changes.txt:167 d2ee88adc92b4684a7f6031898da91f3 +msgid "" +"**Fixed** `3859 `_: " +"doc.need_appearances() fails with \"AttributeError: module " +"'pymupdf.mupdf' has no attribute 'PDF_TRUE' \"" +msgstr "" + +#: ../../../changes.txt:168 f4316694781c46c5b3e4b6007c4f9637 +msgid "" +"**Fixed** `3863 `_: " +"apply_redactions() does not work as expected" +msgstr "" + +#: ../../../changes.txt:169 dbdda385386e443fb32bab3c3c403bf4 +msgid "" +"**Fixed** `3905 `_: open " +"stream can raise a FzErrorFormat error instead of FileDataError" +msgstr "" + +#: ../../../changes.txt:171 315ef728a85c4f25a22111cb9119e908 +msgid "Wheels now use the Python Stable ABI:" +msgstr "" + +#: ../../../changes.txt:173 8bb5cfb556b447b98fd75ab78a2c78bd +msgid "There is one PyMuPDF wheel for each platform." +msgstr "" + +#: ../../../changes.txt:174 26bfbd6448824100b418c25086422c6a +msgid "Each wheel works with all supported Python versions." +msgstr "" + +#: ../../../changes.txt:175 604c7e1c324c4008860a6957267ecbea +msgid "" +"Each wheel is built using the oldest supported Python version (currently " +"3.8)." +msgstr "" + +#: ../../../changes.txt:176 d6eb8ebe8d2045438fd3c184ab14e93b +msgid "There is no PyMuPDFb wheel." +msgstr "" + +#: ../../../changes.txt:180 a8f8ac736b8d47e7949081f1a21efec1 +msgid "Improvements to get_text_words() with sort=True." +msgstr "" + +#: ../../../changes.txt:181 596315d45d644ae082739e9d77f3502e +msgid "Tests now always get the latest versions of required Python packages." +msgstr "" + +#: ../../../changes.txt:182 a37a1491df594b928a6f964a83f9a190 +msgid "Removed dependency on setuptools." +msgstr "" + +#: ../../../changes.txt:183 f49604ba7909442495b3f512e738a573 +msgid "Added item to PyMuPDF-1.24.10 changes below - fix of #3630." +msgstr "" + +#: ../../../changes.txt:186 417faf427eba4d01b5dd48a964869021 +msgid "**Changes in version 1.24.10 (2024-09-02)**" +msgstr "" + +#: ../../../changes.txt:188 f91c0ed05c2c4fd09edc380141d28b1b +msgid "Use MuPDF-1.24.9." +msgstr "" + +#: ../../../changes.txt:192 2b4cd5189b694438af8491de2ea10836 +msgid "" +"**Fixed** `3450 `_: " +"get_pixmap function takes too long to process" +msgstr "" + +#: ../../../changes.txt:193 5c9db385314c46bb9c6f9ea1f69d46f5 +msgid "" +"**Fixed** `3569 `_: " +"Invalid OCGs not ignored by SVG image creation" +msgstr "" + +#: ../../../changes.txt:194 05ddb72b9e634cd7af213cb7162237dc +msgid "" +"**Fixed** `3603 `_: " +"ObjStm compression and PDF linearization doesn't work together" +msgstr "" + +#: ../../../changes.txt:195 53f90f39195c45f3bcbb5cc1961035af +msgid "" +"**Fixed** `3650 `_: " +"Linebreak inserted between each letter" +msgstr "" + +#: ../../../changes.txt:196 3db179fa45464dad9e70414c224247af +msgid "" +"**Fixed** `3661 `_: " +"Update Document to check the /XYZ len" +msgstr "" + +#: ../../../changes.txt:197 6d8ca1f356f142279e8ea68611225998 +msgid "" +"**Fixed** `3698 `_: " +"documentation issue - old code in the annotations documentation" +msgstr "" + +#: ../../../changes.txt:198 eef001d4e6ee4f0897dd8c9febd92e82 +msgid "" +"**Fixed** `3705 `_: " +"Document.select() behaves weirdly in some particular kind of pdf files" +msgstr "" + +#: ../../../changes.txt:199 ca5ca69d6ed24b58ba0338ee8c4020bc +msgid "" +"**Fixed** `3706 `_: " +"extend Document.__getitem__ type annotation to reflect that the method " +"also accepts slices" +msgstr "" + +#: ../../../changes.txt:200 d0edc9f50f66477f9196b9ea1fb49e80 +msgid "" +"**Fixed** `3727 `_: " +"Method get_pixmap() make the program exit without any exceptions or " +"messages" +msgstr "" + +#: ../../../changes.txt:201 f650b43986ff4603bc42fcbbde6eda0b +msgid "" +"**Fixed** `3767 `_: " +"Cannot get Tessdata with Tesseract-OCR 5" +msgstr "" + +#: ../../../changes.txt:202 4fc2a8f8d613411cb5c354a887854dcf +msgid "" +"**Fixed** `3773 `_: " +"Link.set_border gives TypeError: '<' not supported between instances of " +"'NoneType' and 'int'" +msgstr "" + +#: ../../../changes.txt:203 d900024628ca4d089aecb0aef12682a8 +msgid "" +"**Fixed** `3774 `_: " +"fitz.__version__` does not work anymore" +msgstr "" + +#: ../../../changes.txt:204 4007c30d95f84ca8babd2a2350cf4977 +msgid "" +"**Fixed** `3789 `_: " +"ValueError: not enough values to unpack (expected 3, got 2) is thrown " +"when call insert_pdf" +msgstr "" + +#: ../../../changes.txt:205 21102be692a24f46b22dddb3a6e21995 +msgid "" +"**Fixed** `3820 `_: class" +" improves namedDest handling" +msgstr "" + +#: ../../../changes.txt:207 da3eb59070604923a017daf4c18f72a1 +msgid "" +"**Fixed** `3630 `_: " +"page.apply_redactions gives unwanted black rectangle" +msgstr "" + +#: ../../../changes.txt:211 03c28d16dea641f7952aa8ed37aea9fb +msgid "" +"Object streams and linearization cannot be used together; attempting to " +"do so will raise an exception. (#3603)" +msgstr "" + +#: ../../../changes.txt:213 9b2c3f3e7cc34e72b672faa146c88b78 +msgid "Fixed handling of non-existing /Contents object." +msgstr "" + +#: ../../../changes.txt:216 61a174d09cf64eafbdc2367fea6883f5 +msgid "**Changes in version 1.24.9 (2024-07-24)**" +msgstr "" + +#: ../../../changes.txt:218 724a59f99c4945c4a6b63a8b411cf9cd +msgid "Use MuPDF-1.24.8." +msgstr "" + +#: ../../../changes.txt:221 cfdccbd0f77a4cb8bd18f2b27bddc501 +msgid "**Changes in version 1.24.8 (2024-07-22)**" +msgstr "" + +#: ../../../changes.txt:225 36055284b5504d479b15dbbb81526dd9 +msgid "" +"**Fixed** `3636 `_: API " +"documentation for the open function is not obvious to find." +msgstr "" + +#: ../../../changes.txt:226 1c90ee48b14d4e07960a62223b31bb43 +msgid "" +"**Fixed** `3654 `_: docx " +"parsing was broken in 1.24.7" +msgstr "" + +#: ../../../changes.txt:227 4611661f80ab4aeca47440e95b0b3f6a +msgid "" +"**Fixed** `3677 `_: " +"Unable to extract subset font name using the newer versions of PyMuPDF : " +"1.24.6 and 1.24.7." +msgstr "" + +#: ../../../changes.txt:228 35bc435eff28466db744f7b71046c0d2 +msgid "" +"**Fixed** `3687 `_: " +"Page.get_text results in AssertionError for epub files" +msgstr "" + +#: ../../../changes.txt:232 cdd79859c92c4b4d80079607b6625111 +msgid "Fixed various spelling mistakes spotted by codespell." +msgstr "" + +#: ../../../changes.txt:233 90874632a02d437889e6667ae38f29b9 +msgid "Improved how we modify MuPDF's default configuration on Windows." +msgstr "" + +#: ../../../changes.txt:234 9a07aff8242f4c7bbc6341a3f228cb9a +msgid "Make text search to work with ligatures." +msgstr "" + +#: ../../../changes.txt:237 2031962ffa474ccda6060efab36150a7 +msgid "**Changes in version 1.24.7 (2024-06-26)**" +msgstr "" + +#: ../../../changes.txt:241 065fa22156bc476b854ad82b616afd6b +msgid "" +"**Fixed** `3615 `_: " +"Document.pagemode or Document.pagelayout crashes for epub files" +msgstr "" + +#: ../../../changes.txt:242 fa3f95dd896e424f9e2565650c51f51a +msgid "" +"**Fixed** `3616 `_: not " +"last version reported" +msgstr "" + +#: ../../../changes.txt:245 e2cc189dc2bb4b6da5807f8c63c309ee +msgid "**Changes in version 1.24.6 (2024-06-25)**" +msgstr "" + +#: ../../../changes.txt:247 275f70bd5623499798385de0013c8399 +msgid "Use MuPDF-1.24.4" +msgstr "" + +#: ../../../changes.txt:251 79e3898da4734f3481710ed52b676997 +msgid "" +"**Fixed** `3599 `_: " +"Story.fit_width() has a weird line" +msgstr "" + +#: ../../../changes.txt:252 91a6a2f3edfd482aa9e2f515d9baab01 +msgid "" +"**Fixed** `3594 `_: " +"Garbled extraction for Amazon Sustainability Report" +msgstr "" + +#: ../../../changes.txt:253 70cce662a06947b3bae489a22703fca3 +msgid "" +"**Fixed** `3591 `_: " +"'width' in Page.get_drawings() returns width equal as 0" +msgstr "" + +#: ../../../changes.txt:254 de435aab81bf4c479ced7f8a9446c028 +msgid "" +"**Fixed** `3561 `_: " +"ZeroDivisionError: float division by zero with page.apply_redactions()" +msgstr "" + +#: ../../../changes.txt:255 44ec6c6728bd4c22be81f25974b6d97c +msgid "" +"**Fixed** `3559 `_: " +"SegFault 11 when empty H1 H2 H3 H4 etc element is used in insert_htmlbox" +msgstr "" + +#: ../../../changes.txt:256 31e1c8d1ff9c495dac1362f65a21fbe9 +msgid "" +"**Fixed** `3539 `_: Add " +"dotted gridline detection to table recognition" +msgstr "" + +#: ../../../changes.txt:257 8ca698e815434768bdc95208f6e5c273 +msgid "" +"**Fixed** `3519 `_: " +"get_toc(simple=False) AttributeError: 'Outline' object has no attribute " +"'rect'" +msgstr "" + +#: ../../../changes.txt:258 247415af0e46448cab170bc8a456d15f +msgid "" +"**Fixed** `3510 `_: " +"page.get_label() gets wrong label on the first page of doc" +msgstr "" + +#: ../../../changes.txt:259 a39d2c878ec842e3bfaaad8a0b6ee170 +msgid "" +"**Fixed** `3494 `_: " +"1.24.2/1.24.3: spurious characters introduced when using subset_fonts and" +" insert_pdf" +msgstr "" + +#: ../../../changes.txt:260 20cca7e66cf14f38b869027fe95cb925 +msgid "" +"**Fixed** `3470 `_: " +"subset_fonts error exit without exception/warning" +msgstr "" + +#: ../../../changes.txt:261 2d654390f9f34e3da7c49ac149eb3421 +msgid "" +"**Fixed** `3400 `_: " +"set_toc alters link coordinates for some rotated pages on pymupdf 1.24.2" +msgstr "" + +#: ../../../changes.txt:262 a168c8281de84b57bd2fb3fd05c938cf +msgid "" +"**Fixed** `3347 `_: " +"Incorrect links to points on pages having different heights" +msgstr "" + +#: ../../../changes.txt:263 c2ac43a9173a42b9a52f445c0f1c1e21 +msgid "" +"**Fixed** `3237 `_: " +"Set_metadata() does not work" +msgstr "" + +#: ../../../changes.txt:264 a3a409d493b4421cb9bec08391e3fe3b +msgid "" +"**Fixed** `3493 `_: " +"Isolate PyMuPDF from other libraries; issues when PyMuPDF is loaded with " +"other libraries like GdkPixbuf" +msgstr "" + +#: ../../../changes.txt:268 d5df9d7f9bd641a0bf62fd6c664622d8 +msgid "" +"Fixed concurrent use of PyMuPDF caused by use of constant temporary " +"filenames." +msgstr "" + +#: ../../../changes.txt:270 7740388431174845b2d788a15675dee6 +msgid "Add musllinux x86_64 wheels to release." +msgstr "" + +#: ../../../changes.txt:272 362e94b043be493ba75a17c338748497 +msgid "Added clearer version information:" +msgstr "" + +#: ../../../changes.txt:274 c4d0619a541a45cb92e5361065bb48b1 +msgid "`pymupdf.pymupdf_version`." +msgstr "" + +#: ../../../changes.txt:275 bcde4e1785e54d109152991515f06628 +msgid "`pymupdf.mupdf_version`." +msgstr "" + +#: ../../../changes.txt:276 8076fdcb1f5140b88e61570245c98693 +msgid "`pymupdf.pymupdf_date`." +msgstr "" + +#: ../../../changes.txt:279 2dd3fcec4770444d8894974e3508f752 +msgid "**Changes in version 1.24.5 (2024-05-30)**" +msgstr "" + +#: ../../../changes.txt:283 f666428db30d40e6b94b0244290bd045 +msgid "" +"**Fixed** `3479 `_: " +"regression: fill_textbox: IndexError: pop from empty list" +msgstr "" + +#: ../../../changes.txt:284 6bbeef393d754a509f3b5e4b7821549d +msgid "" +"**Fixed** `3488 `_: " +"set_toc method error" +msgstr "" + +#: ../../../changes.txt:288 0ea44f262e8d4df995dcedebc1053fe2 +msgid "Some more fixes to use MuPDF floating formatting." +msgstr "" + +#: ../../../changes.txt:289 8dc51219115e4d24849d39ebd871f3e3 +msgid "Removed/disabled some unnecessary diagnostics." +msgstr "" + +#: ../../../changes.txt:290 0b9a119f8faa451cba61e353c6d55481 +msgid "Fixed utils.do_links() crash." +msgstr "" + +#: ../../../changes.txt:291 147edf83380d4249b8c8e6248b5bfa3e +msgid "" +"Experimental new functions `pymupdf.apply_pages()` and " +"`pymupdf.get_text()`." +msgstr "" + +#: ../../../changes.txt:292 c01954be993f4eb79afe8c14db8cc079 +msgid "Addresses wrong label generation for label styles \"a\" and \"A\"." +msgstr "" + +#: ../../../changes.txt:295 17f8e93957d448988cad9a8f2bbd1856 +msgid "**Changes in version 1.24.4 (2024-05-16)**" +msgstr "" + +#: ../../../changes.txt:297 8739bae3939441f4b5fa661332b0292d +msgid "" +"**Fixed** `3418 `_: Re-" +"introduced bug, text align add_redact_annot" +msgstr "" + +#: ../../../changes.txt:298 7364b0e0566b48b48ab5379a8133dc60 +msgid "" +"**Fixed** `3472 `_: " +"insert_pdf gives SystemError" +msgstr "" + +#: ../../../changes.txt:302 bf397a0dd1834ca3baea98e4b30749e5 +msgid "" +"Fixed sysinstall test failing to remove all of prior installation before " +"new install." +msgstr "" + +#: ../../../changes.txt:304 4f4d2aa8272445c98a5af91511057bf1 +msgid "Fixed `utils.do_links()` crash." +msgstr "" + +#: ../../../changes.txt:305 827df41b996b49cf8389603b3342e5e8 +msgid "Correct `TextPage` creation Code." +msgstr "" + +#: ../../../changes.txt:306 4861824e32fc4bc7a1f4c6652ecaa071 +msgid "Unified various diagnostics." +msgstr "" + +#: ../../../changes.txt:307 756402854b584d1fb9461309d6f24941 +msgid "Fix bug in `page_merge()`." +msgstr "" + +#: ../../../changes.txt:310 01ab99257c0b4d99ae865583a74141c7 +msgid "**Changes in version 1.24.3 (2024-05-09)**" +msgstr "" + +#: ../../../changes.txt:313 4ffc23d07b214eeba05e2d48125f92a9 +msgid "" +"The Python module is now called `pymupdf`. `fitz` is still supported for " +"backwards compatibility." +msgstr "" + +#: ../../../changes.txt:316 7bd8f476d4774611b422f963389b013f +msgid "Use MuPDF-1.24.2." +msgstr "" + +#: ../../../changes.txt:320 c20ab64b9e7a49949e0555bad8ced17f +msgid "" +"**Fixed** `3357 `_: " +"PyMuPDF==1.24.0 will hanging when using page.get_text(\"text\")" +msgstr "" + +#: ../../../changes.txt:321 c10db0fa5995470888a5adfbc25004e7 +msgid "" +"**Fixed** `3376 `_: " +"Redacting results are not as expected in 1.24.x." +msgstr "" + +#: ../../../changes.txt:322 bef45b21f5a347399f100596e98c46be +msgid "" +"**Fixed** `3379 `_: " +"Documentation mismatch for get_text_blocks return value order." +msgstr "" + +#: ../../../changes.txt:323 14283f78151946ae9f3ce5e5dafc8294 +msgid "" +"**Fixed** `3381 `_: " +"Contents stream contains floats in scientific notation" +msgstr "" + +#: ../../../changes.txt:324 e88a6a4318f84720a88b41dad94489b9 +msgid "" +"**Fixed** `3402 `_: " +"Cannot add Widgets containing inter-field-calculation JavaScript" +msgstr "" + +#: ../../../changes.txt:325 10f281049ef04d24a0b23f6bef0424ea +msgid "" +"**Fixed** `3414 `_: " +"missing attribute set_dpi()" +msgstr "" + +#: ../../../changes.txt:326 77dbfa0dd18549f3a95c01ed17ec1de9 +msgid "" +"**Fixed** `3430 `_: " +"page.get_text() cause process freeze with certain pdf on v1.24.2" +msgstr "" + +#: ../../../changes.txt:330 ../../../changes.txt:352 +#: 13f90c207375462d9e95a5b927305290 b96f74bc921c4b8d8f60b6c26827655e +msgid "New/modified methods:" +msgstr "" + +#: ../../../changes.txt:332 a8c00735e8c44a398f31ba5211f477a9 +msgid "" +"`Page.remove_rotation()`: new, set page rotation to zero while keeping " +"appearance." +msgstr "" + +#: ../../../changes.txt:334 c0f0a981cebb4324bb0d6cbb9a488d2f +msgid "Fixed some problems when checking for PDF properties." +msgstr "" + +#: ../../../changes.txt:335 4156f92b39764681b2af919228763a1d +msgid "" +"Fixed pip builds from sdist (see discussion `3360 " +"`_: Alpine linux " +"docker build failing \"No matching distribution found for " +"pymupdfb==1.24.1\")." +msgstr "" + +#: ../../../changes.txt:340 81fe36d8869a443da05135ea366195cb +msgid "**Changes in version 1.24.2 (2024-04-17)**" +msgstr "" + +#: ../../../changes.txt:342 0a9a3d2c9bba493792d89bc7cc65d28e +msgid "" +"Removed obsolete classic implementation from releases (previously " +"available as module `fitz_old`)." +msgstr "" + +#: ../../../changes.txt:347 7488698997974b779a59ce150f0bbf76 +msgid "" +"**Fixed** `3331 `_: " +"Document.pages() is incorrectly type-hinted" +msgstr "" + +#: ../../../changes.txt:348 2f00b9289d9240e3a3b48c0ebaef16ba +msgid "" +"**Fixed** `3354 `_: " +"PyMuPDF==1.24.1: AttributeError: property 'metadata' of 'Document' object" +" has no setter" +msgstr "" + +#: ../../../changes.txt:354 2d9e11b01ffb4035b7b8e491a602bf1a +msgid "`Document.bake()`: new, make annotations / fields permanent content." +msgstr "" + +#: ../../../changes.txt:355 92bbbf2759024b76a4aedeb72838309a +msgid "" +"`Page.cluster_drawings()`: new, identifies drawing items (i.e. vector " +"graphics or line-art) that belong together based on their geometrical " +"vicinity." +msgstr "" + +#: ../../../changes.txt:358 8c8e94031bc549368b1ac7e9a3718cd7 +msgid "`Page.apply_redactions()`: added new parameter `text`." +msgstr "" + +#: ../../../changes.txt:359 2b42cb04d83b440b8af4da151920f352 +msgid "" +"`Document.subset_fonts()`: use MuPDF's `pdf_subset_fonts()` instead of " +"PyMuPDF code." +msgstr "" + +#: ../../../changes.txt:361 5e5df78f5bee47d897bf743b1b8ef355 +msgid "The `Document` class now supports page numbers specified as slices." +msgstr "" + +#: ../../../changes.txt:362 9c93dc2dbbdf41ba9096a815dbd42ad3 +msgid "Avoid causing MuPDF warnings." +msgstr "" + +#: ../../../changes.txt:365 c79d56bf10734acaa3b2b7f6a97e4e53 +msgid "**Changes in version 1.24.1 (2024-04-02)**" +msgstr "" + +#: ../../../changes.txt:369 56f52960edef4cf599266101cda45e41 +msgid "" +"**Fixed** `3278 `_: " +"apply_redactions moves some unredacted text" +msgstr "" + +#: ../../../changes.txt:370 2691e44a30b14dd7a0cafb5b685b912b +msgid "" +"**Fixed** `3301 `_: Be " +"more permissive when classifying links as kind LINK_URI" +msgstr "" + +#: ../../../changes.txt:371 e5e29a5f647b4d238ed6a0f2ae20b57b +msgid "" +"**Fixed** `3306 `_: Text " +"containing capital 'ET' not appearing as annotation" +msgstr "" + +#: ../../../changes.txt:375 f40db39c2b1641688f82891b969ca3ba +msgid "Use MuPDF-1.24.1." +msgstr "" + +#: ../../../changes.txt:376 f665ada34f4c42a5a9dc3abb208d526e +msgid "" +"Support ObjStm Compression. Methods `Document.save()`, " +"`Document.ez_save()` and `Document.write()` now support new parameters " +"`use_objstm`, compression_effort` and `preserve_metadata`." +msgstr "" + +#: ../../../changes.txt:382 0f152f480505446891857278644223a6 +msgid "**Changes in version 1.24.0 (2024-03-21)**" +msgstr "" + +#: ../../../changes.txt:386 b01ae7a8cfa74c74ba633d65d02bcaac +msgid "" +"**Fixed** `3281 `_: " +"Preparing metadata (pyproject.toml) did not run successfully" +msgstr "" + +#: ../../../changes.txt:387 3c9ba180fba24f1ca991629b2eb8b9c8 +msgid "" +"**Fixed** `3279 `_: " +"PyMuPDF no longer builds in Alpine Linux" +msgstr "" + +#: ../../../changes.txt:388 0c35d5ebac3a487fa1d60e2f48d711fd +msgid "" +"**Fixed** `3257 `_: " +"apply_redactions() deleting text outside of annoted box" +msgstr "" + +#: ../../../changes.txt:389 8f163469fef6461dbda83305263f0d7f +msgid "" +"**Fixed** `3216 `_: " +"AttributeError: 'Annot' object has no attribute '__del__'" +msgstr "" + +#: ../../../changes.txt:390 a65a40e81c994596b74cc40aaeed0cc2 +msgid "" +"**Fixed** `3207 `_: " +"get_drawings's items is missing line from h path operator" +msgstr "" + +#: ../../../changes.txt:391 21fc37fb9749416e833408ce04822880 +msgid "" +"**Fixed** `3201 `_: " +"Memory leaks when merging PDFs" +msgstr "" + +#: ../../../changes.txt:392 7948f671aa924ce1a8bbc2a83a1fde30 +msgid "" +"**Fixed** `3197 `_: " +"page.get_text() returns hexadecimal text for some characters" +msgstr "" + +#: ../../../changes.txt:393 7e5c6015e0d44ee3bdade1dfd9f5b502 +msgid "" +"**Fixed** `3196 `_: " +"Remove text not working in 1.23.25 version vs 1.20.2" +msgstr "" + +#: ../../../changes.txt:394 c072490998984668b0cf1c209cbfa50b +msgid "" +"**Fixed** `3172 `_: PDF's" +" 45º lines dissapearing in png conversion" +msgstr "" + +#: ../../../changes.txt:395 502c8ffdae6f4d79ad2b008e4e3df49d +msgid "" +"**Fixed** `3135 `_: Do " +"not log warnings to stdout" +msgstr "" + +#: ../../../changes.txt:396 88aa9921095b44e5a24f553963d3edd8 +msgid "" +"**Fixed** `3125 `_: " +"get_pixmap method stuck on one page and runs forever" +msgstr "" + +#: ../../../changes.txt:397 a086b013741242e78fbe36d569881bfb +msgid "" +"**Fixed** `2964 `_: There" +" is an issue with the image generated by the page.get_pixmap() function" +msgstr "" + +#: ../../../changes.txt:401 de6e5174a6054dfc8c68e805edbb47e1 +msgid "Use MuPDF-1.24.0." +msgstr "" + +#: ../../../changes.txt:402 ea4d02f0f40848ae9ea749edf5d94048 +msgid "Add support for redacting vector graphics." +msgstr "" + +#: ../../../changes.txt:403 c72b7b0c692046aa8483eac92d99df40 +msgid "Several fixes for table module" +msgstr "" + +#: ../../../changes.txt:405 e2a901b4ec2f4d59afdac23bbeaa3f99 +msgid "Add new method for outputting the table as a markdown string." +msgstr "" + +#: ../../../changes.txt:407 bce2303d13e643c2b9b41b1ebabfa8a8 +msgid "Address errors in computing the table header object:" +msgstr "" + +#: ../../../changes.txt:409 e2d8f9b142b74ee8adfe325018d9edc4 +msgid "" +"We now allow None as the cell value, because this will be resolved where " +"needed (e.g. in the pandas DataFrame)." +msgstr "" + +#: ../../../changes.txt:412 c7e3e62b890348239221f07f267b4f10 +msgid "" +"We previously tried to enforce rect-like tuples in all header cell " +"bboxes, however this fails for tables with all-None columns. This fix " +"enables this and constructs an empty string in the corresponding cell " +"string." +msgstr "" + +#: ../../../changes.txt:417 1a9c7e169b7e42019997dd03a0d31a68 +msgid "" +"We now correctly include start / stop points of lines in the bbox of the " +"clustered graphic. We previously joined the line's rectangle - which had" +" no effect because this is always empty." +msgstr "" + +#: ../../../changes.txt:421 e810eae0f82142a689d28c2b5388d785 +msgid "Improved exception text if we fail to open document." +msgstr "" + +#: ../../../changes.txt:422 b466bce735a2416da76262d5b4a5968d +msgid "Fixed build with new libclang 18." +msgstr "" + +#: ../../../changes.txt:425 8c5b68d084164dafbf64758c244d9e8d +msgid "**Changes in version 1.23.26 (2024-02-29)**" +msgstr "" + +#: ../../../changes.txt:429 2ccf8fac8a4b446194628eda23d3f584 +msgid "" +"**Fixed** `3199 `_: Add " +"entry_points to setuptools configuration to provide command-line console " +"scripts" +msgstr "" + +#: ../../../changes.txt:430 41d8053f4c2e43d5aaf9257dd8beeb07 +msgid "" +"**Fixed** `3209 `_: Empty" +" vertices in ink annotation" +msgstr "" + +#: ../../../changes.txt:434 902727a625874df6ad9e266c98d97b6a +msgid "Improvements to table detection:" +msgstr "" + +#: ../../../changes.txt:436 b57aa583ca8743b9988c9580bc62d293 +msgid "" +"Improved check for empty tables, fixes bugs when determining table " +"headers." +msgstr "" + +#: ../../../changes.txt:437 59b877fa13a44dfc806abfeef1f21a15 +msgid "Improved computation of enveloping vector graphic rectangles." +msgstr "" + +#: ../../../changes.txt:438 9c0e7c8d4573405dbb14c78024bd29c9 +msgid "Ignore more meaningless \"pseudo\" tables" +msgstr "" + +#: ../../../changes.txt:440 233a4100525444829bb2577a7690ba01 +msgid "Install command-line 'pymupdf' command that runs fitz/__main__.py." +msgstr "" + +#: ../../../changes.txt:441 ed88f84097ca4b6e801b41e90c81f002 +msgid "Don't overwrite MuPDF's config.h when building on non-Windows." +msgstr "" + +#: ../../../changes.txt:442 59cc8cec3ce643d2b573db42561040ab +msgid "" +"Fix `Story` constructor's `archive` arg to match docs - now accepts a " +"single `Archive` constructor arg." +msgstr "" + +#: ../../../changes.txt:443 30af29dadb6b4ec1a19ec56a3f5d231a +msgid "" +"Do not include MuPDF source in sdist; will be downloaded automatically " +"when building." +msgstr "" + +#: ../../../changes.txt:446 d1b07a84a69c464cbc1872f66a847288 +msgid "**Changes in version 1.23.25 (2024-02-20)**" +msgstr "" + +#: ../../../changes.txt:450 cdf25137e0fb4ce98670f3181f765448 +msgid "" +"**Fixed** `3182 `_: " +"Pixmap.invert_irect argument type error" +msgstr "" + +#: ../../../changes.txt:451 1d72c835607449caa408e50688dfb503 +msgid "" +"**Fixed** `3186 `_: " +"extractText() extracts broken text from pdf" +msgstr "" + +#: ../../../changes.txt:452 de265933324348f58529a7f8b8d955c4 +msgid "" +"**Fixed** `3191 `_: Error" +" on .find_tables()" +msgstr "" + +#: ../../../changes.txt:456 c9d376636fe340e889660b3bf5d0e99a +msgid "" +"When building, be able to specify python-config directly, with " +"environment variable `PIPCL_PYTHON_CONFIG`." +msgstr "" + +#: ../../../changes.txt:460 128746cdefa94dc58c8ef1587ace5b5a +msgid "**Changes in version 1.23.24 (2024-02-19)**" +msgstr "" + +#: ../../../changes.txt:464 0777e9a86a4744479f489d2fa4ea0607 +msgid "" +"**Fixed** `3148 `_: Table" +" extraction - vertical text not handled correctly" +msgstr "" + +#: ../../../changes.txt:465 d6f5b711c4d94d8c9ca83cbb81c78789 +msgid "" +"**Fixed** `3179 `_: Table" +" Detection: Incorrect Separation of Vector Graphics Clusters" +msgstr "" + +#: ../../../changes.txt:466 4c0194ff72e346a7ad233ea6d81875b4 +msgid "" +"**Fixed** `3180 `_: " +"Cannot show optional content group: AttributeError: module 'fitz.mupdf' " +"has no attribute 'pdf_array_push_drop'" +msgstr "" + +#: ../../../changes.txt:470 759fccf21d2f4a74a4a2b91ebab5d482 +msgid "Be able to test system install using `sudo pip install` instead of a venv." +msgstr "" + +#: ../../../changes.txt:473 788dfa9273434895b432ccb00c93e69a +msgid "**Changes in version 1.23.23 (2024-02-18)**" +msgstr "" + +#: ../../../changes.txt:477 2bcb0741150641bd9887ffbbda0317c8 +msgid "" +"**Fixed** `3126 `_: " +"Initialising Archive with a pathlib.Path fails." +msgstr "" + +#: ../../../changes.txt:478 e35256f482a449c08e2d6672dff6d343 +msgid "" +"**Fixed** `3131 `_: " +"Calling the next attribute of an Annot raises a \"No attribute .parent\" " +"warning" +msgstr "" + +#: ../../../changes.txt:479 88b148b9eba840b6ac253d98a2093281 +msgid "" +"**Fixed** `3134 `_: Using" +" an IRect as clip parameter in Page.get_pixmap no longer works since " +"1.23.9" +msgstr "" + +#: ../../../changes.txt:480 9a53f89e825048439025863c9d4eae37 +msgid "" +"**Fixed** `3140 `_: PDF " +"document stays in use after closing" +msgstr "" + +#: ../../../changes.txt:481 8bfed5fa15c74c5b85cf71e521ab6842 +msgid "" +"**Fixed** `3150 `_: " +"doc.select() hangs on this doc." +msgstr "" + +#: ../../../changes.txt:482 d8cc3717f051461cb6673fd2dac2c31d +msgid "" +"**Fixed** `3163 `_: " +"AssertionError on using fitz.IRect" +msgstr "" + +#: ../../../changes.txt:483 4f03f7c527ce440faa03a653869192ae +msgid "" +"**Fixed** `3177 `_: " +"fitz.Pixmap(None, pix) Unrecognised args for constructing Pixmap" +msgstr "" + +#: ../../../changes.txt:488 fb974c6a5bb64b938bfa47a71b1eb83e +msgid "" +"Improved `Document.select() by using new MuPDF function " +"`pdf_rearrange_pages()`. This is a more complete (and faster) " +"implementation of what needs to be done here in that not only pages will " +"be rearranged, but also consequential changes will be made to the table " +"of contents, links to removed pages and affected entries in the Optional " +"Content definitions." +msgstr "" + +#: ../../../changes.txt:494 e69a3589ce184698bcc2f17f20427379 +msgid "`TextWriter.appendv()`: added `small_caps` arg." +msgstr "" + +#: ../../../changes.txt:495 884a541495bd45ae8adbf342ca3ba2f0 +msgid "Fixed some valgrind errors with MuPDF master." +msgstr "" + +#: ../../../changes.txt:496 3c640f756bdc4375801570fef032166d +msgid "Fixed `Document.insert_image()` when build with MuPDF master." +msgstr "" + +#: ../../../changes.txt:499 d8fd69d2be0e4ca092fd23c9235ae02c +msgid "**Changes in version 1.23.22 (2024-02-12)**" +msgstr "" + +#: ../../../changes.txt:503 e378f2374df04952a72be635791bdea3 +msgid "" +"**Fixed** `3143 `_: " +"Difference in decoding of OCGs names between doc.get_ocgs() and " +"page.get_drawings()" +msgstr "" + +#: ../../../changes.txt:505 c4c2fa5c579b46b280156b50a2e4aff1 +msgid "" +"**Fixed** `3139 `_: " +"Pixmap resizing needs positional arg \"clip\" - even if None." +msgstr "" + +#: ../../../changes.txt:509 1d6171c3fe414d3492c08c2877177935 +msgid "Removed the use of MuPDF function `fz_image_size()` from PyMuPDF." +msgstr "" + +#: ../../../changes.txt:512 12a0c3980aba477ba7505402411f7e1f +msgid "**Changes in version 1.23.21 (2024-02-01)**" +msgstr "" + +#: ../../../changes.txt:518 1dee7842885a428383fdd421f24e82ec +msgid "" +"Fixed bug in set_xml_metadata(), PR `3112 " +"https://github.com/pymupdf/PyMuPDF/pull/3112>`_: Fix pdf_add_stream " +"metadata error" +msgstr "" + +#: ../../../changes.txt:519 4f2d5b1a58814d3c80f8daaf4c0b7ee0 +msgid "Fixed lack of `.parent` member in `TextPage` from `Annot.get_textpage()`." +msgstr "" + +#: ../../../changes.txt:520 0bfd6a220a4649ccb00804b1fe8b7201 +msgid "Fixed bug in `Page.add_widget()`." +msgstr "" + +#: ../../../changes.txt:523 340415b6e2ce4df48b9487e08c196ff7 +msgid "**Changes in version 1.23.20 (2024-01-29)**" +msgstr "" + +#: ../../../changes.txt:525 ../../../changes.txt:536 ../../../changes.txt:554 +#: ../../../changes.txt:566 ../../../changes.txt:580 ../../../changes.txt:593 +#: ../../../changes.txt:604 ../../../changes.txt:616 ../../../changes.txt:640 +#: ../../../changes.txt:760 ../../../changes.txt:777 ../../../changes.txt:809 +#: ../../../changes.txt:866 ../../../changes.txt:942 ../../../changes.txt:976 +#: ../../../changes.txt:985 ../../../changes.txt:994 ../../../changes.txt:1012 +#: ../../../changes.txt:1112 ../../../changes.txt:1151 +#: 2d61dd717b814ff094d3662012f2d18c 3e4ed138579743b39a32f8813c3b2e06 +#: 3ef6efffac1f4b7e9a653e9d1f296c3d 51fd65e4747f4d8f8b81de6b8a90a514 +#: 5fb04d0ff03b4789bf5c4aeabe2c7e15 606d6531e6554337b6c3ab8452c45ab0 +#: 65214ceb29b245868a158a9bfffa92d1 8a7880aed95644ea8aade26da406a37b +#: 968705d72d6f435a8b62f7f17b79c2d3 a27ecb9925af4dac91cd4af226a1d2d5 +#: a3d74a497ea148e7a1c7de31c00ee374 a46cd0d40ef5421fac9eb75e4fb898b9 +#: ac66e2f1e7004c37833e32fcc0116465 ada78a48dce74af3a3b5d181fb6fee38 +#: af10bc204bce4e21954c335e62a51ec9 be7d714ee714447e9ff7eb5c49530854 +#: c293b9e060844d05acc3178a91050794 cff5c1ea2c9a44d1b056e9e19549ce5e +#: ee8758a13e7e455699f88815363ce8ac f053f08242e7453db1d7144d88942d73 +msgid "Bug fixes:" +msgstr "" + +#: ../../../changes.txt:527 9464a8a8f8674bcdb9371ce12c044530 +msgid "" +"**Fixed** `3100 `_: Wrong" +" internal property accessed in get_xml_metadata" +msgstr "" + +#: ../../../changes.txt:531 bb2df9b714cc473e957180178300aae2 +msgid "Significantly improved speed of `Document.get_toc()`." +msgstr "" + +#: ../../../changes.txt:534 79bdfec9e4464ada983f653b5c5e97c7 +msgid "**Changes in version 1.23.19 (2024-01-25)**" +msgstr "" + +#: ../../../changes.txt:538 1da592507a0340538b130185c9fa3779 +msgid "" +"**Fixed** `3087 `_: " +"Exception in insert_image with mask specified" +msgstr "" + +#: ../../../changes.txt:539 2c4fa7f3a51d45889f0bb63a0e78b9c5 +msgid "" +"**Fixed** `3094 `_: " +"TypeError: '<' not supported between instances of 'FzLocation' and 'int' " +"in doc.delete_pages" +msgstr "" + +#: ../../../changes.txt:543 c3074effd2ad45da9d466e7c4721fe3a +msgid "When finding tables:" +msgstr "" + +#: ../../../changes.txt:545 4cf8c152b4bd4b7ea3628a8ea7acd0a2 +msgid "" +"Allow addition of user-defined \"virtual\" vector graphics when finding " +"tables." +msgstr "" + +#: ../../../changes.txt:546 503232beafd64d76a6173e6094362d87 +msgid "" +"Confirm that the enveloping bboxes of vector graphics are inside the clip" +" rectangle." +msgstr "" + +#: ../../../changes.txt:547 01bb57ea92bd451f8e6fd2e087768146 +msgid "Avoid slow finding of rectangle intersections." +msgstr "" + +#: ../../../changes.txt:549 991bc2787a2f4d19b775a8e4422f6e21 +msgid "Added `Font.bbox` property." +msgstr "" + +#: ../../../changes.txt:552 10fa3c83c527445db0d4b0f7e7b24cd8 +msgid "**Changes in version 1.23.18 (2024-01-23)**" +msgstr "" + +#: ../../../changes.txt:556 9ccb3bb34ff1478d864bf982c9ae5339 +msgid "" +"**Fixed** `3081 `_: " +"doc.close() not closing the document" +msgstr "" + +#: ../../../changes.txt:560 7512c8338a574c56b0649dfa66ab175f +msgid "" +"Reduced size of sdist to fit on pypi.org (by reducing size of two test " +"files)." +msgstr "" + +#: ../../../changes.txt:561 650a20e03bfb4dd6a57f5e07d95ccde8 +msgid "Fix `Annot.file_info()` if no `Desc` item." +msgstr "" + +#: ../../../changes.txt:564 736369637e864833b46a42f98fb0a3fb +msgid "**Changes in version 1.23.17 (2024-01-22)**" +msgstr "" + +#: ../../../changes.txt:568 55d1a4b2511e4e8f9f1d4f7acc76cf73 +msgid "" +"**Fixed** `3062 `_: " +"page_rotation_reset does not return page to original rotation" +msgstr "" + +#: ../../../changes.txt:569 1b87ec3ef9ea440491ee7ffaf5442dc8 +msgid "" +"**Fixed** `3070 `_: " +"update_link(): AttributeError: 'Page' object has no attribute 'super'" +msgstr "" + +#: ../../../changes.txt:573 3354ae51b2604ef1bbd91c15d0765dfe +msgid "Fixed bug in `Page.links()` (PR #3075)." +msgstr "" + +#: ../../../changes.txt:574 f9597ac8b8c04a87b6355b6381e9c173 +msgid "Fixed bug in `Page.get_bboxlog()` with layers." +msgstr "" + +#: ../../../changes.txt:575 61706baf4b1a4827ad7d8a87e267ee9c +msgid "Add support for timeouts in scripts/ and tests/run_compound.py." +msgstr "" + +#: ../../../changes.txt:578 87825e0e0580489298d3e62719284757 +msgid "**Changes in version 1.23.16 (2024-01-18)**" +msgstr "" + +#: ../../../changes.txt:582 6fbf8912a655447883da79f095cf40f3 +msgid "" +"**Fixed** `3058 `_: " +"Pixmap created from CMYK JPEG delivers RGB format" +msgstr "" + +#: ../../../changes.txt:586 3fcc09104dc54f6b8d270fa03621bb00 +msgid "" +"In table detection strategy \"lines_strict\", exclude fill-only vector " +"graphics." +msgstr "" + +#: ../../../changes.txt:587 18db20b909ed41d3bf2cac2deb0fa70b +msgid "Fixed sysinstall test failure." +msgstr "" + +#: ../../../changes.txt:588 e362018465f2455daa6c79f51bae7526 +msgid "In documentation, update feature matrix with item about text writing." +msgstr "" + +#: ../../../changes.txt:591 cca812b622a748ca83a5db1bceb35a29 +msgid "**Changes in version 1.23.15 (2024-01-16)**" +msgstr "" + +#: ../../../changes.txt:595 1c597102d34744a398afce8e168a32b3 +msgid "" +"**Fixed** `3050 `_: " +"python3.9 pix.set_pixel has something wrong in c.append( ord(i))" +msgstr "" + +#: ../../../changes.txt:599 17297b8ae3e14dc1b006574871337dfe +msgid "Improved docs for Page.find_tables()." +msgstr "" + +#: ../../../changes.txt:602 0be3eb0c81b842048bb25d1707fbc004 +msgid "**Changes in version 1.23.14 (2024-01-15)**" +msgstr "" + +#: ../../../changes.txt:606 adf2fde25feb42ee8e8dc348a940227c +msgid "" +"**Fixed** `3038 `_: " +"JM_pixmap_from_display_list > Assertion Error : Checking for wrong type" +msgstr "" + +#: ../../../changes.txt:607 8b1406193b814638b27734a78c18c7b4 +msgid "" +"**Fixed** `3039 `_: Issue" +" with doc.close() not closing the document in PyMuPDF" +msgstr "" + +#: ../../../changes.txt:611 c9aa04500860461dabc38810c046cd6a +msgid "" +"Ensure valid \"re\" rectangles in `Page.get_drawings()` with derotated " +"pages." +msgstr "" + +#: ../../../changes.txt:614 18aa23cc095044229dd7f2195997032c +msgid "**Changes in version 1.23.13 (2024-01-15)**" +msgstr "" + +#: ../../../changes.txt:618 2dbe91af362a4a399e3733f21966661d +msgid "" +"**Fixed** `2979 `_: list " +"index out of range in to_pandas()" +msgstr "" + +#: ../../../changes.txt:619 3a7d51d26e2d40bdb6dacee4ff2962ed +msgid "" +"**Fixed** `3001 `_: " +"Calling find_tables() on one document alters the bounding boxes of a " +"subsequent document" +msgstr "" + +#: ../../../changes.txt:623 8e60dc7df1b04c808b398ba3b5c52a56 +msgid "Fixed `Rect.height` and `Rect.width` to never return negative values." +msgstr "" + +#: ../../../changes.txt:624 507a19087e1c40e8ab2cac0737e3bc2d +msgid "Fixed `TextPage.extractIMGINFO()`'s returned `dictkey_yres` value." +msgstr "" + +#: ../../../changes.txt:627 bafc5067041f4769b3ed5a5aeca8c925 +msgid "**Changes in version 1.23.12 (2024-01-12)**" +msgstr "" + +#: ../../../changes.txt:629 e74b60af0b4b494da3e81f08ff009346 +msgid "" +"**Fixed** `3027 `_: " +"Page.get_text throws Attribute Error for 'parent'" +msgstr "" + +#: ../../../changes.txt:632 310201258dd94b6286f7e3ec62fc609b +msgid "**Changes in version 1.23.11 (2024-01-12)**" +msgstr "" + +#: ../../../changes.txt:634 f3a1d737a1e34534a21c64422ef7a16d +msgid "Fixed some Pixmap construction bugs." +msgstr "" + +#: ../../../changes.txt:635 ce6a20341ecf4f58a0b492347fbe901f +msgid "Fixed Pixmap.yres()." +msgstr "" + +#: ../../../changes.txt:638 718d5223c71645788572cc52333b0c6b +msgid "**Changes in version 1.23.10 (2024-01-12)**" +msgstr "" + +#: ../../../changes.txt:642 1bfe098162d04a6181bdae28701c7aca +msgid "" +"**Fixed** `3020 `_: Can't" +" resize a PixMap" +msgstr "" + +#: ../../../changes.txt:646 81b4d02c910443928a7ba27b146ce644 +msgid "Fixed Page.delete_image()." +msgstr "" + +#: ../../../changes.txt:649 2f563b04be5d48acbb8d66e2933ff1ac +msgid "**Changes in version 1.23.9 (2024-01-11)**" +msgstr "" + +#: ../../../changes.txt:651 6d20fb5bb4c243d588cbda8bc937bcfe +msgid "Default to new \"rebased\" implementation." +msgstr "" + +#: ../../../changes.txt:653 b20b8456cbf9479cac6a539bd20e0f6b +msgid "" +"The old \"classic\" implementation is available with `import fitz_old as " +"fitz`." +msgstr "" + +#: ../../../changes.txt:654 a0b841fa56684519a65856b953999265 +msgid "" +"For more information about why we are changing to the rebased " +"implementation, see: https://github.com/pymupdf/PyMuPDF/discussions/2680" +msgstr "" + +#: ../../../changes.txt:657 6f5720005cb948d0b9d22671750280b9 +msgid "Use MuPDF-1.23.9." +msgstr "" + +#: ../../../changes.txt:659 ../../../changes.txt:691 +#: d2565dd858fe4809876140f81ca1d770 efcc0badb1bb4a608130b04d97190234 +msgid "Bug fixes (rebased implementation only):" +msgstr "" + +#: ../../../changes.txt:661 20bdf1ba55b84a3b9382264646f0bcfa +msgid "" +"**Fixed** `2911 `_: " +"Page.derotation_matrix returns a tuple instead of a Matrix with rebased " +"implementation" +msgstr "" + +#: ../../../changes.txt:662 ac7ebec2bed84d62be57b5f5f4444099 +msgid "" +"**Fixed** `2919 `_: " +"Rebased version: KeyError in resolve_names when merging pdfs" +msgstr "" + +#: ../../../changes.txt:663 5a77fe644c304e3c9613de239eaf6f83 +msgid "" +"**Fixed** `2922 `_: New " +"feature that allows inserting named-destination links doesn't work" +msgstr "" + +#: ../../../changes.txt:664 746e5e364e4b43cebab8657aea2c8939 +msgid "" +"**Fixed** `2943 `_: " +"ZeroDivisionError: float division by zero when use apply_redactions()" +msgstr "" + +#: ../../../changes.txt:665 0f4cbfe997144e80a1ecb838706ef7da +msgid "" +"**Fixed** `2950 `_: " +"Shelling out to pip during tests is problematic" +msgstr "" + +#: ../../../changes.txt:666 7e81791c8ae94fb392d4aa6614a89b66 +msgid "" +"**Fixed** `2954 `_: " +"Replacement unicode character in text extraction" +msgstr "" + +#: ../../../changes.txt:667 2c26f1c85a284af597498e52289657a8 +msgid "" +"**Fixed** `2957 `_: " +"apply_redactions() moving text" +msgstr "" + +#: ../../../changes.txt:668 dd270b5826c24236859e17b3a657e043 +msgid "" +"**Fixed** `2961 `_: " +"Passing a string as a page number raises IndexError instead of TypeError." +msgstr "" + +#: ../../../changes.txt:669 e997aefd50bf4fa6af9b8fdbbb69071d +msgid "" +"**Fixed** `2969 `_: " +"annot.next throws AttributeError" +msgstr "" + +#: ../../../changes.txt:670 ac556f9c78514dd3b79831a806f2a39b +msgid "" +"**Fixed** `2978 `_: " +"1.23.9rc1: module 'fitz.mupdf' has no attribute 'fz_copy_pixmap_rect'" +msgstr "" + +#: ../../../changes.txt:672 a9c0241f244a445f821fdf6787b33919 +msgid "" +"**Fixed** `2907 `_: " +"segfault trying to call clean_contents on certain pdfs with python 3.12" +msgstr "" + +#: ../../../changes.txt:673 7d51a6025fdd44948dbc17637dc12062 +msgid "" +"**Fixed** `2905 `_: " +"SystemError: returned a " +"result with an exception set" +msgstr "" + +#: ../../../changes.txt:674 38ea8b10ef9c410ab1c2990c35563be7 +msgid "" +"**Fixed** `2742 `_: " +"Segmentation Fault when inserting three (but not two) copies of the same " +"source page into one destination page" +msgstr "" + +#: ../../../changes.txt:678 181e94f319da45c2868eb7f80a6a2c5f +msgid "Add optional setting of opacity to `Page.insert_htmlbox()`." +msgstr "" + +#: ../../../changes.txt:679 dcf0014ae1d54482b470bae54bcf9c68 +msgid "Fixed issue with add_redact_annot() mentioned in #2934." +msgstr "" + +#: ../../../changes.txt:680 aa5dc3c1ab8c4a6389e94f6d9a4111e6 +msgid "" +"Fixed `Page.rotation()` to return 0 for non-PDF documents instead of " +"raising an exception." +msgstr "" + +#: ../../../changes.txt:681 48c15430aa454909aac1d2c988486768 +msgid "Fixed internal quad detection to cope with any Python sequence." +msgstr "" + +#: ../../../changes.txt:682 45e881e1daaa47c8871ffa544bc60c46 +msgid "" +"Fixed rebased `fitz.pymupdf_version_tuple` - was previously set to mupdf " +"version." +msgstr "" + +#: ../../../changes.txt:683 e0283fe9d2f1459f85915e361dfd8b11 +msgid "" +"Improved support for Linux system installs, including adding regular " +"testing on Github." +msgstr "" + +#: ../../../changes.txt:684 20d0efc7f5094d35ada7f05f8ac2fdfd +msgid "Add missing `flake8` to `scripts/gh_release.py:test_packages`." +msgstr "" + +#: ../../../changes.txt:685 578cbeccb8484e299da16b03106d149b +msgid "Use newly public functions in MuPDF-1.23.8." +msgstr "" + +#: ../../../changes.txt:686 3db94cc7bfc54321b25942d0d2040ec5 +msgid "Improved `scripts/test.py` to help investigation of MuPDF issues." +msgstr "" + +#: ../../../changes.txt:689 673b7a0e8db64275b8740204f43bbaa0 +msgid "**Changes in version 1.23.8 (2023-12-19)**" +msgstr "" + +#: ../../../changes.txt:693 27bee795f9db48a1af8c2173e470344f +msgid "" +"**Fixed** `2634 `_: " +"get_toc and set_toc do not behave consistently for rotated pages" +msgstr "" + +#: ../../../changes.txt:694 86d8eddf3d654428b7a4ccc6bdd65c53 +msgid "" +"**Fixed** `2861 `_: " +"AttributeError in getLinkDict during PDF Merge" +msgstr "" + +#: ../../../changes.txt:695 f68c5320044a4f9ab5d48e60e9d064d1 +msgid "" +"**Fixed** `2871 `_: " +"KeyError in getLinkDict during PDF merge" +msgstr "" + +#: ../../../changes.txt:696 03fafa317ff34cc5ba294bec5fc4a46f +msgid "" +"**Fixed** `2886 `_: Error" +" in Skeleton for Named Link Destinations" +msgstr "" + +#: ../../../changes.txt:698 ../../../changes.txt:731 +#: 2ed69860112e407a8e4399a82a72eee4 8ab85d5c3d684f5f9c99fec79a96af7a +msgid "Bug fixes (rebased and classic implementations):" +msgstr "" + +#: ../../../changes.txt:700 4f8cdaebded3474393bcd7df8c6b024d +msgid "" +"**Fixed** `2885 `_: " +"pymupdf find tables too slow" +msgstr "" + +#: ../../../changes.txt:704 ../../../changes.txt:744 +#: 62f080e1b1104d6f9f0776495ed8e257 c3af2467a2ca49c781374590dc47adac +msgid "Rebased implementation:" +msgstr "" + +#: ../../../changes.txt:706 8a0141451676455e983d7a0f4c5f571b +msgid "" +"`Page.insert_htmlbox()`: new, much more powerful alternative to " +"`Page.insert_textbox()` or `TextWriter.fill_textbox()`, using `Story`." +msgstr "" + +#: ../../../changes.txt:707 6ca89e418e874daba4e230dacf411ccd +msgid "`Story.fit*()`: new methods for fitting a Story into an expanded rect." +msgstr "" + +#: ../../../changes.txt:708 f0205212a50b43769bd3999683570c72 +msgid "`Story.write_with_links()`: add support for external links." +msgstr "" + +#: ../../../changes.txt:709 21f8dcec43f54291a78d41070e8bf3ec +msgid "" +"`Document.language()`: fixed to use MuPDF's new " +"`mupdf.fz_string_from_text_language2()`." +msgstr "" + +#: ../../../changes.txt:710 2801099cf0f14a9f9c9037e4aa10b76f +msgid "`Document.subset_fonts()` - fixed." +msgstr "" + +#: ../../../changes.txt:711 906b2a0aa08e47f0b0f3dec950e6dda0 +msgid "Fixed internal `Archive._add_treeitem()` method." +msgstr "" + +#: ../../../changes.txt:712 9498eb7fa8b54530baa58998e349319a +msgid "" +"Fixed `fitz_new.__doc__` to contain PyMuPDF and Python version " +"information, and OS name." +msgstr "" + +#: ../../../changes.txt:713 b16e18a9cd7043b1acdfe4d3feb43607 +msgid "" +"Removed use of `(*args, **kwargs)` in API, we now specify keyword args " +"explicitly." +msgstr "" + +#: ../../../changes.txt:714 ccb9c77581ba4b9da3cca7b915aeaf85 +msgid "Work with new MuPDF Python exception classes." +msgstr "" + +#: ../../../changes.txt:716 021169327c414972afa9eba3d09c63a7 +msgid "" +"Fixed bug where `button_states()` returns None when `/AP` points to an " +"indirect object." +msgstr "" + +#: ../../../changes.txt:717 97298fff9afc41548002a48459f1cc69 +msgid "" +"Fixed pillow test to not ignore all errors, and install pillow when " +"testing." +msgstr "" + +#: ../../../changes.txt:718 b082ce6c6ad44f21b5f2a29e1a59665a +msgid "" +"Added test for `fitz.css_for_pymupdf_font()` (uses package `pymupdf-" +"fonts`)." +msgstr "" + +#: ../../../changes.txt:719 1407d4a104764d73bb318596f91e4364 +msgid "Simplified Github Actions test specifications." +msgstr "" + +#: ../../../changes.txt:720 5f6270725b28426a8cd4971ad54e74dd +msgid "Updated `tests/README.md`." +msgstr "" + +#: ../../../changes.txt:723 4c895961905c49f68ab2650c2b8a9fe6 +msgid "**Changes in version 1.23.7 (2023-11-30)**" +msgstr "" + +#: ../../../changes.txt:725 ee5d38f2ea9e42219cccad407131b04f +msgid "Bug fixes in rebased implementation, not fixed in classic implementation:" +msgstr "" + +#: ../../../changes.txt:727 339b320cee7741c9932825cde87fef69 +msgid "" +"**Fixed** `2232 `_: " +"Geometry helper classes should support keyword arguments" +msgstr "" + +#: ../../../changes.txt:728 2fa707dd9edf4f94bb5637bd4a28d0eb +msgid "" +"**Fixed** `2788 `_: " +"Problem with get_toc in pymupdf 1.23.6" +msgstr "" + +#: ../../../changes.txt:729 9a1815349395436d91b72b0c7e56d6b4 +msgid "" +"**Fixed** `2791 `_: " +"Experiencing small memory leak in save()" +msgstr "" + +#: ../../../changes.txt:733 bfca811530f74ec9bc23e87f41d1a268 +msgid "" +"**Fixed** `2736 `_: " +"Failure when set cropbox with mediabox negative value" +msgstr "" + +#: ../../../changes.txt:734 427c74b2289b45e0ad3edd747029ac09 +msgid "" +"**Fixed** `2749 `_: " +"RuntimeError: cycle in structure tree" +msgstr "" + +#: ../../../changes.txt:735 6e34101c4b044ad9960add282084ba39 +msgid "" +"**Fixed** `2753 `_: " +"Story.write_with_links will ignore everything after the first \"page " +"break\" in the HTML." +msgstr "" + +#: ../../../changes.txt:736 bafe1f526e0244eda987ab9f013ff61e +msgid "" +"**Fixed** `2812 `_: " +"find_tables on landscape page generates reversed text" +msgstr "" + +#: ../../../changes.txt:737 214f18e25a6a4ca1abfea39f7b0cef01 +msgid "" +"**Fixed** `2829 `_: " +"[cannot create /Annot for kind] is still printed despite #2345 is closed." +msgstr "" + +#: ../../../changes.txt:738 d263b1528eb54029b5f51e2b3acd0620 +msgid "" +"**Fixed** `2841 `_: " +"Unexpected KeyError when using scrub with fitz_new" +msgstr "" + +#: ../../../changes.txt:740 54350c8a28f145cf8571fda8f170f077 +msgid "Use MuPDF-1.23.7." +msgstr "" + +#: ../../../changes.txt:746 b4acef5e7f2d4bc98e9fa80a3b1c9c31 +msgid "Added flake8 code checking to test suite, and made various fixes." +msgstr "" + +#: ../../../changes.txt:747 92d76d3ade924b2aa67472c4b074763f +msgid "" +"Disable diagnostics during Document constructor to match classic " +"implementation." +msgstr "" + +#: ../../../changes.txt:749 ed4f292e614b4cb684bf7926e7655785 +msgid "" +"Additional fix to `2553 " +"`_: Invalid characters in" +" versions >= 1.22" +msgstr "" + +#: ../../../changes.txt:750 711655a363fe44deb8fb34ced91aab74 +msgid "" +"Fixed `MuPDF Bug 707324 " +"`_: Story: HTML " +"table row background color repeated incorrectly" +msgstr "" + +#: ../../../changes.txt:751 76c1bd2fc5f446e2bcc6a7a4d4bb8859 +msgid "Added `scripts/test.py`, for simple build+test of PyMuPDF git checkout." +msgstr "" + +#: ../../../changes.txt:752 8cca03c711524887828a6683b47143e1 +msgid "Added `fitz.pymupdf_version_tuple`, e.g. `(1, 23, 6)`." +msgstr "" + +#: ../../../changes.txt:753 40d15324bd114040a1c4035b8cdb89e4 +msgid "" +"Restored mistakenly-reverted fix for `2345 " +"`_: Turn off print " +"statements in utils.py" +msgstr "" + +#: ../../../changes.txt:754 881c24d332d94532ae730453f9a7fde0 +msgid "" +"Include any trailing `... repeated times...` text in warnings " +"returned by `mupdf_warnings()` (rebased only)." +msgstr "" + +#: ../../../changes.txt:758 d872853707ad4064931d00a196bbf97c +msgid "**Changes in version 1.23.6 (2023-11-06)**" +msgstr "" + +#: ../../../changes.txt:762 017033c49b804434af6eca76ab5827d3 +msgid "" +"**Fixed** `2553 `_: " +"Invalid characters in versions >= 1.22" +msgstr "" + +#: ../../../changes.txt:763 358d5c6ef045454f8bab98837ad326ad +msgid "" +"**Fixed** `2608 `_: " +"Incorrect utf32 text extraction (high & low surrogates are split)" +msgstr "" + +#: ../../../changes.txt:764 ../../../changes.txt:787 +#: 5fcdc8e45b5f4c609832c86a76099d85 de6f3b5dd8474b86a5265fe1cbc728ff +msgid "" +"**Fixed** `2710 `_: " +"page.rect and text location wrong / differing from older version" +msgstr "" + +#: ../../../changes.txt:765 acf583b7967b4b5ea619c6947569d17a +msgid "" +"**Fixed** `2774 `_: wrong" +" encoding for \"\\?\" character when sort=True" +msgstr "" + +#: ../../../changes.txt:766 392abb749f624b8bb1886892851d3b4f +msgid "" +"**Fixed** `2775 `_: " +"fitz_new does not work with python3.10 or earlier" +msgstr "" + +#: ../../../changes.txt:767 cf5b1039a1ba4662a91c375d0033da48 +msgid "" +"**Fixed** `2777 `_: With " +"fitz_new, wrong type for Page.mediabox" +msgstr "" + +#: ../../../changes.txt:771 9634e1f7d7a248b49a6629a35d9c5293 +msgid "Use MuPDF-1.23.5." +msgstr "" + +#: ../../../changes.txt:772 662402905c614a83b0c186684ce35a68 +msgid "Added Document.resolve_names() (rebased implementation only)." +msgstr "" + +#: ../../../changes.txt:775 76329cea89ff45a2a4ed0d5adbc3bd31 +msgid "**Changes in version 1.23.5 (2023-10-11)**" +msgstr "" + +#: ../../../changes.txt:779 278264a9f0f849abb0a3e7e51e4bcf33 +msgid "" +"**Fixed** `2341 `_: " +"Handling negative values in the zoom section for LINK_GOTO in linkDest" +msgstr "" + +#: ../../../changes.txt:780 6253460783d7418e8497f1cdb2875fb2 +msgid "" +"**Fixed** `2522 `_: Typo " +"in set_layer() - NameError: name 'f' is not defined" +msgstr "" + +#: ../../../changes.txt:781 a732f99e47504c389bb01f236abf5e74 +msgid "" +"**Fixed** `2548 `_: Fitz " +"freezes on some PDFs when calling the fitz.Page.get_text_blocks method." +msgstr "" + +#: ../../../changes.txt:782 d262ff81210441cc9f4c6293238c033f +msgid "" +"**Fixed** `2596 `_: " +"save(garbage=3) breaks get_pixmap() with side effect" +msgstr "" + +#: ../../../changes.txt:783 0decd50e22c34976b4dadde46da004ae +msgid "" +"**Fixed** `2635 `_: " +"\"clean=True\" makes objects invisible in the pdf" +msgstr "" + +#: ../../../changes.txt:784 ../../../changes.txt:812 +#: b72045cf3ebc425297bb5fca2e2fdf14 e18e697a3c2445b39268c151c595a94d +msgid "" +"**Fixed** `2637 `_: " +"Page.insert_textbox incorrectly handles the last word if it starts a new " +"line" +msgstr "" + +#: ../../../changes.txt:785 3905936b13384ba1850bc2457da78654 +msgid "" +"**Fixed** `2699 `_: " +"extract paragraph with below table" +msgstr "" + +#: ../../../changes.txt:786 4659c53925194789a37ddb05f4684a45 +msgid "" +"**Fixed** `2703 `_: Wrong" +" fontsize calculation in corner cases (\"page.get_texttrace()\")" +msgstr "" + +#: ../../../changes.txt:788 068fb89a2aee49bfbda8da27b7d7bbf6 +msgid "" +"**Fixed** `2723 `_: When " +"will a Python 3.12 wheel be available?" +msgstr "" + +#: ../../../changes.txt:789 07bbf5ae7e4749c6b873df336093b596 +msgid "" +"**Fixed** `2730 `_: " +"persistent get_text() formatting" +msgstr "" + +#: ../../../changes.txt:793 827be9716c0f45799f828806f32e4a46 +msgid "Use MuPDF-1.23.4." +msgstr "" + +#: ../../../changes.txt:794 f6586981ac234cd396fa43818ecfce43 +msgid "Fix optimisation flags with system installs." +msgstr "" + +#: ../../../changes.txt:795 8a8d3c26e07f4e27a72876c93c28ce4a +msgid "" +"Fixed the problem that the clip parameter does not take effect during " +"table recognition" +msgstr "" + +#: ../../../changes.txt:796 b0a3f1403dde481e9883aa2c6580ecda +msgid "Support Pillow mode \"RGBa\"" +msgstr "" + +#: ../../../changes.txt:797 9d0ff8236a1849b7a38fc376b7872fb6 +msgid "Support extra word delimiters" +msgstr "" + +#: ../../../changes.txt:798 75856de791ff4f39a56e2fcf268e750f +msgid "Support checking valid PDF name objects" +msgstr "" + +#: ../../../changes.txt:801 103cc581cd244d47af9758d05415b2e8 +msgid "**Changes in version 1.23.4 (2023-09-26)**" +msgstr "" + +#: ../../../changes.txt:803 bae913132e3449a7aa423306e2f8b684 +msgid "Improved build instructions." +msgstr "" + +#: ../../../changes.txt:804 4d9016b46972494eabae36e8d4fe9c46 +msgid "Fixed Tesseract in rebased implementation." +msgstr "" + +#: ../../../changes.txt:805 64583d37800d4f42a128cf51dae7384b +msgid "Improvements to build/install with system MuPDF." +msgstr "" + +#: ../../../changes.txt:807 44a01e18ad854989a0727acf285e4304 +msgid "Fixed rebased bug in _insert_image()." +msgstr "" + +#: ../../../changes.txt:811 77077284a0444ad685e45b0e86c65327 +msgid "" +"**Fixed** `2556 `_: " +"Segmentation fault at caling get_cdrawings(extended=True)" +msgstr "" + +#: ../../../changes.txt:813 0f46924776234c96a9f158798ecdb3f6 +msgid "" +"**Fixed** `2683 `_: " +"Windows sdist build failure - non-quoting of path and using UNIX which " +"command" +msgstr "" + +#: ../../../changes.txt:814 42c749d57f6f410e85ae658701f0ec53 +msgid "" +"**Fixed** `2691 `_: " +"Page.get_textpage_ocr() bug in rebased fitz_new version" +msgstr "" + +#: ../../../changes.txt:815 178d0039852645d5ba7349eec91de6f8 +msgid "" +"**Fixed** `2692 `_: " +"Page.get_pixmap(clip=Rect()) bug in rebased fitz_new version" +msgstr "" + +#: ../../../changes.txt:818 4c096fdef7c84aa3ad3766da8fd829f4 +msgid "**Changes in version 1.23.3 (2023-08-31)**" +msgstr "" + +#: ../../../changes.txt:820 fc9cfa91ff044d6097a979eca1cd971d +msgid "Fixed use of Tesseract for OCR." +msgstr "" + +#: ../../../changes.txt:823 843dbd95c3184f77973f45ffe92b27d8 +msgid "**Changes in version 1.23.2 (2023-08-28)**" +msgstr "" + +#: ../../../changes.txt:825 108f5c4365ff4f6a96491fae88a54640 +msgid "" +"**Fixed** `#2613 `_: " +"release 1.23.0 not MacOS-arm64 compatible" +msgstr "" + +#: ../../../changes.txt:828 e1b6d0f984f742b3bc6d7b84e674ddf7 +msgid "**Changes in version 1.23.1 (2023-08-24)**" +msgstr "" + +#: ../../../changes.txt:830 7476cc3c617145c9bac4b857ce5ce62e +msgid "Updated README and package summary description." +msgstr "" + +#: ../../../changes.txt:833 327a22dab196488bb7066d63d09f7e08 +msgid "" +"Fixed a problem on some Linux installations with Python-3.10 (and " +"possibly earlier versions) where `import fitz` failed with `ImportError: " +"libcrypt.so.2: cannot open shared object file: No such file or " +"directory`." +msgstr "" + +#: ../../../changes.txt:839 78e712a6bd7043dd90df607b90c4142a +msgid "Fixed `incompatible architecture` error on MacOS arm64." +msgstr "" + +#: ../../../changes.txt:842 6241fef4bb044910a27d4f7677c71242 +msgid "" +"Fixed installation warning from Poetry about missing entry in wheels' " +"RECORD files." +msgstr "" + +#: ../../../changes.txt:846 9df0ef853ccb4d338b1ca86b7cedb6e7 +msgid "**Changes in version 1.23.0 (2023-08-22)**" +msgstr "" + +#: ../../../changes.txt:848 0f1bdd72c98343f699861966bf0f6c3a +msgid "Add method `find_tables()` to the `Page` object." +msgstr "" + +#: ../../../changes.txt:850 8654939c7c024ed6adf4f5d10c1abe7d +msgid "" +"This allows locating tables on any supported document page, and " +"extracting table content by cell." +msgstr "" + +#: ../../../changes.txt:853 77b029509a4d4c3f8718cf2c64b3ec54 +msgid "New \"rebased\" implementation of PyMuPDF." +msgstr "" + +#: ../../../changes.txt:855 48d881e482ca4bc4b99b9bb9ca4f6625 +msgid "" +"The rebased implementation is available as Python module `fitz_new`. It " +"can be used as a drop-in replacement with `import fitz_new as fitz`." +msgstr "" + +#: ../../../changes.txt:860 fbda2a6a43fe4b5483bee1d3fb17d59c +msgid "" +"Python-independent MuPDF libraries are now in a second wheel called " +"`PyMuPDFb` that will be automatically installed by pip." +msgstr "" + +#: ../../../changes.txt:863 df0c9eaf56814168af7e0ebc45caac53 +msgid "" +"This is to save space on pypi.org - a full release only needs one " +"`PyMuPDFb` wheel for each OS." +msgstr "" + +#: ../../../changes.txt:868 4251f217a0ba434490bc7d53700d44d6 +msgid "" +"**Fixed** `#2542 `_: " +"fitz.utils.scrub AttributeError Annot object has no attribute fileUpd " +"inside" +msgstr "" + +#: ../../../changes.txt:869 a17ff529d1f84c619004c50fe455fa8a +msgid "" +"**Fixed** `#2533 `_: " +"get_texttrace returned a incorrect character bbox" +msgstr "" + +#: ../../../changes.txt:870 2cc1abba37874658a8a5b10a09b70f8d +msgid "" +"**Fixed** `#2537 `_: " +"Validation when setting a grouped RadioButton throws a RuntimeError: path" +" to 'V' has indirects" +msgstr "" + +#: ../../../changes.txt:872 ../../../changes.txt:1672 ../../../changes.txt:1836 +#: 2adce533804341359e79e543c8e32597 be0ad050d76848abaf5c4bf2dd926f47 +#: f10552d4ef324ed183963f71d17589ae +msgid "Other changes:" +msgstr "" + +#: ../../../changes.txt:874 f4c93428b1e04e399bce86345ce4ac8e +msgid "Dropped support for Python-3.7." +msgstr "" + +#: ../../../changes.txt:876 b5a147c23d654aa8a1374ec85166af54 +msgid "Fix for wrong page / annot `/Contents` cleaning." +msgstr "" + +#: ../../../changes.txt:878 cf56d3d3eb394479b64940c95114dd3a +msgid "We need to set `pdf_filter_options::no_update` to zero." +msgstr "" + +#: ../../../changes.txt:880 10366ab4e9324b888e0f48622b5ad6d2 +msgid "Added new function get_tessdata()." +msgstr "" + +#: ../../../changes.txt:882 ac474f74dbc74809b4e8808748f474ad +msgid "Cope with problem `/Annot` arrays." +msgstr "" + +#: ../../../changes.txt:884 d3741f21a88842cc8c8e13309642ee75 +msgid "" +"When copying page annotations in method Document.insert_pdf we previously" +" did not check the validity of members of the `/Annots` array. For " +"faulty members (like null or non-dictionary items) this could cause " +"unnecessary exceptions. This fix implements more checks and skips such " +"array items." +msgstr "" + +#: ../../../changes.txt:890 bdd753977a14421c918e39451c8e6e85 +msgid "Additional annotation type checks." +msgstr "" + +#: ../../../changes.txt:892 d8f4cedfef5240a488f92dd78858f257 +msgid "" +"We did not previously check for annotation type when getting / setting " +"annotation border properties. This is now checked in accordance with " +"MuPDF." +msgstr "" + +#: ../../../changes.txt:896 f352989e6f9847a0bfde4583e01e8c86 +msgid "Increase fault tolerance." +msgstr "" + +#: ../../../changes.txt:898 17d1fe7f81e94a2e946f57aeba6049c0 +msgid "" +"Avoid exceptions in method `insert_pdf()` when source pages contains " +"invalid items in the `/Annots` array." +msgstr "" + +#: ../../../changes.txt:901 b447a8623ebc4b269d6c93078450a4c6 +msgid "Return empty border dict for applicable annots." +msgstr "" + +#: ../../../changes.txt:903 af6d6b18663348e2bcd095ee59c9dabd +msgid "" +"We previously were returning a non-empty border dictionary even for non-" +"applicable annotation types. We now return the empty dictionary `{}` in " +"these cases. This requires some corresponding changes in the annotation " +"`.update()` method, namely for dashes and border width." +msgstr "" + +#: ../../../changes.txt:908 e3a589bf76fc4a7283ee0a15af5bd26e +msgid "Restrict `set_rect` to applicable annot types." +msgstr "" + +#: ../../../changes.txt:910 f25c7e27919646af84d880d0df6cbe89 +msgid "" +"We were insufficiently excluding non-applicable annotation types from " +"`set_rect()` method. We now let MuPDF catch unsupported annotations and " +"return `False` in these cases." +msgstr "" + +#: ../../../changes.txt:914 0e0af9a092254efcba378caad6d4a1b2 +msgid "Wrong fontsize computation in `page.get_texttrace()`." +msgstr "" + +#: ../../../changes.txt:916 cfe8ace08235424d901473a53c00d453 +msgid "" +"When computing the font size we were using the final text transformation " +"matrix, where we should have taken `span->trm` instead. This is " +"corrected here." +msgstr "" + +#: ../../../changes.txt:920 7dcc1debeebd455098741379fe8da1fe +msgid "Updates to cope with changes to latest MuPDF." +msgstr "" + +#: ../../../changes.txt:922 05106636db4e491f81981ac17971f470 +msgid "`pdf_lookup_anchor()` has been removed." +msgstr "" + +#: ../../../changes.txt:924 bbad1ed45e0d485da43a2d0aad55e753 +msgid "Update fill_textbox to better respect rect.width" +msgstr "" + +#: ../../../changes.txt:926 229efd97000941c89ca97bf36fe92209 +msgid "" +"The function norm_words in fill_textbox had a bug in its last loop, " +"appending n+1 characters when actually measuring width of n characters. " +"It led to a bug in fill_texbox when you tried to write a single word " +"mostly composed of \"wide\" letters (M,m, W, w...), causing the written " +"text to exceed the given rect." +msgstr "" + +#: ../../../changes.txt:932 764335f5358e4e81b74fe45a3458952b +msgid "The fix was just to replace n+1 by n." +msgstr "" + +#: ../../../changes.txt:934 eb1125385bca41c79ecb0f555ed03f3a +msgid "Add `script_focus` and `script_blur` options to widget." +msgstr "" + +#: ../../../changes.txt:938 20a01705be1c42399f0d8106509b1705 +msgid "**Changes in version 1.22.5 (2023-06-21)**" +msgstr "" + +#: ../../../changes.txt:940 193f20603e9040de955e1b4717906ac6 +msgid "This release uses ``MuPDF-1.22.2``." +msgstr "" + +#: ../../../changes.txt:944 7e9cc37cde864f90bdb0957f1ffbc8c1 +msgid "" +"**Fixed** `#2365 `_: " +"Incorrect dictionary values for type \"fs\" drawings." +msgstr "" + +#: ../../../changes.txt:945 64b07853adc043ddb24d4d665d7a3124 +msgid "" +"**Fixed** `#2391 `_: " +"Check box automatically uncheck when we update same checkbox more than 1 " +"times." +msgstr "" + +#: ../../../changes.txt:946 3c2b547afceb432281c28b59289df4ef +msgid "" +"**Fixed** `#2400 `_: Gaps" +" within text of same line not filled with spaces." +msgstr "" + +#: ../../../changes.txt:947 91b42babacf04225a8286e2635ccc155 +msgid "" +"**Fixed** `#2404 `_: " +"Blacklining an image in PDF won't remove underlying content in version " +"1.22.X." +msgstr "" + +#: ../../../changes.txt:948 3fb91e3fac9d4e57ab84eccf1bd1be1a +msgid "" +"**Fixed** `#2430 `_: " +"Incorrectly reducing ref count of Py_None." +msgstr "" + +#: ../../../changes.txt:949 4c1e45c820644d6ea47b6b7544c1b328 +msgid "" +"**Fixed** `#2450 `_: " +"Empty fill color and fill opacity for paths with fill and stroke " +"operations with 1.22.*" +msgstr "" + +#: ../../../changes.txt:950 dc4486cae00b4a8b90e9623f727d6f73 +msgid "" +"**Fixed** `#2462 `_: " +"Error at \"get_drawing(extended=True )\"" +msgstr "" + +#: ../../../changes.txt:951 4bdf307fb18444098aa46b847a9279ba +msgid "" +"**Fixed** `#2468 `_: " +"Decode error when trying to get drawings" +msgstr "" + +#: ../../../changes.txt:952 dcc459e62006443db89ff16b42b7ecf4 +msgid "" +"**Fixed** `#2710 `_: " +"page.rect and text location wrong / differing from older version" +msgstr "" + +#: ../../../changes.txt:953 16bb9fb02df547c988a00bcf9aed6288 +msgid "" +"**Fixed** `#2723 `_: When" +" will a Python 3.12 wheel be available?" +msgstr "" + +#: ../../../changes.txt:955 53a084e9066d494e9613025a7844f581 +msgid "New features:" +msgstr "" + +#: ../../../changes.txt:957 32b82f498bb742b298afc01d469a5a50 +msgid "" +"**Changed** Annotations now support \"cloudy\" borders. The " +":attr:`Annot.border` property has the new item `clouds`, and method " +":meth:`Annot.set_border` supports the corresponding `clouds` argument." +msgstr "" + +#: ../../../changes.txt:961 fa5b909ad5c049969b5a70426120219c +msgid "" +"**Changed** Radio button widgets in the same RB group are now " +"consistently updated **if the group is defined in the standard way**." +msgstr "" + +#: ../../../changes.txt:964 3f818b95573d4e6190d9c199f13a4f8b +msgid "" +"**Added** Support for the `/Locked` key in PDF Optional Content. This " +"array inside the catalog entry `/OCProperties` can now be extracted and " +"set." +msgstr "" + +#: ../../../changes.txt:967 0a5bcfca4db34c77b9ba6e3bbdc32115 +msgid "" +"**Added** Support for new parameter `tessdata` in OCR functions. New " +"function :meth:`get_tessdata` locates the language support folder if " +"Tesseract is installed." +msgstr "" + +#: ../../../changes.txt:972 933f018b78ff48648387c805f4e9cede +msgid "**Changes in version 1.22.3 (2023-05-10)**" +msgstr "" + +#: ../../../changes.txt:974 ../../../changes.txt:983 ../../../changes.txt:992 +#: ../../../changes.txt:1004 47f8eb670d284d0f897fffcaa6fa3f26 +#: e0e436a7050742fbae1c83ee0a207bd0 f7c6470cc74041dfa05783d4a6a6ebb0 +#: feaa863c22d14668874f211c2aab1d40 +msgid "This release uses ``MuPDF-1.22.0``." +msgstr "" + +#: ../../../changes.txt:978 48cab086148b4141ad2788f665ab8237 +msgid "" +"**Fixed** `#2333 `_: " +"Unable to set any of button radio group in form" +msgstr "" + +#: ../../../changes.txt:981 1aa3a80a68144d44ac8642832bae3cdc +msgid "**Changes in version 1.22.2 (2023-04-26)**" +msgstr "" + +#: ../../../changes.txt:987 226410ff43184f65ae3dcc7c8a25b6a2 +msgid "" +"**Fixed** `#2369 `_: " +"Image extraction bugs with newer versions" +msgstr "" + +#: ../../../changes.txt:990 9c04bba73b584a398f6db314660efc4e +msgid "**Changes in version 1.22.1 (2023-04-18)**" +msgstr "" + +#: ../../../changes.txt:996 56f34ed481bc41e2ae69f3e1b1bbac12 +msgid "" +"**Fixed** `#2345 `_: Turn" +" off print statements in utils.py" +msgstr "" + +#: ../../../changes.txt:997 f2c46fa9947e40ffbfbba836e8fd11b1 +msgid "" +"**Fixed** `#2348 `_: " +"extract_image returns an extension \"flate\" instead of \"png\"" +msgstr "" + +#: ../../../changes.txt:998 a88b429f0f3d4e0f8dcbf33220f970df +msgid "" +"**Fixed** `#2350 `_: Can " +"not make widget (checkbox) to read-only by adding flags " +"PDF_FIELD_IS_READ_ONLY" +msgstr "" + +#: ../../../changes.txt:999 8f94c1295de84f11b648b15bfb96fbe5 +msgid "" +"**Fixed** `#2355 `_: " +"1.22.0 error when using get_toc (AttributeError: 'SwigPyObject' object " +"has no attribute)" +msgstr "" + +#: ../../../changes.txt:1002 b10b8e0894ad4f368cf491f22172a8a2 +msgid "**Changes in version 1.22.0 (2023-04-14)**" +msgstr "" + +#: ../../../changes.txt:1006 896ed49cac1e411cb81f50e2a020811d +msgid "Behavioural changes:" +msgstr "" + +#: ../../../changes.txt:1008 95e8b0201bea41f0a189956256db83e0 +msgid "" +"Text extraction now includes glyphs that overlap with clip rect; " +"previously they were included only if they were entirely contained within" +" the clip rect." +msgstr "" + +#: ../../../changes.txt:1014 2ecfd19348bd4ee984a6181e55629f16 +msgid "" +"**Fixed** `#1763 `_: " +"Interactive(smartform) form PDF calculation not working in pymupdf" +msgstr "" + +#: ../../../changes.txt:1015 095a18db3c5148f78b2f31fb2fef9fdf +msgid "" +"**Fixed** `#1995 `_: " +"RuntimeError: image is too high for a long paged pdf file when trying" +msgstr "" + +#: ../../../changes.txt:1016 168a22f471304f4ca1f262c61cb9d5dd +msgid "" +"**Fixed** `#2093 `_: " +"Image in pdf changes color after applying redactions" +msgstr "" + +#: ../../../changes.txt:1017 5732f0a0f91d47ae81d7ce36c7876e8c +msgid "" +"**Fixed** `#2108 `_: " +"Redaction removing more text than expected" +msgstr "" + +#: ../../../changes.txt:1018 d63c4ac7a6e845439544633a034a208f +msgid "" +"**Fixed** `#2141 `_: " +"Failed to read JPX header when trying to get blocks" +msgstr "" + +#: ../../../changes.txt:1019 c9865d0bec8e467184282fb00f777216 +msgid "" +"**Fixed** `#2144 `_: " +"Replace image throws an error" +msgstr "" + +#: ../../../changes.txt:1020 a7d71f6a989d4211a200f1787e958955 +msgid "" +"**Fixed** `#2146 `_: " +"Wrong Handling of Reference Count of \"None\" Object" +msgstr "" + +#: ../../../changes.txt:1021 8c0ae982437c44298f874d491106d1a9 +msgid "" +"**Fixed** `#2161 `_: " +"Support adding images as pages directly" +msgstr "" + +#: ../../../changes.txt:1022 0a237d7a362748edb23cf7a968f3173e +msgid "" +"**Fixed** `#2168 `_: " +"``page.add_highlight_annot(start=pointa, stop=pointb)`` not working" +msgstr "" + +#: ../../../changes.txt:1023 2e16353bde914eacb5bb7494a02dabb7 +msgid "" +"**Fixed** `#2173 `_: " +"Double free of ``Colorspace`` used in ``Pixmap``" +msgstr "" + +#: ../../../changes.txt:1024 2979dd9f661d4da48dcc6f855d06eddc +msgid "" +"**Fixed** `#2179 `_: " +"Incorrect documentation for ``pixmap.tint_with()``" +msgstr "" + +#: ../../../changes.txt:1025 8548ef0b665b49649bfdca09818cf198 +msgid "" +"**Fixed** `#2208 `_: " +"Pushbutton widget appears as check box" +msgstr "" + +#: ../../../changes.txt:1026 60c3f9c4943540e99bcb62b739d017f5 +msgid "" +"**Fixed** `#2210 `_: " +"``apply_redactions()`` move pdf text to right after redaction" +msgstr "" + +#: ../../../changes.txt:1027 1ccb0299fd46433c8897acb4e310a3f0 +msgid "" +"**Fixed** `#2220 `_: " +"``Page.delete_image()`` | object has no attribute ``is_image``" +msgstr "" + +#: ../../../changes.txt:1028 7c746833cff44f158ec32c0d8e24cc68 +msgid "" +"**Fixed** `#2228 `_: open" +" some pdf cost too much time" +msgstr "" + +#: ../../../changes.txt:1029 e314c7c572ab498d88cfb5c35f637a88 +msgid "" +"**Fixed** `#2238 `_: Bug " +"- can not extract data from file in the newest version 1.21.1" +msgstr "" + +#: ../../../changes.txt:1030 7d5f3aefde2a4441834be423e385bc59 +msgid "" +"**Fixed** `#2242 `_: " +"Python quits silently in ``Story.element_positions()`` if callback " +"function prototype is wrong" +msgstr "" + +#: ../../../changes.txt:1031 6ec48556e40b4a2eb184fd2525e0968e +msgid "" +"**Fixed** `#2246 `_: " +"TextWriter write text in a wrong position" +msgstr "" + +#: ../../../changes.txt:1032 e145aed1d0634d7699634b8f141b822e +msgid "" +"**Fixed** `#2248 `_: " +"After redacting the content, the position of the remaining text changes" +msgstr "" + +#: ../../../changes.txt:1033 a45ac9c0ba474998814b73896e4fa19d +msgid "" +"**Fixed** `#2250 `_: " +"docs: unclear or broken link in page.rst" +msgstr "" + +#: ../../../changes.txt:1034 a6c30879fa014c239d56bf123d818726 +msgid "" +"**Fixed** `#2251 `_: " +"mupdf_display_errors does not apply to Pixmap when loading broken image" +msgstr "" + +#: ../../../changes.txt:1035 5b04df2094894cbda17293d2e807d8b9 +msgid "" +"**Fixed** `#2270 `_: " +"``Annot.get_text(\"words\")`` - doesn't return the first line of words" +msgstr "" + +#: ../../../changes.txt:1036 609feab2717d48d98ce2ed0e97e78d39 +msgid "" +"**Fixed** `#2275 `_: " +"insert_image: document that rotations are counterclockwise" +msgstr "" + +#: ../../../changes.txt:1037 eceefd69dd3449afb585f9713df1350e +msgid "" +"**Fixed** `#2278 `_: Can " +"not make widget (checkbox) to read-only by adding flags " +"PDF_FIELD_IS_READ_ONLY" +msgstr "" + +#: ../../../changes.txt:1038 c1d3a5a2d9664b79b0d81b6ffb1101ae +msgid "" +"**Fixed** `#2290 `_: " +"Different image format/data from Page.get_text(\"dict\") and " +"Fitz.get_page_images()" +msgstr "" + +#: ../../../changes.txt:1039 9ecd8df81b264fc28602487aaf2813e7 +msgid "" +"**Fixed** `#2293 `_: 68 " +"failed tests when installing from sdist on my box" +msgstr "" + +#: ../../../changes.txt:1040 eb19df62e407402092a18e832ec928b0 +msgid "" +"**Fixed** `#2300 `_: Too " +"much recursion in tree (parents), makes program terminate" +msgstr "" + +#: ../../../changes.txt:1041 5a6f910be4d8460db6a88e9f05d097f3 +msgid "" +"**Fixed** `#2322 `_: " +"add_highlight_annot using clip generates \"A Number is Out of Range\" " +"error in PDF" +msgstr "" + +#: ../../../changes.txt:1045 25b26a8da6e940ef8a27fb7ead0df71e +msgid "" +"Add key \"/AS (Yes)\" to the underlying annot object of a selected button" +" form field." +msgstr "" + +#: ../../../changes.txt:1047 79c32104157247d4b42e3cd1602104c1 +msgid "" +"Remove unused ``Document`` methods ``has_xref_streams()`` and " +"``has_old_style_xrefs()`` as MuPDF equivalents have been removed." +msgstr "" + +#: ../../../changes.txt:1050 06b7f9e669a04233adf6d1bd3d251dd2 +msgid "" +"Add new ``Document`` methods and properties for getting/setting " +"``/PageMode``, ``/PageLayout`` and ``/MarkInfo``." +msgstr "" + +#: ../../../changes.txt:1053 f4f2db0fb7ac482199ecfdf0500f5f22 +msgid "" +"New ``Document`` property ``version_count``, which contains the number of" +" incremental saves plus one." +msgstr "" + +#: ../../../changes.txt:1056 2e66f78d2d3d40c881d1387ef7496384 +msgid "" +"New ``Document`` property ``is_fast_webaccess`` which tells whether the " +"document is linearized." +msgstr "" + +#: ../../../changes.txt:1059 5ed7d10cb9a14b89bb46bf2a737b661d +msgid "``DocumentWriter`` is now a context manager." +msgstr "" + +#: ../../../changes.txt:1061 1bbd4a98cf0d4fdba0f411293355ef64 +msgid "Add support for ``Pixmap`` JPEG output." +msgstr "" + +#: ../../../changes.txt:1063 2b04688fc9ac46be956d7ca935e8aa13 +msgid "Add support for drawing rectangles with rounded corners." +msgstr "" + +#: ../../../changes.txt:1065 ae71d3831f1846e38738b6d910b0f82c +msgid "``get_drawings()``: added optional ``extended`` arg." +msgstr "" + +#: ../../../changes.txt:1067 1ab82030b8b64bdb91b1608a21f3b7a5 +msgid "" +"Fixed issue where trace devices' state was not being initialised " +"correctly; data returned from things like ``fitz.Page.get_texttrace()`` " +"might be slightly altered, e.g. ``linewidth`` values." +msgstr "" + +#: ../../../changes.txt:1071 7481aee3d31341bf911416720ed98529 +msgid "" +"Output warning to ``stderr`` if it looks like we are being used with " +"current directory containing an invalid ``fitz/`` directory, because this" +" can break import of ``fitz`` module. For example this happens if one " +"attempts to use ``fitz`` when current directory is a PyMuPDF checkout." +msgstr "" + +#: ../../../changes.txt:1077 ../../../changes.txt:1132 +#: 0be1fe7d711841d4a369e1ac2b6c83c9 eea908b777d04a46ab5aa846fa8f00ff +msgid "Documentation:" +msgstr "" + +#: ../../../changes.txt:1079 452c73ebeb194e6c962d4c1df98f2f70 +msgid "General rework:" +msgstr "" + +#: ../../../changes.txt:1081 13d8c841d1844c7da2c554e3c208ebd2 +msgid "Introduces a new home page and new table of contents." +msgstr "" + +#: ../../../changes.txt:1082 0d60d070cdfc4295a6c1c0845dab5a3d +msgid "Structural update to include new About section." +msgstr "" + +#: ../../../changes.txt:1083 c032ea66644e40a6a10d7c3fa8856e51 +msgid "Comparison & performance graphing." +msgstr "" + +#: ../../../changes.txt:1084 e72ddf771e77494d898ef5b190d8fe76 +msgid "Includes performance methodology in appendix." +msgstr "" + +#: ../../../changes.txt:1085 f8b3ceaa7fc44a689a2a952c95c67f90 +msgid "Updates conf.py to understand single back-ticks as code." +msgstr "" + +#: ../../../changes.txt:1086 dcb6db957d9d41ddb2726c56e5117eb8 +msgid "Converts double back-ticks to single back-ticks." +msgstr "" + +#: ../../../changes.txt:1087 3293ed48cc574bbe98c7df0b89db86f5 +msgid "Removes redundant files." +msgstr "" + +#: ../../../changes.txt:1089 2f5b6b36f3164a5bae8c0ac2abfdb9e3 +msgid "Improve ``insert_file()`` documentation." +msgstr "" + +#: ../../../changes.txt:1091 eba1f9b7aa904bf6b82517d001a802de +msgid "``get_bboxlog()``: aded optional ``layers`` to ``get_bboxlog()``." +msgstr "" + +#: ../../../changes.txt:1092 40daadc994364c7691c67846ed6fa351 +msgid "" +"``Page.get_texttrace()``: add new dictionary key ``layer``, name of " +"Optional Content Group." +msgstr "" + +#: ../../../changes.txt:1094 ab98feb4821143e4b6334043a3146ce3 +msgid "Mention use of Python venv in installation documentation." +msgstr "" + +#: ../../../changes.txt:1096 2157b86cff42409da5a33e06841a182b +msgid "Added missing fix for #2057 to release 1.21.1's changelog." +msgstr "" + +#: ../../../changes.txt:1098 328289616f72435ea94320994d961626 +msgid "Fixes many links to the PyMuPDF-Utilities repo scripts." +msgstr "" + +#: ../../../changes.txt:1100 b54993ea53594ae8a73d4929a8375261 +msgid "Avoid duplication of ``changes.txt`` and ``docs/changes.rst``." +msgstr "" + +#: ../../../changes.txt:1102 025cdc7a7578464e8ddd104564350a1a +msgid "Build" +msgstr "" + +#: ../../../changes.txt:1104 69e89fa300bc48a4ab03d3d0897226ef +msgid "Added ``pyproject.toml`` file to improve builds using pip etc." +msgstr "" + +#: ../../../changes.txt:1108 c535451da83d4789a10c9395586415a3 +msgid "**Changes in Version 1.21.1 (2022-12-13)**" +msgstr "" + +#: ../../../changes.txt:1110 3bb1eb2546e24b8ebdb0755a2b66b583 +msgid "This release uses ``MuPDF-1.21.1``." +msgstr "" + +#: ../../../changes.txt:1114 2de1b896301e4aaeaada183ee47c6844 +msgid "" +"**Fixed** `#2110 `_: " +"Fully embedded font is extracted only partially if it occupies more than " +"one object" +msgstr "" + +#: ../../../changes.txt:1115 23fd55546ad64412a49f806e2a08fbb2 +msgid "" +"**Fixed** `#2094 `_: " +"Rectangle Detection Logic" +msgstr "" + +#: ../../../changes.txt:1116 378ac79f32aa46c0b050cafa349db820 +msgid "" +"**Fixed** `#2088 `_: " +"Destination point not set for named links in toc" +msgstr "" + +#: ../../../changes.txt:1117 82d872180d97428684a13afba4189125 +msgid "" +"**Fixed** `#2087 `_: " +"Image with Filter \"[/FlateDecode/JPXDecode]\" not extracted" +msgstr "" + +#: ../../../changes.txt:1118 6b69f386a8b347a8878fc0131b68581e +msgid "" +"**Fixed** `#2086 `_: " +"Document.save() owner_pw & user_pw has buffer overflow bug" +msgstr "" + +#: ../../../changes.txt:1119 b1af5bd35578441b8a526d612d9bbeb5 +msgid "" +"**Fixed** `#2076 `_: " +"Segfault in fitz.py" +msgstr "" + +#: ../../../changes.txt:1120 10bfbc8ba7704375aaf02c8e14fd1610 +msgid "" +"**Fixed** `#2057 `_: " +"Document.save garbage parameter not working in PyMuPDF 1.21.0" +msgstr "" + +#: ../../../changes.txt:1121 b9d1bef0500e459691ee5732ee6d9420 +msgid "" +"**Fixed** `#2051 `_: " +"Missing DPI Parameter" +msgstr "" + +#: ../../../changes.txt:1122 d332cb77d11a44c8b90dbee144a49144 +msgid "" +"**Fixed** `#2048 `_: " +"Invalid size of TextPage and bbox with newest version 1.21.0" +msgstr "" + +#: ../../../changes.txt:1123 e433f49a3f9b4f5d840653e8aaf19af9 +msgid "" +"**Fixed** `#2045 `_: " +"SystemError: returned a result " +"with an error set" +msgstr "" + +#: ../../../changes.txt:1124 d1a9d0c2081e4b8b9c9e6a5796599755 +msgid "" +"**Fixed** `#2039 `_: " +"1.21.0 fails to build against system libmupdf" +msgstr "" + +#: ../../../changes.txt:1125 d2dda8f2369e4023bbb5302808b70f9a +msgid "" +"**Fixed** `#2036 `_: " +"Archive::Archive defined twice" +msgstr "" + +#: ../../../changes.txt:1127 d6d20cd7e1d34deb8c8357d4092e06e4 +msgid "Other" +msgstr "" + +#: ../../../changes.txt:1129 93ef67701ea94d2a8376025a12f3ac30 +msgid "Swallow \"&zoom=nan\" in link uri strings." +msgstr "" + +#: ../../../changes.txt:1130 b64a068650c64de0bd772999dc57c0f5 +msgid "" +"Add new Page utility methods ``Page.replace_image()`` and " +"``Page.delete_image()``." +msgstr "" + +#: ../../../changes.txt:1134 86ab6fec847d4f3885ca39139982df66 +msgid "" +"`#2040 `_: Added note " +"about test failure with non-default build of MuPDF, to " +"``tests/README.md``." +msgstr "" + +#: ../../../changes.txt:1135 274776123d27441caef4a47e77ea77af +msgid "" +"`#2037 `_: In " +"``docs/installation.rst``, mention incompatibility with chocolatey.org on" +" Windows." +msgstr "" + +#: ../../../changes.txt:1136 61e3ac1dd61e41c6bf577d4e833d25c9 +msgid "" +"`#2061 `_: Fixed " +"description of ``Annot.file_info``." +msgstr "" + +#: ../../../changes.txt:1137 f6b19e96a9344289939778f8c247ec96 +msgid "" +"`#2065 `_: Show how to " +"insert internal PDF link." +msgstr "" + +#: ../../../changes.txt:1138 a7a4ccc8f7524e9bba4b9bafd823d398 +msgid "Improved description of building from source without an sdist." +msgstr "" + +#: ../../../changes.txt:1139 57bd571205e64efab1acad657410273e +msgid "Added information about running tests." +msgstr "" + +#: ../../../changes.txt:1140 a9aa76fa4fe347e1be104390514767f2 +msgid "" +"`#2084 `_: Fixed broken " +"link to PyMuPDF-Utilities." +msgstr "" + +#: ../../../changes.txt:1143 8d142bd0cc904bac8edb9de68c0e66a4 +msgid "**Changes in Version 1.21.0 (2022-11-8)**" +msgstr "" + +#: ../../../changes.txt:1145 f37ee6ea62e14d89830e50c7626b1239 +msgid "This release uses ``MuPDF-1.21.0``." +msgstr "" + +#: ../../../changes.txt:1147 1a48a57b38ff4e07833e9c3d37727414 +msgid "New feature: Stories." +msgstr "" + +#: ../../../changes.txt:1149 059f8b39221247aa90432bd125a57346 +msgid "Added wheels for Python-3.11." +msgstr "" + +#: ../../../changes.txt:1153 8e13c13d7d1241e19d4a9b9643730a5e +msgid "" +"**Fixed** `#1701 `_: " +"Broken custom image insertion." +msgstr "" + +#: ../../../changes.txt:1154 7bf2231b1e0b4b94915b5b1b3086c11d +msgid "" +"**Fixed** `#1854 `_: " +"`Document.delete_pages()` declines keyword arguments." +msgstr "" + +#: ../../../changes.txt:1155 92372dd80a064daf9c690376a45e1943 +msgid "" +"**Fixed** `#1868 `_: " +"Access Violation Error at `page.apply_redactions()`." +msgstr "" + +#: ../../../changes.txt:1156 4b7551daad58493d8e1b5f9d46ab1069 +msgid "" +"**Fixed** `#1909 `_: " +"Adding text with `fontname=\"Helvetica\"` can silently fail." +msgstr "" + +#: ../../../changes.txt:1157 ba5e861150f9424cb820efff9aa97705 +msgid "" +"**Fixed** `#1913 `_: " +"`draw_rect()`: does not respect width if color is not specified." +msgstr "" + +#: ../../../changes.txt:1158 bcd64b9baa2348c0ac23a55fe605e452 +msgid "" +"**Fixed** `#1917 `_: " +"`subset_fonts()`: make it possible to silence the stdout." +msgstr "" + +#: ../../../changes.txt:1159 c8b98883b12047009210f07eb6d801b5 +msgid "" +"**Fixed** `#1936 `_: " +"Rectangle detection can be incorrect producing wrong output." +msgstr "" + +#: ../../../changes.txt:1160 1c0dfc851c0645b181fd9f2c7cbd0dd4 +msgid "" +"**Fixed** `#1945 `_: " +"Segmentation fault when saving with `clean=True`." +msgstr "" + +#: ../../../changes.txt:1161 36b3990eb7c94e17be470c88003e2e42 +msgid "" +"**Fixed** `#1965 `_: " +"`pdfocr_save()` Hard Crash." +msgstr "" + +#: ../../../changes.txt:1162 961174ec01f64b6a914abafe58f6f837 +msgid "" +"**Fixed** `#1971 `_: " +"Segmentation fault when using `get_drawings()`." +msgstr "" + +#: ../../../changes.txt:1163 678eccbe3c27419db3766841da6c74ac +msgid "" +"**Fixed** `#1946 `_: " +"`block_no` and `block_type` switched in `get_text()` docs." +msgstr "" + +#: ../../../changes.txt:1164 29ce69170d32413999e2bf61a8b660ef +msgid "" +"**Fixed** `#2013 `_: " +"AttributeError: 'Widget' object has no attribute '_annot' in delete " +"widget." +msgstr "" + +#: ../../../changes.txt:1166 9beebd00fd6d448d92d7504ef1892255 +msgid "Misc changes to core code:" +msgstr "" + +#: ../../../changes.txt:1168 660c2e826f474dbeb44ffe35874bcf2f +msgid "Fixed various compiler warnings and a sequence-point bug." +msgstr "" + +#: ../../../changes.txt:1169 22ed24bd99ae45c0b842d4394d452c38 +msgid "Added support for Memento builds." +msgstr "" + +#: ../../../changes.txt:1170 bfa15d60a210494d843d5cd2c2485e71 +msgid "Fixed leaks detected by Memento in test suite." +msgstr "" + +#: ../../../changes.txt:1171 57de10ad916844bb8adfd90224b3124e +msgid "Fixed handling of exceptions in set_name() and set_rect()." +msgstr "" + +#: ../../../changes.txt:1172 9ad7d680bbe5430f919a4c10bce408ef +msgid "Allow build with latest MuPDF, for regular testing of PyMuPDF master." +msgstr "" + +#: ../../../changes.txt:1173 4807f7a4bb5c4082928d24debb43ce8a +msgid "Cope with new MuPDF exceptions when setting rect for some Annot types." +msgstr "" + +#: ../../../changes.txt:1174 8175de75d6ce47f6a4397884ab6160d1 +msgid "" +"Reduced cosmetic differences between MuPDF's config.h and PyMuPDF's " +"_config.h." +msgstr "" + +#: ../../../changes.txt:1175 7a38e1cbd2f844d793f9dd5108fa280b +msgid "Cope with various changes to MuPDF API." +msgstr "" + +#: ../../../changes.txt:1179 5889cb3bfb2041eba8ea12c66c00014a +msgid "Fixed various broken links and typos in docs." +msgstr "" + +#: ../../../changes.txt:1180 8af2330e810445429d05d9bf2d52eb6d +msgid "Mention install of `swig-python` on MacOS for #875." +msgstr "" + +#: ../../../changes.txt:1181 7da5ddc7306147c8aa4af9ebdf84a50b +msgid "Added (untested) wheels for macos-arm64." +msgstr "" + +#: ../../../changes.txt:1186 ad22f625f2b646c8b529aaea34da4283 +msgid "**Changes in Version 1.20.2**" +msgstr "" + +#: ../../../changes.txt:1188 e34bcddf9c354eb0aea4463a83c58dd7 +msgid "This release uses ``MuPDF-1.20.3``." +msgstr "" + +#: ../../../changes.txt:1190 5811775a9ea9401aa90110ea63e89256 +msgid "" +"**Fixed** `#1787 `_. Fix " +"linking issues on Unix systems." +msgstr "" + +#: ../../../changes.txt:1193 25bb4fb1d4304f16b1cb07650cf2f12a +msgid "" +"**Fixed** `#1824 `_. " +"SegFault when applying redactions overlapping a transparent image. (Fixed" +" in ``MuPDF-1.20.3``.)" +msgstr "" + +#: ../../../changes.txt:1197 32e6f47fce404142907e47a4532c576a +msgid "Improvements to documentation:" +msgstr "" + +#: ../../../changes.txt:1199 c482411a819a4f64a38c8e1a32d366cb +msgid "" +"Improved information about building from source in " +"``docs/installation.rst``." +msgstr "" + +#: ../../../changes.txt:1200 9413a77ed9ab4b4580992f121035d007 +msgid "Clarified memory allocation setting ``JM_MEMORY` in ``docs/tools.rst``." +msgstr "" + +#: ../../../changes.txt:1201 ee4f3e379022483da0cb66bb5e461b51 +msgid "Fixed link to PDF Reference manual in ``docs/app3.rst``." +msgstr "" + +#: ../../../changes.txt:1202 41b1826514504b27a4559aed441e6155 +msgid "Fixed building of html documentation on OpenBSD." +msgstr "" + +#: ../../../changes.txt:1203 d7f99fbee3fd464cae429db22bf01e83 +msgid "Moved old ``docs/faq.rst`` into separate ``docs/recipes-*`` files." +msgstr "" + +#: ../../../changes.txt:1205 e0fdffe7fc984f0897ee6d05531f746a +msgid "Removed some unused files and directories:" +msgstr "" + +#: ../../../changes.txt:1207 c0961b70583f4add905375509140ffd7 +msgid "``installation/``" +msgstr "" + +#: ../../../changes.txt:1208 f3f4a94889fb40bfb59e643d7f29d940 +msgid "``docs/wheelnames.txt``" +msgstr "" + +#: ../../../changes.txt:1211 177ae9c1f4b74d65b1dd8f7057c9b2bb +msgid "**Changes in Version 1.20.1**" +msgstr "" + +#: ../../../changes.txt:1213 f9bc2c30db6b4913ad8f80c2a5ac86ae +msgid "" +"**Fixed** `#1724 `_. Fix " +"for building on FreeBSD." +msgstr "" + +#: ../../../changes.txt:1216 4ddf7f55a449424aa337ccadb113e6c6 +msgid "" +"**Fixed** `#1771 `_. " +"`linkDest()` had a broken call to `re.match()`, introduced in 1.20.0." +msgstr "" + +#: ../../../changes.txt:1219 966641d494d94cafaf728c1bce9d0026 +msgid "" +"**Fixed** `#1751 `_. " +"`get_drawings()` and `get_cdrawings()` previously always returned with " +"`closePath=False`." +msgstr "" + +#: ../../../changes.txt:1222 5c05ef22f2934aae9e39fa33add4d68e +msgid "" +"**Fixed** `#1645 `_. " +"Default FreeText annotation text color is now black." +msgstr "" + +#: ../../../changes.txt:1225 7a2c98d6944648a1bc2a9e27ca21546f +msgid "Improvements to sphinx-generated documentation:" +msgstr "" + +#: ../../../changes.txt:1227 9b98b0cea0854e0fa69c679ce8aadc8e +msgid "Use readthedocs theme with enhancements." +msgstr "" + +#: ../../../changes.txt:1228 a269f7c8267f4e4ba0cf4a2bc6c5db89 +msgid "Renamed the `.txt` files to have `.rst` suffixes." +msgstr "" + +#: ../../../changes.txt:1232 de37c3a471ae4c21aafa2b5025f1d938 +msgid "**Changes in Version 1.20.0**" +msgstr "" + +#: ../../../changes.txt:1234 b39c87edf89940c38ab2b6884be47ec7 +msgid "This release uses ``MuPDF-1.20.0``, released 2022-06-15." +msgstr "" + +#: ../../../changes.txt:1236 47061f9f01e64ccd9c355cef3a8d6a17 +msgid "" +"Cope with new MuPDF link uri format, changed from ``#,,`` " +"to ``#page=&zoom=,,``." +msgstr "" + +#: ../../../changes.txt:1238 abf0f98331b44a8e99e0fdaa923cefb5 +msgid "" +"In ``tests/test_insertpdf.py``, use new reference output " +"``joined-1.20.pdf``. We also check that new output values are " +"approximately the same as the old ones." +msgstr "" + +#: ../../../changes.txt:1240 10f94b49fe2b4892bbbaac23ec3df790 +msgid "" +"**Fixed** `#1738 `_. Leak" +" of `pdf_graft_map`. Also fixed a SEGV issue that this seemed to expose, " +"caused by incorrect freeing of underlying fz_document." +msgstr "" + +#: ../../../changes.txt:1243 4ee9d86d05624363b800826c8b6e83e9 +msgid "" +"**Fixed** `#1733 `_. " +"Fixed ownership of `Annotation.get_pixmap()`." +msgstr "" + +#: ../../../changes.txt:1245 288f9aa21a664bf0b90656b16d3e9c9d +msgid "Changes to build/release process:" +msgstr "" + +#: ../../../changes.txt:1247 c05acacb3fba4cb7aae4533893d59eb4 +msgid "" +"If pip builds from source because an appropriate wheel is not available, " +"we no longer require MuPDF to be pre-installed. Instead the required " +"MuPDF source is embedded in the sdist and automatically built into " +"PyMuPDF." +msgstr "" + +#: ../../../changes.txt:1249 30dd92a556cc4e0f813b0d0f0dd314c5 +msgid "" +"Various changes to ``setup.py`` to download the required MuPDF release as" +" required. See comments at start of setup.py for details." +msgstr "" + +#: ../../../changes.txt:1251 96ce8c226fe743068f1e6d17cc4204ce +msgid "" +"Added ``.github/workflows/build_wheels.yml`` to control building of " +"wheels on Github." +msgstr "" + +#: ../../../changes.txt:1255 30986c29af244f149746c662c31e41c5 +msgid "**Changes in Version 1.19.6**" +msgstr "" + +#: ../../../changes.txt:1257 306105c0937d4983b83bbb152ae03bae +msgid "" +"**Fixed** `#1620 `_. The " +":ref:`TextPage` created by :meth:`Page.get_textpage` will now be freed " +"correctly (removed memory leak)." +msgstr "" + +#: ../../../changes.txt:1258 ae60a9cbae3c4e31ad54c520b0382cd5 +msgid "" +"**Fixed** `#1601 `_. " +"Document open errors should now be more concise and easier to interpret. " +"In the course of this, two PyMuPDF-specific Python exceptions have been " +"**added:**" +msgstr "" + +#: ../../../changes.txt:1260 ae778068f5db415e9647c63b20554f56 +msgid "" +"``EmptyFileError`` -- raised when trying to create a :ref:`Document` " +"(``fitz.open()``) from an empty file or zero-length memory." +msgstr "" + +#: ../../../changes.txt:1261 471c7d3b9cba41a1826f7ebe1bd878c9 +msgid "" +"``FileDataError`` -- raised when MuPDF encounters irrecoverable document " +"structure issues." +msgstr "" + +#: ../../../changes.txt:1263 e755c60e279f4827a8a2892e3d711566 +msgid "**Added** :meth:`Page.load_widget` given a PDF field's xref." +msgstr "" + +#: ../../../changes.txt:1265 5d0fefd2151543a48aa13be030ffda46 +msgid "" +"**Added** Dictionary :attr:`pdfcolor` which provide the about 500 colors " +"defined as PDF color values with the lower case color name as key." +msgstr "" + +#: ../../../changes.txt:1267 dc94518632fb45e3841694955709f0a1 +msgid "" +"**Added** algebra functionality to the :ref:`Quad` class. These objects " +"can now also be added and subtracted among themselves, and be multiplied " +"by numbers and matrices." +msgstr "" + +#: ../../../changes.txt:1269 27048c74287b4cef9fcd622695ad0ef3 +msgid "" +"**Added** new constants defining the default text extraction flags for " +"more comfortable handling. Their naming convention is like " +":data:`TEXTFLAGS_WORDS` for ``page.get_text(\"words\")``. See " +":ref:`text_extraction_flags`." +msgstr "" + +#: ../../../changes.txt:1271 66296a89488f495e8ffdfdfdd543b607 +msgid "" +"**Changed** :meth:`Page.annots` and :meth:`Page.widgets` to detect and " +"prevent reloading the page (illegally) inside the iterator loops via " +":meth:`Document.reload_page`. Doing this brings down the interpretor. " +"Documented clean ways to do annotation and widget mass updates within " +"properly designed loops." +msgstr "" + +#: ../../../changes.txt:1273 e184245b4f08463481f8e8b18629efa4 +msgid "" +"**Changed** several internal utility functions to become standalone " +"(\"SWIG inline\") as opposed to be part of the :ref:`Tools` class. This, " +"among other things, increases the performance of geometry object " +"creation." +msgstr "" + +#: ../../../changes.txt:1275 44d43c571e9d4903ac43dbd52bc0f442 +msgid "" +"**Changed** :meth:`Document.update_stream` to always accept stream " +"updates - whether or not the dictionary object behind the xref already is" +" a stream. Thus the former ``new`` parameter is now ignored and will be " +"removed in v1.20.0." +msgstr "" + +#: ../../../changes.txt:1280 f399a35d5f64416db6d984f39d4b9b8f +msgid "**Changes in Version 1.19.5**" +msgstr "" + +#: ../../../changes.txt:1282 7b2a033386084aa185e40d79192ab2e7 +msgid "" +"**Fixed** `#1518 `_. A " +"limited \"fix\": in some cases, rectangles and quadrupels were not " +"correctly encoded to support re-drawing by :ref:`Shape`." +msgstr "" + +#: ../../../changes.txt:1284 3946afb9a5bd4971ba8541de589e4778 +msgid "" +"**Fixed** `#1521 `_. This" +" had the same ultimate reason behind issue #1510." +msgstr "" + +#: ../../../changes.txt:1286 7b37601956b6480c9895b8e94eb2bbf6 +msgid "" +"**Fixed** `#1513 `_. Some" +" Optional Content functions did not support non-ASCII characters." +msgstr "" + +#: ../../../changes.txt:1288 0e3e8d066bce4baa90014d9d2f806dc2 +msgid "" +"**Fixed** `#1510 `_. " +"Support more soft-mask image subtypes." +msgstr "" + +#: ../../../changes.txt:1290 dd3de42c8c174161bfb3fcb769b624d4 +msgid "" +"**Fixed** `#1507 `_. " +"Immunize against items in the outlines chain, that are ``\"null\"`` " +"objects." +msgstr "" + +#: ../../../changes.txt:1292 b80440005d8d4dd885a9b37949c88807 +msgid "" +"**Fixed** re-opened `#1417 " +"`_. (\"too many open " +"files\"). This was due to insufficient calls to MuPDF's " +"``fz_drop_document()``. This also fixes `#1550 " +"`_." +msgstr "" + +#: ../../../changes.txt:1294 3b434fe5008e4d5bad425da7101d03b9 +msgid "" +"**Fixed** several undocumented issues in relation to incorrectly setting " +"the text span origin :data:`point_like`." +msgstr "" + +#: ../../../changes.txt:1296 0a9ff9b403a9449492e10f79a72c8ae5 +msgid "" +"**Fixed** undocumented error computing the character bbox in method " +":meth:`Page.get_texttrace` when text is **flipped** (as opposed to just " +"rotated)." +msgstr "" + +#: ../../../changes.txt:1298 b824b84c7a61426e81fc587b8cf8e085 +msgid "" +"**Added** items to the dictionary returned by :meth:`image_properties`: " +"``orientation`` and ``transform`` report the natural image orientation " +"(EXIF data)." +msgstr "" + +#: ../../../changes.txt:1300 ed32558e215b4f78b2a54154401bfa98 +msgid "" +"**Added** method :meth:`Document.xref_copy`. It will make a given target " +"PDF object an exact copy of a source object." +msgstr "" + +#: ../../../changes.txt:1305 ea587b102a4a4fa3a33075d5d6525268 +msgid "**Changes in Version 1.19.4**" +msgstr "" + +#: ../../../changes.txt:1308 bd93ad74360f4c9a8d9287630bc2d3c0 +msgid "" +"**Fixed** `#1505 `_. " +"Immunize against circular outline items." +msgstr "" + +#: ../../../changes.txt:1310 dd0173bc16094414956cae7e260401e1 +msgid "" +"**Fixed** `#1484 `_. " +"Correct CropBox coordinates are now returned in all situations." +msgstr "" + +#: ../../../changes.txt:1312 daee6dd009684357a7beaea521f6e95f +msgid "**Fixed** `#1479 `_." +msgstr "" + +#: ../../../changes.txt:1314 4fb5317b94f941c4a4f55823b9924563 +msgid "" +"**Fixed** `#1474 `_. " +"TextPage objects are now properly deleted again." +msgstr "" + +#: ../../../changes.txt:1316 e500fe03552646f2bbc8bfa78a28c6a2 +msgid "" +"**Added** :ref:`Page` methods and attributes for PDF ``/ArtBox``, " +"``/BleedBox``, ``/TrimBox``." +msgstr "" + +#: ../../../changes.txt:1318 79f3c98bc47b4362b98d464ede595203 +msgid "" +"**Added** global attribute :attr:`TESSDATA_PREFIX` for easy checking of " +"OCR support." +msgstr "" + +#: ../../../changes.txt:1320 6272356c55e74c9ab3254960dcb0bdaa +msgid "" +"**Changed** :meth:`Document.xref_set_key` such that dictionary keys will " +"physically be removed if set to value ``\"null\"``." +msgstr "" + +#: ../../../changes.txt:1322 f5db8e277bc2420c9a0da6cc44f0e6f9 +msgid "" +"**Changed** :meth:`Document.extract_font` to optionally return a " +"dictionary (instead of a tuple)." +msgstr "" + +#: ../../../changes.txt:1326 9fedc5b850804e27ade28d84efdf8079 +msgid "**Changes in Version 1.19.3**" +msgstr "" + +#: ../../../changes.txt:1328 f969561dd09e46339dfc3197c1f8d95e +msgid "" +"This patch version implements minor improvements for :ref:`Pixmap` and " +"also some important fixes." +msgstr "" + +#: ../../../changes.txt:1330 f28152e17a914f85ae2341d992a91dd1 +msgid "" +"**Fixed** `#1351 `_." +" Reverted code that introduced the memory growth in v1.18.15." +msgstr "" + +#: ../../../changes.txt:1332 24112e49e2fd49f98349c220956a8633 +msgid "" +"**Fixed** `#1417 `_." +" Developped circumvention for growth of open file handles using " +":meth:`Document.insert_pdf`." +msgstr "" + +#: ../../../changes.txt:1334 3b4ffd5736fd4eeca74b163cac7d8f51 +msgid "" +"**Fixed** `#1418 `_." +" Developped circumvention for memory growth using " +":meth:`Document.insert_pdf`." +msgstr "" + +#: ../../../changes.txt:1336 2d79f3c27d7146659305df4383aee9b6 +msgid "" +"**Fixed** `#1430 `_." +" Developped circumvention for mass pixmap generations of document pages." +msgstr "" + +#: ../../../changes.txt:1338 a5c64ba9fd764d1d8494215e3c0b8858 +msgid "" +"**Fixed** `#1433 `_." +" Solves a bbox error for some Type 3 font in PyMuPDF text processing." +msgstr "" + +#: ../../../changes.txt:1340 13b39bcdc9194117a12843a43a34fc7d +msgid "" +"**Added** :meth:`Pixmap.color_topusage` to determine the share of the " +"most frequently used color. Solves `#1397 " +"`_." +msgstr "" + +#: ../../../changes.txt:1342 3d3512e6d4084104be69626f95680b4b +msgid "" +"**Added** :meth:`Pixmap.warp` which makes a new pixmap from a given " +"arbitrary convex quad inside the pixmap." +msgstr "" + +#: ../../../changes.txt:1344 cea44c06be0444c2862a47f8d83aed1a +msgid "" +"**Added** :attr:`Annot.irt_xref` and :meth:`Annot.set_irt_xref` to " +"inquire or set the `/IRT` (\"In Responde To\") property of an annotation." +" Implements `#1450 " +"`_." +msgstr "" + +#: ../../../changes.txt:1346 cd9c264f9b0c4ddeb0664c107019994c +msgid "" +"**Added** :meth:`Rect.torect` and :meth:`IRect.torect` which compute a " +"matrix that transforms to a given other rectangle." +msgstr "" + +#: ../../../changes.txt:1348 779a9e0fc4e446ad99773627e86d338f +msgid "" +"**Changed** :meth:`Pixmap.color_count` to also return the count of each " +"color." +msgstr "" + +#: ../../../changes.txt:1349 181969c281544876825ae17c78b2a920 +msgid "" +"**Changed** :meth:`Page.get_texttrace` to also return correct span and " +"character bboxes if ``span[\"dir\"] != (1, 0)``." +msgstr "" + +#: ../../../changes.txt:1353 9e17f8f3942f4fd4b9bb961d7fa1b1af +msgid "**Changes in Version 1.19.2**" +msgstr "" + +#: ../../../changes.txt:1355 ef70af06d0b9424cb9237cca9693fdb3 +msgid "" +"This patch version implements minor improvements for " +":meth:`Page.get_drawings` and also some important fixes." +msgstr "" + +#: ../../../changes.txt:1357 c78fc0d815fd4c3fb1d07013e135363d +msgid "" +"**Fixed** `#1388 `_." +" Fixed intermittent memory corruption when insert or updating " +"annotations." +msgstr "" + +#: ../../../changes.txt:1359 254b3f6414de4542bbe42569eb38dba0 +msgid "" +"**Fixed** `#1375 `_." +" Inconsistencies between line numbers as returned by the \"words\" and " +"the \"dict\" options of :meth:`Page.get_text` have been corrected." +msgstr "" + +#: ../../../changes.txt:1361 83eaa2d8c6154cbf9191af04ba4ff38a +msgid "" +"**Fixed** `#1364 `_. The " +"check for being a ``\"rawdict\"`` span in :meth:`recover_span_quad` now " +"works correctly." +msgstr "" + +#: ../../../changes.txt:1363 84c383e9b591448b969c3a085031fc56 +msgid "" +"**Fixed** `#1342 `_. " +"Corrected the check for rectangle infiniteness in " +":meth:`Page.show_pdf_page`." +msgstr "" + +#: ../../../changes.txt:1365 81b8fa0c5ab045b4b7ac5efabae6112d +msgid "" +"**Changed** :meth:`Page.get_drawings`, :meth:`Page.get_cdrawings` to " +"return an indicator on the area orientation covered by a rectangle. This " +"implements `#1355 `_. " +"Also, the recognition rate for rectangles and quads has been " +"significantly improved." +msgstr "" + +#: ../../../changes.txt:1367 82b6728d6eda4df6b12b2446fcada553 +msgid "" +"**Changed** all text search and extraction methods to set the new " +"``flags`` option ``TEXT_MEDIABOX_CLIP`` to ON by default. That bit causes" +" the automatic suppression of all characters that are completely outside " +"a page's mediabox (in as far as that notion is supported for a document " +"type). This eliminates the need for using ``clip=page.rect`` or similar " +"for omitting text outside the visible area." +msgstr "" + +#: ../../../changes.txt:1369 ebb3fbf2ef7343ef87fbcac225247cc5 +msgid "" +"**Added** parameter ``\"dpi\"`` to :meth:`Page.get_pixmap` and " +":meth:`Annot.get_pixmap`. When given, parameter ``\"matrix\"`` is " +"ignored, and a :ref:`Pixmap` with the desired dots per inch is created." +msgstr "" + +#: ../../../changes.txt:1371 76f6c7b1714843cf92b0ac65beb1d8f2 +msgid "" +"**Added** attributes :attr:`Pixmap.is_monochrome` and " +":attr:`Pixmap.is_unicolor` allowing fast checks of pixmap properties. " +"Addresses `#1397 `_." +msgstr "" + +#: ../../../changes.txt:1373 7161418b4dd04d41938c12178aa11b80 +msgid "" +"**Added** method :meth:`Pixmap.color_count` to determine the unique " +"colors in the pixmap." +msgstr "" + +#: ../../../changes.txt:1375 0658e70487044523abb865ad40815c18 +msgid "" +"**Added** boolean parameter ``\"compress\"`` to PDF document method " +":meth:`Document.update_stream`. Addresses / enables solution for `#1408 " +"`_." +msgstr "" + +#: ../../../changes.txt:1379 4166c4a4e9654f78859df125ef92c209 +msgid "**Changes in Version 1.19.1**" +msgstr "" + +#: ../../../changes.txt:1381 3cc0414c43844efc808b24c4ba7b0626 +msgid "" +"This is the first patch version to support MuPDF v1.19.0. Apart from one " +"bug fix, it includes important improvements for OCR support and the " +"option to **sort extracted text** to the standard reading order \"from " +"top-left to bottom-right\"." +msgstr "" + +#: ../../../changes.txt:1383 d650872b63c54df09c9e7538f1325c67 +msgid "" +"**Fixed** `#1328 `_. " +"\"words\" text extraction again returns correct ``(x0, y0)`` coordinates." +msgstr "" + +#: ../../../changes.txt:1385 9e5ce0b5a7cb4e0eadae5b30c7b50fee +msgid "" +"**Changed** :meth:`Page.get_textpage_ocr`: it now supports parameter " +"``dpi`` to control OCR quality. It is also possible to choose whether the" +" **full page** should be OCRed or **only the images displayed** by the " +"page." +msgstr "" + +#: ../../../changes.txt:1387 27a9045f8e504c2480897220182db02d +msgid "" +"**Changed** :meth:`Page.get_drawings` and :meth:`Page.get_cdrawings` to " +"automatically convert colors to RGB color tuples. Implements `#1332 " +"`_. Similar change " +"was applied to :meth:`Page.get_texttrace`." +msgstr "" + +#: ../../../changes.txt:1389 329284c7a0dd40b19d7780fc3b8be57e +msgid "" +"**Changed** :meth:`Page.get_text` to support a parameter ``sort``. If set" +" to ``True`` the output is conveniently sorted." +msgstr "" + +#: ../../../changes.txt:1394 6ffe6054e8fc4ec982438f8fd71f7f71 +msgid "**Changes in Version 1.19.0**" +msgstr "" + +#: ../../../changes.txt:1396 93fd42def89848ed83434ddc7c0150b3 +msgid "" +"This is the first version supporting MuPDF 1.19.*, published 2021-10-05. " +"It introduces many new features compared to the previous version 1.18.*." +msgstr "" + +#: ../../../changes.txt:1398 ccc5c32e95084bdfac6b6b57fec211ca +msgid "" +"PyMuPDF has now picked up integrated Tesseract OCR support, which was " +"already present in MuPDF v1.18.0." +msgstr "" + +#: ../../../changes.txt:1400 28a2967018544517b2ef43b9b220bf26 +msgid "" +"Supported images can be OCRed via their :ref:`Pixmap` which results in a " +"1-page PDF with a text layer." +msgstr "" + +#: ../../../changes.txt:1401 93277beffc8d4e4ea8e6cb1d3e17d5fd +msgid "" +"All supported document pages (i.e. not only PDFs), can be OCRed using " +"specialized text extraction methods. The result is a mixture of standard " +"and OCR text (depending on which part of the page was deemed to require " +"OCRing) that can be searched and extracted without restrictions." +msgstr "" + +#: ../../../changes.txt:1402 437b8793bf174ec09f0e61a513f7ae5c +msgid "" +"All this requires an independent installation of Tesseract. MuPDF " +"actually (only) needs the location of Tesseract's ``\"tessdata\"`` " +"folder, where its language support data are stored. This location must be" +" available as environment variable ``TESSDATA_PREFIX``." +msgstr "" + +#: ../../../changes.txt:1404 58c74a96eebf4d1b86e56b2ee6d2a3f5 +msgid "" +"A new MuPDF feature is **journalling PDF updates**, which is also " +"supported by this PyMuPDF version. Changes may be logged, rolled back or " +"replayed, allowing to implement a whole new level of control over PDF " +"document integrity -- similar to functions present in modern database " +"systems." +msgstr "" + +#: ../../../changes.txt:1406 e72da958f90a4f4f928ed1f01ea27c4e +msgid "" +"A third feature (unrelated to the new MuPDF version) includes the ability" +" to detect when page **objects cover or hide each other**. It is now e.g." +" possible to see that text is covered by a drawing or an image." +msgstr "" + +#: ../../../changes.txt:1408 6d5eac51058f4bb3ad540dc970f86972 +msgid "" +"**Changed** terminology and meaning of important geometry concepts: " +"Rectangles are now characterized as *finite*, *valid* or *empty*, while " +"the definitions of these terms have also changed. Rectangles specifically" +" are now thought of being \"open\": not all corners and sides are " +"considered part of the retangle. Please do read the :ref:`Rect` section " +"for details." +msgstr "" + +#: ../../../changes.txt:1410 e66c5280cbc346db97e5bc2cb51a481e +msgid "" +"**Added** new parameter `\"no_new_id\"` to :meth:`Document.save` / " +":meth:`Document.tobytes` methods. Use it to suppress updating the second " +"item of the document ``/ID`` which in PDF indicates that the original " +"file has been updated. If the PDF has no ``/ID`` at all yet, then no new " +"one will be created either." +msgstr "" + +#: ../../../changes.txt:1412 643a0fe551704f0085d1632dda0eb045 +msgid "" +"**Added** a **journalling facility** for PDF updates. This allows logging" +" changes, undoing or redoing them, or saving the journal for later use. " +"Refer to :meth:`Document.journal_enable` and friends." +msgstr "" + +#: ../../../changes.txt:1414 32a46f6e342a4279bd8a4f191436305f +msgid "" +"**Added** new :ref:`Pixmap` methods :meth:`Pixmap.pdfocr_save` and " +":meth:`Pixmap.pdfocr_tobytes`, which generate a 1-page PDF containing the" +" pixmap as PNG image with OCR text layer." +msgstr "" + +#: ../../../changes.txt:1416 50b47cf3028d4b6f9a0251bddada736c +msgid "" +"**Added** :meth:`Page.get_textpage_ocr` which executes optical character " +"recognition for the page, then extracts the results and stores them " +"together with \"normal\" page content in a :ref:`TextPage`. Use or reuse " +"this object in subsequent text extractions and text searches to avoid " +"multiple efforts. The existing text search and text extraction methods " +"have been extended to support a separately created textpage -- see next " +"item." +msgstr "" + +#: ../../../changes.txt:1418 2b936644a8104ed2ab25a506e8b5b46b +msgid "" +"**Added** a new parameter ``textpage`` to text extraction and text search" +" methods. This allows reuse of a previously created :ref:`TextPage` and " +"thus achieves significant runtime benefits -- which is especially " +"important for the new OCR features. But \"normal\" text extractions can " +"definitely also benefit." +msgstr "" + +#: ../../../changes.txt:1420 b95af0c617f348d4865a12fd70f530ac +msgid "" +"**Added** :meth:`Page.get_texttrace`, a technical method delivering low-" +"level text character properties. It was present before as a private " +"method, but the author felt it now is mature enough to be officially " +"available. It specifically includes a \"sequence number\" which indicates" +" the page appearance build operation that painted the text." +msgstr "" + +#: ../../../changes.txt:1422 8a3cd2567e7a4b759f03aba3fd23125b +msgid "" +"**Added** :meth:`Page.get_bboxlog` which delivers the list of rectangles " +"of page objects like text, images or drawings. Its significance lies in " +"its sequence: rectangles intersecting areas with a lower index are " +"covering or hiding them." +msgstr "" + +#: ../../../changes.txt:1424 83ad39d6a98c45b6bd09d5f48dcacfcf +msgid "" +"**Changed** methods :meth:`Page.get_drawings` and " +":meth:`Page.get_cdrawings` to include a \"sequence number\" indicating " +"the page appearance build operation that created the drawing." +msgstr "" + +#: ../../../changes.txt:1426 fa1ad871ec9d427ea5f9df835123d52c +msgid "" +"**Fixed** `#1311 `_. " +"Field values in comboboxes should now be handled correctly." +msgstr "" + +#: ../../../changes.txt:1427 c46b51b2d3c642e79136b06435fb8cf0 +msgid "" +"**Fixed** `#1290 `_. " +"Error was caused by incorrect rectangle emptiness check, which is fixed " +"due to new geometry logic of this version." +msgstr "" + +#: ../../../changes.txt:1428 794465f2accd4fb08c078f0cb87d1a8e +msgid "" +"**Fixed** `#1286 `_. Text" +" alignment for redact annotations is working again." +msgstr "" + +#: ../../../changes.txt:1429 c3b9aeb3854f4d568d13b78e55280dab +msgid "" +"**Fixed** `#1287 `_. " +"Infinite loop issue for non-Windows systems when applying some redactions" +" has been resolved." +msgstr "" + +#: ../../../changes.txt:1430 1d16f03733fa4336b9cd573a649b5a64 +msgid "" +"**Fixed** `#1284 `_. Text" +" layout destruction after applying redactions in some cases has been " +"resolved." +msgstr "" + +#: ../../../changes.txt:1434 b4e90d51da6740738a7bbf5bf5908c72 +msgid "**Changes in Version 1.18.18 / 1.18.19**" +msgstr "" + +#: ../../../changes.txt:1436 ea65fa34b9b840bda29c89b8454401eb +msgid "" +"**Fixed** issue `#1266 " +"`_. Failure to set " +":attr:`Pixmap.samples` in important cases, was hotfixed in a new version " +"1.18.19." +msgstr "" + +#: ../../../changes.txt:1438 b1f6621caf394c5c849f41d813270184 +msgid "" +"**Fixed** issue `#1257 " +"`_. Removing the read-" +"only flag from PDF fields is now possible." +msgstr "" + +#: ../../../changes.txt:1440 7777c7005097490aa8f13afc2b3f763e +msgid "" +"**Fixed** issue `#1252 " +"`_. Now correctly " +"specifying the ``zoom`` value for PDF link annotations." +msgstr "" + +#: ../../../changes.txt:1442 ebff53e03b9d4eada63e89f4a78c2f90 +msgid "" +"**Fixed** issue `#1244 " +"`_. Now correctly " +"computing the transform matrix in :meth:`Page.get_image__bbox`." +msgstr "" + +#: ../../../changes.txt:1444 3cdecee723d1422093d7075523a3a643 +msgid "" +"**Fixed** issue `#1241 " +"`_. Prevent returning " +"artifact characters in :meth:`Page.get_textbox`, which happened in " +"certain constellations." +msgstr "" + +#: ../../../changes.txt:1446 d49b6a1a79bf43b9a8c4d33277028a3e +msgid "" +"**Fixed** issue `#1234 " +"`_. Avoid creating " +"infinite rectangles in corner cases -- :meth:`Page.get_drawings`, " +":meth:`Page.get_cdrawings`." +msgstr "" + +#: ../../../changes.txt:1448 b03d9cc8f70348b3945451555a279408 +msgid "" +"**Added** test data and test scripts to the source PyPI source " +"distribution." +msgstr "" + +#: ../../../changes.txt:1452 682e7863ee1b4712a34eb85b4c4e83d4 +msgid "**Changes in Version 1.18.17**" +msgstr "" + +#: ../../../changes.txt:1454 5daa4029fb1949559b72014d79677738 +msgid "" +"Focus of this version are major performance improvements of selected " +"functions." +msgstr "" + +#: ../../../changes.txt:1456 a6b6181c27334dc1b28f017b27f36ffc +msgid "" +"**Fixed** issue `#1199 " +"`_. Using a non-existing " +"page number in :meth:`Document.get_page_images` and friends will no " +"longer lead to segfaults." +msgstr "" + +#: ../../../changes.txt:1458 db55fa540501479ab8d1db56b29e832d +msgid "" +"**Changed** :meth:`Page.get_drawings` to now differentiate between " +"\"stroke\", \"fill\" and combined paths. Paths containing more than one " +"rectangle (i.e. \"re\" items) are now supported. Extracting \"clipped\" " +"paths is now available as an option." +msgstr "" + +#: ../../../changes.txt:1460 f5051c27e4ff42d09099d4cf60648b84 +msgid "" +"**Added** :meth:`Page.get_cdrawings`, performance-optimized version of " +":meth:`Page.get_drawings`." +msgstr "" + +#: ../../../changes.txt:1462 73b510027cc649d682f21a347423c97f +msgid "" +"**Added** :attr:`Pixmap.samples_mv`, *memoryview* of a pixmap's pixel " +"area. Does not copy and thus always accesses the current state of that " +"area." +msgstr "" + +#: ../../../changes.txt:1464 131ccaf79bfe44a69e142c5f26094008 +msgid "" +"**Added** :attr:`Pixmap.samples_ptr`, Python \"pointer\" to a pixmap's " +"pixel area. Allows much faster creation (factor 800+) of Qt images." +msgstr "" + +#: ../../../changes.txt:1470 a8587ce933dc4affa83ff3eb08f32ec7 +msgid "**Changes in Version 1.18.16**" +msgstr "" + +#: ../../../changes.txt:1472 f6b559aeab32422ab496db35ab545e06 +msgid "" +"**Fixed** issue `#1184 " +"`_. Existing PDF widget " +"fonts in a PDF are now accepted (i.e. not forcedly changed to a Base-14 " +"font)." +msgstr "" + +#: ../../../changes.txt:1474 627e5b64fa8949468e59835f2a6a7e98 +msgid "" +"**Fixed** issue `#1154 " +"`_. Text search hits " +"should now be correct when ``clip`` is specified." +msgstr "" + +#: ../../../changes.txt:1476 ac9174121f3d4bdc801a1242201cf1d9 +msgid "**Fixed** issue `#1152 `_." +msgstr "" + +#: ../../../changes.txt:1478 402f9a650ab74b11bfdb6b5aca75922a +msgid "**Fixed** issue `#1146 `_." +msgstr "" + +#: ../../../changes.txt:1480 279a68255018450d8b7e27c38190fdf9 +msgid "" +"**Added** :attr:`Link.flags` and :meth:`Link.set_flags` to the " +":ref:`Link` class. Implements enhancement requests `#1187 " +"`_." +msgstr "" + +#: ../../../changes.txt:1482 6a600251dd1d45ce9238a4813e74452e +msgid "" +"**Added** option to *simulate* :meth:`TextWriter.fill_textbox` output for" +" predicting the number of lines, that a given text would occupy in the " +"textbox." +msgstr "" + +#: ../../../changes.txt:1484 e904e722c5ea404086e0ae0361ed0b66 +msgid "" +"**Added** text output support as subcommand `gettext` to the ``fitz`` CLI" +" module. Most importantly, original **physical text layout** reproduction" +" is now supported." +msgstr "" + +#: ../../../changes.txt:1489 d8a5df41c3d047b9bec77b1c92b072de +msgid "**Changes in Version 1.18.15**" +msgstr "" + +#: ../../../changes.txt:1491 a9f2620c404a491788eafdb4ad061f97 +msgid "" +"**Fixed** issue `#1088 " +"`_. Removing an " +"annotation's fill color should now work again both ways, using the " +"``fill_color=[]`` argument in :meth:`Annot.update` as well as ``fill=[]``" +" in :meth:`Annot.set_colors`." +msgstr "" + +#: ../../../changes.txt:1493 fbda096b20eb41b986e22ea248d4ddc8 +msgid "" +"**Fixed** issue `#1081 " +"`_. " +":meth:`Document.subset_fonts`: fixed an error which created wrong " +"character widths for some fonts." +msgstr "" + +#: ../../../changes.txt:1495 f52eb399b9a542b2bc16df84ed9b48a0 +msgid "" +"**Fixed** issue `#1078 " +"`_. :meth:`Page.get_text`" +" and other methods related to text extraction: changed the default value " +"of the :ref:`TextPage` ``flags`` parameter. All whitespace and " +":data:`ligatures` are now preserved." +msgstr "" + +#: ../../../changes.txt:1497 3e076c06c88b48b2ab97492fc0ef3366 +msgid "" +"**Fixed** issue `#1085 " +"`_. The old *snake_cased*" +" alias of ``fitz.detTextlength`` is now defined correctly." +msgstr "" + +#: ../../../changes.txt:1499 d005b102d62c4c49b15bb576cabd4189 +msgid "" +"**Changed** :meth:`Document.subset_fonts` will now correctly prefix font " +"subsets with an appropriate six letter uppercase tag, complying with the " +"PDF specification." +msgstr "" + +#: ../../../changes.txt:1501 e550e264b7d949249463ef766b4ccd3c +msgid "" +"**Added** new method :meth:`Widget.button_states` which returns the " +"possible values that a button-type field can have when being set to " +"\"on\" or \"off\"." +msgstr "" + +#: ../../../changes.txt:1503 811db94932604f10be4c635ecd2c7001 +msgid "" +"**Added** support of text with **Small Capital** letters to the " +":ref:`Font` and :ref:`TextWriter` classes. This is reflected by an " +"additional bool parameter ``small_caps`` in various of their methods." +msgstr "" + +#: ../../../changes.txt:1508 96033adec8824abdaa859944b30b9cbb +msgid "**Changes in Version 1.18.14**" +msgstr "" + +#: ../../../changes.txt:1510 0311a9d379ee4d52aff0d257ab2a9114 +msgid "" +"**Finished** implementing new, \"snake_cased\" names for methods and " +"properties, that were \"camelCased\" and awkward in many aspects. At the " +"end of this documentation, there is section :ref:`Deprecated` with more " +"background and a mapping of old to new names." +msgstr "" + +#: ../../../changes.txt:1512 8384d8ca12e2404d8a22fccdf28b4730 +msgid "" +"**Fixed** issue `#1053 " +"`_. " +":meth:`Page.insert_image`: when given, include image mask in the hash " +"computation." +msgstr "" + +#: ../../../changes.txt:1514 48fe45103dea418aa007745d19ec6150 +msgid "" +"**Fixed** issue `#1043 " +"`_. Added " +"``Pixmap.getPNGdata`` to the aliases of :meth:`Pixmap.tobytes`." +msgstr "" + +#: ../../../changes.txt:1516 3cb55c93f0014710955b68f707b9adbe +msgid "" +"**Fixed** an internal error when computing the enveloping rectangle of " +"drawn paths as returned by :meth:`Page.get_drawings`." +msgstr "" + +#: ../../../changes.txt:1518 fc780f03d6984e9f8e7828db6e63f8d2 +msgid "" +"**Fixed** an internal error occasionally causing loops when outputting " +"text via :meth:`TextWriter.fill_textbox`." +msgstr "" + +#: ../../../changes.txt:1520 79351fb9a4c140fa836fafa07d10f1ba +msgid "" +"**Added** :meth:`Font.char_lengths`, which returns a tuple of character " +"widths of a string." +msgstr "" + +#: ../../../changes.txt:1522 5bbea453d3134f82b4e00715877f5d6a +msgid "" +"**Added** more ways to specify pages in :meth:`Document.delete_pages`. " +"Now a sequence (list, tuple or range) can be specified, and the Python " +"``del`` statement can be used. In the latter case, Python ``slices`` are " +"also accepted." +msgstr "" + +#: ../../../changes.txt:1524 b6407c1172a8423cbecdc6a48dc6fb11 +msgid "" +"**Changed** :meth:`Document.del_toc_item`, which disables a single item " +"of the TOC: previously, the title text was removed. Instead, now the " +"complete item will be shown grayed-out by supporting viewers." +msgstr "" + +#: ../../../changes.txt:1529 7e33262c6b504f278e6ec5f30135a24b +msgid "**Changes in Version 1.18.13**" +msgstr "" + +#: ../../../changes.txt:1531 e331d7b0d02f455d897a482a2f066edf +msgid "**Fixed** issue `#1014 `_." +msgstr "" + +#: ../../../changes.txt:1532 854d698bb76840259fb63ae04348b283 +msgid "" +"**Fixed** an internal memory leak when computing image bboxes -- " +":meth:`Page.get_image_bbox`." +msgstr "" + +#: ../../../changes.txt:1533 1e050e2aba4240eba764296cc8f8f55b +msgid "" +"**Added** support for low-level access and modification of the PDF " +"trailer. Applies to :meth:`Document.xref_get_keys`, " +":meth:`Document.xref_get_key`, and :meth:`Document.xref_set_key`." +msgstr "" + +#: ../../../changes.txt:1534 f1d8388f983543a48cfa456ea3e4e24a +msgid "**Added** documentation for maintaining private entries in PDF metadata." +msgstr "" + +#: ../../../changes.txt:1535 b591b0f7756744eebd9f5851d773bde4 +msgid "" +"**Added** documentation for handling transparent image insertions, " +":meth:`Page.insert_image`." +msgstr "" + +#: ../../../changes.txt:1536 12e66b2b1e86467980bdc6528ad951c7 +msgid "" +"**Added** :meth:`Page.get_image_rects`, an improved version of " +":meth:`Page.get_image_bbox`." +msgstr "" + +#: ../../../changes.txt:1537 ad5ef4e7afb64311a13fbdf82dc127e0 +msgid "" +"**Changed** :meth:`Document.delete_pages` to support various ways of " +"specifying pages to delete. Implements `#1042 " +"`_." +msgstr "" + +#: ../../../changes.txt:1538 bd0ff477799347a581c13deed3fc5521 +msgid "" +"**Changed** :meth:`Page.insert_image` to also accept the xref of an " +"existing image in the file. This allows \"copying\" images between pages," +" and extremely fast mutiple insertions." +msgstr "" + +#: ../../../changes.txt:1539 99264003b05242f99ceff055d109e640 +msgid "" +"**Changed** :meth:`Page.insert_image` to also accept the integer " +"parameter ``alpha``. To be used for performance improvements." +msgstr "" + +#: ../../../changes.txt:1540 e202d5d349ea4d998926e379a3746669 +msgid "" +"**Changed** :meth:`Pixmap.set_alpha` to support new parameters for pre-" +"multiplying colors with their alpha values and setting a specific color " +"to fully transparent (e.g. white)." +msgstr "" + +#: ../../../changes.txt:1541 5d2a77b721e94658bc7db8ea33ca5a54 +msgid "" +"**Changed** :meth:`Document.embfile_add` to automatically set creation " +"and modification date-time. Correspondingly, :meth:`Document.embfile_upd`" +" automatically maintains modification date-time (``/ModDate`` PDF key), " +"and :meth:`Document.embfile_info` correspondingly reports these data. In " +"addition, the embedded file's associated \"collection item\" is included " +"via its :data:`xref`. This supports the development of PDF portfolio " +"applications." +msgstr "" + +#: ../../../changes.txt:1545 dd573ecd85b64c379dc198e1d848ca3d +msgid "**Changes in Version 1.18.11 / 1.18.12**" +msgstr "" + +#: ../../../changes.txt:1547 5bd355b2bd3e44ad812ec18e6e0a336b +msgid "" +"**Fixed** issue `#972 `_. " +"Improved layout of source distribution material." +msgstr "" + +#: ../../../changes.txt:1548 c2f29d093d80453b8c31e5db23bf4a71 +msgid "" +"**Fixed** issue `#962 `_. " +"Stabilized Linux distribution detection for generating PyMuPDF from " +"sources." +msgstr "" + +#: ../../../changes.txt:1549 4e50a28935fe4d2eb05b3603759844d1 +msgid "" +"**Added:** :meth:`Page.get_xobjects` delivers the result of " +":meth:`Document.get_page_xobjects`." +msgstr "" + +#: ../../../changes.txt:1550 3ab2760e98984dd6bad3034c039a0491 +msgid "" +"**Added:** :meth:`Page.get_image_info` delivers meta information for all " +"images shown on the page." +msgstr "" + +#: ../../../changes.txt:1551 91b085ef27984b72ac72a962c7cbc977 +msgid "" +"**Added:** :meth:`Tools.mupdf_display_warnings` allows setting on / off " +"the display of MuPDF-generated warnings. The default is off." +msgstr "" + +#: ../../../changes.txt:1552 a0960b5ab5974e588fde77393423500f +msgid "" +"**Added:** :meth:`Document.ez_save` convenience alias of " +":meth:`Document.save` with some different defaults." +msgstr "" + +#: ../../../changes.txt:1553 c6d910ded36b4aa5a130d05412bf8314 +msgid "" +"**Changed:** Image extractions of document pages now also contain the " +"image's **transformation matrix**. This concerns " +":meth:`Page.get_image_bbox` and the DICT, JSON, RAWDICT, and RAWJSON " +"variants of :meth:`Page.get_text`." +msgstr "" + +#: ../../../changes.txt:1558 d707ea216f4c4472aec2746cb96ddc20 +msgid "**Changes in Version 1.18.10**" +msgstr "" + +#: ../../../changes.txt:1560 021a4ff32dbf42d5bdf2c63c62bbd4b9 +msgid "" +"**Fixed** issue `#941 `_. " +"Added old aliases for :meth:`DisplayList.get_pixmap` and " +":meth:`DisplayList.get_textpage`." +msgstr "" + +#: ../../../changes.txt:1561 f31040d109554efba1c804e6b80b91c4 +msgid "" +"**Fixed** issue `#929 `_. " +"Stabilized removal of JavaScript objects with :meth:`Document.scrub`." +msgstr "" + +#: ../../../changes.txt:1562 d92283070ba648cf9313d1b486568d7c +msgid "" +"**Fixed** issue `#927 `_. " +"Removed a loop in the reworked :meth:`TextWriter.fill_textbox`." +msgstr "" + +#: ../../../changes.txt:1563 ea7477a2bc8c42f2903d3e90d7de9b3d +msgid "" +"**Changed** :meth:`Document.xref_get_keys` and " +":meth:`Document.xref_get_key` to also allow accessing the PDF trailer " +"dictionary. This can be done by using `-1` as the xref number argument." +msgstr "" + +#: ../../../changes.txt:1564 e32546c1b549420f8aa280a8c8ffa5dc +msgid "" +"**Added** a number of functions for reconstructing the quads for text " +"lines, spans and characters extracted by :meth:`Page.get_text` options " +"\"dict\" and \"rawdict\". See :meth:`recover_quad` and friends." +msgstr "" + +#: ../../../changes.txt:1565 3776e3b2571d4e4e8918fe317cef44db +msgid "" +"**Added** :meth:`Tools.unset_quad_corrections` to suppress character quad" +" corrections (occasionally required for erroneous fonts)." +msgstr "" + +#: ../../../changes.txt:1569 6d6c45b9b5874f949f0c148c475c8292 +msgid "**Changes in Version 1.18.9**" +msgstr "" + +#: ../../../changes.txt:1572 b664fe746c374845808f2345ae12ff4f +msgid "" +"**Fixed** issue `#888 `_. " +"Removed ambiguous statements concerning PyMuPDF's license, which is now " +"clearly stated to be GNU AGPL V3." +msgstr "" + +#: ../../../changes.txt:1573 b7e8373ddaf241329fd07e8f62e14800 +msgid "**Fixed** issue `#895 `_." +msgstr "" + +#: ../../../changes.txt:1574 c417db0238974740874e2a5b9766d1be +msgid "" +"**Fixed** issue `#896 `_. " +"Since v1.17.6 PyMuPDF suppresses the font subset tags and only reports " +"the base fontname in text extraction outputs \"dict\" / \"json\" / " +"\"rawdict\" / \"rawjson\". Now a new global parameter can request the old" +" behaviour, :meth:`Tools.set_subset_fontnames`." +msgstr "" + +#: ../../../changes.txt:1575 ce76ccfd8164412ea522a47f7446c8a1 +msgid "" +"**Fixed** issue `#885 `_. " +"Pixmap creation now also works with filenames given as ``pathlib.Paths``." +msgstr "" + +#: ../../../changes.txt:1576 82bfe6b311ba448aae8c250a181e54ee +msgid "" +"**Changed** :meth:`Document.subset_fonts`: Text is **not rewritten** any " +"more and should therefore **retain all its origial properties** -- like " +"being hidden or being controlled by Optional Content mechanisms." +msgstr "" + +#: ../../../changes.txt:1577 213d450a85124151959740a27e28b373 +msgid "" +"**Changed** :ref:`TextWriter` output to also accept text in right to left" +" mode (Arabian, Hebrew): :meth:`TextWriter.fill_textbox`, " +":meth:`TextWriter.append`. These methods now accept a new boolean " +"parameter `right_to_left`, which is ``False`` by default. Implements " +"`#897 `_." +msgstr "" + +#: ../../../changes.txt:1578 ca7482f646c245a886e3f61aec2b433b +msgid "" +"**Changed** :meth:`TextWriter.fill_textbox` to return all lines of text, " +"that did not fit in the given rectangle. Also changed the default of the " +"``warn`` parameter to no longer print a warning message in overflow " +"situations." +msgstr "" + +#: ../../../changes.txt:1579 71693cfa7586407d9eebceaa014cbc2d +msgid "" +"**Added** a utility function :meth:`recover_quad`, which computes the " +"quadrilateral of a span. This function can be used for correctly marking " +"text extracted with the \"dict\" or \"rawdict\" options of " +":meth:`Page.get_text`." +msgstr "" + +#: ../../../changes.txt:1583 661b66c74542413aac35590b79ca38c9 +msgid "**Changes in Version 1.18.8**" +msgstr "" + +#: ../../../changes.txt:1586 2d0cbe8f762144909b59ca9a37f49f0e +msgid "" +"This is a bug fix version only. We are publishing early because of the " +"potentially widely used functions." +msgstr "" + +#: ../../../changes.txt:1588 9bc66d4893594a18838a6bfb422a0453 +msgid "" +"**Fixed** issue `#881 `_. " +"Fixed a memory leak in :meth:`Page.insert_image` when inserting images " +"from files or memory." +msgstr "" + +#: ../../../changes.txt:1589 21c6b67d2342429aa43a89a1f386500a +msgid "" +"**Fixed** issue `#878 `_. " +"``pathlib.Path`` objects should now correctly handle file path " +"hierarchies." +msgstr "" + +#: ../../../changes.txt:1594 d62456f1fb3a4193aa61e1471de9d7d9 +msgid "**Changes in Version 1.18.7**" +msgstr "" + +#: ../../../changes.txt:1597 b2f8f15da7ad4d4cb2f5c6775b47efcc +msgid "" +"**Added** an experimental :meth:`Document.subset_fonts` which reduces the" +" size of eligible fonts based on their use by text in the PDF. Implements" +" `#855 `_." +msgstr "" + +#: ../../../changes.txt:1598 fb2d1980c9c64330b986cc526c756b3e +msgid "" +"**Implemented** request `#870 " +"`_: " +":meth:`Document.convert_to_pdf` now also supports PDF documents." +msgstr "" + +#: ../../../changes.txt:1599 c458d295a540457081cba8fe541b07d1 +msgid "" +"**Renamed** ``Document.write`` to :meth:`Document.tobytes` for greater " +"clarity. But the deprecated name remains available for some time." +msgstr "" + +#: ../../../changes.txt:1600 10e1725b4c934c378e0e845de2001938 +msgid "" +"**Implemented** request `#843 " +"`_: " +":meth:`Document.tobytes` now supports linearized PDF output. " +":meth:`Document.save` now also supports writing to Python **file " +"objects**. In addition, the open function now also supports Python file " +"objects." +msgstr "" + +#: ../../../changes.txt:1601 540893b1e4a6451182dda94e29ec91b7 +msgid "**Fixed** issue `#844 `_." +msgstr "" + +#: ../../../changes.txt:1602 e6fe126b81c84c7d82fc23847cb030ad +msgid "**Fixed** issue `#838 `_." +msgstr "" + +#: ../../../changes.txt:1603 ea6e976c20b34d6e82167c3b41020785 +msgid "" +"**Fixed** issue `#823 `_. " +"More logic for better support of OCRed text output (Tesseract, ABBYY)." +msgstr "" + +#: ../../../changes.txt:1604 167e0fbd590b42fcadacf22770ab687e +msgid "**Fixed** issue `#818 `_." +msgstr "" + +#: ../../../changes.txt:1605 9cddf1693df74c26b38a4f394617a9ef +msgid "**Fixed** issue `#814 `_." +msgstr "" + +#: ../../../changes.txt:1606 1b6de14349784d92a4f891b1b418bac4 +msgid "" +"**Added** :meth:`Document.get_page_labels` which returns a list of page " +"label definitions of a PDF." +msgstr "" + +#: ../../../changes.txt:1607 84dd27744e884bab88cfeaa307ab4e6b +msgid "" +"**Added** :meth:`Document.has_annots` and :meth:`Document.has_links` to " +"check whether these object types are present anywhere in a PDF." +msgstr "" + +#: ../../../changes.txt:1608 ec36dbebe4c6426e99d446934e13d6b2 +msgid "" +"**Added** expert low-level functions to simplify inquiry and modification" +" of PDF object sources: :meth:`Document.xref_get_keys` lists the keys of " +"object :data:`xref`, :meth:`Document.xref_get_key` returns type and " +"content of a key, and :meth:`Document.xref_set_key` modifies the key's " +"value." +msgstr "" + +#: ../../../changes.txt:1609 51fd4d0216864e24b2dbd6b6e6249c77 +msgid "" +"**Added** parameter ``thumbnails`` to :meth:`Document.scrub` to also " +"allow removing page thumbnail images." +msgstr "" + +#: ../../../changes.txt:1610 896815586b2f4f29b687ef9f295a28d6 +msgid "" +"**Improved** documentation for how to add valid text marker annotations " +"for non-horizontal text." +msgstr "" + +#: ../../../changes.txt:1612 fa08553ca010466e89e85b03a58ae882 +msgid "" +"We continued the process of renaming methods and properties from " +"*\"mixedCase\"* to *\"snake_case\"*. Documentation usually mentions the " +"new names only, but old, deprecated names remain available for some time." +msgstr "" + +#: ../../../changes.txt:1618 014081f152ea490994db6a97be4a5bb3 +msgid "**Changes in Version 1.18.6**" +msgstr "" + +#: ../../../changes.txt:1620 1e5b5624067e4c48bafb8f9741730594 +msgid "**Fixed** issue `#812 `_." +msgstr "" + +#: ../../../changes.txt:1621 3935acf37fb94d999894f69913635919 +msgid "" +"**Fixed** issue `#793 `_. " +"Invalid document metadata previously prevented opening some documents at " +"all. This error has been removed." +msgstr "" + +#: ../../../changes.txt:1622 c14dd85c69234ec6af8586c3de5ea1b4 +msgid "" +"**Fixed** issue `#792 `_. " +"Text search and text extraction will make no rectangle containment checks" +" at all if the default ``clip=None`` is used." +msgstr "" + +#: ../../../changes.txt:1623 105f7f16d5e8432aba3b8e341b5ab533 +msgid "**Fixed** issue `#785 `_." +msgstr "" + +#: ../../../changes.txt:1624 b1ca38ca3cad4eb0a99eaab0b79aa3f3 +msgid "" +"**Fixed** issue `#780 `_. " +"Corrected a parameter check error." +msgstr "" + +#: ../../../changes.txt:1625 c42692b91d9742218ca7a8cf99b936f0 +msgid "" +"**Fixed** issue `#779 `_. " +"Fixed typo" +msgstr "" + +#: ../../../changes.txt:1626 5104c62d4a7e43b48a5e55ce45980f70 +msgid "" +"**Added** an option to set the desired line height for text boxes. " +"Implements `#804 `_." +msgstr "" + +#: ../../../changes.txt:1627 2601095d28064734bb0369a235e90508 +msgid "" +"**Changed** text position retrieval to better cope with Tesseract's " +"glyphless font. Implements `#803 " +"`_." +msgstr "" + +#: ../../../changes.txt:1628 3589cb24a8264a83859f502be8212423 +msgid "" +"**Added** an option to choose the prefix of new annotations, fields and " +"links for providing unique annotation ids. Implements request `#807 " +"`_." +msgstr "" + +#: ../../../changes.txt:1629 840ad19cb59a497e9bd9755fb4a0734b +msgid "" +"**Added** getting and setting color and text properties for Table of " +"Contents items for PDFs. Implements `#779 " +"`_." +msgstr "" + +#: ../../../changes.txt:1630 0c739e4d3aba45b993b681b79ad3bca9 +msgid "" +"**Added** PDF page label handling: :meth:`Page.get_label()` returns the " +"page label, :meth:`Document.get_page_numbers` return all page numbers " +"having a specified label, and :meth:`Document.set_page_labels` adds or " +"updates a PDF's page label definition." +msgstr "" + +#: ../../../changes.txt:1635 4e82d874d63246a0b485fb4fecb1cd08 +msgid "" +"This version introduces **Python type hinting**. The goal is to provide " +"each parameter and the return value of all functions and methods with " +"type information. This still is work in progress although the majority of" +" functions has already been handled." +msgstr "" + +#: ../../../changes.txt:1640 1be8b2db48b8416ea5fea50ed24a1e3d +msgid "**Changes in Version 1.18.5**" +msgstr "" + +#: ../../../changes.txt:1642 6cf12425d10a48a6aecc144d81757c81 +msgid "" +"Apart from several fixes, this version also focusses on several minor, " +"but important feature improvements. Among the latter is a more precise " +"computation of proper line heights and insertion points for writing / " +"inserting text. As opposed to using font-agnostic constants, these values" +" are now taken from the font's properties." +msgstr "" + +#: ../../../changes.txt:1644 d03b677132f34061b38352314d9d8bac +msgid "" +"Also note that this is the first version which does no longer provide " +"pregenerated wheels for Python versions older than 3.6. PIP also " +"discontinues support for these by end of this year 2020." +msgstr "" + +#: ../../../changes.txt:1646 9c69b4298774494fa7be029e83ab311e +msgid "" +"**Fixed** issue `#771 `_. " +"By using \"small glyph heights\" option, the full page text can be " +"extracted." +msgstr "" + +#: ../../../changes.txt:1647 b72c5e7d653942c395d80159dd81551c +msgid "**Fixed** issue `#768 `_." +msgstr "" + +#: ../../../changes.txt:1648 85a78429d2614ee59bf6c3ac6482781e +msgid "**Fixed** issue `#750 `_." +msgstr "" + +#: ../../../changes.txt:1649 3e0227cb7eaa468d9440c831d0c95d66 +msgid "" +"**Fixed** issue `#739 `_. " +"The \"dict\", \"rawdict\" and corresponding JSON output variants now have" +" two new *span* keys: ``\"ascender\"`` and ``\"descender\"``. These " +"floats represent special font properties which can be used to compute " +"bboxes of spans or characters of **exactly fontsize height** (as opposed " +"to the default line height). An example algorithm is shown in section " +"\"Span Dictionary\" `here " +"`_. Also improved the " +"detection and correction of ill-specified ascender / descender values " +"encountered in some fonts." +msgstr "" + +#: ../../../changes.txt:1650 9c6d6c1f41164b168d157c6befcebe37 +msgid "" +"**Added** a new, experimental :meth:`Tools.set_small_glyph_heights` -- " +"also in response to issue `#739 " +"`_. This method sets or " +"unsets a global parameter to **always compute bboxes with fontsize " +"height**. If \"on\", text searching and all text extractions will " +"returned rectangles, bboxes and quads with a smaller height." +msgstr "" + +#: ../../../changes.txt:1651 edea78523a1a416e894d68d2020d06e1 +msgid "**Fixed** issue `#728 `_." +msgstr "" + +#: ../../../changes.txt:1652 6d63172dde3f4dd9a7be2e154ab868f7 +msgid "" +"**Changed** fill color logic of 'Polyline' annotations: this parameter " +"now only pertains to line end symbols -- the annotation itself can no " +"longer have a fill color. Also addresses issue `#727 " +"`_." +msgstr "" + +#: ../../../changes.txt:1653 bff7c018a8ea4a9ba93dd60a7ab9806e +msgid "" +"**Changed** :meth:`Page.getImageBbox` to also compute the bbox if the " +"image is contained in an XObject." +msgstr "" + +#: ../../../changes.txt:1654 0b0af59d5d1043c48f87ac1a1b3b3bef +msgid "" +"**Changed** :meth:`Shape.insertTextbox`, resp. " +":meth:`Page.insertTextbox`, resp. :meth:`TextWriter.fillTextbox` to " +"respect font's properties \"ascender\" / \"descender\" when computing " +"line height and insertion point. This should no longer lead to line " +"overlaps for multi-line output. These methods used to ignore font " +"specifics and used constant values instead." +msgstr "" + +#: ../../../changes.txt:1659 016cd54d9d24470db2db9b6aa1e9f531 +msgid "**Changes in Version 1.18.4**" +msgstr "" + +#: ../../../changes.txt:1661 952b800709df4d808c2d6d95e7269956 +msgid "" +"This version adds several features to support PDF Optional Content. Among" +" other things, this includes OCMDs (Optional Content Membership " +"Dictionaries) with the full scope of *\"visibility expressions\"* (PDF " +"key ``/VE``), text insertions (including the :ref:`TextWriter` class) and" +" drawings." +msgstr "" + +#: ../../../changes.txt:1663 44e02c90fe7341ce93f7b4d9eb205d59 +msgid "" +"**Fixed** issue `#727 `_. " +"Freetext annotations now support an uncolored rectangle when " +"``fill_color=None``." +msgstr "" + +#: ../../../changes.txt:1664 5ec24a657e804b77a217164637f5339b +msgid "" +"**Fixed** issue `#726 `_. " +"UTF-8 encoding errors are now handled for HTML / XML :meth:`Page.getText`" +" output." +msgstr "" + +#: ../../../changes.txt:1665 8f57327a087c44de9cb5eeaac4814865 +msgid "" +"**Fixed** issue `#724 `_. " +"Empty values are no longer stored in the PDF /Info metadata dictionary." +msgstr "" + +#: ../../../changes.txt:1666 e9a88fac47b84e8398d092b81a8dc9bc +msgid "" +"**Added** new methods :meth:`Document.set_oc` and :meth:`Document.get_oc`" +" to set or get optional content references for **existing** image and " +"form XObjects. These methods are similar to the same-named methods of " +":ref:`Annot`." +msgstr "" + +#: ../../../changes.txt:1667 cf1cf7090a7a43c480269083a0c4b49c +msgid "" +"**Added** :meth:`Document.set_ocmd`, :meth:`Document.get_ocmd` for " +"handling OCMDs." +msgstr "" + +#: ../../../changes.txt:1668 42c7e4582265428298876d83dc6ca43f +msgid "**Added** **Optional Content** support for text insertion and drawing." +msgstr "" + +#: ../../../changes.txt:1669 a9a1238c1ccb479cbdf733dd6051cee5 +msgid "" +"**Added** new method :meth:`Page.deleteWidget`, which deletes a form " +"field from a page. This is analogous to deleting annotations." +msgstr "" + +#: ../../../changes.txt:1670 4c07d363e6784cc49d63c6829e1d1f0d +msgid "" +"**Added** support for Popup annotations. This includes defining the Popup" +" rectangle and setting the Popup to open or closed. Methods / attributes " +":meth:`Annot.set_popup`, :meth:`Annot.set_open`, :attr:`Annot.has_popup`," +" :attr:`Annot.is_open`, :attr:`Annot.popup_rect`, " +":attr:`Annot.popup_xref`." +msgstr "" + +#: ../../../changes.txt:1674 e25d212c553240e9817f098fc4195950 +msgid "" +"The **naming of methods and attributes** in PyMuPDF is far from being " +"satisfactory: we have *CamelCases*, *mixedCases* and " +"*lower_case_with_underscores* all over the place. With the :ref:`Annot` " +"as the first candidate, we have started an activity to clean this up step" +" by step, converting to lower case with underscores for methods and " +"attributes while keeping UPPERCASE for the constants." +msgstr "" + +#: ../../../changes.txt:1676 96c07d5004c24b07b6c0fe9034806420 +msgid "" +"Old names will remain available to prevent code breaks, but they will no " +"longer be mentioned in the documentation." +msgstr "" + +#: ../../../changes.txt:1677 09372861df9d4a368a564d880fc93960 +msgid "" +"New methods and attributes of all classes will be named according to the " +"new standard." +msgstr "" + +#: ../../../changes.txt:1681 fcc79a27ddb0485d9f08d5e25bbd1c7a +msgid "**Changes in Version 1.18.3**" +msgstr "" + +#: ../../../changes.txt:1683 1a4bc2a8b28745e0ae1d5530a85453d5 +msgid "" +"As a major new feature, this version introduces support for PDF's " +"**Optional Content** concept." +msgstr "" + +#: ../../../changes.txt:1685 1e0bcdfa4e21494ebf2479c1666fd238 +msgid "**Fixed** issue `#714 `_." +msgstr "" + +#: ../../../changes.txt:1686 38a11c6f427542248a6517f5ee5ec5be +msgid "**Fixed** issue `#711 `_." +msgstr "" + +#: ../../../changes.txt:1687 9980668a791e4a1682badb54b7d38d19 +msgid "" +"**Fixed** issue `#707 `_: " +"if a PDF user password, but no owner password is supplied nor present, " +"then the user password is also used as the owner password." +msgstr "" + +#: ../../../changes.txt:1688 0ea2b5d936aa45a8a835c8457d83d01e +msgid "" +"**Fixed** ``expand`` and ``deflate`` parameters of methods " +":meth:`Document.save` and :meth:`Document.write`. Individual image and " +"font compression should now finally work. Addresses issue `#713 " +"`_." +msgstr "" + +#: ../../../changes.txt:1689 85d020f9bd664565a24c39da19dace4b +msgid "" +"**Added** a support of PDF optional content. This includes several new " +":ref:`Document` methods for inquiring and setting optional content status" +" and adding optional content configurations and groups. In addition, " +"images, form XObjects and annotations now can be bound to optional " +"content specifications. **Resolved** issue `#709 " +"`_." +msgstr "" + +#: ../../../changes.txt:1695 1151785913404c1ba50777871c13b468 +msgid "**Changes in Version 1.18.2**" +msgstr "" + +#: ../../../changes.txt:1697 e66e0b3cbabe4b908e4777b6a2837f72 +msgid "" +"This version contains some interesting improvements for text searching: " +"any number of search hits is now returned and the **hit_max** parameter " +"was removed. The new **clip** parameter in addition allows to restrict " +"the search area. Searching now detects hyphenations at line breaks and " +"accordingly finds hyphenated words." +msgstr "" + +#: ../../../changes.txt:1699 04aa0a6d98954691bf346583ea9b911b +msgid "" +"**Fixed** issue `#575 `_: " +"if using ``quads=False`` in text searching, then overlapping rectangles " +"on the same line are joined. Previously, parts of the search string, " +"which belonged to different \"marked content\" items, each generated " +"their own rectangle -- just as if occurring on separate lines." +msgstr "" + +#: ../../../changes.txt:1700 5d7e1a3b11a94ddbba4fb265a57c322d +msgid "" +"**Added** :attr:`Document.isRepaired`, which is true if the PDF was " +"repaired on open." +msgstr "" + +#: ../../../changes.txt:1701 315a4d9dad15480c98ea149782969c19 +msgid "" +"**Added** :meth:`Document.setXmlMetadata` which either updates or creates" +" PDF XML metadata. Implements issue `#691 " +"`_." +msgstr "" + +#: ../../../changes.txt:1702 664e274feffc4b609d814ba94002fe32 +msgid "**Added** :meth:`Document.getXmlMetadata` returns PDF XML metadata." +msgstr "" + +#: ../../../changes.txt:1703 8c00f78e949b4270b42bbc2efcb32896 +msgid "" +"**Changed** creation of PDF documents: they will now always carry a PDF " +"identification (``/ID`` field) in the document trailer. Implements issue " +"`#691 `_." +msgstr "" + +#: ../../../changes.txt:1704 275f081e7a634acb98452028e30637d6 +msgid "" +"**Changed** :meth:`Page.searchFor`: a new parameter ``clip`` is accepted " +"to restrict the search to this rectangle. Correspondingly, the attribute " +":attr:`TextPage.rect` is now respected by :meth:`TextPage.search`." +msgstr "" + +#: ../../../changes.txt:1705 4857267aa9ab4e629ddb8210b7c02e47 +msgid "" +"**Changed** parameter ``hit_max`` in :meth:`Page.searchFor` and " +":meth:`TextPage.search` is now obsolete: methods will return all hits." +msgstr "" + +#: ../../../changes.txt:1706 8a0e2a5a95654e638fb0ad111274d37d +msgid "" +"**Changed** character **selection criteria** in :meth:`Page.getText`: a " +"character is now considered to be part of a ``clip`` if its bbox is fully" +" contained. Before this, a non-empty intersection was sufficient." +msgstr "" + +#: ../../../changes.txt:1707 fac2f814dc6048feb69c2a41d979d176 +msgid "" +"**Changed** :meth:`Document.scrub` to support a new option " +"`redact_images`. This addresses issue `#697 " +"`_." +msgstr "" + +#: ../../../changes.txt:1712 82c818c7c8024a51b8e88d348034b442 +msgid "**Changes in Version 1.18.1**" +msgstr "" + +#: ../../../changes.txt:1714 e6d690752b9d4a59b7903c97be13b415 +msgid "" +"**Fixed** issue `#692 `_. " +"PyMuPDF now detects and recovers from more cyclic resource dependencies " +"in PDF pages and for the first time reports them in the MuPDF warnings " +"store." +msgstr "" + +#: ../../../changes.txt:1715 bf94f3eed60f4c918293ba068082a554 +msgid "**Fixed** issue `#686 `_." +msgstr "" + +#: ../../../changes.txt:1716 28489ac7467745b28158efdec835973a +msgid "" +"**Added** opacity options for the :ref:`Shape` class: Stroke and fill " +"colors can now be set to some transparency value. This means that all " +":ref:`Page` draw methods, methods :meth:`Page.insertText`, " +":meth:`Page.insertTextbox`, :meth:`Shape.finish`, " +":meth:`Shape.insertText`, and :meth:`Shape.insertTextbox` support two new" +" parameters: *stroke_opacity* and *fill_opacity*." +msgstr "" + +#: ../../../changes.txt:1717 5914ea8451fd488ab1c67e75b15b8c96 +msgid "" +"**Added** new parameter ``mask`` to :meth:`Page.insertImage` for " +"optionally providing an external image mask. Resolves issue `#685 " +"`_." +msgstr "" + +#: ../../../changes.txt:1718 7486ce883d8741cfa9b3727848fd9911 +msgid "" +"**Added** :meth:`Annot.soundGet` for extracting the sound of an audio " +"annotation." +msgstr "" + +#: ../../../changes.txt:1722 be51a07e23f047b89a1192a13cc1b37e +msgid "**Changes in Version 1.18.0**" +msgstr "" + +#: ../../../changes.txt:1724 ca6e909d126841fa957a5b0e4f1c66a7 +msgid "" +"This is the first PyMuPDF version supporting MuPDF v1.18. The focus here " +"is on extending PyMuPDF's own functionality -- apart from bug fixing. " +"Subsequent PyMuPDF patches may address features new in MuPDF." +msgstr "" + +#: ../../../changes.txt:1726 f43bfd57701946f2b107c133bcd8bd88 +msgid "" +"**Fixed** issue `#519 `_. " +"This upstream bug occurred occasionally for some pages only and seems to " +"be fixed now: page layout should no longer be ruined in these cases." +msgstr "" + +#: ../../../changes.txt:1728 51a6ec75218f496e8437b853311cb4b0 +msgid "**Fixed** issue `#675 `_." +msgstr "" + +#: ../../../changes.txt:1730 08e2eed983684354963f255ea517142f +msgid "" +"Unsuccessful storage allocations should now always lead to exceptions " +"(circumvention of an upstream bug intermittently crashing the " +"interpreter)." +msgstr "" + +#: ../../../changes.txt:1731 df1a808605e74e8cb20f1019033064f5 +msgid "" +":ref:`Pixmap` size is now based on ``size_t`` instead of ``int`` in C and" +" should be correct even for extremely large pixmaps." +msgstr "" + +#: ../../../changes.txt:1733 7dc80f3c8ab6454a946ca7de5037b618 +msgid "" +"**Fixed** issue `#668 `_. " +"Specification of dashes for PDF drawing insertion should now correctly " +"reflect the PDF spec." +msgstr "" + +#: ../../../changes.txt:1734 859ac4fd8573486696af807d849195ea +msgid "" +"**Fixed** issue `#669 `_. " +"A major source of memory leakage in :meth:`Page.insert_pdf` has been " +"removed." +msgstr "" + +#: ../../../changes.txt:1735 f5e9f0bbc04844988d42074b2ac82383 +msgid "" +"**Added** keyword *\"images\"* to :meth:`Page.apply_redactions` for fine-" +"controlling the handling of images." +msgstr "" + +#: ../../../changes.txt:1736 66320728177e4eafbd8eb21f481921df +msgid "" +"**Added** :meth:`Annot.getText` and :meth:`Annot.getTextbox`, which offer" +" the same functionality as the :ref:`Page` versions." +msgstr "" + +#: ../../../changes.txt:1737 d554a335a98240e4836322e6f168e6f3 +msgid "" +"**Added** key *\"number\"* to the block dictionaries of " +":meth:`Page.getText` / :meth:`Annot.getText` for options \"dict\" and " +"\"rawdict\"." +msgstr "" + +#: ../../../changes.txt:1738 3bb9fd8512c9457cb9dc3f9ddee1efcf +msgid "" +"**Added** :meth:`glyph_name_to_unicode` and " +":meth:`unicode_to_glyph_name`. Both functions do not really connect to a " +"specific font and are now independently available, too. The data are now " +"based on the `Adobe Glyph List `_." +msgstr "" + +#: ../../../changes.txt:1739 c1d9f18ac2804a76b70e0fdd07591795 +msgid "" +"**Added** convenience functions :meth:`adobe_glyph_names` and " +":meth:`adobe_glyph_unicodes` which return the respective available data." +msgstr "" + +#: ../../../changes.txt:1740 e9161904b5794f6585c7711040efd5f8 +msgid "" +"**Added** :meth:`Page.getDrawings` which returns details of drawing " +"operations on a document page. Works for all document types." +msgstr "" + +#: ../../../changes.txt:1741 ca31aa036d664d679b8c449c655681aa +msgid "" +"Improved performance of :meth:`Document.insert_pdf`. Multiple object " +"copies are now also suppressed across multiple separate insertions from " +"the same source. This saves time, memory and target file size. Previously" +" this mechanism was only active within each single method execution. The " +"feature can also be suppressed with the new method bool parameter " +"*final=1*, which is the default." +msgstr "" + +#: ../../../changes.txt:1742 ee8ce55f44ff4207afcba82cda95520c +msgid "" +"For PNG images created from pixmaps, the resolution (dpi) is now " +"automatically set from the respective :attr:`Pixmap.xres` and " +":attr:`Pixmap.yres` values." +msgstr "" + +#: ../../../changes.txt:1747 ba946a3911f94154a0ff39d42896b15c +msgid "**Changes in Version 1.17.7**" +msgstr "" + +#: ../../../changes.txt:1749 a41bb96a21bc45978ba315fc05cd772d +msgid "" +"**Fixed** issue `#651 `_. " +"An upstream bug causing interpreter crashes in corner case redaction " +"processings was fixed by backporting MuPDF changes from their development" +" repo." +msgstr "" + +#: ../../../changes.txt:1750 f8d00207d87a47a2b7be0a7ae7824291 +msgid "" +"**Fixed** issue `#645 `_. " +"Pixmap top-left coordinates can be set (again) by their own method, " +":meth:`Pixmap.set_origin`." +msgstr "" + +#: ../../../changes.txt:1751 a858dc52d5d648d4af0b2bb212397e60 +msgid "" +"**Fixed** issue `#622 `_. " +":meth:`Page.insertImage` again accepts a :data:`rect_like` parameter." +msgstr "" + +#: ../../../changes.txt:1752 ed11bd0035ed4478905c0d60c8ba2a8f +msgid "" +"**Added** severeal new methods to improve and speed-up table of contents " +"(TOC) handling. Among other things, TOC items can now changed or deleted " +"individually -- without always replacing the complete TOC. Furthermore, " +"access to some PDF page attributes is now possible without first " +"**loading** the page. This has a very significant impact on the " +"performance of TOC manipulation." +msgstr "" + +#: ../../../changes.txt:1753 25093ac3f49748ed823bbf337c97d5a5 +msgid "" +"**Added** an option to :meth:`Document.insert_pdf` which allows " +"displaying progress messages. Adresses `#640 " +"`_." +msgstr "" + +#: ../../../changes.txt:1754 42c40396212a4b5f92fce691a8a4bcef +msgid "" +"**Added** :meth:`Page.getTextbox` which extracts text contained in a " +"rectangle. In many cases, this should obsolete writing your own script " +"for this type of thing." +msgstr "" + +#: ../../../changes.txt:1755 f58cac906e164403aa5e0fcce67c8e21 +msgid "" +"**Added** new ``clip`` parameter to :meth:`Page.getText` to simplify and " +"speed up text extraction of page sub areas." +msgstr "" + +#: ../../../changes.txt:1756 a5ea759836cf4cd3851d99d987d72630 +msgid "" +"**Added** :meth:`TextWriter.appendv` to add text in **vertical write " +"mode**. Addresses issue `#653 " +"`_" +msgstr "" + +#: ../../../changes.txt:1761 8543b67efb064fbf948a80e1dbef272d +msgid "**Changes in Version 1.17.6**" +msgstr "" + +#: ../../../changes.txt:1763 fd6d1684e04348b5aae2cd5b472cdab9 +msgid "**Fixed** issue `#605 `_" +msgstr "" + +#: ../../../changes.txt:1764 7b4378d373954f788448dd5a30112ac6 +msgid "" +"**Fixed** issue `#600 `_ " +"-- text should now be correctly positioned also for pages with a CropBox " +"smaller than MediaBox." +msgstr "" + +#: ../../../changes.txt:1765 f02f2531ae134e728e35b2ba1348257a +msgid "" +"**Added** text span dictionary key ``origin`` which contains the lower " +"left coordinate of the first character in that span." +msgstr "" + +#: ../../../changes.txt:1766 bf078edb7bf14cf8ae624a2c2ae5b621 +msgid "**Added** attribute :attr:`Font.buffer`, a *bytes* copy of the font file." +msgstr "" + +#: ../../../changes.txt:1767 dc92466b657d488f8a12484321ee008e +msgid "" +"**Added** parameter *sanitize* to :meth:`Page.cleanContents`. Allows " +"switching of sanitization, so only syntax cleaning will be done." +msgstr "" + +#: ../../../changes.txt:1771 2ae9a785b9bf4bbeb0558dfe1a4bdb9f +msgid "**Changes in Version 1.17.5**" +msgstr "" + +#: ../../../changes.txt:1773 049d1da0f7274c66b958db83e3dfafcd +msgid "" +"**Fixed** issue `#561 `_ " +"-- second go: certain :ref:`TextWriter` usages with many alternating " +"fonts did not work correctly." +msgstr "" + +#: ../../../changes.txt:1774 e73316fa7e084e5791aab2bd991e374e +msgid "**Fixed** issue `#566 `_." +msgstr "" + +#: ../../../changes.txt:1775 35847dda64994ecf8aa83c2c1c13e237 +msgid "**Fixed** issue `#568 `_." +msgstr "" + +#: ../../../changes.txt:1776 9a33ee1625de4f83a82008bc750e99db +msgid "" +"**Fixed** -- opacity is now correctly taken from the :ref:`TextWriter` " +"object, if not given in :meth:`TextWriter.writeText`." +msgstr "" + +#: ../../../changes.txt:1777 441f11e9afad487c9103785fe1e8886b +msgid "" +"**Added** a new global attribute :attr:`fitz_fontdescriptors`. Contains " +"information about usable fonts from repository `pymupdf-fonts " +"`_." +msgstr "" + +#: ../../../changes.txt:1778 69596840785c476fac4cd007507c65e1 +msgid "" +"**Added** :meth:`Font.valid_codepoints` which returns an array of unicode" +" codepoints for which the font has a glyph." +msgstr "" + +#: ../../../changes.txt:1779 8d09e945e9fc4491ae8c1f6774ad9604 +msgid "" +"**Added** option ``text_as_path`` to :meth:`Page.getSVGimage`. this " +"implements `#580 `_. " +"Generates much smaller SVG files with parseable text if set to ``False``." +msgstr "" + +#: ../../../changes.txt:1784 9e7d00a285dd438a9930b3426bd86c37 +msgid "**Changes in Version 1.17.4**" +msgstr "" + +#: ../../../changes.txt:1786 aa309edb110e49268aa32ab0b31f24f8 +msgid "" +"**Fixed** issue `#561 `_. " +"Handling of more than 10 :ref:`Font` objects on one page should now work " +"correctly." +msgstr "" + +#: ../../../changes.txt:1787 c70048adb62e4ed089fb3d9296329ef3 +msgid "" +"**Fixed** issue `#562 `_. " +"Annotation pixmaps are no longer derived from the page pixmap, thus " +"avoiding unintended inclusion of page content." +msgstr "" + +#: ../../../changes.txt:1788 c035b1e664d14003b2a7c98a76c12cdb +msgid "" +"**Fixed** issue `#559 `_. " +"This |MuPDF| bug is being temporarily fixed with a pre-version of MuPDF's" +" next release." +msgstr "" + +#: ../../../changes.txt:1789 0f4ce78d82aa4ad784b0d94ddd0a20c3 +msgid "" +"**Added** utility function :meth:`repair_mono_font` for correcting " +"displayed character spacing for some mono-spaced fonts." +msgstr "" + +#: ../../../changes.txt:1790 7409c09d614e42e49ac2021d3200f46f +msgid "" +"**Added** utility method :meth:`Document.need_appearances` for fine-" +"controlling Form PDF behavior. Addresses issue `#563 " +"`_." +msgstr "" + +#: ../../../changes.txt:1791 ec66d102d47c44328d4ad9453574409c +msgid "" +"**Added** utility function :meth:`sRGB_to_pdf` to recover the PDF color " +"triple for a given color integer in sRGB format." +msgstr "" + +#: ../../../changes.txt:1792 85a9501591b449f9a28e5839f61d8736 +msgid "" +"**Added** utility function :meth:`sRGB_to_rgb` to recover the (R, G, B) " +"color triple for a given color integer in sRGB format." +msgstr "" + +#: ../../../changes.txt:1793 528e7fa8db3d4134afcd2783f1fa71ef +msgid "" +"**Added** utility function :meth:`make_table` which delivers table cells " +"for a given rectangle and desired numbers of columns and rows." +msgstr "" + +#: ../../../changes.txt:1794 ac2a95ed4e70477d93cc3770ab97fa46 +msgid "" +"**Added** support for optional fonts in repository `pymupdf-fonts " +"`_." +msgstr "" + +#: ../../../changes.txt:1798 02bb099ea49246638ca691b6aa100f12 +msgid "**Changes in Version 1.17.3**" +msgstr "" + +#: ../../../changes.txt:1800 48cc27fbc35c4cf182487f6578237379 +msgid "" +"**Fixed** an undocumented issue, which prevented fully cleaning a PDF " +"page when using :meth:`Page.cleanContents`." +msgstr "" + +#: ../../../changes.txt:1801 3c216e2a5fb34b76a331155626b1fc8a +msgid "" +"**Fixed** issue `#540 `_. " +"Text extraction for EPUB should again work correctly." +msgstr "" + +#: ../../../changes.txt:1802 d90075a4b2724dc89c3fbf6b6a1002d0 +msgid "" +"**Fixed** issue `#548 `_. " +"Documentation now includes ``LINK_NAMED``." +msgstr "" + +#: ../../../changes.txt:1803 4e418ace6afa452a96f9b1df1f94fb78 +msgid "" +"**Added** new parameter to control start of text in " +":meth:`TextWriter.fillTextbox`. Implements `#549 " +"`_." +msgstr "" + +#: ../../../changes.txt:1804 e850089b98914a46ba7a2e17db85a0ef +msgid "" +"**Changed** documentation of :meth:`Page.add_redact_annot` to explain the" +" usage of non-builtin fonts." +msgstr "" + +#: ../../../changes.txt:1808 fa17b746b88147d48dd1cc58bffc4bdc +msgid "**Changes in Version 1.17.2**" +msgstr "" + +#: ../../../changes.txt:1810 5e97b9fac703475585e31183172a8923 +msgid "**Fixed** issue `#533 `_." +msgstr "" + +#: ../../../changes.txt:1811 cc0199c5b1904c968be859f9274efb9e +msgid "" +"**Added** options to modify 'Redact' annotation appearance. Implements " +"`#535 `_." +msgstr "" + +#: ../../../changes.txt:1816 4ca10fdd3ed14d619d39d2cc49115149 +msgid "**Changes in Version 1.17.1**" +msgstr "" + +#: ../../../changes.txt:1818 d60f94eed73a49a79ad0f768219878de +msgid "**Fixed** issue `#520 `_." +msgstr "" + +#: ../../../changes.txt:1819 f6a87834529647a58444b15124ce2017 +msgid "" +"**Fixed** issue `#525 `_. " +"Vertices for 'Ink' annots should now be correct." +msgstr "" + +#: ../../../changes.txt:1820 139f957349344d179cacb065c1d23d8a +msgid "" +"**Fixed** issue `#524 `_. " +"It is now possible to query and set rotation for applicable annotation " +"types." +msgstr "" + +#: ../../../changes.txt:1822 47cd3a2f11fd4ba3bd66d2558fbe09ce +msgid "" +"Also significantly improved inline documentation for better support of " +"interactive help." +msgstr "" + +#: ../../../changes.txt:1826 3a1a2b6f17564377ba21ce37204ab311 +msgid "**Changes in Version 1.17.0**" +msgstr "" + +#: ../../../changes.txt:1828 8dfe85750d954a448005365db34cfe34 +msgid "" +"This version is based on MuPDF v1.17. Following are highlights of new and" +" changed features:" +msgstr "" + +#: ../../../changes.txt:1830 e9f0a490a63845bba721ca403ef8a07b +msgid "" +"**Added** extended language support for annotations and widgets: a " +"mixture of Latin, Greece, Russian, Chinese, Japanese and Korean " +"characters can now be used in 'FreeText' annotations and text widgets. No" +" special arrangement is required to use it." +msgstr "" + +#: ../../../changes.txt:1832 012da2fb91e54769a61b78e6b779cae9 +msgid "" +"Faster page access is implemented for documents supporting a \"chapter\" " +"structure. This applies to EPUB documents currently. This comes with " +"several new :ref:`Document` methods and changes for " +":meth:`Document.loadPage` and the \"indexed\" page access *doc[n]*: In " +"addition to specifying a page number as before, a tuple *(chaper, pno)* " +"can be specified to identify the desired page." +msgstr "" + +#: ../../../changes.txt:1834 251cb3bc65c9462f8db80d29fe71b1c7 +msgid "" +"**Changed:** Improved support of redaction annotations: images overlapped" +" by redactions are **permanantly modified** by erasing the overlap areas." +" Also links are removed if overlapped by redactions. This is now fully in" +" sync with PDF specifications." +msgstr "" + +#: ../../../changes.txt:1838 c2a181bba8964ee7a4206865bd7fbdd4 +msgid "" +"**Changed** :meth:`TextWriter.writeText` to support the *\"morph\"* " +"parameter." +msgstr "" + +#: ../../../changes.txt:1839 7b086fcac17b41ff8669913a8bf6203a +msgid "" +"**Added** methods :meth:`Rect.morph`, :meth:`IRect.morph`, and " +":meth:`Quad.morph`, which return a new :ref:`Quad`." +msgstr "" + +#: ../../../changes.txt:1840 ce27ddb3979c4d0f98802306945bd005 +msgid "" +"**Changed** :meth:`Page.add_freetext_annot` to support text alignment via" +" a new *\"align\"* parameter." +msgstr "" + +#: ../../../changes.txt:1841 9b91ce0c8efc4d218974462abdebd5b6 +msgid "" +"**Fixed** issue `#508 `_. " +"Improved image rectangle calculation to hopefully deliver correct values " +"in most if not all cases." +msgstr "" + +#: ../../../changes.txt:1842 5e30ead0e5e642a1b5b6c3f0fcb44b67 +msgid "**Fixed** issue `#502 `_." +msgstr "" + +#: ../../../changes.txt:1843 6e065185286f4284b4507f0de2aecaf8 +msgid "" +"**Fixed** issue `#500 `_. " +":meth:`Document.convertToPDF` should no longer cause memory leaks." +msgstr "" + +#: ../../../changes.txt:1844 ee1a61df509e4cb1a7abac6012a30011 +msgid "" +"**Fixed** issue `#496 `_. " +"Annotations and widgets / fields are now added or modified using the " +"coordinates of the **unrotated page**. This behavior is now in sync with " +"other methods modifying PDF pages." +msgstr "" + +#: ../../../changes.txt:1845 7ac05926bfd94e21bfe11b7b8826b143 +msgid "" +"**Added** :attr:`Page.rotationMatrix` and :attr:`Page.derotationMatrix` " +"to support coordinate transformations between the rotated and the " +"original versions of a PDF page." +msgstr "" + +#: ../../../changes.txt:1847 b9269ae8b60b40caa94721f4126393ae +msgid "Potential code breaking changes:" +msgstr "" + +#: ../../../changes.txt:1849 28f81d1b42d748fbb706c75fceb62c00 +msgid "" +"The private method ``Page._getTransformation()`` has been removed. Use " +"the public :attr:`Page.transformationMattrix` instead." +msgstr "" + +#: ../../../changes.txt:1854 06eddc593f294b88bdb6dfdd2369d1c1 +msgid "**Changes in Version 1.16.18**" +msgstr "" + +#: ../../../changes.txt:1856 e4884ae592db424f88025aec2e1a97e3 +msgid "" +"This version introduces several new features around PDF text output. The " +"motivation is to simplify this task, while at the same time offering " +"extending features." +msgstr "" + +#: ../../../changes.txt:1858 fd34580fc1464f1c81d06f426d07a778 +msgid "" +"One major achievement is using MuPDF's capabilities to dynamically " +"choosing fallback fonts whenever a character cannot be found in the " +"current one. This seemlessly works for Base-14 fonts in combination with " +"CJK fonts (China, Japan, Korea). So a text may contain **any combination " +"of characters** from the Latin, Greek, Russian, Chinese, Japanese and " +"Korean languages." +msgstr "" + +#: ../../../changes.txt:1860 a99effd7cbe546f989e59f9b483fb232 +msgid "" +"**Fixed** issue `#493 `_. " +"``Pixmap(doc, xref)`` should now again correctly resemble the loaded " +"image object." +msgstr "" + +#: ../../../changes.txt:1861 f11ea8321f33403084633958bcc22980 +msgid "" +"**Fixed** issue `#488 `_. " +"Widget names are now modifiable." +msgstr "" + +#: ../../../changes.txt:1862 0972bab5863f47e8a0ed54efe4d45dbd +msgid "**Added** new class :ref:`Font` which represents a font." +msgstr "" + +#: ../../../changes.txt:1863 a400f40fbc59428796fdee5a2db49109 +msgid "" +"**Added** new class :ref:`TextWriter` which serves as a container for " +"text to be written on a page." +msgstr "" + +#: ../../../changes.txt:1864 18ae7ce08b1340c38ebe9e58381eddd0 +msgid "" +"**Added** :meth:`Page.writeText` to write one or more :ref:`TextWriter` " +"objects to the page." +msgstr "" + +#: ../../../changes.txt:1869 906be9a3eae245f780b2b4938f85d6be +msgid "**Changes in Version 1.16.17**" +msgstr "" + +#: ../../../changes.txt:1872 9ab44ff63bed416f8570d43ce05a2730 +msgid "" +"**Fixed** issue `#479 `_. " +"PyMuPDF should now more correctly report image resolutions. This applies " +"to both, images (either from images files or extracted from PDF " +"documents) and pixmaps created from images." +msgstr "" + +#: ../../../changes.txt:1873 f6150fb465494bfdab764c5be6a7d39f +msgid "" +"**Added** :meth:`Pixmap.set_dpi` which sets the image resolution in x and" +" y directions." +msgstr "" + +#: ../../../changes.txt:1877 5f591a8832cc4723ba08ad68dda41ffe +msgid "**Changes in Version 1.16.16**" +msgstr "" + +#: ../../../changes.txt:1880 68e35dadf7bb47feb20698f3a7079c1f +msgid "**Fixed** issue `#477 `_." +msgstr "" + +#: ../../../changes.txt:1881 b5a248121553493e9847869e3904c948 +msgid "**Fixed** issue `#476 `_." +msgstr "" + +#: ../../../changes.txt:1882 40380d7dd6e449cb82d11a3ab8293127 +msgid "" +"**Changed** annotation line end symbol coloring and fixed an error " +"coloring the interior of 'Polyline' /'Polygon' annotations." +msgstr "" + +#: ../../../changes.txt:1886 76f99a33a85d4eca9407eb5295343aec +msgid "**Changes in Version 1.16.14**" +msgstr "" + +#: ../../../changes.txt:1889 01ec2719c2f4429b97eb0a71a3b527ae +msgid "" +"**Changed** text marker annotations to accept parameters beyond just " +"quadrilaterals such that now **text lines between two given points can be" +" marked**." +msgstr "" + +#: ../../../changes.txt:1891 ceafcbb56ef344abbf5624cfe164ae02 +msgid "" +"**Added** :meth:`Document.scrub` which **removes potentially sensitive " +"data** from a PDF. Implements `#453 " +"`_." +msgstr "" + +#: ../../../changes.txt:1893 de2d1b57b55b48d5b70e26d41a679763 +msgid "" +"**Added** :meth:`Annot.blendMode` which returns the **blend mode** of " +"annotations." +msgstr "" + +#: ../../../changes.txt:1895 27e96fc2e3254412a9af7c240106824e +msgid "" +"**Added** :meth:`Annot.setBlendMode` to set the annotation's blend mode. " +"This resolves issue `#416 " +"`_." +msgstr "" + +#: ../../../changes.txt:1896 20eeb00060ba4ac6b6666e571d2e5d75 +msgid "" +"**Changed** :meth:`Annot.update` to accept additional parameters for " +"setting blend mode and opacity." +msgstr "" + +#: ../../../changes.txt:1897 9c48b18d08cb4bf69b967a567749155f +msgid "" +"**Added** advanced graphics features to **control the anti-aliasing " +"values**, :meth:`Tools.set_aa_level`. Resolves `#467 " +"`_" +msgstr "" + +#: ../../../changes.txt:1899 e8e7e423f4dd4a48b63d582e7ab631e8 +msgid "**Fixed** issue `#474 `_." +msgstr "" + +#: ../../../changes.txt:1900 595ebd3ce6e2430da70b5c7885f3918a +msgid "**Fixed** issue `#466 `_." +msgstr "" + +#: ../../../changes.txt:1906 cc058930574c459ab2ddce625ffa9ccf +msgid "**Changes in Version 1.16.13**" +msgstr "" + +#: ../../../changes.txt:1909 50e20743a9084111ae7c001dbe4f8b62 +msgid "" +"**Added** :meth:`Document.getPageXObjectList` which returns a list of " +"**Form XObjects** of the page." +msgstr "" + +#: ../../../changes.txt:1910 c3c2bb5c9c6843c3830b39a7c0b31b37 +msgid "" +"**Added** :meth:`Page.setMediaBox` for changing the physical PDF page " +"size." +msgstr "" + +#: ../../../changes.txt:1911 eff993e6b1eb4774acd0c05308fd1e8a +msgid "" +"**Added** :ref:`Page` methods which have been internal before: " +":meth:`Page.cleanContents` (= :meth:`Page._cleanContents`), " +":meth:`Page.getContents` (= :meth:`Page._getContents`), " +":meth:`Page.getTransformation` (= :meth:`Page._getTransformation`)." +msgstr "" + +#: ../../../changes.txt:1917 8675947ea2c347a489c499af3d4fa06d +msgid "**Changes in Version 1.16.12**" +msgstr "" + +#: ../../../changes.txt:1919 af794db2704a4b0cbabb81f9299983fe +msgid "**Fixed** issue `#447 `_" +msgstr "" + +#: ../../../changes.txt:1920 129d68489e594bea97a5ce52a78d206a +msgid "**Fixed** issue `#461 `_." +msgstr "" + +#: ../../../changes.txt:1921 03a80ef0f419466495ad147a3dfeaeb2 +msgid "**Fixed** issue `#397 `_." +msgstr "" + +#: ../../../changes.txt:1922 a2bcd3fec32245f0a2327ad77f1b5445 +msgid "**Fixed** issue `#463 `_." +msgstr "" + +#: ../../../changes.txt:1923 64d4847e1a744dac89ad213ab71556fb +msgid "" +"**Added** JavaScript support to PDF form fields, thereby fixing `#454 " +"`_." +msgstr "" + +#: ../../../changes.txt:1924 49a4d22efbdc4a84915905ff1104bb9b +msgid "" +"**Added** a new annotation method :meth:`Annot.delete_responses`, which " +"removes 'Popup' and response annotations referring to the current one. " +"Mainly serves data protection purposes." +msgstr "" + +#: ../../../changes.txt:1925 421bc43eeb1b4c0fa3d1de9ac93c7f27 +msgid "" +"**Added** a new form field method :meth:`Widget.reset`, which resets the " +"field value to its default." +msgstr "" + +#: ../../../changes.txt:1926 fa9378cf2f35445eb096fb461328f8b8 +msgid "" +"**Changed** and extended handling of redactions: images and XObjects are " +"removed if *contained* in a redaction rectangle. Any partial only " +"overlaps will just be covered by the redaction background color. Now an " +"*overlay* text can be specified to be inserted in the rectangle area to " +"**take the place the deleted original** text. This resolves `#434 " +"`_." +msgstr "" + +#: ../../../changes.txt:1930 27b4c0399f754bfeb0d932b75262c480 +msgid "**Changes in Version 1.16.11**" +msgstr "" + +#: ../../../changes.txt:1932 21c52a1b60b84b448d79a9c6f5ad1f76 +msgid "" +"**Added** Support for redaction annotations via method " +":meth:`Page.add_redact_annot` and :meth:`Page.apply_redactions`." +msgstr "" + +#: ../../../changes.txt:1933 ddddf21cb7db45cdbd5b0faf124675f8 +msgid "**Fixed** issue #426 (\"PolygonAnnotation in 1.16.10 version\")." +msgstr "" + +#: ../../../changes.txt:1934 f1c946b998214501be7ed959f8951035 +msgid "" +"**Fixed** documentation only issues `#443 " +"`_ and `#444 " +"`_." +msgstr "" + +#: ../../../changes.txt:1938 b7965a894cf84ec19349ae08a3c7877c +msgid "**Changes in Version 1.16.10**" +msgstr "" + +#: ../../../changes.txt:1940 184d2f584f92410d9f61c98b4431a698 +msgid "" +"**Fixed** issue #421 (\"annot.set_rect(rect) has no effect on text " +"Annotation\")" +msgstr "" + +#: ../../../changes.txt:1941 9d3c664392cd4c52882997e43c5cee78 +msgid "" +"**Fixed** issue #417 (\"Strange behavior for page.deleteAnnot on 1.16.9 " +"compare to 1.13.20\")" +msgstr "" + +#: ../../../changes.txt:1942 361940c4c44d43cda4dfb1e36b1f8c6d +msgid "**Fixed** issue #415 (\"Annot.setOpacity throws mupdf warnings\")" +msgstr "" + +#: ../../../changes.txt:1943 ba8b4c6c823c478291a738c30877848b +msgid "" +"**Changed** all \"add annotation / widget\" methods to store a unique " +"name in the */NM* PDF key." +msgstr "" + +#: ../../../changes.txt:1944 bf79ca3ca50b4251bf44decca21a7782 +msgid "" +"**Changed** :meth:`Annot.setInfo` to also accept direct parameters in " +"addition to a dictionary." +msgstr "" + +#: ../../../changes.txt:1945 dbcae6b36c194c189b19c369912641ec +msgid "" +"**Changed** :attr:`Annot.info` to now also show the annotation's unique " +"id (*/NM* PDF key) if present." +msgstr "" + +#: ../../../changes.txt:1946 fe75d886aff24d2e8bf7e3925a83366e +msgid "" +"**Added** :meth:`Page.annot_names` which returns a list of all annotation" +" names (*/NM* keys)." +msgstr "" + +#: ../../../changes.txt:1947 4dcf529b87cc49f0bd1af4d4320cfde7 +msgid "" +"**Added** :meth:`Page.load_annot` which loads an annotation given its " +"unique id (*/NM* key)." +msgstr "" + +#: ../../../changes.txt:1948 fa03b37404f14d939921bf8a87c9c6da +msgid "" +"**Added** :meth:`Document.reload_page` which provides a new copy of a " +"page after finishing any pending updates to it." +msgstr "" + +#: ../../../changes.txt:1953 fae1066fa12546dd98bf65e5b543b86b +msgid "**Changes in Version 1.16.9**" +msgstr "" + +#: ../../../changes.txt:1955 d5df52286a6e4ec892a3e40c04b39aca +msgid "" +"**Fixed** #412 (\"Feature Request: Allow controlling whether TOC entries " +"should be collapsed\")" +msgstr "" + +#: ../../../changes.txt:1956 a16b04b1deb14236a8376570855cba40 +msgid "**Fixed** #411 (\"Seg Fault with page.firstWidget\")" +msgstr "" + +#: ../../../changes.txt:1957 0f681ad7e7484754bc9bc618f6ffe11d +msgid "**Fixed** #407 (\"Annot.setOpacity trouble\")" +msgstr "" + +#: ../../../changes.txt:1958 2a3d2e24c8374c9b91262b8488f64708 +msgid "" +"**Changed** methods :meth:`Annot.setBorder`, :meth:`Annot.setColors`, " +":meth:`Link.setBorder`, and :meth:`Link.setColors` to also accept direct " +"parameters, and not just cumbersome dictionaries." +msgstr "" + +#: ../../../changes.txt:1962 92378a229b934ccc949e112406645afd +msgid "**Changes in Version 1.16.8**" +msgstr "" + +#: ../../../changes.txt:1964 ecd0931d663f451c912766dde614088a +msgid "" +"**Added** several new methods to the :ref:`Document` class, which make " +"dealing with PDF low-level structures easier. I also decided to provide " +"them as \"normal\" methods (as opposed to private ones starting with an " +"underscore \"_\"). These are :meth:`Document.xrefObject`, " +":meth:`Document.xrefStream`, :meth:`Document.xrefStreamRaw`, " +":meth:`Document.PDFTrailer`, :meth:`Document.PDFCatalog`, " +":meth:`Document.metadataXML`, :meth:`Document.updateObject`, " +":meth:`Document.updateStream`." +msgstr "" + +#: ../../../changes.txt:1965 33164656aa7f427ca8595491bc0f2e01 +msgid "" +"**Added** :meth:`Tools.mupdf_disply_errors` which sets the display of " +"mupdf errors on *sys.stderr*." +msgstr "" + +#: ../../../changes.txt:1966 b81b74ebdb3d4ecd8c8e3ef522482092 +msgid "" +"**Added** a commandline facility. This a major new feature: you can now " +"invoke several utility functions via *\"python -m fitz ...\"*. It should " +"obsolete the need for many of the most trivial scripts. Please refer to " +":ref:`Module`." +msgstr "" + +#: ../../../changes.txt:1971 b1d2c1ae250049228d3d7121fe3edd5b +msgid "**Changes in Version 1.16.7**" +msgstr "" + +#: ../../../changes.txt:1973 e443ab65459f4b8683472209275745d8 +msgid "" +"Minor changes to better synchronize the binary image streams of " +":ref:`TextPage` image blocks and :meth:`Document.extractImage` images." +msgstr "" + +#: ../../../changes.txt:1975 97a1e113f9104b99b9faf259fdfa8d86 +msgid "" +"**Fixed** issue #394 (\"PyMuPDF Segfaults when using " +"TOOLS.mupdf_warnings()\")." +msgstr "" + +#: ../../../changes.txt:1976 97a62c243ff44c2cab6b73aac0ef7d9c +msgid "" +"**Changed** redirection of MuPDF error messages: apart from writing them " +"to Python *sys.stderr*, they are now also stored with the MuPDF warnings." +msgstr "" + +#: ../../../changes.txt:1977 4df4ed3f1c42469f8346eb289f1480cb +msgid "" +"**Changed** :meth:`Tools.mupdf_warnings` to automatically empty the store" +" (if not deactivated via a parameter)." +msgstr "" + +#: ../../../changes.txt:1978 8790e37e72154436b6624c8bad06f7cb +msgid "" +"**Changed** :meth:`Page.getImageBbox` to return an **infinite rectangle**" +" if the image could not be located on the page -- instead of raising an " +"exception." +msgstr "" + +#: ../../../changes.txt:1983 5e2929d554794f158d99d3d095fdde32 +msgid "**Changes in Version 1.16.6**" +msgstr "" + +#: ../../../changes.txt:1985 599d24845c774f4db0e7d17e2324090b +msgid "**Fixed** issue #390 (\"Incomplete deletion of annotations\")." +msgstr "" + +#: ../../../changes.txt:1986 8cd77dff6c2a4774a7722f9b37a3bec9 +msgid "" +"**Changed** :meth:`Page.searchFor` / :meth:`Document.searchPageFor` to " +"also support the *flags* parameter, which controls the data included in a" +" :ref:`TextPage`." +msgstr "" + +#: ../../../changes.txt:1987 f199177e91d241f296d12df70a9732ae +msgid "" +"**Changed** :meth:`Document.getPageImageList`, " +":meth:`Document.getPageFontList` and their :ref:`Page` counterparts to " +"support a new parameter *full*. If true, the returned items will contain " +"the :data:`xref` of the *Form XObject* where the font or image is " +"referenced." +msgstr "" + +#: ../../../changes.txt:1991 ad6179b247884db79ba6045ef43a1385 +msgid "**Changes in Version 1.16.5**" +msgstr "" + +#: ../../../changes.txt:1993 6460a7d5fe33486c8185d7400a14443e +msgid "More performance improvements for text extraction." +msgstr "" + +#: ../../../changes.txt:1995 d2c0600b030740ccbca12e0156f1a909 +msgid "**Fixed** second part of issue #381 (see item in v1.16.4)." +msgstr "" + +#: ../../../changes.txt:1996 1a90cd0a5238441a8e9dcf1ef823b0bf +msgid "" +"**Added** :meth:`Page.getTextPage`, so it is no longer required to create" +" an intermediate display list for text extractions. Page level wrappers " +"for text extraction and text searching are now based on this, which " +"should improve performance by ca. 5%." +msgstr "" + +#: ../../../changes.txt:2000 87308a30d9614d12a56898d0afca438e +msgid "**Changes in Version 1.16.4**" +msgstr "" + +#: ../../../changes.txt:2003 3861d98bb52f4ff9beec775ce41a56d0 +msgid "" +"**Fixed** issue #381 (\"TextPage.extractDICT ... failed ... after " +"upgrading ... to 1.16.3\")" +msgstr "" + +#: ../../../changes.txt:2004 ca4d38f09aa249f39d8bdf75e2930d93 +msgid "" +"**Added** method :meth:`Document.pages` which delivers a generator " +"iterator over a page range." +msgstr "" + +#: ../../../changes.txt:2005 c10d16381a4b4386bd41822dc259e62c +msgid "" +"**Added** method :meth:`Page.links` which delivers a generator iterator " +"over the links of a page." +msgstr "" + +#: ../../../changes.txt:2006 325a170f3f8741e29e0ac263081297a0 +msgid "" +"**Added** method :meth:`Page.annots` which delivers a generator iterator " +"over the annotations of a page." +msgstr "" + +#: ../../../changes.txt:2007 facf7023e6e347079584df6e97f7c0d6 +msgid "" +"**Added** method :meth:`Page.widgets` which delivers a generator iterator" +" over the form fields of a page." +msgstr "" + +#: ../../../changes.txt:2008 5600da0f54e4453caafc2f3bb6fae938 +msgid "" +"**Changed** :attr:`Document.is_form_pdf` to now contain the number of " +"widgets, and ``False`` if not a PDF or this number is zero." +msgstr "" + +#: ../../../changes.txt:2013 defabc6d60e541a3876a173e5b67ed2f +msgid "**Changes in Version 1.16.3**" +msgstr "" + +#: ../../../changes.txt:2015 b1dc046ead24444bba8246655fa172f6 +msgid "" +"Minor changes compared to version 1.16.2. The code of the \"dict\" and " +"\"rawdict\" variants of :meth:`Page.getText` has been ported to C which " +"has greatly improved their performance. This improvement is mostly " +"noticeable with text-oriented documents, where they now should execute " +"almost two times faster." +msgstr "" + +#: ../../../changes.txt:2017 d83f278ce36346cf826935d8c8a55ed2 +msgid "" +"**Fixed** issue #369 (\"mupdf: cmsCreateTransform failed\") by removing " +"ICC colorspace support." +msgstr "" + +#: ../../../changes.txt:2018 fdc7d313bad44fa88425c3e5c850a645 +msgid "" +"**Changed** :meth:`Page.getText` to accept additional keywords \"blocks\"" +" and \"words\". These will deliver the results of " +":meth:`Page.getTextBlocks` and :meth:`Page.getTextWords`, respectively. " +"So all text extraction methods are now available via a uniform API. " +"Correspondingly, there are now new methods :meth:`TextPage.extractBLOCKS`" +" and :meth:`TextPage.extractWords`." +msgstr "" + +#: ../../../changes.txt:2019 3fca663f69c6497fa3c6a7ad2adb2450 +msgid "" +"**Changed** :meth:`Page.getText` to default bit indicator " +"*TEXT_INHIBIT_SPACES* to **off**. Insertion of additional spaces is **not" +" suppressed** by default." +msgstr "" + +#: ../../../changes.txt:2023 f0e8a9a697d545e2aa558945e286c755 +msgid "**Changes in Version 1.16.2**" +msgstr "" + +#: ../../../changes.txt:2025 244003fcd5e442408c7c59ff86a18348 +msgid "" +"**Changed** text extraction methods of :ref:`Page` to allow detail " +"control of the amount of extracted data." +msgstr "" + +#: ../../../changes.txt:2026 96ffbb66826344c0abebab9c361f16e7 +msgid "" +"**Added** :meth:`planish_line` which maps a given line (defined as a pair" +" of points) to the x-axis." +msgstr "" + +#: ../../../changes.txt:2027 4459642dc0264674becb06363fab528b +msgid "" +"**Fixed** an issue (w/o Github number) which brought down the interpreter" +" when encountering certain non-UTF-8 encodable characters while using " +":meth:`Page.getText` with te \"dict\" option." +msgstr "" + +#: ../../../changes.txt:2028 69518c711a8c4719b36e078a17fa77cf +msgid "**Fixed** issue #362 (\"Memory Leak with getText('rawDICT')\")." +msgstr "" + +#: ../../../changes.txt:2032 b793bdf7e6c742a1a840e24b28cc9d09 +msgid "**Changes in Version 1.16.1**" +msgstr "" + +#: ../../../changes.txt:2034 2141dc782ed3451b88a2f81567116dc2 +msgid "" +"**Added** property :attr:`Quad.is_convex` which checks whether a line is " +"contained in the quad if it connects two points of it." +msgstr "" + +#: ../../../changes.txt:2035 63eca3d8920e45b1abad5ec6df745db7 +msgid "" +"**Changed** :meth:`Document.insert_pdf` to now allow dropping or " +"including links and annotations independently during the copy. Fixes " +"issue #352 (\"Corrupt PDF data and ...\"), which seemed to intermittently" +" occur when using the method for some problematic PDF files." +msgstr "" + +#: ../../../changes.txt:2036 c7544beddb164c228c7bc9f680ab7e70 +msgid "" +"**Fixed** a bug which, in matrix division using the syntax *\"m1/m2\"*, " +"caused matrix *\"m1\"* to be **replaced** by the result instead of " +"delivering a new matrix." +msgstr "" + +#: ../../../changes.txt:2037 6d5a1ed42ec94b869a923337b5e3f8f6 +msgid "" +"**Fixed** issue #354 (\"SyntaxWarning with Python 3.8\"). We now always " +"use *\"==\"* for literals (instead of the *\"is\"* Python keyword)." +msgstr "" + +#: ../../../changes.txt:2038 8cdcf5b48aae49c789c3f25284c44087 +msgid "" +"**Fixed** issue #353 (\"mupdf version check\"), to no longer refuse the " +"import when there are only patch level deviations from MuPDF." +msgstr "" + +#: ../../../changes.txt:2044 6bedc075ffe3480e8f7206bcc0ff2112 +msgid "**Changes in Version 1.16.0**" +msgstr "" + +#: ../../../changes.txt:2046 4328810aa9a64eb0bcc336044e7a2576 +msgid "" +"This major new version of MuPDF comes with several nice new or changed " +"features. Some of them imply programming API changes, however. This is a " +"synopsis of what has changed:" +msgstr "" + +#: ../../../changes.txt:2048 a760e7e0b517454a9fd80d87f5a5c043 +msgid "" +"PDF document encryption and decryption is now **fully supported**. This " +"includes setting **permissions**, **passwords** (user and owner " +"passwords) and the desired encryption method." +msgstr "" + +#: ../../../changes.txt:2049 0ac3db53497d4789a51fbaae5532e2cc +msgid "" +"In response to the new encryption features, PyMuPDF returns an integer " +"(ie. a combination of bits) for document permissions, and no longer a " +"dictionary." +msgstr "" + +#: ../../../changes.txt:2050 1ba4256335a74714ad2a964147508a53 +msgid "" +"Redirection of MuPDF errors and warnings is now natively supported. " +"PyMuPDF redirects error messages from MuPDF to *sys.stderr* and no longer" +" buffers them. Warnings continue to be buffered and will not be " +"displayed. Functions exist to access and reset the warnings buffer." +msgstr "" + +#: ../../../changes.txt:2051 f88afc96b7934797a998f8f1ea458c18 +msgid "Annotations are now **only supported for PDF**." +msgstr "" + +#: ../../../changes.txt:2052 c96da7bcdf2f46ee9426f5df7fba34b3 +msgid "" +"Annotations and widgets (form fields) are now **separate object chains** " +"on a page (although widgets technically still **are** PDF annotations). " +"This means, that you will **never encounter widgets** when using " +":attr:`Page.firstAnnot` or :meth:`Annot.next`. You must use " +":attr:`Page.firstWidget` and :meth:`Widget.next` to access form fields." +msgstr "" + +#: ../../../changes.txt:2053 dd0eec567c304bafa665a9761b9d1beb +msgid "" +"As part of MuPDF's changes regarding widgets, only the following four " +"fonts are supported, when **adding** or **changing** form fields: " +"**Courier, Helvetica, Times-Roman** and **ZapfDingBats**." +msgstr "" + +#: ../../../changes.txt:2055 ba377b66c80c4815bc4d4909a93a2fa0 +msgid "List of change details:" +msgstr "" + +#: ../../../changes.txt:2057 3dbe328d1c8f4a6586e283c0353288ca +msgid "" +"**Added** :meth:`Document.can_save_incrementally` which checks conditions" +" that are preventing use of option *incremental=True* of " +":meth:`Document.save`." +msgstr "" + +#: ../../../changes.txt:2058 dfba798e391e49969a75c38421999aba +msgid "" +"**Added** :attr:`Page.firstWidget` which points to the first field on a " +"page." +msgstr "" + +#: ../../../changes.txt:2059 70ec756e3fde41a58f61257712f68175 +msgid "" +"**Added** :meth:`Page.getImageBbox` which returns the rectangle occupied " +"by an image shown on the page." +msgstr "" + +#: ../../../changes.txt:2060 52cdd784572345b8b494754d2d47e06a +msgid "" +"**Added** :meth:`Annot.setName` which lets you change the (icon) name " +"field." +msgstr "" + +#: ../../../changes.txt:2061 6609b86145064db79dc97436d04f1a9e +msgid "" +"**Added** outputting the text color in :meth:`Page.getText`: the " +"*\"dict\"*, *\"rawdict\"* and *\"xml\"* options now also show the color " +"in sRGB format." +msgstr "" + +#: ../../../changes.txt:2062 74730827e0a64061a1e3e47286823bcf +msgid "" +"**Changed** :attr:`Document.permissions` to now contain an integer of " +"bool indicators -- was a dictionary before." +msgstr "" + +#: ../../../changes.txt:2063 4126d540220b444d96835814920f8fe3 +msgid "" +"**Changed** :meth:`Document.save`, :meth:`Document.write`, which now " +"fully support password-based decryption and encryption of PDF files." +msgstr "" + +#: ../../../changes.txt:2064 5b6524742b4e411fb6f211d717e9a10d +msgid "" +"**Changed the names of all Python constants** related to annotations and " +"widgets. Please make sure to consult the **Constants and Enumerations** " +"chapter if your script is dealing with these two classes. This decision " +"goes back to the dropped support for non-PDF annotations. The **old " +"names** (starting with \"ANNOT_*\" or \"WIDGET_*\") will be available as " +"deprecated synonyms." +msgstr "" + +#: ../../../changes.txt:2065 d682ef7a26f44a1ebb180063ad5744ac +msgid "" +"**Changed** font support for widgets: only *Cour* (Courier), *Helv* " +"(Helvetica, default), *TiRo* (Times-Roman) and *ZaDb* (ZapfDingBats) are " +"accepted when **adding or changing** form fields. Only the plain versions" +" are possible -- not their italic or bold variations. **Reading** " +"widgets, however will show its original font." +msgstr "" + +#: ../../../changes.txt:2066 4cdf4f75df404138b707cec59ae070fb +msgid "" +"**Changed** the name of the warnings buffer to " +":meth:`Tools.mupdf_warnings` and the function to empty this buffer is now" +" called :meth:`Tools.reset_mupdf_warnings`." +msgstr "" + +#: ../../../changes.txt:2067 1d07282579fc4e26bbff7ed1ba846e1b +msgid "" +"**Changed** :meth:`Page.getPixmap`, :meth:`Document.get_page_pixmap`: a " +"new bool argument *annots* can now be used to **suppress the rendering of" +" annotations** on the page." +msgstr "" + +#: ../../../changes.txt:2068 dd6323f156064fa4aeb65edcfcb24717 +msgid "" +"**Changed** :meth:`Page.add_file_annot` and :meth:`Page.add_text_annot` " +"to enable setting an icon." +msgstr "" + +#: ../../../changes.txt:2069 a6723eca6bfd4546b5422272e17f0b98 +msgid "" +"**Removed** widget-related methods and attributes from the :ref:`Annot` " +"object." +msgstr "" + +#: ../../../changes.txt:2070 2cc0b6ee238c4721b919cad0d784b5d9 +msgid "" +"**Removed** :ref:`Document` attributes *openErrCode*, *openErrMsg*, and " +":ref:`Tools` attributes / methods *stderr*, *reset_stderr*, *stdout*, and" +" *reset_stdout*." +msgstr "" + +#: ../../../changes.txt:2071 123bc0d89f13409bb11cc1936b963528 +msgid "" +"**Removed** **thirdparty zlib** dependency in PyMuPDF: there are now " +"compression functions available in MuPDF. Source installers of PyMuPDF " +"may now omit this extra installation step." +msgstr "" + +#: ../../../changes.txt:2073 31cbdf932b5843e09f7d2e015d6d6880 +msgid "**No version published for MuPDF v1.15.0**" +msgstr "" + +#: ../../../changes.txt:2078 09197a04e8324668bbdc9f0fded15060 +msgid "**Changes in Version 1.14.20 / 1.14.21**" +msgstr "" + +#: ../../../changes.txt:2080 01c834c97dd4498bb948ae249f083173 +msgid "" +"**Changed** text marker annotations to support multiple rectangles / " +"quadrilaterals. This fixes issue #341 (\"Question : How to addhighlight " +"so that a string spread across more than a line is covered by one " +"highlight?\") and similar (#285)." +msgstr "" + +#: ../../../changes.txt:2081 1ea43a8c786d4aad8056045e35f0bec8 +msgid "" +"**Fixed** issue #331 (\"Importing PyMuPDF changes warning filtering " +"behaviour globally\")." +msgstr "" + +#: ../../../changes.txt:2086 df7eb99eeee242edad81168e9683bba0 +msgid "**Changes in Version 1.14.19**" +msgstr "" + +#: ../../../changes.txt:2088 c712710865084b4987b9d1986582510d +msgid "**Fixed** issue #319 (\"InsertText function error when use custom font\")." +msgstr "" + +#: ../../../changes.txt:2089 04153b8b52dd42e0b65ae0b0198aa82e +msgid "" +"**Added** new method :meth:`Document.get_sigflags` which returns " +"information on whether a PDF is signed. Resolves issue #326 (\"How to " +"detect signature in a form pdf?\")." +msgstr "" + +#: ../../../changes.txt:2094 05872f2eea2f414b929a8c63193bc83b +msgid "**Changes in Version 1.14.17**" +msgstr "" + +#: ../../../changes.txt:2096 b7f49499026840c2b5e1b475b6856ee0 +msgid "" +"**Added** :meth:`Document.fullcopyPage` to make full page copies within a" +" PDF (not just copied references as :meth:`Document.copyPage` does)." +msgstr "" + +#: ../../../changes.txt:2097 ebe1f2886ed645babed59a2d3a3d699b +msgid "" +"**Changed** :meth:`Page.getPixmap`, :meth:`Document.get_page_pixmap` now " +"use *alpha=False* as default." +msgstr "" + +#: ../../../changes.txt:2098 98d3957628a945909abf5fa6c6461ec5 +msgid "" +"**Changed** text extraction: the span dictionary now (again) contains its" +" rectangle under the *bbox* key." +msgstr "" + +#: ../../../changes.txt:2099 b1da104141564ec18ab3fe3dd6c73c05 +msgid "" +"**Changed** :meth:`Document.movePage` and :meth:`Document.copyPage` to " +"use direct functions instead of wrapping :meth:`Document.select` -- " +"similar to :meth:`Document.delete_page` in v1.14.16." +msgstr "" + +#: ../../../changes.txt:2103 934aedea3ab342b492d489b3d257e15a +msgid "**Changes in Version 1.14.16**" +msgstr "" + +#: ../../../changes.txt:2105 40e0ee3010c0479fa6c31763023c4c4f +msgid "" +"**Changed** :ref:`Document` methods around PDF */EmbeddedFiles* to no " +"longer use MuPDF's \"portfolio\" functions. That support will be dropped " +"in MuPDF v1.15 -- therefore another solution was required." +msgstr "" + +#: ../../../changes.txt:2106 0805a835853d4a9a80bd9202aacdfd52 +msgid "" +"**Changed** :meth:`Document.embfile_Count` to be a function (was an " +"attribute)." +msgstr "" + +#: ../../../changes.txt:2107 06849828882d49dea4600975cc5f48c4 +msgid "" +"**Added** new method :meth:`Document.embfile_Names` which returns a list " +"of names of embedded files." +msgstr "" + +#: ../../../changes.txt:2108 92418dcf2837409c8f097be2fd0db7d3 +msgid "" +"**Changed** :meth:`Document.delete_page` and " +":meth:`Document.delete_pages` to internally no longer use " +":meth:`Document.select`, but instead use functions to perform the " +"deletion directly. As it has turned out, the :meth:`Document.select` " +"method yields invalid outline trees (tables of content) for very complex " +"PDFs and sophisticated use of annotations." +msgstr "" + +#: ../../../changes.txt:2113 7be1be7e14e348659ee4570c1e7a904a +msgid "**Changes in Version 1.14.15**" +msgstr "" + +#: ../../../changes.txt:2115 f4cc678f1e12459d8c3f0eeb4b1d64ab +msgid "" +"**Fixed** issues #301 (\"Line cap and Line join\"), #300 (\"How to draw a" +" shape without outlines\") and #298 (\"utils.updateRect exception\"). " +"These bugs pertain to drawing shapes with PyMuPDF. Drawing shapes without" +" any border is fully supported. Line cap styles and line line join style " +"are now differentiated and support all possible PDF values (0, 1, 2) " +"instead of just being a bool. The previous parameter *roundCap* is " +"deprecated in favor of *lineCap* and *lineJoin* and will be deleted in " +"the next release." +msgstr "" + +#: ../../../changes.txt:2116 ff98970f7cad47ed8fac68f3688fffc8 +msgid "" +"**Fixed** issue #290 (\"Memory Leak with getText('rawDICT')\"). This bug " +"caused memory not being (completely) freed after invoking the \"dict\", " +"\"rawdict\" and \"json\" versions of :meth:`Page.getText`." +msgstr "" + +#: ../../../changes.txt:2121 11d8b202851c430e8381a3c1c21e80d6 +msgid "**Changes in Version 1.14.14**" +msgstr "" + +#: ../../../changes.txt:2123 340485a50d694bc9accb9f986fa0bf58 +msgid "" +"**Added** new low-level function :meth:`ImageProperties` to determine a " +"number of characteristics for an image." +msgstr "" + +#: ../../../changes.txt:2124 5453a7f953c34bedbca1791479b5714d +msgid "" +"**Added** new low-level function :meth:`Document.is_stream`, which checks" +" whether an object is of stream type." +msgstr "" + +#: ../../../changes.txt:2125 ec1a8c46ebca48eebbf3d5827c12d214 +msgid "" +"**Changed** low-level functions :meth:`Document._getXrefString` and " +":meth:`Document._getTrailerString` now by default return object " +"definitions in a formatted form which makes parsing easy." +msgstr "" + +#: ../../../changes.txt:2129 d23a665652534b489f1ca082bda099dd +msgid "**Changes in Version 1.14.13**" +msgstr "" + +#: ../../../changes.txt:2131 4861a2a0e1c943a19adbe349309a159f +msgid "" +"**Changed** methods working with binary input: while ever supporting " +"bytes and bytearray objects, they now also accept *io.BytesIO* input, " +"using their *getvalue()* method. This pertains to document creation, " +"embedded files, FileAttachment annotations, pixmap creation and others. " +"Fixes issue #274 (\"Segfault when using BytesIO as a stream for " +"insertImage\")." +msgstr "" + +#: ../../../changes.txt:2132 7da722ec959a4b7a9a2b74690d0d5a18 +msgid "" +"**Fixed** issue #278 (\"Is insertImage(keep_proportion=True) broken?\"). " +"Images are now correctly presented when keeping aspect ratio." +msgstr "" + +#: ../../../changes.txt:2137 966095679d2e4a2ba13407749072fe18 +msgid "**Changes in Version 1.14.12**" +msgstr "" + +#: ../../../changes.txt:2139 a5eb02c774cc4752bb4d914ad3a75c04 +msgid "" +"**Changed** the draw methods of :ref:`Page` and :ref:`Shape` to support " +"not only RGB, but also GRAY and CMYK colorspaces. This solves issue #270 " +"(\"Is there a way to use CMYK color to draw shapes?\"). This change also " +"applies to text insertion methods of :ref:`Shape`, resp. :ref:`Page`." +msgstr "" + +#: ../../../changes.txt:2140 5b6a9f0eb247493aadab10072b4ba869 +msgid "" +"**Fixed** issue #269 (\"AttributeError in Document.insert_page()\"), " +"which occurred when using :meth:`Document.insert_page` with text " +"insertion." +msgstr "" + +#: ../../../changes.txt:2145 9e30f11242644851b3b3b29662fa6746 +msgid "**Changes in Version 1.14.11**" +msgstr "" + +#: ../../../changes.txt:2147 cc8e088ee81e46559947186dff3f70d6 +msgid "" +"**Changed** :meth:`Page.show_pdf_page` to always position the source " +"rectangle centered in the target. This method now also supports " +"**rotation by arbitrary angles**. The argument *reuse_xref* has been " +"deprecated: prevention of duplicates is now **handled internally**." +msgstr "" + +#: ../../../changes.txt:2148 520e83b843e9431a808601e837226257 +msgid "" +"**Changed** :meth:`Page.insertImage` to support rotated display of the " +"image and keeping the aspect ratio. Only rotations by multiples of 90 " +"degrees are supported here." +msgstr "" + +#: ../../../changes.txt:2149 b296d98ca9844bdb9424bea2f7c16e7c +msgid "" +"**Fixed** issue #265 (\"TypeError: insertText() got an unexpected keyword" +" argument 'idx'\"). This issue only occurred when using " +":meth:`Document.insert_page` with also inserting text." +msgstr "" + +#: ../../../changes.txt:2153 c840f78999bc4bcf800e3ee3864f9528 +msgid "**Changes in Version 1.14.10**" +msgstr "" + +#: ../../../changes.txt:2155 9bd1d3e0020e4cd792e1526ac4faedec +msgid "" +"**Changed** :meth:`Page.show_pdf_page` to support rotation of the source " +"rectangle. Fixes #261 (\"Cannot rotate insterted pages\")." +msgstr "" + +#: ../../../changes.txt:2156 4b154a05a7a8473098ebfcd373d5a56e +msgid "" +"**Fixed** a bug in :meth:`Page.insertImage` which prevented insertion of " +"multiple images provided as streams." +msgstr "" + +#: ../../../changes.txt:2161 30c68c097ab74de2af491451fdb43ed6 +msgid "**Changes in Version 1.14.9**" +msgstr "" + +#: ../../../changes.txt:2163 ca7e6b3fa698461d9329bd534d283f30 +msgid "" +"**Added** new low-level method :meth:`Document._getTrailerString`, which " +"returns the trailer object of a PDF. This is much like " +":meth:`Document._getXrefString` except that the PDF trailer has no / " +"needs no :data:`xref` to identify it." +msgstr "" + +#: ../../../changes.txt:2164 eabe1cf906764e1abe1f72302297c36b +msgid "" +"**Added** new parameters for text insertion methods. You can now set " +"stroke and fill colors of glyphs (text characters) independently, as well" +" as the thickness of the glyph border. A new parameter *render_mode* " +"controls the use of these colors, and whether the text should be visible " +"at all." +msgstr "" + +#: ../../../changes.txt:2165 9c65a08c28ae409c97803bd1cf7a0846 +msgid "" +"**Fixed** issue #258 (\"Copying image streams to new PDF without size " +"increase\"): For JPX images embedded in a PDF, " +":meth:`Document.extractImage` will now return them in their original " +"format. Previously, the MuPDF base library was used, which returns them " +"in PNG format (entailing a massive size increase)." +msgstr "" + +#: ../../../changes.txt:2166 27248cd680b44b3ab3bc08cd324c5432 +msgid "" +"**Fixed** issue #259 (\"Morphing text to fit inside rect\"). Clarified " +"use of :meth:`get_text_length` and removed extra line breaks for long " +"words." +msgstr "" + +#: ../../../changes.txt:2170 b11b3a86f9764bc19a57f063fe13532c +msgid "**Changes in Version 1.14.8**" +msgstr "" + +#: ../../../changes.txt:2172 82d7526204c549a1858994a428b8a8d4 +msgid "" +"**Added** :meth:`Pixmap.set_rect` to change the pixel values in a " +"rectangle. This is also an alternative to setting the color of a complete" +" pixmap (:meth:`Pixmap.clear_with`)." +msgstr "" + +#: ../../../changes.txt:2173 ea602f5ff6e2498a92e5f4d0bbdc1702 +msgid "" +"**Fixed** an image extraction issue with JBIG2 (monochrome) encoded PDF " +"images. The issue occurred in :meth:`Page.getText` (parameters \"dict\" " +"and \"rawdict\") and in :meth:`Document.extractImage` methods." +msgstr "" + +#: ../../../changes.txt:2174 c402b7e623ea4c139cc88be9cac4e36c +msgid "" +"**Fixed** an issue with not correctly clearing a non-alpha :ref:`Pixmap` " +"(:meth:`Pixmap.clear_with`)." +msgstr "" + +#: ../../../changes.txt:2175 9c2d1b085a384b24a5db1fd8090d8a67 +msgid "" +"**Fixed** an issue with not correctly inverting colors of a non-alpha " +":ref:`Pixmap` (:meth:`Pixmap.invert_irect`)." +msgstr "" + +#: ../../../changes.txt:2179 ab4fbd78e984463abd686a605884ddda +msgid "**Changes in Version 1.14.7**" +msgstr "" + +#: ../../../changes.txt:2181 16cd5e507c2b40248d6af093b2a8eacb +msgid "**Added** :meth:`Pixmap.set_pixel` to change one pixel value." +msgstr "" + +#: ../../../changes.txt:2182 7037f2e88d0b4768b0578ec2c4572f9b +msgid "**Added** documentation for image conversion in the :ref:`FAQ`." +msgstr "" + +#: ../../../changes.txt:2183 41aa04fff22c4bc9b2ffe561f734d15e +msgid "" +"**Added** new function :meth:`get_text_length` to determine the string " +"length for a given font." +msgstr "" + +#: ../../../changes.txt:2184 7ade45e65078489394e7c8ba14ae98f4 +msgid "" +"**Added** Postscript image output (changed :meth:`Pixmap.save` and " +":meth:`Pixmap.tobytes`)." +msgstr "" + +#: ../../../changes.txt:2185 ab6d907cbfcc4f708913a797f2f1fb16 +msgid "" +"**Changed** :meth:`Pixmap.save` and :meth:`Pixmap.tobytes` to ensure " +"valid combinations of colorspace, alpha and output format." +msgstr "" + +#: ../../../changes.txt:2186 063494697f8e43318d31d30f940c3481 +msgid "" +"**Changed** :meth:`Pixmap.save`: the desired format is now inferred from " +"the filename." +msgstr "" + +#: ../../../changes.txt:2187 ede0fa8b74574050b048e88fc24f1fc6 +msgid "" +"**Changed** FreeText annotations can now have a transparent background - " +"see :meth:`Annot.update`." +msgstr "" + +#: ../../../changes.txt:2191 a6af2b7087ff4f87a3d94a14f6bdae06 +msgid "**Changes in Version 1.14.5**" +msgstr "" + +#: ../../../changes.txt:2193 018e32d9c25047af810c2a84b70970b9 +msgid "" +"**Changed:** :ref:`Shape` methods now strictly use the transformation " +"matrix of the :ref:`Page` -- instead of \"manually\" calculating " +"locations." +msgstr "" + +#: ../../../changes.txt:2194 54bc4b2d86e14789b40419fac12f97d7 +msgid "" +"**Added** method :meth:`Pixmap.pixel` which returns the pixel value (a " +"list) for given pixel coordinates." +msgstr "" + +#: ../../../changes.txt:2195 857bf92e13484c7cb3693e7335a772f6 +msgid "" +"**Added** method :meth:`Pixmap.tobytes` which returns a bytes object " +"representing the pixmap in a variety of formats. Previously, this could " +"be done for PNG outputs only (:meth:`Pixmap.tobytes`)." +msgstr "" + +#: ../../../changes.txt:2196 f67fb06ac693416fbc7c8aa65f7bd7d4 +msgid "" +"**Changed:** output of methods :meth:`Pixmap.save` and (the new) " +":meth:`Pixmap.tobytes` may now also be PSD (Adobe Photoshop Document)." +msgstr "" + +#: ../../../changes.txt:2197 f125017e6376447793ab5cfac68cd949 +msgid "" +"**Added** method :meth:`Shape.drawQuad` which draws a :ref:`Quad`. This " +"actually is a shorthand for a :meth:`Shape.drawPolyline` with the edges " +"of the quad." +msgstr "" + +#: ../../../changes.txt:2198 53b30bdb598d4653ac927d8cce460309 +msgid "" +"**Changed** method :meth:`Shape.drawOval`: the argument can now be " +"**either** a rectangle (:data:`rect_like`) **or** a quadrilateral " +"(:data:`quad_like`)." +msgstr "" + +#: ../../../changes.txt:2202 f1dc893234d245e9830179a93f629dee +msgid "**Changes in Version 1.14.4**" +msgstr "" + +#: ../../../changes.txt:2204 e420fa40704f4a8186c084fd1142a269 +msgid "**Fixes** issue #239 \"Annotation coordinate consistency\"." +msgstr "" + +#: ../../../changes.txt:2209 72e6ad3f521a42b8bace87e106ee6ede +msgid "**Changes in Version 1.14.3**" +msgstr "" + +#: ../../../changes.txt:2211 991373ab5ac7471a838bc8249d4d5514 +msgid "This patch version contains minor bug fixes and CJK font output support." +msgstr "" + +#: ../../../changes.txt:2213 f1c4a04f6dcf43eeb95899dfb02e27c9 +msgid "" +"**Added** support for the four CJK fonts as PyMuPDF generated text " +"output. This pertains to methods :meth:`Page.insertFont`, " +":meth:`Shape.insertText`, :meth:`Shape.insertTextbox`, and corresponding " +":ref:`Page` methods. The new fonts are available under \"reserved\" " +"fontnames \"china-t\" (traditional Chinese), \"china-s\" (simplified " +"Chinese), \"japan\" (Japanese), and \"korea\" (Korean)." +msgstr "" + +#: ../../../changes.txt:2214 73ef513029a141df8449b5146d0253fe +msgid "**Added** full support for the built-in fonts 'Symbol' and 'Zapfdingbats'." +msgstr "" + +#: ../../../changes.txt:2215 76f60cd573e245f991db0d4cd24a974a +msgid "" +"**Changed:** The 14 standard fonts can now each be referenced by a " +"4-letter abbreviation." +msgstr "" + +#: ../../../changes.txt:2219 aa2af09fdf5042b4ba0a256c100e7b9b +msgid "**Changes in Version 1.14.1**" +msgstr "" + +#: ../../../changes.txt:2221 b445b39bd77c4a67bd6f0167e0f9a803 +msgid "This patch version contains minor performance improvements." +msgstr "" + +#: ../../../changes.txt:2223 3b1145c0fba44f50840286c3e778fbd5 +msgid "" +"**Added** support for :ref:`Document` filenames given as *pathlib* object" +" by using the Python *str()* function." +msgstr "" + +#: ../../../changes.txt:2228 6af72592640445ada360fa165a69c74f +msgid "**Changes in Version 1.14.0**" +msgstr "" + +#: ../../../changes.txt:2230 b0a9f2e6d3084680b1d3d8709b899e46 +msgid "" +"To support MuPDF v1.14.0, massive changes were required in PyMuPDF -- " +"most of them purely technical, with little visibility to developers. But " +"there are also quite a lot of interesting new and improved features. " +"Following are the details:" +msgstr "" + +#: ../../../changes.txt:2232 b8f5ef7656d84770b030771baf1d633e +msgid "**Added** \"ink\" annotation." +msgstr "" + +#: ../../../changes.txt:2233 dc24c4b8f4b545d89982ba04c9c9fc54 +msgid "**Added** \"rubber stamp\" annotation." +msgstr "" + +#: ../../../changes.txt:2234 088c207ca84b4af885053800fc1c6abf +msgid "**Added** \"squiggly\" text marker annotation." +msgstr "" + +#: ../../../changes.txt:2235 9ee51087241e47a9a885c4c50006ac45 +msgid "" +"**Added** new class :ref:`Quad` (quadrilateral or tetragon) -- which " +"represents a general four-sided shape in the plane. The special subtype " +"of rectangular, non-empty tetragons is used in text marker annotations " +"and as returned objects in text search methods." +msgstr "" + +#: ../../../changes.txt:2236 82b93415181e4426b4fe3395a7da036b +msgid "" +"**Added** a new option \"decrypt\" to :meth:`Document.save` and " +":meth:`Document.write`. Now you can **keep encryption** when saving a " +"password protected PDF." +msgstr "" + +#: ../../../changes.txt:2237 db071be165374c34a62b140f8d12defb +msgid "" +"**Added** suppression and redirection of unsolicited messages issued by " +"the underlying C-library MuPDF. Consult :ref:`RedirectMessages` for " +"details." +msgstr "" + +#: ../../../changes.txt:2238 d98596ed75d443fbb4b3117a82102898 +msgid "" +"**Changed:** Changes to annotations now **always require** " +":meth:`Annot.update` to become effective." +msgstr "" + +#: ../../../changes.txt:2239 c44c9272aa294fec800c9996f97c2a47 +msgid "" +"**Changed** free text annotations to support the full Latin character set" +" and range of appearance options." +msgstr "" + +#: ../../../changes.txt:2240 aeb18ad0c68c4c259659020be5574a12 +msgid "" +"**Changed** text searching, :meth:`Page.searchFor`, to optionally return " +":ref:`Quad` instead :ref:`Rect` objects surrounding each search hit." +msgstr "" + +#: ../../../changes.txt:2241 0eae45181d5b44df80c66a945b388467 +msgid "" +"**Changed** plain text output: we now add a *\\n* to each line if it does" +" not itself end with this character." +msgstr "" + +#: ../../../changes.txt:2242 89a4c15cea29455abdffa4509aa7c012 +msgid "**Fixed** issue 211 (\"Something wrong in the doc\")." +msgstr "" + +#: ../../../changes.txt:2243 24f3fb3bfb2741e8ac3dd74649331e57 +msgid "" +"**Fixed** issue 213 (\"Rewritten outline is displayed only by mupdf-based" +" applications\")." +msgstr "" + +#: ../../../changes.txt:2244 946d6b5d44d6492c92f79cddf8724236 +msgid "**Fixed** issue 214 (\"PDF decryption GONE!\")." +msgstr "" + +#: ../../../changes.txt:2245 972d52791c01402891dcf5c8aa874517 +msgid "**Fixed** issue 215 (\"Formatting of links added with pyMuPDF\")." +msgstr "" + +#: ../../../changes.txt:2246 ec35920e96a248b792b7359b83e0e806 +msgid "**Fixed** issue 217 (\"extraction through json is failing for my pdf\")." +msgstr "" + +#: ../../../changes.txt:2248 9d2784d5d5bb494b99f6009662588af4 +msgid "" +"Behind the curtain, we have changed the implementation of geometry " +"objects: they now purely exist in Python and no longer have \"shadow\" " +"twins on the C-level (in MuPDF). This has improved processing speed in " +"that area by more than a factor of two." +msgstr "" + +#: ../../../changes.txt:2250 ea4731de39514f5cacdbfe20993679c4 +msgid "" +"Because of the same reason, most methods involving geometry parameters " +"now also accept the corresponding Python sequence. For example, in method" +" *\"page.show_pdf_page(rect, ...)\"* parameter *rect* may now be any " +":data:`rect_like` sequence." +msgstr "" + +#: ../../../changes.txt:2252 3e2d3f1fb7cb4cff97d5076e2c0789b1 +msgid "" +"We also invested considerable effort to further extend and improve the " +":ref:`FAQ` chapter." +msgstr "" + +#: ../../../changes.txt:2257 04dc59fccc1f4cb08c810e3aa3ddd24f +msgid "**Changes in Version 1.13.19**" +msgstr "" + +#: ../../../changes.txt:2259 ac830b2aba964a00af50503281c9ad40 +msgid "" +"This version contains some technical / performance improvements and bug " +"fixes." +msgstr "" + +#: ../../../changes.txt:2261 d4e8a437b8634e2ebefb07b91b1ca5f3 +#, python-format +msgid "" +"**Changed** memory management: for Python 3 builds, Python memory " +"management is exclusively used across all C-level code (i.e. no more " +"native *malloc()* in MuPDF code or PyMuPDF interface code). This leads to" +" improved memory usage profiles and also some runtime improvements: we " +"have seen > 2% shorter runtimes for text extractions and pixmap creations" +" (on Windows machines only to date)." +msgstr "" + +#: ../../../changes.txt:2262 66b6b824d6ff411aa7a516457cd7dfc1 +msgid "" +"**Fixed** an error occurring in Python 2.7, which crashed the interpreter" +" when using :meth:`TextPage.extractRAWDICT` (= " +"*Page.getText(\"rawdict\")*)." +msgstr "" + +#: ../../../changes.txt:2263 dbbd352318fa4bdab10496e4c123fd45 +msgid "" +"**Fixed** an error occurring in Python 2.7, when creating link " +"destinations." +msgstr "" + +#: ../../../changes.txt:2264 a539ed90455049c1a4fb83a3421f871b +msgid "**Extended** the :ref:`FAQ` chapter with more examples." +msgstr "" + +#: ../../../changes.txt:2268 f7a59a8c7ef94dd897e7aaf8068e48cb +msgid "**Changes in Version 1.13.18**" +msgstr "" + +#: ../../../changes.txt:2270 43bbf6e1968c4e85870a1a74cbb89f71 +msgid "" +"**Added** method :meth:`TextPage.extractRAWDICT`, and a corresponding new" +" string parameter \"rawdict\" to method :meth:`Page.getText`. It extracts" +" text and images from a page in Python *dict* form like " +":meth:`TextPage.extractDICT`, but with the detail level of " +":meth:`TextPage.extractXML`, which is position information down to each " +"single character." +msgstr "" + +#: ../../../changes.txt:2274 550b7eeff5ce4f7a87b0f090fc851031 +msgid "**Changes in Version 1.13.17**" +msgstr "" + +#: ../../../changes.txt:2276 0aae2b940513466da0740808136e1c25 +msgid "" +"**Fixed** an error that intermittently caused an exception in " +":meth:`Page.show_pdf_page`, when pages from many different source PDFs " +"were shown." +msgstr "" + +#: ../../../changes.txt:2277 8eb6e43d098c4499ae042f34979ce16a +msgid "" +"**Changed** method :meth:`Document.extractImage` to now return more meta " +"information about the extracted imgage. Also, its performance has been " +"greatly improved. Several demo scripts have been changed to make use of " +"this method." +msgstr "" + +#: ../../../changes.txt:2278 b4dd712416a74448a2beceebcc6588d4 +msgid "" +"**Changed** method :meth:`Document._getXrefStream` to now return ``None``" +" if the object is no stream and no longer raise an exception if " +"otherwise." +msgstr "" + +#: ../../../changes.txt:2279 793d97c12403403b93395d171befee80 +msgid "" +"**Added** method :meth:`Document._deleteObject` which deletes a PDF " +"object identified by its :data:`xref`. Only to be used by the experienced" +" PDF expert." +msgstr "" + +#: ../../../changes.txt:2280 ef87bb386e9d416dacabbace0deeb487 +msgid "" +"**Added** a method :meth:`paper_rect` which returns a :ref:`Rect` for a " +"supplied paper format string. Example: *fitz.paper_rect(\"letter\") = " +"fitz.Rect(0.0, 0.0, 612.0, 792.0)*." +msgstr "" + +#: ../../../changes.txt:2281 84216f6fce5c4e17b9b6b285d8ce3ca7 +msgid "**Added** a :ref:`FAQ` chapter to this document." +msgstr "" + +#: ../../../changes.txt:2285 dcb3ebafdc1c4b969e427c36de3d6f5b +msgid "**Changes in Version 1.13.16**" +msgstr "" + +#: ../../../changes.txt:2287 0404f4f99ccc47bfb344cc3c765aa9fd +msgid "" +"**Added** support for correctly setting transparency (opacity) for " +"certain annotation types." +msgstr "" + +#: ../../../changes.txt:2288 fa36c556f0ab43b5aa6974b0b480b1f2 +msgid "" +"**Added** a tool property (:attr:`Tools.fitz_config`) showing the " +"configuration of this PyMuPDF version." +msgstr "" + +#: ../../../changes.txt:2289 37008e7403de4fba890a2d19d5bd0c3b +msgid "" +"**Fixed** issue #193 ('insertText(overlay=False) gives \"cannot resize a " +"buffer with shared storage\" error') by avoiding read-only buffers." +msgstr "" + +#: ../../../changes.txt:2293 cd6f672a6bd9458f9aa9e92509de85b7 +msgid "**Changes in Version 1.13.15**" +msgstr "" + +#: ../../../changes.txt:2295 316a3c060926436ab657de6a4b5b7204 +msgid "" +"**Fixed** issue #189 (\"cannot find builtin CJK font\"), so we are " +"supporting builtin CJK fonts now (CJK = China, Japan, Korea). This should" +" lead to correctly generated pixmaps for documents using these languages." +" This change has consequences for our binary file size: it will now range" +" between 8 and 10 MB, depending on the OS." +msgstr "" + +#: ../../../changes.txt:2296 7d9dc52bf7494845b113b4b655b1570a +msgid "" +"**Fixed** issue #191 (\"Jupyter notebook kernel dies after ca. 40 " +"pages\"), which occurred when modifying the contents of an annotation." +msgstr "" + +#: ../../../changes.txt:2300 d874cbbdb7ff40659df936702f7f04c2 +msgid "**Changes in Version 1.13.14**" +msgstr "" + +#: ../../../changes.txt:2302 a4459868b07e4a85a30a28ce5a45cc14 +msgid "This patch version contains several improvements, mainly for annotations." +msgstr "" + +#: ../../../changes.txt:2304 fb74830b41da4ccea9eee9c618becaac +msgid "" +"**Changed** :attr:`Annot.lineEnds` is now a list of two integers " +"representing the line end symbols. Previously was a *dict* of strings." +msgstr "" + +#: ../../../changes.txt:2305 0f2491593bd142b6abbc471b7999f395 +msgid "" +"**Added** support of line end symbols for applicable annotations. PyMuPDF" +" now can generate these annotations including the line end symbols." +msgstr "" + +#: ../../../changes.txt:2306 e184151c6e4a4e5e806c9c0b3c4927ef +msgid "" +"**Added** :meth:`Annot.setLineEnds` adds line end symbols to applicable " +"annotation types ('Line', 'PolyLine', 'Polygon')." +msgstr "" + +#: ../../../changes.txt:2307 2fcf671317634df984317a62c5835924 +msgid "" +"**Changed** technical implementation of :meth:`Page.insertImage` and " +":meth:`Page.show_pdf_page`: they now create there own contents objects, " +"thereby avoiding changes of potentially large streams with consequential " +"compression / decompression efforts and high change volumes with " +"incremental updates." +msgstr "" + +#: ../../../changes.txt:2311 a0286875157d46249f9bb3108b95ce75 +msgid "**Changes in Version 1.13.13**" +msgstr "" + +#: ../../../changes.txt:2313 4833f89ddc274e9e849c017c5f1875bc +msgid "" +"This patch version contains several improvements for embedded files and " +"file attachment annotations." +msgstr "" + +#: ../../../changes.txt:2315 076f1140d17f43b18fa6eed621773df3 +msgid "" +"**Added** :meth:`Document.embfile_Upd` which allows changing **file " +"content and metadata** of an embedded file. It supersedes the old method " +":meth:`Document.embfile_SetInfo` (which will be deleted in a future " +"version). Content is automatically compressed and metadata may be " +"unicode." +msgstr "" + +#: ../../../changes.txt:2316 b4d0ad5ed9554ceda18aed73d3d04ab6 +msgid "" +"**Changed** :meth:`Document.embfile_Add` to now automatically compress " +"file content. Accompanying metadata can now be unicode (had to be ASCII " +"in the past)." +msgstr "" + +#: ../../../changes.txt:2317 158d0b3898064454a33abac94aac2f19 +msgid "" +"**Changed** :meth:`Document.embfile_Del` to now automatically delete " +"**all entries** having the supplied identifying name. The return code is " +"now an integer count of the removed entries (was ``None`` previously)." +msgstr "" + +#: ../../../changes.txt:2318 3a95121e687b445d9fe8686b06fec070 +msgid "" +"**Changed** embedded file methods to now also accept or show the PDF " +"unicode filename as additional parameter *ufilename*." +msgstr "" + +#: ../../../changes.txt:2319 fdb4309dc07f4fb4a1d07f5da90aba8a +msgid "" +"**Added** :meth:`Page.add_file_annot` which adds a new file attachment " +"annotation." +msgstr "" + +#: ../../../changes.txt:2320 831e22f310f84fcea93160f4434afc33 +msgid "" +"**Changed** :meth:`Annot.fileUpd` (file attachment annot) to now also " +"accept the PDF unicode *ufilename* parameter. The description parameter " +"*desc* correctly works with unicode. Furthermore, **all** parameters are " +"optional, so metadata may be changed without also replacing the file " +"content." +msgstr "" + +#: ../../../changes.txt:2321 dcae1cfbe1f943179f312c444c9fd1d0 +msgid "" +"**Changed** :meth:`Annot.fileInfo` (file attachment annot) to now also " +"show the PDF unicode filename as parameter *ufilename*." +msgstr "" + +#: ../../../changes.txt:2322 a5cb289e23b949048d4ed3d4937ef181 +msgid "" +"**Fixed** issue #180 (\"page.getText(output='dict') return invalid " +"bbox\") to now also work for vertical text." +msgstr "" + +#: ../../../changes.txt:2323 546f1a5c95ef4d538df52b7648d3b364 +msgid "" +"**Fixed** issue #185 (\"Can't render the annotations created by " +"PyMuPDF\"). The issue's cause was the minimalistic MuPDF approach when " +"creating annotations. Several annotation types have no */AP* " +"(\"appearance\") object when created by MuPDF functions. MuPDF, " +"SumatraPDF and hence also PyMuPDF cannot render annotations without such " +"an object. This fix now ensures, that an appearance object is always " +"created together with the annotation itself. We still do not support line" +" end styles." +msgstr "" + +#: ../../../changes.txt:2327 5bc5acd39391430e8507d2fe02b64686 +msgid "**Changes in Version 1.13.12**" +msgstr "" + +#: ../../../changes.txt:2329 e50fc34357eb4bdd92d1a2c52c118d8f +msgid "" +"**Fixed** issue #180 (\"page.getText(output='dict') return invalid " +"bbox\"). Note that this is a circumvention of an MuPDF error, which " +"generates zero-height character rectangles in some cases. When this " +"happens, this fix ensures a bbox height of at least fontsize." +msgstr "" + +#: ../../../changes.txt:2330 8ea6cf5d670244559e3f3e7ba10c7b0c +msgid "" +"**Changed** for ListBox and ComboBox widgets, the attribute list of " +"selectable values has been renamed to :attr:`Widget.choice_values`." +msgstr "" + +#: ../../../changes.txt:2331 780189986d414a6a9f66d2c3d0e05803 +msgid "" +"**Changed** when adding widgets, any missing of the :ref:`Base-14-Fonts` " +"is automatically added to the PDF. Widget text fonts can now also be " +"chosen from existing widget fonts. Any specified field values are now " +"honored and lead to a field with a preset value." +msgstr "" + +#: ../../../changes.txt:2332 75697589207f4648866d8377fbc97421 +msgid "" +"**Added** :meth:`Annot.updateWidget` which allows changing existing form " +"fields -- including the field value." +msgstr "" + +#: ../../../changes.txt:2336 a1b51790ace74e5abd3390c43ce546f9 +msgid "**Changes in Version 1.13.11**" +msgstr "" + +#: ../../../changes.txt:2338 a4cacb4e59354d3bac560dd65aa0fa34 +msgid "" +"While the preceeding patch subversions only contained various fixes, this" +" version again introduces major new features:" +msgstr "" + +#: ../../../changes.txt:2340 42f2139dcdd441898f1d13176501b1a8 +msgid "" +"**Added** basic support for PDF widget annotations. You can now add PDF " +"form fields of types Text, CheckBox, ListBox and ComboBox. Where " +"necessary, the PDF is tranformed to a Form PDF with the first added " +"widget." +msgstr "" + +#: ../../../changes.txt:2341 72a98d2ee0394cd3a2ea9f7ade45fdde +msgid "" +"**Fixed** issues #176 (\"wrong file embedding\"), #177 (\"segment fault " +"when invoking page.getText()\")and #179 (\"Segmentation fault using " +"page.getLinks() on encrypted PDF\")." +msgstr "" + +#: ../../../changes.txt:2346 b2db781fbac94fce9568b767814a017e +msgid "**Changes in Version 1.13.7**" +msgstr "" + +#: ../../../changes.txt:2348 07e1086fd4224f2782b0f9aba9227446 +msgid "" +"**Added** support of variable page sizes for reflowable documents " +"(e-books, HTML, etc.): new parameters *rect* and *fontsize* in " +":ref:`Document` creation (open), and as a separate method " +":meth:`Document.layout`." +msgstr "" + +#: ../../../changes.txt:2349 4fe572e6632949ec8bf12d0193680640 +msgid "" +"**Added** :ref:`Annot` creation of many annotations types: sticky notes, " +"free text, circle, rectangle, line, polygon, polyline and text markers." +msgstr "" + +#: ../../../changes.txt:2350 d4ceb9bbde2b46fa98dc5df623827a25 +msgid "" +"**Added** support of annotation transparency (:attr:`Annot.opacity`, " +":meth:`Annot.setOpacity`)." +msgstr "" + +#: ../../../changes.txt:2351 c4461424a48a4ceb80c674b3e52bd4e9 +msgid "" +"**Changed** :attr:`Annot.vertices`: point coordinates are now grouped as " +"pairs of floats (no longer as separate floats)." +msgstr "" + +#: ../../../changes.txt:2352 4274f5155af843c4b5df0186ad8c1a88 +msgid "" +"**Changed** annotation colors dictionary: the two keys are now named " +"*\"stroke\"* (formerly *\"common\"*) and *\"fill\"*." +msgstr "" + +#: ../../../changes.txt:2353 b9c6b2b2fe4a4de29a361a128a74ca35 +msgid "" +"**Added** :attr:`Document.isDirty` which is ``True`` if a PDF has been " +"changed in this session. Reset to ``False`` on each :meth:`Document.save`" +" or :meth:`Document.write`." +msgstr "" + +#: ../../../changes.txt:2357 587eee0c54da419abaf95aa36b3786f9 +msgid "**Changes in Version 1.13.6**" +msgstr "" + +#: ../../../changes.txt:2359 7630c3d98438465fa2ac21f7de572a2d +msgid "" +"Fix #173: for memory-resident documents, ensure the stream object will " +"not be garbage-collected by Python before document is closed." +msgstr "" + +#: ../../../changes.txt:2363 a3e3d3dceb794bf5827f9fe31fdf1bae +msgid "**Changes in Version 1.13.5**" +msgstr "" + +#: ../../../changes.txt:2365 2b8ae3453e1e4169b25f4de75e80145b +msgid "" +"New low-level method :meth:`Page._setContents` defines an object given by" +" its :data:`xref` to serve as the :data:`contents` object." +msgstr "" + +#: ../../../changes.txt:2366 b9cb68e98a444f29b95cf50c4b846b89 +msgid "" +"Changed and extended PDF form field support: the attribute *widget_text* " +"has been renamed to :attr:`Annot.widget_value`. Values of all form field " +"types (except signatures) are now supported. A new attribute " +":attr:`Annot.widget_choices` contains the selectable values of listboxes " +"and comboboxes. All these attributes now contain ``None`` if no value is " +"present." +msgstr "" + +#: ../../../changes.txt:2370 4d6cf91c2cab4adeb182b503663811d7 +msgid "**Changes in Version 1.13.4**" +msgstr "" + +#: ../../../changes.txt:2372 c530d2c8ae88418aab6b9f79a27ada89 +msgid "" +":meth:`Document.convertToPDF` now supports page ranges, reverted page " +"sequences and page rotation. If the document already is a PDF, an " +"exception is raised." +msgstr "" + +#: ../../../changes.txt:2373 3c04284022fd4ac7a6e7a1acd8b93f41 +msgid "" +"Fixed a bug (introduced with v1.13.0) that prevented " +":meth:`Page.insertImage` for transparent images." +msgstr "" + +#: ../../../changes.txt:2377 21bd2382d72444caacf5ff2daa4ed26e +msgid "**Changes in Version 1.13.3**" +msgstr "" + +#: ../../../changes.txt:2379 bc3c8bf546984d6b837e58506e06ae2a +msgid "" +"Introduces a way to convert **any MuPDF supported document** to a PDF. If" +" you ever wanted PDF versions of your XPS, EPUB, CBZ or FB2 files -- here" +" is a way to do this." +msgstr "" + +#: ../../../changes.txt:2381 af0f82b656ec4a05af65e1426e6dd1c8 +msgid "" +":meth:`Document.convertToPDF` returns a Python *bytes* object in PDF " +"format. Can be opened like normal in PyMuPDF, or be written to disk with " +"the *\".pdf\"* extension." +msgstr "" + +#: ../../../changes.txt:2385 585c752c6c56413a92faaa286cd0aecf +msgid "**Changes in Version 1.13.2**" +msgstr "" + +#: ../../../changes.txt:2387 74a8e2c87f8b4eb09bfe07e9ebabc8b4 +msgid "" +"The major enhancement is PDF form field support. Form fields are " +"annotations of type *(19, 'Widget')*. There is a new document method to " +"check whether a PDF is a form. The :ref:`Annot` class has new properties " +"describing field details." +msgstr "" + +#: ../../../changes.txt:2389 8201f05c61da41b9b38389a76b118d1d +msgid "" +":attr:`Document.is_form_pdf` is true if object type */AcroForm* and at " +"least one form field exists." +msgstr "" + +#: ../../../changes.txt:2390 605228181363474ea1907637ee7efffa +msgid "" +":attr:`Annot.widget_type`, :attr:`Annot.widget_text` and " +":attr:`Annot.widget_name` contain the details of a form field (i.e. a " +"\"Widget\" annotation)." +msgstr "" + +#: ../../../changes.txt:2394 43c08ba9202844e381f8695f5869c0d1 +msgid "**Changes in Version 1.13.1**" +msgstr "" + +#: ../../../changes.txt:2396 c17a528c4acf47d9a22f7bab42a4f49d +msgid "" +":meth:`TextPage.extractDICT` is a new method to extract the contents of a" +" document page (text and images). All document types are supported as " +"with the other :ref:`TextPage` *extract*()* methods. The returned object " +"is a dictionary of nested lists and other dictionaries, and **exactly " +"equal** to the JSON-deserialization of the old " +":meth:`TextPage.extractJSON`. The difference is that the result is " +"created directly -- no JSON module is used. Because the user needs no " +"JSON module to interpet the information, it should be easier to use, and " +"also have a better performance, because it contains images in their " +"original **binary format** -- they need not be base64-decoded." +msgstr "" + +#: ../../../changes.txt:2397 fc905731743f43948a641768f4657450 +msgid "" +":meth:`Page.getText` correspondingly supports the new parameter value " +"*\"dict\"* to invoke the above method." +msgstr "" + +#: ../../../changes.txt:2398 11f441a8459a441594dbb2b5319f862c +msgid "" +":meth:`TextPage.extractJSON` (resp. *Page.getText(\"json\")*) is still " +"supported for convenience, but its use is expected to decline." +msgstr "" + +#: ../../../changes.txt:2402 cb61eefa292c4d10bd084337eda8c38a +msgid "**Changes in Version 1.13.0**" +msgstr "" + +#: ../../../changes.txt:2404 3484ec3852674cb991bd7e63f53ddeda +msgid "" +"This version is based on MuPDF v1.13.0. This release is \"primarily a bug" +" fix release\"." +msgstr "" + +#: ../../../changes.txt:2406 5905b20f41b3436baa07f1d61121cf37 +msgid "" +"In PyMuPDF, we are also doing some bug fixes while introducing minor " +"enhancements. There only very minimal changes to the user's API." +msgstr "" + +#: ../../../changes.txt:2408 21683ebfff7446fb868d3fdf766b60f6 +msgid "" +":ref:`Document` construction is more flexible: the new *filetype* " +"parameter allows setting the document type. If specified, any extension " +"in the filename will be ignored. More completely addresses `issue #156 " +"`_. As part of this, the " +"documentation has been reworked." +msgstr "" + +#: ../../../changes.txt:2412 0a223b29ef9a491892bee74025cc8dc0 +msgid "Changes to :ref:`Pixmap` constructors:" +msgstr "" + +#: ../../../changes.txt:2411 26d2f125201f46e2871d3373d24c7c2d +msgid "" +"Colorspace conversion no longer allows dropping the alpha channel: source" +" and target **alpha will now always be the same**. We have seen " +"exceptions and even interpreter crashes when using *alpha = 0*." +msgstr "" + +#: ../../../changes.txt:2412 a0a9760c471644a2bcdaa217255354b1 +msgid "As a replacement, the simple pixmap copy lets you choose the target alpha." +msgstr "" + +#: ../../../changes.txt:2414 1db34238940441e2bcf3df96bd2c6c24 +msgid "" +":meth:`Document.save` again offers the full garbage collection range 0 " +"thru 4. Because of a bug in :data:`xref` maintenance, we had to " +"temporarily enforce *garbage > 1*. Finally resolves `issue #148 " +"`_." +msgstr "" + +#: ../../../changes.txt:2416 7150cb564a9c4319902b18edae926f61 +msgid "" +":meth:`Document.save` now offers to \"prettify\" PDF source via an " +"additional argument." +msgstr "" + +#: ../../../changes.txt:2417 07d4829283b74797837f2d5f11782d43 +msgid "" +":meth:`Page.insertImage` has the additional *stream* \\-parameter, " +"specifying a memory area holding an image." +msgstr "" + +#: ../../../changes.txt:2419 f15b5c730fba4aa2bc6f76700fd8b7d2 +msgid "" +"Issue with garbled PNGs on Linux systems has been resolved (`\"Problem " +"writing PNG\" #133) `_." +msgstr "" + +#: ../../../changes.txt:2424 cc8fc0779a8642d4ba85dc4bd4643b78 +msgid "**Changes in Version 1.12.4**" +msgstr "" + +#: ../../../changes.txt:2426 9781c9f86ccd46df8a11c97071c8e53f +msgid "This is an extension of 1.12.3." +msgstr "" + +#: ../../../changes.txt:2428 bfe55e79b5054c5eb3881d0869cf1236 +msgid "" +"Fix of `issue #147 `_: " +"methods :meth:`Document.getPageFontlist` and " +":meth:`Document.getPageImagelist` now also show fonts and images " +"contained in :data:`resources` nested via \"Form XObjects\"." +msgstr "" + +#: ../../../changes.txt:2429 c4eab138d6734344a7bed77512b23a10 +msgid "" +"Temporary fix of `issue #148 " +"`_: Saving to new PDF " +"files will now automatically use *garbage = 2* if a lower value is given." +" Final fix is to be expected with MuPDF's next version. At that point we " +"will remove this circumvention." +msgstr "" + +#: ../../../changes.txt:2430 cbbd705645ed41d3a51592564e68d3c2 +msgid "" +"Preventive fix of illegally using stencil / image mask pixmaps in some " +"methods." +msgstr "" + +#: ../../../changes.txt:2431 186d7bac6ae54348b445f7891263fca6 +msgid "" +"Method :meth:`Document.getPageFontlist` now includes the encoding name " +"for each font in the list." +msgstr "" + +#: ../../../changes.txt:2432 0aaf9f75b8264c99881cf89a9ad29f43 +msgid "" +"Method :meth:`Document.getPageImagelist` now includes the decode method " +"name for each image in the list." +msgstr "" + +#: ../../../changes.txt:2436 ef66b620ed484120874575679c4f5817 +msgid "**Changes in Version 1.12.3**" +msgstr "" + +#: ../../../changes.txt:2438 1896f885d6c548939d8c638fac1ac455 +msgid "This is an extension of 1.12.2." +msgstr "" + +#: ../../../changes.txt:2440 6d0eaef14cb84c06bb422e66fceea719 +msgid "" +"Many functions now return ``None`` instead of *0*, if the result has no " +"other meaning than just indicating successful execution " +"(:meth:`Document.close`, :meth:`Document.save`, :meth:`Document.select`, " +":meth:`Pixmap.save` and many others)." +msgstr "" + +#: ../../../changes.txt:2444 d99b5cfb9c664823b992dd7076b0299f +msgid "**Changes in Version 1.12.2**" +msgstr "" + +#: ../../../changes.txt:2446 86f952c5348148efb4c098ab52be0c38 +msgid "This is an extension of 1.12.1." +msgstr "" + +#: ../../../changes.txt:2448 b87afbe85cd348b28a7252a5af6d2b42 +msgid "" +"Method :meth:`Page.show_pdf_page` now accepts the new *clip* argument. " +"This specifies an area of the source page to which the display should be " +"restricted." +msgstr "" + +#: ../../../changes.txt:2450 5340966b63d4493782e410edcd088234 +msgid "" +"New :attr:`Page.CropBox` and :attr:`Page.MediaBox` have been included for" +" convenience." +msgstr "" + +#: ../../../changes.txt:2455 4b31fc60a15e42e7b7de95b0858ddb3d +msgid "**Changes in Version 1.12.1**" +msgstr "" + +#: ../../../changes.txt:2457 24fdd844d5ed4490b5ecfc723d6be137 +msgid "This is an extension of version 1.12.0." +msgstr "" + +#: ../../../changes.txt:2459 a1b26c6d81944f33a89ba1f736b09015 +msgid "" +"New method :meth:`Page.show_pdf_page` displays another's PDF page. This " +"is a **vector** image and therefore remains precise across zooming. Both " +"involved documents must be PDF." +msgstr "" + +#: ../../../changes.txt:2461 0c38f0be178b4788974741252fed5612 +msgid "" +"New method :meth:`Page.getSVGimage` creates an SVG image from the page. " +"In contrast to the raster image of a pixmap, this is a vector image " +"format. The return is a unicode text string, which can be saved in a " +"*.svg* file." +msgstr "" + +#: ../../../changes.txt:2463 111f56b16f954371b924cbfc90c6dfb2 +msgid "" +"Method :meth:`Page.getTextBlocks` now accepts an additional bool " +"parameter \"images\". If set to true (default is false), image blocks " +"(metadata only) are included in the produced list and thus allow " +"detecting areas with rendered images." +msgstr "" + +#: ../../../changes.txt:2465 92b482e4cf134723943122871ada66c2 +msgid "Minor bug fixes." +msgstr "" + +#: ../../../changes.txt:2467 1b1d537477854a9599a2a769bc05ba3a +msgid "" +"\"text\" result of :meth:`Page.getText` concatenates all lines within a " +"block using a single space character. MuPDF's original uses \"\\\\n\" " +"instead, producing a rather ragged output." +msgstr "" + +#: ../../../changes.txt:2469 7c54bc39f79b4cb1aed6b2f722e28eaf +msgid "" +"New properties of :ref:`Page` objects :attr:`Page.MediaBoxSize` and " +":attr:`Page.CropBoxPosition` provide more information about a page's " +"dimensions. For non-PDF files (and for most PDF files, too) these will be" +" equal to :attr:`Page.rect.bottom_right`, resp. " +":attr:`Page.rect.top_left`. For example, class :ref:`Shape` makes use of " +"them to correctly position its items." +msgstr "" + +#: ../../../changes.txt:2473 3d6a9151396640b1bd9d151bc7b5b41b +msgid "**Changes in Version 1.12.0**" +msgstr "" + +#: ../../../changes.txt:2475 c6c7d1a9e63046158d23f86083949ae0 +msgid "" +"This version is based on and requires MuPDF v1.12.0. The new MuPDF " +"version contains quite a number of changes -- most of them around text " +"extraction. Some of the changes impact the programmer's API." +msgstr "" + +#: ../../../changes.txt:2477 94500a94302a432a91cdb0a323d3d9d2 +msgid "" +":meth:`Outline.saveText` and :meth:`Outline.saveXML` have been deleted " +"without replacement. You probably haven't used them much anyway. But if " +"you are looking for a replacement: the output of :meth:`Document.get_toc`" +" can easily be used to produce something equivalent." +msgstr "" + +#: ../../../changes.txt:2479 71d854025faf423d98dea6297385f8d8 +msgid "Class *TextSheet* does no longer exist." +msgstr "" + +#: ../../../changes.txt:2481 f55954801979410dbc03c43a2b504234 +msgid "" +"Text \"spans\" (one of the hierarchy levels of :ref:`TextPage`) no longer" +" contain positioning information (i.e. no \"bbox\" key). Instead, spans " +"now provide the font information for its text. This impacts our JSON " +"output variant." +msgstr "" + +#: ../../../changes.txt:2483 172a43be11f04ae4a73aa78b3386513c +msgid "" +"HTML output has improved very much: it now creates valid documents which " +"can be displayed by browsers to produce a similar view as the original " +"document." +msgstr "" + +#: ../../../changes.txt:2485 fa92d836768e4d5981bd27101b1864a0 +msgid "" +"There is a new output format XHTML, which provides text and images in a " +"browser-readable format. The difference to HTML output is, that no effort" +" is made to reproduce the original layout." +msgstr "" + +#: ../../../changes.txt:2487 7c6662c121e84549abb98ed38a0ca9ac +msgid "" +"All output formats of :meth:`Page.getText` now support creating complete," +" valid documents, by wrapping them with appropriate header and trailer " +"information. If you are interested in using the HTML output, please make " +"sure to read :ref:`HTMLQuality`." +msgstr "" + +#: ../../../changes.txt:2489 926774a89ac74674a7fa858d989bc1a9 +msgid "" +"To support finding text positions, we have added special methods that " +"don't need detours like :meth:`TextPage.extractJSON` or " +":meth:`TextPage.extractXML`: use :meth:`Page.getTextBlocks` or resp. " +":meth:`Page.getTextWords` to create lists of text blocks or resp. words, " +"which are accompanied by their rectangles. This should be much faster " +"than the standard text extraction methods and also avoids using " +"additional packages for interpreting their output." +msgstr "" + +#: ../../../changes.txt:2494 df0286dbaf204c268372f6ad5af5380c +msgid "**Changes in Version 1.11.2**" +msgstr "" + +#: ../../../changes.txt:2496 0a552555ced74faaac228b9fe50dafb6 +msgid "This is an extension of v1.11.1." +msgstr "" + +#: ../../../changes.txt:2498 75bbf382a21041899e41482491192e27 +msgid "" +"New :meth:`Page.insertFont` creates a PDF */Font* object and returns its " +"object number." +msgstr "" + +#: ../../../changes.txt:2500 2c5953555ce344018ad92ff2697776eb +msgid "" +"New :meth:`Document.extractFont` extracts the content of an embedded font" +" given its object number." +msgstr "" + +#: ../../../changes.txt:2502 86b03154bde6488b90de8b57cd8b393f +msgid "" +"Methods **FontList(...)** items no longer contain the PDF generation " +"number. This value never had any significance. Instead, the font file " +"extension is included (e.g. \"pfa\" for a \"PostScript Font for ASCII\")," +" which is more valuable information." +msgstr "" + +#: ../../../changes.txt:2504 555ad5d9b2104b1ba89fad9cff3a8180 +msgid "Fonts other than \"simple fonts\" (Type1) are now also supported." +msgstr "" + +#: ../../../changes.txt:2506 2c2b24363fd846f6863381fb32049658 +msgid "New options to change :ref:`Pixmap` size:" +msgstr "" + +#: ../../../changes.txt:2508 cdb4cbcd769649f1a58fbaa0866218e3 +msgid "Method :meth:`Pixmap.shrink` reduces the pixmap proportionally in place." +msgstr "" + +#: ../../../changes.txt:2510 42a286d35ccb4b0eac31df32ad72ecc5 +msgid "" +"A new :ref:`Pixmap` copy constructor allows scaling via setting target " +"width and height." +msgstr "" + +#: ../../../changes.txt:2515 79676f1b970e4733b67e4b8a4747d0ef +msgid "**Changes in Version 1.11.1**" +msgstr "" + +#: ../../../changes.txt:2517 967ccbb071be4c19abe9b7ac083697d5 +msgid "This is an extension of v1.11.0." +msgstr "" + +#: ../../../changes.txt:2519 dc87c625689f42b0b5880ed5d452bd23 +msgid "" +"New class *Shape*. It facilitates and extends the creation of image " +"shapes on PDF pages. It contains multiple methods for creating elementary" +" shapes like lines, rectangles or circles, which can be combined into " +"more complex ones and be given common properties like line width or " +"colors. Combined shapes are handled as a unit and e.g. be \"morphed\" " +"together. The class can accumulate multiple complex shapes and put them " +"all in the page's foreground or background -- thus also reducing the " +"number of updates to the page's :data:`contents` object." +msgstr "" + +#: ../../../changes.txt:2521 f8dc912de95e4c2fba7010f2f990f7ae +msgid "All *Page* draw methods now use the new *Shape* class." +msgstr "" + +#: ../../../changes.txt:2523 fdf3b41e3dc14fbba8b618bcf7691c39 +msgid "" +"Text insertion methods *insertText()* and *insertTextBox()* now support " +"morphing in addition to text rotation. They have become part of the " +"*Shape* class and thus allow text to be freely combined with graphics." +msgstr "" + +#: ../../../changes.txt:2525 b736ba135e344197947ef466b4c08139 +msgid "" +"A new ``Pixmap`` constructor allows creating pixmap copies with an added " +"alpha channel. A new method also allows directly manipulating alpha " +"values." +msgstr "" + +#: ../../../changes.txt:2527 615cf47c44ae436db26693965eefe818 +msgid "" +"Binary algebraic operations with geometry objects (matrices, rectangles " +"and points) now generally also support lists or tuples as the second " +"operand. You can add a tuple *(x, y)* of numbers to a :ref:`Point`. In " +"this context, such sequences are called \":data:`point_like`\" (resp. " +":data:`matrix_like`, :data:`rect_like`)." +msgstr "" + +#: ../../../changes.txt:2529 1f318316e89344309399e272f4160705 +msgid "" +"Geometry objects now fully support in-place operators. For example, *p /=" +" m* replaces point p with *p * 1/m* for a number, or *p * ~m* for a " +":data:`matrix_like` object *m*. Similarly, if *r* is a rectangle, then *r" +" |= (3, 4)* is the new rectangle that also includes *fitz.Point(3, 4)*, " +"and *r &= (1, 2, 3, 4)* is its intersection with *fitz.Rect(1, 2, 3, 4)*." +msgstr "" + +#: ../../../changes.txt:2533 09f4d2a2e7f242adbd66efe7051225bf +msgid "**Changes in Version 1.11.0**" +msgstr "" + +#: ../../../changes.txt:2535 eb957530b2594c15ac89ca754855c1e0 +msgid "This version is based on and requires MuPDF v1.11." +msgstr "" + +#: ../../../changes.txt:2537 f9922487c5ee43d38c25f38a9a5db1c1 +msgid "" +"Though MuPDF has declared it as being mostly a bug fix version, one major" +" new feature is indeed contained: support of embedded files -- also " +"called portfolios or collections. We have extended PyMuPDF functionality " +"to embrace this up to an extent just a little beyond the *mutool* utility" +" as follows." +msgstr "" + +#: ../../../changes.txt:2539 3898f08551584cb79a7289e30f2a31f1 +msgid "" +"The *Document* class now support embedded files with several new methods " +"and one new property:" +msgstr "" + +#: ../../../changes.txt:2541 0d19acbd00e04282be89675fd317524d +msgid "" +"*embfile_Info()* returns metadata information about an entry in the list " +"of embedded files. This is more than *mutool* currently provides: it " +"shows all the information that was used to embed the file (not just the " +"entry's name)." +msgstr "" + +#: ../../../changes.txt:2542 22e0cfadd1324dd2a59e2d37345d0121 +msgid "" +"*embfile_Get()* retrieves the (decompressed) content of an entry into a " +"*bytes* buffer." +msgstr "" + +#: ../../../changes.txt:2543 ccff8ac168cd4341ba6fa4e78d324b0c +msgid "" +"*embfile_Add(...)* inserts new content into the PDF portfolio. We (in " +"contrast to *mutool*) **restrict** this to entries with a **new name** " +"(no duplicate names allowed)." +msgstr "" + +#: ../../../changes.txt:2544 b160b18bf4824ea69b770b5a1eefdc97 +msgid "" +"*embfile_Del(...)* deletes an entry from the portfolio (function not " +"offered in MuPDF)." +msgstr "" + +#: ../../../changes.txt:2545 0e6a82cc60b14ca1afc3e1af5ae4b73d +msgid "" +"*embfile_SetInfo()* -- changes filename or description of an embedded " +"file." +msgstr "" + +#: ../../../changes.txt:2546 a31473118d0e40b38e01fe689c228d5a +msgid "*embfile_Count* -- contains the number of embedded files." +msgstr "" + +#: ../../../changes.txt:2548 ad63da41b4eb4881bff53f7c6d68b179 +msgid "" +"Several enhancements deal with streamlining geometry objects. These are " +"not connected to the new MuPDF version and most of them are also " +"reflected in PyMuPDF v1.10.0. Among them are new properties to identify " +"the corners of rectangles by name (e.g. *Rect.bottom_right*) and new " +"methods to deal with set-theoretic questions like *Rect.contains(x)* or " +"*IRect.intersects(x)*. Special effort focussed on supporting more " +"\"Pythonic\" language constructs: *if x in rect ...* is equivalent to " +"*rect.contains(x)*." +msgstr "" + +#: ../../../changes.txt:2550 1cecbb5f36bc4c8ab01561d1f056652d +msgid "" +"The :ref:`Rect` chapter now has more background on empty amd infinite " +"rectangles and how we handle them. The handling itself was also updated " +"for more consistency in this area." +msgstr "" + +#: ../../../changes.txt:2552 ae831c92b6504e48bc462932ef9b07f1 +msgid "We have started basic support for **generation** of PDF content:" +msgstr "" + +#: ../../../changes.txt:2554 cc52bd38881941b5b6fce1a7a00299f0 +msgid "" +"*Document.insert_page()* adds a new page into a PDF, optionally " +"containing some text." +msgstr "" + +#: ../../../changes.txt:2555 85a7a921467f4433bb0ea0413fea4b7e +msgid "*Page.insertImage()* places a new image on a PDF page." +msgstr "" + +#: ../../../changes.txt:2556 01c1a6aca699490bb684774048689320 +msgid "*Page.insertText()* puts new text on an existing page" +msgstr "" + +#: ../../../changes.txt:2558 7ddb621738ec4c0dbe41e01033d8bfe1 +msgid "" +"For **FileAttachment** annotations, content and name of the attached file" +" can extracted and changed." +msgstr "" + +#: ../../../changes.txt:2562 3bec344b93fc4f8faf35093b73cdc9fc +msgid "**Changes in Version 1.10.0**" +msgstr "" + +#: ../../../changes.txt:2564 feb11f01562a493d8e023a2182b14f50 +msgid "**MuPDF v1.10 Impact**" +msgstr "" + +#: ../../../changes.txt:2566 c091ff823b584bd2a97c354d5cfec071 +msgid "" +"MuPDF version 1.10 has a significant impact on our bindings. Some of the " +"changes also affect the API -- in other words, **you** as a PyMuPDF user." +msgstr "" + +#: ../../../changes.txt:2568 4f068889e650415bbda512d4b796394e +msgid "" +"Link destination information has been reduced. Several properties of the " +"*linkDest* class no longer contain valuable information. In fact, this " +"class as a whole has been deleted from MuPDF's library and we in PyMuPDF " +"only maintain it to provide compatibilty to existing code." +msgstr "" + +#: ../../../changes.txt:2570 665fcb454ef0415c92eae6cfc5927c1a +msgid "" +"In an effort to minimize memory requirements, several improvements have " +"been built into MuPDF v1.10:" +msgstr "" + +#: ../../../changes.txt:2572 9f2418fa2e244d98b86f329845a70f9e +msgid "" +"A new *config.h* file can be used to de-select unwanted features in the C" +" base code. Using this feature we have been able to reduce the size of " +"our binary *_fitz.o* / *_fitz.pyd* by about 50% (from 9 MB to 4.5 MB). " +"When UPX-ing this, the size goes even further down to a very handy 2.3 " +"MB." +msgstr "" + +#: ../../../changes.txt:2574 3d01f6167b3c4481be84e2f491a62f71 +msgid "" +"The alpha (transparency) channel for pixmaps is now optional. Letting " +"alpha default to ``False`` significantly reduces pixmap sizes (by 20% -- " +"CMYK, 25% -- RGB, 50% -- GRAY). Many ``Pixmap`` constructors therefore " +"now accept an *alpha* boolean to control inclusion of this channel. Other" +" pixmap constructors (e.g. those for file and image input) create pixmaps" +" with no alpha alltogether. On the downside, save methods for pixmaps no " +"longer accept a *savealpha* option: this channel will always be saved " +"when present. To minimize code breaks, we have left this parameter in the" +" call patterns -- it will just be ignored." +msgstr "" + +#: ../../../changes.txt:2576 f351b44858f4486388b18583eca79166 +msgid "" +"``DisplayList`` and ``TextPage`` class constructors now **require the " +"mediabox** of the page they are referring to (i.e. the *page.bound()* " +"rectangle). There is no way to construct this information from other " +"sources, therefore a source code change cannot be avoided in these cases." +" We assume however, that not many users are actually employing these " +"rather low level classes explixitely. So the impact of that change should" +" be minor." +msgstr "" + +#: ../../../changes.txt:2578 67793c99d670479286b459b318537565 +msgid "**Other Changes compared to Version 1.9.3**" +msgstr "" + +#: ../../../changes.txt:2580 7d8ac7a4fa2149d4995bd6536d50c8ed +msgid "" +"The new :ref:`Document` method *write()* writes an opened PDF to memory " +"(as opposed to a file, like *save()* does)." +msgstr "" + +#: ../../../changes.txt:2581 30175d0eb5284676b1bd1fc00310374c +msgid "" +"An annotation can now be scaled and moved around on its page. This is " +"done by modifying its rectangle." +msgstr "" + +#: ../../../changes.txt:2582 47028729c9cb426f85efb8fd7a360f15 +msgid "" +"Annotations can now be deleted. :ref:`Page` contains the new method " +"*deleteAnnot()*." +msgstr "" + +#: ../../../changes.txt:2583 99e52edc3ad14227aa8212b59b158eeb +msgid "" +"Various annotation attributes can now be modified, e.g. content, dates, " +"title (= author), border, colors." +msgstr "" + +#: ../../../changes.txt:2584 41e889d52a094b7d84188d17bfa360e2 +msgid "" +"Method *Document.insert_pdf()* now also copies annotations of source " +"pages." +msgstr "" + +#: ../../../changes.txt:2585 fe06bf31dc44494ba788c7e278b979b3 +msgid "" +"The *Pages* class has been deleted. As documents can now be accessed with" +" page numbers as indices (like *doc[n] = doc.loadPage(n)*), and document " +"object can be used as iterators, the benefit of this class was too low to" +" maintain it. See the following comments." +msgstr "" + +#: ../../../changes.txt:2586 b1d8e938e5d14cc7bebb91d64c18cf05 +msgid "" +"*loadPage(n)* / *doc[n]* now accept arbitrary integers to specify a page " +"number, as long as *n < pageCount*. So, e.g. *doc[-500]* is always valid " +"and will load page *(-500) % pageCount*." +msgstr "" + +#: ../../../changes.txt:2587 bdfbc80ad6854d6ca563e3d4501039ef +msgid "" +"A document can now also be used as an iterator like this: *for page in " +"doc: ... ...*. This will yield all pages of " +"*doc* as *page*." +msgstr "" + +#: ../../../changes.txt:2588 5d98777946264eed939290f140ba07d4 +msgid "" +"The :ref:`Pixmap` method *getSize()* has been replaced with property " +"*size*. As before *Pixmap.size == len(Pixmap)* is true." +msgstr "" + +#: ../../../changes.txt:2589 89d95b558a8d40baaf6caa0218148650 +msgid "" +"In response to transparency (alpha) being optional, several new " +"parameters and properties have been added to :ref:`Pixmap` and " +":ref:`Colorspace` classes to support determining their characteristics." +msgstr "" + +#: ../../../changes.txt:2590 71e327689c924881aaf4e00495ab3a5e +msgid "" +"The :ref:`Page` class now contains new properties *firstAnnot* and " +"*firstLink* to provide starting points to the respective class chains, " +"where *firstLink* is just a mnemonic synonym to method *loadLinks()* " +"which continues to exist. Similarly, the new property *rect* is a synonym" +" for method *bound()*, which also continues to exist." +msgstr "" + +#: ../../../changes.txt:2591 52e1209684e443469ab42da7e0f3cdd1 +msgid "" +":ref:`Pixmap` methods *samplesRGB()* and *samplesAlpha()* have been " +"deleted because pixmaps can now be created without transparency." +msgstr "" + +#: ../../../changes.txt:2592 08675dfe98d3450988cbaded9800e597 +msgid "" +":ref:`Rect` now has a property *irect* which is a synonym of method " +"*round()*. Likewise, :ref:`IRect` now has property *rect* to deliver a " +":ref:`Rect` which has the same coordinates as floats values." +msgstr "" + +#: ../../../changes.txt:2593 0c6e4750a9244bdfaad3fadda40994ae +msgid "" +"Document has the new method *searchPageFor()* to search for a text " +"string. It works exactly like the corresponding *Page.searchFor()* with " +"page number as additional parameter." +msgstr "" + +#: ../../../changes.txt:2598 32d6aa7229224c54aa2f77ec92e477bb +msgid "**Changes in Version 1.9.3**" +msgstr "" + +#: ../../../changes.txt:2600 7a1d9bfb735c4479bbbf4dafdbfe0a5a +msgid "" +"This version is also based on MuPDF v1.9a. Changes compared to version " +"1.9.2:" +msgstr "" + +#: ../../../changes.txt:2602 ef12802e55f346d5a87f9850ddf6676a +msgid "" +"As a major enhancement, annotations are now supported in a similar way as" +" links. Annotations can be displayed (as pixmaps) and their properties " +"can be accessed." +msgstr "" + +#: ../../../changes.txt:2603 350dfa5815a14a26babff12ee9ff57f7 +msgid "" +"In addition to the document *select()* method, some simpler methods can " +"now be used to manipulate a PDF:" +msgstr "" + +#: ../../../changes.txt:2605 3bdb6e23f5484918a7b3eb0b76acfe59 +msgid "*copyPage()* copies a page within a document." +msgstr "" + +#: ../../../changes.txt:2606 2e4f63fb375140e6965dcf773cc3dcb0 +msgid "*movePage()* is similar, but deletes the original." +msgstr "" + +#: ../../../changes.txt:2607 5e96661d219b410aacdb6c369900c20e +msgid "*delete_page()* deletes a page" +msgstr "" + +#: ../../../changes.txt:2608 6e795d710ad04d208bbf3c6891d8c429 +msgid "*delete_pages()* deletes a page range" +msgstr "" + +#: ../../../changes.txt:2610 10906bbce5334284a869abbfaae9cb9b +msgid "" +"*rotation* or *setRotation()* access or change a PDF page's rotation, " +"respectively." +msgstr "" + +#: ../../../changes.txt:2611 ebfec7fd3c814436b6ca05f48c05e3e1 +msgid "" +"Available but undocumented before, :ref:`IRect`, :ref:`Rect`, " +":ref:`Point` and :ref:`Matrix` support the *len()* method and their " +"coordinate properties can be accessed via indices, e.g. *IRect.x1 == " +"IRect[2]*." +msgstr "" + +#: ../../../changes.txt:2612 163e3e57f4b04a4c94b6fa9e56c40f86 +msgid "" +"For convenience, documents now support simple indexing: *doc.loadPage(n) " +"== doc[n]*. The index may however be in range *-pageCount < n < " +"pageCount*, such that *doc[-1]* is the last page of the document." +msgstr "" + +#: ../../../changes.txt:2616 f205bdf79fec4a8588d0303bc52e3d26 +msgid "**Changes in Version 1.9.2**" +msgstr "" + +#: ../../../changes.txt:2618 46300de29d1d4f369fd37e5f7b81fc3d +msgid "" +"This version is also based on MuPDF v1.9a. Changes compared to version " +"1.9.1:" +msgstr "" + +#: ../../../changes.txt:2620 4524f8edbc0b40acb7151be3de5d91c4 +msgid "" +"*fitz.open()* (no parameters) creates a new empty |PDF| document, i.e. if" +" saved afterwards, it must be given a *.pdf* extension." +msgstr "" + +#: ../../../changes.txt:2621 19063a4b34714531a4904246782340ab +msgid "" +":ref:`Document` now accepts all of the following formats (*Document* and " +"*open* are synonyms):" +msgstr "" + +#: ../../../changes.txt:2623 2ea0067410a1444fa304ca4dd05e03db +msgid "*open()*," +msgstr "" + +#: ../../../changes.txt:2624 abdad6059ca245e39cbd318f80543f9c +msgid "*open(filename)* (equivalent to *open(filename, None)*)," +msgstr "" + +#: ../../../changes.txt:2625 1570421beac349d590be8dde6e98d39b +msgid "*open(filetype, area)* (equivalent to *open(filetype, stream = area)*)." +msgstr "" + +#: ../../../changes.txt:2627 87cd65eb52fc4ceba03e9708190a06bd +msgid "" +"Type of memory area *stream* may be *bytes* or *bytearray*. Thus, e.g. " +"*area = open(\"file.pdf\", \"rb\").read()* may be used directly (without " +"first converting it to bytearray)." +msgstr "" + +#: ../../../changes.txt:2628 0195ab65cf4f4d84b0a344bc8058bacb +msgid "" +"New method *Document.insert_pdf()* (PDFs only) inserts a range of pages " +"from another PDF." +msgstr "" + +#: ../../../changes.txt:2629 ce60f25ee293401ab15978a90001d58b +msgid "" +"*Document* objects doc now support the *len()* function: ``len(doc) == " +"doc.pageCount``." +msgstr "" + +#: ../../../changes.txt:2630 f4cdb4a434c948f1aca1c4c8bc7d39da +msgid "" +"New method *Document.getPageImageList()* creates a list of images used on" +" a page." +msgstr "" + +#: ../../../changes.txt:2631 1377372b76d14efd85dc025d6b047374 +msgid "" +"New method *Document.getPageFontList()* creates a list of fonts " +"referenced by a page." +msgstr "" + +#: ../../../changes.txt:2632 3947dd92b80e44e28f7625984956795d +msgid "" +"New pixmap constructor *fitz.Pixmap(doc, xref)* creates a pixmap based on" +" an opened PDF document and an :data:`xref` number of the image." +msgstr "" + +#: ../../../changes.txt:2633 e4626fdd13f94ea2b479c0ffbd7adbce +msgid "" +"New pixmap constructor *fitz.Pixmap(cspace, spix)* creates a pixmap as a " +"copy of another one *spix* with the colorspace converted to *cspace*. " +"This works for all colorspace combinations." +msgstr "" + +#: ../../../changes.txt:2634 303ea21711234a9c8361936a8f6445bd +msgid "" +"Pixmap constructor *fitz.Pixmap(colorspace, width, height, samples)* now " +"allows *samples* to also be *bytes*, not only *bytearray*." +msgstr "" + +#: ../../../changes.txt:2639 a756be7d6b5b4ecfa20f9a73024d90a0 +msgid "**Changes in Version 1.9.1**" +msgstr "" + +#: ../../../changes.txt:2641 7f7cc713c0b245efbed63364fd8059eb +msgid "" +"This version of PyMuPDF is based on MuPDF library source code version " +"1.9a published on April 21, 2016." +msgstr "" + +#: ../../../changes.txt:2643 65ca3396f570490fbd27b7ab37d18ce7 +msgid "" +"Please have a look at MuPDF's website to see which changes and " +"enhancements are contained herein." +msgstr "" + +#: ../../../changes.txt:2645 23fc5187b90a4f869fbc4dffe7c698ee +msgid "Changes in version 1.9.1 compared to version 1.8.0 are the following:" +msgstr "" + +#: ../../../changes.txt:2647 3674e1b43a73413e80031d439f55aaf3 +msgid "New methods *get_area()* for both *fitz.Rect* and *fitz.IRect*" +msgstr "" + +#: ../../../changes.txt:2648 0862079db4cd4a8ab5c62fb650c99bc3 +msgid "" +"Pixmaps can now be created directly from files using the new constructor " +"*fitz.Pixmap(filename)*." +msgstr "" + +#: ../../../changes.txt:2649 ba7aa343f0af464b8d0132883b1aabc0 +msgid "The Pixmap constructor *fitz.Pixmap(image)* has been extended accordingly." +msgstr "" + +#: ../../../changes.txt:2650 394f7d1fe2084d338c50b78b8842ba51 +msgid "" +"*fitz.Rect* can now be created with all possible combinations of points " +"and coordinates." +msgstr "" + +#: ../../../changes.txt:2651 4931c3a586304468b98266fb84c9142a +msgid "" +"PyMuPDF classes and methods now all contain __doc__ strings, most of " +"them created by SWIG automatically. While the PyMuPDF documentation " +"certainly is more detailed, this feature should help a lot when " +"programming in Python-aware IDEs." +msgstr "" + +#: ../../../changes.txt:2652 7fbf8997aa274c1baf0c785f58826183 +msgid "" +"A new document method of *getPermits()* returns the permissions " +"associated with the current access to the document (print, edit, " +"annotate, copy), as a Python dictionary." +msgstr "" + +#: ../../../changes.txt:2653 56318f7975d34ae89ec4b7af9193f864 +msgid "The identity matrix *fitz.Identity* is now **immutable**." +msgstr "" + +#: ../../../changes.txt:2654 af8644e1ad8f4858814c0a14874e9418 +msgid "" +"The new document method *select(list)* removes all pages from a document " +"that are not contained in the list. Pages can also be duplicated and re-" +"arranged." +msgstr "" + +#: ../../../changes.txt:2655 e2c4b887798e425f8ef1ba145db8e959 +msgid "" +"Various improvements and new members in our demo and examples " +"collections. Perhaps most prominently: *PDF_display* now supports " +"scrolling with the mouse wheel, and there is a new example program " +"*wxTableExtract* which allows to graphically identify and extract table " +"data in documents." +msgstr "" + +#: ../../../changes.txt:2656 0d2c41ce644b422b89071bc46be38b5b +msgid "*fitz.open()* is now an alias of *fitz.Document()*." +msgstr "" + +#: ../../../changes.txt:2657 d3388aa72d8a4d57911fdf4dceadd443 +msgid "" +"New pixmap method *tobytes()* which will return a bytearray formatted as " +"a PNG image of the pixmap." +msgstr "" + +#: ../../../changes.txt:2658 112d8c2f5eb34860be8e69518bebef01 +msgid "" +"New pixmap method *samplesRGB()* providing a *samples* version with alpha" +" bytes stripped off (RGB colorspaces only)." +msgstr "" + +#: ../../../changes.txt:2659 50b4f449559743fd91fc9f64829c520c +msgid "" +"New pixmap method *samplesAlpha()* providing the alpha bytes only of the " +"*samples* area." +msgstr "" + +#: ../../../changes.txt:2660 a48ba5ae44ec42d6b1072b3a38da931c +msgid "New iterator *fitz.Pages(doc)* over a document's set of pages." +msgstr "" + +#: ../../../changes.txt:2661 fe060ee8237b49569b7d751a560b714d +msgid "" +"New matrix methods *invert()* (calculate inverted matrix), *concat()* " +"(calculate matrix product), *pretranslate()* (perform a shift operation)." +msgstr "" + +#: ../../../changes.txt:2662 96b8f12d6a154ce8b2929131e6133492 +msgid "" +"New *IRect* methods *intersect()* (intersection with another rectangle), " +"*translate()* (perform a shift operation)." +msgstr "" + +#: ../../../changes.txt:2663 de74fa1695ba4f2cb8aec322869f17f2 +msgid "" +"New *Rect* methods *intersect()* (intersection with another rectangle), " +"*transform()* (transformation with a matrix), *include_point()* (enlarge " +"rectangle to also contain a point), *include_rect()* (enlarge rectangle " +"to also contain another one)." +msgstr "" + +#: ../../../changes.txt:2664 9d1082f2ab5c4eb4882b2e9f31b89b24 +msgid "Documented *Point.transform()* (transform a point with a matrix)." +msgstr "" + +#: ../../../changes.txt:2665 81e75991b8fd4651be894bdf57224e29 +msgid "" +"*Matrix*, *IRect*, *Rect* and *Point* classes now support compact, " +"algebraic formulations for manipulating such objects." +msgstr "" + +#: ../../../changes.txt:2666 8aa17130b3724609adc6873d480bab1a +msgid "" +"Incremental saves for changes are possible now using the call pattern " +"*doc.save(doc.name, incremental=True)*." +msgstr "" + +#: ../../../changes.txt:2667 85e1d074eda8470e952eac33c7fca480 +msgid "" +"A PDF's metadata can now be deleted, set or changed by document method " +"*set_metadata()*. Supports incremental saves." +msgstr "" + +#: ../../../changes.txt:2668 405109fca6bb44be8031fd6091b7f975 +msgid "" +"A PDF's bookmarks (or table of contents) can now be deleted, set or " +"changed with the entries of a list using document method *set_toc(list)*." +" Supports incremental saves." +msgstr "" + +#: ../../footer.rst:60 04ec74847748455e856ddafa9e759e2b +msgid "This documentation covers all versions up to |version|." +msgstr "" + +#~ msgid "**Changes in version 1.23.0rc1 (2023-08-10)**" +#~ msgstr "" + +#~ msgid "Contains a new \"rebased\" implementation of PyMuPDF." +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `#2542 " +#~ "`_: " +#~ "fritz.utils.scrub AttributeError Annot object " +#~ "has no attribute fileUpd inside" +#~ msgstr "" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "Other: * Use MuPDF-1.23.4. * Fix " +#~ "optimisation flags with system installs. " +#~ "* Fixed the problem that the clip" +#~ " parameter does not take effect " +#~ "during table recognition * Support " +#~ "Pillow mode \"RGBa\" * Support extra " +#~ "word delimiters * Support checking valid" +#~ " PDF name objects" +#~ msgstr "" + +#~ msgid "**Changes in version 1.24.3 (2024-04-xx)**" +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `3180 " +#~ "`_: Cannot " +#~ "show optional content group: AttributeError:" +#~ " module 'pymupdf.mupdf' has no attribute" +#~ " 'pdf_array_push_drop'" +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `3163 " +#~ "`_: " +#~ "AssertionError on using pymupdf.IRect" +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `3177 " +#~ "`_: " +#~ "pymupdf.Pixmap(None, pix) Unrecognised args " +#~ "for constructing Pixmap" +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `2978 " +#~ "`_: 1.23.9rc1:" +#~ " module 'pymupdf.mupdf' has no attribute" +#~ " 'fz_copy_pixmap_rect'" +#~ msgstr "" + +#~ msgid "" +#~ "Fixed rebased `pymupdf.pymupdf_version_tuple` - " +#~ "was previously set to mupdf version." +#~ msgstr "" + +#~ msgid "" +#~ "Added test for `pymupdf.css_for_pymupdf_font()` " +#~ "(uses package `pymupdf-fonts`)." +#~ msgstr "" + +#~ msgid "Added `pymupdf.pymupdf_version_tuple`, e.g. `(1, 23, 6)`." +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `2548 " +#~ "`_: Fitz " +#~ "freezes on some PDFs when calling " +#~ "the pymupdf.Page.get_text_blocks method." +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `#2542 " +#~ "`_: " +#~ "pymupdf.utils.scrub AttributeError Annot object " +#~ "has no attribute fileUpd inside" +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `#2290 " +#~ "`_: Different" +#~ " image format/data from Page.get_text(\"dict\")" +#~ " and pymupdf.get_page_images()" +#~ msgstr "" + +#~ msgid "" +#~ "Fixed issue where trace devices' state" +#~ " was not being initialised correctly; " +#~ "data returned from things like " +#~ "``pymupdf.Page.get_texttrace()`` might be slightly" +#~ " altered, e.g. ``linewidth`` values." +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** `#2076 " +#~ "`_: Segfault " +#~ "in pymupdf.py" +#~ msgstr "" + +#~ msgid "" +#~ "``EmptyFileError`` -- raised when trying " +#~ "to create a :ref:`Document` " +#~ "(``pymupdf.open()``) from an empty file " +#~ "or zero-length memory." +#~ msgstr "" + +#~ msgid "" +#~ "**Fixed** issue `#1085 " +#~ "`_. The " +#~ "old *snake_cased* alias of " +#~ "``pymupdf.detTextlength`` is now defined " +#~ "correctly." +#~ msgstr "" + +#~ msgid "" +#~ "**Added** a method :meth:`paper_rect` which" +#~ " returns a :ref:`Rect` for a supplied" +#~ " paper format string. Example: " +#~ "*pymupdf.paper_rect(\"letter\") = pymupdf.Rect(0.0, " +#~ "0.0, 612.0, 792.0)*." +#~ msgstr "" + +#~ msgid "" +#~ "Geometry objects now fully support " +#~ "in-place operators. For example, *p " +#~ "/= m* replaces point p with *p " +#~ "* 1/m* for a number, or *p *" +#~ " ~m* for a :data:`matrix_like` object " +#~ "*m*. Similarly, if *r* is a " +#~ "rectangle, then *r |= (3, 4)* is" +#~ " the new rectangle that also includes" +#~ " *pymupdf.Point(3, 4)*, and *r &= (1," +#~ " 2, 3, 4)* is its intersection " +#~ "with *pymupdf.Rect(1, 2, 3, 4)*." +#~ msgstr "" + +#~ msgid "" +#~ "A new *config.h* file can be used" +#~ " to de-select unwanted features in" +#~ " the C base code. Using this " +#~ "feature we have been able to " +#~ "reduce the size of our binary " +#~ "*_pymupdf.o* / *_pymupdf.pyd* by about " +#~ "50% (from 9 MB to 4.5 MB). " +#~ "When UPX-ing this, the size goes" +#~ " even further down to a very " +#~ "handy 2.3 MB." +#~ msgstr "" + +#~ msgid "" +#~ "*pymupdf.open()* (no parameters) creates a " +#~ "new empty |PDF| document, i.e. if " +#~ "saved afterwards, it must be given " +#~ "a *.pdf* extension." +#~ msgstr "" + +#~ msgid "" +#~ "New pixmap constructor *pymupdf.Pixmap(doc, " +#~ "xref)* creates a pixmap based on " +#~ "an opened PDF document and an " +#~ ":data:`xref` number of the image." +#~ msgstr "" + +#~ msgid "" +#~ "New pixmap constructor *pymupdf.Pixmap(cspace, " +#~ "spix)* creates a pixmap as a copy" +#~ " of another one *spix* with the " +#~ "colorspace converted to *cspace*. This " +#~ "works for all colorspace combinations." +#~ msgstr "" + +#~ msgid "" +#~ "Pixmap constructor *pymupdf.Pixmap(colorspace, " +#~ "width, height, samples)* now allows " +#~ "*samples* to also be *bytes*, not " +#~ "only *bytearray*." +#~ msgstr "" + +#~ msgid "New methods *get_area()* for both *pymupdf.Rect* and *pymupdf.IRect*" +#~ msgstr "" + +#~ msgid "" +#~ "Pixmaps can now be created directly " +#~ "from files using the new constructor " +#~ "*pymupdf.Pixmap(filename)*." +#~ msgstr "" + +#~ msgid "" +#~ "The Pixmap constructor *pymupdf.Pixmap(image)* " +#~ "has been extended accordingly." +#~ msgstr "" + +#~ msgid "" +#~ "*pymupdf.Rect* can now be created with" +#~ " all possible combinations of points " +#~ "and coordinates." +#~ msgstr "" + +#~ msgid "The identity matrix *pymupdf.Identity* is now **immutable**." +#~ msgstr "" + +#~ msgid "*pymupdf.open()* is now an alias of *pymupdf.Document()*." +#~ msgstr "" + +#~ msgid "New iterator *pymupdf.Pages(doc)* over a document's set of pages." +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/classes.mo b/docs/locales/ja/LC_MESSAGES/classes.mo new file mode 100644 index 000000000..7a52204fc Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/classes.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/classes.po b/docs/locales/ja/LC_MESSAGES/classes.po new file mode 100644 index 000000000..b3a6ff0c5 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/classes.po @@ -0,0 +1,48 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 2a3afd4ee887482197b760f562ffccc4 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 88c2fb74c3264e88ab5b351998d6487c +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 704d5cb7888e4240b2b40aab099a2c07 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../classes.rst:5 7c00ecb7e10b4a23a407ab80742606b5 +msgid "Classes" +msgstr "" + +#: ../../footer.rst:60 3cc655b31f5a4c8c856c90f57dbc49cf +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/colors.mo b/docs/locales/ja/LC_MESSAGES/colors.mo new file mode 100644 index 000000000..ff5244d26 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/colors.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/colors.po b/docs/locales/ja/LC_MESSAGES/colors.po new file mode 100644 index 000000000..4d765bba4 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/colors.po @@ -0,0 +1,120 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 35fb650a253a4f3b8498a5be6524cf11 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 82dc5bc568734b99a2d2f57e0a4da5d0 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 546a16c6fc534b38a0db66fe761c1c61 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../colors.rst:7 248d0e17c78b4f6fa1b57235e07bb572 +msgid "Color Database" +msgstr "カラーデータベース" + +#: ../../colors.rst:8 866762e2283b43a5a605f529de6310df +msgid "" +"Since the introduction of methods involving colors (like " +":meth:`Page.draw_circle`), a requirement may be to have access to " +"predefined colors." +msgstr "" +":meth:`Page.draw_circle`) " +"のような色を含むメソッドが導入されて以来、事前定義された色にアクセスする必要があるかもしれません。)" + +#: ../../colors.rst:10 03afc088d6e34acc9076fc34915e9064 +msgid "" +"The fabulous GUI package `wxPython `_ has a " +"database of over 540 predefined RGB colors, which are given more or less " +"memorizable names. Among them are not only standard names like \"green\" " +"or \"blue\", but also \"turquoise\", \"skyblue\", and 100 (not only 50 " +"...) shades of \"gray\", etc." +msgstr "" +"素晴らしいGUIパッケージ `wxPython `_ " +"には、記憶しやすい名前が与えられた540以上の事前定義されたRGBカラーのデータベースがあります。その中には「green」や「blue」のような標準的な名前だけでなく、「turquoise」や「skyblue」、「gray」の100のシェードなどが含まれています(50だけでなく…)。" + +#: ../../colors.rst:12 7ff92a944ea44490b292556c69b2273b +msgid "" +"We have taken the liberty to copy this database (a list of tuples) " +"modified into PyMuPDF and make its colors available as PDF compatible " +"float triples: for wxPython's *(\"WHITE\", 255, 255, 255)* we return *(1," +" 1, 1)*, which can be directly used in *color* and *fill* parameters. We " +"also accept any mixed case of \"wHiTe\" to find a color." +msgstr "" +"私たちは、このデータベース(タプルのリスト)をPyMuPDFにコピーし、その色をPDF互換の浮動小数点トリプルとして利用可能にしました。例えば、wxPythonの(\"WHITE\"," +" 255, 255, 255)は(1, 1, 1)として返され、これは *直接色* や *塗りつぶし* " +"のパラメータとして使用できます。また、「wHiTe」といった大小文字の組み合わせも受け入れます。" + +#: ../../colors.rst:15 e54f7361ac594e49854f711406c08592 +msgid "Function *getColor()*" +msgstr "関数 *getColor()* " + +#: ../../colors.rst:16 2f1966c2184a4746af35dea0735b144d +msgid "" +"As the color database may not be needed very often, one additional import" +" statement seems acceptable to get access to it::" +msgstr "カラーデータベースはあまり頻繁に必要ないかもしれないため、アクセスするために1つの追加のインポート文は受け入れられると思われます。" + +#: ../../colors.rst:41 7d1b6378ac6440948098bc91a988cb05 +msgid "Printing the Color Database" +msgstr "カラーデータベースの印刷" + +#: ../../colors.rst:42 a5973aca56984f88bf0fe7e9e3c52644 +msgid "" +"If you want to actually see how the many available colors look like, use " +"scripts `print by RGB `_ or `print by HSV " +"`_ in the examples directory. They create PDFs (already " +"existing in the same directory) with all these colors. Their only " +"difference is sorting order: one takes the RGB values, the other one the " +"Hue-Saturation-Values as sort criteria. This is a screen print of what " +"these files look like." +msgstr "" +"実際に利用可能な多くの色がどのように見えるかを確認したい場合は、examplesディレクトリにある `RGB印刷 " +"`_ または `HSV印刷 `_ " +"のスクリプトを使用してください。これらのスクリプトは、これらすべての色を含むPDFを作成します(すでに同じディレクトリに存在します)。これらのファイルはRGB値を使用するものと" +"、ソート基準としてHue-Saturation-" +"Valueを使用するものの2つの違いだけです。以下は、これらのファイルがどのように見えるかのスクリーンプリントです。" + +#: ../../footer.rst:60 d448c701ca134c668840e68d45395229 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/colorspace.mo b/docs/locales/ja/LC_MESSAGES/colorspace.mo new file mode 100644 index 000000000..02e15a433 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/colorspace.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/colorspace.po b/docs/locales/ja/LC_MESSAGES/colorspace.po new file mode 100644 index 000000000..0c5e74f67 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/colorspace.po @@ -0,0 +1,125 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 4d4488c4917748bd91bcc3dd579d4cd8 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 806d51d5b25e4c2dbf32c0b904cf8d32 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 7c1c6b7e76d04443adc894e153404331 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../colorspace.rst:7 9117f6e8aa1a4dc0a4d17d89aa5543d1 +msgid "Colorspace" +msgstr "Colorspace (カラースペース)" + +#: ../../colorspace.rst:9 9ff1e9e9281248fbbe713af3f44fa1a1 +msgid "Represents the color space of a :ref:`Pixmap`." +msgstr ":ref:`Pixmap` のカラースペースを表します。" + +#: ../../colorspace.rst:12 b21c879680e34acdbd48605116e91b1a +msgid "**Class API**" +msgstr "**クラス API** " + +#: ../../colorspace.rst:18 728f2df68cff450fba89dc0e2961d0fd +msgid "Constructor" +msgstr "コンストラクタ" + +#: ../../colorspace.rst 89cae8192bc741d38d655d66d7b02a2c +msgid "Parameters" +msgstr "パラメータ:" + +#: ../../colorspace.rst:20 9ca03de683124137b4c31d0d8cedbbb3 +msgid "" +"A number identifying the colorspace. Possible values are :data:`CS_RGB`, " +":data:`CS_GRAY` and :data:`CS_CMYK`." +msgstr "" +"カラースペースを識別する番号。可能な値は :data:`CS_RGB` 、:data:`CS_GRAY` 、および :data:`CS_CMYK`" +" です。" + +#: ../../colorspace.rst:24 ee0b34b3c30b495c99bb627cad25b1df +msgid "" +"The name identifying the colorspace. Example: *pymupdf.csCMYK.name = " +"'DeviceCMYK'*." +msgstr "カラースペースを識別する名前です。例: *pymupdf.csCMYK.name = 'DeviceCMYK'* 。" + +#: ../../colorspace.rst 1dec254d03ca404d997d0b3d10053f1d +#: 57ea4a70ca2340d6aa71a8e9824e03a8 +msgid "type" +msgstr "型:" + +#: ../../colorspace.rst:26 0a4657c57f1d49e39feed5228f4c5d25 +msgid "str" +msgstr "" + +#: ../../colorspace.rst:30 b3000e3e34554cb5b72ca9feabc9711c +msgid "" +"The number of bytes required to define the color of one pixel. Example: " +"*pymupdf.csCMYK.n == 4*." +msgstr "1ピクセルの色を定義するのに必要なバイト数です。例: *pymupdf.csCMYK.n == 4* 。" + +#: ../../colorspace.rst:32 09c02be8130e4c67b7e790e38c19788f +msgid "int" +msgstr "" + +#: ../../colorspace.rst:35 305216fe8af54e58abf2e1fce4d6f78a +msgid "**Predefined Colorspaces**" +msgstr "**事前定義済みのカラースペース** " + +#: ../../colorspace.rst:37 2a6d64970b50414ca57b9df4b896c7d6 +msgid "" +"For saving some typing effort, there exist predefined colorspace objects " +"for the three available cases." +msgstr "入力を簡略化するために、三つの利用可能なケースのための事前定義されたカラースペースオブジェクトが存在します。" + +#: ../../colorspace.rst:39 acc3013bd78b41dc8fddea9d4ee301a5 +msgid ":data:`csRGB` = *pymupdf.Colorspace(pymupdf.CS_RGB)*" +msgstr "" + +#: ../../colorspace.rst:40 5813b3b4dcbf437f8815044834076e72 +msgid ":data:`csGRAY` = *pymupdf.Colorspace(pymupdf.CS_GRAY)*" +msgstr "" + +#: ../../colorspace.rst:41 57fef72a75814b848806cf016b2c1baf +msgid ":data:`csCMYK` = *pymupdf.Colorspace(pymupdf.CS_CMYK)*" +msgstr "" + +#: ../../footer.rst:60 e39ad3f11f894208aed37df423e4558e +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/coop_low.mo b/docs/locales/ja/LC_MESSAGES/coop_low.mo new file mode 100644 index 000000000..da53def06 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/coop_low.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/coop_low.po b/docs/locales/ja/LC_MESSAGES/coop_low.po new file mode 100644 index 000000000..07788bbd4 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/coop_low.po @@ -0,0 +1,192 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 5a355454a0e14f23a63b8a2e7ce669e8 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 09967d10ba634cb1bff385ff21b01dde +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 d1d359d0d40748de8b9c0d01760f6668 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../coop_low.rst:7 bdf9ff13324144f3aea1e6e6fb99a106 +msgid "Working together: DisplayList and TextPage" +msgstr "共同作業:DisplayList と TextPage" + +#: ../../coop_low.rst:8 f4bd6483319f4150b1c8bb96b8e13933 +msgid "Here are some instructions on how to use these classes together." +msgstr "これらのクラスを一緒に使用する方法に関するいくつかの手順があります。" + +#: ../../coop_low.rst:10 ed7509d3a4e1434498829a99374f641b +msgid "" +"In some situations, performance improvements may be achievable, when you " +"fall back to the detail level explained here." +msgstr "一部の状況では、ここで説明されている詳細レベルに戻ると、パフォーマンスの向上が可能かもしれません。" + +#: ../../coop_low.rst:13 cd9e800e616545078dedc61814a029c7 +msgid "Create a DisplayList" +msgstr "DisplayList の作成" + +#: ../../coop_low.rst:14 c3a4cb6ce3974be5af359b73bd3b8d71 +msgid "" +"A :ref:`DisplayList` represents an interpreted document page. Methods for" +" pixmap creation, text extraction and text search are -- behind the " +"curtain -- all using the page's display list to perform their tasks. If a" +" page must be rendered several times (e.g. because of changed zoom " +"levels), or if text search and text extraction should both be performed, " +"overhead can be saved, if the display list is created only once and then " +"used for all other tasks." +msgstr "" +":ref:`DisplayList` " +"は解釈された文書ページを表します。ピクセルマップの作成、テキスト抽出、およびテキスト検索のメソッドは、幕の内側で、それぞれのタスクを実行するためにページの表示リストを使用しています。ページを複数回描画する必要がある場合(ズームレベルが変更されたためなど)、またはテキストの検索とテキストの抽出の両方を実行する必要がある場合、表示リストは一度だけ作成し、その後のすべてのタスクに使用すると、オーバーヘッドを節約できます。" + +#: ../../coop_low.rst:18 4a46c1c8dddf48029189e3b15bc68591 +msgid "" +"You can also create display lists for many pages \"on stack\" (in a " +"list), may be during document open, during idling times, or you store it " +"when a page is visited for the first time (e.g. in GUI scripts)." +msgstr "ディスプレイリストを多くのページに対して「スタック上」(リスト内)で作成することもできます。これはドキュメントを開いている間、アイドリング時間中、またはページが初めて訪れられたとき(GUIスクリプトなど)に行うことができます。" + +#: ../../coop_low.rst:20 2f2b49df380e40cea8dba1b11f5ee9a8 +msgid "" +"Note, that for everything what follows, only the display list is needed " +"-- the corresponding :ref:`Page` object could have been deleted." +msgstr "" +"注意:以下のすべてのことについて、ディスプレイリストのみが必要です - 対応する :ref:`Page` " +"オブジェクトは削除されている可能性があります。" + +#: ../../coop_low.rst:23 aebfe0ed572842b9ba5b11a8d1aab0d2 +msgid "Generate Pixmap" +msgstr "ピクセルマップの生成" + +#: ../../coop_low.rst:24 7dd16880b19c48e9837fd5dc2c621515 +msgid "" +"The following creates a Pixmap from a :ref:`DisplayList`. Parameters are " +"the same as for :meth:`Page.get_pixmap`." +msgstr "" +"以下は、:ref:`DisplayList` からピクセルマップを生成するものです。パラメータは :meth:`Page.get_pixmap` " +"と同じです。" + +#: ../../coop_low.rst:28 e89f1e9ef8d649f4a06985425cfa62a4 +#, python-format +msgid "" +"The execution time of this statement may be up to 50% shorter than that " +"of :meth:`Page.get_pixmap`." +msgstr "この文の実行時間は、:meth:`Page.get_pixmap` の実行時間よりも最大50%短くなる可能性があります。" + +#: ../../coop_low.rst:31 d7fa50d37ba94e5cb71679b146596aa3 +msgid "Perform Text Search" +msgstr "テキスト検索を実行" + +#: ../../coop_low.rst:32 883c1b8167124e3c87bee02cb540ebbb +msgid "With the display list from above, we can also search for text." +msgstr "上記のディスプレイリストを使用して、テキストを検索することもできます。" + +#: ../../coop_low.rst:34 dffc2a72113945cbbce0613fb18a4903 +msgid "For this we need to create a :ref:`TextPage`." +msgstr "これには、:ref:`TextPage` を作成する必要があります。" + +#: ../../coop_low.rst:42 268c3ac74027425381db0e5a5b1beeec +msgid "Extract Text" +msgstr "テキストの抽出" + +#: ../../coop_low.rst:43 499d4763043d407c8815a5ff4c386e37 +msgid "" +"With the same :ref:`TextPage` object from above, we can now immediately " +"use any or all of the 5 text extraction methods." +msgstr "前述の :ref:`TextPage` オブジェクトを使用することで、今すぐに5つのテキスト抽出メソッドのいずれかまたはすべてを使用できます。" + +#: ../../coop_low.rst:45 6f1b9f5de2eb487c9502d14d7e74597c +msgid "" +"Above, we have created our text page without argument. This leads to a " +"default argument of 3 (:data:`ligatures` and white-space are preserved), " +"IAW images will **not** be extracted -- see below." +msgstr "" +"前述のように、テキストページを引数なしで作成しました。これにより、デフォルトの引数3(合字と空白が保持されます)が適用されます。つまり、画像は抽出" +" **されません** - 以下を参照してください。" + +#: ../../coop_low.rst:54 ab9774b34e1845db917cc426db636fd8 +msgid "Further Performance improvements" +msgstr "さらなるパフォーマンスの向上" + +#: ../../coop_low.rst:56 8aa6869464d643f1b386fb7323425982 +msgid "Pixmap" +msgstr "" + +#: ../../coop_low.rst:57 d58917690b86432a8db25b4e290c8aab +msgid "As explained in the :ref:`Page` chapter:" +msgstr "ページの章で説明されているように:" + +#: ../../coop_low.rst:59 5f452558de7e46d1a6bb4d407940f71d +#, python-format +msgid "" +"If you do not need transparency set *alpha = 0* when creating pixmaps. " +"This will save 25% memory (if RGB, the most common case) and possibly 5% " +"execution time (depending on the GUI software)." +msgstr "" +"透明度が不要な場合は、ピクスマップを作成する際に alpha = 0 " +"に設定します。これにより、メモリが25%節約されます(RGBの場合、最も一般的なケース)し、GUIソフトウェアに依存して実行時間が5%削減される可能性があります。" + +#: ../../coop_low.rst:62 75cc3ebbf8db48479d9f9b81f89a5254 +msgid "TextPage" +msgstr "" + +#: ../../coop_low.rst:63 08ee726963904764a54737759d3c18d6 +msgid "" +"If you do not need images extracted alongside the text of a page, you can" +" set the following option:" +msgstr "ページのテキストと一緒に画像を抽出する必要がない場合、以下のオプションを設定できます:" + +#: ../../coop_low.rst:68 4251f3ef7f5f4ac69038ff012e1254c6 +#, python-format +msgid "" +"This will save ca. 25% overall execution time for the HTML, XHTML and " +"JSON text extractions and **hugely** reduce the amount of storage (both, " +"memory and disk space) if the document is graphics oriented." +msgstr "" +"これにより、HTML、XHTML、およびJSONのテキスト抽出全体の実行時間が約25%節約され、ドキュメントがグラフィックス志向である場合、ストレージ(メモリとディスクスペースの両方)の量が" +" **大幅に** 削減されます。" + +#: ../../coop_low.rst:70 bbd6a64180374797a986afa4351f7218 +msgid "If you however do need images, use a value of 7 for flags:" +msgstr "ただし、画像が必要な場合は、フラグに7の値を使用してください:" + +#: ../../footer.rst:60 ef9f02956d6945e086760c64e2387362 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/deprecated.mo b/docs/locales/ja/LC_MESSAGES/deprecated.mo new file mode 100644 index 000000000..929785f41 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/deprecated.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/deprecated.po b/docs/locales/ja/LC_MESSAGES/deprecated.po new file mode 100644 index 000000000..23e08297f --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/deprecated.po @@ -0,0 +1,880 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-05-06 22:50+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../deprecated.rst:3 d1d5b6242bd2429487e3c00f4bd27b44 +msgid ":index:`_isWrapped` -- :attr:`Page.is_wrapped`" +msgstr "" + +#: ../../deprecated.rst:4 eadbd707ba5b49ffaa1a8578c63f2fa7 +msgid ":index:`addCaretAnnot` -- :meth:`Page.add_caret_annot`" +msgstr "" + +#: ../../deprecated.rst:5 f3d2fc15c03d40fe9c5aeee5eb43619e +msgid ":index:`addCircleAnnot` -- :meth:`Page.add_circle_annot`" +msgstr "" + +#: ../../deprecated.rst:6 051b8aa821bb412898f73279004d2d87 +msgid ":index:`addFileAnnot` -- :meth:`Page.add_file_annot`" +msgstr "" + +#: ../../deprecated.rst:7 6b9299e0c63d4f9c840acf5cef786d3b +msgid ":index:`addFreetextAnnot` -- :meth:`Page.add_freetext_annot`" +msgstr "" + +#: ../../deprecated.rst:8 448eb9354f114ddc801e1e7e17c90813 +msgid ":index:`addHighlightAnnot` -- :meth:`Page.add_highlight_annot`" +msgstr "" + +#: ../../deprecated.rst:9 afdf93da30d0472583f679bb32992982 +msgid ":index:`addInkAnnot` -- :meth:`Page.add_ink_annot`" +msgstr "" + +#: ../../deprecated.rst:10 111293a76e4048d0b5e1605e9e874812 +msgid ":index:`addLineAnnot` -- :meth:`Page.add_line_annot`" +msgstr "" + +#: ../../deprecated.rst:11 27c091e35cd143f5b67a0e8c8fa0875f +msgid ":index:`addPolygonAnnot` -- :meth:`Page.add_polygon_annot`" +msgstr "" + +#: ../../deprecated.rst:12 6933d20ce2e141c5a3fadbd053766d4d +msgid ":index:`addPolylineAnnot` -- :meth:`Page.add_polyline_annot`" +msgstr "" + +#: ../../deprecated.rst:13 30c229985fe44b80af84338499a2f559 +msgid ":index:`addRectAnnot` -- :meth:`Page.add_rect_annot`" +msgstr "" + +#: ../../deprecated.rst:14 d78dac9a27684b5da29499e4b2f6b8f5 +msgid ":index:`addRedactAnnot` -- :meth:`Page.add_redact_annot`" +msgstr "" + +#: ../../deprecated.rst:15 04264798641b4de3938500d873c2c6f8 +msgid ":index:`addSquigglyAnnot` -- :meth:`Page.add_squiggly_annot`" +msgstr "" + +#: ../../deprecated.rst:16 fc348b38ada34284b4ff935bc0b96c15 +msgid ":index:`addStampAnnot` -- :meth:`Page.add_stamp_annot`" +msgstr "" + +#: ../../deprecated.rst:17 bee1ce84496246a38df53948bf77f357 +msgid ":index:`addStrikeoutAnnot` -- :meth:`Page.add_strikeout_annot`" +msgstr "" + +#: ../../deprecated.rst:18 25dad72947f64b9e8b569df99549e4e9 +msgid ":index:`addTextAnnot` -- :meth:`Page.add_text_annot`" +msgstr "" + +#: ../../deprecated.rst:19 00aa64572a5f41b9a8cbed77037f912b +msgid ":index:`addUnderlineAnnot` -- :meth:`Page.add_underline_annot`" +msgstr "" + +#: ../../deprecated.rst:20 c385a1bc52db4e72a107bfed39ab9073 +msgid ":index:`addWidget` -- :meth:`Page.add_widget`" +msgstr "" + +#: ../../deprecated.rst:21 18d7411089ff4ab38e8a6b325dbafb58 +msgid ":index:`chapterCount` -- :attr:`Document.chapter_count`" +msgstr "" + +#: ../../deprecated.rst:22 3e8e8070762748c6ab15040d7ddfbf98 +msgid ":index:`chapterPageCount` -- :meth:`Document.chapter_page_count`" +msgstr "" + +#: ../../deprecated.rst:23 877a39673c34487ebea478f53dfbf4c4 +msgid ":index:`cleanContents` -- :meth:`Page.clean_contents`" +msgstr "" + +#: ../../deprecated.rst:24 d264e6c6a4f34ccda276df5fadd9f2f3 +msgid ":index:`clearWith` -- :meth:`Pixmap.clear_with`" +msgstr "" + +#: ../../deprecated.rst:25 c6e7724402164c7595cec2d1b515954d +msgid ":index:`convertToPDF` -- :meth:`Document.convert_to_pdf`" +msgstr "" + +#: ../../deprecated.rst:26 30efb14a60a04279b22147f212ff2ce0 +msgid ":index:`copyPage` -- :meth:`Document.copy_page`" +msgstr "" + +#: ../../deprecated.rst:27 71be569311b643fcad76f9175f84ad8d +msgid ":index:`copyPixmap` -- :meth:`Pixmap.copy`" +msgstr "" + +#: ../../deprecated.rst:28 9d169e01195d45ef8b642a34622387b3 +msgid ":index:`CropBox` -- :attr:`Page.cropbox`" +msgstr "" + +#: ../../deprecated.rst:29 d4e6e34a1e174e78aaaa16cc1589987b +msgid ":index:`CropBoxPosition` -- :attr:`Page.cropbox_position`" +msgstr "" + +#: ../../deprecated.rst:30 eb2e77d896d148228b3ac65c9ac53982 +msgid ":index:`deleteAnnot` -- :meth:`Page.delete_annot`" +msgstr "" + +#: ../../deprecated.rst:31 8f12dc1d6a2c47bc90ace8d1c4c0837a +msgid ":index:`deleteLink` -- :meth:`Page.delete_link`" +msgstr "" + +#: ../../deprecated.rst:32 9e4d33fa051741b48c48b7cdb720d90d +msgid ":index:`deletePage` -- :meth:`Document.delete_page`" +msgstr "" + +#: ../../deprecated.rst:33 66877e75a8c649ff8eebd342e28ceedf +msgid ":index:`deletePageRange` -- :meth:`Document.delete_pages`" +msgstr "" + +#: ../../deprecated.rst:34 25721559e2fe452c86484ce93215a810 +msgid ":index:`deleteWidget` -- :meth:`Page.delete_widget`" +msgstr "" + +#: ../../deprecated.rst:35 0c1a8e11e7fd4587ae4312e58bf62d33 +msgid ":index:`derotationMatrix` -- :attr:`Page.derotation_matrix`" +msgstr "" + +#: ../../deprecated.rst:36 9c6cfa1376f94b509d2447887df70621 +msgid ":index:`drawBezier` -- :meth:`Page.draw_bezier`" +msgstr "" + +#: ../../deprecated.rst:37 22e65f0dbdfd44d3b77a5f0b9e807410 +msgid ":index:`drawBezier` -- :meth:`Shape.draw_bezier`" +msgstr "" + +#: ../../deprecated.rst:38 9240fc36aeb64362af89de7221a1ec18 +msgid ":index:`drawCircle` -- :meth:`Page.draw_circle`" +msgstr "" + +#: ../../deprecated.rst:39 3e71c253f1c64091b41e692eb39e85cb +msgid ":index:`drawCircle` -- :meth:`Shape.draw_circle`" +msgstr "" + +#: ../../deprecated.rst:40 be7a4c92ba9346599ed48970f720d423 +msgid ":index:`drawCurve` -- :meth:`Page.draw_curve`" +msgstr "" + +#: ../../deprecated.rst:41 513be296f2cb4b618a91597a2df7e65d +msgid ":index:`drawCurve` -- :meth:`Shape.draw_curve`" +msgstr "" + +#: ../../deprecated.rst:42 c0b7d4f8b2ad4691bd0aa7859ea33b77 +msgid ":index:`drawLine` -- :meth:`Page.draw_line`" +msgstr "" + +#: ../../deprecated.rst:43 554d906bb05f409190d6e6893190f958 +msgid ":index:`drawLine` -- :meth:`Shape.draw_line`" +msgstr "" + +#: ../../deprecated.rst:44 b2405f1b84e642eeafb66539f9808fb8 +msgid ":index:`drawOval` -- :meth:`Page.draw_oval`" +msgstr "" + +#: ../../deprecated.rst:45 aa69dd3ae74a463bb7b144a8d78c7cb4 +msgid ":index:`drawOval` -- :meth:`Shape.draw_oval`" +msgstr "" + +#: ../../deprecated.rst:46 bd4e3e4e051d4f2aa442d3d777a3b5ff +msgid ":index:`drawPolyline` -- :meth:`Page.draw_polyline`" +msgstr "" + +#: ../../deprecated.rst:47 65c2a5f2e60d4e8791bd6bc4911e5d02 +msgid ":index:`drawPolyline` -- :meth:`Shape.draw_polyline`" +msgstr "" + +#: ../../deprecated.rst:48 ada5b07f1393444ca6f56d8878c5344f +msgid ":index:`drawQuad` -- :meth:`Page.draw_quad`" +msgstr "" + +#: ../../deprecated.rst:49 bacd1883810d4581a41f8c39d4b1d590 +msgid ":index:`drawQuad` -- :meth:`Shape.draw_quad`" +msgstr "" + +#: ../../deprecated.rst:50 44a58d0bc884449eb62d74622f5c6c95 +msgid ":index:`drawRect` -- :meth:`Page.draw_rect`" +msgstr "" + +#: ../../deprecated.rst:51 c0cc88dd71fb497996d4893f4a14d229 +msgid ":index:`drawRect` -- :meth:`Shape.draw_rect`" +msgstr "" + +#: ../../deprecated.rst:52 f0925417a417408fb57e98400f82de38 +msgid ":index:`drawSector` -- :meth:`Page.draw_sector`" +msgstr "" + +#: ../../deprecated.rst:53 4e06c452b87b43dc88597aecb0183689 +msgid ":index:`drawSector` -- :meth:`Shape.draw_sector`" +msgstr "" + +#: ../../deprecated.rst:54 f8a714b644134acc857618d42422b411 +msgid ":index:`drawSquiggle` -- :meth:`Page.draw_squiggle`" +msgstr "" + +#: ../../deprecated.rst:55 93bb79f96d144fd38674d32e9aaa84c9 +msgid ":index:`drawSquiggle` -- :meth:`Shape.draw_squiggle`" +msgstr "" + +#: ../../deprecated.rst:56 cf3932da67be42dc8007fbee0a4427ec +msgid ":index:`drawZigzag` -- :meth:`Page.draw_zigzag`" +msgstr "" + +#: ../../deprecated.rst:57 3107e38c57054eeea4d24f265b0f6671 +msgid ":index:`drawZigzag` -- :meth:`Shape.draw_zigzag`" +msgstr "" + +#: ../../deprecated.rst:58 03b818471af840fdb95c8dc2d8e7b209 +msgid ":index:`embeddedFileAdd` -- :meth:`Document.embfile_add`" +msgstr "" + +#: ../../deprecated.rst:59 5167d2308e644b71a09de926761ece0f +msgid ":index:`embeddedFileCount` -- :meth:`Document.embfile_count`" +msgstr "" + +#: ../../deprecated.rst:60 161231ecd2224595bf3872c106a60136 +msgid ":index:`embeddedFileDel` -- :meth:`Document.embfile_del`" +msgstr "" + +#: ../../deprecated.rst:61 1023a942cc61494caba4804eeb29cebc +msgid ":index:`embeddedFileGet` -- :meth:`Document.embfile_get`" +msgstr "" + +#: ../../deprecated.rst:62 a52dfb754efa438ea2c232dc7130cf4b +msgid ":index:`embeddedFileInfo` -- :meth:`Document.embfile_info`" +msgstr "" + +#: ../../deprecated.rst:63 ef355611163d4b0bb14aaa00d18a0564 +msgid ":index:`embeddedFileNames` -- :meth:`Document.embfile_names`" +msgstr "" + +#: ../../deprecated.rst:64 999df931002c417da988825a29f420e0 +msgid ":index:`embeddedFileUpd` -- :meth:`Document.embfile_upd`" +msgstr "" + +#: ../../deprecated.rst:65 bf948d4f5d5f4caab2b7b01337d456e2 +msgid ":index:`extractFont` -- :meth:`Document.extract_font`" +msgstr "" + +#: ../../deprecated.rst:66 93fd021db67c403a8c52df83b26fea46 +msgid ":index:`extractImage` -- :meth:`Document.extract_image`" +msgstr "" + +#: ../../deprecated.rst:67 3021f35e288d4b7896cc4998b8d5cd4c +msgid ":index:`fileGet` -- :meth:`Annot.get_file`" +msgstr "" + +#: ../../deprecated.rst:68 e766fa14928e487d81044fb5bfec79a7 +msgid ":index:`fileUpd` -- :meth:`Annot.update_file`" +msgstr "" + +#: ../../deprecated.rst:69 054a45d2bd564e99936796752bc21ae2 +msgid ":index:`fillTextbox` -- :meth:`TextWriter.fill_textbox`" +msgstr "" + +#: ../../deprecated.rst:70 5611c1d6bebc4834ba3309d08e108fe4 +msgid ":index:`findBookmark` -- :meth:`Document.find_bookmark`" +msgstr "" + +#: ../../deprecated.rst:71 19590b26aef74066bd9089aaa473b418 +msgid ":index:`firstAnnot` -- :attr:`Page.first_annot`" +msgstr "" + +#: ../../deprecated.rst:72 11cd12cfd924436f9989b50bf24076ce +msgid ":index:`firstLink` -- :attr:`Page.first_link`" +msgstr "" + +#: ../../deprecated.rst:73 e0341c36f0474b29b165756ff2bf2e39 +msgid ":index:`firstWidget` -- :attr:`Page.first_widget`" +msgstr "" + +#: ../../deprecated.rst:74 3e529771a460436ca6ec8cc3553b1646 +msgid ":index:`fullcopyPage` -- :meth:`Document.fullcopy_page`" +msgstr "" + +#: ../../deprecated.rst:75 7a0f1fd1838049589f29544cbafbee6e +msgid ":index:`gammaWith` -- :meth:`Pixmap.gamma_with`" +msgstr "" + +#: ../../deprecated.rst:76 3e03080a489a4185ab0403f5e4a10533 +msgid ":index:`getArea` -- :meth:`Rect.get_area`" +msgstr "" + +#: ../../deprecated.rst:77 d08fd4faaef048ef9455a3dc22e0abe0 +msgid ":index:`getArea` -- :meth:`IRect.get_area`" +msgstr "" + +#: ../../deprecated.rst:78 8feb6c6033434a7eb93729dd7a214492 +msgid ":index:`getCharWidths` -- :meth:`Document.get_char_widths`" +msgstr "" + +#: ../../deprecated.rst:79 e820322f7d12476983ff2f6844edebb7 +msgid ":index:`getContents` -- :meth:`Page.get_contents`" +msgstr "" + +#: ../../deprecated.rst:80 315689e05e6b4ee79e9ee34b4855c9b3 +msgid ":index:`getDisplayList` -- :meth:`Page.get_displaylist`" +msgstr "" + +#: ../../deprecated.rst:81 59ef935a55fb48e0ad3d0b05fe9bba6a +msgid ":index:`getDrawings` -- :meth:`Page.get_drawings`" +msgstr "" + +#: ../../deprecated.rst:82 fdac52da208b4710a3dcf0603259e088 +msgid ":index:`getFontList` -- :meth:`Page.get_fonts`" +msgstr "" + +#: ../../deprecated.rst:83 1cb6609cac5344bbb52338df5723abbb +msgid ":index:`getImageBbox` -- :meth:`Page.get_image_bbox`" +msgstr "" + +#: ../../deprecated.rst:84 b2db41e3b28c49fe85ada1aaa5d11c93 +msgid ":index:`getImageData` -- :meth:`Pixmap.tobytes`" +msgstr "" + +#: ../../deprecated.rst:85 4016e42ec02d47a98cd479a56c1d6f81 +msgid ":index:`getImageList` -- :meth:`Page.get_images`" +msgstr "" + +#: ../../deprecated.rst:86 ba9df1a02bbb41d699430071a0853142 +msgid ":index:`getLinks` -- :meth:`Page.get_links`" +msgstr "" + +#: ../../deprecated.rst:87 e45e78c952c149b99893f2bdf0464ce9 +msgid ":index:`getOCGs` -- :meth:`Document.get_ocgs`" +msgstr "" + +#: ../../deprecated.rst:88 8cb86892aeb14018b456bb1500228603 +msgid ":index:`getPageFontList` -- :meth:`Document.get_page_fonts`" +msgstr "" + +#: ../../deprecated.rst:89 def3f6e7f54a48d584b35944ac483909 +msgid ":index:`getPageImageList` -- :meth:`Document.get_page_images`" +msgstr "" + +#: ../../deprecated.rst:90 e60fafcb1644487f85f736966a96e6f3 +msgid ":index:`getPagePixmap` -- :meth:`Document.get_page_pixmap`" +msgstr "" + +#: ../../deprecated.rst:91 404571c66f3f4449b253bac469681083 +msgid ":index:`getPageText` -- :meth:`Document.get_page_text`" +msgstr "" + +#: ../../deprecated.rst:92 d912b4e08d8e43759b50bd3fe66bf799 +msgid ":index:`getPageXObjectList` -- :meth:`Document.get_page_xobjects`" +msgstr "" + +#: ../../deprecated.rst:93 b989c010b0e941358950411ecae89d0d +msgid ":index:`getPDFnow` -- :meth:`get_pdf_now`" +msgstr "" + +#: ../../deprecated.rst:94 afc6743031c3459a8b023575e22108ce +msgid ":index:`getPDFstr` -- :meth:`get_pdf_str`" +msgstr "" + +#: ../../deprecated.rst:95 4513c128c5d24612b222b6bf32c9ba78 +msgid ":index:`getPixmap` -- :meth:`Page.get_pixmap`" +msgstr "" + +#: ../../deprecated.rst:96 8e17558bf3a7484da7e142427292d305 +msgid ":index:`getPixmap` -- :meth:`Annot.get_pixmap`" +msgstr "" + +#: ../../deprecated.rst:97 aaaba69a72e34566b856f031a3cc9f3b +msgid ":index:`getPixmap` -- :meth:`DisplayList.get_pixmap`" +msgstr "" + +#: ../../deprecated.rst:98 b6a103d877a149f78ef58e60d1576a51 +msgid ":index:`getPNGData` -- :meth:`Pixmap.tobytes`" +msgstr "" + +#: ../../deprecated.rst:99 8991d6c4e3dc4accaf8c4e5726f4c4a7 +msgid ":index:`getPNGdata` -- :meth:`Pixmap.tobytes`" +msgstr "" + +#: ../../deprecated.rst:100 d37fe89c453843e9bbc3c6518487b009 +msgid ":index:`getRectArea` -- :meth:`Rect.get_area`" +msgstr "" + +#: ../../deprecated.rst:101 31cafead100b4a0eab043b8db48315c9 +msgid ":index:`getRectArea` -- :meth:`IRect.get_area`" +msgstr "" + +#: ../../deprecated.rst:102 18a3052627f84e0d9076a72aaedd1f56 +msgid ":index:`getSigFlags` -- :meth:`Document.get_sigflags`" +msgstr "" + +#: ../../deprecated.rst:103 c8a9c4bf70704bc09d700cd72ee9ab2b +msgid ":index:`getSVGimage` -- :meth:`Page.get_svg_image`" +msgstr "" + +#: ../../deprecated.rst:104 962d959e225d4325824133ce80ea7844 +msgid ":index:`getText` -- :meth:`Page.get_text`" +msgstr "" + +#: ../../deprecated.rst:105 d84d77c264e44b28bf9d043ea4bc2a40 +msgid ":index:`getText` -- :meth:`Annot.get_text`" +msgstr "" + +#: ../../deprecated.rst:106 0e7807a62c5e48aab9c53b62bc99d5f5 +msgid ":index:`getTextBlocks` -- :meth:`Page.get_text_blocks`" +msgstr "" + +#: ../../deprecated.rst:107 eaa539c4b1e643f1bb4d8541e3c2cae0 +msgid ":index:`getTextbox` -- :meth:`Page.get_textbox`" +msgstr "" + +#: ../../deprecated.rst:108 73c3be87d16b4ce1ab32e14a43c83625 +msgid ":index:`getTextbox` -- :meth:`Annot.get_textbox`" +msgstr "" + +#: ../../deprecated.rst:109 57184e466926474a8122c92d716efc01 +msgid ":index:`getTextLength` -- :meth:`get_text_length`" +msgstr "" + +#: ../../deprecated.rst:110 ef0865561e5245caaf7cad5c31fa5b7a +msgid ":index:`getTextPage` -- :meth:`Page.get_textpage`" +msgstr "" + +#: ../../deprecated.rst:111 145ac3860c93466f80ff59355fc3ff5a +msgid ":index:`getTextPage` -- :meth:`Annot.get_textpage`" +msgstr "" + +#: ../../deprecated.rst:112 9751cae710914d3bab31ee6ba59c5991 +msgid ":index:`getTextPage` -- :meth:`DisplayList.get_textpage`" +msgstr "" + +#: ../../deprecated.rst:113 5f860f6e22a941eca21077328b814fea +msgid ":index:`getTextWords` -- :meth:`Page.get_text_words`" +msgstr "" + +#: ../../deprecated.rst:114 285cb38b768c4499b093536527cca417 +msgid ":index:`getToC` -- :meth:`Document.get_toc`" +msgstr "" + +#: ../../deprecated.rst:115 06baade7ba234e409910b5725a990d12 +msgid ":index:`getXmlMetadata` -- :meth:`Document.get_xml_metadata`" +msgstr "" + +#: ../../deprecated.rst:116 9fcdf92180a34622be5d144da780ea7a +msgid ":index:`ImageProperties` -- :meth:`image_properties`" +msgstr "" + +#: ../../deprecated.rst:117 f394aac5cf1143c38ce86651288f35f0 +msgid ":index:`includePoint` -- :meth:`Rect.include_point`" +msgstr "" + +#: ../../deprecated.rst:118 570fa5daadf74e338f232b7b4971d361 +msgid ":index:`includePoint` -- :meth:`IRect.include_point`" +msgstr "" + +#: ../../deprecated.rst:119 8ddaec2281cd46aa8b6c4dcff8562392 +msgid ":index:`includeRect` -- :meth:`Rect.include_rect`" +msgstr "" + +#: ../../deprecated.rst:120 ff5100ed366b479f96fb5c7fedb20a24 +msgid ":index:`includeRect` -- :meth:`IRect.include_rect`" +msgstr "" + +#: ../../deprecated.rst:121 0dfb59dc42064652be63a6368543020e +msgid ":index:`insertFont` -- :meth:`Page.insert_font`" +msgstr "" + +#: ../../deprecated.rst:122 d4a5843f50214d2880c0e10589ce890f +msgid ":index:`insertImage` -- :meth:`Page.insert_image`" +msgstr "" + +#: ../../deprecated.rst:123 6f95549ce82d47e5ba18bbb14215c08d +msgid ":index:`insertLink` -- :meth:`Page.insert_link`" +msgstr "" + +#: ../../deprecated.rst:124 b5e3f22cdb294479ade281982639843b +msgid ":index:`insertPage` -- :meth:`Document.insert_page`" +msgstr "" + +#: ../../deprecated.rst:125 030309c3c03340bab039d374155ef986 +msgid ":index:`insertPDF` -- :meth:`Document.insert_pdf`" +msgstr "" + +#: ../../deprecated.rst:126 df931ba7d0564c688d145b7f8a2871e0 +msgid ":index:`insertText` -- :meth:`Page.insert_text`" +msgstr "" + +#: ../../deprecated.rst:127 17b568407a8e44f8a855291d34cf1d90 +msgid ":index:`insertText` -- :meth:`Shape.insert_text`" +msgstr "" + +#: ../../deprecated.rst:128 561e6f7e79fa47f98b59b04cc29ee2f6 +msgid ":index:`insertTextbox` -- :meth:`Page.insert_textbox`" +msgstr "" + +#: ../../deprecated.rst:129 54f104892aa04a8993366721a1017bd0 +msgid ":index:`insertTextbox` -- :meth:`Shape.insert_textbox`" +msgstr "" + +#: ../../deprecated.rst:130 8f6ad8ae4e3441d0bbe6ceddd469e8a7 +msgid ":index:`invertIRect` -- :meth:`Pixmap.invert_irect`" +msgstr "" + +#: ../../deprecated.rst:131 3c2a3d820abe454a99bf6a608d7f9750 +msgid ":index:`isConvex` -- :attr:`Quad.is_convex`" +msgstr "" + +#: ../../deprecated.rst:132 ff4a100633ac4aef8956ee89fea3928c +msgid ":index:`isDirty` -- :attr:`Document.is_dirty`" +msgstr "" + +#: ../../deprecated.rst:133 b3d2162130f7469da99822ab533562fb +msgid ":index:`isEmpty` -- :attr:`Rect.is_empty`" +msgstr "" + +#: ../../deprecated.rst:134 7902fb832f4d46148e5fb12d33180721 +msgid ":index:`isEmpty` -- :attr:`IRect.is_empty`" +msgstr "" + +#: ../../deprecated.rst:135 219e226b694b4424a8cc0b243125195b +msgid ":index:`isEmpty` -- :attr:`Quad.is_empty`" +msgstr "" + +#: ../../deprecated.rst:136 6d2a36ec88b848888e47502f443ddd2f +msgid ":index:`isFormPDF` -- :attr:`Document.is_form_pdf`" +msgstr "" + +#: ../../deprecated.rst:137 ee6a8794ac454167bf112c26aee0679e +msgid ":index:`isInfinite` -- :attr:`Rect.is_infinite`" +msgstr "" + +#: ../../deprecated.rst:138 0e449ebf4dbf41c3a5684705c4764d1e +msgid ":index:`isInfinite` -- :attr:`IRect.is_infinite`" +msgstr "" + +#: ../../deprecated.rst:139 bb7e257c9c5d4c649588be9dcc1539e8 +msgid ":index:`isPDF` -- :attr:`Document.is_pdf`" +msgstr "" + +#: ../../deprecated.rst:140 6034a32b2cd641c587f398e466223fbb +msgid ":index:`isRectangular` -- :attr:`Quad.is_rectangular`" +msgstr "" + +#: ../../deprecated.rst:141 e04e5574d4294c58a6e2e047385e4bab +msgid ":index:`isRectilinear` -- :attr:`Matrix.is_rectilinear`" +msgstr "" + +#: ../../deprecated.rst:142 72f57f01824e4c2090e4cecaaa3bd7b3 +msgid ":index:`isReflowable` -- :attr:`Document.is_reflowable`" +msgstr "" + +#: ../../deprecated.rst:143 6f1427d4d36448bea20405dd494e4e9b +msgid ":index:`isRepaired` -- :attr:`Document.is_repaired`" +msgstr "" + +#: ../../deprecated.rst:144 46501c7649d2435db16b4ba2b180de44 +msgid ":index:`isStream` -- :meth:`Document.is_stream`" +msgstr "" + +#: ../../deprecated.rst:145 d8c668fe2a6a41368d3753de325db923 +msgid ":index:`lastLocation` -- :attr:`Document.last_location`" +msgstr "" + +#: ../../deprecated.rst:146 23b2e4723f784739b87a95d0e621af1b +msgid ":index:`lineEnds` -- :attr:`Annot.line_ends`" +msgstr "" + +#: ../../deprecated.rst:147 9f84a5afe9f94be08d591804cdb1f2dd +msgid ":index:`loadAnnot` -- :meth:`Page.load_annot`" +msgstr "" + +#: ../../deprecated.rst:148 a9b23d3b3f6343dcb80e169000f2b2f5 +msgid ":index:`loadLinks` -- :meth:`Page.load_links`" +msgstr "" + +#: ../../deprecated.rst:149 25f9456cfe284e5fb321a13a57937aff +msgid ":index:`loadPage` -- :meth:`Document.load_page`" +msgstr "" + +#: ../../deprecated.rst:150 9c522c046f63431fad4d3d4dcae73d31 +msgid ":index:`makeBookmark` -- :meth:`Document.make_bookmark`" +msgstr "" + +#: ../../deprecated.rst:151 89dd42c792e44a5db93d7cf5e8d1bdd0 +msgid ":index:`MediaBox` -- :attr:`Page.mediabox`" +msgstr "" + +#: ../../deprecated.rst:152 2ce8cf40024f432a8a6d298e8396809e +msgid ":index:`MediaBoxSize` -- :attr:`Page.mediabox_size`" +msgstr "" + +#: ../../deprecated.rst:153 fc3637603edc44449553dd1fc10ef2dc +msgid ":index:`metadataXML` -- :meth:`Document.xref_xml_metadata`" +msgstr "" + +#: ../../deprecated.rst:154 4734ad7ca35e4295bc39b9343fae96bf +msgid ":index:`movePage` -- :meth:`Document.move_page`" +msgstr "" + +#: ../../deprecated.rst:155 29dc231050284695b482c40ab67035cd +msgid ":index:`needsPass` -- :attr:`Document.needs_pass`" +msgstr "" + +#: ../../deprecated.rst:156 b7bbcca1b65140f7a23f06023146091a +msgid ":index:`newPage` -- :meth:`Document.new_page`" +msgstr "" + +#: ../../deprecated.rst:157 7b9a25eacb194c1085cd415855c4860e +msgid ":index:`newShape` -- :meth:`Page.new_shape`" +msgstr "" + +#: ../../deprecated.rst:158 8173575617414d0989cd5e7b5dbe4825 +msgid ":index:`nextLocation` -- :meth:`Document.next_location`" +msgstr "" + +#: ../../deprecated.rst:159 0b83f1a78c974acba949f3651c3987ca +msgid ":index:`pageCount` -- :attr:`Document.page_count`" +msgstr "" + +#: ../../deprecated.rst:160 d347647e7c4845689c148b6687cbe86e +msgid ":index:`pageCropBox` -- :meth:`Document.page_cropbox`" +msgstr "" + +#: ../../deprecated.rst:161 07ab9db3d77249908e0570a9f601b00b +msgid ":index:`pageXref` -- :meth:`Document.page_xref`" +msgstr "" + +#: ../../deprecated.rst:162 c48b011bfa7a49f4a715474371748d0d +msgid ":index:`PaperRect` -- :meth:`paper_rect`" +msgstr "" + +#: ../../deprecated.rst:163 794d8f18e4b34f019a7e5a1eff5f2db4 +msgid ":index:`PaperSize` -- :meth:`paper_size`" +msgstr "" + +#: ../../deprecated.rst:164 912bd5afa4174ad7ad07cacc9d16aef4 +msgid ":index:`paperSizes` -- :attr:`paper_sizes`" +msgstr "" + +#: ../../deprecated.rst:165 b03e09c6342c41299da1cdecfe87a1fc +msgid ":index:`PDFCatalog` -- :meth:`Document.pdf_catalog`" +msgstr "" + +#: ../../deprecated.rst:166 962d9ba05d8841d1b1c66b3fc304d6f8 +msgid ":index:`PDFTrailer` -- :meth:`Document.pdf_trailer`" +msgstr "" + +#: ../../deprecated.rst:167 2f4783d82c5a4d14b0f377fcffb5fed8 +msgid ":index:`pillowData` -- :meth:`Pixmap.pil_tobytes`" +msgstr "" + +#: ../../deprecated.rst:168 baf9bf3a1e884ddb99139199010c1f5b +msgid ":index:`pillowWrite` -- :meth:`Pixmap.pil_save`" +msgstr "" + +#: ../../deprecated.rst:169 96c05e011f1242b9b8163d060e11876a +msgid ":index:`planishLine` -- :meth:`planish_line`" +msgstr "" + +#: ../../deprecated.rst:170 68bd0d9d10734094b2e9dbc55162e13a +msgid ":index:`preRotate` -- :meth:`Matrix.prerotate`" +msgstr "" + +#: ../../deprecated.rst:171 8bdcf39ac3a04ec584a07c3670e6c1c7 +msgid ":index:`preScale` -- :meth:`Matrix.prescale`" +msgstr "" + +#: ../../deprecated.rst:172 cf845bc2492648df9b941b7ef7db68b6 +msgid ":index:`preShear` -- :meth:`Matrix.preshear`" +msgstr "" + +#: ../../deprecated.rst:173 14c5026144d84103b74a07b30fee2512 +msgid ":index:`preTranslate` -- :meth:`Matrix.pretranslate`" +msgstr "" + +#: ../../deprecated.rst:174 fc0559ee8c6343eea94b1b7e3948e03e +msgid ":index:`previousLocation` -- :meth:`Document.prev_location`" +msgstr "" + +#: ../../deprecated.rst:175 27731a7d4029401da7bc531da68f1a21 +msgid ":index:`readContents` -- :meth:`Page.read_contents`" +msgstr "" + +#: ../../deprecated.rst:176 133ffd4b0d204ec4b93e2e4cfafcb76f +msgid ":index:`resolveLink` -- :meth:`Document.resolve_link`" +msgstr "" + +#: ../../deprecated.rst:177 dd161914317344ab9c50aa1713b3bc8a +msgid ":index:`rotationMatrix` -- :attr:`Page.rotation_matrix`" +msgstr "" + +#: ../../deprecated.rst:178 12c65e9ce74e4188ade3e84773fff4a4 +msgid ":index:`searchFor` -- :meth:`Page.search_for`" +msgstr "" + +#: ../../deprecated.rst:179 7ebba037060c4ccb94fb6611ec91a95c +msgid ":index:`searchPageFor` -- :meth:`Document.search_page_for`" +msgstr "" + +#: ../../deprecated.rst:180 1bd92b7e92794d7ea9fd5fe12a917fb5 +msgid ":index:`setAlpha` -- :meth:`Pixmap.set_alpha`" +msgstr "" + +#: ../../deprecated.rst:181 d939772fbf0440968d64b115e336901b +msgid ":index:`setBlendMode` -- :meth:`Annot.set_blendmode`" +msgstr "" + +#: ../../deprecated.rst:182 eee4c968e1ce440b9bca3317f1c97c2b +msgid ":index:`setBorder` -- :meth:`Annot.set_border`" +msgstr "" + +#: ../../deprecated.rst:183 8972027ca4c64c239a0ac5796eda537e +msgid ":index:`setColors` -- :meth:`Annot.set_colors`" +msgstr "" + +#: ../../deprecated.rst:184 25dcdb414c364adcaecd75bbad18538b +msgid ":index:`setCropBox` -- :meth:`Page.set_cropbox`" +msgstr "" + +#: ../../deprecated.rst:185 3478d381553d46b98ad378b06d3535d7 +msgid ":index:`setFlags` -- :meth:`Annot.set_flags`" +msgstr "" + +#: ../../deprecated.rst:186 a3f6ed932990480d8ef802f1aa8dc611 +msgid ":index:`setInfo` -- :meth:`Annot.set_info`" +msgstr "" + +#: ../../deprecated.rst:187 399b02fa3d8645c9a0e6a63968a49ab6 +msgid ":index:`setLanguage` -- :meth:`Document.set_language`" +msgstr "" + +#: ../../deprecated.rst:188 15718079cc2847dea27b601e918c69d2 +msgid ":index:`setLineEnds` -- :meth:`Annot.set_line_ends`" +msgstr "" + +#: ../../deprecated.rst:189 c64721b035484fb7afc912582cdde029 +msgid ":index:`setMediaBox` -- :meth:`Page.set_mediabox`" +msgstr "" + +#: ../../deprecated.rst:190 dda850ad8d874c678fc63fb40bdf18f0 +msgid ":index:`setMetadata` -- :meth:`Document.set_metadata`" +msgstr "" + +#: ../../deprecated.rst:191 da20869148a84b22aae622c6cf2f1a94 +msgid ":index:`setName` -- :meth:`Annot.set_name`" +msgstr "" + +#: ../../deprecated.rst:192 3465192236a64449886c7a641b6ae7e4 +msgid ":index:`setOC` -- :meth:`Annot.set_oc`" +msgstr "" + +#: ../../deprecated.rst:193 565618b0eeda4dab9fcb83f7cb66ced6 +msgid ":index:`setOpacity` -- :meth:`Annot.set_opacity`" +msgstr "" + +#: ../../deprecated.rst:194 0da4f191ff2a4ee6b4dcd65f2389eaa2 +msgid ":index:`setOrigin` -- :meth:`Pixmap.set_origin`" +msgstr "" + +#: ../../deprecated.rst:195 18647efea53342d5b7557f7cae05e4a1 +msgid ":index:`setPixel` -- :meth:`Pixmap.set_pixel`" +msgstr "" + +#: ../../deprecated.rst:196 a24782160fb141e8ac764c333d987674 +msgid ":index:`setRect` -- :meth:`Annot.set_rect`" +msgstr "" + +#: ../../deprecated.rst:197 b6d40d4e98674081b1a9af6708231fdf +msgid ":index:`setRect` -- :meth:`Pixmap.set_rect`" +msgstr "" + +#: ../../deprecated.rst:198 5c6f3e60a4784efe97eec74f3efead31 +msgid ":index:`setResolution` -- :meth:`Pixmap.set_dpi`" +msgstr "" + +#: ../../deprecated.rst:199 d3cf7265bb0345caaa35cee0ccec1f58 +msgid ":index:`setRotation` -- :meth:`Page.set_rotation`" +msgstr "" + +#: ../../deprecated.rst:200 1573ef9d77b343afb19a92a27d87756e +msgid ":index:`setToC` -- :meth:`Document.set_toc`" +msgstr "" + +#: ../../deprecated.rst:201 458d23fe34464a6595998b7eb1018338 +msgid ":index:`setXmlMetadata` -- :meth:`Document.set_xml_metadata`" +msgstr "" + +#: ../../deprecated.rst:202 dbff7623129443ae909b28be52fdaacb +msgid ":index:`showPDFpage` -- :meth:`Page.show_pdf_page`" +msgstr "" + +#: ../../deprecated.rst:203 7e7c0909bd734d378738029819134255 +msgid ":index:`soundGet` -- :meth:`Annot.get_sound`" +msgstr "" + +#: ../../deprecated.rst:204 8161ab61491b42ad924a02953ddd540b +msgid ":index:`tintWith` -- :meth:`Pixmap.tint_with`" +msgstr "" + +#: ../../deprecated.rst:205 315914d9d5214bf9bb07a4f746a91ac6 +msgid ":index:`transformationMatrix` -- :attr:`Page.transformation_matrix`" +msgstr "" + +#: ../../deprecated.rst:206 8e9b02db2d39463b950c2d3bbff108a3 +msgid ":index:`updateLink` -- :meth:`Page.update_link`" +msgstr "" + +#: ../../deprecated.rst:207 8263c336e9bb402ca9cbd9086d43d75b +msgid ":index:`updateObject` -- :meth:`Document.update_object`" +msgstr "" + +#: ../../deprecated.rst:208 6ae4d97e8c5e4da99256aeead52810f1 +msgid ":index:`updateStream` -- :meth:`Document.update_stream`" +msgstr "" + +#: ../../deprecated.rst:209 73f4ed438af740348f883a9f36ca6474 +msgid ":index:`wrapContents` -- :meth:`Page.wrap_contents`" +msgstr "" + +#: ../../deprecated.rst:210 78a6c29c00914e158bb52d47d3e67380 +msgid ":index:`writeImage` -- :meth:`Pixmap.save`" +msgstr "" + +#: ../../deprecated.rst:211 43019b552508422699e7de4d4adc0be3 +msgid ":index:`writePNG` -- :meth:`Pixmap.save`" +msgstr "" + +#: ../../deprecated.rst:212 534c176a9f984bbba09535e897312f16 +msgid ":index:`writeText` -- :meth:`Page.write_text`" +msgstr "" + +#: ../../deprecated.rst:213 cd67b2d738be4ecd8d5087581b2752ec +msgid ":index:`writeText` -- :meth:`TextWriter.write_text`" +msgstr "" + +#: ../../deprecated.rst:214 63dbb736b48743d28f2b8f6a1feac7f0 +msgid ":index:`xrefLength` -- :meth:`Document.xref_length`" +msgstr "" + +#: ../../deprecated.rst:215 e817cc360dfe4f009e7c7b400c69193d +msgid ":index:`xrefObject` -- :meth:`Document.xref_object`" +msgstr "" + +#: ../../deprecated.rst:216 7c0ef03c3a82406985b37ac2a4840260 +msgid ":index:`xrefStream` -- :meth:`Document.xref_stream`" +msgstr "" + +#: ../../deprecated.rst:217 c3fdec8f544048ccbff884ecff95a30c +msgid ":index:`xrefStreamRaw` -- :meth:`Document.xref_stream_raw`" +msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/device.mo b/docs/locales/ja/LC_MESSAGES/device.mo new file mode 100644 index 000000000..d113a5316 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/device.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/device.po b/docs/locales/ja/LC_MESSAGES/device.po new file mode 100644 index 000000000..8059dab66 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/device.po @@ -0,0 +1,101 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 55167d0c5e4948eabf41a2e0ad7fff8a +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d1afc8b9b26740989ade65fddf4c784c +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 fc4a52fdadd6458e894b792b0f04f4d1 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../device.rst:7 e5be8af0be364d21a95eff2daec5e5d1 +msgid "Device" +msgstr "Device (デバイス)" + +#: ../../device.rst:9 c79f0e0250fc4ea296a521429e0863db +msgid "" +"The different format handlers (pdf, xps, etc.) interpret pages to a " +"\"device\". Devices are the basis for everything that can be done with a " +"page: rendering, text extraction and searching. The device type is " +"determined by the selected construction method." +msgstr "" +"異なるフォーマットハンドラ(pdf、xps " +"など)はページを「デバイス」に解釈します。デバイスはページで行うすべてのことの基盤であり、レンダリング、テキスト抽出、検索などが行えます。デバイスのタイプは選択した構築方法によって決まります。" + +#: ../../device.rst:11 e1ec859236f84532835550a8d0d4f793 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../device.rst:17 bdb2f8e55f6f4e9582764fb173e2584e +msgid "Constructor for either a pixel map or a display list device." +msgstr "ピクセルマップまたは表示リストデバイスのコンストラクタ。" + +#: ../../device.rst 25de13f5cdbc4f6fac5f18832215e384 +#: 543e28e5ab6b4c8db6ae54916f8223e5 +msgid "Parameters" +msgstr "" + +#: ../../device.rst:19 df3b8b3f3fda4ed685d15456fcbbc98b +msgid "either a ``Pixmap`` or a ``DisplayList``." +msgstr "``Pixmap`` または ``DisplayList`` のいずれか。" + +#: ../../device.rst:22 21adb78e90cf4d948936ecce99c0f28e +msgid "" +"An optional `IRect` for ``Pixmap`` devices to restrict rendering to a " +"certain area of the page. If the complete page is required, specify " +"``None``. For display list devices, this parameter must be omitted." +msgstr "" +"``Pixmap`` デバイス用の任意の `IRect` " +"で、ページの特定の領域にレンダリングを制限するためのものです。完全なページが必要な場合は、``None`` " +"を指定してください。表示リストデバイスの場合、このパラメータは省略する必要があります。" + +#: ../../device.rst:27 455c165a85814a96b91272c070f1afcc +msgid "Constructor for a text page device." +msgstr "テキストページデバイスのコンストラクタ。" + +#: ../../device.rst:29 5a36c4d50ef34335b590bbe855dda21d +msgid "``TextPage`` object" +msgstr "TextPageオブジェクト" + +#: ../../device.rst:32 fb3e0d3553a54959b420fe3c83ce17a0 +msgid "" +"control the way how text is parsed into the text page. Currently 3 " +"options can be coded into this parameter, see :ref:`TextPreserve`. To set" +" these options use something like ``flags=0 | TEXT_PRESERVE_LIGATURES | " +"...``." +msgstr "テキストがテキストページに解析される方法を制御するフラグです" + +#: ../../footer.rst:60 ade0e54ff3ee44398c800affaa5a276f +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/displaylist.mo b/docs/locales/ja/LC_MESSAGES/displaylist.mo new file mode 100644 index 000000000..f59b5b8c7 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/displaylist.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/displaylist.po b/docs/locales/ja/LC_MESSAGES/displaylist.po new file mode 100644 index 000000000..32d5242cd --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/displaylist.po @@ -0,0 +1,262 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 85ee81383b5446bab311d8f7b3c8fcac +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 2aadbeed28004bc5b1af3cafca54cd49 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 5f03ea6228c540d490e7e21dbe7a682e +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../displaylist.rst:7 ffd953b6fbc64a39866787a0edce72c2 +msgid "DisplayList" +msgstr "DisplayList(ディスプレイリスト)" + +#: ../../displaylist.rst:9 a98bdb15facb45eca03a302e65321649 +msgid "" +"DisplayList is a list containing drawing commands (text, images, etc.). " +"The intent is two-fold:" +msgstr "DisplayListは、描画コマンド(テキスト、画像など)を含むリストです。その目的は二つあります:" + +#: ../../displaylist.rst:11 f34b2f44389140eca2d21b0d7c566835 +msgid "as a caching-mechanism to reduce parsing of a page" +msgstr "ページのパースを削減するためのキャッシュメカニズムとして" + +#: ../../displaylist.rst:12 11fd7eeaa74c41e38f44f93fe53f799d +msgid "" +"as a data structure in multi-threading setups, where one thread parses " +"the page and another one renders pages. This aspect is currently not " +"supported by PyMuPDF." +msgstr "ページをパースするスレッドとページをレンダリングする別のスレッドでのデータ構造として。この側面は現在、PyMuPDFではサポートされていません。" + +#: ../../displaylist.rst:14 f47fc55997964be8872823d0d68a9434 +msgid "" +"A display list is populated with objects from a page, usually by " +"executing :meth:`Page.get_displaylist`. There also exists an independent " +"constructor." +msgstr "" +"ディスプレイリストは通常、:meth:`Page.get_displaylist` " +"を実行することによってページからオブジェクトで満たされます。また、独立したコンストラクタも存在します。" + +#: ../../displaylist.rst:16 478941b4f0a74dd5ac00af8a2e5de8e0 +msgid "" +"\"Replay\" the list (once or many times) by invoking one of its methods " +":meth:`~DisplayList.run`, :meth:`~DisplayList.get_pixmap` or " +":meth:`~DisplayList.get_textpage`." +msgstr "" +"このリストを(一度または複数回)再生するには、そのメソッドのうちのいずれかを呼び出します。:meth:`~DisplayList.run` " +"、:meth:`~DisplayList.get_pixmap` 、または :meth:`~DisplayList.get_textpage` " +"です。" + +#: ../../displaylist.rst:20 b035d30ab932458c89921fc2091ca6ed +msgid "**Method**" +msgstr "**メソッド** " + +#: ../../displaylist.rst:20 9ec33062cfa84f33adb5ca985df1b62a +msgid "**Short Description**" +msgstr "**短い説明** " + +#: ../../displaylist.rst:22 f063e9c7368a4d1bb4efbf9e02d9a7b0 +msgid ":meth:`~DisplayList.run`" +msgstr "" + +#: ../../displaylist.rst:22 916c809bdfae4d9b912c30cda33c7d3b +msgid "Run a display list through a device." +msgstr "ディスプレイリストをデバイスを介して実行します。" + +#: ../../displaylist.rst:23 a412fe84e6a943109a82c9872dc2a953 +msgid ":meth:`~DisplayList.get_pixmap`" +msgstr "" + +#: ../../displaylist.rst:23 b2523139914a4541a0c3f79442bf5c6f +msgid "generate a pixmap" +msgstr "ピクスマップを生成します。" + +#: ../../displaylist.rst:24 5bfdb590999b469981c332ada5769bbf +msgid ":meth:`~DisplayList.get_textpage`" +msgstr "" + +#: ../../displaylist.rst:24 24b92e98327b4214865e137898c355c1 +msgid "generate a text page" +msgstr "テキストページを生成します。" + +#: ../../displaylist.rst:25 758211c051284705afa9e186fdea6e54 +msgid ":attr:`~DisplayList.rect`" +msgstr "" + +#: ../../displaylist.rst:25 8247482dba784fabb5f89ce11f17a64c +msgid "mediabox of the display list" +msgstr "ディスプレイリストのメディアボックス(表示領域)です。" + +#: ../../displaylist.rst:29 879da44f5b924a8299e2cdf2daa64852 +msgid "**Class API**" +msgstr "**Class API(クラスAPI)** " + +#: ../../displaylist.rst:35 7024f17715094e0bac4657dc96d778fe +msgid "Create a new display list." +msgstr "新しいディスプレイリストを作成します。" + +#: ../../displaylist.rst 368347bd6e354c7286c6d761323614c6 +#: 5cffa7e1860f48d2b2b02af84396e8b7 a68f36c6fdbd4638841bd4b5d14e01c5 +#: fbf69acf127c475684179bebec45b555 +msgid "Parameters" +msgstr "パラメーター" + +#: ../../displaylist.rst:37 1a15f2e868934ebfbb706101234b942c +msgid "The page's rectangle." +msgstr "ページの矩形。" + +#: ../../displaylist.rst 4aa7e95bfb304eb786873dc128080192 +#: 9628266c652c43e397ab17aff7b09d9a fdc8f4060e4044a4bba226ce56c05efe +msgid "Return type" +msgstr "戻り値の型" + +#: ../../displaylist.rst:40 136f549e2738445d8d830e101523615c +msgid "``DisplayList``" +msgstr "" + +#: ../../displaylist.rst:44 a708030e4a714162a183122ff845f300 +msgid "" +"Run the display list through a device. The device will populate the " +"display list with its \"commands\" (i.e. text extraction or image " +"creation). The display list can later be used to \"read\" a page many " +"times without having to re-interpret it from the document file." +msgstr "デバイスを介してディスプレイリストを実行します。デバイスはディスプレイリストをその「コマンド」(つまりテキストの抽出または画像の作成)で満たします。ディスプレイリストは後で文書ファイルから再解釈することなく、ページを多くの回数「読む」ために使用できます。" + +#: ../../displaylist.rst:46 4c8291909a844ff4a2f2ab416252270b +msgid "" +"You will most probably instead use one of the specialized run methods " +"below -- :meth:`get_pixmap` or :meth:`get_textpage`." +msgstr "" +"おそらく、以下の専門の実行メソッドの1つを代わりに使用するでしょう - :meth:`get_pixmap` または " +":meth:`get_textpage` 。" + +#: ../../displaylist.rst:48 4f7a4933e8924fe0b4bf3ba52b68b918 +msgid "Device" +msgstr "デバイス" + +#: ../../displaylist.rst:51 c7fc73de742e46aaa1a3bfac323ffe52 +msgid "Transformation matrix to apply to the display list contents." +msgstr "ディスプレイリストのコンテンツに適用する変換行列。" + +#: ../../displaylist.rst:54 8d876f1c867a4ba9aab91aa8844f11ed +msgid "" +"Only the part visible within this area will be considered when the list " +"is run through the device." +msgstr "デバイスを介してリストを実行する際にこの領域内で可視部分のみが考慮されます。" + +#: ../../displaylist.rst:65 4cd71203402f42c084c05537b4069b8e +msgid "Run the display list through a draw device and return a pixmap." +msgstr "ディスプレイリストを描画デバイスを介して実行し、ピクスマップを返します。" + +#: ../../displaylist.rst:67 40ffabb8733d4c159fb9c1bf5f123b7a +msgid "matrix to use. Default is the identity matrix." +msgstr "使用する行列。デフォルトは単位行列です。" + +#: ../../displaylist.rst:70 9f80e04939df4e869a6aae307600f9b9 +msgid "the desired colorspace. Default is RGB." +msgstr "望ましいカラースペース。デフォルトはRGBです。" + +#: ../../displaylist.rst:73 004942119bb548268104bdbf345c6626 +msgid "determine whether or not (0, default) to include a transparency channel." +msgstr "透明チャネルを含めるかどうかを決定します(0、デフォルトは含めない)。" + +#: ../../displaylist.rst:75 63499de6adb349218d9c228d37571c9b +msgid "" +"restrict rendering to the intersection of this area with " +":attr:`DisplayList.rect`." +msgstr ":attr:`DisplayList.rect` との交差部分にレンダリングを制限します。" + +#: ../../displaylist.rst:77 2b4519ceb62d4ff5aaf26469b6ad27e7 +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../displaylist.rst 9e12b4a400bd4f28a64154727e1341e8 +#: dfd4f14a1c4c40b4b4593b30d2801610 +msgid "Returns" +msgstr "戻り値" + +#: ../../displaylist.rst:78 4d9e816a554e498f8b4ced5ead7fae63 +msgid "pixmap of the display list." +msgstr "ディスプレイリストのピクスマップ。" + +#: ../../displaylist.rst:82 d201e153d5a74b6286a3f24dd07771e4 +msgid "Run the display list through a text device and return a text page." +msgstr "ディスプレイリストをテキストデバイスを介して実行し、テキストページを返します。" + +#: ../../displaylist.rst:84 f7168bc2140948b1b23ee9a6555aa19c +msgid "" +"control which information is parsed into a text page. Default value in " +"PyMuPDF is `3 = TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE`, i.e." +" :data:`ligatures` are **passed through**, white spaces are **passed " +"through** (not translated to spaces), and images are **not included**. " +"See :ref:`TextPreserve`." +msgstr "" +"テキストページに解析される情報を制御します。PyMuPDFのデフォルト値は `3 = TEXT_PRESERVE_LIGATURES | " +"TEXT_PRESERVE_WHITESPACE` です。つまり、:data:`ligatures` はそのまま **通過** し、空白はそのまま" +" **通過** します(スペースに変換されません)、画像は **含まれません** 。:ref:`TextPreserve` を参照してください。" + +#: ../../displaylist.rst:86 331d9ffb9e2c41b2a7aa918985944d03 +msgid ":ref:`TextPage`" +msgstr "" + +#: ../../displaylist.rst:87 9a3d633bb3b9470e9633979dc60e32db +msgid "text page of the display list." +msgstr "ディスプレイリストのテキストページ。" + +#: ../../displaylist.rst:91 a0532ee2b43e4881966b65e83c28a1f5 +msgid "" +"Contains the display list's mediabox. This will equal the page's " +"rectangle if it was created via :meth:`Page.get_displaylist`." +msgstr "" +"ディスプレイリストのmediaboxを含みます。これは、:meth:`Page.get_displaylist` " +"を介して作成された場合、ページの矩形と等しいです。" + +#: ../../displaylist.rst 027bab9cbfe6488eb3f5cf5818313a0e +msgid "type" +msgstr "" + +#: ../../displaylist.rst:93 3a0e088ac75d4a128417cd79855b6777 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../footer.rst:60 5f7c2ee2445b4bd69c561fa6fb3cf797 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/document-writer-class.mo b/docs/locales/ja/LC_MESSAGES/document-writer-class.mo new file mode 100644 index 000000000..e75a9a871 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/document-writer-class.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/document-writer-class.po b/docs/locales/ja/LC_MESSAGES/document-writer-class.po new file mode 100644 index 000000000..e02bb16a7 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/document-writer-class.po @@ -0,0 +1,196 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 a8b3ae3a5d614d1e8729320973185b23 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 fdd48e8301e84183b56ee383e825d1d8 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 ee68e630d503448088146398fe92de3e +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../document-writer-class.rst:7 2e25f1ee520c42cd915962f032064cf1 +msgid "DocumentWriter" +msgstr "DocumentWriter(ドキュメントライター)" + +#: ../../document-writer-class.rst:9 9c9bc94001de467cb49c9bf4b7b82cfc +msgid "|pdf_only_class|" +msgstr "PDFのみ。" + +#: ../../document-writer-class.rst:12 0d6e81eab559420a9cb87ac6aab3fea5 +msgid "New in v1.21.0" +msgstr "バージョン1.21.0で導入" + +#: ../../document-writer-class.rst:14 b10e3b014d11420d87f829cd717b0cd1 +msgid "" +"This class represents a utility which can output various :ref:`document " +"types supported by PyMuPDF`." +msgstr "" +"このクラスは、MuPDFで :ref:`サポートされている ` " +"さまざまなドキュメントタイプを出力できるユーティリティを表します。" + +#: ../../document-writer-class.rst:16 9c4bec500209410f8b4cbcf2371d35db +msgid "" +"In |PyMuPDF| only used for outputting PDF documents whose pages are " +"populated by :ref:`Story` DOMs." +msgstr "" +"PyMuPDFでは、:ref:`Story` " +"DOM(ドキュメントオブジェクトモデル)でページが埋められたPDFドキュメントを出力するためにのみ使用されます。" + +#: ../../document-writer-class.rst:18 9871fdeac3864a2da5273e4540a0a267 +msgid "" +"Using DocumentWriter_ also for other document types might happen in the " +"future." +msgstr "将来的には、DocumentWriter_ を他のドキュメントタイプにも使用することがあるかもしれません。" + +#: ../../document-writer-class.rst:21 9968d643215c4572b41cf341313b8f65 +msgid "**Method / Attribute**" +msgstr "**メソッド/属性** " + +#: ../../document-writer-class.rst:21 606c7ae0f2594eddbe42178bb8644d9a +msgid "**Short Description**" +msgstr "**簡単な説明** " + +#: ../../document-writer-class.rst:23 50c0babb7b8840198f03c08d30e9d3c5 +msgid ":meth:`DocumentWriter.begin_page`" +msgstr "" + +#: ../../document-writer-class.rst:23 3bb196d61c3a4aa0b7994d563be77f25 +msgid "start a new output page" +msgstr "新しい出力ページを開始します。" + +#: ../../document-writer-class.rst:24 1f63440a5b0e4d6ea6e75dda50dddea3 +msgid ":meth:`DocumentWriter.end_page`" +msgstr "" + +#: ../../document-writer-class.rst:24 10f5e50934fe4657a5d2600346ceafcf +msgid "finish the current output page" +msgstr "現在の出力ページを終了します。" + +#: ../../document-writer-class.rst:25 b0dad4d0d79645819b89624337d71ac9 +msgid ":meth:`DocumentWriter.close`" +msgstr "" + +#: ../../document-writer-class.rst:25 7fbb29ccb4b845ec90dd86ccb30df13f +msgid "flush pending output and close the file" +msgstr "保留中の出力をフラッシュし、ファイルを閉じます。" + +#: ../../document-writer-class.rst:28 1c4c83c4a8944dd48400b056f6f197d2 +msgid "**Class API**" +msgstr "**クラスAPI** " + +#: ../../document-writer-class.rst:34 b3f94e8ec90e4ea783dc2ad85b3a4c37 +msgid "" +"Create a document writer object, passing a Python file pointer or a file " +"path. Options to use when saving the file may also be passed." +msgstr "Pythonファイルポインタまたはファイルパスを渡してドキュメントライターオブジェクトを作成します。ファイルを保存する際に使用するオプションも渡すことができます。" + +#: ../../document-writer-class.rst:36 8c30ef19be644ae4acdc8aa962a11f4b +msgid "This class can also be used as a Python context manager." +msgstr "このクラスはPythonのコンテキストマネージャとしても使用できます。" + +#: ../../document-writer-class.rst 269746a16d004bdba23418807f5f2e30 +#: d6b77987b178432f8bf67e15280f52aa +msgid "Parameters" +msgstr "" + +#: ../../document-writer-class.rst:38 397489f724624b7e9532d6d269dd6670 +msgid "" +"the output file. This may be a string file name, or any Python file " +"pointer. .. note:: By using a `io.BytesIO()` object as file pointer, a " +"document writer can create a PDF in memory. Subsequently, this PDF can be" +" re-opened for input and be further manipulated. This technique is used " +"by several example scripts in :ref:`Stories recipes`." +msgstr "" + +#: ../../document-writer-class.rst:38 05e9879e1f4b4ae59a5e41c552e922db +msgid "" +"the output file. This may be a string file name, or any Python file " +"pointer." +msgstr "出力ファイル。これは文字列のファイル名またはPythonファイルポインタのいずれかです。" + +#: ../../document-writer-class.rst:40 37022195f8fe44c0806b5b19a88bcd4c +msgid "" +"By using a `io.BytesIO()` object as file pointer, a document writer can " +"create a PDF in memory. Subsequently, this PDF can be re-opened for input" +" and be further manipulated. This technique is used by several example " +"scripts in :ref:`Stories recipes`." +msgstr "" +"`io.BytesIO()` " +"オブジェクトをファイルポインタとして使用することで、ドキュメントライターはメモリ内にPDFを作成できます。その後、このPDFは入力用に再オープンしてさらに操作できます。これは、:ref:`Storiesレシピ`" +" のいくつかのサンプルスクリプトで使用されています。" + +#: ../../document-writer-class.rst:42 76a68fdef51c4889b0ad25785985c8b1 +msgid "" +"specify saving options for the output PDF. Typical are \"compress\" or " +"\"clean\". More possible values may be taken from help output of the " +"`mutool convert` CLI utility." +msgstr "" +"出力PDFの保存オプションを指定します。一般的なものは「compress」や「clean」です。その他の可能な値は、`mutool " +"convert` CLI ユーティリティのヘルプ出力から取得できます。" + +#: ../../document-writer-class.rst:46 26994c9b27a84276951802359dd085d9 +msgid "Start a new output page of a given dimension." +msgstr "指定された寸法の新しい出力ページを開始します。" + +#: ../../document-writer-class.rst:48 76ed5effaf3f494e924c1e4409c325a4 +msgid "" +"a rectangle specifying the page size. After this method, output " +"operations may write content to the page." +msgstr "ページサイズを指定する矩形。このメソッドの後、出力操作はページにコンテンツを書き込むことができます。" + +#: ../../document-writer-class.rst:52 3ea8959411e545e6b5a8f32498bf303e +msgid "" +"Finish a page. This flushes any pending data and appends the page to the " +"output document." +msgstr "ページを終了します。保留中のデータをフラッシュし、ページを出力ドキュメントに追加します。" + +#: ../../document-writer-class.rst:56 444a43febca543608815926d8f431a3e +msgid "" +"Close the output file. This method is required for writing any pending " +"data." +msgstr "出力ファイルを閉じます。保留中のデータを書き込むためにこのメソッドが必要です。" + +#: ../../document-writer-class.rst:58 6f307923a0ba47a5a19b6b9a85f7bdc9 +msgid "For usage examples consult the section of :ref:`Story`." +msgstr "使用例については、:ref:`Story` のセクションを参照してください。" + +#: ../../footer.rst:60 0ad0cba35bcc45868f1d5640bc7e226b +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/document.mo b/docs/locales/ja/LC_MESSAGES/document.mo new file mode 100644 index 000000000..7d5f68cac Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/document.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/document.po b/docs/locales/ja/LC_MESSAGES/document.po new file mode 100644 index 000000000..cb7f9dbc1 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/document.po @@ -0,0 +1,6460 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 98ab536d639d43b3acc623a987a0f047 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 f7af9c3d1ef640149349d799451b6946 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 2aadb102c464428888e2574393e9f1cd +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../document.rst:7 91ed63d1daa94a11bb6e9e3d2e263b47 +msgid "Document" +msgstr "Document (ドキュメント)" + +#: ../../document.rst:11 9f3d3e5cbffc4fa9b669d55af51adf35 +msgid "" +"This class represents a document. It can be constructed from a file or " +"from memory." +msgstr "このクラスはドキュメントを表します。ファイルまたはメモリから構築することができます。" + +#: ../../document.rst:13 fc5a69fcc5e04dcbb95118c5e0848aac +msgid "" +"There exists the alias *open* for this class, i.e. " +"`pymupdf.Document(...)` and `pymupdf.open(...)` do exactly the same " +"thing." +msgstr "" +"このクラスには *「open」* というエイリアスが存在し、つまり、 `pymupdf.Document(...)` と " +"`pymupdf.open(...)` はまったく同じことを行います。" + +#: ../../document.rst:15 48be984cc280465dba5b9ad59ee8f6f8 +msgid "For details on **embedded files** refer to Appendix 3." +msgstr "**埋め込まれたファイル** の詳細については、付録3を参照してください。" + +#: ../../document.rst:19 bae151c2e8724244a9554194cbdc2ba4 +msgid "" +"Starting with v1.17.0, a new page addressing mechanism for **EPUB files " +"only** is supported. This document type is internally organized in " +"chapters such that pages can most efficiently be found by their so-called" +" \"location\". The location is a tuple *(chapter, pno)* consisting of the" +" chapter number and the page number **in that chapter**. Both numbers are" +" zero-based." +msgstr "" +"バージョン1.17.0から、 **EPUBファイル専用** " +"の新しいページアドレッシングメカニズムがサポートされています。このドキュメントタイプは、ページがその「場所」によって最も効率的に見つけられるように、章ごとに内部的に組織されています。場所は" +" *(章、pno)* というタプルで構成され、章番号と **その章の中** のページ番号を指します。どちらの番号もゼロベースです。" + +#: ../../document.rst:21 50766760714f494fbac95f548cf5b87f +msgid "" +"While it is still possible to locate a page via its (absolute) number, " +"doing so may mean that the complete EPUB document must be laid out before" +" the page can be addressed. This may have a significant performance " +"impact if the document is very large. Using the page's *(chapter, pno)* " +"prevents this from happening." +msgstr "" +"依然として(絶対的な)番号を使用してページを見つけることは可能ですが、これを行うとページにアクセスする前に完全なEPUBドキュメントをレイアウトする必要があるかもしれません。これはドキュメントが非常に大きい場合、重要なパフォーマンスの影響を持つ可能性があります。ページの" +" *(章、pno)* を使用することで、これを防ぐことができます。" + +#: ../../document.rst:23 2c02a7c9f16f4727850836322bf68e47 +msgid "" +"To maintain a consistent API, PyMuPDF supports the page *location* syntax" +" for **all file types** -- documents without this feature simply have " +"just one chapter. :meth:`Document.load_page` and the equivalent index " +"access now also support a *location* argument." +msgstr "" +"一貫性のあるAPIを維持するために、PyMuPDFは **すべてのファイルタイプ** にページの場所構文をサポートしています - " +"この機能のないドキュメントは単に1つの章を持っています。:meth:`Document.load_page` および同等のインデックスアクセスも " +"*場所* 引数をサポートしています。" + +#: ../../document.rst:25 9ba193b74a5a49169207809d41c71a5b +msgid "" +"There are a number of methods for converting between page numbers and " +"locations, for determining the chapter count, the page count per chapter," +" for computing the next and the previous locations, and the last page " +"location of a document." +msgstr "ページ番号と場所の間で変換するためのいくつかのメソッドがあり、章の数を決定するためのメソッド、章ごとのページ数を決定するためのメソッド、次の場所と前の場所を計算するためのメソッド、およびドキュメントの最後のページの場所を計算するためのメソッドがあります。" + +#: ../../document.rst:28 101d535d2aa7406997700ef159b35072 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性** " + +#: ../../document.rst:28 e591cb1507ca46bdb0b159d377b5646e +msgid "**Short Description**" +msgstr "**短い説明** " + +#: ../../document.rst:30 eab995d9258c4b96889775bd41ac9837 +msgid ":meth:`Document.add_layer`" +msgstr "" + +#: ../../document.rst:30 ec69b29cdf7d42fe874dcbe3cf954624 +msgid "PDF only: make new optional content configuration" +msgstr "PDFのみ:新しいオプションコンテンツ設定を作成" + +#: ../../document.rst:31 b32c6489722044d6bb04d9ca99eb56bd +msgid ":meth:`Document.add_ocg`" +msgstr "" + +#: ../../document.rst:31 603f2cfc649a4452b0c4487a39e4ddcf +msgid "PDF only: add new optional content group" +msgstr "PDFのみ:新しいオプションコンテンツグループを追加" + +#: ../../document.rst:32 66c21ebcefc24dfd9bbc44c2e661b231 +msgid ":meth:`Document.authenticate`" +msgstr "" + +#: ../../document.rst:32 b6f6506ad5a348559982cfec77798887 +msgid "gain access to an encrypted document" +msgstr "暗号化されたドキュメントへのアクセスを取得" + +#: ../../document.rst:33 12e2f803568b424e8a90ce4141036d08 +#, fuzzy +msgid ":meth:`Document.bake`" +msgstr ":meth:`Document.make_bookmark` によって作成されたもの。" + +#: ../../document.rst:33 401b0220462942d0a1dfee34da205ed0 +msgid "PDF only: make annotations / fields permanent content" +msgstr "PDFのみ:注釈やフォームフィールドを永続的なコンテンツに変換します" + +#: ../../document.rst:34 dd50f99a786e41ebbd68cd9fb8ca64c1 +msgid ":meth:`Document.can_save_incrementally`" +msgstr "" + +#: ../../document.rst:34 f6350528b09d43a7a0d13ebd1dcbaa8c +msgid "check if incremental save is possible" +msgstr "インクリメンタルセーブが可能かどうかを確認" + +#: ../../document.rst:35 699412e9b737479aa96a155d07f5288f +msgid ":meth:`Document.chapter_page_count`" +msgstr "" + +#: ../../document.rst:35 0b0be59b6e7e484a9d0c7452df811b16 +msgid "number of pages in chapter" +msgstr "章内のページ数" + +#: ../../document.rst:36 102487c46b774d30a292cf28495c812c +msgid ":meth:`Document.close`" +msgstr "" + +#: ../../document.rst:36 70aa8cb1d13540378325fc97d4425de1 +msgid "close the document" +msgstr "ドキュメントを閉じる" + +#: ../../document.rst:37 655a998dd4b244e9bbd5b37980161f2a +msgid ":meth:`Document.convert_to_pdf`" +msgstr "" + +#: ../../document.rst:37 ab3b992bbb8f4642929774c741a037c3 +msgid "write a PDF version to memory" +msgstr "PDFバージョンをメモリに書き込む" + +#: ../../document.rst:38 ca3640236b37479ab5c9e8b2ec3dc1b9 +msgid ":meth:`Document.copy_page`" +msgstr "" + +#: ../../document.rst:38 448a7ecc912642419a75151501593012 +msgid "PDF only: copy a page reference" +msgstr "PDFのみ:ページの参照をコピー" + +#: ../../document.rst:39 69eee92a9eb944568380ed7b71683ec4 +msgid ":meth:`Document.del_toc_item`" +msgstr "" + +#: ../../document.rst:39 66b8afea9df7476d80d2c062415dec73 +msgid "PDF only: remove a single TOC item" +msgstr "PDFのみ:単一のTOCアイテムを削除" + +#: ../../document.rst:40 3886de667ca24e39a76c448e8fc25248 +msgid ":meth:`Document.delete_page`" +msgstr "" + +#: ../../document.rst:40 b8617e651a684f0dbf65fb708b8ed404 +msgid "PDF only: delete a page" +msgstr "PDFのみ:ページを削除" + +#: ../../document.rst:41 8d7d194ed7644002b844a1c2bd69d45b +msgid ":meth:`Document.delete_pages`" +msgstr "" + +#: ../../document.rst:41 6f1a4f2ce41445918429ec201d106bad +msgid "PDF only: delete multiple pages" +msgstr "PDFのみ:複数のページを削除" + +#: ../../document.rst:42 e4152c296d094c5c87aaeb21236efc3b +msgid ":meth:`Document.embfile_add`" +msgstr "" + +#: ../../document.rst:42 d11fa614b1cf449893e4d4ab9e012cfd +msgid "PDF only: add a new embedded file from buffer" +msgstr "PDFのみ:バッファから新しい埋め込みファイルを追加" + +#: ../../document.rst:43 988a9e7c75c042d5a2f905574053e033 +msgid ":meth:`Document.embfile_count`" +msgstr "" + +#: ../../document.rst:43 748a5953ba754e8ca862b037d4d302aa +msgid "PDF only: number of embedded files" +msgstr "PDFのみ:埋め込みファイルの数" + +#: ../../document.rst:44 f25fa2c51c3e4be093367edee0a6917e +msgid ":meth:`Document.embfile_del`" +msgstr "" + +#: ../../document.rst:44 77af58e470c84741aad17647af48762d +msgid "PDF only: delete an embedded file entry" +msgstr "PDFのみ:埋め込みファイルエントリを削除" + +#: ../../document.rst:45 a9fded50fc1f4d7a9a5aa3de403dc9d0 +msgid ":meth:`Document.embfile_get`" +msgstr "" + +#: ../../document.rst:45 996e583127cb46e0aae26829538d18ed +msgid "PDF only: extract an embedded file buffer" +msgstr "PDFのみ:埋め込みファイルバッファを抽出" + +#: ../../document.rst:46 dc64a6f81f6f45a3adac7fb1562bfe69 +msgid ":meth:`Document.embfile_info`" +msgstr "" + +#: ../../document.rst:46 992bfccef1144733b132d3186109594a +msgid "PDF only: metadata of an embedded file" +msgstr "PDFのみ:埋め込みファイルのメタデータ" + +#: ../../document.rst:47 eddf644557f44c1a8501bd84440647a2 +msgid ":meth:`Document.embfile_names`" +msgstr "" + +#: ../../document.rst:47 f76de0f95e48441ea3e7515269ae8c92 +msgid "PDF only: list of embedded files" +msgstr "PDFのみ:埋め込みファイルのリスト" + +#: ../../document.rst:48 8331588255db4e3e8a3dab614ae120e7 +msgid ":meth:`Document.embfile_upd`" +msgstr "" + +#: ../../document.rst:48 5ae51b3f54064cf182e546ae5e44d7d1 +msgid "PDF only: change an embedded file" +msgstr "PDFのみ:埋め込みファイルを変更" + +#: ../../document.rst:49 1c2742d215a54f68a0a1f16644a2a6db +msgid ":meth:`Document.extract_font`" +msgstr "" + +#: ../../document.rst:49 310f24e31cdd4a2cb66bfebaabf27431 +msgid "PDF only: extract a font by :data:`xref`" +msgstr "PDFのみ: :data:`xref` によるフォントの抽出" + +#: ../../document.rst:50 8c856cf6518e49398c7f76a36e95dfd9 +msgid ":meth:`Document.extract_image`" +msgstr "" + +#: ../../document.rst:50 a0a3ca83952a4c8bac3bb40a1e11eb46 +msgid "PDF only: extract an embedded image by :data:`xref`" +msgstr "PDFのみ: :data:`xref` による埋め込み画像の抽出" + +#: ../../document.rst:51 ad16d5d64d0a4092bc34738d3139b218 +msgid ":meth:`Document.ez_save`" +msgstr "" + +#: ../../document.rst:51 94a8bbd646c74dbe973c5fb9c12e0e14 +msgid "PDF only: :meth:`Document.save` with different defaults" +msgstr "PDFのみ:異なるデフォルト値で :meth:`Document.save` を実行" + +#: ../../document.rst:52 6208bc9fd0704a379ae5a114b79b1f56 +msgid ":meth:`Document.find_bookmark`" +msgstr "" + +#: ../../document.rst:52 fe4449f51f9b40bb97e1199450d8face +msgid "retrieve page location after laid out document" +msgstr "レイアウトされたドキュメント後のページ位置を取得" + +#: ../../document.rst:53 7c0242efa90849c5b0ce401b8cbb9949 +msgid ":meth:`Document.fullcopy_page`" +msgstr "" + +#: ../../document.rst:53 b2e7d5ed0d62420ca6cc2fa6d2244514 +msgid "PDF only: duplicate a page" +msgstr "PDFのみ:ページの複製" + +#: ../../document.rst:54 5b80ea3741bb4f898d03fa67f6ae3a34 +msgid ":meth:`Document.get_layer`" +msgstr "" + +#: ../../document.rst:54 a7706b707989466cb4383d64cadadb13 +msgid "PDF only: lists of OCGs in ON, OFF, RBGroups" +msgstr "PDFのみ:ON、OFF、RBGroups内のOCGのリスト" + +#: ../../document.rst:55 acdbc4e25e18460499b5e12cf6808a32 +msgid ":meth:`Document.get_layers`" +msgstr "" + +#: ../../document.rst:55 eea448604cdc4bd9a737f7b0c22c84cd +msgid "PDF only: list of optional content configurations" +msgstr "PDFのみ:オプションコンテンツ設定のリスト" + +#: ../../document.rst:56 8d6c3f3828cc4e60b7ce7780cd5c87f2 +msgid ":meth:`Document.get_oc`" +msgstr "" + +#: ../../document.rst:56 444960953f2c46d98b04eba21fb3db06 +msgid "PDF only: get OCG /OCMD xref of image / form xobject" +msgstr "PDFのみ:画像/フォームオブジェクトのOCG / OCMD xrefを取得" + +#: ../../document.rst:57 e2e7d707a6344fb292edcbd1208cac47 +msgid ":meth:`Document.get_ocgs`" +msgstr "" + +#: ../../document.rst:57 0bf8c1b011ef4121b2d67a5f2c2f65f2 +msgid "PDF only: info on all optional content groups" +msgstr "PDFのみ:すべてのオプションコンテンツグループの情報" + +#: ../../document.rst:58 4557616c5e13460c8b6e717bc9e81f9b +msgid ":meth:`Document.get_ocmd`" +msgstr "" + +#: ../../document.rst:58 d1f76d8b9b514eb9baac040bc8ded891 +msgid "PDF only: retrieve definition of an :data:`OCMD`" +msgstr "PDFのみ: :data:`OCMD` の定義を取得" + +#: ../../document.rst:59 a5a109993ee248f4a5d11f965d1d664a +msgid ":meth:`Document.get_page_fonts`" +msgstr "" + +#: ../../document.rst:59 804a7b3ca19241aab72f12dd6a7b82a8 +msgid "PDF only: list of fonts referenced by a page" +msgstr "PDFのみ:ページで参照されるフォントのリスト" + +#: ../../document.rst:60 756196df77ba4196b51697d3086cddfe +msgid ":meth:`Document.get_page_images`" +msgstr "" + +#: ../../document.rst:60 aed2189676e14abf9889bfa19f732e4c +msgid "PDF only: list of images referenced by a page" +msgstr "PDFのみ:ページで参照される画像のリスト" + +#: ../../document.rst:61 ca1b48537c664584843d36787fd2cc0c +msgid ":meth:`Document.get_page_labels`" +msgstr "" + +#: ../../document.rst:61 7e0edb35b8d24533a094d32b40609986 +msgid "PDF only: list of page label definitions" +msgstr "PDFのみ:ページラベルの定義のリスト" + +#: ../../document.rst:62 f137fd86eb1d4427924ba25cb7174235 +msgid ":meth:`Document.get_page_numbers`" +msgstr "" + +#: ../../document.rst:62 cb23b05728334b81888952a1f04cddee +msgid "PDF only: get page numbers having a given label" +msgstr "PDFのみ:指定されたラベルを持つページ番号を取得" + +#: ../../document.rst:63 f9b524b5add743f28aee3840c0912855 +msgid ":meth:`Document.get_page_pixmap`" +msgstr "" + +#: ../../document.rst:63 869e93413cee426dbb502720bc9313ae +msgid "create a pixmap of a page by page number" +msgstr "ページ番号によるページのピクスマップの作成" + +#: ../../document.rst:64 53b7e4d2648e439297d2c85812a5b802 +msgid ":meth:`Document.get_page_text`" +msgstr "" + +#: ../../document.rst:64 1f7da7792ddd430fa0c718b83a5c7f99 +msgid "extract the text of a page by page number" +msgstr "ページ番号によるページのテキストの抽出" + +#: ../../document.rst:65 1b104a47ab3942528dc1e4aea861cfe4 +msgid ":meth:`Document.get_page_xobjects`" +msgstr "" + +#: ../../document.rst:65 aa52803a2c63433db60e2cce48c22766 +msgid "PDF only: list of XObjects referenced by a page" +msgstr "PDFのみ:ページで参照されるXObjectのリスト" + +#: ../../document.rst:66 ca303e88911644c4a131146214f643d7 +msgid ":meth:`Document.get_sigflags`" +msgstr "" + +#: ../../document.rst:66 79de646a9e854a00acca4ea4f8460071 +msgid "PDF only: determine signature state" +msgstr "PDFのみ:署名状態を確認" + +#: ../../document.rst:67 7afcaf2f8a1148e2a7fbf39b2cf5cc21 +msgid ":meth:`Document.get_toc`" +msgstr "" + +#: ../../document.rst:67 af1837052c6f4040b800b893256abff5 +msgid "extract the table of contents" +msgstr "目次を抽出" + +#: ../../document.rst:68 98ea1f995fc14171b885e0993bcfe717 +msgid ":meth:`Document.get_xml_metadata`" +msgstr "" + +#: ../../document.rst:68 e31f72febe2548a086cf164f57dcc6aa +msgid "PDF only: read the XML metadata" +msgstr "PDFのみ:XMLメタデータを読み込む" + +#: ../../document.rst:69 1532f2bf38714ad893ba5421b5a9dfeb +msgid ":meth:`Document.has_annots`" +msgstr "" + +#: ../../document.rst:69 f94f14927d3b441891a3888a7a181505 +msgid "PDF only: check if PDF contains any annots" +msgstr "PDFのみ:PDFに注釈が含まれているかを確認" + +#: ../../document.rst:70 4142700081b942d4adff9318c2df9fb0 +msgid ":meth:`Document.has_links`" +msgstr "" + +#: ../../document.rst:70 2969def0330446c29aa7b0da7e59a20e +msgid "PDF only: check if PDF contains any links" +msgstr "PDFのみ:PDFにリンクが含まれているかを確認" + +#: ../../document.rst:71 f694e20f753d474bbc341264ff9b801c +msgid ":meth:`Document.insert_page`" +msgstr "" + +#: ../../document.rst:71 3ce06a744bc54da381c0cef76fb9c971 +msgid "PDF only: insert a new page" +msgstr "PDFのみ:新しいページを挿入" + +#: ../../document.rst:72 3f0d3139b0ca48b5bbc63cca2e46d0aa +msgid ":meth:`Document.insert_pdf`" +msgstr "" + +#: ../../document.rst:72 734c32398f73447fb4f1a7c203e910a2 +msgid "PDF only: insert pages from another PDF" +msgstr "PDFのみ:別のPDFからページを挿入" + +#: ../../document.rst:73 bf568ebd79e244b79ebe84d50ceaaef7 +msgid ":meth:`Document.insert_file`" +msgstr "" + +#: ../../document.rst:73 2bff6082f5bf4f8a8d5dd5e999b9c074 +msgid "PDF only: insert pages from arbitrary document" +msgstr "PDFのみ:任意のドキュメントからページを挿入" + +#: ../../document.rst:74 f36ab9a883434e47bb27b5f7d6820181 +msgid ":meth:`Document.journal_can_do`" +msgstr "" + +#: ../../document.rst:74 e6eeec9eb2fa440886315cfddc27cecf +msgid "PDF only: which journal actions are possible" +msgstr "PDFのみ:どのジャーナルアクションが可能か" + +#: ../../document.rst:75 b8520aa73224496f84431ef46857f333 +msgid ":meth:`Document.journal_enable`" +msgstr "" + +#: ../../document.rst:75 8c9eaf187688493da5404862ecebe535 +msgid "PDF only: enables journalling for the document" +msgstr "PDFのみ:ドキュメントのジャーナルを有効にする" + +#: ../../document.rst:76 acfc3684bf71420f9c3bb2f249aa47e1 +msgid ":meth:`Document.journal_load`" +msgstr "" + +#: ../../document.rst:76 851b0af1028f4f438b26a8f62dde1189 +msgid "PDF only: load journal from a file" +msgstr "PDFのみ:ファイルからジャーナルを読み込む" + +#: ../../document.rst:77 16502c8afb224cb4895b301405e470de +msgid ":meth:`Document.journal_op_name`" +msgstr "" + +#: ../../document.rst:77 ab7eabee94034ea2b6cbbc5653b06f13 +msgid "PDF only: return name of a journalling step" +msgstr "PDFのみ:ジャーナルステップの名前を返す" + +#: ../../document.rst:78 19861328e93546d7922dddc5463b6662 +msgid ":meth:`Document.journal_position`" +msgstr "" + +#: ../../document.rst:78 71d240ca35334b74bb905184ae7d52bd +msgid "PDF only: return journalling status" +msgstr "PDFのみ:ジャーナリングステータスを返す" + +#: ../../document.rst:79 9cf39978bcdd428eb4f5d6c8e8658908 +msgid ":meth:`Document.journal_redo`" +msgstr "" + +#: ../../document.rst:79 a9a683239adb45aaaefae5a648ac8ec1 +msgid "PDF only: redo current operation" +msgstr "PDFのみ:現在の操作をやり直す" + +#: ../../document.rst:80 fdf2acdab0394fb5bb1ee5e0c32089ff +msgid ":meth:`Document.journal_save`" +msgstr "" + +#: ../../document.rst:80 3ba31d5b0940484fa9fd230fc404e0e6 +msgid "PDF only: save journal to a file" +msgstr "PDFのみ:ジャーナルをファイルに保存" + +#: ../../document.rst:81 7e005e29d5d94997b3b3eb46a32f9317 +msgid ":meth:`Document.journal_start_op`" +msgstr "" + +#: ../../document.rst:81 82fad559780a4d2984cfcf94f13ea758 +msgid "PDF only: start an \"operation\" giving it a name" +msgstr "PDFのみ:名前を付けて「操作」を開始" + +#: ../../document.rst:82 eaf56262c7a64fb7ad21b3c269a0da50 +msgid ":meth:`Document.journal_stop_op`" +msgstr "" + +#: ../../document.rst:82 579541e1f64349eea6781fbcae25f23b +msgid "PDF only: end current operation" +msgstr "PDFのみ:現在の操作を終了" + +#: ../../document.rst:83 d5ad2d3e28f84d1dad8c3636bcd96068 +msgid ":meth:`Document.journal_undo`" +msgstr "" + +#: ../../document.rst:83 6e4da9715d7146e1aa6af075bf79afa7 +msgid "PDF only: undo current operation" +msgstr "PDFのみ:現在の操作を元に戻す" + +#: ../../document.rst:84 0ed2ea08b8f543f19c5f6208ad0883d6 +msgid ":meth:`Document.layer_ui_configs`" +msgstr "" + +#: ../../document.rst:84 eb101d7c6a27466884fbf40cf689f453 +msgid "PDF only: list of optional content intents" +msgstr "PDFのみ:オプションコンテンツインテントのリスト" + +#: ../../document.rst:85 d76005cf42af4d37a6095588afe048e9 +msgid ":meth:`Document.layout`" +msgstr "" + +#: ../../document.rst:85 3f774a0f57124a15b684a9e2c01dc470 +msgid "re-paginate the document (if supported)" +msgstr "ドキュメントを再ページ化(サポートされている場合)" + +#: ../../document.rst:86 98c2ca1a60cd433bb8dc14a182819bf1 +msgid ":meth:`Document.load_page`" +msgstr "" + +#: ../../document.rst:86 b522c2c4ac954d99a904f5ed4288a427 +msgid "read a page" +msgstr "ページを読み込む" + +#: ../../document.rst:87 d09a13f7a9b543869281fc5835fcf910 +msgid ":meth:`Document.make_bookmark`" +msgstr "" + +#: ../../document.rst:87 c96cb50dda404fee9009d63b28b6f347 +msgid "create a page pointer in reflowable documents" +msgstr "リフローアブルドキュメント内でページポインタを作成" + +#: ../../document.rst:88 a1f04d3cc46f449eac455e55b1702455 +msgid ":meth:`Document.move_page`" +msgstr "" + +#: ../../document.rst:88 86c2a62fc967429aa1db1ce5c33effb0 +msgid "PDF only: move a page to different location in doc" +msgstr "PDFのみ:ページをドキュメント内の異なる場所に移動" + +#: ../../document.rst:89 f00729c3537d477d91a79740c0987720 +msgid ":meth:`Document.need_appearances`" +msgstr "" + +#: ../../document.rst:89 ede3c937486f4d33ab642efff2fc6f2d +msgid "PDF only: get/set `/NeedAppearances` property" +msgstr "PDFのみ: `/NeedAppearances` プロパティを取得/設定" + +#: ../../document.rst:90 acb1170c62cf4a8cbb13e217bf3d5863 +msgid ":meth:`Document.new_page`" +msgstr "" + +#: ../../document.rst:90 222ef3b17cbb4737ba82eacfb6a26d4a +msgid "PDF only: insert a new empty page" +msgstr "PDFのみ:新しい空白ページを挿入" + +#: ../../document.rst:91 bae0b773a03d47ff823a6c000f56b9ec +msgid ":meth:`Document.next_location`" +msgstr "" + +#: ../../document.rst:91 ca665637d3844075b0dc5dd4174a44db +msgid "return (chapter, pno) of following page" +msgstr "次のページの(章、pno)" + +#: ../../document.rst:92 a4959b83048c49a59f3e57e3aa9ab69d +msgid ":meth:`Document.outline_xref`" +msgstr "" + +#: ../../document.rst:92 26d12fcc9a2f424a8af7349a7b8783a9 +msgid "PDF only: :data:`xref` a TOC item" +msgstr "PDFのみ:TOCアイテムを :data:`xref` " + +#: ../../document.rst:93 13ccab21d94f4a5e97a2e2bd614097bc +msgid ":meth:`Document.page_cropbox`" +msgstr "" + +#: ../../document.rst:93 0bce11acc6e6405083a654972d690499 +msgid "PDF only: the unrotated page rectangle" +msgstr "PDFのみ:回転していないページの矩形" + +#: ../../document.rst:94 280be10fb5464b8fa611b4618447c111 +msgid ":meth:`Document.page_xref`" +msgstr "" + +#: ../../document.rst:94 a6dab1c0c1b94e1982c4b5603cf78b4f +msgid "PDF only: :data:`xref` of a page number" +msgstr "PDFのみ:ページ番号の :data:`xref`" + +#: ../../document.rst:95 740c31de378c453f962f41a1f8e2e018 +msgid ":meth:`Document.pages`" +msgstr "" + +#: ../../document.rst:95 dd9645ad29944deeb4aae5cc82d2c862 +msgid "iterator over a page range" +msgstr "ページ範囲のイテレータ" + +#: ../../document.rst:96 f728fe24614647ab9badda0fc88bc1dc +msgid ":meth:`Document.pdf_catalog`" +msgstr "" + +#: ../../document.rst:96 459ef0598a31446694bf4c8516fa3acb +msgid "PDF only: :data:`xref` of catalog (root)" +msgstr "PDFのみ:カタログ(ルート)の :data:`xref` " + +#: ../../document.rst:97 4fc0c49ad16a46fbb5677870f976f9fe +msgid ":meth:`Document.pdf_trailer`" +msgstr "" + +#: ../../document.rst:97 ae19b55dcead4271b9ef4673a6f487e0 +msgid "PDF only: trailer source" +msgstr "PDFのみ:トレイラーソース" + +#: ../../document.rst:98 7f3c95701b3249bcbd665c8f7060e0a6 +msgid ":meth:`Document.prev_location`" +msgstr "" + +#: ../../document.rst:98 62dc836188774b84bd191037f02f29ed +msgid "return (chapter, pno) of preceding page" +msgstr "前のページの(章、pno)を返す" + +#: ../../document.rst:99 e62775f8eb9e44b9af6fc9edb21f904c +#, fuzzy +msgid ":meth:`Document.recolor`" +msgstr ":meth:`Document.make_bookmark` によって作成されたもの。" + +#: ../../document.rst:99 75788361bf0e490794958cd6a8d7237a +msgid "PDF only: execute :meth:`Page.recolor` for all pages" +msgstr "" + +#: ../../document.rst:100 eefda64ea8434c289242bbf8b59b5a5b +msgid ":meth:`Document.reload_page`" +msgstr "" + +#: ../../document.rst:100 62d8e789a0f743b4a8288f88f271796d +msgid "PDF only: provide a new copy of a page" +msgstr "PDFのみ:ページの新しいコピーを提供" + +#: ../../document.rst:101 9fe05ae6fcc549d58be039c5feb766a1 +msgid ":meth:`Document.resolve_names`" +msgstr "" + +#: ../../document.rst:101 e77f3de589b548a1a2ea07c794ac264d +msgid "PDF only: Convert destination names into a Python dict" +msgstr "PDFのみ: ページの目的地名をPythonの辞書に変換します" + +#: ../../document.rst:102 a4a3b0c33b5441ae9537c01d4c44182e +msgid ":meth:`Document.save`" +msgstr "" + +#: ../../document.rst:102 2381f21b7ba446ee9d07de4a5042a777 +msgid "PDF only: save the document" +msgstr "PDFのみ:ドキュメントを保存" + +#: ../../document.rst:103 22e9feec64e04cebb9fe260965cba18c +msgid ":meth:`Document.saveIncr`" +msgstr "" + +#: ../../document.rst:103 6020023bd82a423083306e9a4ee1d666 +msgid "PDF only: save the document incrementally" +msgstr "PDFのみ:ドキュメントを増分保存" + +#: ../../document.rst:104 0bbdb390769b460eabab8ce497be52a3 +msgid ":meth:`Document.scrub`" +msgstr "" + +#: ../../document.rst:104 aef6e70714864cf4b67ff4e403f02abc +msgid "PDF only: remove sensitive data" +msgstr "PDFのみ:機密データを削除" + +#: ../../document.rst:105 c47e007413e74a7d821f322d11acc7c4 +msgid ":meth:`Document.search_page_for`" +msgstr "" + +#: ../../document.rst:105 14f84e3da71b48c2a311bc02648690ff +msgid "search for a string on a page" +msgstr "ページ上で文字列を検索" + +#: ../../document.rst:106 95cddc5e5594416f9ec6e21277a5b29c +msgid ":meth:`Document.select`" +msgstr "" + +#: ../../document.rst:106 d8d09032311747edaaa7645430822eab +msgid "PDF only: select a subset of pages" +msgstr "PDFのみ:ページのサブセットを選択" + +#: ../../document.rst:107 3819c9e9ac8e43039b32be9d79ac86cc +msgid ":meth:`Document.set_layer_ui_config`" +msgstr "" + +#: ../../document.rst:107 5c70440924d84e419361c5d09b378663 +msgid "PDF only: set OCG visibility temporarily" +msgstr "PDFのみ:一時的にOCGの表示を設定" + +#: ../../document.rst:108 672520376dd241eeb303626ec81b1cfb +msgid ":meth:`Document.set_layer`" +msgstr "" + +#: ../../document.rst:108 276c6b9b34fa43c9bd1e652c4ea30a2a +msgid "PDF only: mass changing OCG states" +msgstr "PDFのみ:OCGステータスを一括変更" + +#: ../../document.rst:109 00eeb62d0c454681bedac3fe485f088a +msgid ":meth:`Document.set_markinfo`" +msgstr "" + +#: ../../document.rst:109 99b60ae09a654f56b55a4ad36936b7d3 +msgid "PDF only: set the MarkInfo values" +msgstr "PDFのみ:MarkInfoの値を設定" + +#: ../../document.rst:110 676fe5ab018c4804be2aaa38b1e204f9 +msgid ":meth:`Document.set_metadata`" +msgstr "" + +#: ../../document.rst:110 ae151ad30546419494fc89573935ccf8 +msgid "PDF only: set the metadata" +msgstr "PDFのみ:メタデータを設定" + +#: ../../document.rst:111 4f71e258fbbc487499c898f7f47bb6ad +msgid ":meth:`Document.set_oc`" +msgstr "" + +#: ../../document.rst:111 c7e1976d016c4281b4ab331ae8e31835 +msgid "PDF only: attach OCG/OCMD to image / form xobject" +msgstr "PDFのみ:画像/フォームオブジェクトにOCG/OCMDを添付" + +#: ../../document.rst:112 bd0743fc6a3b4a12888fa5410dad247f +msgid ":meth:`Document.set_ocmd`" +msgstr "" + +#: ../../document.rst:112 0b6eea3df4e4463f9dfc7363dbf1c750 +msgid "PDF only: create or update an :data:`OCMD`" +msgstr "PDFのみ::data:`OCMD` を作成または更新" + +#: ../../document.rst:113 9b289030e6ba4a408f9b6885bc7d6084 +msgid ":meth:`Document.set_page_labels`" +msgstr "" + +#: ../../document.rst:113 c182346afcb443298f9f547a73aed953 +msgid "PDF only: add/update page label definitions" +msgstr "PDFのみ:ページラベルの定義を追加/更新" + +#: ../../document.rst:114 8351d97b0f314b2796f9f5d44bc02fe7 +msgid ":meth:`Document.set_pagemode`" +msgstr "" + +#: ../../document.rst:114 bbf2b36051ed4d15a1b73d46f03a8bd8 +msgid "PDF only: set the PageMode" +msgstr "PDFのみ:PageModeを設定" + +#: ../../document.rst:115 c9fb88a8b924411abf8880bdd70abff2 +msgid ":meth:`Document.set_pagelayout`" +msgstr "" + +#: ../../document.rst:115 e52062324a8c45e58473a760fed17728 +msgid "PDF only: set the PageLayout" +msgstr "PDFのみ:PageLayoutを設定" + +#: ../../document.rst:116 5d017c37c2e64190b5afb244aa0a482d +msgid ":meth:`Document.set_toc_item`" +msgstr "" + +#: ../../document.rst:116 1e5474262af7458b95cee8ccb84f9f75 +msgid "PDF only: change a single TOC item" +msgstr "PDFのみ:単一のTOCアイテムを変更" + +#: ../../document.rst:117 d0972372c4e946188408fc234abf4e85 +msgid ":meth:`Document.set_toc`" +msgstr "" + +#: ../../document.rst:117 7ef8ccdc56bb4c8a82ed20f39637353d +msgid "PDF only: set the table of contents (TOC)" +msgstr "PDFのみ:目次(TOC)を設定" + +#: ../../document.rst:118 e0944240454d494d918b26f2999d717c +msgid ":meth:`Document.set_xml_metadata`" +msgstr "" + +#: ../../document.rst:118 23fbf8dfa547489fa1108c1f7197ec90 +msgid "PDF only: create or update document XML metadata" +msgstr "PDFのみ:ドキュメントXMLメタデータを作成または更新" + +#: ../../document.rst:119 90fa077517cb4f5faaf602d56fe4b452 +msgid ":meth:`Document.subset_fonts`" +msgstr "" + +#: ../../document.rst:119 70abbf122fb64339a4a88cc0c65664ef +msgid "PDF only: create font subsets" +msgstr "PDFのみ:フォントのサブセットを作成" + +#: ../../document.rst:120 d03dc93f0c7f40818a912b5cc28921ca +msgid ":meth:`Document.switch_layer`" +msgstr "" + +#: ../../document.rst:120 05ae17c693174be8bdcb476bbc15ee2b +msgid "PDF only: activate OC configuration" +msgstr "PDFのみ:OC設定をアクティブ化" + +#: ../../document.rst:121 e86f0f3639d841c783d823849d5a3510 +msgid ":meth:`Document.tobytes`" +msgstr "" + +#: ../../document.rst:121 c43b005dc4d74cccaaaa2ebe30ef6f36 +msgid "PDF only: writes document to memory" +msgstr "PDFのみ:ドキュメントをメモリに書き込む" + +#: ../../document.rst:122 7bbdaa6e17604b1aa022868ede6e58ed +msgid ":meth:`Document.xref_copy`" +msgstr "" + +#: ../../document.rst:122 0782cc7d4ffa48ee978c13d8cac66a78 +msgid "PDF only: copy a PDF dictionary to another :data:`xref`" +msgstr "PDFのみ:PDF辞書を別の :data:`xref` にコピー" + +#: ../../document.rst:123 1791d70719634cce80f3ebab218a636a +msgid ":meth:`Document.xref_get_key`" +msgstr "" + +#: ../../document.rst:123 98454fe4dea24c888a3a52a7c8ae39d3 +msgid "PDF only: get the value of a dictionary key" +msgstr "PDFのみ:辞書キーの値を取得" + +#: ../../document.rst:124 44517590b7b34f6f9ac105b17238c6b9 +msgid ":meth:`Document.xref_get_keys`" +msgstr "" + +#: ../../document.rst:124 0f586fe39fc348849f963a2f4e32fd55 +msgid "PDF only: list the keys of object at :data:`xref`" +msgstr "PDFのみ: :data:`xref` のオブジェクトのキーをリスト" + +#: ../../document.rst:125 e357bb4599284e0ba6461f21fc148392 +msgid ":meth:`Document.xref_object`" +msgstr "" + +#: ../../document.rst:125 32d9ce181e194d9e80b2730b7d06abde +msgid "PDF only: get the definition source of :data:`xref`" +msgstr "PDFのみ::data:`xref` の定義ソースを取得" + +#: ../../document.rst:126 83b882f9fa3d4cf99250d57ea0665bd4 +msgid ":meth:`Document.xref_set_key`" +msgstr "" + +#: ../../document.rst:126 0a9d823219d14cfe804d4ea413f6bc9f +msgid "PDF only: set the value of a dictionary key" +msgstr "PDFのみ:辞書キーの値を設定" + +#: ../../document.rst:127 a37b06e1bd5b4229bdef88d32cbeac18 +msgid ":meth:`Document.xref_stream_raw`" +msgstr "" + +#: ../../document.rst:127 3a9d569e447547cca4d20e0a94cba236 +msgid "PDF only: raw stream source at :data:`xref`" +msgstr "PDFのみ: :data:`xref` での生のストリームソース" + +#: ../../document.rst:128 b3cc5166877a4acca3e0ffbc761e65e0 +msgid ":meth:`Document.xref_xml_metadata`" +msgstr "" + +#: ../../document.rst:128 71e96c67b1ba496d8defc3f57c4df773 +msgid "PDF only: :data:`xref` of XML metadata" +msgstr "PDFのみ: XMLメタデータの :data:`xref` " + +#: ../../document.rst:129 bfd0316e4e03467e83a38140df621535 +msgid ":attr:`Document.chapter_count`" +msgstr "" + +#: ../../document.rst:129 088a1ba017424a9db0042551297143af +msgid "number of chapters" +msgstr "章の数" + +#: ../../document.rst:130 51b6ce42e0a442739c5b4408d080f8ca +msgid ":attr:`Document.FormFonts`" +msgstr "" + +#: ../../document.rst:130 4aed85b9c56045bab5c192da76510387 +msgid "PDF only: list of global widget fonts" +msgstr "PDFのみ:グローバルウィジェットフォントのリスト" + +#: ../../document.rst:131 3837c6499d9d4fa69e6a2e000ac17736 +msgid ":attr:`Document.is_closed`" +msgstr "" + +#: ../../document.rst:131 99c24a73b4a242c891763e488e795fc3 +msgid "has document been closed?" +msgstr "ドキュメントが閉じられていますか?" + +#: ../../document.rst:132 ba745f36d2144eadad4749b56a142f67 +msgid ":attr:`Document.is_dirty`" +msgstr "" + +#: ../../document.rst:132 025cc91b138c4f0eb040a19effe7dc8e +msgid "PDF only: has document been changed yet?" +msgstr "PDFのみ:ドキュメントは変更されましたか?" + +#: ../../document.rst:133 a912e0d667f24bae89113efab3188e8d +msgid ":attr:`Document.is_encrypted`" +msgstr "" + +#: ../../document.rst:133 717818c95e734be4b274062bcfa42492 +msgid "document (still) encrypted?" +msgstr "ドキュメントは(まだ)暗号化されていますか?" + +#: ../../document.rst:134 8013e4a7f7114d7cbd33b4f23e024b66 +msgid ":attr:`Document.is_fast_webaccess`" +msgstr "" + +#: ../../document.rst:134 ada7b0267fcb4090868a8ee233a2b0c0 +msgid "is PDF linearized?" +msgstr "PDFは線形化されていますか?" + +#: ../../document.rst:135 d414ece359ec44aebc0d5eb43f9bdde5 +msgid ":attr:`Document.is_form_pdf`" +msgstr "" + +#: ../../document.rst:135 dccb6d66877a4efb8020aefd510b49c9 +msgid "is this a Form PDF?" +msgstr "これはフォームPDFですか?" + +#: ../../document.rst:136 b9c1714543584987893f0bf7edd6e3f1 +msgid ":attr:`Document.is_pdf`" +msgstr "" + +#: ../../document.rst:136 d358fbd9f9284cd89a851b1a627998ac +msgid "is this a PDF?" +msgstr "これはPDFですか?" + +#: ../../document.rst:137 faf59fd9a8714ab4a3644e04735758ca +msgid ":attr:`Document.is_reflowable`" +msgstr "" + +#: ../../document.rst:137 43005e90fb534126849acc8ada1fa6f4 +msgid "is this a reflowable document?" +msgstr "これはリフローアブルドキュメントですか?" + +#: ../../document.rst:138 073c5717af6e43d5b64ae18539d151bb +msgid ":attr:`Document.is_repaired`" +msgstr "" + +#: ../../document.rst:138 9b833a63b555494399f9abe110658a58 +msgid "PDF only: has this PDF been repaired during open?" +msgstr "PDFのみ:このPDFは開いている間に修復されましたか?" + +#: ../../document.rst:139 ed52f62f7fa541f88c0fb68da623952e +msgid ":attr:`Document.last_location`" +msgstr "" + +#: ../../document.rst:139 35bf606a3272441597d7fb24393a1a19 +msgid "(chapter, pno) of last page" +msgstr "最後のページの(章、pno)" + +#: ../../document.rst:140 21058c5563f64f569a0bba2db35afcc8 +msgid ":attr:`Document.metadata`" +msgstr "" + +#: ../../document.rst:140 2d1a686f0de14e1fbeaaecd2251b8c41 +msgid "metadata" +msgstr "メタデータ" + +#: ../../document.rst:141 8e3e0aae2b0642a2ba808fae85ba8da8 +msgid ":attr:`Document.markinfo`" +msgstr "" + +#: ../../document.rst:141 d614147561964dd6b6edd08df4ffedc1 +msgid "PDF MarkInfo value" +msgstr "PDF MarkInfoの値" + +#: ../../document.rst:142 83722d1eaa87405ea91a811791d95b56 +msgid ":attr:`Document.name`" +msgstr "" + +#: ../../document.rst:142 82012043d2d34b3882dd16bfc94886ff +msgid "filename of document" +msgstr "ドキュメントのファイル名" + +#: ../../document.rst:143 7bcb74efd387441792f2140fb9bb474f +msgid ":attr:`Document.needs_pass`" +msgstr "" + +#: ../../document.rst:143 4f4aec9256394a34982cbfcc59c44209 +msgid "require password to access data?" +msgstr "データにアクセスするにはパスワードが必要ですか?" + +#: ../../document.rst:144 7d539505cdb84ca9bdec8258464eb4e6 +msgid ":attr:`Document.outline`" +msgstr "" + +#: ../../document.rst:144 9dba8b97f25a4e53af0632141b0e9193 +msgid "first `Outline` item" +msgstr "最初のアウトラインアイテム" + +#: ../../document.rst:145 640253eb6de14de6b93e3aa96d705306 +msgid ":attr:`Document.page_count`" +msgstr "" + +#: ../../document.rst:145 649e78a0c36841659c5924efe7fb70d9 +msgid "number of pages" +msgstr "ページ数" + +#: ../../document.rst:146 a761dddfb5a543a4a6087cfa3f42f7e3 +msgid ":attr:`Document.permissions`" +msgstr "" + +#: ../../document.rst:146 15566e532007445b97fd6d9f7cce06f2 +msgid "permissions to access the document" +msgstr "ドキュメントへのアクセス権限" + +#: ../../document.rst:147 2267f744a8c44b51a7a7aee4e957a6f3 +msgid ":attr:`Document.pagemode`" +msgstr "" + +#: ../../document.rst:147 d18b418186ce41e1a5987e00cbca7b5a +msgid "PDF PageMode value" +msgstr "PDF PageModeの値" + +#: ../../document.rst:148 5b93d429d9434d0ab688003e27e28302 +msgid ":attr:`Document.pagelayout`" +msgstr "" + +#: ../../document.rst:148 112638dcb6e54e839ae4b68703fa8568 +msgid "PDF PageLayout value" +msgstr "PDF PageLayoutの値" + +#: ../../document.rst:149 52e94e9c5815485b8b55620442fc2abc +msgid ":attr:`Document.version_count`" +msgstr "" + +#: ../../document.rst:149 ba864d474c3949e3b088dafc28abacd7 +msgid "PDF count of versions" +msgstr "PDFバージョンの数" + +#: ../../document.rst:152 bf54a791d9c948b59738a99781e0cec1 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../document.rst:173 1f4b30f1da9a4efa92e5c69516f674e4 +msgid "Changed in v1.14.13: support `io.BytesIO` for memory documents." +msgstr "v1.14.13 で変更: メモリドキュメント用に `io.BytesIO` をサポート。" + +#: ../../document.rst:174 f63f565c15b54d82957aabb3bbf3343e +msgid "" +"Changed in v1.19.6: Clearer, shorter and more consistent exception " +"messages. File type \"pdf\" is always assumed if not specified. Empty " +"files and memory areas will always lead to exceptions." +msgstr "" +"v1.19.6 で変更: より明確で短く、一貫性のある例外メッセージ。ファイルタイプが指定されていない場合、常にファイルタイプ \"pdf\" " +"が仮定されます。空のファイルとメモリ領域は常に例外を発生させます。" + +#: ../../document.rst:176 78a8e1b559ae4990978f6e91655bd5b4 +msgid "Creates a *Document* object." +msgstr "*Document* オブジェクトを作成します。" + +#: ../../document.rst:178 a6fc1bf94cb3436ab361c7d2304a872e +msgid "With default parameters, a **new empty PDF** document will be created." +msgstr "デフォルトのパラメータを使用すると、**新しい空の PDF** ドキュメントが作成されます。" + +#: ../../document.rst:179 35f9397e7eaa42f786dbe923a8ea01ec +msgid "" +"If *stream* is given, then the document is created from memory and, if " +"not a PDF, either *filename* or *filetype* must indicate its type." +msgstr "" +"*stream* が指定されている場合、ドキュメントはメモリから作成され、PDF でない場合は *filename* または *filetype*" +" のいずれかがそのタイプを示さなければなりません。" + +#: ../../document.rst:180 ab8bbaf8a97c478fa108f84b9541a47f +msgid "" +"If *stream* is `None`, then a document is created from the file given by " +"*filename*. Its type is inferred from the extension. This can be " +"overruled by *filetype.*" +msgstr "" +"*stream* が `None` の場合、*filename* " +"で指定されたファイルからドキュメントが作成されます。そのタイプは拡張子から推測されます。これは *filetype* によって上書きできます。" + +#: ../../document.rst 0286c3f0d5e84dc0abde96c41bd52a3a +#: 03c52ff1d7244706b2337b67fe3dfe51 046f7416d0904cff92295cf2459c8064 +#: 05c6720240e5464b8fb9a280f236107b 0783792c6ba8468dbd7135877c6b6aab +#: 07d21a6f3dce491e991ed06a5b8d8826 0f6085818f6f48cfac79132465bd2f7b +#: 16028dfa956e4a7da0a9fd70653cb766 1843dd57c01648b781ea599eb3412fac +#: 18833d694856455f9fe47e8af27c04fd 1ad1d883c5d5414c8f7446a934af47e3 +#: 2044fc5e6a8f40bdb867c725a1ebdddf 20ebdee863ae4cd182b5fb01f853dd8d +#: 210be495f1cc49bd85f94147ad4327f5 25faf31b63324891bb29150f5c0b509b +#: 27165bf78ca74339b208810610c0b7d2 2779f42946714b57a46bcbc9c2f19e3c +#: 31dc907616304bd3a410bc5e2420c063 383b9ccb35d841eaada8319646ee508f +#: 3893062e45544cb6bfdedb797aafc8b7 3d1aca1483374e8f91f61cdca30e5152 +#: 3fc4da2cda624ab0a1764081fbae174a 42a1bc85bd0949f69cc4ed5e6afd4462 +#: 42b2aa4de02f49f9b0fb88861ec788f0 473dd8ff1d234cbabb07e5d566f918e9 +#: 4a792f49277346b5a0d37cb4c5055737 4e187b3f083347ca946ce1cdff4f2b43 +#: 4f50297466694f2e8ef61164c9841b62 580d4930f51e4238aa065d8cb31e1a4d +#: 64150e7cbaff4d32b37ac31e23f577cd 6635bc28dbb64d098db6f12ba2798faa +#: 6db5fc653bd44f1189830521732e3a6e 6e1e830f79e740ab8497aa151a2a9190 +#: 74183e1a350940d9a155ce5fe51f4fcd 74c5f688a7644e7ba75184a498d70817 +#: 7536230b40a340318d057951aecf8b52 793fa2b077344e8fa6dca7dd619697fa +#: 7ba92ea60d7b415fb150a5599ae9a877 7fca962a57db4d0b9ee43eda5678c2e2 +#: 825aba59da734c128826477a5d9095c9 83bea4e9983e4e99a40f705f2df1fe08 +#: 85f0c817bfd9431ca56c14c7045c5ee5 8d4a54ff783d4c3ab2726512cd950443 +#: 8db92fb7354a4a3b9c87b418c6194dda 8ef8c1cab5a24bcc887442fbc68f44bf +#: 94d85eed3e0648048f19158c6e7baf02 993d96dcd1674abb92525ea003e45f41 +#: 9a7f2a3c8e8d410b974e7bf35a7399f6 a16681604e7649d39e7d719872db8910 +#: a46ef47c4a2b4dc8b6937e85953c9ff7 b385ef7af10a425cb416760f914fe9a2 +#: b3d26df75b6f42e59c2da00c103f2827 bf00aaf2cbe3411ebb820550fe340d7d +#: c4a5136b5094487a8fcd9966b690e1bb c60c5c4a2b8f4d82b7c18f4467fe46fa +#: c73d7869c84c488d868f3d02af62e2a8 c891f3d191ec4631b87e3a1f75e14fd8 +#: ca447ff8ec1246c5a82352eb5124ea89 cb848cc262904393890303a4be62b558 +#: cf7269d90a0042b1a7629cbcc163ca62 d657794be3de4385801678d52afd3d9e +#: e18cd7ece71341edbb4979e84e45339d e249c9403d6a4aeaa4b0a4e9ea7bfa1e +#: e2f0a2c12d494d8987b1daaa22024fec e67af000efde4b9bada19c349f11ef52 +#: eab0a13856ee4ce1b00da5297ce19c05 f02c7057b2764519b7dae97665ded230 +#: f2388a7f8a4546a68df9ab388b27a1a3 f38d711608c54c70888aecfc35eab7e0 +#: f4c7dbd9ec9a47cf8ea2c996d477e4b3 f7bdd9f4f7b74944b93b55fc99703970 +#: f870de0e2c86425687f2b627ea2fcd45 fdbecc7eef214601b8cfd13ce905cb14 +msgid "Parameters" +msgstr "パラメータ" + +#: ../../document.rst:182 344cd30ffab94080bb6edc5d83b91982 +msgid "" +"A UTF-8 string or *pathlib* object containing a file path. The document " +"type is inferred from the filename extension. If not present or not " +"matching :ref:`a supported type`, a PDF document is" +" assumed. For memory documents, this argument may be used instead of " +"`filetype`, see below." +msgstr "" +"ファイルパスを含む UTF-8 文字列または *pathlib* " +"オブジェクト。ドキュメントのタイプはファイル名の拡張子から推測されます。存在しないか、:ref:`サポートされてい`" +" ないタイプに一致しない場合、PDF ドキュメントが仮定されます。メモリドキュメントの場合、`filetype` " +"の代わりにこの引数を使用できます。詳細は以下を参照してください。" + +#: ../../document.rst:184 3c0f27937357412297565715d47494e5 +msgid "" +"A memory area containing a supported document. If not a PDF, its type " +"**must** be specified by either `filename` or `filetype`." +msgstr "" +"(bytes, bytearray, BytesIO) - :ref:`サポートされている` " +"ドキュメントを含むメモリ領域。PDF でない場合、そのタイプは `filename` または `filetype` のいずれかによって指定 " +"**しなければなりません** 。" + +#: ../../document.rst:186 379a0be077fb4537ac23aaf3578475a5 +msgid "" +"A string specifying the type of document. This may be anything looking " +"like a filename (e.g. \"x.pdf\"), in which case MuPDF uses the extension " +"to determine the type, or a mime type like *application/pdf*. Just using " +"strings like \"pdf\" or \".pdf\" will also work. May be omitted for PDF " +"documents, otherwise must match :ref:`a supported document " +"type`." +msgstr "" +"ドキュメントのタイプを指定する文字列。ファイル名のように見えるもの(例: \"x.pdf\")である必要があります。この場合、MuPDF " +"は拡張子を使用してタイプを判断します。*application/pdf* のような mime タイプも使用できます。単に \"pdf\" または " +"\".pdf\" のような文字列を使用することもできます。PDF ドキュメントの場合、省略可能であり、それ以外の場合は " +":ref:`サポートされている` ドキュメントタイプに一致しなければなりません。" + +#: ../../document.rst:188 3d424ff6e21346d28c153e69067e9ce0 +msgid "" +"a rectangle specifying the desired page size. This parameter is only " +"meaningful for documents with a variable page layout (\"reflowable\" " +"documents), like e-books or HTML, and ignored otherwise. If specified, it" +" must be a non-empty, finite rectangle with top-left coordinates (0, 0). " +"Together with parameter *fontsize*, each page will be accordingly laid " +"out and hence also determine the number of pages." +msgstr "" +"希望のページサイズを指定する矩形。このパラメータは可変ページレイアウト(\"reflowable\" ドキュメント)を持つドキュメント(電子書籍や" +" HTML のようなもの)にのみ意味があり、それ以外の場合は無視されます。指定される場合、非空で有限な矩形で、左上座標 (0, 0) " +"を持っていなければなりません。*fontsize* パラメータと一緒に、各ページのレイアウトが適切に行われ、したがってページ数も決定されます。" + +#: ../../document.rst:190 0fe87d9dfe03401083c99ff847b81951 +msgid "" +"may used together with ``height`` as an alternative to ``rect`` to specify " +"layout information." +msgstr "レイアウト情報を指定するための ``rect`` の代替として、 ``height`` と一緒に使用できます。" + +#: ../../document.rst:192 f416d1dd95eb4a53a918ba5ba81f5216 +msgid "" +"may used together with ``width`` as an alternative to ``rect`` to specify " +"layout information." +msgstr "レイアウト情報を指定するための ``rect`` の代替として、 ``width`` と一緒に使用できます。" + +#: ../../document.rst:194 dd1d7138ed3e4c2d9eaa21674b14f35b +msgid "" +"the default :data:`fontsize` for reflowable document types. This " +"parameter is ignored if none of the parameters ``rect`` or ``width`` and " +"``height`` are specified. Will be used to calculate the page layout." +msgstr "" +"可変ページドキュメントタイプのデフォルト :data:`fontsize`。 ``rect`` または ``width`` と ``height`` " +"のいずれのパラメータも指定されていない場合、このパラメータは無視されます。ページレイアウトを計算するために使用されます。" + +#: ../../document.rst 45cc1324293c427382027b70b78e7c0b +#: bb4a3e5c2a9d40fb8236630a23be1df8 +msgid "Raises" +msgstr "" + +#: ../../document.rst:196 bd848d5064054375b5d7a8697b9abaf3 +msgid "if the *type* of any parameter does not conform." +msgstr "任意のパラメータの *型* が準拠していない場合。" + +#: ../../document.rst:197 f5ecd504e9b945bfb5f6859484d5d7cd +msgid "" +"if the file / path cannot be found. Re-implemented as subclass of " +"`RuntimeError`." +msgstr "ファイル/パスが見つからない場合。`RuntimeError` のサブクラスとして再実装されました。" + +#: ../../document.rst:198 5b186861f3ac4071949004ebfdbec056 +msgid "" +"if the file / path is empty or the `bytes` object in memory has zero " +"length. A subclass of `FileDataError` and `RuntimeError`." +msgstr "" +"ファイル/パスが空であるか、メモリ内の `bytes` オブジェクトの長さがゼロの場合。`FileDataError` および " +"`RuntimeError` のサブクラス。" + +#: ../../document.rst:199 8c783cf4e1c84cfaab2b8a5416ddeaa5 +msgid "if an unknown file type is explicitly specified." +msgstr "明示的に未知のファイルタイプが指定された場合。" + +#: ../../document.rst:200 b85280cd01e24f38a80f854c40667360 +msgid "" +"if the document has an invalid structure for the given type -- or is no " +"file at all (but e.g. a folder). A subclass of `RuntimeError`." +msgstr "" +"ドキュメントが指定されたタイプに対して無効な構造を持っているか、ファイルではない場合(たとえば、フォルダの場合)。`RuntimeError` " +"のサブクラス。" + +#: ../../document.rst 09c6af5ead994ae088de1afcd7114ad0 +#: 0a7e2fb9ac974a8fa1330b9d42207434 257bba4be3d44207983043b9626c397e +#: 36b2976e0d2b45ee9a50e99d7e882e83 36e8fc90b1e0456482505ceb99cf6ed9 +#: 3c6769b601244a478a5bc2838cb67f6d 4716327cb45b4e6bb680a5820f8562f8 +#: 48b9103297964eb0bf17c066ddf12681 519da62766224aeca8ccb66a3de585d4 +#: 5724bf2ed05e4ebfac74bc656e4e0172 57a6dfd67f4448e9aac0447d22a9251b +#: 5853660403604dc0af9621499e243d50 5cd6a1be97a942798863ae963f79287b +#: 5f4d7f912cb34a489338493a13dbc3d9 5f9cfc7d64b34d9b9ce8c4f2335013e2 +#: 5fd74513ad25474c8d77d933c824b758 5fe85ad0ea55466b9bdddd9058a8ea28 +#: 6655b8ad15d54d928cf17ebd729cf7c2 66a16f1ad5784904b55d69d304cd9995 +#: 6bd3b715ba524ca2ab587261ba80e3d1 744403beaa1f4aa08046532091ee1f37 +#: 8015d61fb433457bba996791d597fcd2 88b73a4f9d994449a7f2808c2e9d68b1 +#: a876d69563074060853778a84f4600b6 a941ad0e330543d4a550b95e93ff04ad +#: b55b9e06ead3430fb403923249886ffc bc650397c19f4e04a79ccab5b3c03ddc +#: c8556ea17b2748c2b934345cf84afb5a ce7c38197ad948fdac4255ab831ea6b7 +#: d3db6ea957ef4ea1b0e5fe07ae90a242 d497765c20974eae9b34c0974c91f495 +#: d4f7245f363a4a7593a3349d3a71f3c0 d529ca24cf66418097369c4790d0a246 +#: d5786ad98c5d4c809046fe5de59f6c91 d7f5d3d25ad04aec84d923b8b622b503 +#: d9b6568ae90a4b1c8fbd7cdb6d9141a8 db4cc76afec44fc082c1bf3de1d5f02a +#: e30ca0a2a0e845cc8f88699400d963c8 e960957b179645e9825de9c876d889bf +#: eb87921d62f9444ca0d9ede840c95da6 eef4a580032b44bc9e8ef7af4343790b +#: f5bc242fb54e4485b66120d4021dd2c6 f767fb5a683d4191b5a808d3fbcd6d1d +#: fb3d343f355540518f482fbeb51a4e94 fdfd0e3cfcc9476f9cd14657fbb76237 +msgid "Returns" +msgstr "" + +#: ../../document.rst:202 3bc9fad57b344868b68081705e9374b4 +msgid "" +"A document object. If the document cannot be created, an exception is " +"raised in the above sequence. Note that PyMuPDF-specific exceptions, " +"`FileNotFoundError`, `EmptyFileError` and `FileDataError` are intercepted" +" if you check for `RuntimeError`. In case of problems you can see more " +"detail in the internal messages store: " +"`print(pymupdf.TOOLS.mupdf_warnings())` (which will be emptied by this " +"call, but you can also prevent this -- consult " +":meth:`Tools.mupdf_warnings`)." +msgstr "" + +#: ../../document.rst:202 68caa9eb7f9f4618854bff38104f0b7a +msgid "" +"A document object. If the document cannot be created, an exception is " +"raised in the above sequence. Note that PyMuPDF-specific exceptions, " +"`FileNotFoundError`, `EmptyFileError` and `FileDataError` are intercepted" +" if you check for `RuntimeError`." +msgstr "" +"ドキュメントオブジェクト。ドキュメントを作成できない場合、上記の順序で例外が発生します。PyMuPDF固有の例外、`FileNotFoundError`" +" 、`EmptyFileError` 、および `FileDataError` は、`RuntimeError` " +"をチェックする場合にキャッチされます。" + +#: ../../document.rst:204 abe6b00f2b0e41d8afec662814791054 +msgid "" +"In case of problems you can see more detail in the internal messages " +"store: `print(pymupdf.TOOLS.mupdf_warnings())` (which will be emptied by " +"this call, but you can also prevent this -- consult " +":meth:`Tools.mupdf_warnings`)." +msgstr "" +"問題が発生した場合、内部メッセージストアで詳細を確認できます: `print(pymupdf.TOOLS.mupdf_warnings())` " +"(この呼び出しによって空にされますが、これを防ぐこともできます - :meth:`Tools.mupdf_warnings` " +"を参照してください)。" + +#: ../../document.rst:206 c3e7fc0a3b1c40c8b64e20f41a52f890 +msgid "" +"Not all document types are checked for valid formats already at open " +"time. Raster images for example will raise exceptions only later, when " +"trying to access the content. Other types (notably with non-binary " +"content) may also be opened (and sometimes **accessed**) successfully -- " +"sometimes even when having invalid content for the format:" +msgstr "" +"すべてのドキュメントタイプがオープン時に有効なフォーマットで確認されるわけではありません。例えば、ラスター画像はコンテンツにアクセスしようとした際に例外を発生させることがあります。他のタイプ(特にバイナリでないコンテンツを持つもの)は、有効なコンテンツを持たない場合でも成功してオープンされ(そして時に" +" **アクセスされる** こともあります):" + +#: ../../document.rst:208 098d3641b2da4c6e97432afcfcedd2d2 +msgid "" +"HTM, HTML, XHTML: **always** opened, `metadata[\"format\"]` is \"HTML5\"," +" resp. \"XHTML\"." +msgstr "" +"HTM、HTML、XHTML: **常に** オープンされ、 `metadata[\"format\"]` は \"HTML5\" または " +"\"XHTML\" です。" + +#: ../../document.rst:209 cb5779d64b3941b4b8de3a3073313a4f +msgid "XML, FB2: **always** opened, `metadata[\"format\"]` is \"FictionBook2\"." +msgstr "XML、FB2: **常に** オープンされ、 `metadata[\"format\"]` は \"FictionBook2\" です。" + +#: ../../document.rst:211 37cc676e2ace4f47b00ccb0207a83536 +msgid "Overview of possible forms, note: `open` is a synonym of `Document`::" +msgstr "可能なフォームの概要、注: `open` は `Document` の同義語です::" + +#: ../../document.rst:228 56b467bff2714e36b4cd332b444fa962 +msgid "" +"Raster images with a wrong (but supported) file extension **are no " +"problem**. MuPDF will determine the correct image type when file " +"**content** is actually accessed and will process it without complaint." +msgstr "" +"サポートされているが正しくないファイル拡張子を持つラスター画像は **問題ありません** 。MuPDFは、ファイルの実際の **内容** " +"がアクセスされると正しい画像タイプを判断し、何もクレームをつけずに処理します。" + +#: ../../document.rst:230 0241155cc4cd4ae4af4e0094c55ac867 +msgid "" +"The Document class can be also be used as a **context manager**. Exiting the content manager will close the document automatically." +msgstr "Document クラスは **コンテキストマネージャ** としても使用できます。終了時に、ドキュメントは自動的に閉じられます。" + +#: ../../document.rst:246 ../../document.rst:256 ../../document.rst:316 +#: ../../document.rst:362 13bc087318584ac2a19c9ab8c10ec7a2 +#: 17e448ab020f4b4facd306f956642566 56384202f30e46618912166e026fb4b1 +#: 83d146659e664858863f57dea0164f84 +msgid "New in v1.18.4" +msgstr "v1.18.4 で新規追加" + +#: ../../document.rst:248 2e54d119ca394575bb3575941fa68307 +msgid "" +"Return the cross reference number of an :data:`OCG` or :data:`OCMD` " +"attached to an image or form xobject." +msgstr "画像またはフォームXObjectに添付された :data:`OCG` または :data:`OCMD` のクロスリファレンス番号を返します。" + +#: ../../document.rst:250 8e725470595447c789bc07dc930f3879 +msgid "" +"the :data:`xref` of an image or form xobject. Valid such cross reference " +"numbers are returned by :meth:`Document.get_page_images`, resp. " +":meth:`Document.get_page_xobjects`. For invalid numbers, an exception is " +"raised." +msgstr "" +"画像またはフォームXObjectの :data:`xref` " +"。有効なクロスリファレンス番号は、:meth:`Document.get_page_images` または " +":meth:`Document.get_page_xobjects` で返されます。無効な番号の場合、例外が発生します。" + +#: ../../document.rst 0a21b790773a4090bfc5271474a195cb +#: 10a83fa3eae840f9833eef241c187305 16f1acc2440a4c35993643d150c8c720 +#: 1da2b6257594426fb6f2128a9813217b 1ea1c881e2764afebb28bad05bb6f36a +#: 28019a4db4f44523a66404894b3d6284 2b101e8a7db1403e912fbebaf420dae3 +#: 326ca52e3c994ccbb396508d1da76b36 44c3a0a240f5440fb0d5739e1801471f +#: 44d067fdc96c46d295e7a9ab89bfa2ee 45da04136a514f089a6a57d98bcbfd6c +#: 5200762a664b4ecd9a1ba012cbbd7423 6307a143471a4627ac611101c51ccf81 +#: 707ddca3b0bc44a1b6a33e48ac2e1d75 74e17f1e1afe498881359c88ddc50cd5 +#: 784cb709707646a098224af800f0988a 79d859f2cef64b18be7b237003d52167 +#: 8211be426df143fba7835fb8a0feb445 844d58b3a3c14fb5afab11871d16cec0 +#: 8fd4c7db1f194c61b51e9a879f672de2 a0d6b2eff5d2470c8723accb1440e09a +#: a1d8dff87f4c4a3897dc6e0768a58e6d aa933b87d78145e3baef1ab3ca03d20f +#: abf811ba9c234b53a7c045649c4cf38b b0378c71d9564599a6752f6e65118889 +#: b2a81a6bbfdf4ed38f420a88772a122c b382338bd6514c78860e7e8477d135e8 +#: b3e0e99b68644128a50db0673fcb5cb3 d8c7da52fe5c44c1a5baf752140a73df +#: da29f51e370d4ed5bfe0b9509be69005 e1a11efad7a940f0bd4580da962ccb09 +#: e579746f01e145139f6a24613fb4abf3 ea602632da4a4f609cb240376c2bdba6 +#: ec1608b88dcb42c1b9cf558406991504 f7975af7da1343aa96a39a4525d0dc64 +#: f89c1d3148004904986ce3e8036e2e36 +msgid "Return type" +msgstr "戻り値の型" + +#: ../../document.rst:252 b8beca35910045089e4ce3f0c9b284a8 +msgid "" +"the cross reference number of an optional contents object or zero if " +"there is none." +msgstr "オプションコンテンツオブジェクトのクロスリファレンス番号、または存在しない場合はゼロ。" + +#: ../../document.rst:258 9bc5b4bd65d54421ac34b31abe50a92f +msgid "" +"If :data:`xref` represents an image or form xobject, set or remove the " +"cross reference number *ocxref* of an optional contents object." +msgstr "" +":data:`xref` が画像またはフォームXObjectを表す場合、オプションコンテンツオブジェクトのクロスリファレンス番号 *ocxref*" +" を設定または削除します。" + +#: ../../document.rst:260 2fde6ff5ddd54ba78ec4b6ccbd2d11ff +msgid "" +"the :data:`xref` of an image or form xobject [#f5]_. Valid such cross " +"reference numbers are returned by :meth:`Document.get_page_images`, resp." +" :meth:`Document.get_page_xobjects`. For invalid numbers, an exception is" +" raised." +msgstr "" +"画像またはフォームXObjectの :data:`xref` " +"[#f5]_。有効なクロスリファレンス番号は、:meth:`Document.get_page_images` または " +":meth:`Document.get_page_xobjects` で返されます。無効な番号の場合、例外が発生します。" + +#: ../../document.rst:261 fd703d6957164c338beff42d52bab016 +msgid "" +"the :data:`xref` number of an :data:`OCG` / :data:`OCMD`. If not zero, an" +" invalid reference raises an exception. If zero, any OC reference is " +"removed." +msgstr "" +":data:`OCG` / :data:`OCMD` の :data:`xref` " +"番号。ゼロでない場合、無効な参照は例外を発生させます。ゼロの場合、任意のOC参照が削除されます。" + +#: ../../document.rst:266 ../../document.rst:276 ../../document.rst:287 +#: ../../document.rst:299 ../../document.rst:373 ../../document.rst:385 +#: ../../document.rst:407 ../../document.rst:425 ../../document.rst:441 +#: 1ebb0dc696424238a5358253445a56d1 239d00cc10df4c3f9dcb020cbaec1a87 +#: 4329c49ae53c46999d95685858874053 4ef034b06e1e4159a4ad268f1e850cbd +#: 6d5e2a896148412db20e40813655a4ae 88730381f1674b4cabdaead24ba6242f +#: a231b15b2bed4928a31a3699dd40064e bb287da011604799865a06f7df9d20c7 +#: dff1d832e6514290be48c737ca4e782c +msgid "New in v1.18.3" +msgstr "v1.18.3 で新規追加" + +#: ../../document.rst:268 51a7ed15e0a34682892b1432d85babbb +msgid "" +"Show optional layer configurations. There always is a standard one, which" +" is not included in the response." +msgstr "オプションのレイヤー構成を表示します。常に標準のものが存在し、それは応答に含まれていません。" + +#: ../../document.rst:278 374ab6f84e2447acb856e3165482ffd9 +msgid "" +"Add an optional content configuration. Layers serve as a collection of ON" +" / OFF states for optional content groups and allow fast visibility " +"switches between different views on the same document." +msgstr "" +"オプションのコンテンツ構成を追加します。レイヤーはオプションコンテンツグループのON / " +"OFFの状態のコレクションとして機能し、同じドキュメントの異なるビュー間での高速な表示切り替えを可能にします。" + +#: ../../document.rst:280 35814160d4494e69a130e8185865d792 +msgid "arbitrary name." +msgstr "任意の名前。" + +#: ../../document.rst:281 4af6ab310ec64faba5a2ccf8a57b6233 +msgid "(optional) creating software." +msgstr "(オプション)作成ソフトウェア。" + +#: ../../document.rst:282 7c5cfab6ddf142e681e86769706ba6de +msgid "" +"a sequence of OCG :data:`xref` numbers which should be set to ON when " +"this layer gets activated. All OCGs not listed here will be set to OFF." +msgstr "" +"このレイヤーがアクティブになったときにONに設定されるOCG :data:`xref` " +"番号のシーケンス。ここでリストされていないすべてのOCGはOFFに設定されます。" + +#: ../../document.rst:289 a64b197f2f304289b326fb6dbacd1286 +msgid "" +"Switch to a document view as defined by the optional layer's " +"configuration number. This is temporary, except if established as " +"default." +msgstr "オプションレイヤーの構成番号によって定義されたドキュメントビューに切り替えます。これは一時的なものであり、デフォルトとして確立されていない限り、一時的なものです。" + +#: ../../document.rst:291 6f1f64b1fcb04da78b1c21e3e7636f80 +msgid "config number as returned by :meth:`Document.layer_configs`." +msgstr ":meth:`Document.layer_configs` によって返される構成番号。" + +#: ../../document.rst:292 c7f0ded359bf4871ba2ea0aec5a7371d +msgid "make this the default configuration." +msgstr "これをデフォルト構成にします。" + +#: ../../document.rst:294 02c3213ae6284cf7ade2a43256e7984e +msgid "" +"Activates the ON / OFF states of OCGs as defined in the identified layer." +" If ``as_default=True``, then additionally all layers, including the " +"standard one, are merged and the result is written back to the standard " +"layer, and **all optional layers are deleted**." +msgstr "" +"識別されたレイヤーで定義されたOCGのON / OFFの状態をアクティブにします。 ``as_default=True`` " +"の場合、追加で、標準のレイヤーを含むすべてのレイヤーがマージされ、結果が標準レイヤーに書き込まれ、 " +"**すべてのオプションレイヤーが削除されます** 。" + +#: ../../document.rst:301 356c8eaa49a949d8aef9fd5cc8a4c65c +msgid "" +"Add an optional content group. An OCG is the most important unit of " +"information to determine object visibility. For a PDF, in order to be " +"regarded as having optional content, at least one OCG must exist." +msgstr "オプションコンテンツグループを追加します。OCGはオブジェクトの表示を決定するための最も重要な情報単位です。PDFでは、オプションコンテンツとして扱われるためには、少なくとも1つのOCGが存在する必要があります。" + +#: ../../document.rst:303 90f3ea8909be4feaa40749f018f1d3a9 +msgid "arbitrary name. Will show up in supporting PDF viewers." +msgstr "任意の名前。サポートするPDFビューアに表示されます。" + +#: ../../document.rst:304 7f590f7231744e01b5571c95e86286d4 +msgid "layer configuration number. Default -1 is the standard configuration." +msgstr "レイヤー構成番号。デフォルトは-1で、標準構成です。" + +#: ../../document.rst:305 61a3bf7b996747b9a65344923ea9e9a8 +msgid "standard visibility status for objects pointing to this OCG." +msgstr "このOCGを指すオブジェクトの標準の表示状態。" + +#: ../../document.rst:306 ce09ba3d03f34b4283394531db465e0f +msgid "" +"a string or list of strings declaring the visibility intents. There are " +"two PDF standard values to choose from: \"View\" and \"Design\". Default " +"is \"View\". Correct **spelling is important**." +msgstr "" +"表示意図を宣言する文字列または文字列のリスト。PDF標準の2つの値から選択できます: \"View\" と \"Design\"。デフォルトは " +"\"View\" です。 **正確なスペルが重要です** 。" + +#: ../../document.rst:307 fc4ef2e67f69442fa039e22cdf455121 +msgid "" +"another influencer for OCG visibility. This will become part of the OCG's" +" `/Usage` key. There are two PDF standard values to choose from: " +"\"Artwork\" and \"Technical\". Default is \"Artwork\". Please only change" +" when required." +msgstr "" +"OCGの表示に影響を与えるもう一つの要因。これはOCGの `/Usage` キーの一部になります。PDF標準の2つの値から選択できます: " +"\"Artwork\" と \"Technical\"。デフォルトは \"Artwork\" です。必要な場合にのみ変更してください。" + +#: ../../document.rst:309 f5d476306bd44db4a28019e3a0b51898 +msgid "" +":data:`xref` of the created OCG. Use as entry for `oc` parameter in " +"supporting objects." +msgstr "作成されたOCGの :data:`xref` 。サポートオブジェクトの `oc` パラメータのエントリとして使用します。" + +#: ../../document.rst:311 4baf97664df64c3a98e3561af570ae84 +msgid "" +"Multiple OCGs with identical parameters may be created. This will not " +"cause problems. Garbage option 3 of :meth:`Document.save` will get rid of" +" any duplicates." +msgstr "" +"同一のパラメータを持つ複数のOCGを作成することができます。これは問題を引き起こしません。:meth:`Document.save` " +"のゴミオプション3を使用すれば、重複を削除できます。" + +#: ../../document.rst:318 db5bd12d3dbc420087d12d833f9c598e +msgid "" +"Create or update an :data:`OCMD`, **Optional Content Membership " +"Dictionary.**" +msgstr ":data:`OCMD` ( **Optional Content Membership Dictionary** )を作成または更新します。" + +#: ../../document.rst:320 0077e90b93324ba88c115d6ee3d5ac5c +msgid ":data:`xref` of the OCMD to be updated, or 0 for a new OCMD." +msgstr "更新するOCMDの :data:`xref` 、または新しいOCMDの場合は0" + +#: ../../document.rst:321 ac78f73e99864584b9fa09f88291a2cd +msgid "a sequence of :data:`xref` numbers of existing :data:`OCG` PDF objects." +msgstr "既存の :data:`OCG` PDFオブジェクトの :data:`xref` 番号のシーケンス。" + +#: ../../document.rst:322 de566168b1414d0a8987e77d28482d95 +msgid "" +"one of \"AnyOn\" (default), \"AnyOff\", \"AllOn\", \"AllOff\" (mixed or " +"lower case)." +msgstr "\"AnyOn\"(デフォルト)、\"AnyOff\"、\"AllOn\"、\"AllOff\"のいずれか。大文字小文字は区別されません。" + +#: ../../document.rst:323 b620c7ce19d049e781343d4fdd974df6 +msgid "" +"a \"visibility expression\". This is a list of arbitrarily nested other " +"lists -- see explanation below. Use as an alternative to the combination " +"*ocgs* / *policy* if you need to formulate more complex conditions." +msgstr "" +"\"表示条件式\"。これは他のリストを任意に入れ子にしたリストです - 説明は以下を参照してください。より複雑な条件を定式化する必要がある場合、 " +"*ocgs* / *policy* の組み合わせの代替として使用します。" + +#: ../../document.rst:325 2a5a3c59ac1b46699cbf05748642b252 +msgid "" +":data:`xref` of the OCMD. Use as `oc=xref` parameter in supporting " +"objects, and respectively in :meth:`Document.set_oc` or " +":meth:`Annot.set_oc`." +msgstr "" +"OCMDの :data:`xref` 。サポートオブジェクトの `oc=xref` " +"パラメータとして使用し、:meth:`Document.set_oc` または :meth:`Annot.set_oc` にもそれぞれ使用します。" + +#: ../../document.rst:329 f1338e535b114a4094ce381c9474405f +msgid "" +"Like an OCG, an OCMD has a visibility state ON or OFF, and it can be used" +" like an OCG. In contrast to an OCG, the OCMD state is determined by " +"evaluating the state of one or more OCGs via special forms of **boolean " +"expressions.** If the expression evaluates to true, the OCMD state is ON " +"and OFF for false." +msgstr "" +"OCGと同様に、OCMDには表示状態ONまたはOFFがあり、OCGのように使用できます。OCGとは異なり、OCMDの状態は **ブール式** " +"の特別な形式を使用して1つ以上のOCGの状態を評価することによって決定されます。式がtrueに評価される場合、OCMDの状態はONで、falseに評価される場合はOFFです。" + +#: ../../document.rst:331 dd33d0294d674e2a87fe2c44817c8eb5 +msgid "There are two ways to formulate OCMD visibility:" +msgstr "OCMDの表示を定式化する方法は2つあります:" + +#: ../../document.rst:333 a5477a5b28504196b445183f988a735f +msgid "" +"Use the combination of *ocgs* and *policy*: The *policy* value is " +"interpreted as follows:" +msgstr "*ocgs* と *policy* の組み合わせを使用する: *policy* の値は次のように解釈されます:" + +#: ../../document.rst:335 fc3e9f29df53403f82d22b7de723acdc +msgid "AnyOn -- (default) true if at least one OCG is ON." +msgstr "(デフォルト) 少なくとも1つのOCGがONの場合、true。" + +#: ../../document.rst:336 539a3f619a49428692f8e63ddf0fe484 +msgid "AnyOff -- true if at least one OCG is OFF." +msgstr "少なくとも1つのOCGがOFFの場合、true。" + +#: ../../document.rst:337 0fe13a500cc64a62a92dd7c5b643b9d6 +msgid "AllOn -- true if all OCGs are ON." +msgstr "すべてのOCGがONの場合、true。" + +#: ../../document.rst:338 008a93b661e449598b46988b85ae73f0 +msgid "AllOff -- true if all OCGs are OFF." +msgstr "すべてのOCGがOFFの場合、true。" + +#: ../../document.rst:340 6da86fb0daa44840bc4b416a4a6ff84c +msgid "" +"Suppose you want two PDF objects be displayed exactly one at a time (if " +"one is ON, then the other one must be OFF):" +msgstr "2つのPDFオブジェクトを、必ず1つずつ表示するようにしたい場合(1つがONの場合、他の1つはOFFにする必要があります):" + +#: ../../document.rst:342 ab33f5968f234adba5e93312924f2004 +msgid "" +"Solution: use an **OCG** for object 1 and an **OCMD** for object 2. " +"Create the OCMD via `set_ocmd(ocgs=[xref], policy=\"AllOff\")`, with the " +":data:`xref` of the OCG." +msgstr "" +"解決策: オブジェクト1用のOCGとオブジェクト2用のOCMDを使用します。 OCMDは `set_ocmd(ocgs=[xref], " +"policy=\"AllOff\")` を使用して作成し、OCGの :data:`xref` を指定します。" + +#: ../../document.rst:344 0ac3735619df4bc388e5ab64ff7a7fee +msgid "" +"Use the **visibility expression** *ve*: This is a list of two or more " +"items. The **first item** is a logical keyword: one of the strings " +"**\"and\"**, **\"or\"**, or **\"not\"**. The **second** and all " +"subsequent items must either be an integer or another list. An integer " +"must be the :data:`xref` number of an OCG. A list must again have at " +"least two items starting with one of the boolean keywords. This syntax is" +" a bit awkward, but quite powerful:" +msgstr "" +"**表示条件式** *ve* を使用する: これは2つ以上のアイテムから成るリストです。 **最初のアイテム** は論理キーワードで、文字列 " +"**\"and\"** 、**\"or\"** 、または **\"not\"** " +"のいずれかです。2番目以降のアイテムは整数または別のリストである必要があります。整数はOCGの :data:`xref` " +"番号でなければなりません。リストは再び少なくとも2つのアイテムから始まり、ブールキーワードのいずれかで始まる必要があります。この構文はやや厄介ですが、非常に強力です:" + +#: ../../document.rst:346 6cf5a382eb1040d9b9a6423cabc513ca +msgid "Each list must start with a logical keyword." +msgstr "各リストは論理キーワードで始まる必要があります。" + +#: ../../document.rst:347 a526e683a21846e6b38ff2825b77627f +msgid "" +"If the keyword is a **\"not\"**, then the list must have exactly two " +"items. If it is **\"and\"** or **\"or\"**, any number of other items may " +"follow." +msgstr "" +"キーワードが **\"not\"** の場合、リストは正確に2つのアイテムを持たなければなりません。 **\"and\"** または " +"**\"or\"** の場合、その後にいくつでも他のアイテムが続くことができます。" + +#: ../../document.rst:348 23524270e63d4257abebc4ab7d8d67a4 +msgid "" +"Items following the logical keyword may be either integers or again a " +"list. An *integer* must be the xref of an OCG. A *list* must conform to " +"the previous rules." +msgstr "" +"論理キーワードの後に続くアイテムは、整数または再びリストである必要があります。 *整数* はOCGのxref番号でなければなりません。 *リスト*" +" は前述のルールに従う必要があります。" + +#: ../../document.rst:350 85bca83dab23408e8a01ce2ef12bd648 +msgid "**Examples:**" +msgstr "**例:** " + +#: ../../document.rst:352 3475135ba7d44691ac3b4e166c84f12e +msgid "" +"`set_ocmd(ve=[\"or\", 4, [\"not\", 5], [\"and\", 6, 7]])`. This delivers " +"ON if the following is true: **\"4 is ON, or 5 is OFF, or 6 and 7 are " +"both ON\"**." +msgstr "" +"`set_ocmd(ve=[\"or\", 4, [\"not\", 5], [\"and\", 6, 7]])` " +"。これは次の条件がtrueの場合にONを返します: **\"4がON、または5がOFF、または6と7が両方ON** \"。" + +#: ../../document.rst:353 7666a329e9ed406da01627bd493da505 +msgid "" +"`set_ocmd(ve=[\"not\", xref])`. This has the same effect as the OCMD " +"example created under 1." +msgstr "`set_ocmd(ve=[\"not\", xref])` 。これは1で作成されたOCMDの例と同じ効果があります。" + +#: ../../document.rst:355 2d0514e36d0a49f084fdfe9c3f092459 +msgid "" +"For more details and examples see page 224 of :ref:`AdobeManual`. Also do" +" have a look at example scripts `here `_." +msgstr "" +"詳細と例については、 :ref:`AdobeManual` の224ページを参照してください。また、`こちら " +"`_ の例のスクリプトもご覧いただけます。" + +#: ../../document.rst:357 46760dbbe40c4a8e8a1ea0e9becd7ca0 +msgid "" +"Visibility expressions, `/VE`, are part of PDF specification version 1.6." +" So not all PDF viewers / readers may already support this feature and " +"hence will react in some standard way for those cases." +msgstr "" +"表示条件式 `/VE` " +"はPDF仕様バージョン1.6の一部です。したがって、すべてのPDFビューア/リーダーがすでにこの機能をサポートしているわけではなく" + +#: ../../document.rst:364 ee5a1a0e82be4360bc7870730a4c0c42 +msgid "Retrieve the definition of an :data:`OCMD`." +msgstr ":data:`OCMD` の定義を取得します。" + +#: ../../document.rst:366 2e78b0f033d94f7a8ad06d69691f107d +msgid "the :data:`xref` of the OCMD." +msgstr ":data:`xref` (int)- OCMDのxref。" + +#: ../../document.rst:368 07494fa965de4c8ab089d1e4b59f3130 +msgid "a dictionary with the keys :data:`xref`, *ocgs*, *policy* and *ve*." +msgstr ":data:`xref` 、*ocgs* 、*policy*、*ve* のキーを持つ辞書" + +#: ../../document.rst:375 73714edbf6c5428388be75bc838a65af +msgid "" +"List of optional content groups by status in the specified configuration." +" This is a dictionary with lists of cross reference numbers for OCGs that" +" occur in the arrays `/ON`, `/OFF` or in some radio button group " +"(`/RBGroups`)." +msgstr "" +"指定された構成内のステータス別オプションコンテンツグループのリスト。これは、OCGsのクロスリファレンス番号のリストを持つ辞書で、`/ON` " +"、`/OFF` 、またはラジオボタングループ (`/RBGroups`) のいずれかに出現するOCGsに対応しています。" + +#: ../../document.rst:377 75befc4dc3314c519e48b236807ec818 +msgid "the configuration layer (default is the standard config layer)." +msgstr "構成レイヤー(デフォルトは標準の構成レイヤー)。" + +#: ../../document.rst:387 5e3dd6212c8649359a28d9a9dde70e8a +msgid "Changed in v1.22.5: Support list of *locked* OCGs." +msgstr "バージョン1.22.5で変更: *ロックされた* OCGのリストをサポート。" + +#: ../../document.rst:389 830da4a5e3e947a7984cfa8af2405f40 +msgid "" +"Mass status changes of optional content groups. **Permanently** sets the " +"status of OCGs." +msgstr "オプションコンテンツグループの大量ステータス変更。OCGのステータスを **永続的に** 設定します。" + +#: ../../document.rst:391 8977a0c07e1f4b3984d013a61f77cdf7 +msgid "desired configuration layer, choose -1 for the default one." +msgstr "希望の構成レイヤー、デフォルトのものには-1を選択します。" + +#: ../../document.rst:392 aad2d8c73ab048cda5faaf2597d11459 +msgid "" +"list of :data:`xref` of OCGs to set ON. Replaces previous values. An " +"empty list will cause no OCG being set to ON anymore. Should be specified" +" if `basestate=\"ON\"` is used." +msgstr "" +"ONに設定するOCGの :data:`xref` " +"のリスト。以前の値を置換します。空のリストはもうOCGをONに設定しなくなります。`basestate=\"ON\"` " +"が使用される場合は指定する必要があります。" + +#: ../../document.rst:393 bf18370943344aff81bb71e9a7bf8fdb +msgid "" +"list of :data:`xref` of OCGs to set OFF. Replaces previous values. An " +"empty list will cause no OCG being set to OFF anymore. Should be " +"specified if `basestate=\"OFF\"` is used." +msgstr "" +"OFFに設定するOCGの :data:`xref` のリスト。以前の値を置換します。空のリストはもうOCGをOFFに設定しなくなります。 " +"`basestate=\"OFF\"` が使用される場合は指定する必要があります。" + +#: ../../document.rst:394 d10f9c3218b84dd0ab61c593cfeff50e +msgid "" +"state of OCGs that are not mentioned in *on* or *off*. Possible values " +"are \"ON\", \"OFF\" or \"Unchanged\". Upper / lower case possible." +msgstr "" +"*on* または *off* " +"で言及されていないOCGの状態。可能な値は「ON」、「OFF」または「Unchanged」です。大文字/小文字を区別できます。" + +#: ../../document.rst:395 8d3be651951a406893b3b70d1eca0912 +msgid "" +"a list of lists. Replaces previous values. Each sublist should contain " +"two or more OCG xrefs. OCGs in the same sublist are handled like buttons " +"in a radio button group: setting one to ON automatically sets all other " +"group members to OFF." +msgstr "" +"リストのリスト。以前の値を置換します。各サブリストには2つ以上のOCG " +"xrefを含める必要があります。同じサブリスト内のOCGはラジオボタングループ内のボタンのように処理され、1つをONに設定すると他のすべてのグループメンバーがOFFに設定されます。" + +#: ../../document.rst:396 0580d6eb3d53458792a20ec40a781b02 +msgid "a list of OCG xref number that cannot be changed by the user interface." +msgstr "ユーザーインターフェースで変更できないOCG xref番号のリスト。" + +#: ../../document.rst:398 ee670e6b96b343338a05af967d9158de +msgid "Values `None` will not change the corresponding PDF array." +msgstr "値 `None` は対応するPDF配列を変更しません。" + +#: ../../document.rst:409 ead81941d26a4414a8ffc13162026d51 +msgid "" +"Details of all optional content groups. This is a dictionary of " +"dictionaries like this (key is the OCG's :data:`xref`):" +msgstr "オプションコンテンツグループの詳細情報。これは次のような辞書の辞書です(キーはOCGの :data:`xref` です):" + +#: ../../document.rst:427 a53580de03a44f849f63fbaa4a4d1e73 +msgid "" +"Show the visibility status of optional content that is modifiable by the " +"user interface of supporting PDF viewers." +msgstr "サポートするPDFビューアのユーザーインターフェースで変更可能なオプションコンテンツの表示状態を表示します。" + +#: ../../document.rst:429 19657ea9ba8346388c0250aa3213298d +msgid "" +"Only reports items contained in the currently selected layer " +"configuration." +msgstr "現在選択されているレイヤー設定に含まれるアイテムのみを報告します。" + +#: ../../document.rst:436 c8dbb8ff82c34477ac9f19e279231952 +msgid "The meaning of the dictionary keys is as follows:" +msgstr "辞書のキーの意味は次の通りです:" + +#: ../../document.rst:432 845eede4ae824dfab1de3dc452eac6cd +msgid "*depth:* item's nesting level in the `/Order` array" +msgstr "*depth:* `/Order` 配列内のアイテムのネストレベル" + +#: ../../document.rst:433 06b6bc5cb5fe4c12accd9e8f3d7042a4 +msgid "*locked:* true if cannot be changed via user interfaces" +msgstr "*locked:* ユーザーインターフェースを介して変更できない場合はtrue" + +#: ../../document.rst:434 89558845cbd04e858106b655305de4f7 +msgid "*number:* running sequence number" +msgstr "*number:* 連続するシーケンス番号" + +#: ../../document.rst:435 88b798a54ea34344b4210ffe1181404d +msgid "*on:* item state" +msgstr "*on:* アイテムの状態" + +#: ../../document.rst:436 5d1f88eeb5b34806a19135eeaa76ba46 +msgid "*text:* text string or name field of the originating OCG" +msgstr "*text:* 元のOCGのテキスト文字列または名前フィールド" + +#: ../../document.rst:437 1b1ceafa27b441dda9196254eb0b3da7 +msgid "" +"*type:* one of \"label\" (set by a text string), \"checkbox\" (set by a " +"single OCG) or \"radiobox\" (set by a set of connected OCGs)" +msgstr "" +"*type:* \"label\"(テキスト文字列によって設定)、\"checkbox\"(単一のOCGによって設定)、または " +"\"radiobox\"(接続されたOCGのセットによって設定)のいずれか" + +#: ../../document.rst:443 b1c81ff21ecc42afbfab2179ec9cc0f2 +msgid "" +"Modify OC visibility status of content groups. This is analog to what " +"supporting PDF viewers would offer." +msgstr "コンテンツグループのOC表示状態を変更します。これは、サポートするPDFビューアが提供するものと同様です。" + +#: ../../document.rst:445 bd8000a233aa4115a722ee3b083ea52c +msgid "" +"Please note that visibility is **not** a property stored with the OCG. It" +" is not even information necessarily present in the PDF document at all. " +"Instead, the current visibility is **temporarily** set using the user " +"interface of some supporting PDF consumer software. The same type of " +"functionality is offered by this method." +msgstr "" +"表示状態はOCGとして保存されるプロパティ **ではない** " +"ことに注意してください。それはPDFドキュメントに必ずしも存在しない情報でもありません。代わりに、現在の表示状態はサポートするPDF消費者ソフトウェアのユーザーインターフェースを使用して**一時的に**" +" 設定されます。このメソッドでも同じタイプの機能が提供されます。" + +#: ../../document.rst:447 e212555b749b44a6b4f562a12fb95ef2 +msgid "To make **permanent** changes, use :meth:`Document.set_layer`." +msgstr "**永続的な** 変更を行うには、:meth:`Document.set_layer` を使用してください。" + +#: ../../document.rst:449 fbe306d1307848f1ab3938e738258066 +msgid "" +"either the sequence number of the item in list " +":meth:`Document.layer_configs` or the \"text\" of one of these items." +msgstr ":meth:`Document.layer_configs` リストのアイテムのシーケンス番号またはこれらのアイテムの「テキスト」。" + +#: ../../document.rst:450 3dbca83257a54c80af622790e7a2d830 +msgid "" +"`PDF_OC_ON` = set on (default), `PDF_OC_TOGGLE` = toggle on/off, " +"`PDF_OC_OFF` = set off." +msgstr "" +"`PDF_OC_ON` = ONに設定(デフォルト)、`PDF_OC_TOGGLE` = ON/OFFを切り替え、`PDF_OC_OFF` = " +"OFFに設定。" + +#: ../../document.rst:455 1a43efa52c434e2ba8c7b6f808bfeb1f +msgid "" +"Decrypts the document with the string *password*. If successful, document" +" data can be accessed. For PDF documents, the \"owner\" and the \"user\" " +"have different privileges, and hence different passwords may exist for " +"these authorization levels. The method will automatically establish the " +"appropriate (owner or user) access rights for the provided password." +msgstr "" +"文字列の *パスワード* " +"でドキュメントを複合化します。成功した場合、ドキュメントデータにアクセスできます。PDFドキュメントの場合、「オーナー」および「ユーザー」には異なる特権があり、したがってこれらの認証レベルに異なるパスワードが存在する可能性があります。このメソッドは提供されたパスワードに適切な(オーナーまたはユーザー)アクセス権を自動的に確立します。" + +#: ../../document.rst:457 3dd4a0f7ba2b4b669d92257a1504439f +msgid "owner or user password." +msgstr "オーナーまたはユーザーパスワード。" + +#: ../../document.rst:460 d31f4a53b1e04294bc6c2e1a1919d29d +msgid "" +"a positive value if successful, zero otherwise (the string does not match" +" either password). If positive, the indicator " +":attr:`Document.is_encrypted` is set to ``False``. **Positive** return " +"codes carry the following information detail: * 1 => authenticated, but " +"the PDF has neither owner nor user passwords. * 2 => authenticated with " +"the **user** password. * 4 => authenticated with the **owner** password. " +"* 6 => authenticated and both passwords are equal -- probably a rare " +"situation. .. note:: The document may be protected by an owner, but " +"**not** by a user password. Detect this situation via " +"`doc.authenticate(\"\") == 2`. This allows opening and reading the " +"document without authentication, but, depending on the " +":attr:`Document.permissions` value, other actions may be prohibited. " +"PyMuPDF (like MuPDF) in this case **ignores those restrictions**. So, -- " +"in contrast to any PDF viewers -- you can for example extract text and " +"add or modify content, even if the respective permission flags " +"`PDF_PERM_COPY`, `PDF_PERM_MODIFY`, `PDF_PERM_ANNOTATE`, etc. are set " +"off! It is your responsibility building a legally compliant application " +"where applicable." +msgstr "" + +#: ../../document.rst:460 6ba8e1cf3a644f9c8317e43603585098 +msgid "" +"a positive value if successful, zero otherwise (the string does not match" +" either password). If positive, the indicator " +":attr:`Document.is_encrypted` is set to ``False``. **Positive** return " +"codes carry the following information detail:" +msgstr "" +"成功した場合は正の値、それ以外はゼロです(文字列がどちらのパスワードとも一致しない場合)。正の戻り値がある場合、インジケータ " +":attr:`Document.is_encrypted` は ``False`` に設定されます。**正の** " +"戻り値コードには、次の情報の詳細が含まれています:" + +#: ../../document.rst:462 4b23d932af1f47dfb645a634cf51d717 +msgid "1 => authenticated, but the PDF has neither owner nor user passwords." +msgstr "1 => 認証済み、ただしPDFにはオーナーまたはユーザーパスワードがありません。" + +#: ../../document.rst:463 c410aa901a8a4722a786ecbda1773364 +msgid "2 => authenticated with the **user** password." +msgstr "2 => **ユーザー** パスワードで認証済み。" + +#: ../../document.rst:464 e3ce9b279da54a07a8cbbb5738228811 +msgid "4 => authenticated with the **owner** password." +msgstr "4 => **オーナー** パスワードで認証済み。" + +#: ../../document.rst:465 ebe25554a7144e6f8a924c50920cc86e +msgid "" +"6 => authenticated and both passwords are equal -- probably a rare " +"situation." +msgstr "6 => 認証済みで両方のパスワードが等しい-おそらくまれな状況" + +#: ../../document.rst:469 28ddd78b67804caa95d8b0f7f9b64892 +msgid "" +"The document may be protected by an owner, but **not** by a user " +"password. Detect this situation via `doc.authenticate(\"\") == 2`. This " +"allows opening and reading the document without authentication, but, " +"depending on the :attr:`Document.permissions` value, other actions may be" +" prohibited. PyMuPDF (like MuPDF) in this case **ignores those " +"restrictions**. So, -- in contrast to any PDF viewers -- you can for " +"example extract text and add or modify content, even if the respective " +"permission flags `PDF_PERM_COPY`, `PDF_PERM_MODIFY`, `PDF_PERM_ANNOTATE`," +" etc. are set off! It is your responsibility building a legally compliant" +" application where applicable." +msgstr "" +"ドキュメントはオーナーによって保護されている場合でも、ユーザーパスワードによって保護 **されていない** " +"場合があります。この状況は、`doc.authenticate(\"\") == 2` " +"を使用して検出できます。これにより、認証なしでドキュメントを開いて読むことができますが、:attr:`Document.permissions` " +"値に応じて、他のアクションが制限される場合があります。この場合、PyMuPDF(MuPDFと同様)はこれらの **制限を無視します** " +"。したがって、`PDF_PERM_COPY` 、`PDF_PERM_MODIFY` 、`PDF_PERM_ANNOTATE` " +"などの対応する許可フラグがオフに設定されていても、テキストを抽出したり、コンテンツを追加または変更したりすることができます!該当する場合、法的に適合するアプリケーションを構築する責任があります。" + +#: ../../document.rst:473 77b217ddc32c4f70ab3b022b40117cc1 +msgid "New in v 1.18.6" +msgstr "バージョン 1.18.6 での新機能" + +#: ../../document.rst:475 a3e58e7fdbc4431aa838c03548bdcab1 +msgid "" +"PDF only: Return a list of page numbers that have the specified label -- " +"note that labels may not be unique in a PDF. This implies a sequential " +"search through **all page numbers** to compare their labels." +msgstr "" +"PDF のみ:指定されたラベルを持つページ番号のリストを返します。ラベルは PDF " +"では一意でないことがあることに注意してください。これは、**すべてのページ番号** を逐次検索してそのラベルを比較することを意味します。" + +#: ../../document.rst:477 c8c2ee5c83ce40db824b6209b1e3809f +msgid "Implementation detail -- pages are **not loaded** for this purpose." +msgstr "実装の詳細 - この目的でページは **読み込まれません** 。" + +#: ../../document.rst:479 5d808a1230ca4174974e00810f24dda8 +msgid "the label to look for, e.g. \"vii\" (Roman number 7)." +msgstr "検索対象のラベル、例: \"vii\"(ローマ数字 7)。" + +#: ../../document.rst:480 63226c9d70b1477ba39669cffcfde1e2 +msgid "" +"stop after first hit. Useful e.g. if labelling is known to be unique, or " +"there are many pages, etc. The default will check every page number." +msgstr "最初の一致で停止します。ラベリングが一意であることがわかっている場合や、多くのページがある場合などに便利です。デフォルトではすべてのページ番号をチェックします。" + +#: ../../document.rst:482 f5d03ab00a5c4c7dbc8d30330c402ad3 +msgid "" +"list of page numbers that have this label. Empty if none found, no labels" +" defined, etc." +msgstr "このラベルを持つページ番号のリスト。見つからない場合やラベルが定義されていない場合などは空です。" + +#: ../../document.rst:487 ../../document.rst:777 ../../document.rst:798 +#: ../../document.rst:1835 ../../document.rst:1859 +#: 00e5d800806d4befb68e44539d5e3d2b 52abd04029c448dc8b66b342498ff51a +#: 795752c8c5f34590a03245bef3c22dc0 c326bff701194aa6b2b548f5892ebc2a +#: e61ef169a79d47fe917db70cea1692d1 +msgid "New in v1.18.7" +msgstr "バージョン 1.18.7 での新機能" + +#: ../../document.rst:489 5526c7633a0e4eb3b4372581a25d50eb +msgid "" +"PDF only: Extract the list of page label definitions. Typically used for " +"modifications before feeding it into :meth:`Document.set_page_labels`." +msgstr "" +"PDF のみ:ページラベルの定義のリストを抽出します。通常は :meth:`Document.set_page_labels` " +"に渡す前の変更に使用されます。" + +#: ../../document.rst:491 ad8d156e57d74eb8ae4aaa51f4863cbd +msgid "a list of dictionaries as defined in :meth:`Document.set_page_labels`." +msgstr ":meth:`Document.set_page_labels` で定義されたように辞書のリスト。" + +#: ../../document.rst:495 899254ea0c9c4ed99dbcfe83a5c1f99f +msgid "New in v1.18.6" +msgstr "バージョン 1.18.6 での新機能" + +#: ../../document.rst:497 a36b2a03a63f407a85e05b74ba57f024 +msgid "PDF only: Add or update the page label definitions of the PDF." +msgstr "PDF のみ:PDF のページラベルの定義を追加または更新します。" + +#: ../../document.rst:499 505683a75e8c4e8baf1766d51e0d26de +msgid "" +"a list of dictionaries. Each dictionary defines a label building rule and" +" a 0-based \"start\" page number. That start page is the first for which " +"the label definition is valid. Each dictionary has up to 4 items and " +"looks like `{'startpage': int, 'prefix': str, 'style': str, " +"'firstpagenum': int}` and has the following items. - `startpage`: (int) " +"the first page number (0-based) to apply the label rule. This key **must " +"be present**. The rule is applied to all subsequent pages until either " +"end of document or superseded by the rule with the next larger page " +"number. - `prefix`: (str) an arbitrary string to start the label with, " +"e.g. \"A-\". Default is \"\". - `style`: (str) the numbering style. " +"Available are \"D\" (decimal), \"r\"/\"R\" (Roman numbers, lower / upper " +"case), and \"a\"/\"A\" (lower / upper case alphabetical numbering: \"a\" " +"through \"z\", then \"aa\" through \"zz\", etc.). Default is \"\". If " +"\"\", no numbering will take place and the pages in that range will " +"receive the same label consisting of the `prefix` value. If prefix is " +"also omitted, then the label will be \"\". - `firstpagenum`: (int) start " +"numbering with this value. Default is 1, smaller values are ignored." +msgstr "" + +#: ../../document.rst:499 4b9ea342bc174d848a753d24bd9fd590 +msgid "" +"a list of dictionaries. Each dictionary defines a label building rule and" +" a 0-based \"start\" page number. That start page is the first for which " +"the label definition is valid. Each dictionary has up to 4 items and " +"looks like `{'startpage': int, 'prefix': str, 'style': str, " +"'firstpagenum': int}` and has the following items." +msgstr "" +"辞書のリスト。各辞書はラベル構築ルールと 0 ベースの \"start\" " +"ページ番号を定義します。その開始ページはラベル定義が有効になる最初のページです。各辞書は最大 4 つの項目を持ち、 `{'startpage': " +"int, 'prefix': str, 'style': str, 'firstpagenum': int}` という形式で、次の項目を持ちます。" + +#: ../../document.rst:501 459cdc3edc19474d9874b4176f2b770c +msgid "" +"`startpage`: (int) the first page number (0-based) to apply the label " +"rule. This key **must be present**. The rule is applied to all subsequent" +" pages until either end of document or superseded by the rule with the " +"next larger page number." +msgstr "" +"`startpage` :(int)ラベルルールを適用する最初のページ番号(0 ベース)。このキーは **存在する必要があります** " +"。ルールはドキュメントの終端に達するか、次の大きなページ番号を持つルールに置き換えられるまで、すべての後続ページに適用されます。" + +#: ../../document.rst:502 c64500585bde414a83c7547ded5715cf +msgid "" +"`prefix`: (str) an arbitrary string to start the label with, e.g. \"A-\"." +" Default is \"\"." +msgstr "`prefix` :(str)ラベルの先頭に付ける任意の文字列、例: \"A-\"。デフォルトは \"\" です。" + +#: ../../document.rst:503 e10dbc001fbc4f398a7f13ef7fd444c0 +msgid "" +"`style`: (str) the numbering style. Available are \"D\" (decimal), " +"\"r\"/\"R\" (Roman numbers, lower / upper case), and \"a\"/\"A\" (lower /" +" upper case alphabetical numbering: \"a\" through \"z\", then \"aa\" " +"through \"zz\", etc.). Default is \"\". If \"\", no numbering will take " +"place and the pages in that range will receive the same label consisting " +"of the `prefix` value. If prefix is also omitted, then the label will be " +"\"\"." +msgstr "" +"style:(str)番号付けのスタイル。使用できるのは \"D\"(10 進数)、\"r\"/\"R\"(ローマ数字、小文字/大文字)、および " +"\"a\"/\"A\"(小文字/大文字のアルファベット番号: \"a\" から \"z\"、次に \"aa\" から \"zz\" " +"など)。デフォルトは \"\" です。 \"\" " +"の場合、番号付けは行われず、その範囲のページはプレフィックス値から成る同じラベルを受け取ります。`prefix` も省略された場合、ラベルは " +"\"\" になります。" + +#: ../../document.rst:504 c6e67023eb8d4238b1d1f30abd874de9 +msgid "" +"`firstpagenum`: (int) start numbering with this value. Default is 1, " +"smaller values are ignored." +msgstr "`firstpagenum` :(int)この値から番号付けを開始します。デフォルトは 1 で、小さい値は無視されます。" + +#: ../../document.rst:506 1b1e49880cf049f6a1ff93045b392960 +msgid "For example::" +msgstr "例::" + +#: ../../document.rst:511 8f31f7e7e4e444e49e3118f5ecbdb058 +msgid "" +"will generate the labels \"A-10\", \"A-11\", \"A-12\", \"A-13\", \"1\", " +"\"2\", \"3\", ... for pages 6, 7 and so on until end of document. Pages 0" +" through 5 will have the label \"\"." +msgstr "" +"次のラベルを生成します: \"A-10\"、\"A-11\"、\"A-12\"、\"A-13\"、\"1\"、\"2\"、\"3\"、... " +"ページ 6、7、など、ドキュメントの終了まで続きます。ページ 0 から 5 まではラベル \"A-\" が付きます。" + +#: ../../document.rst:516 ../../document.rst:530 +#: 772aa52715f6499c891d664f7529b9c8 bdf83f312d9e4d09810192fc4a8decbd +msgid "New in v.1.17.3" +msgstr "バージョン 1.17.3 での新機能" + +#: ../../document.rst:518 66d372f32ecb4e74984de4b4a7a7dd6b +msgid "" +"Return a page pointer in a reflowable document. After re-layouting the " +"document, the result of this method can be used to find the new location " +"of the page." +msgstr "リフロータブルなドキュメント内のページポインターを返します。ドキュメントの再レイアウト後、このメソッドの結果はページの新しい位置を見つけるために使用できます。" + +#: ../../document.rst:520 f23213f179fe40b7a72f40464cf851a9 +msgid "Do not confuse with items of a table of contents, TOC." +msgstr "目次の項目とは混同しないでください。" + +#: ../../document.rst:522 71671bfa13e545b682cc52bf1695d0fc +msgid "page location. Must be a valid *(chapter, pno)*." +msgstr "ページの位置。有効なものである必要があります *(章、ページ番号)* 。" + +#: ../../document.rst:525 087aa911d9a744c9ae6a67741197e987 +msgid "" +"a long integer in pointer format. To be used for finding the new location" +" of the page after re-layouting the document. Do not touch or re-assign." +msgstr "ポインターフォーマットの長整数。ドキュメントの再レイアウト後のページの新しい位置を見つけるために使用されます。変更しないでください。" + +#: ../../document.rst:532 98941bcf4dda4435a226bfaa556008a7 +msgid "Return the new page location after re-layouting the document." +msgstr "ドキュメントの再レイアウト後の新しいページの位置を返します。" + +#: ../../document.rst:534 dc0f0731eb2148669b8c227ae9190bb8 +msgid "created by :meth:`Document.make_bookmark`." +msgstr ":meth:`Document.make_bookmark` によって作成されたもの。" + +#: ../../document.rst:537 f3490537b6b144d5b8e63cd8888773c6 +msgid "the new (chapter, pno) of the page." +msgstr "ページの新しい(章、ページ番号)。" + +#: ../../document.rst:542 ../../document.rst:554 ../../document.rst:565 +#: 21d1d657e4ee444f986b635f805fcfd5 a193e53c41e549ea82e7d96ff9b92231 +#: b122ce60fb3a4d5abf86bceb9077ec57 +msgid "New in v.1.17.0" +msgstr "バージョン 1.17.0 での新機能" + +#: ../../document.rst:544 2615090100224dc2a3d9267d0ec58241 +msgid "Return the number of pages of a chapter." +msgstr "章のページ数を返します。" + +#: ../../document.rst:546 7522ca0d7afc4bfba33b976a4262edc1 +msgid "the 0-based chapter number." +msgstr "0 ベースの章番号。" + +#: ../../document.rst:549 11816b8fe558459f9a9675333f683d11 +msgid "" +"number of pages in chapter. Relevant only for document types with chapter" +" support (EPUB currently)." +msgstr "章内のページ数。章のサポートを持つドキュメントタイプに関連します(現在は EPUB のみ)。" + +#: ../../document.rst:556 2179f23cf070467195c75e4b7d33f4ee +msgid "Return the location of the following page." +msgstr "次のページの位置を返します。" + +#: ../../document.rst:558 ../../document.rst:569 +#: 07705b8ee93b48cb96aa5fd0b7235f60 3685a64b0c8c41dcb95fe4cb7a355fc6 +msgid "" +"the current page id. This must be a tuple *(chapter, pno)* identifying an" +" existing page." +msgstr "現在のページ ID。これは既存のページを識別するタプル *(章、ページ番号)* である必要があります。" + +#: ../../document.rst:560 59412c3cfa2a4986bacf02ce94022ca7 +msgid "" +"The tuple of the following page, i.e. either *(chapter, pno + 1)* or " +"*(chapter + 1, 0)*, **or** the empty tuple *()* if the argument was the " +"last page. Relevant only for document types with chapter support (EPUB " +"currently)." +msgstr "" +"次のページのタプル、つまり *(章、ページ番号 + 1)* または *(章 + 1、0)* 、**または** " +"引数が最後のページである場合は空のタプル *()* 。章のサポートを持つドキュメントタイプに関連します(現在は EPUB のみ)。" + +#: ../../document.rst:567 eca860a95ca34478bb570ab041b828f6 +msgid "Return the locator of the preceding page." +msgstr "前のページの位置を返します。" + +#: ../../document.rst:571 53c5a2b7e00f49538c6330c1844ef619 +msgid "" +"The tuple of the preceding page, i.e. either *(chapter, pno - 1)* or the " +"last page of the preceding chapter, **or** the empty tuple *()* if the " +"argument was the first page. Relevant only for document types with " +"chapter support (EPUB currently)." +msgstr "" +"前のページのタプル、つまり *(章、ページ番号 - 1)* **または** 前の章の最後のページ、または引数が最初のページである場合は空のタプル " +"*()* 。章のサポートを持つドキュメントタイプに関連します(現在は EPUB のみ)。" + +#: ../../document.rst:576 2deebc74649d4656bed4b761c2dd19fb +msgid "" +"Changed in v1.17.0: For document types supporting a so-called \"chapter " +"structure\" (like EPUB), pages can also be loaded via the combination of " +"chapter number and relative page number, instead of the absolute page " +"number. This should **significantly speed up access** for large " +"documents." +msgstr "" +"バージョン 1.17.0 で変更: \"章の構造\" " +"をサポートするドキュメントタイプ(例:EPUB)の場合、絶対ページ番号の代わりに章番号と相対ページ番号の組み合わせを使用してページをロードすることもできます。これにより、大きなドキュメントへの" +" **アクセスが大幅に高速化される** はずです。" + +#: ../../document.rst:578 377222ade7c945c7bde7b14244bece04 +msgid "" +"Create a :ref:`Page` object for further processing (like rendering, text " +"searching, etc.)." +msgstr "さらなる処理(レンダリング、テキスト検索など)のための :ref:`Page` オブジェクトを作成します。" + +#: ../../document.rst:580 f36a12d37f79485d87c4bf4a515e0e4f +msgid "" +"*(Changed in v1.17.0)* Either a 0-based page number, or a tuple " +"*(chapter, pno)*. For an **integer**, any `-∞ < page_id < page_count` is " +"acceptable. While page_id is negative, :attr:`page_count` will be added " +"to it. For example: to load the last page, you can use " +"*doc.load_page(-1)*. After this you have page.number = doc.page_count - " +"1. For a tuple, *chapter* must be in range " +":attr:`Document.chapter_count`, and *pno* must be in range " +":meth:`Document.chapter_page_count` of that chapter. Both values are " +"0-based. Using this notation, :attr:`Page.number` will equal the given " +"tuple. Relevant only for document types with chapter support (EPUB " +"currently)." +msgstr "" + +#: ../../document.rst:580 1512338ecdd04c348659a0323f7aecdf +msgid "*(Changed in v1.17.0)*" +msgstr "*(バージョン 1.17.0 で変更)*" + +#: ../../document.rst:582 17f834ccf97443c5accfad63c19f5f48 +msgid "" +"Either a 0-based page number, or a tuple *(chapter, pno)*. For an " +"**integer**, any `-∞ < page_id < page_count` is acceptable. While page_id" +" is negative, :attr:`page_count` will be added to it. For example: to " +"load the last page, you can use *doc.load_page(-1)*. After this you have " +"page.number = doc.page_count - 1." +msgstr "" +"0ベースのページ番号またはタプル *(章、ページ番号)* のいずれか。**整数** の場合、`-∞ < page_id < page_count`" +" が許容されます。`page_id` が負の場合、 :attr:`page_count` " +"が追加されます。たとえば、最後のページを読み込むには、*doc.load_page(-1)* を使用できます。これにより、 " +"`page.number = doc.page_count - 1` となります。" + +#: ../../document.rst:584 b79c95fb0ede4718a95c88960fa53e35 +msgid "" +"For a tuple, *chapter* must be in range :attr:`Document.chapter_count`, " +"and *pno* must be in range :meth:`Document.chapter_page_count` of that " +"chapter. Both values are 0-based. Using this notation, " +":attr:`Page.number` will equal the given tuple. Relevant only for " +"document types with chapter support (EPUB currently)." +msgstr "" +"タプルの場合、*chapter* は :attr:`Document.chapter_count` の範囲内になければならず、*ページ番号* " +"はその章の :meth:`Document.chapter_page_count` " +"の範囲内になければなりません。両方の値は0から始まります。この表記法を使用すると、:attr:`Page.number` " +"は指定されたタプルに等しくなります。章のサポートを持つドキュメントタイプに関連します(現在は EPUB のみ)。" + +#: ../../document.rst:586 ../../document.rst:617 ../../document.rst:1390 +#: 50ae7cbcdb4742c29d5a033a4fc933d7 55b4cd344aef490cb3c3d8708149ae5d +#: 7e748f42d2fb4b0d80b35cc1c60bfecd +msgid ":ref:`Page`" +msgstr "" + +#: ../../document.rst:590 fd0f6c32991d41d3ab32ce52f3104091 +msgid "" +"Documents also follow the Python sequence protocol with page numbers as " +"indices: *doc.load_page(n) == doc[n]*." +msgstr "" +"ドキュメントはページ番号をインデックスとするPythonのシーケンスプロトコルに従います:indices: *doc.load_page(n) " +"== doc[n]* 。" + +#: ../../document.rst:592 8dea69cd0aad4a66b0696a76fa8cc82e +msgid "" +"For **absolute page numbers** only, expressions like *\"for page in doc: " +"...\"* and *\"for page in reversed(doc): ...\"* will successively yield " +"the document's pages. Refer to :meth:`Document.pages` which allows " +"processing pages as with slicing." +msgstr "" +"**絶対ページ番号** の場合、式 *\"for page in doc: …\"* および *\"for page in " +"reversed(doc): …\"* は文書のページを順次生成します。スライシングのようにページを処理できる " +":meth:`Document.pages` を参照してください。" + +#: ../../document.rst:594 9edaee625ebb486bb9fe0a059a073385 +msgid "" +"You can also use index notation with the new chapter-based page " +"identification: use *page = doc[(5, 2)]* to load the third page of the " +"sixth chapter." +msgstr "" +"新しい章ベースのページ識別にもインデックス表記を使用できます: *page = doc[(5, 2)]* として、 " +"6番目の章の3番目のページを読み込むことができます。" + +#: ../../document.rst:596 f1304b560624489ea0557ed7cf6de939 +msgid "" +"To maintain a consistent API, for document types not supporting a chapter" +" structure (like PDFs), :attr:`Document.chapter_count` is 1, and pages " +"can also be loaded via tuples *(0, pno)*. See this [#f3]_ footnote for " +"comments on performance improvements." +msgstr "" +"一貫性のあるAPIを維持するため、章の構造をサポートしていないドキュメントタイプ(PDFなど)の場合、:attr:`Document.chapter_count`" +" は1であり、ページはタプル *(0、pno)* を使用しても読み込むことができます。パフォーマンスの改善に関するコメントについては、この " +"[#f3]_ の注釈を参照してください。" + +#: ../../document.rst:601 68c322ee2c48453fb89596b42ec3a41b +msgid "" +"PDF only: Change the color component counts for all object types text, " +"image and vector graphics for all pages." +msgstr "" + +#: ../../document.rst:603 8548a955df7248bab5796a7b981e2cc9 +msgid "" +"desired color space indicated by the number of color components: 1 = " +"DeviceGRAY, 3 = DeviceRGB, 4 = DeviceCMYK." +msgstr "" + +#: ../../document.rst:605 04afac121dd941a29f03623c8a265035 +msgid "" +"The typical use case is 1 (DeviceGRAY) which converts the PDF to " +"grayscale." +msgstr "" + +#: ../../document.rst:610 c8d895eb5eba47ddb5ea0dabeec141e7 +msgid "New in v1.16.10" +msgstr "バージョン 1.16.10 での新機能" + +#: ../../document.rst:612 943fbb7b4c1b4a81b4dce6a7f0ee672e +msgid "" +"PDF only: Provide a new copy of a page after finishing and updating all " +"pending changes." +msgstr "PDF のみ:保留中のすべての変更を終了および更新した後、ページの新しいコピーを提供します。" + +#: ../../document.rst:614 4e8d792de4c1453680688ea24e51416a +msgid "page object." +msgstr "ページオブジェクト。" + +#: ../../document.rst:619 11aef2ea90ae4af690da0f74c161d5a6 +msgid "" +"a new copy of the same page. All pending updates (e.g. to annotations or " +"widgets) will be finalized and a fresh copy of the page will be loaded. " +".. note:: In a typical use case, a page :ref:`Pixmap` should be taken " +"after annotations / widgets have been added or changed. To force all " +"those changes being reflected in the page structure, this method re-" +"instates a fresh copy while keeping the object hierarchy \"document -> " +"page -> annotations/widgets\" intact." +msgstr "" + +#: ../../document.rst:619 e6d5b5d4da4f4dbfb4e0225583f8ea5a +msgid "" +"a new copy of the same page. All pending updates (e.g. to annotations or " +"widgets) will be finalized and a fresh copy of the page will be loaded." +msgstr "同じページの新しいコピー。すべての保留中の更新(注釈やウィジェットなど)が確定し、ページの新しいコピーが読み込まれます。" + +#: ../../document.rst:621 885aa43c28704aeb9e16090d8df6c920 +msgid "" +"In a typical use case, a page :ref:`Pixmap` should be taken after " +"annotations / widgets have been added or changed. To force all those " +"changes being reflected in the page structure, this method re-instates a " +"fresh copy while keeping the object hierarchy \"document -> page -> " +"annotations/widgets\" intact." +msgstr "" +"典型的なユースケースでは、注釈やウィジェットが追加または変更された後にページの :ref:`Pixmap` " +"を取得する必要があります。これらの変更がページ構造に反映されるようにするために、このメソッドは \"document -> page -> " +"annotations/widgets\" のオブジェクト階層を保持したまま、新しいコピーを再設定します。" + +#: ../../document.rst:626 cf5c3c95d8664db2b7be4ba1f1d6c5e2 +msgid "PDF only: Convert destination names into a Python dict." +msgstr "PDFのみ: ページの目的地名をPythonの辞書に変換します" + +#: ../../document.rst:628 2d4ffec227b64c0899f1d4af952cf9b8 +msgid "" +"A dictionary with the following layout: * *key*: (str) the name. * " +"*value*: (dict) with the following layout: * \"page\": target page " +"number (0-based). If no page number found -1. * \"to\": (x, y) target" +" point on page. Currently in PDF coordinates, i.e. point (0,0) is " +"the bottom-left of the page. * \"zoom\": (float) the zoom factor." +" * \"dest\": (str) only present if the target location on the page " +"has not been provided as \"/XYZ\" or if no page number was found. " +"Examples:: { '__bookmark_1': {'page': 0, 'to': (0.0, 541.0)," +" 'zoom': 0.0}, '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), " +"'zoom': 0.0}, } or:: { " +"'21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 " +"1486 0'}, ... }" +msgstr "" + +#: ../../document.rst:629 8c08f8d4386e4cac808542d05c7c07b5 +msgid "A dictionary with the following layout:" +msgstr "以下のキーを持つ辞書:" + +#: ../../document.rst:631 a1744f0351f949cf8179d23b302afcd1 +msgid "*key*: (str) the name." +msgstr "キー: (str) 名前" + +#: ../../document.rst:637 8df122bec3d142f8a607917f44f38332 +msgid "*value*: (dict) with the following layout:" +msgstr "以下のキーを持つ辞書:" + +#: ../../document.rst:633 525df66fc21943b0a24379c72dbac672 +msgid "\"page\": target page number (0-based). If no page number found -1." +msgstr "\"page\": 対象のページ番号(0から始まる)。ページ番号が見つからない場合は-1。" + +#: ../../document.rst:634 59f559f2579f4863932532a135b4c20f +msgid "" +"\"to\": (x, y) target point on page. Currently in PDF coordinates, i.e. " +"point (0,0) is the bottom-left of the page." +msgstr "\"to\": (x, y) ページ上のターゲットポイント。現在はPDF座標であり、つまり点(0,0)がページの左下になります。" + +#: ../../document.rst:636 b1183d130eb1479f9b3fdfecb2f7c536 +msgid "\"zoom\": (float) the zoom factor." +msgstr "\"zoom\": ターゲットページ上のズームファクター(float)。" + +#: ../../document.rst:637 dce3f3fcf79d4221a915941c4f265a7c +msgid "" +"\"dest\": (str) only present if the target location on the page has not " +"been provided as \"/XYZ\" or if no page number was found." +msgstr "\"dest\": (str) \"/XYZ\" としてターゲット位置が指定されていない場合や、ページ番号が見つからない場合にのみ存在します。" + +#: ../../document.rst:639 3fa6a208d82447d396afcf33d9b52c8e +msgid "Examples::" +msgstr "例::" + +#: ../../document.rst:646 52809bb95d8d44639dec4855045d4835 +msgid "or::" +msgstr "または:" + +#: ../../document.rst:653 54e150b680c44fc28555686e749b4dbf +msgid "" +"All names found in the catalog under keys \"/Dests\" and \"/Names/Dests\"" +" are included." +msgstr "キー \"/Dests\" および \"/Names/Dests\" の下でカタログ内に見つかるすべての名前が含まれます。" + +#: ../../document.rst:656 029d932ed78d461fbf4f7e23ac10d40d +msgid "New in v1.23.6" +msgstr "v1.23.6で新登場" + +#: ../../document.rst:661 ../../document.rst:671 ../../document.rst:1104 +#: ../../document.rst:1114 ../../document.rst:1126 +#: 42da0868d52045f4912cdc253b0d58d0 43dc347381244a1bb0247e19dee82361 +#: bde0f345fc704856bfa6922bc3c598f2 c8dfe172f6eb4880b9559bb1a41a9a66 +#: fd31829450cb409580bc03f15f672add +msgid "New in v1.17.7" +msgstr "バージョン 1.17.7 での新機能" + +#: ../../document.rst:663 aa60e0724f6542b89b15cb221c5e4492 +msgid "" +"PDF only: Return the unrotated page rectangle -- **without loading the " +"page** (via :meth:`Document.load_page`). This is meant for internal " +"purpose requiring best possible performance." +msgstr "" +"PDF のみ:ページを読み込まずに(:meth:`Document.load_page` " +"を介さずに)、回転を無視してページの長方形を返します。これは、最高のパフォーマンスを必要とする内部目的のために使用されます。" + +#: ../../document.rst:665 ../../document.rst:675 +#: d6f44a6f77ba4aef87b8a3f5c983f2fd ece882efa538407a93154e82c6c9e8da +msgid "0-based page number." +msgstr "0から始まるページ番号。" + +#: ../../document.rst:667 0b215749aa924948a79159275a12e6cd +msgid ":ref:`Rect` of the page like :meth:`Page.rect`, but ignoring any rotation." +msgstr ":meth:`Page.rect` のようなページの :ref:`Rect` ですが、回転を無視します。" + +#: ../../document.rst:673 94aed8c23e4440008ff68cd76cae2948 +msgid "" +"PDF only: Return the :data:`xref` of the page -- **without loading the " +"page** (via :meth:`Document.load_page`). This is meant for internal " +"purpose requiring best possible performance." +msgstr "" +"PDF のみ: **ページを読み込まずに** (:meth:`Document.load_page` を介さずに)、ページの " +":data:`xref` を返します。これは、最高のパフォーマンスを必要とする内部目的のために使用されます。" + +#: ../../document.rst:677 f68b5fd43cd44364a072ac854e970d7d +msgid ":data:`xref` of the page like :attr:`Page.xref`." +msgstr ":attr:`Page.xref` のようなページの :data:`xref` 。" + +#: ../../document.rst:681 d59a89bac09b4944afd99a99bb1fe2c5 +msgid "New in v1.16.4" +msgstr "バージョン 1.16.4 での新機能" + +#: ../../document.rst:683 bd51b24afd4e47ebac962d0313b3f815 +msgid "" +"A generator for a range of pages. Parameters have the same meaning as in " +"the built-in function *range()*. Intended for expressions of the form " +"*\"for page in doc.pages(start, stop, step): ...\"*." +msgstr "" +"一連のページのためのジェネレーター。パラメーターの意味は組み込みの *range()* 関数と同じです。 *\"for page in " +"doc.pages(start, stop, step): …\"* の形式の式に使用することを意図しています。" + +#: ../../document.rst:685 75be001f5d554c8e9f05155e9f180e11 +msgid "" +"start iteration with this page number. Default is zero, allowed values " +"are `-∞ < start < page_count`. While this is negative, :attr:`page_count`" +" is added **before** starting the iteration." +msgstr "" +"このページ番号から反復を開始します。デフォルトはゼロで、許容値は `-∞ < start < page_count` " +"です。負の値の間は反復を開始する **前に** :attr:`page_count` が追加されます。" + +#: ../../document.rst:686 850d3ba2e51942bdac38c2c96d14b32b +msgid "" +"stop iteration at this page number. Default is :attr:`page_count`, " +"possible are `-∞ < stop <= page_count`. Larger values are **silently " +"replaced** by the default. Negative values will cyclically emit the pages" +" in reversed order. As with the built-in *range()*, this is the first " +"page **not** returned." +msgstr "" +"このページ番号で反復を停止します。デフォルトは :attr:`page_count` で、可能な値は `-∞ < stop <= " +"page_count` です。大きな値はデフォルトで **静かに置き換えられます** 。負の値は逆順でページを循環的に生成します。組み込みの " +"*range()* 関数と同様に、これは **返されない** 最初のページです。" + +#: ../../document.rst:687 71a1420d64a94c03afd8796a6ed02124 +msgid "" +"stepping value. Defaults are 1 if start < stop and -1 if start > stop. " +"Zero is not allowed." +msgstr "ステップ値。start < stop の場合はデフォルトで1、start > stop の場合は-1です。ゼロは許可されていません。" + +#: ../../document.rst:689 40f613aff2cc4f50bf0b79d1c5bfecbe +msgid "" +"a generator iterator over the document's pages. Some examples: * " +"\"doc.pages()\" emits all pages. * \"doc.pages(4, 9, 2)\" emits pages 4, " +"6, 8. * \"doc.pages(0, None, 2)\" emits all pages with even numbers. * " +"\"doc.pages(-2)\" emits the last two pages. * \"doc.pages(-1, -1)\" emits" +" all pages in reversed order. * \"doc.pages(-1, -10)\" always emits 10 " +"pages in reversed order, starting with the last page -- **repeatedly** if" +" the document has less than 10 pages. So for a 4-page document the " +"following page numbers are emitted: 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, " +"3." +msgstr "" + +#: ../../document.rst:689 f45b9694e9f34c38ad4482760ae7aca1 +msgid "a generator iterator over the document's pages. Some examples:" +msgstr "ドキュメントのページに対するジェネレーターイテレーターです。いくつかの例:" + +#: ../../document.rst:691 7e5e22c9cf1c4650ae403be190736d44 +msgid "\"doc.pages()\" emits all pages." +msgstr "\"doc.pages()\" はすべてのページを生成します。" + +#: ../../document.rst:692 44f14db8e1c941b1ae62879f95497c29 +msgid "\"doc.pages(4, 9, 2)\" emits pages 4, 6, 8." +msgstr "\"doc.pages(4, 9, 2)\" はページ4、6、8を生成します。" + +#: ../../document.rst:693 a533cad1663e46e6a5941d76b2037bb4 +msgid "\"doc.pages(0, None, 2)\" emits all pages with even numbers." +msgstr "\"doc.pages(0, None, 2)\" はすべての偶数ページを生成します。" + +#: ../../document.rst:694 16ef2fc267d744f686ffa88af27369f6 +msgid "\"doc.pages(-2)\" emits the last two pages." +msgstr "\"doc.pages(-2)\" は最後の2ページを生成します。" + +#: ../../document.rst:695 863a7d77e31649a2a715d675aa4edb32 +msgid "\"doc.pages(-1, -1)\" emits all pages in reversed order." +msgstr "\"doc.pages(-1, -1)\" は逆の順序ですべてのページを生成します。" + +#: ../../document.rst:696 3f767da46b0d44e4b3556a7d311806db +msgid "" +"\"doc.pages(-1, -10)\" always emits 10 pages in reversed order, starting " +"with the last page -- **repeatedly** if the document has less than 10 " +"pages. So for a 4-page document the following page numbers are emitted: " +"3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3." +msgstr "" +"\"doc.pages(-1, -10)\" は常に逆の順序で10ページを生成し、最後のページから始まり、ドキュメントが10ページ未満の場合は " +"**繰り返し** 生成します。したがって、4ページのドキュメントの場合、次のページ番号が生成されます: " +"3、2、1、0、3、2、1、0、3、2、1、0、3。" + +#: ../../document.rst:705 f48c185825c2403aa8b51fcffaa8c9b0 +msgid "" +"Create a PDF version of the current document and write it to memory. " +"**All document types** are supported. The parameters have the same " +"meaning as in :meth:`insert_pdf`. In essence, you can restrict the " +"conversion to a page subset, specify page rotation, and revert page " +"sequence." +msgstr "" +"現在のドキュメントのPDFバージョンを作成し、メモリに書き込みます。**すべてのドキュメントタイプ** がサポートされています。パラメータは " +":meth:`insert_pdf` と同じ意味を持ちます。 " +"基本的に、ページのサブセットに変換を制限し、ページの回転を指定し、ページの順序を逆にすることができます。" + +#: ../../document.rst:707 46ccb3be97f741dcbcfe7fc4137e7bcd +msgid "first page to copy (0-based). Default is first page." +msgstr "コピーする最初のページ(0ベース)。デフォルトは最初のページです。" + +#: ../../document.rst:709 23a04f94f6724a3899a1375c75690320 +msgid "last page to copy (0-based). Default is last page." +msgstr "コピーする最後のページ(0ベース)。デフォルトは最後のページです。" + +#: ../../document.rst:711 bb2f1bc11b48415cba76b4961858f552 +msgid "" +"rotation angle. Default is 0 (no rotation). Should be *n * 90* with an " +"integer n (not checked)." +msgstr "回転角度。デフォルトは0度(回転なし)です。整数n(チェックされていない)で *n * 90* である必要があります。" + +#: ../../document.rst:714 c74c2791f258466296ec36b0fc5626c7 +msgid "" +"a Python *bytes* object containing a PDF file image. It is created by " +"internally using `tobytes(garbage=4, deflate=True)`. See :meth:`tobytes`." +" You can output it directly to disk or open it as a PDF. Here are some " +"examples:: >>> # convert an XPS file to PDF >>> xps = " +"pymupdf.open(\"some.xps\") >>> pdfbytes = xps.convert_to_pdf() >>> >>> # " +"either do this --> >>> pdf = pymupdf.open(\"pdf\", pdfbytes) >>> " +"pdf.save(\"some.pdf\") >>> >>> # or this --> >>> pdfout = " +"open(\"some.pdf\", \"wb\") >>> pdfout.tobytes(pdfbytes) >>> " +"pdfout.close() >>> # copy image files to PDF pages >>> # each page will " +"have image dimensions >>> doc = pymupdf.open() # new " +"PDF >>> imglist = [ ... image file names ...] # e.g. a directory listing " +">>> for img in imglist: imgdoc=pymupdf.open(img) # open" +" image as a document pdfbytes=imgdoc.convert_to_pdf() # make a " +"1-page PDF of it imgpdf=pymupdf.open(\"pdf\", pdfbytes) " +"doc.insert_pdf(imgpdf) # insert the image PDF >>> " +"doc.save(\"allmyimages.pdf\")" +msgstr "" + +#: ../../document.rst:714 fcede7b6375e4313a29021a400393b1c +msgid "" +"a Python *bytes* object containing a PDF file image. It is created by " +"internally using `tobytes(garbage=4, deflate=True)`. See :meth:`tobytes`." +" You can output it directly to disk or open it as a PDF. Here are some " +"examples::" +msgstr "" +"PDFファイルイメージを含むPythonの *bytes* オブジェクト。内部的に `tobytes(garbage=4, " +"deflate=True)` を使用して作成されます。:meth:`tobytes` " +"を参照してください。これを直接ディスクに出力するか、PDFとして開くことができます。以下にいくつかの例を示します::" + +#: ../../document.rst:740 338cff57f1c14eb78e44aa08f634a79e +msgid "" +"The method uses the same logic as the *mutool convert* CLI. This works " +"very well in most cases -- however, beware of the following limitations." +msgstr "" +"このメソッドは、*mutool convert* " +"CLIと同じロジックを使用しています。これはほとんどの場合非常にうまく機能しますが、以下の制限に注意してください。" + +#: ../../document.rst:742 8bcf27ec2972489b8a66b52e727c26a7 +msgid "" +"Image files: perfect, no issues detected. However, image transparency is " +"ignored. If you need that (like for a watermark), use " +":meth:`Page.insert_image` instead. Otherwise, this method is recommended " +"for its much better performance." +msgstr "" +"画像ファイル: 完璧で、問題は検出されません。ただし、画像の透明度は無視されます。透明度が必要な場合(ウォーターマークなど)、代わりに " +":meth:`Page.insert_image` " +"を使用してください。それ以外の場合、このメソッドははるかに優れたパフォーマンスのために推奨されます。" + +#: ../../document.rst:743 efd920175c804d928c317a5a501316ec +msgid "" +"XPS: appearance very good. Links work fine, outlines (bookmarks) are " +"lost, but can easily be recovered [#f2]_." +msgstr "XPS: 外観は非常に良好です。リンクは正常に機能し、アウトライン(ブックマーク)は失われますが、簡単に回復できます [#f2]_。" + +#: ../../document.rst:744 336a0be2bc9e4860b7b3b0c47ad4835f +msgid "EPUB, CBZ, FB2: similar to XPS." +msgstr "EPUB、CBZ、FB2: XPSと類似しています。" + +#: ../../document.rst:745 9dbcb1b8abae45cabc70a4552926325f +msgid "" +"SVG: medium. Roughly comparable to `svglib " +"`_." +msgstr "SVG: 中程度です。おおよそ `svglib `_ と比較できます。" + +#: ../../document.rst:749 5cb402163d4e4c6999bdfedfc4c6ad7f +msgid "Creates a table of contents (TOC) out of the document's outline chain." +msgstr "ドキュメントのアウトラインチェーンから目次(TOC)を作成します。" + +#: ../../document.rst:751 b052df8d0ef140c783e851c1563df7a8 +msgid "" +"Indicates whether a simple or a detailed TOC is required. If ``False``, " +"each item of the list also contains a dictionary with :ref:`linkDest` " +"details for each outline entry." +msgstr "" +"簡単なTOCまたは詳細なTOCが必要かを示す値です。``False`` の場合、リストの各アイテムにはアウトラインエントリごとの " +":ref:`linkDest` の詳細を含む辞書も含まれます。" + +#: ../../document.rst:755 f2ee7e1afe8e4f06819f620115b35f3a +msgid "" +"a list of lists. Each entry has the form *[lvl, title, page, dest]*. Its " +"entries have the following meanings: * *lvl* -- hierarchy level " +"(positive *int*). The first entry is always 1. Entries in a row are " +"either **equal**, **increase** by 1, or **decrease** by any number. * " +"*title* -- title (*str*) * *page* -- 1-based source page number (*int*). " +"`-1` if no destination or outside document. * *dest* -- (*dict*) included" +" only if *simple=False*. Contains details of the TOC item as follows: " +"- kind: destination kind, see :ref:`linkDest Kinds`. - file: filename " +"if kind is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`. - page: target " +"page, 0-based, :data:`LINK_GOTOR` or :data:`LINK_GOTO` only. - to: " +"position on target page (:ref:`Point`). - zoom: (float) zoom factor on " +"target page. - xref: :data:`xref` of the item (0 if no PDF). - color:" +" item color in PDF RGB format `(red, green, blue)`, or omitted (always " +"omitted if no PDF). - bold: true if bold item text or omitted. PDF " +"only. - italic: true if italic item text, or omitted. PDF only. - " +"collapse: true if sub-items are folded, or omitted. PDF only. - " +"nameddest: target name if kind=4. PDF only. (New in 1.23.7.)" +msgstr "" + +#: ../../document.rst:755 55d6cd189dfe4990bcd196b86a15835b +msgid "" +"a list of lists. Each entry has the form *[lvl, title, page, dest]*. Its " +"entries have the following meanings:" +msgstr "リストのリスト。各エントリは次の形式を持っています: *[lvl, title, page, dest]* 。そのエントリは次の意味を持っています:" + +#: ../../document.rst:757 454d03d450624061938d946b501c9663 +msgid "" +"*lvl* -- hierarchy level (positive *int*). The first entry is always 1. " +"Entries in a row are either **equal**, **increase** by 1, or **decrease**" +" by any number." +msgstr "" +"*lvl* – 階層レベル(正の整数)。最初のエントリは常に1です。行内のエントリは **等しい** か、1ずつ **増加** または任意の数で " +"**減少** します。" + +#: ../../document.rst:758 0889207b55464b11a1a4d2eb60f7268b +msgid "*title* -- title (*str*)" +msgstr "*title* – タイトル(*str*)" + +#: ../../document.rst:759 5fa3112e6f004b80ae2976b1070cf936 +msgid "" +"*page* -- 1-based source page number (*int*). `-1` if no destination or " +"outside document." +msgstr "*page* – 1から始まるページ番号(*int*)。`-1` の場合、宛先なしまたはドキュメント外。" + +#: ../../document.rst:760 abd18e5ece4d4c88b1c87baf72f2c2c6 +msgid "" +"*dest* -- (*dict*) included only if *simple=False*. Contains details of " +"the TOC item as follows:" +msgstr "*dest* – (*dict*) *simple=False* の場合のみ含まれます。TOCアイテムの詳細が以下のように含まれます:" + +#: ../../document.rst:762 76bc0d0dfaee41c48fff15ada7b9a5b4 +msgid "kind: destination kind, see :ref:`linkDest Kinds`." +msgstr "kind: 宛先の種類、:ref:`linkDest Kinds` を参照。" + +#: ../../document.rst:763 9758bcc5f2f746e18f86c4fa2d250b60 +msgid "file: filename if kind is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`." +msgstr "file: `kind` が :data:`LINK_GOTOR` または :data:`LINK_LAUNCH` の場合のファイル名。" + +#: ../../document.rst:764 5afcceaa21ab4291a43e7196af474940 +msgid "page: target page, 0-based, :data:`LINK_GOTOR` or :data:`LINK_GOTO` only." +msgstr "page: ターゲットページ、0ベース、:data:`LINK_GOTOR` または :data:`LINK_GOTO` の場合のみ。" + +#: ../../document.rst:765 7ef679ac849c48eb8d577c0fe3454ac4 +msgid "to: position on target page (:ref:`Point`)." +msgstr "to: ターゲットページ上の位置 (:ref:`Point`)。" + +#: ../../document.rst:766 8a166e04e8df4d08b89f8440cc674f32 +msgid "zoom: (float) zoom factor on target page." +msgstr "zoom: ターゲットページ上のズームファクター(float)。" + +#: ../../document.rst:767 d702c639749e417cb078fd32f13de012 +msgid "xref: :data:`xref` of the item (0 if no PDF)." +msgstr "xref: アイテムの :data:`xref` (PDFがない場合は0)。" + +#: ../../document.rst:768 99f0660ad6394091809cbdf3d302955c +msgid "" +"color: item color in PDF RGB format `(red, green, blue)`, or omitted " +"(always omitted if no PDF)." +msgstr "color: PDF RGB形式 `(red, green, blue)` のアイテムカラー、または省略(PDFがない場合は常に省略)。" + +#: ../../document.rst:769 c4f8e59bfeac45bfbfb7ea9d1248838a +msgid "bold: true if bold item text or omitted. PDF only." +msgstr "bold: アイテムテキストが太字の場合、または省略。PDFのみ。" + +#: ../../document.rst:770 f6cae7c40b8e45f59f44ad81ef15567c +msgid "italic: true if italic item text, or omitted. PDF only." +msgstr "italic: アイテムテキストがイタリックの場合、または省略。PDFのみ。" + +#: ../../document.rst:771 76b18e63c1714b74b2122abb08daa430 +msgid "collapse: true if sub-items are folded, or omitted. PDF only." +msgstr "collapse: サブアイテムが折りたたまれている場合、または省略。PDFのみ。" + +#: ../../document.rst:772 1101c4e55af149bdaf2a45d5f3f593ce +msgid "nameddest: target name if kind=4. PDF only. (New in 1.23.7.)" +msgstr "" + +#: ../../document.rst:779 0cb3aef79b2e4dc3bd6ade0f8560df02 +msgid "" +"PDF only: Return the PDF dictionary keys of the :data:`dictionary` object" +" provided by its xref number." +msgstr "PDFのみ: xref番号で提供される :data:`dictionary` オブジェクトのPDF辞書キーを返します。" + +#: ../../document.rst:781 c77458be421d4b29bb97c9dc1a3e67fa +msgid "" +"the :data:`xref`. *(Changed in v1.18.10)* Use `-1` to access the special " +"dictionary \"PDF trailer\"." +msgstr ":data:`xref` 。 *(v1.18.10で変更)* \"PDF trailer\" としてアクセスするには `-1` を使用します。" + +#: ../../document.rst:783 efe50840bb3a41ea82e644f3e3f5e202 +msgid "" +"a tuple of dictionary keys present in object :data:`xref`. Examples: >>>" +" from pprint import pprint >>> import pymupdf >>> " +"doc=pymupdf.open(\"pymupdf.pdf\") >>> xref = doc.page_xref(0) # xref of " +"page 0 >>> pprint(doc.xref_get_keys(xref)) # primary level keys of a " +"page ('Type', 'Contents', 'Resources', 'MediaBox', 'Parent') >>> " +"pprint(doc.xref_get_keys(-1)) # primary level keys of the trailer " +"('Type', 'Index', 'Size', 'W', 'Root', 'Info', 'ID', 'Length', 'Filter') " +">>>" +msgstr "" + +#: ../../document.rst:783 61a280371ea44c08be60d1d6e1116b80 +msgid "a tuple of dictionary keys present in object :data:`xref`. Examples:" +msgstr ":data:`xref` オブジェクトに存在する辞書キーのタプル。例:" + +#: ../../document.rst:800 180b102e04e64f7798fcdb86576717b5 +msgid "" +"PDF only: Return type and value of a PDF dictionary key of a " +":data:`dictionary` object given by its xref." +msgstr "PDFのみ: xrefによって提供される :data:`dictionary` オブジェクトのPDF辞書キーの戻り値の型と値を返します。" + +#: ../../document.rst:802 b6b9bd8f35c24eada60ca621edbe56ed +msgid "" +"the :data:`xref`. *Changed in v1.18.10:* Use `-1` to access the special " +"dictionary \"PDF trailer\"." +msgstr ":data:`xref` 。*v1.18.10で変更:* 特別な辞書 \"PDF trailer\" にアクセスするには `-1` を使用します。" + +#: ../../document.rst:804 69819bf36f4b42558eeb2c4bbe22cb65 +msgid "" +"the desired PDF key. Must **exactly** match (case-sensitive) one of the " +"keys contained in :meth:`Document.xref_get_keys`." +msgstr "" +"望ましいPDFキー。:meth:`Document.xref_get_keys` " +"に含まれるキーのいずれかと厳密に一致する必要があります(大文字と小文字を区別します)。" + +#: ../../document.rst:808 41a966d291554b56b93fc03772a17bf0 +msgid "" +"A tuple (type, value) of strings, where type is one of \"xref\", " +"\"array\", \"dict\", \"int\", \"float\", \"null\", \"bool\", \"name\", " +"\"string\" or \"unknown\" (should not occur). Independent of \"type\", " +"the value of the key is **always** formatted as a string -- see the " +"following example -- and (almost always) a faithful reflection of what is" +" stored in the PDF. In most cases, the format of the value string also " +"gives a clue about the key type:" +msgstr "" +"文字列のタプル(type、value)、ここでtypeは次のいずれかです: " +"\"xref\"、\"array\"、\"dict\"、\"int\"、\"float\"、\"null\"、\"bool\"、\"name\"、\"string\"" +" または \"unknown\"(発生しないはず)。\"type\" に関係なく、キーの値は **常に** 文字列としてフォーマットされます – " +"以下の例を参照 – " +"そして(ほとんどの場合)PDFに格納されている内容の忠実な反映です。ほとんどの場合、値文字列のフォーマットもキーのタイプについてのヒントを提供します:" + +#: ../../document.rst:810 68962e8fe4d442e293d9c3118347e01e +msgid "A \"name\" always starts with a \"/\" slash." +msgstr "\"name\" は常に \"/\" スラッシュで始まります。" + +#: ../../document.rst:811 32d534887bbc47a096b329464447814b +msgid "An \"xref\" always ends with \" 0 R\"." +msgstr "\"xref\" は常に \" 0 R\" で終わります。" + +#: ../../document.rst:812 36770e892a53423d9754f40b4cbb44fd +msgid "An \"array\" is always enclosed in \"[...]\" brackets." +msgstr "\"array\" は常に \"[...]\" 角括弧で囲まれています。" + +#: ../../document.rst:813 fd7eef20a4fe4744bd5b4151cd909798 +msgid "A \"dict\" is always enclosed in \"<<...>>\" brackets." +msgstr "\"dict\" は常に \"<<...>>\" 角括弧で囲まれています。" + +#: ../../document.rst:814 a7ca5eff251543ceb8cfb9e695ebde43 +msgid "" +"A \"bool\", resp. \"null\" always equal either \"true\", \"false\", resp." +" \"null\"." +msgstr "\"bool\" または \"null\" は常に \"true\"、\"false\" または \"null\" のいずれかです。" + +#: ../../document.rst:815 9ba0410ed2ab4e03877a3a08d13bf77c +msgid "" +"\"float\" and \"int\" are represented by their string format -- and are " +"thus not always distinguishable." +msgstr "\"float\" と \"int\" は文字列形式で表され、したがって常に区別できるわけではありません。" + +#: ../../document.rst:816 f33721d0aec84d23b9300ace9c35a6f9 +msgid "" +"A \"string\" is converted to UTF-8 and may therefore deviate from what is" +" stored in the PDF. For example, the PDF key \"Author\" may have a value " +"of \"\" in the " +"file, but the method will return `('string', 'Jorj X. McKie')`." +msgstr "" +"\"string\" はUTF-8に変換され、したがってPDFに格納されている内容と異なる場合があります。たとえば、PDFキー " +"\"Author\" はファイル内で " +"\"FEFF004A006F0072006A00200058002E0020004D0063004B00690065\" " +"という値を持つかもしれませんが、このメソッドは `('string', 'Jorj X. McKie')` を返します。" + +#: ../../document.rst:845 e9c2a10f60da46c587236437b7c1ed6b +msgid "New in v1.18.7, changed in v 1.18.13" +msgstr "バージョン1.18.7で新規追加、バージョン1.18.13で変更されました。" + +#: ../../document.rst:846 ba86e3e07a4c47598104d3b71044774e +msgid "Changed in v1.19.4: remove a key \"physically\" if set to \"null\"." +msgstr "バージョン1.19.4で変更: \"null\" に設定された場合、キーを \"物理的に\" 削除します。" + +#: ../../document.rst:848 e1292beca902409b92239cd05c2a3a50 +msgid "" +"PDF only: Set (add, update, delete) the value of a PDF key for the " +":data:`dictionary` object given by its xref." +msgstr "PDFのみ: xrefによって提供される :data:`dictionary` オブジェクトのPDFキーの値を設定(追加、更新、削除)します。" + +#: ../../document.rst:850 0bb3825d68dd410e991716b3f51fab4a +msgid "" +"This is an expert function: if you do not know what you are doing, there " +"is a high risk to render (parts of) the PDF unusable. Please do consult " +":ref:`AdobeManual` about object specification formats (page 18) and the " +"structure of special dictionary types like page objects." +msgstr "" +"これはエキスパート向けの機能です。何をしているのかわからない場合、PDFの(一部の)使用不能の高いリスクがあります。PDFオブジェクト仕様フォーマット(ページ18)やページオブジェクトなどの特別な辞書タイプの構造については" +" :ref:`AdobeManual` を参照してください。" + +#: ../../document.rst:852 16ca3e7fbf794d38a823d632ad2ae243 +msgid "" +"the :data:`xref`. *Changed in v1.18.13:* To update the PDF trailer, " +"specify -1." +msgstr ":data:`xref` 。バージョン1.18.13で変更: PDFトレーラーを更新する場合、-1を指定します。" + +#: ../../document.rst:853 8960997235104ce1abc84fe4cb69fefe +msgid "" +"the desired PDF key (without leading \"/\"). Must not be empty. Any valid" +" PDF key -- whether already present in the object (which will be " +"overwritten) -- or new. It is possible to use PDF path notation like " +"`\"Resources/ExtGState\"` -- which sets the value for key " +"`\"/ExtGState\"` as a sub-object of `\"/Resources\"`." +msgstr "" +"望ましいPDFキー(先頭の \"/\" " +"なし)。空であってはいけません。既にオブジェクト内に存在するかどうかに関係なく、新しいPDFキーでも構いません。PDFパス表記 " +"(`\"Resources/ExtGState\"` のような)を使用して、`\"/Resources\"` のサブオブジェクトとしてキー " +"`\"/ExtGState\"` の値を設定することも可能です。" + +#: ../../document.rst:854 368de1a24c0c4b9f98d0d3cb4d81d00d +msgid "" +"the value for the key. It must be a non-empty string and, depending on " +"the desired PDF object type, the following rules must be observed. There " +"is some syntax checking, but **no type checking** and no checking if it " +"makes sense PDF-wise, i.e. **no semantics checking**. Upper / lower case " +"is important!" +msgstr "キーの値。空でない文字列である必要があり、望ましいPDFオブジェクトのタイプに応じて以下のルールを守る必要があります。一部の構文チェックは行われますが、型チェックやPDFとして意味があるかどうかのチェックは行われません。大文字と小文字の区別が重要です!" + +#: ../../document.rst:856 06ed2d0e262a45629c998ae7c495a933 +msgid "" +"*:data:`xref`* -- must be provided as `\"nnn 0 R\"` with a valid " +":data:`xref` number nnn of the PDF. The suffix \"`0 R`\" is required to " +"be recognizable as an xref by PDF applications." +msgstr "" +"*:data:`xref`* – 有効なPDFのxref番号nnnを持つ `\"nnn 0 R\"` として提供される必要があります。サフィックス" +" \"`0 R`\" はPDFアプリケーションによって :data:`xref` として認識されるため必要です。" + +#: ../../document.rst:857 330970abae9f4facabb37ba6eccd7370 +msgid "" +"**array** -- a string like `\"[a b c d e f]\"`. The brackets are " +"required. Array items must be separated by at least one space (not commas" +" like in Python). An empty array `\"[]\"` is possible and *equivalent* to" +" removing the key. Array items may be any PDF objects, like dictionaries," +" xrefs, other arrays, etc. Like in Python, array items may be of " +"different types." +msgstr "" +"**array** – `\"[a b c d e f]\"` " +"のような文字列。角括弧が必要です。配列の要素は少なくとも1つのスペースで区切られている必要があります(Pythonのようなカンマではありません)。空の配列" +" `\"[]\"` も可能で、キーを削除することと *同等* " +"です。配列のアイテムは、辞書、xref、他の配列など、PDFオブジェクトである必要があります。Pythonと同様に、配列のアイテムは異なるタイプである場合があります。" + +#: ../../document.rst:858 dd02ad08c6f7478299103ed59cc6a4a9 +msgid "" +"**dict** -- a string like `\"<< ... >>\"`. The brackets are required and " +"must enclose a valid PDF dictionary definition. The empty dictionary " +"`\"<<>>\"` is possible and *equivalent* to removing the key." +msgstr "" +"**dict** – `\"<< ... >>\"` のような文字列。角括弧が必要で、有効なPDF辞書定義を囲む必要があります。空の辞書 " +"`\"<<>>\"` も可能で、キーを削除することと *同等* です。" + +#: ../../document.rst:859 c4181d6e27f34f4c9ff75b529d476a93 +msgid "**int** -- an integer formatted **as a string**." +msgstr "**int** – **文字列として** フォーマットされた整数。" + +#: ../../document.rst:860 157a221ca7aa4d48a23b606b9975d385 +msgid "" +"**float** -- a float formatted **as a string**. Scientific notation (with" +" exponents) is **not allowed by PDF**." +msgstr "**float** – 文字列としてフォーマットされた浮動小数点数。科学的表記法(指数を含む)は **PDFでは許可されていません** 。" + +#: ../../document.rst:861 2de697538bd44abca00e569494265742 +msgid "" +"**null** -- the string `\"null\"`. This is the PDF equivalent to Python's" +" `None` and causes the key to be ignored -- however not necessarily " +"removed, resp. removed on saves with garbage collection. *Changed in " +"v1.19.4:* If the key is no path hierarchy (i.e. contains no slash \"/\")," +" then it will be completely removed." +msgstr "" +"**null** – 文字列 `\"null\"`。これはPythonの `None` " +"に相当し、キーを無視させますが、必ずしも削除されるわけではありません。ガベージコレクションを伴う保存時に削除される場合があります。*バージョン1.19.4で変更:*" +" キーがパス階層でない場合(つまりスラッシュ \"/\" を含まない場合)、それは完全に削除されます。" + +#: ../../document.rst:862 b39a734639f8407ba3bbdaf290e83863 +msgid "**bool** -- one of the strings `\"true\"` or `\"false\"`." +msgstr "**bool** – `\"true\"` または `\"false\"` のいずれかの文字列。" + +#: ../../document.rst:863 abd3326f76034c32ad2cece7dd88ee74 +msgid "" +"**name** -- a valid PDF name with a leading slash like this: " +"`\"/PageLayout\"`. See page 16 of the :ref:`AdobeManual`." +msgstr "" +"**name** – `\"/PageLayout\"` のように先頭にスラッシュを持つ有効なPDF名。:ref:`AdobeManual` " +"のページ16を参照してください。" + +#: ../../document.rst:864 a124ee862f9e4d9f8e2db002683963e3 +msgid "" +"**string** -- a valid PDF string. **All PDF strings must be enclosed by " +"brackets**. Denote the empty string as `\"()\"`. Depending on its " +"content, the possible brackets are" +msgstr "" +"**string** – 有効なPDF文字列。 **すべてのPDF文字列は角括弧で囲まれている必要があります** 。空の文字列は `\"()\"`" +" として表記されます。内容に応じて、可能な角括弧は次の通りです。" + +#: ../../document.rst:866 cf9ad384fa7e48e89ed6220e656605e2 +msgid "" +"\"(...)\" for ASCII-only text. Reserved PDF characters must be backslash-" +"escaped and non-ASCII characters must be provided as 3-digit backslash-" +"escaped octals -- including leading zeros. Example: 12 = 0x0C must be " +"encoded as `\\014`." +msgstr "" +"\"(…)\":ASCIIのテキストの場合。予約されたPDF文字はバックスラッシュでエスケープし、非ASCII文字は先頭にゼロパディングされた3桁のバックスラッシュエスケープの8進数で提供する必要があります。例:" +" 12 = 0x0C は `\\014` としてエンコードする必要があります。" + +#: ../../document.rst:867 6933707b8a404c698d2bb0178fe6bb7d +msgid "" +"\"<...>\" for hex-encoded text. Every character must be represented by " +"two hex-digits (lower or upper case)." +msgstr "\"<…>\":16進数でエンコードされたテキストの場合。各文字は2桁の16進数で表されなければなりません(大文字または小文字)。" + +#: ../../document.rst:869 617b10333ace4793b0272dcbf1ad1528 +msgid "" +"If in doubt, we **strongly recommend** to use :meth:`get_pdf_str`! This " +"function automatically generates the right brackets, escapes, and overall" +" format. It will for example do conversions like these:" +msgstr "" +"疑念がある場合は、:meth:`get_pdf_str` " +"の使用を**強くお勧めします**!この関数は自動的に適切な角括弧、エスケープ、および全体のフォーマットを生成します。たとえば、次のような変換を行います。" + +#: ../../document.rst:881 d9f815c2f13749c1bbba34778122e4b5 +msgid "" +"Creates a pixmap from page *pno* (zero-based). Invokes " +":meth:`Page.get_pixmap`." +msgstr "*pno* (ゼロベース)のページからピクスマップを作成します。:meth:`Page.get_pixmap` を呼び出します。" + +#: ../../document.rst:883 212a466e64f54ceab96d342b6203b839 +msgid "All parameters except `pno` are *keyword-only.*" +msgstr "`pno` 以外のすべてのパラメーターは *キーワード専用* です。" + +#: ../../document.rst:885 e1319e0e7970424791a53525152e1639 +msgid "page number, 0-based in `-∞ < pno < page_count`." +msgstr "pno (int) – ページ番号、ゼロベース、`-∞ < pno < page_count`。" + +#: ../../document.rst:887 2cc8a9bd2bb645669b18514646b4297e +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../document.rst:891 9ea89530b850439c97138a6fcab13410 +msgid "New in v1.16.13" +msgstr "バージョン1.16.13で新規追加" + +#: ../../document.rst:892 73acb049c43a4985a295c16dfa1b05f2 +msgid "Changed in v1.18.11" +msgstr "バージョン1.18.11で変更" + +#: ../../document.rst:894 ad363437834b4d41a88a015c57fb06bf +msgid "PDF only: Return a list of all XObjects referenced by a page." +msgstr "PDFのみ: ページによって参照されるすべてのXObjectのリストを返します。" + +#: ../../document.rst:896 ../../document.rst:911 ../../document.rst:940 +#: 1f721aa38b2841618a5ff2c3d399515f 2be167774b6e411286adf1d850042ad0 +#: 6373181f04914740a5bbd91abdcd1de2 +msgid "page number, 0-based, `-∞ < pno < page_count`." +msgstr "ページ番号、ゼロベース、`-∞ < pno < page_count`。" + +#: ../../document.rst:899 722d965ee64d45429057256a881874f3 +msgid "" +"a list of (non-image) XObjects. These objects typically represent pages " +"*embedded* (not copied) from other PDFs. For example, " +":meth:`Page.show_pdf_page` will create this type of object. An item of " +"this list has the following layout: `(xref, name, invoker, bbox)`, where" +" * *:data:`xref`* (*int*) is the XObject's :data:`xref`. * **name** " +"(*str*) is the symbolic name to reference the XObject. * **invoker** " +"(*int*) the :data:`xref` of the invoking XObject or zero if the page " +"directly invokes it. * **bbox** (:ref:`Rect`) the boundary box of the " +"XObject's location on the page **in untransformed coordinates**. To get " +"actual, non-rotated page coordinates, multiply with the page's " +"transformation matrix :attr:`Page.transformation_matrix`. *Changed in " +"v.18.11:* the bbox is now formatted as :ref:`Rect`." +msgstr "" + +#: ../../document.rst:899 0fbe94736a044053802826a4e53067ad +msgid "" +"a list of (non-image) XObjects. These objects typically represent pages " +"*embedded* (not copied) from other PDFs. For example, " +":meth:`Page.show_pdf_page` will create this type of object. An item of " +"this list has the following layout: `(xref, name, invoker, bbox)`, where" +msgstr "" +"(画像でない)XObjectのリスト。これらのオブジェクトは通常、他のPDFから **埋め込まれた** " +"(コピーされていない)ページを表します。例えば、:meth:`Page.show_pdf_page` " +"はこのタイプのオブジェクトを作成します。このリストのアイテムは以下のレイアウトを持っています:`(xref, name, invoker, " +"bbox)`、ここで" + +#: ../../document.rst:901 7a364bde2f77412fb904d358b7a02643 +msgid "*:data:`xref`* (*int*) is the XObject's :data:`xref`." +msgstr "*:data:`xref`* (*int*) はXObjectの :data:`xref` です。" + +#: ../../document.rst:902 9922e6b3ac3446dabf4f05cbd662e6eb +msgid "**name** (*str*) is the symbolic name to reference the XObject." +msgstr "**name** (*str*) はXObjectを参照するための象徴的な名前です。" + +#: ../../document.rst:903 b55ebb0f160f40568cc38ab211aceaa7 +msgid "" +"**invoker** (*int*) the :data:`xref` of the invoking XObject or zero if " +"the page directly invokes it." +msgstr "" +"**invoker** (*int*) は、ページがそれを直接呼び出す場合はゼロ、それ以外の場合は呼び出し元XObjectの " +":data:`xref` です。" + +#: ../../document.rst:904 eb3c40a7273b48a394cae50ae2bf4c16 +msgid "" +"**bbox** (:ref:`Rect`) the boundary box of the XObject's location on the " +"page **in untransformed coordinates**. To get actual, non-rotated page " +"coordinates, multiply with the page's transformation matrix " +":attr:`Page.transformation_matrix`. *Changed in v.18.11:* the bbox is now" +" formatted as :ref:`Rect`." +msgstr "" +"**bbox** (:ref:`Rect`) はXObjectのページ上の位置の境界ボックスで、**変換されていない座標で** " +"表されます。実際の、回転していないページ座標を取得するには、ページの変換行列 :attr:`Page.transformation_matrix`" +" を掛けてください。*バージョン1.18.11で変更:* **bbox** は今や :ref:`Rect` としてフォーマットされています。" + +#: ../../document.rst:909 be3724545cd849628e6bd091a3cc3fa8 +msgid "" +"PDF only: Return a list of all images (directly or indirectly) referenced" +" by the page." +msgstr "PDFのみ: ページによって参照されるすべての画像(直接または間接的に)のリストを返します。" + +#: ../../document.rst:912 bf86d878c0c74dbe9f6905d912e1069e +msgid "" +"whether to also include the referencer's :data:`xref` (which is zero if " +"this is the page)." +msgstr "このページがそのページ自体の場合、参照元の :data:`xref` も含めるかどうか(これがページの場合はゼロ)。" + +#: ../../document.rst:916 88bc7cacbc2348e98e470cbb54a6ea4c +msgid "" +"a list of images **referenced** by this page. Each item looks like " +"`(xref, smask, width, height, bpc, colorspace, alt_colorspace, name, " +"filter, referencer)` Where * *:data:`xref`* (*int*) is the image " +"object number * **smask** (*int*) is the object number of its soft-mask" +" image * **width** (*int*) is the image width * **height** (*int*) is" +" the image height * **bpc** (*int*) denotes the number of bits per " +"component (normally 8) * **colorspace** (*str*) a string naming the " +"colorspace (like **DeviceRGB**) * **alt_colorspace** (*str*) is any " +"alternate colorspace depending on the value of **colorspace** * " +"**name** (*str*) is the symbolic name by which the image is referenced " +"* **filter** (*str*) is the decode filter of the image " +"(:ref:`AdobeManual`, pp. 22). * **referencer** (*int*) the :data:`xref`" +" of the referencer. Zero if directly referenced by the page. Only present" +" if *full=True*." +msgstr "" + +#: ../../document.rst:916 6491f87ae96840e09c13fccc226463ef +msgid "a list of images **referenced** by this page. Each item looks like" +msgstr "このページで **参照されている** 画像のリスト。各アイテムは以下のようになります:" + +#: ../../document.rst:918 855d4bf2eedd4786ab4836f0e660eeb1 +msgid "" +"`(xref, smask, width, height, bpc, colorspace, alt_colorspace, name, " +"filter, referencer)`" +msgstr "" + +#: ../../document.rst:920 d393059c685e4b2b99530489a9fd4cd7 +msgid "Where" +msgstr "ここで" + +#: ../../document.rst:922 34311a21103f4cc58b7e72482c578f48 +msgid "*:data:`xref`* (*int*) is the image object number" +msgstr "*:data:`xref`* (*int*) は画像オブジェクトの番号です" + +#: ../../document.rst:923 da18a00df2e547b59ff9927842e1ff99 +msgid "**smask** (*int*) is the object number of its soft-mask image" +msgstr "**smask** (*int*) はそのソフトマスク画像のオブジェクト番号です" + +#: ../../document.rst:924 5dfc945fe0eb407dbb539c5a180be7e1 +#, fuzzy +msgid "**width** (*int*) is the image width" +msgstr "*width* (*int*) 画像の幅" + +#: ../../document.rst:925 e927e011deb14515876f09257cf7ffbc +#, fuzzy +msgid "**height** (*int*) is the image height" +msgstr "*height* (*int*) 画像の高さ" + +#: ../../document.rst:926 80ca7112d6be40dba2189a15e0d020d3 +msgid "**bpc** (*int*) denotes the number of bits per component (normally 8)" +msgstr "**bpc** (*int*) はコンポーネントごとのビット数を示します(通常は8)" + +#: ../../document.rst:927 ec292a4da4644bf6bac8b05be16ecef0 +msgid "**colorspace** (*str*) a string naming the colorspace (like **DeviceRGB**)" +msgstr "**colorspace** (*str*) は色空間の名前を示す文字列です( **DeviceRGB** など)" + +#: ../../document.rst:928 ab540ead740a449db82338f6215bb0f0 +#, fuzzy +msgid "" +"**alt_colorspace** (*str*) is any alternate colorspace depending on the " +"value of **colorspace**" +msgstr "**alt. colorspace** (*str*) は **colorspace** の値に依存する代替の色空間です" + +#: ../../document.rst:929 93d6b6c06d624f5d8d5e1ea99e488051 +msgid "**name** (*str*) is the symbolic name by which the image is referenced" +msgstr "**name** (*str*) は画像が参照される際の象徴的な名前です" + +#: ../../document.rst:930 a82bc86c4ff44e3797f686d5a6832a66 +msgid "" +"**filter** (*str*) is the decode filter of the image (:ref:`AdobeManual`," +" pp. 22)." +msgstr "**filter** (*str*) は画像のデコードフィルタです(:ref:`AdobeManual`、pp. 22)。" + +#: ../../document.rst:931 8c958de2bcd240e7b553529c32c3183f +msgid "" +"**referencer** (*int*) the :data:`xref` of the referencer. Zero if " +"directly referenced by the page. Only present if *full=True*." +msgstr "" +"**referencer** (*int*) は参照元の :data:`xref` です。直接ページから参照されている場合はゼロ。 " +"*full=True* の場合のみ存在します" + +#: ../../document.rst:933 ef506a92d3bd45eea0964062da5b01a5 +msgid "" +"In general, this is not the list of images that are **actually " +"displayed**. This method only parses several PDF objects to collect " +"references to embedded images. It does not analyse the page's " +":data:`contents`, where all the actual image display commands are " +"defined. To get this information, please use :meth:`Page.get_image_info`." +" Also have a look at the discussion in section :ref:`textpagedict`." +msgstr "" +"一般的に、これは **実際に表示されている** " +"画像のリストではありません。このメソッドは埋め込まれた画像への参照を収集するためにいくつかのPDFオブジェクトのみを解析します。実際の画像表示コマンドが定義されているページの内容は解析しません。この情報を取得するには、:meth:`Page.get_image_info`" +" を使用してください。また、:ref:`textpagedict` のセクションでの議論もご覧ください。" + +#: ../../document.rst:938 3e222a6391974132911f5718c2fcc006 +#, fuzzy +msgid "" +"PDF only: Return a list of all fonts (directly or indirectly) referenced " +"by the page object definition." +msgstr "PDFのみ: ページによって参照されるすべてのフォント(直接または間接的に)のリストを返します。" + +#: ../../document.rst:941 8591c400dad84c1684522b3ca38aa15e +msgid "" +"whether to also include the referencer's :data:`xref`. If ``True``, the " +"returned items are one entry longer. Use this option if you need to know," +" whether the page directly references the font. In this case the last " +"entry is 0. If the font is referenced by an `/XObject` of the page, you " +"will find its :data:`xref` here." +msgstr "" +"参照元の :data:`xref` も含めるかどうか。``True`` " +"の場合、返されるアイテムは1つ多くなります。ページがフォントを直接参照しているかどうかを知る必要がある場合は、このオプションを使用します。この場合、最後のエントリは0です。フォントがページの" +" `/XObject` によって参照される場合、その :data:`xref` をここで見つけることができます。" + +#: ../../document.rst:945 b3162d921d8b405db6a0eb5f2ae20038 +#, fuzzy +msgid "" +"a list of fonts referenced by the object definition of the page. Each " +"entry looks like" +msgstr "このページで参照されているフォントのリスト。各エントリは以下のようになります:" + +#: ../../document.rst:947 f034c95a0d374a0692c7b6a2ed4f295a +msgid "**(xref, ext, type, basefont, name, encoding, referencer)**," +msgstr "" + +#: ../../document.rst:949 f7d5895a2dee45758178d3e3f856822e +msgid "where" +msgstr "ここで" + +#: ../../document.rst:951 1f8f44b1b35c48feb6f38d57662547bb +msgid "" +"*:data:`xref`* (*int*) is the font object number (may be zero if the PDF " +"uses one of the builtin fonts directly)" +msgstr "" +"*:data:`xref`* (*int*) " +"はフォントオブジェクト番号です(PDFが組み込みフォントを直接使用している場合、ゼロになることがあります)" + +#: ../../document.rst:952 f04105275ec7406c813b24e358165f3c +msgid "" +"**ext** (*str*) font file extension (e.g. \"ttf\", see " +":ref:`FontExtensions`)" +msgstr "**ext** (*str*) フォントファイルの拡張子(例: \"ttf\"、:ref:`FontExtensions` を参照)" + +#: ../../document.rst:953 18fe5995888b4d6cbd2090663098b08f +msgid "**type** (*str*) is the font type (like \"Type1\" or \"TrueType\" etc.)" +msgstr "**type** (*str*) フォントの種類(\"Type1\"や\"TrueType\"など)" + +#: ../../document.rst:954 46d4f4b7854f4c7bb33265a3bbea63e4 +msgid "**basefont** (*str*) is the base font name," +msgstr "**basefont** (*str*) ベースフォント名" + +#: ../../document.rst:955 b34d9fb1b13c48748456b908131517b0 +msgid "**name** (*str*) is the symbolic name, by which the font is referenced" +msgstr "**name** (*str*) フォントが参照される象徴的な名前" + +#: ../../document.rst:956 340cfb69f253428fad01b23c36d7d10c +msgid "" +"**encoding** (*str*) the font's character encoding if different from its " +"built-in encoding (:ref:`AdobeManual`, p. 254):" +msgstr "" +"**encoding** (*str*) " +"フォントの文字エンコーディング。組み込みエンコーディングと異なる場合(:ref:`AdobeManual`、p. 254を参照):" + +#: ../../document.rst:957 237434e9722c44f6a2a3a13c03118c18 +msgid "" +"**referencer** (*int* optional) the :data:`xref` of the referencer. Zero " +"if directly referenced by the page, otherwise the xref of an XObject. " +"Only present if *full=True*." +msgstr "" +"**referencer** (*int*、オプション) 参照元のxref。ページから直接参照されている場合は0、それ以外の場合はXObjectの" +" :data:`xref` です。 *full=True* の場合のみ存在します。" + +#: ../../document.rst:959 deb2aa7147f34e08aa316d1edbcde9b1 +msgid "Example::" +msgstr "例::" + +#: ../../document.rst:972 5a7a9c1645de41c6adc2914a03660dd2 +msgid "" +"This list has no duplicate entries: the combination of :data:`xref`, " +"*name* and *referencer* is unique." +msgstr "このリストには重複するエントリはありません::data:`xref` 、*name* 、および *referencer* の組み合わせは一意です。" + +#: ../../document.rst:973 b7bc024d7ade48f29ca45e6fbffac417 +#, fuzzy +msgid "" +"In general, this is a true superset of the fonts actually in use by this " +"page. The PDF creator may e.g. have specified some global list, of which " +"each page make only partial use." +msgstr "一般的に、これはこのページで実際に使用されているフォントのスーパーセットです。PDF作成者は、各ページが部分的にしか使用しない、グローバルリストを指定したかもしれません。" + +#: ../../document.rst:974 04ac002739ac4185a9858211f4e8bc62 +msgid "" +"Be aware that font names returned by some variants of " +":meth:`Page.get_text` (respectively :ref:`TextPage` methods) need not " +"(exactly) equal the base font name shown here. Reasons for any " +"differences include:" +msgstr "" + +#: ../../document.rst:976 152d5a7785474d7dbff306e85b09958d +msgid "" +"This method always shows any subset prefixes (the pattern ``ABCDEF+``), " +"whereas text extractions do not do this by default." +msgstr "" + +#: ../../document.rst:977 e1b5ec7f36b9446c906f18220ae53a37 +msgid "" +"Text extractions use the base library to access the font name, which has " +"a length cap of 31 bytes and generally interrogates the font file binary " +"to access the name. Method ``get_page_fonts()`` however looks at the PDF " +"definition source." +msgstr "" + +#: ../../document.rst:978 f30488f7a506418d877eebc2490fed5d +msgid "" +"Text extractions work for all supported document types in exactly the " +"same way -- not just for PDFs. Consequently they do not contain PDF-" +"specifics." +msgstr "" + +#: ../../document.rst:982 ad73ac2dd81547b28a9a20e0bb368d63 +msgid "" +"Extracts the text of a page given its page number *pno* (zero-based). " +"Invokes :meth:`Page.get_text`." +msgstr "ページ番号 *pno* (0から始まる)を指定して、ページのテキストを抽出します。:meth:`Page.get_text` を呼び出します。" + +#: ../../document.rst:984 b258e962454b4ab2b5f383f3356416d5 +msgid "page number, 0-based, any value `-∞ < pno < page_count`." +msgstr "ページ番号、0から始まる、任意の値 `-∞ < pno < page_count`。" + +#: ../../document.rst:986 4d32e6e7d1ff40b9997b6a2fab8d9414 +msgid "For other parameter refer to the page method." +msgstr "その他のパラメータについては、ページのメソッドを参照してください。" + +#: ../../document.rst:998 670a6c4d60ae4a4e856c0aed38a32b8d +msgid "" +"Re-paginate (\"reflow\") the document based on the given page dimension " +"and fontsize. This only affects some document types like e-books and " +"HTML. Ignored if not supported. Supported documents have ``True`` in " +"property :attr:`is_reflowable`." +msgstr "" +"与えられたページの寸法とフォントサイズに基づいて、ドキュメントを再ページ割り(\"リフロー\")します。これは電子書籍やHTMLなどの一部のドキュメントタイプにのみ影響します。サポートされていない場合は無視されます。サポートされているドキュメントには" +" :attr:`is_reflowable` プロパティで ``True`` が設定されています。" + +#: ../../document.rst:1000 ebf2748dcfce49bca4d672942c503996 +msgid "desired page size. Must be finite, not empty and start at point (0, 0)." +msgstr "望ましいページサイズ。有限で、空でなく、ポイント(0, 0)で始まる必要があります。" + +#: ../../document.rst:1001 86347c614af0436bb338d20c7582e940 +msgid "use it together with *height* as alternative to *rect*." +msgstr "*rect* との代替として、*height* と一緒に使用します。" + +#: ../../document.rst:1002 ea15e2c3e68549618e1fa6ed8f2cf0b0 +msgid "use it together with *width* as alternative to *rect*." +msgstr "*rect* との代替として、*width* と一緒に使用します。" + +#: ../../document.rst:1003 e860e1d18a5c42f2b1fe3b03e07d9196 +msgid "the desired default fontsize." +msgstr "望ましいデフォルトのフォントサイズ。" + +#: ../../document.rst:1007 f4ae5777a1ed4675a10cc9991509f306 +msgid "" +"PDF only: Keeps only those pages of the document whose numbers occur in " +"the list. Empty sequences or elements outside `range(doc.page_count)` " +"will cause a *ValueError*. For more details see remarks at the bottom or " +"this chapter." +msgstr "" +"PDFのみ:ドキュメントのページ番号がリストに含まれるページのみ保持します。空のシーケンスまたは範囲外の要素 " +"`range(doc.page_count)` は *ValueError* " +"を引き起こします。詳細については、この章の最後の注釈を参照してください。" + +#: ../../document.rst:1009 5d1c676de3e248aa9afa6e840a8e87c9 +msgid "" +"The sequence (see :ref:`SequenceTypes`) of page numbers (zero-based) to " +"be included. Pages not in the sequence will be deleted (from memory) and " +"become unavailable until the document is reopened. **Page numbers can " +"occur multiple times and in any order:** the resulting document will " +"reflect the sequence exactly as specified." +msgstr "" +"(:ref:`SequenceTypes`) – " +"ページ番号(0から始まる)のシーケンス(PyMuPDFで引数として使用する場合はPythonシーケンスを使用)を含めるためのもの。シーケンス内に含まれていないページは(メモリから)削除され、ドキュメントが再オープンされるまで利用できなくなります。**ページ番号は複数回発生し、任意の順序で発生できます:**" +" 結果のドキュメントは指定された正確なシーケンスを反映します。" + +#: ../../document.rst:1013 c24904baf72a426784816f2dec800563 +msgid "" +"Page numbers in the sequence need not be unique nor be in any particular " +"order. This makes the method a versatile utility to e.g. select only the " +"even or the odd pages or meeting some other criteria and so forth." +msgstr "シーケンス内のページ番号は一意である必要も、特定の順序である必要もありません。これにより、この方法は、例えば偶数のページだけを選択したり、奇数のページだけを選択したり、その他の基準を満たしたりするなど、多目的に使用できます。" + +#: ../../document.rst:1015 1eedae859790478fa6c58679c6a117e3 +msgid "" +"On a technical level, the method will always create a new " +":data:`pagetree`." +msgstr "技術的なレベルでは、この方法は常に新しい :data:`pagetree` を作成します。" + +#: ../../document.rst:1017 a4541d5a87e24452b0ce0cc2ca1f1117 +msgid "" +"When dealing with only a few pages, methods :meth:`copy_page`, " +":meth:`move_page`, :meth:`delete_page` are easier to use. In fact, they " +"are also **much faster** -- by at least one order of magnitude when the " +"document has many pages." +msgstr "" +"数ページしか扱わない場合、methods :meth:`copy_page` " +"、:meth:`move_page`、:meth:`delete_page` " +"を使用する方が簡単です。実際、これらの方法は、文書に多くのページがある場合でも、少なくとも1桁のオーダーで **高速です** 。" + +#: ../../document.rst:1022 9b3a0322272e49d7ab4240c67bf562c6 +msgid "" +"PDF only: Sets or updates the metadata of the document as specified in " +"*m*, a Python dictionary." +msgstr "PDFのみ: *m* で指定されたPythonの辞書と同じキーを持つ辞書を設定または更新します。" + +#: ../../document.rst:1024 c3400fc94d384392908cfb4b29dfa389 +msgid "" +"A dictionary with the same keys as *metadata* (see below). All keys are " +"optional. A PDF's format and encryption method cannot be set or changed " +"and will be ignored. If any value should not contain data, do not specify" +" its key or set the value to `None`. If you use *{}* all metadata " +"information will be cleared to the string *\"none\"*. If you want to " +"selectively change only some values, modify a copy of *doc.metadata* and " +"use it as the argument. Arbitrary unicode values are possible if " +"specified as UTF-8-encoded." +msgstr "" +"*metadata* " +"(以下参照)と同じキーを持つ辞書。すべてのキーはオプションです。PDFのフォーマットと暗号化方法は設定または変更できないため、無視されます。データを含めないべき値がある場合、そのキーを指定しないか、値を" +" `None` に設定しないでください。*{}* を使用すると、すべてのメタデータ情報が文字列 *\"none\"* " +"にクリアされます。一部の値のみを選択的に変更したい場合は、*doc.metadata* " +"のコピーを変更して引数として使用してください。UTF-8でエンコードされた場合、任意のUnicode値が指定可能です。" + +#: ../../document.rst:1026 a9f1934c049c4780b0818c16ed0fcc95 +msgid "" +"*(Changed in v1.18.4)* Empty values or \"none\" are no longer written, " +"but completely omitted." +msgstr "*(v1.18.4で変更)* 空の値または \"none\" は書き込まれなくなり、完全に省略されます。" + +#: ../../document.rst:1030 41d3c4ca07ec4f089d03386dccc57d6a +msgid "PDF only: Get the document XML metadata." +msgstr "PDFのみ:ドキュメントのXMLメタデータを取得します。" + +#: ../../document.rst:1033 25fd37327bb346108add254429abcd2d +msgid "XML metadata of the document. Empty string if not present or not a PDF." +msgstr "ドキュメントのXMLメタデータ。存在しない場合やPDFでない場合は空の文字列。" + +#: ../../document.rst:1037 2fdf0ee60d4b4edd9e3fa3d209aab99f +msgid "PDF only: Sets or updates XML metadata of the document." +msgstr "PDFのみ:ドキュメントのXMLメタデータを設定または更新します。" + +#: ../../document.rst:1039 e9c74b38abe24b6aab15e7e575325869 +msgid "" +"the new XML metadata. Should be XML syntax, however no checking is done " +"by this method and any string is accepted." +msgstr "xml(str)–新しいXMLメタデータ。XML構文である必要がありますが、このメソッドではチェックされず、任意の文字列が受け入れられます。" + +#: ../../document.rst:1044 ../../document.rst:1053 ../../document.rst:1062 +#: ../../document.rst:2000 ../../document.rst:2008 ../../document.rst:2016 +#: ../../document.rst:2024 ../../document.rst:2032 +#: 0f80c2e6f76943548daf29544192ee08 3f59c0024a1e452c9f3cc1d5e65ac6c8 +#: 5a2c60666f0f45e69c41fa94a13d52f0 a89d92b2c4fd4b89978475498a73d59e +#: a9a03d3f9bf543139f6dffe335c28111 c1cbcd07cce14eb19a42b3aec21e5df2 +#: ce98846a575c43db9f1ce69e181223f8 eb059f73cbfb45f89eb50d0b7a163741 +msgid "New in v1.22.2" +msgstr "v1.22.2で新登場" + +#: ../../document.rst:1046 7a89cff419794c21b4d37c19a0085600 +msgid "PDF only: Set the `/PageLayout`." +msgstr "PDFのみ:`/PageLayout` を設定します。" + +#: ../../document.rst:1048 d35d4f2dea4647fe9ee6c05e0920823c +msgid "" +"one of the strings \"SinglePage\", \"OneColumn\", \"TwoColumnLeft\", " +"\"TwoColumnRight\", \"TwoPageLeft\", \"TwoPageRight\". Lower case is " +"supported." +msgstr "以下の文字列のいずれか、\"SinglePage\"、\"OneColumn\"、\"TwoColumnLeft\"、\"TwoColumnRight\"、\"TwoPageLeft\"、\"TwoPageRight\"。小文字もサポートされています。" + +#: ../../document.rst:1055 8799ae6598b84f96ae31960223b42da9 +msgid "PDF only: Set the `/PageMode`." +msgstr "PDFのみ:`/PageMode` を設定します。" + +#: ../../document.rst:1057 3581829f53d74f5dbe7f0c6057610060 +msgid "" +"one of the strings \"UseNone\", \"UseOutlines\", \"UseThumbs\", " +"\"FullScreen\", \"UseOC\", \"UseAttachments\". Lower case is supported." +msgstr "以下の文字列のいずれか、\"UseNone\"、\"UseOutlines\"、\"UseThumbs\"、\"FullScreen\"、\"UseOC\"、\"UseAttachments\"。小文字もサポートされています。" + +#: ../../document.rst:1064 e4f86a3e7a9c450fa44c58e504cbfc52 +msgid "PDF only: Set the `/MarkInfo` values." +msgstr "PDFのみ:`/MarkInfo` の値を設定します。" + +#: ../../document.rst:1066 17f255ef8d8c4cecad9d64a169bfeee8 +msgid "" +"a dictionary like this one: `{\"Marked\": False, \"UserProperties\": " +"False, \"Suspects\": False}`. This dictionary contains information about " +"the usage of Tagged PDF conventions. For details please see the `PDF " +"specifications `_." +msgstr "" +"次のような辞書:`{\"Marked\": False, \"UserProperties\": False, \"Suspects\": " +"False}`。この辞書にはタグ付きPDF規則の使用に関する情報が含まれています。詳細については `PDF仕様 " +"`_ を参照してください。" + +#: ../../document.rst:1071 6e7e2e7e67574edea4027eced013ae29 +msgid "" +"PDF only: Replaces the **complete current outline** tree (table of " +"contents) with the one provided as the argument. After successful " +"execution, the new outline tree can be accessed as usual via " +":meth:`Document.get_toc` or via :attr:`Document.outline`. Like with other" +" output-oriented methods, changes become permanent only via :meth:`save` " +"(incremental save supported). Internally, this method consists of the " +"following two steps. For a demonstration see example below." +msgstr "" +"PDFのみ:提供された引数で **現在のアウトラインツリー(目次)全体** を置き換えます。成功した実行後、新しいアウトラインツリーは通常通り D" +" :meth:`Document.get_toc` または :attr:`Document.outline` " +"を使用してアクセスできます。他の出力指向のメソッドと同様に、変更は :meth:`save` " +"(増分保存対応)を介してのみ永続的になります。内部的には、このメソッドは次の2つのステップで構成されています。デモンストレーションについては以下の例を参照してください。" + +#: ../../document.rst:1073 791c9c71e2184c7f81fd6a3d8ff7c22c +msgid "Step 1 deletes all existing bookmarks." +msgstr "ステップ1:すべての既存のブックマークを削除します。" + +#: ../../document.rst:1075 c786a74e27b1445d983bfe65ed6efbbd +msgid "Step 2 creates a new TOC from the entries contained in *toc*." +msgstr "ステップ2:*toc* に含まれるエントリを使用して新しいTOCを作成します。" + +#: ../../document.rst:1077 01c47a1265f840ed992ec7c8f24007b7 +msgid "" +"A list / tuple with **all bookmark entries** that should form the new " +"table of contents. Output variants of :meth:`get_toc` are acceptable. To " +"completely remove the table of contents specify an empty sequence or " +"None. Each item must be a list with the following format. * [lvl, title," +" page [, dest]] where - **lvl** is the hierarchy level (int > 0) of " +"the item, which **must be 1** for the first item and at most 1 larger " +"than the previous one. - **title** (str) is the title to be displayed." +" It is assumed to be UTF-8-encoded (relevant for multibyte code points " +"only). - **page** (int) is the target page number **(attention: " +"1-based)**. Must be in valid range if positive. Set it to -1 if there is " +"no target, or the target is external. - **dest** (optional) is a " +"dictionary or a number. If a number, it will be interpreted as the " +"desired height (in points) this entry should point to on the page. Use a " +"dictionary (like the one given as output by `get_toc(False)`) for a " +"detailed control of the bookmark's properties, see " +":meth:`Document.get_toc` for a description." +msgstr "" + +#: ../../document.rst:1079 c7dd9bee1ec14c3496fc2aa291b50dfb +msgid "" +"A list / tuple with **all bookmark entries** that should form the new " +"table of contents. Output variants of :meth:`get_toc` are acceptable. To " +"completely remove the table of contents specify an empty sequence or " +"None. Each item must be a list with the following format." +msgstr "" +"新しい目次を形成するための **すべてのブックマークエントリ** を含むリスト/タプル。:meth:`get_toc` " +"の出力バリエーションが許容されます。目次を完全に削除するには、空のシーケンスまたはNoneを指定してください。各アイテムは、次の形式である必要があります。" + +#: ../../document.rst:1081 38299a74fc0349db86848d437f980ca7 +msgid "[lvl, title, page [, dest]] where" +msgstr "[lvl, title, page [, dest]] ここで" + +#: ../../document.rst:1083 30de0bbe44cb439fbdcdef536e433e50 +msgid "" +"**lvl** is the hierarchy level (int > 0) of the item, which **must be 1**" +" for the first item and at most 1 larger than the previous one." +msgstr "**lvl** はアイテムの階層レベル(int > 0)で、最初のアイテムの場合は1で、前のアイテムより最大1大きくする必要があります。" + +#: ../../document.rst:1085 e09bf06761b0434a8b8d1fe4c3013804 +msgid "" +"**title** (str) is the title to be displayed. It is assumed to be " +"UTF-8-encoded (relevant for multibyte code points only)." +msgstr "" +"**title** " +"(str)は表示されるタイトルです。UTF-8でエンコードされていると仮定されています(マルチバイトコードポイントの場合に関連します)。" + +#: ../../document.rst:1087 6f3cd9c55d3b4dce865154bff0d76b74 +msgid "" +"**page** (int) is the target page number **(attention: 1-based)**. Must " +"be in valid range if positive. Set it to -1 if there is no target, or the" +" target is external." +msgstr "" +"**page**(int)は対象のページ番号です **(注意:1から始まります)** " +"。正の場合、有効な範囲内になければなりません。対象がない場合、または対象が外部の場合は-1に設定します。" + +#: ../../document.rst:1089 bf68607517eb40ccafef1c06f1d77730 +msgid "" +"**dest** (optional) is a dictionary or a number. If a number, it will be " +"interpreted as the desired height (in points) this entry should point to " +"on the page. Use a dictionary (like the one given as output by " +"`get_toc(False)`) for a detailed control of the bookmark's properties, " +"see :meth:`Document.get_toc` for a description." +msgstr "" +"**dest** " +"(オプション)は辞書または数値です。数値の場合、このエントリがページ上で指し示すべき目標の高さ(ポイント単位)と解釈されます。詳細なブックマークのプロパティを制御するには(`get_toc(False)`" +" によって出力されるものと同様の辞書を使用してください)、:meth:`Document.get_toc` の説明を参照してください。" + +#: ../../document.rst:1091 97edf9c633a944dc8cdc27675efe2f29 +msgid "" +"*(new in v1.16.9)* controls the hierarchy level beyond which outline " +"entries should initially show up collapsed. The default 1 will hence only" +" display level 1, higher levels must be unfolded using the PDF viewer. To" +" unfold everything, specify either a large integer, 0 or None." +msgstr "" +"*(v1.16.9で新規追加)* " +"アウトラインエントリが初めて折りたたまれて表示される階層レベルを制御します。デフォルトの1はレベル1のみを表示し、より高いレベルはPDFビューアを使用して展開する必要があります。すべてを展開するには、大きな整数、0、またはNoneを指定してください。" + +#: ../../document.rst:1094 f08551f3aadd45be8c4ac339de74411e +msgid "the number of inserted, resp. deleted items." +msgstr "挿入されたアイテム、または削除されたアイテムの数。" + +#: ../../document.rst:1096 c8b19a664c41408894120b76b42475b1 +msgid "" +"Changed in v1.23.8: Destination 'to' coordinates should now be in the " +"same coordinate system as those returned by `get_toc()` (internally they " +"are now transformed with `page.cropbox` and `page.rotation_matrix`). So " +"for example `set_toc(get_toc())` now gives unchanged destination 'to' " +"coordinates." +msgstr "" +"1.23.8 で変更されました: デスティネーションの 'to' 座標は、現在 `get_toc()` で返される座標系と同じ座標系であるべきです" +" (内部的には `page.cropbox` と `page.rotation_matrix` で変換されます)。したがって、例えば " +"`set_toc(get_toc())` は、変更されていない 'to' 座標を与えます。" + +#: ../../document.rst:1106 1b44b9477da5409fa135b1af7596c6c7 +msgid "" +"PDF only: Return the :data:`xref` of the outline item. This is mainly " +"used for internal purposes." +msgstr "PDFのみ:アウトラインアイテムの :data:`xref` を返します。これは主に内部用途で使用されます。" + +#: ../../document.rst:1108 46ab12633f9f48c5ac377c51eaeed3cf +#, fuzzy +msgid "index of the item in list :meth:`Document.get_toc`." +msgstr ":meth:`Document.get_toc` のリスト内のアイテムのインデックス。" + +#: ../../document.rst:1110 de8f2ba5f5fa48f388f70daecdd094b0 +msgid ":data:`xref`." +msgstr "" + +#: ../../document.rst:1115 183f533e24964bd29ae666eadc3bf9a1 +msgid "" +"Changed in v1.18.14: no longer remove the item's text, but show it " +"grayed-out." +msgstr "v1.18.14で変更: アイテムのテキストを削除しなくなり、灰色で表示されます。" + +#: ../../document.rst:1117 6aedd6b394514138a6f0cc42782782e1 +msgid "" +"PDF only: Remove this TOC item. This is a high-speed method, which " +"**disables** the respective item, but leaves the overall TOC structure " +"intact. Physically, the item still exists in the TOC tree, but is shown " +"grayed-out and will no longer point to any destination." +msgstr "" +"PDFのみ: この目次アイテムを削除します。これは高速なメソッドで、該当するアイテムを **無効にします** " +"が、全体の目次構造はそのままです。物理的には、アイテムはまだ目次ツリーに存在しますが、灰色で表示され、もはやどの宛先も指し示しません。" + +#: ../../document.rst:1119 b0c8964d54534f91afc361d747cda4b4 +msgid "" +"This also implies that you can reassign the item to a new destination " +"using :meth:`Document.set_toc_item`, when required." +msgstr "" +"これはまた、必要な場合に:meth:`Document.set_toc_item` " +"を使用してアイテムを新しい宛先に再割り当てできることを意味します。" + +#: ../../document.rst:1121 f863cf2617b04cabafa956b26cf5fddb +msgid "the index of the item in list :meth:`Document.get_toc`." +msgstr ":meth:`Document.get_toc` のリスト内のアイテムのインデックス。" + +#: ../../document.rst:1127 b9b8e1aae2154d798b5a9c69f9716526 +msgid "Changed in v1.18.6" +msgstr "v1.18.6で変更" + +#: ../../document.rst:1129 cd7352b98859448391d3bceecde81fd8 +msgid "" +"PDF only: Changes the TOC item identified by its index. Change the item " +"**title**, **destination**, **appearance** (color, bold, italic) or " +"collapsing sub-items -- or to remove the item altogether." +msgstr "" +"PDFのみ: インデックスによって識別されるTOCアイテムを変更します。アイテムの **タイトル** 、**宛先** 、 **外観** " +"(色、太字、イタリック)を変更したり、サブアイテムを折りたたんだり、アイテムを完全に削除したりするために使用します。" + +#: ../../document.rst:1131 9cc7c9f824384d8d80e92eea4a4b5887 +msgid "" +"Use this method if you need specific changes for selected entries only " +"and want to avoid replacing the complete TOC. This is beneficial " +"especially when dealing with large table of contents." +msgstr "選択したエントリに対して特定の変更が必要で、完全なTOCを置き換えたくない場合にこのメソッドを使用します。大きな目次を扱う場合に特に便利です。" + +#: ../../document.rst:1133 6b4e00f4ee6148a78200cb3ad35ca391 +msgid "the index of the entry in the list created by :meth:`Document.get_toc`." +msgstr ":meth:`Document.get_toc` によって作成されたリスト内のエントリのインデックス。" + +#: ../../document.rst:1134 e812d5f67b2a410f9087d76dd19d4c85 +msgid "" +"the new destination. A dictionary like the last entry of an item in " +"`doc.get_toc(False)`. Using this as a template is recommended. When " +"given, **all other parameters are ignored** -- except title." +msgstr "" +"新しい宛先。`doc.get_toc(False)` のアイテムの最後のエントリのような辞書を使用することが推奨されます。指定された場合、他の " +"**すべてのパラメータは無視されます** - タイトル以外。" + +#: ../../document.rst:1135 9fb602c3c81f4ba0a0e6078792c10a32 +msgid "" +"the link kind, see :ref:`linkDest Kinds`. If :data:`LINK_NONE`, then all " +"remaining parameter will be ignored, and the TOC item will be removed -- " +"same as :meth:`Document.del_toc_item`. If None, then only the title is " +"modified and the remaining parameters are ignored. All other values will " +"lead to making a new destination dictionary using the subsequent " +"arguments." +msgstr "" +"リンクの種類、:ref:`linkDest Kinds` を参照してください。:data:`LINK_NONE` " +"の場合、残りのパラメータはすべて無視され、TOCアイテムは削除されます - :meth:`Document.del_toc_item` " +"と同じです。Noneの場合、タイトルのみが変更され、残りのパラメータは無視されます。それ以外の値は、後続の引数を使用して新しい宛先辞書を作成します。" + +#: ../../document.rst:1136 c8fd861096fa449a8a72b9c25bc74212 +msgid "" +"the 1-based page number, i.e. a value 1 <= pno <= doc.page_count. " +"Required for LINK_GOTO." +msgstr "1から始まるページ番号、つまり1 <= pno <= doc.page_count。LINK_GOTOの場合に必要です。" + +#: ../../document.rst:1137 e3124c2dabb84235a2dab26fa5e4313b +msgid "the URL text. Required for LINK_URI." +msgstr "URLのテキスト。LINK_URIの場合に必要です。" + +#: ../../document.rst:1138 4d9ab75988b84c51a00d505cad5d6597 +msgid "the desired new title. None if no change." +msgstr "新しいタイトル。変更しない場合はNone。" + +#: ../../document.rst:1139 4da7aca1af47456987cba8fc8a789cf5 +msgid "" +"(optional) points to a coordinate on the target page. Relevant for " +"LINK_GOTO. If omitted, a point near the page's top is chosen." +msgstr "(オプション)対象ページ上の座標を指定します。LINK_GOTOの場合に関連します。省略された場合、ページの上部近くのポイントが選択されます。" + +#: ../../document.rst:1140 a1ec74204c6e4cea9d578172ca6472de +msgid "required for LINK_GOTOR and LINK_LAUNCH." +msgstr "LINK_GOTORおよびLINK_LAUNCHに必要です。" + +#: ../../document.rst:1141 98848c95243e4967ae1b563865daec93 +msgid "use this zoom factor when showing the target page." +msgstr "対象ページを表示する際にこのズームファクターを使用します。" + +#: ../../document.rst:1143 07b9d1f04f8b4b1eb0cfe2fbdcb95bec +msgid "**Example use:** Change the TOC of the SWIG manual to achieve this:" +msgstr "**使用例:** SWIGマニュアルのTOCを変更して、次のことを達成します:" + +#: ../../document.rst:1145 cba2012437d147cb8d7e4d8043ab9045 +msgid "" +"Collapse everything below top level and show the chapter on Python " +"support in red, bold and italic::" +msgstr "トップレベル以下をすべて折りたたみ、Pythonサポートの章を赤色で太字かつイタリックで表示::" + +#: ../../document.rst:1162 4b7969ea1ad749c3ba251a7815355131 +msgid "" +"In the previous example, we have changed only 42 of the 1240 TOC items of" +" the file." +msgstr "前の例では、ファイルの1240のTOCアイテムのうち、わずか42のみを変更しました。" + +#: ../../document.rst:1166 ef06c1c4efa948a68ccfa92c2da560d5 +msgid "" +"PDF only: Convert annotations and / or widgets to become permanent parts " +"of the pages. The PDF **will be changed** by this method. If `widgets` is" +" `True`, the document will also no longer be a \"Form PDF\"." +msgstr "" +"PDF のみ: 注釈やウィジェットをページの永続的な部分に変換します。このメソッドによって PDF **は変更されます**。 `widgets` " +"が `True` の場合、ドキュメントは「フォーム PDF」ではなくなります。" + +#: ../../document.rst:1168 3c8b2c2bbd0e46588b86ed44ebf4977f +msgid "" +"All pages will look the same, but will no longer have annotations, " +"respectively fields. The visible parts will be converted to standard " +"text, vector graphics or images as required." +msgstr "すべてのページが同じように見えますが、注釈またはフィールドがなくなります。必要に応じて、表示される部分は標準のテキスト、ベクトルグラフィックス、または画像に変換されます。" + +#: ../../document.rst:1170 2d05aa4abfb34b2a9795d88893ac9f2b +msgid "" +"The method may thus be a viable **alternative for PDF-to-PDF " +"conversions** using :meth:`Document.convert_to_pdf`." +msgstr "" +"このメソッドは、:meth:`Document.convert_to_pdf` を使用して PDF を PDF " +"に変換する際の代替手段として有効な選択肢です。" + +#: ../../document.rst:1172 edbcbf03d0284b9ab26884f37b7fd612 +msgid "" +"Please consider that annotations are complex objects and may consist of " +"more data \"underneath\" their visual appearance. Examples are \"Text\" " +"and \"FileAttachment\" annotations. When \"baking in\" annotations / " +"widgets with this method, all this underlying information (attached " +"files, comments, associated PopUp annotations, etc.) will be lost and be " +"removed on next garbage collection." +msgstr "" +"注釈は複雑なオブジェクトであり、視覚的な外観の下にさらに多くのデータが存在する場合があります。例としては、\"Text\" や " +"\"FileAttachment\" " +"の注釈が挙げられます。このメソッドで注釈やウィジェットを組み込む際には、この下部情報(添付ファイル、コメント、関連するポップアップ注釈など)がすべて失われ、次のガベージコレクション時に削除されます。" + +#: ../../document.rst:1174 c30265d88f5d4a7d9c1019d6774131b8 +#, fuzzy +msgid "" +"Use this feature for instance for :meth:`Page.show_pdf_page` (which " +"supports neither annotations nor widgets) when the source pages should " +"look exactly the same in the target." +msgstr "" +"たとえば、 :meth:`Document.insert_pdf` (ウィジェットのコピーをサポートしていない)や " +":meth:`Page.show_pdf_page` " +"(注釈やウィジェットをサポートしていない)などのメソッドで、ソースページがターゲットで完全に同じように見えるようにする場合にこの機能を使用します。" + +#: ../../document.rst:1177 d1469dec5e3249dabbf9a298e8466705 +msgid "convert annotations." +msgstr "注釈を変換するかどうか。" + +#: ../../document.rst:1178 dd66637740294e09ba7b8e36f33a8206 +msgid "" +"convert fields / widgets. After execution, the document will no longer be" +" a \"Form PDF\"." +msgstr "フィールド / ウィジェットを変換します。実行後、ドキュメントはもはや「フォーム PDF」ではありません。" + +#: ../../document.rst:1183 b05fb5b75e5a4162b9f73314aa643aa6 +msgid "New in v1.16.0" +msgstr "v1.16.0で新規追加" + +#: ../../document.rst:1185 c1f4ba67c1b24bf6867b2fd21bb8cccc +msgid "" +"Check whether the document can be saved incrementally. Use it to choose " +"the right option without encountering exceptions." +msgstr "ドキュメントを増分保存できるかどうかを確認します。例外を発生させずに正しいオプションを選択するために使用します。" + +#: ../../document.rst:1189 41e3e4bdc3424c10aafd4d97c601e30d +msgid "New in v1.16.14" +msgstr "v1.16.14で新規追加" + +#: ../../document.rst:1191 831d489a84d740a984bb6dde04e39587 +msgid "" +"PDF only: Remove potentially sensitive data from the PDF. This function " +"is inspired by the similar \"Sanitize\" function in Adobe Acrobat " +"products. The process is configurable by a number of options." +msgstr "" +"PDFのみ:PDFから潜在的に機密性の高いデータを削除します。この関数はAdobe " +"Acrobat製品の類似の「Sanitize」機能にインスパイアを受けたものです。プロセスはさまざまなオプションで設定可能です。" + +#: ../../document.rst:1193 6f8418ad5d594c2685d50bc277597149 +msgid "Search for 'FileAttachment' annotations and remove the file content." +msgstr "'FileAttachment' 注釈を検索してファイルコンテンツを削除します。" + +#: ../../document.rst:1194 ae021ac28be945bd8cd77cf390db94fb +msgid "" +"Remove any comments from page painting sources. If this option is set to " +"``False``, then this is also done for *hidden_text* and *redactions*." +msgstr "" +"ページ描画ソースからコメントを削除します。このオプションが ``False`` に設定されている場合、*hidden_text* と " +"*redactions* に対しても同様の処理が行われます。" + +#: ../../document.rst:1195 660dc58f6a0f44e69faa091b43161b8e +msgid "Remove embedded files." +msgstr "埋め込みファイルを削除します。" + +#: ../../document.rst:1196 5104963ff403431481222cfd5218c3ec +msgid "Remove OCRed text and invisible text [#f7]_." +msgstr "OCRedテキストと不可視テキストを削除します [#f7]_。" + +#: ../../document.rst:1197 4dfbac2778a64056ba97610453c08522 +msgid "Remove JavaScript sources." +msgstr "JavaScriptソースを削除します。" + +#: ../../document.rst:1198 74cac565ea5943e59ad48e5ef187e1dc +msgid "Remove PDF standard metadata." +msgstr "PDF標準のメタデータを削除します。" + +#: ../../document.rst:1199 4db71b1a67b74f88ba5f0ae8b3c0552c +msgid "Apply redaction annotations." +msgstr "レダクション注釈を適用します。" + +#: ../../document.rst:1200 455b9180bd39482ca48bc22b220d7c1b +msgid "" +"how to handle images if applying redactions. One of 0 (ignore), 1 (blank " +"out overlaps) or 2 (remove)." +msgstr "レダクションを適用する場合の画像の処理方法。0(無視)、1(オーバーラップをブランク化)、または2(削除)のいずれかです。" + +#: ../../document.rst:1201 4efb835789e3448f850ed836092cadbd +msgid "Remove all links." +msgstr "すべてのリンクを削除します。" + +#: ../../document.rst:1202 a04b25ebc4be4ab78fd4025c31825208 +msgid "Reset all form fields to their defaults." +msgstr "すべてのフォームフィールドをデフォルトにリセットします。" + +#: ../../document.rst:1203 d2fdd0f9a581444f8169ad8e41f10582 +msgid "Remove all responses from all annotations." +msgstr "すべての注釈からすべての応答を削除します。" + +#: ../../document.rst:1204 425b0e0e0fa44a04be0e85f4ead6310e +msgid "Remove thumbnail images from pages." +msgstr "ページからサムネイル画像を削除します。" + +#: ../../document.rst:1205 322bd83e66124ae39a349175bf2440a5 +msgid "Remove XML metadata." +msgstr "XMLメタデータを削除します。" + +#: ../../document.rst:1210 ../../document.rst:1287 +#: 80c798afa80c4f6080e78b5de0336fd6 a03c50fd205f46eeb779eb48aa917435 +msgid "Changed in v1.18.7" +msgstr "v1.18.7で変更" + +#: ../../document.rst:1211 ../../document.rst:1288 +#: 6cc5089ce6d64019a52eafa8037879b1 ee9e935b08974827b8c9e8eadaefe2fb +msgid "Changed in v1.19.0" +msgstr "v1.19.0で変更" + +#: ../../document.rst:1212 ../../document.rst:1289 +#: 94c5625550364c218d32a9427d6f6d74 b9f4e4ff00684e08926149ff521dda37 +#, fuzzy +msgid "Changed in v1.24.1" +msgstr "v1.14.12で変更" + +#: ../../document.rst:1214 bf084f82a22c44c38ac1c20f5267276d +msgid "PDF only: Saves the document in its **current state**." +msgstr "PDFのみ:ドキュメントの **現在の状態** を保存します。" + +#: ../../document.rst:1216 888d6783b4d84d5db564523f80262aac +msgid "" +"The file path, `pathlib.Path` or file object to save to. A file object " +"must have been created before via `open(...)` or `io.BytesIO()`. Choosing" +" `io.BytesIO()` is similar to :meth:`Document.tobytes` below, which " +"equals the `getvalue()` output of an internally created `io.BytesIO()`." +msgstr "" +"保存先のファイルパス、`pathlib.Path` 、またはファイルオブジェクト。ファイルオブジェクトは `open(...)` または " +"`io.BytesIO()` を介して事前に作成されている必要があります。`io.BytesIO()` を選択することは、以下の " +":meth:`Document.tobytes` と同等で、内部で作成された `io.BytesIO()` の `getvalue()` " +"出力に等しいです。" + +#: ../../document.rst:1218 53b2f2f5f0b340ac918bec088d5b4e88 +msgid "" +"Do garbage collection. Positive values exclude \"incremental\". * 0 = " +"none * 1 = remove unused (unreferenced) objects. * 2 = in addition to 1, " +"compact the :data:`xref` table. * 3 = in addition to 2, merge duplicate " +"objects. * 4 = in addition to 3, check :data:`stream` objects for " +"duplication. This may be slow because such data are typically large." +msgstr "" + +#: ../../document.rst:1218 7efba68731ea4ff284b87f46f662b83c +msgid "Do garbage collection. Positive values exclude \"incremental\"." +msgstr "ガベージコレクションを実行します。正の値は「増分」を除外します。" + +#: ../../document.rst:1220 ../../document.rst:1238 +#: a3229c6eac6941e192f4df52120f797b c973b26bd6aa4aa58c7afd967487a60e +msgid "0 = none" +msgstr "0 = なし" + +#: ../../document.rst:1221 d0d8843048a44405847d3294c003cd6f +msgid "1 = remove unused (unreferenced) objects." +msgstr "1 = 未使用(参照されていない)オブジェクトを削除します。" + +#: ../../document.rst:1222 84a303b468a04dc4a0521209bcdc388c +msgid "2 = in addition to 1, compact the :data:`xref` table." +msgstr "2 = 1に加えて、:data:`xref` テーブルを最適化します。" + +#: ../../document.rst:1223 1d4e31d30172446eba5010679376acf6 +msgid "3 = in addition to 2, merge duplicate objects." +msgstr "3 = 2に加えて、重複したオブジェクトを統合します。" + +#: ../../document.rst:1224 37a687cab330413f9397288384e42c13 +msgid "" +"4 = in addition to 3, check :data:`stream` objects for duplication. This " +"may be slow because such data are typically large." +msgstr "4 = 3に加えて、ストリームオブジェクトの重複をチェックします。これは、そのようなデータが通常大きいため、遅い場合があります。" + +#: ../../document.rst:1226 1137c653f68a4160a407c0ac30f70d42 +msgid "" +"Clean and sanitize content streams [#f1]_. Corresponds to \"mutool clean " +"-sc\"." +msgstr "コンテンツストリームをクリーンアップおよびサニタイズします [#f1]_。これは「mutool clean -sc」に対応します。" + +#: ../../document.rst:1228 4fe2857bcc254ad98f3ea8856af1c985 +msgid "Deflate (compress) uncompressed streams." +msgstr "未圧縮のストリームをデフレート(圧縮)します。" + +#: ../../document.rst:1229 062ab8a185724229b02afcc813ab29e4 +msgid "*(new in v1.18.3)* Deflate (compress) uncompressed image streams [#f4]_." +msgstr "*(v1.18.3で新規追加)* 未圧縮の画像ストリームをデフレート(圧縮)します [#f4]_。" + +#: ../../document.rst:1230 1e5e79417a27470a9b7536921b0697cc +msgid "" +"*(new in v1.18.3)* Deflate (compress) uncompressed fontfile streams " +"[#f4]_." +msgstr "(v1.18.3で新規追加)未圧縮のフォントファイルストリームをデフレート(圧縮)します [#f4]_。" + +#: ../../document.rst:1232 e79d1702eaa84c59bcd3d53f88c1060d +msgid "" +"Only save changes to the PDF. Excludes \"garbage\" and \"linear\". Can " +"only be used if *outfile* is a string or a `pathlib.Path` and equal to " +":attr:`Document.name`. Cannot be used for files that are decrypted or " +"repaired and also in some other cases. To be sure, check " +":meth:`Document.can_save_incrementally`. If this is false, saving to a " +"new file is required." +msgstr "" +"PDFへの変更を保存します。 \"garbage\" および \"linear\" を除外します。*outfile* が文字列または " +"`pathlib.Path` であり、:attr:`Document.name` " +"に等しい場合にのみ使用できます。復号化または修復されたファイルおよび一部の他の場合には使用できません。確実にするために、:meth:`Document.can_save_incrementally`" +" を確認してください。これがFalseの場合、新しいファイルに保存する必要があります。" + +#: ../../document.rst:1234 a9a373ed3f6743cf99dd5aee541254be +msgid "convert binary data to ASCII." +msgstr "バイナリデータをASCIIに変換します。" + +#: ../../document.rst:1236 e825d9d3a8884ad2a569e181c0a15562 +msgid "" +"Decompress objects. Generates versions that can be better read by some " +"other programs and will lead to larger files. * 0 = none * 1 = images * " +"2 = fonts * 255 = all" +msgstr "" + +#: ../../document.rst:1236 e9848e0cf34b4ca7a118b7514fe07ef2 +msgid "" +"Decompress objects. Generates versions that can be better read by some " +"other programs and will lead to larger files." +msgstr "オブジェクトを展開します。他のプログラムにより読みやすいバージョンを生成し、ファイルサイズが大きくなります。" + +#: ../../document.rst:1239 31e7fe586f994835ae20290ce1782cbe +msgid "1 = images" +msgstr "1 = 画像" + +#: ../../document.rst:1240 793a606defb04ae7b1787243445f0db0 +msgid "2 = fonts" +msgstr "2 = フォント" + +#: ../../document.rst:1241 cc23f66f1b9242f29d94d08b2c6799a7 +msgid "255 = all" +msgstr "255 = すべて" + +#: ../../document.rst:1243 6b3c690a7f944f17b9432682b66ab1b4 +#, fuzzy +msgid "" +"Save a linearised version of the document. This option creates a file " +"format for improved performance for Internet access. Excludes " +"\"incremental\" and \"use_objstms\"." +msgstr "" +"ドキュメントの線形バージョンを保存します。このオプションは、インターネットアクセスのパフォーマンス向上のためのファイル形式を作成します。 " +"\"増分\" を除外します。" + +#: ../../document.rst:1245 e1ddd0f43c05434ab3b80e71b45039a7 +msgid "" +"Prettify the document source for better readability. PDF objects will be " +"reformatted to look like the default output of " +":meth:`Document.xref_object`." +msgstr "" +"ドキュメントソースを見やすく整形します。PDFオブジェクトは、:meth:`Document.xref_object` " +"のデフォルト出力のように再フォーマットされます。" + +#: ../../document.rst:1247 c90f9b7287884d9e838eb1a205e8b10c +msgid "" +"Suppress the update of the file's `/ID` field. If the file happens to " +"have no such field at all, also suppress creation of a new one. Default " +"is `False`, so every save will lead to an updated file identification." +msgstr "" +"ファイルの `/ID` フィールドの更新を抑制します。 " +"ファイルにそのようなフィールドがまったくない場合、新しいフィールドの作成も抑制します。デフォルトは `False` " +"で、各保存でファイル識別情報が更新されます。" + +#: ../../document.rst:1249 a45bccacac804f9e8e2fe9954896e0ce +msgid "" +"*(new in v1.16.0)* Set the desired permission levels. See " +":ref:`PermissionCodes` for possible values. Default is granting all." +msgstr "" +"(v1.16.0で新規追加)希望の権限レベルを設定します。可能な値については :ref:`PermissionCodes` " +"を参照してください。デフォルトはすべてを許可します。" + +#: ../../document.rst:1251 27eb2c84ab3e4ac8a20455ef9980b23a +msgid "" +"*(new in v1.16.0)* set the desired encryption method. See " +":ref:`EncryptionMethods` for possible values." +msgstr "" +"*(v1.16.0で新規追加)* 希望の暗号化メソッドを設定します。可能な値については :ref:`EncryptionMethods` " +"を参照してください。" + +#: ../../document.rst:1253 8e39572e62904c4e93c05ddd7bcc7198 +msgid "" +"*(new in v1.16.0)* set the document's owner password. *(Changed in " +"v1.18.3)* If not provided, the user password is taken if provided. The " +"string length must not exceed 40 characters." +msgstr "" +"*(v1.16.0で新規追加)* ドキュメントの所有者パスワードを設定します。 (v1.18.3で変更) " +"指定しない場合、ユーザーパスワードが提供された場合にユーザーパスワードが使用されます。文字列の長さは40文字を超えてはいけません。" + +#: ../../document.rst:1255 02459360d62b445f90c1f256fa56bfb9 +msgid "" +"*(new in v1.16.0)* set the document's user password. The string length " +"must not exceed 40 characters." +msgstr "(v1.16.0で新規追加)ドキュメントのユーザーパスワードを設定します。文字列の長さは40文字を超えてはいけません。" + +#: ../../document.rst:1257 896c5b94bd874b09b79318b506cdd76d +msgid "" +"*(new in v1.24.0)* compression option that converts eligible PDF object " +"definitions to information that is stored in some other object's " +":data:`stream` data. Depending on the `deflate` parameter value, the " +"converted object definitions will be compressed -- which can lead to very" +" significant file size reductions." +msgstr "" +"*(v1.24.0で新機能)* 可変化PDFオブジェクト定義を他のオブジェクトの :data:`stream` " +"データに格納される情報に変換する圧縮オプションです。`deflate` " +"パラメータの値に応じて、変換されたオブジェクト定義が圧縮されます。これにより、ファイルサイズが非常に大幅に削減される可能性があります。" + +#: ../../document.rst:1259 289ef1c582d84a49aa961c905ef123c1 +msgid "" +"The method does not check, whether a file of that name already exists, " +"will hence not ask for confirmation, and overwrite the file. It is your " +"responsibility as a programmer to handle this." +msgstr "このメソッドは、その名前のファイルがすでに存在するかどうかをチェックしません。したがって、確認を求めずにファイルを上書きします。これについては、プログラマーとしての責任があります。" + +#: ../../document.rst:1263 a498e025ef0847c4bac9ebdeb7895f8d +msgid "**File size reduction**" +msgstr "**ファイルサイズの削減**" + +#: ../../document.rst:1265 e2a31e8ad643474899c1129d6b2574df +#, fuzzy +msgid "" +"1. Use the save options like `garbage=3|4, deflate=True, " +"use_objstms=True|1`. Do not touch the default values `expand=False|0, " +"clean=False|0, incremental=False|0, linear=False|0`. This is a " +"\"lossless\" file size reduction. There is a convenience version of this " +"method with these values set by default, :meth:`Document.ez_save` -- " +"please see below." +msgstr "" +"オプションとして、`garbage=3|4, deflate=True, use_objstms=True|1`. " +"デフォルト値に触れないでください。 `expand=False|0, clean=False|0, incremental=False|0` " +"は変更しないでください。これは「非破壊的」なファイルサイズの削減です。これらの値がデフォルトで設定されたメソッドの簡易版もあります。 " +":meth:`Document.ez_save()` をご覧ください。" + +#: ../../document.rst:1268 02162b7327224823bfe59a1333d7bb60 +msgid "" +"\"Lossy\" file size reduction in essence must give up something with " +"respect to images, like (a) remove all images (b) replace images by their" +" grayscale versions (c) reduce image resolutions. Find examples in the " +"`PyMuPDF Utilities \"replace-image\" folder `_." +msgstr "" +"本質的に「損失のある」ファイルサイズの削減は、画像に関して何かを犠牲にする必要があります。例えば、(a) すべての画像を削除する、(b) " +"画像をグレースケールに置換する、(c) 画像の解像度を低下させるなどの方法があります。`PyMuPDF Utilities \"replace-" +"image\" folder `_ に例があります。" + +#: ../../document.rst:1272 3c2052c0fe104385a7fa4378f718d772 +msgid "New in v1.18.11" +msgstr "v1.18.11で新規追加" + +#: ../../document.rst:1274 e779ad2fbbce4acdb947d7e7224bb271 +msgid "" +"PDF only: The same as :meth:`Document.save` but with changed defaults " +"`deflate=True, garbage=3, use_objstms=1`." +msgstr "" +"PDFのみ::meth:`Document.save` と同じですが、デフォルト値が `deflate=True, garbage=3, " +"use_objstms=1` に変更されています。" + +#: ../../document.rst:1278 a086d3ff8a9546cd8c3474b44628a32e +msgid "" +"PDF only: saves the document incrementally. This is a convenience " +"abbreviation for *doc.save(doc.name, incremental=True, " +"encryption=PDF_ENCRYPT_KEEP)*." +msgstr "" +"PDFのみ:ドキュメントを増分的に保存します。これは、 *doc.save(doc.name, incremental=True, " +"encryption=PDF_ENCRYPT_KEEP)* の簡略表記です。" + +#: ../../document.rst:1282 173d1bc3507841f6b0154a4d61ab6ec4 +msgid "" +"Saving incrementally may be required if the document contains verified " +"signatures which would be invalidated by saving to a new file." +msgstr "増分保存が必要な場合、ドキュメントに確認済みの署名が含まれている場合、新しいファイルに保存することによって無効になる可能性があります。" + +#: ../../document.rst:1291 69ee28fed5104fc6ad96bd34c07e1b26 +msgid "" +"PDF only: Writes the **current content of the document** to a bytes " +"object instead of to a file. Obviously, you should be wary about memory " +"requirements. The meanings of the parameters exactly equal those in " +":meth:`save`. Chapter :ref:`FAQ` contains an example for using this " +"method as a pre-processor to `pdfrw " +"`_." +msgstr "" +"PDFのみ: **ドキュメントの現在のコンテンツ** " +"をファイルではなくバイトオブジェクトに書き込みます。明らかに、メモリ要件に注意する必要があります。パラメータの意味は :meth:`save` " +"とまったく同じです。チャプター :ref:`FAQ` には、このメソッドを `pdfrw " +"`_ の前処理として使用する例が含まれています。" + +#: ../../document.rst:1293 d3725dc95b37470b8575d3846941665b +msgid "*(Changed in v1.16.0)* for extended encryption support." +msgstr "*(v1.16.0で変更)* 拡張暗号サポート用。" + +#: ../../document.rst:1296 cd3d4e2cbea24746b1282f1af4521b74 +msgid "a bytes object containing the complete document." +msgstr "ドキュメント全体を含むbytesオブジェクト。" + +#: ../../document.rst:1300 c8cfce8c4fdd42aa8813bf8eb3c4dee7 +msgid "" +"Search for \"text\" on page number \"pno\". Works exactly like the " +"corresponding :meth:`Page.search_for`. Any integer `-∞ < pno < " +"page_count` is acceptable." +msgstr "" +"\"pno\" ページ上で \"text\" を検索します。対応する :meth:`Page.search_for` " +"とまったく同じ方法で機能します。整数 `-∞ < pno < page_count` ならどのような値でも受け入れられます。" + +#: ../../document.rst:1318 09b2ff9e99e94f089fd00c4231869235 +#, fuzzy +msgid "" +"PDF only: Copy the page range **[from_page, to_page]** (including both) " +"of PDF document *docsrc* into the current one. Inserts will start with " +"page number *start_at*. Value -1 indicates default values. All pages thus" +" copied will be rotated as specified. Links, annotations and widgets can " +"be excluded in the target, see below. All page numbers are 0-based." +msgstr "" +"PDFのみ:PDFドキュメント *docsrc* のページ範囲 **[from_page、to_page]** " +"(両方を含む)を現在のドキュメントにコピーします。挿入はページ番号 *start_at* " +"から開始します。値-1はデフォルト値を示します。したがって、コピーされるすべてのページは指定されたように回転します。リンクと注釈は対象から除外することができます(以下参照)。すべてのページ番号は0から始まります。" + +#: ../../document.rst:1320 0cfbbbe2873045fab67a1c9d19fd9274 +msgid "" +"An opened PDF *Document* which must not be the current document. However," +" it may refer to the same underlying file." +msgstr "現在のドキュメントではない、開かれたPDF *ドキュメント* 。ただし、同じ基盤ファイルを参照する場合があります。" + +#: ../../document.rst:1323 43c125b72c3048ebaf3f53f305f4ff8c +msgid "First page number in *docsrc*. Default is zero." +msgstr "*docsrc* 内の最初のページ番号。デフォルトはゼロです。" + +#: ../../document.rst:1325 578e57c96db74a6682265149c492bdaf +msgid "Last page number in *docsrc* to copy. Defaults to last page." +msgstr "コピーする *docsrc* 内の最後のページ番号。デフォルトは最終ページです。" + +#: ../../document.rst:1327 961000a4a8ef4b9b8656febad06ba539 +msgid "" +"First copied page, will become page number *start_at* in the target. " +"Default -1 appends the page range to the end. If zero, the page range " +"will be inserted before current first page." +msgstr "" +"コピーされる最初のページ、対象のページ番号 *start_at* " +"になります。デフォルト-1はページ範囲を末尾に追加します。ゼロの場合、ページ範囲は現在の最初のページの前に挿入されます。" + +#: ../../document.rst:1329 1800a49c031744218281af9422f6d529 +msgid "" +"All copied pages will be rotated by the provided value (degrees, integer " +"multiple of 90)." +msgstr "コピーされるすべてのページは、指定された値(度数、90の整数倍)で回転します。" + +#: ../../document.rst:1331 3f4f1a4a518440a6a7ec602c7e4a43c5 +msgid "" +"Choose whether (internal and external) links should be included in the " +"copy. Default is `True`. *Named* links (:data:`LINK_NAMED`) and internal " +"links to outside the copied page range are **always excluded**." +msgstr "" +"(内部および外部) リンクをコピーに含めるかどうかを選択します。デフォルトは ``True`` です。コピー対象外のコピー範囲外の内部リンクは " +"**常に除外されます** 。" + +#: ../../document.rst:1333 391fa50b6e85480f9ddedac416feae2a +#, fuzzy +msgid "choose whether annotations should be included in the copy." +msgstr "*(v1.16.1で新規追加)* 注釈をコピーに含めるかどうかを選択します。 フォームフィールドはコピーできません。" + +#: ../../document.rst:1335 57a88c0d4e95428180e328cc1e060e20 +msgid "" +"choose whether annotations should be included in the copy. If `True` and " +"at least one of the source pages contains form fields, the target PDF " +"will be turned into a Form PDF (if not already being one)." +msgstr "" + +#: ../../document.rst:1337 080cd6c505c54450af5334a5aba9403c +msgid "" +"*(New in version 1.25.5)* Choose how to handle duplicate root field names" +" in the source pages. This parameter is ignored if `widgets=False`. " +"Default is ``False`` which will add unifying strings to the name of those" +" source root fields which have a duplicate in the target. For instance, " +"if \"name\" already occurs in the target, the source widget's name will " +"be changed to \"name [text]\" with a suitably chosen string \"text\". If" +" ``True``, root fields with duplicate names in source and target will be " +"converted to so-called \"Kids\" of a \"Parent\" object (which lists all " +"kid widgets in a PDF array). This will effectively turn those kids into " +"instances of the \"same\" widget: if e.g. one of the kids is changed, " +"then all its instances will automatically inherit this change -- no " +"matter on which page they happen to be displayed." +msgstr "" + +#: ../../document.rst:1337 9e7900061e1243c4901ba1776afa8120 +msgid "" +"*(New in version 1.25.5)* Choose how to handle duplicate root field names" +" in the source pages. This parameter is ignored if `widgets=False`." +msgstr "" + +#: ../../document.rst:1339 9325fc72fda5486b981214d0018f9cfc +msgid "" +"Default is ``False`` which will add unifying strings to the name of those" +" source root fields which have a duplicate in the target. For instance, " +"if \"name\" already occurs in the target, the source widget's name will " +"be changed to \"name [text]\" with a suitably chosen string \"text\"." +msgstr "" + +#: ../../document.rst:1341 5ffb402e3396421aae813b66c0b5de76 +msgid "" +"If ``True``, root fields with duplicate names in source and target will " +"be converted to so-called \"Kids\" of a \"Parent\" object (which lists " +"all kid widgets in a PDF array). This will effectively turn those kids " +"into instances of the \"same\" widget: if e.g. one of the kids is " +"changed, then all its instances will automatically inherit this change --" +" no matter on which page they happen to be displayed." +msgstr "" + +#: ../../document.rst:1343 a4ba7d35af8f47d883a4bb779faab641 +msgid "" +"*(new in v1.17.7)* specify an interval size greater zero to see progress " +"messages on `sys.stdout`. After each interval, a message like `Inserted " +"30 of 47 pages.` will be printed." +msgstr "" +"*(v1.17.7で新規追加)* `sys.stdout` " +"で進捗メッセージを表示するための大きなゼロより大きい間隔を指定します。各間隔後、`Inserted 30 of 47 pages.` " +"のようなメッセージが印刷されます。" + +#: ../../document.rst:1345 45d40f5660454970b9ddb2a95ff37fbf +msgid "" +"*(new in v1.18.0)* controls whether the list of already copied objects " +"should be **dropped** after this method, default ``True``. Set it to 0 " +"except for the last one of multiple insertions from the same source PDF. " +"This saves target file size and speeds up execution considerably." +msgstr "" +"*(v1.18.0で新規追加)* このメソッドの後にすでにコピーされたオブジェクトのリストを **削除する** かどうかを制御します。デフォルトは" +" ``True`` " +"です。同じソースPDFからの複数の挿入の最後以外は、0に設定します。これにより、対象ファイルのサイズが節約され、実行が大幅に高速化されます。" + +#: ../../document.rst:1349 23da58c56e6d4983997732275eddf854 +msgid "" +"This is a page-based method. Document-level information of source " +"documents is therefore mostly ignored. Examples include Optional Content," +" Embedded Files, `StructureElem`, table of contents, page labels, " +"metadata, named destinations (and other named entries) and some more." +msgstr "" + +#: ../../document.rst:1351 f0a071616e314f88a87910b55cd6cee6 +msgid "" +"If `from_page > to_page`, pages will be **copied in reverse order**. If " +"`0 <= from_page == to_page`, then one page will be copied." +msgstr "" +"`from_page > to_page` の場合、ページは **逆の順序でコピーされます** 。`0 <= from_page == " +"to_page` の場合、1ページがコピーされます。" + +#: ../../document.rst:1353 283180bbf983435e91685f67ed77c66e +msgid "" +"`docsrc` TOC entries **will not be copied**. It is easy however, to " +"recover a table of contents for the resulting document. Look at the " +"examples below and at program `join.py `_ in the " +"*examples* directory: it can join PDF documents and at the same time " +"piece together respective parts of the tables of contents." +msgstr "" +"*docsrc* のTOCエントリは **コピーされません** " +"。ただし、結果のドキュメントの目次を復元することは簡単です。以下の例と、*examples* ディレクトリのプログラム `join.py " +"`_ " +"を参照してください:これはPDFドキュメントを結合し、同時に対応する目次の部分を組み立てることができます。" + +#: ../../document.rst:1370 39e76b859dc446ec9080ec8d242bc13b +msgid "New in v1.22.0" +msgstr "v1.22.0で新規追加" + +#: ../../document.rst:1372 afcd6f4f6dbb49de8e379a8352a9acb5 +msgid "" +"PDF only: Add an arbitrary supported document to the current PDF. Opens " +"\"infile\" as a document, converts it to a PDF and then invokes " +":meth:`Document.insert_pdf`. Parameters are the same as for that method. " +"Among other things, this features an easy way to append images as full " +"pages to an output PDF." +msgstr "" +"PDFのみ:任意のサポートされているドキュメントを現在のPDFに追加します。 \"infile\" " +"をドキュメントとして開き、PDFに変換し、:meth:`Document.insert_pdf` " +"を呼び出します。パラメータはそのメソッドと同じです。他のことの中で、これには画像を出力PDFに完全なページとして追加する簡単な方法が含まれています。" + +#: ../../document.rst:1374 980b05d8ec9043478b43e690e2c8f931 +msgid "" +"the input document to insert. May be a filename specification as is valid" +" for creating a :ref:`Document` or a :ref:`Pixmap`." +msgstr "" +"挿入する入力ドキュメント。:ref:`Document` を作成する際に有効なファイル名の指定または :ref:`Pixmap` " +"である可能性があります。" + +#: ../../document.rst:1383 1973a63997bf441bb333f3a64ba68bcf +msgid "PDF only: Insert an empty page." +msgstr "PDFのみ:空のページを挿入します。" + +#: ../../document.rst:1385 b7256473ae514578be3947849b44db62 + +msgid "" +"page number in front of which the new page should be inserted. Must be in" +" `1 < pno <= page_count`. Special values -1 and *doc.page_count* insert " +"**after** the last page." +msgstr "" +"新しいページを挿入する前のページ番号。`1 < pno <= page_count` である必要があります。特別な値-1および " +"*doc.page_count* は最後のページの後に挿入します。" + +#: ../../document.rst:1387 0f646751e84c4800909811f615b97ee4 +msgid "page width." +msgstr "ページの幅。" + +#: ../../document.rst:1388 acdaf160ab0943e4a039b2b995e3514a +msgid "page height." +msgstr "ページの高さ。" + +#: ../../document.rst:1391 f533e6f166d54bbd9f836817a4477c70 +msgid "" +"the created page object. Be aware that the page numbers of pages after " +"the inserted one will have changed after method execution. For the same " +"reason, **all existing page objects will be invalidated.** Using them " +"will lead to exceptions." +msgstr "" + +#: ../../document.rst:1403 a0af237003924b22876d7fa12331f23f +msgid "" +"PDF only: Insert a new page and insert some text. Convenience function " +"which combines :meth:`Document.new_page` and (parts of) " +":meth:`Page.insert_text`." +msgstr "" +"PDFのみ:新しいページを挿入し、テキストを挿入します。:meth:`Document.new_page` と " +":meth:`Page.insert_text` の一部を組み合わせた便利な関数です。" + +#: ../../document.rst:1405 93a93061c9dd424aa19f6744784f6e49 +msgid "" +"page number (0-based) **in front of which** to insert. Must be in " +"`range(-1, doc.page_count + 1)`. Special values -1 and `doc.page_count` " +"insert **after** the last page. Changed in v1.14.12 This is now a " +"positional parameter" +msgstr "" + +#: ../../document.rst:1405 e43138d7fbcd4bd3ac2e388dcd5a0cca +msgid "" +"page number (0-based) **in front of which** to insert. Must be in " +"`range(-1, doc.page_count + 1)`. Special values -1 and `doc.page_count` " +"insert **after** the last page." +msgstr "" +"**挿入する前** のページ番号(0ベース)を指定します。範囲 `range(-1, doc.page_count + 1)` " +"内である必要があります。特別な値-1および `doc.page_count` は最後のページの **後** に挿入します。" + +#: ../../document.rst:1408 a71c7b2a75be4830bcd8cbd5c7d79e28 +msgid "Changed in v1.14.12" +msgstr "v1.14.12で変更" + +#: ../../document.rst:1408 fc2477f4827449258c65aafe838c04f2 +msgid "This is now a positional parameter" +msgstr "これは今ポジションパラメータです" + +#: ../../document.rst:1410 1eb7aeb952014ed4a78680770afb15b8 +msgid "For the other parameters, please consult the aforementioned methods." +msgstr "その他のパラメータについては、前述のメソッドをご参照ください。" + +#: ../../document.rst:1413 e4069f60a52b4bd69514a1d009befbe1 +msgid "" +"the result of :meth:`Page.insert_text` (number of successfully inserted " +"lines)." +msgstr ":meth:`Page.insert_text` の結果(正常に挿入された行数)。" + +#: ../../document.rst:1417 a23ff1a304a54e25a7a07e740752b8dd +msgid "" +"PDF only: Delete a page given by its 0-based number in `-∞ < pno < " +"page_count - 1`." +msgstr "PDFのみ:0ベースの番号で指定されたページを削除します。`-∞ < pno < page_count - 1` です。" + +#: ../../document.rst:1419 ../../document.rst:1426 +#: b511b678cb654603bcd324b8f1915b53 e3a047edfe434ac5a2930b5d930e13a8 +msgid "Changed in v1.18.14: support Python's `del` statement." +msgstr "v1.18.14で変更:Pythonの `del` 文をサポート。" + +#: ../../document.rst:1421 69185a84c34145d7b927e424304e13b3 +msgid "" +"the page to be deleted. Negative number count backwards from the end of " +"the document (like with indices). Default is the last page." +msgstr "削除するページ。負の数は文書の末尾から逆に数えます(インデックスと同様)。デフォルトは最後のページです。" + +#: ../../document.rst:1425 abd23351a7d545b6a100cd90751beb0a +msgid "Changed in v1.18.13: more flexibility specifying pages to delete." +msgstr "v1.18.13 で変更されました: 削除するページを指定する柔軟性が向上しました。" + +#: ../../document.rst:1428 0a047abd46d54fada919148f5b6bca27 +msgid "PDF only: Delete multiple pages given as 0-based numbers." +msgstr "PDF のみ: 0 ベースの番号として指定された複数のページを削除します。" + +#: ../../document.rst:1432 175c76de607e4cfc80c5a197eda88f94 +msgid "" +"**Format 1:** Use keywords. Represents the old format. A contiguous range" +" of pages is removed." +msgstr "**フォーマット 1:** キーワードを使用します。古いフォーマットを表します。連続したページ範囲が削除されます。" + +#: ../../document.rst:1431 2a065399a1214adf8f0b8fd284af6372 +msgid "\"from_page\": first page to delete. Zero if omitted." +msgstr "\"from_page\": 削除する最初のページ。省略された場合はゼロです。" + +#: ../../document.rst:1432 69402a8c85fd4253bf4b4815a985bcd8 +msgid "" +"\"to_page\": last page to delete. Last page in document if omitted. Must " +"not be less then \"from_page\"." +msgstr "" +"\"to_page\": 削除する最後のページ。省略された場合はドキュメント内の最後のページです。\"from_page\" " +"より小さくしてはいけません。" + +#: ../../document.rst:1434 9268bfe4dc0e4412824caa21a36b3525 +msgid "" +"**Format 2:** Two page numbers as positional parameters. Handled like " +"Format 1." +msgstr "**フォーマット 2:** 位置パラメータとしての2つのページ番号。フォーマット 1 と同様に処理されます。" + +#: ../../document.rst:1436 4f45a9abbb234670ad5f48db8ed0ed68 +msgid "" +"**Format 3:** One positional integer parameter. Equivalent to " +":meth:`Page.delete_page`." +msgstr "**フォーマット 3:** 1 つの位置パラメータの整数。:meth:`Page.delete_page` に相当します。" + +#: ../../document.rst:1438 52626733de604cf1b5447bb8f0d753b5 +msgid "" +"**Format 4:** One positional parameter of type *list*, *tuple* or " +"*range()* of page numbers. The items of this sequence may be in any order" +" and may contain duplicates." +msgstr "" +"**フォーマット 4:** ページ番号のリスト、*タプル*、または *range()* " +"の1つの位置パラメータ。このシーケンスのアイテムは任意の順序であり、重複していてもかまいません。" + +#: ../../document.rst:1440 41dbb1b8fe4d4535b5c6d32d5afc2dca +msgid "" +"**Format 5:** *(New in v1.18.14)* Using the Python `del` statement and " +"index / slice notation is now possible." +msgstr "" +"**フォーマット 5:** *(v1.18.14 で新規)* Python の `del` ステートメントとインデックス / " +"スライス表記を使用することができます。" + +#: ../../document.rst:1444 77a9dcc310bc4cbb8f21655f85e3d166 +msgid "" +"*(Changed in v1.14.17, optimized in v1.17.7)* In an effort to maintain a " +"valid PDF structure, this method and :meth:`delete_page` will also " +"deactivate items in the table of contents which point to deleted pages. " +"\"Deactivation\" here means, that the bookmark will point to nowhere and " +"the title will be shown grayed-out by supporting PDF viewers. The overall" +" TOC structure is left intact." +msgstr "" +"*(v1.14.17 で変更, v1.17.7 で最適化)* 有効な PDF 構造を維持するために、このメソッドと " +":meth:`delete_page` は、削除されたページを指す目次のアイテムも無効化します。ここでの \"無効化\" " +"とは、ブックマークがどこを指しているのか分からなくなり、サポートされている PDF " +"ビューアによってタイトルがグレーアウト表示されることを意味します。全体の目次構造は維持されます。" + +#: ../../document.rst:1446 e7da40e8d07e48708979fdae87e22e32 +msgid "" +"It will also remove any **links on remaining pages** which point to a " +"deleted one. This action may have an extended response time for documents" +" with many pages." +msgstr "" +"また、削除されたページを指す **残りのページ上のリンク** " +"も削除されます。これにより、多くのページを持つドキュメントでは拡張された応答時間が発生する可能性があります。" + +#: ../../document.rst:1448 411a3e7d8d1648d2865212d6dcc56ba6 +msgid "Following examples will all delete pages 500 through 519:" +msgstr "以下の例はすべて、ページ500から519を削除します:" + +#: ../../document.rst:1450 62a61e5649474ca582a515b0bc621c0b +msgid "`doc.delete_pages(500, 519)`" +msgstr "" + +#: ../../document.rst:1451 10c409a9fd5840abb743d555dbfff4b9 +msgid "`doc.delete_pages(from_page=500, to_page=519)`" +msgstr "" + +#: ../../document.rst:1452 ebf6a7145f184b17a4166e1f00c71e1c +msgid "`doc.delete_pages((500, 501, 502, ... , 519))`" +msgstr "" + +#: ../../document.rst:1453 ab18e8ba47d549589b99cf6f69044c7f +msgid "`doc.delete_pages(range(500, 520))`" +msgstr "" + +#: ../../document.rst:1454 a5186cdcb011492dba90d3a3030884dc +msgid "`del doc[500:520]`" +msgstr "" + +#: ../../document.rst:1455 85a3222b689444209f224d5602b13a26 +msgid "`del doc[(500, 501, 502, ... , 519)]`" +msgstr "" + +#: ../../document.rst:1456 1ed4d0db6b78459ab0976ad876a90ad7 +msgid "`del doc[range(500, 520)]`" +msgstr "" + +#: ../../document.rst:1458 5088720006cc436aab4d360eb69ac504 +msgid "" +"For the :ref:`AdobeManual` the above takes about 0.6 seconds, because the" +" remaining 1290 pages must be cleaned from invalid links." +msgstr "" +":ref:`AdobeManual` " +"では、上記の操作に約0.6秒かかります。なぜなら、残りの1290ページから無効なリンクを削除する必要があるからです。" + +#: ../../document.rst:1460 c6a3995894004ef197d262a2b817d678 +msgid "" +"In general, the performance of this method is dependent on the number of " +"remaining pages -- **not** on the number of deleted pages: in the above " +"example, **deleting all pages except** those 20, will need much less " +"time." +msgstr "" +"一般的に、このメソッドのパフォーマンスは残りのページの数に依存します - 削除されたページの数には依存 **しません** " +"。上記の例では、20個のページ **以外のすべてのページを削除する** 場合、はるかに少ない時間がかかります。" + +#: ../../document.rst:1465 705b0078ed754e92a8d93ffaa22a672b +msgid "PDF only: Copy a page reference within the document." +msgstr "PDF のみ: ドキュメント内でページの参照をコピーします。" + +#: ../../document.rst:1467 60aecdad658c4ca5a914a99b2b3dcf58 +msgid "the page to be copied. Must be in range `0 <= pno < page_count`." +msgstr "コピーするページ。範囲は `0 <= pno < page_count` である必要があります。" + +#: ../../document.rst:1469 ../../document.rst:1481 +#: 246e4a838923434ab9fd784d103d18cb 81dfc717124c41aba7f5f4ecbc9884d7 +msgid "" +"the page number in front of which to copy. The default inserts **after** " +"the last page." +msgstr "コピーする位置の前のページ番号。デフォルトでは最後のページの **後** に挿入されます。" + +#: ../../document.rst:1471 53a1a95597734ded97d4ec5f763b5811 +msgid "" +"Only a new **reference** to the page object will be created -- not a new " +"page object, all copied pages will have identical attribute values, " +"including the :attr:`Page.xref`. This implies that any changes to one of " +"these copies will appear on all of them." +msgstr "" +"ページオブジェクトへの新しい **参照** のみが作成されます - " +"新しいページオブジェクトは作成されません。すべてのコピーされたページは、:attr:`Page.xref` " +"を含む属性値が同じになります。これは、これらのコピーの1つに対する変更がすべてのコピーに反映されることを意味します。" + +#: ../../document.rst:1475 8cea514a3a744c2ea11dde817b21393b +msgid "New in v1.14.17" +msgstr "v1.14.17 で新規" + +#: ../../document.rst:1477 a749aee3a9dc4121a1a2250b36db8e82 +msgid "PDF only: Make a full copy (duplicate) of a page." +msgstr "PDF のみ: ページの完全なコピー(複製)を作成します。" + +#: ../../document.rst:1479 90ce7f1125ea445aae29c35aa5d80383 +msgid "the page to be duplicated. Must be in range `0 <= pno < page_count`." +msgstr "複製するページ。範囲は `0 <= pno < page_count` である必要があります。" + +#: ../../document.rst:1485 36b8a43c605d47ab9a0a82bd42df7c25 +msgid "" +"In contrast to :meth:`copy_page`, this method creates a new page object " +"(with a new :data:`xref`), which can be changed independently from the " +"original." +msgstr "" +":meth:`copy_page` とは異なり、このメソッドは新しいページオブジェクト(新しい :data:`xref` " +"を持つ)を作成します。これは元のページから独立して変更できます。" + +#: ../../document.rst:1487 670014ae08da406ca2be38655e5bd201 +msgid "" +"Any Popup and \"IRT\" (\"in response to\") annotations are **not copied**" +" to avoid potentially incorrect situations." +msgstr "ポップアップと \"IRT\"(\"応答先\")注釈は、潜在的に誤った状況を回避するために **コピーされません** 。" + +#: ../../document.rst:1491 4ae895e2d04644e8a9fe842fe0f3cc70 +msgid "PDF only: Move (copy and then delete original) a page within the document." +msgstr "PDF のみ: ドキュメント内でページを移動します(コピーしてから元のページを削除)。" + +#: ../../document.rst:1493 b29c45738329442b91a0f727076f4fcb +msgid "the page to be moved. Must be in range `0 <= pno < page_count`." +msgstr "移動するページ。範囲は `0 <= pno < page_count` である必要があります。" + +#: ../../document.rst:1495 fc13875857424ed295d7af1c2209553c +msgid "" +"the page number in front of which to insert the moved page. The default " +"moves **after** the last page." +msgstr "移動したページの前に挿入するページ番号。デフォルトでは最後のページの **後** に移動します。" + +#: ../../document.rst:1500 5d500f5b36af4323beb85a3298a954a2 +msgid "New in v1.17.4" +msgstr "v1.17.4 で新規" + +#: ../../document.rst:1502 c39d81a776164d29a530943108efaadc +msgid "" +"PDF only: Get or set the */NeedAppearances* property of Form PDFs. Quote:" +" *\"(Optional) A flag specifying whether to construct appearance streams " +"and appearance dictionaries for all widget annotations in the document " +"... Default value: false.\"* This may help controlling the behavior of " +"some readers / viewers." +msgstr "" +"PDF のみ: Form PDF の */NeedAppearances* プロパティを取得または設定します。引用: " +"*\"(オプション)ドキュメント内のすべてのウィジェット注釈の外観ストリームと外観辞書を構築するかどうかを指定するフラグ...デフォルト値: " +"false\"* 。これは一部のリーダー/ビューアの動作を制御するのに役立つかもしれません。" + +#: ../../document.rst:1504 e4314279bc2c408f9dde35e3f7e7e40c +msgid "" +"set the property to this value. If omitted or `None`, inquire the current" +" value." +msgstr "プロパティをこの値に設定します。省略された場合または `None` の場合、現在の値を問い合わせます。" + +#: ../../document.rst:1507 d9c3dbb3bcc94873a067046f484f7a2f +msgid "" +"* None: not a Form PDF, or property not defined. * True / False: the " +"value of the property (either just set or existing for inquiries). Has no" +" effect if no Form PDF." +msgstr "" + +#: ../../document.rst:1508 764e7d8a7f6a4f968098dee9e0e48af9 +msgid "None: not a Form PDF, or property not defined." +msgstr "None: フォーム PDF ではないか、プロパティが定義されていない。" + +#: ../../document.rst:1509 8b3a18e58aa548319307bc6b0ea6112e +msgid "" +"True / False: the value of the property (either just set or existing for " +"inquiries). Has no effect if no Form PDF." +msgstr "True / False: プロパティの値(設定されたばかりまたは問い合わせ用に存在する)。フォーム PDF がない場合は影響しません。" + +#: ../../document.rst:1515 507ed2c3e11e4ab0837cfc30a44db928 +msgid "" +"PDF only: Return whether the document contains signature fields. This is " +"an optional PDF property: if not present (return value -1), no " +"conclusions can be drawn -- the PDF creator may just not have bothered " +"using it." +msgstr "" +"PDF のみ: ドキュメントに署名フィールドが含まれているかどうかを返します。これはオプションの PDF プロパティです。存在しない場合(返り値 " +"-1)、結論を導くことはできません - PDF の作成者は単にそれを使用しなかった可能性があります。" + +#: ../../document.rst:1518 66607b1a4570426c885fa1810e2f38d6 +msgid "" +"* -1: not a Form PDF / no signature fields recorded / no *SigFlags* " +"found. * 1: at least one signature field exists. * 3: contains " +"signatures that may be invalidated if the file is saved (written) in a " +"way that alters its previous contents, as opposed to an incremental " +"update." +msgstr "" + +#: ../../document.rst:1519 f5324fe1ddb443398af1771fca69a157 +msgid "-1: not a Form PDF / no signature fields recorded / no *SigFlags* found." +msgstr "-1: フォーム PDF でない / 署名フィールドが記録されていない / *SigFlags* が見つからない。" + +#: ../../document.rst:1520 0884776d1e7e4becb22692136174f4ee +msgid "1: at least one signature field exists." +msgstr "1: 少なくとも1つの署名フィールドが存在します。" + +#: ../../document.rst:1521 6622bf12d0344a21ac885957fa8f1d8a +msgid "" +"3: contains signatures that may be invalidated if the file is saved " +"(written) in a way that alters its previous contents, as opposed to an " +"incremental update." +msgstr "3: ファイルが前の内容を変更する方法で保存(書き込み)されると、署名が無効になる可能性のある署名が含まれています。これは増分更新とは対照的です。" + +#: ../../document.rst:1530 528434fc5e09484e9eb2a85a231054b8 +msgid "" +"Changed in v1.14.16: The sequence of positional parameters \"name\" and " +"\"buffer\" has been changed to comply with the call pattern of other " +"functions." +msgstr "" +"v1.14.16 で変更されました: 位置パラメータ \"name\" と \"buffer\" " +"の順序が他の関数の呼び出しパターンに従うように変更されました。" + +#: ../../document.rst:1532 eca673836bcd48c5a8d26046e16edc13 +msgid "" +"PDF only: Embed a new file. All string parameters except the name may be " +"unicode (in previous versions, only ASCII worked correctly). File " +"contents will be compressed (where beneficial)." +msgstr "" +"PDF のみ: 新しいファイルを埋め込みます。名前以外のすべての文字列パラメータは Unicode である場合があります(以前のバージョンでは " +"ASCII しか正しく動作しませんでした)。ファイルの内容は(有益な場合に)圧縮されます。" + +#: ../../document.rst:1534 cb0ab8857b9c4af48c687ed6a564a8bb +msgid "entry identifier, **must not already exist**." +msgstr "エントリの識別子、**すでに存在しない必要** があります。" + +#: ../../document.rst:1535 ce223d9f9961497d9b60c0a5aeb35f00 +msgid "" +"file contents. *(Changed in v1.14.13)* *io.BytesIO* is now also " +"supported." +msgstr "" + +#: ../../document.rst:1535 d0e7f79c8154495b8c46b61cf3c005a2 +msgid "file contents." +msgstr "ファイルの内容。" + +#: ../../document.rst:1537 ../../document.rst:1611 +#: 2e665750305f41efb2f4afc33ec5e9aa c70a3f73e78043fa88f0de8aa92d8587 +msgid "*(Changed in v1.14.13)* *io.BytesIO* is now also supported." +msgstr "*(v1.14.13 で変更)* *io.BytesIO* もサポートされるようになりました。" + +#: ../../document.rst:1539 90969f73d7ff4f51ba67614c9c2b474f +msgid "optional filename. Documentation only, will be set to *name* if `None`." +msgstr "オプションのファイル名。ドキュメンテーション専用で、`None` の場合は *name* に設定されます。" + +#: ../../document.rst:1540 7e6212904ad2482dbec0fd308683752c +msgid "" +"optional unicode filename. Documentation only, will be set to *filename* " +"if `None`." +msgstr "オプションのUnicodeファイル名。ドキュメンテーション専用で、`None` の場合は *filename* に設定されます。" + +#: ../../document.rst:1541 c72a82ba117a4b2b89ac71c25a98622b +msgid "optional description. Documentation only, will be set to *name* if `None`." +msgstr "オプションの説明。ドキュメンテーション専用で、`None` の場合は *name* に設定されます。" + +#: ../../document.rst:1544 be4b00766ada40a2bd40f839a28e2e4a +msgid "" +"*(Changed in v1.18.13)* The method now returns the :data:`xref` of the " +"inserted file. In addition, the file object now will be automatically " +"given the PDF keys `/CreationDate` and `/ModDate` based on the current " +"date-time." +msgstr "" +"*(v1.18.13 で変更)* このメソッドは挿入されたファイルの :data:`xref` " +"も返すようになりました。さらに、ファイルオブジェクトには現在の日時に基づいて自動的に PDF キー `/CreationDate` および " +"`/ModDate` が設定されるようになりました。" + +#: ../../document.rst:1549 45ee39f2dfbc45d08de6112ef030d189 +msgid "" +"Changed in v1.14.16: This is now a method. In previous versions, this was" +" a property." +msgstr "v1.14.16 で変更されました: これは現在のメソッドです。以前のバージョンではプロパティでした。" + +#: ../../document.rst:1551 e96e4fa4cc3045fe872784dd49d8cbcc +msgid "PDF only: Return the number of embedded files." +msgstr "PDF のみ: 埋め込まれたファイルの数を返します。" + +#: ../../document.rst:1555 1a8bef3689d8400ba77f0281259128e0 +msgid "" +"PDF only: Retrieve the content of embedded file by its entry number or " +"name. If the document is not a PDF, or entry cannot be found, an " +"exception is raised." +msgstr "" +"PDF のみ: エントリ番号または名前によって埋め込まれたファイルの内容を取得します。ドキュメントが PDF " +"でない場合、またはエントリが見つからない場合、例外が発生します。" + +#: ../../document.rst:1557 ../../document.rst:1577 ../../document.rst:1608 +#: 0681fb6812f247b3a657b4b0d828c4b6 383e427f82004cbe8cc2a825920e0b05 +#: 3aa0076b95144faba6014fb667a2524d +msgid "index or name of entry. An integer must be in `range(embfile_count())`." +msgstr "エントリのインデックスまたは名前。整数は範囲内である必要があります `range(embfile_count())`。" + +#: ../../document.rst:1563 ebbdeff1629b489a97670b59e1ae920b +msgid "Changed in v1.14.16: Items can now be deleted by index, too." +msgstr "v1.14.16 で変更されました: インデックスによってアイテムを削除できるようになりました。" + +#: ../../document.rst:1565 f0136a227370430087d262df18e1a84d +msgid "" +"PDF only: Remove an entry from `/EmbeddedFiles`. As always, physical " +"deletion of the embedded file content (and file space regain) will occur " +"only when the document is saved to a new file with a suitable garbage " +"option." +msgstr "" +"PDF のみ: `/EmbeddedFiles` " +"からエントリを削除します。いつものように、適切なガベージオプションを使用して新しいファイルに保存すると、埋め込まれたファイルの内容の物理的な削除(およびファイルスペースの回復)が行われます。" + +#: ../../document.rst:1567 faacd51f9a014064a8e3da2f5c142759 +msgid "index or name of entry." +msgstr "エントリのインデックスまたは名前。" + +#: ../../document.rst:1569 a81454daa9fe45cbba977936d41c7176 +msgid "" +"When specifying an entry name, this function will only **delete the first" +" item** with that name. Be aware that PDFs not created with PyMuPDF may " +"contain duplicate names. So you may want to take appropriate precautions." +msgstr "" +"エントリ名を指定する場合、この関数はその名前を持つ **最初のアイテムのみを削除します** 。PyMuPDF で作成された PDF 以外の PDF" +" には重複する名前が含まれている可能性があるため、適切な注意を払う必要があるかもしれません。" + +#: ../../document.rst:1573 32126dd18fd24500b308accd077d025f +msgid "Changed in v1.18.13" +msgstr "v1.18.13 で変更されました" + +#: ../../document.rst:1575 b197159c495a4d389a8e72ac34725c36 +msgid "" +"PDF only: Retrieve information of an embedded file given by its number or" +" by its name." +msgstr "PDF のみ: 埋め込まれたファイルの情報を取得します。エントリ番号または名前によって指定されたファイルの情報を取得します。" + +#: ../../document.rst:1580 c947869a0abd450aba656b8457d2e67b +msgid "" +"a dictionary with the following keys: * ``name`` -- (*str*) name under " +"which this entry is stored * ``filename`` -- (*str*) filename * " +"``ufilename`` -- (*unicode*) filename * ``description`` -- (*str*) " +"description * ``size`` -- (*int*) original file size * ``length`` -- " +"(*int*) compressed file length * ``creationDate`` -- (*str*) date-time of" +" item creation in PDF format * ``modDate`` -- (*str*) date-time of last " +"change in PDF format * ``collection`` -- (*int*) :data:`xref` of the " +"associated PDF portfolio item if any, else zero. * ``checksum`` -- " +"(*str*) a hashcode of the stored file content as a hexadecimal string. " +"Should be MD5 according to PDF specifications, but be prepared to see " +"other hashing algorithms." +msgstr "" + +#: ../../document.rst:1580 c6d2fd48543e43c98de50702298f3e93 +msgid "a dictionary with the following keys:" +msgstr "以下のキーを持つ辞書:" + +#: ../../document.rst:1582 fbac45b6fb9b45b696291bbb6e40d275 + +msgid "``name`` -- (*str*) name under which this entry is stored" +msgstr "``name`` – (*str*) このエントリが格納されている名前" + +#: ../../document.rst:1583 ac7cdd97593b4a499b8c7d804c6f4014 + +msgid "``filename`` -- (*str*) filename" +msgstr "``filename`` – (*str*) ファイル名" + +#: ../../document.rst:1584 935d57ba0d7349c192ccf39791ae5335 + +msgid "``ufilename`` -- (*unicode*) filename" +msgstr "``ufilename`` – (*unicode*) Unicode ファイル名" + +#: ../../document.rst:1585 9c68a90a51b1456f84258a5c564f1934 + +msgid "``description`` -- (*str*) description" +msgstr "``description`` – (*str*) 説明" + +#: ../../document.rst:1586 7e7ddf34d9a1459c8c1de5dc8202df7b + +msgid "``size`` -- (*int*) original file size" +msgstr "``size`` – (*int*) 元のファイルサイズ" + +#: ../../document.rst:1587 fb9eddcb3d474795a2468d79481a7f8f + +msgid "``length`` -- (*int*) compressed file length" +msgstr "``length`` – (*int*) 圧縮ファイルの長さ" + +#: ../../document.rst:1588 3b6add56ce56412b80dd09077ed06529 + +msgid "``creationDate`` -- (*str*) date-time of item creation in PDF format" +msgstr "``creationDate`` – (*str*) PDF 形式のアイテム作成の日時" + +#: ../../document.rst:1589 0fb6cb02dc664e4580cc745eece83618 + +msgid "``modDate`` -- (*str*) date-time of last change in PDF format" +msgstr "``modDate`` – (*str*) PDF 形式の最終変更の日時" + +#: ../../document.rst:1590 1ed3eec819544ad88fe1e48742f08be5 + +msgid "" +"``collection`` -- (*int*) :data:`xref` of the associated PDF portfolio " +"item if any, else zero." +msgstr "" +"``collection`` – (*int*) 関連する PDF ポートフォリオアイテムの " +":data:`xref`(あれば)、それ以外はゼロ。" + +#: ../../document.rst:1591 b50b9a70e5b24a5097b61c6c20e91bd6 + +msgid "" +"``checksum`` -- (*str*) a hashcode of the stored file content as a " +"hexadecimal string. Should be MD5 according to PDF specifications, but be" +" prepared to see other hashing algorithms." +msgstr "" +"``checksum`` – (*str*) " +"16進数の文字列として格納されたファイルコンテンツのハッシュコード。PDF 仕様に従えば MD5 " +"であるべきですが、他のハッシュアルゴリズムも見る可能性があるので、準備しておいてください。" + +#: ../../document.rst:1595 c5155287efc6411fa9ea13c55b53cde3 +msgid "" +"PDF only: Return a list of embedded file names. The sequence of the names" +" equals the physical sequence in the document." +msgstr "PDF のみ: 埋め込まれたファイルの名前のリストを返します。名前のシーケンスはドキュメント内の物理的なシーケンスと同じです。" + +#: ../../document.rst:1606 84d2e06f1a5c410d8ede4fcd1602ba91 +msgid "" +"PDF only: Change an embedded file given its entry number or name. All " +"parameters are optional. Letting them default leads to a no-operation." +msgstr "" +"PDF のみ: " +"エントリ番号または名前によって指定された埋め込まれたファイルを変更します。すべてのパラメータはオプションです。デフォルトで設定すると、操作は行われません。" + +#: ../../document.rst:1609 38c5beed974341f2bee1c034141e92bb +msgid "" +"the new file content. *(Changed in v1.14.13)* *io.BytesIO* is now also " +"supported." +msgstr "新しいファイルの内容。*(v1.14.13 で変更)* *io.BytesIO* もサポートされるようになりました。" + +#: ../../document.rst:1609 b6d5c01a9c2349c78c6d62752c61660c +msgid "the new file content." +msgstr "新しいファイルの内容。" + +#: ../../document.rst:1613 3ba623d2e9d14185b19cbc759afd1b82 +msgid "the new filename." +msgstr "新しいファイル名。" + +#: ../../document.rst:1614 ed5923bc2633452eaf57327d15e9e8dd +msgid "the new unicode filename." +msgstr "新しいUnicodeファイル名。" + +#: ../../document.rst:1615 e40b7cce2d134763b249779bbc9813b4 +msgid "the new description." +msgstr "新しい説明。" + +#: ../../document.rst:1617 eccaeb63c015477a9c290bedf3341def +msgid "" +"*(Changed in v1.18.13)* The method now returns the :data:`xref` of the " +"file object." +msgstr "*(v1.18.13 で変更)* このメソッドはファイルオブジェクトの :data:`xref` も返すようになりました。" + +#: ../../document.rst:1620 366c4efd2a264a579ef34427b84e24de +msgid "" +"xref of the file object. Automatically, its `/ModDate` PDF key will be " +"updated with the current date-time." +msgstr "ファイルオブジェクトのxref。自動的に、`/ModDate` PDF キーが現在の日時で更新されます。" + +#: ../../document.rst:1625 fd1fa19c112e490cb4ddba079fe457a1 +msgid "" +"Release objects and space allocations associated with the document. If " +"created from a file, also closes *filename* (releasing control to the " +"OS). Explicitly closing a document is equivalent to deleting it, `del " +"doc`, or assigning it to something else like `doc = None`." +msgstr "" +"文書に関連付けられたオブジェクトとスペースの割り当てを解放します。ファイルから作成された場合、ファイル名も閉じられ(OS " +"に制御を解放)ます。文書を明示的に閉じることは、それを削除すること、`del doc` 、または `doc = None` " +"のように別のものに割り当てることと同等です。" + +#: ../../document.rst:1629 ../../document.rst:1643 ../../document.rst:1650 +#: ../../document.rst:1657 ../../document.rst:1668 ../../document.rst:1677 +#: ../../document.rst:1826 14270db97f264164b3d69f71fe0c69f5 +#: 47d6b76ebe8b409fac6fe3b3a248d7b9 8c37e8f457fb483ca28af03c826fcd9d +#: c086dd2a27094668a3840fa79cd84e18 c69e0d6af69b4a0f842860ca8c505d1d +#: c8b828c921894f2286ddf6656820725b f8a3165580ff48368a0b479e9fdfe975 +msgid "New in v1.16.8" +msgstr "" + +#: ../../document.rst:1630 d6b60999b8d94f51a2e0483ff83c7b82 +msgid "Changed in v1.18.10" +msgstr "v1.18.10 で変更" + +#: ../../document.rst:1632 d347d20f77b9466ea053a83e859da0b7 +msgid "PDF only: Return the definition source of a PDF object." +msgstr "PDF のみ: PDF オブジェクトの定義ソースを返します。" + +#: ../../document.rst:1634 0e9d6b86440048cfb572b93e553490bf +#, fuzzy +msgid "" +"the object's :data:`xref`. *Changed in v1.18.10:* A value of `-1` returns" +" the PDF trailer source." +msgstr "オブジェクトの :data`xref` 。*v1.18.10 で変更:* -1 の値は PDF トレーラーのソースを返します。" + +#: ../../document.rst:1635 be9f07753f0148da9f081a7ff76d6c6d +msgid "whether to generate a compact output with no line breaks or spaces." +msgstr "改行やスペースのないコンパクトな出力を生成するかどうか。" + +#: ../../document.rst:1636 c4c8ef97519a4dc0b7f70eb9a4c3d7b8 +msgid "whether to ASCII-encode binary data." +msgstr "バイナリデータを ASCII エンコードするかどうか。" + +#: ../../document.rst:1639 81018b7cd93b40d7a34959a2fc502118 +msgid "The object definition source." +msgstr "オブジェクトの定義ソース。" + +#: ../../document.rst:1645 0fc49a0de9e84778a0bc18624f536533 +msgid "" +"PDF only: Return the :data:`xref` number of the PDF catalog (or root) " +"object. Use that number with :meth:`Document.xref_object` to see its " +"source." +msgstr "" +"PDF のみ: PDF カタログ(またはルート)オブジェクトの :data:`xref` 番号を返します。これを " +":meth:`Document.xref_object` で使用してそのソースを表示できます。" + +#: ../../document.rst:1652 7f6be18bdd1e495095253b00baf60799 +msgid "" +"PDF only: Return the trailer source of the PDF, which is usually located" +" at the PDF file's end. This is :meth:`Document.xref_object` with an " +":data:`xref` argument of -1." +msgstr "" +"PDF のみ: PDF のトレーラーソースを返します。通常、これは PDF ファイルの末尾にあります。これは " +":meth:`Document.xref_object` で :data:`xref` 引数が -1 の場合です。" + +#: ../../document.rst:1659 cda51bc9c719464ba91fae26c9d622c7 +msgid "" +"PDF only: Return the **decompressed** contents of the :data:`xref` stream" +" object." +msgstr "PDF のみ: :data:`xref` ストリームオブジェクトの **解凍された** コンテンツを返します。" + +#: ../../document.rst:1661 ../../document.rst:1681 ../../document.rst:1700 +#: 90fdc9a5cc744b868d7af84c5cb5be91 c5f1d5447e98482db0d26ea3fe57fcf2 +#: e47e0b06f7b1465481e307eeabddc696 +msgid ":data:`xref` number." +msgstr ":data:`xref` 番号。" + +#: ../../document.rst:1664 5376bfef128f44058da5eebfe6aa818d +msgid "the (decompressed) stream of the object." +msgstr "(解凍された)オブジェクトのストリーム。" + +#: ../../document.rst:1670 b3a4a8aa7935410ea8c4d7ab3133a391 +msgid "" +"PDF only: Return the **unmodified** (esp. **not decompressed**) contents " +"of the :data:`xref` stream object. Otherwise equal to " +":meth:`Document.xref_stream`." +msgstr "" +"PDF のみ: :data:`xref` ストリームオブジェクトの**変更前**(特に " +"**解凍されていない**)コンテンツを返します。それ以外の点では :meth:`Document.xref_stream` と同等です。" + +#: ../../document.rst:1673 efd01ffe56354915803c3346b344e52e +msgid "the (original, unmodified) stream of the object." +msgstr "(元の、変更前の)オブジェクトのストリーム。" + +#: ../../document.rst:1679 ffc079d7b8b2498ab520df7c23173fc8 +msgid "" +"PDF only: Replace object definition of :data:`xref` with the provided " +"string. The xref may also be new, in which case this instruction " +"completes the object definition. If a page object is also given, its " +"links and annotations will be reloaded afterwards." +msgstr "" +"PDF のみ: :data:`xref` のオブジェクト定義を提供された文字列で置き換えます。xref " +"が新しい場合、この命令はオブジェクト定義を完成させます。ページオブジェクトも指定された場合、そのリンクと注釈が後で再ロードされ、リンクと/または注釈に関連する変更が反映されます。" + +#: ../../document.rst:1683 af7ef9c3350d46dd91ea33d838f30ead +msgid "a string containing a valid PDF object definition." +msgstr "有効な PDF オブジェクト定義を含む文字列。" + +#: ../../document.rst:1685 31f52c85186448aba72732d814dc072a +msgid "" +"a page object. If provided, indicates, that annotations of this page " +"should be refreshed (reloaded) to reflect changes incurred with links and" +" / or annotations." +msgstr "ページオブジェクト。指定された場合、このページの注釈が変更を反映するために再ロードされることを示します。" + +#: ../../document.rst:1689 483c09db656d477ab951dc64519cfd02 +msgid "zero if successful, otherwise an exception will be raised." +msgstr "成功した場合はゼロ、それ以外の場合は例外が発生します。" + +#: ../../document.rst:1694 ae0ff7cd9ec24ce48088e3a5e0bc7c45 +msgid "New in v.1.16.8" +msgstr "v1.16.8 で新規" + +#: ../../document.rst:1695 afa3844759854f40a009d8d2095af614 +msgid "Changed in v1.19.2: added parameter \"compress\"" +msgstr "v1.19.2 で変更: パラメータ \"compress\" を追加" + +#: ../../document.rst:1696 e3670f10669943438f37d0e3d3fbc30b +msgid "" +"Changed in v1.19.6: deprecated parameter \"new\". Now confirms that the " +"object is a PDF dictionary object." +msgstr "" +"v1.19.6 で変更: パラメータ \"new\" を非推奨にし、無視します。オブジェクトが PDF " +"辞書オブジェクトであることを確認するようになりました。" + +#: ../../document.rst:1698 801d5e33074841c3875b9bcf083a2669 +msgid "" +"Replace the stream of an object identified by :data:`xref`, which must be" +" a PDF dictionary. If the object is no :data:`stream`, it will be turned " +"into one. The function automatically performs a compress operation " +"(\"deflate\") where beneficial." +msgstr "" +"xref で識別されるオブジェクトのストリームを置き換えます。:data:`xref` は PDF " +"辞書である必要があります。オブジェクトがストリームでない場合、それをストリームに変換します。この関数は、有益な場合には自動的に圧縮操作(\"deflate\")を実行します。" + +#: ../../document.rst:1702 a663fea15c1841c7b5086dd25b9ee0c9 +msgid "" +"the new content of the stream. *(Changed in v1.14.13:)* *io.BytesIO* " +"objects are now also supported." +msgstr "" + +#: ../../document.rst:1702 5586045fec9e4af589b8a924b2aa966f +msgid "the new content of the stream." +msgstr "ストリームの新しい内容。" + +#: ../../document.rst:1704 2986570ec9f84dcbb3dc3f15f9ba16de +msgid "*(Changed in v1.14.13:)* *io.BytesIO* objects are now also supported." +msgstr "*(v1.14.13 で変更:)* *io.BytesIO* オブジェクトもサポートされるようになりました。" + +#: ../../document.rst:1706 708b8e1347c54de588279245527cd0d5 +msgid "*deprecated* and ignored. Will be removed some time after v1.20.0." +msgstr "*非推奨* で無視されます。v1.20.0 以降のある時点で削除されます。" + +#: ../../document.rst:1707 9169f5b479714b5a823515010c9c4077 +msgid "" +"whether to compress the inserted stream. If `True` (default), the stream " +"will be inserted using `/FlateDecode` compression (if beneficial), " +"otherwise the stream will inserted as is." +msgstr "" +"挿入されるストリームを圧縮するかどうか。`True` の場合(デフォルト)、ストリームは `/FlateDecode` " +"圧縮を使用して挿入されます(有益な場合)、それ以外の場合はストリームはそのまま挿入されます。" + +#: ../../document.rst:1709 6a01d56f4b354296b703610ea2bbe419 +msgid "" +"if :data:`xref` does not represent a PDF :data:`dict`. An empty " +"dictionary ``<<>>`` is accepted. So if you just created the xref and want" +" to give it a stream, first execute `doc.update_object(xref, \"<<>>\")`, " +"and then insert the stream data with this method." +msgstr "" +":data:`xref` が PDF 辞書を表していない場合。空の辞書 < は受け入れられます。したがって、xref " +"を作成し、それにストリームを指定する場合は、まず `doc.update_object(xref, \"<<>>\")` " +"を実行し、その後、このメソッドでストリームデータを挿入してください。" + +#: ../../document.rst:1711 415073833d5341049e2e7a7ddb473a50 +msgid "" +"The method is primarily (but not exclusively) intended to manipulate " +"streams containing PDF operator syntax (see pp. 643 of the " +":ref:`AdobeManual`) as it is the case for e.g. page content streams." +msgstr "" +"このメソッドは主に(しかし排他的ではなく)PDFオペレータ構文を含むストリームを操作することを意図しています(:ref:`AdobeManual`" +" の pp. 643 参照)。例えばページのコンテンツストリームのようにです。" + +#: ../../document.rst:1713 bc0a1750301d4b80bca19316ee01933a +msgid "" +"If you update a contents stream, consider using save parameter " +"*clean=True* to ensure consistency between PDF operator source and the " +"object structure." +msgstr "" +"コンテンツストリームを更新する場合、PDFオペレータのソースとオブジェクト構造の間の整合性を確保するために save パラメータを " +"*clean=True* で使用することを検討してください。" + +#: ../../document.rst:1715 02d2b54a93af4ea890ed6715774590d5 +msgid "" +"Example: Let us assume that you no longer want a certain image appear on " +"a page. This can be achieved by deleting the respective reference in its " +"contents source(s) -- and indeed: the image will be gone after reloading " +"the page. But the page's :data:`resources` object would still show the " +"image as being referenced by the page. This save option will clean up any" +" such mismatches." +msgstr "" +"例: ある画像をページに表示させたくないと仮定しましょう。これは、そのコンテンツソース内の該当する参照を削除することによって実現できます - " +"そして実際には、ページを再読み込みした後、画像は消えてしまいます。ただし、ページの :data:`resources` " +"は、まだその画像がページによって参照されていると表示されます。この保存オプションは、そのような不一致をクリーンアップします。" + +#: ../../document.rst:1720 556ab1a555e9477d800957c0e4e1e97b +msgid "New in v1.19.5" +msgstr "v1.19.5 で新規" + +#: ../../document.rst:1722 47e849734d964b1490749d596ade2345 +msgid "" +"PDF Only: Make *target* xref an exact copy of *source*. If *source* is a " +":data:`stream`, then these data are also copied." +msgstr "" +"PDF のみ: *ターゲット* の xref をソースの正確なコピーにします。*ソース* が :data:`stream` " +"の場合、これらのデータもコピーされます。" + +#: ../../document.rst:1724 21c3cbbe776e4f8a8ac7f5190fb7a335 +msgid "the source :data:`xref`. It must be an existing **dictionary** object." +msgstr "ソースの :data:`xref`。既存の **辞書** オブジェクトである必要があります。" + +#: ../../document.rst:1725 55276a5af2ec4e179a9d6c501b808961 +msgid "" +"the target xref. Must be an existing **dictionary** object. If the xref " +"has just been created, make sure to initialize it as a PDF dictionary " +"with the minimum specification ``<<>>``." +msgstr "" +"ターゲットの :data:`xref`。既存の **辞書** オブジェクトである必要があります。xref が新たに作成されたばかりの場合、最小仕様" +" ``<<>>`` を持つ PDF 辞書として初期化することを確認してください。" + +#: ../../document.rst:1726 2b605267f77649968b69d698aeead973 +msgid "" +"an optional list of top-level keys in ``target``, that should not be " +"removed in preparation of the copy process." +msgstr "``target`` 内のトップレベルのキーを削除する準備段階で削除しない、オプションのキーリスト。" + +#: ../../document.rst:1730 f9a0573da15e4f609a6b4dafb0c6f191 +msgid "This method has much in common with Python's *dict* method `copy()`." +msgstr "このメソッドは、Python の *dict* メソッド `copy()` と多くの共通点があります。" + +#: ../../document.rst:1731 e284df946c1a4d42a141beebb9ecdbd4 +msgid "Both xref numbers must represent existing dictionaries." +msgstr "両方の xref 番号は既存の辞書を表す必要があります。" + +#: ../../document.rst:1732 d76070ec87284eeba9402138be67ac92 +msgid "" +"Before data is copied from *source*, all *target* dictionary keys are " +"deleted. You can specify exceptions from this in the ``keep`` list. If " +"*source* however has a same-named key, its value will still replace the " +"target." +msgstr "" +"データがソースからコピーされる前に、すべての *ターゲット* 辞書のキーが削除されます。この削除からの例外を ``keep`` " +"リストで指定できます。ただし、*ソース* に同じ名前のキーがある場合、その値はターゲットに置き換えられます。" + +#: ../../document.rst:1733 eb592c11643040f8bacae8d7ff2aa6b0 +msgid "" +"If ``source`` is a :data:`stream` object, then these data will also be copied over, and ``target`` will be converted to a stream object." +msgstr "``source`` が ``stream`` オブジェクトである場合、そのデータもコピーされ、``target`` は ``stream`` オブジェクトに変換されます。" + +#: ../../document.rst:1734 6e76189531f4460496c74073c0e2105f +msgid "" +"A typical use case is to replace or remove an existing image without " +"using redaction annotations. Example scripts can be seen `in this PyMuPDF Utilities example " +"`_." +msgstr "" +"典型的な使用例は、赤塗り注釈を使用せずに既存の画像を置き換えたり削除したりすることです。例のスクリプトは `こちら " +"`_ で確認できます。" + +#: ../../document.rst:1738 3757280fea3a405cb5eec49e258ba662 +msgid "" +"PDF Only: Extract data and meta information of an image stored in the " +"document. The output can directly be used to be stored as an image file, " +"as input for PIL, :ref:`Pixmap` creation, etc. This method avoids using " +"pixmaps wherever possible to present the image in its original format " +"(e.g. as JPEG)." +msgstr "" +"PDF のみ: " +"文書に格納された画像のデータとメタ情報を抽出します。出力は、画像ファイルとして保存するための直接的な使用、PIL、:ref:`Pixmap` " +"の作成などに使用できます。このメソッドは、できる限りピクマップを使用せず、画像をその元の形式(例:JPEG " +"として)で表示することを目的としています。" + +#: ../../document.rst:1740 145ea90626d94642bace65f7cfae7c01 +msgid "" +":data:`xref` of an image object. If this is not in `range(1, " +"doc.xref_length())`, or the object is no image or other errors occur, " +"`None` is returned and no exception is raised." +msgstr "" +"画像オブジェクトの :data:`xref`。これが `range(1, doc.xref_length())` " +"の範囲外であるか、オブジェクトが画像でないか、その他のエラーが発生した場合、 `None` が返され、例外は発生しません。" + +#: ../../document.rst:1743 757d4da025c14b728095072969c1d22d +msgid "" +"a dictionary with the following keys * *ext* (*str*) image type (e.g. " +"*'jpeg'*), usable as image file extension * *smask* (*int*) :data:`xref` " +"number of a stencil (/SMask) image or zero * *width* (*int*) image width " +"* *height* (*int*) image height * *colorspace* (*int*) the image's " +"*colorspace.n* number. * *cs-name* (*str*) the image's *colorspace.name*." +" * *xres* (*int*) resolution in x direction. Please also see " +":data:`resolution`. * *yres* (*int*) resolution in y direction. Please " +"also see :data:`resolution`. * *image* (*bytes*) image data, usable as " +"image file content" +msgstr "" + +#: ../../document.rst:1743 5da832c6b8c343ea946b193ed144376e +msgid "a dictionary with the following keys" +msgstr "以下のキーを持つ辞書" + +#: ../../document.rst:1745 e9f061ed5c634149934f0ff466277a8b +msgid "*ext* (*str*) image type (e.g. *'jpeg'*), usable as image file extension" +msgstr "*ext* (*str*) 画像タイプ(例:*'jpeg'*)、画像ファイルの拡張子として使用可能" + +#: ../../document.rst:1746 4d2df5918a8345cb92c00ac43a54a409 +msgid "*smask* (*int*) :data:`xref` number of a stencil (/SMask) image or zero" +msgstr "*smask* (*int*) ステンシル(/SMask)画像の :data:`xref` 番号またはゼロ" + +#: ../../document.rst:1747 179f1bf46b6e40afbeb65a87365dc3e3 +msgid "*width* (*int*) image width" +msgstr "*width* (*int*) 画像の幅" + +#: ../../document.rst:1748 88b43c7b7ead4e9cbce2276a540e5c6c +msgid "*height* (*int*) image height" +msgstr "*height* (*int*) 画像の高さ" + +#: ../../document.rst:1749 0448216e247447b5b867c7a212a7db7c +msgid "*colorspace* (*int*) the image's *colorspace.n* number." +msgstr "*colorspace* (*int*) 画像のカラースペースの数" + +#: ../../document.rst:1750 bfec3e10c09347409c2ce4991f863d76 +msgid "*cs-name* (*str*) the image's *colorspace.name*." +msgstr "*cs-name* (*str*) 画像のカラースペースの名前" + +#: ../../document.rst:1751 bab5172f3e2d42d7a2d7687f1aabc59f +msgid "" +"*xres* (*int*) resolution in x direction. Please also see " +":data:`resolution`." +msgstr "*xres* (*int*) x 方向の解像度。:data:`resolution` も参照してください。" + +#: ../../document.rst:1752 d940169fe33947e8b45dc1161bd129d8 +msgid "" +"*yres* (*int*) resolution in y direction. Please also see " +":data:`resolution`." +msgstr "*yres* (*int*) y 方向の解像度。:data:`resolution` も参照してください。" + +#: ../../document.rst:1753 a2f030ad2bcb4bf68c260cb4874be600 +msgid "*image* (*bytes*) image data, usable as image file content" +msgstr "*image* (*bytes*) 画像データ、画像ファイルのコンテンツとして使用可能" + +#: ../../document.rst:1765 efcfe100b8a6420e850eeb29525eac1b +msgid "" +"There is a functional overlap with *pix = pymupdf.Pixmap(doc, xref)*, " +"followed by a *pix.tobytes()*. Main differences are that extract_image, " +"**(1)** does not always deliver PNG image formats, **(2)** is **very** " +"much faster with non-PNG images, **(3)** usually results in much less " +"disk storage for extracted images, **(4)** returns `None` in error cases " +"(generates no exception). Look at the following example images within the" +" same PDF." +msgstr "" +"*pix = pymupdf.Pixmap(doc, xref)* *に続いてpix.tobytes()* " +"という機能的な重複があります。主な違いは、extract_imageは、**(1)** 必ずしもPNG画像形式を提供しない、 **(2)** " +"PNG以外の画像では非常に高速である、 **(3)** 抽出された画像のディスクストレージが通常ははるかに少ない、 **(4)** " +"エラーケースでは `None` を返す(例外を生成しない)という点です。同じPDF内の以下の例の画像を見てみましょう。" + +#: ../../document.rst:1767 79b4a5862036494bb5451d004438fe43 +msgid "xref 1268 is a PNG -- Comparable execution time and identical output::" +msgstr "xref 1268 は PNG 形式です - 比較可能な実行時間と同じ出力::" + +#: ../../document.rst:1779 3cfaf358ed8f49a288bdad998ca0dfc8 +msgid "" +"xref 1186 is a JPEG -- :meth:`Document.extract_image` is **many times " +"faster** and produces a **much smaller** output (2.48 MB vs. 0.35 MB)::" +msgstr "" +"xref 1186 は JPEG です - :meth:`Document.extract_image` は " +"**何倍も速く**、はるかに小さい出力を生成します(2.48 MB に対して 0.35 MB)::" + +#: ../../document.rst:1794 f96212b879b34069be2cb1046653ead4 +msgid "Changed in v1.19.4: return a dictionary if `named == True`." +msgstr "v1.19.4 で変更: `named == True` の場合、辞書を返します。" + +#: ../../document.rst:1796 60c2bb121f8c407fb832d07b5ea5366b +msgid "" +"PDF Only: Return an embedded font file's data and appropriate file " +"extension. This can be used to store the font as an external file. The " +"method does not throw exceptions (other than via checking for PDF and " +"valid :data:`xref`)." +msgstr "" +"PDF のみ: " +"埋め込まれたフォントファイルのデータと適切なファイル拡張子を返します。これを使用してフォントを外部ファイルとして保存することができます。このメソッドは例外をスローしません(PDF" +" および有効な :data:`xref` のチェックを除く)。" + +#: ../../document.rst:1798 a9d21a1213bf49d5ab5fe1f9aa1f9fcd +msgid "PDF object number of the font to extract." +msgstr "抽出するフォントの PDF オブジェクト番号。" + +#: ../../document.rst:1799 4ff80700b4ea49ad83243ffdfeefb1d4 +msgid "" +"only return font information, not the buffer. To be used for information-" +"only purposes, avoids allocation of large buffer areas." +msgstr "フォント情報のみを返し、バッファの割り当てを回避するために使用します。" + +#: ../../document.rst:1800 3b6cc4812e4b48aba4f32d17dbe04134 +msgid "" +"If true, a dictionary with the following keys is returned: 'name' (font " +"base name), 'ext' (font file extension), 'type' (font type), 'content' " +"(font file content)." +msgstr "" +"`True` の場合、次のキーを持つ辞書が返されます: 'name'(フォントのベース名)、 'ext'(フォントファイルの拡張子)、 " +"'type'(フォントのタイプ)、 'content'(フォントファイルの内容)。" + +#: ../../document.rst:1803 d418db8a63b94c2caac5f1effd542a11 +#, fuzzy +msgid "" +"a tuple `(basename, ext, type, content)`, where *ext* is a 3-byte " +"suggested file extension (*str*), *basename* is the font's name (*str*), " +"*type* is the font's type (e.g. \"Type1\") and *content* is a bytes " +"object containing the font file's content (or *b\"\"*). For possible " +"extension values and their meaning see :ref:`FontExtensions`. Return " +"details on error: * `(\"\", \"\", \"\", b\"\")` -- invalid xref or xref " +"is not a (valid) font object. * `(basename, \"n/a\", \"Type1\", b\"\")` " +"-- *basename* is not embedded and thus cannot be extracted. This is the " +"case for e.g. the :ref:`Base-14-Fonts` and Type 3 fonts." +msgstr "" +"`(basename, ext, type, content)` のタプルで、ext " +"は3バイトの推奨ファイル拡張子(*str*)で、*basename* はフォントの名前(*str*)で、*type* " +"はフォントのタイプ(例:「Type1」)で、*content* はフォントファイルの内容を含むバイトオブジェクトです(または *b\"\"* " +")。可能な拡張子の値とその意味については :ref:`FontExtensions` を参照してください。エラー時の返り値の詳細:" + +#: ../../document.rst:1803 e4ecc274179d4bbe96d0ebb132fc1670 +msgid "" +"a tuple `(basename, ext, type, content)`, where *ext* is a 3-byte " +"suggested file extension (*str*), *basename* is the font's name (*str*), " +"*type* is the font's type (e.g. \"Type1\") and *content* is a bytes " +"object containing the font file's content (or *b\"\"*). For possible " +"extension values and their meaning see :ref:`FontExtensions`. Return " +"details on error:" +msgstr "" +"`(basename, ext, type, content)` のタプルで、ext " +"は3バイトの推奨ファイル拡張子(*str*)で、*basename* はフォントの名前(*str*)で、*type* " +"はフォントのタイプ(例:「Type1」)で、*content* はフォントファイルの内容を含むバイトオブジェクトです(または *b\"\"* " +")。可能な拡張子の値とその意味については :ref:`FontExtensions` を参照してください。エラー時の返り値の詳細:" + +#: ../../document.rst:1805 ff2818d044fa4383b69ac0ee6aa70281 +msgid "" +"`(\"\", \"\", \"\", b\"\")` -- invalid xref or xref is not a (valid) font" +" object." +msgstr "`(\"\", \"\", \"\", b\"\")` – 無効な xref または xref が(有効な)フォントオブジェクトではない場合。" + +#: ../../document.rst:1806 9d33bfe772504f61b0c7b69ca54f7ecd +msgid "" +"`(basename, \"n/a\", \"Type1\", b\"\")` -- *basename* is not embedded and" +" thus cannot be extracted. This is the case for e.g. the " +":ref:`Base-14-Fonts` and Type 3 fonts." +msgstr "" +"`(basename, \"n/a\", \"Type1\", b\"\")` – *basename* " +"は埋め込まれておらず、したがって抽出できません。これは、例えば :ref:`Base-14-Fonts` フォントや Type 3 " +"フォントの場合に該当します。" + +#: ../../document.rst:1808 8535e1de2d1f4c1787b82ba79e94aeaf +msgid "Example:" +msgstr "例:" + +#: ../../document.rst:1817 b1ee580e36614da38b63b42d9e2c408e +msgid "" +"The basename is returned unchanged from the PDF. So it may contain " +"characters (such as blanks) which may disqualify it as a filename for " +"your operating system. Take appropriate action." +msgstr "" +"*ベースネーム* は PDF から変更されずに返されます。そのため、それには (空白などの) " +"ファイル名として使用できない文字が含まれている可能性があります。適切な措置を取ってください。" + +#: ../../document.rst:1820 229b42ffbe404889b9cfe652193a8bea +msgid "" +"The returned *basename* in general is **not** the original file name, but" +" it probably has some similarity." +msgstr "通常、返される *ベースネーム* は元のファイル名ではなく、いくつかの類似性があるかもしれません。" + +#: ../../document.rst:1821 56b4bdd66e364960a6f523e14f5a5864 +msgid "" +"If parameter `named == True`, a dictionary with the following keys is " +"returned: `{'name': 'T1', 'ext': 'n/a', 'type': 'Type3', 'content': " +"b''}`." +msgstr "" +"`named == True` の場合、次のキーを持つ辞書が返されます: `{'name': 'T1', 'ext': 'n/a', " +"'type': 'Type3', 'content': b''}`。" + +#: ../../document.rst:1828 f5ee8f1765344460b1f72a694be0dcc5 +msgid "PDF only: Return the :data:`xref` of the document's XML metadata." +msgstr "PDF のみ: ドキュメントの XML メタデータの :data:`xref` を返します。" + +#: ../../document.rst:1837 3bc7c8decb7b41a1967751004cb7a16e +msgid "" +"PDF only: Check whether there are links, resp. annotations anywhere in " +"the document." +msgstr "PDF のみ: ドキュメント内にリンクまたは注釈が存在するかどうかを確認します。" + +#: ../../document.rst:1839 83ea1c4b222140c49a97cf7bece5dd05 +msgid "" +"``True`` / ``False``. As opposed to fields, which are also stored in a " +"central place of a PDF document, the existence of links / annotations can" +" only be detected by parsing each page. These methods are tuned to do " +"this efficiently and will immediately return, if the answer is ``True`` " +"for a page. For PDFs with many thousand pages however, an answer may take" +" some time [#f6]_ if no link, resp. no annotation is found." +msgstr "" +"``True`` / ``False`` 。フィールドとは異なり、リンクや注釈の存在は PDF " +"ドキュメントの中心的な場所に格納されているわけではなく、各ページを解析してのみ検出できます。これらのメソッドは効率的に実行するように調整されており、ページに対して" +" ``True`` の回答がある場合、すぐに返されます。ただし、多くのページを持つ PDF " +"の場合、リンクや注釈が見つからない場合、回答に時間がかかることがあります [#f6]_。" + +#: ../../document.rst:1844 59f96b58568e4e428c9c1e21712318a5 +msgid "" +"PDF only: Investigate eligible fonts for their use by text in the " +"document. If a font is supported and a size reduction is possible, that " +"font is replaced by a version with a subset of its characters." +msgstr "" +"PDF のみ: " +"ドキュメント内のテキストによる使用を検討するための適格なフォントを調査します。サポートされているフォントで、サイズの削減が可能な場合、そのフォントは文字のサブセットを含むバージョンで置換されます。" + +#: ../../document.rst:1846 a9ed3a66d31a41618be1bdfc1e160d89 +msgid "Use this method immediately before saving the document." +msgstr "このメソッドは、ドキュメントを保存する直前に使用します" + +#: ../../document.rst:1848 e6c74fb06afe4b3e9cace5df8c49914f +msgid "" +"write various progress information to sysout. This currently only has an " +"effect if `fallback` is `True`." +msgstr "さまざまな進行状況情報を sysout に書き込みます。現在、これは `fallback` が `True` の場合にのみ効果があります。" + +#: ../../document.rst:1849 a90de8c245eb47b99d1752fe0de85df8 +msgid "" +"if `True` use the deprecated algorithm that makes use of package " +"`fontTools `_ (which hence must be " +"installed). If using the recommended value `False` (default), MuPDF's " +"native function is used -- which is **very much faster** and can subset a" +" broader range of font types. Package fontTools is not required then." +msgstr "" +"`True` の場合、非推奨のアルゴリズムを使用して、パッケージ `fontTools " +"`_ を使用します(従って、fontTools " +"がインストールされている必要があります)。推奨される値 `False` (デフォルト)を使用すると、MuPDF " +"のネイティブ関数が使用されます。これは非常に高速で、より広範なフォントタイプをサブセット化できます。この場合、パッケージ fontTools " +"は必要ありません。" + +#: ../../document.rst:1851 1a7cb8534e5c46f19590403b54ef074f +msgid "" +"The greatest benefit can be achieved when creating new PDFs using large " +"fonts like is typical for Asian scripts. When using the :ref:`Story` " +"class or method :meth:`Page.insert_htmlbox`, multiple fonts may " +"automatically be included -- without the programmer becoming aware of it." +msgstr "" +"最大の利点は、アジアのスクリプトに典型的な大きなフォントを使用して新しい PDF を作成する場合に得られます。 :ref:`Story` " +"クラスまたはメソッド :meth:`Page.insert_htmlbox` " +"を使用する場合、複数のフォントが自動的に含まれる場合がありますが、プログラマーが気付かない場合もあります。" + +#: ../../document.rst:1853 997c9fd6bbde438f9c43d3d0ee42963c +msgid "" +"In all these cases, the set of actually used unicodes mostly is very " +"small compared to the number of glyphs available in the used fonts. Using" +" this method can easily reduce the embedded font binaries by two orders " +"of magnitude -- from several megabytes down to a low two-digit kilobyte " +"amount." +msgstr "" +"これらのすべての場合、実際に使用される Unicode " +"のセットは、使用されるフォントの使用可能なグリフの数に比べて非常に小さいことがほとんどです。このメソッドを使用すると、埋め込まれたフォントのバイナリを容易に2桁のキロバイト量にまで減らすことができます。" + +#: ../../document.rst:1855 31e85330a1524939a2a3ed0807433ec5 +msgid "" +"Creating font subsets leaves behind a large number of large, now unused " +"PDF objects (\"ghosts\"). Therefore, make sure to compress and garbage-" +"collect when saving the file. We recommend to use " +":meth:`Document.ez_save`." +msgstr "" +"フォントのサブセットを作成すると、多数の大きな、今は使用されていない PDF " +"オブジェクト(「ゴースト」)が残ります。そのため、ファイルを保存する際に圧縮およびガベージコレクトを行ってください。:meth:`Document.ez_save`" +" の使用をお勧めします。" + +#: ../../document.rst:1857 e7b42869145d4d2f9217a7bfce00aba9 +msgid "|history_begin|" +msgstr "" + +#: ../../document.rst:1860 dd414b262d654d37af1c10d6292b6773 +msgid "Changed in v1.18.9" +msgstr "v1.18.9で変更" + +#: ../../document.rst:1861 7a1af6d710284fa9bfb416a88abb494a +msgid "Changed in v1.24.2 use native function of MuPDF." +msgstr "v1.24.2 で変更され、MuPDF のネイティブ機能を使用するようになりました。" + +#: ../../document.rst:1863 7e539c977c0b42c7b31af48f1c4c68ce +msgid "|history_end|" +msgstr "" + +#: ../../document.rst:1868 ../../document.rst:1874 ../../document.rst:1881 +#: ../../document.rst:1888 ../../document.rst:1897 ../../document.rst:1904 +#: ../../document.rst:1913 ../../document.rst:1920 ../../document.rst:1927 +#: ../../document.rst:1936 ../../document.rst:1945 +#: 134b144f37c3455da3ec93f70f27276f 39e7cbe6c2474316b6c9fc99383efe76 +#: 44d241e5e38b48aca82290625d04112c 588e2116a323410ebb522ae9ce502dad +#: 5fe09ca4168c4ff38669cce32efa5023 6177169626d04870a257a387d1540d3f +#: 929e0adce63142fba8d8529664d15e11 92bfdc79266f4e24bbb782fccff67205 +#: c374df25a8944351a350e6977af4c6df c9d54685fa7e48738fdcf99788045578 +#: f3f00fcdf6434f9a8d759354cb52b541 +msgid "New in v1.19.0" +msgstr "v1.19.0 で新規追加" + +#: ../../document.rst:1870 5f34db045fd54cf1abcf901da92df76b +msgid "" +"PDF only: Enable journalling. Use this before you start logging " +"operations." +msgstr "PDF のみ: ジャーナリングを有効にします。ログ操作を開始する前にこれを使用します。" + +#: ../../document.rst:1876 6ff50c67b9d1453891f1b22d5e619bec +msgid "" +"PDF only: Start journalling an *\"operation\"* identified by a string " +"\"name\". Updates will fail for a journal-enabled PDF, if no operation " +"has been started." +msgstr "" +"PDF のみ: 文字列 \"name\" で識別される *\"操作\"* のジャーナリングを開始します。ジャーナリングが有効な PDF " +"に対しては、操作が開始されていない場合、更新が失敗します。" + +#: ../../document.rst:1883 0b85789997a34ddeb767c2eff02767e5 +msgid "" +"PDF only: Stop the current operation. The updates between start and stop " +"of an operation belong to the same unit of work and will be undone / " +"redone together." +msgstr "PDF のみ: 現在の操作を停止します。操作の開始から終了までの間の更新は、同じユニットの作業に属し、一緒に元に戻される / やり直されます。" + +#: ../../document.rst:1890 4771694b094349ff9dbd48caa537ed8b +msgid "" +"PDF only: Return the numbers of the current operation and the total " +"operation count." +msgstr "PDF のみ: 現在の操作番号と総操作数を返します。" + +#: ../../document.rst:1892 3e61c0d7bb2940ac8a92d69b5de2f9a6 +msgid "" +"a tuple `(step, steps)` containing the current operation number and the " +"total number of operations in the journal. If **step** is 0, we are at " +"the top of the journal. If **step** equals **steps**, we are at the " +"bottom. Updating the PDF with anything other than undo or redo will " +"automatically remove all journal entries after the current one and the " +"new update will become the new last entry in the journal. The updates " +"corresponding to the removed journal entries will be permanently lost." +msgstr "" +"ジャーナル内の現在の操作番号と総操作数を含むタプル `(step, steps)` が返されます。**step** が 0 " +"の場合、ジャーナルの先頭にいることを示します。**step** が **steps** " +"と等しい場合、ジャーナルの末尾にいることを示します。undo または redo 以外の何かで PDF " +"を更新すると、現在のエントリ以降のすべてのジャーナルエントリが自動的に削除され、新しい更新がジャーナルの新しい最後のエントリになります。削除されたジャーナルエントリに対応する更新は永久に失われます。" + +#: ../../document.rst:1899 7351954a6f184a1b91e20ccf82194e9d +msgid "PDF only: Return the name of operation number *step.*" +msgstr "PDF のみ: 操作番号 *step* の操作名を返します。" + +#: ../../document.rst:1906 0030e540718549a2b02199f54a9803f3 +msgid "" +"PDF only: Show whether forward (\"redo\") and / or backward (\"undo\") " +"executions are possible from the current journal position." +msgstr "PDF のみ: 現在のジャーナル位置から前方(\"やり直し\")および後方(\"元に戻す\")の実行が可能かどうかを表示します。" + +#: ../../document.rst:1908 6365fcc2558a4a63abc8665e14a07de3 +msgid "" +"a dictionary `{\"undo\": bool, \"redo\": bool}`. The respective method is" +" available if its value is `True`." +msgstr "`{\"undo\": bool, \"redo\": bool}` 形式の辞書。各メソッドはその値が `True` の場合に利用可能です。" + +#: ../../document.rst:1915 889f26f9d92e48dc9c535ab16045bc6c +msgid "" +"PDF only: Revert (undo) the current step in the journal. This moves " +"towards the journal's top." +msgstr "PDF のみ: ジャーナル内の現在のステップを元に戻します(元に戻す)。これにより、ジャーナルの先頭に向かって移動します。" + +#: ../../document.rst:1922 7945421689274307a8689bf3deb0a153 +msgid "" +"PDF only: Re-apply (redo) the current step in the journal. This moves " +"towards the journal's bottom." +msgstr "PDF のみ:現在のステップをジャーナルのボトムに戻して再適用します。" + +#: ../../document.rst:1929 1cc46a714d7144beb37789a07730ec12 +msgid "PDF only: Save the journal to a file." +msgstr "PDF のみ:ジャーナルをファイルに保存します。" + +#: ../../document.rst:1931 a89dc948eea94659b2d4d21afcf75782 +msgid "" +"either a filename as string or a file object opened as \"wb\" (or an " +"`io.BytesIO()` object)." +msgstr "文字列としてのファイル名または \"wb\" で開かれたファイルオブジェクト (または `io.BytesIO()` オブジェクト)。" + +#: ../../document.rst:1938 d7214c5f077d480d8ee2e10beb2b36f9 +msgid "" +"PDF only: Load journal from a file. Enables journalling for the document." +" If journalling is already enabled, an exception is raised." +msgstr "" +"PDF " +"のみ:ファイルからジャーナルを読み込みます。ドキュメントのジャーナリングを有効にします。既にジャーナリングが有効になっている場合、例外が発生します。" + +#: ../../document.rst:1940 ec37c90326cc4e7fa50fd0849adddb51 +msgid "" +"the filename (str) of the journal or a file object opened as \"rb\" (or " +"an `io.BytesIO()` object)." +msgstr "ジャーナルのファイル名 (str) または \"rb\" で開かれたファイルオブジェクト (または `io.BytesIO()` オブジェクト)。" + +#: ../../document.rst:1947 40a6123027074b8484b5e4701e8385ed +msgid "" +"PDF only: Saves a \"snapshot\" of the document. This is a PDF document " +"with a special, incremental-save format compatible with journalling -- " +"therefore no save options are available. Saving a snapshot is not " +"possible for new documents." +msgstr "" +"PDF のみ:ドキュメントの「スナップショット」を保存します。これは、ジャーナリングと互換性のある特別なインクリメンタルセーブ形式を持つ PDF " +"ドキュメントです。そのため、セーブオプションは利用できません。新しいドキュメントにはスナップショットを保存することはできません。" + +#: ../../document.rst:1949 a2b97da070ba4261906f3c43a837a36e +msgid "" +"This is a normal PDF document with no usage restrictions whatsoever. If " +"it is not being changed in any way, it can be used together with its " +"journal to undo / redo operations or continue updating." +msgstr "" +"これは通常の PDF " +"ドキュメントで、制限はありません。何も変更されていない場合、そのスナップショットはジャーナルと共に操作を元に戻したり、やり直したり、更新を続けたりするために使用できます。" + +#: ../../document.rst:1954 2431d2fb78d34d04a30cfc26a1ca2bcb +msgid "" +"Contains the first :ref:`Outline` entry of the document (or `None`). Can " +"be used as a starting point to walk through all outline items. Accessing " +"this property for encrypted, not authenticated documents will raise an " +"*AttributeError*." +msgstr "" +"ドキュメントの最初のアウトラインエントリを含みます (または " +"`None`)。すべてのアウトラインアイテムを歩く出発点として使用できます。暗号化された、認証されていないドキュメントに対してこのプロパティにアクセスすると、*AttributeError*" +" が発生します。" + +#: ../../document.rst 0425d6e823a049a58b3910948b859aac +#: 1a4c62f1f72547b28e337c09591fa4e3 22441097973c4c78a00bda60bb5b4b0a +#: 2a65fb9679e2422fa9a7e472a4674547 36df541fa177422fa6a1525f979fcd83 +#: 3c03be6185ec4eb1b121534a7f792853 82f9cd8d045845d0bdfd4f280aa0ad74 +#: 8c39c9ab02724cde8fbeac515bee65a0 932eb1e01abf4958907b24da9d2d0e16 +#: 9a6c05b88750437fa179d989e3edefcf a07df30a3dcf4033a2406acba7ee0b7d +#: a8cb1ffeb1f040fa8b730813e32f0122 ade9e281bfc64dc9b77c7e42a8f9f807 +#: b11dbf9a5df64369b2e3ae2e4491c99d b199035444494d29bd28c8629d429e4f +#: b603735f2cc64db598488fd2680cdc91 b961708a486a439faee6eb85cce60019 +#: c44492122ef943db8db2e8b9efd056e0 e46abe91945844eb8cc104c103211276 +#: edcee78110e840b4a187183ce9df3b5d f94ec6831ed84a0082a17f0c5455a30e +msgid "type" +msgstr "タイプ" + +#: ../../document.rst:1956 572eb577537b4bcb9d607b838aedc0ca +msgid ":ref:`Outline`" +msgstr "" + +#: ../../document.rst:1960 3873056a822545e3be2b362c2fe48a3c +msgid "" +"``False`` if document is still open. If closed, most other attributes and" +" methods will have been deleted / disabled. In addition, :ref:`Page` " +"objects referring to this document (i.e. created with " +":meth:`Document.load_page`) and their dependent objects will no longer be" +" usable. For reference purposes, :attr:`Document.name` still exists and " +"will contain the filename of the original document (if applicable)." +msgstr "" +"もし文書がまだ開いている場合は ``False`` " +"です。閉じた場合、他のほとんどの属性やメソッドが削除されたり無効になります。また、この文書を参照する :ref:`Page` " +"オブジェクト(すなわち :meth:`Document.load_page` " +"で作成されたもの)とそれに依存するオブジェクトは利用できなくなります。参照用に、:attr:`Document.name` " +"はまだ存在し、元の文書のファイル名が含まれます(該当する場合)。" + +#: ../../document.rst:1962 ../../document.rst:1968 ../../document.rst:1974 +#: ../../document.rst:1988 ../../document.rst:1996 ../../document.rst:2004 +#: ../../document.rst:2042 ../../document.rst:2048 +#: 0e0ecaeba1354583ab141694171983c1 15ecbd8f8b4e4f66838e84f048c23ac0 +#: 20ed8a1d74d64f768927618f4c8d5775 36c5bd4e96bb49b3974eb537d041b2b9 +#: 8b931470fbff4a78af465bc20f1b4bc5 ab529e3a588f4da89688c075525eca3b +#: b9b48ab1190d4eb0b0555c8b5504ff7e e204fe7e86144f0b89219e2564b70ef4 +msgid "bool" +msgstr "" + +#: ../../document.rst:1966 2500215d97e14534a9e2da08bbe6a2ea +msgid "" +"``True`` if this is a PDF document and contains unsaved changes, else " +"``False``." +msgstr "これがPDFドキュメントであり、保存されていない変更が含まれている場合は ``True`` 、それ以外は ``False`` です。" + +#: ../../document.rst:1972 1c51427200a443b3b3ca741816eab527 +msgid "``True`` if this is a PDF document, else ``False``." +msgstr "これがPDFドキュメントである場合は ``True`` 、それ以外は ``False`` です。" + +#: ../../document.rst:1978 01ec2efd01e14c3691724a5e542f728b +msgid "" +"``False`` if this is not a PDF or has no form fields, otherwise the " +"number of root form fields (fields with no ancestors)." +msgstr "" +"これがPDFでないか、フォームフィールドが含まれていない場合は ``False`` " +"であり、それ以外の場合はルートフォームフィールド(祖先のないフィールド)の数です。" + +#: ../../document.rst:1980 fb2835699f1a4d2e91cbeaca4ff3e396 +msgid "*(Changed in v1.16.4)* Returns the total number of (root) form fields." +msgstr "*(v1.16.4で変更)* (ルート)フォームフィールドの合計数を返します。" + +#: ../../document.rst:1982 2be18a9939df43299855a8423e99ec64 +msgid "bool,int" +msgstr "" + +#: ../../document.rst:1986 47fe05ac74a645bf862a453c4b9ef5ea +msgid "" +"``True`` if document has a variable page layout (like e-books or HTML). " +"In this case you can set the desired page dimensions during document " +"creation (open) or via method :meth:`layout`." +msgstr "" +"文書が可変ページレイアウト(電子書籍やHTMLのような)を持つ場合は ``True`` です。この場合、文書の作成(オープン)時または " +":meth:`layout` メソッドを使用して所望のページ寸法を設定できます。" + +#: ../../document.rst:1992 3305b89decd6493881920df4d4242e8d +msgid "New in v1.18.2" +msgstr "v1.18.2で新しく追加されました。" + +#: ../../document.rst:1994 126148a54fe24c0ea75ba68040b25524 +msgid "" +"``True`` if PDF has been repaired during open (because of major structure" +" issues). Always ``False`` for non-PDF documents. If true, more details " +"have been stored in `TOOLS.mupdf_warnings()`, and " +":meth:`Document.can_save_incrementally` will return ``False``." +msgstr "" +"PDFがオープン時に修復された場合は ``True`` です(主要な構造上の問題のため)。非PDF文書の場合は常に ``False`` " +"です。``True`` の場合、詳細は `TOOLS.mupdf_warnings()` " +"に保存され、:meth:`Document.can_save_incrementally` は ``False`` を返します。" + +#: ../../document.rst:2002 02a31170430a4f788d99c01c7f635162 +msgid "``True`` if PDF is in linearized format. ``False`` for non-PDF documents." +msgstr "PDFが直線化形式である場合は ``True`` です。非PDF文書の場合はFalseです。" + +#: ../../document.rst:2010 e99a659b3fa643b6accb313197214579 +msgid "" +"A dictionary indicating the `/MarkInfo` value. If not specified, the " +"empty dictionary is returned. If not a PDF, `None` is returned." +msgstr "`/MarkInfo` の値を示す辞書です。指定されていない場合、空の辞書が返されます。PDFでない場合は `None` が返されます。" + +#: ../../document.rst:2012 ../../document.rst:2076 +#: 0d6b84b0dcf64cc289c8dc99edf4b717 71735633a6684ca7b64ad390ad5a7b02 +msgid "dict" +msgstr "" + +#: ../../document.rst:2018 c22caa728e4e41e2bede8158057b0b5a +msgid "" +"A string containing the `/PageMode` value. If not specified, the default " +"\"UseNone\" is returned. If not a PDF, `None` is returned." +msgstr "" +"`/PageMode` の値を含む文字列です。指定されていない場合、デフォルトの「UseNone」が返されます。PDFでない場合、`None` " +"が返されます。" + +#: ../../document.rst:2020 ../../document.rst:2028 ../../document.rst:2082 +#: 9630ce9d52334f899eab242a03d0900f be6f4cd8d5ee452896e80446905aa219 +#: e213aab4508c414a8b7f18d6667d0332 +msgid "str" +msgstr "" + +#: ../../document.rst:2026 c7d730951aa94e158ce63b666f6b32d1 +msgid "" +"A string containing the `/PageLayout` value. If not specified, the " +"default \"SinglePage\" is returned. If not a PDF, `None` is returned." +msgstr "" +"`/PageLayout` " +"の値を含む文字列です。指定されていない場合、デフォルトの「SinglePage」が返されます。PDFでない場合、`None` が返されます。" + +#: ../../document.rst:2034 6bbf43e44f1c4939900b41773ff54c2f +msgid "" +"An integer counting the number of versions present in the document. Zero " +"if not a PDF, otherwise the number of incremental saves plus one." +msgstr "ドキュメント内に存在するバージョンの数をカウントする整数です。PDFでない場合、ゼロです。それ以外の場合、増分保存の数に1を加えたものです。" + +#: ../../document.rst:2036 ../../document.rst:2056 ../../document.rst:2088 +#: ../../document.rst:2096 ../../document.rst:2104 +#: 76db5832ec704f76968f4723517561f9 941e976c3d4242328e6907e519764906 +#: 9fcfff7a1a934e62872e49af7ebde2fd ad836f1a022c45c18d8eeb69062240dd +#: b1728f5baab047d0bc4679129588efa9 +msgid "int" +msgstr "" + +#: ../../document.rst:2040 fd94bdde1af14fcfa8e9e04928beafe4 +msgid "" +"Indicates whether the document is password-protected against access. This" +" indicator remains unchanged -- **even after the document has been " +"authenticated**. Precludes incremental saves if true." +msgstr "" +"ドキュメントがアクセス制限のあるパスワードで保護されているかどうかを示します。この指示は、**ドキュメントが認証された後も** 変更されません。 " +"`True` の場合、増分保存が不可能です。" + +#: ../../document.rst:2046 53fe029c3f12452dbaac145e9a48899a +msgid "" +"This indicator initially equals :attr:`Document.needs_pass`. After " +"successful authentication, it is set to ``False`` to reflect the " +"situation." +msgstr "" +"この指示は、最初に :attr:`Document.needs_pass` と等しいです。認証が成功した後、状況を反映するために " +"``False`` に設定されます。" + +#: ../../document.rst:2052 5199386a13e6471ba15ecc69930ed35c +msgid "" +"Changed in v1.16.0: This is now an integer comprised of bit indicators. " +"Was a dictionary previously." +msgstr "" + +#: ../../document.rst:2054 83c9a569a40e459383053082e7f988ec +msgid "" +"Contains the permissions to access the document. This is an integer " +"containing bool values in respective bit positions. For example, if " +"*doc.permissions & pymupdf.PDF_PERM_MODIFY > 0*, you may change the " +"document. See :ref:`PermissionCodes` for details." +msgstr "" +"アクセス許可を示します。これは、対応するビット位置にbool値を含む整数です。例えば、*doc.permissions & " +"pymupdf.PDF_PERM_MODIFY > 0* " +"の場合、ドキュメントを変更できます。詳細については、:ref:`PermissionCodes` を参照してください。" + +#: ../../document.rst:2060 d30b6615d8384ff88cd5d749383278f5 +msgid "" +"Contains the document's meta data as a Python dictionary or `None` (if " +"*is_encrypted=True* and *needPass=True*). Keys are *format*, " +"*encryption*, *title*, *author*, *subject*, *keywords*, *creator*, " +"*producer*, *creationDate*, *modDate*, *trapped*. All item values are " +"strings or `None`." +msgstr "" +"文書のメタデータをPythonの辞書形式で含んでいます。ただし、*is_encrypted=True* かつ *needPass=True* " +"の場合は `None` です。キーは " +"*format*、*encryption*、*title*、*author*、*subject*、*keywords*、*creator*、*producer*、*creationDate*、*modDate*、*trapped*" +" です。すべてのアイテムの値は文字列または `None` です。" + +#: ../../document.rst:2062 22a58e4fd9734362ae5a306ff4c5593c +msgid "" +"Except *format* and *encryption*, for PDF documents, the key names " +"correspond in an obvious way to the PDF keys */Creator*, */Producer*, " +"*/CreationDate*, */ModDate*, */Title*, */Author*, */Subject*, */Trapped* " +"and */Keywords* respectively." +msgstr "" +"*format* と *encryption* を除いて、PDF 文書の場合、キー名は明らかな方法で PDF キー " +"*/Creator*、*/Producer*、*/CreationDate*、*/ModDate*、*/Title*、*/Author*、*/Subject*、*/Trapped*、*/Keywords*" +" に対応しています。" + +#: ../../document.rst:2064 ad7c4d5d9fae4f0e898ac385bd7f8482 +msgid "*format* contains the document format (e.g. 'PDF-1.6', 'XPS', 'EPUB')." +msgstr "*format* には文書のフォーマット(例: 'PDF-1.6'、'XPS'、'EPUB')が含まれます。" + +#: ../../document.rst:2066 8dffab4742e1469397b2815626d806bf +msgid "" +"*encryption* either contains `None` (no encryption), or a string naming " +"an encryption method (e.g. *'Standard V4 R4 128-bit RC4'*). Note that an " +"encryption method may be specified **even if** *needs_pass=False*. In " +"such cases not all permissions will probably have been granted. Check " +":attr:`Document.permissions` for details." +msgstr "" +"*encryption* には `None` (暗号化なし)または暗号化メソッドを示す文字列(例: *'Standard V4 R4 " +"128-bit RC4'*) が含まれます。 *needs_pass=False* " +"の場合でも暗号化メソッドが指定される場合があります。その場合、すべての権限が付与されていない可能性があります。詳細は " +":attr:`Document.permissions` を確認してください。" + +#: ../../document.rst:2068 5f01390fc6d54c90b8256b82a6426d0a +msgid "" +"If the date fields contain valid data (which need not be the case at " +"all!), they are strings in the PDF-specific timestamp format " +"\"D:\", where" +msgstr "" +"日付フィールドに有効なデータが含まれている場合(必ずしもそうである必要はありません!)、それらは PDF 固有のタイムスタンプ形式 " +"\"D:\" の文字列です。" + +#: ../../document.rst:2070 86e562f458934d1c9608459d1de68ad0 +msgid "" +" is the 12 character ISO timestamp *YYYYMMDDhhmmss* (*YYYY* - year, " +"*MM* - month, *DD* - day, *hh* - hour, *mm* - minute, *ss* - second), and" +msgstr "" +" は 12 文字の ISO タイムスタンプ YYYYMMDDhhmmss(*YYYY* - 年、*MM* - 月、*DD* - " +"日、*hh* - 時、*mm* - 分、*ss* - 秒)であり、" + +#: ../../document.rst:2072 fcdfa6bbcaea4438a9be956c698c4c0e +msgid "" +" is a time zone value (time interval relative to GMT) containing a " +"sign ('+' or '-'), the hour (*hh*), and the minute (*'mm'*, note the " +"apostrophes!)." +msgstr "" +" は GMT に対する時刻間隔を示す符号('+' または " +"'-')、時間(*hh*)、および分(*'mm'*、アポストロフィに注意!)を含むタイムゾーン値です。" + +#: ../../document.rst:2074 0dc32bcac0904847ab80467d283ef68c +msgid "" +"A Paraguayan value might hence look like *D:20150415131602-04'00'*, which" +" corresponds to the timestamp April 15, 2015, at 1:16:02 pm local time " +"Asuncion." +msgstr "" +"したがって、パラグアイの値は *D:20150415131602-04'00'* となり、これは Asuncion の現地時間で 2015 年 4" +" 月 15 日午後 1 時 16 分 02 秒を表します。" + +#: ../../document.rst:2080 3a01420c50284cf9b138a814b97cc015 +msgid "" +"Contains the *filename* or *filetype* value with which *Document* was " +"created." +msgstr "*Document* が作成された *ファイル名* または *ファイルタイプ* の値を含みます。" + +#: ../../document.rst:2086 9dc57ac338cf4bb1922bd5c6ad91a2f0 +msgid "" +"Contains the number of pages of the document. May return 0 for documents " +"with no pages. Function `len(doc)` will also deliver this result." +msgstr "文書のページ数を含みます。ページがない文書の場合は 0 を返す場合があります。`len(doc)` 関数もこの結果を返します。" + +#: ../../document.rst:2092 ../../document.rst:2100 +#: 27898a87a5c14c2797cf0d9fef76b091 b2439eb54f6a41b9a4717f3fcbcc3676 +msgid "New in v1.17.0" +msgstr "v1.17.0 で新たに追加されました" + +#: ../../document.rst:2094 c077ffc6f78c4072bac3d07e3de18bbd +msgid "" +"Contains the number of chapters in the document. Always at least 1. " +"Relevant only for document types with chapter support (EPUB currently). " +"Other documents will return 1." +msgstr "" +"文書の章の数を含みます。常に少なくとも 1 です。章のサポートがある文書タイプ(現在は EPUB のみ)にのみ関連します。その他の文書は 1 " +"を返します。" + +#: ../../document.rst:2102 177e4df3b4274e7eb9fdeb47f94ebbed +msgid "" +"Contains (chapter, pno) of the document's last page. Relevant only for " +"document types with chapter support (EPUB currently). Other documents " +"will return `(0, page_count - 1)` and `(0, -1)` if it has no pages." +msgstr "" +"文書の最後のページの(章、pno)を含みます。章のサポートがある文書タイプ(現在は EPUB のみ)にのみ関連します。その他の文書は `(0, " +"page_count - 1)` と `(0, -1)` を返します。" + +#: ../../document.rst:2108 229315a297e647aeb533006453f78f80 +msgid "" +"A list of form field font names defined in the */AcroForm* object. `None`" +" if not a PDF." +msgstr "*/AcroForm* オブジェクトで定義されたフォームフィールドのフォント名のリストです。PDF でない場合は `None` です。" + +#: ../../document.rst:2110 59b7c20dd69b48cab5a65f4007b3ccd7 +msgid "list" +msgstr "" + +#: ../../document.rst:2112 2c90af6b351d4361997ec7e629d4b6f0 +msgid "" +"For methods that change the structure of a PDF (:meth:`insert_pdf`, " +":meth:`select`, :meth:`copy_page`, :meth:`delete_page` and others), be " +"aware that objects or properties in your program may have been " +"invalidated or orphaned. Examples are :ref:`Page` objects and their " +"children (links, annotations, widgets), variables holding old page " +"counts, tables of content and the like. Remember to keep such variables " +"up to date or delete orphaned objects. Also refer to " +":ref:`ReferenialIntegrity`." +msgstr "" +"PDFの構造を変更するメソッド " +"(:meth:`insert_pdf`、:meth:`select`、:meth:`copy_page`、:meth:`delete_page` " +"など)を使用する場合、プログラム内のオブジェクトやプロパティが無効化されたり孤立したりする可能性があることに注意してください。これには " +":ref:`Page` " +"オブジェクトとその子要素(リンク、注釈、ウィジェット)、古いページ数を保持する変数、目次などが含まれます。このような変数を最新の情報に保つか、孤立したオブジェクトを削除することを忘れないでください。:ref:`ReferenialIntegrity`" +" も参照してください。" + +#: ../../document.rst:2115 de4984bdc6ed427aaeabcf57d6cd7207 +msgid ":meth:`set_metadata` Example" +msgstr ":meth:`set_metadata` の例" + +#: ../../document.rst:2116 79432e081e244f6c8b722e43ff9b7245 +msgid "" +"Clear metadata information. If you do this out of privacy / data " +"protection concerns, make sure you save the document as a new file with " +"*garbage > 0*. Only then the old */Info* object will also be physically " +"removed from the file. In this case, you may also want to clear any XML " +"metadata inserted by several PDF editors:" +msgstr "" +"メタデータ情報をクリアします。プライバシー/データ保護の理由からこれを行う場合は、*ガベージ > 0* " +"でドキュメントを新しいファイルとして保存してから古い */Info* " +"オブジェクトもファイルから物理的に削除されることを確認してください。この場合、いくつかのPDFエディタによって挿入されたXMLメタデータもクリアすることを検討するかもしれません。" + +#: ../../document.rst:2134 6a3b7a5cd55a41f59c8dc713dd50a750 +msgid ":meth:`set_toc` Demonstration" +msgstr ":meth:`set_toc` のデモンストレーション" + +#: ../../document.rst:2135 8382f77c041647f1b4d870c36908eabc +msgid "" +"This shows how to modify or add a table of contents. Also have a look at " +"`import.py `_ and `export.py " +"`_ in the examples directory." +msgstr "" +"これは、目次を変更または追加する方法を示しています。また、examplesディレクトリにある `import.py " +"`_ と `export.py `_ " +"も参照してみてください。" + +#: ../../document.rst:2155 d2292f38b140476b980712ef37997a9e +msgid ":meth:`insert_pdf` Examples" +msgstr ":meth:`insert_pdf` の例" + +#: ../../document.rst:2156 6b959bd7cea34677878c2aea2d404c28 +msgid "**(1) Concatenate two documents including their TOCs:**" +msgstr "**(1)2つの文書を連結して、それぞれの目次を含める:**" + +#: ../../document.rst:2168 0ff20ee002e14e60bcd4138a25d8cf41 +msgid "" +"Obviously, similar ways can be found in more general situations. Just " +"make sure that hierarchy levels in a row do not increase by more than " +"one. Inserting dummy bookmarks before and after *toc2* segments would " +"heal such cases. A ready-to-use GUI (wxPython) solution can be found in " +"script `join.py `_ of the examples " +"directory." +msgstr "" +"明らかに、より一般的な状況についても同様の方法が見つかるでしょう。連続するヒエラルキーレベルが1以上増加しないように注意してください。 " +"**toc2** " +"セグメントの前後にダミーのブックマークを挿入することで、そのような場合を修正できます。すぐに使用できるGUI(wxPython)の解決策は、examplesディレクトリのスクリプト" +" `join.py `_ で見つけることができます。" + +#: ../../document.rst:2170 4f4610a44e0c4e7e817696b6550ea5d6 +msgid "**(2) More examples:**" +msgstr "**(2)その他の例:**" + +#: ../../document.rst:2182 47d56d0345d64fab86db840c51a5379a +msgid "Other Examples" +msgstr "他の例" + +#: ../../document.rst:2183 7faf6f3e99f0428498f9bce29d851f8c +msgid "**Extract all page-referenced images of a PDF into separate PNG files**::" +msgstr "**PDF内のすべてのページ参照画像を個別のPNGファイルに抽出します**::" + +#: ../../document.rst:2198 1bb439a9b34d45008b0859967d3af58f +msgid "**Rotate all pages of a PDF:**" +msgstr "**PDFのすべてのページを回転させます:**" + +#: ../../document.rst:2203 9b472940853140af99232c7023957677 +msgid "Footnotes" +msgstr "脚注" + +#: ../../document.rst:2204 07f097f7dd634f1f8949fb889cf944ec +msgid "" +"Content streams describe what (e.g. text or images) appears where and how" +" on a page. PDF uses a specialized mini language similar to PostScript to" +" do this (pp. 643 in :ref:`AdobeManual`), which gets interpreted when a " +"page is loaded." +msgstr "" +"コンテンツストリームは、ページ上に何が表示され、どのように表示されるかを説明します。PDFは、ページが読み込まれると解釈される、PostScriptに似た専門のミニ言語を使用しています(:ref:`AdobeManual`" +" のpp. 643)。" + +#: ../../document.rst:2206 75d56f01fa284bfe8e248978b604f238 +msgid "" +"However, you **can** use :meth:`Document.get_toc` and " +":meth:`Page.get_links` (which are available for all document types) and " +"copy this information over to the output PDF. See demo `convert.py " +"`_." +msgstr "" +"ただし、:meth:`Document.get_toc` と :meth:`Page.get_links` " +"(すべてのドキュメントタイプで利用可能)を使用して、この情報を出力PDFにコピーできます。デモ `convert.py " +"`_ を参照してください。" + +#: ../../document.rst:2208 a10321538c664b93bdf23fb77a5eade0 +msgid "" +"For applicable (EPUB) document types, loading a page via its absolute " +"number may result in layouting a large part of the document, before the " +"page can be accessed. To avoid this performance impact, prefer chapter-" +"based access. Use convenience methods and attributes " +":meth:`Document.next_location`, :meth:`Document.prev_location` and " +":attr:`Document.last_location` for maintaining a high level of coding " +"efficiency." +msgstr "" +"該当する(EPUB)ドキュメントタイプの場合、絶対番号でページを読み込むと、ページにアクセスする前にドキュメントの大部分のレイアウトが発生する場合があります。このパフォーマンスへの影響を避けるために、章ベースのアクセスを優先します。:meth:`Document.prev_location`、:attr:`Document.last_location`、:attr:`Document.last_location`" +" といった便利なメソッドや属性を使用して、高いコーディング効率を維持してください。" + +#: ../../document.rst:2210 cbc3cc03287e46448f98fa53d57a4df4 +msgid "" +"These parameters cause separate handling of stream categories: use it " +"together with `expand` to restrict decompression to streams other than " +"images / fontfiles." +msgstr "" +"これらのパラメータはストリームカテゴリの別々の処理を引き起こします:これらを `expand` " +"と一緒に使用して、画像やフォントファイル以外のストリームの解凍を制限できます。" + +#: ../../document.rst:2212 4e0731637c314114adc2120b29c55fbd +msgid "Examples for \"Form XObjects\" are created by :meth:`Page.show_pdf_page`." +msgstr "「Form XObjects」の例は、:meth:`Page.show_pdf_page` によって作成されます。" + +#: ../../document.rst:2214 f2e0dd8b194144bcbd4eea4a4505612e +msgid "" +"For a ``False`` the **complete document** must be scanned. Both methods " +"**do not load pages,** but only scan object definitions. This makes them " +"at least 10 times faster than application-level loops (where total " +"response time roughly equals the time for loading all pages). For the " +":ref:`AdobeManual` (756 pages) and the Pandas documentation (over 3070 " +"pages) -- both have no annotations -- the method needs about 11 ms for " +"the answer ``False``. So response times will probably become significant " +"only well beyond this order of magnitude." +msgstr "" +"``False`` の場合、完全なドキュメントをスキャンする必要があります。ただし、これらのメソッドは " +"**ページを読み込まず**、オブジェクト定義のみをスキャンします。これにより、アプリケーションレベルのループよりも少なくとも10倍高速になります(合計応答時間はすべてのページを読み込む時間とほぼ同じです)。:ref:`AdobeManual`(756ページ)とPandasのドキュメンテーション(3070ページ以上)の両方に注釈がない場合、このメソッドは" +" ``False`` " +"の回答に約11ミリ秒かかります。したがって、応答時間はおそらくこのオーダーオブマグニチュードをはるかに超えた範囲で初めて重要になる可能性があります。" + +#: ../../document.rst:2216 84720f80e51e4100b6e77f8e63c61312 +msgid "" +"This only works under certain conditions. For example, if there is normal" +" text covered by some image on top of it, then this is undetectable and " +"the respective text is **not** removed. Similar is true for white text on" +" white background, and so on." +msgstr "" +"これは特定の条件下でのみ機能します。たとえば、上に何らかの画像が覆っている通常のテキストがある場合、これは検出不可能で、該当するテキストは削除 " +"**されません**。同様に、白い背景の白いテキストなども同様です。" + +#: ../../footer.rst:60 8460d844a4eb409a858707466cd97c8c +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "a list of lists. Each entry has" +#~ " the form *[lvl, title, page, dest]*." +#~ " Its entries have the following " +#~ "meanings: * *lvl* -- hierarchy level" +#~ " (positive *int*). The first entry is" +#~ " always 1. Entries in a row are" +#~ " either **equal**, **increase** by 1, " +#~ "or **decrease** by any number. * " +#~ "*title* -- title (*str*) * *page* " +#~ "-- 1-based page number (*int*). If " +#~ "`-1` either no destination or outside" +#~ " document. * *dest* -- (*dict*) " +#~ "included only if *simple=False*. Contains " +#~ "details of the TOC item as " +#~ "follows: - kind: destination kind, " +#~ "see :ref:`linkDest Kinds`. - file: " +#~ "filename if kind is :data:`LINK_GOTOR` " +#~ "or :data:`LINK_LAUNCH`. - page: target " +#~ "page, 0-based, :data:`LINK_GOTOR` or " +#~ ":data:`LINK_GOTO` only. - to: position " +#~ "on target page (:ref:`Point`). - zoom:" +#~ " (float) zoom factor on target page." +#~ " - xref: :data:`xref` of the item" +#~ " (0 if no PDF). - color: item" +#~ " color in PDF RGB format `(red, " +#~ "green, blue)`, or omitted (always " +#~ "omitted if no PDF). - bold: true" +#~ " if bold item text or omitted. " +#~ "PDF only. - italic: true if " +#~ "italic item text, or omitted. PDF " +#~ "only. - collapse: true if sub-" +#~ "items are folded, or omitted. PDF " +#~ "only." +#~ msgstr "" + +#~ msgid "Only available in PyMuPDF's \"rebased\" implementation." +#~ msgstr "" + +#~ msgid "arg int xref" +#~ msgstr "" + +#~ msgid "arg bool info_only" +#~ msgstr "" + +#~ msgid "arg bool named" +#~ msgstr "" + +#~ msgid "rtype" +#~ msgstr "" + +#~ msgid "tuple,dict" +#~ msgstr "" + +#~ msgid "returns" +#~ msgstr "" + +#~ msgid "New in v1.18.7, changed in v1.18.9" +#~ msgstr "v1.18.7 で新規追加、v1.18.9 で変更" + +#~ msgid "" +#~ "Package `fontTools `_" +#~ " **must be installed**. It is " +#~ "required for creating the font subsets." +#~ " If not installed, the method raises" +#~ " an `ImportError` exception." +#~ msgstr "" +#~ "`fontTools `_ パッケージが" +#~ " **インストールされている必要があります**。フォントサブセットの作成に必要です。インストールされていない場合、メソッドは " +#~ "`ImportError` 例外を発生させます。" + +#~ msgid "" +#~ "Supported font types only include " +#~ "embedded OTF, TTF and WOFF that " +#~ "are **not already subsets**." +#~ msgstr "" +#~ "サポートされているフォントタイプには、埋め込まれた OTF、TTF、および WOFF " +#~ "のみが含まれ、**すでにサブセットであるものは含まれません** 。" + +#~ msgid "" +#~ "**Changed in v1.18.9:** A subset font" +#~ " directly replaces its original -- " +#~ "text remains untouched and **is not " +#~ "rewritten.** It thus should retain all" +#~ " its properties, like spacing, hiddenness," +#~ " control by Optional Content, etc." +#~ msgstr "" +#~ "**v1.18.9** で変更: サブセットフォントはその元のフォントを直接置換します - " +#~ "テキストは変更されず、**書き直されません**。したがって、スペーシング、非表示、オプションのコンテンツによる制御など、すべてのプロパティが保持されるはずです。" + +#~ msgid "" +#~ "The greatest benefit can be achieved " +#~ "when creating new PDFs using large " +#~ "fonts like is typical for Asian " +#~ "scripts. In these cases, the set " +#~ "of actually used unicodes mostly is " +#~ "small compared to the number of " +#~ "glyphs in the font. Using this " +#~ "feature can easily reduce the embedded" +#~ " font binary by two orders of " +#~ "magnitude -- from several megabytes to" +#~ " a low two-digit kilobyte amount." +#~ msgstr "" +#~ "最大の利点は、アジアの文字に典型的な大きなフォントを使用して新しい PDF " +#~ "を作成する場合に得られます。このような場合、実際に使用されているユニコードのセットは、フォント内のグリフの数と比較して非常に小さいことが多いです。この機能を使用すると、埋め込まれたフォントのバイナリサイズを容易に2桁のキロバイト単位にまで削減できます" +#~ " - 数メガバイトから低二桁キロバイトの量にまで。" + +#~ msgid "" +#~ "a list of images **referenced** by " +#~ "this page. Each item looks like " +#~ "`(xref, smask, width, height, bpc, " +#~ "colorspace, alt. colorspace, name, filter, " +#~ "referencer)` Where * *:data:`xref`* " +#~ "(*int*) is the image object number " +#~ "* **smask** (*int*) is the object " +#~ "number of its soft-mask image *" +#~ " **width** and **height** (*ints*) are " +#~ "the image dimensions * **bpc** (*int*)" +#~ " denotes the number of bits per " +#~ "component (normally 8) * **colorspace** " +#~ "(*str*) a string naming the colorspace" +#~ " (like **DeviceRGB**) * **alt. " +#~ "colorspace** (*str*) is any alternate " +#~ "colorspace depending on the value of " +#~ "**colorspace** * **name** (*str*) is " +#~ "the symbolic name by which the " +#~ "image is referenced * **filter** " +#~ "(*str*) is the decode filter of " +#~ "the image (:ref:`AdobeManual`, pp. 22). " +#~ "* **referencer** (*int*) the :data:`xref` " +#~ "of the referencer. Zero if directly " +#~ "referenced by the page. Only present " +#~ "if *full=True*." +#~ msgstr "" + +#~ msgid "" +#~ "`(xref, smask, width, height, bpc, " +#~ "colorspace, alt. colorspace, name, filter, " +#~ "referencer)`" +#~ msgstr "" + +#~ msgid "**width** and **height** (*ints*) are the image dimensions" +#~ msgstr "**width** と **height** (*int*)は画像の寸法です" + +#~ msgid "arg int idx: index of the item in list :meth:`Document.get_toc`." +#~ msgstr "引数 int idx: :meth:`Document.get_toc` リスト内のアイテムのインデックス。" + +#~ msgid "" +#~ "Changed in v1.19.3 - as a fix " +#~ "to issue `#537 " +#~ "`_, form " +#~ "fields are always excluded." +#~ msgstr "" +#~ "v1.19.3で変更 - 問題# `#537 " +#~ "`_ " +#~ "の修正として、フォームフィールドは常に除外されます。" + +#~ msgid "" +#~ "This is a page-based method. " +#~ "Document-level information of source " +#~ "documents is therefore ignored. Examples " +#~ "include Optional Content, Embedded Files, " +#~ "`StructureElem`, `AcroForm`, table of " +#~ "contents, page labels, metadata, named " +#~ "destinations (and other named entries) " +#~ "and some more. As a consequence, " +#~ "specifically, **Form Fields (widgets) can " +#~ "never be copied** -- although they " +#~ "seem to appear on pages only. Look" +#~ " at :meth:`Document.bake` for converting a" +#~ " source document if you need to " +#~ "retain at least widget **appearances.**" +#~ msgstr "" +#~ "これはページベースのメソッドです。そのため、ソース文書のドキュメントレベルの情報は無視されます。例には、オプショナルコンテンツ、埋め込みファイル、" +#~ " `StructureElem` 、 `AcroForm` " +#~ "、目次、ページラベル、メタデータ、名前付き目的地(および他の名前付きエントリ)などがあります。その結果、具体的にはフォームフィールド(ウィジェット)はコピーできません。" +#~ " - たとえそれらがページ上に表示されているように見える場合でも。ウィジェットの **外観** " +#~ "を少なくとも保持する必要がある場合は、:meth:`Document.bake` をご覧ください。" + +#~ msgid "the created page object." +#~ msgstr "作成されたページオブジェクト。" + +#~ msgid "" +#~ "a dictionary with the following keys:" +#~ " * *name* -- (*str*) name under " +#~ "which this entry is stored * " +#~ "*filename* -- (*str*) filename * " +#~ "*ufilename* -- (*unicode*) filename * " +#~ "*desc* -- (*str*) description * *size*" +#~ " -- (*int*) original file size * " +#~ "*length* -- (*int*) compressed file " +#~ "length * *creationDate* -- *(New in " +#~ "v1.18.13)* (*str*) date-time of item " +#~ "creation in PDF format * *modDate* " +#~ "-- *(New in v1.18.13)* (*str*) date-" +#~ "time of last change in PDF format" +#~ " * *collection* -- *(New in " +#~ "v1.18.13)* (*int*) :data:`xref` of the " +#~ "associated PDF portfolio item if any," +#~ " else zero. * *checksum* -- *(New " +#~ "in v1.18.13)* (*str*) a hashcode of " +#~ "the stored file content as a " +#~ "hexadecimal string. Should be MD5 " +#~ "according to PDF specifications, but be" +#~ " prepared to see other hashing " +#~ "algorithms." +#~ msgstr "" + +#~ msgid "New in v1.14.16" +#~ msgstr "v1.14.16 で新規" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/faq.mo b/docs/locales/ja/LC_MESSAGES/faq.mo new file mode 100644 index 000000000..8f61a3638 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/faq.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/faq.po b/docs/locales/ja/LC_MESSAGES/faq.po new file mode 100644 index 000000000..a30fc8c8a --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/faq.po @@ -0,0 +1,59 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 fc37bbaea9e645f2a1934950c4d360aa +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 3e2f00868cab4dc9b3f9c47206f4e0e1 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 dc5cbb9888564cd2904f297d4612db6a +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../faq.rst:7 34a4b295156e4a8b80a33df4e0d989fe +msgid "FAQ" +msgstr "" + +#: ../../faq.rst:9 42099b86e20e4e99a1843a506f01916e +msgid "A collection of recipes in “How-To” format for using PyMuPDF." +msgstr "" + +#: ../../faq.rst:12 b75419af6b7a47d9900bf518ab5e1ad3 +msgid "Please see:" +msgstr "" + +#: ../../faq.rst:14 478d3b75ae384ae8b14fb10fdd9b39fc +msgid ":ref:`Recipes: Table of Contents`" +msgstr "" + +#: ../../footer.rst:60 ebceb100878042a6ade4b6d7432a599e +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/font.mo b/docs/locales/ja/LC_MESSAGES/font.mo new file mode 100644 index 000000000..c3596dbbc Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/font.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/font.po b/docs/locales/ja/LC_MESSAGES/font.po new file mode 100644 index 000000000..1282eb244 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/font.po @@ -0,0 +1,1101 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 45a7d290afb143ae889044cf4a7e6d0a +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 00c23289844444afbfd2c89d7559321f +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 ab3ce3d73853429f893c65557831d844 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../font.rst:7 f02283a943e74b869bf944c3b38dab4b +msgid "Font" +msgstr "Font (フォント)" + +#: ../../font.rst:9 4b6a50a7ca01495fb1d18526d4a352a4 +msgid "New in v1.16.18" +msgstr "新機能:v1.16.18" + +#: ../../font.rst:11 ddeb945b793d49fab4dad50fd160c376 +msgid "" +"This class represents a font as defined in |MuPDF| (``fz_font_s`` structure)." +" It is required for the new class :ref:`TextWriter` and the new " +":meth:`Page.write_text`. Currently, it has no connection to how fonts are" +" used in methods :meth:`Page.insert_text` or :meth:`Page.insert_textbox`," +" respectively." +msgstr "" +"このクラスは、|MuPDF| で定義されたフォント( ``fz_font_s`` 構造体)を表します。これは、新しいクラス :ref:`TextWriter` " +"と新しい :meth:`Page.write_text` に必要であり、現在、:meth:`Page.insert_text` または " +":meth:`Page.insert_textbox` メソッドでフォントがどのように使用されているかとは関係ありません。" + +#: ../../font.rst:13 9d4c0b8b63e84f38b96afda6417986ff +msgid "" +"A ``Font`` object also contains useful general information, like the font " +"bbox, the number of defined glyphs, glyph names or the ``bbox`` of a single " +"glyph." +msgstr "``Font`` オブジェクトには、フォント ``bbox`` 、定義済みグリフの数、グリフ名、単一グリフの ``bbox`` など、有用な一般情報も含まれています。" + +#: ../../font.rst:17 7c72f65d296840899683ec8537ef1f7e +msgid "**Method / Attribute**" +msgstr "**メソッド/属性** " + +#: ../../font.rst:17 2dc504c945074db49451ff6cfe92ff06 +msgid "**Short Description**" +msgstr "**短い説明** " + +#: ../../font.rst:19 f59fd1b895564a42b724b588f4fc4535 +msgid ":meth:`~Font.glyph_advance`" +msgstr "" + +#: ../../font.rst:19 290af01ec4284c0a9459d29faf19c241 +msgid "Width of a character" +msgstr "文字の幅" + +#: ../../font.rst:20 d36df8d2c1534221865b7ee8c26d9a16 +msgid ":meth:`~Font.glyph_bbox`" +msgstr "" + +#: ../../font.rst:20 fde6283ce67f4c1ba76993b42ebb87e0 +msgid "Glyph rectangle" +msgstr "グリフの矩形" + +#: ../../font.rst:21 19c7f97bcc804101a1599f5d955a26c2 +msgid ":meth:`~Font.glyph_name_to_unicode`" +msgstr "" + +#: ../../font.rst:21 dfd0653dbbe14fbbb28215a00cef7ff7 +msgid "Get unicode from glyph name" +msgstr "グリフ名からUnicodeを取得" + +#: ../../font.rst:22 57f5b88c0c8c456fb8b818b59e822404 +msgid ":meth:`~Font.has_glyph`" +msgstr "" + +#: ../../font.rst:22 8768dbc2180b49648d5b8e8a35fe20e2 +msgid "Return glyph id of unicode" +msgstr "UnicodeのグリフIDを返す" + +#: ../../font.rst:23 25ac0c185f98441a97f85b1852974e5e +msgid ":meth:`~Font.text_length`" +msgstr "" + +#: ../../font.rst:23 cafb4c8e9f7145ff999b1c2859f5b5f6 +msgid "Compute string length" +msgstr "文字列の長さを計算" + +#: ../../font.rst:24 6426a083aaeb46909ac368aad5baa0be +msgid ":meth:`~Font.char_lengths`" +msgstr "" + +#: ../../font.rst:24 ae55279d05ff45639ca0126f6f5094d5 +msgid "Tuple of char widths of a string" +msgstr "文字列の文字幅のタプル" + +#: ../../font.rst:25 ec26b63e7eef445e9a0c1bd624fdc888 +msgid ":meth:`~Font.unicode_to_glyph_name`" +msgstr "" + +#: ../../font.rst:25 7c7807d100b04dd5941a20428f67a254 +msgid "Get glyph name of a unicode" +msgstr "Unicodeのグリフ名を取得" + +#: ../../font.rst:26 d926f8b02f4546d398cc79a470067477 +msgid ":meth:`~Font.valid_codepoints`" +msgstr "" + +#: ../../font.rst:26 6e2dfdf539a441da80c05f9704ab9444 +msgid "Array of supported unicodes" +msgstr "サポートされているUnicodeの配列" + +#: ../../font.rst:27 89d173d419b64155bd0aca6a425584e7 +msgid ":attr:`~Font.ascender`" +msgstr "" + +#: ../../font.rst:27 9c4238ddaced4bc3959d5546e0286306 +msgid "Font ascender" +msgstr "フォントのアセンダ" + +#: ../../font.rst:28 78b7d9e180f9485db69b51a5463d3571 +msgid ":attr:`~Font.descender`" +msgstr "" + +#: ../../font.rst:28 3d6f9666a2374daaa025f778a56be258 +msgid "Font descender" +msgstr "フォントのディセンダ" + +#: ../../font.rst:29 f42bed3ccae74e1085cc1506360d79c3 +msgid ":attr:`~Font.bbox`" +msgstr "" + +#: ../../font.rst:29 2dd21372d0b949b49f4e17dfcb4f4496 +msgid "Font rectangle" +msgstr "フォントの矩形" + +#: ../../font.rst:30 71b14ffa01ac43bd8ddbdaadbb0f1cfd +msgid ":attr:`~Font.buffer`" +msgstr "" + +#: ../../font.rst:30 5bd92ce5ad94497587aec728309e49d4 +msgid "Copy of the font's binary image" +msgstr "フォントのバイナリイメージのコピー" + +#: ../../font.rst:31 8f58bdb27c0c4035b913b270b3e02b02 +msgid ":attr:`~Font.flags`" +msgstr "" + +#: ../../font.rst:31 8987d36d302546c48fccabf333e8eb66 +msgid "Collection of font properties" +msgstr "フォントのプロパティのコレクション" + +#: ../../font.rst:32 10176eb1e76c4953ade9fb0a7baba794 +msgid ":attr:`~Font.glyph_count`" +msgstr "" + +#: ../../font.rst:32 f8d7e052eea8479c94a7cd51b5d4950c +msgid "Number of supported glyphs" +msgstr "サポートされているグリフの数" + +#: ../../font.rst:33 ba08a8cfb6c54d82a80a4ad50de26d64 +msgid ":attr:`~Font.name`" +msgstr "" + +#: ../../font.rst:33 32b7c1e41541468b94da653810f5aa7c +msgid "Name of font" +msgstr "フォントの名前" + +#: ../../font.rst:34 97ea28e957384acbb39bb0ee05bf7011 +msgid ":attr:`~Font.is_bold`" +msgstr "" + +#: ../../font.rst:34 0da7a495eb3940be97b05d6ff9e51e70 +msgid "`True` if bold" +msgstr "" + +#: ../../font.rst:35 e1aaedf145dd4413b79daae1269069a9 +msgid ":attr:`~Font.is_monospaced`" +msgstr "" + +#: ../../font.rst:35 9a7f20b9c7704e88b54fbdb6151fff49 +msgid "`True` if mono-spaced" +msgstr "" + +#: ../../font.rst:36 c4f4f6778d144109be97c6b7bd799e3b +msgid ":attr:`~Font.is_serif`" +msgstr "" + +#: ../../font.rst:36 1110ef3babb845a5ab3659f7e61dc42f +msgid "`True` if serif, `False` if sans-serif" +msgstr "" + +#: ../../font.rst:37 b68dcb7818c24ba9a89b733b3ada1fb3 +msgid ":attr:`~Font.is_italic`" +msgstr "" + +#: ../../font.rst:37 dcde6d1781444230aefd0dd66f36e48d +msgid "`True` if italic" +msgstr "" + +#: ../../font.rst:41 055c88f482754327b0319159f5cd05c4 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../font.rst:60 941c9b4f031a4bb7ac7ed85363ad6dfc +msgid "" +"Font constructor. The large number of parameters are used to locate font," +" which most closely resembles the requirements. Not all parameters are " +"ever required -- see the below pseudo code explaining the logic how the " +"parameters are evaluated." +msgstr "" +"フォントコンストラクタ。多くのパラメータは、要件に最も近いフォントを特定するために使用されます。必ずしもすべてのパラメータが必要とされるわけではありません" +" - パラメータが評価されるロジックを説明する以下の疑似コードを参照してください。" + +#: ../../font.rst 038b1350c78e4af38ce27b2161a878ba +#: 11d859906804423d906939c95128b9f1 4dc8aea23a6f47f3a29859317d74fc91 +#: 65f80f24c66a48b78e43f93f741e3fda 8930cac6b6524339bbac6f8a39bb3547 +#: 8d8305aa574c4ab6b4e0f12053b08ee1 b7c7897014cf43d2bd2709757c71556b +#: e243e193bd1c4d8cbecdcdefb29dcd48 +msgid "Parameters" +msgstr "パラメータ" + +#: ../../font.rst:62 ce1b93a1069e401a8fc8fb883957f814 +msgid "" +"one of the :ref:`Base-14-Fonts` or CJK fontnames. Also possible are a " +"select few other names like (watch the correct spelling): \"Arial\", " +"\"Times\", \"Times Roman\". *(Changed in v1.17.5)* If you have " +"installed `pymupdf-fonts `_, " +"there are also new \"reserved\" fontnames available, which are listed in " +":attr:`fitz_fonts` and in the table further down." +msgstr "" + +#: ../../font.rst:62 7dc0e95a2250423fa26f074438f4acc6 +msgid "" +"one of the :ref:`Base-14-Fonts` or CJK fontnames. Also possible are a " +"select few other names like (watch the correct spelling): \"Arial\", " +"\"Times\", \"Times Roman\"." +msgstr "" +":ref:`Base-14-Fonts` またはCJKフォントの名前のいずれか。また、\"Arial\"、\"Times\"、\"Times " +"Roman\"などの選択的な名前も可能です。" + +#: ../../font.rst:64 6bec0e09402e4bb6845ddf5125491bb3 +msgid "*(Changed in v1.17.5)*" +msgstr "(v1.17.5で変更)" + +#: ../../font.rst:66 a2688702fcc44a058dbe4572e93e9c67 +msgid "" +"If you have installed `pymupdf-fonts `_, there are also new \"reserved\" fontnames available, which are" +" listed in :attr:`fitz_fonts` and in the table further down." +msgstr "" +"`pymupdf-fonts `_ " +"をインストールしている場合、:attr:`fitz_fonts` と下の表でリストされている新しい「予約済み」フォント名も利用可能です。" + +#: ../../font.rst:68 64c4dea0a26f459f9ecfaeb1987f3820 +msgid "the filename of a fontfile somewhere on your system [#f1]_." +msgstr "システムのどこかにあるフォントファイルのファイル名 [#f1]_。" + +#: ../../font.rst:69 ec5352b957ba406f8ab58c0737979e0f +msgid "a fontfile loaded in memory [#f1]_." +msgstr "メモリ内にロードされたフォントファイル [#f1]_。" + +#: ../../font.rst:70 71bc43be60c04dde9f99f96570fca0a8 +msgid "" +"the number of a UCDN script. Currently supported in PyMuPDF are numbers " +"24, and 32 through 35." +msgstr "" +"UCDN(Unicode Character Database " +"Number)のスクリプト番号です。現在、PyMuPDFでサポートされている番号は、24、および32から35です。" + +#: ../../font.rst:71 add40d2e69f447128313fbd4ead517a1 +msgid "" +"one of the values \"zh-Hant\" (traditional Chinese), \"zh-Hans\" " +"(simplified Chinese), \"ja\" (Japanese) and \"ko\" (Korean). Otherwise, " +"all ISO 639 codes from the subsets 1, 2, 3 and 5 are also possible, but " +"are currently documentary only." +msgstr "UCDNスクリプトの番号。現在、PyMuPDFでサポートされているのは24、および32から35の数値です" + +#: ../../font.rst:72 5e4b791b58be400b99c3fccfaae671ba +msgid "an alternative selector for one of the CJK fonts." +msgstr "CJKフォントの選択のための代替セレクタ。" + +#: ../../font.rst:73 33707866dd584b6993b3c60292e5e392 +msgid "look for a bold font." +msgstr "太字のフォントを探します。" + +#: ../../font.rst:74 e8c45f980fa04d4394e41fe116a45f3f +msgid "look for an italic font." +msgstr "イタリック体のフォントを探します。" + +#: ../../font.rst:75 27bccc7d1af94edead067542a7104885 +msgid "look for a serifed font." +msgstr "セリフ付きのフォントを探します。" + +#: ../../font.rst 4a4bfff89a5f4d888dfc93b55c8fc5e3 +#: 6c3e1ce9b68e4804a745f0940b33e0bd 6d5f19eef6ac42d8a6f06d709743c29a +#: 7af7d1f2dc8c4e89b9947286bc644cad 7dd50832351947f9986543d602236567 +#: 7e13176fe56b4278aa552ef5f2800e85 7e576e8f92594b329d5a3ebd0eae62b5 +#: a32ab6ce83104291bb24d7a7dce8d2e5 f08da17b5bd640e68aab31bf872ecb8b +msgid "Returns" +msgstr "戻り値" + +#: ../../font.rst:77 af29f25c30c749a99cadf0048bdb2132 +msgid "" +"a MuPDF font if successful. This is the overall sequence of checks to " +"determine an appropriate font: =========== " +"============================================================ Argument " +"Action =========== " +"============================================================ fontfile? " +"Create font from file, exception if failure. fontbuffer? Create font from" +" buffer, exception if failure. ordering>=0 Create universal font, always " +"succeeds. fontname? Create a Base-14 font, universal font, or font" +" provided by `pymupdf-fonts `_. See table below. =========== " +"============================================================" +msgstr "" + +#: ../../font.rst:77 199ff48cd818477fa38adf2a6a1cba35 +msgid "" +"a |MuPDF| font if successful. This is the overall sequence of checks to " +"determine an appropriate font:" +msgstr "成功した場合は |MuPDF| フォント。適切なフォントを特定するための全体のチェックのシーケンスは次のとおりです。" + +#: ../../font.rst:80 3718868448404e609fd00b39ed9b41b9 +msgid "Argument" +msgstr "引数" + +#: ../../font.rst:80 fa6a9cc31ca146e394069b04868d0946 +msgid "Action" +msgstr "アクション" + +#: ../../font.rst:82 8dd85a26111942b7844cf367fe830da3 +msgid "fontfile?" +msgstr "" + +#: ../../font.rst:82 3670d7b40da644d2a20a88832c83529c +msgid "Create font from file, exception if failure." +msgstr "ファイルからフォントを作成し、失敗した場合は例外をスローします。" + +#: ../../font.rst:83 31eb3c630783441790a421702a4035ca +msgid "fontbuffer?" +msgstr "" + +#: ../../font.rst:83 e3bcdda824a34730bae2ca3a3b1787d0 +msgid "Create font from buffer, exception if failure." +msgstr "バッファからフォントを作成し、失敗した場合は例外をスローします。" + +#: ../../font.rst:84 3acc4602197a42aeac8ae72bab1ed28c +msgid "ordering>=0" +msgstr "" + +#: ../../font.rst:84 df1bde02d3334adfb8e5268b0c5f3ae5 +msgid "Create universal font, always succeeds." +msgstr "ユニバーサルフォントを作成し、常に成功します。" + +#: ../../font.rst:85 c44248a623b7435ba06d5df0b1cba3a5 +msgid "fontname?" +msgstr "" + +#: ../../font.rst:85 50cd0fd1f1a240c2ac6592e4a8356720 +msgid "" +"Create a Base-14 font, universal font, or font provided by `pymupdf-fonts" +" `_. See table below." +msgstr "" +"ベース14フォント、ユニバーサルフォント、または `pymupdf-fonts `_ で提供されるフォントを作成します。下の表を参照してください。" + +#: ../../font.rst:92 2fa3ff4df5ae4bef8482f34c63767318 +msgid "" +"With the usual reserved names \"helv\", \"tiro\", etc., you will create " +"fonts with the expected names \"Helvetica\", \"Times-Roman\" and so on. " +"**However**, and in contrast to :meth:`Page.insert_font` and friends," +msgstr "" +"通常の予約済みの名前「helv」、「tiro」などを使用すると、期待どおりの名前「Helvetica」、「Times-" +"Roman」などのフォントが作成されます。**ただし** 、:meth:`Page.insert_font` などとは異なり、" + +#: ../../font.rst:94 765b0615bb4b44bb9cdcca9bf1e3657d +msgid "a font file will **always** be embedded in your PDF," +msgstr "フォントファイルはPDFに **常に** 埋め込まれます。" + +#: ../../font.rst:95 8e3f9ad2f0754e10b50ef42e6882653d +msgid "" +"Greek and Cyrillic characters are supported without needing the " +"*encoding* parameter." +msgstr "ギリシャ文字およびキリル文字は *エンコーディング* パラメータを必要とせずにサポートされます。" + +#: ../../font.rst:97 d3746cee61fc4490b1696ed40b1b739a +msgid "" +"Using *ordering >= 0*, or fontnames \"cjk\", \"china-t\", \"china-s\", " +"\"japan\" or \"korea\" will **always create the same \"universal\"** font" +" **\"Droid Sans Fallback Regular\"**. This font supports **all Chinese, " +"Japanese, Korean and Latin characters**, including Greek and Cyrillic. " +"This is a sans-serif font." +msgstr "" +"ordering >= " +"0を使用するか、フォント名「cjk」、「china-t」、「china-s」、「japan」、「korea」を使用すると、**常に同じ「汎用」**" +" フォント **「Droid Sans Fallback " +"Regular」が作成されます**。このフォントは、**中国語、日本語、韓国語、ラテン文字、ギリシャ文字、キリル文字を含むすべての文字をサポートしています**。これはサンセリフのフォントです。" + +#: ../../font.rst:99 fa0bc1c990854bbe800c94afd2368fec +msgid "" +"Actually, you would rarely ever need another sans-serif font than " +"**\"Droid Sans Fallback Regular\"**. **Except** that this font file is " +"relatively large and adds about 1.65 MB (compressed) to your PDF file " +"size. If you do not need CJK support, stick with specifying \"helv\", " +"\"tiro\" etc., and you will get away with about 35 KB compressed." +msgstr "" +"実際、通常、「Droid Sans Fallback " +"Regular」以外のサンセリフのフォントはほとんど必要ありません。ただし、このフォントファイルは比較的大きく、PDFファイルサイズを約1.65 " +"MB(圧縮)増加させます。CJKのサポートが必要でない場合は、「helv」、「tiro」などを指定し、約35 KB(圧縮)で済むでしょう。" + +#: ../../font.rst:101 12bbf85ce3a143c1baa646f805eef0ae +msgid "" +"If you **know** you have a mixture of CJK and Latin text, consider just " +"using `Font(\"cjk\")` because this supports everything and also " +"significantly (by a factor of up to three) speeds up execution: MuPDF " +"will always find any character in this single font and never needs to " +"check fallbacks." +msgstr "" +"CJKとラテン文字の混合テキストがあることを **知っている** 場合は、単に `Font(\"cjk\")` " +"を使用して、すべてをサポートし、実行を大幅に高速化します(最大3倍)。MuPDFは常にこの単一のフォントで任意の文字を見つけることができ、フォールバックをチェックする必要はありません。" + +#: ../../font.rst:103 2df673aa00fd4367849b86bc1d74524c +msgid "" +"But if you do use some other font, you will still automatically be able " +"to also write CJK characters: MuPDF detects this situation and silently " +"falls back to the universal font (which will then of course also be " +"embedded in your PDF)." +msgstr "ただし、他のフォントを使用する場合、CJK文字を書き込むことも自動的にできるようになります。MuPDFはこの状況を検出し、静かにユニバーサルフォントにフォールバックします(その場合、当然、PDFにも埋め込まれます)。" + +#: ../../font.rst:105 7b6e209cffc74b7fad6dc4636f926e62 +msgid "" +"*(New in v1.17.5)* Optionally, some new \"reserved\" fontname codes " +"become available if you install `pymupdf-fonts `_, `pip install pymupdf-fonts`. **\"Fira Mono\"** is a " +"mono-spaced sans font set and **FiraGO** is another non-serifed " +"\"universal\" font set which supports all Latin (including Cyrillic and " +"Greek) plus Thai, Arabian, Hewbrew and Devanagari -- but none of the CJK " +"languages. The size of a FiraGO font is only a quarter of the \"Droid " +"Sans Fallback\" size (compressed 400 KB vs. 1.65 MB) -- **and** it " +"provides the weights bold, italic, bold-italic -- which the universal " +"font doesn't." +msgstr "" +"*(v1.17.5で新登場)* オプションで、`pymupdf-fonts `_ をインストールすると、いくつかの新しい「予約済み」フォント名コードが利用可能になります。 **\"Fira Mono\"** " +"は等幅のサンセリフフォントセットで、**FiraGO** " +"はラテン文字(キリル文字とギリシャ文字を含む)およびタイ語、アラビア語、ヘブライ語、デーバナガリ語をサポートする別のサンセリフの「ユニバーサル」フォントセットですが、CJK言語はサポートしていません。" +" FiraGOフォントのサイズは「Droid Sans Fallback」のサイズの1/4だけです(圧縮400 KB対1.65 MB) " +"**そして** 、ユニバーサルフォントにはない太字、イタリック、太字イタリックのウェイトを提供しています。" + +#: ../../font.rst:107 c1be2c550ca24809865553529c177515 +msgid "" +"**\"Space Mono\"** is another nice and small mono-spaced font from Google" +" Fonts, which supports Latin Extended characters and comes with all 4 " +"important weights." +msgstr "" +"**\"Space Mono\"** " +"はGoogleフォントからのもう1つの素敵で小さな等幅フォントで、ラテン拡張文字をサポートし、すべての4つの重要なウェイトが含まれています。" + +#: ../../font.rst:109 fc4e7c8d7b1345a5b448e81e805bd919 +msgid "" +"The following table maps a fontname code to the corresponding font. For " +"the current content of the package please see its documentation:" +msgstr "次の表は、フォント名コードを対応するフォントにマッピングしています。パッケージの現在の内容については、そのドキュメンテーションを参照してください:" + +#: ../../font.rst:112 b889ba03a97d448d986fc5f2720caed3 +msgid "Code" +msgstr "コード" + +#: ../../font.rst:112 35986fd845e84ccd9ebdea627b3a9ef0 +msgid "Fontname" +msgstr "フォント名" + +#: ../../font.rst:112 c5a86c33966344d99f7bc3aae0f70063 +msgid "New in" +msgstr "バージョン" + +#: ../../font.rst:112 28486ebc0e8b4800a1515b75de64e9a0 +msgid "Comment" +msgstr "コメント" + +#: ../../font.rst:114 5975f7a315f94f9d9b7894669a7ee349 +msgid "figo" +msgstr "" + +#: ../../font.rst:114 d65d13d55b874331bad6061fdbef6775 +msgid "FiraGO Regular" +msgstr "FiraGO レギュラー" + +#: ../../font.rst:114 ../../font.rst:115 ../../font.rst:116 ../../font.rst:117 +#: ../../font.rst:118 ../../font.rst:119 26d9cc6aebfa45b2be5bd54f05c56b18 +#: 3040ba82d7ef462ab3a04477cab47ceb 4bf61b95a36144759a5e36fcd0f1a9e6 +#: 8706d012913c41be8e39a69cd17e39c3 a0a1d05da1e94e96a62e651b53345c88 +#: a5f443561f364257a287e8edb593735a +msgid "v1.0.0" +msgstr "" + +#: ../../font.rst:114 9b165705f2d341558fec266780a6637b +msgid "narrower than Helvetica" +msgstr "Helveticaよりも狭い" + +#: ../../font.rst:115 211b48b8f6f3408e93fb4f09c9449ed7 +msgid "figbo" +msgstr "" + +#: ../../font.rst:115 00a72ab5997c45d89df1965e844fb1f3 +msgid "FiraGO Bold" +msgstr "FiraGO ボールド" + +#: ../../font.rst:116 000695b068a44cbd9a61c736c1bcdb72 +msgid "figit" +msgstr "" + +#: ../../font.rst:116 f6f05fa199b64571878af54d8245a1ee +msgid "FiraGO Italic" +msgstr "FiraGO イタリック" + +#: ../../font.rst:117 b03c1dc714324e83bd9b5d2de9ad4982 +msgid "figbi" +msgstr "" + +#: ../../font.rst:117 952652ef389945488cf95b796a3a1861 +msgid "FiraGO Bold Italic" +msgstr "FiraGO ボールドイタリック" + +#: ../../font.rst:118 0116f71438124d9989d6ea49a5fd1566 +msgid "fimo" +msgstr "" + +#: ../../font.rst:118 76b7d29d7c46451ab59fa5244188ed6c +msgid "Fira Mono Regular" +msgstr "Fira Mono レギュラー" + +#: ../../font.rst:119 e11d14ed93f644bf982e55a17242dbc6 +msgid "fimbo" +msgstr "" + +#: ../../font.rst:119 7dee55b4c8dd41a79e7dd83f4fa861c7 +msgid "Fira Mono Bold" +msgstr "Fira Mono ボールド" + +#: ../../font.rst:120 b33ec499345447df987f1b62a7b3eaf8 +msgid "spacemo" +msgstr "" + +#: ../../font.rst:120 412f72424f444a7cbadc466883b8753a +msgid "Space Mono Regular" +msgstr "Space Mono レギュラー" + +#: ../../font.rst:120 ../../font.rst:121 ../../font.rst:122 ../../font.rst:123 +#: 478011a3994744bbbc22cc3b08a52067 4906594870fd4487adb13efaec08eff8 +#: 521a2f9df8d7483c8aaa5e53b196946d 5665f7b625724faca1986fb76c559b60 +msgid "v1.0.1" +msgstr "" + +#: ../../font.rst:121 527f17d70be440feae40cbe476ad4155 +msgid "spacembo" +msgstr "" + +#: ../../font.rst:121 c49ee8d9e6b443d4b3be145ca5d6c539 +msgid "Space Mono Bold" +msgstr "Space Mono ボールド" + +#: ../../font.rst:122 4ee2f27f573144f59995c3586644e7b6 +msgid "spacemit" +msgstr "" + +#: ../../font.rst:122 df22d2b346b74f86b67ecdc28f4ffac1 +msgid "Space Mono Italic" +msgstr "Space Mono イタリック" + +#: ../../font.rst:123 06a57a0bfbd74a8481fdfca90105a4b8 +msgid "spacembi" +msgstr "" + +#: ../../font.rst:123 3cd67ab978494a2bae0c0243ae39582a +msgid "Space Mono Bold-Italic" +msgstr "Space Mono ボールドイタリック" + +#: ../../font.rst:124 b0331ba0b3054a0397901aa4c96bf478 +msgid "math" +msgstr "" + +#: ../../font.rst:124 dc1ddcfec5f84021867c259de1c6ea60 +msgid "Noto Sans Math Regular" +msgstr "Noto Sans Math レギュラー" + +#: ../../font.rst:124 ../../font.rst:125 ../../font.rst:126 ../../font.rst:127 +#: 020b572272924f2a81a3b2e824d2faeb 42238c699282447483321234ad9ce526 +#: cd035e835512415886066a9125f42355 ea8e4e408be748f8ab94007cab6e22d9 +msgid "v1.0.2" +msgstr "" + +#: ../../font.rst:124 5c61040268a244389881f4949cf314b1 +msgid "math symbols" +msgstr "数学記号" + +#: ../../font.rst:125 f5a5ebfdcca541298f34b60ee28390b1 +msgid "music" +msgstr "" + +#: ../../font.rst:125 8745e4a55f3f424c9b574b67e05b2069 +msgid "Noto Music Regular" +msgstr "Noto Music レギュラー" + +#: ../../font.rst:125 e3a94e4cd04644bfa48e0218fef0f3a4 +msgid "musical symbols" +msgstr "音楽記号" + +#: ../../font.rst:126 a2b333213a984f9292bef2472e1246a6 +msgid "symbol1" +msgstr "" + +#: ../../font.rst:126 c65253d724fe4a6289b73915fb6f4bf1 +msgid "Noto Sans Symbols Regular" +msgstr "Noto Sans Symbols レギュラー" + +#: ../../font.rst:126 6415defd1a154e49a942c003bbe9d113 +msgid "replacement for \"symb\"" +msgstr "\"symb\"の代替" + +#: ../../font.rst:127 8a7da80fb27f4d56bb84feefe8db6cab +msgid "symbol2" +msgstr "" + +#: ../../font.rst:127 abe3fb2655fc4c46b662a55dc3adc67f +msgid "Noto Sans Symbols2 Regular" +msgstr "Noto Sans Symbols2 レギュラー" + +#: ../../font.rst:127 67d6442a109d41f19c95dd48a9409c1b +msgid "extended symbol set" +msgstr "拡張記号セット" + +#: ../../font.rst:128 956820c414f346bca4169f1cc46ccfbd +msgid "notos" +msgstr "" + +#: ../../font.rst:128 836c1fc44ce94633a2eaa2e39e1bec91 +msgid "Noto Sans Regular" +msgstr "Noto Sans レギュラー" + +#: ../../font.rst:128 ../../font.rst:129 ../../font.rst:130 ../../font.rst:131 +#: 19c2680e58814212977e43f045109461 5c57345720c8420096ef62549d2d4d9f +#: acba3f025d354a9e9d4ca9c1eaeeac3f e88155edf09f4e00b96b40a8d0e70a0b +msgid "v1.0.3" +msgstr "" + +#: ../../font.rst:128 9add5c5b9d724ca6bd570d51dea7fe24 +msgid "alternative to Helvetica" +msgstr "Helveticaの代替" + +#: ../../font.rst:129 e3edc7ce2dbb4817a489806822007126 +msgid "notosit" +msgstr "" + +#: ../../font.rst:129 1dcc7a561c8a4df3b0fadbd361096b04 +msgid "Noto Sans Italic" +msgstr "Noto Sans イタリック" + +#: ../../font.rst:130 9d99ba2971544e52a7a994df81f87493 +msgid "notosbo" +msgstr "" + +#: ../../font.rst:130 2fc18b4937a04ddbaea8d219036c7be9 +msgid "Noto Sans Bold" +msgstr "Noto Sans ボールド" + +#: ../../font.rst:131 a2234fbc502a4a6f9075109141b92389 +msgid "notosbi" +msgstr "" + +#: ../../font.rst:131 d6b460e091084e358994cb4b527827da +msgid "Noto Sans BoldItalic" +msgstr "Noto Sans ボールドイタリック" + +#: ../../font.rst:141 3d4f99e68bba4b33a57aeac0f30d03b0 +msgid "" +"Check whether the unicode ``chr`` exists in the font or (option) some " +"fallback font. May be used to check whether any \"TOFU\" symbols will " +"appear on output." +msgstr "" +"指定した Unicode 文字 ``chr`` " +"がフォント内または(オプションで)フォールバックフォント内に存在するかどうかを確認します。これは、「TOFU(豆腐)」と呼ばれる記号が出力に表示されるかどうかを確認するために使用できます。" + +#: ../../font.rst:143 a3adecbceff049f2b37fa49823f174cf +msgid "the unicode of the character (i.e. ``ord()``)." +msgstr "文字の Unicode 値(すなわち、``ord()`` 関数の結果)。" + +#: ../../font.rst:144 6b13453ecb9a402ea63c263c3974e3e6 +msgid "the language -- currently unused." +msgstr "言語(現在は未使用)。" + +#: ../../font.rst:145 87bb13172100438ca088967f065ecaf4 +msgid "the UCDN script number." +msgstr "UCDN(Unicode Common Locale Data Repository)スクリプト番号。" + +#: ../../font.rst:146 df5aa548e49d42dd94504ade51ff9646 +msgid "" +"*(new in v1.17.5)* perform an extended search in fallback fonts or " +"restrict to current font (default)." +msgstr "*(v1.17.5 で新たに追加)* フォールバックフォントでの拡張検索を実行するか、現在のフォントに制限するかを指定します(デフォルト)。" + +#: ../../font.rst:147 18154b310acb430c8ef8d4afb245b735 +msgid "*(changed in 1.17.7)* the glyph number. Zero indicates no glyph found." +msgstr "*(v1.17.7 で変更)* グリフ番号。ゼロはグリフが見つからないことを示します。" + +#: ../../font.rst:151 ebc279839deb4f7284104ac16ffb8233 +msgid "New in v1.17.5" +msgstr "v1.17.5 で新たに追加" + +#: ../../font.rst:153 806672a68fe64a8299035f8a10070dd2 +msgid "Return an array of unicodes supported by this font." +msgstr "このフォントがサポートする Unicode の配列を返します。" + +#: ../../font.rst:155 65c86097105948eabada290a3ba06897 +#, python-format +msgid "" +"an ``array.array`` [#f2]_ of length at most :attr:`Font.glyph_count`. I.e. " +"``chr()`` of every item in this array has a glyph in the font without using" +" fallbacks. This is an example display of the supported glyphs: >>> " +"import pymupdf >>> font = pymupdf.Font(\"math\") >>> vuc = " +"font.valid_codepoints() >>> for i in vuc: print(\"%04X %s (%s)\" % " +"(i, chr(i), font.unicode_to_glyph_name(i))) 0000 000D (CR) 0020 " +"(space) 0021 ! (exclam) 0022 \" (quotedbl) 0023 # (numbersign) 0024 $ " +"(dollar) 0025 % (percent) ... 00AC ¬ (logicalnot) 00B1 ± (plusminus) ... " +"21D0 ⇐ (arrowdblleft) 21D1 ⇑ (arrowdblup) 21D2 ⇒ (arrowdblright) 21D3 ⇓ " +"(arrowdbldown) 21D4 ⇔ (arrowdblboth) ... 221E ∞ (infinity) ..." +msgstr "" + +#: ../../font.rst:155 ecaff954703b475f8857d239246738f0 +msgid "" +"an ``array.array`` [#f2]_ of length at most :attr:`Font.glyph_count`. I.e. " +"``chr()`` of every item in this array has a glyph in the font without using" +" fallbacks. This is an example display of the supported glyphs:" +msgstr "" +":attr:`Font.glyph_count` 以上の長さを持つ ``array.array`` [#f2]_。つまり、この配列内の各アイテムの " +"``chr()`` は、フォールバックを使用せずにフォント内にグリフを持っていることを示します。以下はサポートされるグリフの例示です:" + +#: ../../font.rst:183 0556ec3680644e8ea88f8c4958c55890 +msgid "" +"This method only returns meaningful data for fonts having a CMAP " +"(character map, charmap, the `/ToUnicode` PDF key). Otherwise, this array" +" will have length 1 and contain zero only." +msgstr "" +"このメソッドは、CMAP(文字マップ、charmap、`/ToUnicode` PDF " +"キー)を持つフォントに対してのみ意味のあるデータを返します。それ以外の場合、この配列は長さ1で、ゼロのみを含みます。" + +#: ../../font.rst:192 977003f77d0945819e92c70f65fbd280 +msgid "Calculate the \"width\" of the character's glyph (visual representation)." +msgstr "文字のグリフ(視覚的な表現)の「幅」を計算します。" + +#: ../../font.rst:194 afe1cf924a86491792a061d549e652d4 +msgid "" +"the unicode number of the character. Use ``ord()``, not the character " +"itself. Again, this should normally work even if a character is not " +"supported by that font, because fallback fonts will be checked where " +"necessary." +msgstr "" +"文字のUnicode番号。文字自体ではなく、``ord()`` " +"を使用します。文字がそのフォントでサポートされていない場合でも、必要に応じてフォールバックフォントがチェックされるため、通常は機能するはずです。" + +#: ../../font.rst:195 334621872cd84ea6ac7b451fe8ea32aa +msgid "write mode, ``0`` = horizontal, ``1`` = vertical." +msgstr "書き込みモード、 ``0`` = 横方向、 ``1`` = 縦方向。" + +#: ../../font.rst:197 928ef35ea33748988d53e0117ab7c79d +msgid "The other parameters are not in use currently." +msgstr "その他のパラメータは現在使用されていません。" + +#: ../../font.rst:199 5241fa12af7442538a664176d250293c +msgid "a float representing the glyph's width relative to **fontsize 1**." +msgstr "**フォントサイズ** 1に対するグリフの幅を表す浮動小数点数。" + +#: ../../font.rst:203 c2d509855a694eb4a75f07ca6581bd9b +msgid "" +"Return the unicode value for a given glyph name. Use it in conjunction " +"with `chr()` if you want to output e.g. a certain symbol." +msgstr "指定されたグリフ名のUnicode値を返します。特定のシンボルを出力したい場合は、`chr()` と組み合わせて使用します。" + +#: ../../font.rst:205 3c10db60e1374fec881815e9d3754bf4 +msgid "The name of the glyph." +msgstr "グリフの名前。" + +#: ../../font.rst:207 84cca9708fcf42f0b150dc6360ebc3e7 +msgid "" +"The unicode integer, or 65533 = 0xFFFD if the name is unknown. Examples: " +"`font.glyph_name_to_unicode(\"Sigma\") = 931`, " +"`font.glyph_name_to_unicode(\"sigma\") = 963`. Refer to the `Adobe Glyph " +"List `_ publication for a list of glyph names " +"and their unicode numbers. Example: >>> font = pymupdf.Font(\"helv\") " +">>> font.has_glyph(font.glyph_name_to_unicode(\"infinity\")) True" +msgstr "" + +#: ../../font.rst:207 c780e4b197d94ca68b5f7ea4196a216d +msgid "" +"The unicode integer, or 65533 = 0xFFFD if the name is unknown. Examples: " +"`font.glyph_name_to_unicode(\"Sigma\") = 931`, " +"`font.glyph_name_to_unicode(\"sigma\") = 963`. Refer to the `Adobe Glyph " +"List `_ publication for a list of glyph names " +"and their unicode numbers. Example:" +msgstr "" +"Unicode整数、または名前が不明な場合は65533 = " +"0xFFFDです。例:`font.glyph_name_to_unicode(\"Sigma\") = " +"931`、`font.glyph_name_to_unicode(\"sigma\") = 963` 。Adobe Glyph " +"Listの出版物を参照して、グリフ名とUnicode番号のリストを確認してください。例:" + +#: ../../font.rst:219 4e8ef50287f840c39ab6193fe4d15cef +msgid "The glyph rectangle relative to :data:`fontsize` 1." +msgstr ":data:`fontsize` 1に対するグリフの矩形領域。" + +#: ../../font.rst:221 2d75f0d683514f0ca0affa79216761d3 +msgid "``ord()`` of the character." +msgstr "文字の ``ord()`` 。" + +#: ../../font.rst:223 d5ebc63552854e929e574e66356875f3 +msgid "a :ref:`Rect`." +msgstr ":ref:`Rect`。" + +#: ../../font.rst:228 ecfa548b1cdb4cbf977bdd106fe6a53a +msgid "Show the name of the character's glyph." +msgstr "文字のグリフの名前を表示します。" + +#: ../../font.rst:230 0a3a425d6dd84c9bb036c80f302940e1 +msgid "" +"the unicode number of the character. Use ``ord()``, not the character " +"itself." +msgstr "文字のunicode番号。文字自体ではなく ``ord()`` を使用します。" + +#: ../../font.rst:232 e30f70a8afc247e1a71dff22dec0df83 +msgid "" +"a string representing the glyph's name. E.g. `font.glyph_name(ord(\"#\"))" +" = \"numbersign\"`. For an invalid code \".notfound\" is returned. .. " +"note:: *(Changed in v1.18.0)* This method and " +":meth:`Font.glyph_name_to_unicode` no longer depend on a font and instead" +" retrieve information from the **Adobe Glyph List**. Also available as " +"`pymupdf.unicode_to_glyph_name()` and resp. " +"`pymupdf.glyph_name_to_unicode()`." +msgstr "" + +#: ../../font.rst:232 7ae0eaba9ee2443a8f60786834a14ecb +msgid "" +"a string representing the glyph's name. E.g. `font.glyph_name(ord(\"#\"))" +" = \"numbersign\"`. For an invalid code \".notfound\" is returned." +msgstr "" +"グリフの名前を表す文字列。例:`font.glyph_name(ord(\"#\")) = \"numbersign\"` " +"。無効なコードの場合、\".notfound\" が返されます。" + +#: ../../font.rst:234 5b19c370607445debb9b31926c40bd34 +msgid "" +"*(Changed in v1.18.0)* This method and :meth:`Font.glyph_name_to_unicode`" +" no longer depend on a font and instead retrieve information from the " +"**Adobe Glyph List**. Also available as `pymupdf.unicode_to_glyph_name()`" +" and resp. `pymupdf.glyph_name_to_unicode()`." +msgstr "" +"*(1.18.0で変更)* このメソッドと:meth:`Font.glyph_name_to_unicode` " +"はもはやフォントに依存せず、**Adobe Glyph List** " +"から情報を取得します。`pymupdf.unicode_to_glyph_name()` とresp. " +"`pymupdf.glyph_name_to_unicode()` としても利用可能です。" + +#: ../../font.rst:241 c11d86d001524cb0b0035abf8228f4be +msgid "Calculate the length in points of a unicode string." +msgstr "Unicode文字列の長さをポイント単位で計算します。" + +#: ../../font.rst:243 e4ab64b446a74c32ae91c6a6adf19b19 +msgid "" +"There is a functional overlap with :meth:`get_text_length` for Base-14 " +"fonts only." +msgstr "Base-14フォントに対しては、:meth:`get_text_length` と機能が重複しています。" + +#: ../../font.rst:245 ../../font.rst:269 035f68a9090c4e2f987c581f69acb324 +#: 14a8f02b38c644daa664491248d5815c +msgid "a text string, UTF-8 encoded." +msgstr "テキスト文字列、UTF-8エンコード。" + +#: ../../font.rst:247 ../../font.rst:271 517bbee097774337989d00f3f53b0216 +#: ecc2c4f1004f417bb7b183dea2531bdf +msgid "the :data:`fontsize`." +msgstr ":data:`fontsize`。" + +#: ../../font.rst 0f9f97b5b7274e28bb66f006ab7729ff +#: 100725ef275b4763b1eb920723f8d42b 382dcd13dbdd4c6eb6dcb3f1fb1b0d98 +#: 5669062b0c794a1ea0c95524ee04fdc2 5aa22119ec9d467c80a0df11b13e21b2 +#: 67ed9c95928a4ccd955c23069b590461 6bc06432bdd44d2ba5bf2badedb1d12f +#: 742b70e009af4f818c34be3c0a57a54b af206f92225d443abcefdeea9a1f2793 +#: bc302339066642a0bbb9a13e75b7e47e +msgid "Return type" +msgstr "" + +#: ../../font.rst:251 8987bb3eba2948ca85213ee532f6a807 +msgid "" +"the length of the string in points when stored in the PDF. If a character" +" is not contained in the font, it will automatically be looked up in a " +"fallback font. .. note:: This method was originally implemented in " +"Python, based on calling :meth:`Font.glyph_advance`. For performance " +"reasons, it has been rewritten in C for v1.18.14. To compute the width of" +" a single character, you can now use either of the following without " +"performance penalty: 1. `font.glyph_advance(ord(\"Ä\")) * fontsize`" +" 2. `font.text_length(\"Ä\", fontsize=fontsize)` For multi-" +"character strings, the method offers a huge performance advantage " +"compared to the previous implementation: instead of about 0.5 " +"microseconds for each character, only 12.5 nanoseconds are required for " +"the second and subsequent ones." +msgstr "" + +#: ../../font.rst:251 70750ea5a311499ea7fac5b6aed32cb1 +msgid "" +"the length of the string in points when stored in the PDF. If a character" +" is not contained in the font, it will automatically be looked up in a " +"fallback font." +msgstr "PDFに格納された文字列のポイント単位の長さ。文字がフォントに含まれていない場合、自動的にフォールバックフォントで検索されます。" + +#: ../../font.rst:253 4ae0b7bb62084e13a26b43b1b57ac9fc +msgid "" +"This method was originally implemented in Python, based on calling " +":meth:`Font.glyph_advance`. For performance reasons, it has been " +"rewritten in C for v1.18.14. To compute the width of a single character, " +"you can now use either of the following without performance penalty:" +msgstr "" +"このメソッドは元々Pythonで実装され、:meth:`Font.glyph_advance` " +"を呼び出すことに基づいていました。性能の理由から、v1.18.14向けにCで書き直されました。単一の文字の幅を計算するには、以下のいずれかを使用でき、パフォーマンスに差異はありません:" + +#: ../../font.rst:255 dba2bd273bcb496e8048e7c5e69cb5cb +msgid "`font.glyph_advance(ord(\"Ä\")) * fontsize`" +msgstr "" + +#: ../../font.rst:256 d5e165b21ed445bc9a5c17a70d8d6558 +msgid "`font.text_length(\"Ä\", fontsize=fontsize)`" +msgstr "" + +#: ../../font.rst:258 eec51191fac44eb6ae1c858092678361 +msgid "" +"For multi-character strings, the method offers a huge performance " +"advantage compared to the previous implementation: instead of about 0.5 " +"microseconds for each character, only 12.5 nanoseconds are required for " +"the second and subsequent ones." +msgstr "複数文字の文字列に対して、このメソッドは以前の実装と比べて非常に高速で、各文字ごとに約0.5マイクロ秒かかる代わりに、2番目以降の文字には12.5ナノ秒しか必要ありません。" + +#: ../../font.rst:265 c34ff4dceaae4d6a81ed946778f0c4cf +msgid "*New in v1.18.14*" +msgstr "*v1.18.14で新たに導入*" + +#: ../../font.rst:267 d3992820d7be4dc1a6b845a733c4b3e9 +msgid "Sequence of character lengths in points of a unicode string." +msgstr "Unicode文字列の文字の長さ(ポイント単位)のシーケンス。" + +#: ../../font.rst:275 b84312cc1c534354881dd97d76839e9e +msgid "" +"the lengths in points of the characters of a string when stored in the " +"PDF. It works like :meth:`Font.text_length` broken down to single " +"characters. This is a high speed method, used e.g. in " +":meth:`TextWriter.fill_textbox`. The following is true (allowing rounding" +" errors): `font.text_length(text) == sum(font.char_lengths(text))`. >>> " +"font = pymupdf.Font(\"helv\") >>> text = \"PyMuPDF\" >>> " +"font.text_length(text) 50.115999937057495 >>> " +"pymupdf.get_text_length(text, fontname=\"helv\") 50.115999937057495 >>> " +"sum(font.char_lengths(text)) 50.115999937057495 >>> " +"pprint(font.char_lengths(text)) (7.336999952793121, # P 5.5," +" # y 9.163000047206879, # M 6.115999937057495, # u " +"7.336999952793121, # P 7.942000031471252, # D 6.721000015735626) # " +"F" +msgstr "" + +#: ../../font.rst:275 18590ab69f6c4f9aa28811c3edcdb8dd +msgid "" +"the lengths in points of the characters of a string when stored in the " +"PDF. It works like :meth:`Font.text_length` broken down to single " +"characters. This is a high speed method, used e.g. in " +":meth:`TextWriter.fill_textbox`. The following is true (allowing rounding" +" errors): `font.text_length(text) == sum(font.char_lengths(text))`." +msgstr "" +"PDFに保存される文字列の各文字の長さ(ポイント単位)。これは、:meth:`Font.text_length` " +"を各文字に分解したようなものです。高速なメソッドであり、例えば:meth:`TextWriter.fill_textbox` " +"で使用されています。以下が成り立ちます(丸め誤差を許容する):`font.text_length(text) == " +"sum(font.char_lengths(text))`。" + +#: ../../font.rst:297 90184e5c052f413bb769157e755c3564 +msgid "New in v1.17.6" +msgstr "新機能 v1.17.6" + +#: ../../font.rst:299 7789084e41e44fd8a5452982982c9d31 +msgid "Copy of the binary font file content." +msgstr "バイナリフォントファイルのコンテンツのコピー。" + +#: ../../font.rst:305 bc5d1c2db4ea4b55a720b5cd1cb60971 +msgid "" +"A dictionary with various font properties, each represented as bools. " +"Example for Helvetica::" +msgstr "Helveticaの例など、さまざまなフォントプロパティを持つ辞書で、それぞれがブール値として表されています。" + +#: ../../font.rst:325 2c69c51beb634c2ea40a3542e9623343 +msgid "Name of the font. May be \"\" or \"(null)\"." +msgstr "フォントの名前。空文字列であることもあります。" + +#: ../../font.rst:329 d87516f6f7ee41e5bcea1870810c81c0 +msgid "The font bbox. This is the maximum of its glyph bboxes." +msgstr "フォントのbbox。これは、そのグリフのbboxの最大値です。" + +#: ../../font.rst:331 5d6efec296e84501bce9006881c8e99e +msgid ":ref:`Rect`" +msgstr "" + +#: ../../font.rst:337 085122ed54784cd68460c9c243a86347 +msgid "The number of glyphs defined in the font." +msgstr "グリフ数。" + +#: ../../font.rst:341 ../../font.rst:349 4e654e57562b44bfbaaa02fd7dc465ec +#: d23b8faa3854454ca67f10a1f0d8260d +msgid "New in v1.18.0" +msgstr "v1.18.0で新規追加" + +#: ../../font.rst:343 f327c380b2a84214962dc640edc3d113 +msgid "" +"The ascender value of the font, see `ascender typography " +"`_ for details. " +"Please note that there is a difference to the strict definition: our " +"value includes everything above the baseline -- not just the height " +"difference between upper case \"A\" and and lower case \"a\"." +msgstr "フォントのアセンダー値。詳細についてはこちらを参照してください。ただし、厳密な定義とは異なり、私たちの値にはベースラインを上回るすべてが含まれます。単に大文字の「A」と小文字の「a」の高さの違いだけでなく、ベースラインを上回るすべてが含まれます。" + +#: ../../font.rst:351 c2b2e6bdad8a4d50864215d20298070f +msgid "" +"The descender value of the font, see `descender typography " +"`_ for details. This value " +"always is negative and is the portion that some glyphs descend below the " +"base line, for example \"g\" or \"y\". As a consequence, the value " +"`ascender - descender` is the total height, that every glyph of the font " +"fits into. This is true at least for most fonts -- as always, there are " +"exceptions, especially for calligraphic fonts, etc." +msgstr "" +"フォントのディセンダー値。詳細についてはこちらを参照してください。この値は常に負であり、一部のグリフがベースライン以下に降りる部分、例えば「g」や「y」です。したがって、`アセンダー" +" - ディセンダー` " +"の値は、フォントのすべてのグリフが収まる総高さです。これはほとんどのフォントに当てはまりますが、いくつかの例外があります。特にカリグラフィーフォントなどです。" + +#: ../../font.rst:363 183f4486ae154572b4f1eabf80f6d289 +msgid "" +"A number of attributes with obvious meanings. Reflect some values of the " +":attr:`Font.flags` dictionary." +msgstr "明らかな意味を持つ属性がいくつかあります。これらは :attr:`Font.flags` 辞書の値の一部を反映しています。" + +#: ../../font.rst:368 f5b86a56151043388dd70a219b536ee0 +msgid "Footnotes" +msgstr "脚注" + +#: ../../font.rst:369 fdd858aa443d477f829878a30b0ea1f0 +msgid "" +"MuPDF does not support all fontfiles with this feature and will raise " +"exceptions like *\"mupdf: FT_New_Memory_Face((null)): unknown file " +"format\"*, if it encounters issues." +msgstr "" +"MuPDFはこの機能を持つすべてのフォントファイルをサポートしておらず、問題が発生すると *「mupdf: " +"FT_New_Memory_Face((null)): unknown file format」* " +"といった例外を発生させます。:ref:`TextWriter` のメソッドは :attr:`Font.is_writable` をチェックします。" + +#: ../../font.rst:371 209859bd49c24fa297bf99f6b0a27b59 +msgid "" +"The built-in Python module `array` has been chosen for its speed and low " +"memory requirement." +msgstr "組み込みの *array* モジュールは、その速度と値のコンパクトな表現のために選択されました。" + +#: ../../footer.rst:60 9ec9aab2e7474eea822f4d9e093fb613 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid ":attr:`~Font.is_writable`" +#~ msgstr "" + +#~ msgid "Font usable with :ref:`TextWriter`" +#~ msgstr ":ref:`TextWriter` で使用可能なフォント" + +#~ msgid "Indicates whether this font can be used with :ref:`TextWriter`." +#~ msgstr "このフォントを :ref:`TextWriter` で使用できるかどうかを示します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/footer.mo b/docs/locales/ja/LC_MESSAGES/footer.mo new file mode 100644 index 000000000..249832dee Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/footer.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/footer.po b/docs/locales/ja/LC_MESSAGES/footer.po new file mode 100644 index 000000000..61aaa6400 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/footer.po @@ -0,0 +1,28 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../footer.rst:60 74c961cfd0e64769bcdb58467763a6c6 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/functions.mo b/docs/locales/ja/LC_MESSAGES/functions.mo new file mode 100644 index 000000000..b69b2df87 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/functions.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/functions.po b/docs/locales/ja/LC_MESSAGES/functions.po new file mode 100644 index 000000000..d2427bfad --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/functions.po @@ -0,0 +1,2457 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 f10a8020c28249fca44848d866853453 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 8c253eade3be4573a8d216948f6a7359 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 c1dc5c8f7ce44ac4906d7f2e9ae8dd3c +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../functions.rst:5 f3404c3080304de29e895aac382d9965 +msgid "Functions" +msgstr "関数" + +#: ../../functions.rst:6 70323b9e30fb490db389a3d997a1cab9 +msgid "" +"The following are miscellaneous functions and attributes on a fairly low-" +"level technical detail." +msgstr "以下は、PDFに関する低レベルな技術的詳細に関するさまざまな関数と属性です。" + +#: ../../functions.rst:8 a7a4a6b9e92945e49a02a07f50b221d4 +msgid "" +"Some functions provide detail access to PDF structures. Others are " +"stripped-down, high performance versions of other functions which provide" +" more information." +msgstr "一部の関数は、PDF構造への詳細なアクセスを提供します。他の関数の高性能バージョンで、より多くの情報を提供する関数から派生しています。" + +#: ../../functions.rst:10 80d711cfafea41b6b18edc4996098631 +msgid "Yet others are handy, general-purpose utilities." +msgstr "また、その他にも便利な汎用ユーティリティが含まれています。" + +#: ../../functions.rst:14 ce08da4d75f04c2d88814b9f4cb4b9fb +msgid "**Function**" +msgstr "**関数**" + +#: ../../functions.rst:14 87267f8f2f9c4906abe394a3726d8ce3 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../functions.rst:16 8c7b262e30494eb2900a478249533401 +msgid ":attr:`Annot.apn_bbox`" +msgstr "" + +#: ../../functions.rst:16 3ec7f54fc6214998ac1f32de3ae05905 +msgid "PDF only: bbox of the appearance object" +msgstr "PDFのみ:外観オブジェクトのバウンディングボックス" + +#: ../../functions.rst:17 62b2f314087e4c5d802938929352a251 +msgid ":attr:`Annot.apn_matrix`" +msgstr "" + +#: ../../functions.rst:17 5d4bf95e115f4a79b0ac44f6d45a0cd4 +msgid "PDF only: the matrix of the appearance object" +msgstr "PDFのみ:外観オブジェクトの行列" + +#: ../../functions.rst:18 23379817033347e4a0ad91ffc4860e15 +msgid ":attr:`Page.is_wrapped`" +msgstr "" + +#: ../../functions.rst:18 49fe2cfdf483448abb46dfa426338186 +msgid "check whether contents wrapping is present" +msgstr "コンテンツの折り返しが存在するかどうかを確認します" + +#: ../../functions.rst:19 11b42e143aba4579bfd08436ce71b89f +msgid ":meth:`adobe_glyph_names`" +msgstr "" + +#: ../../functions.rst:19 f7532442f88b4d4481a985a21e1e9dca +msgid "list of glyph names defined in **Adobe Glyph List**" +msgstr "**Adobe Glyph List** で定義されたグリフ名のリスト" + +#: ../../functions.rst:20 bbe71f57dd1544d6abbffb3f025ad0d0 +msgid ":meth:`adobe_glyph_unicodes`" +msgstr "" + +#: ../../functions.rst:20 2e63b3cdb4614f68bd0f06bdf473415c +msgid "list of unicodes defined in **Adobe Glyph List**" +msgstr "**Adobe Glyph List** で定義されたUnicodeのリスト" + +#: ../../functions.rst:21 1dab9af8f225439a809a695624ad06ad +msgid ":meth:`Annot.clean_contents`" +msgstr "" + +#: ../../functions.rst:21 17d23fa135d74e28baebb9a590420a93 +msgid "PDF only: clean the annot's :data:`contents` object" +msgstr "PDFのみ:アノテーションの :data:`contents` オブジェクトをクリーンアップ" + +#: ../../functions.rst:22 2565b744b5074b0197b985feda6260d3 +msgid ":meth:`Annot.set_apn_bbox`" +msgstr "" + +#: ../../functions.rst:22 c603cce58b4e4f19ad03d7d5816decf7 +msgid "PDF only: set the bbox of the appearance object" +msgstr "PDFのみ:外観オブジェクトのバウンディングボックスを設定" + +#: ../../functions.rst:23 dec230a165a848fab667491090a3b48c +msgid ":meth:`Annot.set_apn_matrix`" +msgstr "" + +#: ../../functions.rst:23 1d4c8fcb695b41f794fe0181f55c4fed +msgid "PDF only: set the matrix of the appearance object" +msgstr "PDFのみ:外観オブジェクトの行列を設定" + +#: ../../functions.rst:24 919aa9c3fa0d4cea9f07bf595fd68689 +msgid ":meth:`ConversionHeader`" +msgstr "" + +#: ../../functions.rst:24 c5631d7a4fb54c26b307af4ec9a45d23 +msgid "return header string for *get_text* methods" +msgstr "*get_text* メソッド用のヘッダー文字列を返す" + +#: ../../functions.rst:25 02f472d4f87e4921a7850ccf52b83efe +msgid ":meth:`ConversionTrailer`" +msgstr "" + +#: ../../functions.rst:25 67364fbb2a0641498bd046f77798342d +msgid "return trailer string for *get_text* methods" +msgstr "*get_text* メソッド用のトレーラー文字列を返す" + +#: ../../functions.rst:26 ec526b8d6e4c4baa8432dc878bc3b9c2 +msgid ":meth:`Document.del_xml_metadata`" +msgstr "" + +#: ../../functions.rst:26 124e8e0efc184fe087303fef4d743203 +msgid "PDF only: remove XML metadata" +msgstr "PDFのみ:XMLメタデータを削除" + +#: ../../functions.rst:27 b6f93fdd44ad46129f8fa2758f2a4818 +msgid ":meth:`Document.get_char_widths`" +msgstr "" + +#: ../../functions.rst:27 09b83665efc54bec90b1491e267e0f10 +msgid "PDF only: return a list of glyph widths of a font" +msgstr "PDFのみ:フォントのグリフ幅のリストを返す" + +#: ../../functions.rst:28 62aedd1eef214e3b9fd7dfd8a09d1ff3 +msgid ":meth:`Document.get_new_xref`" +msgstr "" + +#: ../../functions.rst:28 0b6342969ec0441393c79f793e633ddd +msgid "PDF only: create and return a new :data:`xref` entry" +msgstr "PDFのみ:新しい :data:`xref` エントリを作成して返す" + +#: ../../functions.rst:29 62f8b93e8bf8444bbafae94639086f66 +msgid ":meth:`Document.is_stream`" +msgstr "" + +#: ../../functions.rst:29 3b16adc919ff4ccb921eba7b7ec2457c +msgid "PDF only: check whether an :data:`xref` is a stream object" +msgstr "PDFのみ::data:`xref` がストリームオブジェクトであるかどうかを確認" + +#: ../../functions.rst:30 7a3f46883b23434fb745bea51dd302df +msgid ":meth:`Document.xml_metadata_xref`" +msgstr "" + +#: ../../functions.rst:30 b0b51441d32e4c969fe3b90dc9843f34 +msgid "PDF only: return XML metadata :data:`xref` number" +msgstr "PDFのみ:XMLメタデータの :data:`xref` 番号を返す" + +#: ../../functions.rst:31 b4adeece49294f0a9217abd6f42e075d +msgid ":meth:`Document.xref_length`" +msgstr "" + +#: ../../functions.rst:31 cfa061cc4de4418a9a6c699fd6c1dbbf +msgid "PDF only: return length of :data:`xref` table" +msgstr "PDFのみ::data:`xref` テーブルの長さを返す" + +#: ../../functions.rst:32 973a90abac464c3b8b375c56719a0bff +msgid ":meth:`EMPTY_IRECT`" +msgstr "" + +#: ../../functions.rst:32 ../../functions.rst:34 +#: 291e722b9502438bb05be547414824c7 55383aeb0dae482fa2d060d914f7b6b4 +msgid "return the (standard) empty / invalid rectangle" +msgstr "標準の空の/無効な矩形を返す" + +#: ../../functions.rst:33 205726f3db7d409494a330988f4f46ba +msgid ":meth:`EMPTY_QUAD`" +msgstr "" + +#: ../../functions.rst:33 101a675d89a348a39c26a4c078403def +msgid "return the (standard) empty / invalid quad" +msgstr "標準の空の/無効な四角形を返す" + +#: ../../functions.rst:34 4b352eaf25a946d683f7845067fd3fe2 +msgid ":meth:`EMPTY_RECT`" +msgstr "" + +#: ../../functions.rst:35 4c90b4ab2918473bbd455ac6e171a04b +msgid ":meth:`get_pdf_now`" +msgstr "" + +#: ../../functions.rst:35 fa79da54010c460cb80834428c14658a +msgid "return the current timestamp in PDF format" +msgstr "現在のタイムスタンプをPDF形式で返す" + +#: ../../functions.rst:36 5235531d94bf455ca878c91fa390f88a +msgid ":meth:`get_pdf_str`" +msgstr "" + +#: ../../functions.rst:36 cd5a4fcf322c478eb525e24c4c3e9ca7 +msgid "return PDF-compatible string" +msgstr "PDF互換の文字列を返す" + +#: ../../functions.rst:37 3cef9014049b46ef8fb2c04a8a96525d +msgid ":meth:`get_text_length`" +msgstr "" + +#: ../../functions.rst:37 7deedb3c51ef4df0b7c843146b9b6110 +msgid "return string length for a given font & :data:`fontsize`" +msgstr "指定したフォントと :data:`fontsize` の文字列長を返す" + +#: ../../functions.rst:38 865c3294809b414a9f66ddd11a8975ba +msgid ":meth:`glyph_name_to_unicode`" +msgstr "" + +#: ../../functions.rst:38 c5ef798d869d46a488886a86148f9432 +msgid "return unicode from a glyph name" +msgstr "グリフ名からUnicodeを返す" + +#: ../../functions.rst:39 777efc2daaf34022b9921b15fb3406ed +msgid ":meth:`image_profile`" +msgstr "" + +#: ../../functions.rst:39 4ad04b9521d940ed8675cf7ccf967fc5 +msgid "return a dictionary of basic image properties" +msgstr "基本的な画像プロパティの辞書を返します。" + +#: ../../functions.rst:40 f9f1eaf655b84ca6aa0cca517c7f74e7 +msgid ":meth:`INFINITE_IRECT`" +msgstr "" + +#: ../../functions.rst:40 ../../functions.rst:42 +#: 2b0312cf18394f9bb48eea5c1e82f262 8cb320f4f817465fbba9644f0f247810 +msgid "return the (only existing) infinite rectangle" +msgstr "(唯一存在する)無限の矩形を返します。" + +#: ../../functions.rst:41 a1ac10acd9264731961d1b01f0eec40b +msgid ":meth:`INFINITE_QUAD`" +msgstr "" + +#: ../../functions.rst:41 c4853233b2054963818cbc88b3f9d8cb +msgid "return the (only existing) infinite quad" +msgstr "(唯一存在する)無限のクワッドを返します。" + +#: ../../functions.rst:42 b1324daf9ff745fb9d2168d7be4648c7 +msgid ":meth:`INFINITE_RECT`" +msgstr "" + +#: ../../functions.rst:43 4e1fe0343be849e89ad51baa5921d04a +msgid ":meth:`make_table`" +msgstr "" + +#: ../../functions.rst:43 fdf7383642024c03a67f955ae885907f +msgid "split rectangle in sub-rectangles" +msgstr "矩形をサブ矩形に分割します。" + +#: ../../functions.rst:44 4adb8d71f2044344a5b26234ccf68e06 +msgid ":meth:`Page.clean_contents`" +msgstr "" + +#: ../../functions.rst:44 f3cb03e57a5c41ae8143f7555a79c7fc +msgid "PDF only: clean the page's :data:`contents` objects" +msgstr "PDF のみ:ページの :data:`contents` オブジェクトをクリーンアップします" + +#: ../../functions.rst:45 75ced595d7584e5f92e8eb2b62cd3ad4 +msgid ":meth:`Page.get_bboxlog`" +msgstr "" + +#: ../../functions.rst:45 8c4f754edc4c4b319cc2b54860845f79 +msgid "list of rectangles that envelop text, drawing or image objects" +msgstr "テキスト、描画、または画像オブジェクトを囲む矩形のリストです。" + +#: ../../functions.rst:46 d1e897d011a04f2c920b3cd8c3a2f4d6 +msgid ":meth:`Page.get_contents`" +msgstr "" + +#: ../../functions.rst:46 8087b4ddded34432a1081ed947f22cea +msgid "PDF only: return a list of content :data:`xref` numbers" +msgstr "PDF のみ:コンテンツ :data:`xref` 番号のリストを返します。" + +#: ../../functions.rst:47 4002cfc938c14ecaa48d6ee403f154d0 +msgid ":meth:`Page.get_displaylist`" +msgstr "" + +#: ../../functions.rst:47 561f38475f9641f0bff2da76977dfe5a +msgid "create the page's display list" +msgstr "ページの表示リストを作成します。" + +#: ../../functions.rst:48 53c748a7a20f4af897e22b85db7febe7 +msgid ":meth:`Page.get_text_blocks`" +msgstr "" + +#: ../../functions.rst:48 6a53466a2b9c4ca3a36d2fd49d6b817d +msgid "extract text blocks as a Python list" +msgstr "テキスト ブロックを Python リストとして抽出します。" + +#: ../../functions.rst:49 ccaaf6bb9f4345e6846f9b35f9f45357 +msgid ":meth:`Page.get_text_words`" +msgstr "" + +#: ../../functions.rst:49 e1831d592e724dc3ac18729b5f39eb5e +msgid "extract text words as a Python list" +msgstr "テキストワードを Python リストとして抽出します。" + +#: ../../functions.rst:50 b6be6a798be9474b861e51b28bd9c46c +msgid ":meth:`Page.get_texttrace`" +msgstr "" + +#: ../../functions.rst:50 1918ba1582404d0da1cfa1bb90facd9a +msgid "low-level text information" +msgstr "低レベルのテキスト情報です。" + +#: ../../functions.rst:51 97f7f70a1f024c5596beba2d6083c370 +msgid ":meth:`Page.read_contents`" +msgstr "" + +#: ../../functions.rst:51 fbbe70d6b80548bf81db0ed638604ff3 +msgid "PDF only: get complete, concatenated /Contents source" +msgstr "PDF のみ:完全な連結 /Contents ソースを取得します。" + +#: ../../functions.rst:52 97fc05c38d804f0c829d6226fae6ff64 +msgid ":meth:`Page.run`" +msgstr "" + +#: ../../functions.rst:52 7d5ea217e7ce4dd2856d6dd788de51fa +msgid "run a page through a device" +msgstr "ページをデバイスを介して実行します。" + +#: ../../functions.rst:53 3f0fafaa94704a27945b473cc84d6b53 +msgid ":meth:`Page.set_contents`" +msgstr "" + +#: ../../functions.rst:53 3a38b120e7094eef91491802cb5ede57 +msgid "PDF only: set page's :data:`contents` to some :data:`xref`" +msgstr "PDF のみ:ページの :data:`contents` を特定の :data:`xref` に設定します。" + +#: ../../functions.rst:54 0feca3c718384d869711f8270fcfe9a9 +msgid ":meth:`Page.wrap_contents`" +msgstr "" + +#: ../../functions.rst:54 ae1231ecd4ea49c7813df81e4da27c5e +msgid "wrap contents with stacking commands" +msgstr "スタッキング コマンドでコンテンツをラップします。" + +#: ../../functions.rst:55 b104404c744c474293113d1eef9ae246 +msgid ":meth:`css_for_pymupdf_font`" +msgstr "" + +#: ../../functions.rst:55 79e6c0cb8367467aae4981ef407ebca5 +msgid "create CSS source for a font in package pymupdf_fonts" +msgstr "パッケージ pymupdf_fonts のフォント用の CSS ソースを作成します。" + +#: ../../functions.rst:56 86e04922d0254df48749ddd782a42be2 +msgid ":meth:`paper_rect`" +msgstr "" + +#: ../../functions.rst:56 850682504daa47a3b49a984ce1189a63 +msgid "return rectangle for a known paper format" +msgstr "既知の用紙形式の矩形を返します。" + +#: ../../functions.rst:57 c4d147c5a1d64c82a16e7e347fe90738 +msgid ":meth:`paper_size`" +msgstr "" + +#: ../../functions.rst:57 1c6cfbbad22542ec987d6587745efa4e +msgid "return width, height for a known paper format" +msgstr "既知の用紙形式の幅と高さを返します。" + +#: ../../functions.rst:58 3e78ca8b672c4df394141beb3ca86a19 +msgid ":meth:`paper_sizes`" +msgstr "" + +#: ../../functions.rst:58 efdf46d6a1ee49d9a65b0785d1b8197d +msgid "dictionary of pre-defined paper formats" +msgstr "事前定義の用紙形式の辞書です。" + +#: ../../functions.rst:59 2b76da1b09664c2281a67555e1aee79d +msgid ":meth:`planish_line`" +msgstr "" + +#: ../../functions.rst:59 55e6af8933184b9799583466ae6b5260 +msgid "matrix to map a line to the x-axis" +msgstr "直線を x 軸にマップする行列です。" + +#: ../../functions.rst:60 adf4a6912b74494e8b4a81e1ad1e023d +msgid ":meth:`recover_char_quad`" +msgstr "" + +#: ../../functions.rst:60 2593d09e30694c1f9bd6ce4bdb1520cb +msgid "compute the quad of a char (\"rawdict\")" +msgstr "文字のクワッドを計算します(\"rawdict\")。" + +#: ../../functions.rst:61 8b29bf9a3a4f4c3eb96ef7c0bd912926 +msgid ":meth:`recover_line_quad`" +msgstr "" + +#: ../../functions.rst:61 7d233952bb8e4ce885f963dd77fb0e6f +msgid "compute the quad of a subset of line spans" +msgstr "ラインスパンのサブセットのクワッドを計算します" + +#: ../../functions.rst:62 72275833d3a1438faa6a801185292b81 +msgid ":meth:`recover_quad`" +msgstr "" + +#: ../../functions.rst:62 5a8d087891f4445ea85ed013e701bc78 +msgid "compute the quad of a span (\"dict\", \"rawdict\")" +msgstr "スパンのクワッドを計算する (\"dict\", \"rawdict\")" + +#: ../../functions.rst:63 9eef281b47d64378adb6f46472037ad9 +msgid ":meth:`recover_span_quad`" +msgstr "" + +#: ../../functions.rst:63 1a7eefd270d942119569ac42043ae560 +msgid "compute the quad of a subset of span characters" +msgstr "行スパンのサブセットのクワッドを計算します。" + +#: ../../functions.rst:64 d8411fe2450a4335b4720cfc8fd88ac1 +msgid ":meth:`set_messages`" +msgstr "" + +#: ../../functions.rst:64 49fec9a75fe34e408cf4e2b564d9393e +msgid "set destination of |PyMuPDF| messages." +msgstr "" + +#: ../../functions.rst:65 bacb887d9fe64e2586f478457311f52e +msgid ":meth:`sRGB_to_pdf`" +msgstr "" + +#: ../../functions.rst:65 7b07cd68b95740db96d883725a23dfc8 +msgid "return PDF RGB color tuple from an sRGB integer" +msgstr "sRGB 整数から PDF RGB カラー タプルを返します。" + +#: ../../functions.rst:66 d9fad77ca5bb4ea29e7e115ae41027d9 +msgid ":meth:`sRGB_to_rgb`" +msgstr "" + +#: ../../functions.rst:66 3eb2e1a49c394d7190320f310ced9897 +msgid "return (R, G, B) color tuple from an sRGB integer" +msgstr "sRGB 整数から (R、G、B) カラー タプルを返します。" + +#: ../../functions.rst:67 dacbb5d08f684a739eea5e737a0f8475 +msgid ":meth:`unicode_to_glyph_name`" +msgstr "" + +#: ../../functions.rst:67 e27898f4a503438b9c84fbaea53d8e46 +msgid "return glyph name from a unicode" +msgstr "Unicode からグリフ名を返します。" + +#: ../../functions.rst:68 3761a359e2a440e6a6d94194ddd5ca47 +msgid ":meth:`get_tessdata`" +msgstr "" + +#: ../../functions.rst:68 7916666e646e4f1b81e7eeb0f76726f4 +msgid "locates the language support of the Tesseract-OCR installation" +msgstr "Tesseract-OCR インストールの言語サポートを特定します。" + +#: ../../functions.rst:69 88ba621d00d6454d87a956ca2ea10427 +msgid ":meth:`colors_pdf_dict`" +msgstr "" + +#: ../../functions.rst:69 8d156d20680342ef90bfb088e0b93fdf +msgid "return dict of color names." +msgstr "" + +#: ../../functions.rst:70 d6674fd6f92b465f9640631eee500d7b +msgid ":meth:`colors_wx_list`" +msgstr "" + +#: ../../functions.rst:70 72620216e10a478985af2dc1ca95125f +msgid "return list of color names." +msgstr "" + +#: ../../functions.rst:71 94e8002b7c134f61aae64d5f0ee42280 +msgid ":attr:`fitz_fontdescriptors`" +msgstr "" + +#: ../../functions.rst:71 ccee6906defe4b3391fa9a827d7183b6 +msgid "dictionary of available supplement fonts" +msgstr "利用可能な補足フォントの辞書です。" + +#: ../../functions.rst:72 d4da999f959b4d72813e7d4b11346f02 +msgid ":attr:`PYMUPDF_MESSAGE`" +msgstr "" + +#: ../../functions.rst:72 d20b7c4d1f2d40a295f7f3556bd510ff +msgid "destination of |PyMuPDF| messages." +msgstr "" + +#: ../../functions.rst:73 b730944f16e5483a91a1d0e55c948756 +msgid ":attr:`pdfcolor`" +msgstr "" + +#: ../../functions.rst:73 a7dbeae797964748bc2445c3524f2142 +msgid "dictionary of almost 500 RGB colors in PDF format." +msgstr "PDF 形式のほぼ 500 以上の RGB カラーの辞書です。" + +#: ../../functions.rst:78 002feca7554345c0b5cf4b41b774f168 +msgid "" +"Convenience function to return width and height of a known paper format " +"code. These values are given in pixels for the standard resolution 72 " +"pixels = 1 inch." +msgstr "既知の用紙フォーマットの幅と高さを返す便利な関数です。これらの値は、標準解像度 72 ピクセル = 1 インチのピクセル単位で指定されます。" + +#: ../../functions.rst:80 17ca7e059e904354a626f297dde8741d +msgid "" +"Currently defined formats include **'A0'** through **'A10'**, **'B0'** " +"through **'B10'**, **'C0'** through **'C10'**, **'Card-4x6'**, **'Card-" +"5x7'**, **'Commercial'**, **'Executive'**, **'Invoice'**, **'Ledger'**, " +"**'Legal'**, **'Legal-13'**, **'Letter'**, **'Monarch'** and **'Tabloid-" +"Extra'**, each in either portrait or landscape format." +msgstr "" +"現在の定義されたフォーマットには、**‘A0’** から **‘A10’**、**‘B0’** から **‘B10’**、**‘C0’** から " +"**‘C10’**、**‘Card-4x6’**、**‘Card-" +"5x7’**、**‘Commercial’**、**‘Executive’**、**‘Invoice’**、**‘Ledger’**、**‘Legal’**、**‘Legal-13’**、**‘Letter’**、**‘Monarch" +"’**、**‘Tabloid-Extra’** が含まれており、各フォーマットは縦向きまたは横向きで提供されています。" + +#: ../../functions.rst:82 33458151eb9e4b27adb3007ddcaf2e74 +msgid "" +"A format name must be supplied as a string (case **in** \\sensitive), " +"optionally suffixed with \"-L\" (landscape) or \"-P\" (portrait). No " +"suffix defaults to portrait." +msgstr "" +"フォーマット名は、文字列として指定する必要があります(大文字小文字を区別しない)。オプションで \" -L\"(横向き)または \" " +"-P\"(縦向き)でサフィックスを付けることができます。サフィックスが指定されていない場合、デフォルトは縦向きです。" + +#: ../../functions.rst 0086ae19d23245d2a3c425d537cff985 +#: 0107a4c1a39347c4af4004ad0d5e403f 252c1e16d25c490cb7cbcce3b9f1cfce +#: 4d137e1791c745deb8c590d5bfbb5dc5 53d3e4977fb146ad8669a8e821bfefc2 +#: 64c8593781c046ddb8f521a6f1f0b725 73e64bfff41249dc85e4ec96e8391f45 +#: 906ceb613e864f36a87c345f3583fe16 9710f4ab97144634a38ae20942d1913c +#: 9926c7d0fcee400e841aa1890bc67ea2 9a5f4a6a98dc4147bdf82210a75f126b +#: 9d47d65487774d9c8607562d808e63bc a546b36584fc4023a7e68fa52d3bb110 +#: a9f64f731e974ea0aa63eb249d1d18db b3ce1309b9604e59aa7340f733b76c42 +#: c25ed4ddc44a4d8292fe7a949a15fe58 c4cdb78e6944459a8934a78aadb3eaa5 +#: c9c22817211c49a2ae16da519ef351b3 d7d622d64742404689c069e3aca882b3 +#: d9a5cf2ad2a74ac69251b17f01c01ef3 da9e9f534e6d4e9f8dbbb79b9ea31590 +#: f4fb4b17df8044c5967eb49d08ad2940 f56da1d9666f450fbdb7617571826508 +msgid "Parameters" +msgstr "" + +#: ../../functions.rst:84 d1edba4a2b0749ff9e106b3732d82d71 +msgid "" +"any format name from above in upper or lower case, like *\"A4\"* or " +"*\"letter-l\"*." +msgstr "" +"上記のいずれかのフォーマット名を大文字または小文字で指定します。たとえば、*\"A4\"* または *\"letter-l\"* " +"のように指定します。" + +#: ../../functions.rst 0121f158b26c43a2b9c36019a068d476 +#: 0ce6605af5f4478d8061e1965c2e1d97 106ca0af52b042ab892bc071e9202602 +#: 1c86a8e962aa499e9e410d093e1a7f88 256a84d8523f4aee8718f26909ba604d +#: 327dfa749eae497cb7f5387483a9a444 32ee4d8eb19b45f6bae13ef61ea1df84 +#: 374143252c4d4144b7fc79a07ecbcfe9 54c6c9a08eb645fba6be747045bde8c4 +#: 550b0c06550e4a1db6acda53a6f75aee 64d7b707a7bf4582b8102acb5337d806 +#: 6c7410225afa49df905d5ed0d17dc474 6d694012d38941158e6047cbd6765876 +#: 7262ad3fefd743ddaa764526ac356634 89ce342518cf4fafa64b40ff6b7b140c +#: 97d9ffa38655495f8a0013a370df5289 a8487089b1734e4eb813ff485ba54710 +#: a8684a1cd202441c86731484fca25ce8 b10068ceba924c31976b9242b92fd17f +#: cd5f983948d04e5ab2670e6390e0052f d4a5ef1e9ff945c3b9d4e2747459e5b5 +#: e05046ff91b84ecfb6c3e9f910e35c6b e5e153a681464042ac902226ca22a638 +#: fec4328be3b2480a916f5378f659f73a +msgid "Return type" +msgstr "" + +#: ../../functions.rst 0b28aca76f944ff18b6098afd579680a +#: 2b6133170db1444d825aff6739634e39 2b9736094e9f44c3a525fc5e7097a5e5 +#: 313496306a9743919ee5823a60c2d361 33ac39ac07414f85b3786ef4c9223ea3 +#: 454aad8620e64aa1a01848beb351cbec 5253deb9dac74a40b25e1b22197ba15d +#: 53a771157e234176a0d7ae969d9b227a 58185911039140a28707f02ad521b3ab +#: 5f35c2a270ac4eefa23fdc76dcee2484 624cf976820b41928dcd66343ff03da6 +#: 63b4df40d8c140b1be54bc04ef374174 6408e5ad41d2461a9f1df6fd1d10247a +#: 770763d9916145aa866e83c019d3897f 87450388ea2e4b3ba78057e08bf8d59c +#: 94045b20d2aa4b1db426dcb58eea1b10 94d79843696941f89e89964f275df7e5 +#: 9ff48f8556cd4c7799e4ae3e2becbe62 af5f6456a8f7493cae3a280402806308 +#: bd1a8c77553943e290203312960e2401 d3e45a9126484cd4a279e1ae5a54a761 +#: ddf4fe6c595f4133838d416f125ba6e5 debdc63467a94655af3b4c62c59d52cb +#: e1773e4b9bbc4fc1b6025290b2dd5ef4 e7e7c4c4fd324c66870c6a5a75a9fe69 +msgid "Returns" +msgstr "" + +#: ../../functions.rst:87 1b138ce45cb24fb29f004a1d4309d345 +msgid "" +"*(width, height)* of the paper format. For an unknown format *(-1, -1)* " +"is returned. Examples: *pymupdf.paper_size(\"A4\")* returns *(595, 842)* " +"and *pymupdf.paper_size(\"letter-l\")* delivers *(792, 612)*." +msgstr "" +"用紙フォーマットの *(幅、高さ)*。不明なフォーマットの場合、(-1、-1) " +"が返されます。例:*pymupdf.paper_size(\"A4\")* は (595、842) " +"を返し、*pymupdf.paper_size(\"letter-l\")* は *(792、612)* を返します。" + +#: ../../functions.rst:93 405aadeaf0834803b43a3691be34a350 +msgid "Convenience function to return a :ref:`Rect` for a known paper format." +msgstr "既知の紙のフォーマットに対応する :ref:`Rect` を返す便益関数です。" + +#: ../../functions.rst:95 4321cecbf19e44b5969aaaac7d91192f +msgid "any format name supported by :meth:`paper_size`." +msgstr ":meth:`paper_size` でサポートされている任意のフォーマット名。" + +#: ../../functions.rst:97 6253b4e47f72404b86be542a1f5ba214 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../functions.rst:98 96cb4ba9703240cda5af5d9b5ebd4273 +msgid "" +"*pymupdf.Rect(0, 0, width, height)* with *width, " +"height=pymupdf.paper_size(s)*." +msgstr "" + +#: ../../functions.rst:109 209aaeabd9fc46ac99b66ad31493b3d0 +msgid "" +"Sets destination of |PyMuPDF| messages to a file descriptor, a file, an " +"existing stream or `Python's logging system " +"`_." +msgstr "" + +#: ../../functions.rst:113 494817e916d2455a86023b21c58f3a11 +msgid "Usually one would only set one arg, or one or more `pylogging*` args." +msgstr "" + +#: ../../functions.rst:115 8e1da9c75a4d46cd98e9c4659b1a151c +msgid "" +"A text specification of destination; for details see description of " +"environmental variable `PYMUPDF_MESSAGE`." +msgstr "" + +#: ../../functions.rst:118 34909df9199646a4b311eeccca4936a0 +msgid "Write to file descriptor." +msgstr "" + +#: ../../functions.rst:120 9b448bc58b824bb08b07efa6932546dd +msgid "" +"Write to existing stream, which must have methods `.write(text)` and " +"`.flush()`." +msgstr "" + +#: ../../functions.rst:123 f053785536f6462bba2672cfee39462e +msgid "Write to a file." +msgstr "" + +#: ../../functions.rst:125 5a0b5cb436f2417c881f352a3f16f2d2 +msgid "Append to a file." +msgstr "" + +#: ../../functions.rst:127 ad3bbc6027e84e2bab9cb27d2b9ea314 +msgid "Write to Python's `logging` system." +msgstr "" + +#: ../../functions.rst:129 11b3c7e90be841f99339dbc2b36b3221 +msgid "Write to Python's `logging` system using specified Logger." +msgstr "" + +#: ../../functions.rst:131 71019e8509cd432ab8818a7b104e22de +msgid "Write to Python's `logging` system using specified level." +msgstr "" + +#: ../../functions.rst:133 c04c80653e184bbab2777ad6e8ce5511 +msgid "" +"Write to Python's `logging` system using specified logger name. Only used" +" if `pylogging_logger` is None. Default is `pymupdf`." +msgstr "" + +#: ../../functions.rst:137 10dcacf9980a48319c1ccddf8727743f +msgid "" +"If any `pylogging*` arg is not None, we write to `Python's logging system" +" `_." +msgstr "" + +#: ../../functions.rst:144 ../../functions.rst:156 ../../functions.rst:270 +#: 263672d47c2b4937aabab2ac1973bdd6 b0f3d381bde84d2e9d140f8eb6fc20c8 +#: fc756121bb534e6888ea52f5d5f7475b +msgid "*New in v1.17.4*" +msgstr "*バージョン1.17.4で新たに追加*" + +#: ../../functions.rst:146 28a2adf5d1a8459a866ce88539692849 +msgid "" +"Convenience function returning a PDF color triple (red, green, blue) for " +"a given sRGB color integer as it occurs in :meth:`Page.get_text` " +"dictionaries \"dict\" and \"rawdict\"." +msgstr "" +":meth:`Page.get_text` の辞書 \"dict\" および \"rawdict\" に存在する与えられた sRGB " +"カラー整数に対して、PDF カラーのトリプル(赤、緑、青)を返す便益関数です。" + +#: ../../functions.rst:148 ../../functions.rst:160 +#: 8382544bac0e4c58959a7c6356a5ea1d 938ab4f232fd47ad925a7c3d3141b81b +msgid "" +"an integer of format RRGGBB, where each color component is an integer in " +"range(255)." +msgstr "各カラーコンポーネントが範囲[0, 255]の整数である RRGGBB 形式の整数。" + +#: ../../functions.rst:150 10f206e7d5e243329ebcb32246f8b902 +msgid "" +"a tuple (red, green, blue) with float items in interval *0 <= item <= 1* " +"representing the same color. Example `sRGB_to_pdf(0xff0000) = (1, 0, 0)` " +"(red)." +msgstr "" +"*0 <= item <= 1* " +"の間の浮動小数点数アイテムを持つタプル(赤、緑、青)で、同じカラーを表します。例:`sRGB_to_pdf(0xff0000) = (1, 0, " +"0)` (赤)。" + +#: ../../functions.rst:158 3f1bd182a011433f95c261cb5072c83e +msgid "" +"Convenience function returning a color (red, green, blue) for a given " +"*sRGB* color integer." +msgstr "与えられた *sRGB* カラー整数に対して、カラー(赤、緑、青)を返す便益関数です。" + +#: ../../functions.rst:162 56008f93f9c4445e9550ff701c9a5067 +msgid "" +"a tuple (red, green, blue) with integer items in `range(256)` " +"representing the same color. Example `sRGB_to_pdf(0xff0000) = (255, 0, " +"0)` (red)." +msgstr "" +"整数アイテムが `range(256)` の範囲にある、同じ色を表すタプル(赤、緑、青)。例: `sRGB_to_pdf(0xff0000) = " +"(255, 0, 0)` (赤)" + +#: ../../functions.rst:168 ../../functions.rst:183 ../../functions.rst:198 +#: ../../functions.rst:211 6909db03961b42e384921d94b42d088b +#: 7fa4e5d78b60492e8580ca2bd95fc9e2 df918e2414764ab88723bfeb276d87b3 +#: e0895f463b6a4252a8d81f9f899483c2 +msgid "*New in v1.18.0*" +msgstr "*バージョン1.18.0で新たに追加*" + +#: ../../functions.rst:170 1c99f3f02cbe41beaaccdc4a304a0c18 +msgid "" +"Return the unicode number of a glyph name based on the **Adobe Glyph " +"List**." +msgstr "**Adobe Glyph List** に基づくグリフ名のUnicode番号を返す関数です。" + +#: ../../functions.rst:172 4d64118594024a50bb3b89613e2a782a +msgid "" +"the name of some glyph. The function is based on the `Adobe Glyph List " +"`_." +msgstr "" +"グリフの名前。この関数は `Adobe Glyph List `_ に基づいています。" + +#: ../../functions.rst:175 6f671fb8e68041e9babf072c102e5b6f +msgid "the unicode. Invalid *name* entries return `0xfffd (65533)`." +msgstr "Unicode番号。無効な *name* のエントリは `0xfffd (65533)` を返します。" + +#: ../../functions.rst:177 ../../functions.rst:205 ../../functions.rst:218 +#: 1d529882b0fd460cb9ec78ab559a0ece 318abb6987cc4a819a5fcfa675571abb +#: 3ff634aebfcc49c0a91be502177113cf +msgid "" +"A similar functionality is provided by package `fontTools " +"`_ in its *agl* sub-package." +msgstr "" +"同様の機能は、`fontTools `_ パッケージの *agl* " +"サブパッケージで提供されています。" + +#: ../../functions.rst:185 609da01536814ffc8706b0bbea3ab5f8 +msgid "" +"Return the glyph name of a unicode number, based on the **Adobe Glyph " +"List**." +msgstr "以下は、**Adobe Glyph List** に基づいた、Unicode番号に基づいたグリフ名を返す関数です。" + +#: ../../functions.rst:187 6a2134788061488f80d967a33cc10f18 +#: b79e55b908d046998fb2c3e15ba9aa33 +msgid "" +"the unicode given by e.g. `ord(\"ß\")`. The function is based on the " +"`Adobe Glyph List `_." +msgstr "" +"Unicode番号、例:`ord(\"ß\")` によって与えられます。この関数は `Adobe Glyph List " +"`_ に基づいています。" + +#: ../../functions.rst:190 075e96968e27471791191d74ea61cb09 +msgid "" +"the glyph name. E.g. `pymupdf.unicode_to_glyph_name(ord(\"Ä\"))` returns " +"`'Adieresis'`." +msgstr "グリフ名、例:`pymupdf.unicode_to_glyph_name(ord(\"Ä\"))` は `'Adieresis'` を返します。" + +#: ../../functions.rst:192 2ebedf06c52f4f49a1b608ac0a185a68 +msgid "" +"A similar functionality is provided by package `fontTools " +"`_: in its *agl* sub-package." +msgstr "" +"類似の機能は、`fontTools `_ パッケージの *agl* " +"サブパッケージで提供されています。" + +#: ../../functions.rst:200 901a264999f74b27b1b6f0c75e3d26ce +msgid "Return a list of glyph names defined in the **Adobe Glyph List**." +msgstr "**Adobe Glyph List** で定義されたグリフ名のリストを返します。" + +#: ../../functions.rst:203 8a8082e41b3b4d36b08a96250bdea3ff +msgid "list of strings." +msgstr "文字列のリスト。" + +#: ../../functions.rst:213 2f4ba69189114cc490cffb97e8a0ab9f +msgid "" +"Return a list of unicodes for there exists a glyph name in the **Adobe " +"Glyph List**." +msgstr "**Adobe Glyph List** にグリフ名が存在するUnicodeのリストを返します。" + +#: ../../functions.rst:216 74dae50c6b504e448879f45b492bc8ca +msgid "list of integers." +msgstr "整数のリスト。" + +#: ../../functions.rst:224 6bcba2202f05412b80d121404158c83d +msgid "*New in v1.21.0*" +msgstr "*新機能 v1.21.0*" + +#: ../../functions.rst:226 fb495a7e4a74418db018826854000539 +msgid "**Utility function for use with \"Story\" applications.**" +msgstr "**\"Story\" アプリケーションで使用するためのユーティリティ関数。**" + +#: ../../functions.rst:228 4809306f864c4a89ad70d5975115d193 +msgid "" +"Create CSS `@font-face` items for the given fontcode in pymupdf-fonts. " +"Creates a CSS font-family for all fonts starting with string " +"\"fontcode\"." +msgstr "" +"指定された \"fontcode\" に対して CSS `@font-face` アイテムを作成します。文字列 \"fontcode\" " +"で始まるすべてのフォント用に CSS font-family を作成します。" + +#: ../../functions.rst:230 e303e778dae54e6386372944b977834f +msgid "" +"The font naming convention in package pymupdf-fonts is \"fontcode\", " +"where the suffix \"sf\" is one of \"\" (empty), \"it\"/\"i\", " +"\"bo\"/\"b\" or \"bi\". These suffixes thus represent the regular, " +"italic, bold or bold-italic variants of that font." +msgstr "" +"パッケージ pymupdf-fonts でのフォントの命名規則は \"fontcode\" で、サフィックス \"sf\" " +"は(空白)、\"it\"、\"i\"、\"bo\"、\"b\"、\"bi\" " +"のいずれかです。したがって、これらのサフィックスは、フォントの通常の、イタリックの、太字の、太字イタリックのバリアントを表します。" + +#: ../../functions.rst:232 49d93b48dea8445a957440edfb91fc78 +msgid "For example, font code \"notos\" refers to fonts" +msgstr "例えば、フォントコード \"notos\" は以下のフォントに対応します。" + +#: ../../functions.rst:234 d540049bb18b402b8e2fce94206a4c9d +msgid "\"notos\" - \"Noto Sans Regular\"" +msgstr "" + +#: ../../functions.rst:235 bf50eb25ccaa4873a1e000b10f70f85d +msgid "\"notosit\" - \"Noto Sans Italic\"" +msgstr "" + +#: ../../functions.rst:236 c64e89ce985b4b40b347a45ae5034e62 +msgid "\"notosbo\" - \"Noto Sans Bold\"" +msgstr "" + +#: ../../functions.rst:237 243f4afbd3cc424aace4fb3d1c4aeb29 +msgid "\"notosbi\" - \"Noto Sans Bold Italic\"" +msgstr "" + +#: ../../functions.rst:239 a51da85020894121a2adaabef5348f53 +msgid "" +"The function creates (up to) four CSS `@font-face` definitions and " +"collectively assigns the `font-family` name \"notos\" to them (or the " +"\"name\" value if provided). Associated font buffers are placed / added " +"to the provided archive." +msgstr "" +"この関数は(最大で)4つの CSS `@font-face` 定義を作成し、それらに `font-family` 名 " +"\"notos\"(または指定された \"name\" " +"の値)を割り当てます。関連するフォントバッファは提供されたアーカイブに配置されます/追加されます。" + +#: ../../functions.rst:241 3132df29e9a44c06b7d46173b110b76a +msgid "" +"To use the font in the Python API for :ref:`Story`, execute " +"`.set_font(fontcode)` (or \"name\" if given). The correct font weight or " +"style will automatically be selected as required." +msgstr "" +":ref:`Story` の Python API でフォントを使用するには、`.set_font(fontcode)`(または指定した場合は " +"\"name\")を実行します。必要に応じて正しいフォントウェイトまたはスタイルが自動的に選択されます。" + +#: ../../functions.rst:243 21a24adeea0843709838dbd657e52cf5 +msgid "" +"For example to replace the \"sans-serif\" HTML standard (i.e. Helvetica) " +"with the above \"notos\", execute the following. Whenever \"sans-serif\" " +"is used (whether explicitly or implicitly), the Noto Sans fonts will be " +"selected." +msgstr "" +"たとえば、上記の \"notos\" で \"sans-serif\" HTML 標準(Helvetica)を置き換えるには、次のように実行します" +"。\"sans-serif\" が使用される場合(明示的にまたは暗黙的に)、Noto Sans フォントが選択されます。" + +#: ../../functions.rst:245 6f8364fda1b84e83815345fa382938cb +msgid "" +"`CSS = pymupdf.css_for_pymupdf_font(\"notos\", name=\"sans-serif\", " +"archive=...)`" +msgstr "" + +#: ../../functions.rst:247 261383cb1f684bcaa5dc2ca977a8e1a7 +msgid "Expects and returns the CSS source, with the new CSS definitions appended." +msgstr "CSS ソースが期待されており、新しい CSS 定義が追加されています。" + +#: ../../functions.rst:249 5a7949de431b4714bf7738ba97e9843e +msgid "" +"one of the font codes present in package `pymupdf-fonts " +"`_ (usually) representing the " +"regular version of the font family." +msgstr "" +"`pymupdf-fonts `_ " +"パッケージに存在するフォントファミリーの通常バージョンを表す、フォントコード。" + +#: ../../functions.rst:250 82d6c0696254483d8cb06494af221497 +msgid "" +"any already existing CSS source, or `None`. The function will append its " +"new definitions to this. This is the string that **must be used** as " +"`user_css` when creating the :ref:`Story`." +msgstr "" +"既存の CSS ソース、または `None`。新しい定義はこれに追加されます。これは :ref:`Story` を作成する際に " +"`user_css` として **使用する必要がある** 文字列です。" + +#: ../../functions.rst:251 2d57c3a9f53d4da38683c5c30aed39d0 +msgid "" +":ref:`Archive`, **mandatory**. All font binaries (i.e. up to four) found " +"for \"fontcode\" will be added to the archive. This is the archive that " +"**must be used** as `archive` when creating the :ref:`Story`." +msgstr "" +":ref:`Archive`、**必須** です。\"fontcode\" " +"に対して見つかるすべてのフォントバイナリ(最大で4つ)がアーカイブに追加されます。これは :ref:`Story` を作成する際に " +":ref:`Archive` として**使用する必要があります** 。" + +#: ../../functions.rst:252 a831e1b443044f949bfac61510ccb880 +msgid "" +"the name under which the \"fontcode\" fonts should be found. If omitted, " +"\"fontcode\" will be used." +msgstr "\"fontcode\" フォントが見つかる名前。省略した場合、\"fontcode\" が使用されます。" + +#: ../../functions.rst:255 4ecf6d28131348f3a658cd320f01200f +msgid "" +"Modified CSS, with appended `@font-face` statements for each font variant" +" of fontcode. Fontbuffers associated with \"fontcode\" will have been " +"added to 'archive'. The function will automatically find up to 4 font " +"variants. All pymupdf-fonts (that are no special purpose like math or " +"music, etc.) have regular, bold, italic and bold-italic variants. To see " +"currently available font codes check " +"`pymupdf.fitz_fontdescriptors.keys()`. This will show something like " +"`dict_keys(['cascadia', 'cascadiai', 'cascadiab', 'cascadiabi', 'figbo', " +"'figo', 'figbi', 'figit', 'fimbo', 'fimo', 'spacembo', 'spacembi', " +"'spacemit', 'spacemo', 'math', 'music', 'symbol1', 'symbol2', 'notosbo', " +"'notosbi', 'notosit', 'notos', 'ubuntu', 'ubuntubo', 'ubuntubi', " +"'ubuntuit', 'ubuntm', 'ubuntmbo', 'ubuntmbi', 'ubuntmit'])`." +msgstr "" +"変更された CSS。\"fontcode\" の各フォントバリアントに対して追加された `@font-face` " +"ステートメントを含みます。関連する pymupdf-fonts のフォントバッファが 'archive' に追加されます。関数は最大で 4 " +"つのフォントバリアントを自動的に見つけます。現在利用可能なフォントコードを確認するには、`pymupdf.fitz_fontdescriptors.keys()`" +" を使用します。これにより、`dict_keys(['cascadia', 'cascadiai', 'cascadiab', " +"'cascadiabi', 'figbo', 'figo', 'figbi', 'figit', 'fimbo', 'fimo', " +"'spacembo', 'spacembi', 'spacemit', 'spacemo', 'math', 'music', " +"'symbol1', 'symbol2', 'notosbo', 'notosbi', 'notosit', 'notos', 'ubuntu'," +" 'ubuntubo', 'ubuntubi', 'ubuntuit', 'ubuntm', 'ubuntmbo', 'ubuntmbi', " +"'ubuntmit'])` のようなものが表示されます。" + +#: ../../functions.rst:257 bd2b0caaa55b48a4a91a57acaef9b5db +msgid "" +"Here is a complete snippet for using the \"Noto Sans\" font instead of " +"\"Helvetica\"::" +msgstr "以下は \"Helvetica\" の代わりに \"Noto Sans\" フォントを使用する完全なスニペットです::" + +#: ../../functions.rst:272 6c7eb3d6af61417aa66cef53d50ec562 +#, fuzzy +msgid "" +"Convenience function to split a rectangle into sub-rectangles of equal " +"size. Returns a list of `rows` lists, each containing `cols` :ref:`Rect` " +"items. Each sub-rectangle can then be addressed by its row and column " +"index." +msgstr "" +"矩形をサブ矩形に分割するための関数です。*行* のリストを含むリストを返し、各リストには指定された *列数* の :ref:`Rect` " +"アイテムが含まれています。各サブ矩形はその行と列のインデックスでアクセスできます。" + +#: ../../functions.rst:274 55f942fd80da4d66bac2b35ee5a51276 +msgid "the rectangle to split." +msgstr "分割する矩形。" + +#: ../../functions.rst:275 555136c14a224f6ea35dce0f1ec9c79d +msgid "the desired number of columns." +msgstr "列の数。" + +#: ../../functions.rst:276 6569fd83357741b98b4916dc1ebc46ae +msgid "the desired number of rows." +msgstr "行の数。" + +#: ../../functions.rst:277 b5930e3aeb564e1c9a36af2644961112 +msgid "" +"a list of :ref:`Rect` objects of equal size, whose union equals *rect*. " +"Here is the layout of a 3x4 table created by `cell = " +"pymupdf.make_table(rect, cols=4, rows=3)`:" +msgstr "" +"等しいサイズの :ref:`Rect` オブジェクトのリストで、それらの合併は元の *rect* と同じです。たとえば、`cell = " +"pymupdf.make_table(rect, cols=4, rows=3)` によって作成された 3x4 " +"のテーブルのレイアウトは次のようになります:" + +#: ../../functions.rst:287 aeb0295f33ed4f3680e7eaae3631829e +msgid "New in version 1.16.2)*" +msgstr "*バージョン1.16.2で新たに導入されました。*" + +#: ../../functions.rst:289 dd962cf0f83d4e7bbe3373a541ff2cd6 +msgid "" +"Return a matrix which maps the line from p1 to p2 to the x-axis such that" +" p1 will become (0,0) and p2 a point with the same distance to (0,0)." +msgstr "p1からp2への直線をx軸にマッピングする行列を返します。その際、p1は(0,0)になり、p2は(0,0)から同じ距離に配置されるように変換されます。" + +#: ../../functions.rst:291 27847587bb6e480588c5a42b1044353e +msgid "starting point of the line." +msgstr "直線の始点。" + +#: ../../functions.rst:292 f4a34c3eb1de46778eed630ba3f8a554 +msgid "end point of the line." +msgstr "直線の終点。" + +#: ../../functions.rst:294 0a643e5f3a5544c9a7f7eb96a29a5e50 +msgid ":ref:`Matrix`" +msgstr "" + +#: ../../functions.rst:295 efc91beb38014b22a8c5dc17a55a5ad8 +msgid "" +"a matrix which combines a rotation and a translation:: >>> p1 = " +"pymupdf.Point(1, 1) >>> p2 = pymupdf.Point(4, 5) >>> abs(p2 - p1) " +"# distance of points 5.0 >>> m = pymupdf.planish_line(p1, p2) " +">>> p1 * m Point(0.0, 0.0) >>> p2 * m Point(5.0, " +"-5.960464477539063e-08) >>> # distance of the resulting points >>> " +"abs(p2 * m - p1 * m) 5.0 .. image:: images/img-planish.png " +":scale: 40" +msgstr "" + +#: ../../functions.rst:295 99ce4ad1b2e44ef9952ab645c2e2a29f +msgid "a matrix which combines a rotation and a translation::" +msgstr "回転と平行移動を組み合わせた行列::" + +#: ../../functions.rst:319 cd6d3f128f3d404cb028905325357f80 +msgid "" +"A dictionary of pre-defines paper formats. Used as basis for " +":meth:`paper_size`." +msgstr "あらかじめ定義された用紙フォーマットの辞書。:meth:`paper_size` の基盤として使用されます。" + +#: ../../functions.rst:325 779d1b9d87ec456386f8b682518e1b8d +msgid "New in v1.17.5" +msgstr "バージョン1.17.5で新たに導入されました" + +#: ../../functions.rst:327 2a02368d2b68461c8413177ff0766a85 +msgid "" +"A dictionary of usable fonts from repository `pymupdf-fonts " +"`_. Items are keyed by their " +"reserved fontname and provide information like this::" +msgstr "" +"`pymupdf-fonts `_ " +"リポジトリから使用可能なフォントの辞書。アイテムは予約されたフォント名でキー付けされ、以下のような情報を提供します::" + +#: ../../functions.rst:343 cb398218853f42038512c72c146c78e8 +msgid "If `pymupdf-fonts` is not installed, the dictionary is empty." +msgstr "`pymupdf-fonts` がインストールされていない場合、この辞書は空です。" + +#: ../../functions.rst:345 d3a4226063254a47a7110bef0f56256f +msgid "" +"The dictionary keys can be used to define a :ref:`Font` via e.g. `font = " +"pymupdf.Font(\"fimo\")` -- just like you can do it with the builtin fonts" +" \"Helvetica\" and friends." +msgstr "" +"辞書のキーは、例えば `font = pymupdf.Font(\"fimo\")` " +"のように使用してフォントを定義できます。これは、組み込みのフォント \"Helvetica\" やその他のフォントと同様に行うことができます。" + +#: ../../functions.rst:351 c7682a849a5048fe99f8c19c9aee7cb3 +msgid "" +"If in `os.environ` when |PyMuPDF| is imported, sets destination of " +"|PyMuPDF| messages. Otherwise messages are sent to `sys.stdout`." +msgstr "" + +#: ../../functions.rst:355 3218febc90704ce79797a636f25c89c1 +msgid "" +"If the value starts with `fd:`, the remaining text should be an integer " +"file descriptor to which messages are written." +msgstr "" + +#: ../../functions.rst:358 638ff04c1b724dbbabe6d8cde2b252fb +msgid "For example `PYMUPDF_MESSAGE=fd:2` will send messages to stderr." +msgstr "" + +#: ../../functions.rst:360 dcaaa640c313488f9ea010cb608902e7 +msgid "" +"If the value starts with `path:`, the remaining text is the path of a " +"file to which messages are written. If the file already exists, it is " +"truncated." +msgstr "" + +#: ../../functions.rst:364 859891baca6846a2a7b06125bba642c4 +msgid "" +"If the value starts with `path+:`, the remaining text is the path of file" +" to which messages are written. If the file already exists, we append " +"output." +msgstr "" + +#: ../../functions.rst:369 917bb5b5660c4baaacb483f666337304 +msgid "" +"If the value starts with `logging:`, messages are written to `Python's " +"logging system `_. The " +"remaining text can contain comma-separated name=value items:" +msgstr "" + +#: ../../functions.rst:373 5220ec2027f94decbb3d5b101d6ac481 +msgid "`level=` sets the logging level." +msgstr "" + +#: ../../functions.rst:374 a4c4076564974edc9a9514098c5d95ed +msgid "`name=` sets the logger name (default is `pymupdf`)." +msgstr "" + +#: ../../functions.rst:376 2a32a55410504c23b6cb4f12682cafa2 +msgid "Other items are ignored." +msgstr "" + +#: ../../functions.rst:378 f0420cb19b0b4ec2bb48cadb322a64ba +msgid "Other prefixes will cause an error." +msgstr "" + +#: ../../functions.rst:380 c3898cead22f497ba9d9c391364c4c8a +msgid "Also see `set_messages()`." +msgstr "" + +#: ../../functions.rst:387 f5c9e9f015d44ceda40361601fc41f38 +msgid "New in v1.19.6" +msgstr "バージョン1.19.6で新たに導入されました" + +#: ../../functions.rst:389 f020265eebef4b1fb070d23fc83beb86 +msgid "" +"Contains about 500 RGB colors in PDF format with the color name as key. " +"To see what is there, you can obviously look at " +"`pymupdf.pdfcolor.keys()`." +msgstr "" +"PDF形式の約500個のRGB色が、色の名前をキーとして含まれています。中身を確認するには、`pymupdf.pdfcolor.keys()` " +"を見ることができます。" + +#: ../../functions.rst:391 e139a8577f8340ffb015fab764a9655a +msgid "Examples:" +msgstr "例:" + +#: ../../functions.rst:393 40f5af3779d84dc282680e022cece0c1 +msgid "`pymupdf.pdfcolor[\"red\"] = (1.0, 0.0, 0.0)`" +msgstr "" + +#: ../../functions.rst:394 413ce7dd0e8543f5868732ac0dbf8ef5 +msgid "" +"`pymupdf.pdfcolor[\"skyblue\"] = (0.5294117647058824, 0.807843137254902, " +"0.9215686274509803)`" +msgstr "" + +#: ../../functions.rst:395 2842b0f25a2c490c94a9b5e64376f558 +msgid "" +"`pymupdf.pdfcolor[\"wheat\"] = (0.9607843137254902, 0.8705882352941177, " +"0.7019607843137254)`" +msgstr "" + +#: ../../functions.rst:401 34272632112745f9a3d80b95aace8419 +msgid "" +"Convenience function to return the current local timestamp in PDF " +"compatible format, e.g. *D:20170501121525-04'00'* for local datetime May " +"1, 2017, 12:15:25 in a timezone 4 hours westward of the UTC meridian." +msgstr "" +"PDF互換の形式で現在のローカルタイムスタンプを返す便利な関数です。例: " +"ローカルの日付と時刻が2017年5月1日12時15分25秒で、UTC子午線の西4時間のタイムゾーンにある場合、*D:20170501121525-04’00’*" +" となります。" + +#: ../../functions.rst:404 d3e70a97bb2d4d0cbd2aabba7e397c47 +msgid "current local PDF timestamp." +msgstr "現在のローカルPDFタイムスタンプ。" + +#: ../../functions.rst:410 99c490db0f4f4638842bcf149740cc60 +msgid "New in version 1.14.7" +msgstr "バージョン1.14.7で新規追加" + +#: ../../functions.rst:412 6857c18d6df5448c947c917d8129b136 +msgid "" +"Calculate the length of text on output with a given **builtin** font, " +":data:`fontsize` and encoding." +msgstr "指定された *組み込み* フォント、:data:`fontsize` 、およびエンコーディングで出力されるテキストの長さを計算します。" + +#: ../../functions.rst:414 0c724a86099b4016a27ca94ce9ff53fb +msgid "the text string." +msgstr "テキスト文字列。" + +#: ../../functions.rst:415 b4f673b68cc549d3a3a4ed4d11159d4f +msgid "" +"the fontname. Must be one of either the :ref:`Base-14-Fonts` or the CJK " +"fonts, identified by their \"reserved\" fontnames (see table in " +":meth:`Page.insert_font`)." +msgstr "" +"フォント名。:ref:`Base-14-Fonts` " +"またはCJKフォントのいずれかでなければなりません。これらは「予約済み」フォント名で識別されます(:meth:`Page.insert_font`" +" のテーブルを参照)。" + +#: ../../functions.rst:416 fd9ab85296864b159297cf2f40c07b63 +msgid "the :data:`fontsize`." +msgstr ":data:`fontsize`。" + +#: ../../functions.rst:417 659e6b24c6a54cd6be9215d2a7b7bec6 +msgid "" +"the encoding to use. Besides 0 = Latin, 1 = Greek and 2 = Cyrillic " +"(Russian) are available. Relevant for Base-14 fonts \"Helvetica\", " +"\"Courier\" and \"Times\" and their variants only. Make sure to use the " +"same value as in the corresponding text insertion." +msgstr "" +"使用するエンコーディング。0 = ラテン、1 = ギリシャ、2 = " +"キリル文字(ロシア語)が利用可能です。ベース14フォント「Helvetica」、「Courier」、「Times」とそのバリアントにのみ関連します。対応するテキスト挿入で使用する値と同じ値を使用してください。" + +#: ../../functions.rst:419 085fba5efa1b441c8a7a959307fadd09 +msgid "" +"the length in points the string will have (e.g. when used in " +":meth:`Page.insert_text`)." +msgstr "文字列が持つポイント単位の長さ(たとえば、:meth:`Page.insert_text` で使用する場合)。" + +#: ../../functions.rst:421 0071e0f0d968469dbcf4380c63ab4efe +msgid "" +"This function will only do the calculation -- it won't insert font nor " +"text." +msgstr "この関数は計算のみを行います - フォントまたはテキストを挿入しません。" + +#: ../../functions.rst:423 beca988a40a1457882759c4d1063a516 +msgid "" +"The :ref:`Font` class offers a similar method, :meth:`Font.text_length`, " +"which supports Base-14 fonts and any font with a character map (CMap, " +"Type 0 fonts)." +msgstr "" +":ref:`Font` クラスは、Base-14フォントおよび文字マップ(CMap、Type " +"0フォントをサポートする任意のフォントに対応した似たようなメソッド :meth:`Font.text_length` を提供しています。" + +#: ../../functions.rst:425 2621d40424214d07bbd4fab562324e61 +msgid "" +"If you use this function to determine the required rectangle width for " +"the (:ref:`Page` or :ref:`Shape`) *insert_textbox* methods, be aware that" +" they calculate on a **by-character level**. Because of rounding effects," +" this will mostly lead to a slightly larger number: " +"*sum([pymupdf.get_text_length(c) for c in text]) > " +"pymupdf.get_text_length(text)*. So either (1) do the same, or (2) use " +"something like *pymupdf.get_text_length(text + \"'\")* for your " +"calculation." +msgstr "" +"この関数を使用して(:ref:`Page` または :ref:`Shape`)*insert_textbox* " +"メソッドの必要な矩形の幅を決定する場合、**文字単位で** " +"計算されることに注意してください。丸め効果のため、これはほとんどの場合、やや大きな数になります:*sum([pymupdf.get_text_length(c)" +" for c in text]) > " +"pymupdf.get_text_length(text)*。したがって、(1)同じことを行うか、(2)計算に " +"*pymupdf.get_text_length(text + \"’\")* のようなものを使用してください。" + +#: ../../functions.rst:431 858e74323bb44d4aabd1ec6116e45b4e +msgid "" +"Make a PDF-compatible string: if the text contains code points *ord(c) > " +"255*, then it will be converted to UTF-16BE with BOM as a hexadecimal " +"character string enclosed in \"<>\" brackets like **. Otherwise," +" it will return the string enclosed in (round) brackets, replacing any " +"characters outside the ASCII range with some special code. Also, every " +"\"(\", \")\" or backslash is escaped with a backslash." +msgstr "" +"PDF互換の文字列を作成します。テキストに含まれる文字のコードポイントが *ord(c) > 255* の場合、それはUTF-" +"16BEに変換され、BOMが含まれた16進数の文字列で \"<>\" " +"ブラケットで囲まれます。それ以外の場合は、ASCII範囲外の文字を特別なコードで置き換えて、(丸い)カッコで囲まれた文字列が返されます。また、すべての" +" \"(\", \")\", またはバックスラッシュはバックスラッシュでエスケープされます。" + +#: ../../functions.rst:433 70176ef4d97a409dbe6452c90d7b85c1 +msgid "the object to convert" +msgstr "変換するオブジェクト" + +#: ../../functions.rst:436 d145e56ff72f4b868a418777ae2da0a0 +msgid "PDF-compatible string enclosed in either *()* or *<>*." +msgstr "*()* または *<>* で囲まれたPDF互換の文字列。" + +#: ../../functions.rst:442 7699f58d78164df49cc2d12fb7cd9b3c +msgid "New in v1.16.7" +msgstr "バージョン1.16.7で新規追加" + +#: ../../functions.rst:443 957b32a916a54e10a69ac468ba8ebf2f +msgid "" +"Changed in v1.19.5: also return natural image orientation extracted from " +"EXIF data if present." +msgstr "バージョン1.19.5で変更:EXIFデータから抽出した自然な画像の向きも返すように変更されました。" + +#: ../../functions.rst:444 af5e89f8d4524be697b2b949fe03e45a +msgid "" +"Changed in v1.22.5: always return `None` in error cases instead of an " +"empty dictionary." +msgstr "バージョン1.22.5で変更:エラーケースで空の辞書ではなく、常に `None` を返すように変更されました。" + +#: ../../functions.rst:446 2b1e37d83d2345f9bc9d910447b32df9 +msgid "" +"Show important properties of an image provided as a memory area. Its main" +" purpose is to avoid using other Python packages just to determine them." +msgstr "メモリ領域として提供される画像の重要なプロパティを表示します。主な目的は、これらのプロパティを決定するために他のPythonパッケージを使用しないようにすることです。" + +#: ../../functions.rst:448 bb83a10d199948a28baf855069679fff +msgid "" +"either an image in memory or an **opened** file. An image in memory may " +"be any of the formats `bytes`, `bytearray` or `io.BytesIO`." +msgstr "" +"メモリ内の画像または **開いた** ファイル。メモリ内の画像は、`bytes`、`bytearray`、または `io.BytesIO` " +"形式のいずれかです。" + +#: ../../functions.rst:451 b80c362428224f70893f7dbb18fef550 +msgid "" +"No exception is ever raised. In case of an error, `None` is returned. " +"Otherwise, there are the following items:: In [2]: " +"pymupdf.image_profile(open(\"nur-ruhig.jpg\", \"rb\").read()) Out[2]:" +" {'width': 439, 'height': 501, 'orientation': 0, # natural " +"orientation (from EXIF) 'transform': (1.0, 0.0, 0.0, 1.0, 0.0, 0.0), " +"# orientation matrix 'xres': 96, 'yres': 96, 'colorspace': 3," +" 'bpc': 8, 'ext': 'jpeg', 'cs-name': 'DeviceRGB'} There is the " +"following relation to **Exif** information encoded in `orientation`, and " +"correspondingly in the `transform` matrix-like (quoted from MuPDF " +"documentation, *ccw* = counter-clockwise): 0. Undefined 1. 0 " +"degree ccw rotation. (Exif = 1) 2. 90 degree ccw rotation. (Exif = 8)" +" 3. 180 degree ccw rotation. (Exif = 3) 4. 270 degree ccw rotation." +" (Exif = 6) 5. flip on X. (Exif = 2) 6. flip on X, then rotate ccw " +"by 90 degrees. (Exif = 5) 7. flip on X, then rotate ccw by 180 " +"degrees. (Exif = 4) 8. flip on X, then rotate ccw by 270 degrees. " +"(Exif = 7) .. note:: * For some \"exotic\" images (FAX encodings, " +"RAW formats and the like), this method will not work. You can however " +"still work with such images in PyMuPDF, e.g. by using " +":meth:`Document.extract_image` or create pixmaps via `Pixmap(doc, xref)`." +" These methods will automatically convert exotic images to the PNG format" +" before returning results. * You can also get the properties of images" +" embedded in a PDF, via their :data:`xref`. In this case make sure to " +"extract the raw stream: " +"`pymupdf.image_profile(doc.xref_stream_raw(xref))`. * Images as " +"returned by the image blocks of :meth:`Page.get_text` using \"dict\" or " +"\"rawdict\" options are also supported." +msgstr "" + +#: ../../functions.rst:452 e84943d4656c4a47972901421acab8c3 +msgid "" +"No exception is ever raised. In case of an error, `None` is returned. " +"Otherwise, there are the following items::" +msgstr "例外は発生しません。エラーの場合、`None` が返されます。それ以外の場合、以下のアイテムがあります::" + +#: ../../functions.rst:467 e1c02eedf39547b084e5b48dafffc726 +msgid "" +"There is the following relation to **Exif** information encoded in " +"`orientation`, and correspondingly in the `transform` matrix-like (quoted" +" from MuPDF documentation, *ccw* = counter-clockwise):" +msgstr "" +"以下は、**Exif** 情報にエンコードされた`orientation` と、対応する `transform` " +"マトリックスの関係です(MuPDFドキュメンテーションから引用、*ccw* = 反時計回り):" + +#: ../../functions.rst:469 df23c683fa524ccc9fccbcd2d69f503e +msgid "Undefined" +msgstr "未定義" + +#: ../../functions.rst:470 f8275cc11f6449f9b1b4178b941b44a7 +msgid "0 degree ccw rotation. (Exif = 1)" +msgstr "0度の反時計回りの回転(Exif = 1)" + +#: ../../functions.rst:471 a8b3b37b73d04b739d9a8a259e1dff82 +msgid "90 degree ccw rotation. (Exif = 8)" +msgstr "90度の反時計回りの回転(Exif = 8)" + +#: ../../functions.rst:472 062c3fd11ce642f1b8a0e042967c25d8 +msgid "180 degree ccw rotation. (Exif = 3)" +msgstr "180度の反時計回りの回転(Exif = 3)" + +#: ../../functions.rst:473 21c985c2793d46ff943d7bb01b5fde98 +msgid "270 degree ccw rotation. (Exif = 6)" +msgstr "270度の反時計回りの回転(Exif = 6)" + +#: ../../functions.rst:474 3ed66a5ba1bb42b892ec496c07ea5b91 +msgid "flip on X. (Exif = 2)" +msgstr "X軸で反転(Exif = 2)" + +#: ../../functions.rst:475 54290bb6ec3b4339b21edb09c7be1a9a +msgid "flip on X, then rotate ccw by 90 degrees. (Exif = 5)" +msgstr "X軸で反転し、さらに90度反時計回りに回転(Exif = 5)" + +#: ../../functions.rst:476 c893cd5f6eaa4095b8d48dcafbe803db +msgid "flip on X, then rotate ccw by 180 degrees. (Exif = 4)" +msgstr "X軸で反転し、さらに180度反時計回りに回転(Exif = 4)" + +#: ../../functions.rst:477 a4b2bb8a084748028583799fa437e2b0 +msgid "flip on X, then rotate ccw by 270 degrees. (Exif = 7)" +msgstr "X軸で反転し、さらに270度反時計回りに回転(Exif = 7)" + +#: ../../functions.rst:482 9642e070a8d94067bd7aafb6a79a0acf +msgid "" +"For some \"exotic\" images (FAX encodings, RAW formats and the like), " +"this method will not work. You can however still work with such images in" +" PyMuPDF, e.g. by using :meth:`Document.extract_image` or create pixmaps " +"via `Pixmap(doc, xref)`. These methods will automatically convert exotic " +"images to the PNG format before returning results." +msgstr "" +"一部の「エキゾチック」な画像(FAXエンコーディング、RAWフォーマットなど)では、この方法は機能しない場合があります。ただし、PyMuPDFでは引き続きこのような画像を使用できます。たとえば、:meth:`Document.extract_image`" +" を使用したり、`Pixmap(doc, xref)` " +"を介してピクマップを作成したりできます。これらのメソッドは、結果を返す前にエキゾチックな画像を自動的にPNG形式に変換します。" + +#: ../../functions.rst:483 0730816b36d9470fbfe5ffef4c9916a3 +msgid "" +"You can also get the properties of images embedded in a PDF, via their " +":data:`xref`. In this case make sure to extract the raw stream: " +"`pymupdf.image_profile(doc.xref_stream_raw(xref))`." +msgstr "" +"また、PDFに埋め込まれた画像のプロパティをxrefを介して取得することもできます。この場合は生のストリームを抽出してください: " +"`pymupdf.image_profile(doc.xref_stream_raw(xref))`。" + +#: ../../functions.rst:484 df9a9e1daad84638aab9b2dbf6914506 +msgid "" +"Images as returned by the image blocks of :meth:`Page.get_text` using " +"\"dict\" or \"rawdict\" options are also supported." +msgstr ":meth:`Page.get_text` の画像ブロックが「dict」または「rawdict」オプションを使用して返す画像もサポートされています。" + +#: ../../functions.rst:491 a1e2b993edc14a45a3b8fe89caaa8ef1 +msgid "" +"Return the header string required to make a valid document out of page " +"text outputs." +msgstr "ページのテキスト出力を有効なドキュメントに変換するために必要なヘッダー文字列を返します。" + +#: ../../functions.rst:493 ../../functions.rst:505 +#: 6a1fbfdf58b5440b96cb4b908da4904a aed0ed261f5f497998e8def7b43eb6d2 +msgid "type of document. Use the same as the output parameter of *get_text()*." +msgstr "ドキュメントの種類。*get_text()* メソッドのoutputパラメータと同じものを使用します。" + +#: ../../functions.rst:495 ca6c0195dc2f4fe28a7a155716d6fdc8 +msgid "optional arbitrary name to use in output types \"json\" and \"xml\"." +msgstr "出力タイプ \"json\" および \"xml\" で使用するオプションの任意の名前。" + +#: ../../functions.rst:503 4afb8f14f4734193aaa42c1517d46188 +msgid "" +"Return the trailer string required to make a valid document out of page " +"text outputs. See :meth:`Page.get_text` for an example." +msgstr ":meth:`Page.get_text` の例を参照して、ページテキストの出力から有効な文書を作成するために必要なトレーラー文字列を返します。" + +#: ../../functions.rst:513 8d482e8a42044ad3afc48e2b7fffb27c +msgid "" +"Delete an object containing XML-based metadata from the PDF. (Py-) MuPDF " +"does not support XML-based metadata. Use this if you want to make sure " +"that the conventional metadata dictionary will be used exclusively. Many " +"thirdparty PDF programs insert their own metadata in XML format and thus " +"may override what you store in the conventional dictionary. This method " +"deletes any such reference, and the corresponding PDF object will be " +"deleted during next garbage collection of the file." +msgstr "PDF内からXMLベースのメタデータを含むオブジェクトを削除します。PyMuPDFではXMLベースのメタデータはサポートされていません。従って、従来のメタデータ辞書が排他的に使用されることを確認したい場合に使用します。多くのサードパーティのPDFプログラムは、独自のXML形式でメタデータを挿入し、従来の辞書に保存されている内容を上書きする可能性があります。このメソッドはそのような参照を削除し、ファイルの次回のガベージコレクション時に対応するPDFオブジェクトが削除されます。" + +#: ../../functions.rst:519 a72aa87bc68e4f4bb9703be4f74f288a +msgid "" +"Return the XML-based metadata :data:`xref` of the PDF if present -- also " +"refer to :meth:`Document.del_xml_metadata`. You can use it to retrieve " +"the content via :meth:`Document.xref_stream` and then work with it using " +"some XML software." +msgstr "" +"PDFのXMLベースのメタデータの :data:`xref` " +"を返します。存在する場合は、:meth:`Document.del_xml_metadata` " +"にも言及してください。これを使用して、:meth:`Document.xref_stream` " +"を介してコンテンツを取得し、それをいくつかのXMLソフトウェアを使用して操作できます。" + +#: ../../functions.rst:522 67817bc5551a458ba2d78406a1906f21 +msgid ":data:`xref` of PDF file level XML metadata -- or 0 if none exists." +msgstr "PDFファイルレベルのXMLメタデータの :data:`xref`。存在しない場合は0。" + +#: ../../functions.rst:528 93cbd43b947f4be192e5cebeef12cb87 +msgid "Run a page through a device." +msgstr "ページをデバイスを通じて実行します。" + +#: ../../functions.rst:530 b72d243c438742829f57b99405fd7d9d +msgid "Device, obtained from one of the :ref:`Device` constructors." +msgstr ":ref:`Device`。デバイスのコンストラクタから取得します。" + +#: ../../functions.rst:533 c8020e6ac06b490d910a4453c2720322 +msgid "" +"Transformation to apply to the page. Set it to :ref:`Identity` if no " +"transformation is desired." +msgstr "ページに適用する変換。変換を行わない場合は :ref:`Identity` に設定します。" + +#: ../../functions.rst:540 9992ddbf8ffc4b54959b0e40bc882789 +msgid "New in v1.19.0" +msgstr "新機能 v1.19.0" + +#: ../../functions.rst:541 16801d5fbea9459f9c065e2e45ad6785 +msgid "" +"Changed in v1.22.0: optionally also return the OCG name applicable to the" +" boundary box." +msgstr "v1.22.0 で変更: 境界ボックスに適用される OCG 名もオプションで返すように変更されました。" + +#: ../../functions.rst:543 67438b98643c46ff80ff211265df90a4 +msgid "" +"a list of rectangles that envelop text, image or drawing objects. Each " +"item is a tuple `(type, (x0, y0, x1, y1))` where the second tuple " +"consists of rectangle coordinates, and *type* is one of the following " +"values. If `layers=True`, there is a third item containing the OCG name " +"or `None`: `(type, (x0, y0, x1, y1), None)`. * `\"fill-text\"` -- normal" +" text (painted without character borders) * `\"stroke-text\"` -- text " +"showing character borders only * `\"ignore-text\"` -- text that should " +"not be displayed (e.g. as used by OCR text layers) * `\"fill-path\"` -- " +"drawing with fill color (and no border) * `\"stroke-path\"` -- drawing " +"with border (and no fill color) * `\"fill-image\"` -- displays an image *" +" `\"fill-shade\"` -- display a shading The item sequence represents the " +"**sequence in which these commands are executed** to build the page's " +"appearance. Therefore, if an item's bbox intersects or contains that of a" +" previous item, then the previous item may be (partially) covered / " +"hidden. So this list can be used to detect such situations. An item's " +"index in this list equals the value of a `\"seqno\"` in dictionaries as " +"returned by :meth:`Page.get_drawings` and :meth:`Page.get_texttrace`." +msgstr "" + +#: ../../functions.rst:543 4c16c87165ac4c3683c707f5278e54b2 +msgid "" +"a list of rectangles that envelop text, image or drawing objects. Each " +"item is a tuple `(type, (x0, y0, x1, y1))` where the second tuple " +"consists of rectangle coordinates, and *type* is one of the following " +"values. If `layers=True`, there is a third item containing the OCG name " +"or `None`: `(type, (x0, y0, x1, y1), None)`." +msgstr "" +"テキスト、画像、または描画オブジェクトを囲む矩形のリスト。各アイテムはタプル `(type, (x0, y0, x1, y1))` " +"で、第2のタプルは矩形の座標を表し、*type* は以下の値のいずれかです。`layers=True` の場合、OCG 名または `None` " +"を含む第3のアイテムがあります: `(type, (x0, y0, x1, y1), None)`。" + +#: ../../functions.rst:545 1f3baaa90f60489491041912650d2b50 +msgid "`\"fill-text\"` -- normal text (painted without character borders)" +msgstr "`\"fill-text\"` – 通常のテキスト(文字の境界線なしで描画)" + +#: ../../functions.rst:546 8d03bd3914d941edad4fa681105b6cde +msgid "`\"stroke-text\"` -- text showing character borders only" +msgstr "`\"stroke-text\"` – 文字の境界線のみを表示するテキスト" + +#: ../../functions.rst:547 564648c64c1c4eb1acd881a4c41559a8 +msgid "" +"`\"ignore-text\"` -- text that should not be displayed (e.g. as used by " +"OCR text layers)" +msgstr "`\"ignore-text\"` – 表示されないべきテキスト(OCR テキストレイヤーなどで使用されます)" + +#: ../../functions.rst:548 2012f24724e24d2e8b3d682c6c0dd95f +msgid "`\"fill-path\"` -- drawing with fill color (and no border)" +msgstr "`\"fill-path\"` – 塗りつぶしカラーで描画(境界線なし)" + +#: ../../functions.rst:549 e50df5ab435b4f6b90c2a709b7c028ec +msgid "`\"stroke-path\"` -- drawing with border (and no fill color)" +msgstr "`\"stroke-path\"` – 境界線で描画(塗りつぶしカラーなし)" + +#: ../../functions.rst:550 dddccd8f9bf14c8392556865bb9ff046 +msgid "`\"fill-image\"` -- displays an image" +msgstr "`\"fill-image\"` – 画像を表示" + +#: ../../functions.rst:551 516bf44416f3416186b1467931874344 +msgid "`\"fill-shade\"` -- display a shading" +msgstr "`\"fill-shade\"` – シェーディングを表示" + +#: ../../functions.rst:553 c6d414fcee334ceda5a5d4f95b414a9a +msgid "" +"The item sequence represents the **sequence in which these commands are " +"executed** to build the page's appearance. Therefore, if an item's bbox " +"intersects or contains that of a previous item, then the previous item " +"may be (partially) covered / hidden." +msgstr "" +"**アイテムのシーケンスは、ページの外観を構築するためにこれらのコマンドが実行される** 順序を表します。したがって、アイテムの bbox " +"が前のアイテムの bbox と交差または包含されている場合、前のアイテムは(部分的に)カバー / 隠される可能性があります。" + +#: ../../functions.rst:556 6c70aeead4b54e3f8ffc2e256995b69c +msgid "" +"So this list can be used to detect such situations. An item's index in " +"this list equals the value of a `\"seqno\"` in dictionaries as returned " +"by :meth:`Page.get_drawings` and :meth:`Page.get_texttrace`." +msgstr "" +"したがって、このリストを使用してそのような状況を検出できます。このリスト内のアイテムのインデックスは、:meth:`Page.get_drawings`" +" および :meth:`Page.get_texttrace` によって返される辞書の `\"seqno\"` の値と等しいです。" + +#: ../../functions.rst:563 30b8d1185b0f421f93d06d5a205a7f67 +msgid "New in v1.18.16" +msgstr "v1.18.16 で新機能" + +#: ../../functions.rst:564 620e7dd916f64b479cfda147a4f22c8d +msgid "Changed in v1.19.0: added key \"seqno\"." +msgstr "v1.19.0 で変更: キー \"seqno\" を追加。" + +#: ../../functions.rst:565 8063a63deea0416da32bcbd66284d856 +msgid "" +"Changed in v1.19.1: stroke and fill colors now always are either RGB or " +"GRAY" +msgstr "v1.19.1 で変更: ストロークと塗りつぶしのカラーは常に RGB または GRAY です" + +#: ../../functions.rst:566 8a6ee644042c4f919504d2a366486ed5 +msgid "" +"Changed in v1.19.3: span and character bboxes are now also correct if " +"`dir != (1, 0)`." +msgstr "v1.19.3 で変更: `dir != (1, 0)` の場合、スパンと文字の bbox も正確になりました。" + +#: ../../functions.rst:567 97c4ae32b3a0440c8c1f1296fbe716b2 +msgid "Changed in v1.22.0: add new dictionary key \"layer\"." +msgstr "v1.22.0 で変更: 新しい辞書キー \"layer\" を追加。" + +#: ../../functions.rst:570 45cc7e74a2654f74bcfcabb65d9e7803 +msgid "" +"Return low-level text information of the page. The method is available " +"for **all** document types. The result is a list of Python dictionaries " +"with the following content::" +msgstr "" +"ページの低レベルなテキスト情報を返します。このメソッドは **すべて** のドキュメントタイプで利用可能です。結果は、以下の内容を持つ " +"Python 辞書のリストです。" + +#: ../../functions.rst:606 759d41b8e915425ba2f090d80b0065ab +msgid "Details:" +msgstr "" + +#: ../../functions.rst:608 c027ebb8dfaf4fb9b454d85a0898a0bd +msgid "" +"Information above tagged with \"(1)\" has the same meaning and value as " +"explained in :ref:`TextPage`." +msgstr "「(1)」でタグ付けされた情報は、:ref:`TextPage` で説明された内容と同じ意味と値を持っています。" + +#: ../../functions.rst:610 71a2f731acc249aa8278de7e3edbb5a5 +#, fuzzy +msgid "" +"Please note that the font ``flags`` value will never contain a " +"*superscript* flag bit: the detection of superscripts is done within " +"MuPDF :ref:`TextPage` code -- it is not a property of any font." +msgstr "" +"フォント `flags` の値には *superscript* フラグビットが含まれないことに注意してください。上付き文字の検出はMuPDF " +":ref:`TextPage` 内で行われます。これは任意のフォントのプロパティではありません。" + +#: ../../functions.rst:611 71e973788c8e4fd29354221499b566c1 +msgid "" +"Also note, that the text *color* is encoded as the usual tuple of floats " +"0 <= f <= 1 -- not in sRGB format. Depending on `span[\"type\"]`, " +"interpret this as fill color or stroke color." +msgstr "" +"また、テキストの *color* は通常の浮動小数点数のタプル(0 <= f <= " +"1)でエンコードされており、sRGB形式ではありません。`span[\"type\"]` " +"に応じて、これを塗りつぶし色またはストローク色として解釈してください。" + +#: ../../functions.rst:613 93a130aa74244d8ea4f44649257f0b0f +msgid "There are 3 text span types:" +msgstr "テキストスパンには3つのタイプがあります:" + +#: ../../functions.rst:615 04bb9e4a8efd4f36b449a896e093e997 +msgid "" +"0: Filled text -- equivalent to PDF text rendering mode 0 (`0 Tr`, the " +"default in PDF), only each character's \"inside\" is shown." +msgstr "0:塗りつぶしテキスト - PDFテキストレンダリングモード0(`0 Tr`、PDFのデフォルト)と同等で、各文字の「内部」のみが表示されます。" + +#: ../../functions.rst:616 66862797440b4cc89f0ab0b03386f731 +msgid "" +"1: Stroked text -- equivalent to `1 Tr`, only the character borders are " +"shown." +msgstr "1:ストロークテキスト - `1 Tr` に相当し、文字の境界のみが表示されます。" + +#: ../../functions.rst:617 2acf0e18c96e49f880293e3357fd16f6 +msgid "3: Ignored text -- equivalent to `3 Tr` (hidden text)." +msgstr "3:無視されたテキスト - `3 Tr` に相当し(非表示テキスト)。" + +#: ../../functions.rst:619 9de9513b92b54ed1bbabbb458b2a9fa0 +#, python-format +msgid "" +"Line width in this context is important only for processing " +"`span[\"type\"] != 0`: it determines the thickness of the character's " +"border line. This value may not be provided at all with the text data. In" +" this case, a value of 5% of the :data:`fontsize` (`span[\"size\"] * " +"0,05`) is generated. Often, an \"artificial\" bold text in PDF is created" +" by `2 Tr`. There is no equivalent span type for this case. Instead, " +"respective text is represented by two consecutive spans -- which are " +"identical in every aspect, except for their types, which are 0, resp 1. " +"It is your responsibility to handle this type of situation - in " +":meth:`Page.get_text`, MuPDF is doing this for you." +msgstr "" +"この文脈では、線の幅は `span[\"type\"] != 0` " +"を処理する際にのみ重要であり、文字の境界線の厚さを決定します。この値はテキストデータと一緒に提供されないこともあります。この場合、:data:`fontsize`" +" の5%(`span[\"size\"] * 0.05`)の値が生成されます。PDF内の「人工」の太字テキストは、通常、`2 Tr` " +"によって作成されます。この場合、このケースの等価なスパンタイプは存在しません。代わりに、対応するテキストは2つの連続したスパンによって表されます。これらのスパンはすべての側面が同一であり、タイプ以外は異なります(0、1)。このタイプの状況を処理する責任はあなたにあります。:meth:`Page.get_text`" +" では、MuPDFがこれを代わりに行います。" + +#: ../../functions.rst:620 6487d82fbf414538ae086ba2cf0e5fab +msgid "" +"For data compactness, the character's unicode is provided here. Use " +"built-in function `chr()` for the character itself." +msgstr "データのコンパクトさのために、文字のUnicodeがここで提供されます。文字自体には `chr()` という組み込み関数を使用します。" + +#: ../../functions.rst:621 a660bf6a775c44649783a559eaf01274 +msgid "" +"The alpha / opacity value of the span's text, `0 <= opacity <= 1`, 0 is " +"invisible text, 1 (100%) is intransparent. Depending on `span[\"type\"]`," +" interpret this value as *fill* opacity or, resp. *stroke* opacity." +msgstr "" +"スパンのテキストのアルファ/不透明度値、`0 <= opacity <= " +"1`、0は見えないテキスト、1(100%)は不透明です。`span[\"type\"]` に応じて、この値を *fill* の不透明度または " +"*stroke* の不透明度として解釈してください。" + +#: ../../functions.rst:622 e6e4063809024f138da4aeb44e24eb92 +msgid "" +"*(Changed in v1.19.0)* This value is equal or close to `char[\"bbox\"]` " +"of \"rawdict\". In particular, the bbox **height** value is always " +"computed as if **\"small glyph heights\"** had been requested." +msgstr "" +"*(v1.19.0で変更)* この値は「rawdict」の `char[\"bbox\"]` と等しいか、近い値です。特に、bboxの " +"**高さ** の値は常に **「小さなグリフの高さ」** が要求されたかのように計算されます。" + +#: ../../functions.rst:623 4e62b747b8f9441b9279357f81f7e81b +msgid "*(New in v1.19.0)* This is the union of all character bboxes." +msgstr "*(v1.19.0で新規)* これはすべての文字bboxの合併です。" + +#: ../../functions.rst:624 03fcc8f1fed344049ab836df258e5e2e +msgid "" +"*(New in v1.19.0)* Enumerates the commands that build up the page's " +"appearance. Can be used to find out whether text is effectively hidden by" +" objects, which are painted \"later\", or *over* some object. So if there" +" is a drawing or image with a higher sequence number, whose bbox overlaps" +" (parts of) this text span, one may assume that such an object hides the " +"resp. text. Different text spans have identical sequence numbers if they " +"were created in one go." +msgstr "" +"*(v1.19.0で新規)* " +"ページの外観を構築するコマンドを列挙します。テキストが実際には後で「描画」されるオブジェクトによって隠れるか、またはいくつかのオブジェクトの上にかかっているかを判断するのに使用できます。したがって、bboxがこのテキストスパンのbboxと交差または含まれている場合、以前のアイテムが(部分的に)カバー/非表示にされる可能性があります。" + +#: ../../functions.rst:625 a332ef5bf29948dab52498a113c1055c +msgid "" +"*(New in v1.22.0)* The name of the Optional Content Group (OCG) if " +"applicable or `None`." +msgstr "*(v1.22.0で新規)* 該当する場合、Optional Content Group(OCG)の名前、または `None`" + +#: ../../functions.rst:627 7d10accada8e4512a251cde3f4f3cb77 +msgid "" +"Here is a list of similarities and differences of `page.get_texttrace()` " +"compared to `page.get_text(\"rawdict\")`:" +msgstr "" +"以下は、`page.get_texttrace()` と `page.get_text(\"rawdict\")` " +"を比較した類似点と相違点のリストです:" + +#: ../../functions.rst:629 14d43abeade64ae6a7e7a66f55a6b4e8 +msgid "" +"The method is up to **twice as fast,** compared to \"rawdict\" " +"extraction. Depends on the amount of text." +msgstr "メソッドは、テキストの量に依存しますが、\"rawdict\" の抽出と比較して最大 **2倍速い** です。" + +#: ../../functions.rst:630 ddaa111279c14f54bf12d70419804137 +msgid "" +"The returned data is very **much smaller in size** -- although it " +"provides more information." +msgstr "返されるデータは **非常に小さく** 、より多くの情報を提供します。" + +#: ../../functions.rst:631 b5fcaaa904c848cb99d1cfd72df19bad +msgid "" +"Additional types of text **invisibility can be detected**: opacity = 0 or" +" type > 1 or overlapping bbox of an object with a higher sequence number." +msgstr "" +"追加のテキストの **不可視性のタイプを検出できます** :不透明度 = 0またはタイプ > " +"1またはシーケンス番号の高いオブジェクトとの境界ボックスが重なる。" + +#: ../../functions.rst:632 d3ffddc4c8c74b0c87f348dfe1c80ad4 +msgid "" +"If MuPDF returns unicode 0xFFFD (65533) for unrecognized characters, you " +"may still be able to deduct desired information from the glyph id." +msgstr "" +"MuPDFが認識できない文字に対してUnicode " +"0xFFFD(65533)を返す場合、グリフIDから必要な情報を導き出すことができるかもしれません。" + +#: ../../functions.rst:633 dd5d04de31034398aa834b1cdb6e56f6 +msgid "" +"The `span[\"chars\"]` **contains no spaces**, **except** the document " +"creator has explicitly coded them. They **will never be generated** like " +"it happens in :meth:`Page.get_text` methods. To provide some help for " +"doing your own computations here, the width of a space character is " +"given. This value is derived from the font where possible. Otherwise the " +"value of a fallback font is taken." +msgstr "" +"`span[\"chars\"]` には **スペースは含まれません** 。ただし、ドキュメントの作成者が明示的にコード化しない限り、それらは " +":meth:`Page.get_text` メソッドで発生するように " +"**生成されません**。自分自身の計算を行うのを助けるために、スペース文字の幅が提供されます。この値はフォントから派生しています。それ以外の場合はフォールバックフォントの値が取られます。" + +#: ../../functions.rst:634 9652b19463904ee38c62ccc2b2239dff +msgid "" +"There is no effort to organize text like it happens for a :ref:`TextPage`" +" (the hierarchy of blocks, lines, spans, and characters). Characters are " +"simply extracted in sequence, one by one, and put in a span. Whenever any" +" of the span's characteristics changes, a new span is started. So you may" +" find characters with different `origin.y` values in the same span (which" +" means they would appear in different lines). You cannot assume, that " +"span characters are sorted in any particular order -- you must make sense" +" of the info yourself, taking `span[\"dir\"]`, `span[\"wmode\"]`, etc. " +"into account." +msgstr "" +":ref:`TextPage` " +"のようにテキストを整理する取り組みはありません(ブロック、行、スパン、および文字の階層構造)。文字は単純に順番に抽出され、スパンに配置されます。スパンの特性が変更されるたびに、新しいスパンが開始されます。したがって、同じスパン内で異なる" +" `origin.y` " +"値を持つ文字を見つけることができます(これは異なる行に表示されることを意味します)。スパンの文字が特定の順序でソートされているとは仮定できません。情報を理解し、`span[\"dir\"]`、`span[\"wmode\"]`" +" などを考慮に入れる必要があります。" + +#: ../../functions.rst:652 e762a117d84b42ecb7cf21d17049df5f +msgid "Ligatures are represented like this:" +msgstr "リガチャは次のように表されます:" + +#: ../../functions.rst:636 329c84b204634c1294fb77b93a056ef4 +msgid "" +"MuPDF handles the following ligatures: \"fi\", \"ff\", \"fl\", \"ft\", " +"\"st\", \"ffi\", and \"ffl\" (only the first 3 are mostly ever used). If " +"the page contains e.g. ligature \"fi\", you will find the following two " +"character items subsequent to each other::" +msgstr "" +"MuPDFは次のリガチャを処理します。 \"fi\"、 \"ff\"、 \"fl\"、 \"ft\"、 \"st\"、 \"ffi\"、および " +"\"ffl\"(ほとんどは最初の3つが使用されます)。したがって、ページに \"fi\" " +"のようなリガチャが含まれている場合、次の2つの文字アイテムが連続して表示されます。" + +#: ../../functions.rst:641 da6322372fb64311b85eea0cc2f68ccd +msgid "" +"This means that the bbox of the first ligature character is the area " +"containing the complete, compound glyph. Subsequent ligature components " +"are recognizable by their glyph value -1 and a bbox of width zero." +msgstr "これにより、最初の合字文字のbboxは、完全な合成グリフを含む領域です。後続の合字コンポーネントは、そのグリフ値が-1で幅がゼロであることで識別できます。" + +#: ../../functions.rst:642 2284a09cdf584b6aa6f22b5aae18d4fe +msgid "" +"You may want to replace those 2 or 3 char tuples by one, that represents " +"the ligature itself. Use the following mapping of ligatures to unicodes:" +msgstr "これらの2つまたは3つの文字のタプルを、合字自体を表す1つに置き換えたい場合があるかもしれません。次のような合字をUnicodeにマッピングします。" + +#: ../../functions.rst:644 0a799c550746408c85fef2c7576bfede +msgid "`\"ff\" -> 0xFB00`" +msgstr "" + +#: ../../functions.rst:645 9898a6f56cfe4eb4899294c8b6b06002 +msgid "`\"fi\" -> 0xFB01`" +msgstr "" + +#: ../../functions.rst:646 5e3800258f5348718ebba22eb1059a6f +msgid "`\"fl\" -> 0xFB02`" +msgstr "" + +#: ../../functions.rst:647 936b3a65e7c742f8bfe6b86441ea1a98 +msgid "`\"ffi\" -> 0xFB03`" +msgstr "" + +#: ../../functions.rst:648 a4aa6a1539d043e8abfc0b8638794818 +msgid "`\"ffl\" -> 0xFB04`" +msgstr "" + +#: ../../functions.rst:649 984fff2c59774ed1be88a1741f6a305f +msgid "`\"ft\" -> 0xFB05`" +msgstr "" + +#: ../../functions.rst:650 a20f60a691f6403e83506643220c07c0 +msgid "`\"st\" -> 0xFB06`" +msgstr "" + +#: ../../functions.rst:652 2ed2cd30247844df8bbc69833a24683a +msgid "" +"So you may want to replace the two example tuples above by the following " +"single one: `(0xFB01, glyph, (x, y), (x0, y0, x1, y1))` (there is usually" +" no need to lookup the correct glyph id for 0xFB01 in the resp. font, but" +" you may execute `font.has_glyph(0xFB01)` and use its return value)." +msgstr "" +"したがって、上記の2つの例のタプルを次の単一のタプルで置き換えたい場合があります:`(0xFB01, glyph, (x, y), (x0, " +"y0, x1, " +"y1))`(通常、0xFB01の正しいグリフIDをフォント内で調べる必要はありませんが、`font.has_glyph(0xFB01)` " +"を実行し、その戻り値を使用することができます)。" + +#: ../../functions.rst:654 ce7f0eaeb90046aab353b46f85433ed3 +#, fuzzy +msgid "" +"**Changed in v1.19.3:** Similar to other text extraction methods, the " +"character and span bboxes envelop the character quads. To recover the " +"quads, follow the same methods :meth:`recover_quad`, " +":meth:`recover_char_quad` or :meth:`recover_span_quad` as explained in " +":ref:`textpagedict`. Use either `None` or `span[\"dir\"]` for the writing" +" direction." +msgstr "" +"**v1.19.3で変更:** " +"他のテキスト抽出方法と同様に、文字とスパンのbboxは文字のクアッドを包含します。クアッドを回復するには、:ref:`textpagedict` " +"で説明されている :meth:`recover_quad`、:meth:`recover_char_quad`、または " +":meth:`recover_span_quad` の同じ方法を使用します。書き込み方向にはNoneまたはspan `span[\"dir\"]`" +" を使用してください。" + +#: ../../functions.rst:656 de65772c37bd47cf876f0743e9332fe9 +msgid "" +"**Changed in v1.21.1:** If applicable, the name of the OCG is shown in " +"`\"layer\"`." +msgstr "**v1.21.1** で変更:該当する場合、OCGの名前が `\"layer\"` に表示されます。" + +#: ../../functions.rst:662 10427f25463d47fdb17d58e686fdfc9e +msgid "" +"Ensures that the page's so-called graphics state is balanced and new " +"content can be inserted correctly." +msgstr "" + +#: ../../functions.rst:664 8259e8d0721848219547bfd39f7867b9 +msgid "" +"In versions 1.24.1+ of PyMuPDF the method was improved and is being " +"executed automatically as required, so you should no longer need to " +"concern yourself with it." +msgstr "" + +#: ../../functions.rst:666 a829f7373a644aff8f6037428bea9712 +msgid "We discourage using :meth:`Page.clean_contents` to achieve this." +msgstr "" + +#: ../../functions.rst:672 db794a7ecc3a415baa480e8c9f7a6f1c +msgid "" +"Indicate whether the page's so-called graphic state is balanced. If " +"`False`, :meth:`Page.wrap_contents` should be executed if new content is " +"inserted (only relevant in `overlay=True` mode). In newer versions " +"(1.24.1+), this check and corresponding adjustments are automatically " +"executed -- you therefore should not be concerned about this anymore." +msgstr "" + +#: ../../functions.rst:680 d203c9e2bbe24468bd5568a1db6ea2a7 +msgid "" +"Deprecated wrapper for :meth:`TextPage.extractBLOCKS`. Use " +":meth:`Page.get_text` with the \"blocks\" option instead." +msgstr "" +":meth:`TextPage.extractBLOCKS` の非推奨のラッパーです。代わりにオプション \"blocks\" を使用して " +":meth:`Page.get_text` を使用してください。" + +#: ../../functions.rst:688 99ad32943641482ab84b11366ccbbe4f +msgid "" +"Deprecated wrapper for :meth:`TextPage.extractWORDS`. Use " +":meth:`Page.get_text` with the \"words\" option instead." +msgstr "" + +#: ../../functions.rst:696 a2853d3cdede4d2db456cfb83b815124 +msgid "Run a page through a list device and return its display list." +msgstr "ページをリストデバイスを介して実行し、そのディスプレイリストを返します。" + +#: ../../functions.rst:698 21e4b6cf9da846258448b5e17cbf3282 +msgid ":ref:`DisplayList`" +msgstr "" + +#: ../../functions.rst:699 ffb9b48f375440e290527ab49901acf1 +msgid "the display list of the page." +msgstr "ページのディスプレイリスト。" + +#: ../../functions.rst:705 5c87e4c0d7cf4cee8bd5c73c0185175c +#, fuzzy +msgid "" +"PDF only: Retrieve a list of :data:`xref` of :data:`contents` objects of " +"a page. May be empty or contain multiple integers. If the page is cleaned" +" (:meth:`Page.clean_contents`), it will be no more than one entry. The " +"\"source\" of each `/Contents` object can be individually read by " +":meth:`Document.xref_stream` using an item of this list. Method " +":meth:`Page.read_contents` in contrast walks through this list and " +"concatenates the corresponding sources into one `bytes` object." +msgstr "" +"PDFのみ:ページの :data:`contents` オブジェクトの :data:`xref` " +"のリストを取得します。空であるか、複数の整数を含むことがあります。ページがクリーンアップされた場合(:meth:`Page.clean_contents`)、最大で1つのエントリになります。各/Contentsオブジェクトの" +" \"source\" は、このリストのアイテムを使用して :meth:`Document.xref_stream` " +"で個別に読み取ることができます。一方、:meth:`Page.read_contents` メソッドは、このリストを走査し、対応するソースを1つの" +" `bytes` オブジェクトに連結します。" + +#: ../../functions.rst:713 90e498eeac6e4beeafac3412b4c240b6 +msgid "" +"PDF only: Let the page's `/Contents` key point to this xref. Any " +"previously used contents objects will be ignored and can be removed via " +"garbage collection." +msgstr "" +"PDFのみ:ページの `/Contents` " +"キーをこのxrefに設定します。以前に使用されていたコンテンツオブジェクトは無視され、ガベージコレクションを使用して削除できます。" + +#: ../../functions.rst:719 f9513e84e2ca4321bff8c46802105ac6 +msgid "Changed in v1.17.6" +msgstr "v1.17.6で変更" + +#: ../../functions.rst:721 22bc66a703e54067b1e2f2c5f119ba1d +msgid "" +"PDF only: Clean and concatenate all :data:`contents` objects associated " +"with this page. \"Cleaning\" includes syntactical corrections, " +"standardizations and \"pretty printing\" of the contents stream. " +"Discrepancies between :data:`contents` and :data:`resources` objects will" +" also be corrected if sanitize is true. See :meth:`Page.get_contents` for" +" more details." +msgstr "" +"PDFのみ:このページに関連付けられたすべての :data:`contents` ツオブジェクトをクリーンアップして連結します。 " +"\"クリーニング\"には、構文の修正、標準化、およびコンテンツストリームの \"きれいな印刷\"が含まれます。 " +"sanitizeがtrueの場合、:data:`contents` と :data:`resources` " +"オブジェクト間の不一致も修正されます。詳細については、:meth:`Page.get_contents` を参照してください。" + +#: ../../functions.rst:723 a3f4d1a068824a428423ffa2da562127 +msgid "" +"Changed in version 1.16.0 Annotations are no longer implicitly cleaned by" +" this method. Use :meth:`Annot.clean_contents` separately." +msgstr "" +"バージョン1.16.0以降、注釈はこのメソッドによって暗黙的にクリーンアップされなくなりました。 " +":meth:`Annot.clean_contents` を別途使用してください。" + +#: ../../functions.rst:725 15c1db8404ca46dbaf4c906166750f5b +msgid "" +"*(new in v1.17.6)* if true, synchronization between resources and their " +"actual use in the contents object is snychronized. For example, if a font" +" is not actually used for any text of the page, then it will be deleted " +"from the `/Resources/Font` object." +msgstr "" +"*(v1.17.6で新たに)* " +"trueの場合、リソースとコンテンツオブジェクト間の同期が行われます。たとえば、ページのテキストでフォントが実際に使用されていない場合、それは " +"`/Resources/Font` オブジェクトから削除されます。" + +#: ../../functions.rst:727 d98faf235eb84369adf87e543eaba7f2 +msgid "" +"This is a complex function which may generate large amounts of new data " +"and render old data unused. It is **not recommended** using it together " +"with the **incremental save** option. Also note that the resulting " +"singleton new */Contents* object is **uncompressed**. So you should save " +"to a **new file** using options *\"deflate=True, garbage=3\"*." +msgstr "" +"これは大量の新しいデータを生成し、古いデータを使用しないようにする複雑な機能です。**増分保存** オプションと一緒に使用することは " +"**お勧めできません** 。また、結果のシングルトンの新しい */Contents* オブジェクトは **非圧縮です**。したがって、オプション " +"*\"deflate=True、garbage=3\"* を使用して **新しいファイル** に保存する必要があります。" + +#: ../../functions.rst:729 f02d605a87d143948e528d28b56a3a2d +msgid "" +"Do not any longer use this method to ensure correct insertions on PDF " +"pages. Since PyMuPDF version 1.24.2 this is taken care of automatically." +msgstr "" + +#: ../../functions.rst:735 2196c7e1b31b40ddab3b297b291177f8 +msgid "" +"*New in version 1.17.0.* Return the concatenation of all :data:`contents`" +" objects associated with the page -- without cleaning or otherwise " +"modifying them. Use this method whenever you need to parse this source in" +" its entirety without having to bother how many separate contents objects" +" exist." +msgstr "" +"*v1.17.0で新たに追加。* ページに関連付けられたすべての :data:`contents` " +"オブジェクトの連結を返します。クリーニングや変更などを行わずに、このソース全体を解析する必要がある場合にこのメソッドを使用します。" + +#: ../../functions.rst:744 9a37de9e2f3c4f5f8730e0926cb5527e +msgid "" +"Clean the :data:`contents` streams associated with the annotation. This " +"is the same type of action which :meth:`Page.clean_contents` performs -- " +"just restricted to this annotation." +msgstr "" +"アノテーションに関連付けられた :data:`contents` " +"ストリームをクリーンアップします。これは、:meth:`Page.clean_contents` " +"が実行するのと同じ種類のアクションですが、この注釈に制限されています。" + +#: ../../functions.rst:751 c7c5b8d11bab42d1ab6490984348b460 +msgid "" +"Return a list of character glyphs and their widths for a font that is " +"present in the document. A font must be specified by its PDF cross " +"reference number :data:`xref`. This function is called automatically from" +" :meth:`Page.insert_text` and :meth:`Page.insert_textbox`. So you should " +"rarely need to do this yourself." +msgstr "" +"ドキュメント内に存在するフォントに対して、文字のグリフと幅のリストを返します。フォントはPDFのクロスリファレンス番号 :data:`xref` " +"で指定する必要があります。この関数は、:meth:`Page.insert_text` および " +":meth:`Page.insert_textbox` から自動的に呼び出されます。したがって、自分で行う必要があることはほとんどありません。" + +#: ../../functions.rst:753 0810b08678db4de2aa2a906546639b32 +msgid "" +"cross reference number of a font embedded in the PDF. To find a font " +":data:`xref`, use e.g. *doc.get_page_fonts(pno)* of page number *pno* and" +" take the first entry of one of the returned list entries." +msgstr "" +"ドキュメントに埋め込まれたPDFのクロスリファレンス番号。フォントの :data:`xref` を見つけるには、例えば、ページ番号 *pno* の" +" *doc.get_page_fonts(pno)* を使用し、返されたリストエントリの最初を取得します。" + +#: ../../functions.rst:755 630cf059f27845f38b21edb71c79835d +msgid "" +"limits the number of returned entries. The default of 256 is enforced for" +" all fonts that only support 1-byte characters, so-called \"simple " +"fonts\" (checked by this method). All :ref:`Base-14-Fonts` are simple " +"fonts." +msgstr "" +"返されるエントリの数を制限します。256のデフォルト値は、1バイトの文字のみをサポートする「シンプルフォント」と呼ばれるフォントに対して適用されます(このメソッドで確認されます)。すべてのPDF" +" :ref:`Base-14-Fonts` はシンプルフォントです。" + +#: ../../functions.rst:758 c04bc439f7ef4026892771cb1184f704 +msgid "" +"a list of *limit* tuples. Each character *c* has an entry *(g, w)* in " +"this list with an index of *ord(c)*. Entry *g* (integer) of the tuple is " +"the glyph id of the character, and float *w* is its normalized width. The" +" actual width for some :data:`fontsize` can be calculated as *w * " +"fontsize*. For simple fonts, the *g* entry can always be safely ignored. " +"In all other cases *g* is the basis for graphically representing *c*." +msgstr "" +"*limit* のタプルのリストです。各文字cには、*ord(c)* のインデックスでエントリ *(g、w)* があります。タプルの " +"*g*(整数)エントリは文字のグリフIDで、float *w* はその正規化された幅です。一部のフォントサイズに対する実際の幅は、*w * " +":data:`fontsize`* として計算できます。シンプルフォントの場合、gエントリは常に安全に無視できます。それ以外の場合、*g* は " +"*c* を視覚的に表現するための基礎です。" + +#: ../../functions.rst:760 3ef4cadd1af84e83949d7678fe2c81e3 +msgid "This function calculates the pixel width of a string called *text*::" +msgstr "この関数は、*text* と呼ばれる文字列のピクセル幅を計算します::" + +#: ../../functions.rst:772 ecbe28bd36cf4da9890cb253317137d4 +msgid "New in version 1.14.14" +msgstr "バージョン1.14.14で新規追加" + +#: ../../functions.rst:774 89120d06d2c2469199a7dcb86576f510 +msgid "" +"PDF only: Check whether the object represented by :data:`xref` is a " +":data:`stream` type. Return is ``False`` if not a PDF or if the number is" +" outside the valid xref range." +msgstr "" +"PDFのみ::data:`xref` によって表されるオブジェクトが :data:`stream` " +"タイプかどうかを確認します。PDFでない場合や、有効なxref範囲外の場合はFalseを返します。" + +#: ../../functions.rst:776 a09f8f5311c14f39ab22fb0476dc55e1 +msgid ":data:`xref` number." +msgstr ":data:`xref` 番号。" + +#: ../../functions.rst:778 cefc628d4f1c49b8b44905792b59ddd8 +msgid "" +"``True`` if the object definition is followed by data wrapped in keyword " +"pair *stream*, *endstream*." +msgstr "*stream*、*endstream* のキーワードペアで囲まれたデータに続いてオブジェクト定義がある場合は ``True``。" + +#: ../../functions.rst:784 2133898daacd4e1db066969afe31bb04 +msgid "" +"Increase the :data:`xref` by one entry and return that number. This can " +"then be used to insert a new object." +msgstr ":data:`xref` を1つ増やしてその番号を返します。これは新しいオブジェクトを挿入するために使用できます。" + +#: ../../functions.rst:786 b937dc8deb524375b27622d6996b23e7 +msgid "" +"int :returns: the number of the new :data:`xref` entry. Please note, that" +" only a new entry in the PDF's cross reference table is created. At this " +"point, there will not yet exist a PDF object associated with it. To " +"create an (empty) object with this number use `doc.update_xref(xref, " +"\"<<>>\")`." +msgstr "" +"int :returns: 新しい :data:`xref` " +"エントリの数。PDFのクロスリファレンステーブルに新しいエントリのみが作成されます。この段階では、それに関連付けられたPDFオブジェクトはまだ存在しません。この番号で(空の)オブジェクトを作成するには、`doc.update_xref(xref、\"<<>>\")`" +" を使用します。" + +#: ../../functions.rst:793 232449497ffb4eedb8dae90f7b55fed0 +msgid "Return length of :data:`xref` table." +msgstr ":data:`xref` テーブルの長さを返します。" + +#: ../../functions.rst:796 245e54c66fda43548f4bb15c292b7f52 +msgid "the number of entries in the :data:`xref` table." +msgstr ":data:`xref` テーブルのエントリ数。" + +#: ../../functions.rst:802 8dc72dca25f445da9457591672f80769 +msgid "" +"Compute the quadrilateral of a text span extracted via options \"dict\" " +"or \"rawdict\" of :meth:`Page.get_text`." +msgstr "" +":meth:`Page.get_text` のオプション \"dict\" または \"rawdict\" " +"で抽出されたテキストスパンの四辺形を計算します。" + +#: ../../functions.rst:804 79949b47b0e8434da2450695ab50368e +msgid "" +"`line[\"dir\"]` of the owning line. Use `None` for a span from " +":meth:`Page.get_texttrace`." +msgstr "" +"所有する行の `line[\"dir\"]`。:meth:`Page.get_texttrace` からのスパンの場合は `None` " +"を使用します。" + +#: ../../functions.rst:805 ../../functions.rst:815 ../../functions.rst:826 +#: 0770a5ef3a8a47db94dbbe6aa5ce37d5 5c5d1cb049fa4b2db4557396cea5ef45 +#: 9218509c4f0844e3af20da0e09b821af +msgid "the span." +msgstr "スパン。" + +#: ../../functions.rst:806 659d93593c5c4212808a3586ed130e9f +msgid "" +"the :ref:`Quad` of the span, usable for text marker annotations " +"('Highlight', etc.)." +msgstr "スパンの :ref:`Quad`、テキストマーカーアノテーション('ハイライト' など)で使用できます。" + +#: ../../functions.rst:812 3cd5684ddb9d43ccb146f4a824abc81d +msgid "" +"Compute the quadrilateral of a text character extracted via option " +"\"rawdict\" of :meth:`Page.get_text`." +msgstr ":meth:`Page.get_text` のオプション \"rawdict\" で抽出されたテキスト文字の四辺形を計算します。" + +#: ../../functions.rst:814 ../../functions.rst:825 +#: d61965cb158a476ab9824dc5c8554d04 f84cc11dbc814f73857fec69aa9acd5b +msgid "" +"`line[\"dir\"]` of the owning line. Use `None` for a span from " +":meth:`Page.get_texttrace`." +msgstr "" +"所有する行の `line[\"dir\"]`。:meth:`Page.get_texttrace` からのスパンの場合は `None` " +"を使用します。" + +#: ../../functions.rst:816 67ed1cab15584b639de82d14613b4292 +msgid "the character." +msgstr "文字。" + +#: ../../functions.rst:817 b0a52cdeaedc4130a8c46b7b8ec0809d +msgid "" +"the :ref:`Quad` of the character, usable for text marker annotations " +"('Highlight', etc.)." +msgstr "文字の :ref:`Quad`、テキストマーカーアノテーション('ハイライト' など)で使用できます。" + +#: ../../functions.rst:823 ed8ff97737994061997361b365b77280 +msgid "" +"Compute the quadrilateral of a subset of characters of a span extracted " +"via option \"rawdict\" of :meth:`Page.get_text`." +msgstr ":meth:`Page.get_text` のオプション \"rawdict\" で抽出されたスパンの一部の文字の四辺形を計算します。" + +#: ../../functions.rst:827 e333f58502ed4eceb02ca72ec7d9a14b +msgid "" +"the characters to consider. If given, the selected extraction option must" +" be \"rawdict\"." +msgstr "考慮する文字。省略した場合、。指定する場合、選択した抽出オプションは \"rawdict\" である必要があります。" + +#: ../../functions.rst:828 5e23e60cd2b44deba9b5844d0be494e4 +msgid "" +"the :ref:`Quad` of the selected characters, usable for text marker " +"annotations ('Highlight', etc.)." +msgstr "選択された文字の :ref:`Quad` 、テキストマーカーアノテーション('ハイライト' など)で使用できます。" + +#: ../../functions.rst:834 d5b5fa6626ea4e5fbd3c5729c70131ca +msgid "" +"Compute the quadrilateral of a subset of spans of a text line extracted " +"via options \"dict\" or \"rawdict\" of :meth:`Page.get_text`." +msgstr "" +":meth:`Page.get_text` のオプション \"dict\" または \"rawdict\" " +"で抽出されたテキスト行の一部のスパンの四辺形を計算します。" + +#: ../../functions.rst:836 77bdfd3566c640d5abf49df4e0523400 +msgid "the line." +msgstr "行。" + +#: ../../functions.rst:837 4763d4513a72443a8cb2e1abde8e800b +msgid "" +"a sub-list of `line[\"spans\"]`. If omitted, the full line quad will be " +"returned." +msgstr "`line[\"spans\"]` のサブリスト。省略した場合、選択した行の四辺形が返されます。" + +#: ../../functions.rst:838 4457ad14e4724e9ca4aa3a5e256a3db8 +msgid "" +"the :ref:`Quad` of the selected line spans, usable for text marker " +"annotations ('Highlight', etc.)." +msgstr "選択された行のスパンの :ref:`Quad`、テキストマーカーアノテーション('ハイライト' など)で使用できます。" + +#: ../../functions.rst:844 fa6ac2d5352e447898bb5eb220bb90b4 +msgid "Detect Tesseract language support folder." +msgstr "" + +#: ../../functions.rst:846 6f11797ccc5148c581def86e479e2474 +msgid "" +"This function is used to enable OCR via Tesseract even if the language " +"support folder is not specified directly or in environment variable " +"TESSDATA_PREFIX." +msgstr "" + +#: ../../functions.rst:850 5be47dc2280f44249eb4b89a51c504a8 +msgid "If is set we return it directly." +msgstr "" + +#: ../../functions.rst:852 610202bb1cc345d08689d17027881575 +msgid "Otherwise we return `os.environ['TESSDATA_PREFIX']` if set." +msgstr "" + +#: ../../functions.rst:854 addd2c5751724dd19f1ad368ab40fab0 +msgid "" +"Otherwise we search for a Tesseract installation and return its language " +"support folder." +msgstr "" + +#: ../../functions.rst:857 aaff4ea4400b47dcac737f3a953d4d18 +msgid "Otherwise we raise an exception." +msgstr "" + +#: ../../functions.rst:867 59f441bd5735450d9f431cb8ac5d4d06 +msgid "" +"Return the (unique) infinite rectangle `Rect(-2147483648.0, " +"-2147483648.0, 2147483520.0, 2147483520.0)`, resp. the :ref:`IRect` and " +":ref:`Quad` counterparts. It is the largest possible rectangle: all valid" +" rectangles are contained in it." +msgstr "" +"(ユニークな) 無限の四角形 " +"`Rect(-2147483648.0、-2147483648.0、2147483520.0、2147483520.0)`、または " +":ref:`IRect` と :ref:`Quad` の対応するものを返します。これは最大の可能な四角形で、すべての有効な四角形が含まれます。" + +#: ../../functions.rst:877 6edd4f317e5e4c1f929fee4707063b02 +msgid "" +"Return the \"standard\" empty and invalid rectangle `Rect(2147483520.0, " +"2147483520.0, -2147483648.0, -2147483648.0)` resp. quad. Its top-left and" +" bottom-right point values are reversed compared to the infinite " +"rectangle. It will e.g. be used to indicate empty bboxes in " +"`page.get_text(\"dict\")` dictionaries. There are however infinitely many" +" empty or invalid rectangles." +msgstr "" +"「標準」の空白で無効な四角形 `Rect(2147483520.0, 2147483520.0, -2147483648.0, " +"-2147483648.0)` " +"または対応する四角形を返します。その左上と右下のポイント値は、無限の四角形と比較して反転しています。たとえば、`page.get_text(\"dict\")`" +" 辞書内の空の bboxes を示すために使用されます。ただし、無限に多くの空のまたは無効な四角形が存在します。" + +#: ../../functions.rst:883 ecbb9be9ecab43fe9b984e688d525aca +msgid "" +"Returns a dict mapping lower-case color name to `(red, green, blue)` " +"tuple, and `red`, `green`, `blue` are floats in range 0..1." +msgstr "" + +#: ../../functions.rst:888 7909544a776b416ca01ff4363ae21452 +msgid "" +"Returns a list of `(colorname, red, green, blue)` tuples, where " +"`colorname` is upper case and `red`, `green`, `blue` are integers in " +"range 0..255." +msgstr "" + +#: ../../footer.rst:60 029e415e5bba4c8cbd9b3986f648e693 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "return the quad for a text span (\"dict\" / \"rawdict\")" +#~ msgstr "テキスト スパンのクワッドを返す (\"dict\", \"rawdict\")" + +#~ msgid "*New in v1.18.9*" +#~ msgstr "*新機能 v1.18.9*" + +#~ msgid "" +#~ "Convenience function returning the " +#~ "quadrilateral enveloping the text of a" +#~ " text span, as returned by " +#~ ":meth:`Page.get_text` using the \"dict\" or" +#~ " \"rawdict\" options." +#~ msgstr "" +#~ ":meth:`Page.get_text` を使用して、テキストスパンのテキストを囲む四角形を返す便利な関数。オプションとして" +#~ " \"dict\" または \"rawdict\" を指定して " +#~ ":meth:`Page.get_text` から返されたテキストスパンの情報を使用します。" + +#~ msgid "the value `line[\"dir\"]` of the span's line." +#~ msgstr "スパンのラインの値 `line[\"dir\"]`。" + +#~ msgid "the span sub-dictionary." +#~ msgstr "スパンのサブディクショナリ。" + +#~ msgid "the quadrilateral of the span's text." +#~ msgstr "スパンのテキストの四角形。" + +#~ msgid "" +#~ "Put string pair \"q\" / \"Q\" " +#~ "before, resp. after a page's */Contents*" +#~ " object(s) to ensure that any " +#~ "\"geometry\" changes are **local** only." +#~ msgstr "" +#~ "ページの */Contents* オブジェクトの前、または後ろに文字列ペア \"q\" /" +#~ " \"Q\" を配置して、どんな \"geometry\" の変更も **ローカル**" +#~ " な変更だけになるようにします。" + +#~ msgid "" +#~ "Use this method as an alternative, " +#~ "minimalist version of :meth:`Page.clean_contents`." +#~ " Its advantage is a small footprint" +#~ " in terms of processing time and " +#~ "impact on the data size of " +#~ "incremental saves. Multiple executions of " +#~ "this method are no problem and " +#~ "have no functional impact: `b\"q q " +#~ "contents Q Q\"` is treated like " +#~ "`b\"q contents Q\"`." +#~ msgstr "" +#~ "このメソッドは、:meth:`Page.clean_contents` " +#~ "の代替として使用します。その利点は、処理時間と増分保存のデータサイズへの影響が少ないことです。このメソッドの複数回実行は問題ありませんし、機能的な影響もありません。例えば、`b\"q" +#~ " q contents Q Q\"` は `b\"q " +#~ "contents Q\"` と同様に扱われます。" + +#~ msgid "" +#~ "Indicate whether :meth:`Page.wrap_contents` may " +#~ "be required for object insertions in " +#~ "standard PDF geometry. Note that this" +#~ " is a quick, basic check only: " +#~ "a value of ``False`` may still be" +#~ " a false alarm. But nevertheless " +#~ "executing :meth:`Page.wrap_contents` will have " +#~ "no negative side effects." +#~ msgstr "" +#~ ":meth:`Page.wrap_contents` " +#~ "が標準のPDFジオメトリにオブジェクトを挿入する際に必要かどうかを示します。ただし、これは迅速で基本的なチェックであることに注意してください。``False``" +#~ " の値でも誤報の可能性があることに留意してください。ただし、:meth:`Page.wrap_contents` " +#~ "を実行しても負の副作用はありません。" + +#~ msgid ":attr:`TESSDATA_PREFIX`" +#~ msgstr "" + +#~ msgid "a copy of `os.environ[\"TESSDATA_PREFIX\"]`" +#~ msgstr "os.environ[\"TESSDATA_PREFIX\"] のコピーです" + +#~ msgid "New in v1.19.4" +#~ msgstr "バージョン1.19.4で新たに導入されました" + +#~ msgid "" +#~ "Copy of `os.environ[\"TESSDATA_PREFIX\"]` for " +#~ "convenient checking whether there is " +#~ "integrated Tesseract OCR support." +#~ msgstr "便利なチェックに使用される `os.environ[\"TESSDATA_PREFIX\"]` のコピー" + +#~ msgid "" +#~ "If this attribute is `None`, " +#~ "Tesseract-OCR is either not installed, " +#~ "or the environment variable is not " +#~ "set to point to Tesseract's language " +#~ "support folder." +#~ msgstr "" +#~ "この属性が `None` の場合、Tesseract-" +#~ "OCRはインストールされていないか、環境変数がTesseractの言語サポートフォルダを指すように設定されていない可能性があります。" + +#~ msgid "" +#~ "This variable is now checked before " +#~ "OCR functions are tried. This prevents" +#~ " verbose messages from MuPDF." +#~ msgstr "この変数は、OCR関数が試行される前に確認されます。これにより、MuPDFから冗長なメッセージが表示されるのを防ぎます。" + +#~ msgid "" +#~ "This method obsoletes the use of " +#~ ":meth:`Page.clean_contents` in most cases. The" +#~ " advantage this method is a small " +#~ "footprint in terms of processing time" +#~ " and a low impact on the data" +#~ " size of incremental saves." +#~ msgstr "" + +#~ msgid "" +#~ "Return the name of Tesseract's language" +#~ " support folder. Use this function if" +#~ " the environment variable `TESSDATA_PREFIX` " +#~ "has not been set." +#~ msgstr "" +#~ "Tesseractの言語サポートフォルダの名前を返します。環境変数 `TESSDATA_PREFIX` " +#~ "が設定されていない場合にこの関数を使用します" + +#~ msgid "" +#~ "`os.getenv(\"TESSDATA_PREFIX\")` if not `None`. " +#~ "Otherwise, if Tesseract-OCR is " +#~ "installed, locate the name of " +#~ "`tessdata`. If no installation is found," +#~ " return `False`. The folder name can" +#~ " be used as parameter `tessdata` in" +#~ " methods :meth:`Page.get_textpage_ocr`, " +#~ ":meth:`Pixmap.pdfocr_save` and " +#~ ":meth:`Pixmap.pdfocr_tobytes`." +#~ msgstr "" + +#~ msgid "" +#~ "`os.getenv(\"TESSDATA_PREFIX\")` if not `None`. " +#~ "Otherwise, if Tesseract-OCR is " +#~ "installed, locate the name of " +#~ "`tessdata`. If no installation is found," +#~ " return `False`." +#~ msgstr "" +#~ "`os.getenv(\"TESSDATA_PREFIX\")` が `None` でない場合" +#~ "、またはTesseract-OCRがインストールされている場合は、`tessdata` " +#~ "の名前を見つけます。インストールが見つからない場合、`False` を返します。" + +#~ msgid "" +#~ "The folder name can be used as " +#~ "parameter `tessdata` in methods " +#~ ":meth:`Page.get_textpage_ocr`, :meth:`Pixmap.pdfocr_save` " +#~ "and :meth:`Pixmap.pdfocr_tobytes`." +#~ msgstr "" +#~ "このフォルダの名前は、メソッド " +#~ ":meth:`Page.get_textpage_ocr`、:meth:`Pixmap.pdfocr_save`、および " +#~ ":meth:`Pixmap.pdfocr_tobytes` の `tessdata` " +#~ "パラメータに使用できます。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/glossary.mo b/docs/locales/ja/LC_MESSAGES/glossary.mo new file mode 100644 index 000000000..53cb77e34 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/glossary.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/glossary.po b/docs/locales/ja/LC_MESSAGES/glossary.po new file mode 100644 index 000000000..20dc740f6 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/glossary.po @@ -0,0 +1,505 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 b0e2cd94d0aa4b3a934fbebb0e61235b +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 f1baafce3d564895b00cf4bc56d7bb33 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 819b0bef657b4ba4a0ab2d3e4ec0b4d8 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../glossary.rst:7 9941910a374d4a0db2b472ea37c6f1f8 +msgid "Glossary" +msgstr "用語集" + +#: ../../glossary.rst:11 0c491cef7a4041028d651475e56a294b +msgid "" +"This is an essential general mathematical / geometrical term for " +"understanding this documentation. Please see this section for a more " +"detailed discussion: :ref:`Coordinates`." +msgstr "" + +#: ../../glossary.rst:15 b03cc7629633424488939ad7912ff819 +msgid "A Python sequence of 6 numbers." +msgstr "6つの数字からなるPythonのシーケンス。" + +#: ../../glossary.rst:19 bd43bb115ff04c94b19ca36c5f5d02f6 +msgid "A Python sequence of 4 numbers." +msgstr "4つの数字からなるPythonのシーケンス。" + +#: ../../glossary.rst:23 1a31b68b33264ca5a77bd2b6902ffe53 +msgid "A Python sequence of 4 integers." +msgstr "4つの整数からなるPythonのシーケンス。" + +#: ../../glossary.rst:27 2a42549c54164862ab4a896ad4f7e587 +msgid "A Python sequence of 2 numbers." +msgstr "2つの数字からなるPythonのシーケンス。" + +#: ../../glossary.rst:31 5d5f5915c82d4921bed1e2f95ab91ecb +msgid "A Python sequence of 4 :data:`point_like` items." +msgstr "4つの :data:`point_like` アイテムからなるPythonのシーケンス。" + +#: ../../glossary.rst:35 775806cbba59489bac52cba16de32799 +msgid "" +"A number of values in a PDF can inherited by objects further down in a " +"parent-child relationship. The mediabox (physical size) of pages may for " +"example be specified only once or in some node(s) of the :data:`pagetree`" +" and will then be taken as value for all *kids*, that do not specify " +"their own value." +msgstr "" +"PDF内のいくつかの値は、親子関係で下位のオブジェクトに継承されることがあります。例えば、ページのmediabox(物理的なサイズ)は一度だけ指定されるか、:data:`pagetree`" +" のいくつかのノードで指定され、それ以外の値を指定しない子供たちにとってその値が取得されます。" + +#: ../../glossary.rst:41 d6aa867681e449c0a945275210eb6353 +msgid "" +"A PDF array of 4 floats specifying a physical page size -- " +"(:data:`inheritable`, mandatory). This rectangle should contain all other" +" PDF -- optional -- page rectangles, which may be specified in addition:" +" CropBox, TrimBox, ArtBox and BleedBox. Please consult :ref:`AdobeManual`" +" for details. The MediaBox is the only rectangle, for which there is no " +"difference between MuPDF and PDF coordinate systems: " +":attr:`Page.mediabox` will always show the same coordinates as the " +"`/MediaBox` key in a page's object definition. For all other rectangles, " +"MuPDF transforms y coordinates such that the **top** border is the point " +"of reference. This can sometimes be confusing -- you may for example " +"encounter a situation like this one:" +msgstr "" +"物理的なページサイズを指定する4つの浮動小数点数からなるPDF配列 " +"-(:data:`inheritable`、必須)。この矩形には、追加で指定できる他のすべてのPDFページ矩形を含める必要があります。これらの矩形にはCropBox、TrimBox、ArtBox、BleedBoxがあります。詳細については、" +" :ref:`AdobeManual` を参照してください。MediaBoxは、MuPDFとPDFの座標系の間に違いがない唯一の矩形です。 " +":attr:`Page.mediabox` は常に、ページオブジェクト定義内の `/MediaBox` " +"キーと同じ座標を示します。他のすべての矩形に関して、MuPDFはy座標を変換し、上部境界が参照点となるようにします。これは時に混乱することがあります。例えば、次のような状況に遭遇することがあります。" + +#: ../../glossary.rst:43 4b7aa0adab17468e947f9b8eb328fd00 +msgid "" +"The page definition contains the following identical values: `/MediaBox [" +" 36 45 607.5 765 ]`, `/CropBox [ 36 45 607.5 765 ]`." +msgstr "" +"ページ定義には次のような同一の値が含まれています: `/MediaBox [ 36 45 607.5 765 ]` 、 `/CropBox [ " +"36 45 607.5 765 ]`。 " + +#: ../../glossary.rst:44 c3336320e59f48f9986d52f340b7a4d4 +msgid "" +"PyMuPDF accordingly shows `page.mediabox = Rect(36.0, 45.0, 607.5, " +"765.0)`." +msgstr "したがって、PyMuPDFは `page.mediabox = Rect(36.0, 45.0, 607.5, 765.0)` と表示します。 " + +#: ../../glossary.rst:45 a216a8839aee420cbe653cdb1e90cee1 +msgid "" +"**BUT:** `page.cropbox = Rect(36.0, 0.0, 607.5, 720.0)`, because the two " +"y-coordinates have been transformed (45 subtracted from both of them)." +msgstr "" +"しかし、 `page.cropbox = Rect(36.0, 0.0, 607.5, 720.0)` " +"です。なぜなら、2つのy座標が変換されているため(両方から45が引かれているため)です。" + +#: ../../glossary.rst:49 8fa4e076a5e744bfa85ec3664e4908ce +msgid "" +"A PDF array of 4 floats specifying a page's visible area -- " +"(:data:`inheritable`, optional). It is the default for TrimBox, ArtBox " +"and BleedBox. If not present, it defaults to MediaBox. This value is " +"**not affected** if the page is rotated -- in contrast to " +":attr:`Page.rect`. Also, other than the page rectangle, the top-left " +"corner of the cropbox may or may not be *(0, 0)*." +msgstr "" +"ページの可視領域を指定する4つの浮動小数点数からなるPDF配列 " +"-(:data:`inheritable`、任意)。これはTrimBox、ArtBox、BleedBoxのデフォルト値です。存在しない場合、デフォルトはMediaBoxです。この値はページが回転している場合には影響を受けません" +" - :attr:`Page.rect` とは対照的です。また、ページ矩形以外では、クロップボックスの左上隅が(0, " +"0)であるかどうかは問題ありません。" + +#: ../../glossary.rst:54 695d4331e2574a73a6bc119defe798a4 +msgid "" +"A central PDF :data:`dictionary` -- also called the \"root\" -- " +"containing document-wide parameters and pointers to many other " +"information. Its :data:`xref` is returned by " +":meth:`Document.pdf_catalog`." +msgstr "" +"中央のPDF :data:`dictionary` - または「ルート」とも呼ばれる - " +"で、文書全体のパラメータと多くの他の情報へのポインタを含んでいます。その :data:`xref` は " +":meth:`Document.pdf_catalog` で返されます。" + +#: ../../glossary.rst:58 972c6ce4df00407a83efb4a5e33cf1b8 +msgid "" +"More precisely, the **PDF trailer** contains information in " +":data:`dictionary` format. It is usually located at the file's end. In " +"this dictionary, you will find things like the xrefs of the catalog and " +"the metadata, the number of :data:`xref` numbers, etc. Here is the " +"definition of the PDF spec:" +msgstr "" +"より正確には、PDFトレーラーには :data:`dictionary` " +"形式の情報が含まれています。通常、ファイルの末尾に配置されています。この辞書には、カタログやメタデータの :data:`xref` " +"、:data:`xref` の数などが含まれています。PDF仕様の定義は次のとおりです:" + +#: ../../glossary.rst:60 c620690ecfe5406fbe8d467efeb91794 +msgid "" +"*\"The trailer of a PDF file enables an application reading the file to " +"quickly find the cross-reference table and certain special objects. " +"Applications should read a PDF file from its end.\"*" +msgstr "「PDFファイルのトレーラーは、ファイルを読むアプリケーションがクロスリファレンステーブルと特定の特別なオブジェクトを迅速に見つけることを可能にします。アプリケーションはPDFファイルを末尾から読むべきです。」" + +#: ../../glossary.rst:62 bbfcfb28f25f49608557cc077c9f2ff2 +msgid "" +"To access the trailer in PyMuPDF, use the usual methods " +":meth:`Document.xref_object`, :meth:`Document.xref_get_key` and " +":meth:`Document.xref_get_keys` with `-1` instead of a positive xref " +"number." +msgstr "" +"PyMuPDFでトレーラーにアクセスするには、通常の方法で :meth:`Document.xref_object` 、 " +":meth:`Document.xref_get_key` 、:meth:`Document.xref_get_keys` " +"を使用し、正のxref番号の代わりに `-1` を指定します。" + +#: ../../glossary.rst:66 52c596bdac704429bcc9b0406875b7fc +msgid "" +"A **content stream** is a PDF :data:`object` with an attached " +":data:`stream`, whose data consists of a sequence of instructions " +"describing the graphical elements to be painted on a page, see \"Stream " +"Objects\" on page 19 of :ref:`AdobeManual`. For an overview of the mini-" +"language used in these streams, see chapter \"Operator Summary\" on page " +"643 of the :ref:`AdobeManual`. A PDF :data:`page` can have none to many " +"contents objects. If it has none, the page is empty (but still may show " +"annotations). If it has several, they will be interpreted in sequence as " +"if their instructions had been present in one such object (i.e. like in a" +" concatenated string). It should be noted that there are more stream " +"object types which use the same syntax: e.g. appearance dictionaries " +"associated with annotations and Form XObjects." +msgstr "" +"コンテンツストリームは、PDF :data:`object` に添付された :data:`stream` " +"を持ち、そのデータはページに描画されるグラフィカル要素を記述する命令のシーケンスから成り立っています。 :ref:`AdobeManual` " +"の「Stream Objects」(p.19)を参照してください。これらのストリームで使用されるミニ言語の概要については、 " +":ref:`AdobeManual` の「Operator " +"Summary」章(p.643)をご覧ください。PDFページには0から多くのコンテンツオブジェクトを持つことができます。コンテンツオブジェクトが存在しない場合、ページは空になります(ただし注釈は表示される可能性があります)。複数のコンテンツオブジェクトがある場合、それらは1つのオブジェクトに存在するかのように順番に解釈されます(つまり、連結された文字列のように)。なお、同じ構文を使用する他のストリームオブジェクトタイプもあることに注意してください。たとえば、注釈に関連する外観辞書やフォームXObjectなどです。" + +#: ../../glossary.rst:68 fcc09656179a42c7974f67c0e38f4c94 +msgid "PyMuPDF provides a number of methods to deal with contents of PDF pages:" +msgstr "PyMuPDFは、PDFページのコンテンツに対処するためのいくつかのメソッドを提供しています:" + +#: ../../glossary.rst:70 aa25f61d861242e7a13b1c745587ebad +msgid "" +":meth:`Page.read_contents()` -- reads and concatenates all page contents " +"into one `bytes` object." +msgstr ":meth:`Page.read_contents()` – ページのすべてのコンテンツを読み込んで1つのバイトオブジェクトに連結します。" + +#: ../../glossary.rst:71 85a71eda07ba4442b8014231e74ff345 +msgid "" +":meth:`Page.clean_contents()` -- a wrapper of a MuPDF function that " +"reads, concatenates and syntax-cleans all page contents. After this, only" +" one `/Contents` object will exist. In addition, page :data:`resources` " +"will have been synchronized with it such that it will contain exactly " +"those images, fonts and other objects that the page actually references." +msgstr "" +":meth:`Page.clean_contents()` – " +"MuPDFの関数をラップして、ページのすべてのコンテンツを読み込んで連結し、構文をクリーンアップします。これにより、1つの " +"`/Contents` オブジェクトのみが存在します。さらに、ページの :data:`resources` " +"はそれと同期され、ページが実際に参照する画像、フォント、その他のオブジェクトのみが含まれるようになります。" + +#: ../../glossary.rst:72 47bbed208b0b4183bd6abbb069ce2eb9 +msgid "" +":meth:`Page.get_contents()` -- return a list of :data:`xref` numbers of a" +" page's :data:`contents` objects. May be empty. Use " +":meth:`Document.xref_stream()` with one of these xrefs to read the resp. " +"contents section." +msgstr "" +":meth:`Page.get_contents()` – ページの :data:`contents` オブジェクトの :data:`xref` " +"番号のリストを返します。空かもしれません。:meth:`Document.xref_stream()` " +"をこれらのxrefの1つとともに使用して、対応するコンテンツセクションを読み込むことができます。" + +#: ../../glossary.rst:73 fc38f58008f949d283ae3b24382f368a +msgid "" +":meth:`Page.set_contents()` -- set a page's `/Contents` key to the " +"provided :data:`xref` number." +msgstr ":meth:`Page.set_contents()` – ページの `/Contents` キーを指定されたxref番号に設定します。" + +#: ../../glossary.rst:77 7f136697e7be484382e85c200cb2c6dc +msgid "" +"A :data:`dictionary` containing references to any resources (like images " +"or fonts) required by a PDF :data:`page` (required, inheritable, " +":ref:`AdobeManual` p. 81) and certain other objects (Form XObjects). This" +" dictionary appears as a sub-dictionary in the object definition under " +"the key */Resources*. Being an inheritable object type, there may exist " +"\"parent\" resources for all pages or certain subsets of pages." +msgstr "" +"PDFページに必要なリソース(画像やフォントなど)への参照を含む :data:`dictionary`(必須、継承可能、 " +":ref:`AdobeManual` p. 81)および一部の他のオブジェクト(フォームXObject)が必要です。この " +":data:`dictionary` " +"は、オブジェクト定義内のキー/リソースの下にサブ辞書として表示されます。継承可能なオブジェクトタイプであるため、すべてのページまたは一部のページの「親」リソースが存在する可能性があります。" + +#: ../../glossary.rst:81 bf3936e13b07411e9f15d4dea0704603 +msgid "" +"A PDF :data:`object` type, which is somewhat comparable to the same-named" +" Python notion: \"A dictionary object is an associative table containing " +"pairs of objects, known as the dictionary's entries. The first element of" +" each entry is the key and the second element is the value. The key must " +"be a name (...). The value can be any kind of object, including another " +"dictionary. A dictionary entry whose value is null (...) is equivalent to" +" an absent entry.\" (:ref:`AdobeManual` p. 18)." +msgstr "" +"PDF :data:`object` " +"のタイプで、同じ名前のPythonの概念に多少似ています。「辞書オブジェクトは、辞書のエントリとして知られるオブジェクトのペアを含む連想テーブルです。各エントリの最初の要素はキーであり、2番目の要素は値です。キーは名前である必要があります(...)。値は他の辞書を含む他の種類のオブジェクトであることができます。値がnull" +" (...) の辞書エントリは、不在のエントリと同等です。」(:ref:`AdobeManual` p. 18)。" + +#: ../../glossary.rst:83 a5774d115d78486887db6243636d5362 +msgid "" +"Dictionaries are the most important :data:`object` type in PDF. Here is " +"an example (describing a :data:`page`)::" +msgstr "辞書は、PDF内で最も重要なオブジェクトのタイプです。以下は、:data:`page` を記述する例です::" + +#: ../../glossary.rst:102 45aa2b19b71e458e8fa25a2c78d596a6 +msgid "" +"*Contents*, *Type*, *MediaBox*, etc. are **keys**, *40 0 R*, *Page*, *[0 " +"0 595.32 841.92]*, etc. are the respective **values**. The strings " +"*\"<<\"* and *\">>\"* are used to enclose object definitions." +msgstr "" +"Contents、Type、MediaBoxなどはキーであり、40 0 R、Page、[0 0 595.32 " +"841.92]などはそれぞれの値です。「<<」と「>>」という文字列は、オブジェクト定義を括るために使用されます。" + +#: ../../glossary.rst:104 a6f9190af07842339804c618355e0e27 +msgid "" +"This example also shows the syntax of **nested** dictionary values: " +"*Resources* has an object as its value, which in turn is a dictionary " +"with keys like *ExtGState* (with the value *<>*, which is " +"another dictionary), etc." +msgstr "" +"この例は、ネストされた辞書の値の構文も示しています。Resourcesはその値としてオブジェクトを持ち、それ自体がExtGState(値は<>で、これは別の辞書です)、などのキーを持つ辞書です。" + +#: ../../glossary.rst:108 b97660907730454fb80cae8fff9da131 +msgid "" +"A PDF page is a :data:`dictionary` object which defines one page in a " +"PDF, see :ref:`AdobeManual` p. 71." +msgstr "" +"PDFページは、PDF内の1つのページを定義する :data:`dictionary` オブジェクトであり、 :ref:`AdobeManual`" +" p. 71を参照してください。" + +#: ../../glossary.rst:112 97d4db102e0e4330938ad1f4e7fca422 +msgid "" +"The pages of a document are accessed through a structure known as the " +"page tree, which defines the ordering of pages in the document. The tree " +"structure allows PDF consumer applications, using only limited memory, to" +" quickly open a document containing thousands of pages. The tree contains" +" nodes of two types: intermediate nodes, called page tree nodes, and leaf" +" nodes, called page objects. (:ref:`AdobeManual` p. 75)." +msgstr "" +"文書のページは、ページツリーとして知られる構造を介してアクセスされ、文書内のページの順序を定義します。このツリー構造により、限られたメモリしか使用しないPDFコンシューマーアプリケーションでも、数千ページを含む文書を迅速に開くことができます。ツリーには2種類のノードが含まれています。中間ノードはページツリーノードと呼ばれ、葉ノードはページオブジェクトと呼ばれます。(:ref:`AdobeManual`" +" p. 75)。" + +#: ../../glossary.rst:114 2dbcc685903a4543922f7629dbdc1af4 +msgid "" +"While it is possible to list all page references in just one array, PDFs " +"with many pages are often created using *balanced tree* structures " +"(\"page trees\") for faster access to any single page. In relation to the" +" total number of pages, this can reduce the average page access time by " +"page number from a linear to some logarithmic order of magnitude." +msgstr "ページ参照を単一の配列でリストすることは可能ですが、多くのページを含むPDFは、より迅速な単一ページへのアクセスのためにバランスの取れたツリー構造(「ページツリー」)を使用して作成されることがよくあります。全ページ数に対して、これにより平均ページアクセス時間がページ番号による線形から対数のオーダーに削減されることがあります。" + +#: ../../glossary.rst:116 aca1e345565340d197f15bb05ce23148 +msgid "" +"For fast page access, MuPDF can use its own array in memory -- " +"independently from what may or may not be present in the document file. " +"This array is indexed by page number and therefore much faster than even " +"the access via a perfectly balanced page tree." +msgstr "" +"高速なページアクセスのために、MuPDFは独自のメモリ内配列を使用できます - " +"文書ファイルに存在するかどうかに関係なく。この配列はページ番号でインデックスされるため、完全にバランスが取れたページツリーを経由するアクセスよりもはるかに高速です。" + +#: ../../glossary.rst:120 cf48ba84c41b4860a00bb4859a5f1ab6 +msgid "" +"Similar to Python, PDF supports the notion *object*, which can come in " +"eight basic types: boolean values (\"true\" or \"false\"), integer and " +"real numbers, strings (**always** enclosed in brackets -- either \"()\", " +"or \"<>\" to indicate hexadecimal), names (must always start with a " +"\"/\", e.g. `/Contents`), arrays (enclosed in brackets \"[]\"), " +"dictionaries (enclosed in brackets \"<<>>\"), streams (enclosed by " +"keywords \"stream\" / \"endstream\"), and the null object (\"null\") " +"(:ref:`AdobeManual` p. 13). Objects can be made identifiable by assigning" +" a label. This label is then called *indirect* object. PyMuPDF supports " +"retrieving definitions of indirect objects via their cross reference " +"number via :meth:`Document.xref_object`." +msgstr "" +"Pythonに類似して、PDFはオブジェクトの概念をサポートしており、8つの基本型があります:ブール値(「true」または「false」)、整数と実数、文字列(常に括弧で囲まれています" +" - 「()」または「<>」で16進数を示す)、名前(必ず「/」で始まる必要があります、例: " +"`/Contents`)、配列(括弧「[]」で囲まれています)、辞書(括弧「<<>>」で囲まれています)、ストリーム(キーワード「stream」/「endstream」で囲まれています)、およびヌルオブジェクト(「null」)(:ref:`AdobeManual`" +" p. " +"13)。オブジェクトはラベルを割り当てることで識別可能にすることができます。このラベルは、間接オブジェクトと呼ばれます。PyMuPDFは、:meth:`Document.xref_object`" +" を介して間接オブジェクトの定義をクロスリファレンス番号を使って取得することができます。" + +#: ../../glossary.rst:124 65f67d11513245eea25bbcfe453941d4 +msgid "" +"A PDF :data:`dictionary` :data:`object` type which is followed by a " +"sequence of bytes, similar to Python *bytes*. \"However, a PDF " +"application can read a stream incrementally, while a string must be read " +"in its entirety. Furthermore, a stream can be of unlimited length, " +"whereas a string is subject to an implementation limit. For this reason, " +"objects with potentially large amounts of data, such as images and page " +"descriptions, are represented as streams.\" \"A stream consists of a " +":data:`dictionary` followed by zero or more bytes bracketed between the " +"keywords *stream* and *endstream*\"::" +msgstr "" +"PDFの :data:`dictionary` :data:`object` " +"タイプで、Pythonのbytesに似たバイト列が続きます。ただし、「PDFアプリケーションはストリームを段階的に読むことができますが、文字列は完全に読む必要があります。さらに、ストリームの長さは無制限ですが、文字列は実装の制限に従います。そのため、画像やページの説明などの大量のデータを含むオブジェクトは、ストリームとして表現されます。」「ストリームは、キーワード「stream」と「endstream」で囲まれた0バイト以上のバイトで構成される" +" :data:`dictionary` の後に続きます」::" + +#: ../../glossary.rst:135 1f0727e3f450432f9923a4ffd2cc8cee +msgid "" +"See :ref:`AdobeManual` p. 19. PyMuPDF supports retrieving stream content " +"via :meth:`Document.xref_stream`. Use :meth:`Document.is_stream` to " +"determine whether an object is of stream type." +msgstr "" +":ref:`AdobeManual` p. 19を参照してください。PyMuPDFは、:meth:`Document.xref_stream` " +"を介してストリームコンテンツを取得するサポートを提供しています。オブジェクトがストリームタイプかどうかを判断するには、:meth:`Document.is_stream`" +" を使用します。" + +#: ../../glossary.rst:139 d732f9dcef7e47028ca43a5647adcb9c +msgid "" +"A mathematical notion meaning a vector of norm (\"length\") 1 -- usually " +"the Euclidean norm is implied. In PyMuPDF, this term is restricted to " +":ref:`Point` objects, see :attr:`Point.unit`." +msgstr "" +"数学的な概念で、ノルム(「長さ」)が1のベクトルを意味します。通常、ユークリッドノルムが含まれます。PyMuPDFでは、この用語は " +":ref:`Point` オブジェクトに制限されます。:attr:`Point.unit` を参照してください。" + +#: ../../glossary.rst:143 26fb51d68f234a088d1702a7b331d879 +msgid "" +"Abbreviation for cross-reference number: this is an integer unique " +"identification for objects in a PDF. There exists a cross-reference table" +" (which may physically consist of several separate segments) in each PDF," +" which stores the relative position of each object for quick lookup. The " +"cross-reference table is one entry longer than the number of existing " +"object: item zero is reserved and must not be used in any way. Many " +"PyMuPDF classes have an :data:`xref` attribute (which is zero for non-" +"PDFs), and one can find out the total number of objects in a PDF via " +":meth:`Document.xref_length` *- 1*." +msgstr "" +"クロスリファレンス番号の省略形:これはPDF内のオブジェクトに対する一意の識別子である整数です。各PDFにはクロスリファレンステーブルが存在し(物理的には複数の別々のセグメントで構成される場合があります)、各オブジェクトの相対位置をクイックルックアップのために保存します。クロスリファレンステーブルは既存のオブジェクトの数よりも1つ長いエントリを持っており、アイテム0は予約され、何の方法でも使用してはいけません。多くのPyMuPDFクラスにはxref属性(非PDFの場合はゼロ)があり、:meth:`Document.xref_length`" +" - 1を介してPDF内のオブジェクトの総数を知ることができます。" + +#: ../../glossary.rst:148 6c021fd6a5aa44d7b8b3e0905f3f7b08 +msgid "" +"When referring to font size this metric is measured in points where 1 " +"inch = 72 points." +msgstr "フォントサイズを指す際、このメトリックは1インチ = 72ポイントとして測定されます。" + +#: ../../glossary.rst:152 c558da0bdc1446d89fcd51030a930245 +msgid "" +"Images and :ref:`Pixmap` objects may contain resolution information " +"provided as \"dots per inch\", dpi, in each direction (horizontal and " +"vertical). When MuPDF reads an image from a file or from a PDF object, it" +" will parse this information and put it in :attr:`Pixmap.xres`, " +":attr:`Pixmap.yres`, respectively. If it finds no meaningful information " +"in the input (like non-positive values or values exceeding 4800), it will" +" use \"sane\" defaults instead. The usual default value is 96, but it may" +" also be 72 in some cases (e.g. for JPX images)." +msgstr "" +"イメージや :ref:`Pixmap` " +"オブジェクトには、各方向(水平および垂直)の「インチあたりのドット数」である解像度情報が含まれている場合があります。MuPDFはファイルまたはPDFオブジェクトからイメージを読み取る際、この情報を解析し、それぞれ" +" :attr:`Pixmap.xres` 、:attr:`Pixmap.yres` " +"に設定します。入力内で有意義な情報が見つからない場合(非正の値や4800を超える値など)、代わりに「適切な」デフォルト値を使用します。通常のデフォルト値は96ですが、一部の場合(例えばJPXイメージの場合)は72になる場合もあります。" + +#: ../../glossary.rst:156 8e9c840d574c4b3d8c897a834b9362df +msgid "" +"Optional content properties dictionary - a sub :data:`dictionary` of the " +"PDF :data:`catalog`. The central place to store optional content " +"information, which is identified by the key `/OCProperties`. This " +"dictionary has two required and one optional entry: (1) `/OCGs`, " +"required, an array listing all optional content groups, (2) `/D`, " +"required, the default optional content configuration dictionary (OCCD), " +"(3) `/Configs`, optional, an array of alternative OCCDs." +msgstr "" +"オプショナルコンテンツプロパティ辞書(OCPD) - PDF :data:`catalog` のサブ " +":data:`dictionary`。オプショナルコンテンツ情報を保存する中心的な場所で、キー `/OCProperties` " +"で識別されます。この辞書には2つの必須エントリと1つのオプションエントリがあります:(1) `/OCGs` " +"、必須、すべてのオプショナルコンテンツグループをリストする配列、(2) `/D` " +"、必須、デフォルトのオプショナルコンテンツ構成辞書(OCCD)、(3) `/Configs` 、オプション、代替のOCCDの配列。" + +#: ../../glossary.rst:161 0100632cdca744f69f1f6913c969d05d +msgid "" +"Optional content configuration dictionary - a PDF :data:`dictionary` " +"inside the PDF :data:`OCPD`. It stores a setting of ON / OFF states of " +"OCGs and how they are presented to a PDF viewer program. Selecting a " +"configuration is quick way to achieve temporary mass visibility state " +"changes. After opening a PDF, the `/D` configuration of the :data:`OCPD` " +"is always activated. Viewer should offer a way to switch between the " +"`/D`, or one of the optional configurations contained in array " +"`/Configs`." +msgstr "" +"オプショナルコンテンツ構成辞書(OCCD) - PDF :data:`OCPD` 内のPDF " +":data:`dictionary`。これはOCGのON / " +"OFF状態とPDFビューアプログラムにどのように表示されるかの設定を保存します。構成を選択することは、一時的な質量表示状態の変更を素早く行う方法です。PDFを開いた後、:data:`OCPD`" +" の `/D` 構成は常にアクティブになります。ビューアは `/D` 、または配列 `/Configs` " +"に含まれるオプショナル構成のいずれかに切り替える方法を提供する必要があります。" + +#: ../../glossary.rst:166 43b7c2249e154c59bf5a59cfd2cf86e2 +msgid "" +"Optional content group -- a :data:`dictionary` object used to control the" +" visibility of other PDF objects like images or annotations. " +"Independently on which page they are defined, objects with the same OCG " +"can simultaneously be shown or hidden by setting their OCG to ON or OFF. " +"This can be achieved via the user interface provided by many PDF viewers " +"(Adobe Acrobat), or programmatically." +msgstr "" +"オプショナルコンテンツグループ(OCG) - 画像や注釈などの他のPDFオブジェクトの表示を制御するために使用される " +":data:`dictionary` " +"オブジェクト。どのページで定義されているかに関係なく、同じOCGを持つオブジェクトは、OCGをONまたはOFFに設定することで同時に表示または非表示にできます。これは多くのPDFビューア(Adobe" +" Acrobat)が提供するユーザーインターフェース、またはプログラムを使用して達成できます。" + +#: ../../glossary.rst:170 f4ba5ac63bea490bbc335d7fade50a6b +msgid "" +"Optional content membership dictionary -- a :data:`dictionary` object " +"which can be used like an :data:`OCG`: it has a visibility state. The " +"visibility of an OCMD is **computed:** it is a logical expression, which " +"uses the state of one or more OCGs to produce a boolean value. The " +"expression's result is interpreted as ON (true) or OFF (false)." +msgstr "" +"オプショナルコンテンツメンバーシップ辞書(OCMD) - :data:`OCG` " +"のように使用できる辞書オブジェクトで、表示状態を持ちます。OCMDの表示は計算されます:これはOCGの1つ以上の状態を使用してブール値を生成する論理式です。式の結果はON(true)またはOFF(false)として解釈されます。" + +#: ../../glossary.rst:174 522b5dde5e3b49df8ab57178f4d58ed1 +msgid "" +"Some frequent character combinations are represented by their own special" +" glyphs in more advanced fonts. Typical examples are \"fi\", \"fl\", " +"\"ffi\" and \"ffl\". These compounds are called *ligatures*. In PyMuPDF " +"text extractions, there is the option to either return the corresponding " +"unicode unchanged, or split ligatures up into their constituent parts: " +"\"fi\" ==> \"f\" + \"i\", etc." +msgstr "" +"一部の頻繁な文字の組み合わせは、より高度なフォントでは固有の特別なグリフで表されます。典型的な例には「fi」、「fl」、「ffi」、「ffl」などがあります。これらの複合体はリガチャと呼ばれます。PyMuPDFのテキスト抽出では、対応するユニコードを変更せずに返すオプションがあり、またリガチャをその構成要素に分割することもできます:「fi」" +" ==> 「f」+「i」など。" + +#: ../../footer.rst:60 04c382aafad34653bc0f596f100e3271 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/header-404.mo b/docs/locales/ja/LC_MESSAGES/header-404.mo new file mode 100644 index 000000000..70427f6eb Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/header-404.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/header-404.po b/docs/locales/ja/LC_MESSAGES/header-404.po new file mode 100644 index 000000000..24c18bfe1 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/header-404.po @@ -0,0 +1,37 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.26\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-03-05 14:46+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header-404.rst:-1 c2c49975f2854128bb488c59ce955f84 +msgid "Artifex" +msgstr "" + +#: ../../header-404.rst:-1 371ca3eaed8242b8bf4e8df0b45a6113 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header-404.rst:-1 a60c91d091c44274bed060f48efb71b1 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + diff --git a/docs/locales/ja/LC_MESSAGES/header.mo b/docs/locales/ja/LC_MESSAGES/header.mo new file mode 100644 index 000000000..47eb357d5 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/header.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/header.po b/docs/locales/ja/LC_MESSAGES/header.po new file mode 100644 index 000000000..ea5ee8bea --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/header.po @@ -0,0 +1,33 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2023-09-28 14:54+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 5eedbfe14cba4a21b822c8197f635c3f +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 ef34ef4fc02843218e7c1e16cfb6fe2f +msgid "PyMuPDF is a high-performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 93ea3b2b433546f9b198b2f2455bb6df +msgid "PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + diff --git a/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.mo b/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.mo new file mode 100644 index 000000000..8552dd32f Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.po b/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.po new file mode 100644 index 000000000..3d72e8d8f --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/how-to-open-a-file.po @@ -0,0 +1,239 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.8\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 cac88fdfe639485b93df6094e0c3030e +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 9510201d07b04c55a3114ceb4b285f81 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 f2821506806f4e1f858415f54f224785 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../how-to-open-a-file.rst:7 58b4044b498b4fa98a7e544066d6548c +msgid "Opening Files" +msgstr "ファイルを開く" + +#: ../../how-to-open-a-file.rst:15 cae84881472c4a128e9b260c5de512f1 +msgid "Supported File Types" +msgstr "サポートされているファイルタイプ" + +#: ../../how-to-open-a-file.rst:17 6696fc63a01c47ffa19215b2f6d2a5b4 +msgid "|PyMuPDF| can open files other than just |PDF|." +msgstr "|PyMuPDF| は |PDF| 以外のファイルも開くことができます。" + +#: ../../how-to-open-a-file.rst:19 52efb4e7a74844d7b47e10f1f9677f7f +msgid "The following file types are supported:" +msgstr "|PyMuPDF| は以下のファイルタイプをサポートしています:" + +#: ../../how-to-open-a-file.rst:26 fb0e540bc4eb4ac4b815230c9fe94127 +msgid "How to Open a File" +msgstr "ファイルを開く方法" + +#: ../../how-to-open-a-file.rst:28 8de724b91b61477183bf37f2a0ecd140 +msgid "To open a file, do the following:" +msgstr "ファイルを開くには、次の手順を実行します。" + +#: ../../how-to-open-a-file.rst:35 ca2f4e8e9264476f8f8a228742e663fa +msgid "" +"The above creates a :ref:`Document`. The instruction `doc = " +"pymupdf.Document(\"a.pdf\")` does exactly the same. So, `open` is just a " +"convenient alias and you can find its full API documented in that " +"chapter." +msgstr "" + +#: ../../how-to-open-a-file.rst:39 fb423287318a40b0a8079a8b86254936 +msgid "Opening with :index:`a Wrong File Extension `" +msgstr "拡張子の異なるファイルを開く" + +#: ../../how-to-open-a-file.rst:41 23a14ae19b7643cd9324eb33cc30b516 +msgid "" +"If you have a document with a wrong file extension for its type, you can " +"still correctly open it." +msgstr "拡張子がファイルタイプと異なっていても、正しく開くことができます。" + +#: ../../how-to-open-a-file.rst:43 bcddf79a279b4b23b9c0c3c02a02d0ae +msgid "Assume that *\"some.file\"* is actually an **XPS**. Open it like so:" +msgstr "例えば *\"some.file\"* が実際には **XPS** ファイルの場合は、以下のようにして開きます:" + +#: ../../how-to-open-a-file.rst:53 a05b58c6b9ed4edab534e2127dc4dd90 +msgid "" +"|PyMuPDF| itself does not try to determine the file type from the file " +"contents. **You** are responsible for supplying the file type information" +" in some way -- either implicitly, via the file extension, or explicitly " +"as shown with the `filetype` parameter. There are pure :title:`Python` " +"packages like `filetype `_ that help " +"you doing this. Also consult the :ref:`Document` chapter for a full " +"description." +msgstr "" +"|PyMuPDF| " +"自体は、ファイルの内容からファイルタイプを判断しようとはしません。このため、ユーザーがファイルの拡張子などを通じて暗黙的に、または " +"`filetype `_ " +"パラメーターを通じて明示的にファイルタイプの情報を提供する責任があります。また、詳細な説明については :ref:`Document` " +"の章を参照してください。" + +#: ../../how-to-open-a-file.rst:55 01594b5d61fd4877bc9bbe0b8a5d8de8 +msgid "" +"If |PyMuPDF| encounters a file with an unknown / missing extension, it " +"will try to open it as a |PDF|. So in these cases there is no need for " +"additional precautions. Similarly, for memory documents, you can just " +"specify `doc=pymupdf.open(stream=mem_area)` to open it as a |PDF| " +"document." +msgstr "" +"|PyMuPDF| " +"は不明なファイルや拡張子のないファイルはPDFとして開きます。このため、これらの場合は追加の情報は必要ありません。同様に、メモリ上のドキュメントの場合は、`doc=pymupdf.open(stream=mem_area)`" +" と指定するだけでPDFドキュメントとして開くことができます。" + +#: ../../how-to-open-a-file.rst:57 213861827ccf4095a7e7ec7f44323741 +msgid "" +"If you attempt to open an unsupported file then |PyMuPDF| will throw a " +"file data error." +msgstr "サポートされていないファイルを開こうとした場合、PyMuPDFはファイルデータエラーをスローします。" + +#: ../../how-to-open-a-file.rst:66 9408ee2e906e44feb3f19fde7131eee3 +#, fuzzy +msgid "Opening Remote Files" +msgstr "ファイルを開く" + +#: ../../how-to-open-a-file.rst:69 3c01cd03b6fd45c4bd767057e34ab038 +msgid "" +"For remote files on a server (i.e. non-local files), you will need to " +"*stream* the file data to |PyMuPDF|." +msgstr "" + +#: ../../how-to-open-a-file.rst:71 b8988a2a5afc458592ac62ac241e9557 +msgid "" +"For example use the `requests " +"`_ library as follows:" +msgstr "" + +#: ../../how-to-open-a-file.rst:84 3afd63c1a9de47f2b54f07057b1bda6b +msgid "Opening Files from Cloud Services" +msgstr "" + +#: ../../how-to-open-a-file.rst:86 cb885344d7a6494290518e5ac9e98541 +msgid "" +"For further examples which deal with files held on typical cloud services" +" please see these `Cloud Interactions code snippets " +"`_." +msgstr "" + +#: ../../how-to-open-a-file.rst:94 1869089fe979430ba11b6a880eed5e13 +#, fuzzy +msgid "Opening Django Files" +msgstr "C# ファイルを開く" + +#: ../../how-to-open-a-file.rst:96 aee62dcd16f3444bbef1ae267e49fbb1 +msgid "" +"Django implements a `File Storage API " +"`_ to store " +"files. The default is the `FileSystemStorage " +"`_, but the `django-storages `_ library provides a number" +" of other storage backends." +msgstr "" + +#: ../../how-to-open-a-file.rst:98 dfc6c49ca4114ccc98d1b314f2891bd5 +msgid "" +"You can open the file, move the contents into memory, then pass the " +"contents to |PyMuPDF| as a stream." +msgstr "" + +#: ../../how-to-open-a-file.rst:113 9076a6c2f6f74a4fa882b9e2f66ab944 +msgid "Please note that if the file you open is large, you may run out of memory." +msgstr "" + +#: ../../how-to-open-a-file.rst:115 3d79799d3f0a497ba8648550cd0ab745 +msgid "" +"The File Storage API works well if you're using different storage " +"backends in different environments. If you're only using the " +"`FileSystemStorage`, you can simply use the `obj.file.name` to open the " +"file directly with |PyMuPDF| as shown in an earlier example." +msgstr "" + +#: ../../how-to-open-a-file.rst:123 85288abbd82c4edc9fb1dfee730d152d +msgid "Opening Files as Text" +msgstr "ファイルをテキストとして開く" + +#: ../../how-to-open-a-file.rst:126 6aa9839fd5d84413a0ce8f6e17824b60 +msgid "" +"|PyMuPDF| has the capability to open any plain text file as a document. " +"In order to do this you should provide the `filetype` parameter for the " +"`pymupdf.open` function as `\"txt\"`." +msgstr "" +"|PyMuPDF| には、プレーン テキスト ファイルをドキュメントとして開く機能があります。 これを行うには、`pymupdf.open` " +"関数の `filetype` パラメータを「txt」として指定する必要があります。" + +#: ../../how-to-open-a-file.rst:133 72d20b3c2e33457aacb836fbe07acbf0 +msgid "" +"In this way you are able to open a variety of file types and perform the " +"typical **non-PDF** specific features like text searching, text " +"extracting and page rendering. Obviously, once you have rendered your " +"`txt` content, then saving as |PDF| or merging with other |PDF| files is " +"no problem." +msgstr "" +"このようにして、さまざまな種類のファイルを開いて、テキスト検索、テキスト抽出、ページ レンダリングなどの |PDF| " +"に固有ではない一般的な機能を実行できます。 明らかに、txt コンテンツをレンダリングしたら、 |PDF| として保存したり、他の |PDF| " +"ファイルと結合したりすることは問題ありません。" + +#: ../../how-to-open-a-file.rst:137 9b981eb61d194ca0b2ef17f23500da44 +msgid "Examples" +msgstr "例" + +#: ../../how-to-open-a-file.rst:141 3ac6d963ae5b402e94a3ad37e0fb12c9 +msgid "Opening a `C#` file" +msgstr "C# ファイルを開く" + +#: ../../how-to-open-a-file.rst:150 ff93dac3de6a4cb8bbb0076a69e1b632 +msgid "Opening an ``XML`` file" +msgstr "XML ファイルを開く" + +#: ../../how-to-open-a-file.rst:158 72a973937e15458f9a0fc4e9ae43e9ea +msgid "Opening a `JSON` file" +msgstr "JSON ファイルを開く" + +#: ../../how-to-open-a-file.rst:165 accacc140af14ec2998e5ca54b8909bb +msgid "And so on!" +msgstr "等々!" + +#: ../../how-to-open-a-file.rst:167 b47b21b5806a489b9807e5cb979132f7 +msgid "" +"As you can imagine many text based file formats can be *very simply opened* and *interpreted* by |PyMuPDF|. This can make data analysis and extraction for a wide range of previously unavailable files possible." +msgstr "" +"ご想像のとおり、多くのテキスト ベースのファイル形式は、|PyMuPDF| によって非常に簡単に開いて解釈できます。 " +"これにより、これまで利用できなかった広範囲のファイルのデータ分析と抽出が突然可能になります。" + +#: ../../footer.rst:60 c565e48ba51e43cbb1e960403a67a689 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/identity.mo b/docs/locales/ja/LC_MESSAGES/identity.mo new file mode 100644 index 000000000..305af7a68 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/identity.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/identity.po b/docs/locales/ja/LC_MESSAGES/identity.po new file mode 100644 index 000000000..a9e98e1c8 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/identity.po @@ -0,0 +1,70 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 7b37ecd8dc3743a9a786f62dd0bec6f7 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 7dda5857ca604606b110ce5cddbf35fa +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 024f5eebaeb24a6ea2775fd31cbf04c8 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../identity.rst:7 74c7b4d54f2f490aa6411228918d1102 +msgid "Identity" +msgstr "Identity (アイデンティティ)" + +#: ../../identity.rst:9 9eea7980a3264df58d614219614f391d +msgid "" +"Identity is a :ref:`Matrix` that performs no action -- to be used " +"whenever the syntax requires a matrix, but no actual transformation " +"should take place. It has the form *pymupdf.Matrix(1, 0, 0, 1, 0, 0)*." +msgstr "" +"アイデンティティは、何もアクションを実行しない :ref:`Matrix` " +"で、構文がマトリックスを必要とするが実際の変換が必要ない場合に使用されます。それは *pymupdf.Matrix(1, 0, 0, 1, 0," +" 0)* の形をしています。" + +#: ../../identity.rst:11 cf3f047b3bcd457781d6d1b0a6555a40 +msgid "" +"Identity is a constant, an \"immutable\" object. So, all of its matrix " +"properties are read-only and its methods are disabled." +msgstr "アイデンティティは、定数であり、変更できない「不変」のオブジェクトです。そのため、すべてのマトリックスのプロパティは読み取り専用であり、そのメソッドは無効になっています。" + +#: ../../identity.rst:13 0eeb4dcb5f1f4643912b79cb1dc0929a +msgid "" +"If you need a **mutable** identity matrix as a starting point, use one of" +" the following statements::" +msgstr "**変更可能な** アイデンティティマトリックスが必要な場合、次の文のいずれかを使用してください::" + +#: ../../footer.rst:60 d7eead894067453da2f782c487c8f59e +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/index.mo b/docs/locales/ja/LC_MESSAGES/index.mo new file mode 100644 index 000000000..536f1586d Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/index.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/index.po b/docs/locales/ja/LC_MESSAGES/index.po new file mode 100644 index 000000000..abb7611dc --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/index.po @@ -0,0 +1,117 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../index.rst:33 8fd6f740bd354549b09f02e29d6aeb9f +msgid "About" +msgstr "" + +#: ../../index.rst:42 325ca6b150394307ad977ddc59e8199c +msgid "User Guide" +msgstr "ユーザーガイド" + +#: ../../index.rst:54 55f11e9ee1a448aaa422415824a72910 +msgid "How to Guide" +msgstr "使用方法ガイド" + +#: ../../index.rst:61 382947f10dad49e09057aef86f6b7bdf +msgid "API Reference" +msgstr "APIリファレンス" + +#: ../../index.rst:74 cf7180ed975342a3ad0632f165b96c7b +msgid "Other" +msgstr "その他" + +#: ../../header.rst:-1 17cdca8041b545be91c717e1482c6e92 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 929d834795de4b6c9c42aede9d848712 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 883d9811455e45eea8d6c2f3434cae01 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../index.rst:20 8cfb3ae729874f858ac4b9e18e20bf6a +msgid "Welcome to |PyMuPDF|" +msgstr "PyMuPDFへようこそ" + +#: ../../index.rst:22 32ae9730ead142168b8b4abaaef4b7c3 +msgid "" +"|PyMuPDF| is a high-performance **Python** library for data extraction, " +"analysis, conversion & manipulation of |PDF| (and other) documents." +msgstr "" +"|PyMuPDF| は、|PDF| (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な :title:`Python` " +"ライブラリです。" + +#: ../../index.rst:24 6a8830c4d9b54905a1ec1b90ebb54e90 +msgid "" +"|PyMuPDF| is hosted on `GitHub `_ and" +" registered on `PyPI `_." +msgstr "" +"|PyMuPDF| は `GitHub `_ でホストされ、`PyPI" +" `_ に登録されています。" + +#: ../../footer.rst:60 ../../index.rst:29 11945e0949f341ad9d1670429005389a +#: ec790aafb26941e99e919f9eba35a52e +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Find out about **PyMuPDF Utilities**" +#~ msgstr "PyMuPDF Utilitiesについて詳しく知る" + +#~ msgid "" +#~ "The :title:`GitHub` repository `PyMuPDF-" +#~ "Utilities `_" +#~ " contains a full range of examples," +#~ " demonstrations and use cases." +#~ msgstr "" +#~ "GitHubリポジトリ `PyMuPDF-Utilities " +#~ "`_ " +#~ "には、さまざまな例、デモンストレーション、およびユースケースが含まれています。" + +#~ msgid "Do you need |PDF| to **DOCX** conversion?" +#~ msgstr "|PDF| から **DOCX** への変換が必要ですか?" + +#~ msgid "" +#~ "We recommend the pdf2docx_ library which" +#~ " uses |PyMuPDF| and the **python-" +#~ "docx** library to provide simple " +#~ "document conversion from |PDF| to " +#~ "**DOCX** format." +#~ msgstr "" +#~ "|PyMuPDF| を使用する pdf2docx_ ライブラリと、|PDF| から " +#~ "**DOCX** 形式への簡単なドキュメント変換を提供する **python-docx** " +#~ "ライブラリをお勧めします。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/installation.mo b/docs/locales/ja/LC_MESSAGES/installation.mo new file mode 100644 index 000000000..89e28f842 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/installation.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/installation.po b/docs/locales/ja/LC_MESSAGES/installation.po new file mode 100644 index 000000000..a51b93a0a --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/installation.po @@ -0,0 +1,758 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 b59db7d5f84d47c8b229355014ba2ed2 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 955a2dd45f5245208b28b50e40f044d6 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 e48fcbdec7f743c6a1327f95b397ca3a +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../installation.rst:6 ../../installation.rst:31 +#: 10ecce5e3d604f4094c4c3044665cd6e 13c81322926f479eb81525503536d3d5 +msgid "Installation" +msgstr "インストール" + +#: ../../installation.rst:9 43db83b2c9634becaa5ce17a7bc824b7 +msgid "Requirements" +msgstr "要件" + +#: ../../installation.rst:11 d16c4b4efee94685b3b883927404d895 +msgid "" +"All the examples below assume that you are running inside a Python " +"virtual environment. See: https://docs.python.org/3/library/venv.html for" +" details. We also assume that `pip` is up to date." +msgstr "" +"以下のすべての例は、Pythonの仮想環境内で実行していると仮定しています。詳細については、https://docs.python.org/3/library/venv.html" +" を参照してください。また、pipが最新であると仮定しています。" + +#: ../../installation.rst:15 1812a22b7b804b2c94a07ef59ee05dde +msgid "For example:" +msgstr "例えば:" + +#: ../../installation.rst:17 e8b165a696864a488ad441c70ff54a53 +msgid "Windows::" +msgstr "" + +#: ../../installation.rst:23 253af4ec1a814ab9aad6579d1c8f4499 +msgid "Linux, MacOS::" +msgstr "" + +#: ../../installation.rst:33 8b323123ef6d4d34a0892775e3cd63da +msgid "PyMuPDF should be installed using pip with::" +msgstr "PyMuPDFをインストールするには、次のようにpipを使用してください::" + +#: ../../installation.rst:37 32b7323f61ea4e379cf7fbbfe4951d0b +msgid "" +"This will install from a Python wheel if one is available for your " +"platform." +msgstr "この方法でインストールすると、対象のプラットフォームに対応するPythonのwheelファイルがある場合は、それを使用してインストールされます。" + +#: ../../installation.rst:41 5d18bb1311c042e5b587f6976f4731dc +msgid "Installation when a suitable wheel is not available" +msgstr "適切なwheelファイルが利用できない場合のインストール方法" + +#: ../../installation.rst:43 83b9093340b24fd4bef8c0351343e797 +msgid "" +"If a suitable Python wheel is not available, pip will automatically build" +" from source using a Python sdist." +msgstr "適切なPythonのwheelファイルが利用できない場合、pipは自動的にPythonのsdistからビルドします。" + +#: ../../installation.rst:46 1cf0140f438f450e81edd059e30668d1 +msgid "**This requires C/C++ development tools to be installed**:" +msgstr "**これにはC/C++の開発ツールがインストールされている必要があります:**" + +#: ../../installation.rst:48 ecbe8ba345644be7b2fb16b7b2e41d30 +msgid "On Windows:" +msgstr "Windowsの場合:" + +#: ../../installation.rst:51 7187b33c88b74933b24d8ecda3306b4f +msgid "" +"Install Visual Studio 2019. If not installed in a standard location, set " +"environmental variable `PYMUPDF_SETUP_DEVENV` to the location of the " +"`devenv.com` binary." +msgstr "" +"Visual Studio 2019をインストールしてください。標準の場所にインストールされていない場合は、環境変数 " +"`PYMUPDF_SETUP_DEVENV` を `devenv.com` バイナリの場所に設定してください" + +#: ../../installation.rst:56 41e4356dfc9b43e2a8cfaa55e62d8395 +msgid "" +"Having other installed versions of Visual Studio, for example Visual " +"Studio 2022, can cause problems because one can end up with MuPDF and " +"PyMuPDF code being compiled with different compiler versions." +msgstr "" +"他のバージョンのVisual Studio(例:Visual Studio " +"2022)がインストールされている場合、異なるコンパイラバージョンでMuPDFとPyMuPDFのコードがコンパイルされる可能性があるため、問題が発生することがあります。" + +#: ../../installation.rst:60 92fb4e5280d84a54a52c756428fec37d +msgid "The build will automatically download and build MuPDF." +msgstr "自動的にMuPDFをダウンロードしてビルドします。" + +#: ../../installation.rst:66 1344d3d6541c47feb0f80d0fd9bf3f35 +msgid "Problems after installation" +msgstr "インストール後の問題" + +#: ../../installation.rst:68 4ab4504c5d624b54b761ca80ddb1c621 +msgid "On Windows, Python error::" +msgstr "" + +#: ../../installation.rst:72 5444687575794dceb7d1d7ae7d7ad613 +msgid "" +"This has been occasionally seen if `MSVCP140.dll` is missing, and appears" +" to be caused by a bug in some versions (2015-2017) of `Microsoft Visual " +"C++ Redistributables`." +msgstr "" +"これは、`MSVCP140.dll` が見つからない場合に時折発生し、`Microsoft Visual C++` " +"再頒布可能パッケージの一部のバージョン (2015 ~ 2017) のバグが原因であると思われます。" + +#: ../../installation.rst:76 78be3210ec2e423c8452a91b6fbf9e43 +msgid "" +"It is recommended to search for `MSVCP140.dll` in https://msdn.com to " +"find instructions for how to reinstall it. For example " +"https://learn.microsoft.com/cpp/windows/latest-supported-vc-redist has " +"permalinks to the latest supported versions." +msgstr "" +"https://msdn.com で `MSVCP140.dll` を検索して、再インストールする方法の手順を見つけることをお勧めします。 " +"たとえば、https://learn.microsoft.com/cpp/windows/latest-supported-vc-redist " +"には、サポートされている最新バージョンへのパーマリンクがあります。" + +#: ../../installation.rst:81 95e507ac1f5748f6b6dbb48f06cff7ce +msgid "See https://github.com/pymupdf/PyMuPDF/issues/2678 for more details." +msgstr "詳細については、https://github.com/pymupdf/PyMuPDF/issues/2678 を参照してください。" + +#: ../../installation.rst:84 fc23d812f3e44692a7e71e90321f2ebb +msgid "Python error::" +msgstr "" + +#: ../../installation.rst:88 ff01b653c1044cad9f0fc405a1bb0bc5 +msgid "" +"This can happen if PyMuPDF's legacy name `fitz` is used (for example " +"`import fitz` instead of `import pymupdf`), and an unrelated Python " +"package called `fitz` (https://pypi.org/project/fitz/) is installed." +msgstr "" + +#: ../../installation.rst:92 7d8a14f94dfe4773a0a51c660d9a8aa8 +msgid "" +"The fitz package appears to be no longer maintained (the latest release " +"is from 2017), but unfortunately it does not seem possible to remove it " +"from pypi.org. It does not even work on its own, as well as breaking the " +"use of PyMuPDF's legacy name." +msgstr "" + +#: ../../installation.rst:97 5297fdbcbc3e46bcbf9e88ad36514822 +msgid "There are a few ways to avoid this problem:" +msgstr "" + +#: ../../installation.rst:100 0917d781ad454f108298fcb24e19b90d +msgid "" +"Use `import pymupdf` instead of `import fitz`, and update one's code to " +"match." +msgstr "" + +#: ../../installation.rst:103 38fbb111d3054c05b0892cd6e6fdd636 +msgid "Or uninstall the `fitz` package and reinstall PyMuPDF::" +msgstr "" + +#: ../../installation.rst:108 dbff07acdcfc41999d93bb0e99c06e87 +msgid "Or use `import pymupdf as fitz`. However this has not been well tested." +msgstr "" + +#: ../../installation.rst:110 661b1ce5caf94b3d8fd04b933406322f +msgid "With Jupyter labs on Apple Silicon (arm64), Python error::" +msgstr "" + +#: ../../installation.rst:114 0d8b0986a4bb4cc086da20efc59c10a8 +msgid "" +"This appears to be a problem in Jupyter labs; see: " +"https://github.com/pymupdf/PyMuPDF/issues/3643#issuecomment-2210588778." +msgstr "" + +#: ../../installation.rst:119 14b8fa60c27b415fa3903bed226f01ae +msgid "Notes" +msgstr "メモ" + +#: ../../installation.rst:122 5e962f055eb14733bdf1a5017acc3a83 +msgid "Wheels are available for the following platforms:" +msgstr "" + +#: ../../installation.rst:124 79d92638413a4dd38cfd1b8b99c0b4f3 +msgid "Windows 32-bit Intel." +msgstr "" + +#: ../../installation.rst:125 26892ff8170348cc8d0c14e089b19911 +msgid "Windows 64-bit Intel." +msgstr "" + +#: ../../installation.rst:126 bd7d6dc2edb44fe4bedb0a0b0c3a3dc6 +msgid "Linux 64-bit Intel." +msgstr "" + +#: ../../installation.rst:127 884c93dbdd11469d849e7a0f3e941dc3 +msgid "Linux 64-bit ARM." +msgstr "" + +#: ../../installation.rst:128 1072e27fd780438f942d5c1c319492f8 +msgid "MacOS 64-bit Intel." +msgstr "" + +#: ../../installation.rst:129 1a4c740f475b4ca7abfd942fd5aebfee +msgid "MacOS 64-bit ARM." +msgstr "" + +#: ../../installation.rst:131 20443bfba24a4ca7b1405847491fcd38 +msgid "Details:" +msgstr "" + +#: ../../installation.rst:133 8bede23066fa4d429c8a60e2836f5977 +msgid "We release a single wheel for each of the above platforms." +msgstr "" + +#: ../../installation.rst:136 f5b5390ada074e1ab5aa23deb5079b26 +msgid "" +"Each wheel uses the Python Stable ABI of the current oldest supported " +"Python version (currently 3.9), and so works with all later Python " +"versions, including new Python releases." +msgstr "" + +#: ../../installation.rst:141 6a3ea97e63974d5d87b3990a8fa154cf +msgid "" +"Wheels are tested on all Python versions currently marked as " +"\"Supported\" on https://devguide.python.org/versions/, currently 3.9, " +"3.10, 3.11, 3.12 and 3.13." +msgstr "" + +#: ../../installation.rst:146 3b5b7382bc8a459aad6dad16cedaa022 +msgid "" +"Wheels are not available for Python installed with `Chocolatey " +"`_ on Windows. Instead install Python using the " +"Windows installer from the python.org website, see: " +"http://www.python.org/downloads" +msgstr "" +"`Chocolatey `_ " +"を使用してWindowsにインストールされたPythonには、ホイールが利用できません。代わりに、python.orgのウェブサイトからWindowsインストーラーを使用してPythonをインストールしてください。詳細は、以下を参照してください:http://www.python.org/downloads" + +#: ../../installation.rst:152 2bb4e3e0f87e47ac9bf01500578038d7 +msgid "" +"Wheels are not available for Linux-aarch64 with `Musl libc " +"`_ (For example `Alpine Linux " +"`_ on aarch64), and building from source is " +"known to fail." +msgstr "" + +#: ../../installation.rst:157 93856a91296c4d988b37ebe44f7d9805 +msgid "" +"There are no **mandatory** external dependencies. However, some optional " +"feature are available only if additional components are installed:" +msgstr "必須の外部依存関係はありません。ただし、追加のコンポーネントをインストールした場合にのみ、一部のオプション機能が利用可能です:" + +#: ../../installation.rst:159 81a65c58f6874348aeecefef70590ecb +msgid "" +"`Pillow `_ is required for " +":meth:`Pixmap.pil_save` and :meth:`Pixmap.pil_tobytes`." +msgstr "" +"`Pillow `_ は :meth:`Pixmap.pil_save` と " +":meth:`Pixmap.pil_tobytes` の実行に必要です。" + +#: ../../installation.rst:160 1489ed99644246fbb42612a9492e9d12 +msgid "" +"`fontTools `_ is required for " +":meth:`Document.subset_fonts`." +msgstr "" +"`fontTools `_ は " +":meth:`Document.subset_fonts` の実行に必要です。" + +#: ../../installation.rst:161 3b2bdfbe19b04065b98e04b6419e1cfd +msgid "" +"`pymupdf-fonts `_ is a " +"collection of nice fonts to be used for text output methods." +msgstr "" +"`pymupdf-fonts `_ " +"は、テキスト出力の方法に使用するための素敵なフォントのコレクションです。" + +#: ../../installation.rst:163 d7ec430eb8944a64a6e205e182dd89f9 +msgid "" +"`Tesseract-OCR `_ for optical" +" character recognition in images and document pages. Tesseract is " +"separate software, not a Python package. To enable OCR functions in " +"PyMuPDF, Tesseract must be installed and the `tessdata` folder name " +"specified; see below." +msgstr "" + +#: ../../installation.rst:169 4ae790b6169748d3b44a6801bf8a90ed +msgid "" +"You can install these additional components at any time -- before or " +"after installing PyMuPDF. PyMuPDF will detect their presence during " +"import or when the respective functions are being used." +msgstr "これらの追加コンポーネントはいつでもインストールできます。PyMuPDFのインストール前または後に行うことができます。PyMuPDFは、これらのコンポーネントがインポート時または対応する機能が使用される際に検出します。" + +#: ../../installation.rst:173 9fdf6184383b4d6d8b6b4b59c3afddfa +msgid "Build and install from a local PyMuPDF source tree" +msgstr "ローカルのPyMuPDFソースツリーからビルドしてインストールします" + +#: ../../installation.rst:175 1ea99480f4f146e289e0b8e6c48cca01 +msgid "Initial setup:" +msgstr "初期設定:" + +#: ../../installation.rst:177 68820b466a3c4c549681be9491e430fc +msgid "Install C/C++ development tools as described above." +msgstr "上記に記載されているように、C/C++の開発ツールをインストールしてください。" + +#: ../../installation.rst:178 f1e1b7f608dc4082a25cb93c32a542b8 +msgid "Enter a Python venv and update pip, as described above." +msgstr "上記に記載されているように、Pythonのvenvに入り、pipをアップデートしてください。" + +#: ../../installation.rst:180 d78e70a460ae488db96b4a6bc800d5d0 +msgid "Get a PyMuPDF source tree:" +msgstr "PyMuPDFのソースツリーを取得します:" + +#: ../../installation.rst:182 5ab65829b8614644bad26016029c1917 +msgid "Clone the PyMuPDF git repository::" +msgstr "PyMuPDFのGitリポジトリをクローンしてください::" + +#: ../../installation.rst:187 1d0f0130f8534853ab24f51fbac5e978 +msgid "" +"Or download and extract a `.zip` or `.tar.gz` source release from " +"https://github.com/pymupdf/PyMuPDF/releases." +msgstr "" +"または、https://github.com/pymupdf/PyMuPDF/releases から `.zip` または `.tar.gz` " +"形式のソースリリースをダウンロードして展開してください。" + +#: ../../installation.rst:190 d54fc4c996a94b29962dad5a0fc82cf7 +msgid "Then one can build PyMuPDF in two ways:" +msgstr "PyMuPDFを2つの方法でビルドできます:" + +#: ../../installation.rst:192 2f9d95d7b59e45a8aafa871e89e2af4c +msgid "Build and install PyMuPDF with default MuPDF version::" +msgstr "デフォルトのMuPDFバージョンでPyMuPDFをビルドしてインストール::" + +#: ../../installation.rst:196 b4301c0b7cd247f6b5bf24a2567dd96b +msgid "" +"This will automatically download a specific hard-coded MuPDF source " +"release, and build it into PyMuPDF." +msgstr "これにより、特定のハードコードされたMuPDFソースリリースが自動的にダウンロードされ、PyMuPDFにビルドされます。" + +#: ../../installation.rst:199 a7dee54170fa4de89633986b2cc0a9b8 +msgid "Or build and install PyMuPDF using a local MuPDF source tree:" +msgstr "または、ローカルのMuPDFソースツリーを使用してPyMuPDFをビルドしてインストールします:" + +#: ../../installation.rst:201 a1bd9faae40c487db3a4f51a5b4c06ed +msgid "Clone the MuPDF git repository::" +msgstr "MuPDFのGitリポジトリをクローンします:" + +#: ../../installation.rst:206 7967535d801e4f859065dd78ad496b28 +msgid "" +"Build PyMuPDF, specifying the location of the local MuPDF tree with the " +"environmental variables `PYMUPDF_SETUP_MUPDF_BUILD`::" +msgstr "" +"環境変数 `PYMUPDF_SETUP_MUPDF_BUILD` を使用して、ローカルな MuPDF ツリーの場所を指定して PyMuPDF " +"をビルドします::" + +#: ../../installation.rst:211 b70fd263b0b64ab89388c949058fa42d +msgid "" +"Also, one can build for different Python versions in the same PyMuPDF " +"tree:" +msgstr "同じPyMuPDFツリー内で異なるPythonバージョン用にビルドする方法:" + +#: ../../installation.rst:214 a0919843b0a348b1876bb79c3a9ff4b6 +msgid "" +"PyMuPDF will build for the version of Python that is being used to run " +"`pip`. To run `pip` with a specific Python version, use `python -m pip` " +"instead of `pip`." +msgstr "" +"PyMuPDF は、`pip` を実行する Python のバージョンに対応してビルドされます。特定の Python バージョンで pip " +"を実行するには、`pip` の代わりに `python -m pip` を使用してください。" + +#: ../../installation.rst:218 5a970fc8f8364fe381c626f8b6ed3928 +msgid "So for example on Windows one can build different versions with::" +msgstr "したがって、Windows 上で異なるバージョンをビルドする場合、次のようにできます::" + +#: ../../installation.rst:222 40ea390c252347b0b1b5a0200f0763fe +msgid "or::" +msgstr "または::" + +#: ../../installation.rst:228 029df4cf902045f5961e4158c5775bb4 +msgid "Running tests" +msgstr "テストの実行" + +#: ../../installation.rst:230 641936ae8e864e2f9b5528828336f690 +msgid "" +"Having a PyMuPDF tree available allows one to run PyMuPDF's `pytest` test" +" suite::" +msgstr "PyMuPDFのディレクトリが利用可能な場合、PyMuPDFの `pytest` テストスイートを実行できます。::" + +#: ../../installation.rst:239 8b60a917fe4047f087c4190317553172 +msgid "Notes about using a non-default MuPDF" +msgstr "非デフォルトのMuPDFを使用する際の注意事項" + +#: ../../installation.rst:241 197189c327fe410985b5abda09791b45 +msgid "" +"Using a non-default build of MuPDF by setting environmental variable " +"`PYMUPDF_SETUP_MUPDF_BUILD` can cause various things to go wrong and so " +"is not generally supported:" +msgstr "" +"環境変数 `PYMUPDF_SETUP_MUPDF_BUILD` " +"を設定して非デフォルトのMuPDFビルドを使用すると、さまざまな問題が発生する可能性があるため、一般的にはサポートされていません。" + +#: ../../installation.rst:245 7882cccdbce54b4f95ffa6da76aae58d +msgid "" +"If MuPDF's major version number differs from what PyMuPDF uses by " +"default, PyMuPDF can fail to build, because MuPDF's API can change " +"between major versions." +msgstr "もしMuPDFのメジャーバージョン番号がPyMuPDFがデフォルトで使用するものと異なる場合、PyMuPDFはビルドに失敗する可能性があります。なぜなら、MuPDFのAPIはメジャーバージョン間で変更されることがあるからです。" + +#: ../../installation.rst:249 4f022dd28f75444fa6f007cebef7c47b +msgid "" +"Runtime behaviour of PyMuPDF can change because MuPDF's runtime behaviour" +" changes between different minor releases. This can also break some " +"PyMuPDF tests." +msgstr "PyMuPDFのランタイム動作は、異なるマイナーリリース間でMuPDFのランタイム動作が変更されるため、変わる可能性があります。これは一部のPyMuPDFのテストにも影響を及ぼすことがあります。" + +#: ../../installation.rst:253 94975cba2181444f9685efe3c40f3930 +msgid "" +"If MuPDF was built with its default config instead of PyMuPDF's " +"customised config (for example if MuPDF is a system install), it is " +"possible that `tests/test_textbox.py:test_textbox3()` will fail. One can " +"skip this particular test by adding `-k 'not test_textbox3'` to the " +"`pytest` command line." +msgstr "" +"MuPDFがPyMuPDFのカスタマイズされた設定ではなく、デフォルトの設定でビルドされた場合(たとえば、MuPDFがシステムにインストールされた場合)、`tests/test_textbox.py:test_textbox3()`" +" が失敗する可能性があります。この特定のテストをスキップするには、`pytest` コマンドラインに `-k 'not " +"test_textbox3'` を追加してください。" + +#: ../../installation.rst:261 4cb02fc7dc3d4436a352d9dee177f2f5 +msgid "Packaging" +msgstr "パッケージング" + +#: ../../installation.rst:263 7b00d1839b53423885d4bec0de2dc3b3 +msgid "See :doc:`packaging`." +msgstr ":doc:`packaging` を参照してください。" + +#: ../../installation.rst:267 831fbebbe6b54e5fa6d74ba85df46ea3 +msgid "Using with Pyodide" +msgstr "Pyodideとの使用" + +#: ../../installation.rst:269 c872ea949efa4dd4904134944412752d +msgid "See :doc:`pyodide`." +msgstr ":doc:`pyodide` を参照してください。" + +#: ../../installation.rst:275 1c0e891064cd44158680e97c13a553e6 +msgid "Enabling Integrated OCR Support" +msgstr "統合OCRサポートの有効化" + +#: ../../installation.rst:277 3526ef6db615496a94dbc16a01f72904 +msgid "" +"If you do not intend to use this feature, skip this step. Otherwise, it " +"is required for both installation paths: **from wheels and from " +"sources.**" +msgstr "" +"もしこの機能を使用しない場合は、このステップをスキップしてください。それ以外の場合は、**ホイールからのインストールとソースからのインストールの両方**" +" に必要です。" + +#: ../../installation.rst:279 1358ceda8fcd404ea6d8c85bed9660d3 +msgid "" +"PyMuPDF will already contain all the logic to support OCR functions. But " +"it additionally does need `Tesseract’s language support data " +"`_." +msgstr "" +"PyMuPDFにはすでにOCR機能をサポートするためのロジックが含まれていますが、追加でTesseractの言語サポートデータが必要です。そのため" +"、Tesseract-OCRのインストールが依然として必要です。" + +#: ../../installation.rst:281 ae308d2b2c67412c8821f5e6d1a32d86 +msgid "" +"If not specified explicitly, PyMuPDF will attempt to find the installed " +"Tesseract's tessdata, but this should probably not be relied upon." +msgstr "" + +#: ../../installation.rst:284 f80baa9303224bc195ba426b51082bd2 +msgid "" +"Otherwise PyMuPDF requires that Tesseract's language support folder is " +"specified explicitly either in PyMuPDF OCR functions' `tessdata` " +"arguments or `os.environ[\"TESSDATA_PREFIX\"]`." +msgstr "" + +#: ../../installation.rst:288 aff176ef01a147eeaa0bb2c600b29fb4 +msgid "So for a working OCR functionality, make sure to complete this checklist:" +msgstr "OCR機能を正常に動作させるために、以下のチェックリストを完了してください:" + +#: ../../installation.rst:290 298961af9b3d4572b5bb579888f8916c +msgid "" +"Locate Tesseract's language support folder. Typically you will find it " +"here:" +msgstr "Tesseractの言語サポートフォルダを見つけてください。通常、以下の場所にあります:" + +#: ../../installation.rst:292 17fdd80644b24caf8e45489d9a309fda +msgid "Windows: `C:/Program Files/Tesseract-OCR/tessdata`" +msgstr "" + +#: ../../installation.rst:293 b5dcf574f00046a8ad422aab01ba29e1 +msgid "Unix systems: `/usr/share/tesseract-ocr/4.00/tessdata`" +msgstr "Unixシステム: `/usr/share/tesseract-ocr/4.00/tessdata`" + +#: ../../installation.rst:295 fbe2af3a0f8041a299431922b9ad8ab4 +msgid "Specify the language support folder when calling PyMuPDF OCR functions:" +msgstr "" + +#: ../../installation.rst:297 cd354ffc94e643f7af1f995772fd15af +msgid "Set the `tessdata` argument." +msgstr "" + +#: ../../installation.rst:298 849aa0e0a81146518cda40c778d669ef +msgid "Or set `os.environ[\"TESSDATA_PREFIX\"]` from within Python." +msgstr "" + +#: ../../installation.rst:299 9dbe5b6d696d4d3b9dc00d2450842aaa +msgid "" +"Or set environment variable `TESSDATA_PREFIX` before running Python, for " +"example:" +msgstr "" + +#: ../../installation.rst:301 10d43e1320984012880cb8e99b362def +msgid "" +"Windows: `setx TESSDATA_PREFIX \"C:/Program Files/Tesseract-" +"OCR/tessdata\"`" +msgstr "" + +#: ../../installation.rst:302 31425f5c30314211b091636098821618 +msgid "" +"Unix systems: `declare -x TESSDATA_PREFIX=/usr/share/tesseract-" +"ocr/4.00/tessdata`" +msgstr "" +"Unixシステム: `declare -x TESSDATA_PREFIX=/usr/share/tesseract-" +"ocr/4.00/tessdata`" + +#: ../../footer.rst:60 6714ab4300af4cc182e033502436af86 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "On Unix-style systems such as " +#~ "Linux, OpenBSD and FreeBSD, use the " +#~ "system package manager to install SWIG." +#~ msgstr "Unixスタイルのシステム(Linux、OpenBSD、FreeBSDなど)では、SWIGをインストールするにはシステムのパッケージマネージャを使用してください。" + +#~ msgid "For example on Debian Linux, do: `sudo apt install swig`" +#~ msgstr "例えば、Debian Linuxの場合は、次のようにしてください: `sudo apt install swig`" + +#~ msgid "" +#~ "Install SWIG by following the " +#~ "instructions at: " +#~ "https://swig.org/Doc4.0/Windows.html#Windows_installation" +#~ msgstr "以下の指示に従って、SWIGをインストールしてください:https://swig.org/Doc4.0/Windows.html#Windows_installation" + +#~ msgid "" +#~ "On MacOS, install MacPorts using the " +#~ "instructions at: https://www.macports.org/install.php" +#~ msgstr "MacOSでMacPortsをインストールする場合は、以下の指示に従ってください:https://www.macports.org/install.php" + +#~ msgid "Then install SWIG with: `sudo port install swig`" +#~ msgstr "その後、以下のコマンドを使用してSWIGをインストールしてください: `sudo port install swig`" + +#~ msgid "You may also need: `sudo port install swig-python`" +#~ msgstr "以下も必要になるかもしれません: `sudo port install swig-python`" + +#~ msgid "Installation from source without using an sdist" +#~ msgstr "sdistを使用せずにソースからインストールする方法" + +#~ msgid "" +#~ "Clone the git repository at " +#~ "https://github.com/pymupdf/PyMuPDF, for example::" +#~ msgstr "https://github.com/pymupdf/PyMuPDF から git リポジトリをクローンしてください" + +#~ msgid "" +#~ "PyMuPDF will build for the version " +#~ "of Python that runs `setup.py`. So " +#~ "for example on Windows one can " +#~ "build different versions by using `py" +#~ " -3.9` or `py -3.10-32`." +#~ msgstr "" +#~ "PyMuPDFは、`setup.py` " +#~ "を実行するPythonのバージョンにビルドされます。したがって、例えばWindowsで異なるバージョンをビルドする場合、`py " +#~ "-3.9` や `py -3.10-32` を使用することができます。" + +#~ msgid "Building and testing with git checkouts of PyMuPDF and MuPDF" +#~ msgstr "PyMuPDFとMuPDFのgitチェックアウトを使ってビルドとテストを行う" + +#~ msgid "Things to do:" +#~ msgstr "やるべきこと:" + +#~ msgid "Get PyMuPDF." +#~ msgstr "PyMuPDFを取得する。" + +#~ msgid "Get MuPDF." +#~ msgstr "MuPDFを取得する。" + +#~ msgid "Create a Python virtual environment." +#~ msgstr "Pythonの仮想環境を作成する。" + +#~ msgid "" +#~ "Build PyMuPDF with environmental variable " +#~ "`PYMUPDF_SETUP_MUPDF_BUILD` set to the path" +#~ " of the local MuPDF checkout." +#~ msgstr "" +#~ "`PYMUPDF_SETUP_MUPDF_BUILD` " +#~ "環境変数をローカルのMuPDFチェックアウトのパスに設定して、PyMuPDFをビルドする。" + +#~ msgid "Run PyMuPDF tests." +#~ msgstr "PyMuPDFのテストを実行する。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "Wheels are available for Windows (32-bit" +#~ " Intel, 64-bit Intel), Linux (64-bit " +#~ "Intel, 64-bit ARM) and Mac OSX " +#~ "(64-bit Intel, 64-bit ARM), Python " +#~ "versions 3.7 and up." +#~ msgstr "" +#~ "Windows(32ビットIntel、64ビットIntel)、Linux(64ビットIntel、64ビットARM)、およびMac " +#~ "OSX(64ビットIntel、64ビットARM)用のホイールが利用可能で、Pythonバージョン3.7以上に対応しています。" + +#~ msgid "Install Tesseract." +#~ msgstr "Tesseractをインストールしてください" + +#~ msgid "" +#~ "As of `PyMuPDF-1.20.0`, the required " +#~ "MuPDF source code is already in " +#~ "the sdist and is automatically built " +#~ "into PyMuPDF." +#~ msgstr "`PyMuPDF-1.20.0` 現在、必要なMuPDFのソースコードがsdistに含まれており、自動的にPyMuPDFに組み込まれています。" + +#~ msgid "" +#~ "Build and install from local PyMuPDF " +#~ "checkout and optional local MuPDF " +#~ "checkout" +#~ msgstr "ローカルの PyMuPDF チェックアウトとオプションのローカル MuPDF チェックアウトからビルドしてインストールします。" + +#~ msgid "Build and install PyMuPDF::" +#~ msgstr "PyMuPDFをビルドしてインストールしてください::" + +#~ msgid "" +#~ "When running Python scripts that use " +#~ "PyMuPDF, make sure that the current " +#~ "directory is not the `PyMuPDF/` " +#~ "directory." +#~ msgstr "" +#~ "PyMuPDFを使用するPythonスクリプトを実行する際は、現在のディレクトリが `PyMuPDF/` " +#~ "ディレクトリでないことを確認してください。" + +#~ msgid "" +#~ "Otherwise, confusingly, Python will attempt" +#~ " to import `fitz` from the local " +#~ "`fitz/` directory, which will fail " +#~ "because it only contains source files." +#~ msgstr "" +#~ "そうでない場合、Pythonは混乱する可能性があります。Pythonはローカルの `fitz/` ディレクトリから" +#~ " `fitz` をインポートしようとしますが、それはソースファイルのみを含んでいるため、失敗します。" + +#~ msgid "On Windows `ImportError: DLL load failed while importing _fitz`." +#~ msgstr "Windowsの場合: `ImportError: DLL load failed while importing _fitz`." + +#~ msgid "" +#~ "Wheels are available for Windows (32-bit" +#~ " Intel, 64-bit Intel), Linux (64-bit " +#~ "Intel, 64-bit ARM) and Mac OSX " +#~ "(64-bit Intel, 64-bit ARM), for Python" +#~ " versions marked as \"Supported\" on " +#~ "https://devguide.python.org/versions/." +#~ msgstr "" +#~ "ホイールは、Windows向けには(32ビットIntel、64ビットIntel)、Linux向けには(64ビットIntel、64ビットARM)、およびMac" +#~ " OSX向けには(64ビットIntel、64ビットARM)提供されています。 " +#~ "Pythonのバージョンは、「https://devguide.python.org/versions/」で「サポートされている」とマークされています。" + +#~ msgid "" +#~ "PyMuPDF does not support Python versions" +#~ " prior to 3.8. Older wheels can " +#~ "be found in `this `_ repository and on `PyPI " +#~ "`_." +#~ msgstr "" + +#~ msgid "" +#~ "Please note that we generally follow " +#~ "the official Python release schedules. " +#~ "For Python versions dropping out of " +#~ "official support this means, that " +#~ "generation of wheels will also be " +#~ "ceased for them." +#~ msgstr "" +#~ "PyMuPDFはPythonバージョン3.8以前をサポートしていません。古いバージョンのホイールは、`この " +#~ "`_ リポジトリや " +#~ "`PyPI `_ でも見つけることができます" + +#~ msgid "" +#~ "`Tesseract-OCR `_ for optical character " +#~ "recognition in images and document " +#~ "pages. Tesseract is separate software, " +#~ "not a Python package. To enable " +#~ "OCR functions in PyMuPDF, the software" +#~ " must be installed and the system " +#~ "environment variable `\"TESSDATA_PREFIX\"` must " +#~ "be defined and contain the `tessdata`" +#~ " folder name of the Tesseract " +#~ "installation location. See below." +#~ msgstr "" +#~ "`Tesseract-OCR `_ " +#~ "は、画像やドキュメントページの光学文字認識(OCR)のためのソフトウェアです。TesseractはPythonのパッケージではなく、独立したソフトウェアです。PyMuPDFでOCR機能を有効にするには、Tesseractソフトウェアをインストールし、システム環境変数の" +#~ " `\"TESSDATA_PREFIX\"` を定義して、Tesseractのインストール場所にある " +#~ "`tessdata` フォルダの名前を含める必要があります。以下を参照してください。" + +#~ msgid "" +#~ "The language support folder location " +#~ "must be communicated either via storing" +#~ " it in the environment variable " +#~ "`\"TESSDATA_PREFIX\"`, or as a parameter " +#~ "in the applicable functions." +#~ msgstr "" +#~ "言語サポートフォルダの場所は、`\"TESSDATA_PREFIX\"` " +#~ "という環境変数に保存するか、適用される関数のパラメータとして指定する必要があります。" + +#~ msgid "Set the environment variable `TESSDATA_PREFIX`" +#~ msgstr "環境変数 `TESSDATA_PREFIX` を設定してください:" + +#~ msgid "" +#~ "On Windows systems, this must happen " +#~ "outside Python -- before starting your" +#~ " script. Just manipulating `os.environ` " +#~ "will not work!" +#~ msgstr "" +#~ "Windowsシステムでは、これはPythonの外部で行われる必要があります。スクリプトを実行する前に行ってください。`os.environ`" +#~ " を操作するだけではうまく動作しません!" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/intro.mo b/docs/locales/ja/LC_MESSAGES/intro.mo new file mode 100644 index 000000000..f29b94b30 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/intro.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/intro.po b/docs/locales/ja/LC_MESSAGES/intro.po new file mode 100644 index 000000000..19701c729 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/intro.po @@ -0,0 +1,230 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.10\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-09-11 21:42+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 a41c7bb7308c4b5b830c403db1c8d522 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 4b9111f0fec54685953c69def64e45f3 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "" + +#: ../../header.rst:-1 49bb2731e94d4111965ffae8aa08a21f +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "" + +#: ../../intro.rst:4 f7699a4ad92d446ba773ba2396f91a3a +msgid "Introduction" +msgstr "" + +#: ../../intro.rst:15 12aec34e36c34e14a32ebeecf2c16431 +msgid "" +"|PyMuPDF| is a Python binding for `MuPDF `_ --" +" a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which" +" is maintained and developed by Artifex Software, Inc" +msgstr "" + +#: ../../intro.rst:17 be6754fe8f954f80897efdca10164de8 +msgid "" +"MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB, MOBI and FB2 " +"(e-books) formats, and it is known for its top performance and high " +"rendering quality." +msgstr "" + +#: ../../intro.rst:19 b3268360e4cf4c56a5509810b370fa25 +msgid "" +"MuPDF stands out among all similar products for its top rendering " +"capability and unsurpassed processing speed. At the same time, its " +"\"light weight\" makes it an excellent choice for platforms where " +"resources are typically limited, like smartphones." +msgstr "" + +#: ../../intro.rst:21 c9802de5dae44cffb37698c1f86e6dde +msgid "" +"Check this out yourself and compare the various free PDF-viewers. In " +"terms of speed and rendering quality `SumatraPDF " +"`_ ranges at the top (apart from " +"MuPDF's own standalone viewer) -- since it has changed its library basis " +"to MuPDF!" +msgstr "" + +#: ../../intro.rst:23 c91ccfeae8fa4367815a63762c89721e +msgid "" +"With PyMuPDF you can access files with extensions like “.pdf”, “.xps”, " +"“.oxps”, “.cbz”, “.fb2”, \".mobi\" or “.epub”. In addition, about 10 " +"popular image formats can also be opened and handled like documents." +msgstr "" + +#: ../../intro.rst:25 903db6f8457c48e694fba6a66f5afd03 +msgid "" +"PyMuPDF provides access to many important functions of MuPDF from within " +"a Python environment, and we are continuously seeking to expand this " +"function set." +msgstr "" + +#: ../../intro.rst:27 d4748fd6a1fe44e19d6bd9b222cb61cd +msgid "" +"PyMuPDF runs and has been tested on Mac, Linux and Windows for Python " +"versions 3.8 [#f1]_ and up. Other platforms should work too, as long as " +"MuPDF and Python support them." +msgstr "" + +#: ../../intro.rst:29 c748d030070b427c80417beaff9b6342 +msgid "" +"PyMuPDF is hosted on `GitHub `_ and " +"registered on `PyPI `_." +msgstr "" + +#: ../../intro.rst:31 2be39641dab44b1c8e1dfa7afe144c4d +msgid "" +"For MS Windows, Mac OSX and Linux Python wheels are available -- please " +"see the installation chapter." +msgstr "" + +#: ../../intro.rst:33 edeb85c952ab458dbd75bc79773b3448 +msgid "" +"The GitHub repository `PyMuPDF-Utilities `_ contains a full range of examples, demonstrations " +"and use cases." +msgstr "" + +#: ../../intro.rst:36 21bc387c7484451aa6678a479ef7b24b +msgid "Note on the legacy module name *fitz*" +msgstr "" + +#: ../../intro.rst:37 5b06571ac8e44ca498d5d938ad978bc1 +msgid "" +"Prior to release 1.24.3, the top level Python import name for this " +"library was **\"fitz\"**. This has historical reasons:" +msgstr "" + +#: ../../intro.rst:39 54b20466f423486cb7a419f853600c0f +msgid "The original rendering library for MuPDF was called *Libart*." +msgstr "" + +#: ../../intro.rst:41 8e099c2cdacc4a9bb75ad97643b3f913 +msgid "" +"*\"After Artifex Software acquired the MuPDF project, the development " +"focus shifted on writing a new modern graphics library called \"Fitz\". " +"Fitz was originally intended as an R&D project to replace the aging " +"Ghostscript graphics library, but has instead become the rendering engine" +" powering MuPDF.\"* (Quoted from `Wikipedia " +"`_)." +msgstr "" + +#: ../../intro.rst:43 b3f34235d3034b02a4880f492a05b577 +msgid "`import fitz` is still supported for backwards compatibility." +msgstr "" + +#: ../../intro.rst:47 213da85a65254b2caf874559e9e67019 +msgid "" +"Use of legacy name `fitz` can fail if defunct package pypi.org `fitz` is " +"installed; see :ref:`problems-after-installation`." +msgstr "" + +#: ../../intro.rst:51 3577e73ae4f34a25b80069861e962f37 +msgid "License and Copyright" +msgstr "" + +#: ../../intro.rst:52 874d0e2d51844efea0695f5bd19c58c2 +msgid "" +"In order to comply with MuPDF’s dual licensing model, PyMuPDF has entered" +" into an agreement with Artifex who has the right to sublicense PyMuPDF " +"to third parties." +msgstr "" + +#: ../../intro.rst:54 0b731cd148964d8db2bd16ba896ea1f9 +msgid "" +"PyMuPDF and MuPDF are now available under both, open-source AGPL and " +"commercial license agreements. Please read the full text of the AGPL " +"license agreement, available in the distribution material (file COPYING) " +"and `here `_, to ensure that " +"your use case complies with the guidelines of the license. If you " +"determine you cannot meet the requirements of the AGPL, please contact " +"`Artifex `_ for more information regarding " +"a commercial license." +msgstr "" + +#: ../../intro.rst:56 473e50eb75544325969f223a85c2172e +msgid "Artifex is the exclusive commercial licensing agent for MuPDF." +msgstr "" + +#: ../../intro.rst:58 777d916fb52441aabdd18216e4977bf9 +msgid "" +"Artifex, the Artifex logo, MuPDF, and the MuPDF logo are registered " +"trademarks of Artifex Software Inc. © 2022 Artifex Software, Inc. All " +"rights reserved." +msgstr "" + +#: ../../version.rst:3 ac529f29c7004bdab4f7da3dbb922e12 +msgid "" +"This documentation covers **PyMuPDF v1.24.10** features as of " +"**2024-09-02 00:00:01**." +msgstr "" + +#: ../../version.rst:5 d0267645ce504840843677d4a3060147 +msgid "" +"The major and minor versions of |PyMuPDF| and |MuPDF| will always be " +"the same. Only the third qualifier (patch level) may deviate from that of" +" |MuPDF|." +msgstr "" + +#: ../../version.rst:7 f26be46a33a04d54965c32772883e3ab +msgid "" +"Typically PyMuPDF is released more frequently than MuPDF so it will often" +" be the case that the patch level of PyMuPDF will be greater than the " +"embedded MuPDF." +msgstr "" + +#: ../../version.rst:11 ccdc763817ae4046bebe37eeb819b308 +msgid "For example PyMuPDF-1.24.5 contains MuPDF-1.24.2." +msgstr "" + +#: ../../version.rst:13 d116a1d660c74042baf5a7bd51e852b0 +msgid "Also see `pymupdf_version` and `mupdf_version`." +msgstr "" + +#: ../../intro.rst:66 7fed4e7b825746bbbcf0054fbe740a34 +msgid "Footnotes" +msgstr "" + +#: ../../intro.rst:67 4cbcfdb2063e434a839290ebb3369077 +msgid "" +"PyMuPDF generally only supports Python versions that are still maintained" +" by the Python Software Foundation. Once a Python version is being " +"retired, PyMuPDF support will also be ended. This means that wheels for a" +" retired Python platform will no longer be provided, and that Python " +"language features may be used that did not exist in the retired Python " +"version." +msgstr "" + +#: ../../footer.rst:60 4201ffb7de1543f7bcbcec3989cad1e5 +msgid "This documentation covers all versions up to |version|." +msgstr "" + +#: ../../footer.rst:-1 9fc4919ea2ad4bbe8a478f523c247979 +msgid "Discord logo" +msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/irect.mo b/docs/locales/ja/LC_MESSAGES/irect.mo new file mode 100644 index 000000000..47e876fcc Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/irect.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/irect.po b/docs/locales/ja/LC_MESSAGES/irect.po new file mode 100644 index 000000000..10efe4d16 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/irect.po @@ -0,0 +1,525 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 9da9a1f0d8ad4810a00d9c3ffb986077 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 c119c5ee601a454eb3d762c5116445d1 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 e312792ce16248f98f71d027cbbe1d64 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../irect.rst:7 ddef2d577f7c46fca0650922f46a8a81 +msgid "IRect" +msgstr "" + +#: ../../irect.rst:9 64079ef1a00e45ee981d0d272179f2e3 +msgid "" +"IRect is a rectangular bounding box, very similar to :ref:`Rect`, except " +"that all corner coordinates are integers. IRect is used to specify an " +"area of pixels, e.g. to receive image data during rendering. Otherwise, " +"e.g. considerations concerning emptiness and validity of rectangles also " +"apply to this class. Methods and attributes have the same names, and in " +"many cases are implemented by re-using the respective :ref:`Rect` " +"counterparts." +msgstr "" +"IRect(整数長方形)は、:ref:`Rect` " +"と非常に似ているが、すべての角の座標が整数であるという点が異なります。IRectは、レンダリング中に画像データを受け取る領域を指定するために使用されます。それ以外にも、長方形の空白や有効性に関する考慮事項が適用されます。メソッドと属性は同じ名前を持ち、多くの場合、対応する" +" :ref:`Rect` の対応部分を再利用して実装されています。" + +#: ../../irect.rst:12 ab97696201f6481cb7a9151183d74b19 +msgid "**Attribute / Method**" +msgstr "**属性/メソッド**" + +#: ../../irect.rst:12 efcc94105da54f018a86e821fec2d8cd +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../irect.rst:14 dc94e84e46af4660871003b4bcf7a4f4 +msgid ":meth:`IRect.contains`" +msgstr "" + +#: ../../irect.rst:14 92ea7aa5de9a48a9b5ac185c1794a7e5 +msgid "checks containment of another object" +msgstr "他のオブジェクトの含まれているかを確認します。" + +#: ../../irect.rst:15 124cec4d00074c519fbf6ce8ac42a723 +msgid ":meth:`IRect.get_area`" +msgstr "" + +#: ../../irect.rst:15 2053892c2f4c422ab19ae350ecbe23e8 +msgid "calculate rectangle area" +msgstr "長方形の面積を計算します。" + +#: ../../irect.rst:16 a5bdba1573d44547b8b0673e7cd37dce +msgid ":meth:`IRect.intersect`" +msgstr "" + +#: ../../irect.rst:16 d1f0c881eaab47f3a362862bee94a3ec +msgid "common part with another rectangle" +msgstr "別の長方形との共通部分を取得します。" + +#: ../../irect.rst:17 11746224755e4a3b9da4cfe7f4269d11 +msgid ":meth:`IRect.intersects`" +msgstr "" + +#: ../../irect.rst:17 924de332ec5b4656a802d2e85f6927e7 +msgid "checks for non-empty intersection" +msgstr "非空の交差をチェックします。" + +#: ../../irect.rst:18 2132f387192449f0ac037eb9a66f3777 +msgid ":meth:`IRect.morph`" +msgstr "" + +#: ../../irect.rst:18 530d3654b8ba4812a36e35dfad701ddf +msgid "transform with a point and a matrix" +msgstr "ポイントと行列を使用して変換します。" + +#: ../../irect.rst:19 a3769ab66bee466a8c94fb47b13ff0e2 +msgid ":meth:`IRect.torect`" +msgstr "" + +#: ../../irect.rst:19 d7e2a8f91121496a914e133895de1d66 +msgid "matrix that transforms to another rectangle" +msgstr "別の長方形に変換する行列。" + +#: ../../irect.rst:20 81cff3d0715f416da3509d8bb9b099ce +msgid ":meth:`IRect.norm`" +msgstr "" + +#: ../../irect.rst:20 444fb6d41f6c4e34b0fceb673ba3dc81 +msgid "the Euclidean norm" +msgstr "ユークリッドノルム。" + +#: ../../irect.rst:21 c5fbf520ee1e47958587d44173e70ad4 +msgid ":meth:`IRect.normalize`" +msgstr "" + +#: ../../irect.rst:21 9fadec1aa0fa42899e37379ddec7c9d1 +msgid "makes a rectangle finite" +msgstr "長方形を有限にします。" + +#: ../../irect.rst:22 ad485c24a3a34720ba6cdce4aa57cd30 +msgid ":attr:`IRect.bottom_left`" +msgstr "" + +#: ../../irect.rst:22 0f87509f1d3043d49419e4bf77e7d5b6 +msgid "bottom left point, synonym *bl*" +msgstr "左下のポイント、同義語の *bl* " + +#: ../../irect.rst:23 6b2e1bb32097415c9539dbb2ae43ca43 +msgid ":attr:`IRect.bottom_right`" +msgstr "" + +#: ../../irect.rst:23 4888cfd6ae27499381d64bedabc834ae +msgid "bottom right point, synonym *br*" +msgstr "右下のポイント、同義語の *br* " + +#: ../../irect.rst:24 ef3263604d9e406da1601cab4d8bac3e +msgid ":attr:`IRect.height`" +msgstr "" + +#: ../../irect.rst:24 ed8730e7d3eb43a38680c2490024ea3a +msgid "height of the rectangle" +msgstr "長方形の高さ" + +#: ../../irect.rst:25 c4c26d29c9dc490fb9ce003895f46551 +msgid ":attr:`IRect.is_empty`" +msgstr "" + +#: ../../irect.rst:25 2c44a8bb62b7482a8ea229f24405e7a2 +msgid "whether rectangle is empty" +msgstr "長方形が空かどうか" + +#: ../../irect.rst:26 62f693c4f27d4da28c1416bdf2633620 +msgid ":attr:`IRect.is_infinite`" +msgstr "" + +#: ../../irect.rst:26 583895dab1fb49df97e3c485ef00b41b +msgid "whether rectangle is infinite" +msgstr "長方形が無限であるかどうか" + +#: ../../irect.rst:27 bf8fa54275764b07a1cc4cf60133e69e +msgid ":attr:`IRect.rect`" +msgstr "" + +#: ../../irect.rst:27 67cc7f8daa2844e0a01efba201d0c056 +msgid "the :ref:`Rect` equivalent" +msgstr ":ref:`Rect` の同等物" + +#: ../../irect.rst:28 8f89c6c745e54747aed947c8dd097575 +msgid ":attr:`IRect.top_left`" +msgstr "" + +#: ../../irect.rst:28 9593c3c6673e4014bc7f1f38035db854 +msgid "top left point, synonym *tl*" +msgstr "左上のポイント、同義語の *tl* " + +#: ../../irect.rst:29 ccd49d6f1ebd447dbc14e886376eeedd +msgid ":attr:`IRect.top_right`" +msgstr "" + +#: ../../irect.rst:29 f5d3a7dbdfc2490e90041a1cce9c6e78 +msgid "top_right point, synonym *tr*" +msgstr "右上のポイント、同義語の *tr* " + +#: ../../irect.rst:30 092c64736cb9423fbade8738a18def52 +msgid ":attr:`IRect.quad`" +msgstr "" + +#: ../../irect.rst:30 02794414a93b473e9430f42716496fae +msgid ":ref:`Quad` made from rectangle corners" +msgstr "長方形の角から作成された四角形" + +#: ../../irect.rst:31 8f40f8f7548f45d69bb49e4340ea0e8c +msgid ":attr:`IRect.width`" +msgstr "" + +#: ../../irect.rst:31 9a5de017b1ca40b7b146ca25383c738e +msgid "width of the rectangle" +msgstr "長方形の幅" + +#: ../../irect.rst:32 0c0b410baf734144851d33016cbdddd0 +msgid ":attr:`IRect.x0`" +msgstr "" + +#: ../../irect.rst:32 219bd087640142edad26e0cef53ce452 +msgid "X-coordinate of the top left corner" +msgstr "左上隅のX座標" + +#: ../../irect.rst:33 41ac1de7703f44bcbe5f615ba2dce3ea +msgid ":attr:`IRect.x1`" +msgstr "" + +#: ../../irect.rst:33 ae41326e3d484ab889ee2b9c5ff3a7de +msgid "X-coordinate of the bottom right corner" +msgstr "右下隅のX座標" + +#: ../../irect.rst:34 a3223a0657ab45b087b1c100476b4a17 +msgid ":attr:`IRect.y0`" +msgstr "" + +#: ../../irect.rst:34 19062c693a07420a8c8d2f3b3c1eb750 +msgid "Y-coordinate of the top left corner" +msgstr "左上隅のY座標" + +#: ../../irect.rst:35 c1c7be5dd2ef41a2b6d447cb43a6a07e +msgid ":attr:`IRect.y1`" +msgstr "" + +#: ../../irect.rst:35 be7ae048d8ea46edaf23968ef9c8bce1 +msgid "Y-coordinate of the bottom right corner" +msgstr "右下隅のY座標" + +#: ../../irect.rst:38 d9ba47e2fb7f40a5b94b36b2e8690bd1 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../irect.rst:50 1c5e8f4e34f047098b41ead038224c50 +msgid "" +"Overloaded constructors. Also see examples below and those for the " +":ref:`Rect` class." +msgstr "オーバーロードされたコンストラクタ。また、以下の例と :ref:`Rect` クラスの例も参照してください。" + +#: ../../irect.rst:52 50def18ce8504003afbebdd0a9a55ac1 +msgid "If another irect is specified, a **new copy** will be made." +msgstr "別のirectが指定された場合、**新しいコピー** が作成されます。" + +#: ../../irect.rst:54 9b2e5c01c35646e3ad3647dfb830f028 +msgid "" +"If sequence is specified, it must be a Python sequence type of 4 numbers " +"(see :ref:`SequenceTypes`). Non-integer numbers will be truncated, non-" +"numeric values will raise an exception." +msgstr "シーケンスが指定された場合、それは4つの数値のPythonシーケンス型である必要があります(:ref:`SequenceTypes`)。整数以外の数値は切り捨てられ、数値以外の値は例外を発生させます。" + +#: ../../irect.rst:56 2cd32524540f40ac87e1e408344a827d +msgid "The other parameters mean integer coordinates." +msgstr "その他のパラメータは整数座標を意味します。" + +#: ../../irect.rst:61 840c50265ae3462c89c54b22ce7e67f9 +msgid "" +"Calculates the area of the rectangle and, with no parameter, equals " +"*abs(IRect)*. Like an empty rectangle, the area of an infinite rectangle " +"is also zero." +msgstr "長方形の面積を計算し、パラメータなしで *abs(IRect)* に等しいです。空の長方形と同様に、無限の長方形の面積もゼロです。" + +#: ../../irect.rst 4f00a177e1bc4d3d89dda38f1909fa94 +#: 4fc3aa5f280242c986288acbfa1b8175 9b5b83039e0c4fdeaf7685aa05b6bb16 +#: aa8fbd2f167e422e9b6b3aaac458e3a9 b69ba3e050904df0afe8a6d582ef2363 +#: ed922b3129dd4c73b25f962b90ae50aa +msgid "Parameters" +msgstr "" + +#: ../../irect.rst:63 3be2e3c29aba4f5189e0cbb7f1144057 +msgid "" +"Specify required unit: respective squares of \"px\" (pixels, default), " +"\"in\" (inches), \"cm\" (centimeters), or \"mm\" (millimeters)." +msgstr "" +"必要な単位を指定します:「px」(ピクセル、デフォルト)、 「in」(インチ)、 " +"「cm」(センチメートル)、または「mm」(ミリメートル)のそれぞれの平方。" + +#: ../../irect.rst 4e47019f7b9b4864aef1988ad0bcbdd9 +#: 8a353e9947ea4ef2b6e0a6872cda119d 9f7bfdd3aed843358d17fe22401cb884 +#: a7c70439c9f2475ba9e98c25daaa8da2 +msgid "Return type" +msgstr "" + +#: ../../irect.rst:69 782b0a38b1a44a3fb17923582226e601 +msgid "" +"The intersection (common rectangular area) of the current rectangle and " +"*ir* is calculated and replaces the current rectangle. If either " +"rectangle is empty, the result is also empty. If either rectangle is " +"infinite, the other one is taken as the result -- and hence also infinite" +" if both rectangles were infinite." +msgstr "" +"現在の長方形と *ir* " +"の交差(共通の長方形領域)を計算し、現在の長方形に置き換えます。どちらかの長方形が空の場合、結果も空です。どちらかの長方形が無限の場合、もう一方が結果として取られます。したがって、両方の長方形が無限である場合、結果も無限です。" + +#: ../../irect.rst:71 fcebc17df9614b1882a8032f5fcee80a +msgid "Second rectangle." +msgstr "第2の長方形。" + +#: ../../irect.rst:75 bca6b46c6fce4c0391d350804618c7b0 +msgid "" +"Checks whether *x* is contained in the rectangle. It may be " +":data:`rect_like`, :data:`point_like` or a number. If *x* is an empty " +"rectangle, this is always true. Conversely, if the rectangle is empty " +"this is always ``False``, if *x* is not an empty rectangle and not a " +"number. If *x* is a number, it will be checked to be one of the four " +"components. *x in irect* and *irect.contains(x)* are equivalent." +msgstr "" +"*x* が長方形に含まれているかどうかを確認します。それは " +":data:`rect_like`、:data:`point_like`、または数値であるかもしれません。*x* " +"が空の長方形の場合、これは常に真です。逆に、長方形が空の場合、これは常に偽です。xが空の長方形でなく、数値でない場合、これは常に偽です。 *x* " +"が数値の場合、それは4つの構成要素の1つであることが確認されます。 *x* in *irect* および *irect.contains(x)* " +"は同等です" + +#: ../../irect.rst:77 2a8a12b2850d4bfba7bba2b40cc06f55 +msgid "the object to check." +msgstr "確認するオブジェクト。" + +#: ../../irect.rst:84 50064e8a49c54a6da52a3b0900a114f3 +msgid "" +"Checks whether the rectangle and the :data:`rect_like` \"r\" contain a " +"common non-empty :ref:`IRect`. This will always be ``False`` if either is" +" infinite or empty." +msgstr "" +"長方形と :data:`rect_like`「r」が共通の非空 :ref:`IRect` " +"を含むかどうかをチェックします。どちらかが無限または空の場合、これは常に ``False`` になります。" + +#: ../../irect.rst:86 b0c988e57dce4071a0e518ca1e93877f +msgid "the rectangle to check." +msgstr "チェックする長方形。" + +#: ../../irect.rst:92 1bbbf78a89784f7db6abf366bea409ae +msgid "New in version 1.19.3" +msgstr "バージョン1.19.3で新登場" + +#: ../../irect.rst:94 f3726b71047d42a58f879f9c00944d3f +msgid "" +"Compute the matrix which transforms this rectangle to a given one. See " +":meth:`Rect.torect`." +msgstr "この長方形を指定された長方形に変換する行列を計算します。:meth:`Rect.torect` を参照してください。" + +#: ../../irect.rst:96 dcc19ae43b814fe4b56fa555133432ce +msgid "the target rectangle. Must not be empty or infinite." +msgstr "ターゲットの長方形。空または無限であってはなりません。" + +#: ../../irect.rst:97 d473a4edcd124daab7417f12570976d0 +msgid ":ref:`Matrix`" +msgstr "" + +#: ../../irect.rst e2953f28a3ff42908f0e4d0811230263 +#: f368ffdbb210464aac1693355694b761 +msgid "Returns" +msgstr "" + +#: ../../irect.rst:98 056021e1cc164996aed45008297df188 +msgid "" +"a matrix `mat` such that `self * mat = rect`. Can for example be used to " +"transform between the page and the pixmap coordinates." +msgstr "`self * mat = rect` となるような行列 `mat` 。たとえば、ページとピクセルマップの座標間を変換するために使用できます。" + +#: ../../irect.rst:103 88b85325f704446180fd037b13f438d8 +msgid "New in version 1.17.0" +msgstr "新機能 バージョン1.17.0" + +#: ../../irect.rst:105 2b9036d190fb49e0b5f461aba0df7c7f +msgid "Return a new quad after applying a matrix to it using a fixed point." +msgstr "指定された固定点に行列を適用した後の新しい四角形を返します。" + +#: ../../irect.rst:107 3ff780516b4146f3ae0d550b2b4a36ce +msgid "the fixed point." +msgstr "固定点。" + +#: ../../irect.rst:108 1fbd651a14a1443596afb4e080f12015 +msgid "the matrix." +msgstr "行列。" + +#: ../../irect.rst:109 f932937ceb634ac7890e4cb6a194a02b +msgid "" +"a new :ref:`Quad`. This a wrapper of the same-named quad method. If " +"infinite, the infinite quad is returned." +msgstr "新しい :ref:`Quad`。これは同じ名前のquadメソッドのラッパーです。無限であれば、無限の四角形が返されます。" + +#: ../../irect.rst:113 613eaa02d46e4a9eab41724a4e10baa2 +msgid "New in version 1.16.0" +msgstr "新機能 バージョン1.16.0" + +#: ../../irect.rst:115 e05020824bd94a418f7786caff3a68e5 +msgid "" +"Return the Euclidean norm of the rectangle treated as a vector of four " +"numbers." +msgstr "四角形を4つの数字のベクトルとして扱った場合のユークリッドノルムを返します。" + +#: ../../irect.rst:119 deaf5a4b9f8440fd8fe7b802f78ec9b9 +msgid "" +"Make the rectangle finite. This is done by shuffling rectangle corners. " +"After this, the bottom right corner will indeed be south-eastern to the " +"top left one. See :ref:`Rect` for a more details." +msgstr "" +"四角形を有限にします。これは四角形の角を入れ替えることによって行われます。この操作後、右下の角は確かに左上の角の南東になります。詳細については、:ref:`Rect`" +" を参照してください。" + +#: ../../irect.rst:125 55613d17f4fd4a458eca520133551eb0 +msgid "Equals *Point(x0, y0)*." +msgstr "*Point(x0, y0)* と同等です。" + +#: ../../irect.rst 1fb6ace84c4d4e938a1ff7c0b704b38f +#: 49d4afd12c6a42c4a29bf9c269eadfe0 5b4a83c92ca649dda2e8fa0e691bbd89 +#: 6cbe1cdc288d48ee9c6b6bd68a21fea6 7559f9666807471eaa667884ab2f79b5 +#: 8d8f0f2b15df45779323571cd1a1b751 c86cbf20e5234741bd7bc1e8fd9a44ca +#: d2aad704feb949ebb36c3a4e729a8d5f d793441a05ce4bc48b85052e697d053e +#: dc8594c4fac847ac8161c79a639811ce e8ad8d35ebcb4d0795598f07a336334c +#: f52efa790e584d859f3797654913b784 f66efb6c2f134fa69083a7c6b1a857ab +#: ff031729ca2a4ce1ad1e658f12ce0cfe +msgid "type" +msgstr "" + +#: ../../irect.rst:127 ../../irect.rst:135 ../../irect.rst:143 +#: ../../irect.rst:151 308d3cb353bb49ae94161cf7dc7218d3 +#: 3249e89f3b7c4ac2bea38ea5214c52c5 785ff830f1234dc69f3c770992c2a410 +#: 884b7d46c20742c0a7283bee8b40a43f +msgid ":ref:`Point`" +msgstr "" + +#: ../../irect.rst:133 921cb3a2a7cf4cbf806f9d772749b917 +msgid "Equals *Point(x1, y0)*." +msgstr "*Point(x1, y0)* と同等です。" + +#: ../../irect.rst:141 13776e76c48248909500e8fe82c9d0a4 +msgid "Equals *Point(x0, y1)*." +msgstr "*Point(x0, y1)* と同等です。" + +#: ../../irect.rst:149 99f201fd44fd4ed0a1f49b75400f0d0f +msgid "Equals *Point(x1, y1)*." +msgstr "*Point(x1, y1)* と同等です。" + +#: ../../irect.rst:155 63204ddea7a14d2592a7ad0f9dd74810 +msgid "The :ref:`Rect` with the same coordinates as floats." +msgstr "浮動小数点座標と同じ座標を持つ :ref:`Rect` です。" + +#: ../../irect.rst:157 2d7b927314b342cb96c8fb15d3b8e3b9 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../irect.rst:161 57dfabd4e9b049e4900273a9ec4cb817 +msgid "The quadrilateral *Quad(irect.tl, irect.tr, irect.bl, irect.br)*." +msgstr "四角形 *Quad(irect.tl, irect.tr, irect.bl, irect.br)です* 。" + +#: ../../irect.rst:163 a199b927484848978ec222114a9da0cc +msgid ":ref:`Quad`" +msgstr "" + +#: ../../irect.rst:167 1d437103ee594f9b944058db4631bb9d +msgid "Contains the width of the bounding box. Equals *abs(x1 - x0)*." +msgstr "境界ボックスの幅を含みます。*abs(x1 - x0)* と同等です。" + +#: ../../irect.rst:169 ../../irect.rst:175 ../../irect.rst:181 +#: ../../irect.rst:187 ../../irect.rst:193 ../../irect.rst:199 +#: 083882bcb7354a55bf7f4dd85eef92b9 8ed88f37bd6742da83895a91a8685917 +#: 967dc9f8c2fc43b7b56954d34441687f 9958930f4f6e45b5980d2c23669ce09f +#: ac4b6a2822ce482a8dbfbf739c4ffa79 bd9b56e6197441be9f3888a17b04d9a4 +msgid "int" +msgstr "" + +#: ../../irect.rst:173 b187d2fbc4bd439e834930d74922794b +msgid "Contains the height of the bounding box. Equals *abs(y1 - y0)*." +msgstr "境界ボックスの高さを含みます。*abs(y1 - y0)* と同等です。" + +#: ../../irect.rst:179 4777545ac79b48f1a71c4b3dffbbe470 +msgid "X-coordinate of the left corners." +msgstr "左上隅のX座標です。" + +#: ../../irect.rst:185 187b3e2a62fa440b84c0265a597e11e0 +msgid "Y-coordinate of the top corners." +msgstr "上端のY座標です。" + +#: ../../irect.rst:191 4539d20b89644229aae3fcc4c1196436 +msgid "X-coordinate of the right corners." +msgstr "右上隅のX座標です。" + +#: ../../irect.rst:197 69a9d5438d1b4b199976b0b23ba70a7e +msgid "Y-coordinate of the bottom corners." +msgstr "下端のY座標です。" + +#: ../../irect.rst:203 34a8989b9f274d83b0a6c5031b749190 +msgid "``True`` if rectangle is infinite, ``False`` otherwise." +msgstr "四角形が無限の場合は ``True`` 、それ以外の場合は``False`` です。" + +#: ../../irect.rst:205 ../../irect.rst:211 c8f3501df5c14812ae13827184765c29 +#: d6f0997b994c464fa7457bb7c1cce89c +msgid "bool" +msgstr "" + +#: ../../irect.rst:209 5fea893d5a0e409aacb5452b2ffa3fcf +msgid "``True`` if rectangle is empty, ``False`` otherwise." +msgstr "四角形が空の場合は ``True``、それ以外の場合は ``False`` です。" + +#: ../../irect.rst:216 e58bd147e5934ff788a34135346e5c97 +msgid "" +"This class adheres to the Python sequence protocol, so components can be " +"accessed via their index, too. Also refer to :ref:`SequenceTypes`." +msgstr "" +"このクラスはPythonのシーケンスプロトコルに従っているため、コンポーネントはインデックスを使用してアクセスできます。また、:ref:`SequenceTypes`" +" も参照してください。" + +#: ../../irect.rst:217 0d59de499da146b2914c95df6f414f34 +msgid "" +"Rectangles can be used with arithmetic operators -- see chapter " +":ref:`Algebra`." +msgstr "また、四角形は算術演算子と一緒に使用できます - :ref:`Algebra` を参照してください。" + +#: ../../footer.rst:60 7dccd50b2f2443208394ecb0c842c1dd +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/link.mo b/docs/locales/ja/LC_MESSAGES/link.mo new file mode 100644 index 000000000..1b71282c8 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/link.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/link.po b/docs/locales/ja/LC_MESSAGES/link.po new file mode 100644 index 000000000..92e152bbf --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/link.po @@ -0,0 +1,434 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 789ef3e179a7439395eb0d05014100e0 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d6bae0d656e54c42bbd6fabc354253ae +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 c2865acb728b48b0be9ebcbf4ca09b69 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../link.rst:7 c688c9e8319141a9be77a5892a13809d +msgid "Link" +msgstr "Link (リンク)" + +#: ../../link.rst:8 52cdea6624d9492a8f1367f1fee8b4f8 +msgid "" +"Represents a pointer to somewhere (this document, other documents, the " +"internet). Links exist per document page, and they are forward-chained to" +" each other, starting from an initial link which is accessible by the " +":attr:`Page.first_link` property." +msgstr "" +"リンクは、どこかへのポインタを表します(この文書、他の文書、インターネットなど)。リンクは文書のページごとに存在し、最初のリンクは " +":attr:`Page.first_link` プロパティでアクセスできます。リンクはお互いにフォワードチェーンされ、最初のリンクから始まります。" + +#: ../../link.rst:10 ae08ffa8e9f4429bb99c55016bdcc961 +msgid "" +"There is a parent-child relationship between a link and its page. If the " +"page object becomes unusable (closed document, any document structure " +"change, etc.), then so does every of its existing link objects -- an " +"exception is raised saying that the object is \"orphaned\", whenever a " +"link property or method is accessed." +msgstr "リンクとそのページとの親子関係があります。ページオブジェクトが使用できなくなると(ドキュメントがクローズされた場合、ドキュメント構造が変更された場合など)、そのページに存在するリンクオブジェクトも使用できなくなります。リンクのプロパティまたはメソッドにアクセスしようとすると、「オーファン」というオブジェクトが発生し、例外が発生します。" + +#: ../../link.rst:13 540b33af6e004a60a5c462085df023e5 +msgid "**Attribute**" +msgstr "**属性**" + +#: ../../link.rst:13 f47bd46a654a45a4ba851c09a22eea60 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../link.rst:15 f7c6af0af71e4dd19562174e5497a60d +msgid ":meth:`Link.set_border`" +msgstr "" + +#: ../../link.rst:15 1792d06c2e574c19a323b62f42c2d45c +msgid "modify border properties" +msgstr "境界線のプロパティを変更します" + +#: ../../link.rst:16 d9891c2fa8284703a452d3df1a0653fd +msgid ":meth:`Link.set_colors`" +msgstr "" + +#: ../../link.rst:16 5c1f73fa45454debbad1d5891c4378cd +msgid "modify color properties" +msgstr "色のプロパティを変更します" + +#: ../../link.rst:17 0af10d50600346ea8663643e1de0eb8b +msgid ":meth:`Link.set_flags`" +msgstr "" + +#: ../../link.rst:17 6e5bbdf2f3f94fb5a120e1443763be30 +msgid "modify link flags" +msgstr "リンクフラグを変更します" + +#: ../../link.rst:18 6dce9d5a48824fff84e601d4aecdc691 +msgid ":attr:`Link.border`" +msgstr "" + +#: ../../link.rst:18 604357ac107245f993ae3b1650ebcf79 +msgid "border characteristics" +msgstr "境界線の特性" + +#: ../../link.rst:19 56981bc862ef422cac26e01e4529b563 +msgid ":attr:`Link.colors`" +msgstr "" + +#: ../../link.rst:19 d2b934f42ed647d39eafa1cf42d776d1 +msgid "border line color" +msgstr "境界線の色" + +#: ../../link.rst:20 53bcd3c8bba14f7bbc40266833181e80 +msgid ":attr:`Link.dest`" +msgstr "" + +#: ../../link.rst:20 f923e0b3b9744816a353b2e8fa3e2210 +msgid "points to destination details" +msgstr "宛先の詳細を指します" + +#: ../../link.rst:21 5eed957ff70b41e98d0eb9285cd96980 +msgid ":attr:`Link.is_external`" +msgstr "" + +#: ../../link.rst:21 effa067d28d44d16bb8fe8e503593a5d +#, fuzzy +msgid "checks if the link is an external destination" +msgstr "外部宛先ですか?" + +#: ../../link.rst:22 bbe68bce0387407795c1edda367ef2ab +msgid ":attr:`Link.flags`" +msgstr "" + +#: ../../link.rst:22 ad43a0946bcc486ca5aa72ecdd1d02bf +msgid "link annotation flags" +msgstr "リンク注釈のフラグ" + +#: ../../link.rst:23 eef3e8a36ea145629db1a703e87f575d +msgid ":attr:`Link.next`" +msgstr "" + +#: ../../link.rst:23 627116875da8492cbc9685c5ef7289cc +msgid "points to next link" +msgstr "次のリンクを指します" + +#: ../../link.rst:24 08387b2d8317483d9cc61f3452cea422 +msgid ":attr:`Link.rect`" +msgstr "" + +#: ../../link.rst:24 58a914c6c1424572881b90bcc4392db4 +#, fuzzy +msgid "clickable area in untransformed coordinates" +msgstr "変換されていない座標内のクリック可能な領域" + +#: ../../link.rst:25 b115d680a21b46a6b85d654dd3a49b79 +msgid ":attr:`Link.uri`" +msgstr "" + +#: ../../link.rst:25 969ecf8c8c9341bf8ee037a0c9ee9a8b +msgid "link destination" +msgstr "リンクの宛先" + +#: ../../link.rst:26 253e0f31052f499b9e4346932b58c7d9 +msgid ":attr:`Link.xref`" +msgstr "" + +#: ../../link.rst:26 2e407687ff9648c88d884bc113153b63 +msgid ":data:`xref` number of the entry" +msgstr "エントリの :data:`xref` 番号" + +#: ../../link.rst:29 86c8b11333584703a538c055122d3356 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../link.rst:35 12ea65b799b44f38acf6a8810d508ddb +msgid "PDF only: Change border width and dashing properties." +msgstr "PDFのみ:境界線の幅と破線プロパティを変更します。" + +#: ../../link.rst:37 3336d65932154a7e8220f704075033f6 +msgid "" +"*(Changed in version 1.16.9)* Allow specification without using a " +"dictionary. The direct parameters are used if *border* is not a " +"dictionary." +msgstr "*(バージョン1.16.9で変更)* 辞書を使用せずに仕様を許可します。*border* が辞書でない場合、直接のパラメータが使用されます。" + +#: ../../link.rst 25b24c0fd56f4c30b5aed0630d379730 +#: 9ee61ab2fe8942bfabb7ea27592c0b6d +msgid "Parameters" +msgstr "" + +#: ../../link.rst:39 528de9f4f21d4757bc32323b7c87d926 +msgid "" +"a dictionary as returned by the :attr:`border` property, with keys " +"*\"width\"* (*float*), *\"style\"* (*str*) and *\"dashes\"* (*sequence*)." +" Omitted keys will leave the resp. property unchanged. To e.g. remove " +"dashing use: *\"dashes\": []*. If dashes is not an empty sequence, " +"\"style\" will automatically be set to \"D\" (dashed)." +msgstr "" +":attr:`border` プロパティによって返される辞書で、キー " +"*\"width\"*(*浮動小数点*)、*\"style\"*(*str*)、*\"dashes\"*(*シーケンス*)を持っています。省略されたキーは、対応するプロパティを変更しません。例えば点線を削除するには、*「dashes」:[]*" +" を使用します。dashes が空のシーケンスでない場合、「style」は自動的に \"D\"(点線)に設定されます。" + +#: ../../link.rst:41 ../../link.rst:42 ../../link.rst:43 ../../link.rst:54 +#: 15adf27929424619b20cae84b36ad609 8795a2d1dc084425af561ecd4c4c5474 +#: b0c1d9cdb57843a98c0a60c9326e7883 c13273a1875d45e793d46962a4b7f884 +msgid "see above." +msgstr "上記を参照してください。" + +#: ../../link.rst:47 4f104147b1394836b55a5df9c3e2ab47 +msgid "PDF only: Changes the \"stroke\" color." +msgstr "PDFのみ: \"stroke\" 色を変更します。" + +#: ../../link.rst:49 16686de7e6af4e18ab0ae3e6db98bf9b +msgid "" +"In PDF, links are a subtype of annotations technically and **do not " +"support fill colors**. However, to keep a consistent API, we do allow " +"specifying a `fill=` parameter like with all annotations, which will be " +"ignored with a warning." +msgstr "" +"PDFでは、リンクは技術的には注釈のサブタイプであり、**塗りつぶし色はサポートしていません**。ただし、一貫したAPIを維持するために、すべての注釈と同様に" +" `fill=` パラメータを指定することを許可しており、警告とともに無視されます。" + +#: ../../link.rst:51 59d3c81f089043dc9af7a58db867cfcc +msgid "" +"*(Changed in version 1.16.9)* Allow colors to be directly set. These " +"parameters are used if *colors* is not a dictionary." +msgstr "*(バージョン1.16.9で変更)*: 色を直接設定することを許可します。これらのパラメータは、*colors* が辞書でない場合に使用されます。" + +#: ../../link.rst:53 8c2d9c9d399c48e98a0d4c825a606c31 +msgid "" +"a dictionary containing color specifications. For accepted dictionary " +"keys and values see below. The most practical way should be to first make" +" a copy of the *colors* property and then modify this dictionary as " +"required." +msgstr "" +"色仕様を含む辞書。受け入れられる辞書のキーと値については以下を参照してください。最も実用的な方法は、まず *colors* " +"プロパティのコピーを作成し、必要に応じてこの辞書を修正することです。" + +#: ../../link.rst:58 ../../link.rst:65 3104b3d644e844db944ae3665b6c89cb +#: be4fe26181df4bd18dfb13ff2743cbac +msgid "*New in v1.18.16*" +msgstr "*(新バージョン1.18.16で追加)*" + +#: ../../link.rst:60 383edfd8cffc48289a05af780555e233 +msgid "" +"Set the PDF `/F` property of the link annotation. See " +":meth:`Annot.set_flags` for details. If not a PDF, this method is a no-" +"op." +msgstr "" +"リンク注釈のPDF `/F` プロパティを設定します。詳細については、:meth:`Annot.set_flags` " +"を参照してください。PDFでない場合、このメソッドは無効です。" + +#: ../../link.rst:67 8ace46d310c8449cb730957a35026758 +msgid "" +"Return the link annotation flags, an integer (see :attr:`Annot.flags` for" +" details). Zero if not a PDF." +msgstr "リンク注釈フラグ、整数(詳細については :attr:`Annot.flags` を参照)。PDFでない場合、ゼロです。" + +#: ../../link.rst:72 4f66fbc6806c4cc3afd7d0c728786903 +msgid "" +"Meaningful for PDF only: A dictionary of two tuples of floats in range `0" +" <= float <= 1` specifying the *stroke* and the interior (*fill*) colors." +" If not a PDF, ``None`` is returned. As mentioned above, the fill color " +"is always `None` for links. The stroke color is used for the border of " +"the link rectangle. The length of the tuple implicitly determines the " +"colorspace: 1 = GRAY, 3 = RGB, 4 = CMYK. So `(1.0, 0.0, 0.0)` stands for " +"RGB color red. The value of each float *f* is mapped to the integer value" +" *i* in range 0 to 255 via the computation *f = i / 255*." +msgstr "" +"(意味があるのはPDFのみ): ストロークと内部(*塗りつぶし*)の色を指定する `0 <= float <= 1` " +"の2つの浮動小数点数のタプルを含む辞書。PDFでない場合、``None`` が返されます。上記のように、リンクの塗りつぶしの色は常に `None`" +" です。ストロークの色はリンクの境界のために使用されます。タプルの長さは暗黙的に色空間を決定します: 1 = GRAY、3 = RGB、4 = " +"CMYK。したがって、 `(1.0、0.0、0.0)` はRGBカラーの赤を表します。各浮動小数点数 *f* の値は、*f = i / 255* " +"の計算を介して範囲0から255の整数値 *i* にマップされます。" + +#: ../../link.rst a7911594ade24fedb4eb4d5aa32134e3 +#: fd0f1aaed0b7451b864e11bf131bfc85 +msgid "Return type" +msgstr "戻り値のタイプ" + +#: ../../link.rst:78 8c427c80ff3040929a19af74adb2502f +msgid "" +"Meaningful for PDF only: A dictionary containing border characteristics. " +"It will be ``None`` for non-PDFs and an empty dictionary if no border " +"information exists. The following keys can occur:" +msgstr "" +"(意味があるのはPDFのみ): ボーダーの特性を含む辞書。非PDFの場合、またはボーダー情報が存在しない場合、``None`` " +"になります。次のキーが含まれる場合があります:" + +#: ../../link.rst:80 4a20f270fb5040f285e44464113e3f4e +msgid "" +"*width* -- a float indicating the border thickness in points. The value " +"is -1.0 if no width is specified." +msgstr "*width* - ポイント単位でのボーダーの厚さを示す浮動小数点数。幅が指定されていない場合、値は-1.0になります。" + +#: ../../link.rst:82 031e7d00747b4e7f97f54d9125824c13 +msgid "" +"*dashes* -- a sequence of integers specifying a line dash pattern. *[]* " +"means no dashes, *[n]* means equal on-off lengths of *n* points, longer " +"lists will be interpreted as specifying alternating on-off length values." +" See the :ref:`AdobeManual` page 126 for more detail." +msgstr "" +"*dashes* - ラインダッシュパターンを指定する整数のシーケンス。*[]* はダッシュがないことを意味し、*[n]* は " +"*n*ポイントの等しいオンオフの長さを指定し、長いリストは交互にオンオフの長さ値を指定すると解釈されます。詳細については " +":ref:`AdobeManual` のページ126を参照してください。" + +#: ../../link.rst:84 b1ce9fb22af2473084abfad8dcc81664 +msgid "" +"*style* -- 1-byte border style: *S* (Solid) = solid rectangle surrounding" +" the annotation, *D* (Dashed) = dashed rectangle surrounding the link, " +"the dash pattern is specified by the *dashes* entry, *B* (Beveled) = a " +"simulated embossed rectangle that appears to be raised above the surface " +"of the page, *I* (Inset) = a simulated engraved rectangle that appears to" +" be recessed below the surface of the page, *U* (Underline) = a single " +"line along the bottom of the annotation rectangle." +msgstr "" +"*style* - 1バイトのボーダースタイル: " +"*S*(実線)=注釈を囲む実線の四角形、*D*(破線)=リンクを囲む破線の四角形、ダッシュパターンは *dashes* エントリで指定されます、 " +"*B* (ベベル)=ページの表面から持ち上げられたように見える模倣の浮き彫りの四角形、 *I* " +"(インセット)=ページの表面からくぼみがあるように見える模倣の刻まれた四角形、 *U* (下線)=注釈四角形の底部に沿った単一の線。" + +#: ../../link.rst:90 f0b94d1fd724429f9db2c8920d74db77 +msgid "The area that can be clicked in untransformed coordinates." +msgstr "クリックできる領域、変換されていない座標での" + +#: ../../link.rst 275915bb29d54074a8eee5bb003e6f35 +#: 4cc4afb263e942e982a0c610ec312fed 61d480baed10423880ce97394eef1896 +#: 70b3b447b662426ca1084fb4942fc8f6 a60b7e4f00d946b98c5bd0406ac9f8cb +#: b81ca89a6ed84d7a914f39bc42817ff7 +msgid "type" +msgstr "" + +#: ../../link.rst:92 ef517da173d042b5b2f74bba55ab4cde +msgid ":ref:`Rect`" +msgstr ":ref:`Rect` (矩形)" + +#: ../../link.rst:96 d54fc6555294411bab3c8c9b85b0fbcc +msgid "" +"A bool specifying whether the link target is outside of the current " +"document." +msgstr "現在の文書の外部にリンクの対象があるかどうかを指定するブール値。" + +#: ../../link.rst:98 64dfe441deaf43ba9c1393ca9b801ce0 +msgid "bool" +msgstr "" + +#: ../../link.rst:102 23ce8eaf99fc4747a0d479729c096f79 +msgid "" +"A string specifying the link target. The meaning of this property should " +"be evaluated in conjunction with property `is_external`:" +msgstr "⚠️" + +#: ../../link.rst:106 434842aad107476da3635f648121b5d5 +#, fuzzy +msgid "" +"`is_external` is true: `uri` points to some target outside the current " +"PDF, which may be an internet resource (`uri` starts with ``http://`` or " +"similar), another file (`uri` starts with \"file:\" or \"file://\") or " +"some other service like an e-mail address (`uri` starts with " +"``mailto:``)." +msgstr "⚠️" + +#: ../../link.rst:112 9f133b2c3fce47338ccfeb9dad429fa6 +msgid "" +"`is_external` is false: `uri` will be `None` or point to an internal " +"location. In case of PDF documents, this should either be *#nnnn* to " +"indicate a 1-based (!) page number *nnnn*, or a named location. The " +"format varies for other document types, for example " +"\"../FixedDoc.fdoc#PG_2_LNK_1\" for page number 2 (1-based) in an XPS " +"document." +msgstr "⚠️" + +#: ../../link.rst:119 47f95869926845dfa68de8ffff9c59ab +msgid "str" +msgstr "" + +#: ../../link.rst:123 05b9e3e95335428f838140f7e0e85b7b +msgid "An integer specifying the PDF :data:`xref`. Zero if not a PDF." +msgstr "PDF :data:`xref` を指定する整数。PDFでない場合はゼロ。" + +#: ../../link.rst:125 08dd0b407efc4037b5bf9715bf5410f4 +msgid "int" +msgstr "" + +#: ../../link.rst:129 19952b4d82224998827923818129feeb +msgid "The next link or ``None``." +msgstr "次のリンクまたは ``None``。" + +#: ../../link.rst:131 7091be260ac544a5955b3c6502751efb +msgid "*Link*" +msgstr "*Link* (リンク)" + +#: ../../link.rst:135 7dd1efb015df4899bf9b0392c76b6b6d +msgid "The link destination details object." +msgstr "リンクの対象の詳細オブジェクト。" + +#: ../../link.rst:137 428b1356734c407883ebe43912f1f3ea +msgid ":ref:`linkDest`" +msgstr "" + +#: ../../footer.rst:60 9de2e7e1b27c4242976413a45087b2a6 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "A string specifying the link target. " +#~ "The meaning of this property should " +#~ "be evaluated in conjunction with " +#~ "property *isExternal*. The value may be" +#~ " ``None``, in which case *isExternal " +#~ "== False*. If *uri* starts with " +#~ "*file://*, *mailto:*, or an internet " +#~ "resource name, *isExternal* is ``True``. " +#~ "In all other cases *isExternal == " +#~ "False* and *uri* points to an " +#~ "internal location. In case of PDF " +#~ "documents, this should either be *#nnnn*" +#~ " to indicate a 1-based (!) page " +#~ "number *nnnn*, or a named location. " +#~ "The format varies for other document " +#~ "types, e.g. *uri = " +#~ "'../FixedDoc.fdoc#PG_2_LNK_1'* for page number " +#~ "2 (1-based) in an XPS document." +#~ msgstr "" +#~ "リンクの対象を指定する文字列。このプロパティの意味は、プロパティ *isExternal* " +#~ "と共に評価する必要があります。値が ``None`` の場合、*isExternal == " +#~ "False* です。 *uri* が *file://、mailto:* " +#~ "、またはインターネットリソース名で始まる場合、 *isExternal* は ``True`` " +#~ "です。それ以外の場合、 *isExternal == False* で " +#~ "*uri* は内部の場所を指します。PDF文書の場合、これは1から始まるページ番号 *nnnn* " +#~ "を示すために *#nnnn* " +#~ "であるか、名前付きの場所である必要があります。他の文書タイプの場合、フォーマットは異なります。たとえば、XPS文書の場合、 " +#~ "*uri = '../FixedDoc.fdoc#PG_2_LNK_1'* " +#~ "は、2ページ(1から始まる)を示します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/linkdest.mo b/docs/locales/ja/LC_MESSAGES/linkdest.mo new file mode 100644 index 000000000..f4a5589f8 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/linkdest.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/linkdest.po b/docs/locales/ja/LC_MESSAGES/linkdest.po new file mode 100644 index 000000000..a8105ec2f --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/linkdest.po @@ -0,0 +1,294 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 c9d403d602c14550a5468c49b79ec151 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 2ce574ba495d4bef8dfc597a774b5eb2 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 8d5420280aa04b0e82c737fe2879efc1 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../linkdest.rst:7 5be0406fa37d496c8d775d5c14ccbc9b +msgid "linkDest" +msgstr "" + +#: ../../linkdest.rst:8 dbc5983be663459da1a305db6fd4b633 +msgid "" +"Class representing the `dest` property of an outline entry or a link. " +"Describes the destination to which such entries point." +msgstr "アウトラインエントリまたはリンクの `dest` プロパティを表すクラス。これらのエントリが指す先の目的地を説明します。" + +#: ../../linkdest.rst:10 fadcf1b2cb264b42a61422b1b39f9940 +msgid "" +"Up to MuPDF v1.9.0 this class existed inside MuPDF and was dropped in " +"version 1.10.0. For backward compatibility, PyMuPDF is still maintaining " +"it, although some of its attributes are no longer backed by data actually" +" available via MuPDF." +msgstr "" +"MuPDF " +"v1.9.0まで、このクラスはMuPDF内に存在し、バージョン1.10.0で削除されました。後方互換性のため、PyMuPDFはそれを引き続き維持していますが、その属性のいくつかは実際にはMuPDFを介して利用可能なデータでバックアップされていないことに注意してください。" + +#: ../../linkdest.rst:13 5cc4c7e9b4c94a61b74581d3ab546826 +msgid "**Attribute**" +msgstr "**属性**" + +#: ../../linkdest.rst:13 2bb1dd6fb2e14d74b6262ace5e110a91 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../linkdest.rst:15 4f46e630676f4c0d9573cd5caeeeec87 +msgid ":attr:`linkDest.dest`" +msgstr "" + +#: ../../linkdest.rst:15 b8176d37e0ab4c298fa79512e4e04ca7 +msgid "destination" +msgstr "目的地" + +#: ../../linkdest.rst:16 7d38469c9fd94cc5ae034b83b7dc6c89 +msgid ":attr:`linkDest.fileSpec`" +msgstr "" + +#: ../../linkdest.rst:16 c7535db1bddc446a8d9ec72cbe6ffee5 +msgid "file specification (path, filename)" +msgstr "ファイル仕様(パス、ファイル名)" + +#: ../../linkdest.rst:17 09737ba6f55b4fd39a12293d1d0d3425 +msgid ":attr:`linkDest.flags`" +msgstr "" + +#: ../../linkdest.rst:17 b064dee65c8e4774b3aaf4042b4297e5 +msgid "descriptive flags" +msgstr "説明的なフラグ" + +#: ../../linkdest.rst:18 4bcf60e4eab64157a425d9fc9067c73d +msgid ":attr:`linkDest.isMap`" +msgstr "" + +#: ../../linkdest.rst:18 07f875efd9754adc83c776866a862b02 +msgid "is this a MAP?" +msgstr "これはMAPですか?" + +#: ../../linkdest.rst:19 91af568ccac84cd5b4ee16d388bbb93e +msgid ":attr:`linkDest.isUri`" +msgstr "" + +#: ../../linkdest.rst:19 ee75600c30be45058d58b89f78d55876 +msgid "is this a URI?" +msgstr "これはURIですか?" + +#: ../../linkdest.rst:20 855d7ecb78564fbcbf4373cff63475fe +msgid ":attr:`linkDest.kind`" +msgstr "" + +#: ../../linkdest.rst:20 8649be963579431cbf6b4132d29871dc +msgid "kind of destination" +msgstr "目的地の種類" + +#: ../../linkdest.rst:21 2e3bd4e2ae394a558dbc24ba2ac241b8 +msgid ":attr:`linkDest.lt`" +msgstr "" + +#: ../../linkdest.rst:21 704e8f3a402648918640ee171f22159a +msgid "top left coordinates" +msgstr "左上の座標" + +#: ../../linkdest.rst:22 e72f8149e16f4a28b4994101fa9458fa +msgid ":attr:`linkDest.named`" +msgstr "" + +#: ../../linkdest.rst:22 7024a1c5676a4db182cde158adddcefd +msgid "name if named destination" +msgstr "名前付き目的地の名前" + +#: ../../linkdest.rst:23 a607a689ec9a430585a215fd17274636 +msgid ":attr:`linkDest.newWindow`" +msgstr "" + +#: ../../linkdest.rst:23 306056d18da1405fb16d64afcd5db2b1 +msgid "name of new window" +msgstr "新しいウィンドウの名前" + +#: ../../linkdest.rst:24 0e354263696b4dea884ce7b0e8659268 +msgid ":attr:`linkDest.page`" +msgstr "" + +#: ../../linkdest.rst:24 709b2315bb6242709a51deccd9017a09 +msgid "page number" +msgstr "ページ番号" + +#: ../../linkdest.rst:25 db8af5778b80458c8954ad3b5d8f4b1d +msgid ":attr:`linkDest.rb`" +msgstr "" + +#: ../../linkdest.rst:25 cc7df48156634225ab78f47225de634e +msgid "bottom right coordinates" +msgstr "右下の座標" + +#: ../../linkdest.rst:26 9591bd156430496fa34539728e98f4f2 +msgid ":attr:`linkDest.uri`" +msgstr "" + +#: ../../linkdest.rst:26 cd8d4153a6044983bdc26e8c29c809ad +msgid "URI" +msgstr "" + +#: ../../linkdest.rst:29 578749192c9f4502a4e8daf7c90af448 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../linkdest.rst:35 bcd161881123491fb6ea03b01fee7817 +msgid "" +"Target destination name if :attr:`linkDest.kind` is :data:`LINK_GOTOR` " +"and :attr:`linkDest.page` is *-1*." +msgstr "" +":attr:`linkDest.kind` が:data:`LINK_GOTOR` であり、:attr:`linkDest.page` が " +"*-1* の場合、対象の宛先名。" + +#: ../../linkdest.rst 24dee675d4504f8981a51fa2b8c9ae57 +#: 3c8f45e7a79841869726ce8c2db738bb 4e7db470641845cb993cb6d79edfc402 +#: 5282e07210cd45d1afe857765c18bb82 600998af429f4438b130f86a27543213 +#: 6190f4a6cbe547ee9bd7ce0672cbe0ea 6a314b30d1cd42cfb8ce9fbf698ea996 +#: 870a506b36024eee8203fa29ce90b30b a7317f9a30854058bcfea86fbe37948e +#: cefd59f82f5344b9bb86d527d790c30d df60e9fbe1294169ba1907f3adf1b9ee +#: f0cbb7037b834652bf39d488832ee865 +msgid "type" +msgstr "" + +#: ../../linkdest.rst:37 ../../linkdest.rst:43 ../../linkdest.rst:79 +#: ../../linkdest.rst:103 5bb912195e6349f8b39f6b50f9cb244b +#: 621a722c520f47be96d16809c80f9936 ef378de79abf404d8902449278613652 +#: f1c0a413cf6f4affa2d72ce50adf67ef +msgid "str" +msgstr "" + +#: ../../linkdest.rst:41 0d7b86556af7432ca01f62687b7105b8 +msgid "" +"Contains the filename and path this link points to, if " +":attr:`linkDest.kind` is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`." +msgstr "" +":attr:`linkDest.kind` が :data:`LINK_GOTOR` または :data:`LINK_LAUNCH` " +"の場合、このリンクが指すファイル名とパスが含まれています。" + +#: ../../linkdest.rst:47 e82ff43942aa42d181926cc8311a04ad +msgid "" +"A bitfield describing the validity and meaning of the different aspects " +"of the destination. As far as possible, link destinations are constructed" +" such that e.g. :attr:`linkDest.lt` and :attr:`linkDest.rb` can be " +"treated as defining a bounding box. But the flags indicate which of the " +"values were actually specified, see :ref:`linkDest Flags`." +msgstr "" +"宛先のさまざまな側面の有効性と意味を説明するビットフィールド。できる限り、リンクの宛先は、:attr:`linkDest.lt` と " +":attr:`linkDest.rb` " +"をバウンディングボックスを定義するものとして扱えるように構築されています。ただし、フラグは、値のうち実際に指定されたものを示します。:ref:`linkDest" +" Flags` を参照してください。" + +#: ../../linkdest.rst:49 ../../linkdest.rst:67 ../../linkdest.rst:91 +#: 1e2a30929fb3424ca85ef2673484279a c4abce4eafe44aa48a61e88929d22db8 +#: e6f10f094da249a08f0b1da68a5ad807 +msgid "int" +msgstr "" + +#: ../../linkdest.rst:53 5ca1397ac660408a8118784ff2e478e2 +msgid "" +"This flag specifies whether to track the mouse position when the URI is " +"resolved. Default value: False." +msgstr "このフラグは、URIが解決されるときにマウスの位置を追跡するかどうかを指定します。デフォルト値:False。" + +#: ../../linkdest.rst:55 ../../linkdest.rst:61 ../../linkdest.rst:85 +#: 03a9fad387d243f5936876e77a91a777 5d8a261398264f87bcb172afe9477166 +#: b1cc8e9c4d094ea7b9418b5001babd00 +msgid "bool" +msgstr "" + +#: ../../linkdest.rst:59 9653264615ef4612b74c934e94035162 +msgid "" +"Specifies whether this destination is an internet resource (as opposed to" +" e.g. a local file specification in URI format)." +msgstr "この宛先がインターネットリソースであるか(URI形式のローカルファイル仕様とは異なる場合)、指定します。" + +#: ../../linkdest.rst:65 572624f8e5234fcfabe2886d19f59fb8 +msgid "" +"Indicates the type of this destination, like a place in this document, a " +"URI, a file launch, an action or a place in another file. Look at " +":ref:`linkDest Kinds` to see the names and numerical values." +msgstr "" +"この宛先のタイプを示します。この文書内の場所、URI、ファイル起動、アクション、または他のファイル内の場所など。リンク宛先の種類を確認するには、:ref:`linkDest" +" Kinds` を参照してください。" + +#: ../../linkdest.rst:71 8df3c853090f4d899931e326f5459d67 +msgid "The top left :ref:`Point` of the destination." +msgstr "宛先の左上の :ref:`Point` 。" + +#: ../../linkdest.rst:73 ../../linkdest.rst:97 3c5d8c4d8bb041fc8c0f7572f332a8fa +#: 4501f09bd6dd44348a5a0d661f366be0 +msgid ":ref:`Point`" +msgstr "" + +#: ../../linkdest.rst:77 35ddae2dc20442baa69cb10014649914 +msgid "" +"This destination refers to some named action to perform (e.g. a " +"javascript, see :ref:`AdobeManual`). Standard actions provided are " +"*NextPage*, *PrevPage*, *FirstPage*, and *LastPage*." +msgstr "" +"この宛先は実行する名前付きアクションを指します(たとえば、JavaScriptなど、:ref:`AdobeManual` " +"を参照)。提供される標準アクションは、*NextPage*、*PrevPage*、*FirstPage*、および *LastPage* です。" + +#: ../../linkdest.rst:83 8f9e7bda1b8f488aa95b2e92bbb2e3d2 +msgid "If true, the destination should be launched in a new window." +msgstr "trueの場合、宛先は新しいウィンドウで起動する必要があります。" + +#: ../../linkdest.rst:89 2a3bd78c3548465da36247eac48b559e +msgid "" +"The page number (in this or the target document) this destination points " +"to. Only set if :attr:`linkDest.kind` is :data:`LINK_GOTOR` or " +":data:`LINK_GOTO`. May be *-1* if :attr:`linkDest.kind` is " +":data:`LINK_GOTOR`. In this case :attr:`linkDest.dest` contains the " +"**name** of a destination in the target document." +msgstr "" +"この宛先が指すページ番号(この文書または対象の文書内)です。:attr:`linkDest.kind` が :data:`LINK_GOTOR` " +"または :data:`LINK_GOTO` の場合にのみ設定されます。:attr:`linkDest.kind` が " +":data:`LINK_GOTOR` の場合、*-1* になる場合があります。この場合、:attr:`linkDest.dest` " +"には対象の文書内の宛先の名前が含まれます。" + +#: ../../linkdest.rst:95 c402b9a2b14a499684cc75932a7b3061 +msgid "The bottom right :ref:`Point` of this destination." +msgstr "この宛先の右下の :ref:`Point`。" + +#: ../../linkdest.rst:101 efac7e980e8e403c9d1c9d57cd1420bc +msgid "The name of the URI this destination points to." +msgstr "この宛先が指すURIの名前。" + +#: ../../footer.rst:60 d9aa1a371bcc4440af0a82a4a3ca0b1f +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/lowlevel.mo b/docs/locales/ja/LC_MESSAGES/lowlevel.mo new file mode 100644 index 000000000..4c8a5f86a Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/lowlevel.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/lowlevel.po b/docs/locales/ja/LC_MESSAGES/lowlevel.po new file mode 100644 index 000000000..2e4cb7fd9 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/lowlevel.po @@ -0,0 +1,54 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 f8b51ec0649943efad8a9aad2f2ae5f0 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 f7f6d9b162764bd5bab35f0de0f68994 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 320005d084534805bbe61a078db61350 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../lowlevel.rst:5 4f2d1ec9ba784ccaaa499966f89668e5 +msgid "Low Level Functions and Classes" +msgstr "低レベルの関数とクラス" + +#: ../../lowlevel.rst:6 b0646074fd384255aef9e51bfce2650a +msgid "" +"Contains a number of functions and classes for the experienced user. To " +"be used for special needs or performance requirements." +msgstr "経験豊富なユーザー向けの関数とクラスが多数含まれています。 特別なニーズやパフォーマンス要件に使用します。" + +#: ../../footer.rst:60 4631787f1e8f470d9ab48b85edc12753 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/matrix.mo b/docs/locales/ja/LC_MESSAGES/matrix.mo new file mode 100644 index 000000000..53e3fc3f5 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/matrix.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/matrix.po b/docs/locales/ja/LC_MESSAGES/matrix.po new file mode 100644 index 000000000..2911cbf68 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/matrix.po @@ -0,0 +1,539 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 242eb81f189c4016a548fd715a9f13b5 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 bb9d01d114264cef930ea9b831061c3e +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 fa0d02bf3c9342cea931efccacae42f7 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../matrix.rst:7 9283aaa03cc44045984fc152476c6dd2 +msgid "Matrix" +msgstr "Matrix (マトリックス)" + +#: ../../matrix.rst:9 4c0e27b65783400d8f57bf9ed2242836 +msgid "" +"Matrix is a row-major 3x3 matrix used by image transformations in MuPDF " +"(which complies with the respective concepts laid down in the " +":ref:`AdobeManual`). With matrices you can manipulate the rendered image " +"of a page in a variety of ways: (parts of) the page can be rotated, " +"zoomed, flipped, sheared and shifted by setting some or all of just six " +"float values." +msgstr "" +"マトリックスは、MuPDFでの画像変換に使用される行優先の3x3マトリックスです(これは :ref:`AdobeManual` " +"に規定されている対応するコンセプトに従っています)。マトリックスを使用することで、ページの描画画像をさまざまな方法で操作できます。ページの一部またはすべてを回転、拡大縮小、反転、せん断、シフトなどに設定することができます。これらの操作には、たった6つの浮動小数点数値のいくつかまたはすべてを設定します。" + +#: ../../matrix.rst:12 1a064bfd9cea4d4b9a529ccafe18376a +msgid "" +"Since all points or pixels live in a two-dimensional space, one column " +"vector of that matrix is a constant unit vector, and only the remaining " +"six elements are used for manipulations. These six elements are usually " +"represented by *[a, b, c, d, e, f]*. Here is how they are positioned in " +"the matrix:" +msgstr "" +"すべてのポイントまたはピクセルは2次元の空間に存在するため、そのマトリックスの1つの列ベクトルは定数ユニットベクトルであり、操作に使用されるのは残りの6つの要素のみです。これらの6つの要素は通常、" +" *[a、b、c、d、e、f]* で表されます。以下は、それらがマトリックス内でどのように配置されているかです。" + +#: ../../matrix.rst:17 2a5325813bb649909ff46687ddd1ecd4 +msgid "Please note:" +msgstr "注意点:" + +#: ../../matrix.rst:19 f08be1fa5d2044b8abb4481e4f6d8351 +msgid "" +"the below methods are just convenience functions -- everything they do, " +"can also be achieved by directly manipulating the six numerical values" +msgstr "以下のメソッドは便宜的な機能であり、行うすべてのことは、6つの数値の値を直接操作しても実現できます。" + +#: ../../matrix.rst:20 61ef9621468141248b63ac18be006ace +msgid "" +"all manipulations can be combined -- you can construct a matrix that " +"rotates **and** shears **and** scales **and** shifts, etc. in one go. If " +"you however choose to do this, do have a look at the **remarks** further " +"down or at the :ref:`AdobeManual`." +msgstr "" +"すべての操作を組み合わせることができます。1回の操作で回転、せん断、スケール、シフトなどを同時に実行するマトリックスを構築できます。ただし、これを行う場合は、後での" +" **注釈** または :ref:`AdobeManual` を参照してください。" + +#: ../../matrix.rst:23 e902ce55a5224c66a24024cd09f42fe6 +msgid "**Method / Attribute**" +msgstr "**メソッド/属性**" + +#: ../../matrix.rst:23 f1a484726d804fd4aee4fb7e2ab17da5 +msgid "**Description**" +msgstr "**説明**" + +#: ../../matrix.rst:25 d5be8262eaa44272a507cf4cfa406bfb +msgid ":meth:`Matrix.prerotate`" +msgstr "" + +#: ../../matrix.rst:25 7760e2c90c09424a8089b1c277c95a80 +msgid "perform a rotation" +msgstr "回転を実行" + +#: ../../matrix.rst:26 0346369679c541288f8bc97a641de409 +msgid ":meth:`Matrix.prescale`" +msgstr "" + +#: ../../matrix.rst:26 655619aef04c4325aed26c24acd0bb13 +msgid "perform a scaling" +msgstr "スケーリングを実行" + +#: ../../matrix.rst:27 9b7582d8caa146f9945fc115de600542 +msgid ":meth:`Matrix.preshear`" +msgstr "" + +#: ../../matrix.rst:27 d6bc460db1894fb7a3dbc07aa72ea3fe +msgid "perform a shearing (skewing)" +msgstr "せん断(歪み)を実行" + +#: ../../matrix.rst:28 b337189589084fc483443ca54f5143b9 +msgid ":meth:`Matrix.pretranslate`" +msgstr "" + +#: ../../matrix.rst:28 743965a5ace043e59e3bbd9d545f905e +msgid "perform a translation (shifting)" +msgstr "移動(シフト)を実行" + +#: ../../matrix.rst:29 13f27af0b48c4188a6fb06859bf34472 +msgid ":meth:`Matrix.concat`" +msgstr "" + +#: ../../matrix.rst:29 d1b607732c374b22956351394880f684 +msgid "perform a matrix multiplication" +msgstr "マトリックスの乗算を実行" + +#: ../../matrix.rst:30 f776b8b3c958417ba6a5507d84f6a4b6 +msgid ":meth:`Matrix.invert`" +msgstr "" + +#: ../../matrix.rst:30 06d30fd8d9d6449585c19f4c4220fc05 +msgid "calculate the inverted matrix" +msgstr "反転されたマトリックスを計算" + +#: ../../matrix.rst:31 4dfd9496705b442bab52c7c94e928c40 +msgid ":meth:`Matrix.norm`" +msgstr "" + +#: ../../matrix.rst:31 f30f0f921e0e4d3dba24fdb51f158b52 +msgid "the Euclidean norm" +msgstr "ユークリッドノルム" + +#: ../../matrix.rst:32 9298a772e17f4df2ad06786de3ec6010 +msgid ":attr:`Matrix.a`" +msgstr "" + +#: ../../matrix.rst:32 cf66ac10810b43aabd51ce1826ed79c8 +msgid "zoom factor X direction" +msgstr "ズームファクターX方向" + +#: ../../matrix.rst:33 f5bd431f29634be4813e1191c46b46ee +msgid ":attr:`Matrix.b`" +msgstr "" + +#: ../../matrix.rst:33 f93ed8344eec4c5d81c38f51e92106d3 +msgid "shearing effect Y direction" +msgstr "せん断効果Y方向" + +#: ../../matrix.rst:34 2af17db937cd4675b9a1a28f61a2304b +msgid ":attr:`Matrix.c`" +msgstr "" + +#: ../../matrix.rst:34 d5a463f548554be2834bfdce81edf2da +msgid "shearing effect X direction" +msgstr "せん断効果X方向" + +#: ../../matrix.rst:35 a4465153bdc5456f841e0b47bc167dc4 +msgid ":attr:`Matrix.d`" +msgstr "" + +#: ../../matrix.rst:35 fb6f90dbf6e54dbea14e73618d21710c +msgid "zoom factor Y direction" +msgstr "ズームファクターY方向" + +#: ../../matrix.rst:36 66ca967922ef4012870a53fe43ef16e7 +msgid ":attr:`Matrix.e`" +msgstr "" + +#: ../../matrix.rst:36 102c3e26603f45a49de9fa42d4e550ea +msgid "horizontal shift" +msgstr "水平シフト" + +#: ../../matrix.rst:37 d82356145558412a9731bd811389cc3f +msgid ":attr:`Matrix.f`" +msgstr "" + +#: ../../matrix.rst:37 22a7a9f4840048eba1eb07f4bcd128f2 +msgid "vertical shift" +msgstr "垂直シフト" + +#: ../../matrix.rst:38 ac127ca9ea1e49808e4f8d024d86b57d +msgid ":attr:`Matrix.is_rectilinear`" +msgstr "" + +#: ../../matrix.rst:38 e192cd84a84a43169016bda4ddde14d6 +msgid "true if rect corners will remain rect corners" +msgstr "角の位置が直線のままの場合はTrue" + +#: ../../matrix.rst:41 70b5f1b6920c4fa886a23f9c3030a78e +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../matrix.rst:59 7a2e0f40ae7a4551a3e45b239ebdfde8 +msgid "Overloaded constructors." +msgstr "オーバーロードされたコンストラクター。" + +#: ../../matrix.rst:61 a13dcebe0957414bbf1138038ab5f26a +msgid "" +"Without parameters, the zero matrix *Matrix(0.0, 0.0, 0.0, 0.0, 0.0, " +"0.0)* will be created." +msgstr "パラメーターなしの場合、ゼロ行列 *Matrix(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)* が作成されます。" + +#: ../../matrix.rst:63 e2ac98fa7803457a882513667a7bf0e9 +msgid "" +"*zoom-** and *shear-** specify zoom or shear values (float) and create a " +"zoom or shear matrix, respectively." +msgstr "*zoom-** および *shear-** はズームまたはシアーの値(float)を指定し、それぞれズームまたはシアーの行列を作成します。" + +#: ../../matrix.rst:65 f11d24ff5b7d49029ac2b71f8c429d70 +msgid "For \"matrix\" a **new copy** of another matrix will be made." +msgstr "\"matrix\" の場合、別の行列の **新しいコピー** が作成されます。" + +#: ../../matrix.rst:67 9a491f75b29f499c8b2459f23fe86c66 +msgid "" +"Float value \"degree\" specifies the creation of a rotation matrix which " +"rotates anti-clockwise." +msgstr "浮動小数点値 \"degree\" は、反時計回りに回転する行列を作成することを指定します。" + +#: ../../matrix.rst:69 4aea28e766aa41838ed8ce5ecbb30661 +msgid "" +"A \"sequence\" must be any Python sequence object with exactly 6 float " +"entries (see :ref:`SequenceTypes`)." +msgstr "" +"\"sequence\" は、正確に 6 つの浮動小数点エントリを持つ任意の Python " +"シーケンスオブジェクトである必要があります(:ref:`SequenceTypes`)。" + +#: ../../matrix.rst:71 89a09710a0cb4d71a6ce2fee2f9022b5 +#, fuzzy +msgid "" +"*pymupdf.Matrix(1, 1)* and *pymupdf.Matrix(pymupdf.Identity)* create " +"modifiable versions of the :ref:`Identity` matrix, which looks like *[1, " +"0, 0, 1, 0, 0]*." +msgstr "" +"*pymupdf.Matrix(1, 1)*、*pymupdf.Matrix(0.0)* および " +"*pymupdf.Matrix(pymupdf.Identity)*) は、*[1, 0, 0, 1, 0, 0]* のような " +":ref:`Identity` マトリックスの修正可能なバージョンを作成します。" + +#: ../../matrix.rst:75 d4dad4072a314438acfc6a4da4b03eb1 +msgid "New in version 1.16.0" +msgstr "バージョン 1.16.0 で新規追加" + +#: ../../matrix.rst:77 0835ef04181f45bfa17fcc8d3bcdb4dc +msgid "Return the Euclidean norm of the matrix as a vector." +msgstr "ベクトルとしての行列のユークリッドノルムを返します。" + +#: ../../matrix.rst:81 9f88266f068b4911b0a8919869076872 +msgid "" +"Modify the matrix to perform a counter-clockwise rotation for positive " +"*deg* degrees, else clockwise. The matrix elements of an identity matrix " +"will change in the following way:" +msgstr "" +"マトリックスを修正して、正の *deg* " +"度の場合は反時計回りに、それ以外の場合は時計回りに回転します。アイデンティティマトリックスの行列要素は以下のように変更されます:" + +#: ../../matrix.rst:83 ae02ec62a7c54c96a841c87528133cec +msgid "*[1, 0, 0, 1, 0, 0] -> [cos(deg), sin(deg), -sin(deg), cos(deg), 0, 0]*." +msgstr "*[1, 0, 0, 1, 0, 0] -> [cos(deg), sin(deg), -sin(deg), cos(deg), 0, 0]*。" + +#: ../../matrix.rst 02825da12cfe414aa676494dcb2379c6 +#: 2c0431eba042448d87fbb8870c49f5b7 30d95892cb07480d8952ab330c60047a +#: 3545fc3d74134524ba4aa7ae2c7f151f 89c5ebeb86bc40a084ef67cdeab2792f +#: d58e17106b014e639386f6ea4023f6c6 +msgid "Parameters" +msgstr "" + +#: ../../matrix.rst:85 99cb1715288c4114b0aa5248c7f42a42 +msgid "" +"The rotation angle in degrees (use conventional notation based on Pi = " +"180 degrees)." +msgstr "度単位での回転角度(180度ベースの従来の表記法を使用します)。" + +#: ../../matrix.rst:89 9e021489b4d447c09015bf937b5db569 +msgid "" +"Modify the matrix to scale by the zoom factors sx and sy. Has effects on " +"attributes *a* thru *d* only: *[a, b, c, d, e, f] -> [a*sx, b*sx, c*sy, " +"d*sy, e, f]*." +msgstr "" +"マトリックスを修正してズームファクター sx および sy でスケーリングします。属性 *a* から *d* にのみ影響を与えます: *[a, " +"b, c, d, e, f] -> [a*sx, b*sx, c*sy, d*sy, e, f]*。" + +#: ../../matrix.rst:91 6b157bcf2638428e8d65806254027d38 +msgid "" +"Zoom factor in X direction. For the effect see description of attribute " +"*a*." +msgstr "X方向のズームファクター。効果については属性 *a* の説明を参照してください。" + +#: ../../matrix.rst:93 5b09d5840db149e3870b1e1f012041dc +msgid "" +"Zoom factor in Y direction. For the effect see description of attribute " +"*d*." +msgstr "Y方向のズームファクター。効果については属性 *d* の説明を参照してください。" + +#: ../../matrix.rst:97 1a0cce8d493f472eb54aa98b8fe97859 +msgid "" +"Modify the matrix to perform a shearing, i.e. transformation of " +"rectangles into parallelograms (rhomboids). Has effects on attributes *a*" +" thru *d* only: *[a, b, c, d, e, f] -> [c*sy, d*sy, a*sx, b*sx, e, f]*." +msgstr "" +"行列を修正して、シアリング、つまり長方形を平行四辺形(ひし形)に変換する操作を実行します。属性 *a* から *d* にのみ影響を与えます: " +"*[a, b, c, d, e, f] -> [c*sy, d*sy, a*sx, b*sx, e, f]*。" + +#: ../../matrix.rst:99 e29c34cf958e4e698e856cf7ff6cee7c +msgid "Shearing effect in X direction. See attribute *c*." +msgstr "X 方向のシアリング効果。属性 *c* を参照してください。" + +#: ../../matrix.rst:101 272b7e47c7dd473da0b5b0cffd24cafd +msgid "Shearing effect in Y direction. See attribute *b*." +msgstr "Y 方向のシアリング効果。属性 *b* を参照してください。" + +#: ../../matrix.rst:105 779fd595204642b69291fe695bdcd322 +msgid "" +"Modify the matrix to perform a shifting / translation operation along the" +" x and / or y axis. Has effects on attributes *e* and *f* only: *[a, b, " +"c, d, e, f] -> [a, b, c, d, tx*a + ty*c, tx*b + ty*d]*." +msgstr "" +"X 軸および / または Y 軸に沿ったシフト / 移動操作を実行するために行列を修正します。属性 *e* と *f* にのみ影響を与えます: " +"*[a, b, c, d, e, f] -> [a, b, c, d, tx*a + ty*c, tx*b + ty*d]* 。" + +#: ../../matrix.rst:107 169023afabda445a8d903c49bfc6a2f0 +msgid "Translation effect in X direction. See attribute *e*." +msgstr "X 方向の移動効果。属性 *e* を参照してください。" + +#: ../../matrix.rst:109 44c22030a3fd4cf3a8151f058791ada8 +msgid "Translation effect in Y direction. See attribute *f*." +msgstr "Y 方向の移動効果。属性 *f* を参照してください。" + +#: ../../matrix.rst:113 d0911abb5f66408f9d6c1e1c3b4746c1 +msgid "" +"Calculate the matrix product *m1 * m2* and store the result in the " +"current matrix. Any of *m1* or *m2* may be the current matrix. Be aware " +"that matrix multiplication is not commutative. So the sequence of *m1*, " +"*m2* is important." +msgstr "" +"行列の積 *m1* * *m2* を計算し、その結果を現在の行列に格納します。*m1* または *m2* " +"のいずれかが現在の行列である場合があります。行列の乗算は可換ではないことに注意してください。したがって、*m1*、*m2* の順序が重要です。" + +#: ../../matrix.rst:115 fcce53ed3d36473fbe3db1e09a8f1092 +msgid "First (left) matrix." +msgstr "最初の(左側)行列。" + +#: ../../matrix.rst:118 6b5b3806cabd4bd4a877e8f580a07b4f +msgid "Second (right) matrix." +msgstr "2 番目の(右側)行列。" + +#: ../../matrix.rst:123 aa87c24417e544109feb48ff1438da6b +msgid "" +"Calculate the matrix inverse of *m* and store the result in the current " +"matrix. Returns *1* if *m* is not invertible (\"degenerate\"). In this " +"case the current matrix **will not change**. Returns *0* if *m* is " +"invertible, and the current matrix is replaced with the inverted *m*." +msgstr "" +"行列 *m* の逆行列を計算し、その結果を現在の行列に格納します。*m* が逆行列を持たない場合(\"degenerate\")は 1 " +"を返します。この場合、現在の行列は *変更されません* 。m が逆行列を持つ場合は 0 を返し、現在の行列は m の逆行列で置き換えられます。" + +#: ../../matrix.rst:125 4a37a5474816409f896a43b32b9a6626 +msgid "Matrix to be inverted. If not provided, the current matrix will be used." +msgstr "逆行列を計算する行列。提供されない場合、現在の行列が使用されます。" + +#: ../../matrix.rst 9c255f598db04f59af29ffbbbcd01ef4 +msgid "Return type" +msgstr "" + +#: ../../matrix.rst:132 03e20c492eae41fa992f3f498f061cc5 +msgid "" +"Scaling in X-direction **(width)**. For example, a value of 0.5 performs " +"a shrink of the **width** by a factor of 2. If a < 0, a left-right flip " +"will (additionally) occur." +msgstr "X方向 **(幅)** のスケーリング。たとえば、0.5の値は **幅** を2倍に縮小します。a < 0の場合、左右反転が追加で発生します。" + +#: ../../matrix.rst 1a83029961ca4b98a55f2db76d32479d +#: a254ab6af2f341a58d187cd0a411b834 ab09365dd4d74d78920abb2e3bec5ba7 +#: ab8ed60c547045afbb16035c829c7a60 b64a657b9e624feb9ebf9e2ae156286e +#: c9ce4c12a5c8466f8eb2ab183296a608 e8f893b6ef034364a5e313cbf43d296d +msgid "type" +msgstr "" + +#: ../../matrix.rst:134 ../../matrix.rst:140 ../../matrix.rst:146 +#: ../../matrix.rst:152 ../../matrix.rst:158 ../../matrix.rst:164 +#: 2892d51d34f54837b8fa610fe9f7bf7c 42984f053cc34da49ee3bd9a96176f11 +#: 4701381cc7c64be2ac8d7413d68ada2d 48facbed1ac343818adba55314cec682 +#: a1a352394aa843e28090dd8af03562e4 aa6fe8f2ae7e4de1b3ff8512997565d5 +msgid "float" +msgstr "" + +#: ../../matrix.rst:138 462adab49c6f4a57b29d4a1fbfa57ab9 +msgid "" +"Causes a shearing effect: each `Point(x, y)` will become `Point(x, y - " +"b*x)`. Therefore, horizontal lines will be \"tilt\"." +msgstr "" +"シアリング効果を引き起こします:各 `Point(x, y)` は `Point(x、y - b * x)` " +"になります。したがって、水平線は「傾斜」します。" + +#: ../../matrix.rst:144 2082a80b283c46719733c3dd819c13c7 +msgid "" +"Causes a shearing effect: each `Point(x, y)` will become `Point(x - c*y, " +"y)`. Therefore, vertical lines will be \"tilt\"." +msgstr "" +"シアリング効果を引き起こします:各 `Point(x, y)` は `Point(x - c * y、y)` " +"になります。したがって、垂直線は「傾斜」します。" + +#: ../../matrix.rst:150 53d9b5618cae47e0b71bff64769323a0 +msgid "" +"Scaling in Y-direction **(height)**. For example, a value of 1.5 performs" +" a stretch of the **height** by 50%. If d < 0, an up-down flip will " +"(additionally) occur." +msgstr "Y方向 **(高さ)** でのスケーリング。たとえば、1.5の値は **高さ** を50%伸ばします。d < 0の場合、上下反転が追加で発生します。" + +#: ../../matrix.rst:156 a958758f21104e2587d2e914f97025d3 +msgid "" +"Causes a horizontal shift effect: Each *Point(x, y)* will become *Point(x" +" + e, y)*. Positive (negative) values of *e* will shift right (left)." +msgstr "" +"水平シフト効果を引き起こします:各 *Point(x, y)* は *Point(x + e、y)* " +"になります。eの正の(負の)値は右に(左に)シフトします。" + +#: ../../matrix.rst:162 8cc9a9a3c2c244939a316c8f2b137673 +msgid "" +"Causes a vertical shift effect: Each *Point(x, y)* will become *Point(x, " +"y - f)*. Positive (negative) values of *f* will shift down (up)." +msgstr "" +"垂直シフト効果を引き起こします:各 *Point(x, y)* は *Point(x、y - f)* になります。*f* " +"の正の(負の)値は下に(上に)シフトします。" + +#: ../../matrix.rst:168 a127c1fe09704b199335c60561824a04 +msgid "" +"Rectilinear means that no shearing is present and that any rotations are " +"integer multiples of 90 degrees. Usually this is used to confirm that " +"(axis-aligned) rectangles before the transformation are still axis-" +"aligned rectangles afterwards." +msgstr "矩形の意味は、シアリングが存在せず、回転が90度の整数倍であることを示します。通常、これは変換前の(軸に沿った)矩形が変換後も軸に沿った矩形であることを確認するために使用されます。" + +#: ../../matrix.rst:170 7749c08090474193b3e9618acea65fea +msgid "bool" +msgstr "" + +#: ../../matrix.rst:174 e715af845f104d2296b2501b0234bc3d +msgid "" +"This class adheres to the Python sequence protocol, so components can be " +"accessed via their index, too. Also refer to :ref:`SequenceTypes`." +msgstr "" +"このクラスはPythonのシーケンスプロトコルに従っているため、コンポーネントにはインデックスを使用できます。また、:ref:`SequenceTypes`" +" を参照してください。" + +#: ../../matrix.rst:175 30b635dfd9454bc98764215fe2a30188 +msgid "" +"Matrices can be used with arithmetic operators almost like ordinary " +"numbers: they can be added, subtracted, multiplied or divided -- see " +"chapter :ref:`Algebra`." +msgstr "" +"行列は、通常の数値のようにほとんどの算術演算子を使用して操作できます。行列は加算、減算、乗算、または除算できます。ジオメトリオブジェクトの " +":ref:`Algebra`。" + +#: ../../matrix.rst:176 489e66e1271449209169986876aeae44 +msgid "" +"Matrix multiplication is **not commutative** -- changing the sequence of " +"the multiplicands will change the result in general. So it can quickly " +"become unclear which result a transformation will yield." +msgstr "" +"行列の乗算は " +"**可換ではありません**。乗算要因のシーケンスを変更すると、一般に結果が変わります。そのため、変換がどの結果を生じるかがすぐに不明確になることがあります。" + +#: ../../matrix.rst:180 8fd74d196a8441baae8ce70a6e44d0cc +msgid "Examples" +msgstr "例" + +#: ../../matrix.rst:181 6bc017c89376483da69adad4db926e9e +msgid "" +"Here are examples that illustrate some of the achievable effects. All " +"pictures show some text, inserted under control of some matrix and " +"relative to a fixed reference point (the red dot)." +msgstr "以下は、いくつかの可能な効果を示す例です。すべての図は、固定された参照点(赤いドット)に対するある行列の制御下で挿入されたテキストを示しています。" + +#: ../../matrix.rst:183 db2952a1261c40cfb514e60890c7bf62 +msgid "The :ref:`Identity` matrix performs no operation." +msgstr ":ref:`Identity` 行列は操作を行いません。" + +#: ../../matrix.rst:188 3e1fc92010bd4f62b3586eb0b776853d +msgid "" +"The scaling matrix `Matrix(2, 0.5)` stretches by a factor of 2 in " +"horizontal, and shrinks by factor 0.5 in vertical direction." +msgstr "スケーリング行列 `Matrix(2, 0.5)` は、水平方向に2倍の拡大を行い、垂直方向には0.5倍の縮小を行います。" + +#: ../../matrix.rst:193 87cec5421f2941e086a897be4f86452e +msgid "" +"Attributes :attr:`Matrix.e` and :attr:`Matrix.f` shift horizontally and, " +"respectively vertically. In the following 10 to the right and 20 down." +msgstr "" +"属性 :attr:`Matrix.e` および :attr:`Matrix.f` " +"は、それぞれ水平方向および垂直方向にシフトします。以下の場合、右に10、下に20シフトします。" + +#: ../../matrix.rst:198 20e64ee4b27d42d5ad61736843af1d2d +msgid "A negative :attr:`Matrix.a` causes a left-right flip." +msgstr "負の :attr:`Matrix.a` は左右反転を引き起こします。" + +#: ../../matrix.rst:203 12bc585057774d82800e2ea0708e4ecf +msgid "A negative :attr:`Matrix.d` causes an up-down flip." +msgstr "負の :attr:`Matrix.d` は上下反転を引き起こします。" + +#: ../../matrix.rst:208 45214abb5b9a4f55a40ff250e076b168 +msgid "Attribute :attr:`Matrix.b` tilts upwards / downwards along the x-axis." +msgstr "属性 M :attr:`Matrix.b` はx軸に沿って上向き/下向きに傾きます。" + +#: ../../matrix.rst:213 0e764434a3c54d7fa6ae19ef01e56a79 +msgid "Attribute :attr:`Matrix.c` tilts left / right along the y-axis." +msgstr "属性 :attr:`Matrix.c` はy軸に沿って左に傾く/右に傾きます。" + +#: ../../matrix.rst:218 ec1225c9881d466385194686def4c961 +msgid "" +"Matrix `Matrix(beta)` performs counterclockwise rotations for positive " +"angles `beta`." +msgstr "Matrix `Matrix(beta)` は、正の角度 `beta` の反時計回りの回転を実行します。" + +#: ../../matrix.rst:223 ba4611ee874a4629bcac703ea876fba5 +msgid "Show some effects on a rectangle::" +msgstr "" + +#: ../../footer.rst:60 4bd2797b4f5b4d08a256131b3743ec2d +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/module.mo b/docs/locales/ja/LC_MESSAGES/module.mo new file mode 100644 index 000000000..d9f6ba1a2 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/module.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/module.po b/docs/locales/ja/LC_MESSAGES/module.po new file mode 100644 index 000000000..d7a532ac0 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/module.po @@ -0,0 +1,574 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 dfc70723dfaa484db5f621fc61d3ec61 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 b0cc1f27976c49c7af0245511e43b241 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 36341b6f96de494b8f0ce5847f8137be +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../module.rst:7 e5bea3ee72c64515898c8d01ff1d6e90 +msgid "Command line interface" +msgstr "" + +#: ../../module.rst:9 3908cfc655284232afd64fb5450b3834 +msgid "New in version 1.16.8" +msgstr "バージョン 1.16.8 で新たに追加されました" + +#: ../../module.rst:11 58d929a5bf2a48c29541dd80bb1f8eb3 +msgid "" +"PyMuPDF can also be used from the command line to perform utility " +"functions. This feature should obsolete writing some of the most basic " +"scripts." +msgstr "PyMuPDFは、ユーティリティ機能を実行するためにモジュールとしてコマンドラインで使用することもできます。この機能により、最も基本的なスクリプトの記述が廃止される可能性があります。" + +#: ../../module.rst:13 e6ea3c8327d34594bbe782dc25f0eff5 +msgid "" +"Admittedly, there is some functional overlap with the MuPDF CLI `mutool`." +" On the other hand, PDF embedded files are no longer supported by MuPDF, " +"so PyMuPDF is offering something unique here." +msgstr "" +"確かに、MuPDF CLI `mutool` " +"といくつかの機能的な重複があります。一方で、PDFの埋め込みファイルはもはやMuPDFではサポートされていないため、PyMuPDFはここで独自の機能を提供しています。" + +#: ../../module.rst:16 272f549f0f5f4c6bbb78632a8cdb5c70 +msgid "Invocation" +msgstr "呼び出し" + +#: ../../module.rst:18 c9201247b07a463abc4d936939474629 +msgid "The command-line interface can be invoked in two ways." +msgstr "コマンドラインインターフェースは2つの方法で呼び出すことができます。" + +#: ../../module.rst:20 2567496a57544136bbe0fad7c7253f05 +msgid "Use the installed `pymupdf` command::" +msgstr "インストールされた `pymupdf` のコマンドを使用します。" + +#: ../../module.rst:24 9cb80171d54c40b4bba248563751d406 +msgid "Or use Python's `-m` switch with PyMuPDF's `pymupdf` module::" +msgstr "" + +#: ../../module.rst:31 45619289ca954ed7a361a9ee981b3f5c +msgid "General remarks:" +msgstr "一般的な注意事項:" + +#: ../../module.rst:33 721eb638b1ec42fca6a6bbdc7ae5bcbd +msgid "" +"Request help via `\"-h\"`, resp. command-specific help via `\"command " +"-h\"`." +msgstr "ヘルプを要求するには、`\"-h\"` 、またはコマンド固有のヘルプを要求するには `\"command -h\"` を使用します。" + +#: ../../module.rst:34 f5e98e734f074d048f1006b8adf1d172 +msgid "Parameters may be abbreviated where this does not introduce ambiguities." +msgstr "曖昧さが生じない場所では、パラメータは省略形で指定できます。" + +#: ../../module.rst:35 1c8417bb973646218f662af487334d79 +msgid "" +"Several commands support parameters `-pages` and `-xrefs`. They are " +"intended for down-selection. Please note that:" +msgstr "" +"いくつかのコマンドは、 `-pages` と `-xrefs` " +"のパラメータをサポートしています。これらは選択範囲を絞るためのものです。以下に注意してください:" + +#: ../../module.rst:37 672f2e99185b4bc6be816a31e3d2c0d4 +msgid "**page numbers** for this utility must be given **1-based**." +msgstr "このユーティリティでは、**ページ番号** は1から始まる必要があります。" + +#: ../../module.rst:38 68a1ba46f6334910accedc9683dcaa9f +msgid "valid :data:`xref` numbers start at 1." +msgstr "有効な :data:`xref` 番号は1から始まります。" + +#: ../../module.rst:39 d33d3e1e349343a496f05dca1d4a1744 +msgid "" +"Specify a comma-separated list of either *single* integers or integer " +"*ranges*. A **range** is a pair of integers separated by one hyphen " +"\"-\". Integers must not exceed the maximum page, resp. xref number. To " +"specify that maximum, the symbolic variable \"N\" may be used. Integers " +"or ranges may occur several times, in any sequence and may overlap. If in" +" a range the first number is greater than the second one, the respective " +"items will be processed in reversed order." +msgstr "" +"シングル整数または整数の範囲のコンマ区切りのリストを指定します。範囲は、ハイフン \" - \" " +"で区切られた整数のペアです。整数は、最大ページまたは xref 番号を超えてはいけません。最大値を指定するには、記号変数 \"N\" " +"を使用できます。整数または範囲は複数回、任意の順序で重複して出現する可能性があります。範囲内の最初の数値が2番目の数値よりも大きい場合、それらのアイテムは逆順で処理されます。" + +#: ../../module.rst:41 89de8f9bcd8c40fb89ea6f1caef719fc +msgid "How to use the module inside your script::" +msgstr "スクリプト内でモジュールを使用する方法::" + +#: ../../module.rst:50 8afb7c82d0884c95b010d5f44df6cc07 +msgid "" +"Use the following 2-liner and compile it with `Nuitka " +"`_ in standalone mode. This will give " +"you a CLI executable with all the module's features, that can be used on " +"all compatible platforms without Python, PyMuPDF or MuPDF being " +"installed." +msgstr "" +"以下の2行のコードを使用し、 `Nuitka `_ " +"をスタンドアロンモードでコンパイルします。これにより、Python、PyMuPDF、またはMuPDFがインストールされていないすべての互換プラットフォームで使用できる、モジュールのすべての機能を備えたCLI実行可能ファイルが得られます。" + +#: ../../module.rst:59 fc63e122807f48caa2fb4686ab628c23 +msgid "Cleaning and Copying" +msgstr "クリーニングとコピー" + +#: ../../module.rst:63 2cf8ef7d22a6443bb761dcc73f99ddb9 +msgid "" +"This command will optimize the PDF and store the result in a new file. " +"You can use it also for encryption, decryption and creating sub " +"documents. It is mostly similar to the MuPDF command line utility " +"*\"mutool clean\"*::" +msgstr "" +"このコマンドは、PDFを最適化し、その結果を新しいファイルに保存します。また、暗号化、復号化、およびサブドキュメントの作成にも使用できます。これは、ほとんど" +" MuPDF のコマンドラインユーティリティ \"mutool clean\" と似ています::" + +#: ../../module.rst:96 6f79e1e9b8a74e2b99fcb0aef05c68bb +msgid "" +"If you specify \"-pages\", be aware that only page-related objects are " +"copied, **no document-level items** like e.g. embedded files." +msgstr "「-pages」を指定する場合、ページ関連のオブジェクトのみがコピーされることに注意してください。埋め込みファイルのようなドキュメントレベルのアイテムはコピーされません。" + +#: ../../module.rst:98 d2ee7904c033483e9b988ec472f77821 +msgid "Please consult :meth:`Document.save` for the parameter meanings." +msgstr "パラメータの意味については、 :meth:`Document.save` をご参照ください。" + +#: ../../module.rst:102 636d626a5d2b41a1bb191041838460ac +msgid "Extracting Fonts and Images" +msgstr "フォントと画像の抽出" + +#: ../../module.rst:103 27adb23c648540a69f463d58b2d83db2 +msgid "Extract fonts or images from selected PDF pages to a desired directory::" +msgstr "選択したPDFページからフォントや画像を指定したディレクトリに抽出します::" + +#: ../../module.rst:123 6ca52a328ede4b26a250f28cf8a17bda +msgid "" +"**Image filenames** are built according to the naming scheme: **\"img-" +"xref.ext\"**, where \"ext\" is the extension associated with the image " +"and \"xref\" the :data:`xref` of the image PDF object." +msgstr "" +"**画像のファイル名** は、命名規則に従って構築されます: **\"img-xref.ext\"** で、\"ext\" " +"は画像に関連する拡張子であり、\"xref\" は画像PDFオブジェクトの :data:`xref` です。" + +#: ../../module.rst:125 c295828c27c44f53ae6e3f67e33114cc +msgid "" +"**Font filenames** consist of the fontname and the associated extension. " +"Any spaces in the fontname are replaced with hyphens \"-\"." +msgstr "" +"**フォントのファイル名** は、フォント名と関連する拡張子から構成されます。フォント名にスペースがある場合、ハイフン \"-\" " +"で置き換えられます。" + +#: ../../module.rst:127 4a6a50bd636f4e71b0759d0d4ba1ee9e +msgid "The output directory must already exist." +msgstr "出力ディレクトリはすでに存在している必要があります。" + +#: ../../module.rst:129 cb225d0a4c184a0c9a8114e362ae7c99 +msgid "" +"Except for output directory creation, this feature is **functionally " +"equivalent** to and obsoletes `this script `_." +msgstr "" +"出力ディレクトリの作成を除いて、この機能は `このスクリプト `_ " +"と機能的に同等であり、これを使うことで廃止できます。" + +#: ../../module.rst:133 db0389129c6f4603b099fcbac916bac4 +msgid "Joining PDF Documents" +msgstr "PDF文書の結合" + +#: ../../module.rst:134 e1e150c4ccb748d9a1b38e2dc5d53937 +msgid "To join several PDF files specify::" +msgstr "複数のPDFファイルを結合するには、次のように指定します::" + +#: ../../module.rst:153 d821855113c84e1f827da087e866e131 +msgid "" +"Each input must be entered as **\"filename,password,pages\"**. Password " +"and pages are optional." +msgstr "各入力は「ファイル名、パスワード、ページ」の形式で入力してください。パスワードとページはオプションです。" + +#: ../../module.rst:154 864440e040904b3dbd8826a88ca62bb4 +msgid "" +"The password entry **is required** if the \"pages\" entry is used. If the" +" PDF needs no password, specify two commas." +msgstr "パスワードエントリは「ページ」エントリが使用される場合に必要です。PDFにパスワードが必要ない場合は、2つのコンマを指定してください。" + +#: ../../module.rst:155 41c937061d0a4fe084316ec44a8ee609 +msgid "" +"The **\"pages\"** format is the same as explained at the top of this " +"section." +msgstr "「ページ」のフォーマットは、このセクションの先頭で説明したものと同じです。" + +#: ../../module.rst:156 cf411ea13a4f4d19b11ed8f5a76dfc0f +msgid "" +"Each input file is immediately closed after use. Therefore you can use " +"one of them as output filename, and thus overwrite it." +msgstr "各入力ファイルは使用後すぐに閉じられます。したがって、出力ファイル名の1つとして使用し、上書きすることができます。" + +#: ../../module.rst:159 e4c131ae039b4418abdb28361a9582ea +msgid "Example: To join the following files" +msgstr "例: 以下のファイルを結合する" + +#: ../../module.rst:161 158bba0e88494d6c8b2e21d84d98747a +msgid "**file1.pdf:** all pages, back to front, no password" +msgstr "**file1.pdf:** すべてのページ、最前面から最後尾、パスワードなし" + +#: ../../module.rst:162 ebc6c0f2a85e4cd787edeff16bde6324 +msgid "**file2.pdf:** last page, first page, password: \"secret\"" +msgstr "**file2.pdf:** 最後のページ、最前面、パスワード: \"secret\"" + +#: ../../module.rst:163 14bef7c6b3a24f30ad5d7927fda4d2df +msgid "**file3.pdf:** pages 5 to last, no password" +msgstr "**file3.pdf:** 5ページから最後尾、パスワードなし" + +#: ../../module.rst:165 34718d67ba6f4ceb835b3fcec6f7f449 +msgid "and store the result as **output.pdf** enter this command:" +msgstr "そして結果を **output.pdf** として保存するには、次のコマンドを入力してください:" + +#: ../../module.rst:167 d854ddfa44d74e31bbab9e89051d7c16 +msgid "" +"*pymupdf join -o output.pdf file1.pdf,,N-1 file2.pdf,secret,N,1 " +"file3.pdf,,5-N*" +msgstr "" + +#: ../../module.rst:171 1be01ded6df946e0a1446dbc7a5d9b6b +msgid "Low Level Information" +msgstr "低レベル情報" + +#: ../../module.rst:173 ec9fccebd1f04046a8f4a1fe13b47f2f +msgid "" +"Display PDF internal information. Again, there are similarities to " +"*\"mutool show\"*::" +msgstr "PDFの内部情報を表示します。再度、 *「mutool show」* との類似点があります。" + +#: ../../module.rst:194 1d48764f8b534337a574a260b536215d +msgid "Examples::" +msgstr "例::" + +#: ../../module.rst:243 f0c4d5c50c5d4d609d5cb99d5eff95bb +msgid "Embedded Files Commands" +msgstr "埋め込みファイルコマンド" + +#: ../../module.rst:245 08ac7756e144469b8a268a500e298a13 +msgid "" +"The following commands deal with embedded files -- which is a feature " +"completely removed from MuPDF after v1.14, and hence from all its command" +" line tools." +msgstr "" +"以下のコマンドは埋め込みファイルに関するもので、MuPDF " +"v1.14以降から完全に削除され、そのためすべてのコマンドラインツールからも削除されています。" + +#: ../../module.rst:248 38384c90d3754e32b411967490a3aee9 +msgid "Information" +msgstr "情報" + +#: ../../module.rst:250 0967bb1bc6a84f4da15cbfd97e2a652b +msgid "Show the embedded file names (long or short format)::" +msgstr "埋め込みファイルの名前を表示します(長いフォーマットまたは短いフォーマット)::" + +#: ../../module.rst:266 58da34953aff4db9b3ecd9964e7303b5 +msgid "Example::" +msgstr "例::" + +#: ../../module.rst:287 165603d52c0d41d19398ba5cfc6db9de +msgid "Detailed output would look like this per entry::" +msgstr "詳細な出力は、各エントリごとに以下のようになります::" + +#: ../../module.rst:297 ce60d876119142f5b2890126aab69f93 +msgid "Extraction" +msgstr "抽出" + +#: ../../module.rst:299 8b751a2f410b472c9abb5af1fc140c8e +msgid "Extract an embedded file like this::" +msgstr "埋め込みファイルを以下のように抽出します::" + +#: ../../module.rst:316 fddf683ac2a14567908174cf3fc43284 +msgid "" +"For details consult :meth:`Document.embfile_get`. Example (refer to " +"previous section)::" +msgstr "詳細については、Document.embfile_get() を参照してください。例(前のセクションを参照)::" + +#: ../../module.rst:322 1185f1a9df3146a9bc268d1196f67add +msgid "Deletion" +msgstr "削除" + +#: ../../module.rst:323 c505828edb234ccbba8146976192985a +msgid "Delete an embedded file like this::" +msgstr "以下のようにして埋め込みファイルを削除します::" + +#: ../../module.rst:339 3de3e80b402e493f9f1c1bd8dd6335c2 +msgid "For details consult :meth:`Document.embfile_del`." +msgstr "詳細については、 :meth:`Document.embfile_del` を参照してください。" + +#: ../../module.rst:342 d1672507de264f41aa7190163aabad14 +msgid "Insertion" +msgstr "挿入" + +#: ../../module.rst:343 a8086839b2cb43f3b9c6dbb070737442 +msgid "Add a new embedded file using this command::" +msgstr "このコマンドを使用して新しい埋め込みファイルを追加します::" + +#: ../../module.rst:363 32dbe086f564423fa12bebec433d3953 +msgid "" +"*\"NAME\"* **must not** already exist in the PDF. For details consult " +":meth:`Document.embfile_add`." +msgstr "" +"*\"NAME\"* は既にPDF内に存在していてはいけません。詳細については、 :meth:`Document.embfile_add` " +"を参照してください。" + +#: ../../module.rst:366 2e6d06b46c294e1c8264e4ea71cd898a +msgid "Updates" +msgstr "更新" + +#: ../../module.rst:367 e7a81b672cda44c3a9843c5f74d77e8e +msgid "Update an existing embedded file using this command::" +msgstr "このコマンドを使用して既存の埋め込みファイルを更新します::" + +#: ../../module.rst:392 ae4810297f254019ab7b519fa66be7df +msgid "" +"Use this method to change meta-information of the file -- just omit the " +"*\"PATH\"*. For details consult :meth:`Document.embfile_upd`." +msgstr "" +"ファイルのメタ情報を変更するために、この方法を使用します - 単に「PATH」を省略してください。詳細については、 " +":meth:`Document.embfile_upd` を参照してください。" + +#: ../../module.rst:396 102481707f57444bb1adca3feb2403f4 +msgid "Copying" +msgstr "コピー" + +#: ../../module.rst:397 c58bf8a5cd564738917c96c89b0948d4 +msgid "Copy embedded files between PDFs::" +msgstr "PDF間で埋め込みファイルをコピーします::" + +#: ../../module.rst:421 4c2a7e2a5cbc471eac1e4b6ca1e127ea +msgid "Text Extraction" +msgstr "テキスト抽出 " + +#: ../../module.rst:422 14d209f140b94907b55464548176d2af +msgid "New in v1.18.16" +msgstr "v1.18.16で新登場" + +#: ../../module.rst:424 1712031a4ff548c6b84feb50f0674bf5 +msgid "" +"Extract text from arbitrary :ref:`supported " +"documents` to a textfile. Currently, there are " +"three output formatting modes available: simple, block sorting and " +"reproduction of physical layout." +msgstr "" +"任意の :ref:`サポートされたドキュメント ` " +"からテキストをテキストファイルに抽出します。現在、3つの出力フォーマットモードが利用可能です:シンプル、ブロックソート、物理的なレイアウトの再現。" + +#: ../../module.rst:426 982ae84a7f67448283c284d17ab22d53 +msgid "" +"**Simple** text extraction reproduces all text as it appears in the " +"document pages -- no effort is made to rearrange in any particular " +"reading order." +msgstr "" +"**シンプル** なテキスト抽出は、ドキュメントページに表示される通りのすべてのテキストを再現します - " +"特定の読み取り順序に再配置する努力は行われません。" + +#: ../../module.rst:427 2b8226df4353428eac33d189ecdbc907 +msgid "" +"**Block sorting** sorts text blocks (as identified by MuPDF) by ascending" +" vertical, then horizontal coordinates. This should be sufficient to " +"establish a \"natural\" reading order for basic pages of text." +msgstr "" +"**ブロックソート** " +"は、テキストブロック(MuPDFによって識別される)を垂直座標、水平座標の昇順に並べ替えます。これは、基本的なテキストページの「自然な」読み取り順序を確立するために十分です。" + +#: ../../module.rst:428 248d0d12667c48f3a622e6ca10a94124 +msgid "" +"**Layout** strives to reproduce the original appearance of the input " +"pages. You can expect results like this (produced by the command `pymupdf" +" gettext -pages 1 demo1.pdf`):" +msgstr "" +"**レイアウト** は、入力ページの元の外観を再現しようとします。次のような結果が期待できます(コマンド `pymupdf gettext " +"-pages 1 demo1.pdf` によって生成されたもの):" + +#: ../../module.rst:433 019fb45a3bb2420395d210f0c241bb84 +msgid "" +"The \"gettext\" command offers a functionality similar to the CLI tool " +"`pdftotext` by XPDF software, http://www.foolabs.com/xpdf/ -- this is " +"especially true for \"layout\" mode, which combines that tool's `-layout`" +" and `-table` options." +msgstr "" +"「gettext」コマンドは、XPDFソフトウェアのCLIツール `pdftotext` " +"と似た機能を提供します。http://www.foolabs.com/xpdf/ - " +"これは特に「レイアウト」モードに当てはまり、このモードはそのツールの `-layout` と `-table` オプションを組み合わせています。" + +#: ../../module.rst:437 ba127522f3494c259b7032abdb483fb4 +msgid "" +"After each page of the output file, a formfeed character, `hex(12)` is " +"written -- even if the input page has no text at all. This behavior can " +"be controlled via options." +msgstr "" +"出力ファイルの各ページの後に、フォームフィード文字 `hex(12)` が書き込まれます - " +"入力ページにテキストがない場合でもです。この動作はオプションを介して制御できます。" + +#: ../../module.rst:439 30d0328e90b94fd8b23e68b0353a065b +msgid "" +"For \"layout\" mode, **only horizontal, left-to-right, top-to bottom** " +"text is supported, other text is ignored. In this mode, text is also " +"ignored, if its :data:`fontsize` is too small." +msgstr "" +"「レイアウト」モードでは、水平方向、左から右、上から下のテキストのみがサポートされ、他のテキストは無視されます。また、このモードでは、:data:`fontsize`" +" が小さすぎる場合もテキストが無視されます。" + +#: ../../module.rst:441 cd72c16aed314ef3a7590885d9fae841 +msgid "" +"\"Simple\" and \"blocks\" mode in contrast output **all text** for any " +"text size or orientation." +msgstr "対照的に、「シンプル」モードと「ブロック」モードでは、テキストのサイズや向きに関係なくすべてのテキストが出力されます。" + +#: ../../module.rst:443 8ecb7409221c49938c9fb9d37493484c +msgid "Command::" +msgstr "コマンド::" + +#: ../../module.rst:471 5fad84347d2445a8856906e329288d56 +msgid "" +"Command options may be abbreviated as long as no ambiguities are " +"introduced. So the following do the same:" +msgstr "コマンドのオプションは、曖昧さが生じない限り、省略形を使用することができます。したがって、以下のように同じ結果を得ることができます:" + +#: ../../module.rst:473 592f0bfc5eb549db900005b6e9f32c97 +msgid "" +"`... -output text.txt -noligatures -noformfeed -convert-white -grid 3 " +"-extra-spaces ...`" +msgstr "" + +#: ../../module.rst:474 eddbf6fc4404413299a348a9c362474d +msgid "`... -o text.txt -nol -nof -c -g 3 -e ...`" +msgstr "" + +#: ../../module.rst:476 6831e519b57348daaeda28c983995f14 +msgid "" +"The output filename defaults to the input with its extension replaced by " +"`.txt`. As with other commands, you can select page ranges **(caution: " +"1-based!)** in `mutool` format, as indicated above." +msgstr "" +"出力ファイル名は、入力ファイル名の拡張子が `.txt` に置換されたものがデフォルトです。他のコマンドと同様に、上記に示されているように、 " +"`mutool` 形式でページ範囲(注意:1から始まる)を選択できます。" + +#: ../../module.rst:478 0b7bf6de37b24078b730bc7bf19be42a +msgid "**mode:** (str) select a formatting mode -- default is \"layout\"." +msgstr "**mode:** (str) フォーマットモードを選択します - デフォルトは「レイアウト」です。" + +#: ../../module.rst:479 3bcd33bb5ce24428a07587f4712b9a18 +msgid "" +"**noligatures:** (bool) corresponds to **not** " +":data:`TEXT_PRESERVE_LIGATURES`. If specified, ligatures (present in " +"advanced fonts: glyphs combining multiple characters like \"fi\") are " +"split up into their components (i.e. \"f\", \"i\"). Default is passing " +"them through." +msgstr "" +"**noligatures:** (bool) :data:`TEXT_PRESERVE_LIGATURES` " +"に相当します。指定された場合、リガチャ(複数の文字を結合するグリフ、例:「fi」のようなもの)はその構成要素(つまり「f」、「i」)に分割されます。デフォルトはそのまま通過させることです。" + +#: ../../module.rst:480 fde0e3bb3c2246b9a71dfdb9c1d66955 +msgid "" +"**convert-white:** corresponds to **not** " +":data:`TEXT_PRESERVE_WHITESPACE`. If specified, all white space " +"characters (like tabs) are replaced with one or more spaces. Default is " +"passing them through." +msgstr "" +"**convert-white:** :data:`TEXT_PRESERVE_WHITESPACE` " +"に相当します。指定された場合、すべての空白文字(タブなど)は1つ以上のスペースに置き換えられます。デフォルトはそのまま通過させることです。" + +#: ../../module.rst:481 f37af1239c334a93824af0c9eb11ff0d +msgid "" +"**extra-spaces:** (bool) corresponds to **not** " +":data:`TEXT_INHIBIT_SPACES`. If specified, large gaps between adjacent " +"characters will be filled with one or more spaces. Default is off." +msgstr "" +"**extra-spaces:** (bool) :data:`TEXT_INHIBIT_SPACES` " +"に相当します。指定された場合、隣接する文字間の大きな間隔は1つ以上のスペースで埋められます。デフォルトはオフです。" + +#: ../../module.rst:482 ec10e29ae9104149922fda77e2386600 +#, fuzzy +msgid "" +"**noformfeed:** (bool) instead of `hex(12)` (formfeed), write linebreaks" +" ``\\n`` at end of output pages." +msgstr "" +"**noformfeed:** (bool) `hex(12)` (フォームフィード)の代わりに、出力ページの末尾に ``\\n`` " +"個の改行を書き込みます。" + +#: ../../module.rst:483 b27625b2a5e8453a9b0064ea812c95c5 +msgid "**skip-empty:** (bool) skip pages with no text." +msgstr "**skip-empty:** ((bool) テキストがないページをスキップします。" + +#: ../../module.rst:484 9d41da5687ef45a989cfeaf0a76592cf +msgid "" +"**grid:** lines with a vertical coordinate difference of no more than " +"this value (in points) will be merged into the same output line. Only " +"relevant for \"layout\" mode. **Use with care:** 3 or the default 2 " +"should be adequate in most cases. If **too large**, lines that are " +"*intended* to be different in the original may be merged and will result " +"in garbled and / or incomplete output. If **too low**, artifact separate " +"output lines may be generated for some spans in the input line, just " +"because they are coded in a different font with slightly deviating " +"properties." +msgstr "" +"**grid:** 垂直座標の差がこの値(ポイント単位)を超えない行は、同じ出力行に結合されます。 " +"「レイアウト」モードにのみ関連します。注意して使用してください: " +"ほとんどの場合、3またはデフォルトの2が適切です。大きすぎると、元のフォントで異なる行が結合され、文字化けや不完全な出力が発生する可能性があります。低すぎると、入力行の一部の範囲で個別の出力行が生成されることがありますが、これはわずかに異なる特性を持つ別のフォントでコーディングされているためです。" + +#: ../../module.rst:485 b9f7195d039a44ad9f6dcb30ae1f8552 +msgid "" +"**fontsize:** include text with :data:`fontsize` larger than this value " +"only (default 3). Only relevant for \"layout\" option." +msgstr "" +"**fontsize:** この値よりも大きな :data:`fontsize` を持つテキストのみを含めます(デフォルト3)。 " +"「レイアウト」オプションにのみ関連します。" + +#: ../../footer.rst:60 4893a2a143c54c6ea8e7ffd30db93e7c +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Module *fitz*" +#~ msgstr "モジュール *fitz*" + +#~ msgid "Invoke the module like this::" +#~ msgstr "次のようにしてモジュールを呼び出します::" + +#~ msgid "" +#~ "*python -m fitz join -o output.pdf " +#~ "file1.pdf,,N-1 file2.pdf,secret,N,1 file3.pdf,,5-N*" +#~ msgstr "" + +#~ msgid "Or use Python's `-m` switch with PyMuPDF's `fitz` module::" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/outline.mo b/docs/locales/ja/LC_MESSAGES/outline.mo new file mode 100644 index 000000000..832af92ff Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/outline.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/outline.po b/docs/locales/ja/LC_MESSAGES/outline.po new file mode 100644 index 000000000..a66393834 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/outline.po @@ -0,0 +1,271 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 eade87716b2747e7817d9414e8034522 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 6379b9d9980a4f9fabc3904f865e408b +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 dc2316e3820a455f9e030297b205a574 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../outline.rst:7 3ebaa51cc5b946e7a450900a8661c90c +msgid "Outline" +msgstr "Outline (アウトライン)" + +#: ../../outline.rst:9 5119b28a24af46d682652e038c5a1126 +msgid "" +"*outline* (or \"bookmark\"), is a property of *Document*. If not " +"``None``, it stands for the first outline item of the document. Its " +"properties in turn define the characteristics of this item and also point" +" to other outline items in \"horizontal\" or downward direction. The full" +" tree of all outline items for e.g. a conventional table of contents " +"(TOC) can be recovered by following these \"pointers\"." +msgstr "" +"*アウトライン*(または「ブックマーク」)は、*Document* のプロパティです。それが ``None`` " +"でない場合、それはドキュメントの最初のアウトライン項目を表します。そのプロパティは、この項目の特性を定義し、また「水平」または下向きの方向に他のアウトライン項目を指します。たとえば、通常の目次(TOC)のためのすべてのアウトライン項目の完全なツリーは、これらの「ポインター」をたどることで回復できます。" + +#: ../../outline.rst:12 c27e0f395b824686b522b514ecf1d098 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../outline.rst:12 cb55fadb7745417899fe5f3e385a6737 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../outline.rst:14 0e48e54f78de4ed6a2f0ef5660d74298 +msgid ":attr:`Outline.down`" +msgstr "" + +#: ../../outline.rst:14 4bda059c28ab45bb8c9b17d781a5f749 +msgid "next item downwards" +msgstr "次の項目は下向き" + +#: ../../outline.rst:15 af4e5daadaf54eb7a89a3012aee90476 +msgid ":attr:`Outline.next`" +msgstr "" + +#: ../../outline.rst:15 89522a47c0db4668bf6da64302eda4d1 +msgid "next item same level" +msgstr "同じレベルの次の項目" + +#: ../../outline.rst:16 2e773fb937834e4192d62907e44c0eae +msgid ":attr:`Outline.page`" +msgstr "" + +#: ../../outline.rst:16 c22ddb98cc114a5c94ca6d39d1bda4f4 +msgid "page number (0-based)" +msgstr "ページ番号(0から始まる)" + +#: ../../outline.rst:17 61f54e5be3684a7e9b71f47ccdede803 +msgid ":attr:`Outline.title`" +msgstr "" + +#: ../../outline.rst:17 93a433f132584065af026642754a98fb +msgid "title" +msgstr "タイトル" + +#: ../../outline.rst:18 e41061ce2b7e48a4aa31064fe88df3cf +msgid ":attr:`Outline.uri`" +msgstr "" + +#: ../../outline.rst:18 323881cb7b1d40ae85b03dd5a27df654 +msgid "string further specifying outline target" +msgstr "アウトラインのターゲットをさらに指定する文字列" + +#: ../../outline.rst:19 198b8ace96d142379f9036f96c06bd73 +msgid ":attr:`Outline.is_external`" +msgstr "" + +#: ../../outline.rst:19 f15b6eff0b2149948a823a986f443ba9 +msgid "target outside document" +msgstr "ドキュメントの外部のターゲットかどうか" + +#: ../../outline.rst:20 7b2130996c994ef4857d2225858d3f28 +msgid ":attr:`Outline.is_open`" +msgstr "" + +#: ../../outline.rst:20 1ce23358265b4971af79d5cb7f924035 +msgid "whether sub-outlines are open or collapsed" +msgstr "サブアウトラインが開いているか折りたたまれているか" + +#: ../../outline.rst:21 3810e86f9dc24ee98109b10773fe9594 +msgid ":attr:`Outline.dest`" +msgstr "" + +#: ../../outline.rst:21 be949162a84940be8d8d5b241b3bd8dc +msgid "points to destination details object" +msgstr "宛先の詳細オブジェクトを指す" + +#: ../../outline.rst:24 d60d251f33b04aba88575166c80cd268 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../outline.rst:30 0d3278496d6f48cca68af36994524ad7 +msgid "" +"The next outline item on the next level down. Is ``None`` if the item has" +" no children." +msgstr "次の階層のアウトライン項目です。アイテムに子要素がない場合は ``None`` です。" + +#: ../../outline.rst 0f60e0258ec74a77850c19f4e818573b +#: 359863649a1b46a5b9e315592d26b967 455943c48bd0401da7d61e80ae3c1629 +#: 46b1711a00d245fa90b3ef78e5ed461c 4d3e0f6d95214fe79ecf3af1cee7d067 +#: 6f6b16d6c7924b9190b762fc5f398c39 b2720f2be62b4f0ebd718c2c00bad2fe +#: b5bdff1052b242918366fb27cd5edd6f +msgid "type" +msgstr "" + +#: ../../outline.rst:32 31df4a9a593e495e83bdab0377d48a45 +msgid ":ref:`Outline`" +msgstr "" + +#: ../../outline.rst:36 821897bbe57144a2a9515d44d5988bfd +msgid "" +"The next outline item at the same level as this item. Is ``None`` if this" +" is the last one in its level." +msgstr "このアイテムと同じレベルの次のアウトライン項目です。このアイテムがそのレベルで最後の場合、``None`` です。" + +#: ../../outline.rst:38 4805300829ae486787ed36cf2b1d07e6 +msgid "`Outline`" +msgstr "" + +#: ../../outline.rst:42 6230223ef28b4e0f874585f6bd0fbfb4 +msgid "The page number (0-based) this bookmark points to." +msgstr "このブックマークが指すページ番号(0から始まる)です。" + +#: ../../outline.rst:44 c245da52b4444de3a1a7a67e7c8a4c6e +msgid "int" +msgstr "" + +#: ../../outline.rst:48 050383aa093344d4b72c9304ea29e434 +msgid "The item's title as a string or ``None``." +msgstr "アイテムのタイトル、または ``None`` の文字列です。" + +#: ../../outline.rst:50 ../../outline.rst:83 23062e46c343424b92d244ac235c1012 +#: 2826b0f053f246028f1017763eb6beba +msgid "str" +msgstr "" + +#: ../../outline.rst:54 33486d26fcf84b4287a9b62665e5d95d +msgid "" +"Indicator showing whether any sub-outlines should be expanded (``True``) " +"or be collapsed (``False``). This information is interpreted by PDF " +"reader software." +msgstr "サブアウトラインが展開されるべきか(``True``)折りたたまれるべきか(``False``)を示すインジケーターです。この情報はPDFリーダーソフトウェアによって解釈されます。" + +#: ../../outline.rst:56 ../../outline.rst:62 39516f02d6ca4ea5818c20077384b2f7 +#: 7c679ae4b313481fa70a96efb56596b0 +msgid "bool" +msgstr "" + +#: ../../outline.rst:60 6d936126ab42453088019c3a2c2d9b85 +msgid "" +"A bool specifying whether the target is outside (``True``) of the current" +" document." +msgstr "対象が現在のドキュメントの外部(``True``)かどうかを示すブール値です。" + +#: ../../outline.rst:66 ae26e1cb6f7740eab0cd822bccd86c6b +msgid "" +"A string specifying the link target. The meaning of this property should " +"be evaluated in conjunction with property `is_external`:" +msgstr "リンクの対象を指定する文字列。このプロパティの意味は、プロパティ `is_external` と一緒に評価されるべきです。" + +#: ../../outline.rst:70 6a552a5230644d80b93626dc835ea86c +#, fuzzy +msgid "" +"`is_external` is true: ``uri`` points to some target outside the current " +"PDF, which may be an internet resource (``uri`` starts with ``http://`` " +"or similar), another file (``uri`` starts with ``file:`` or ``file://``) " +"or some other service like an e-mail address (``uri`` starts with " +"``mailto:``)." +msgstr "" +"`is_external` がtrueの場合: `uri` は、現在のPDFの外部のターゲットを指します。これは、インターネットリソース( " +"`uri` が \"http://\" などで始まる)、別のファイル(`uri` が \"file:\" または \"file://\" " +"で始まる)、または電子メールアドレスなどのその他のサービス( `uri` が \"mailto:\" で始まる)である可能性があります。" + +#: ../../outline.rst:76 c000b251feae4c25aa04f34b87aff7f3 +#, fuzzy +msgid "" +"`is_external` is false: ``uri`` will be `None` or point to an internal " +"location. In case of PDF documents, this should either be *#nnnn* to " +"indicate a 1-based (!) page number *nnnn*, or a named location. The " +"format varies for other document types, for example " +"\"../FixedDoc.fdoc#PG_2_LNK_1\" for page number 2 (1-based) in an XPS " +"document." +msgstr "" +"`is_external` がfalseの場合:`uri` は `None` " +"になるか、または内部の位置を指します。PDFドキュメントの場合、これは1から始まるページ番号 *nnnn* を示すために *#nnnn* " +"であるか、名前付きの場所を示します。他のドキュメントタイプの場合、形式は異なります。たとえば、XPSドキュメントのページ番号2(1から始まる)の場合は" +" \"../FixedDoc.fdoc#PG_2_LNK_1\" です。" + +#: ../../outline.rst:87 6ef34f3fc06140d2b6b9f9a43f092838 +msgid "The link destination details object." +msgstr "リンクの宛先詳細オブジェクトです。" + +#: ../../outline.rst:89 79a5c71cf6224a6198800f498dc0ddb3 +msgid ":ref:`linkDest`" +msgstr "" + +#: ../../footer.rst:60 e201dcf36b574d0280f4532c31067f5b +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "A string specifying the link target. " +#~ "The meaning of this property should " +#~ "be evaluated in conjunction with " +#~ "*isExternal*. The value may be ``None``," +#~ " in which case *isExternal == False*." +#~ " If *uri* starts with *file://*, " +#~ "*mailto:*, or an internet resource name," +#~ " *isExternal* is ``True``. In all " +#~ "other cases *isExternal == False* and" +#~ " *uri* points to an internal " +#~ "location. In case of PDF documents, " +#~ "this should either be *#nnnn* to " +#~ "indicate a 1-based (!) page number " +#~ "*nnnn*, or a named location. The " +#~ "format varies for other document types," +#~ " e.g. *uri = '../FixedDoc.fdoc#PG_21_LNK_84'* " +#~ "for page number 21 (1-based) in an" +#~ " XPS document." +#~ msgstr "" +#~ "リンクのターゲットを指定する文字列です。このプロパティの意味は *isExternal* " +#~ "と連動して評価されるべきです。値が ``None`` の場合、*isExternal == " +#~ "False* です。*uri* が *file://、mailto:* " +#~ "、またはインターネットリソース名で始まる場合、*isExternal* は ``True`` " +#~ "です。それ以外の場合、*isExternal == False* で、*uri* " +#~ "は内部の場所を指します。PDFドキュメントの場合、これは1から始まるページ番号 *nnnn* を示すための " +#~ "*#nnnn* " +#~ "であるか、名前付きの場所である必要があります。他のドキュメントタイプの場合、例えばXPSドキュメントの場合、*uri = " +#~ "'../FixedDoc.fdoc#PG_21_LNK_84'* は、ページ番号21(1から始まる)を示します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/packaging.mo b/docs/locales/ja/LC_MESSAGES/packaging.mo new file mode 100644 index 000000000..695147637 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/packaging.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/packaging.po b/docs/locales/ja/LC_MESSAGES/packaging.po new file mode 100644 index 000000000..b12da034f --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/packaging.po @@ -0,0 +1,234 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-09-11 21:42+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 ea339b8cf2d7425a8df74a11b8967508 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 5cca523385734e86a2f0b714b8baa7d8 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF(およびその他の)ドキュメントのデータ抽出、解析、変換、および操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 d2a6dea50de74aee87d8c5a1392014ac +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDF テキスト抽出、PDF 画像抽出、PDF 変換、PDF テーブル、PDF 分割、PDF 作成、Pyodide、PyScript" + +#: ../../packaging.rst:5 92d36a7e439d47098564112377f93fb8 +msgid "Packaging for Linux distributions" +msgstr "Linux ディストリビューション向けのパッケージング" + +#: ../../packaging.rst:9 db33f856c6964c8f82f944305803a5c3 +msgid "Requirements" +msgstr "要件" + +#: ../../packaging.rst:11 dd51b73f026546128b58ca1dbc07d09b +msgid "Python" +msgstr "" + +#: ../../packaging.rst:12 619f89f2b9d94ee08bc949768cf1c902 +msgid "MuPDF checkout (including submodules)." +msgstr "MuPDF チェックアウト(サブモジュールを含む)。" + +#: ../../packaging.rst:13 3e8513b568314fd08934c01a489b24ff +msgid "PyMuPDF checkout." +msgstr "PyMuPDF チェックアウト。" + +#: ../../packaging.rst:14 c0fcde2fbd534fafb5383c841b93e6a0 +msgid "System packages listed in `scripts/sysinstall.py:g_sys_packages`." +msgstr "`scripts/sysinstall.py:g_sys_packages` にリストされたシステムパッケージ。" + +#: ../../packaging.rst:15 5628693598754932bd53d13010047308 +msgid "Python packages listed in `pyproject.toml`." +msgstr "`pyproject.toml` にリストされた Python パッケージ。" + +#: ../../packaging.rst:17 cb7a28284f2e4f08827c755968f0877e +msgid "Extra requirements for running tests:" +msgstr "テストの実行に必要な追加要件:" + +#: ../../packaging.rst:19 7e9780bb68154a86a58fcfad997a5bee +msgid "Python packages listed in `scripts/gh_release.py:test_packages`." +msgstr "`scripts/gh_release.py:test_packages` にリストされた Python パッケージ。" + +#: ../../packaging.rst:23 b4c0cecc40d24541aa536d3e453b991a +msgid "General steps" +msgstr "一般的な手順" + +#: ../../packaging.rst:25 968e7bb7a90f41a8869285b39c5ffb7b +msgid "Build and install MuPDF:" +msgstr "MuPDF をビルドしてインストールする:" + +#: ../../packaging.rst:27 f87b44cacfff43ab983716288eb0f4a4 +msgid "Install required system packages." +msgstr "必要なシステムパッケージをインストールします。" + +#: ../../packaging.rst:28 d9ac56053379472a852112940eadff9f +msgid "" +"Run `make install-shared-python` on MuPDF's `Makefile` with at least " +"these make variables:" +msgstr "" +"MuPDF の `Makefile` で `make install-shared-python` を実行し、少なくともこれらの make " +"変数を設定します:" + +#: ../../packaging.rst:31 f91b3554819741e18f724952afefff0d +msgid "`DESTDIR` set to the install directory, e.g. `/`." +msgstr "`DESTDIR` をインストールディレクトリに設定します。例: `/`。" + +#: ../../packaging.rst:33 a1bb9179a5924291a4d95d69779a7638 +msgid "" +"`prefix` set to location relative to DESTDIR, such as `/usr/local` or " +"`/usr`. Must start with `/`." +msgstr "" +"`prefix` を DESTDIR に相対的な場所に設定します。たとえば `/usr/local` または /usr など、必ず / " +"で始める必要があります。" + +#: ../../packaging.rst:35 ff45c57dc53a474aa442f256588c55cc +msgid "`USE_SYSTEM_LIBS=yes`." +msgstr "" + +#: ../../packaging.rst:36 6403f4dd52254fbfa53af0e1352ceeaa +msgid "`HAVE_LEPTONICA=yes`." +msgstr "" + +#: ../../packaging.rst:37 482b9da209504d059ff223aedd76019b +msgid "`HAVE_TESSERACT=yes`." +msgstr "" + +#: ../../packaging.rst:39 51e3ef8e388c4e2eb2a6f493f0f9c614 +msgid "Build and install PyMuPDF:" +msgstr "`pip install ./PyMuPDF`" + +#: ../../packaging.rst:42 499f0afa88564953ac9c266474d2aa0d +msgid "" +"Run `pip install ./PyMuPDF` or `pip wheel ./PyMuPDF` with at least these " +"environment variables:" +msgstr "" +"`pip install ./PyMuPDF` または `pip wheel ./PyMuPDF` " +"を実行します。少なくとも以下の環境変数を設定してください:" + +#: ../../packaging.rst:46 6d1d80bf3e7648e6be69dc15eac81a10 +msgid "" +"`PYMUPDF_SETUP_MUPDF_BUILD=` (empty string) to prevent download and build" +" of hard-coded MuPDF release." +msgstr "" +"`PYMUPDF_SETUP_MUPDF_BUILD=` (空の文字列):ハードコードされた MuPDF " +"リリースのダウンロードとビルドを防ぐために設定します。" + +#: ../../packaging.rst:49 f0669625912e43c2b839e632328453b7 +msgid "" +"`CFLAGS`, `CXXFLAGS` and `LDFLAGS` set to allow visibility of the " +"installed MuPDF headers and shared libraries." +msgstr "" +"`CFLAGS`、`CXXFLAGS`、および `LDFLAGS` を設定して、インストールされた MuPDF " +"のヘッダーと共有ライブラリが見えるようにします。" + +#: ../../packaging.rst:52 fa04c3bb56c941df8b521704c781e646 +msgid "Run PyMuPDF tests:" +msgstr "PyMuPDF テストを実行する:" + +#: ../../packaging.rst:54 ab5423a83a72485ba9d3f2a257dfc8d6 +msgid "Ensure required Python packages are available." +msgstr "必要な Python パッケージが利用可能であることを確認します" + +#: ../../packaging.rst:56 9549372318d448939b9315704df3c716 +msgid "Run `pytest -k \"not test_color_count and not test_3050\" PyMuPDF`" +msgstr "`pytest -k \"not test_color_count and not test_3050\" PyMuPDF` を実行します。" + +#: ../../packaging.rst:58 d6c894edd7704171b3995de2b1102647 +msgid "" +"Test `test_color_count` is known fail if MuPDF is not built with " +"PyMuPDF's custom config.h." +msgstr "" +"`test_color_count` テストは、MuPDF が PyMuPDF のカスタム config.h " +"でビルドされていない場合に失敗することが既知です。" + +#: ../../packaging.rst:59 5a4624ddb10d46b9a944a59b2579188b +msgid "" +"Test `test_3050` is known to fail if MuPDF is built without its own " +"third-party libraries." +msgstr "`test_3050` テストは、MuPDF が独自のサードパーティライブラリを使用せずにビルドされている場合に失敗することが既知です。" + +#: ../../packaging.rst:63 a2a65b48ae41408488f21ffce49014d0 +msgid "Use of scripts/sysinstall.py" +msgstr "scripts/sysinstall.py の使用" + +#: ../../packaging.rst:65 eea1b531c51e4d31a0e82c6fab02eab2 +msgid "" +"`scripts/sysinstall.py` provides a useful example of build, install and " +"test commands that are known to to work, because it is run regularly by " +"Github action `.github/workflows/test_sysinstall.yml`." +msgstr "" +"`scripts/sysinstall.py` は、定期的に GitHub アクション " +"`.github/workflows/test_sysinstall.yml` " +"で実行されるため、動作が確認されているビルド、インストール、およびテストコマンドの便利な例を提供しています。" + +#: ../../packaging.rst:69 388e54101bff4f3fa4ae2619fe831f17 +msgid "Run with `-h` or look at the doc-string to see detailed usage information." +msgstr "詳細な使用方法については、`-h` を付けて実行するか、ドキュメント文字列を参照してください。" + +#: ../../packaging.rst:70 9256146054514b818c9dcb29a92af8b5 +msgid "It uses Debian-style `apt` commands to install system packages." +msgstr "システムパッケージのインストールには、Debian スタイルの `apt` コマンドが使用されます。" + +#: ../../packaging.rst:71 cb3514ca6d8c491dba99488d16d69d11 +msgid "By default it assumes local git checkouts `mupdf/` and `PyMuPDF/`." +msgstr "デフォルトでは、ローカルの git チェックアウトである `mupdf/` と `PyMuPDF/` を想定しています。" + +#: ../../packaging.rst:73 c626332a346d49e2b6784037fc0062b1 +msgid "" +"To run a full build, install and test for both a local fake root and the " +"system root:" +msgstr "ローカルのフェイクルートとシステムルートの両方で、フルビルド、インストール、およびテストを実行するには、" + +#: ../../packaging.rst:81 808c407963ab412c91b112f433a7533a +msgid "To see what commands would be run without actually running them:" +msgstr "実際に実行せずに実行されるコマンドを確認するには:" + +#: ../../packaging.rst:89 d5029b9f860543aaa4c5a38f00c82e36 +msgid "See also" +msgstr "参考にしてください" + +#: ../../packaging.rst:92 c20265f491b4469d8ab455749f78c3b0 +msgid "" +"`setup.py`'s initial doc-comment has detailed information about the " +"environment variables used when building PyMuPDF." +msgstr "" +"参考にしてください `setup.py` の最初のドキュメントコメントには、PyMuPDF " +"をビルドする際に使用される環境変数に関する詳細な情報が含まれています。" + +#~ msgid "" +#~ "`PYMUPDF_SETUP_IMPLEMENTATIONS=b` to build only " +#~ "the rebased implementation. [This will " +#~ "become the default in a future " +#~ "release.]" +#~ msgstr "" + +#~ msgid "" +#~ "[As of 2024-04-15, tere is no need" +#~ " to set `PYMUPDF_SETUP_IMPLEMENTATIONS=b` to " +#~ "build only the rebased implementation, " +#~ "as this is now the default.]" +#~ msgstr "" +#~ "[2024年4月15日現在、`PYMUPDF_SETUP_IMPLEMENTATIONS=b` " +#~ "を設定して、リベースされた実装のみをビルドする必要はありません。これはデフォルトの動作です。]" + diff --git a/docs/locales/ja/LC_MESSAGES/page.mo b/docs/locales/ja/LC_MESSAGES/page.mo new file mode 100644 index 000000000..44aebb3b1 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/page.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/page.po b/docs/locales/ja/LC_MESSAGES/page.po new file mode 100644 index 000000000..7d930aff4 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/page.po @@ -0,0 +1,7480 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 61662a8c1daf46fcba3a5225ce5345b9 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 6ecbaf7b19a0402d920fb129cb166867 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 aedaf2f402a840eaaf980cdf15ab35ed +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../page.rst:7 c68d0e5287c946daa89cd0f50c9100a4 +msgid "Page" +msgstr "Page (ページ)" + +#: ../../page.rst:9 0e451fa94a0d46ccb18c42656dadb8c0 +msgid "" +"Class representing a document page. A page object is created by " +":meth:`Document.load_page` or, equivalently, via indexing the document " +"like `doc[n]` - it has no independent constructor." +msgstr "" +"ドキュメントページを表すクラス。ページオブジェクトは :meth:`Document.load_page` " +"またはドキュメントをインデックスで参照することで作成されます(例: `doc[n]` ) - 独立したコンストラクタはありません。" + +#: ../../page.rst:11 27908c2230814d3a9d1c48e2469f4bde +msgid "" +"There is a parent-child relationship between a document and its pages. If" +" the document is closed or deleted, all page objects (and their " +"respective children, too) in existence will become unusable " +"(\"orphaned\"): If a page property or method is being used, an exception " +"is raised." +msgstr "ドキュメントとそのページとの親子関係があります。ドキュメントが閉じられるか削除されると、存在するすべてのページオブジェクト(およびそれに関連する子供たちも)が使用できなくなります(「孤児」になります)。ページのプロパティまたはメソッドを使用している場合、例外が発生します。" + +#: ../../page.rst:13 c56f4954e82540de9466a05e1ab4c1d1 +msgid "" +"Several page methods have a :ref:`Document` counterpart for convenience. " +"At the end of this chapter you will find a synopsis." +msgstr "便宜のために、いくつかのページメソッドには :ref:`Document` の対応するメソッドがあります。この章の最後に概要があります。" + +#: ../../page.rst:15 a646bf243f1b418ea2e7d1cccfe178ec +msgid "" +"Many times in this chapter we are using the term **coordinate**. It is of" +" high importance to have at least a basic understanding of what that is " +"and that you feel comfortable with the section :ref:`Coordinates`." +msgstr "" +"この章では何度も **「座標」** という用語を使用しています。それが何を意味するかを少なくとも基本的な理解があること、そして " +":ref:`Coordinates` のセクションに慣れていることが非常に重要です。" + +#: ../../page.rst:18 1374be2ff1e4448cad12a94163fd282c +msgid "Modifying Pages" +msgstr "ページの修正" + +#: ../../page.rst:19 d8083d1622b74d8c88a9e5ab31ec1d61 +msgid "" +"Changing page properties and adding or changing page content is available" +" for PDF documents only." +msgstr "ページのプロパティを変更し、ページの内容を追加または変更することは、PDFドキュメントのみで使用可能です。" + +#: ../../page.rst:21 595cdd02c3424408946cdee8df0a08ab +msgid "In a nutshell, this is what you can do with PyMuPDF:" +msgstr "要するに、PyMuPDFでできることは次のとおりです:" + +#: ../../page.rst:23 bc39cc18888a404eb3090578b0ffce63 +msgid "Modify page rotation and the visible part (\"cropbox\") of the page." +msgstr "ページの回転とページの可視部分(「クロップボックス」)の変更。" + +#: ../../page.rst:24 02524155c10f4264ae988234c7952a24 +msgid "Insert images, other PDF pages, text and simple geometrical objects." +msgstr "画像、他のPDFページ、テキスト、単純な幾何学的オブジェクトの挿入。" + +#: ../../page.rst:25 9e3f5ef3467e4d4da095f6a7f33894d3 +msgid "Add annotations and form fields." +msgstr "アノテーションとフォームフィールドの追加。" + +#: ../../page.rst:29 70883e8610b84a57972a7d75d4726dc1 +msgid "" +"Methods require coordinates (points, rectangles) to put content in " +"desired places. Please be aware that these coordinates **must always** be" +" provided relative to the **unrotated** page (since v1.17.0). The reverse" +" is also true: except :attr:`Page.rect`, resp. :meth:`Page.bound` (both " +"*reflect* when the page is rotated), all coordinates returned by methods " +"and attributes pertain to the unrotated page." +msgstr "" +"メソッドには、コンテンツを所望の場所に配置するために座標(ポイント、矩形)が必要です。v1.17.0以降、これらの座標は常に " +"**回転していない** ページに対して提供する **必要があります**。逆もまた真実です::attr:`Page.rect`, resp. " +":meth:`Page.bound` を除いて(ページが回転したときを *反映* " +"しています)、メソッドと属性が返すすべての座標は回転していないページに関連しています。" + +#: ../../page.rst:31 e8d26b51cb0542bf911b6ee7650bb848 +msgid "" +"So the returned value of e.g. :meth:`Page.get_image_bbox` will not change" +" if you do a :meth:`Page.set_rotation`. The same is true for coordinates " +"returned by :meth:`Page.get_text`, annotation rectangles, and so on. If " +"you want to find out, where an object is located in **rotated " +"coordinates**, multiply the coordinates with " +":attr:`Page.rotation_matrix`. There also is its inverse, " +":attr:`Page.derotation_matrix`, which you can use when interfacing with " +"other readers, which may behave differently in this respect." +msgstr "" +"したがって、:meth:`Page.get_image_bbox` " +"などのメソッドの返される値は、Page.set_rotation()を実行しても変更されません。同じことが " +":meth:`Page.get_text`、アノテーションの矩形などから返される座標にも当てはまります。オブジェクトが **回転した座標** " +"でどこにあるかを調べたい場合は、座標を:attr:`Page.rotation_matrix` " +"で乗算します。:attr:`Page.derotation_matrix` " +"とその逆行列もあり、他のリーダーと連携する際に使用できます。この点で異なる動作をするかもしれません。" + +#: ../../page.rst:35 0d455c799f9646aead7c4f7859d1380e +msgid "" +"If you add or update annotations, links or form fields on the page and " +"immediately afterwards need to work with them (i.e. **without leaving the" +" page**), you should reload the page using :meth:`Document.reload_page` " +"before referring to these new or updated items." +msgstr "" +"ページに注釈、リンク、またはフォームフィールドを追加または更新し、直後にこれらの新しいまたは更新されたアイテムを操作する必要がある場合(つまり " +"**ページを離れずに**)、:meth:`Document.reload_page` を使用してページを再読み込みする必要があります。" + +#: ../../page.rst:37 9cc2e75884444a2097f4580e4d06e88a +msgid "" +"Reloading the page is generally recommended -- although not strictly " +"required in all cases. However, some annotation and widget types have " +"extended features in PyMuPDF compared to MuPDF. More of these extensions " +"may also be added in the future." +msgstr "一般的にはページを再読み込みすることをお勧めしますが、すべてのケースで厳密に必要とされるわけではありません。ただし、PyMuPDFの注釈とウィジェットの種類の一部は、MuPDFと比較して拡張機能を持っています。今後もこれらの拡張機能が増えるかもしれません" + +#: ../../page.rst:39 f0d57ff812da4db08cb5e3af326afaf1 +msgid "" +"Releoading the page ensures all your changes have been fully applied to " +"PDF structures, so you can safely create Pixmaps or successfully iterate " +"over annotations, links and form fields." +msgstr "ページを再読み込むことで、変更がPDF構造に完全に適用され、Pixa画像を作成したり、アノテーション、リンク、フォームフィールドを正常にイテレートしたりできるようになります。" + +#: ../../page.rst:42 0e2f000975c8476e9b1795f5a360a744 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../page.rst:42 eb997d2653594414a971e3f3ba9fca55 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../page.rst:44 088e0c671d9f4a7ba21e29d6ea4f7e45 +msgid ":meth:`Page.add_caret_annot`" +msgstr "" + +#: ../../page.rst:44 4c2923d08ffe494d83f9a8493bb8d9f6 +msgid "PDF only: add a caret annotation" +msgstr "PDFのみ:キャレットアノテーションを追加します" + +#: ../../page.rst:45 869825ad65994e28bdaafacd5b65a7a8 +msgid ":meth:`Page.add_circle_annot`" +msgstr "" + +#: ../../page.rst:45 3c9b55a6e9ca4530a99fa97fcf240f19 +msgid "PDF only: add a circle annotation" +msgstr "PDFのみ:円注釈を追加します" + +#: ../../page.rst:46 f3739adb1b19428c862da3ee6565f68c +msgid ":meth:`Page.add_file_annot`" +msgstr "" + +#: ../../page.rst:46 00c03a2f41014371ab4aef02f8143ec5 +msgid "PDF only: add a file attachment annotation" +msgstr "PDFのみ:ファイル添付アノテーションを追加します" + +#: ../../page.rst:47 43a7543e97bd49428846878dfadd3765 +msgid ":meth:`Page.add_freetext_annot`" +msgstr "" + +#: ../../page.rst:47 637084fd5ac2454babf3727b3f010855 +msgid "PDF only: add a text annotation" +msgstr "PDFのみ:テキストアノテーションを追加します" + +#: ../../page.rst:48 04e31c3ec1c2428b91e6a2e348c05eac +msgid ":meth:`Page.add_highlight_annot`" +msgstr "" + +#: ../../page.rst:48 6099861df7a444d6b5e92a61e0c13f26 +msgid "PDF only: add a \"highlight\" annotation" +msgstr "PDFのみ:「ハイライト」アノテーションを追加します" + +#: ../../page.rst:49 6d87f8a2c5bd45a4aba68443addac6c9 +msgid ":meth:`Page.add_ink_annot`" +msgstr "" + +#: ../../page.rst:49 b2b4ddf817944d1da98ba5cf7caf6c7c +msgid "PDF only: add an ink annotation" +msgstr "PDFのみ:インク注釈を追加します" + +#: ../../page.rst:50 26722399bcf947d9a2d34b860df50fb6 +msgid ":meth:`Page.add_line_annot`" +msgstr "" + +#: ../../page.rst:50 f5e9aa6723f3476cbbf5b9444687c996 +msgid "PDF only: add a line annotation" +msgstr "PDFのみ:線アノテーションを追加します" + +#: ../../page.rst:51 db1a94b17cc74175aadc4bfd1e9f2749 +msgid ":meth:`Page.add_polygon_annot`" +msgstr "" + +#: ../../page.rst:51 c57ae6d6041d4df4add808c430fdbe9d +msgid "PDF only: add a polygon annotation" +msgstr "PDFのみ:多角形アノテーションを追加します" + +#: ../../page.rst:52 b2b5588a13e54317886c83e331dfe39f +msgid ":meth:`Page.add_polyline_annot`" +msgstr "" + +#: ../../page.rst:52 178f632613f54d2aaa2ad97e196b3974 +msgid "PDF only: add a multi-line annotation" +msgstr "PDFのみ:多線アノテーションを追加します" + +#: ../../page.rst:53 5cde6342f3294a01ad3fdcd684bce459 +msgid ":meth:`Page.add_rect_annot`" +msgstr "" + +#: ../../page.rst:53 36766e72c63e443bbc5e7401b75b073b +msgid "PDF only: add a rectangle annotation" +msgstr "PDFのみ:四角アノテーション釈を追加します" + +#: ../../page.rst:54 7737d6d9e1d54a62b81528189d07ae38 +msgid ":meth:`Page.add_redact_annot`" +msgstr "" + +#: ../../page.rst:54 6f3eaa81d30a47ff9322d50db0508ebb +msgid "PDF only: add a redaction annotation" +msgstr "PDFのみ:黒塗りアノテーションを追加します" + +#: ../../page.rst:55 477627aa20014ce5a7137bb51e3ab6ce +msgid ":meth:`Page.add_squiggly_annot`" +msgstr "" + +#: ../../page.rst:55 c48373af1bfb428a820fa3b56f902ab7 +msgid "PDF only: add a \"squiggly\" annotation" +msgstr "PDFのみ:「波線」アノテーションを追加します" + +#: ../../page.rst:56 15134f76ca1243c782cc348723303e31 +msgid ":meth:`Page.add_stamp_annot`" +msgstr "" + +#: ../../page.rst:56 482dc59222934e23ad4169443bf8dc44 +msgid "PDF only: add a \"rubber stamp\" annotation" +msgstr "PDFのみ:「スタンプ」アノテーションを追加します" + +#: ../../page.rst:57 d2b71e4fa0b04441a9822c99d2236187 +msgid ":meth:`Page.add_strikeout_annot`" +msgstr "" + +#: ../../page.rst:57 611b366dc6694e2d962533c75b1b4db3 +msgid "PDF only: add a \"strike-out\" annotation" +msgstr "PDFのみ:「取り消し線」アノテーションを追加します" + +#: ../../page.rst:58 6b23546d662c469d9bd228585f185ef6 +msgid ":meth:`Page.add_text_annot`" +msgstr "" + +#: ../../page.rst:58 81163430e3374f2f97164a81b8373b5e +msgid "PDF only: add a comment" +msgstr "PDFのみ:コメントを追加します" + +#: ../../page.rst:59 b986a03fae7b47a09d3708c20738ae91 +msgid ":meth:`Page.add_underline_annot`" +msgstr "" + +#: ../../page.rst:59 aa4dbb5f385441d0ad32a6ba6168b350 +msgid "PDF only: add an \"underline\" annotation" +msgstr "PDFのみ:「下線」アノテーションを追加します" + +#: ../../page.rst:60 043a3da3622b486fa0a1c02b622abc90 +msgid ":meth:`Page.add_widget`" +msgstr "" + +#: ../../page.rst:60 9a5a4f0660ef44eb95779f555a29b349 +msgid "PDF only: add a PDF Form field" +msgstr "PDFのみ:PDFフォームフィールドを追加します" + +#: ../../page.rst:61 1a42e3d7e6b642e9973fbda3358e267d +msgid ":meth:`Page.annot_names`" +msgstr "" + +#: ../../page.rst:61 c023452d01934d3e87a147c4f19187b2 +msgid "PDF only: a list of annotation (and widget) names" +msgstr "PDFのみ:アノテーション(およびウィジェット)の名前のリスト" + +#: ../../page.rst:62 43d130c02655422e912cc35076c290f7 +msgid ":meth:`Page.annot_xrefs`" +msgstr "" + +#: ../../page.rst:62 b583e7f546b240efaea4d4f3f94d04ef +msgid "PDF only: a list of annotation (and widget) xrefs" +msgstr "PDFのみ:アノテーション(およびウィジェット)のxrefのリスト" + +#: ../../page.rst:63 6b2f6bd4d28049b08438b47b3bc265fd +msgid ":meth:`Page.annots`" +msgstr "" + +#: ../../page.rst:63 4d1e3c99b6ce455abd8890286a995c7a +msgid "return a generator over the annots on the page" +msgstr "ページ上のアノテーションのジェネレーターを返します" + +#: ../../page.rst:64 68d250548ccc43a9bec0b1c4dc4964b0 +msgid ":meth:`Page.apply_redactions`" +msgstr "" + +#: ../../page.rst:64 d21570500fb848f6aee9d5de6d678efa +msgid "PDF only: process the redactions of the page" +msgstr "PDFのみ:ページの塗りつぶしを処理します" + +#: ../../page.rst:65 a2a12048c486482fbc1243b15d5acd39 +msgid ":meth:`Page.bound`" +msgstr "" + +#: ../../page.rst:65 ../../page.rst:136 16fc7caf47cd48c894e2a97f59b71b3c +#: 856f09bfd60149e0a2b6a9e33efd9e18 +msgid "rectangle of the page" +msgstr "ページの矩形" + +#: ../../page.rst:66 689de2090a0742aba4b05ce24cc9920b +msgid ":meth:`Page.cluster_drawings`" +msgstr "" + +#: ../../page.rst:66 6e1a4fc9c4e74054b90da53a79584dc1 +msgid "PDF only: bounding boxes of vector graphics" +msgstr "PDFのみ:ベクトルグラフィックスの境界ボックス" + +#: ../../page.rst:67 0b291206c9644f1ebdfd6cc95911c351 +msgid ":meth:`Page.delete_annot`" +msgstr "" + +#: ../../page.rst:67 2b1d103f7cab4ec6a3d5862fc47202e0 +msgid "PDF only: delete an annotation" +msgstr "PDFのみ:アノテーションを削除します" + +#: ../../page.rst:68 1029e8f7bd1446cc8a9e3c385f0fe2a3 +msgid ":meth:`Page.delete_image`" +msgstr "" + +#: ../../page.rst:68 e6bad8c581d34714aa3333ae457399ec +msgid "PDF only: delete an image" +msgstr "PDFのみ:画像を削除します" + +#: ../../page.rst:69 5cf2a0a21aaf4278b810aaaecccb82af +msgid ":meth:`Page.delete_link`" +msgstr "" + +#: ../../page.rst:69 b7d6df1d3b704e138ff83e0665fc0377 +msgid "PDF only: delete a link" +msgstr "PDFのみ:リンクを削除します" + +#: ../../page.rst:70 6f1937a143ae4275be1946b25f6b2ec5 +msgid ":meth:`Page.delete_widget`" +msgstr "" + +#: ../../page.rst:70 264c28aec4e745fd8cc86d543d2a8e79 +msgid "PDF only: delete a widget / field" +msgstr "PDFのみ:ウィジェット/フィールドを削除します" + +#: ../../page.rst:71 c4337bdd16ef4ab2a6cd79652a1c7aec +msgid ":meth:`Page.draw_bezier`" +msgstr "" + +#: ../../page.rst:71 47b12e4eff6543ca973ffc95d3862bb5 +msgid "PDF only: draw a cubic Bezier curve" +msgstr "PDFのみ:三次ベジエ曲線を描画します" + +#: ../../page.rst:72 c6d2568b85b8448e99b224c1064e5a54 +msgid ":meth:`Page.draw_circle`" +msgstr "" + +#: ../../page.rst:72 6112215cb8e44ac2b7b276bd71369aa9 +msgid "PDF only: draw a circle" +msgstr "PDFのみ:円を描画します" + +#: ../../page.rst:73 10d4a5c5d0ff4d1a83986a5892624226 +msgid ":meth:`Page.draw_curve`" +msgstr "" + +#: ../../page.rst:73 a7601df6163d4f8080791d3cd5c0fc34 +msgid "PDF only: draw a special Bezier curve" +msgstr "PDFのみ:特別なベジエ曲線を描画します" + +#: ../../page.rst:74 afe91de5d4ab4842bdfcdce2f53e4932 +msgid ":meth:`Page.draw_line`" +msgstr "" + +#: ../../page.rst:74 2d7a030f99a846f4a13bd6deca5a7b96 +msgid "PDF only: draw a line" +msgstr "PDFのみ:直線を描画します" + +#: ../../page.rst:75 f6578b353c814c2d9ec742b845a962e2 +msgid ":meth:`Page.draw_oval`" +msgstr "" + +#: ../../page.rst:75 94d6c3838385430d8d1c7faf3e37e538 +msgid "PDF only: draw an oval / ellipse" +msgstr "PDFのみ:楕円を描画します" + +#: ../../page.rst:76 67aa92f11b844738918b590e256a3427 +msgid ":meth:`Page.draw_polyline`" +msgstr "" + +#: ../../page.rst:76 a7d197d82f5b46ffabea926e440d6f35 +msgid "PDF only: connect a point sequence" +msgstr "PDFのみ:点のシーケンスを接続します" + +#: ../../page.rst:77 18692934b54a49e4bb03aa2f8ecf9c35 +msgid ":meth:`Page.draw_quad`" +msgstr "" + +#: ../../page.rst:77 55d89917fdf94ece9759df7281328ae6 +msgid "PDF only: draw a quad" +msgstr "PDF のみ: クアッドを描く" + +#: ../../page.rst:78 4d6819827e2b4c249af959b9d34c86b3 +msgid ":meth:`Page.draw_rect`" +msgstr "" + +#: ../../page.rst:78 d0f4dbd77e5f4c28b70ac64281ae4f27 +msgid "PDF only: draw a rectangle" +msgstr "PDFのみ:四角形を描画します" + +#: ../../page.rst:79 98de881cd3cf433d891e27108ff4c038 +msgid ":meth:`Page.draw_sector`" +msgstr "" + +#: ../../page.rst:79 5552c3983be64d5995f4250564b187f3 +msgid "PDF only: draw a circular sector" +msgstr "PDFのみ:円セクタを描画します" + +#: ../../page.rst:80 a3e9ed818bed4a078c5f52d8afc25e76 +msgid ":meth:`Page.draw_squiggle`" +msgstr "" + +#: ../../page.rst:80 91065f7eb3474bc8ada2d93179d16382 +msgid "PDF only: draw a squiggly line" +msgstr "PDFのみ:波線を描画します" + +#: ../../page.rst:81 ce9b4be9828f41ddb85390c29ed74198 +msgid ":meth:`Page.draw_zigzag`" +msgstr "" + +#: ../../page.rst:81 5a5807a017a44895ab7fa7395f228e98 +msgid "PDF only: draw a zig-zagged line" +msgstr "PDFのみ:ジグザグ線を描画します" + +#: ../../page.rst:82 917d6e0b08f8481cb78bc89218162086 +msgid ":meth:`Page.find_tables`" +msgstr "" + +#: ../../page.rst:82 d917260453274912bcab644a653a56d5 +msgid "locate tables on the page" +msgstr "ページ上のテーブルを検出します" + +#: ../../page.rst:83 a80c67bf147f4456a7d17f13fb5c0f56 +msgid ":meth:`Page.get_drawings`" +msgstr "" + +#: ../../page.rst:83 59fb46671f3f43919c7f5ae98bf45b79 +msgid "get vector graphics on page" +msgstr "ページ上のベクトルグラフィックを取得します" + +#: ../../page.rst:84 ../../page.rst:2328 6229d7b93c6845ca9e238d2e9b795f3b +#: f88c595aa10c4609a3622be46143c30a +msgid ":meth:`Page.get_fonts`" +msgstr "" + +#: ../../page.rst:84 d7222ed39cd745ea9992b38d4b703e15 +msgid "PDF only: get list of referenced fonts" +msgstr "PDFのみ:参照されたフォントのリストを取得" + +#: ../../page.rst:85 c0b5ce6fcbbc4de7aa30f2129254ff47 +msgid ":meth:`Page.get_image_bbox`" +msgstr "" + +#: ../../page.rst:85 43b13bc396dd422ab33af9875af20fb9 +msgid "PDF only: get bbox and matrix of embedded image" +msgstr "PDFのみ:埋め込まれた画像のバウンディングボックスと行列を取得" + +#: ../../page.rst:86 2f2008eab32e41c496e916cc5fa7e785 +msgid ":meth:`Page.get_image_info`" +msgstr "" + +#: ../../page.rst:86 c479d646bfb345d280646191ad904051 +msgid "get list of meta information for all used images" +msgstr "使用されるすべての画像のメタ情報のリストを取得" + +#: ../../page.rst:87 9f841024c0af4e8f8cbc147d0c1d19d6 +msgid ":meth:`Page.get_image_rects`" +msgstr "" + +#: ../../page.rst:87 a071461b91e141ef9f528f79778e9bdf +msgid "PDF only: improved version of :meth:`Page.get_image_bbox`" +msgstr "PDFのみ: :meth:`Page.get_image_bbox` の改良バージョンを取得" + +#: ../../page.rst:88 ../../page.rst:2329 4b1e001813054caea2d079dfbe51ef38 +#: c4ae999812cb419f9d26b9589aa7c8e1 +msgid ":meth:`Page.get_images`" +msgstr "" + +#: ../../page.rst:88 002d655e970848c5af445a2e3ba6068d +msgid "PDF only: get list of referenced images" +msgstr "PDFのみ:参照された画像のリストを取得" + +#: ../../page.rst:89 f7ceaa7044884fca854c0912d4751bf8 +msgid ":meth:`Page.get_label`" +msgstr "" + +#: ../../page.rst:89 4fcc37cae68548b8ae2178a2d91c50e9 +msgid "PDF only: return the label of the page" +msgstr "PDFのみ:ページのラベルを返す" + +#: ../../page.rst:90 38c59c9328dd44e9841de9deedeee098 +msgid ":meth:`Page.get_links`" +msgstr "" + +#: ../../page.rst:90 db76f5f1f41e4b9e901236bee2ddd9a8 +msgid "get all links" +msgstr "すべてのリンクを取得" + +#: ../../page.rst:91 ../../page.rst:2330 65624ce9eeb04d2f9ca8160259e6fe58 +#: ca6e5e28861d4d1993daa63690102fc2 +msgid ":meth:`Page.get_pixmap`" +msgstr "" + +#: ../../page.rst:91 c828578e0c444b50be0ba7a3f11a05a1 +msgid "create a page image in raster format" +msgstr "ラスターフォーマットのページイメージを作成" + +#: ../../page.rst:92 d907b8ff3e48468d988f24b0c84e744d +msgid ":meth:`Page.get_svg_image`" +msgstr "" + +#: ../../page.rst:92 4f03677d5eef4f38909512ab59ed88e9 +msgid "create a page image in SVG format" +msgstr "SVGフォーマットのページイメージを作成" + +#: ../../page.rst:93 ../../page.rst:2331 3ec0672bf07542e7ab84dca18b782e2e +#: 65d0129edf394a778705725b98f2fde4 +msgid ":meth:`Page.get_text`" +msgstr "" + +#: ../../page.rst:93 a106f9edd51b463aa1e9140a9e788e96 +msgid "extract the page's text" +msgstr "ページのテキストを抽出" + +#: ../../page.rst:94 f60eeef903ff48bf9cdaa107ebfe47d2 +msgid ":meth:`Page.get_textbox`" +msgstr "" + +#: ../../page.rst:94 61bfad8edc3649228c709ac1615e5e78 +msgid "extract text contained in a rectangle" +msgstr "特定の矩形に含まれるテキストを抽出" + +#: ../../page.rst:95 3b15abc336ec4bd8858e784dde6e31d2 +msgid ":meth:`Page.get_textpage_ocr`" +msgstr "" + +#: ../../page.rst:95 eb21942874514b38b5d6cce03a9a763b +msgid "create a TextPage with OCR for the page" +msgstr "ページのOCR付きのTextPageを作成" + +#: ../../page.rst:96 c9382b18963a4787af50bdff83d3d5b5 +msgid ":meth:`Page.get_textpage`" +msgstr "" + +#: ../../page.rst:96 1fff700d349047fe8f25972ca3c0541b +msgid "create a TextPage for the page" +msgstr "ページのTextPageを作成" + +#: ../../page.rst:97 accbca958e094897ba5b28fd1781fa88 +msgid ":meth:`Page.get_xobjects`" +msgstr "" + +#: ../../page.rst:97 f842f1df2d1a4367ac52a6a86963de81 +msgid "PDF only: get list of referenced xobjects" +msgstr "PDFのみ:参照されたxobjectのリストを取得" + +#: ../../page.rst:98 560cad86b28748f7b8ef9f1d92e5c744 +msgid ":meth:`Page.insert_font`" +msgstr "" + +#: ../../page.rst:98 a7fd2e4c23894f7faec1b88601464b17 +msgid "PDF only: insert a font for use by the page" +msgstr "PDFのみ:ページで使用するフォントを挿入" + +#: ../../page.rst:99 422fa2d799ef496798e1f081fd6f26d0 +msgid ":meth:`Page.insert_image`" +msgstr "" + +#: ../../page.rst:99 4367ca2a4da8472482ae22bf55fb00fc +msgid "PDF only: insert an image" +msgstr "PDFのみ:画像を挿入" + +#: ../../page.rst:100 80c4a2d5efbf4c6a96a7a191679688ef +msgid ":meth:`Page.insert_link`" +msgstr "" + +#: ../../page.rst:100 474c8d8e91a54f3094454a2c456beb1a +msgid "PDF only: insert a link" +msgstr "PDFのみ:リンクを挿入" + +#: ../../page.rst:101 62e01baa42e94b278a62505489aa477c +msgid ":meth:`Page.insert_text`" +msgstr "" + +#: ../../page.rst:101 6581c5190ec64dd4bda671253e1306cd +msgid "PDF only: insert text" +msgstr "PDFのみ:テキストを挿入" + +#: ../../page.rst:102 a086e402014342b0824957320ac85ff0 +msgid ":meth:`Page.insert_htmlbox`" +msgstr "" + +#: ../../page.rst:102 bdc8db5bf36c40c98d85c65d588043db +msgid "PDF only: insert html text in a rectangle" +msgstr "PDFのみ: 指定された矩形にテキストを追加します。" + +#: ../../page.rst:103 bfae9c37a52a4e21afbc417c8ad2cdec +msgid ":meth:`Page.insert_textbox`" +msgstr "" + +#: ../../page.rst:103 6d2b5b99bea845c1876b66e66127431f +msgid "PDF only: insert a text box" +msgstr "PDFのみ:テキストボックスを挿入" + +#: ../../page.rst:104 3fa49329024b4c8a91b93d7cf020e6e0 +msgid ":meth:`Page.links`" +msgstr "" + +#: ../../page.rst:104 1a19313c7bdf4b0cbadf7b3d6892b317 +msgid "return a generator of the links on the page" +msgstr "ページ上のリンクのジェネレータを返す" + +#: ../../page.rst:105 112197bda67d45a8b5b0cd5c729abfc0 +msgid ":meth:`Page.load_annot`" +msgstr "" + +#: ../../page.rst:105 b88e6121a71147b390cb750c15ce247f +msgid "PDF only: load a specific annotation" +msgstr "PDFのみ:特定のアノテーションを読み込む" + +#: ../../page.rst:106 3fbd5da197b848f5ab374679254f8dfc +msgid ":meth:`Page.load_widget`" +msgstr "" + +#: ../../page.rst:106 fa36f08ed59c4325b83e58f854a749c0 +msgid "PDF only: load a specific field" +msgstr "PDFのみ:特定のフィールドを読み込む" + +#: ../../page.rst:107 39d6832761aa4287b5d64850020d928c +msgid ":meth:`Page.load_links`" +msgstr "" + +#: ../../page.rst:107 413b2b2b6eda4a3698167d18f7b4e891 +msgid "return the first link on a page" +msgstr "ページ上の最初のリンクを返す" + +#: ../../page.rst:108 4c13634e34bd46e3b244843016470c48 +msgid ":meth:`Page.new_shape`" +msgstr "" + +#: ../../page.rst:108 9cbd0e0b071745c5850439c1f35ef327 +msgid "PDF only: create a new :ref:`Shape`" +msgstr "PDFのみ:新しい :ref:`Shape` を作成" + +#: ../../page.rst:109 96e0dd1204ba47f69a9a445946437939 +msgid ":meth:`Page.recolor`" +msgstr "" + +#: ../../page.rst:109 1ce88ace4ed44722943827a8401ea1ab +msgid "PDF only: change the colorspace of objects" +msgstr "" + +#: ../../page.rst:110 6ccde26da750454a97293ed3041ad6d1 +msgid ":meth:`Page.remove_rotation`" +msgstr "" + +#: ../../page.rst:110 24aceefd458c4b668e0fe6ec58877a9f +#, fuzzy +msgid "PDF only: set page rotation to 0" +msgstr "PDFのみ:ページの回転を設定" + +#: ../../page.rst:111 d9101243a0af4275b846a09587cf4710 +msgid ":meth:`Page.replace_image`" +msgstr "" + +#: ../../page.rst:111 f4790a42d72040a89eac0b0ba1b4538d +msgid "PDF only: replace an image" +msgstr "PDFのみ:画像を置換" + +#: ../../page.rst:112 ../../page.rst:2332 0f30a36bc2514748a56c858439fe79f4 +#: 525cee6c43354472a16bd42a1a903dd8 +msgid ":meth:`Page.search_for`" +msgstr "" + +#: ../../page.rst:112 781f1704b9c64dbb94c6c3a01b29a9d8 +msgid "search for a string" +msgstr "文字列を検索" + +#: ../../page.rst:113 87639ec8acb448a688d3293a846ada1e +msgid ":meth:`Page.set_artbox`" +msgstr "" + +#: ../../page.rst:113 2c969d520cdc49339c07163b60947cab +msgid "PDF only: modify `/ArtBox`" +msgstr "PDFのみ: `/ArtBox` を変更" + +#: ../../page.rst:114 8f77fd2b31ba42069b35c9a2b77be0b5 +msgid ":meth:`Page.set_bleedbox`" +msgstr "" + +#: ../../page.rst:114 f94d1b59f9f746ee8e72c85cddb228fa +msgid "PDF only: modify `/BleedBox`" +msgstr "PDFのみ:/BleedBoxを変更" + +#: ../../page.rst:115 5a086c34df134efa9ebec5e7d459d56f +msgid ":meth:`Page.set_cropbox`" +msgstr "" + +#: ../../page.rst:115 07381797f30346adaba78f74c467e565 +msgid "PDF only: modify the :data:`cropbox` (visible page)" +msgstr "PDFのみ: :data:`cropbox` (可視ページ)を変更" + +#: ../../page.rst:116 472c229bed474f2b9c42a4f6fd13e843 +msgid ":meth:`Page.set_mediabox`" +msgstr "" + +#: ../../page.rst:116 f5eb324e693941dab4db95e008824af4 +msgid "PDF only: modify `/MediaBox`" +msgstr "PDFのみ:/MediaBoxを変更" + +#: ../../page.rst:117 a06a6562846e47a0bf3c04461bbec798 +msgid ":meth:`Page.set_rotation`" +msgstr "" + +#: ../../page.rst:117 035ec0f523d54168a0b5e311e7d688a6 +msgid "PDF only: set page rotation" +msgstr "PDFのみ:ページの回転を設定" + +#: ../../page.rst:118 b4a48031c2d443e3a6a1972b7b6f1700 +msgid ":meth:`Page.set_trimbox`" +msgstr "" + +#: ../../page.rst:118 9d2602670b2f457e96e0cc93b782a2d2 +msgid "PDF only: modify `/TrimBox`" +msgstr "PDFのみ:`/TrimBox` を変更" + +#: ../../page.rst:119 fe6290e9292144efb8c2512109f958f9 +msgid ":meth:`Page.show_pdf_page`" +msgstr "" + +#: ../../page.rst:119 e9bf3f416fc745e4aa3e30dd023f95a0 +msgid "PDF only: display PDF page image" +msgstr "PDFのみ:PDFページ画像を表示" + +#: ../../page.rst:120 da1d26f46e2d4c378278ca146dd9f05d +msgid ":meth:`Page.update_link`" +msgstr "" + +#: ../../page.rst:120 d78d924cefa8438f8978e7a7c549dedc +msgid "PDF only: modify a link" +msgstr "PDFのみ:リンクを変更" + +#: ../../page.rst:121 0168f545ad9e42c3924e1a1e79990629 +msgid ":meth:`Page.widgets`" +msgstr "" + +#: ../../page.rst:121 aeb382c4dc584ecd84b68a381a02bf0f +msgid "return a generator over the fields on the page" +msgstr "ページ上のフィールドのジェネレータを返す" + +#: ../../page.rst:122 746834256a6d47d082af281ca392a879 +msgid ":meth:`Page.write_text`" +msgstr "" + +#: ../../page.rst:122 81600301b67e46a7bf5a0593285b9b42 +msgid "write one or more :ref:`Textwriter` objects" +msgstr "1つ以上の :ref:`Textwriter` オブジェクトを書き込む" + +#: ../../page.rst:123 22fb756f39a640c2961aebbd044fc305 +msgid ":attr:`Page.cropbox_position`" +msgstr "" + +#: ../../page.rst:123 641bccf7727e4cbb8a3dea3bc846c009 +msgid "displacement of the :data:`cropbox`" +msgstr ":data:`cropbox` の位置" + +#: ../../page.rst:124 cd33175e173e4c47a5d6700bfa73cf14 +msgid ":attr:`Page.cropbox`" +msgstr "" + +#: ../../page.rst:124 d20dd6b66d8c407484964c12e6fa0297 +msgid "the page's :data:`cropbox`" +msgstr "ページの :data:`cropbox`" + +#: ../../page.rst:125 69686837ecb14f98a1717f80065889a9 +msgid ":attr:`Page.artbox`" +msgstr "" + +#: ../../page.rst:125 8100ded041a0416a8049e54bd42a5705 +msgid "the page's `/ArtBox`" +msgstr "ページの `/ArtBox`" + +#: ../../page.rst:126 83dcbd9957ea4abba0df39e6b57ce629 +msgid ":attr:`Page.bleedbox`" +msgstr "" + +#: ../../page.rst:126 a4078b6a12e248a2b78385855ae8c427 +msgid "the page's `/BleedBox`" +msgstr "ページの `/BleedBox`" + +#: ../../page.rst:127 a82a9fe450d847d58ca2a70fa56a7aa6 +msgid ":attr:`Page.trimbox`" +msgstr "" + +#: ../../page.rst:127 c3052be687384369ba6383db8b9ab08d +msgid "the page's `/TrimBox`" +msgstr "ページの `/TrimBox`" + +#: ../../page.rst:128 cb0d3b66e4434a32a5ddb48e95791e78 +msgid ":attr:`Page.derotation_matrix`" +msgstr "" + +#: ../../page.rst:128 48ba0ea6ef074bc3b1f13e3ea2867e2f +msgid "PDF only: get coordinates in unrotated page space" +msgstr "PDFのみ:回転されていないページ空間内の座標を取得" + +#: ../../page.rst:129 cf4423e790b5457482f898ae5f16c038 +msgid ":attr:`Page.first_annot`" +msgstr "" + +#: ../../page.rst:129 4bb62e3b4ef04de0955c72c3da919eec +msgid "first :ref:`Annot` on the page" +msgstr "ページ上の最初の :ref:`Annot`" + +#: ../../page.rst:130 648be33d9f544cff85da2fb47d94ca02 +msgid ":attr:`Page.first_link`" +msgstr "" + +#: ../../page.rst:130 6917d2ca55e646269c31232e1dee233d +msgid "first :ref:`Link` on the page" +msgstr "ページ上の最初の :ref:`Link`" + +#: ../../page.rst:131 9243944617324220a7df0bd88edd7475 +msgid ":attr:`Page.first_widget`" +msgstr "" + +#: ../../page.rst:131 e25334201ade412cab91482159e1ba6c +msgid "first widget (form field) on the page" +msgstr "ページ上の最初のウィジェット(フォームフィールド)" + +#: ../../page.rst:132 8b9c7af9e8e44c33b50e2e3214e5f38b +msgid ":attr:`Page.mediabox_size`" +msgstr "" + +#: ../../page.rst:132 368e26974b684e56b226340acf738793 +msgid "bottom-right point of :data:`mediabox`" +msgstr ":data:`mediabox` の右下のポイント" + +#: ../../page.rst:133 3055cedb71d84fce8b387864e6836d8e +msgid ":attr:`Page.mediabox`" +msgstr "" + +#: ../../page.rst:133 3548dad5daec44ce9f377be2892d94f9 +msgid "the page's :data:`mediabox`" +msgstr "ページの :data:`mediabox`" + +#: ../../page.rst:134 01abd03b3cf744afa78b6d62a9355fb5 +msgid ":attr:`Page.number`" +msgstr "" + +#: ../../page.rst:134 efde9048b5ca4b989f139b24e557bebc +msgid "page number" +msgstr "ページ番号" + +#: ../../page.rst:135 084663951eab42de83414547a4bb2149 +msgid ":attr:`Page.parent`" +msgstr "" + +#: ../../page.rst:135 b0a25b0637d04b909532cc18344833bb +msgid "owning document object" +msgstr "所属するドキュメントオブジェクト" + +#: ../../page.rst:136 4ca2f14abc7c47c8851e6951cec2a839 +msgid ":attr:`Page.rect`" +msgstr "" + +#: ../../page.rst:137 8765de0c816349e59c2cbe931de25a06 +msgid ":attr:`Page.rotation_matrix`" +msgstr "" + +#: ../../page.rst:137 b5abd69454164760875080655f9f8c12 +msgid "PDF only: get coordinates in rotated page space" +msgstr "PDFのみ:回転したページ空間内の座標を取得" + +#: ../../page.rst:138 7645cca626724d5db33a8ad66711119a +msgid ":attr:`Page.rotation`" +msgstr "" + +#: ../../page.rst:138 af5ae3c273f14123a95e9a4bd778f9f6 +msgid "PDF only: page rotation" +msgstr "PDFのみ:ページの回転" + +#: ../../page.rst:139 4201f9a04eb4428f861e7ffeddcb6a96 +msgid ":attr:`Page.transformation_matrix`" +msgstr "" + +#: ../../page.rst:139 a3490dd1cd944f26a819e0e2fec9a671 +msgid "PDF only: translate between PDF and MuPDF space" +msgstr "PDFのみ:PDFとMuPDFのスペース間を変換" + +#: ../../page.rst:140 ea5a9817a90f41b98f4f78e4aa630f7b +msgid ":attr:`Page.xref`" +msgstr "" + +#: ../../page.rst:140 ac78af1937484fed9a52aecd476ed1a8 +msgid "PDF only: page :data:`xref`" +msgstr "PDFのみ:ページの :data:`xref`" + +#: ../../page.rst:143 7aec2adf2a3d4c828601fb1aa79b5c06 +msgid "**Class API**" +msgstr "**クラス API**" + +#: ../../page.rst:149 0d7bad78f95c4497b4665fc2b28a203c +msgid "" +"Determine the rectangle of the page. Same as property :attr:`Page.rect`. " +"For PDF documents this **usually** also coincides with :data:`mediabox` " +"and :data:`cropbox`, but not always. For example, if the page is rotated," +" then this is reflected by this method -- the :attr:`Page.cropbox` " +"however will not change." +msgstr "" +"ページの長方形を決定します。下記の :attr:`Page.rect` プロパティと同じです。PDF文書の場合、通常は " +":data:`mediabox` と :data:`cropbox` " +"と一致しますが、常にそうとは限りません。たとえば、ページが回転している場合、このメソッドに反映されますが、:attr:`Page.cropbox`" +" は変更されません。" + +#: ../../page.rst 09800d915d4f462b814820d9da1c5d1f +#: 1b49bdb9e449440faf4b12601d96673b 22ae66b30841488ba7e417986f398042 +#: 25b12dadc1994b939b5e4b8a2cfd9c85 27ee08bd8ef840f4a6dc060def5f4b8f +#: 288140f4f5a64d62bee924450212d6ef 2a19a07202ff403fa6303b5ae96700d3 +#: 2ba9537e4bf54a6da2ebc84b6935f66e 31cbeb8a7e5a4076b6af7159a75e87f3 +#: 35bd1ee1b82a4388978f998bdaf8b7f0 388f1fd6c75b4742965919a308fd97eb +#: 393caa01e96a427794f0771465370f30 3d998b5fff7c4d8097c282839b5f00d8 +#: 54c821d6f3284303860716473e091571 6101b0a009b347cd9e53cb307fb67032 +#: 842b86c8440a403a8c13645e9a71a2b2 94549fe1b41d4b82a1242332792b2827 +#: 9d065390780446be8b88ad9492c4b0c4 aeed1b5df6cc419fb9a16c85d414fb4d +#: b3ba456f95a945ea880f1a46596b8821 ba4b9a3125574504ac2a6ea814e231e6 +#: cb3097e23be4457c8cf75a1514f49d98 cb5ad1ec68ec41e28d5c239378d3b106 +#: da7c6f3a603046f484165c599b41b8f0 dce99577fa0b47d984f70345cbf5befb +#: dedd2cdbd5224c6ab6dcc6b392a0bb77 e43ae0f434644128ae26009905a32b51 +#: f87bc03a54d64ba98a3fbb74b4385664 fa898c8cb41d4f48877d309373a9007c +#: fdd47076bcd142338b6460a1624e8184 +msgid "Return type" +msgstr "戻り値の型" + +#: ../../page.rst:151 ../../page.rst:2173 ../../page.rst:2183 +#: ../../page.rst:2195 ../../page.rst:2256 ../../page.rst:2262 +#: 4dc600c7284a49bd8c350d20bf8e69f2 51eadb017f48474e8e33b93d32d6e29d +#: 85e3363ccace4f7b8bffe6dc225f2f97 ae91e665e95d4a238ba71eccfbab4a1f +#: ddba847f58c046e78eeacb24b25c6abd ff19be16d45445ef830755e944a2abb7 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../page.rst:155 d3a80abecbe949b4a5de9247e65d61bd +msgid "" +"PDF only: Add a caret icon. A caret annotation is a visual symbol " +"normally used to indicate the presence of text edits on the page." +msgstr "" +"PDFのみ: " +"ケアットアイコンを追加します。ケアットアノテーションは通常、ページ上でテキストの編集が存在することを示すために使用される視覚的なシンボルです" + +#: ../../page.rst 0c759786167649a4bf3a81f586fa575d +#: 1360015bc1ad47848cdac8102dc7df72 1b41e9143c7c48578e46fe316cc7bf83 +#: 1cb20c0ede95405fb7082c75d9860bbc 22dada0918b54eca9e9cb3d4be7a0d2e +#: 261f34b13a1a4f3da090c948f2a18861 2df2025bb6ae4e4cb980e26032bca18a +#: 2e89bf0f52e1457dbb9f24fbf1754764 2fc970dd645e4363b45b446fb4e2c763 +#: 30775b33047941cfbe784fcca2f69808 32b6ef52999c439eb0992551b5ade6f6 +#: 3554d992deeb4877aca677a4d4daf46c 39a7eb384d6845f1b122cbbe8134bc66 +#: 3b1a5e7b0e3e4bbc899e12352c877ca5 494c52c35b0f4e55af0d1a92e1035648 +#: 4e2b075a1d4e46fa96eb321732b7f5a3 557cb315deb8425eb61fb866ccc39f23 +#: 795d24eaae2e4b30a3362f7c07e49413 8402dcc723704327a11971a65f49cc58 +#: 84f540da555e47f9a1f7cfded5352a29 89690857ab554c74928d0200611048f6 +#: 8c2352d5802343cfb825e56aeb1bd14f 924c973064b9421a9d341dd0d4c46d90 +#: 95029237dbbe444983a11a2a6e9f78e5 989965e995504437a63d71f63075bdb4 +#: 98d7f86fa9574070912ba83ee9e173a4 9e7164982bd442b683851914bbb15a22 +#: a0454ac0eccf465894b0951608a3fc09 a1e9f5cf6ee141c9817c274976eab8e4 +#: a2b81a758dcd4107be579c62e8c67d96 a86c46bd997646d885e4ebe816de1a6c +#: aeee3d8ec8254492b75a79713042796b afbed008352c46c2b2d3c4cc43158480 +#: b0aadc53fac64a75b1d112ac5bd4cde7 c31ca655fc42459f87a85d46bd45e4c3 +#: c712e33a9fd84c7b954dee8bd7d7931b c8c73f76a20a40b089ffb1d650561355 +#: cdddccb6cc8d41fdb9deccb437388151 cf9abb78f6a941ed8e44a31f40f52e0b +#: dade00d4a64a4f8d9e0332aa1766e9dd e5a5a6f31a8146899eab5015149bbedd +#: e6d86608de4248daa18a1ae1333f7b1e eeab0d0f14a84bdbb7a525cea72bebfb +#: f0d8c8bdc1a049bda25be5b7540b74c1 f7e0d5f9ca844f59a136e17a638bb2d6 +#: f9876f9d807f4b8f83997d8d94cb325d +msgid "Parameters" +msgstr "パラメータ" + +#: ../../page.rst:157 8499d8b15adf4dd6934c93e4bf236beb +msgid "" +"the top left point of a 20 x 20 rectangle containing the MuPDF-provided " +"icon." +msgstr "point (point_like) – MuPDFが提供するアイコンを含む20 x 20の長方形の左上のポイント。" + +#: ../../page.rst:159 ../../page.rst:180 ../../page.rst:239 ../../page.rst:263 +#: ../../page.rst:272 ../../page.rst:283 ../../page.rst:294 ../../page.rst:322 +#: ../../page.rst:387 ../../page.rst:595 ../../page.rst:1908 +#: ../../page.rst:2231 0dc0330cb4a74731bbac096592ef80ca +#: 2709256c00de49feb1dbd47b11533f3a 29a2ca7ca51d40c392e14bd73f50da77 +#: 3f15bea4dbb440cc9cd6b44168b77108 50d5e614b51749ccbd9eb63ae369eb8b +#: 6f8c93a9f2324cca9e8dafbfa06efaaf 87c51f8fc1a94eb091f036c802a4419f +#: a970feb69ce34240b5756d5933d630ed ab57363b9a874f6a9f6048acdb993f86 +#: b4c57cb71c6d43669faa318baac928fe db6823017380446cb461b1ef24839b7a +#: fc2f2a90ed494ca5979649563a73a969 +msgid ":ref:`Annot`" +msgstr "" + +#: ../../page.rst 08fd9a98fced4ca08b671ef6946d9c7b +#: 0c4936af60b945139fa1f75b5a349bed 10e98b94fd2b4864a4636428f6f21a91 +#: 14e45f8a25604efc9adcb138c8625d54 19f96c9129fb4aef9189e8118ea57aa8 +#: 23df1cc94715493980e364bb9bf61b2e 247b29ff087e468099790862f6e0e106 +#: 28517c3484af4eacb331ad5eba1dd3e1 286a67394e8640e987484f5db767d8fb +#: 2a0d0c84577d4209b1898b4ad5088d2b 363efe9c442b40418dedb9dfd8e7b5bd +#: 3676fb2e715c458d99563b9fcae7b86e 3b545513f08a419dab7cbfe591008596 +#: 3f98b5afa81b4ca9859d67f918c42d7d 4d7033098e2146cd89092ffc55625bd9 +#: 5db0ad2394c0438e837d62e2d335c361 6aa26e52f32e4ab7a79edc5a392fb325 +#: 6f4ab45eb54343fb95b44b705d70b324 7327d2cfa8194d75b919d6b293a45565 +#: 7f95f3e8be19483db9f6a4078be346db 84d0f0c8cc3b475a8953f65caa28f829 +#: 8caf0b51949c45e1938a2763702a7786 9b81051bf528456ab82dcb5a52767b84 +#: a1aa2ca039da42d2a60f817d6818b6f2 a27f9ad9548846eea4bae07422a2ace0 +#: a2b741b58b164ca39bb0215ce5b03249 a5513cd336554892954ed5f805df5eb7 +#: bbfb4866d7ba4181b4c5c29333852530 bee1f1b70c2e482296b45f8864d5a5c3 +#: c3b97140456a43dca8d2be3dd22ff6ef c82bf67c39314e789c103507236d70e9 +#: c9057da2d842400995050db66f2d6614 ca5afaf4816b4f678211dfc38ab5f036 +#: cd9d4cad9eda4e4e9e9fa99bfdb2a2e0 d9c82a64864c4b418b14e7f2d5f159a3 +#: e2a530ad855c4fb5bc41b2bf0854dae0 ea71e9e700e7443c863ac4b1c185d11c +#: eabd95ad26864d429ebcf07e9647b4bc f5111a9bce1448c7b95cca799fe0e007 +#: f6e60b204f5a4246978f5aef943ecc70 +msgid "Returns" +msgstr "戻り値" + +#: ../../page.rst:160 ec1317ff464446759b1258298338f243 +msgid "" +"the created annotation. Stroke color blue = (0, 0, 1), no fill color " +"support." +msgstr "作成されたアノテーション。ストロークの色は青=(0, 0, 1)で、塗りつぶしの色はサポートされていません。" + +#: ../../page.rst:165 ../../page.rst:242 ../../page.rst:327 ../../page.rst:367 +#: ../../page.rst:535 ../../page.rst:608 ../../page.rst:644 ../../page.rst:666 +#: ../../page.rst:691 ../../page.rst:706 ../../page.rst:728 ../../page.rst:756 +#: ../../page.rst:785 ../../page.rst:850 ../../page.rst:879 ../../page.rst:904 +#: ../../page.rst:929 ../../page.rst:953 ../../page.rst:977 ../../page.rst:1002 +#: ../../page.rst:1026 ../../page.rst:1051 ../../page.rst:1075 +#: ../../page.rst:1100 ../../page.rst:1125 ../../page.rst:1306 +#: ../../page.rst:1354 ../../page.rst:1378 ../../page.rst:1436 +#: ../../page.rst:1464 ../../page.rst:1486 ../../page.rst:1527 +#: ../../page.rst:1638 ../../page.rst:1661 ../../page.rst:1720 +#: ../../page.rst:1750 ../../page.rst:1770 ../../page.rst:1785 +#: ../../page.rst:1826 ../../page.rst:1867 ../../page.rst:1881 +#: ../../page.rst:1895 ../../page.rst:1913 ../../page.rst:1930 +#: ../../page.rst:2019 ../../page.rst:2056 ../../page.rst:2080 +#: ../../page.rst:2098 ../../page.rst:2145 06acb7525fa345d28f749242f8b47be6 +#: 0f9fc223156b45e4a676ab5eb70b8ead 1a2b3b87c6144bdb8302eb3d58e78c53 +#: 1ab2b1edde3a4d9bb3ca70c6478477a6 21e12500b2924672bfaec8de4de14c00 +#: 22b2de474bf74c85b2f78ed2b44583f3 2717c1a780fd4164bdadbb368f6b07ef +#: 2eba93c648e34c579b7f83abc2bda2d1 338923e792254613a7f78d9be8087cd6 +#: 3b036f07260543d5865755b71a8725f6 409db0684db5497bb6c2b3ef11c48ba8 +#: 42e6978597424fccac4a978fcc1b1d42 4be4b732c92a4ecaac80e62d4c9bb625 +#: 4d1f142369254d80ad029683d5da3a4c 5202ba5596424fa1970e216ea1d101d8 +#: 535b10586d03495a8f754c9db94a0a10 53f75abc965945de8d462439098e3b93 +#: 59c623d3e0274542a3292e9787a1a82e 5a77a9a4b43e4ac5b4d7cfafc208e732 +#: 5af9bc466ac74bb49b8b6d4fa96112ff 5b066106759f4ddba0f96f427ea03bef +#: 5c3c5397df414eccb40e8b119746c75b 628780eec248445ebf4aa425da90f9c1 +#: 6974495fd9db42a0ad2bcec2ab860f09 7bbd1cc816a3433484c45705405ce41d +#: 7e74eae0e1164a3bb85228e8d0098fa4 83c866856f47496890bec21f133a6a29 +#: 89068605cd7b48acba3c666223f108f5 8c0f8b8d788842b39235739f838763c4 +#: 917e90c5f80646bf8abd2dab266a8a6f 989a1f313b07484998fbd8b1d7d93691 +#: a0219846828c4d91add801287a03e08b a5426843c4be4b15ab31c53c9481269a +#: a5b56f38886544b6bd99fc8923267b0e aa440b13daa643b98576c24d9c5b4fa7 +#: ab1c4110f36e47efa1ccbce643199f86 af11443e453048d7a725fdc3282e20e3 +#: b93585a5220c4ed09fc7f3df595369ed b941dcc21b1c4f63ae04575bed58076b +#: c8fa2de2013144f0b54dd67e33d18de4 cce75f64802e475491277a27dedb4780 +#: cf8a35726f5a4c4b8a201622b1d83104 d344985b81444b099355020e85dbf7b3 +#: d64136bd167c4a80b5e211d77fa0b2da dd703e1ef3534437bbf1c68a935d3b09 +#: f032910a6d274521b47cefa8607773cc f1e163b082594cdda50f3084293cd85b +#: f510ba583d734bc08f30d434af29a350 fb517cbc7ba8408194ed6dca086893d7 +msgid "|history_begin|" +msgstr "" + +#: ../../page.rst:167 0c25bc5956e5427cb892bd9dca5b2dc4 +msgid "New in v1.16.0" +msgstr "v1.16.0で新たに追加された" + +#: ../../page.rst:169 ../../page.rst:246 ../../page.rst:331 ../../page.rst:375 +#: ../../page.rst:540 ../../page.rst:612 ../../page.rst:648 ../../page.rst:670 +#: ../../page.rst:695 ../../page.rst:710 ../../page.rst:732 ../../page.rst:760 +#: ../../page.rst:789 ../../page.rst:855 ../../page.rst:883 ../../page.rst:908 +#: ../../page.rst:933 ../../page.rst:957 ../../page.rst:981 ../../page.rst:1006 +#: ../../page.rst:1030 ../../page.rst:1055 ../../page.rst:1079 +#: ../../page.rst:1105 ../../page.rst:1129 ../../page.rst:1330 +#: ../../page.rst:1358 ../../page.rst:1382 ../../page.rst:1444 +#: ../../page.rst:1469 ../../page.rst:1491 ../../page.rst:1532 +#: ../../page.rst:1648 ../../page.rst:1669 ../../page.rst:1725 +#: ../../page.rst:1754 ../../page.rst:1778 ../../page.rst:1789 +#: ../../page.rst:1840 ../../page.rst:1871 ../../page.rst:1885 +#: ../../page.rst:1899 ../../page.rst:1917 ../../page.rst:1934 +#: ../../page.rst:2024 ../../page.rst:2063 ../../page.rst:2085 +#: ../../page.rst:2103 ../../page.rst:2149 036b5a5a97204a56b1a3b988b8afb868 +#: 03c5be9f8ce44383b814ed4e7ccc13fb 06428cdd0f834eeab15e6b2959d8f55e +#: 074be0b70f794bdb94f11e93b20cdd11 09eb1bb0390440519aa0d55c0cf792c6 +#: 0a70f894122a4bc788b2a7edb7d0a125 0c96a8b2e3b14c3495db50e345d6d293 +#: 0ebc94691edd4812bc7541ccacc1e46f 1589d3b9736d4a428e40e3e528ee37f6 +#: 1883c7235e954c47853a5556abdfdfd9 1afae3681abd4479b09753ff86555fe9 +#: 262b2bee02ad461fade53789de66316d 28ad9439cccc43e0849a9b601260dba9 +#: 2b6da634c5894249a6c62f65bf2d942a 2f9ff98fbca448989b8bbe538c03276e +#: 31af1b01f9f84ce7809b3e03cda35b4f 45cf83b4ad3b4949ae98b2589b5853db +#: 4e3e1215560b4191981b3e91736743f9 4f221591c3704a9194271a786eceb634 +#: 56f1f5d0869f4e56b2e09d603889b8a5 5d3b8b846aaf4e25945e631e15539cab +#: 61fda409985b49f3861ef6c20076955e 66fb2b7246c74161a321735562281b9f +#: 67327137618f451c86898f9b3d9f5c5d 762712ff6e6d42d09d1befe35846d2ad +#: 857e3db2cd42451cb445b92d943c21ff 858887558f1248a68d706d588ae80786 +#: 892eed36794f4463ad4c4ccf482267bc 8dd7cbb03aaf4f73b337218ecda15eef +#: 91e1955816cb415296c704c4555a2c31 98ff9eac79cf4b2688967a08e338a409 +#: 9ff49e6fe10e496aad2e9f951e21a508 a050b798d3544202a7e48223fb7a4d13 +#: a0a2df60bc3a4c07b7ccfbeeb75cf31a afb90e55022441a2b9f34150fde93928 +#: b97ee3b2f1e04800a3e2d27efce86df0 be58b3a2161743b68d2e749ef727b67f +#: bf6a97c619c74aa89e82bac2eeae0429 c609886c63de4f8e8e428237073caec6 +#: cadb97971b1c47c88ee4fc1319ff6444 d0c35d48594c46f48839725d4f7eb688 +#: d17ded63f2464b91983626ef0549885e d42708c3d0034e08835a20d77bcfbc56 +#: e288d25695ec441cb7194eb38f71613b e405df27bc3a4601bd14c837b40c68cf +#: f319bb010dad414dba04ba0280a26f90 f927f6ad2f1c4757b1101dea3e1c93db +#: f98bf56729ff4c8dbe144e8f69cfcdf6 fe4913267a064b78bf0422e3323fe479 +msgid "|history_end|" +msgstr "" + +#: ../../page.rst:173 4fef7e9387ea46dcb9cda8056c3031c2 +msgid "" +"PDF only: Add a comment icon (\"sticky note\") with accompanying text. " +"Only the icon is visible, the accompanying text is hidden and can be " +"visualized by many PDF viewers by hovering the mouse over the symbol." +msgstr "" +"PDFのみ: " +"コメントアイコン(「付箋」)を追加し、それに関連するテキストを含めます。アイコンのみが表示され、関連するテキストは非表示で、多くのPDFビューアではアイコンの上にマウスを重ねることで可視化できます。" + +#: ../../page.rst:175 ca5332effd844be1a7fe628bc8b69b4d +msgid "" +"the top left point of a 20 x 20 rectangle containing the MuPDF-provided " +"\"note\" icon." +msgstr "提供されたMuPDFアイコンが含まれる20 x 20の矩形の左上の点。" + +#: ../../page.rst:177 cc18e4d047de40fe916fa310c149e301 +msgid "" +"the commentary text. This will be shown on double clicking or hovering " +"over the icon. May contain any Latin characters." +msgstr "コメントテキスト。これはダブルクリックまたはアイコンの上にカーソルを合わせることで表示されます。ラテン文字を含むことができます。" + +#: ../../page.rst:178 4b1c21cc0c08410fb69b90f47109d4ca +msgid "" +"choose one of \"Note\" (default), \"Comment\", \"Help\", \"Insert\", " +"\"Key\", \"NewParagraph\", \"Paragraph\" as the visual symbol for the " +"embodied text [#f4]_. (New in v1.16.0)" +msgstr "" +"*(v1.16.0で新規追加)* " +"\"Note\"(デフォルト)、\"Comment\"、\"Help\"、\"Insert\"、\"Key\"、\"NewParagraph\"、\"Paragraph\"" +" のいずれかを、具体的なテキストの視覚的なシンボルとして選択してください。 [#f4]_" + +#: ../../page.rst:181 3787b3da04af4edf834e43f2bc6d3ab1 +msgid "" +"the created annotation. Stroke color yellow = (1, 1, 0), no fill color " +"support." +msgstr "作成された注釈。ストロークカラーは黄色(1, 1, 0)、塗りつぶしカラーのサポートはありません。" + +#: ../../page.rst:201 a97b1926c3854f8fa349bc8b538f8284 +msgid "" +"PDF only: Add text in a given rectangle. Optionally, the appearance of a " +"\"callout\" shape can be requested by specifying two or three point-like " +"objects -- see below." +msgstr "" + +#: ../../page.rst:203 8462920b3f21473f87647a9e520dd8fb +#, fuzzy +msgid "" +"the rectangle into which the text should be inserted. Text is " +"automatically wrapped to a new line at box width. Text portions not " +"fitting into the rectangle will be invisible without warning." +msgstr "テキストを挿入する矩形。テキストはボックスの幅で自動的に改行されます。ボックスに収まらない行は見えません。" + +#: ../../page.rst:205 dd3a17e723d04112897e999518779b02 +msgid "" +"the text. May contain any mixture of Latin, Greek, Cyrillic, Chinese, " +"Japanese and Korean characters. If `richtext=True` (see below), the " +"string is interpreted as HTML syntax. This adds a plethora of ways for " +"attractive effects." +msgstr "" + +#: ../../page.rst:207 d53ac74553044cd28a545203048a9b6a +#, fuzzy +msgid "the :data:`fontsize`. Default is 11. Ignored if `richtext=True`." +msgstr ":data:`fontsize`。デフォルトは12です。" + +#: ../../page.rst:209 3b444938ed8b469492070e7462c4da5e +msgid "" +"The font name. Default is \"Helv\". Ignored if `richtext=True`, otherwise" +" the following **restritions apply:** * Accepted alternatives are " +"\"Helv\" (Helvetica), \"Cour\" (Courier), \"TiRo\" (Timnes-Roman), " +"\"ZaDb\" (ZapfDingBats) and \"Symb\" (Symbol). The name may be " +"abbreviated to the first two characters, like \"Co\" for \"Cour\", lower " +"case accepted. * Bold or italic variants of the fonts are **not " +"supported.**" +msgstr "" + +#: ../../page.rst:209 61e952813f634b83878e975e4e90bc34 +msgid "" +"The font name. Default is \"Helv\". Ignored if `richtext=True`, otherwise" +" the following **restritions apply:**" +msgstr "" + +#: ../../page.rst:211 492319dbc3c0406f962cbf27312431d1 +msgid "" +"Accepted alternatives are \"Helv\" (Helvetica), \"Cour\" (Courier), " +"\"TiRo\" (Timnes-Roman), \"ZaDb\" (ZapfDingBats) and \"Symb\" (Symbol). " +"The name may be abbreviated to the first two characters, like \"Co\" for " +"\"Cour\", lower case accepted." +msgstr "" + +#: ../../page.rst:213 46678b937e4f448599916282271aa9f3 +msgid "Bold or italic variants of the fonts are **not supported.**" +msgstr "" + +#: ../../page.rst:215 518d715f42794461a396e28d41f3bed4 +#, fuzzy +msgid "the text color. Default is black. Ignored if `richtext=True`." +msgstr "テキストの色。デフォルトは黒です。(v1.16.0で新規追加)" + +#: ../../page.rst:217 b47440f9874245db918409d99decab29 +msgid "" +"the fill color. This is used for ``rect`` and the end point of the " +"callout lines when applicable. Default is ``None``." +msgstr "" + +#: ../../page.rst:219 64b76c90df86415e96e7d717150dec41 +msgid "" +"This parameter only has an effect if `richtext=True`. Otherwise, " +"``text_color`` is used." +msgstr "" + +#: ../../page.rst:221 e23ad5b7916a4712b840b98830b8f221 +msgid "" +"the width of border and ``callout`` lines. Default is 0 (no border), in " +"which case callout lines may still appear with some hairline width, " +"depending on the PDF viewer used." +msgstr "" + +#: ../../page.rst:223 6a8b44d5be6d4cc2ba2cf0e2452e5f57 +msgid "" +"a list of floats specifying how border and callout lines should be " +"dashed. Default is ``None``." +msgstr "" + +#: ../../page.rst:225 bc9f1e66e37a42b18e08131ae60f9ab1 +msgid "" +"a list / tuple of two or three :data:`point_like` objects, which will be " +"interpreted as end point [, knee point] and start point (in this " +"sequence) of up to two line segments, converting this annotation into a " +"call-out shape." +msgstr "" + +#: ../../page.rst:227 7dcaaae52c76488881e81776e8156ff9 +msgid "" +"the line end symbol of the call-out line. It is drawn at the first point " +"specified in the `callout` list. Default is an open arrow. For possible " +"values see :ref:`AnnotationLineEnds`." +msgstr "" + +#: ../../page.rst:229 846b5e256f5b4d1086abdec5ef1d7cb9 +msgid "" +"a float `0 <= opacity < 1` turning the annotation transparent. Default is" +" no transparency." +msgstr "" + +#: ../../page.rst:231 3f49fad545744e46be0a305e3fdd4554 +msgid "" +"text alignment, one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, " +"TEXT_ALIGN_RIGHT - justify is **not supported**. Ignored if " +"`richtext=True`." +msgstr "" + +#: ../../page.rst:233 9ee589cd4b674893a64b76ac9b2f8383 +#, fuzzy +msgid "" +"the text orientation. Accepted values are integer multiples of 90°. " +"Invalid entries receive a rotation of 0." +msgstr "テキストの向き。受け入れられる値は0、90、270で、無効なエントリはゼロに設定されます。" + +#: ../../page.rst:235 ae5ec59a9ec944a59d5a682f742b6b1c +msgid "" +"treat ``text`` as HTML syntax. This allows to achieve **bold**, *italic*," +" arbitrary text colors, font sizes, text alignment including justify and " +"more - as far as HTML and styling instructions support this. This is " +"similar to what happens in :meth:`Page.insert_htmlbox`. The base library " +"will for example pull in required fonts if it encounters characters not " +"contained in the standard ones. Some parameters are ignored if this " +"option is set, as mentioned above. Default is ``False``." +msgstr "" + +#: ../../page.rst:237 997862c6eac14a6f8c0f69c87ab03307 +msgid "" +"supply optional HTML styling information in CSS syntax. Ignored if " +"`richtext=False`." +msgstr "" + +#: ../../page.rst:240 f9c9742964414f188d59a37e31015ce3 +#, fuzzy +msgid "the created annotation." +msgstr "ウィジェットアノテーション。" + +#: ../../page.rst:244 0c67458bf6a74d07b2ac2a250dbcf720 +msgid "Changed in v1.19.6: add border color parameter" +msgstr "v1.19.6で変更:境界色パラメータを追加" + +#: ../../page.rst:250 da7a85dd8f5847ff84884a9fa06b3436 +msgid "" +"PDF only: Add a file attachment annotation with a \"PushPin\" icon at the" +" specified location." +msgstr "PDFのみ: 指定された場所に「PushPin」アイコンを持つファイル添付注釈を追加します。" + +#: ../../page.rst:252 e95d2ab63da04ec29a414d3f9313edf2 +msgid "" +"the top-left point of a 18x18 rectangle containing the MuPDF-provided " +"\"PushPin\" icon." +msgstr "ムPDFで提供される「PushPin」アイコンを含む18x18の四角形の左上のポイント。" + +#: ../../page.rst:254 f7c580780fe24629a7d404c86b84b2ce +#, fuzzy +msgid "" +"the data to be stored (actual file content, any data, etc.). Changed in " +"v1.14.13: *io.BytesIO* is now also supported." +msgstr "格納するデータ(実際のファイルコンテンツ、任意のデータなど)。" + +#: ../../page.rst:254 22ab37e993ec4cd587c274a0ccd50cca +msgid "the data to be stored (actual file content, any data, etc.)." +msgstr "格納するデータ(実際のファイルコンテンツ、任意のデータなど)。" + +#: ../../page.rst:256 de5774f31906451bb48a1c55e783e54d +msgid "Changed in v1.14.13: *io.BytesIO* is now also supported." +msgstr "v1.14.13で変更: *io.BytesIO* もサポートされるようになりました。" + +#: ../../page.rst:258 b2ae841e83504f929e869fd297843c4e +msgid "the filename to associate with the data." +msgstr "データに関連付けるファイル名。" + +#: ../../page.rst:259 256a665836b94adc868d6fef208f9f80 +msgid "the optional PDF unicode version of filename. Defaults to filename." +msgstr "ファイルのPDF Unicodeバージョンのオプション。デフォルトはファイル名です。" + +#: ../../page.rst:260 f7107a8bf89247a3bef87be543d63a18 +msgid "an optional description of the file. Defaults to filename." +msgstr "ファイルのオプションの説明。デフォルトはファイル名です。" + +#: ../../page.rst:261 5267c9bbba4a457a91f36a2eaaf79813 +msgid "" +"choose one of \"PushPin\" (default), \"Graph\", \"Paperclip\", \"Tag\" as" +" the visual symbol for the attached data [#f4]_. (New in v1.16.0)" +msgstr "" +"v1.16.0で新しく追加された)添付データの視覚的なシンボルとして、次のいずれかを選択します。\"PushPin\"(デフォルト)、\"Graph\"、\"Paperclip\"、\"Tag\"" +" [#f4]_。" + +#: ../../page.rst:264 1f60ac3e970f4f6ba6f5de9b8b306024 +msgid "" +"the created annotation. Stroke color yellow = (1, 1, 0), no fill color " +"support." +msgstr "作成された注釈。線の色は黄色(1, 1, 0)、塗りつぶしのサポートはありません。" + +#: ../../page.rst:268 ff1b9254d9bd493da66b60588a3edcc0 +msgid "PDF only: Add a \"freehand\" scribble annotation." +msgstr "PDFのみ: \"freehand\"の落書き注釈を追加します。" + +#: ../../page.rst:270 b3295a497afd46d78d23182ff0caea96 +msgid "" +"a list of one or more lists, each containing :data:`point_like` items. " +"Each item in these sublists is interpreted as a :ref:`Point` through " +"which a connecting line is drawn. Separate sublists thus represent " +"separate drawing lines." +msgstr "" +"1つまたは複数のリストからなり、それぞれが :data:`point_like` " +"アイテムを含むリストの1つです。これらのサブリスト内の各アイテムは、接続された線が描画される :ref:`Point` " +"として解釈されます。したがって、個々のサブリストは別々の描画ラインを表します。" + +#: ../../page.rst:273 775d0415b79040b1bb8276f940081f99 +msgid "" +"the created annotation in default appearance black =(0, 0, 0),line width " +"1. No fill color support." +msgstr "作成された注釈はデフォルトの外観で黒色(0, 0, 0)で、線の幅は1です。塗りつぶしのサポートはありません。" + +#: ../../page.rst:277 71a135f7d9c04f219408448f604f56f3 +msgid "PDF only: Add a line annotation." +msgstr "PDFのみ: 直線注釈を追加します。" + +#: ../../page.rst:279 a75af95c91b54adba9332de210fbb075 +msgid "the starting point of the line." +msgstr "直線の開始点。" + +#: ../../page.rst:281 e9ecf5d843f84e45b6bc2d20b3d6fe38 +msgid "the end point of the line." +msgstr "直線の終点。" + +#: ../../page.rst:284 b7e961c171f44c1aa71ebddd11b9d2e6 +msgid "" +"the created annotation. It is drawn with line (stroke) color red = (1, 0," +" 0) and line width 1. No fill color support. The **annot rectangle** is " +"automatically created to contain both points, each one surrounded by a " +"circle of radius 3 * line width to make room for any line end symbols." +msgstr "" +"作成された注釈。線(ストローク)の色は赤色(1, 0, 0)で、線の幅は1です。塗りつぶしのサポートはありません。**アノテーションの四角形** " +"は、各点を囲む半径 3 * 線幅の円で作成され、各点の周りにシンボルの線の終わりのためのスペースを確保します。" + +#: ../../page.rst:290 a68fe21476654536af0ce17bc4c57311 +msgid "PDF only: Add a rectangle, resp. circle annotation." +msgstr "PDFのみ: 長方形、または円の注釈を追加します。" + +#: ../../page.rst:292 990edeed703445dc8f3da8eac4a63673 +msgid "" +"the rectangle in which the circle or rectangle is drawn, must be finite " +"and not empty. If the rectangle is not equal-sided, an ellipse is drawn." +msgstr "円または長方形が描かれる矩形。有限で空でない必要があります。矩形が正方形でない場合、楕円が描画されます。" + +#: ../../page.rst:295 6daf319f42284361a82c184ee2c1af5d +msgid "" +"the created annotation. It is drawn with line (stroke) color red = (1, 0," +" 0), line width 1, fill color is supported." +msgstr "作成された注釈。線(ストローク)の色は赤色(1、0、0)、線の幅は1で、塗りつぶしのサポートがあります" + +#: ../../page.rst:300 88b890efe2934fa1bae62007057dc996 +msgid "Redactions" +msgstr "" + +#: ../../page.rst:304 a73936f86ab1443eac5c895b6bb46c2f +#, fuzzy +msgid "" +"**PDF only**: Add a redaction annotation. A redaction annotation " +"identifies an area whose content should be removed from the document. " +"Adding such an annotation is the first of two steps. It makes visible " +"what will be removed in the subsequent step, " +":meth:`Page.apply_redactions`." +msgstr "" +"**PDFのみ** " +":赤塗り注釈を追加します。赤塗り注釈は、文書から削除されるコンテンツを識別します。このような注釈を追加することは、2つの手順の最初です。次の手順、:meth:`Page.apply_redactions`" +" で削除される内容を可視化します。" + +#: ../../page.rst:306 dad50a6ce8594b7abc9c14146ad7ae5c +msgid "" +"specifies the (rectangular) area to be removed which is always equal to " +"the annotation rectangle. This may be a :data:`rect_like` or " +":data:`quad_like` object. If a quad is specified, then the enveloping " +"rectangle is taken." +msgstr "常に注釈の矩形と等しい削除する領域を指定します。これはrect_likeまたはquad_likeオブジェクトである必要があります。四角形が指定された場合、包括的な矩形が取られます。" + +#: ../../page.rst:308 2caebffbdfea4c6aa7a36594957ea469 +msgid "" +"text to be placed in the rectangle after applying the redaction (and thus" +" removing old content). (New in v1.16.12)" +msgstr "*(v1.16.12で新機能)* 赤字を適用した後に矩形に配置するテキスト(従って古いコンテンツを削除します)。" + +#: ../../page.rst:310 b05fc10db5dd4a9896d70c39c34b6d6d +msgid "" +"the font to use when ``text`` is given, otherwise ignored. Only CJK and " +"the :ref:`Base-14-Fonts` are supported. Apart from this, the same rules " +"apply as for :meth:`Page.insert_textbox` -- which is what the method " +":meth:`Page.apply_redactions` internally invokes." +msgstr "" + +#: ../../page.rst:312 fbdaaa453db14e15aa8759167d47cce4 +msgid "" +"the :data:`fontsize` to use for the replacing text. If the text is too " +"large to fit, several insertion attempts will be made, gradually reducing" +" the :data:`fontsize` to no less than 4. If then the text will still not " +"fit, no text insertion will take place at all. (New in v1.16.12)" +msgstr "" +"置換テキストに使用する :data:`fontsize` 。テキストが大きすぎて収まらない場合、:data:`fontsize` " +"を4未満にならないように徐々に縮小して、複数の挿入試行が行われます。その後もテキストが収まらない場合、テキストの挿入は行われません。 " +"(v1.16.12 で新規追加)" + +#: ../../page.rst:314 9665de46198a48619a881db1e7d9b140 +#, fuzzy +msgid "" +"the horizontal alignment for the replacing text. See " +":meth:`insert_textbox` for available values. The vertical alignment is " +"(approximately) centered." +msgstr "" +"置換テキストの水平配置です。使用可能な値については、:meth:`insert_textbox` " +"を参照してください。PDFの組み込みフォント(CJKまたは :ref:`Base-14-Fonts` " +")を使用する場合、垂直配置は(おおよそ)中央になります。 (v1.16.12で新規)" + +#: ../../page.rst:316 d6433c292a824ee98f28ed1827a0aa11 +msgid "" +"the fill color of the rectangle **after applying** the redaction. The " +"default is *white = (1, 1, 1)*, which is also taken if ``None`` is " +"specified. To suppress a fill color altogether, specify ``False``. In " +"this cases the rectangle remains transparent. (New in v1.16.12)" +msgstr "" +"**適用後** の赤塗りの四角形の塗りつぶし色です。デフォルトは *white = (1, 1, 1)* で、``None`` " +"が指定された場合も同様です。塗りつぶし色を抑制するには、``False`` " +"を指定します。この場合、四角形は透明のままです。(v1.16.12で新規追加)" + +#: ../../page.rst:318 15a5071add544cb3a8ec26aa4b439b39 +msgid "" +"the color of the replacing text. Default is *black = (0, 0, 0)*. (New in " +"v1.16.12)" +msgstr "*(新機能 v1.16.12)* 置換テキストの色です。デフォルトは *black = (0, 0, 0)* です。" + +#: ../../page.rst:320 3fcbd0df44e54455a054d5740671837f +msgid "add two diagonal lines to the annotation rectangle. (New in v1.17.2)" +msgstr "*(新機能 v1.17.2)* アノテーションの矩形に2つの対角線を追加します。" + +#: ../../page.rst:323 6115fef9c8ba4298a8db40a598de818d +msgid "" +"the created annotation. Its standard appearance looks like a red " +"rectangle (no fill color), optionally showing two diagonal lines. Colors," +" line width, dashing, opacity and blend mode can now be set and applied " +"via :meth:`Annot.update` like with other annotations. (Changed in " +"v1.17.2)" +msgstr "" +"作成された注釈です。その標準的な外観は、赤い四角形(塗りつぶし色なし)であり、必要に応じて二つの対角線を表示します。色、線の太さ、破線、不透明度、およびブレンドモードは、他の注釈と同様に、:meth:`Annot.update`" +" を介して設定および適用できます。(v1.17.2で変更)" + +#: ../../page.rst:329 ../../page.rst:369 3b79729c70104cdba1a9b9d61d69c834 +#: 3d68484d61614169ab07b2d7ad53b0d1 +msgid "New in v1.16.11" +msgstr "新機能 v1.16.11" + +#: ../../page.rst:336 f3e1f09fccde48fc833028e426216115 +#, fuzzy +msgid "" +"**PDF only**: Remove all **content** contained in any redaction rectangle" +" on the page." +msgstr "PDFのみ:赤塗りの矩形に含まれるすべての **テキストコンテンツ** を削除します。" + +#: ../../page.rst:338 ae6d96b72c8946d088788d3cb8fa772b +msgid "**This method applies and then deletes all redactions from the page.**" +msgstr "**このメソッドは、ページからすべての赤塗りを適用して削除します。**" + +#: ../../page.rst:340 a0a5d93de659411790677b9f30bb615a +msgid "" +"How to redact overlapping images. The default (2) blanks out overlapping " +"pixels. `PDF_REDACT_IMAGE_NONE | 0` ignores, and `PDF_REDACT_IMAGE_REMOVE" +" | 1` completely removes images overlapping any redaction annotation. " +"Option `PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE | 3` only removes images" +" that are actually visible." +msgstr "" +"重なる画像をレダクトする方法。デフォルトの(2)は、重なるピクセルを空白にします。 `PDF_REDACT_IMAGE_NONE | 0` " +"は無視し、 `PDF_REDACT_IMAGE_REMOVE | 1` は、いずれかのレダクション注釈と重なる画像を完全に削除します。オプション" +" `PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE | 3` は、実際に見える画像のみを削除します。" + +#: ../../page.rst:342 71ed1f9cb1ef4314968b486d3f91c6d6 +#, fuzzy, python-format +msgid "" +"How to redact overlapping vector graphics (also called \"line-art\" or " +"\"drawings\"). The default (2) removes any overlapping vector graphics. " +"`PDF_REDACT_LINE_ART_NONE | 0` ignores, and " +"`PDF_REDACT_LINE_ART_REMOVE_IF_COVERED | 1` removes graphics fully " +"contained in a redaction annotation. When removing line-art, please be " +"aware that **stroked** vector graphics (i.e. type \"s\" or \"sf\") have a" +" **larger wrapping rectangle** than one might expect: first of all, at " +"least 50% of the path's line width have to be added in each direction to " +"truly include all of the drawing. If a so-called \"miter limit\" is " +"provided (see page 121 of the PDF specification), the enlarging value is " +"`miter * width / 2`. So, when letting everything default (width = 1, " +"miter = 10), the redaction rectangle should be at least 5 points larger " +"in every direction." +msgstr "" +"重なるベクトルグラフィックス(または「ラインアート」や「図面」とも呼ばれる)を塗りつぶす方法。デフォルト値(2)では、重なるベクトルグラフィックスがすべて削除されます。`PDF_REDACT_LINE_ART_NONE" +" | 0` は無視し、`PDF_REDACT_LINE_ART_IF_COVERED | 1` " +"は、赤塗り注釈に完全に含まれるグラフィックスを削除します。ラインアートを削除する際は、 **ストロークされた** " +"ベクトルグラフィックス(つまりタイプ「s」または「sf」)が、期待される **よりも大きなラッピング長方形** " +"を持つことに注意してください:まず第一に、パスの線幅の少なくとも50%を、各方向に追加する必要があります。描画のすべてを含めるため。所謂「マイターリミット」が提供されている場合(PDF仕様書の121ページを参照)、拡大値は" +" `miter * width / " +"2`.です。すべてをデフォルト値(幅=1、マイター=10)にする場合、赤塗りの長方形は各方向に少なくとも5ポイント大きくする必要があります。" + +#: ../../page.rst:344 947aa9c7b11f4b3ea4bda239847ac1bf +msgid "" +"Whether to redact overlapping text. The default `PDF_REDACT_TEXT_REMOVE |" +" 0` removes all characters whose boundary box overlaps any redaction " +"rectangle. This complies with the original legal / data protection " +"intentions of redaction annotations. Other use cases however may require " +"to **keep text** while redacting vector graphics or images. This can be " +"achieved by setting `text=True|PDF_REDACT_TEXT_NONE | 1`. This does **not" +" comply** with the data protection intentions of redaction annotations. " +"**Do so at your own risk.**" +msgstr "" +"重なるテキストを塗りつぶすかどうかを指定します。デフォルトでは、`PDF_REDACT_TEXT_REMOVE | 0` " +"は、境界ボックスが赤塗り四角形と重なるすべての文字を削除します。これは、元の法的/データ保護の意図に適合しています。ただし、他のユースケースでは、ベクトルグラフィックスや画像を赤塗りする一方で**テキストを保持**する必要がある場合があります。これは、`text=True|PDF_REDACT_TEXT_NONE" +" | 1` を設定することで実現できます。これは、赤塗り注釈のデータ保護の意図には適合していませんので、自己責任で行ってください。" + +#: ../../page.rst:346 7ed20df074354c0c9acc708e66888c88 +msgid "" +"`True` if at least one redaction annotation has been processed, `False` " +"otherwise." +msgstr "少なくとも1つの赤字注釈が処理された場合は ``True``、それ以外の場合は ``False``。" + +#: ../../page.rst:349 eaa9a95d1bef4d8aa0a306ac622633eb +msgid "" +"Text contained in a redaction rectangle will be **physically** removed " +"from the page (assuming :meth:`Document.save` with a suitable garbage " +"option) and will no longer appear in e.g. text extractions or anywhere " +"else. All redaction annotations will also be removed. Other annotations " +"are unaffected." +msgstr "" +"赤塗りの四角に含まれるテキストは、**物理的に** " +"ページから削除されます(適切なゴミオプションを使用したDocument.save()の場合)、テキスト抽出などの場所にはもはや表示されません。また、すべての赤塗りの注釈も削除されます。他の注釈には影響しません。" + +#: ../../page.rst:351 f17adf97d56a4a11a48c91a3b2e09513 +msgid "" +"All overlapping links will be removed. If the rectangle of the link was " +"covering text, then only the overlapping part of the text is being " +"removed. Similar applies to images covered by link rectangles." +msgstr "重なっているすべてのリンクは削除されます。リンクの四角がテキストを覆っている場合、テキストの重なる部分のみが削除されます。画像もリンクの四角によってカバーされている場合、同様のことが適用されます。" + +#: ../../page.rst:353 b6fb1a37d1a8400bb9459ed7898b8204 +msgid "" +"The overlapping parts of **images** will be blanked-out for default " +"option `PDF_REDACT_IMAGE_PIXELS` (changed in v1.18.0). Option 0 does not " +"touch any images and 1 will remove any image with an overlap." +msgstr "" +"**画像** の重なり部分は、デフォルトのオプションである `PDF_REDACT_IMAGE_PIXELS` " +"では塗りつぶされます(v1.18.0で変更されました)。オプション0は画像を一切変更せず、1は重なり合う画像をすべて削除します。" + +#: ../../page.rst:355 4c4124a2c2e34a128cb17cb9e7cb5eef +msgid "" +"For option `images=PDF_REDACT_IMAGE_REMOVE` only this page's **references" +" to the images** are removed - not necessarily the images themselves. " +"Images are completely removed from the file only, if no longer referenced" +" at all (assuming suitable garbage collection options)." +msgstr "" +"`images=PDF_REDACT_IMAGE_REMOVE` のオプションの場合、このページの **画像への参照** " +"のみが削除されます。適切なゴミ収集オプションがあると、画像はファイルから完全に削除されます。" + +#: ../../page.rst:357 8b40a7c0e6d34574b9fb5ca1485ce824 +msgid "" +"For option `images=PDF_REDACT_IMAGE_PIXELS` a new image of format PNG is " +"created, which the page will use in place of the original one. The " +"original image is not deleted or replaced as part of this process, so " +"other pages may still show the original. In addition, the new, modified " +"PNG image currently is **stored uncompressed**. Do keep these aspects in " +"mind when choosing the right garbage collection method and compression " +"options during save." +msgstr "" +"`images=PDF_REDACT_IMAGE_PIXELS` " +"のオプションでは、新しいPNG形式の画像が作成され、ページは元の画像の代わりにそれを使用します。このプロセスの一環として、元の画像は削除されず、他のページでは引き続き元の画像が表示される可能性があります。さらに、新しい変更されたPNG画像は現在" +" **圧縮されていない状態で保存されています** 。保存時に適切なゴミ収集メソッドと圧縮オプションを選択する際に、これらの側面を考慮してください。" + +#: ../../page.rst:359 19b6f491d4004af3837ff48b6984c122 +msgid "" +"**Text removal** is done by character: A character is removed if its bbox" +" has a **non-empty overlap** with a redaction rectangle (changed in MuPDF" +" v1.17). Depending on the font properties and / or the chosen line " +"height, deletion may occur for undesired text parts. Using " +":meth:`Tools.set_small_glyph_heights` with a ``True`` argument before " +"text search may help to prevent this." +msgstr "" +"**テキストの削除** は文字ごとに行われます:文字のbboxが赤塗りの四角と非空の重なりを持つ場合、文字が削除されます(MuPDF " +"v1.17で変更)。フォントの特性や選択した行の高さに応じて、望ましくないテキスト部分が削除される場合があります。テキスト検索前に " +":meth:`Tools.set_small_glyph_heights` を ``True`` " +"引数で使用して、これを防ぐのに役立つ場合があります。" + +#: ../../page.rst:361 ab915d9920f64418b6cde0152fd4bfaa +msgid "" +"Redactions are a simple way to replace single words in a PDF, or to just " +"physically remove them. Locate the word \"secret\" using some text " +"extraction or search method and insert a redaction using \"xxxxxx\" as " +"replacement text for each occurrence." +msgstr "赤塗りは、PDF内の単語を置き換えるための簡単な方法であり、単語を物理的に削除するためのものです。テキスト抽出または検索方法を使用して単語「秘密」を見つけ、それぞれの出現ごとに代替テキスト「xxxxxx」を使用して赤塗りを挿入します。" + +#: ../../page.rst:363 a4860b20cc574fde9ac8fb1b1fe0349b +msgid "" +"Be wary if the replacement is longer than the original -- this may lead " +"to an awkward appearance, line breaks or no new text at all." +msgstr "注意が必要です。代替テキストが元のテキストよりも長い場合、見栄えが悪くなったり、改行が発生したり、新しいテキストがまったく表示されなくなる可能性があるためです。" + +#: ../../page.rst:365 9bcbeb3a84624670b6500eea13b1bed3 +msgid "" +"For a number of reasons, the new text may not exactly be positioned on " +"the same line like the old one -- especially true if the replacement font" +" was not one of CJK or :ref:`Base-14-Fonts`." +msgstr "" +"いくつかの理由から、新しいテキストは古いテキストとまったく同じ行に配置されないことがあります。特に、代替フォントがCJKまたはPDF " +":ref:`Base-14-Fonts` の場合には特に当てはまります。" + +#: ../../page.rst:370 e623d032e6e4471f9ad3af66e280b5f1 +msgid "" +"Changed in v1.16.12: The previous *mark* parameter is gone. Instead, the " +"respective rectangles are filled with the individual *fill* color of each" +" redaction annotation. If a *text* was given in the annotation, then " +":meth:`insert_textbox` is invoked to insert it, using parameters provided" +" with the redaction." +msgstr "" +"v1.16.12で変更:以前の *mark* " +"パラメータは削除されました。代わりに、各赤字注釈の個々の塗りつぶし色で各赤字領域が塗りつぶされます。アノテーションで *text* " +"が指定された場合、そのテキストを挿入するために、redactionで提供されたパラメータを使用して :meth:`insert_textbox` " +"が呼び出されます。" + +#: ../../page.rst:371 b59761d1d915436582b45028a61cadf8 +msgid "" +"Changed in v1.18.0: added option for handling images that overlap " +"redaction areas." +msgstr "v1.18.0で変更:赤字領域と重なる画像を処理するためのオプションが追加されました。" + +#: ../../page.rst:372 055f2b2de0784443b7706cab98b7a543 +msgid "Changed in v1.23.27: added option for removing graphics as well." +msgstr "" + +#: ../../page.rst:373 9ae33c1ffa7b4959b8a34116adeab4ec +msgid "Changed in v1.24.2: added option `keep_text` to leave text untouched." +msgstr "" + +#: ../../page.rst:383 a85096a374e94e7f972f4d5020affbcc +msgid "" +"PDF only: Add an annotation consisting of lines which connect the given " +"points. A **Polygon's** first and last points are automatically " +"connected, which does not happen for a **PolyLine**. The **rectangle** is" +" automatically created as the smallest rectangle containing the points, " +"each one surrounded by a circle of radius 3 (= 3 * line width). The " +"following shows a 'PolyLine' that has been modified with colors and line " +"ends." +msgstr "" +"PDFのみ:指定されたポイントを接続する線から成る注釈を追加します。多角形 **(Polygon)** " +"の最初と最後のポイントは自動的に接続されますが、**PolyLine** ではそれが発生しません。各ポイントは半径3の円で囲まれた最小の " +"**四角形** として自動的に作成されます(半径3 = 3 * 線の幅)。以下は、色や線端を変更した「PolyLine」の例を示しています。" + +#: ../../page.rst:385 a2a2f079c20e444fbcbb3ea51d8ba3de +msgid "a list of :data:`point_like` objects." +msgstr "points(list)– :data:`point_like` オブジェクトのリスト。" + +#: ../../page.rst:388 2141b375b4e44822b7da8c5b2d4bebea +msgid "" +"the created annotation. It is drawn with line color black, line width 1 " +"no fill color but fill color support. Use methods of :ref:`Annot` to make" +" any changes to achieve something like this:" +msgstr "" +"作成されたアノテーションです。線の色は黒で描画され、線の幅は1で、塗りつぶし色はサポートされています。このような外見を実現するために、:ref:`Annot`" +" のメソッドを使用して変更を加えることができます。" + +#: ../../page.rst:401 f7c981f6b5f94e548db828bc5a7e531d +msgid "" +"PDF only: These annotations are normally used for **marking text** which " +"has previously been somehow located (for example via " +":meth:`Page.search_for`). But this is not required: you are free to " +"\"mark\" just anything." +msgstr "" +"PDFのみ: これらのアノテーションは通常、以前に何らかの方法で見つかったテキスト(たとえば、:meth:`Page.search_for` " +"を使用して)をマーキングするために使用されます。ただし、これは必須ではありません:何でも「マーク」することができます。" + +#: ../../page.rst:403 e7b2f48bdced4ffea73d4b0368c08d10 +msgid "" +"Standard (stroke only -- no fill color support) colors are chosen per " +"annotation type: **yellow** for highlighting, **red** for striking out, " +"**green** for underlining, and **magenta** for wavy underlining." +msgstr "" +"通常、アノテーションの種類ごとに標準の(ストロークのみで、塗りつぶし色はサポートされていません)色が選択されます。ハイライト用に " +"**黄色**、取り消し線用に **赤色**、下線用に **緑色**、波線下線用に **マゼンタ色** です。" + +#: ../../page.rst:405 d005170f532240aabe56262974081b06 +msgid "" +"All these four methods convert the arguments into a list of :ref:`Quad` " +"objects. The **annotation** rectangle is then calculated to envelop all " +"these quadrilaterals." +msgstr "" +"これらの四つのメソッドは、引数を :ref:`Quad` オブジェクトのリストに変換します。その後、アノテーション " +"の矩形は、これらの四角形を包含するように計算されます。" + +#: ../../page.rst:409 7da878601a5b46b4a845004779c4da38 +msgid "" +":meth:`search_for` delivers a list of either :ref:`Rect` or :ref:`Quad` " +"objects. Such a list can be directly used as an argument for these " +"annotation types and will deliver **one common annotation** for all " +"occurrences of the search string::" +msgstr "" +":meth:`search_for` は :ref:`Rect` または :ref:`Quad` " +"オブジェクトのリストを返します。このようなリストは、これらのアノテーションタイプの引数として直接使用でき、検索文字列のすべての出現に対して " +"**共通のアノテーション** を提供します::" + +#: ../../page.rst:416 3861b71ef300487288e3d70275d10a19 +msgid "" +"Obviously, text marker annotations need to know what is the top, the " +"bottom, the left, and the right side of the area(s) to be marked. If the " +"arguments are quads, this information is given by the sequence of the " +"quad points. In contrast, a rectangle delivers much less information -- " +"this is illustrated by the fact, that 4! = 24 different quads can be " +"constructed with the four corners of a rectangle." +msgstr "" +"明らかに、テキストマーカーアノテーションは、マークされる領域の上部、下部、左部、右部が何であるかを知る必要があります。引数がquadsの場合、この情報は四角形のポイントのシーケンスによって提供されます。対照的に、矩形ははるかに少ない情報を提供します" +" - これは、四角形の四つの角を使用して24の異なる四角形が構築できるという事実によって示されています。" + +#: ../../page.rst:418 6f0906af7d944e8c9f03bfc9e5f70457 +msgid "" +"Therefore, we **strongly recommend** to use the `quads` option for text " +"searches, to ensure correct annotations. A similar consideration applies " +"to marking **text spans** extracted with the \"dict\" / \"rawdict\" " +"options of :meth:`Page.get_text`. For more details on how to compute " +"quadrilaterals in this case, see section \"How to Mark Non-horizontal " +"Text\" of :ref:`FAQ`." +msgstr "" +"したがって、正しいアノテーションを確保するために、テキスト検索に `quads` オプションを使用することを " +"**強くお勧めします**。同様の考慮事項は、:meth:`Page.get_text` の「dict」/「rawdict」オプションで抽出された " +"**テキストスパン** をマークする場合にも適用されます。この場合の四角形の計算方法の詳細については、:ref:`FAQ` " +"の「非水平テキストのマーキング方法」セクションを参照してください。" + +#: ../../page.rst:420 428c52b63b1f48e8ba0f8104c9956704 +msgid "" +"the location(s) -- rectangle(s) or quad(s) -- to be marked. (Changed in " +"v1.14.20) A list or tuple must consist of :data:`rect_like` or " +":data:`quad_like` items (or even a mixture of either). Every item must be" +" finite, convex and not empty (as applicable). **Set this parameter to** " +"``None`` if you want to use the following arguments (Changed in " +"v1.16.14). And vice versa: if not ``None``, the remaining parameters must" +" be ``None``." +msgstr "" +"*(v1.14.20で変更)* マーキングする位置、つまり矩形または四角形。リストまたはタプルは、:data:`rect_like` または " +":data:`quad_like` " +"のアイテム(またはその混合)で構成されている必要があります。各アイテムは、適用可能な限り有限で凸面で空でなければなりません " +"*(v1.16.14で変更)* 。このパラメータを ``None`` " +"に設定すると、次の引数を使用できるようになります。逆もまたしかり:Noneでない場合、残りのパラメータは ``None`` でなければなりません。" + +#: ../../page.rst:427 7840a32fab07421896c8dc0a7eddb8fd +msgid "" +"start text marking at this point. Defaults to the top-left point of " +"*clip*. Must be provided if `quads` is ``None``. (New in v1.16.14)" +msgstr "" +"*(v1.16.14で新規)* このポイントでテキストマーキングを開始します。*clip* の左上のポイントがデフォルトです。`quads` が " +"``None`` の場合、提供する必要があります。" + +#: ../../page.rst:428 2a083d78250a425fbdc27e75f3c168cd +msgid "" +"stop text marking at this point. Defaults to the bottom-right point of " +"*clip*. Must be used if `quads` is ``None``. (New in v1.16.14)" +msgstr "" +"*(v1.16.14で新規)* このポイントでテキストマーキングを停止します。*clip* の右下のポイントがデフォルトです。quadsが " +"``None`` の場合、使用する必要があります。" + +#: ../../page.rst:429 300412dc0f41420ab8c9bec6f1957e7a +msgid "" +"only consider text lines intersecting this area. Defaults to the page " +"rectangle. Only use if `start` and `stop` are provided. (New in v1.16.14)" +msgstr "" +"*(v1.16.14で新規)* この領域と交差するテキスト行のみを考慮します。ページの矩形がデフォルトです。`start` および `stop` " +"が提供されている場合にのみ使用してください。" + +#: ../../page.rst:431 788debac4b8f4e55a6046455337c7fb9 +msgid ":ref:`Annot` or ``None`` (changed in v1.16.14)." +msgstr ":ref:`Annot` または *(v1.16.14で変更)* ``None``" + +#: ../../page.rst:432 9f9ec240d6374071abeee1a6eb319084 +msgid "" +"the created annotation. If *quads* is an empty list, **no annotation** is" +" created (changed in v1.16.14)." +msgstr "作成された注釈。 *(v1.16.14で変更)* *quads* が空のリストの場合、**アノテーションは作成されません**。" + +#: ../../page.rst:435 1d40d37b8a194ec282fbc8b2593f5e7b +msgid "" +"You can use parameters *start*, *stop* and *clip* to highlight " +"consecutive lines between the points *start* and *stop* (starting with " +"v1.16.14). Make use of *clip* to further reduce the selected line bboxes " +"and thus deal with e.g. multi-column pages. The following multi-line " +"highlight on a page with three text columns was created by specifying the" +" two red points and setting clip accordingly." +msgstr "" +"v1.16.14以降、開始、停止、および *clip* というパラメータを使用して、*start* と *stop* " +"の間の連続した行をハイライトできます。*clip* " +"を使用して、選択した行のbboxをさらに縮小し、たとえば多列のページを扱うことができます。次の多行のハイライトは、2つの赤いポイントを指定し、それに応じてクリップを設定することによって、3つのテキスト列を持つページに作成されました。" + +#: ../../page.rst:444 0e979d3ecf6743b1a374782e60b95dbc +msgid "" +"Cluster vector graphics (synonyms are line-art or drawings) based on " +"their geometrical vicinity. The method walks through the output of " +":meth:`Page.get_drawings` and joins paths whose `path[\"rect\"]` are " +"closer to each other than some tolerance values (given in the arguments)." +" The result is a list of rectangles that each wrap things like tables " +"(with gridlines), pie charts, bar charts, etc." +msgstr "" +"図形のクラスター(同義語は線画やドローイングです)を、幾何学的な近接性に基づいてグループ化します。このメソッドは、 " +":meth:`Page.get_drawings` の出力を処理し、その中で、`path[\"rect\"]` " +"がある許容値(引数で指定された)よりも近いパスを結合します。結果は、各々が表(格子状の線がある)、円グラフ、棒グラフなどのものを包含する長方形のリストです。" + +#: ../../page.rst:446 a7040ace649f46ee82108bfecd0c3652 +msgid "only consider paths inside this area. The default is the full page." +msgstr "only consider paths inside this area. The default is the full page." + +#: ../../page.rst:448 64579ef398884345a1742b01a40bfe34 +msgid "" +"(optional) provide a previously generated output of " +":meth:`Page.get_drawings`. If `None` the method will execute the method." +msgstr "(任意)以前に生成された :meth:`Page.get_drawings` の出力を提供します。`None` の場合、メソッドが実行されます。" + +#: ../../page.rst:450 0f126bbeb9054ba39a8b37318af0fa46 +msgid "" +"Assume vector graphics to be close enough neighbors for belonging to the " +"same rectangle. Default is 3 points." +msgstr "" + +#: ../../page.rst:452 affc9ad662c54243a82ab470d5c557b8 +msgid "" +"If `True` (default), the method will to remove rectangles having width or" +" height smaller than the respective tolerance value. If `False` no such " +"filtering is done." +msgstr "" + +#: ../../page.rst:456 91f1da7f15f14121bb9fb836a3acdbd2 +msgid "" +"Find tables on the page and return an object with related information. " +"Typically, the default values of the many parameters will be sufficient. " +"Adjustments should ever only be needed in corner case situations." +msgstr "ページ上のテーブルを見つけ、関連情報を含むオブジェクトを返します。通常、多くのパラメータのデフォルト値は十分です。調整が必要なのは、極めてまれなケースのみです。" + +#: ../../page.rst:458 24a5c3bd448d49f4926019916cb9228e +msgid "" +"specify a region to consider within the page rectangle and ignore the " +"rest. Default is the full page." +msgstr "ページの長方形内で考慮する領域を指定します。デフォルトはページ全体です。" + +#: ../../page.rst:460 0bb7556df98e4c67a2fd4f222fd2f6d9 +msgid "" +"Request a **table detection** strategy. Valid values are \"lines\", " +"\"lines_strict\" and \"text\". Default is **\"lines\"** which uses all " +"vector graphics on the page to detect grid lines. Strategy " +"**\"lines_strict\"** ignores borderless rectangle vector graphics. " +"Sometimes single text pieces have background colors which may lead to " +"false columns or lines. This strategy ignores them and can thus increase " +"detection precision. If **\"text\"** is specified, text positions are " +"used to generate \"virtual\" column and / or row boundaries. Use " +"`min_words_*` to request the number of words for considering their " +"coordinates. Use parameters `vertical_strategy` and " +"`horizontal_strategy` **instead** for a more fine-grained treatment of " +"the dimensions." +msgstr "**テーブル検出**戦略をリクエストします。有効な値は、**「lines」**、**「lines_strict」** 、**「text」** です" + +#: ../../page.rst:460 3a1a4de0bbac4bfb8b4f8e163ddc7272 +msgid "" +"Request a **table detection** strategy. Valid values are \"lines\", " +"\"lines_strict\" and \"text\"." +msgstr "**テーブル検出**戦略をリクエストします。有効な値は、**「lines」**、**「lines_strict」** 、**「text」** です。" + +#: ../../page.rst:462 61c5f8b592124b55b6f0b034cc9caaa0 +msgid "" +"Default is **\"lines\"** which uses all vector graphics on the page to " +"detect grid lines." +msgstr "デフォルトは **「lines」** で、ページ上のすべてのベクトルグラフィックスを使用してグリッド線を検出します。" + +#: ../../page.rst:464 1cc39b20c5cc4570bf28776257646336 +msgid "" +"Strategy **\"lines_strict\"** ignores borderless rectangle vector " +"graphics. Sometimes single text pieces have background colors which may " +"lead to false columns or lines. This strategy ignores them and can thus " +"increase detection precision." +msgstr "" +"ストラテジー **「lines_strict」** " +"は、境界のない四角形のベクトルグラフィックスを無視します。時には、個々のテキスト部分に背景色があることがあり、これが誤った列や行を生じる可能性があります。この戦略はそれらを無視し、したがって検出精度を向上させることができます。" + +#: ../../page.rst:466 959245153c024460aa7b36f5f36d500a +msgid "" +"If **\"text\"** is specified, text positions are used to generate " +"\"virtual\" column and / or row boundaries. Use `min_words_*` to request " +"the number of words for considering their coordinates." +msgstr "検索アルゴリズムを指定します。デフォルトでは「lines」はベクトル描画を探します。指定された場合、「text」ではテキストの位置を使用して「仮想」列境界を生成します。x座標を考慮するための単語数を指定するには、`min_words_*`を使用します。" + +#: ../../page.rst:468 b90c42cceb0b42838439fb8ff6094578 +msgid "" +"Use parameters `vertical_strategy` and `horizontal_strategy` **instead** " +"for a more fine-grained treatment of the dimensions." +msgstr "サイズの微調整には、`vertical_strategy` と`horizontal_strategy` パラメータを使用してください。" + +#: ../../page.rst:470 ad04c6b855c0406586e92f10415bc468 +msgid "" +"y-coordinates of rows. If provided, there will be no attempt to identify " +"additional table rows. This influences table detection." +msgstr "行のy座標を含む浮動小数点数のリスト。指定した場合、追加のテーブル行を識別しないようにします。" + +#: ../../page.rst:472 fc8047d6017c454e9cbbc20b2d15f37c +msgid "" +"x-coordinates of columns. If provided, there will be no attempt to " +"identify additional table columns. This influences table detection." +msgstr "列のx座標を含む浮動小数点数のリスト。指定した場合、追加のテーブル列を識別しないようにします。" + +#: ../../page.rst:474 3e730286d5a14cf0994241e350b6dcf5 +msgid "" +"relevant for vertical strategy option \"text\": at least this many words " +"must coincide to establish a **virtual column** boundary." +msgstr "垂直戦略オプション「text」に関連します。少なくともこの数の単語が一致する必要があり、仮想の列境界を確立します。" + +#: ../../page.rst:476 d5f56082b14b41699e3c31a5b2a64e44 +msgid "" +"relevant for horizontal strategy option \"text\": at least this many " +"words must coincide to establish a **virtual row** boundary." +msgstr "水平戦略オプション「text」に関連します。少なくともこの数の単語が一致する必要があり、仮想の行境界を確立します。" + +#: ../../page.rst:478 bf9b8525504f4f70bb346c23dcec14a0 +msgid "" +"Any two horizontal lines whose y-values differ by no more than this value" +" will be **snapped** into one. Accordingly for vertical lines. Default is" +" 3. Separate values can be specified instead for the dimensions, using " +"`snap_x_tolerance` and `snap_y_tolerance`." +msgstr "" +"各縦線は、y値の差がこの値以下であれば、一つに結合されます。同様に、各横線も結合されます。デフォルトは3です。この値の代わりに、次元ごとに異なる値を指定することもできます。`snap_x_tolerance`" +" および `snap_y_tolerance` を使用してください。" + +#: ../../page.rst:480 011cdbb034de49f183f9a5162ec84132 +msgid "" +"Any two lines will be **joined** to one if the end and the start points " +"differ by no more than this value (in points). Default is 3. Instead of " +"this value, separate values can be specified for the dimensions using " +"`join_x_tolerance` and `join_y_tolerance`." +msgstr "" +"2つのラインの終点と始点の間の差がこの値(ポイント単位)以下の場合、それらは1つのラインに **結合されます** " +"。デフォルトは3です。この値の代わりに、`join_x_tolerance` と `join_y_tolerance` " +"を使用して寸法ごとに別々の値を指定できます。" + +#: ../../page.rst:482 66e8384a75fb449890e51e97fb123f66 +msgid "" +"Ignore a line if its length does not exceed this value (points). Default " +"is 3." +msgstr "この値(ポイント単位)を超えない場合、線を無視します。デフォルトは3です" + +#: ../../page.rst:484 f6a1a395da74402380cd680a2f589eea +msgid "" +"When combining lines into cell borders, orthogonal lines must be within " +"this value (points) to be considered intersecting. Default is 3. Instead " +"of this value, separate values can be specified for the dimensions using " +"`intersection_x_tolerance` and `intersection_y_tolerance`." +msgstr "" +"直交するラインをセルの境界線に結合する際に、それらの直交するラインはこの値(ポイント単位)以内である必要があります。デフォルトは3です。この値の代わりに、次元ごとに個別の値を指定することもできます。`intersection_x_tolerance`" +" と `intersection_y_tolerance` を使用します。" + +#: ../../page.rst:486 9a4667004af547ee95ce5529ee9c72b1 +msgid "" +"Characters will be combined into words only if their distance is no " +"larger than this value (points). Default is 3. Instead of this value, " +"separate values can be specified for the dimensions using " +"`text_x_tolerance` and `text_y_tolerance`." +msgstr "" +"文字が単語に結合されるのは、その距離がこの値(ポイント)を超えない場合のみです。デフォルト値は3です。この値の代わりに、次元ごとに別々の値を指定することもできます。`text_x_tolerance`" +" と `text_y_tolerance` を使用します。" + +#: ../../page.rst:488 cb59f0e842e74377952472cc2e16135c +msgid "" +"Specify a list of \"lines\" (i.e. pairs of :data:`point_like` objects) as" +" **additional**, \"virtual\" vector graphics. These lines may help with " +"table and / or cell detection and will not otherwise influence the " +"detection strategy. Especially, in contrast to parameters " +"`horizontal_lines` and `vertical_lines`, they will not prevent detecting " +"rows or columns in other ways. These lines will be treated exactly like " +"\"real\" vector graphics in terms of joining, snapping, intersectiing, " +"minimum length and containment in the `clip` rectangle. Similarly, lines " +"not parallel to any of the coordinate axes will be ignored." +msgstr "" +"以下のような「lines」(つまり、 :data:`point_like` オブジェクトのペア)のリストを追加の「仮想」ベクトル " +"グラフィックスとして指定します。これらの線は、テーブルと/またはセルの検出に役立ち、検出戦略には影響しません。特に、`horizontal_lines`" +" と `vertical_lines` " +"パラメータとは異なり、これらの線は他の方法で行または列を検出するのを妨げません。これらの線は、結合、スナップ、交差、最小長、およびクリップ矩形内への含有の点で、「実際の」ベクトル" +" グラフィックスとまったく同様に処理されます。同様に、座標軸のいずれかに平行でない線は無視されます。" + +#: ../../page.rst:492 a75a6520928d409c86d1ffe0d924c879 +msgid "" +"a `TableFinder` object that has the following significant attributes: * " +"`cells`: a list of **all bboxes** on the page, that have been identified " +"as table cells (across all tables). Each cell is a :data:`rect_like` " +"tuple `(x0, y0, x1, y1)` of coordinates or `None`. * `tables`: a list of " +"`Table` objects. This is `[]` if the page has no tables. Single tables " +"can be found as items of this list. But the `TableFinder` object itself " +"is also a sequence of its tables. This means that if `tabs` is a " +"`TableFinder` object, then table \"n\" is delivered by `tabs.tables[n]` " +"as well as by the shorter `tabs[n]`. * The `Table` object has the " +"following attributes: * ``bbox``: the bounding box of the table as a " +"tuple `(x0, y0, x1, y1)`. * ``cells``: bounding boxes of the table's " +"cells (list of tuples). A cell may also be `None`. * ``extract()``: " +"this method returns the text content of each table cell as a list of list" +" of strings. * ``to_markdown()``: this method returns the table as a " +"**string in markdown format** (compatible to Github). Supporting viewers " +"can render the string as a table. This output is optimized for **small " +"token** sizes, which is especially beneficial for LLM/RAG feeds. Pandas " +"DataFrames (see method `to_pandas()` below) offer an equivalent markdown " +"table output which however is better readable for the human eye. * " +"`to_pandas()`: this method returns the table as a `pandas " +"`_ `DataFrame " +"`_. DataFrames are " +"very versatile objects allowing a plethora of table manipulation methods " +"and outputs to almost 20 well-known formats, among them Excel files, CSV," +" JSON, markdown-formatted tables and more. `DataFrame.to_markdown()` " +"generates a Github-compatible markdown format optimized for human " +"readability. This method however requires the package `tabulate " +"`_ to be installed in addition to " +"pandas itself. * ``header``: a `TableHeader` object containing header " +"information of the table. * ``col_count``: an integer containing the " +"number of table columns. * ``row_count``: an integer containing the " +"number of table rows. * ``rows``: a list of `TableRow` objects " +"containing two attributes, ``bbox`` is the boundary box of the row, and " +"`cells` is a list of table cells contained in this row. * The " +"`TableHeader` object has the following attributes: * ``bbox``: the " +"bounding box of the header. * `cells`: a list of bounding boxes " +"containing the name of the respective column. * `names`: a list of " +"strings containing the text of each of the cell bboxes. They represent " +"the column names -- which are used when exporting the table to pandas " +"DataFrames, markdown, etc. * `external`: a bool indicating whether the " +"header bbox is outside the table body (`True`) or not. Table headers are " +"never identified by the `TableFinder` logic. Therefore, if `external` is " +"true, then the header cells are not part of any cell identified by " +"`TableFinder`. If `external == False`, then the first table row is the " +"header. Please have a look at these `Jupyter notebooks " +"`_, which cover standard situations like multiple tables on one " +"page or joining table fragments across multiple pages. .. caution:: The " +"lifetime of the `TableFinder` object, as well as that of all its tables " +"**equals the lifetime of the page**. If the page object is deleted or " +"reassigned, all tables are no longer valid. The only way to keep " +"table content beyond the page's availability is to **extract it** via " +"methods `Table.to_markdown()`, `Table.to_pandas()` or a copy of " +"`Table.extract()` (e.g. `Table.extract()[:]`). .. note:: Once a " +"table has been extracted to a **Pandas DataFrame** with `to_pandas()` it " +"is easy to convert to other file types with the **Pandas API**: - " +"table to Markdown, use `to_markdown " +"`_" +" - table to JSON, use: `to_json " +"`_" +" - table to Excel, use: `to_excel " +"`_" +" - table to CSV, use: `to_csv " +"`_" +" - table to HTML, use: `to_html " +"`_" +" - table to SQL, use: `to_sql " +"`_" +msgstr "" + +#: ../../page.rst:492 16932db7dc3b4eb29ccd45e227a4ec05 +msgid "a `TableFinder` object that has the following significant attributes:" +msgstr "`TableFinder` オブジェクトには、次の重要な属性があります:" + +#: ../../page.rst:494 a2ed489c6c8c42de855887a6ad816706 +msgid "" +"`cells`: a list of **all bboxes** on the page, that have been identified " +"as table cells (across all tables). Each cell is a :data:`rect_like` " +"tuple `(x0, y0, x1, y1)` of coordinates or `None`." +msgstr "" +"**cells:** " +"ページ上でテーブルのセルとして識別されたすべてのバウンディングボックスのリスト(すべてのテーブルを対象にします)。各セルは座標のタプル (x0, " +"y0, x1, y1) または `None` です。" + +#: ../../page.rst:495 d1f7203086fc4854afd952de86470e61 +msgid "" +"`tables`: a list of `Table` objects. This is `[]` if the page has no " +"tables. Single tables can be found as items of this list. But the " +"`TableFinder` object itself is also a sequence of its tables. This means " +"that if `tabs` is a `TableFinder` object, then table \"n\" is delivered " +"by `tabs.tables[n]` as well as by the shorter `tabs[n]`." +msgstr "" +"**tables:** Table オブジェクトのリスト。ページにテーブルが含まれていない場合、これは `[]` " +"になります。単一のテーブルはこのリストのアイテムとして見つけることができますが、`TableFinder` " +"オブジェクト自体もそのテーブルのシーケンスです。つまり、`tabs` が `TableFinder` オブジェクトである場合、テーブル番号 " +"\"n\" は `tabs.tables[n]` およびより短い `tabs[n]` によって提供されます。" + +#: ../../page.rst:498 4b154dc613f947cb88e167f4a06b0f33 +msgid "The `Table` object has the following attributes:" +msgstr "`Table` オブジェクトには次の属性があります:" + +#: ../../page.rst:500 f757cce6ee92448bac1014f23767d26a +#, fuzzy +msgid "``bbox``: the bounding box of the table as a tuple `(x0, y0, x1, y1)`." +msgstr "**bbox:** テーブルのバウンディングボックス(タプル)`(x0, y0, x1, y1)`。" + +#: ../../page.rst:501 99bdc3e89f7147caa5684ec85f10669d +#, fuzzy +msgid "" +"``cells``: bounding boxes of the table's cells (list of tuples). A cell " +"may also be `None`." +msgstr "**cells:** テーブルのセルのバウンディングボックス(タプル)のリスト。セルは `None` である場合もあります。" + +#: ../../page.rst:502 f7d09b27bdb64f14860acc3b029c3f91 +#, fuzzy +msgid "" +"``extract()``: this method returns the text content of each table cell as" +" a list of list of strings." +msgstr "**extract():** このメソッドは、各テーブルセルのテキストコンテンツを文字列のリストのリストとして返します。" + +#: ../../page.rst:503 a939bc63ffa54de1a873af990c9ddd5b +#, fuzzy +msgid "" +"``to_markdown()``: this method returns the table as a **string in " +"markdown format** (compatible to Github). Supporting viewers can render " +"the string as a table. This output is optimized for **small token** " +"sizes, which is especially beneficial for LLM/RAG feeds. Pandas " +"DataFrames (see method `to_pandas()` below) offer an equivalent markdown " +"table output which however is better readable for the human eye." +msgstr "" +"`to_markdown()` :このメソッドは、テーブルをGitHubと互換性のある **Markdown形式の文字列** " +"として返します。サポートされているビューアは、文字列をテーブルとしてレンダリングできます。この出力は、LLM/RAGフィードに特に有益な小さいトークンサイズに最適化されています。PandasのDataFrames(後述の" +" `to_pandas()` メソッドを参照)は、同等のMarkdownテーブル出力を提供しますが、人間の目にはより読みやすいです。" + +#: ../../page.rst:504 56d986576a94463da825b0753142c2d0 +#, fuzzy +msgid "" +"`to_pandas()`: this method returns the table as a `pandas " +"`_ `DataFrame " +"`_. DataFrames are " +"very versatile objects allowing a plethora of table manipulation methods " +"and outputs to almost 20 well-known formats, among them Excel files, CSV," +" JSON, markdown-formatted tables and more. `DataFrame.to_markdown()` " +"generates a Github-compatible markdown format optimized for human " +"readability. This method however requires the package `tabulate " +"`_ to be installed in addition to " +"pandas itself." +msgstr "" +"`to_pandas()` : このメソッドは、テーブルを `pandas " +"`_ `DataFrame " +"`_ として返します。DataFrame" +" は非常に多目的なオブジェクトであり、さまざまなテーブル操作メソッドや、Excel ファイル、CSV、JSON、markdown " +"形式のテーブルなど、約 20 種類のよく知られたフォーマットへの出力が可能です。`DataFrame.to_markdown()` " +"は、人間が読みやすいように最適化された Github 互換の markdown 形式を生成します。ただし、このメソッドには pandas " +"だけでなく、 [tablutate](https://pypi.org/project/tabulate/) " +"パッケージの追加インストールが必要です。" + +#: ../../page.rst:505 2fe23559963f483f90b628aeb166e42f +msgid "" +"``header``: a `TableHeader` object containing header information of the " +"table." +msgstr "**header:** テーブルのヘッダー情報を含む `TableHeader` オブジェクト。" + +#: ../../page.rst:506 9a8e9cae3f0446a09814edd2425fbb2a +#, fuzzy +msgid "``col_count``: an integer containing the number of table columns." +msgstr "**col_count:** テーブルの列数を含む整数。" + +#: ../../page.rst:507 c7552851166e4240a60bd234bd64b7ca +#, fuzzy +msgid "``row_count``: an integer containing the number of table rows." +msgstr "**row_count:** テーブルの行数を含む整数。" + +#: ../../page.rst:508 edb57dea09864a4bb7dd582bb1eee915 +#, fuzzy +msgid "" +"``rows``: a list of `TableRow` objects containing two attributes, " +"``bbox`` is the boundary box of the row, and `cells` is a list of table " +"cells contained in this row." +msgstr "" +"**rows:** `TableRow` オブジェクトのリストで、*bbox* は行の境界ボックスで、*cells* " +"はこの行に含まれるテーブルセルのリストです。" + +#: ../../page.rst:510 99b759a5609c45cf8676bf10d36b3dd4 +msgid "The `TableHeader` object has the following attributes:" +msgstr "`TableHeader` オブジェクトには次の属性があります:" + +#: ../../page.rst:512 5b12b031b3d245b1861bc666e19aa782 +msgid "``bbox``: the bounding box of the header." +msgstr "**bbox:** ヘッダーのバウンディングボックス。" + +#: ../../page.rst:513 994ce1dec2fd49d39ac5c1eab5f214f9 +msgid "" +"`cells`: a list of bounding boxes containing the name of the respective " +"column." +msgstr "**cells:** 各列の名前を含むバウンディングボックスのリスト。" + +#: ../../page.rst:514 4c137fcd2e6b4ec4a7a04f71de1e2f1f +msgid "" +"`names`: a list of strings containing the text of each of the cell " +"bboxes. They represent the column names -- which are used when exporting " +"the table to pandas DataFrames, markdown, etc." +msgstr "" +"**names:** 各セルのバウンディングボックス内のテキストを含む文字列のリスト。これらは列の名前を表します。これらはテーブルを pandas" +" DataFrame または CSV などにエクスポートする際に使用できます。" + +#: ../../page.rst:515 db42d168398a40d4bb872d6483676957 +msgid "" +"`external`: a bool indicating whether the header bbox is outside the " +"table body (`True`) or not. Table headers are never identified by the " +"`TableFinder` logic. Therefore, if `external` is true, then the header " +"cells are not part of any cell identified by `TableFinder`. If `external " +"== False`, then the first table row is the header." +msgstr "" +"**external:** ヘッダーのバウンディングボックスがテーブル本体の外部にあるかどうかを示すブール値(`True` " +"の場合、外部)。テーブルのヘッダーは TableFinder のロジックによって識別されないため、*external* が True " +"の場合、ヘッダーセルは `TableFinder` によって識別された任意のセルの一部ではありません。`external == False` " +"の場合、最初のテーブル行がヘッダーです。" + +#: ../../page.rst:517 88395b69cbac4cc982e3ecaf02b10e54 +msgid "" +"Please have a look at these `Jupyter notebooks " +"`_, which cover standard situations like multiple tables on one " +"page or joining table fragments across multiple pages." +msgstr "" +"これらの `Jupyter notebooks `_ " +"をご覧ください。これらのノートブックでは、1つのページに複数のテーブルがある場合や、複数のページにまたがるテーブル断片を結合するなど、標準的な状況がカバーされています。" + +#: ../../page.rst:519 10b41ad5dc8e4342b7e28af474508150 +msgid "" +"The lifetime of the `TableFinder` object, as well as that of all its " +"tables **equals the lifetime of the page**. If the page object is deleted" +" or reassigned, all tables are no longer valid." +msgstr "" + +#: ../../page.rst:521 d7e52c98b9f24c4cafa781ff281bb177 +msgid "" +"The only way to keep table content beyond the page's availability is to " +"**extract it** via methods `Table.to_markdown()`, `Table.to_pandas()` or " +"a copy of `Table.extract()` (e.g. `Table.extract()[:]`)." +msgstr "" + +#: ../../page.rst:525 b90b709e2c50442f9fab647656786df8 +msgid "" +"Once a table has been extracted to a **Pandas DataFrame** with " +"`to_pandas()` it is easy to convert to other file types with the **Pandas" +" API**:" +msgstr "" + +#: ../../page.rst:527 b3aee5d67c8b48049ce1b0ce5f8f0cb9 +msgid "" +"table to Markdown, use `to_markdown " +"`_" +msgstr "" + +#: ../../page.rst:528 f30554fd4e254290925725339bc9dd99 +msgid "" +"table to JSON, use: `to_json " +"`_" +msgstr "" + +#: ../../page.rst:529 5df3ccf96e104f07ae9e3312613a2290 +msgid "" +"table to Excel, use: `to_excel " +"`_" +msgstr "" + +#: ../../page.rst:530 628bf1e948334739bbf11d4f72c54bca +msgid "" +"table to CSV, use: `to_csv " +"`_" +msgstr "" + +#: ../../page.rst:531 46ea342a5507459092d02333ec3c8bf9 +msgid "" +"table to HTML, use: `to_html " +"`_" +msgstr "" + +#: ../../page.rst:532 0dd95ceb976842c8973b26a9dbf62899 +msgid "" +"table to SQL, use: `to_sql " +"`_" +msgstr "" + +#: ../../page.rst:537 b38371acb2944f7492c3060989dd22b1 +msgid "New in version 1.23.0" +msgstr "新機能バージョン1.23.0" + +#: ../../page.rst:538 f4ed84bde3c84cbf8f88f0bd85419704 +msgid "Changed in version 1.23.19: new argument `add_lines`." +msgstr "1.23.19で変更された点:新しい引数 `add_lines` 。" + +#: ../../page.rst:544 06557f142d63455aa8fe7500ceed99a5 +msgid "" +"There is also the `pdf2docx extract tables method`_ which is capable of " +"table extraction if you prefer." +msgstr "必要に応じて、テーブル抽出を行う `pdf2docx extract tables method`_ も利" + +#: ../../page.rst:549 a712bec698ca4879b1543ed3a637da15 +msgid "" +"PDF only: Add a \"rubber stamp\" annotation to e.g. indicate the " +"document's intended use (\"DRAFT\", \"CONFIDENTIAL\", etc.). The " +"parameter may be either an integer to select text from a predefined array" +" of standard texts or an image." +msgstr "" + +#: ../../page.rst:551 09fdd0c608344b09b2f92a24a122bcf1 +msgid "rectangle where to place the annotation." +msgstr "アノテーションを配置する矩形領域。" + +#: ../../page.rst:552 0a5baef86a0c43b4856a6e1f368743b8 +msgid "" +"The following options are available: * The id number (int) of the stamp " +"text. For available stamps see :ref:`StampIcons`. * A string specifying " +"an image file path. * A ``bytes``, ``bytearray`` or ``io.BytesIO`` " +"object for an image in memory. * A :ref:`Pixmap`." +msgstr "" + +#: ../../page.rst:552 295431e7160d40cba860273eb9bdd292 +msgid "The following options are available:" +msgstr "" + +#: ../../page.rst:554 9e500a32a72148eca055550f446780e7 +#, fuzzy +msgid "" +"The id number (int) of the stamp text. For available stamps see " +":ref:`StampIcons`." +msgstr "スタンプテキストのID番号。使用可能なスタンプについては :ref:`StampIcons` を参照してください。" + +#: ../../page.rst:556 fa01387b8c6e4d36a9135715d18de05a +msgid "A string specifying an image file path." +msgstr "" + +#: ../../page.rst:558 0b7af2d61f9f442d9f2eb3b6a2e599e1 +msgid "" +"A ``bytes``, ``bytearray`` or ``io.BytesIO`` object for an image in " +"memory." +msgstr "" + +#: ../../page.rst:560 1cf28c6d0d68474cbc6fc9e428d6aff4 +msgid "A :ref:`Pixmap`." +msgstr "" + +#: ../../page.rst:562 8840f0a2cfc24d4c85537d6840788773 +msgid "**Text-based stamps**" +msgstr "" + +#: ../../page.rst:564 78b6400986c74cc08d79b27755fb7520 +msgid "" +":attr:`Annot.rect` is automatically calculated as the largest rectangle " +"with an aspect ratio of ``width:height = 3.8`` that fits in the provided " +"``rect``. Its position is vertically and horizontally centered." +msgstr "" + +#: ../../page.rst:565 8dfc4069af8542acb4c4a2ed381665a0 +msgid "The font chosen is \"Times Bold\" and the text will be upper case." +msgstr "選択されるフォントは「Times Bold」で、テキストは大文字になります。" + +#: ../../page.rst:566 542dc89152d14966a8cbfddc2438ea7a +#, fuzzy +msgid "" +"The appearance can be modified using :meth:`Annot.set_opacity` and by " +"setting the \"stroke\" color. By PDF specification, stamp annotations " +"have no \"fill\" color." +msgstr "" +"外観は :meth:`Annot.set_opacity` を使用して変更でき、\"stroke\" " +"色を設定することができます(「fill」色はサポートされていません)。" + +#: ../../page.rst:570 0ef51375d1544ebb86837770241f2396 +msgid "**Image-based stamps**" +msgstr "" + +#: ../../page.rst:572 5829f548dcc74709b9c6611b7b67d095 +msgid "" +"The image is scaled to fit into the rectangle `rect` such that the " +"image's center and the center of `rect` coincide. The aspect ratio of the" +" image is preserved, so the image may not fill the entire rectangle. " +"However, at least one of the given rectangle's width or height are fully " +"covered." +msgstr "" + +#: ../../page.rst:573 d96d920f471744c2b865aaa647bcb4d6 +msgid "" +"The annotation can be modified via :meth:`Annot.set_opacity`. This method" +" therefore is a way to display images transparently even if no alpha " +"channel is present." +msgstr "" + +#: ../../page.rst:574 28ad3d421ea04a3c8dab3e88531b04b3 +msgid "Setting colors has no effect on image stamps." +msgstr "" + +#: ../../page.rst:575 aa63a66ddca74f08bc0a8bb81d515b56 +msgid "" +"Rotating image-based stamps **is not supported**. Setting the rotation " +"may lead to unexpected results." +msgstr "" + +#: ../../page.rst:579 0c5ed97b356f46dd8de0a7fd5c6f9a34 +msgid "" +"PDF only: Add a PDF Form field (\"widget\") to a page. This also **turns " +"the PDF into a Form PDF**. Because of the large amount of different " +"options available for widgets, we have developed a new class " +":ref:`Widget`, which contains the possible PDF field attributes. It must " +"be used for both, form field creation and updates." +msgstr "" +"PDFのみ:ページにPDFフォームフィールド(ウィジェット)を追加します。これにより、PDFがフォームPDFに変換されます。ウィジェットにはさまざまなオプションがあるため、フォームフィールドの作成と更新の両方に使用する必要があるため、可能なPDFフィールド属性を含む新しいクラス" +" :ref:`Widget` を開発しました。" + +#: ../../page.rst:581 51214c25c3d0436caeb09dfe9b797633 +msgid "a :ref:`Widget` object which must have been created upfront." +msgstr "事前に作成されている必要がある :ref:`Widget` オブジェクト。" + +#: ../../page.rst:584 2d0f2a345b054888b02bc8c76d40fb65 +msgid "a widget annotation." +msgstr "ウィジェットアノテーション。" + +#: ../../page.rst:588 76be8fa7c33c4d569695619e472103b1 +msgid "" +"The removal will now include any bound 'Popup' or response annotations " +"and related objects (changed in v1.16.6)." +msgstr "v1.16.6 で変更: 削除操作には、現在は関連する 'Popup' や応答アノテーションおよび関連するオブジェクトも含まれます。" + +#: ../../page.rst:590 ae2072c76db946bd829a24ad54fb47d2 +msgid "PDF only: Delete annotation from the page and return the next one." +msgstr "PDFのみ:ページから注釈を削除し、次の注釈を返します。" + +#: ../../page.rst:592 5bfd6638ad47429ea191d4eae833d900 +msgid "the annotation to be deleted." +msgstr "削除するアノテーション。" + +#: ../../page.rst:596 c207ee0e3d0546df96a916745e7c80d9 +msgid "" +"the annotation following the deleted one. Please remember that physical " +"removal requires saving to a new file with garbage > 0." +msgstr "削除された注釈の後に続く注釈。物理的な削除には、ガベージ> 0で新しいファイルに保存する必要があることを覚えておいてください。" + +#: ../../page.rst:600 d6ace6c66a3f4bf189522f75ebec56d2 +msgid "PDF only: Delete field from the page and return the next one." +msgstr "PDFのみ:ページからフィールドを削除し、次のフィールドを返します。" + +#: ../../page.rst:602 f6581342bd364c73b61fbdb7f0948d61 +msgid "the widget to be deleted." +msgstr "削除するウィジェット。" + +#: ../../page.rst:605 ../../page.rst:1925 ../../page.rst:2237 +#: 498cf6d63ef74a7ea4599842cf31d132 c45ad8706e394affa463fa67ff0a65b5 +#: cd59e4459aac4929b101b288eef60e44 +msgid ":ref:`Widget`" +msgstr "" + +#: ../../page.rst:606 8a0c76914fd8417a83a90dc1ba87de59 +msgid "" +"the widget following the deleted one. Please remember that physical " +"removal requires saving to a new file with garbage > 0." +msgstr "削除されたウィジェットの後に続くウィジェット。物理的な削除には、ガベージ> 0で新しいファイルに保存する必要があることを覚えておいてください。" + +#: ../../page.rst:610 b4e7010005f44db0bac207f84fffcef5 +msgid "(New in v1.18.4)" +msgstr "v1.18.4で変更" + +#: ../../page.rst:617 2f3972bdd1e349cc893cec8efb4e5f68 +msgid "" +"PDF only: Delete the specified link from the page. The parameter must be " +"an **original item** of :meth:`get_links()`, see " +":ref:`link_dict_description`. The reason for this is the dictionary's " +"*\"xref\"* key, which identifies the PDF object to be deleted." +msgstr "" +"PDFのみ:ページから指定したリンクを削除します。パラメータは :meth:`get_links()` " +"の元のアイテムである必要があります(以下参照)。これは辞書の *「xref」* キーがPDFオブジェクトを識別するための理由です。" + +#: ../../page.rst:619 2a272359bf744f9da47b8eb957b39a0e +msgid "the link to be deleted." +msgstr "削除するリンクです。" + +#: ../../page.rst:623 857df9b440cb45a293d89cdc047244b1 +msgid "" +"PDF only: Insert a new link on this page. The parameter must be a " +"dictionary of format as provided by :meth:`get_links()`, see " +":ref:`link_dict_description`." +msgstr "" +"PDFのみ:このページに新しいリンクを挿入します。パラメータは :meth:`get_links()` " +"で提供される形式の辞書である必要があります(以下参照)。" + +#: ../../page.rst:625 be4800a60e324de6a5d51567284f81c8 +msgid "the link to be inserted." +msgstr "挿入するリンクです。" + +#: ../../page.rst:629 b0c4a65cee99442aaf6d3f5ebe136167 +msgid "" +"PDF only: Modify the specified link. The parameter must be a (modified) " +"**original item** of :meth:`get_links()`, see " +":ref:`link_dict_description`. The reason for this is the dictionary's " +"*\"xref\"* key, which identifies the PDF object to be changed." +msgstr "" +"PDFのみ:指定されたリンクを変更します。パラメータは :meth:`get_links()` (以下参照)の **元のアイテム** " +"である必要があります(変更された場合)。これは辞書の *「xref」* キーがPDFオブジェクトを識別するための理由です。" + +#: ../../page.rst:631 81a61a22391e4ea4a23f0688a09e7386 +msgid "the link to be modified." +msgstr "変更するリンクです。" + +#: ../../page.rst:633 9220974635574c3591cf897be8fbaeb4 +msgid "" +"If updating / inserting a URI link (`\"kind\": LINK_URI`), please make " +"sure to start the value for the `\"uri\"` key with a disambiguating " +"string like `\"http://\"`, `\"https://\"`, `\"file://\"`, `\"ftp://\"`, " +"`\"mailto:\"`, etc. Otherwise -- depending on your browser or other " +"\"consumer\" software -- unexpected default assumptions may lead to " +"unwanted behaviours." +msgstr "" +"URIリンク(`\"kind\": LINK_URI`)を更新/挿入する場合は、\"uri\"キーの値を必ず " +"`「http://」`、`「https://」`、`「file://」` " +"、`「ftp://」`、「mailto:」などの区別可能な文字列で始めるようにしてください。そうしないと、ブラウザや他の「コンシューマ」ソフトウェアによって、予期しないデフォルトの仮定が不要な動作につながる可能性があります。" + +#: ../../page.rst:638 274c9d541ac74e5ea207d3f73a8dc91d +msgid "PDF only: Return the label for the page." +msgstr "PDFのみ:ページのラベルを返します。" + +#: ../../page.rst:642 33e5a11d36e1431daf1368c98a576fa2 +msgid "the label string like \"vii\" for Roman numbering or \"\" if not defined." +msgstr "ローマ数字の「vii」などのラベル文字列、または定義されていない場合は \"\"。" + +#: ../../page.rst:646 adaf8b7f2475449f90e8cd8ec54e039b +msgid "New in v1.18.6" +msgstr "v1.18.6で新規追加" + +#: ../../page.rst:652 4a923364460045d5a15187ef3fe102fc +msgid "Retrieves **all** links of a page." +msgstr "ページの **すべて** のリンクを取得します。" + +#: ../../page.rst:655 bcaa9863f51a4098aceb69d06b292a29 +msgid "" +"A list of dictionaries. For a description of the dictionary entries, see " +":ref:`link_dict_description`. Always use this or the :meth:`Page.links` " +"method if you intend to make changes to the links of a page." +msgstr "" +"辞書のリスト。辞書エントリの説明については以下を参照してください。ページのリンクを変更する意図がある場合は、:meth:`Page.links` " +"メソッドまたはこれを常に使用してください。" + +#: ../../page.rst:659 8288bcddbb8148f7bcd061593ce751e1 +msgid "" +"Return a generator over the page's links. The results equal the entries " +"of :meth:`Page.get_links`." +msgstr "ページのリンクをイテレーターとして返します。結果は :meth:`Page.get_links` のエントリと同じです。" + +#: ../../page.rst:661 90c8aff7d9e74c24931c942581bd0ae3 +msgid "" +"a sequence of integers to down-select to one or more link kinds. Default " +"is all links. Example: *kinds=(pymupdf.LINK_GOTO,)* will only return " +"internal links." +msgstr "" +"1つ以上のリンク種別をダウン選択するための整数のシーケンス。デフォルトはすべてのリンクです。例:*kinds=(pymupdf.LINK_GOTO,)*" +" は内部リンクのみを返します。" + +#: ../../page.rst:664 41ba584d31334388b490e36defff9821 +msgid "an entry of :meth:`Page.get_links()` for each iteration." +msgstr "各イテレーションごとの :meth:`Page.get_links()` のエントリ。" + +#: ../../page.rst:668 ../../page.rst:693 ../../page.rst:708 +#: 44ffeea1857d4337b00b3dc93cfdcfa4 812b6e47ac2646d48041bf20b68f8a53 +#: b282370ccf544809b0846adeb39ab92a +msgid "New in v1.16.4" +msgstr "v1.16.4で新規追加" + +#: ../../page.rst:674 3a5b9b887ccc4b38902d0d3380b5be9d +msgid "Return a generator over the page's annotations." +msgstr "ページの注釈をイテレーターとして返します。" + +#: ../../page.rst:676 db4ae3ee5f9440749fabc50395b96538 +msgid "" +"a sequence of integers to down-select to one or more annotation types. " +"Default is all annotations. Example: `types=(pymupdf.PDF_ANNOT_FREETEXT, " +"pymupdf.PDF_ANNOT_TEXT)` will only return 'FreeText' and 'Text' " +"annotations." +msgstr "" +"1つ以上の注釈タイプをダウン選択するための整数のシーケンス。デフォルトはすべての注釈です。例:`types=(pymupdf.PDF_ANNOT_FREETEXT," +" pymupdf.PDF_ANNOT_TEXT)` は「FreeText」および「Text」注釈のみを返します。" + +#: ../../page.rst:679 e6a1f7b4662f4281be60566f06ee60ec +msgid "" +"an :ref:`Annot` for each iteration. .. caution:: You **cannot " +"safely update annotations** from within this generator. This is because " +"most annotation updates require reloading the page via `page = " +"doc.reload_page(page)`. To circumvent this restriction, make a list of " +"annotations xref numbers first and then iterate over these numbers::" +" In [4]: xrefs = [annot.xref for annot in " +"page.annots(types=[...])] In [5]: for xref in xrefs: ...:" +" annot = page.load_annot(xref) ...: annot.update()" +" ...: page = doc.reload_page(page) In [6]:" +msgstr "" + +#: ../../page.rst:679 db414be5c2864845bd7d6e78f019be7a +msgid "an :ref:`Annot` for each iteration." +msgstr "各イテレーションごとの :ref:`Annot`。" + +#: ../../page.rst:682 3133ff43da70418ba1788c56adc66272 +msgid "" +"You **cannot safely update annotations** from within this generator. This" +" is because most annotation updates require reloading the page via `page " +"= doc.reload_page(page)`. To circumvent this restriction, make a list of " +"annotations xref numbers first and then iterate over these numbers::" +msgstr "" +"このジェネレータ内から **アノテーションを安全に更新することはできません**。これは、ほとんどの注釈の更新にはpage = " +"doc.reload_page(page)を介してページを再読み込みする必要があるためです。この制限を回避するために、まず注釈のxref番号のリストを作成し、その後これらの番号を繰り返し処理します::" + +#: ../../page.rst:699 81535550c47247989662c901596ef387 +msgid "Return a generator over the page's form fields." +msgstr "フォームフィールドのジェネレーターを返します。" + +#: ../../page.rst:701 d983017f69ff441e9168756305961bc3 +msgid "" +"a sequence of integers to down-select to one or more widget types. " +"Default is all form fields. Example: " +"`types=(pymupdf.PDF_WIDGET_TYPE_TEXT,)` will only return 'Text' fields." +msgstr "" +"1つ以上のウィジェットタイプに選択を絞り込むための整数のシーケンス。デフォルトではすべてのフォームフィールドが対象です。例: " +"`types=(pymupdf.PDF_WIDGET_TYPE_TEXT,)` を指定すると 'Text' フィールドのみが返されます。" + +#: ../../page.rst:704 4a15f48736184017ad2467c5ed864950 +msgid "a :ref:`Widget` for each iteration." +msgstr "各イテレーションでの :ref:`Widget`" + +#: ../../page.rst:715 26c2638924b3489bb91702fbcbbe2e7b +msgid "" +"PDF only: Write the text of one or more :ref:`Textwriter` objects to the " +"page." +msgstr "PDF のみ: 1つ以上の :ref:`Textwriter` オブジェクトのテキストをページに書き込みます。" + +#: ../../page.rst:717 974bf33370534d90b49aa2ec2ca5c149 +msgid "" +"where to place the text. If omitted, the rectangle union of the text " +"writers is used." +msgstr "テキストを配置する場所。省略した場合、テキストライターの矩形の合併が使用されます。" + +#: ../../page.rst:718 93fc43ed5b744ab69350cb7ad552333f +msgid "" +"a non-empty tuple / list of :ref:`TextWriter` objects or a single " +":ref:`TextWriter`." +msgstr "1つ以上の :ref:`TextWriter` オブジェクトのタプル/リスト、または単一の :ref:`TextWriter` です。" + +#: ../../page.rst:719 ee380b54b2674d1e871ea8dea990e399 +msgid "set transparency, overwrites resp. value in the text writers." +msgstr "透明度を設定し、テキストライターの値を上書きします。" + +#: ../../page.rst:720 be65f4135d184a04893ca56113412b5f +msgid "set the text color, overwrites resp. value in the text writers." +msgstr "テキストの色を設定し、テキストライターの値を上書きします。" + +#: ../../page.rst:721 74d40e4a94e944e0889dd089382fd5cd +msgid "put the text in foreground or background." +msgstr "テキストを前景または背景に配置するかどうか。" + +#: ../../page.rst:722 0e248c98c8fd46cc849dcd8cbe869294 +msgid "maintain the aspect ratio." +msgstr "アスペクト比を保持するかどうか。" + +#: ../../page.rst:723 7c0466e3f39944cf9971d7f909291da0 +msgid "rotate the text by an arbitrary angle." +msgstr "テキストを任意の角度で回転します。" + +#: ../../page.rst:724 15bd09fd399d4f8f82973b5aaa8a588d +msgid "the :data:`xref` of an :data:`OCG` or :data:`OCMD`. (New in v1.18.4)" +msgstr "*(v1.18.4 で新たに追加)* :data:`OCG` または :data:`OCMD` の :data:`xref`。" + +#: ../../page.rst:726 e8d0a9b7e0074058a77c6636a480a8c0 +msgid "" +"Parameters *overlay, keep_proportion, rotate* and *oc* have the same " +"meaning as in :meth:`Page.show_pdf_page`." +msgstr "" +"*overlay、keep_proportion、rotate、oc* パラメーターは :meth:`Page.show_pdf_page` " +"と同じ意味を持ちます。" + +#: ../../page.rst:730 e81046c45a374a3d8d6033bc731a5449 +msgid "New in v1.16.18" +msgstr "v1.16.18 で追加された新機能です。" + +#: ../../page.rst:754 d4c8cb87f85641eab2bbdc9bfafd9e14 +#, fuzzy +msgid "" +"PDF only: Insert text lines starting at :data:`point_like` ``point``. See" +" :meth:`Shape.insert_text`." +msgstr "" +"PDFのみ: :data:`point_like` *point* からテキストを挿入します。:meth:`Shape.insert_text` " +"を参照してください。" + +#: ../../page.rst:758 ../../page.rst:787 ../../page.rst:881 ../../page.rst:906 +#: ../../page.rst:931 ../../page.rst:955 ../../page.rst:979 ../../page.rst:1004 +#: ../../page.rst:1028 ../../page.rst:1053 ../../page.rst:1077 +#: ../../page.rst:1102 ../../page.rst:1127 02b5170904bd4f99af539705a7cc7977 +#: 2382959e8b234de98a12d43489b1562d 38788041f0d2408c9e231772bc8f0669 +#: 587229303e2945c883d8b563095c89e0 60e96af2414d4967a924e4bea3cfd6be +#: 6596c22282f94bb5bea1d4416ba6fad1 7758fbd5966c4ee6a58f951670cb510a +#: 7fb78df643f4460a94b58b4a62d612f1 9727b4112dfe459fb816f36ad2176a3d +#: a161493d6c33490597f55ed7743ab9f4 af6fca8ec2db4c2ba79d0123bda84c11 +#: c88d570c43a146efafa45f5f0d3389b5 cd339ccd076840298b9e8e4730627be5 +msgid "Changed in v1.18.4" +msgstr "v1.18.4 で変更" + +#: ../../page.rst:783 e3908054229940c584c7373a709a6f6b +msgid "" +"PDF only: Insert text into the specified :data:`rect_like` *rect*. See " +":meth:`Shape.insert_textbox`." +msgstr "" +"PDF のみ:指定された :data:`rect_like` の *rect* " +"にテキストを挿入します。:meth:`Shape.insert_textbox` を参照してください。" + +#: ../../page.rst:805 c30fb417aeb04df7a977911e10637750 +msgid "" +"**PDF only:** Insert text into the specified rectangle. The method has " +"similarities with methods :meth:`Page.insert_textbox` and " +":meth:`TextWriter.fill_textbox`, but is **much more powerful**. This is " +"achieved by letting a :ref:`Story` object do all the required processing." +msgstr "" +"**PDFのみ:** 指定された矩形にテキストを挿入します。このメソッドは、:meth:`Page.insert_textbox` メソッドと " +":meth:`TextWriter.fill_textbox` メソッドと類似していますが、はるかに強力です。これは、:ref:`Story` " +"(ストーリー) オブジェクトに必要なすべての処理を行わせることで実現されます。" + +#: ../../page.rst:807 bb4e8495727141ab9cfb0d798e5501ce +#, fuzzy +msgid "" +"Parameter ``text`` may be a string as in the other methods. But it will " +"be **interpreted as HTML source** and may therefore also contain HTML " +"language elements -- including styling. The `css` parameter may be used " +"to pass in additional styling instructions." +msgstr "" +"パラメータ `text` は、他のメソッドと同様に文字列である場合があります。ただし、これは **HTMLソース** " +"として解釈され、そのためスタイリングを含むHTML言語要素も含まれる可能性があります。`css` " +"パラメータを使用して追加のスタイリング指示を渡すことができます。" + +#: ../../page.rst:809 93b1b192c795409a90b4f05d6832351d +#, fuzzy +msgid "" +"Automatic line breaks are generated at word boundaries. The \"soft " +"hyphen\" character `\"­\"` (or `­`) can be used to cause " +"hyphenation and thus may also cause line breaks. **Forced** line breaks " +"however are only achievable via the HTML tag ``
`` - ``\\n`` is " +"ignored and will be treated like a space." +msgstr "" +"自動的な改行は単語の境界で生成されます。 \"soft hyphen\" 文字 `\"­\"`(または `­` " +")を使用するとハイフネーションが引き起こされ、その結果改行も生じる場合があります。ただし、強制的な改行はHTMLタグ `
` - " +"`\"\\\\n\"` を使用してのみ実現可能であり、\"\n" +"\"は無視され、空白と同じように扱われます。" + +#: ../../page.rst:811 2f03392514de47e0a2ee5d0014e9abea +msgid "With this method the following can be achieved:" +msgstr "このメソッドでは、次のことが可能です。" + +#: ../../page.rst:813 5484dcc58f144b8e82ec5eea5dd1a10d +msgid "" +"Styling effects like bold, italic, text color, text alignment, font size " +"or font switching." +msgstr "太字、斜体、テキストの色、テキストの配置、フォントサイズ、またはフォントの切り替えなどのスタイル効果" + +#: ../../page.rst:814 91d0a8e0b994498e918cdf6a5d95c1d8 +msgid "" +"The text may include arbitrary languages -- **including right-to-left** " +"languages." +msgstr "テキストには、**右から左に書かれる言語を含む** 任意の言語を含めることができます。" + +#: ../../page.rst:815 baa66079b30f45f498e70324c07db8f4 +msgid "" +"Scripts like `Devanagari `_ and" +" several others in Asia have a highly complex system of ligatures, where " +"two or more unicodes together yield one glyph. The Story uses the " +"software package `HarfBuzz `_ , to deal with" +" these things and produce correct output." +msgstr "" +"`Devanagari `_ " +"やその他のアジアの文字など、2つ以上のUnicodeが一つのグリフに変換される複雑なリガチャーのあるスクリプトは、正しい出力を生成するためにソフトウェアパッケージ" +" `HarfBuzz `_ を使用しています。" + +#: ../../page.rst:816 a767e7dd38ea48ac93aa4a0c8fd7d71f +msgid "" +"One can also **include images** via HTML tag `` -- the Story will " +"take care of the appropriate layout. This is an alternative option to " +"insert images, compared to :meth:`Page.insert_image`." +msgstr "" +"画像はHTMLタグ `` " +"を介して含めることができ、Storyが適切なレイアウトを担当します。これは、:meth:`Page.insert_image` " +"と比較した画像の挿入の代替オプションです。" + +#: ../../page.rst:817 94bedd3247f04a0cbb73c85d6c4b9f62 +msgid "" +"HTML tables (tag ``) may be included in the text and will be " +"handled appropriately." +msgstr "HTMLテーブル( `
` タグ)をテキストに含めることができ、適切に処理されます。" + +#: ../../page.rst:818 415030cfa2404e04925401570d22f543 +msgid "Links are automatically generated when present." +msgstr "リンクが存在する場合、自動的にリンクが生成されます。" + +#: ../../page.rst:820 e5157602b09b4c1f8e3f129d0e0b0659 +msgid "If content does not fit in the rectangle, the developer has two choices:" +msgstr "もしコンテンツが長方形に収まらない場合、開発者には2つの選択肢があります。:" + +#: ../../page.rst:822 e61c9426a0894088b5bc0cbafba02e1d +msgid "" +"**either** only be informed about this (and accept a no-op, just like " +"with the other textbox insertion methods)," +msgstr "**それとも** 、このことについての情報のみを受け取り(他のテキストボックスの挿入メソッドと同様に、何も起こらないことを受け入れる)、" + +#: ../../page.rst:823 63173984caf042f0a5454b1b1bc0b0d4 +msgid "**or** (`scale_low=0` - the default) scale down the content until it fits." +msgstr "**または** ( `scale_low=0` - デフォルト)コンテンツを収まるまで縮小します。" + +#: ../../page.rst:825 4df5a5c7e930453c8f146bdf455fa600 +msgid "rectangle on page to receive the text." +msgstr "テキストを配置する長方形の領域。" + +#: ../../page.rst:826 a0eaa447bb284b3bb74c0405422c1b7b +msgid "" +"the text to be written. Can contain a mixture of plain text and HTML tags" +" with styling instructions. Alternatively, a :ref:`Story` object may be " +"specified (in which case the internal Story generation step will be " +"omitted). A Story must have been generated with all required styling and " +"Archive information." +msgstr "" +"書き込むテキスト。スタイル指示を含むプレーンテキストとHTMLタグの混合物であることができます。代替として、 " +":ref:`Story`(ストーリー)オブジェクトを指定することもできます(その場合、内部のストーリー生成ステップは省略されます)。必要なすべてのスタイリングとアーカイブ情報で生成されたストーリーを指定する必要があります。" + +#: ../../page.rst:827 8cb5eb993afe44c193c4ea151350084e +#, fuzzy +msgid "" +"optional string containing additional CSS instructions. This parameter is" +" ignored if ``text`` is a Story." +msgstr "(オプション)追加のCSS指示を含む文字列。`text` がストーリーの場合、このパラメータは無視されます。" + +#: ../../page.rst:828 3fdf4a9d48d9448ca4f7f01dafc41a5b +msgid "" +"if necessary, scale down the content until it fits in the target " +"rectangle. This sets the down scaling limit. Default is 0, no limit. A " +"value of 1 means no down-scaling permitted. A value of e.g. 0.2 means " +"maximum down-scaling by 80%." +msgstr "コンテンツがターゲットの長方形に収まるまで必要に応じてコンテンツのスケーリングを行います。これはダウンスケーリングの制限を設定します。デフォルトは0で、制限なしを意味します。値が1の場合、ダウンスケーリングは許可されません。たとえば、0.2の値は、最大で80%のダウンスケーリングを意味します。" + +#: ../../page.rst:829 08878f73b4174dfaafca50ea0da0b6c9 +#, fuzzy +msgid "" +"an Archive object that points to locations where to find images or non-" +"standard fonts. If ``text`` refers to images or non-standard fonts, this " +"parameter is required. This parameter is ignored if ``text`` is a Story." +msgstr "画像や非標準のフォントの場所を指すアーカイブオブジェクト。テキストが画像や非標準のフォントを参照する場合、このパラメータが必要です。テキストがストーリーの場合、このパラメータは無視されます" + +#: ../../page.rst:830 817624538d3c491f95c29bcef7a16924 +msgid "" +"one of the values 0, 90, 180, 270. Depending on this, text will be " +"filled: - 0: top-left to bottom-right. - 90: bottom-left to top-right. -" +" 180: bottom-right to top-left. - 270: top-right to bottom-left. .. " +"image:: images/img-rotate.*" +msgstr "" + +#: ../../page.rst:830 2a02180f07cd4166829b23a8b91ee8c6 +msgid "one of the values 0, 90, 180, 270. Depending on this, text will be filled:" +msgstr "0、90、180、270のいずれかの値。これに応じて、テキストが埋められます:" + +#: ../../page.rst:832 6836108ad3ce4073b7b8b9a0517e2499 +msgid "0: top-left to bottom-right." +msgstr "0:左上から右下に。" + +#: ../../page.rst:833 984d5a497dbd492bb070b930b10461be +msgid "90: bottom-left to top-right." +msgstr "90:左下から右上に。" + +#: ../../page.rst:834 5fd909dd82a64971b5cd264cfa5ab103 +msgid "180: bottom-right to top-left." +msgstr "180:右下から左上に。" + +#: ../../page.rst:835 a991085aac294108a79a9f98653aaa00 +msgid "270: top-right to bottom-left." +msgstr "270:右上から左下に。" + +#: ../../page.rst:839 0f04094ba74e4bbe861fbafc64ff4094 +msgid "" +"the xref of an :data:`OCG` / :data:`OCMD` or 0. Please refer to " +":meth:`Page.show_pdf_page` for details." +msgstr "" +":data:`OCG` (オプションコンテンツグループ)/ :data:`OCMD` " +"(オプションコンテンツメタデータ)のxrefまたは0。詳細については、 :meth:`Page.show_pdf_page` を参照してください。" + +#: ../../page.rst:840 7b3f847efe234a29a685509d8ca8766d +msgid "" +"set the fill and stroke opacity of the content. Only values `0 <= opacity" +" < 1` are considered." +msgstr "コンテンツの塗りつぶしとストロークの不透明度を設定します。`0 <= opacity < 1` の値のみ考慮されます。" + +#: ../../page.rst:841 18709a93160c4ea487ef9808bed3654c +msgid "" +"put the text in front of other content. Please refer to " +":meth:`Page.show_pdf_page` for details." +msgstr "テキストを他のコンテンツの前に配置します。詳細については、:meth:`Page.show_pdf_page` を参照してください。" + +#: ../../page.rst:843 faaa1b9450d74141b35b9b28f6a0e7ff +#, fuzzy +msgid "" +"A tuple of floats `(spare_height, scale)`. - `spare_height`: -1 if " +"content did not fit, else >= 0. It is the height of the unused (still " +"available) rectangle stripe. Positive only if scale = 1 (no down-scaling " +"happened). - `scale`: down-scaling factor, 0 < scale <= 1. Please refer " +"to examples in this section of the recipes: :ref:`RecipesText_I_c`." +msgstr "" +"`spare_height`: " +"コンテンツが収まらなかった場合は-1、そうでない場合は0以上の値です。未使用の(まだ利用可能な)長方形のストライプの高さです。スケール=1の場合にのみ正の値です(ダウンスケーリングが行われなかった場合)。" + +#: ../../page.rst:843 c050301d92744179b84a38c265b4c1de +msgid "A tuple of floats `(spare_height, scale)`." +msgstr "浮動小数点数のタプル `(spare_height, scale)` です。" + +#: ../../page.rst:845 e47eab27231b4543bd2bad39b1400984 +msgid "" +"`spare_height`: -1 if content did not fit, else >= 0. It is the height of" +" the unused (still available) rectangle stripe. Positive only if scale = " +"1 (no down-scaling happened)." +msgstr "" +"`spare_height`: " +"コンテンツが収まらなかった場合は-1、そうでない場合は0以上の値です。未使用の(まだ利用可能な)長方形のストライプの高さです。スケール=1の場合にのみ正の値です(ダウンスケーリングが行われなかった場合)。" + +#: ../../page.rst:846 875cd22f27c741b48dd4f9d41bf971a5 +msgid "`scale`: down-scaling factor, 0 < scale <= 1." +msgstr "`scale`: ダウンスケーリングファクター、0 < scale <= 1。" + +#: ../../page.rst:848 4ae5ded90acb4f78b78dd292c72a143f +msgid "" +"Please refer to examples in this section of the recipes: " +":ref:`RecipesText_I_c`." +msgstr "このレシピの例を参照してください::ref:`RecipesText_I_c`。" + +#: ../../page.rst:852 d17a3f3e796841268773690bae0e39f9 +#, fuzzy +msgid "New in v1.23.8; rebased-only." +msgstr "v1.21.0で新たに追加" + +#: ../../page.rst:853 ffde9008418e4e76863d02fb93d02a03 +#, fuzzy +msgid "New in v1.23.9: `opacity` parameter." +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:858 515a9d34c4e446dd899ffe8b847acb86 +msgid "**Drawing Methods**" +msgstr "**描画メソッド**" + +#: ../../page.rst:877 00308694f18d429ab832ff44993c3aaa +msgid "" +"PDF only: Draw a line from *p1* to *p2* (:data:`point_like` \\s). See " +":meth:`Shape.draw_line`." +msgstr "" +"PDF のみ:*p1* から *p2* までの直線を描画します(:data:`point_like` " +"\\s)。:meth:`Shape.draw_line` を参照してください。" + +#: ../../page.rst:902 313470ae51b740abab720b39136d455c +msgid "" +"PDF only: Draw a zigzag line from *p1* to *p2* (:data:`point_like` \\s). " +"See :meth:`Shape.draw_zigzag`." +msgstr "" +"PDF のみ:p1 から p2 までのジグザグ線を描画します(:data:`point_like` " +"\\s)。:meth:`Shape.draw_zigzag` を参照してください。" + +#: ../../page.rst:927 1112b73c66364a4785f9e683e7573cc4 +msgid "" +"PDF only: Draw a squiggly (wavy, undulated) line from *p1* to *p2* " +"(:data:`point_like` \\s). See :meth:`Shape.draw_squiggle`." +msgstr "" +"PDF のみ:*p1* から *p2* までの波線(うねり)を描画します(:data:`point_like` " +"\\s)。:meth:`Shape.draw_squiggle` を参照してください。" + +#: ../../page.rst:951 7b183b3297614735bc814d96cbfd97dd +msgid "" +"PDF only: Draw a circle around *center* (:data:`point_like`) with a " +"radius of *radius*. See :meth:`Shape.draw_circle`." +msgstr "" +"PDF のみ:*center* を中心に、半径 *radius* の円を描画します " +":data:`point_like`。:meth:`Shape.draw_circle` を参照してください。" + +#: ../../page.rst:975 7301c671e109481f9e83a7d776b4a2fe +msgid "" +"PDF only: Draw an oval (ellipse) within the given :data:`rect_like` or " +":data:`quad_like`. See :meth:`Shape.draw_oval`." +msgstr "" +"PDF のみ:指定された :data:`rect_like` または :data:`quad_like` " +"内に楕円を描画します。:meth:`Shape.draw_oval` を参照してください。" + +#: ../../page.rst:1000 990577977c784ba4af997f9ef7c622b6 +msgid "" +"PDF only: Draw a circular sector, optionally connecting the arc to the " +"circle's center (like a piece of pie). See :meth:`Shape.draw_sector`." +msgstr "" +"PDFのみ: " +"円形セクターを描画し、オプションで円の中心とアークを接続します(パイの一部のように)。:meth:`Shape.draw_sector` " +"を参照してください。" + +#: ../../page.rst:1024 36c61ce67c594c72a371b51e7000e291 +msgid "" +"PDF only: Draw several connected lines defined by a sequence of " +":data:`point_like` \\s. See :meth:`Shape.draw_polyline`." +msgstr "" +"PDFのみ: 一連の :data:`point_like` \\s " +"ポイントによって定義された接続された複数のラインを描画します。:meth:`Shape.draw_polyline` を参照してください。" + +#: ../../page.rst:1049 ac003f4e9849470697d81c2fa366b1ba +msgid "" +"PDF only: Draw a cubic Bézier curve from *p1* to *p4* with the control " +"points *p2* and *p3* (all are :data:`point_like` \\s). See " +":meth:`Shape.draw_bezier`." +msgstr "" +"PDFのみ: 制御ポイント *p2* および *p3* を使用して、*p1* から *p4* " +"へのキュービックベジエ曲線を描画します(すべてのポイントは :data:`point_like` " +"です)。:meth:`Shape.draw_bezier` を参照してください。" + +#: ../../page.rst:1073 d35edd81ecc442bea63861bbd25eafc9 +msgid "" +"PDF only: This is a special case of *draw_bezier()*. See " +":meth:`Shape.draw_curve`." +msgstr "PDFのみ: これは *draw_bezier()* の特別なケースです。:meth:`Shape.draw_curve` を参照してください。" + +#: ../../page.rst:1098 50418bf7a8534364808a1707e4fe246c +msgid "PDF only: Draw a rectangle. See :meth:`Shape.draw_rect`." +msgstr "PDFのみ: 四角形を描画します。:meth:`Shape.draw_rect` を参照してください。" + +#: ../../page.rst:1103 eddedc6605ce49809c24d097d2984e2e +msgid "Changed in v1.22.0: Added parameter *radius*." +msgstr "v1.22.0で変更:パラメーター *radius* を追加しました。" + +#: ../../page.rst:1123 43942d3402274a1eb4c1be77c9fd2cd1 +msgid "PDF only: Draw a quadrilateral. See :meth:`Shape.draw_quad`." +msgstr "PDFのみ: 四辺形を描画します。:meth:`Shape.draw_quad` を参照してください。" + +#: ../../page.rst:1141 b2be2fc678494eb39221ea6ff303b6ff +msgid "" +"PDF only: Add a new font to be used by text output methods and return its" +" :data:`xref`. If not already present in the file, the font definition " +"will be added. Supported are the built-in :data:`Base14_Fonts` and the " +"CJK fonts via **\"reserved\"** fontnames. Fonts can also be provided as a" +" file path or a memory area containing the image of a font file." +msgstr "" +"PDFのみ: テキスト出力メソッドで使用する新しいフォントを追加し、その :data:`xref` " +"を返します。ファイルにまだ存在しない場合、フォントの定義が追加されます。組み込みの :data:`Base14_Fonts` および CJK " +"フォントがサポートされており、**「予約済み」** " +"フォント名を介して使用できます。フォントはファイルパスまたはフォントファイルのイメージを含むメモリ領域として提供することもできます。" + +#: ../../page.rst:1143 cfdb8edc193d4e79b647f0ac2a0ec79b +msgid "" +"The name by which this font shall be referenced when outputting text on " +"this page. In general, you have a \"free\" choice here (but consult the " +":ref:`AdobeManual`, page 16, section 7.3.5 for a formal description of " +"building legal PDF names). However, if it matches one of the " +":data:`Base14_Fonts` or one of the CJK fonts, *fontfile* and *fontbuffer*" +" **are ignored**. In other words, you cannot insert a font via " +"*fontfile* / *fontbuffer* and also give it a reserved *fontname*. .. " +"note:: A reserved fontname can be specified in any mixture of upper or " +"lower case and still match the right built-in font definition: fontnames " +"\"helv\", \"Helv\", \"HELV\", \"Helvetica\", etc. all lead to the same " +"font definition \"Helvetica\". But from a :ref:`Page` perspective, these " +"are **different references**. You can exploit this fact when using " +"different *encoding* variants (Latin, Greek, Cyrillic) of the same font " +"on a page." +msgstr "" + +#: ../../page.rst:1143 5049de50208e4d3da2195ba71d574ec8 +msgid "" +"The name by which this font shall be referenced when outputting text on " +"this page. In general, you have a \"free\" choice here (but consult the " +":ref:`AdobeManual`, page 16, section 7.3.5 for a formal description of " +"building legal PDF names). However, if it matches one of the " +":data:`Base14_Fonts` or one of the CJK fonts, *fontfile* and *fontbuffer*" +" **are ignored**." +msgstr "" +"このフォントをこのページ上でテキスト出力する際に参照される名前。 " +"一般的に、ここで「自由な」選択ができます(ただし、:ref:`AdobeManual`、ページ16、セクション7.3.5を参照して正当なPDF名の形式的な説明を確認してください)。ただし、:data:`Base14_Fonts`" +" またはCJKフォントのいずれかと一致する場合、*fontfile* および *fontbuffer* は **無視されます。**" + +#: ../../page.rst:1145 13a924ba4f604a85a377adfd67e0e0cb +msgid "" +"In other words, you cannot insert a font via *fontfile* / *fontbuffer* " +"and also give it a reserved *fontname*." +msgstr "言い換えれば、フォントを *fontfile* / *fontbuffer* 経由で挿入し、予約されたフォント名も指定することはできません。" + +#: ../../page.rst:1147 b250be401d6345a88456aad2bb719047 +msgid "" +"A reserved fontname can be specified in any mixture of upper or lower " +"case and still match the right built-in font definition: fontnames " +"\"helv\", \"Helv\", \"HELV\", \"Helvetica\", etc. all lead to the same " +"font definition \"Helvetica\". But from a :ref:`Page` perspective, these " +"are **different references**. You can exploit this fact when using " +"different *encoding* variants (Latin, Greek, Cyrillic) of the same font " +"on a page." +msgstr "" +"予約されたフォント名は、大文字と小文字の組み合わせで指定でき、それでも適切な組み込みフォントの定義に一致します。フォント名「helv」、「Helv」、「HELV」、「Helvetica」などはすべて同じフォント定義「Helvetica」につながります。ただし、ページの観点からはこれらは" +" **異なる参照** です。同じフォントの異なる *エンコーディング* " +"バリアント(ラテン、ギリシャ、キリル文字など)をページ上で使用する場合、この事実を利用できます。" + +#: ../../page.rst:1149 568a3f9bcac44e76a9cae4c1619c4e19 +msgid "" +"a path to a font file. If used, *fontname* must be **different from all " +"reserved names**." +msgstr "フォントファイルへのパス。使用する場合、*fontname* は **すべての予約済みの名前と異なる必要があります**。" + +#: ../../page.rst:1151 ef58482888ff4fe88e1ef7e765450281 +msgid "" +"the memory image of a font file. If used, *fontname* must be **different " +"from all reserved names**. This parameter would typically be used with " +":attr:`Font.buffer` for fonts supported / available via :ref:`Font`." +msgstr "" +"フォントファイルのメモリイメージ。使用する場合、*fontname* は **すべての予約済みの名前と異なる** " +"必要があります。通常、このパラメーターは :attr:`Font.buffer` を介してサポート/利用可能な :ref:`Font` " +"に使用されます。" + +#: ../../page.rst:1153 6f5b3ae5c3a04c8496327fa8bc226a29 +msgid "" +"applicable for *fontfile* / *fontbuffer* cases only: enforce treatment as" +" a \"simple\" font, i.e. one that only uses character codes up to 255." +msgstr "" +"*fontfile* / *fontbuffer* " +"ケースにのみ適用可能:「シンプル」フォントとしての扱いを強制します。つまり、文字コードが255までしか使用しないフォントです。" + +#: ../../page.rst:1155 72755b828156477f800416c6306a1868 +msgid "" +"applicable for the \"Helvetica\", \"Courier\" and \"Times\" sets of " +":data:`Base14_Fonts` only. Select one of the available encodings Latin " +"(0), Cyrillic (2) or Greek (1). Only use the default (0 = Latin) for " +"\"Symbol\" and \"ZapfDingBats\"." +msgstr "" +":data:`Base14_Fonts` " +"の「Helvetica」、「Courier」、「Times」セットにのみ適用可能。利用可能なエンコーディングのうち、ラテン(0)、キリル文字(2)、ギリシャ文字(1)のいずれかを選択します。「Symbol」と「ZapfDingBats」についてはデフォルト(0" +" = ラテン)のみを使用してください。" + +#: ../../page.rst e88ec146e85641d885204a685c8289b0 +msgid "rytpe" +msgstr "" + +#: ../../page.rst:1157 ../../page.rst:2159 ../../page.rst:2243 +#: c33017a3f3ef496abb089682dc46c8c7 d7fed09e66ce41c29c43a88139d00fe7 +#: daffa429e0e64da79dd4902e22bed9bf +msgid "int" +msgstr "" + +#: ../../page.rst:1158 ed9b6c755819438a8077d348aeb84a27 +msgid "the :data:`xref` of the installed font." +msgstr "インストールされたフォントの :data:`xref`。" + +#: ../../page.rst:1160 5e42f6c62077464880eed293b24cc14f +msgid "" +"Built-in fonts will not lead to the inclusion of a font file. So the " +"resulting PDF file will remain small. However, your PDF viewer software " +"is responsible for generating an appropriate appearance -- and there " +"**exist** differences on whether or how each one of them does this. This " +"is especially true for the CJK fonts. But also Symbol and ZapfDingbats " +"are incorrectly handled in some cases. Following are the **Font Names** " +"and their correspondingly installed **Base Font** names:" +msgstr "" +"組み込みフォントはフォントファイルの追加を必要とせず、結果として生成されるPDFファイルは小さく保たれます。ただし、PDFビューアソフトウェアは適切な外観を生成する責任があり、それぞれがこれをどのように行うかには違い" +" " +"**があります**。これは特にCJKフォントに関して当てはまります。しかし、シンボルとZapfDingbatsも一部のケースで正しく扱われていないことがあります。以下は" +" **Font Names** とそれに対応するインストールされた **Base Font** 名です:" + +#: ../../page.rst:1162 9cb4875349f94ae19902b56255fc8c2b +msgid "**Base-14 Fonts** [#f1]_" +msgstr "**ベース14フォント** [1]" + +#: ../../page.rst:1165 ../../page.rst:1186 6720738e43554c81802da4c830a8ccba +#: 6986e89385d149fab8542671dd128e8e +msgid "**Font Name**" +msgstr "**フォント名**" + +#: ../../page.rst:1165 ../../page.rst:1186 7ac09aaf74a14575b0421131235693f6 +#: d0b80be471b14be0bf35b519683057e1 +msgid "**Installed Base Font**" +msgstr "**インストールされたベースフォント**" + +#: ../../page.rst:1165 ../../page.rst:1186 87418bfc0da8498488e9e16594122c42 +#: f9ae52653e654f8c9eb0457bf5e47eec +msgid "**Comments**" +msgstr "**コメント**" + +#: ../../page.rst:1167 022eb19bdd79464b8c4ee99162cfe972 +msgid "helv" +msgstr "" + +#: ../../page.rst:1167 95d6a47466e34411b8e7c0318f132d67 +msgid "Helvetica" +msgstr "" + +#: ../../page.rst:1167 ../../page.rst:1171 ../../page.rst:1175 +#: 77b1eed45e4344548d6ed7a4d83efd25 8c18d52fa34a4e69b8c9b8ad7c09f59c +#: eef309a4692d4e30ab53bb45615bd29e +msgid "normal" +msgstr "通常" + +#: ../../page.rst:1168 788497094287463b8bf205ae8593bc5e +msgid "heit" +msgstr "" + +#: ../../page.rst:1168 5eb371ec11504129849683da9800b08c +msgid "Helvetica-Oblique" +msgstr "" + +#: ../../page.rst:1168 ../../page.rst:1172 ../../page.rst:1176 +#: 3f7703efc3dd4a5bb43dbf83b457a891 c8a1d7a9a1ad4e7fb60af5db5a01933c +#: d86eead4ac5142f89b8f87f459b10017 +msgid "italic" +msgstr "斜体" + +#: ../../page.rst:1169 3f2281dfdcbd4def87c210af896d596c +msgid "hebo" +msgstr "" + +#: ../../page.rst:1169 1c4affb6221b4633a98102229d45dee1 +msgid "Helvetica-Bold" +msgstr "" + +#: ../../page.rst:1169 ../../page.rst:1173 ../../page.rst:1177 +#: 5682d76ea070418d925dbb57f9311ed4 66350aec58d349d09fea687d1c8831c7 +#: be9d473ebffa49e18b13cef0390b9b56 +msgid "bold" +msgstr "太字" + +#: ../../page.rst:1170 655ca753669644f8837390cefe236f53 +msgid "hebi" +msgstr "" + +#: ../../page.rst:1170 25bd0ccd507e464d986b308435fb6056 +msgid "Helvetica-BoldOblique" +msgstr "" + +#: ../../page.rst:1170 ../../page.rst:1174 ../../page.rst:1178 +#: 489765fd2ce3405e9aab42f0865104c4 58f0b1985d0a4d8cb13f811671963069 +#: f5aaea9a8b7543be87809b928ad50444 +msgid "bold-italic" +msgstr "太字斜体" + +#: ../../page.rst:1171 3247fbf800ed4b368627ec2084f18d83 +msgid "cour" +msgstr "" + +#: ../../page.rst:1171 73dafba6e80d4eb1b9cd1baa1bc639ca +msgid "Courier" +msgstr "" + +#: ../../page.rst:1172 d1a33abc313a4af3928a859637112332 +msgid "coit" +msgstr "" + +#: ../../page.rst:1172 17dbed1e903444f8b57f0f762513ba1b +msgid "Courier-Oblique" +msgstr "" + +#: ../../page.rst:1173 a9ab6dbba72d4f65939a2be0cd531ae4 +msgid "cobo" +msgstr "" + +#: ../../page.rst:1173 550ffcaf485a441986728a3f7f36f2b0 +msgid "Courier-Bold" +msgstr "" + +#: ../../page.rst:1174 504c1eb964ea429f989c8cbc2c49a302 +msgid "cobi" +msgstr "" + +#: ../../page.rst:1174 d86aa6bbd7094294ab03f6d7776224f0 +msgid "Courier-BoldOblique" +msgstr "" + +#: ../../page.rst:1175 bd3b8dd000cf4b218b432ecbc50fce6c +msgid "tiro" +msgstr "" + +#: ../../page.rst:1175 61ba13e1089748c2985086325c9f2c6a +msgid "Times-Roman" +msgstr "" + +#: ../../page.rst:1176 cbb2cbb175454c499374b756f177832e +msgid "tiit" +msgstr "" + +#: ../../page.rst:1176 e5ae54bf41a847848d5528bd554f0e86 +msgid "Times-Italic" +msgstr "" + +#: ../../page.rst:1177 76bd9fbfa2ca4d61bf8b209db352dde5 +msgid "tibo" +msgstr "" + +#: ../../page.rst:1177 177eab996c8547119fb98e7cc7ebdc2b +msgid "Times-Bold" +msgstr "" + +#: ../../page.rst:1178 7905b1fce0b54bf9863fa59974eabd59 +msgid "tibi" +msgstr "" + +#: ../../page.rst:1178 8cf39f48d994451e8981e0c4de6cceac +msgid "Times-BoldItalic" +msgstr "" + +#: ../../page.rst:1179 30549b9f54c04294848e2d39d2f719b6 +msgid "symb" +msgstr "" + +#: ../../page.rst:1179 e856ae196edb492a995df1c00314ddac +msgid "Symbol" +msgstr "" + +#: ../../page.rst:1179 ../../page.rst:1180 d554ad306b7a44c4bd6be1c6fecf3c66 +#: dd6656eb6a614699b287d81e85634cec +msgid "[#f3]_" +msgstr "" + +#: ../../page.rst:1180 ba39d723a5b14152b295b99792b0d9a9 +msgid "zadb" +msgstr "" + +#: ../../page.rst:1180 844f3371b4874c3eacb106acdcfaaa3a +msgid "ZapfDingbats" +msgstr "" + +#: ../../page.rst:1183 1c8b9ab151614e1da52d77751af4107c +msgid "**CJK Fonts** [#f2]_ (China, Japan, Korea)" +msgstr "**CJKフォント** [2] (中国、日本、韓国)" + +#: ../../page.rst:1188 597c42b69a7743f896440e2c9134d9e4 +msgid "china-s" +msgstr "" + +#: ../../page.rst:1188 ea073181e8d6489dbd8dc287c19574a6 +msgid "Heiti" +msgstr "" + +#: ../../page.rst:1188 1c739a248a4d4f099ba8383eeacbc3ac +msgid "simplified Chinese" +msgstr "簡体字中国語" + +#: ../../page.rst:1189 0ed0e0fcd4c341efb42dccd4e2881065 +msgid "china-ss" +msgstr "" + +#: ../../page.rst:1189 f4e34ceb652545779a4e118b771f3e5b +msgid "Song" +msgstr "" + +#: ../../page.rst:1189 55a6740952c34dd69fe1283827ed256b +msgid "simplified Chinese (serif)" +msgstr "簡体字中国語(セリフ)" + +#: ../../page.rst:1190 fdc0a9e7e4214cf68e214e5755c1730b +msgid "china-t" +msgstr "" + +#: ../../page.rst:1190 2196243f1db84774b1d956b644e82c37 +msgid "Fangti" +msgstr "" + +#: ../../page.rst:1190 ecce1527276a48dc9f2acc796ac3f059 +msgid "traditional Chinese" +msgstr "繁体字中国語" + +#: ../../page.rst:1191 c51681df733d4692a8069d7f532f4e71 +msgid "china-ts" +msgstr "" + +#: ../../page.rst:1191 4a3e421d0ef448169f15eba6008a78b3 +msgid "Ming" +msgstr "" + +#: ../../page.rst:1191 ccdd11dc218a45938de6849aaa2f4b49 +msgid "traditional Chinese (serif)" +msgstr "繁体字中国語(セリフ)" + +#: ../../page.rst:1192 b826d71f57f348d3aef4b7f29cf99979 +msgid "japan" +msgstr "" + +#: ../../page.rst:1192 d89eb3d6c6ea408da7fdeb323ca36d1c +msgid "Gothic" +msgstr "" + +#: ../../page.rst:1192 0ff3d22714e6409d94965e5c28a63ad1 +msgid "Japanese" +msgstr "" + +#: ../../page.rst:1193 7ae29f221d244557af6ad879260878a3 +msgid "japan-s" +msgstr "" + +#: ../../page.rst:1193 2f6e726715dc491db0127e1df1b29277 +msgid "Mincho" +msgstr "" + +#: ../../page.rst:1193 1b9852853717404e9ec823331b4bd183 +msgid "Japanese (serif)" +msgstr "" + +#: ../../page.rst:1194 b3809400d6e24405b6bdb1c43c5bd8f5 +msgid "korea" +msgstr "" + +#: ../../page.rst:1194 883f13d777e848bfa319d36a099924dc +msgid "Dotum" +msgstr "" + +#: ../../page.rst:1194 2995f7cdc27c4109912a42a3b41e6279 +msgid "Korean" +msgstr "" + +#: ../../page.rst:1195 0a173da8c6f347a7a0576081a6786325 +msgid "korea-s" +msgstr "" + +#: ../../page.rst:1195 1c3e275378b5432dbf80a6b365953a76 +msgid "Batang" +msgstr "" + +#: ../../page.rst:1195 33bcb0dfb2534179b634faf9384a349d +msgid "Korean (serif)" +msgstr "" + +#: ../../page.rst:1211 9b28b2d898fc41d78cc64894c467059e +msgid "" +"PDF only: Put an image inside the given rectangle. The image may already " +"exist in the PDF or be taken from a pixmap, a file, or a memory area." +msgstr "PDFのみ:指定された矩形内に画像を配置します。画像はすでにPDF内に存在するか、ピクスマップ、ファイル、またはメモリ領域から取得できます。" + +#: ../../page.rst:1214 caf88ed456594f1b8d6120ca2dbaddcf +msgid "where to put the image. Must be finite and not empty." +msgstr "画像を配置する場所。有限で空でない必要があります。" + +#: ../../page.rst:1215 0b25f7eb7d3347bea80fa10d2782c95a +msgid "deprecated and ignored." +msgstr "非推奨であり、無視されます。" + +#: ../../page.rst:1216 8a6784f6f8994f6098c6ab8f05179155 +msgid "" +"name of an image file (all formats supported by MuPDF -- see " +":ref:`ImageFiles`)." +msgstr "画像ファイルの名前(MuPDFでサポートされているすべての形式 – :ref:`ImageFiles` を参照)。" + +#: ../../page.rst:1220 093fbee1731b4980a3dc9a434b23de1d +msgid "maintain the aspect ratio of the image." +msgstr "アスペクト比を保持するかどうか。" + +#: ../../page.rst:1222 7fc2324c33734d0f9e5c922f840e6267 +msgid "" +"image in memory -- to be used as image mask (alpha values) for the base " +"image. When specified, the base image must be provided as a filename or a" +" stream -- and must not be an image that already has a mask." +msgstr "" +"メモリ内の画像 -- " +"ベース画像のマスク(アルファ値)として使用されます。指定する場合、ベース画像はファイル名またはストリームとして提供する必要があります。また、既にマスクを持つ画像ではない必要があります。" + +#: ../../page.rst:1226 5424ec6498a84179978f3ad78d40fea1 +msgid "" +"(:data:`xref`) make image visibility dependent on this :data:`OCG` or " +":data:`OCMD`. Ignored after the first of multiple insertions. The " +"property is stored with the generated PDF image object and therefore " +"controls the image's visibility throughout the PDF." +msgstr "" +"(:data:`xref`) この画像の表示をこの :data:`OCG` または :data:`OCMD` " +"に依存させます。複数回の挿入の最初の後には無視されます。このプロパティは生成された PDF 画像オブジェクトに格納されるため、PDF " +"全体で画像の表示を制御します。" + +#: ../../page.rst:1231 3bfcf48c843c4c5e822db747ef9f09ec +msgid "see :ref:`CommonParms`." +msgstr ":ref:`CommonParms` を参照してください。" + +#: ../../page.rst:1232 554292f26f1f403f8aa520730fc7859f +msgid "a pixmap containing the image." +msgstr "画像を含むピクスマップ。" + +#: ../../page.rst:1233 5e9a51b175dc455cbd9b46d83815641c +msgid "" +"rotate the image. Must be an integer multiple of 90 degrees. Positive " +"values rotate anti-clockwise. If you need a rotation by an arbitrary " +"angle, consider converting the image to a PDF " +"(:meth:`Document.convert_to_pdf`) first and then use " +":meth:`Page.show_pdf_page` instead." +msgstr "" +"(v1.14.11で新機能)* " +"画像を回転させます。90度の整数倍である必要があります。正の値は反時計回りに回転します。任意の角度での回転が必要な場合は、まず画像をPDFに変換(:meth:`Document.convert_to_pdf`)し、:meth:`Page.show_pdf_page`" +" を使用することを検討してください。" + +#: ../../page.rst:1240 71585d3f34ee4f29bcffb1034a53ae60 +msgid "image in memory (all formats supported by MuPDF -- see :ref:`ImageFiles`)." +msgstr "メモリ内の画像(MuPDFでサポートされているすべての形式 – :ref:`ImageFiles` を参照)。" + +#: ../../page.rst:1243 8882d90364d1407eb43b03e86f478a4c +msgid "" +"the :data:`xref` of an image already present in the PDF. If given, " +"parameters `filename`, `pixmap`, `stream`, `alpha` and `mask` are " +"ignored. The page will simply receive a reference to the existing image." +msgstr "" +"PDF内にすでに存在する画像の :data:`xref` 。指定された場合、 `filename` 、 `Pixmap` 、 `stream` 、" +" `alpha` 、および `mask` パラメータは無視されます。ページは単純に既存の画像への参照を受け取ります。" + +#: ../../page.rst:1251 21c518190825418eab20993f3da6ffbd +msgid "" +"The `xref` of the embedded image. This can be used as the `xref` argument" +" for very significant performance boosts, if the image is inserted again." +msgstr "埋め込まれた画像の `xref` です。この値は、画像を再度挿入する場合のxref引数として使用でき、非常に大幅なパフォーマンス向上に役立ちます。" + +#: ../../page.rst:1256 67fdd5cf494f47478fb1a215525039eb +msgid "This example puts the same image on every page of a document::" +msgstr "この例では、ドキュメントのすべてのページに同じ画像を配置します:" + +#: ../../page.rst:1271 cee8d80cc9314928998b2c0ceea6e0fd +msgid "" +"The method detects multiple insertions of the same image (like in the " +"above example) and will store its data only on the first execution. This" +" is even true (although less performant), if using the default `xref=0`." +msgstr "" +"このメソッドは、同じ画像が複数回挿入される場合(上記の例のように)を検出し、データは最初の実行時にのみ保存されます。これは、デフォルトの " +"`xref=0` を使用している場合でも(性能は劣るが)同様です" + +#: ../../page.rst:1276 ca3862ea700340f99e7ec4a839e571a6 +msgid "" +"The method cannot detect if the same image had already been part of the " +"file before opening it." +msgstr "このメソッドは、ファイルを開く前に同じ画像がすでにファイルの一部であるかどうかを検出できません。" + +#: ../../page.rst:1280 7e3c094f99ee4af3bcbe01e822ace6f1 +msgid "" +"You can use this method to provide a background or foreground image for " +"the page, like a copyright or a watermark. Please remember, that " +"watermarks require a transparent image if put in foreground ..." +msgstr "このメソッドを使用して、ページの背景または前景画像(著作権表示や透かしのようなもの)を提供できます。ただし、前景に透明な画像が必要な場合は、そのことを覚えておいてください..." + +#: ../../page.rst:1285 fc72c486660b45fd82bbf8eef1f04144 +msgid "" +"The image may be inserted uncompressed, e.g. if a `Pixmap` is used or if " +"the image has an alpha channel. Therefore, consider using `deflate=True` " +"when saving the file. In addition, there are ways to control the image " +"size -- even if transparency comes into play. Have a look at " +":ref:`RecipesImages_O`." +msgstr "" +"画像は非圧縮で挿入されることがあります。たとえば、``Pixmap`` " +"を使用するか、画像にアルファチャンネルがある場合です。したがって、ファイルを保存する際には *deflate=True* " +"を使用することを検討してください。また、透明性が関与する場合でも、画像サイズを効果的に制御する方法が存在します。ドキュメンテーションの " +":ref:`RecipesImages_O` セクションをご覧ください。" + +#: ../../page.rst:1292 9d70103e5e4d48aa86fa7897f4bbdedc +msgid "" +"The image is stored in the PDF at its original quality level. This may be" +" much better than what you need for your display. Consider **decreasing " +"the image size** before insertion -- e.g. by using the pixmap option and " +"then shrinking it or scaling it down (see :ref:`Pixmap` chapter). The PIL" +" method `Image.thumbnail()` can also be used for that purpose. The file " +"size savings can be very significant." +msgstr "" +"画像はPDF内でその元の品質で保存されます。これは、ディスプレイに必要なものよりもはるかに優れている場合があります。挿入前に " +"**画像サイズを減少させる** " +"ことを検討してください。たとえば、Pixmapオプションを使用してから縮小または縮小することができます(:ref:`Pixmap` " +"の章を参照)。PILメソッドの *Image.thumbnail()* " +"もそのために使用できます。ファイルサイズの節約は非常に大きい場合があります。" + +#: ../../page.rst:1301 19054921957e4475930254e79829a684 +msgid "" +"Another efficient way to display the same image on multiple pages is " +"another method: :meth:`show_pdf_page`. Consult " +":meth:`Document.convert_to_pdf` for how to obtain intermediary PDFs " +"usable for that method." +msgstr "同じ画像を複数のページで効率的に表示する別の方法は、別のメソッドです::meth:`show_pdf_page`。そのメソッドで使用可能な中間のPDFを取得する方法については、:meth:`Document.convert_to_pdf`" + +#: ../../page.rst:1308 174cfad8dac3422fa1aa8af5f6941b99 +msgid "Changed in v1.14.1: By default, the image keeps its aspect ratio." +msgstr "v1.14.1で変更:デフォルトでは、画像はアスペクト比を保持します" + +#: ../../page.rst:1309 b16ac519a78e423b826fc692d1e479e1 +#, fuzzy +msgid "Changed in v1.14.11: Added args `keep_proportion`, `rotate`." +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:1310 1d06a529b4134c8f80e72af90b76fd70 +msgid "Changed in v1.14.13:" +msgstr "v1.14.17で変更されました" + +#: ../../page.rst:1313 7fe6c3ed323f424f91494aa4a37d0b7f +#, fuzzy +msgid "" +"The image is now always placed **centered** in the rectangle, i.e. the " +"centers of image and rectangle are equal." +msgstr "v1.14.13で変更:画像は矩形の **中央に** 常に配置されます。つまり、画像と矩形の中心が等しいです。" + +#: ../../page.rst:1315 20112854c2d64eaab8fa3b8b5efb9bdb +msgid "Added support for `stream` as `io.BytesIO`." +msgstr "" + +#: ../../page.rst:1317 6c9255bf24a84d97a7cdd300170d4477 +msgid "" +"Changed in v1.17.6: Insertion rectangle no longer needs to have a non-" +"empty intersection with the page's :attr:`Page.cropbox` [#f5]_." +msgstr "v1.17.6で変更:挿入矩形はもはやページの :attr:`Page.cropbox` [#f5]_ と非空の交差を持つ必要はありません。" + +#: ../../page.rst:1320 8289824611bb4292866db7fa989dfd1a +#, fuzzy +msgid "Changed in v1.18.1: Added `mask` arg." +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:1321 37dea0ca3e704b8195a5da49ae9ab9b8 +#, fuzzy +msgid "Changed in v1.18.3: Added `oc` arg." +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:1322 80f881b404dc482bb8588a8d2e1bef0b +#, fuzzy +msgid "Changed in v1.18.13:" +msgstr "v1.18.17で変更" + +#: ../../page.rst:1324 816e04ec81c14e2c80e993a6c3cea6ca +#, fuzzy +msgid "Allow providing the image as the xref of an existing one." +msgstr "v1.18.13で変更:既存の画像のxrefとして画像を提供できるようになりました。" + +#: ../../page.rst:1325 48af6b717fee4dba843bc62c84acf897 +msgid "Added `xref` arg." +msgstr "" + +#: ../../page.rst:1326 c752c95f1d78413a88f08c925b7ecce4 +#, fuzzy +msgid "Return `xref` of stored image." +msgstr "画像の :data:`xref`。" + +#: ../../page.rst:1328 ba12bdf1aac94b9f9689d2109614acd8 +#, fuzzy +msgid "Changed in v1.19.3: deprecate and ignore `alpha` arg." +msgstr "*(v1.19.3で変更)* 非推奨。指定された場合、無視されます。" + +#: ../../page.rst:1341 fe0447fc76f0423a80b3ecac6839cd50 +msgid "Replace the image at xref with another one." +msgstr "xrefで指定された画像を別の画像で置き換えます。" + +#: ../../page.rst:1343 ../../page.rst:1368 1b33038c6ee3441395f8dfb708c7665c +#: 43cc5eb02ffa4c4da30b62b8b00abb66 +msgid "the :data:`xref` of the image." +msgstr "画像の :data:`xref`。" + +#: ../../page.rst:1344 d1a4c801374244e09171338aef674506 +msgid "the filename of the new image." +msgstr "新しい画像のファイル名。" + +#: ../../page.rst:1345 6d87a017d82040f2a81f74cd6385b75f +msgid "the :ref:`Pixmap` of the new image." +msgstr "新しい画像の :ref:`Pixmap`。" + +#: ../../page.rst:1346 945d6f45a57947d98a130b7e2e86749f +msgid "the memory area containing the new image." +msgstr "新しい画像を含むメモリ領域。" + +#: ../../page.rst:1348 45f68398b3eb4d1390d4c212b48b7f54 +msgid "" +"Arguments `filename`, `pixmap`, `stream` have the same meaning as in " +":meth:`Page.insert_image`, especially exactly one of these must be " +"provided." +msgstr "" +"`filename`、`pixmap`、`stream` の引数は、特に :meth:`Page.insert_image` " +"での意味と同じです。特に、これらのうちの1つだけを指定する必要があります。" + +#: ../../page.rst:1350 4faf8d6cfb0d4f8eba45531e474028ae +msgid "" +"This is a **global replacement:** the new image will also be shown " +"wherever the old one has been displayed throughout the file." +msgstr "これは **グローバルな置換** です:新しい画像は、古い画像がファイル全体で表示されていた場所でも表示されます。" + +#: ../../page.rst:1352 7a1d8a0dc96e48fda647ffb83eaa97d1 +msgid "" +"This method mainly exists for technical purposes. Typical uses include " +"replacing large images by smaller versions, like a lower resolution, " +"graylevel instead of colored, etc., or changing transparency." +msgstr "このメソッドは主に技術的な目的で存在しています。典型的な使用例には、大きな画像を解像度の低いバージョン、カラーではなくグレースケールなど、より小さなバージョンで置き換えることが含まれます。または透明度を変更することもあります。" + +#: ../../page.rst:1356 ../../page.rst:1380 17ec989e104f4b95822cfc83a85dc280 +#: b4e1a790d8ed4e22974e6b84fba51c41 +msgid "New in v1.21.0" +msgstr "v1.21.0で新たに追加" + +#: ../../page.rst:1366 14e832f315eb4986824ff5f4f4ac7931 +msgid "" +"Delete the image at xref. This is slightly misleading: actually the image" +" is being replaced with a small transparent :ref:`Pixmap` using above " +":meth:`Page.replace_image`. The visible effect however is equivalent." +msgstr "" +"画像のxrefを削除します。これはわずかに誤解を招くかもしれませんが、実際には、画像は上記の :meth:`Page.replace_image`" +" を使用して小さな透明な :ref:`Pixmap` で置き換えられます。しかし、視覚的な効果は同等です。" + +#: ../../page.rst:1370 a4b4d7becbf344339301e38dc2b7c7bc +msgid "" +"This is a **global replacement:** the image will disappear wherever the " +"old one has been displayed throughout the file." +msgstr "これは **グローバルな置換です:** 新しい画像は、古い画像がファイル全体で表示されていた場所でも表示されなくなります。" + +#: ../../page.rst:1372 484b23123a444de69b98dfed01566c31 +msgid "" +"If you inspect / extract a page's images by methods like " +":meth:`Page.get_images`, :meth:`Page.get_image_info` or " +":meth:`Page.get_text`, the replacing \"dummy\" image will be detected " +"like so `(45, 47, 1, 1, 8, 'DeviceGray', '', 'Im1', 'FlateDecode')` and " +"also seem to \"cover\" the same boundary box on the page." +msgstr "" +":meth:`Page.get_images`、:meth:`Page.get_image_info`、または " +":meth:`Page.get_text` " +"などのメソッドを使用してページの画像を調査/抽出する場合、置き換えられた「ダミー」画像は次のように検出されます`(45, 47, 1, 1, 8," +" 'DeviceGray', '', 'Im1', 'FlateDecode')` 、また同じ境界ボックスをページ上に「覆う」ように見えます。" + +#: ../../page.rst:1403 8308a4ff4f504e2fb4ad2ecf24470e38 +#, fuzzy +msgid "" +"Retrieves the content of a page in a variety of formats. Depending on the" +" ``flags`` value, this may include text, images and several other object " +"types. The method is a wrapper for multiple :ref:`TextPage` methods by " +"choosing the output option `opt` as follows:" +msgstr "" +"さまざまな形式でページのコンテンツを取得します。出力オプション `opt` を選択することで、複数の :ref:`TextPage` " +"メソッドのラッパーとなります。" + +#: ../../page.rst:1405 452aad37ecdc4dcf8afcd5b8cbe2ae0c +#, fuzzy +msgid "" +"\"text\" -- :meth:`TextPage.extractTEXT`, default. Always includes **text" +" only.**" +msgstr "“text” – :meth:`TextPage.extractTEXT`、デフォルト" + +#: ../../page.rst:1406 02a7df769f4141dd96b0618120183f52 +msgid "" +"\"blocks\" -- :meth:`TextPage.extractBLOCKS`. Includes text and **may** " +"include image meta information." +msgstr "" + +#: ../../page.rst:1407 434f558853dc4ef297a4bff847f30a94 +msgid "\"words\" -- :meth:`TextPage.extractWORDS`. Always includes **text only.**" +msgstr "" + +#: ../../page.rst:1408 3632e16a205d4e5e9a5f0cddd5460024 +msgid "\"html\" -- :meth:`TextPage.extractHTML`. May include text and images." +msgstr "" + +#: ../../page.rst:1409 2981b96bbe90453091a210dc3fce33bb +msgid "\"xhtml\" -- :meth:`TextPage.extractXHTML`. May include text and images." +msgstr "" + +#: ../../page.rst:1410 1d41d88d2597445689305faea90a2583 +msgid "\"xml\" -- :meth:`TextPage.extractXML`. Always includes **text only.**" +msgstr "" + +#: ../../page.rst:1411 e552781435524d03bcf6f26ba28a5e15 +msgid "\"dict\" -- :meth:`TextPage.extractDICT`. May include text and images." +msgstr "" + +#: ../../page.rst:1412 d11fe01ef22a4daba97be9950a45cc43 +msgid "\"json\" -- :meth:`TextPage.extractJSON`. May include text and images." +msgstr "" + +#: ../../page.rst:1413 bd296e0297d74b9b830e45fb6faa8409 +msgid "" +"\"rawdict\" -- :meth:`TextPage.extractRAWDICT`. May include text and " +"images." +msgstr "" + +#: ../../page.rst:1414 9c6c9e6fc7c745c3876c959ce393ce8a +msgid "" +"\"rawjson\" -- :meth:`TextPage.extractRAWJSON`. May include text and " +"images." +msgstr "" + +#: ../../page.rst:1416 ccf6702d5aa0424f9ce5a59cf04577e5 +#, fuzzy +msgid "" +"A string indicating the requested format, one of the above. A mixture of " +"upper and lower case is supported. If misspelled, option \"text\" is " +"silently assumed." +msgstr "要求される形式を示す文字列、上記のいずれか。大文字と小文字の組み合わせがサポートされています" + +#: ../../page.rst:1418 9142feb79e394e408bb80ef000e1cb99 +msgid "" +"restrict the extraction to this rectangle. If ``None`` (default), the " +"visible part of the page is taken. Any content (text, images) that is " +"**not fully contained** in ``clip`` will be completely omitted. To avoid " +"clipping altogether use ``clip=pymupdf.INFINITE_RECT()``. Only then the " +"extraction will contain all items. This parameter has **no effect** on " +"options \"html\", \"xhtml\" and \"xml\"." +msgstr "" + +#: ../../page.rst:1420 47940232bda448a5a84ea18382f62861 +msgid "" +"indicator bits to control whether to include images or how text should be" +" handled with respect to white spaces and :data:`ligatures`. See " +":ref:`TextPreserve` for available indicators and " +":ref:`text_extraction_flags` for default settings. (New in v1.16.2)" +msgstr "" +"画像を含めるか、テキストをどのように空白や :data:`ligatures` に対応させるかを制御するための指示ビット。 " +"使用可能な指示ビットについては、 :ref:`TextPreserve` を参照してください。 また、デフォルト設定については、 " +":ref:`text_extraction_flags` を参照してください。 (v1.16.2で新規追加)" + +#: ../../page.rst:1422 5830c1684d704db0a69122ab1a1dee32 +#, fuzzy +msgid "" +"use a previously created :ref:`TextPage`. This reduces execution time " +"**very significantly:** by more than 50% and up to 95%, depending on the " +"extraction option. If specified, the 'flags' and 'clip' arguments are " +"ignored, because they are textpage-only properties. If omitted, a new, " +"temporary textpage will be created." +msgstr "" +"(v1.19.0で新たに追加) 事前に作成したTextPageを使用します。これにより、実行時間が **非常に大幅に** " +"削減されます:抽出オプションに応じて50%以上、95%まで削減されます。指定した場合、 'flags' および 'clip' " +"引数は無視されます。テキストページ専用のプロパティであるためです。省略した場合、新しい一時的なテキストページが作成されます。" + +#: ../../page.rst:1424 9b2d391e568944a5beb94b59e95abd9c +msgid "" +"sort the output by vertical, then horizontal coordinates. In many cases, " +"this should suffice to generate a \"natural\" reading order. Has no " +"effect on (X)HTML and XML. For options \"blocks\", \"dict\", \"json\", " +"\"rawdict\", \"rawjson\", sorting happens by coordinates `(y1, x0)` of " +"the respective block bbox. For options \"words\" and \"text\", the text " +"lines are completely re-synthesized to follow the reading sequence and " +"appearance in the document -- which even establishes the original layout " +"to some extent." +msgstr "" + +#: ../../page.rst:1426 9d08f6f3b86e4bac9691d58c3a2dc750 +msgid "" +"use these characters as *additional* word separators with the \"words\" " +"output option (ignored otherwise). By default, all white spaces " +"(including non-breaking space `0xA0`) indicate start and end of a word. " +"Now you can specify more characters causing this. For instance, the " +"default will return `\"john.doe@outlook.com\"` as **one** word. If you " +"specify `delimiters=\"@.\"` then the **four** words `\"john\"`, " +"`\"doe\"`, `\"outlook\"`, `\"com\"` will be returned. Other possible uses" +" include ignoring punctuation characters `delimiters=string.punctuation`." +" The \"word\" strings will not contain any delimiting character. (New in " +"v1.23.5)" +msgstr "" +"これらの文字を、追加の単語の区切りとして、\"words\" " +"出力オプションで使用します(それ以外の場合は無視されます)。デフォルトでは、すべての空白(非改行スペース `0xA0` " +"を含む)が単語の開始と終了を示します。これにより、さらにこれを引き起こす文字を指定できます。例えば、デフォルトでは " +"\"john.doe@outlook.com\" は1つの単語として返されます。`delimiters=\"@.\"` " +"と指定すると、\"john\"、\"doe\"、\"outlook\"、\"com\" " +"の4つの単語が返されます。その他の可能な用途には、句読点を無視するための `delimiters = string.punctuation` " +"があります。\"word\" 文字列には、区切り文字は含まれません。 (v1.23.5で新たに追加)" + +#: ../../page.rst:1428 d97c42dd73cf44b69c3b867d5b46c8f4 +msgid "*str, list, dict*" +msgstr "" + +#: ../../page.rst:1429 aa4cc9fdf3c94379ac6a8f9eee9211cc +msgid "" +"The page's content as a string, a list or a dictionary. Refer to the " +"corresponding :ref:`TextPage` method for details." +msgstr "ページの内容を表す文字列、リスト、または辞書。詳細については対応するTextPageメソッドを参照してください。" + +#: ../../page.rst:1433 3ab1bbe9190a4158ab80a42f8d45f395 +msgid "" +"You can use this method as a **document conversion tool** from :ref:`any " +"supported document type` to one of TEXT, HTML, " +"XHTML or XML documents." +msgstr "" +"このメソッドを、:ref:`any supported document type` " +"からTEXT、HTML、XHTML、またはXMLドキュメントのいずれかに変換する **ドキュメント変換ツール** として使用できます。" + +#: ../../page.rst:1434 7ab0efc7d9074d6986ec414d7b427423 +msgid "" +"The inclusion of text via the *clip* parameter is decided on a by-" +"character level: a character becomes part of the output, if its bbox is " +"contained in `clip`. This **deviates** from the algorithm used in " +"redaction annotations: a character will be **removed if its bbox " +"intersects** any redaction annotation." +msgstr "" + +#: ../../page.rst:1438 ef4868405e3c4c62b05ec310996bfa02 +msgid "Changed in v1.19.0: added `textpage` parameter" +msgstr "v1.19.0で変更:`textpage` パラメータを追加" + +#: ../../page.rst:1439 6b7b4fb5d00f4f328d415d3d128830f2 +msgid "Changed in v1.19.1: added `sort` parameter" +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:1440 fdbbed76abc9428997d44c91c1647c7c +msgid "" +"Changed in v1.19.6: added new constants for defining default flags per " +"method." +msgstr "v1.19.6で変更:各メソッドごとのデフォルトフラグを定義するための新しい定数を追加" + +#: ../../page.rst:1441 15282d01b33848c1859ed7578c596804 +#, fuzzy +msgid "Changed in v1.23.5: added `delimiters` parameter" +msgstr "v1.19.1で変更:`sort` パラメータを追加" + +#: ../../page.rst:1442 9ddf6015bf36464fbb48c09652673998 +msgid "" +"Changed in v1.24.11: changed the effect of `sort_True` for \"text\" and " +"\"words\" to closely follow natural reading sequence." +msgstr "" + +#: ../../page.rst:1452 5a360b6151a840dabaa9d16fcab011af +msgid "Retrieve the text contained in a rectangle." +msgstr "指定された矩形に含まれるテキストを取得します。" + +#: ../../page.rst:1454 88ad8c824e18426494ab18de0a207e22 +msgid "rect-like." +msgstr "矩形のようなもの。" + +#: ../../page.rst:1455 8f3368c15a84497bbe45a6a454e5fccb +msgid "" +"a :ref:`TextPage` to use. If omitted, a new, temporary textpage will be " +"created." +msgstr "使用する :ref:`TextPage`。省略した場合、新しい一時的なテキストページが作成されます。" + +#: ../../page.rst:1457 d9487bed71ee497c94721af0708893fb +#, fuzzy +msgid "" +"a string with interspersed linebreaks where necessary. It is based on " +"dedicated code (changed in v1.19.0). A typical use is checking the result" +" of :meth:`Page.search_for`: >>> rl = page.search_for(\"currency:\") >>>" +" page.get_textbox(rl[0]) 'Currency:' >>>" +msgstr "" +"必要に応じて改行が挿入された文字列。v1.19.0 で変更: " +"それは専用のコードに基づいています。典型的な使用例は、:meth:`Page.search_for` の結果をチェックすることです" + +#: ../../page.rst:1457 52ca51f4545a47a2a05b9db0471547f9 +msgid "" +"a string with interspersed linebreaks where necessary. It is based on " +"dedicated code (changed in v1.19.0). A typical use is checking the result" +" of :meth:`Page.search_for`:" +msgstr "" +"必要に応じて改行が挿入された文字列。v1.19.0 で変更: " +"それは専用のコードに基づいています。典型的な使用例は、:meth:`Page.search_for` の結果をチェックすることです" + +#: ../../page.rst:1466 7f177e688d9f4fc29242ffe745e69a04 +msgid "New in v1.17.7" +msgstr "新機能 v1.17.7" + +#: ../../page.rst:1467 81a55c6099654e378c111dd17aa154c7 +msgid "Changed in v1.19.0: add `textpage` parameter" +msgstr "v1.19.0 で変更: `textpage` パラメータを追加" + +#: ../../page.rst:1478 adbe39b30773441db3e7baa4cf775b66 +msgid "Create a :ref:`TextPage` for the page." +msgstr "ページ用の :ref:`TextPage` を作成します" + +#: ../../page.rst:1480 5b0cf9ded7f84b91aa9caf26fcf202a6 +msgid "" +"indicator bits controlling the content available for subsequent text " +"extractions and searches -- see the parameter of :meth:`Page.get_text`." +msgstr "後続のテキスト抽出と検索で使用可能なコンテンツを制御する指示ビット – :meth:`Page.get_text` のパラメータを参照してください。" + +#: ../../page.rst:1482 0037c15d3c5b44ff9f8b8c47ab25b23f +msgid "restrict extracted text to this area. (New in v1.17.7)" +msgstr "*(v1.17.7 で新機能)* 抽出されたテキストをこの領域に制限します。" + +#: ../../page.rst:1484 8f8e5c359c7742ffab8a38b67b433c4c +msgid ":ref:`TextPage`" +msgstr "" + +#: ../../page.rst:1488 6952e059cfc246a286b4a08861b8cadb +msgid "New in v1.16.5" +msgstr "v1.16.5 で新機能。" + +#: ../../page.rst:1489 c1ba15b30be74e12889195304dea7ce4 +msgid "Changed in v1.17.7: introduced `clip` parameter." +msgstr "v1.17.7 で変更: `clip` パラメータが導入されました。" + +#: ../../page.rst:1503 7f911d88a72744b1b229485d4e25336b +msgid "" +"**Optical Character Recognition** (**OCR**) technology can be used to " +"extract text data for documents where text is in a raster image format " +"throughout the page. Use this method to **OCR** a page for text " +"extraction." +msgstr "" +"**光学式文字認識** ( **OCR** " +")技術は、ページ全体でテキストがラスター画像形式であるドキュメントからテキストデータを抽出するために使用できます。このメソッドを使用して、テキストの抽出のためにページを" +" **OCR** します。" + +#: ../../page.rst:1505 ff5001b1bcff456b8148d592a1006daf +msgid "" +"This method returns a :ref:`TextPage` for the page that includes OCRed " +"text. MuPDF will invoke Tesseract-OCR if this method is used. Otherwise " +"this is a normal :ref:`TextPage` object." +msgstr "" +"OCRed テキストを含むページの :ref:`TextPage` を作成します。このメソッドを使用すると、MuPDF は Tesseract-" +"OCR を呼び出します。それ以外の場合、これは通常の :ref:`TextPage` オブジェクトです。" + +#: ../../page.rst:1507 2cade91f447846489a035c9db8c19497 +msgid "" +"indicator bits controlling the content available for subsequent test " +"extractions and searches -- see the parameter of :meth:`Page.get_text`." +msgstr "後続のテキスト抽出と検索に使用可能なコンテンツを制御する指示ビット – :meth:`Page.get_text` のパラメータを参照してください。" + +#: ../../page.rst:1508 31c3b501d1f34d8ebdafbf369eaee2a7 +msgid "" +"the expected language(s). Use \"+\"-separated values if multiple " +"languages are expected, \"eng+spa\" for English and Spanish." +msgstr "期待される言語。複数の言語が期待される場合は \"+\" で区切って指定します。たとえば英語とスペイン語の場合は \"eng+spa\" です。" + +#: ../../page.rst:1509 fbc77f5dd2a64c9580e379dca42b840b +msgid "" +"the desired resolution in dots per inch. Influences recognition quality " +"(and execution time)." +msgstr "インチ当たりのドット数で指定された解像度。認識品質(および実行時間)に影響を与えます。" + +#: ../../page.rst:1510 76846001509146cb928d95ea24c6addc +msgid "whether to OCR the full page, or just the displayed images." +msgstr "ページ全体を OCR するか、表示された画像のみを OCR するかを指定します。" + +#: ../../page.rst:1511 0da892b6642e40da9665478da7a3cf35 +msgid "" +"The name of Tesseract's language support folder `tessdata`. If omitted, " +"this information must be present as environment variable " +"`TESSDATA_PREFIX`. Can be determined by function :meth:`get_tessdata`." +msgstr "" +"Tesseract の言語サポートフォルダ `tessdata` の名前。省略した場合、この情報は環境変数 `TESSDATA_PREFIX` " +"として存在している必要があります。tessdata を取得する関数 :meth:`get_tessdata` によって決定できます。" + +#: ../../page.rst:1513 8e1aa2e10adf43be84b56ac4599bbd59 +msgid "" +"This method does **not** support a clip parameter -- OCR will always " +"happen for the complete page rectangle." +msgstr "このメソッドは clip パラメータをサポート **していない** ため、OCR は常に完全なページ矩形に対して行われます" + +#: ../../page.rst:1515 e893905f0abd451583502b8d296b098d +msgid "" +"a :ref:`TextPage`. Execution may be significantly longer than " +":meth:`Page.get_textpage`. For a full page OCR, **all text** will have " +"the font \"GlyphlessFont\" from Tesseract. In case of partial OCR, normal" +" text will keep its properties, and only text coming from images will " +"have the GlyphlessFont. .. note:: **OCRed text is only available** " +"to PyMuPDF's text extractions and searches if their `textpage` parameter " +"specifies the output of this method. `This Jupyter notebook " +"`_ walks through an example for using OCR " +"textpages." +msgstr "" + +#: ../../page.rst:1517 8bb21c9ca89640a8a4036d03c4d9fdc5 +msgid "" +"a :ref:`TextPage`. Execution may be significantly longer than " +":meth:`Page.get_textpage`." +msgstr ":ref:`TextPage`。実行時間は :meth:`Page.get_textpage` よりも大幅に長くなる場合があります。" + +#: ../../page.rst:1519 4ce634db83204890b32edcb9f8467239 +msgid "" +"For a full page OCR, **all text** will have the font \"GlyphlessFont\" " +"from Tesseract. In case of partial OCR, normal text will keep its " +"properties, and only text coming from images will have the GlyphlessFont." +msgstr "" +"フルページの OCR の場合、**すべてのテキスト** は Tesseract の \"GlyphlessFont\" になります。部分的な " +"OCR の場合、通常のテキストはそのプロパティを保持し、画像から来たテキストのみが GlyphlessFont になります。" + +#: ../../page.rst:1523 953ebed17b4c4b1f872503a17886772e +msgid "" +"**OCRed text is only available** to PyMuPDF's text extractions and " +"searches if their `textpage` parameter specifies the output of this " +"method." +msgstr "" +"OCRed textは、PyMuPDFのテキスト抽出と検索でのみ利用可能であり、その `TextPage` " +"パラメータがこのメソッドの出力を指定している場合にのみ利用できます。" + +#: ../../page.rst:1525 4e52cfd40e2145529ffb872a072bc357 +#, fuzzy +msgid "" +"`This Jupyter notebook `_ walks " +"through an example for using OCR textpages." +msgstr "" +"`このJupyter `_ ノートブックは、OCRテキストページを使用する例を説明します。" + +#: ../../page.rst:1529 5564269d89fd4527a9d8fac8f4a6e8a5 +msgid "New in v.1.19.0" +msgstr "v1.19.0 で新機能" + +#: ../../page.rst:1530 e3a425ead27c4c15b2ffb42572282313 +msgid "Changed in v1.19.1: support full and partial OCRing a page." +msgstr "v1.19.1 で変更: ページのフルと部分的な OCR をサポート" + +#: ../../page.rst:1537 f899149460444da892e7dd5050576760 +msgid "" +"Return the vector graphics of the page. These are instructions which draw" +" lines, rectangles, quadruples or curves, including properties like " +"colors, transparency, line width and dashing, etc. Alternative terms are " +"\"line art\" and \"drawings\"." +msgstr "ページのベクトルグラフィックスを返します。これらは線、四角形、四角形または曲線を描画するための命令で、色、透明度、線の幅、点線などのプロパティを含みます。代替用語は「ラインアート」と「ドローイング」です。" + +#: ../../page.rst:1539 ddea020aa19e4cfb9411b83e555d96a9 +msgid "" +"a list of dictionaries. Each dictionary item contains one or more single " +"draw commands belonging together: they have the same properties (colors, " +"dashing, etc.). This is called a **\"path\"** in PDF, so we adopted that " +"name here, but the method **works for all document types**." +msgstr "" +"辞書のリスト。各辞書アイテムには、同じプロパティ(色、破線など)を持つ1つ以上の単一の描画コマンドが含まれます。これらはPDFでは " +"**\"path\"** と呼ばれ、ここではその名前を採用していますが、このメソッドは **すべてのドキュメントタイプに対して機能します**。" + +#: ../../page.rst:1541 bd74116303164beeb44f4e4f8fcbf086 +msgid "" +"The path dictionary for fill, stroke and fill-stroke paths has been " +"designed to be compatible with class :ref:`Shape`. There are the " +"following keys:" +msgstr "" +"fill、stroke、fill-strokeパスのパス辞書は、:ref:`Shape` " +"クラスと互換性があるように設計されています。次のキーがあります:" + +#: ../../page.rst:1544 ../../page.rst:1609 ../../page.rst:1624 +#: 393408d2b822457095e3087546f62541 458dba2523d345e5a88ca32cea9f2563 +#: 71aa6f1d87a14dec9bc593c4852b8b92 +msgid "Key" +msgstr "キー" + +#: ../../page.rst:1544 ../../page.rst:1609 ../../page.rst:1624 +#: 13b51a65d0cb4aa6a596d32da8fd7e7d 42168f268feb43eb9865e11f0cbbf1b2 +#: c0187aad35ef4383a39f6386051c5813 +msgid "Value" +msgstr "値" + +#: ../../page.rst:1546 ../../page.rst:1611 26696677b08447ffbe0b9f71a9decd1b +#: d0b10a7040ac4b17b8132c59a297125a +msgid "closePath" +msgstr "" + +#: ../../page.rst:1546 ../../page.rst:1553 027accb388bc4b94959cfe7f7db803c4 +#: dc04d87b26674fe088c9f7cfb89d5523 +msgid "Same as the parameter in :ref:`Shape`." +msgstr ":ref:`Shape` のパラメーターと同じです。" + +#: ../../page.rst:1547 e1e087cdb53d426fad06bd4f0f085678 +msgid "color" +msgstr "" + +#: ../../page.rst:1547 a9b159506d3d4cf38106beda57e6d28c +msgid "Stroke color (see :ref:`Shape`)." +msgstr "ストロークカラー(:ref:`Shape` を参照)。" + +#: ../../page.rst:1548 a252e93f6e0b48ffa953488444e5a4bb +msgid "dashes" +msgstr "" + +#: ../../page.rst:1548 8bb14fa16e1845bf935f94c6d4736b01 +msgid "Dashed line specification (see :ref:`Shape`)." +msgstr "破線の仕様(:ref:`Shape` を参照)。" + +#: ../../page.rst:1549 ../../page.rst:1612 aabc659aa5db46d89350b0c21e42c0ec +#: e8709a0b56c94d548479899a2b3094a6 +msgid "even_odd" +msgstr "" + +#: ../../page.rst:1549 1692a5ead1db4cfe96c3a10c2b1a245e +msgid "Fill colors of area overlaps -- same as the parameter in :ref:`Shape`." +msgstr "領域のオーバーラップの塗りつぶし色(:ref:`Shape` を参照)。" + +#: ../../page.rst:1550 6b2d98f984794442816100bbcd18f6d4 +msgid "fill" +msgstr "" + +#: ../../page.rst:1550 ba3a0087c0984e058fa735b55901644d +msgid "Fill color (see :ref:`Shape`)." +msgstr "塗りつぶしカラー(:ref:`Shape` を参照)。" + +#: ../../page.rst:1551 ../../page.rst:1613 2a44968871e149419b600306017f1fef +#: 60e2eb406ea44234a799cfd34b5eb326 +msgid "items" +msgstr "" + +#: ../../page.rst:1551 3a6a1f2e61bf45a49e7f7860c8f095ad +msgid "List of draw commands: lines, rectangles, quads or curves." +msgstr "描画コマンド(直線、四角形、四角形、曲線など)のリスト。" + +#: ../../page.rst:1552 cb9b76e8cf524a5c98ca480839f3d46f +msgid "lineCap" +msgstr "" + +#: ../../page.rst:1552 df94fed78bb8464785ae39848ff3f6ce +msgid "Number 3-tuple, use its max value on output with :ref:`Shape`." +msgstr "3つの数値からなるタプル。出力時に :ref:`Shape` との最大値を使用します。" + +#: ../../page.rst:1553 a74d930f12b74359b983626712abe4bb +msgid "lineJoin" +msgstr "" + +#: ../../page.rst:1554 6eae801db51541a5b15273d2c97236f6 +msgid "fill_opacity" +msgstr "" + +#: ../../page.rst:1554 b9c24e4f81fb4f0a915896089b46ea5e +msgid "fill color transparency (see :ref:`Shape`). (New in v1.18.17)" +msgstr "v1.18.17で新しく追加された塗りつぶしカラーの透明度(:ref:`Shape` を参照)。" + +#: ../../page.rst:1555 1534d530487d404fb1cb27fcc65e6bad +msgid "stroke_opacity" +msgstr "" + +#: ../../page.rst:1555 01c66d6eb5194825a65ce561ef2386a6 +msgid "stroke color transparency (see :ref:`Shape`). (New in v1.18.17)" +msgstr "v1.18.17で新しく追加されたストロークカラーの透明度(:ref:`Shape` を参照)。" + +#: ../../page.rst:1556 ../../page.rst:1614 ../../page.rst:1626 +#: 130c57e63d4248269a2d24efddd1dc87 46412bde9048457dbb1dccd27401636d +#: 806a7134a13246b38ff024e7254d053a +msgid "rect" +msgstr "" + +#: ../../page.rst:1556 1336dc577f4b46768dd98f3adcc9849a +msgid "Page area covered by this path. Information only." +msgstr "このパスでカバーされるページ領域。情報のみ。" + +#: ../../page.rst:1557 ../../page.rst:1615 ../../page.rst:1627 +#: 110f5af1db4a400ab89318a54580360c a83adc3a34334bcca12f1710b2b3d8c3 +#: c3151b8cbcf44e8f827889ac3fb4f99c +msgid "layer" +msgstr "" + +#: ../../page.rst:1557 4cac8f9bb75c445ebada436459309ae7 +msgid "name of applicable Optional Content Group. (New in v1.22.0)" +msgstr "v1.22.0で新しく追加された適用可能なオプションコンテンツグループの名前" + +#: ../../page.rst:1558 ../../page.rst:1616 ../../page.rst:1628 +#: 32a388c0ae9e43b4864436ba5529bb51 6884400e8f7b455a84bd11dc8109d30d +#: a3159f4039944ea495f41a21ba9863b0 +msgid "level" +msgstr "" + +#: ../../page.rst:1558 edae31d7cf5044199b5f797d6674b0ef +msgid "the hierarchy level if `extended=True`. (New in v1.22.0)" +msgstr "v1.22.0で新しく追加された `extended=True` の場合の階層レベル" + +#: ../../page.rst:1559 098b52475c684292aa26bb7288ead6ce +msgid "seqno" +msgstr "" + +#: ../../page.rst:1559 8f2a52910e614ac0aba436bc05c8db59 +msgid "command number when building page appearance. (New in v1.19.0)" +msgstr "v1.19.0で新しく追加されたページ表示を構築する際のコマンド番号" + +#: ../../page.rst ../../page.rst:1560 ../../page.rst:1618 ../../page.rst:1633 +#: 0187845dfd2d4ee084a262cb36b8985c 0eadb3e4525c45bb86f3b1af78e7cf34 +#: 1158885c029f44aaa3aec86f864b90ff 16618b69d5694c7e8c3de116af457f29 +#: 2d3676885927439284814cb8621c96e1 3ad717fc68c14f8dbba07109137899c6 +#: 3bd25acd69dd4c2fb54e978b7031a741 3ccf929dae10448c8b83d52d1f9d984f +#: 54e233a919754060825591f98bd4c900 58befc90975f4205a4182a917cf36d8d +#: 67932413ef2145cba61ec47fa54f7b7e 6840933d410c401dbbd55c4201013c06 +#: 6ded2cd06d63442e8dfd90dfe1b81f98 9de10b64fe424db78eced2a5f76d03ae +#: b897b219782646a68f777df1d12f2e16 cfb41b6757a04aa08b31c12a2aca5952 +#: fc7913bd3618478385f9fbee9046c79a fd791ed75f8d4613941e44c51417f2cf +msgid "type" +msgstr "" + +#: ../../page.rst:1560 1209bf05f4134e49be33f9d80ee2c7fb +msgid "type of this path. (New in v1.18.17)" +msgstr "このパスのタイプ。 (v1.18.17で新規追加)" + +#: ../../page.rst:1561 ../../page.rst:1704 53eee255c71e4a8fb09a4bb18a1ef79f +#: 9dc965006ec748258c1a164c5bdc705a +msgid "width" +msgstr "" + +#: ../../page.rst:1561 1f2f3ef30724469bb3d678374e796801 +msgid "Stroke line width. (see :ref:`Shape`)." +msgstr "ストロークラインの幅(:ref:`Shape` を参照)。" + +#: ../../page.rst:1564 81ef9447d1ec4960929d0585a4608a21 +msgid "" +"Key `\"opacity\"` has been replaced by the new keys `\"fill_opacity\"` " +"and `\"stroke_opacity\"`. This is now compatible with the corresponding " +"parameters of :meth:`Shape.finish`. (Changed in v1.18.17)" +msgstr "" +"*(バージョン1.18.17で変更)* キー `\"opacity\"` は新しいキー `\"fill_opacity\"` および " +"\"stroke_opacity\"` に置き換えられました。これは、:meth:`Shape.finish` " +"の対応するパラメーターと互換性があるようになりました。" + +#: ../../page.rst:1567 b167ed1c31b143ac9dcf19ad183e37b4 +msgid "" +"For paths other than groups or clips, key `\"type\"` takes one of the " +"following values:" +msgstr "グループやクリップ以外のパスに対して、キー `\"type\"` は次のいずれかの値を取ります:" + +#: ../../page.rst:1569 3b3d69ec50114ee18f0080c18ec9f065 +msgid "" +"**\"f\"** -- this is a *fill-only* path. Only key-values relevant for " +"this operation have a meaning, not applicable ones are present with a " +"value of ``None``: `\"color\"`, `\"lineCap\"`, `\"lineJoin\"`, " +"`\"width\"`, `\"closePath\"`, `\"dashes\"` and should be ignored." +msgstr "" +"**\"f\"** - これは *fill-only* のパスです。この操作に関連するキーの値のみが意味を持ち、適用されないものは値が " +"``None`` で存在します。: " +"`\"color\"`、`\"lineCap\"`、\"lineJoin\"`、`\"width\"`、`\"closePath\"`、`\"dashes\"`" +" は無視すべきです。" + +#: ../../page.rst:1570 988a037de5f24f12aec744aacf3cf3e8 +msgid "" +"**\"s\"** -- this is a *stroke-only* path. Similar to previous, key " +"`\"fill\"` is present with value ``None``." +msgstr "" +"**\"s\"** - これは *stroke-only* のパスです。以前と同様に、キー `\"fill\"` は値が ``None`` " +"で存在します。" + +#: ../../page.rst:1571 07bfe3bf200d42a2855eceffd0a4fe03 +msgid "" +"**\"fs\"** -- this is a path performing combined *fill* and *stroke* " +"operations." +msgstr "**\"fs\"** - これは *fill* と *stroke* の組み合わせ操作を実行するパスです" + +#: ../../page.rst:1573 03e22e6694944c63910532f9f142335a +msgid "Each item in `path[\"items\"]` is one of the following:" +msgstr "`path[\"items\"]` の各アイテムは、次のいずれかです:" + +#: ../../page.rst:1575 3c00827256044dbfbc79a52cb11e361b +msgid "`(\"l\", p1, p2)` - a line from p1 to p2 (:ref:`Point` objects)." +msgstr "`(\"l\", p1, p2)` - p1 から p2 への直線(:ref:`Point` オブジェクト)。" + +#: ../../page.rst:1576 91ede118ec4c46d1a6aab2b448107ce7 +msgid "" +"`(\"c\", p1, p2, p3, p4)` - cubic Bézier curve **from p1 to p4** (p2 and " +"p3 are the control points). All objects are of type :ref:`Point`." +msgstr "" +"`(\"c\", p1, p2, p3, p4)` - **p1 から p4 へ** の三次ベジエ曲線(p2 と p3 " +"は制御点です)。すべてのオブジェクトは :ref:`Point` タイプです。" + +#: ../../page.rst:1577 9f78d0c38d1c4ed1909ddfafc7d7088f +msgid "" +"`(\"re\", rect, orientation)` - a :ref:`Rect`. Multiple rectangles within" +" the same path are now detected (changed in v1.18.17). Integer " +"`orientation` is 1 resp. -1 indicating whether the enclosed area is " +"rotated left (1 = anti-clockwise), or resp. right [#f7]_ (changed in " +"v1.19.2)." +msgstr "" +"`(\"re\", rect, orientation)` - a :ref:`Rect` 同じパス内の複数の矩形が検出されます " +"(v1.18.17で変更)。整数のorientationは、含まれる領域が左に回転しているかどうかを示します (1 = " +"反時計回り)。または右に回転しているかどうかを示します [#f7]_ (v1.19.2で変更)" + +#: ../../page.rst:1578 371921535bb54730bd6e6ae40f6f0ec6 +msgid "" +"`(\"qu\", quad)` - a :ref:`Quad`. 3 or 4 consecutive lines are detected " +"to actually represent a :ref:`Quad` (changed in v1.19.2:). (New in " +"v1.18.17)" +msgstr "" +"`(\"qu\", quad)` - " +":ref:`Quad`。*バージョン1.18.17で新しく追加され、バージョン1.19.2で変更されました:* 3つまたは4つの連続する線が " +"Quad を実際に表すことが検出されます。" + +#: ../../page.rst:1582 2fdae2c2334145c398860d9a461ae5d0 +#, fuzzy +msgid "" +"Using class :ref:`Shape`, you should be able to recreate the original " +"drawings on a separate (PDF) page with high fidelity under normal, not " +"too sophisticated circumstances. Please see the following comments on " +"restrictions. A coding draft can be found in :ref:`How to Extract " +"Drawings `." +msgstr "" +"クラス :ref:`Shape` " +"を使用すると、通常の、あまり洗練されていない状況で、高い忠実度で元の図面を別の(PDF)ページに再作成できるはずです。制約事項に関する以下のコメントをご覧ください。コーディングのドラフトは、:ref:`FAQ`" +" の「図面の抽出」章のセクションにあります。" + +#: ../../page.rst:1584 72acac2299b34c81a392f3bccec0e477 +msgid "" +"Specifying `extended=True` significantly alters the output. Most " +"importantly, new dictionary types are present: \"clip\" and \"group\". " +"All paths will now be organized in a hierarchic structure which is " +"encoded by the new integer key \"level\", the hierarchy level. Each group" +" or clip establishes a new hierarchy, which applies to all subsequent " +"paths having a *larger* level value. (New in v1.22.0)" +msgstr "" +"extended=Trueを指定すると、出力が大幅に変更されます。最も重要なのは、新しい辞書タイプが存在することです: \"clip\" および " +"\"group\"。すべてのパスは、新しい整数キー " +"\"level\"、つまり階層レベルでエンコードされた階層構造に組織されます。各グループまたはクリップは、新しい階層を確立し、それ以降のすべてのパスに適用されます。" +" (v1.22.0で新規)" + +#: ../../page.rst:1586 78f9993923e4497095371055bed7337c +msgid "" +"Any path with a smaller level value than its predecessor will end the " +"scope of (at least) the preceding hierarchy level. A \"clip\" path with " +"the same level as the preceding clip will end the scope of that clip. " +"Same is true for groups. This is best explained by an example::" +msgstr "" +"前任者よりも小さいレベル値を持つパスは、少なくとも前の階層レベルのスコープを終了します。前のクリップと同じレベルの \"clip\" " +"パスは、そのクリップのスコープを終了します。同様に、グループも同じです。これは、次の例で最もよく説明されます:" + +#: ../../page.rst:1602 518670f803774fef8d597ef4f8582fff +msgid "" +"The clip in line 0 applies to line including line 7. Group in line 2 " +"applies to lines 3 to 5, clip in line 3 only applies to line 4." +msgstr "行0の「clip」は行7を含む行全体に適用されます。行2の「group」は行3から5までの行に適用され、行3の「clip」は行4にのみ適用されます。" + +#: ../../page.rst:1604 ff542185175047f39093ef6713daa77e +msgid "" +"\"stroke\" in line 4 is under control of \"group\" in line 2 and \"clip\"" +" in line 3 (which in turn is a subset of line 0 clip)." +msgstr "行4の「stroke」は行2の「group」と行3の「clip」(それ自体が行0の「clip」のサブセットです)の制御下にあります。" + +#: ../../page.rst:1606 4c9eb2889b1341abae507671eeb4430f +msgid "" +"**\"clip\"** dictionary. Its values (most importantly \"scissor\") remain" +" valid / apply as long as following dictionaries have a **larger " +"\"level\"** value." +msgstr "**「clip」** 辞書。その値(特に「scissor」)は、後続の辞書が「level」の値が大きい限り、有効で適用されます。" + +#: ../../page.rst:1611 ../../page.rst:1612 ../../page.rst:1613 +#: ../../page.rst:1614 ../../page.rst:1615 ../../page.rst:1616 +#: ../../page.rst:1626 ../../page.rst:1627 ../../page.rst:1628 +#: 00708434df3e4b10b0b7ca3b1961e46e 36261a32f85343e18df60acdef3de80e +#: 3b1be494f64548488186845048e73b93 6894568d50fd4817a0495ad67f263519 +#: 6bef9c67c7874524adc53b80c6c509c2 844ec8c3cb634f2a8e56a264fd7c5b76 +#: 97b5b323326742448390087dd206b52e b4a138abbc7547c688d138332903b9bd +#: fe4aa6cbb7544adbad42a2545538a195 +msgid "Same as in \"stroke\" or \"fill\" dictionaries" +msgstr "「stroke」または「fill」の辞書と同じ" + +#: ../../page.rst:1617 5799467150d447b8975d22e3ed57991d +msgid "scissor" +msgstr "" + +#: ../../page.rst:1617 98ed3a3f26a5468e8dd3a2f702b807ce +msgid "the clip rectangle" +msgstr "クリップ矩形" + +#: ../../page.rst:1618 0ac8a416c31246f18dffe4ea218c4f41 +msgid "\"clip\"" +msgstr "" + +#: ../../page.rst:1621 ab06e320a99a484184c964948f721d12 +msgid "" +"\"group\" dictionary. Its values remain valid (apply) as long as " +"following dictionaries have a **larger \"level\"** value. Any dictionary " +"with an equal or lower level end this group." +msgstr "「group」辞書。その値は、後続の辞書が「level」の値が大きい限り、有効で適用されます。同じレベルまたはそれ以下の辞書がこのグループを終了します。" + +#: ../../page.rst:1629 21f70ce4824140369709f403395aeb58 +msgid "isolated" +msgstr "" + +#: ../../page.rst:1629 305aef306fdd4e899633800405864d16 +msgid "(bool) Whether this group is isolated" +msgstr "(ブール)このグループが孤立しているかどうか" + +#: ../../page.rst:1630 72571437b8314dd3b58ec87c7e7512eb +msgid "knockout" +msgstr "" + +#: ../../page.rst:1630 a7ec60d6c46a4878be2e0e27e387ecdf +msgid "(bool) Whether this is a \"Knockout Group\"" +msgstr "(ブール)これが「Knockout Group」であるかどうか" + +#: ../../page.rst:1631 a12505526ca0472192aca874933194ea +msgid "blendmode" +msgstr "" + +#: ../../page.rst:1631 787b1336af0a45f8ad2739d7489abf1d +msgid "Name of the BlendMode, default is \"Normal\"" +msgstr "BlendModeの名前、デフォルトは「Normal」" + +#: ../../page.rst:1632 bb99f31673f2428baf49ad239fdaef45 +msgid "opacity" +msgstr "" + +#: ../../page.rst:1632 5cfdfb34fceb40dab8047cfe1567f318 +msgid "Float value in range [0, 1]." +msgstr "範囲[0、1]内の浮動小数点値" + +#: ../../page.rst:1633 e7316c83d8c7478bb2e00fdd22c6b896 +msgid "\"group\"" +msgstr "" + +#: ../../page.rst:1636 da8f4621891e416f828a89cb326b02ab +msgid "" +"The method is based on the output of :meth:`Page.get_cdrawings` -- which " +"is much faster, but requires somewhat more attention processing its " +"output." +msgstr "" +"このメソッドは、 :meth:`Page.get_cdrawings` " +"の出力に基づいています。これははるかに高速ですが、出力の処理には多少の注意が必要です。" + +#: ../../page.rst:1640 32a464cfe34343d9b6204cad42d58265 +msgid "New in v1.18.0" +msgstr "v1.18.0で新規追加" + +#: ../../page.rst:1641 5557f6c323b047e8a79e12a115c5e18d +msgid "Changed in v1.18.17" +msgstr "v1.18.17で変更" + +#: ../../page.rst:1642 d7882895ef5c47d9ac796cd7abccab8c +msgid "Changed in v1.19.0: add \"seqno\" key, remove \"clippings\" key" +msgstr "v1.19.0で変更: “seqno”キーを追加、“clippings”キーを削除" + +#: ../../page.rst:1643 2917e8fbf31d401eb7239aa8197386d5 +msgid "" +"Changed in v1.19.1: \"color\" / \"fill\" keys now always are either are " +"RGB tuples or `None`. This resolves issues caused by exotic colorspaces." +msgstr "" +"v1.19.1で変更: “color” / " +"“fill”キーは常にRGBタプルまたはNoneのいずれかであるように変更。これにより、異常なカラースペースに起因する問題が解消されます。" + +#: ../../page.rst:1644 f91d5d6bf1454c399ff142ef9d09ac82 +msgid "" +"Changed in v1.19.2: add an indicator for the *\"orientation\"* of the " +"area covered by an \"re\" item." +msgstr "v1.19.2で変更: \"re\" アイテムでカバーされる領域の *\"orientation\"* を示すインジケーターを追加" + +#: ../../page.rst:1645 25ed451c52654edb900e8f2b2c3f6975 +msgid "" +"Changed in v1.22.0: add new key `\"layer\"` which contains the name of " +"the Optional Content Group of the path (or `None`)." +msgstr "v1.22.0で変更: 新しいキー `\"layer\"` を追加。これにはパスのオプションコンテンツグループの名前が含まれます(またはNone)。" + +#: ../../page.rst:1646 87f19548799a4a4caf065edc85c41c04 +msgid "" +"Changed in v1.22.0: add parameter `extended` to also return clipping and " +"group paths." +msgstr "v1.22.0で変更: クリッピングとグループパスも返すようにするためのパラメーター `extended` を追加" + +#: ../../page.rst:1654 7c7f3eeb00d74a00b9419f994f8d9534 +msgid "" +"Extract the vector graphics on the page. Apart from following technical " +"differences, functionally equivalent to :meth:`Page.get_drawings`, but " +"much faster:" +msgstr "" +"ページ上のベクトルグラフィックスを抽出します。技術的な違いを除いて、:meth:`Page.get_drawings` " +"と機能的に同等ですが、はるかに高速です:" + +#: ../../page.rst:1656 023de9744ac9411b958d3054d8e3c115 +msgid "" +"Every path type only contains the relevant keys, e.g. a stroke path has " +"no `\"fill\"` color key. See comment in method :meth:`Page.get_drawings`." +msgstr "" +"各パスタイプには関連するキーのみ含まれます。たとえば、ストロークパスには `\"fill\"` カラーキーはありません。 " +":meth:`Page.get_drawings` メソッドのコメントを参照してください。" + +#: ../../page.rst:1657 ae5e1a20ce2349fd85b839dec6d625e9 +msgid "" +"Coordinates are given as :data:`point_like`, :data:`rect_like` and " +":data:`quad_like` **tuples** -- not as :ref:`Point`, :ref:`Rect`, " +":ref:`Quad` objects." +msgstr "" +"座は :data:`point_like`、:data:`rect_like`、:data:`quad_like` の **tuples** " +"として与えられます。:ref:`Point`、:ref:`Rect`、:ref:`Quad` オブジェクトとしてではなく。" + +#: ../../page.rst:1659 672220608ee04133868cae370cffa4de +msgid "" +"If performance is a concern, consider using this method: Compared to " +"versions earlier than 1.18.17, you should see much shorter response " +"times. We have seen pages that required 2 seconds then, now only need 200" +" ms with this method." +msgstr "性能が懸念される場合、このメソッドを使用することを検討してください。バージョン1.18.17より前と比較して、応答時間が大幅に短縮されるはずです。以前は2秒かかったページが、このメソッドを使用すると200ミリ秒で完了する場合もあります。" + +#: ../../page.rst:1663 ba2fc7d605a447d68614c99ed6bcf5a4 +msgid "New in v1.18.17" +msgstr "新機能(v1.18.17)" + +#: ../../page.rst:1664 36e4da9e6b204e919d667956b868c5bb +msgid "Changed in v1.19.0: removed \"clippings\" key, added \"seqno\" key." +msgstr "v1.19.0で変更:「clippings」キーを削除、新たに「seqno」キーを追加。" + +#: ../../page.rst:1665 088a36a4a3a64ca5ad2314d3b87b768b +msgid "Changed in v1.19.1: always generate RGB color tuples." +msgstr "v1.19.1で変更:常にRGBカラータプルを生成します。" + +#: ../../page.rst:1666 5e3d920ef2fa413c9c20d2da38d1d0c9 +msgid "" +"Changed in v1.22.0: added new key `\"layer\"` which contains the name of " +"the Optional Content Group of the path (or `None`)." +msgstr "v1.22.0で変更:新たに「layer」というキーが追加され、パスのオプションコンテンツグループの名前(またはNone)が含まれます。" + +#: ../../page.rst:1667 443a908c4dfb4f3e82db28d4019f2bd8 +#, fuzzy +msgid "" +"Changed in v1.22.0: added parameter `extended` to also return clipping " +"paths." +msgstr "v1.22.0で変更:クリッピングパスを返すためのパラメータ「extended」が追加されました。" + +#: ../../page.rst:1674 471ae179e62947fe816a041b22a17f96 +msgid "" +"PDF only: Return a list of fonts referenced by the page. Wrapper for " +":meth:`Document.get_page_fonts`." +msgstr "PDFのみ:ページで参照されているフォントのリストを返します。:meth:`Document.get_page_fonts` のラッパーです。" + +#: ../../page.rst:1679 678652f759594090a964a286b0eb5416 +msgid "" +"PDF only: Return a list of images referenced by the page. Wrapper for " +":meth:`Document.get_page_images`." +msgstr "PDFのみ:ページで参照されているイメージのリストを返します。:meth:`Document.get_page_images` のラッパーです。" + +#: ../../page.rst:1688 3f5a4bd61cb647f89854e64498c7d4bc +msgid "" +"Return a list of meta information dictionaries for all images displayed " +"by the page. This works for all document types." +msgstr "" + +#: ../../page.rst:1690 62d3cc2b9c8b4122a4b3c7bd6e6f6244 +msgid "" +"Compute the MD5 hashcode for each encountered image, which allows " +"identifying image duplicates. This adds the key `\"digest\"` to the " +"output, whose value is a 16 byte `bytes` object. (New in v1.18.13)" +msgstr "" +"*新機能(v1.18.13)*:各イメージのMD5ハッシュコードを計算し、イメージの重複を識別できるようにします。これにより、出力に " +"`\"digest\"` キーが追加され、その値は16バイトのバイトオブジェクトです。" + +#: ../../page.rst:1692 212bbe5e3a9e4a398d240ae31071e3a1 +msgid "" +"**PDF only.** Try to find the :data:`xref` for each image. Implies " +"`hashes=True`. Adds the `\"xref\"` key to the dictionary. If not found, " +"the value is 0, which means, the image is either \"inline\" or its xref " +"is undetectable for some reason. Please note that this option has an " +"extended response time, because the MD5 hashcode will be computed at " +"least two times for each image with an xref. (New in v1.18.13)" +msgstr "" + +#: ../../page.rst:1695 80d27211efe549a2bba271837f108080 +msgid "" +"A list of dictionaries. This includes information for **exactly those** " +"images, that are shown on the page -- including *\"inline images\"*. The " +"dictionary layout is similar to that of image blocks in " +"`page.get_text(\"dict\")`. In contrast to images included in " +":meth:`Page.get_text`, image **binary content** is not loaded by this " +"method, which drastically reduces memory usage. Another difference is " +"that image detection is not restricted to the visible part of the page or" +" any ``clip`` parameter: method :meth:`Page.get_text` will only extract " +"images **fully contained** in the provided ``clip``. =============== " +"=============================================================== **Key**" +" **Value** =============== " +"=============================================================== number" +" block number (``int``) bbox image bbox on page, " +":data:`rect_like` width original image width (``int``) height" +" original image height (``int``) cs-name colorspace name" +" (``str``) colorspace colorspace.n (``int``) xres " +"resolution in x-direction (``int``) yres resolution in " +"y-direction (``int``) bpc bits per component (``int``) size" +" storage occupied by image (``int``) digest MD5 " +"hashcode (``bytes``), if ``hashes`` is true xref image " +":data:`xref` or 0, if *xrefs* is true transform matrix transforming" +" image rect to bbox, :data:`matrix_like` has-mask whether the " +"image is transparent and has a mask (``bool``) =============== " +"=============================================================== Multiple" +" occurrences of the same image are always reported. You can detect " +"duplicates by comparing their `digest` values." +msgstr "" + +#: ../../page.rst:1695 df3d733c760b42caae6ddeb348dad159 +#, fuzzy +msgid "" +"A list of dictionaries. This includes information for **exactly those** " +"images, that are shown on the page -- including *\"inline images\"*. The " +"dictionary layout is similar to that of image blocks in " +"`page.get_text(\"dict\")`." +msgstr "" +"辞書のリスト。これには、ページに表示されているイメージに関する情報が含まれます。:meth:`Page.get_text` " +"で含まれている画像ブロックとは異なり、画像の **binary content** " +"は読み込まれないため、メモリの使用量が大幅に削減されます。辞書のレイアウトは、ページ.get_text(\"dict\")内のイメージブロックと似ています。" + +#: ../../page.rst:1697 bb7f0eb8920446e2a95b41dad2cd03aa +msgid "" +"In contrast to images included in :meth:`Page.get_text`, image **binary " +"content** is not loaded by this method, which drastically reduces memory " +"usage. Another difference is that image detection is not restricted to " +"the visible part of the page or any ``clip`` parameter: method " +":meth:`Page.get_text` will only extract images **fully contained** in the" +" provided ``clip``." +msgstr "" + +#: ../../page.rst:1700 d6808dea01c2462db5d3f26ea9e8ce99 +msgid "**Key**" +msgstr "**キー**" + +#: ../../page.rst:1700 47eb29fdafe740e1bc3b6ad8db5aaa19 +msgid "**Value**" +msgstr "**値**" + +#: ../../page.rst:1702 c8347895111c4af184ddba4555adb866 +msgid "number" +msgstr "" + +#: ../../page.rst:1702 828deab1c726405abdb2e4101d57e251 +#, fuzzy +msgid "block number (``int``)" +msgstr "ブロック番号 *(整数)*" + +#: ../../page.rst:1703 784154b86fb047a98620096b1802e3b4 +msgid "bbox" +msgstr "" + +#: ../../page.rst:1703 635228e6f5b64be6836b3b52a61020c0 +msgid "image bbox on page, :data:`rect_like`" +msgstr "ページ上の画像の境界ボックス、:data:`rect_like`" + +#: ../../page.rst:1704 32b217ddd7ea4689b499fa074a537239 +#, fuzzy +msgid "original image width (``int``)" +msgstr "元の画像の幅 *(整数)*" + +#: ../../page.rst:1705 856cc2edb6404f7b8efd5d034d3c33b3 +msgid "height" +msgstr "" + +#: ../../page.rst:1705 0fcad0d7aecc4417a327e0391c7baca0 +#, fuzzy +msgid "original image height (``int``)" +msgstr "元の画像の高さ *(整数*" + +#: ../../page.rst:1706 15bfa86900ab44958e5e421c870cc709 +msgid "cs-name" +msgstr "" + +#: ../../page.rst:1706 ba058ffc7e2b44fda9eb69e699988a0d +#, fuzzy +msgid "colorspace name (``str``)" +msgstr "カラースペース名 *(文字列)*" + +#: ../../page.rst:1707 7dff7ef3d704460ba32f37e2c4c42757 +msgid "colorspace" +msgstr "" + +#: ../../page.rst:1707 c9e510a635cf4f55a06406539518ac8a +#, fuzzy +msgid "colorspace.n (``int``)" +msgstr "colorspace.n *(整数)*" + +#: ../../page.rst:1708 4e9e21dec18a471894429e8831d92398 +msgid "xres" +msgstr "" + +#: ../../page.rst:1708 6183ea28a0f94b6ba7bec837d338a1d7 +#, fuzzy +msgid "resolution in x-direction (``int``)" +msgstr "x方向の解像度 *(整数)*" + +#: ../../page.rst:1709 3f1c22a1d82f4c27868a8058a370756b +msgid "yres" +msgstr "" + +#: ../../page.rst:1709 0766826d58a84e3d89b3143c5393d545 +#, fuzzy +msgid "resolution in y-direction (``int``)" +msgstr "y方向の解像度 *(整数)*" + +#: ../../page.rst:1710 ae01cbb1647e4a2e91845e39036178f6 +msgid "bpc" +msgstr "" + +#: ../../page.rst:1710 3308071e1b894f5da96b0e69d9c7e20d +#, fuzzy +msgid "bits per component (``int``)" +msgstr "コンポーネントごとのビット数 *(整数)*" + +#: ../../page.rst:1711 c8e5eb4d00bc44708d3f5230422b946e +msgid "size" +msgstr "" + +#: ../../page.rst:1711 f816c64b60fc4726b7fcfa2e8ff41661 +#, fuzzy +msgid "storage occupied by image (``int``)" +msgstr "画像が占めるストレージ容量 *(整数)*" + +#: ../../page.rst:1712 062e8140ddd54d8ca8c3949837c8fccb +msgid "digest" +msgstr "" + +#: ../../page.rst:1712 147f3d5fe07a482bbc68b41c972925e0 +#, fuzzy +msgid "MD5 hashcode (``bytes``), if ``hashes`` is true" +msgstr "MD5ハッシュコード(バイト)、*hashes* がtrueの場合" + +#: ../../page.rst:1713 851b93bd375b4505a170f6c83584e6f4 +msgid "xref" +msgstr "" + +#: ../../page.rst:1713 9ec3c23efd3f4c1c816d68aca11e1d0e +msgid "image :data:`xref` or 0, if *xrefs* is true" +msgstr "画像の :data:`xref` または0、*xrefs* がtrueの場合" + +#: ../../page.rst:1714 e490715ce16c43d891f3ddd27a7cfa36 +msgid "transform" +msgstr "" + +#: ../../page.rst:1714 fd7ef4ea5f644766af5e84743c319664 +msgid "matrix transforming image rect to bbox, :data:`matrix_like`" +msgstr "画像の境界ボックスをbboxに変換するための行列、:data:`matrix_like`" + +#: ../../page.rst:1715 c529fa1784e349fd8bf0d11b9d0a28da +msgid "has-mask" +msgstr "" + +#: ../../page.rst:1715 9ee4fd2f5e3a45bc8b7d9f1c1e033133 +msgid "whether the image is transparent and has a mask (``bool``)" +msgstr "" + +#: ../../page.rst:1718 b4e862f8f943414eb557a737640e444a +msgid "" +"Multiple occurrences of the same image are always reported. You can " +"detect duplicates by comparing their `digest` values." +msgstr "同じ画像の複数の出現は常に報告されます。digestの値を比較して重複を検出できます。" + +#: ../../page.rst:1722 0c95320613674728b7f2d34d98acbc80 +msgid "New in v1.18.11" +msgstr "*新機能(v1.18.11)*" + +#: ../../page.rst:1723 e07cc51bcf1b47f6b2085199addd75ce +msgid "" +"Changed in v1.18.13: added image MD5 hashcode computation and " +":data:`xref` search." +msgstr "*v1.18.13で変更:* イメージのMD5ハッシュコードの計算と :data:`xref` の検索が追加されました。" + +#: ../../page.rst:1730 5c8ef65bf74b405e9f9fd6a701444e1e +msgid "" +"PDF only: Return a list of Form XObjects referenced by the page. Wrapper " +"for :meth:`Document.get_page_xobjects`." +msgstr "" +"PDFのみ:ページで参照されているフォームXObjectのリストを返します。:meth:`Document.get_page_xobjects` " +"のラッパーです。" + +#: ../../page.rst:1738 111cc4a11e304e0cbdc5d409a89bf667 +msgid "" +"PDF only: Return boundary boxes and transformation matrices of an " +"embedded image. This is an improved version of " +":meth:`Page.get_image_bbox` with the following differences:" +msgstr "" +"PDFのみ:埋め込み画像の境界ボックスと変換行列を返します。これは :meth:`Page.get_image_bbox` " +"の改良バージョンで、次の違いがあります:" + +#: ../../page.rst:1740 041da4952e984e1e8d4504ee464794af +msgid "" +"There is no restriction on **how** the image is invoked (by the page or " +"one of its Form XObjects). The result is always complete and correct." +msgstr "" +"画像が **どのように** " +"呼び出されるかに制限はありません(ページまたはそのフォームXObjectのいずれかによって)。結果は常に完全かつ正確です。" + +#: ../../page.rst:1741 b0d24a61fd924429baec834bb3136faf +msgid "" +"The result is a list of :ref:`Rect` or (:ref:`Rect`, :ref:`Matrix`) " +"objects -- depending on *transform*. Each list item represents one " +"location of the image on the page. Multiple occurrences might not be " +"detectable by :meth:`Page.get_image_bbox`." +msgstr "" +"結果は :ref:`Rect` または(:ref:`Rect`、:ref:`Matrix`)オブジェクトのリストです(*transform* " +"に応じて異なります)。各リスト項目は、ページ上の画像の1つの場所を表します。:meth:`Page.get_image_bbox` " +"では複数の出現を検出できない場合があります。" + +#: ../../page.rst:1742 675b184de839446d8241dc2643bead1a +msgid "" +"The method invokes :meth:`Page.get_image_info` with `xrefs=True` and " +"therefore has a noticeably longer response time than " +":meth:`Page.get_image_bbox`." +msgstr "" +"このメソッドは、`xrefs=True` で :meth:`Page.get_image_info` " +"を呼び出すため、:meth:`Page.get_image_bbox` よりも明らかに長い応答時間がかかります。" + +#: ../../page.rst:1744 5cf64fdd46de46ba9b4a4e4b18950e0e +msgid "" +"an item of the list :meth:`Page.get_images`, or the reference **name** " +"entry of such an item (item[7]), or the image :data:`xref`." +msgstr "" +":meth:`Page.get_images` のリストアイテム、そのようなアイテムの参照 **name** " +"エントリ(item[7])、または画像の :data:`xref`。" + +#: ../../page.rst:1745 b5c476d3b5434c67ac0f8cda2819b352 +msgid "" +"also return the matrix used to transform the image rectangle to the bbox " +"on the page. If true, then tuples `(bbox, matrix)` are returned." +msgstr "画像の矩形をbboxに変換するために使用される行列も返すかどうか。trueの場合、タプル `(bbox, matrix)` が返されます。" + +#: ../../page.rst:1748 db71cfbadd6a4e8c9547136da73da74c +msgid "" +"Boundary boxes and respective transformation matrices for each image " +"occurrence on the page. If the item is not on the page, an empty list " +"`[]` is returned." +msgstr "ページ上の各画像出現に対する境界ボックスとそれに対応する変換行列。アイテムがページ上にない場合、空のリスト `[]` が返されます。" + +#: ../../page.rst:1752 78b5451c3e764c18b742e71138eff3b1 +#, fuzzy +msgid "New in v1.18.13" +msgstr "*v1.18.13で新規追加*" + +#: ../../page.rst:1762 64c2680b84764f1c8c93f2ad607ac44b +msgid "" +"PDF only: Return boundary box and transformation matrix of an embedded " +"image." +msgstr "PDFのみ:埋め込まれたイメージの境界ボックスと変換行列を返します。" + +#: ../../page.rst:1764 692329adf807444293146b7cf42686eb +msgid "" +"an item of the list :meth:`Page.get_images` with *full=True* specified, " +"or the reference **name** entry of such an item, which is item[-3] (or " +"item[7] respectively)." +msgstr "" +":meth:`Page.get_images` のリストのアイテムで *full=True* " +"が指定されているもの、またはそのようなアイテムの参照名 **name** エントリ、つまりitem[-3](またはitem[7])。" + +#: ../../page.rst:1765 83bfa287d98c4c21b0148f5687ce47c8 +msgid "" +"return the matrix used to transform the image rectangle to the bbox on " +"the page (new in v1.18.11). Default is just the bbox. If true, then a " +"tuple `(bbox, matrix)` is returned." +msgstr "" +"*(v1.18.11で新規)* イメージの矩形をページのbboxに変換するために使用される行列も返すかどうか。デフォルトはbboxのみです。 " +"trueの場合、タプル `(bbox, matrix)` が返されます。" + +#: ../../page.rst:1767 dc06cbcf6ec247c69b6112d38ceb8610 +msgid ":ref:`Rect` or (:ref:`Rect`, :ref:`Matrix`)" +msgstr ":ref:`Rect` または(:ref:`Rect`、:ref:`Matrix`)" + +#: ../../page.rst:1768 cbbf9a59b1394b85bbaa0936b1c976ac +msgid "" +"the boundary box of the image -- optionally also its transformation " +"matrix. |history_begin| * (Changed in v1.16.7): If the page in fact " +"does not display this image, an infinite rectangle is returned now. In " +"previous versions, an exception was raised. Formally invalid parameters " +"still raise exceptions. * (Changed in v1.17.0): Only images referenced " +"directly by the page are considered. This means that images occurring in " +"embedded PDF pages are ignored and an exception is raised. * (Changed in " +"v1.18.5): Removed the restriction introduced in v1.17.0: any item of the " +"page's image list may be specified. * (Changed in v1.18.11): Partially " +"re-instated a restriction: only those images are considered, that are " +"either directly referenced by the page or by a Form XObject directly " +"referenced by the page. * (Changed in v1.18.11): Optionally also return " +"the transformation matrix together with the bbox as the tuple `(bbox, " +"transform)`. |history_end|" +msgstr "" + +#: ../../page.rst:1768 78cc0ccf99754626b7f93f18aa358e3d +msgid "" +"the boundary box of the image -- optionally also its transformation " +"matrix." +msgstr "イメージの境界ボックス - オプションでその変換行列も。" + +#: ../../page.rst:1772 157bed932e804690af8e0f684d405ce0 +#, fuzzy +msgid "" +"(Changed in v1.16.7): If the page in fact does not display this image, an" +" infinite rectangle is returned now. In previous versions, an exception " +"was raised. Formally invalid parameters still raise exceptions." +msgstr "" +"*(v1.16.7で変更)* " +"–実際にはこのイメージを表示していない場合、無限の矩形が返されるようになりました。以前のバージョンでは、例外が発生しました。形式的に無効なパラメータは引き続き例外を発生させます。" + +#: ../../page.rst:1773 39aa3e88687f4a0f961dbb8c58e03085 +#, fuzzy +msgid "" +"(Changed in v1.17.0): Only images referenced directly by the page are " +"considered. This means that images occurring in embedded PDF pages are " +"ignored and an exception is raised." +msgstr "" +"*(v1.17.0で変更)* " +"–ページで直接参照されているイメージのみが考慮されます。これは、埋め込まれたPDFページに存在するイメージは無視され、例外が発生します。" + +#: ../../page.rst:1774 dc88a76dc5d847df8103277879e64b3c +#, fuzzy +msgid "" +"(Changed in v1.18.5): Removed the restriction introduced in v1.17.0: any " +"item of the page's image list may be specified." +msgstr "*(v1.18.5で変更)* –v1.17.0で導入された制限を削除しました:ページのイメージリストの任意のアイテムを指定できます。" + +#: ../../page.rst:1775 d9feb315fe16451fa2e1ee6123c8cc03 +#, fuzzy +msgid "" +"(Changed in v1.18.11): Partially re-instated a restriction: only those " +"images are considered, that are either directly referenced by the page or" +" by a Form XObject directly referenced by the page." +msgstr "" +"*(v1.18.11で変更)* " +"–一部の制限を部分的に再導入しました:ページで直接参照されるイメージまたはページで直接参照されるフォームXObjectによって参照されるイメージのみが考慮されます。" + +#: ../../page.rst:1776 0f1ccad6d7874577a5b4a5e28faef7b2 +#, fuzzy +msgid "" +"(Changed in v1.18.11): Optionally also return the transformation matrix " +"together with the bbox as the tuple `(bbox, transform)`." +msgstr "*(v1.18.11で変更)* –オプションでbboxと一緒に変換行列も返すことができます(タプルとして)。" + +#: ../../page.rst:1782 32dd7086bbec409fa23a3f4ab705f284 +msgid "" +"Be aware that :meth:`Page.get_images` may contain \"dead\" entries i.e. " +"images, which the page **does not display**. This is no error, but " +"intended by the PDF creator. No exception will be raised in this case, " +"but an infinite rectangle is returned. You can avoid this from happening " +"by executing :meth:`Page.clean_contents` before this method." +msgstr "" +":meth:`Page.get_images` " +"には「不要な」エントリが含まれている場合があることに注意してください。これはPDF作成者によって意図的に設定されたものであり、エラーではありません。この場合、例外は発生しませんが、無限の矩形が返されます。このような状況を回避するには、このメソッドの前に" +" :meth:`Page.clean_contents` を実行することができます。" + +#: ../../page.rst:1783 7863c77d70684c50b665e2193bccdd9e +msgid "" +"The image's \"transformation matrix\" is defined as the matrix, for which" +" the expression `bbox / transform == pymupdf.Rect(0, 0, 1, 1)` is true, " +"lookup details here: :ref:`ImageTransformation`." +msgstr "" +"イメージの「変換行列」は、`bbox / transform == pymupdf.Rect(0, 0, 1, 1)` " +"という式が真であるための行列であり、詳細はこちらを参照してください::ref:`ImageTransformation`。" + +#: ../../page.rst:1787 0f47d38ff23b463fad5ec3b1a3d8bfba +msgid "Changed in v1.18.11: return image transformation matrix" +msgstr "変更点 v1.18.11:イメージの変換行列を返すようになりました" + +#: ../../page.rst:1796 294f09963ba24bf18361007a46123547 +msgid "" +"Create an SVG image from the page. Only full page images are currently " +"supported." +msgstr "ページからSVGイメージを作成します。現在、フルページのイメージのみがサポートされています。" + +#: ../../page.rst:1798 5cc04087a9144ff68401bb2066002698 +msgid "a matrix, default is :ref:`Identity`." +msgstr "行列、デフォルトは :ref:`Identity` です。" + +#: ../../page.rst:1799 e2035718de414f338fdbcc01496a7665 +msgid "" +"-- controls how text is represented. ``True`` outputs each character as a" +" series of elementary draw commands, which leads to a more precise text " +"display in browsers, but a **very much larger** output for text-oriented " +"pages. Display quality for ``False`` relies on the presence of the " +"referenced fonts on the current system. For missing fonts, the internet " +"browser will fall back to some default -- leading to unpleasant " +"appearances. Choose ``False`` if you want to parse the text of the SVG. " +"(New in v1.17.5)" +msgstr "" +"テキストの表現方法を制御します。``True`` " +"は、各文字を一連の基本的な描画コマンドとして出力し、これによりブラウザでのテキスト表示がより正確になりますが、テキスト指向のページの場合、非常に大きな出力になります。``False``" +" " +"の場合、表示品質は現在のシステムに参照されるフォントの存在に依存します。欠落しているフォントの場合、インターネットブラウザはいくつかのデフォルトにフォールバックします" +" -- これにより見栄えが悪くなります。SVGのテキストを解析したい場合は ``False`` を選択してください。(v1.17.5で新規追加)" + +#: ../../page.rst:1801 e5cac2bee2274605a5a2eaa00d1a850c +msgid "" +"a UTF-8 encoded string that contains the image. Because SVG has XML " +"syntax it can be saved in a text file, the standard extension is `.svg`." +" .. note:: In case of a PDF, you can circumvent the \"full page image " +"only\" restriction by modifying the page's CropBox before using the " +"method." +msgstr "" + +#: ../../page.rst:1801 b8650b6da460403c94f3cdef21fb84ff +msgid "" +"a UTF-8 encoded string that contains the image. Because SVG has XML " +"syntax it can be saved in a text file, the standard extension is `.svg`." +msgstr "" +"UTF-8エンコードされた文字列で、イメージを含みます。 SVGにはXML構文があるため、テキストファイルに保存でき、標準の拡張子は `.svg`" +" です。" + +#: ../../page.rst:1803 afae168aaaa34e41a2bd3c0ddb4d6656 +msgid "" +"In case of a PDF, you can circumvent the \"full page image only\" " +"restriction by modifying the page's CropBox before using the method." +msgstr "PDFの場合、メソッドを使用する前に、ページのCropBoxを変更して「フルページイメージのみ」制限を回避できます。" + +#: ../../page.rst:1815 7815d15406e34873b6f746aa3d0e663c +msgid "" +"Create a pixmap from the page. This is probably the most often used " +"method to create a :ref:`Pixmap`." +msgstr "ページからピクスマップを作成します。おそらく、:ref:`Pixmap` を作成するために最も頻繁に使用されるメソッドでしょう。" + +#: ../../page.rst:1817 a37543741184408cb72d2fb84c118f18 +msgid "All parameters are *keyword-only.*" +msgstr "すべてのパラメータは *keyword-only.* です。" + +#: ../../page.rst:1819 246c4dc3a2d74918b8d706bd45d0e7b9 +msgid "default is :ref:`Identity`." +msgstr "デフォルトは :ref:`Identity` です。" + +#: ../../page.rst:1820 614bb0010d02467e9b70a7ee68f93a10 +#, fuzzy +msgid "" +"desired resolution in x and y direction. If not `None`, the `\"matrix\"` " +"parameter is ignored. (New in v1.19.2)" +msgstr "(v1.19.2で新たに追加) x方向およびy方向の所望の解像度。`None` でない場合、`\"matrix\"` パラメータは無視されます。" + +#: ../../page.rst:1821 32c84baad8cb41058b9ef83f5bffd363 +msgid "" +"The desired colorspace, one of \"GRAY\", \"RGB\" or \"CMYK\" (case " +"insensitive). Or specify a :ref:`Colorspace`, ie. one of the predefined " +"ones: :data:`csGRAY`, :data:`csRGB` or :data:`csCMYK`." +msgstr "" +"所望のカラースペース、\"GRAY\"、\"RGB\"、または\"CMYK\"のいずれか(大文字/小文字を区別しない)。または、:ref:`Colorspace`" +" のように、事前定義されたもののいずれかを指定できます::data:`csGRAY`、:data:`csRGB`、:data:`csCMYK`。" + +#: ../../page.rst:1823 3daf3b67c2654e7883f9cc4742712dfb +msgid "" +"restrict rendering to the intersection of this area with the page's " +"rectangle." +msgstr "ページの矩形とこの領域の交差に描画を制限します。" + +#: ../../page.rst:1824 618a9cdeb06f496c99cdcd4d8fb37d6c +#, fuzzy, python-format +msgid "" +"whether to add an alpha channel. Always accept the default ``False`` if " +"you do not really need transparency. This will save a lot of memory (25% " +"in case of RGB ... and pixmaps are typically **large**!), and also " +"processing time. Also note an **important difference** in how the image " +"will be rendered: with ``True`` the pixmap's samples area will be pre-" +"cleared with *0x00*. This results in **transparent** areas where the page" +" is empty. With ``False`` the pixmap's samples will be pre-cleared with " +"*0xff*. This results in **white** where the page has nothing to show. " +"|history_begin| Changed in v1.14.17 The default alpha value is now " +"``False``. * Generated with *alpha=True* .. image:: images/img-" +"alpha-1.* * Generated with *alpha=False* .. image:: images/img-" +"alpha-0.* |history_end|" +msgstr "" +"透明チャネルを追加するかどうか。本当に透明性が必要でない場合は、常にデフォルトの ``False`` " +"を受け入れてください。これにより、メモリ(RGBの場合25%…ピクスマップは通常大きいです!)と処理時間が大幅に節約されます。また、画像がレンダリングされる方法についても重要な違いに注意してください:" +" ``True`` " +"の場合、ピクスマップのサンプル領域は0x00で事前クリアされます。これにより、ページが空白の場所には透明な領域が表示されます。Falseの場合、ピクスマップのサンプルは" +" *0xff* で事前クリアされます。これにより、ページに表示する内容がない場所には **white** が表示されます。" + +#: ../../page.rst:1824 069a0ac3dc8a4df2b04f4be4c53e1a1b +#, python-format +msgid "" +"whether to add an alpha channel. Always accept the default ``False`` if " +"you do not really need transparency. This will save a lot of memory (25% " +"in case of RGB ... and pixmaps are typically **large**!), and also " +"processing time. Also note an **important difference** in how the image " +"will be rendered: with ``True`` the pixmap's samples area will be pre-" +"cleared with *0x00*. This results in **transparent** areas where the page" +" is empty. With ``False`` the pixmap's samples will be pre-cleared with " +"*0xff*. This results in **white** where the page has nothing to show." +msgstr "" +"透明チャネルを追加するかどうか。本当に透明性が必要でない場合は、常にデフォルトの ``False`` " +"を受け入れてください。これにより、メモリ(RGBの場合25%…ピクスマップは通常大きいです!)と処理時間が大幅に節約されます。また、画像がレンダリングされる方法についても重要な違いに注意してください:" +" ``True`` " +"の場合、ピクスマップのサンプル領域は0x00で事前クリアされます。これにより、ページが空白の場所には透明な領域が表示されます。Falseの場合、ピクスマップのサンプルは" +" *0xff* で事前クリアされます。これにより、ページに表示する内容がない場所には **white** が表示されます。" + +#: ../../page.rst:1838 da257fd66b7e48739cdd552eb7e5a295 +msgid "Changed in v1.14.17" +msgstr "v1.14.17で変更されました" + +#: ../../page.rst:1829 22b4255c80b94f27932cd1d3f34f50ca +msgid "The default alpha value is now ``False``." +msgstr "デフォルトのalpha値は ``False`` になりました。" + +#: ../../page.rst:1831 95ba89d42805403a8f650ea9c10add4e +msgid "Generated with *alpha=True*" +msgstr "*alpha=True* で生成されたもの" + +#: ../../page.rst:1836 6bcccaa32cfd4e0788d48d8a91bc8d23 +msgid "Generated with *alpha=False*" +msgstr "*alpha=False* で生成されたもの" + +#: ../../page.rst:1842 2fd0d0a18782432e8785e426b7380d5e +msgid "" +"*(new in version 1.16.0)* whether to also render annotations or to " +"suppress them. You can create pixmaps for annotations separately." +msgstr "*(v1.16.0で新たに追加)* アノテーションをレンダリングするか抑制するか。注釈用に個別にピクスマップを作成できます。" + +#: ../../page.rst:1844 77575780aa6b4b16a7e33696709a1b27 +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../page.rst:1845 212f7e146ee648499ad0c93a71f4f5ca +msgid "" +"Pixmap of the page. For fine-controlling the generated image, the by far " +"most important parameter is **matrix**. E.g. you can increase or decrease" +" the image resolution by using **Matrix(xzoom, yzoom)**. If zoom > 1, you" +" will get a higher resolution: zoom=2 will double the number of pixels in" +" that direction and thus generate a 2 times larger image. Non-positive " +"values will flip horizontally, resp. vertically. Similarly, matrices also" +" let you rotate or shear, and you can combine effects via e.g. matrix " +"multiplication. See the :ref:`Matrix` section to learn more." +msgstr "" +"ページのピクスマップ。生成されたイメージを細かく制御するために、最も重要なパラメータは **matrix** " +"です。たとえば、Matrix(xzoom, yzoom)を使用してイメージの解像度を増減させることができます。zoom > " +"1の場合、より高い解像度が得られ、zoom=2はその方向のピクセル数を2倍にし、したがって2倍の大きさのイメージを生成します。非正の値は水平または垂直に反転させます。同様に、行列は回転やシアーも可能にし、行列の乗算を介して効果を組み合わせることもできます。詳細については、:ref:`Matrix`" +" セクションをご覧ください。" + +#: ../../page.rst:1849 7f082dc993f84e9e8d5de381c38bd61d +msgid "" +"The pixmap will have *\"premultiplied\"* pixels if `alpha=True`. To learn" +" about some background, e.g. look for \"Premultiplied alpha\" `here " +"`_." +msgstr "" + +#: ../../page.rst:1851 133dee7d819e41a29b8b465e0745173f +msgid "" +"The method will respect any page rotation and will not exceed the " +"intersection of `clip` and :attr:`Page.cropbox`. If you need the page's " +"mediabox (and if this is a different rectangle), you can use a snippet " +"like the following to achieve this::" +msgstr "" +"このメソッドはページの回転を尊重し、`clip` と :attr:`Page.cropbox` " +"の交差を超えません。ページのmediaboxが必要な場合(およびこれが異なる矩形の場合)、次のようなスニペットを使用してこれを実現できます::" + +#: ../../page.rst:1869 4a385dde3eab4da39c6b4c9622418506 +msgid "Changed in v1.19.2: added support of parameter dpi." +msgstr "v1.19.2で変更:dpiパラメータのサポートを追加。" + +#: ../../page.rst:1877 a09dc5780d704f0197d5c57f5fb35dfc +msgid "" +"PDF only: return a list of the names of annotations, widgets and links. " +"Technically, these are the */NM* values of every PDF object found in the " +"page's */Annots* array." +msgstr "" +"PDFのみ:アノテーション、ウィジェット、およびリンクの名前のリストを返します。技術的には、これらはページの */Annots* " +"配列で見つかるすべてのPDFオブジェクトの */NM* 値です。" + +#: ../../page.rst:1883 1b87c2dc9b9d41e78f762c9729b9b970 +msgid "New in v1.16.10" +msgstr "新機能 v1.16.10" + +#: ../../page.rst:1890 ce93fd825c634b47948f985d718eeb3d +msgid "" +"PDF only: return a list of the :data:`xref` numbers of annotations, " +"widgets and links -- technically of all entries found in the page's " +"*/Annots* array." +msgstr "" +"PDFのみ:アノテーション、ウィジェット、およびリンクの :data:`xref` " +"番号のリストを返します。技術的には、これらはページの/Annots配列で見つかるすべてのエントリのxrefです。" + +#: ../../page.rst:1893 a5a0cce75c0943078217f1c00674b8c9 +msgid "" +"a list of items *(xref, type)* where type is the annotation type. Use the" +" type to tell apart links, fields and annotations, see " +":ref:`AnnotationTypes`." +msgstr "" +"xref、タイプがアノテーションのタイプであるアイテム *(xref, type)* " +"のリスト。リンク、フィールド、およびアノテーションを区別するためにタイプを使用します。:ref:`AnnotationTypes` " +"を参照してください。" + +#: ../../page.rst:1897 ../../page.rst:1915 2854fc280ceb47b982e13f84dca08c72 +#: 57bb6341ff60410796aabd0615bd3270 +msgid "New in v1.17.1" +msgstr "新機能 v1.17.1" + +#: ../../page.rst:1904 60f61f29857141e09389fc75cdd0e088 +msgid "" +"PDF only: return the annotation identified by *ident*. This may be its " +"unique name (PDF `/NM` key), or its :data:`xref`." +msgstr "PDFのみ:*ident* で識別されるアノテーションを返します。これはその一意の名前(PDF `/NM` キー)またはxrefかもしれません。" + +#: ../../page.rst:1906 67b38a72f9c04b64bb66ea64241948a9 +msgid "the annotation name or xref." +msgstr "アノテーションの名前またはxref。" + +#: ../../page.rst:1909 7a262c80da2142dba6fc49085cdca2a6 +msgid "the annotation or ``None``." +msgstr "アノテーションまたは ``None``。" + +#: ../../page.rst:1911 805fc360e59046df81cbcfe8c29b8096 +msgid "" +"Methods :meth:`Page.annot_names`, :meth:`Page.annot_xrefs` provide lists " +"of names or xrefs, respectively, from where an item may be picked and " +"loaded via this method." +msgstr "" +"メソッド :meth:`Page.annot_names`、:meth:`Page.annot_xrefs` " +"は、アイテムが取得およびこのメソッドを介して読み込まれる名前またはxrefのリストを提供します。" + +#: ../../page.rst:1921 8780cca4e0634efb990c28b20e367329 +msgid "PDF only: return the field identified by :data:`xref`." +msgstr "PDFのみ::data:`xref` で識別されるフィールドを返します。" + +#: ../../page.rst:1923 914a5c9b8e1643329ad85d0c7e4a7ca0 +msgid "the field's xref." +msgstr "フィールドのxref。" + +#: ../../page.rst:1926 d38fefc2959a4768b1584dda7ed9b4a0 +msgid "the field or ``None``." +msgstr "フィールドまたは ``None``。" + +#: ../../page.rst:1928 dc6660eb6ef74ab6a22ceef976a165e8 +msgid "" +"This is similar to the analogous method :meth:`Page.load_annot` -- except" +" that here only the xref is supported as identifier." +msgstr "これはメソッド :meth:`Page.load_annot` と同様ですが、ここでは識別子としてxrefのみがサポートされています。" + +#: ../../page.rst:1932 33d61d3d65074e4fb8add503dab4ff4d +msgid "New in v1.19.6" +msgstr "新機能 v1.19.6" + +#: ../../page.rst:1938 f50d8824983c4a3994c20d5375a05de4 +msgid "Return the first link on a page. Synonym of property :attr:`first_link`." +msgstr "最初のリンクを返します。プロパティ :attr:`first_link` の同義語です。" + +#: ../../page.rst:1940 ../../page.rst:2225 0d2d8e040fd04a768cb811991ae06180 +#: ff75620ea3274e4096e1070d096ff3ac +msgid ":ref:`Link`" +msgstr "" + +#: ../../page.rst:1941 37237376de794ce781b6aac906a17582 +msgid "first link on the page (or ``None``)." +msgstr "ページ上の最初のリンク(または ``None``)。" + +#: ../../page.rst:1948 fe1f4977f63f47c99547c0a1080c79dc +msgid "PDF only: Set the rotation of the page." +msgstr "PDFのみ:ページの回転を設定します。" + +#: ../../page.rst:1950 9e379a1317de4372b79f47c8c7150fcb +msgid "" +"An integer specifying the required rotation in degrees. Must be an " +"integer multiple of 90. Values will be converted to one of 0, 90, 180, " +"270." +msgstr "度数で指定された必要な回転を表す整数。90の整数倍である必要があります。値は0、90、180、270のいずれかに変換されます。" + +#: ../../page.rst:1954 c34897ef582e44d995e004a75d625d61 +#, fuzzy +msgid "PDF only: Change the colorspace components of all objects on page." +msgstr "PDFのみ: ページの表示領域を変更します。" + +#: ../../page.rst:1956 527cc0e7c9794aabb7a723d883d56e42 +msgid "" +"The desired count of color components. Must be one of 1, 3 or 4, which " +"results in color spaces DeviceGray, DeviceRGB or DeviceCMYK respectively." +" The method affects text, images and vector graphics. For instance, with " +"the default value 1, a page will be converted to grayscale. If a page is " +"already grayscale, the method will not cause visible changes -- " +"independent of the value of ``components``." +msgstr "" + +#: ../../page.rst:1958 b2b4968561df4134b86804c0e86146eb +msgid "These changes are **permanent** and cannot be reverted." +msgstr "" + +#: ../../page.rst:1962 8faaf3431e604048bf7ab373e633fb0f +msgid "" +"PDF only: Set page rotation to 0 while maintaining appearance and page " +"content." +msgstr "PDFのみ:外観とページ内容を維持しながらページの回転を0に設定します。" + +#: ../../page.rst:1964 f487132c0602407890443e4f4be1d062 +msgid "" +"The inverted matrix used to achieve this change. If the page was not " +"rotated (rotation 0), :ref:`Identity` is returned. The method " +"automatically recomputes the rectangles of any annotations, links and " +"widgets present on the page. This method may come in handy when e.g. " +"used with :meth:`Page.show_pdf_page`." +msgstr "" +"この変更を実現するために使用される反転した行列。ページが回転していない場合(回転0)、:ref:`Identity` " +"が返されます。メソッドは、ページに存在するアノテーション、リンク、およびウィジェットの矩形を自動的に再計算します" + +#: ../../page.rst:1964 0b610a12deba4ca9a0cef1fc0fcd8ee9 +msgid "" +"The inverted matrix used to achieve this change. If the page was not " +"rotated (rotation 0), :ref:`Identity` is returned. The method " +"automatically recomputes the rectangles of any annotations, links and " +"widgets present on the page." +msgstr "" +"この変更を実現するために使用される反転した行列。ページが回転していない場合(回転0)、:ref:`Identity` " +"が返されます。メソッドは、ページに存在するアノテーション、リンク、およびウィジェットの矩形を自動的に再計算します。" + +#: ../../page.rst:1966 786ba8c828c5424893773c7c91b7f588 +msgid "" +"This method may come in handy when e.g. used with " +":meth:`Page.show_pdf_page`." +msgstr "このメソッドは、たとえば :meth:`Page.show_pdf_page` と一緒に使用する場合に便利です。" + +#: ../../page.rst:1976 a55c2adc8a884012a6ac999de952eb08 +#, fuzzy +msgid "" +"PDF only: Display a page of another PDF as a **vector image** (otherwise " +"similar to :meth:`Page.insert_image`). This is a multi-purpose method. " +"For example, you can use it to:" +msgstr "" +"PDFのみ:別のPDFのページをベクトルイメージとして表示します(それ以外は :meth:`Page.insert_image` " +"に類似)。これは多目的なメソッドです。たとえば、次のようなことに使用できます。" + +#: ../../page.rst:1978 605dfd2041954e059252cb3e52b4a8bd +msgid "" +"create \"n-up\" versions of existing PDF files, combining several input " +"pages into **one output page** (see example `combine.py " +"`_)," +msgstr "" +"既存のPDFファイルの「n-up」バージョンを作成し、複数の入力ページを1つの出力ページに結合します(例: `combine.py " +"`_ を参照)。" + +#: ../../page.rst:1979 c872d3edfaf84c1ca6c7bb36a489d61a +msgid "" +"create \"posterized\" PDF files, i.e. every input page is split up in " +"parts which each create a separate output page (see `posterize.py " +"`_)," +msgstr "" +"「ポスター化」されたPDFファイルを作成します。つまり、各入力ページは別々の出力ページを作成する部分に分割されます(`posterize.py " +"`_ を参照)。" + +#: ../../page.rst:1980 5b2ea4251e0a49b49af4185cd4fa5520 +msgid "" +"include PDF-based vector images like company logos, watermarks, etc., see" +" `svg-logo.py `_, which puts an SVG-based " +"logo on each page (requires additional packages to deal with SVG-to-PDF " +"conversions)." +msgstr "" +"企業のロゴ、透かし画像など、PDFベースのベクトル画像を含めます。`svg-logo.py `_ " +"は、各ページにSVGベースのロゴを配置します(SVGからPDFへの変換を処理するために追加のパッケージが必要です)。" + +#: ../../page.rst:1982 5465d0ecb03849a499c0e18c0fa169de +msgid "" +"where to place the image on current page. Must be finite and its " +"intersection with the page must not be empty." +msgstr "現在のページに画像を配置する場所。有限である必要があり、ページとの交差部分が空でない必要があります。" + +#: ../../page.rst:1983 1c227ce40fc145828629a96e7dbb48d9 +msgid "" +"source PDF document containing the page. Must be a different document " +"object, but may be the same file." +msgstr "ページを含むソースPDFドキュメント。異なるドキュメントオブジェクトである必要がありますが、同じファイルであることもあります。" + +#: ../../page.rst:1986 20704113b4da4d179731bc4cd4d50efe +msgid "page number (0-based, in `-∞ < pno < docsrc.page_count`) to be shown." +msgstr "表示するページ番号(0から始まる、`-∞ < pno < docsrc.page_count`)。指定されたページ。" + +#: ../../page.rst:1988 2d89fa1daf624951b22d8ee2dbb09d6c +msgid "" +"whether to maintain the width-height-ratio (default). If false, all 4 " +"corners are always positioned on the border of the target rectangle -- " +"whatever the rotation value. In general, this will deliver distorted and " +"/or non-rectangular images." +msgstr "幅高さ比率を維持するかどうか(デフォルト)。falseの場合、4つの角は常にターゲット矩形の境界に配置されます(回転値に関係なく)。一般的に、これは歪んだおよび/または非四角形の画像を提供します。" + +#: ../../page.rst:1990 0b7025689ec541c8ad3cbab422ac6c44 +msgid "put image in foreground (default) or background." +msgstr "画像を前景(デフォルト)または背景に配置します。" + +#: ../../page.rst:1992 fe37ba37dc4f421486814c0d60fd61a0 +msgid "" +"(:data:`xref`) make visibility dependent on this :data:`OCG` / " +":data:`OCMD` (which must be defined in the target PDF) [#f9]_. (New in " +"v1.18.3)" +msgstr "" +"*(v1.18.3で新機能)* (:data:`xref`)この :data:`OCG` / :data:`OCMD` " +"(ターゲットPDFで定義されている必要があります)に依存する可視性を作成します [#f9]_。" + +#: ../../page.rst:1993 e0401613a480439684d35d98fb8d6921 +msgid "" +"show the source rectangle rotated by some angle. Any angle is supported " +"(changed in v1.14.11). (New in v1.14.10)" +msgstr "*(v1.14.10で新機能)* ソースの矩形を一定の角度で表示します" + +#: ../../page.rst:1995 1afca8d881af4012a739a00fa83ca2cd +msgid "" +"choose which part of the source page to show. Default is the full page, " +"else must be finite and its intersection with the source page must not be" +" empty." +msgstr "表示するソースページの一部を選択します。デフォルトはフルページですが、有限である必要があり、ソースページとの交差部分が空でない必要があります。" + +#: ../../page.rst:1997 165e7ba9c93e42efb22bc7eb6e632e91 +msgid "" +"In contrast to method :meth:`Document.insert_pdf`, this method does not " +"copy annotations, widgets or links, so these are not included in the " +"target [#f6]_. But all its **other resources (text, images, fonts, " +"etc.)** will be imported into the current PDF. They will therefore appear" +" in text extractions and in :meth:`get_fonts` and :meth:`get_images` " +"lists -- even if they are not contained in the visible area given by " +"*clip*." +msgstr "" +"メソッド :meth:`Document.insert_pdf` " +"とは異なり、このメソッドは注釈、ウィジェット、リンクをコピーしないため、これらは対象に含まれません " +"[#f6]_。ただし、その他のすべてのリソース(テキスト、画像、フォントなど)は現在のPDFにインポートされます。したがって、テキストの抽出と " +":meth:`get_fonts` および :meth:`get_images` リストに表示されます。表示領域に含まれていなくても。" + +#: ../../page.rst:1999 91dcf1cf496547dda9448d7d92de25bd +msgid "Example: Show the same source page, rotated by 90 and by -90 degrees:" +msgstr "例:同じソースページを90度と-90度回転して表示します。" + +#: ../../page.rst:2021 e1a5d31c40634d7ca0a5b13c4c486cd9 +msgid "" +"Changed in v1.14.11: Parameter *reuse_xref* has been deprecated. Position" +" the source rectangle centered in target rectangle. Any rotation angle is" +" now supported." +msgstr "v1.14.11で変更:パラメータreuse_xrefは非推奨となりました。ソースの矩形をターゲットの矩形の中央に配置します。任意の回転角度がサポートされます。" + +#: ../../page.rst:2022 e34ab43ae3344596a1cf974cf096efd8 +msgid "Changed in v1.18.3: New parameter `oc`." +msgstr "v1.18.3で変更:新しいパラメータ `oc` が追加されました。" + +#: ../../page.rst:2028 1f75825b2fa8496f8d7838a79dcb59d1 +msgid "PDF only: Create a new :ref:`Shape` object for the page." +msgstr "PDFのみ:ページ用の新しい :ref:`Shape` オブジェクトを作成します。" + +#: ../../page.rst:2030 448a4d9e36ed4f388fd9281af5fa6163 +msgid ":ref:`Shape`" +msgstr "" + +#: ../../page.rst:2031 2e8e49e396304e859679ff624ea5332f +msgid "a new :ref:`Shape` to use for compound drawings. See description there." +msgstr "複合描画に使用する新しい :ref:`Shape` オブジェクト。詳細はそちらの説明を参照してください。" + +#: ../../page.rst:2042 ae3bfa9248a44159b596cff72b4cb764 +msgid "Search for *needle* on a page. Wrapper for :meth:`TextPage.search`." +msgstr "ページ上で *needle* を検索します。:meth:`TextPage.search` のラッパーです。" + +#: ../../page.rst:2044 561a8b22613b4e0fab9c07fe560b9b39 +msgid "" +"Text to search for. May contain spaces. Upper / lower case is ignored, " +"but only works for ASCII characters: For example, \"COMPÉTENCES\" will " +"not be found if needle is \"compétences\" -- \"compÉtences\" however " +"will. Similar is true for German umlauts and the like." +msgstr "検索対象のテキスト。スペースを含めることができます。大文字/小文字は無視されますが、ASCII文字に対してのみ機能します:たとえば、needleが「COMPÉTENCES」の場合、needleが「compétences」の場合には見つかりません。「compÉtences」の場合は見つかります。同様に、ドイツ語のウムラウトなどにも当てはまります。" + +#: ../../page.rst:2045 a74b6d07b4ca426c9c6a2ff29c389fdc +#, fuzzy +msgid "only search within this area. (New in v1.18.2)" +msgstr "*(v1.18.2で新規追加)* この領域内でのみ検索します。" + +#: ../../page.rst:2046 cc4bceee8bcc400495ae347f1428ca33 +msgid "Return object type :ref:`Quad` instead of :ref:`Rect`." +msgstr "四角形の代わりに :ref:`Quad` オブジェクトのタイプを返します。" + +#: ../../page.rst:2047 5e2cb8a4a4534227890a3a20f7d790fa +msgid "" +"Control the data extracted by the underlying :ref:`TextPage`. By default," +" ligatures and white spaces are kept, and hyphenation [#f8]_ is detected." +msgstr "" +"基本となる :ref:`TextPage` によって抽出されるデータを制御します。デフォルトでは、リガチャと空白を保持し、ハイフン化 [#f8]_" +" が検出されます。" + +#: ../../page.rst:2048 f4f5f3480369432797d931a09ae0bd08 +msgid "" +"use a previously created :ref:`TextPage`. This reduces execution time " +"**significantly.** If specified, the 'flags' and 'clip' arguments are " +"ignored. If omitted, a temporary textpage will be created. (New in " +"v1.19.0)" +msgstr "" +"以前に作成された :ref:`TextPage` (テキストページ)を使用します。これにより、実行時間が大幅に短縮されます。指定された場合、 " +"'flags'および 'clip'引数は無視されます。省略された場合、一時的なテキストページが作成されます。 (v1.19.0で新規)" + +#: ../../page.rst:2052 d915c2467304441f8e652132ac02f5b9 +msgid "" +"A list of :ref:`Rect` or :ref:`Quad` objects, each of which -- " +"**normally!** -- surrounds one occurrence of *needle*. **However:** if " +"parts of *needle* occur on more than one line, then a separate item is " +"generated for each these parts. So, if `needle = \"search string\"`, two " +"rectangles may be generated. |history_begin| Changes in v1.18.2: * " +"There no longer is a limit on the list length (removal of the `hit_max` " +"parameter). * If a word is **hyphenated** at a line break, it will still " +"be found. E.g. the needle \"method\" will be found even if hyphenated as " +"\"meth-od\" at a line break, and two rectangles will be returned: one " +"surrounding \"meth\" (without the hyphen) and another one surrounding " +"\"od\". |history_end|" +msgstr "" + +#: ../../page.rst:2054 77656e09591c4d1f980f7b07d90ee220 +msgid "" +"A list of :ref:`Rect` or :ref:`Quad` objects, each of which -- " +"**normally!** -- surrounds one occurrence of *needle*. **However:** if " +"parts of *needle* occur on more than one line, then a separate item is " +"generated for each these parts. So, if `needle = \"search string\"`, two " +"rectangles may be generated." +msgstr "" +":ref:`Rect` または :ref:`Quad` " +"オブジェクトのリストで、通常、needleの一致を1つ囲みます。ただし、needleの一部が複数の行にまたがる場合、それぞれの部分に対して別のアイテムが生成されます。したがって、`needle" +" = \"search string\"` の場合、2つの四角形が生成される可能性があります。" + +#: ../../page.rst:2058 cdd97cbacd9746f9842747b461ef916d +#, fuzzy +msgid "Changes in v1.18.2:" +msgstr "**v1.18.2の変更点:**" + +#: ../../page.rst:2060 09d4f787268146a6af0c882bd73cec00 +msgid "" +"There no longer is a limit on the list length (removal of the `hit_max` " +"parameter)." +msgstr "リストの長さに制限はもうありません(`hit_max` パラメータの削除)。" + +#: ../../page.rst:2061 69338b4777fe432e9ead49881456e85d +msgid "" +"If a word is **hyphenated** at a line break, it will still be found. E.g." +" the needle \"method\" will be found even if hyphenated as \"meth-od\" at" +" a line break, and two rectangles will be returned: one surrounding " +"\"meth\" (without the hyphen) and another one surrounding \"od\"." +msgstr "" +"単語が行の区切りでハイフン化されている場合でも、検出されます。たとえば、needleが行の区切りで「meth-" +"od」としてハイフン化されていても、「method」としてハイフン化されていない部分を囲む1つの四角形と、「od」としてハイフン化されていない部分を囲む別の四角形が返されます。" + +#: ../../page.rst:2065 afad5936869b45ce8fada25fae4af13f +msgid "" +"The method supports multi-line text marker annotations: you can use the " +"full returned list as **one single** parameter for creating the " +"annotation." +msgstr "このメソッドは、複数行のテキストマーカーアノテーションをサポートしており、返されたリスト全体を1つのパラメータとして使用してアノテーションを作成できます。" + +#: ../../page.rst:2069 ca7c4e5131f7455a8d09e18eeb7866e4 +msgid "" +"There is a tricky aspect: the search logic regards **contiguous multiple " +"occurrences** of *needle* as one: assuming *needle* is \"abc\", and the " +"page contains \"abc\" and \"abcabc\", then only **two** rectangles will " +"be returned, one for \"abc\", and a second one for \"abcabc\"." +msgstr "" +"トリッキーな側面があります。検索ロジックは、連続した複数の *needle* の出現を1つと見なします。つまり、*needle* " +"が「abc」で、ページに「abc」と「abcabc」が含まれている場合、2つの矩形のみが返され、1つは「abc」に、もう1つは「abcabc」になります。" + +#: ../../page.rst:2070 a4ab315bd8484981b8e5859e86e06dc8 +msgid "" +"You can always use :meth:`Page.get_textbox` to check what text actually " +"is being surrounded by each rectangle." +msgstr "常に :meth:`Page.get_textbox` を使用して、各矩形で実際に囲まれているテキストを確認できます。" + +#: ../../page.rst:2072 2f59c4bd6c354c979632dcdffc850183 +msgid "" +"A feature repeatedly asked for is supporting **regular expressions** when" +" specifying the `\"needle\"` string: **There is no way to do this.** If " +"you need something in that direction, first extract text in the desired " +"format and then subselect the result by matching with some regex pattern." +" Here is an example for matching words::" +msgstr "" +"`\"needle\"` " +"文字列を指定する際に正規表現をサポートする機能が何度も要求されていますが、これを行う方法はありません。この方向性の何かが必要な場合は、まず希望の形式でテキストを抽出し、それを正規表現パターンと一致させて結果をサブセレクトしてください。単語を一致させる例を以下に示します。" + +#: ../../page.rst:2078 d96b1e2b4cd04bf190f0253c6e6bbee2 +msgid "" +"The `matches` list will contain the words matching the given pattern. In " +"the same way you can select `span[\"text\"]` from the output of " +"`page.get_text(\"dict\")`." +msgstr "" +"`matches` リストには、指定されたパターンに一致する単語が含まれます。同様の方法で、`page.get_text(\"dict\")` " +"の出力から `span[\"text\"]` を選択できます。" + +#: ../../page.rst:2082 b7a76108e8fd4fd39baf37a8d5756c85 +msgid "" +"Changed in v1.18.2: added `clip` parameter. Remove `hit_max` parameter. " +"Add default \"dehyphenate\"." +msgstr "v1.18.2で変更:`clip` パラメータを追加。`hit_max` パラメータを削除。デフォルトの「デハイフェネート」を追加。" + +#: ../../page.rst:2083 5a3460e1977445b1ad77eab57ea183c0 +msgid "Changed in v1.19.0: added `textpage` parameter." +msgstr "v1.19.0で変更:`textpage` パラメータを追加。" + +#: ../../page.rst:2090 5052dee6a0e746a1b38d4abab9f078fc +msgid "" +"PDF only: Change the physical page dimension by setting :data:`mediabox` " +"in the page's object definition." +msgstr "PDFのみ: ページのオブジェクト定義内で :data:`mediabox` を設定することにより、物理ページの寸法を変更します。" + +#: ../../page.rst:2092 8bfd058d5d0d4593b0d0e5474dd55b15 +msgid "the new :data:`mediabox` value." +msgstr "新しい :data:`mediabox` の値。" + +#: ../../page.rst:2094 e2cbffaad0654201b64464a89d720253 +msgid "" +"This method also removes the page's other (optional) rectangles " +"(:data:`cropbox`, ArtBox, TrimBox and Bleedbox) to prevent inconsistent " +"situations. This will cause those to assume their default values." +msgstr "このメソッドは、ページの他の(オプションの)矩形(:data:`cropbox`、ArtBox、TrimBox、Bleedbox)も削除し、一貫性のない状況を防ぐためにそれらをデフォルト値に戻します。" + +#: ../../page.rst:2096 a2958ab783fb4b0b9c2c379a48d1d301 +msgid "" +"For non-empty pages this may have undesired effects, because the location" +" of all content depends on this value and will therefore change position " +"or even disappear." +msgstr "これにより、非空のページでは望ましくない効果が発生する可能性があるため、すべてのコンテンツの位置がこの値に依存し、したがって位置が変わるか、完全に消える可能性があります。" + +#: ../../page.rst:2100 d8769a93b9b54728a06f34c303a15b32 +msgid "New in v1.16.13" +msgstr "v1.16.13で新規追加" + +#: ../../page.rst:2101 7ae9c86319004190b34044348164ecfc +msgid "Changed in v1.19.4: remove all other rectangle definitions." +msgstr "v1.19.4で変更: 他のすべての矩形定義を削除しました。" + +#: ../../page.rst:2108 7ba1cd4d86ec4c01983f0067f7c51e34 +msgid "PDF only: change the visible part of the page." +msgstr "PDFのみ: ページの表示領域を変更します。" + +#: ../../page.rst:2110 c42f5429f28a46b9a8eb0ed655c4e2ae +msgid "" +"the new visible area of the page. Note that this **must** be specified in" +" **unrotated coordinates**, not empty, nor infinite and be completely " +"contained in the :attr:`Page.mediabox`." +msgstr "" +"ページの新しい表示領域。これは回転していない座標で指定する必要があり、空ではなく、無限ではなく、:attr:`Page.mediabox` " +"に完全に含まれている必要があります。" + +#: ../../page.rst:2112 dd15508eaae2499d8d7c81145ba2b252 +msgid "" +"After execution **(if the page is not rotated)**, :attr:`Page.rect` will " +"equal this rectangle, but be shifted to the top-left position (0, 0) if " +"necessary. Example session:" +msgstr "" +"実行後(ページが回転していない場合)、:attr:`Page.rect` " +"はこの矩形と等しくなりますが、必要に応じて左上の位置(0、0)にシフトされます。以下は例セッションです:" + +#: ../../page.rst:2143 cc950b3dfa33495eb30fa27778dfb205 +msgid "" +"PDF only: Set the resp. rectangle in the page object. For the meaning of " +"these objects see :ref:`AdobeManual`, page 77. Parameter and restrictions" +" are the same as for :meth:`Page.set_cropbox`." +msgstr "" +"PDFのみ: ページオブジェクト内の対応する矩形を設定します。これらのオブジェクトの意味については、:ref:`AdobeManual` " +"の77ページを参照してください。パラメータと制約は :meth:`Page.set_cropbox` と同じです。" + +#: ../../page.rst:2147 be83b97e21e24302b50e38b065f0de37 +msgid "New in v1.19.4" +msgstr "v1.19.4で新規追加" + +#: ../../page.rst:2153 bf9fbfb5f3d548d19aae0c10a8918ffa +msgid "" +"Contains the rotation of the page in degrees (always 0 for non-PDF " +"types). This is a copy of the value in the PDF file. The PDF " +"documentation says:" +msgstr "ページの回転角度を度数で含みます(非PDFタイプでは常に0)。" + +#: ../../page.rst:2155 593ae6fa82f848c7a753dba02c68d0ab +msgid "" +"*\"The number of degrees by which the page should be rotated clockwise " +"when displayed or printed. The value must be a multiple of 90. Default " +"value: 0.\"*" +msgstr "*「ページを表示または印刷する際に、時計回りに回転させる度数。値は90の倍数でなければなりません。デフォルト値:0」* " + +#: ../../page.rst:2157 b25012b1d7b647fb95377e29f135d955 +msgid "" +"In PyMuPDF, we make sure that this attribute is always one of 0, 90, 180 " +"or 270." +msgstr "PyMuPDFでは、この属性が常に0、90、180、または270のいずれかであることを確認しています。" + +#: ../../page.rst:2163 a420975fc5304c84b4311ea7438f2838 +msgid "" +"Contains the top-left point of the page's `/CropBox` for a PDF, otherwise" +" *Point(0, 0)*." +msgstr "PDFの場合、ページの `/CropBox` の左上の点を含みます。それ以外の場合は *Point(0, 0)*。" + +#: ../../page.rst:2165 ../../page.rst:2189 14b42064952e495f86517d524fb9bf7d +#: a1e439e66ea94339a4584e5d84acfceb +msgid ":ref:`Point`" +msgstr "" + +#: ../../page.rst:2169 d7a7ecfe84a64514a731bcaf26fb9f4b +msgid "" +"The page's `/CropBox` for a PDF. Always the **unrotated** page rectangle " +"is returned. For a non-PDF this will always equal the page rectangle." +msgstr "" +"PDFのページの `/CropBox` です。常に **回転していない** " +"ページの矩形が返されます。非PDFの場合、これは常にページの矩形と等しいです。" + +#: ../../page.rst:2171 00ff97efefe14656a544e0787ea997c2 +msgid "" +"In PDF, the relationship between `/MediaBox`, `/CropBox` and page " +"rectangle may sometimes be confusing, please do lookup the glossary for " +":data:`MediaBox`." +msgstr "" +"PDFでは、`/MediaBox`、`/CropBox`、およびページの矩形の関係は混乱することがあります。:data:`MediaBox` " +"の用語集を参照してください。" + +#: ../../page.rst:2181 3fe46310939e47b2ae6afd93da61ddef +msgid "" +"The page's `/ArtBox`, `/BleedBox`, `/TrimBox`, respectively. If not " +"provided, defaulting to :attr:`Page.cropbox`." +msgstr "" +"PDFのページの/ArtBox、/BleedBox、/TrimBoxです。指定されていない場合、:attr:`Page.cropbox` " +"にデフォルトで設定されます。" + +#: ../../page.rst:2187 edac8cd9d72347c2bddffd9f0bea44e7 +msgid "" +"Contains the width and height of the page's :attr:`Page.mediabox` for a " +"PDF, otherwise the bottom-right coordinates of :attr:`Page.rect`." +msgstr "" +"PDFのページの :attr:`Page.mediabox` の幅と高さを含みます。それ以外の場合、:attr:`Page.rect` " +"の右下の座標です。" + +#: ../../page.rst:2193 bb0f8c84a70647398ac8d07b1b07a718 +msgid "The page's :data:`mediabox` for a PDF, otherwise :attr:`Page.rect`." +msgstr "PDFのページの :data:`mediabox`、それ以外の場合は :attr:`Page.rect` です。" + +#: ../../page.rst:2197 84f1b8e8aec84fbdaa06100cba90416e +msgid "" +"For most PDF documents and for **all other document types**, `page.rect " +"== page.cropbox == page.mediabox` is true. However, for some PDFs the " +"visible page is a true subset of :data:`mediabox`. Also, if the page is " +"rotated, its `Page.rect` may not equal `Page.cropbox`. In these cases the" +" above attributes help to correctly locate page elements." +msgstr "" +"ほとんどのPDF文書および他のすべてのドキュメントタイプに対して、`page.rect == page.cropbox == " +"page.mediabox` が真です。ただし、一部のPDFでは、表示ページが v " +"の真の部分集合である場合があります。また、ページが回転している場合、`Page.rect` は `Page.cropbox` " +"と等しくないかもしれません。これらの場合、上記の属性はページの要素を正しく位置付けるのに役立ちます。" + +#: ../../page.rst:2201 ffcef36971a148b38bb9a735082a427f +msgid "" +"This matrix translates coordinates from the PDF space to the MuPDF space." +" For example, in PDF `/Rect [x0 y0 x1 y1]` the pair (x0, y0) specifies " +"the **bottom-left** point of the rectangle -- in contrast to MuPDF's " +"system, where (x0, y0) specify top-left. Multiplying the PDF coordinates " +"with this matrix will deliver the (Py-) MuPDF rectangle version. " +"Obviously, the inverse matrix will again yield the PDF rectangle." +msgstr "" +"この行列は、PDF空間からMuPDF空間への座標の変換に使用されます。たとえば、PDFの `/Rect [x0 y0 x1 y1]` " +"では、ペア(x0、y0)が矩形の左下の点を指定します。これはMuPDFのシステムとは異なり、ここでは(x0、y0)は左上を指定します。PDF座標をこの行列で掛け算すると、(Py-)MuPDF矩形バージョンが得られます。明らかに、逆行列は再びPDF矩形を返します。" + +#: ../../page.rst:2203 ../../page.rst:2219 9c48ca4e33ac45efacad2e7435706737 +#: eb6073a66b17463dbfbbca88a268af92 +msgid ":ref:`Matrix`" +msgstr "" + +#: ../../page.rst:2209 a878eb1463294598b1c5c285f2c1844c +msgid "" +"These matrices may be used for dealing with rotated PDF pages. When " +"adding / inserting anything to a PDF page, the coordinates of the " +"**unrotated** page are always used. These matrices help translating " +"between the two states. Example: if a page is rotated by 90 degrees -- " +"what would then be the coordinates of the top-left Point(0, 0) of an A4 " +"page?" +msgstr "" +"これらの行列は、回転したPDFページの取り扱いに使用できます。PDFページに何かを追加/挿入する際、**回転していない** " +"ページの座標が常に使用されます。これらの行列は、2つの状態間での変換を支援します。例:ページが90度回転した場合、A4ページの左上のPoint(0," +" 0)の座標は何になりますか?" + +#: ../../page.rst:2223 00863695e6124d6cba58ee5e6628a66d +msgid "Contains the first :ref:`Link` of a page (or ``None``)." +msgstr "ページの最初の :ref:`Link` を含みます(または ``None``)。" + +#: ../../page.rst:2229 7c3edf6a8463452683307122e26a0048 +msgid "Contains the first :ref:`Annot` of a page (or ``None``)." +msgstr "ページの最初の :ref:`Annot` を含みます(または ``None``)。" + +#: ../../page.rst:2235 f9288c63a9544b60863128ddc0e278c9 +msgid "Contains the first :ref:`Widget` of a page (or ``None``)." +msgstr "ページの最初の :ref:`Widget` を含みます(または ``None``)。" + +#: ../../page.rst:2241 c5a61c437e804ce98569fae44f541bb3 +msgid "The page number." +msgstr "ページ番号。" + +#: ../../page.rst:2247 a32f1b5ed58542fdbf98aec4d2bc782a +msgid "The owning document object." +msgstr "所属するドキュメントオブジェクト。" + +#: ../../page.rst:2249 56a8b14cf1e14ae180ffde01047fadc8 +msgid ":ref:`Document`" +msgstr "" + +#: ../../page.rst:2254 6074ff47c8244957a9f82c80e79b7a77 +msgid "" +"Contains the rectangle of the page. Same as result of " +":meth:`Page.bound()`." +msgstr "ページの矩形を含みます。:meth:`Page.bound()` の結果と同じです。" + +#: ../../page.rst:2260 860e65f83b5a4fe88ae89093d4988c2c +msgid "The page's PDF :data:`xref`. Zero if not a PDF." +msgstr "ページのPDF :data:`xref`。PDFでない場合はゼロです。" + +#: ../../page.rst:2269 6d139a4bb5274ecc80cf0a71b09dad34 +msgid "Description of *get_links()* Entries" +msgstr "*get_links()* エントリの説明" + +#: ../../page.rst:2270 2f3db543ce7349c88fc7f283957eb869 +msgid "" +"Each entry of the :meth:`Page.get_links` list is a dictionary with the " +"following keys:" +msgstr ":meth:`Page.get_links` リストの各エントリは、以下のキーを持つ辞書です:" + +#: ../../page.rst:2272 03738b38c13c40ecb311b8775f9ede72 +msgid "" +"*kind*: (required) an integer indicating the kind of link. This is one " +"of *LINK_NONE*, *LINK_GOTO*, *LINK_GOTOR*, *LINK_LAUNCH*, or *LINK_URI*. " +"For values and meaning of these names refer to :ref:`linkDest Kinds`." +msgstr "" +"*kind*:(必須)リンクの種類を示す整数。*LINK_NONE*、*LINK_GOTO*、*LINK_GOTOR*、*LINK_LAUNCH*、または" +" *LINK_URI* のいずれかです。" + +#: ../../page.rst:2274 03b31ab2794f4c1d801a0d3ff2d468b1 +msgid "" +"*from*: (required) a :ref:`Rect` describing the \"hot spot\" location on" +" the page's visible representation (where the cursor changes to a hand " +"image, usually)." +msgstr "" +"*from*:(必須)ページの可視な表現上の「ホットスポット」の場所を示す :ref:`Rect` " +"(通常、カーソルが手のイメージに変わる場所です)。" + +#: ../../page.rst:2276 2e837820055d4bb8af5be74dc8193389 +msgid "" +"*page*: a 0-based integer indicating the destination page. Required for " +"*LINK_GOTO* and *LINK_GOTOR*, else ignored." +msgstr "page:宛先ページを示す0ベースの整数。*LINK_GOTO*および *LINK_GOTOR* の場合に必要ですが、それ以外の場合は無視されます。" + +#: ../../page.rst:2278 dce39dda894c43e3b2c4670c7306e0ae +msgid "" +"*to*: either a *pymupdf.Point*, specifying the destination location on " +"the provided page, default is *pymupdf.Point(0, 0)*, or a symbolic " +"(indirect) name. If an indirect name is specified, *page = -1* is " +"required and the name must be defined in the PDF in order for this to " +"work. Required for *LINK_GOTO* and *LINK_GOTOR*, else ignored." +msgstr "" +"*to*:宛先ページ上の宛先場所を指定する *pymupdf.Point*、デフォルトは *pymupdf.Point(0, " +"0)*、またはシンボリック(間接)名です。間接名が指定された場合、*page = -1* " +"が必要で、名前はPDFで定義されている必要があります。*LINK_GOTO* および *LINK_GOTOR* " +"の場合に必要ですが、それ以外の場合は無視されます。" + +#: ../../page.rst:2280 1b901ce50408445883b29e399681fcd4 +msgid "" +"*file*: a string specifying the destination file. Required for " +"*LINK_GOTOR* and *LINK_LAUNCH*, else ignored." +msgstr "file:宛先ファイルを指定する文字列。LINK_GOTORおよびLINK_LAUNCHの場合に必要ですが、それ以外の場合は無視されます。" + +#: ../../page.rst:2282 45804997184e4049a34060e0bb1bf783 +msgid "" +"*uri*: a string specifying the destination internet resource. Required " +"for *LINK_URI*, else ignored. You should make sure to start this string " +"with an unambiguous substring, that classifies the subtype of the URL, " +"like `\"http://\"`, `\"https://\"`, `\"file://\"`, `\"ftp://\"`, " +"`\"mailto:\"`, etc. Otherwise your browser will try to interpret the text" +" and come to unwanted / unexpected conclusions about the intended URL " +"type." +msgstr "" +"*uri*: LINK_URI用に指定された、インターネットリソースの宛先を示す文字列。*LINK_URI* " +"の場合に必要で、それ以外の場合は無視されます。この文字列は、`\"http://\"`、`\"https://\"`、`\"file://\"`、`\"ftp://\"`、`\"mailto:\"`" +" " +"など、URLのサブタイプを識別する明確なサブストリングで始めるようにしてください。そうしないと、ブラウザがテキストを解釈し、意図しない/予期しない結論に達する可能性があります。" + +#: ../../page.rst:2284 f1d4c1775f844474ba8f468712d8c8c8 +msgid "" +":data:`xref`: an integer specifying the PDF :data:`xref` of the link " +"object. Do not change this entry in any way. Required for link deletion " +"and update, otherwise ignored. For non-PDF documents, this entry contains" +" *-1*. It is also *-1* for **all** entries in the *get_links()* list, if " +"**any** of the links is not supported by MuPDF - see " +":ref:`notes_on_supporting_links`." +msgstr "" +":data:`xref`: リンクオブジェクトのPDF :data:`xref` " +"を指定する整数。このエントリを何らかの方法で変更しないでください。リンクの削除と更新に必要で、それ以外の場合は無視されます。非PDFドキュメントの場合、このエントリには" +" *-1* が含まれます。また、MuPDFがサポートしていないリンクがある場合、*get_links()* " +"リストのすべてのエントリに対しても-1になります。詳細については以下の注釈を参照してください。" + +#: ../../page.rst:2289 a0a340eff9134da98b3a440610ab37fd +msgid "Notes on Supporting Links" +msgstr "リンクのサポートに関する注記" + +#: ../../page.rst:2290 2b1ffa9e5c5a4e339bbe59d6c18661ed +msgid "" +"MuPDF's support for links has changed in **v1.10a**. These changes affect" +" link types :data:`LINK_GOTO` and :data:`LINK_GOTOR`." +msgstr "" +"**v1.10a** 以降、MuPDFのリンクサポートが変更されました。これらの変更は :data:`LINK_GOTO` および " +":data:`LINK_GOTOR` というリンクタイプに影響を与えます。" + +#: ../../page.rst:2293 e605547c21b540dfa11ed9f44cafb707 +msgid "" +"Reading (pertains to method *get_links()* and the *first_link* property " +"chain)" +msgstr "読み取り *get_links()* メソッドおよび*first_link* プロパティチェーンに関連)" + +#: ../../page.rst:2295 aa4ccddb768b43429d798d7b48ce0377 +msgid "" +"If MuPDF detects a link to another file, it will supply either a " +"*LINK_GOTOR* or a *LINK_LAUNCH* link kind. In case of *LINK_GOTOR* " +"destination details may either be given as page number (eventually " +"including position information), or as an indirect destination." +msgstr "" +"MuPDFが別のファイルへのリンクを検出する場合、*LINK_GOTOR* または *LINK_LAUNCH* " +"リンクの種類を提供します。*LINK_GOTOR* " +"の場合、宛先の詳細はページ番号(位置情報を含むことがある)または間接的な宛先として指定できます。" + +#: ../../page.rst:2297 250ef4abe3404260ac50f121a9956dbf +msgid "" +"If an indirect destination is given, then this is indicated by *page = " +"-1*, and *link.dest.dest* will contain this name. The dictionaries in the" +" *get_links()* list will contain this information as the *to* value." +msgstr "" +"間接的な宛先が指定された場合、page = -1で示され、*link.dest.dest* にこの名前が含まれます。 *get_links()* " +"リスト内の辞書には、この情報がto値として含まれます。" + +#: ../../page.rst:2299 298a64a6454e491fbeb1b9965b58c5f9 +msgid "" +"**Internal links are always** of kind *LINK_GOTO*. If an internal link " +"specifies an indirect destination, it **will always be resolved** and the" +" resulting direct destination will be returned. Names are **never " +"returned for internal links**, and undefined destinations will cause the " +"link to be ignored." +msgstr "内部リンクは常にLINK_GOTOの種類です。内部リンクが間接的な宛先を指定した場合、常に解決され、結果の直接的な宛先が返されます。内部リンクには名前は返されず、未定義の宛先はリンクが無視される原因になります。" + +#: ../../page.rst:2302 1b529c20ff1d4c0687b091a553627f85 +msgid "Writing" +msgstr "書き込み" + +#: ../../page.rst:2304 038c28789f044556b4134db72f888939 +msgid "" +"PyMuPDF writes (updates, inserts) links by constructing and writing the " +"appropriate PDF object **source**. This makes it possible to specify " +"indirect destinations for *LINK_GOTOR* **and** *LINK_GOTO* link kinds " +"(pre *PDF 1.2* file formats are **not supported**)." +msgstr "" +"PyMuPDFはリンクを構築して適切なPDFオブジェクト **ソース** を書き込むことにより、リンクを書き込み(更新、挿入)します。これにより、" +" *LINK_GOTOR* および *LINK_GOTO* リンク種別に対して間接的な宛先を指定できます(**PDF 1.2** " +"ファイル形式以前はサポートされていません)。" + +#: ../../page.rst:2306 48933c3fc42d43febf79d2a54cced857 +msgid "" +"If a *LINK_GOTO* indirect destination specifies an undefined name, this " +"link can later on not be found / read again with MuPDF / PyMuPDF. Other " +"readers however **will** detect it, but flag it as erroneous." +msgstr "" +"*LINK_GOTO* の間接的な宛先が未定義の名前を指定した場合、このリンクは後でMuPDF / " +"PyMuPDFで再び見つけることはできません。ただし、他のリーダーはそれを検出し、エラーとしてフラグ付けます。" + +#: ../../page.rst:2308 5a5898a48b284c999670d3b92fe1c0b8 +msgid "" +"Indirect *LINK_GOTOR* destinations can in general of course not be " +"checked for validity and are therefore **always accepted**." +msgstr "一般的な注意: 間接的な *LINK_GOTOR* の宛先は一般的に有効性を確認できないため、常に受け入れられます。" + +#: ../../page.rst:2310 206140c8e16346429b497db8f7ecc4b0 +msgid "" +"**Example: How to insert a link pointing to another page in the same " +"document**" +msgstr "**例: 同じドキュメント内の別のページを指すリンクを挿入する方法**" + +#: ../../page.rst:2312 ce035c3ccbc04027b704a1be3baab857 +msgid "" +"Determine the rectangle on the current page, where the link should be " +"placed. This may be the bbox of an image or some text." +msgstr "リンクを配置する現在のページの矩形を決定します。これは画像または一部のテキストのbboxである場合があります。" + +#: ../../page.rst:2314 17034ec326d24de19f8f569f04502acd +msgid "" +"Determine the target page number (\"pno\", 0-based) and a :ref:`Point` on" +" it, where the link should be directed to." +msgstr "ターゲットページ番号(0から始まる)と、リンクを指定するためのそのページ上の :ref:`Point` を決定します。" + +#: ../../page.rst:2316 891ee313cd244aba8c84cc9388e8bed9 +msgid "" +"Create a dictionary `d = {\"kind\": pymupdf.LINK_GOTO, \"page\": pno, " +"\"from\": bbox, \"to\": point}`." +msgstr "" +"辞書 `d = {\"kind\": pymupdf.LINK_GOTO, \"page\": pno, \"from\": bbox, " +"\"to\": point}` を作成します。" + +#: ../../page.rst:2318 35505ce21ebb4b94b4b66f1d61a78619 +msgid "Execute `page.insert_link(d)`." +msgstr "page.insert_link(d)を実行します。" + +#: ../../page.rst:2322 95a3bb206a1c46a58ff4b1108cfe3b07 +msgid "Homologous Methods of :ref:`Document` and :ref:`Page`" +msgstr ":ref:`Document` と :ref:`Page` の同様のメソッドに関する説明です。" + +#: ../../page.rst:2323 5a4a99de9f6a46fc8ca01c4a18782712 +msgid "" +"This is an overview of homologous methods on the :ref:`Document` and on " +"the :ref:`Page` level." +msgstr "これは、:ref:`Document` と :ref:`Page` レベルでの同様のメソッドの概要です。" + +#: ../../page.rst:2326 4837b7e49e7840229a2cbee44724f774 +msgid "**Document Level**" +msgstr "**Document Level(ドキュメントレベル)**" + +#: ../../page.rst:2326 bb0c90432beb487aaec3b85fd8cbc5c4 +msgid "**Page Level**" +msgstr "**Page Level(ページレベル)**" + +#: ../../page.rst:2328 2d0a3d11246749ec86131260bee9feec +msgid "*Document.get_page_fonts(pno)*" +msgstr "" + +#: ../../page.rst:2329 d11b933611e54b058edce56da847b5a7 +msgid "*Document.get_page_images(pno)*" +msgstr "" + +#: ../../page.rst:2330 059e7f23d9eb4a86ab24dd99863bf354 +msgid "*Document.get_page_pixmap(pno, ...)*" +msgstr "" + +#: ../../page.rst:2331 2cadf132cee3443fbf711668c1fa6b4e +msgid "*Document.get_page_text(pno, ...)*" +msgstr "" + +#: ../../page.rst:2332 258818fb4e74478fbb4538b405bf2a9c +msgid "*Document.search_page_for(pno, ...)*" +msgstr "" + +#: ../../page.rst:2335 0f67d85503cc449b89d3c0d1161f1e63 +msgid "The page number \"pno\" is a 0-based integer `-∞ < pno < page_count`." +msgstr "ページ番号「pno」は0から始まる整数であり、`-∞ < pno < page_count` です。" + +#: ../../page.rst:2339 88eab71df0714ee485f28f425f71657e +msgid "" +"Most document methods (left column) exist for convenience reasons, and " +"are just wrappers for: *Document[pno].*. So they **load and " +"discard the page** on each execution." +msgstr "" +"多くのドキュメントメソッド(左側の列)は利便性のために存在し、*Document[pno].* " +"のラッパーであるだけで、各実行でページを読み込んで破棄します。" + +#: ../../page.rst:2341 e3550b328290420c9cb06a736000111a +msgid "" +"However, the first two methods work differently. They only need a page's " +"object definition statement - the page itself will **not** be loaded. So " +"e.g. :meth:`Page.get_fonts` is a wrapper the other way round and defined " +"as follows: *page.get_fonts == page.parent.get_page_fonts(page.number)*." +msgstr "" +"ただし、最初の2つのメソッドは異なる方法で動作します。これらはページのオブジェクト定義ステートメントだけを必要とし、ページ自体は読み込まれません。例えば、:meth:`Page.get_fonts`" +" は逆に定義され、次のようになります: *page.get_fonts == " +"page.parent.get_page_fonts(page.number)*。" + +#: ../../page.rst:2344 05d1a718ae2d4d59ab848e33409726e4 +msgid "Footnotes" +msgstr "脚注" + +#: ../../page.rst:2345 e15be7f2fd234149b3e60381e98fac47 +msgid "" +"If your existing code already uses the installed base name as a font " +"reference (as it was supported by PyMuPDF versions earlier than 1.14), " +"this will continue to work." +msgstr "既存のコードがフォントの参照としてインストール済みのベース名を使用している場合(これはPyMuPDFバージョン1.14以前でサポートされていました)、これは引き続き機能します。" + +#: ../../page.rst:2347 58bef5078de14949addbcf32c02d2b3d +msgid "" +"Not all PDF reader software (including internet browsers and office " +"software) display all of these fonts. And if they do, the difference " +"between the **serifed** and the **non-serifed** version may hardly be " +"noticeable. But serifed and non-serifed versions lead to different " +"installed base fonts, thus providing an option to be displayable with " +"your specific PDF viewer." +msgstr "PDFリーダーソフトウェア(インターネットブラウザやオフィスソフトウェアを含む)がすべてのこれらのフォントを表示するわけではありません。そして、表示されても、セリフ付きとセリフなしバージョンの違いはほとんど気付かれないかもしれません。ただし、セリフ付きとセリフなしバージョンは異なるインストール済みベースのフォントにリードするため、特定のPDFビューアで表示可能なオプションが提供されます。" + +#: ../../page.rst:2349 e7925013b16b48198bba6f2a828f75c8 +msgid "" +"Not all PDF readers display these fonts at all. Some others do, but use a" +" wrong character spacing, etc." +msgstr "すべてのPDFリーダーがこれらのフォントを表示するわけではありません。一部の他のソフトウェアは表示するかもしれませんが、文字間隔が間違っているなどの問題が発生する場合があります。" + +#: ../../page.rst:2351 bf496c2253e84ed280ba206ddc534300 +msgid "" +"You are generally free to choose any of the :ref:`mupdficons` you " +"consider adequate." +msgstr "適切と思われるMuPDFの注釈アイコンのいずれを選択することは自由です。" + +#: ../../page.rst:2353 2dcee6d5f66e46d6aa8e90828fd8a02a +msgid "" +"The previous algorithm caused images to be **shrunk** to this " +"intersection. Now the image can be anywhere on :attr:`Page.mediabox`, " +"potentially being invisible or only partially visible if the cropbox " +"(representing the visible page part) is smaller." +msgstr "" +"以前のアルゴリズムでは、画像がこの交差点に縮小されることがありました。今では画像は :attr:`Page.mediabox` " +"のどこにでも配置でき、cropbox(表示ページ部分を表す)が小さい場合、画像は見えないか部分的にしか見えない可能性があります。" + +#: ../../page.rst:2355 a6cb78d77d014c9e89386abcc9878c38 +msgid "" +"If you need to also see annotations or fields in the target page, you can" +" convert the source PDF using :meth:`Document.bake`. The underlying MuPDF" +" function of that method will convert these objects to normal page " +"content. Then use :meth:`Page.show_pdf_page` with the converted PDF page." +msgstr "" +"ターゲットページで注釈やフィールドも表示する必要がある場合、ソースPDFを別のPDFに変換して :meth:`Document.bake` " +"を使用してみることができます。そのメソッドの基本となるMuPDFの機能は、これらのオブジェクトを通常のページコンテンツに変換します。その後、変換されたPDFページを使用して" +" :meth:`Page.show_pdf_page` を使用します。" + +#: ../../page.rst:2357 d053987c44a74feeb4ea14fd1db9fd0a +msgid "" +"In PDF, an area enclosed by some lines or curves can have a property " +"called \"orientation\". This is significant for switching on or off the " +"fill color of that area when there exist multiple area overlaps - see " +"discussion in method :meth:`Shape.finish` using the \"non-zero winding " +"number\" rule. While orientation of curves, quads, triangles and other " +"shapes enclosed by lines always was detectable, this has been impossible " +"for \"re\" (rectangle) items in the past. Adding the orientation " +"parameter now delivers the missing information." +msgstr "" +"PDFにおいて、いくつかの線や曲線で囲まれた領域は、「方向性(orientation)」と呼ばれるプロパティを持つことがあります。これは、複数の領域が重なる場合にその領域の塗りつぶし色をオンまたはオフに切り替える際に重要です。この方向性は「非ゼロの巡回数" +"(non-zero winding number)ルール」を使用した :meth:`Shape.finish` " +"メソッドの議論で詳しく説明されています。曲線、四角形、三角形などの線で囲まれた形状の方向性は常に検出可能でしたが、これまでは「re」(長方形)アイテムに対しては不可能でした。orientationパラメータを追加することで、この不足していた情報を提供できるようになりました。" + +#: ../../page.rst:2359 e95c484cd25c417782b03f2f75c9867c +msgid "" +"Hyphenation detection simply means that if the last character of a line " +"is \"-\", it will be assumed to be a continuation character. That " +"character will not be found by text searching with its default flag " +"setting. Please take note, that a MuPDF *line* may not always be what you" +" expect: words separated by overly large gaps (e.g. caused by text " +"justification) may constitute separate MuPDF lines. If then any of these " +"words ends with a hyphen, it will only be found by text searching if " +"hyphenation is switched off." +msgstr "" +"ハイフネーション検出とは、行の最後の文字が「-」である場合、それが連続文字であると仮定することを意味します。この文字は、デフォルトのフラグ設定を使用してテキスト検索されないでしょう。MuPDFの行が常に期待どおりであるわけではないことに注意してください:文字の間隔が非常に大きい(テキストの正当化によって引き起こされる場合など)、単語が別々のMuPDFの" +" *line* " +"を構成する可能性があります。その後、これらの単語のいずれかがハイフンで終わる場合、ハイフネーションがオフになっていない限り、テキスト検索でのみ見つかるでしょう。" + +#: ../../page.rst:2361 0910e3da442545cea526ff2e404e88b2 +msgid "" +"Objects inside the source page, like images, text or drawings, are never " +"aware of whether their owning page now is under OC control inside the " +"target PDF. If source page objects are OC-controlled in the source PDF, " +"then this will not be retained on the target: they will become " +"unconditionally visible." +msgstr "ソースページ内のオブジェクト、例えば画像、テキスト、または図面などは、所有ページがターゲットPDF内でOC(オプションコンテンツ)制御下にあるかどうかを認識することは決してありません。ソースページのオブジェクトがソースPDF内でOCに制御されている場合、これはターゲットに保持されないため、それらは無条件に表示されます。" + +#: ../../footer.rst:60 6b5dfc62c0974e1db350511c1379b041 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "image in memory (all formats supported" +#~ " by MuPDF -- see :ref:`ImageFiles`). " +#~ "Changed in v1.14.13: *io.BytesIO* is now" +#~ " also supported." +#~ msgstr "" + +#~ msgid "Changed in v1.14.13: *io.BytesIO* is now also supported." +#~ msgstr "v1.14.13で変更:*io.BytesIO* もサポートされるようになりました。" + +#~ msgid "" +#~ "*(new in version v1.18.1)* image in " +#~ "memory -- to be used as image " +#~ "mask (alpha values) for the base " +#~ "image. When specified, the base image" +#~ " must be provided as a filename " +#~ "or a stream -- and must not " +#~ "be an image that already has a " +#~ "mask." +#~ msgstr "" +#~ "*(v1.18.1で新機能)* " +#~ "ベース画像の画像マスク(アルファ値)として使用するためのメモリ内の画像。指定する場合、ベース画像はファイル名またはストリームとして提供され、すでにマスクを持っている画像ではない必要があります。" + +#~ msgid "" +#~ "*(New in v1.18.13)* the :data:`xref` of" +#~ " an image already present in the " +#~ "PDF. If given, parameters `filename`, " +#~ "`pixmap`, `stream`, `alpha` and `mask` " +#~ "are ignored. The page will simply " +#~ "receive a reference to the existing " +#~ "image." +#~ msgstr "" +#~ "(v1.18.13で新機能)* " +#~ "PDFに既に存在する画像のxref。指定された場合、`filename`、`pixmap`、`stream`、`alpha`、および " +#~ "`mask` のパラメータは無視されます。ページは単に既存の画像への参照を受け取ります。" + +#~ msgid "" +#~ "*(new in v1.18.3)* (:data:`xref`) make " +#~ "image visibility dependent on this " +#~ ":data:`OCG` or :data:`OCMD`. Ignored after " +#~ "the first of multiple insertions. The" +#~ " property is stored with the " +#~ "generated PDF image object and therefore" +#~ " controls the image's visibility throughout" +#~ " the PDF." +#~ msgstr "" +#~ "(v1.18.3で新機能)*(:data:`xref`)画像の表示をこの :data:`OCG`または " +#~ ":data:`OCMD` " +#~ "に依存させます。複数の挿入の最初の後は無視されます。このプロパティは生成されたPDF画像オブジェクトに保存されるため、PDF全体で画像の表示を制御します。" + +#~ msgid "*(new in version v1.14.11)* maintain the aspect ratio of the image." +#~ msgstr "(v1.14.11で新機能)* 画像のアスペクト比を保持します。" + +#~ msgid "For a description of *overlay* see :ref:`CommonParms`." +#~ msgstr "*overlay* の説明については :ref:`CommonParms` を参照してください。" + +#~ msgid "*Changed in v1.18.13:* Return xref of stored image." +#~ msgstr "*v1.18.13で変更:* 格納された画像のxrefを返します。" + +#~ msgid "" +#~ "the font name. Default is \"Helv\". " +#~ "Accepted alternatives are \"Cour\", \"TiRo\"," +#~ " \"ZaDb\" and \"Symb\". The name may" +#~ " be abbreviated to the first two " +#~ "characters, like \"Co\" for \"Cour\". " +#~ "Lower case is also accepted. *(Changed" +#~ " in v1.16.0)* Bold or italic variants" +#~ " of the fonts are **no longer " +#~ "accepted**. A user-contributed script " +#~ "provides a circumvention for this " +#~ "restriction -- see section *Using " +#~ "Buttons and JavaScript* in chapter " +#~ ":ref:`FAQ`. *(New in v1.17.0)* The " +#~ "actual font to use is now " +#~ "determined on a by-character level, " +#~ "and all required fonts (or sub-" +#~ "fonts) are automatically included. Therefore," +#~ " you should rarely ever need to " +#~ "care about this parameter and let " +#~ "it default (except you insist on a" +#~ " serifed font for your non-CJK " +#~ "text parts)." +#~ msgstr "" +#~ "フォント名。デフォルトは「Helv」です。受け入れられる代替案は「Cour」、「TiRo」、「ZaDb」、「Symb」です。名前は、最初の2文字まで省略することができます。「Co」のように、小文字も受け入れられます。*(v1.16.0で変更)*" +#~ " " +#~ "フォントの太字または斜体のバリアントはもはや受け入れられません。この制限を回避するユーザー投稿のスクリプトが提供されており、:ref:`FAQ`" +#~ " " +#~ "の章で「ボタンとJavaScriptの使用」を参照してください。(v1.17.0で新規追加)使用する実際のフォントは、文字ごとに決定され、必要なすべてのフォント(またはサブフォント)が自動的に含まれます。したがって、このパラメータについて心配する必要はほとんどありません。デフォルトにしておくべきです(CJK以外のテキスト部分にセリフフォントを希望する場合を除く)。" + +#~ msgid "*(new in v1.16.0)* the text color. Default is black." +#~ msgstr "*(v1.16.0で新規追加)* テキストの色。デフォルトは黒です。" + +#~ msgid "*(new in v1.16.0)* the fill color. Default is white." +#~ msgstr "*(v1.16.0で新規追加)* 塗りつぶしの色。デフォルトは白です。" + +#~ msgid "" +#~ "*(new in v1.17.0)* text alignment, one" +#~ " of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, " +#~ "TEXT_ALIGN_RIGHT - justify is **not " +#~ "supported**." +#~ msgstr "" +#~ "*(v1.17.0で新規追加)* " +#~ "テキストの配置、TEXT_ALIGN_LEFT、TEXT_ALIGN_CENTER、TEXT_ALIGN_RIGHTのいずれか。ジャスティファイは" +#~ " **サポートされていません**。" + +#~ msgid "" +#~ "the data to be stored (actual file" +#~ " content, any data, etc.). Changed " +#~ "in v1.14.13 *io.BytesIO* is now also " +#~ "supported." +#~ msgstr "" + +#~ msgid "" +#~ "*(New in v1.16.12)* the font to " +#~ "use when *text* is given, otherwise " +#~ "ignored. The same rules apply as " +#~ "for :meth:`Page.insert_textbox` -- which is" +#~ " the method :meth:`Page.apply_redactions` " +#~ "internally invokes. The replacement text " +#~ "will be **vertically centered**, if this" +#~ " is one of the CJK or " +#~ ":ref:`Base-14-Fonts`. .. note:: * For " +#~ "an **existing** font of the page, " +#~ "use its reference name as *fontname* " +#~ "(this is *item[4]* of its entry in" +#~ " :meth:`Page.get_fonts`). * For a **new," +#~ " non-builtin** font, proceed as " +#~ "follows:: page.insert_text(point, # " +#~ "anywhere, but outside all redaction " +#~ "rectangles \"something\", # some " +#~ "non-empty string fontname=\"newname\", " +#~ "# new, unused reference name " +#~ "fontfile=\"...\", # desired font file" +#~ " render_mode=3, # makes the text" +#~ " invisible ) page.add_redact_annot(..., " +#~ "fontname=\"newname\")" +#~ msgstr "" + +#~ msgid "" +#~ "*(New in v1.16.12)* the :data:`fontsize` " +#~ "to use for the replacing text. If" +#~ " the text is too large to fit," +#~ " several insertion attempts will be " +#~ "made, gradually reducing the :data:`fontsize`" +#~ " to no less than 4. If then " +#~ "the text will still not fit, no" +#~ " text insertion will take place at" +#~ " all." +#~ msgstr "" +#~ "*(新機能 v1.16.12)* 置換テキストに使用する :data:`fontsize` " +#~ "です。テキストが収まりきらない場合、:data:`fontsize` " +#~ "を徐々に4未満にならないまで縮小し、テキストがまだ収まらない場合はテキストの挿入は行われません。" + +#~ msgid "" +#~ "*(New in v1.16.12)* the horizontal " +#~ "alignment for the replacing text. See" +#~ " :meth:`insert_textbox` for available values. " +#~ "The vertical alignment is (approximately) " +#~ "centered if a PDF built-in font" +#~ " is used (CJK or :ref:`Base-14-Fonts`)." +#~ msgstr "" +#~ "*(新機能 v1.16.12)* 置換テキストの水平方向の配置を指定します。使用可能な値については " +#~ ":meth:`insert_textbox` を参照してください。PDF組み込みフォント(CJKまたは " +#~ ":ref:`Base-14-Fonts`)が使用されている場合、垂直方向の配置は(おおよそ)中央になります。" + +#~ msgid "" +#~ "*(New in v1.16.12)* the fill color " +#~ "of the rectangle **after applying** the" +#~ " redaction. The default is *white =" +#~ " (1, 1, 1)*, which is also " +#~ "taken if ``None`` is specified. " +#~ "*(Changed in v1.16.13)* To suppress a" +#~ " fill color altogether, specify ``False``." +#~ " In this cases the rectangle remains" +#~ " transparent." +#~ msgstr "" +#~ "*(新機能 v1.16.12)* レッドアクションを **適用した後** " +#~ "の矩形の塗りつぶし色です。デフォルトは *white = (1, 1, 1)*" +#~ " で、``None`` が指定された場合もこれが適用されます。 *(新機能 v1.16.13)*" +#~ " 塗りつぶし色を抑制するには、``False`` を指定します。この場合、矩形は透明のままです。" + +#~ msgid "" +#~ "the created annotation. *(Changed in " +#~ "v1.17.2)* Its standard appearance looks " +#~ "like a red rectangle (no fill " +#~ "color), optionally showing two diagonal " +#~ "lines. Colors, line width, dashing, " +#~ "opacity and blend mode can now be" +#~ " set and applied via :meth:`Annot.update`" +#~ " like with other annotations." +#~ msgstr "" +#~ "作成されたアノテーション。 *(新機能 v1.17.2)* " +#~ "その標準の外観は赤い矩形(塗りつぶし色なし)のようです。オプションで2つの対角線を表示できます。色、線の幅、破線、不透明度、ブレンドモードは、他の注釈と同様に" +#~ " :meth:`Annot.update` を介して設定および適用できます。" + +#~ msgid "" +#~ "Find tables on the page and return" +#~ " an object with related information. " +#~ "Typically, only very few of the " +#~ "many arguments ever need to be " +#~ "specified -- they mainly are tools " +#~ "to react to corner case situations." +#~ msgstr "ページ上のテーブルを検出し、関連情報を持つオブジェクトを返します。通常、指定する必要がある引数は非常に少数であり、主に特殊な状況に対応するためのツールです。" + +#~ msgid "" +#~ "request a search algorithm. The " +#~ "\"lines\" default looks for vector " +#~ "drawings. If \"text\" is specified, text" +#~ " positions are used to generate " +#~ "\"virtual\" row boundaries. The \"text\" " +#~ "choices are recommended when dealing " +#~ "with pages without any vector graphics" +#~ " -- like when this is an OCRed" +#~ " page." +#~ msgstr "検索アルゴリズムを指定します。デフォルトでは「lines」はベクトル描画を探します。指定された場合、「text」ではテキストの位置を使用して「仮想」行境界を生成します。「text」の選択肢は、ベクトルグラフィックが含まれていないページと取り扱う場合に推奨されます。" + +#~ msgid "" +#~ "The remaining parameters are limits for" +#~ " merging different objects. For instance:" +#~ " Two horizontal lines with the same" +#~ " x-coordinates and a vertical distance " +#~ "less than 3 will be merged " +#~ "(\"snapped\") to one line." +#~ msgstr "残りのパラメーターは、異なるオブジェクトを統合するための制限です。例えば、x座標が同じで垂直距離が3未満の2つの水平線は1つの線に「スナップ」されます。" + +#~ msgid "" +#~ "a `TableFinder` object that has the " +#~ "following significant attributes: * " +#~ "**cells:** a list of **all bboxes** " +#~ "on the page, that have been " +#~ "identified as table cells (across all" +#~ " tables). Each cell is a tuple " +#~ "`(x0, y0, x1, y1)` of coordinates " +#~ "or `None`. * **tables:** a list of" +#~ " `Table` objects. This is `[]` if " +#~ "the page has no tables. Please " +#~ "note that while single tables can " +#~ "be found as items of this list," +#~ " the `TableFinder` object itself is " +#~ "also a sequence of it tables. This" +#~ " means that if `tabs` is a " +#~ "`TableFinder` object, then table number " +#~ "\"n\" is delivered by `tabs.tables[n]` " +#~ "as well as by the shorter " +#~ "`tabs[n]`. * The `Table` object has" +#~ " the following attributes: * **bbox:**" +#~ " the bounding box of the table " +#~ "as a tuple `(x0, y0, x1, y1)`." +#~ " * **cells:** bounding boxes of " +#~ "the table's cells (list of tuples). " +#~ "A cell may also be `None`. *" +#~ " **extract():** this method returns the " +#~ "text content of each table cell as" +#~ " a list of list of strings. " +#~ "* **to_pandas():** this method returns " +#~ "the table as a `pandas " +#~ "`_ `DataFrame " +#~ "`_. *" +#~ " **header:** a `TableHeader` object " +#~ "containing header information of the " +#~ "table. * **col_count:** an integer " +#~ "containing the number of table columns." +#~ " * **row_count:** an integer containing" +#~ " the number of table rows. * " +#~ "**rows:** a list of `TableRow` objects" +#~ " containing two attributes: *bbox* is " +#~ "the boundary box of the row, and" +#~ " *cells* is a list of table " +#~ "cells contained in this row. * " +#~ "The `TableHeader` object has the " +#~ "following attributes: * **bbox:** the " +#~ "bounding box of the header. * " +#~ "**cells:** a list of bounding boxes " +#~ "containing the name of the respective" +#~ " column. * **names:** a list of" +#~ " strings containing the text of each" +#~ " of the cell bboxes. They represent" +#~ " the column names -- which can " +#~ "be used when exporting the table " +#~ "to pandas DataFrames or CSV, etc. " +#~ "* **external:** a bool indicating " +#~ "whether the header bbox is outside " +#~ "the table body (`True`) or not. " +#~ "Table headers are never identified by" +#~ " the `TableFinder` logic. Therefore, if " +#~ "*external* is true, then the header " +#~ "cells are not part of any cell " +#~ "identified by `TableFinder`. If `external " +#~ "== False`, then the first table " +#~ "row is the header. Please have a" +#~ " look at these `Jupyter notebooks " +#~ "`_, which cover standard " +#~ "situations like multiple tables on one" +#~ " page or joining table fragments " +#~ "across multiple pages." +#~ msgstr "" + +#~ msgid "" +#~ "*(Changed in v1.18.0)* The overlapping " +#~ "parts of **images** will be blanked-" +#~ "out for default option " +#~ "`PDF_REDACT_IMAGE_PIXELS`. Option 0 does not" +#~ " touch any images and 1 will " +#~ "remove any image with an overlap. " +#~ "Please be aware that there is a" +#~ " bug for option *PDF_REDACT_IMAGE_PIXELS =" +#~ " 2*: transparent images will be " +#~ "incorrectly handled!" +#~ msgstr "" +#~ "*(v1.18.0で変更)* デフォルトオプション `PDF_REDACT_IMAGE_PIXELS` " +#~ "の場合、**画像** " +#~ "の重なる部分はブランクにされます。オプション0は画像には影響せず、オプション1は重なる画像を完全に削除します。ただし、オプション " +#~ "*PDF_REDACT_IMAGE_PIXELS = 2* " +#~ "にはバグがあることに注意してください:透明な画像が誤って処理されます!" + +#~ msgid "" +#~ "Automatic line breaks are inserted at" +#~ " word boundaries. The \"soft hyphen\" " +#~ "character `\"­\"` can be used to" +#~ " cause hyphenation and thus also " +#~ "cause line breaks. **Forced** line " +#~ "breaks however are only achievable via" +#~ " the HTML tag `
` - `\"\\\\n\"`" +#~ " is ignored and will be treated " +#~ "like a space." +#~ msgstr "" + +#~ msgid "" +#~ "The text may include arbitrary languages" +#~ " -- **including right-to-left** " +#~ "languages." +#~ msgstr "" + +#~ msgid "**either** be just informed (and accept a no-op)," +#~ msgstr "" + +#~ msgid "" +#~ "**or** (`scale=True` - the default) " +#~ "scale down the content until it " +#~ "fits." +#~ msgstr "" + +#~ msgid "" +#~ "the text to be written. Can " +#~ "contain plain text and HTML tags " +#~ "with styling instructions. Alternatively, a" +#~ " :ref:`Story` object may be specified " +#~ "(in which case the internal Story " +#~ "generation step will be omitted). A " +#~ "Story must have been generated with " +#~ "all required styling and Archive " +#~ "information." +#~ msgstr "" + +#~ msgid "" +#~ "optional string containing additional CSS " +#~ "instructions. Ignored if `text` is a " +#~ "Story." +#~ msgstr "" + +#~ msgid "" +#~ "if necessary scale down the content " +#~ "until it fits in the target " +#~ "rectangle. This sets the down scaling" +#~ " limit. Default is 0, no limit. " +#~ "A value of 1 means no down-" +#~ "scaling. A value of e.g. 0.2 means" +#~ " maximum down-scaling by 80%." +#~ msgstr "" + +#~ msgid "" +#~ "an Archive object that points to " +#~ "locations where to find images or " +#~ "non-standard fonts. If `text` refers " +#~ "to images, this parameter is always " +#~ "required. Ignored if `text` is a " +#~ "Story." +#~ msgstr "" + +#~ msgid "" +#~ "A string indicating the requested " +#~ "format, one of the above. A " +#~ "mixture of upper and lower case is" +#~ " supported. Changed in v1.16.3 Values " +#~ "\"words\" and \"blocks\" are now also" +#~ " accepted." +#~ msgstr "" + +#~ msgid "" +#~ "*(new in v1.16.2)* indicator bits to " +#~ "control whether to include images or " +#~ "how text should be handled with " +#~ "respect to white spaces and " +#~ ":data:`ligatures`. See :ref:`TextPreserve` for " +#~ "available indicators and " +#~ ":ref:`text_extraction_flags` for default settings." +#~ msgstr "" +#~ "*(v1.16.2で新たに追加)* 画像を含めるか、テキストを空白文字と :data:`ligatures` " +#~ "にどのように処理するかを制御するための指示ビット。使用可能な指示子については:ref:`TextPreserve` " +#~ "を、デフォルト設定についてはText :ref:`text_extraction_flags` を参照してください。" + +#~ msgid "" +#~ "(new in v1.19.1) sort the output " +#~ "by vertical, then horizontal coordinates. " +#~ "In many cases, this should suffice " +#~ "to generate a \"natural\" reading order." +#~ " Has no effect on (X)HTML and " +#~ "XML. Output option **\"words\"** sorts " +#~ "by `(y1, x0)` of the words' " +#~ "bboxes. Similar is true for \"blocks\"," +#~ " \"dict\", \"json\", \"rawdict\", \"rawjson\":" +#~ " they all are sorted by `(y1, " +#~ "x0)` of the resp. block bbox. If" +#~ " specified for \"text\", then internally" +#~ " \"blocks\" is used." +#~ msgstr "" +#~ "(v1.19.1で新たに追加) " +#~ "出力を垂直方向、水平方向の座標で並べ替えます。多くの場合、これで「自然な」読み取り順序を生成できるはずです。これは(X)HTMLおよびXMLには影響しません。出力オプション" +#~ " “words” は単語のbboxの `(y1, x0)` " +#~ "で並べ替えます。同様のことは “blocks”、 “dict”、 “json”、 " +#~ "“rawdict”、 “rawjson”にも当てはまります。それらはすべて、respです。ブロックbboxの " +#~ "`(y1, x0)` でソートされます。 “text” に対して指定された場合、内部的には" +#~ " “blocks” が使用されます。" + +#~ msgid "" +#~ "(new in v1.23.5) use these characters" +#~ " as *additional* word separators with " +#~ "the \"words\" output option (ignored " +#~ "otherwise). By default, all white spaces" +#~ " (including non-breaking space `0xA0`) " +#~ "indicate start and end of a word." +#~ " Now you can specify more characters" +#~ " causing this. For instance, the " +#~ "default will return `\"john.doe@outlook.com\"` " +#~ "as **one** word. If you specify " +#~ "`delimiters=\"@.\"` then the **four** words" +#~ " `\"john\"`, `\"doe\"`, `\"outlook\"`, `\"com\"`" +#~ " will be returned. Other possible " +#~ "uses include ignoring punctuation characters" +#~ " `delimiters=string.punctuation`. The \"word\" " +#~ "strings will not contain any delimiting" +#~ " character." +#~ msgstr "" + +#~ msgid "" +#~ "The inclusion of text via the " +#~ "*clip* parameter is decided on a " +#~ "by-character level: **(changed in " +#~ "v1.18.2)** a character becomes part of" +#~ " the output, if its bbox is " +#~ "contained in *clip*. This **deviates** " +#~ "from the algorithm used in redaction " +#~ "annotations: a character will be " +#~ "**removed if its bbox intersects** any" +#~ " redaction annotation." +#~ msgstr "" +#~ "clipパラメータを使用してテキストを含めるかどうかは、文字単位で決定されます **(v1.18.2で変更)** " +#~ "。キャラクタのbboxが *clip* " +#~ "に含まれている場合、そのキャラクタは出力の一部となります。これは、赤塗りのアノテーションで使用されるアルゴリズムとは " +#~ "**異なります** :" + +#~ msgid "" +#~ "a string with interspersed linebreaks " +#~ "where necessary. Changed in v1.19.0: It" +#~ " is based on dedicated code. A " +#~ "tyical use is checking the result " +#~ "of :meth:`Page.search_for`: >>> rl = " +#~ "page.search_for(\"currency:\") >>> page.get_textbox(rl[0])" +#~ " 'Currency:' >>>" +#~ msgstr "" + +#~ msgid "(new in v1.18.17) type of this path." +#~ msgstr "v1.18.17で新しく追加されたパスのタイプ。" + +#~ msgid "" +#~ "`(\"re\", rect, orientation)` - a " +#~ ":ref:`Rect`. *Changed in v1.18.17:* Multiple" +#~ " rectangles within the same path are" +#~ " now detected. *Changed in v1.19.2:* " +#~ "added integer `orientation` which is 1" +#~ " resp. -1 indicating whether the " +#~ "enclosed area is rotated left (1 =" +#~ " anti-clockwise), or resp. right " +#~ "[#f7]_." +#~ msgstr "" +#~ "`(\"re\", rect, orientation)` - " +#~ ":ref:`Rect`。*バージョン1.18.17で変更:* " +#~ "同じパス内の複数の矩形が検出されるようになりました。*バージョン1.19.2で変更:* 整数の " +#~ "orientation が追加され、内包された領域が左に回転(1 = " +#~ "反時計回り)するか、右に回転するかを示します [#f7]_。" + +#~ msgid "" +#~ "Starting with v1.19.2, quads and " +#~ "rectangles are more reliably recognized " +#~ "as such." +#~ msgstr "バージョン1.19.2から、四角形と四角形は、より信頼性のある方法で認識されるようになりました。" + +#~ msgid "" +#~ "**New in v1.22.0:** Specifying `extended=True`" +#~ " significantly alters the output. Most " +#~ "importantly, new dictionary types are " +#~ "present: \"clip\" and \"group\". All " +#~ "paths will now be organized in a" +#~ " hierarchic structure which is encoded " +#~ "by the new integer key \"level\", " +#~ "the hierarchy level. Each group or " +#~ "clip establishes a new hierarchy, which" +#~ " applies to all subsequent paths " +#~ "having a *larger* level value." +#~ msgstr "" +#~ "**新機能 v1.22.0:** `extended=True` " +#~ "を指定すると、出力が大幅に変更されます。最も重要な変更点は、新しい辞書タイプ「clip」および「group」が存在することです。すべてのパスは、新しい整数キー「level」でエンコードされる階層構造に整理されます。各グループまたはクリップは、その後に大きなレベル値を持つすべての後続のパスに適用される新しい階層を確立します。" + +#~ msgid "Key `\"type\"` takes one of the following values:" +#~ msgstr "`\"type\"` キーは、以下のいずれかの値を取ります:" + +#~ msgid "" +#~ "**\"f\"** -- this is a *fill-only*" +#~ " path. Only key-values relevant for" +#~ " this operation have a meaning, " +#~ "irrelevant ones have been added with " +#~ "default values for backward compatibility: " +#~ "`\"color\"`, `\"lineCap\"`, `\"lineJoin\"`, " +#~ "`\"width\"`, `\"closePath\"`, `\"dashes\"` and " +#~ "should be ignored." +#~ msgstr "" +#~ "**\"f\"**– これは *fill-only* " +#~ "のパスです。この操作に関連するキーと値だけが意味を持ち、無関係なものは後方互換性を保つためにデフォルトの値が追加されています。したがって、`\"color\"`、`\"lineCap\"`、`\"lineJoin\"`、`\"width\"`、`\"closePath\"`、`\"dashes\"`" +#~ " などは無視されるべきです。" + +#~ msgid "" +#~ "**\"s\"** -- this is a *stroke-" +#~ "only* path. Similar to previous, key " +#~ "`\"fill\"` is present with value `None`." +#~ msgstr "" +#~ "**\"s\"** – これは *stroke-only* " +#~ "のパスです。前述のように、キー \"fill\" が値 `None` で存在します。" + +#~ msgid "" +#~ "Using class :ref:`Shape`, you should be" +#~ " able to recreate the original " +#~ "drawings on a separate (PDF) page " +#~ "with high fidelity under normal, not " +#~ "too sophisticated circumstances. Please see" +#~ " the following comments on restrictions." +#~ " A coding draft can be found in" +#~ " section \"How to Extract Drawings\" " +#~ "of chapter :ref:`FAQ`." +#~ msgstr "" +#~ ":ref:`Shape` " +#~ "クラスを使用すると、通常の、あまり複雑でない状況で、元の図面を高い忠実度で別の(PDF)ページに再作成できるはずです。制約事項に関する以下のコメントを参照してください。コーディングの草案は、FAQの「図面の抽出方法」セクションにあります。" + +#~ msgid "" +#~ "*New in v1.18.13:* **PDF only.** Try " +#~ "to find the :data:`xref` for each " +#~ "image. Implies `hashes=True`. Adds the " +#~ "`\"xref\"` key to the dictionary. If " +#~ "not found, the value is 0, which" +#~ " means, the image is either " +#~ "\"inline\" or otherwise undetectable. Please" +#~ " note that this option has an " +#~ "extended response time, because the MD5" +#~ " hashcode will be computed at least" +#~ " two times for each image with " +#~ "an xref." +#~ msgstr "" +#~ "*新機能(v1.18.13)*:**PDFのみ。** b各イメージの :data:`xref` " +#~ "を見つけようとします。`hashes=True` を含意します。辞書に `\"xref\"` " +#~ "キーが追加されます。見つからない場合、値は0で、イメージが「インライン」または他の方法で検出できないことを意味します。このオプションは応答時間が延長されるため、注意が必要です。少なくとも2回のMD5ハッシュコードの計算が各イメージに対して行われます。" + +#~ msgid "" +#~ "the boundary box of the image --" +#~ " optionally also its transformation matrix." +#~ " * *(Changed in v1.16.7)* -- If " +#~ "the page in fact does not display" +#~ " this image, an infinite rectangle is" +#~ " returned now. In previous versions, " +#~ "an exception was raised. Formally " +#~ "invalid parameters still raise exceptions. " +#~ "* *(Changed in v1.17.0)* -- Only " +#~ "images referenced directly by the page" +#~ " are considered. This means that " +#~ "images occurring in embedded PDF pages" +#~ " are ignored and an exception is " +#~ "raised. * *(Changed in v1.18.5)* -- " +#~ "Removed the restriction introduced in " +#~ "v1.17.0: any item of the page's " +#~ "image list may be specified. * " +#~ "*(Changed in v1.18.11)* -- Partially " +#~ "re-instated a restriction: only those " +#~ "images are considered, that are either" +#~ " directly referenced by the page or" +#~ " by a Form XObject directly " +#~ "referenced by the page. * *(Changed " +#~ "in v1.18.11)* -- Optionally also return" +#~ " the transformation matrix together with" +#~ " the bbox as the tuple `(bbox, " +#~ "transform)`." +#~ msgstr "" + +#~ msgid "" +#~ "*(new in v1.17.5)* -- controls how " +#~ "text is represented. ``True`` outputs " +#~ "each character as a series of " +#~ "elementary draw commands, which leads to" +#~ " a more precise text display in " +#~ "browsers, but a **very much larger** " +#~ "output for text-oriented pages. Display" +#~ " quality for ``False`` relies on the" +#~ " presence of the referenced fonts on" +#~ " the current system. For missing " +#~ "fonts, the internet browser will fall" +#~ " back to some default -- leading " +#~ "to unpleasant appearances. Choose ``False``" +#~ " if you want to parse the text" +#~ " of the SVG." +#~ msgstr "" +#~ "*(v1.17.5で新規)* " +#~ "–テキストの表現方法を制御します。Trueは、各文字を基本的な描画コマンドのシリーズとして出力し、ブラウザでより正確なテキスト表示を提供しますが、テキスト指向のページでは非常に大きな出力になります。" +#~ " ``False`` " +#~ "の表示品質は、現在のシステム上の参照フォントの存在に依存しています。不足しているフォントの場合、インターネットブラウザはデフォルトにフォールバックし、不快な外観になります。" +#~ " SVGのテキストを解析する場合は ``False`` を選択してください。" + +#~ msgid "" +#~ "whether to add an alpha channel. " +#~ "Always accept the default ``False`` if" +#~ " you do not really need transparency." +#~ " This will save a lot of memory" +#~ " (25% in case of RGB ... and" +#~ " pixmaps are typically **large**!), and " +#~ "also processing time. Also note an " +#~ "**important difference** in how the " +#~ "image will be rendered: with ``True``" +#~ " the pixmap's samples area will be" +#~ " pre-cleared with *0x00*. This " +#~ "results in **transparent** areas where " +#~ "the page is empty. With ``False`` " +#~ "the pixmap's samples will be pre-" +#~ "cleared with *0xff*. This results in " +#~ "**white** where the page has nothing " +#~ "to show. Changed in v1.14.17 The " +#~ "default alpha value is now ``False``." +#~ " * Generated with *alpha=True* .." +#~ " image:: images/img-alpha-1.* * " +#~ "Generated with *alpha=False* .. image::" +#~ " images/img-alpha-0.*" +#~ msgstr "" + +#~ msgid "" +#~ "(new in v1.19.0) use a previously " +#~ "created :ref:`TextPage`. This reduces " +#~ "execution time **significantly.** If " +#~ "specified, the 'flags' and 'clip' " +#~ "arguments are ignored. If omitted, a " +#~ "temporary textpage will be created." +#~ msgstr "" +#~ "(v1.19.0で新規追加) 以前に作成した :ref:`TextPage` " +#~ "を使用します。これにより、実行時間が大幅に短縮されます。指定した場合、「flags」と「clip」引数は無視されます。省略した場合、一時的なテキストページが作成されます。" + +#~ msgid "" +#~ "A list of :ref:`Rect` or :ref:`Quad`" +#~ " objects, each of which -- " +#~ "**normally!** -- surrounds one occurrence " +#~ "of *needle*. **However:** if parts of" +#~ " *needle* occur on more than one " +#~ "line, then a separate item is " +#~ "generated for each these parts. So, " +#~ "if `needle = \"search string\"`, two " +#~ "rectangles may be generated. **Changes " +#~ "in v1.18.2:** * There no longer " +#~ "is a limit on the list length " +#~ "(removal of the `hit_max` parameter). " +#~ "* If a word is **hyphenated** at" +#~ " a line break, it will still be" +#~ " found. E.g. the needle \"method\" " +#~ "will be found even if hyphenated " +#~ "as \"meth-od\" at a line break," +#~ " and two rectangles will be returned:" +#~ " one surrounding \"meth\" (without the " +#~ "hyphen) and another one surrounding " +#~ "\"od\"." +#~ msgstr "" + +#~ msgid "" +#~ "a `TableFinder` object that has the " +#~ "following significant attributes: * " +#~ "**cells:** a list of **all bboxes** " +#~ "on the page, that have been " +#~ "identified as table cells (across all" +#~ " tables). Each cell is a tuple " +#~ "`(x0, y0, x1, y1)` of coordinates " +#~ "or `None`. * **tables:** a list of" +#~ " `Table` objects. This is `[]` if " +#~ "the page has no tables. Single " +#~ "tables can be found as items of" +#~ " this list. But the `TableFinder` " +#~ "object itself is also a sequence " +#~ "of its tables. This means that if" +#~ " `tabs` is a `TableFinder` object, " +#~ "then table \"n\" is delivered by " +#~ "`tabs.tables[n]` as well as by the " +#~ "shorter `tabs[n]`. * The `Table` " +#~ "object has the following attributes: " +#~ "* **bbox:** the bounding box of " +#~ "the table as a tuple `(x0, y0, " +#~ "x1, y1)`. * **cells:** bounding boxes" +#~ " of the table's cells (list of " +#~ "tuples). A cell may also be " +#~ "`None`. * **extract():** this method " +#~ "returns the text content of each " +#~ "table cell as a list of list " +#~ "of strings. * **to_pandas():** this " +#~ "method returns the table as a " +#~ "`pandas `_ `DataFrame" +#~ " `_. *" +#~ " **header:** a `TableHeader` object " +#~ "containing header information of the " +#~ "table. * **col_count:** an integer " +#~ "containing the number of table columns." +#~ " * **row_count:** an integer containing" +#~ " the number of table rows. * " +#~ "**rows:** a list of `TableRow` objects" +#~ " containing two attributes: *bbox* is " +#~ "the boundary box of the row, and" +#~ " *cells* is a list of table " +#~ "cells contained in this row. * " +#~ "The `TableHeader` object has the " +#~ "following attributes: * **bbox:** the " +#~ "bounding box of the header. * " +#~ "**cells:** a list of bounding boxes " +#~ "containing the name of the respective" +#~ " column. * **names:** a list of " +#~ "strings containing the text of each " +#~ "of the cell bboxes. They represent " +#~ "the column names -- which can be" +#~ " used when exporting the table to " +#~ "pandas DataFrames or CSV, etc. * " +#~ "**external:** a bool indicating whether " +#~ "the header bbox is outside the " +#~ "table body (`True`) or not. Table " +#~ "headers are never identified by the " +#~ "`TableFinder` logic. Therefore, if *external*" +#~ " is true, then the header cells " +#~ "are not part of any cell " +#~ "identified by `TableFinder`. If `external " +#~ "== False`, then the first table " +#~ "row is the header. Please have a" +#~ " look at these `Jupyter notebooks " +#~ "`_, which cover standard " +#~ "situations like multiple tables on one" +#~ " page or joining table fragments " +#~ "across multiple pages." +#~ msgstr "" + +#~ msgid "" +#~ "**to_pandas():** this method returns the " +#~ "table as a `pandas " +#~ "`_ `DataFrame " +#~ "`_." +#~ msgstr "" +#~ "**to_pandas():** このメソッドは、テーブルを `pandas " +#~ "`_ `DataFrame " +#~ "`_ として返します。" + +#~ msgid "" +#~ "How to redact overlapping images. The" +#~ " default (2) blanks out overlapping " +#~ "pixels. *PDF_REDACT_IMAGE_NONE* (0) ignores, " +#~ "and *PDF_REDACT_IMAGE_REMOVE* (1) completely " +#~ "removes all overlapping images." +#~ msgstr "重なる画像の赤字領域の処理方法。デフォルト(2)では重なるピクセルを消去します" + +#~ msgid "" +#~ "The overlapping parts of **images** will" +#~ " be blanked-out for default option" +#~ " `PDF_REDACT_IMAGE_PIXELS` (changed in v1.18.0)." +#~ " Option 0 does not touch any " +#~ "images and 1 will remove any image" +#~ " with an overlap. Please be aware " +#~ "that there is a bug for option " +#~ "*PDF_REDACT_IMAGE_PIXELS = 2*: transparent " +#~ "images will be incorrectly handled!" +#~ msgstr "" + +#~ msgid "PDF only: Add text in a given rectangle." +#~ msgstr "PDFのみ: 指定された矩形にテキストを追加します。" + +#~ msgid "" +#~ "the text. May contain any mixture " +#~ "of Latin, Greek, Cyrillic, Chinese, " +#~ "Japanese and Korean characters. The " +#~ "respective required font is automatically " +#~ "determined. (New in v1.17.0)" +#~ msgstr "" +#~ "テキスト。*(v1.17.0で新規追加)* " +#~ "ラテン文字、ギリシャ文字、キリル文字、中国語、日本語、韓国語の文字を任意の組み合わせで含めることができます。必要なフォントは自動的に決定されます。" + +#~ msgid "" +#~ "the font name. Default is \"Helv\". " +#~ "Accepted alternatives are \"Cour\", \"TiRo\"," +#~ " \"ZaDb\" and \"Symb\". The name may" +#~ " be abbreviated to the first two " +#~ "characters, like \"Co\" for \"Cour\". " +#~ "Lower case is also accepted. Bold " +#~ "or italic variants of the fonts " +#~ "are **not accepted** (changed in " +#~ "v1.16.0). A user-contributed script " +#~ "provides a circumvention for this " +#~ "restriction -- see section *Using " +#~ "Buttons and JavaScript* in chapter " +#~ ":ref:`FAQ`. The actual font to use " +#~ "is now determined on a by-" +#~ "character level, and all required fonts" +#~ " (or sub-fonts) are automatically " +#~ "included. Therefore, you should rarely " +#~ "ever need to care about this " +#~ "parameter and let it default (except " +#~ "you insist on a serifed font for" +#~ " your non-CJK text parts). (New " +#~ "in v1.17.0)" +#~ msgstr "" +#~ "フォント名。デフォルトは \"Helv\" です。受け入れられる代替は " +#~ "\"Cour\"、\"TiRo\"、\"ZaDb\"、\"Symb\" です。名前は \"Cour\" " +#~ "のように最初の2文字に省略されることがあります。小文字も受け入れられます。太字やイタリックのバリアントのフォントは受け入れられません(v1.16.0で変更)。ユーザー投稿のスクリプトがこの制限を回避する方法を提供しています" +#~ " - " +#~ "FAQの章の「ボタンとJavaScriptの使用」セクションを参照してください。使用する実際のフォントは、今では文字ごとに決定され、必要なすべてのフォント(またはサブフォント)が自動的に含まれます。したがって、このパラメーターについて心配する必要があることはほとんどありませんし、デフォルトのままにしておくことが推奨されます(非CJKテキスト部分にセリフのあるフォントが必要な場合を除く)。(v1.17.0で新規)" + +#~ msgid "the fill color. Default is white. (New in v1.16.0)" +#~ msgstr "塗りつぶしの色です。デフォルトは白です。(v1.16.0 で新規追加)" + +#~ msgid "the text color. Default is black." +#~ msgstr "テキストの色。デフォルトは黒です。" + +#~ msgid "the border color. Default is `None`. (New in v1.19.6)" +#~ msgstr "*(v1.19.6で新規追加)* 境界色。デフォルトは `None` です。" + +#~ msgid "" +#~ "text alignment, one of TEXT_ALIGN_LEFT, " +#~ "TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT - justify " +#~ "is **not supported**. (New in v1.17.0)" +#~ msgstr "" +#~ "テキストの配置は、TEXT_ALIGN_LEFT、TEXT_ALIGN_CENTER、TEXT_ALIGN_RIGHTのいずれかを指定します。ジャスティファイは" +#~ " **サポートされていません** 。(v1.17.0で新規追加)" + +#~ msgid "" +#~ "the created annotation. Color properties " +#~ "**can only be changed** using special" +#~ " parameters of :meth:`Annot.update`. There, " +#~ "you can also set a border color" +#~ " different from the text color." +#~ msgstr "" +#~ "作成された注釈。色のプロパティは、:meth:`Annot.update` の特別なパラメータを使用して " +#~ "**のみ変更できます**。そこでは、テキストの色とは異なる境界色を設定することもできます。" + +#~ msgid "" +#~ "the font to use when *text* is " +#~ "given, otherwise ignored. The same rules" +#~ " apply as for :meth:`Page.insert_textbox` " +#~ "-- which is the method " +#~ ":meth:`Page.apply_redactions` internally invokes. " +#~ "The replacement text will be " +#~ "**vertically centered**, if this is one" +#~ " of the CJK or :ref:`Base-14-Fonts`. " +#~ "(New in v1.16.12) .. note:: * " +#~ "For an **existing** font of the " +#~ "page, use its reference name as " +#~ "*fontname* (this is *item[4]* of its " +#~ "entry in :meth:`Page.get_fonts`). * For " +#~ "a **new, non-builtin** font, proceed " +#~ "as follows:: page.insert_text(point, # " +#~ "anywhere, but outside all redaction " +#~ "rectangles \"something\", # some " +#~ "non-empty string fontname=\"newname\", " +#~ "# new, unused reference name " +#~ "fontfile=\"...\", # desired font file" +#~ " render_mode=3, # makes the text" +#~ " invisible ) page.add_redact_annot(..., " +#~ "fontname=\"newname\")" +#~ msgstr "" +#~ "(新機能 v1.16.12)テキストが指定された場合に使用するフォントで、それ以外の場合は無視されます。 " +#~ ":meth:`Page.insert_textbox` " +#~ "に適用されるルールと同じです。:meth:`Page.apply_redactions` " +#~ "が内部的に呼び出すメソッドです。置換テキストは、CJKフォントまたは P :ref:`Base-14-Fonts`" +#~ " の場合、垂直方向に中央揃えになります。" + +#~ msgid "" +#~ "the font to use when *text* is " +#~ "given, otherwise ignored. The same rules" +#~ " apply as for :meth:`Page.insert_textbox` " +#~ "-- which is the method " +#~ ":meth:`Page.apply_redactions` internally invokes. " +#~ "The replacement text will be " +#~ "**vertically centered**, if this is one" +#~ " of the CJK or :ref:`Base-14-Fonts`. " +#~ "(New in v1.16.12)" +#~ msgstr "" +#~ "(新機能 v1.16.12)テキストが指定された場合に使用するフォントで、それ以外の場合は無視されます。 " +#~ ":meth:`Page.insert_textbox` " +#~ "に適用されるルールと同じです。:meth:`Page.apply_redactions` " +#~ "が内部的に呼び出すメソッドです。置換テキストは、CJKフォントまたは P :ref:`Base-14-Fonts`" +#~ " の場合、垂直方向に中央揃えになります。" + +#~ msgid "" +#~ "For an **existing** font of the " +#~ "page, use its reference name as " +#~ "*fontname* (this is *item[4]* of its " +#~ "entry in :meth:`Page.get_fonts`)." +#~ msgstr "" +#~ "ページの **既存** のフォントの場合、参照名を *fontname* " +#~ "として使用してください(これは :meth:`Page.get_fonts` のエントリの " +#~ "*item[4]* です)。" + +#~ msgid "For a **new, non-builtin** font, proceed as follows::" +#~ msgstr "**新しいビルトインでない** フォントの場合、次の手順を実行します::" + +#~ msgid "" +#~ "a `TableFinder` object that has the " +#~ "following significant attributes: * `cells`:" +#~ " a list of **all bboxes** on " +#~ "the page, that have been identified " +#~ "as table cells (across all tables). " +#~ "Each cell is a :data:`rect_like` tuple" +#~ " `(x0, y0, x1, y1)` of coordinates" +#~ " or `None`. * `tables`: a list " +#~ "of `Table` objects. This is `[]` " +#~ "if the page has no tables. Single" +#~ " tables can be found as items " +#~ "of this list. But the `TableFinder` " +#~ "object itself is also a sequence " +#~ "of its tables. This means that if" +#~ " `tabs` is a `TableFinder` object, " +#~ "then table \"n\" is delivered by " +#~ "`tabs.tables[n]` as well as by the " +#~ "shorter `tabs[n]`. * The `Table` " +#~ "object has the following attributes: " +#~ "* `bbox`: the bounding box of the" +#~ " table as a tuple `(x0, y0, x1," +#~ " y1)`. * `cells`: bounding boxes of" +#~ " the table's cells (list of tuples)." +#~ " A cell may also be `None`. *" +#~ " `extract()`: this method returns the " +#~ "text content of each table cell as" +#~ " a list of list of strings. *" +#~ " `to_markdown()`: this method returns the" +#~ " table as a **string in markdown " +#~ "format** (compatible to Github). Supporting" +#~ " viewers can render the string as " +#~ "a table. This output is optimized " +#~ "for **small token** sizes, which is " +#~ "especially beneficial for LLM/RAG feeds. " +#~ "Pandas DataFrames (see method `to_pandas()`" +#~ " below) offer an equivalent markdown " +#~ "table output which however is better " +#~ "readable for the human eye. * " +#~ "`to_pandas()`: this method returns the " +#~ "table as a `pandas " +#~ "`_ `DataFrame " +#~ "`_. " +#~ "DataFrames are very versatile objects " +#~ "allowing a plethora of table " +#~ "manipulation methods and outputs to " +#~ "almost 20 well-known formats, among " +#~ "them Excel files, CSV, JSON, " +#~ "markdown-formatted tables and more. " +#~ "`DataFrame.to_markdown()` generates a Github-" +#~ "compatible markdown format optimized for " +#~ "human readability. This method however " +#~ "requires the package " +#~ "[tablutate](https://pypi.org/project/tabulate/) to " +#~ "installed in addition to pandas itself." +#~ " * ``header``: a `TableHeader` object " +#~ "containing header information of the " +#~ "table. * `col_count`: an integer " +#~ "containing the number of table columns." +#~ " * `row_count`: an integer containing " +#~ "the number of table rows. * " +#~ "`rows`: a list of `TableRow` objects " +#~ "containing two attributes, ``bbox`` is " +#~ "the boundary box of the row, and" +#~ " `cells` is a list of table " +#~ "cells contained in this row. * " +#~ "The `TableHeader` object has the " +#~ "following attributes: * ``bbox``: the " +#~ "bounding box of the header. * " +#~ "`cells`: a list of bounding boxes " +#~ "containing the name of the respective" +#~ " column. * `names`: a list of " +#~ "strings containing the text of each " +#~ "of the cell bboxes. They represent " +#~ "the column names -- which are used" +#~ " when exporting the table to pandas" +#~ " DataFrames, markdown, etc. * `external`:" +#~ " a bool indicating whether the header" +#~ " bbox is outside the table body " +#~ "(`True`) or not. Table headers are " +#~ "never identified by the `TableFinder` " +#~ "logic. Therefore, if `external` is true," +#~ " then the header cells are not " +#~ "part of any cell identified by " +#~ "`TableFinder`. If `external == False`, " +#~ "then the first table row is the" +#~ " header. Please have a look at " +#~ "these `Jupyter notebooks `_, which" +#~ " cover standard situations like multiple" +#~ " tables on one page or joining " +#~ "table fragments across multiple pages." +#~ msgstr "" + +#~ msgid "" +#~ "PDF only: Add a \"rubber stamp\" " +#~ "like annotation to e.g. indicate the " +#~ "document's intended use (\"DRAFT\", " +#~ "\"CONFIDENTIAL\", etc.)." +#~ msgstr "PDF専用: \"ドラフト\"、\"機密\"などの文書の意図した使用を示すための「スタンプ」のような注釈を追加します。" + +#~ msgid "" +#~ "The stamp's text and its border " +#~ "line will automatically be sized and " +#~ "be put horizontally and vertically " +#~ "centered in the given rectangle. " +#~ ":attr:`Annot.rect` is automatically calculated " +#~ "to fit the given **width** and " +#~ "will usually be smaller than this " +#~ "parameter." +#~ msgstr "" +#~ "スタンプのテキストとその境界線は、自動的にサイズ変更され、指定された矩形内で水平および垂直方向に中央に配置されます。:attr:`Annot.rect`" +#~ " は指定された **幅** に合わせて自動的に計算され、通常はこのパラメータよりも小さくなります。" + +#~ msgid "" +#~ "This can be used to create " +#~ "watermark images: on a temporary PDF " +#~ "page create a stamp annotation with " +#~ "a low opacity value, make a pixmap" +#~ " from it with *alpha=True* (and " +#~ "potentially also rotate it), discard the" +#~ " temporary PDF page and use the " +#~ "pixmap with :meth:`insert_image` for your " +#~ "target PDF." +#~ msgstr "" +#~ "これは透かし画像を作成するために使用できます。一時的なPDFページ上に低い不透明度のスタンプ注釈を作成し、*alpha=True* " +#~ "でそれからピクスマップを作成し(おそらく回転させることもあります)、一時的なPDFページを破棄し、ターゲットのPDFに挿入するためにピクスマップを" +#~ " :meth:`insert_image` で使用します。" + +#~ msgid "\"blocks\" -- :meth:`TextPage.extractBLOCKS`" +#~ msgstr "" + +#~ msgid "\"words\" -- :meth:`TextPage.extractWORDS`" +#~ msgstr "" + +#~ msgid "\"html\" -- :meth:`TextPage.extractHTML`" +#~ msgstr "" + +#~ msgid "\"xhtml\" -- :meth:`TextPage.extractXHTML`" +#~ msgstr "" + +#~ msgid "\"xml\" -- :meth:`TextPage.extractXML`" +#~ msgstr "" + +#~ msgid "\"dict\" -- :meth:`TextPage.extractDICT`" +#~ msgstr "" + +#~ msgid "\"json\" -- :meth:`TextPage.extractJSON`" +#~ msgstr "" + +#~ msgid "\"rawdict\" -- :meth:`TextPage.extractRAWDICT`" +#~ msgstr "" + +#~ msgid "\"rawjson\" -- :meth:`TextPage.extractRAWJSON`" +#~ msgstr "" + +#~ msgid "" +#~ "A string indicating the requested " +#~ "format, one of the above. A " +#~ "mixture of upper and lower case is" +#~ " supported. Values \"words\" and " +#~ "\"blocks\" are also accepted (changed in" +#~ " v1.16.3)." +#~ msgstr "要求される形式を示す文字列、上記のいずれか。大文字と小文字の組み合わせがサポートされています" + +#~ msgid "Values \"words\" and \"blocks\" are also accepted (changed in v1.16.3)." +#~ msgstr "v1.16.3で変更された値 “words” と “blocks” も受け入れられるようになりました。" + +#~ msgid "" +#~ "restrict extracted text to this " +#~ "rectangle. If None, the full page " +#~ "is taken. Has **no effect** for " +#~ "options \"html\", \"xhtml\" and \"xml\". " +#~ "(New in v1.17.7)" +#~ msgstr "" +#~ "*(v1.17.7で新たに追加)* 抽出されたテキストをこの矩形に制限します。Noneの場合、フルページが取得されます。 " +#~ "“html”、“xhtml”、“xml”のオプションには **影響しません**。" + +#~ msgid "" +#~ "sort the output by vertical, then " +#~ "horizontal coordinates. In many cases, " +#~ "this should suffice to generate a " +#~ "\"natural\" reading order. Has no effect" +#~ " on (X)HTML and XML. Output option" +#~ " **\"words\"** sorts by `(y1, x0)` of" +#~ " the words' bboxes. Similar is true" +#~ " for \"blocks\", \"dict\", \"json\", " +#~ "\"rawdict\", \"rawjson\": they all are " +#~ "sorted by `(y1, x0)` of the resp." +#~ " block bbox. If specified for " +#~ "\"text\", then internally \"blocks\" is " +#~ "used. (New in v1.19.1)" +#~ msgstr "" +#~ "出力を垂直座標、次に水平座標でソートします。多くの場合、これで「自然な」読み取り順序を生成するのに十分です。 " +#~ "(X)HTMLおよびXMLには影響しません。出力オプション「words」は、単語の境界ボックスの `(y1, " +#~ "x0)` " +#~ "でソートされます。\"blocks\"、\"dict\"、\"json\"、\"rawdict\"、\"rawjson\" " +#~ "についても同様であり、それぞれのブロックの境界ボックスの `(y1, x0)` でソートされます。" +#~ " \"text\" に対して指定された場合、内部的には \"blocks\" が使用されます。" +#~ " (v1.19.1で新たに追加)" + +#~ msgid "" +#~ "The inclusion of text via the " +#~ "*clip* parameter is decided on a " +#~ "by-character level: a character becomes " +#~ "part of the output, if its bbox" +#~ " is contained in *clip* (changed in" +#~ " v1.18.2). This **deviates** from the " +#~ "algorithm used in redaction annotations: " +#~ "a character will be **removed if " +#~ "its bbox intersects** any redaction " +#~ "annotation." +#~ msgstr "" +#~ "*clip* パラメータを使用したテキストの含み方は、文字ごとのレベルで決定されます:文字のバウンディングボックスが " +#~ "*clip* に含まれる場合、その文字は出力の一部となります(v1.18.2で変更)。 " +#~ "これは、レダクション注釈で使用されるアルゴリズムとは異なります:文字のバウンディングボックスがどのレダクション注釈とも交差する場合、文字は削除されます。" + +#~ msgid "" +#~ "a :ref:`TextPage`. Execution may be " +#~ "significantly longer than :meth:`Page.get_textpage`." +#~ " For a full page OCR, **all " +#~ "text** will have the font " +#~ "\"GlyphlessFont\" from Tesseract. In case " +#~ "of partial OCR, normal text will " +#~ "keep its properties, and only text " +#~ "coming from images will have the " +#~ "GlyphlessFont. .. note:: **OCRed text " +#~ "is only available** to PyMuPDF's text" +#~ " extractions and searches if their " +#~ "`textpage` parameter specifies the output " +#~ "of this method. `This " +#~ "`_ Jupyter " +#~ "notebook walks through an example for" +#~ " using OCR textpages." +#~ msgstr "" + +#~ msgid "" +#~ "Return a list of meta information " +#~ "dictionaries for all images shown on " +#~ "the page. This works for all " +#~ "document types. Technically, this is a" +#~ " subset of the dictionary output of" +#~ " :meth:`Page.get_text`: the image binary " +#~ "content and any text on the page" +#~ " are ignored." +#~ msgstr "" +#~ "ページ上に表示されているすべてのイメージに関するメタ情報辞書のリストを返します。これはすべての文書タイプで機能します。技術的には、これは" +#~ " :meth:`Page.get_text` " +#~ "の辞書出力のサブセットであり、画像のバイナリコンテンツとページ上のテキストは無視されます。" + +#~ msgid "" +#~ "**PDF only.** Try to find the " +#~ ":data:`xref` for each image. Implies " +#~ "`hashes=True`. Adds the `\"xref\"` key " +#~ "to the dictionary. If not found, " +#~ "the value is 0, which means, the" +#~ " image is either \"inline\" or " +#~ "otherwise undetectable. Please note that " +#~ "this option has an extended response " +#~ "time, because the MD5 hashcode will " +#~ "be computed at least two times for" +#~ " each image with an xref. (New " +#~ "in v1.18.13)" +#~ msgstr "" +#~ "**PDFのみ。** 各画像の :data:`xref` " +#~ "を見つけようとします。`hashes=True` を意味します。辞書に `\"xref\"` " +#~ "キーを追加します。見つからない場合、値は0で、画像が「インライン」であるか、または他の方法で検出できないことを意味します。このオプションは、少なくとも各画像に対してMD5ハッシュコードが2回計算されるため、応答時間が延びることに注意してください。(v1.18.13で新規追加)" + +#~ msgid "" +#~ "A list of dictionaries. This includes" +#~ " information for **exactly those** images," +#~ " that are shown on the page --" +#~ " including *\"inline images\"*. In contrast" +#~ " to images included in " +#~ ":meth:`Page.get_text`, image **binary content** " +#~ "is not loaded, which drastically reduces" +#~ " memory usage. The dictionary layout " +#~ "is similar to that of image blocks" +#~ " in `page.get_text(\"dict\")`. =============== " +#~ "=============================================================== " +#~ "**Key** **Value** =============== " +#~ "=============================================================== " +#~ "number block number *(int)* bbox" +#~ " image bbox on page, " +#~ ":data:`rect_like` width original image " +#~ "width *(int)* height original image" +#~ " height *(int)* cs-name colorspace" +#~ " name *(str)* colorspace colorspace.n " +#~ "*(int)* xres resolution in " +#~ "x-direction *(int)* yres resolution " +#~ "in y-direction *(int)* bpc bits" +#~ " per component *(int)* size " +#~ "storage occupied by image *(int)* digest" +#~ " MD5 hashcode *(bytes)*, if " +#~ "*hashes* is true xref image " +#~ ":data:`xref` or 0, if *xrefs* is " +#~ "true transform matrix transforming image" +#~ " rect to bbox, :data:`matrix_like` " +#~ "=============== " +#~ "=============================================================== " +#~ "Multiple occurrences of the same image" +#~ " are always reported. You can detect" +#~ " duplicates by comparing their `digest` " +#~ "values." +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/pixmap.mo b/docs/locales/ja/LC_MESSAGES/pixmap.mo new file mode 100644 index 000000000..3c79519e5 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/pixmap.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/pixmap.po b/docs/locales/ja/LC_MESSAGES/pixmap.po new file mode 100644 index 000000000..c183a58c5 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/pixmap.po @@ -0,0 +1,2032 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 f1b1a7f0a4634aaeb54cc0b5874ab28c +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 9b7f06cdc04a4af28bf3417f5859a599 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 6c224f6a8b5d41c9b69fa3daecfaa92b +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../pixmap.rst:7 d95251aabeda466d86e4a43f44a77fcb +msgid "Pixmap" +msgstr "" + +#: ../../pixmap.rst:9 1919b532b8ab404da458c9f170fddff1 +msgid "" +"Pixmaps (\"pixel maps\") are objects at the heart of MuPDF's rendering " +"capabilities. They represent plane rectangular sets of pixels. Each pixel" +" is described by a number of bytes (\"components\") defining its color, " +"plus an optional alpha byte defining its transparency." +msgstr "Pixmap(「ピクセルマップ」)は、MuPDFの描画機能の中心にあるオブジェクトです。それらは平面的な長方形のピクセルセットを表します。各ピクセルは、その色を定義するバイト数(「コンポーネント」)と、透明度を定義するオプションのアルファバイトで説明されます" + +#: ../../pixmap.rst:11 9486bfc43c4846ee9e7e44f6fea9705c +msgid "" +"In PyMuPDF, there exist several ways to create a pixmap. Except the first" +" one, all of them are available as overloaded constructors. A pixmap can " +"be created ..." +msgstr "PyMuPDFでは、ピクセルマップを作成するためのいくつかの方法が存在します。最初の方法を除いて、すべての方法はオーバーロードされたコンストラクタとして使用できます。ピクセルマップは、以下の方法で作成できます..." + +#: ../../pixmap.rst:13 5856db6b457648038a199c62b3682e92 +msgid "from a document page (method :meth:`Page.get_pixmap`)" +msgstr "ドキュメントページから(メソッド :meth:`Page.get_pixmap` を使用)" + +#: ../../pixmap.rst:14 90fe46c3478d4b96b209fbdaf5081b7b +msgid "empty, based on :ref:`Colorspace` and :ref:`IRect` information" +msgstr ":ref:`Colorspace` と :ref:`IRect` 情報に基づいて空のものを作成" + +#: ../../pixmap.rst:15 3e34e939b62e44afaf21e619fb10ad36 +msgid "from a file" +msgstr "ファイルから" + +#: ../../pixmap.rst:16 d09799bf949f48f6abb5dcf83b49481a +msgid "from an in-memory image" +msgstr "メモリ内のイメージから" + +#: ../../pixmap.rst:17 2eee9e4101a948b79a0075e4240d5a73 +msgid "from a memory area of plain pixels" +msgstr "平易なピクセルのメモリ領域から" + +#: ../../pixmap.rst:18 2d3e24e4928c499cb236455abd0cd31c +msgid "from an image inside a PDF document" +msgstr "PDFドキュメント内のイメージから" + +#: ../../pixmap.rst:19 54101a4bc1c84b02bd4da615683275df +msgid "as a copy of another pixmap" +msgstr "他のピクセルマップのコピーとして" + +#: ../../pixmap.rst:21 b17903252e15401bb880d1a57830b7cc +msgid "" +"A number of image formats is supported as input for points 3. and 4. " +"above. See section :ref:`ImageFiles`." +msgstr "" +"3.と4.のポイントに対する入力として多くの画像フォーマットがサポートされています。サポートされている入力画像フォーマットの詳細については、:ref:`ImageFiles`" +" のセクションを参照してください。" + +#: ../../pixmap.rst:23 a656abca152d4678928047358a2885fa +msgid "" +"Have a look at the :ref:`FAQ` section to see some pixmap usage \"at " +"work\"." +msgstr "ピクセルマップの使用例については、:ref:`FAQ` セクションをご覧ください。" + +#: ../../pixmap.rst:26 9776682c8c63410182791a7931cc6ea8 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../pixmap.rst:26 efacb9c8b8604d5082365a84df87f288 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../pixmap.rst:28 a663d99b6c2a46d0962d0c909af2a40e +msgid ":meth:`Pixmap.clear_with`" +msgstr "" + +#: ../../pixmap.rst:28 3c0d7d8f0a5b4906855b1a2d36b5fea2 +msgid "clear parts of the pixmap" +msgstr "ピクセルマップの一部をクリアします。" + +#: ../../pixmap.rst:29 52cbee35a17e4e12909328f7fcf6aa77 +msgid ":meth:`Pixmap.color_count`" +msgstr "" + +#: ../../pixmap.rst:29 14d1d9f70b254f81a3abe5c43a1e537e +msgid "determine used colors" +msgstr "使用された色を決定します。" + +#: ../../pixmap.rst:30 7a6da5837cc5436ca5b86185921412ef +msgid ":meth:`Pixmap.color_topusage`" +msgstr "" + +#: ../../pixmap.rst:30 ec4d405a88d44d9ebd213e83aca42234 +msgid "determine share of most used color" +msgstr "最も使用される色のシェアを決定します。" + +#: ../../pixmap.rst:31 58a2bb4580f24606adf63bb3fee24c2a +msgid ":meth:`Pixmap.copy`" +msgstr "" + +#: ../../pixmap.rst:31 ff8f9de2afc740448705f85ba7dd034a +msgid "copy parts of another pixmap" +msgstr "別のピクセルマップの一部をコピーします。" + +#: ../../pixmap.rst:32 4996b53890524fcdbcc26b9c2afe9b1d +msgid ":meth:`Pixmap.gamma_with`" +msgstr "" + +#: ../../pixmap.rst:32 22029953270f41a6807854232050de16 +msgid "apply a gamma factor to the pixmap" +msgstr "ピクセルマップにガンマ係数を適用します。" + +#: ../../pixmap.rst:33 40cf6037eadb4832a1b44506d8d01399 +msgid ":meth:`Pixmap.invert_irect`" +msgstr "" + +#: ../../pixmap.rst:33 0707ed5e821a4a4d81f4ea9a52658267 +msgid "invert the pixels of a given area" +msgstr "指定された領域のピクセルを反転させます。" + +#: ../../pixmap.rst:34 d23537d6fee14f07aa66138511dc6c65 +msgid ":meth:`Pixmap.pdfocr_save`" +msgstr "" + +#: ../../pixmap.rst:34 ../../pixmap.rst:35 8f6ccda63f43474180b9586a57391f58 +#: 945e614e77b64eb482a19001cb8de49f +msgid "save the pixmap as an OCRed 1-page PDF" +msgstr "OCR処理済みの1ページのPDFとしてピクセルマップを保存します。" + +#: ../../pixmap.rst:35 d5176b33c73340a6a2906f65b588a4f7 +msgid ":meth:`Pixmap.pdfocr_tobytes`" +msgstr "" + +#: ../../pixmap.rst:36 2da9bdc5ad864f37b2c62da533d67cf6 +msgid ":meth:`Pixmap.pil_image`" +msgstr "" + +#: ../../pixmap.rst:36 29e69cbd73be44089903ba1740e2cf07 +msgid "create a Pillow Image" +msgstr "" + +#: ../../pixmap.rst:37 6b929dd44136455a858f7179f537c18a +msgid ":meth:`Pixmap.pil_save`" +msgstr "" + +#: ../../pixmap.rst:37 37b0e24c872346ae9f1a647c4bd24012 +#, fuzzy +msgid "save as a Pillow Image" +msgstr "Pillowを使用してイメージとして保存します。" + +#: ../../pixmap.rst:38 c3921105394e4b3eb76fdc5cb6ad144a +msgid ":meth:`Pixmap.pil_tobytes`" +msgstr "" + +#: ../../pixmap.rst:38 36ae1252a8db4e1b9277468406a618fb +#, fuzzy +msgid "write to `bytes` as a Pillow Image" +msgstr "Pillowを使用してバイトオブジェクトに書き込みます。" + +#: ../../pixmap.rst:39 7c75d4ea1f1a4fb58bad635407d3e379 +msgid ":meth:`Pixmap.pixel`" +msgstr "" + +#: ../../pixmap.rst:39 04aafaa7a54f4026bb9e10428ea2e587 +msgid "return the value of a pixel" +msgstr "ピクセルの値を返します。" + +#: ../../pixmap.rst:40 07d7d6018e0345919ffa6c3ddb7ff4c0 +msgid ":meth:`Pixmap.save`" +msgstr "" + +#: ../../pixmap.rst:40 cbe561326c8a448cb7de762f58476155 +msgid "save the pixmap in a variety of formats" +msgstr "さまざまな形式でピクセルマップを保存します。" + +#: ../../pixmap.rst:41 696958d881f14015aa1faf107f4bc044 +msgid ":meth:`Pixmap.set_alpha`" +msgstr "" + +#: ../../pixmap.rst:41 ab4fdc89e97142e182425396b40ef97a +msgid "set alpha values" +msgstr "アルファ値を設定します。" + +#: ../../pixmap.rst:42 3817002ddb67438ab6df965afcb5320a +msgid ":meth:`Pixmap.set_dpi`" +msgstr "" + +#: ../../pixmap.rst:42 8a12351eeb954305980a638264a4fd54 +msgid "set the image resolution" +msgstr "イメージの解像度を設定します。" + +#: ../../pixmap.rst:43 a7301742a2114f6f8ab65df62129d10c +msgid ":meth:`Pixmap.set_origin`" +msgstr "" + +#: ../../pixmap.rst:43 aee6bba31fc04b56b7c710c5f095b7f5 +msgid "set pixmap x,y values" +msgstr "ピクセルマップのx、y値を設定します。" + +#: ../../pixmap.rst:44 e0ac8dd1a57f40d0945b8173d48c4570 +msgid ":meth:`Pixmap.set_pixel`" +msgstr "" + +#: ../../pixmap.rst:44 cb2f37a2a7d44bb580341d508b707253 +msgid "set color and alpha of a pixel" +msgstr "ピクセルの色とアルファを設定します。" + +#: ../../pixmap.rst:45 95436967c5a14d328b60e860cb021fd5 +msgid ":meth:`Pixmap.set_rect`" +msgstr "" + +#: ../../pixmap.rst:45 3001983c35d54c1c885c6bbb3caec2a9 +msgid "set color and alpha of all pixels in a rectangle" +msgstr "四角形内のすべてのピクセルの色とアルファを設定します。" + +#: ../../pixmap.rst:46 7eb6d64ab1c8421ab755cfccb2c22809 +msgid ":meth:`Pixmap.shrink`" +msgstr "" + +#: ../../pixmap.rst:46 a5e458763b8647049d6747f7cd17b090 +msgid "reduce size keeping proportions" +msgstr "比率を保持しながらサイズを縮小します。" + +#: ../../pixmap.rst:47 2ff2589ddd7d456c9bc8b6cbaebb6e4b +msgid ":meth:`Pixmap.tint_with`" +msgstr "" + +#: ../../pixmap.rst:47 29e0e49cd2c04309950e986eda59b280 +msgid "tint the pixmap" +msgstr "ピクセルマップに色調を付けます。" + +#: ../../pixmap.rst:48 b7cae291504143d78af18adba4d49907 +msgid ":meth:`Pixmap.tobytes`" +msgstr "" + +#: ../../pixmap.rst:48 a26221b5a3bc43099df949aca8048bdb +msgid "return a memory area in a variety of formats" +msgstr "さまざまな形式のメモリ領域を返します。" + +#: ../../pixmap.rst:49 ae2d32cf29374accbbc65429417194f4 +msgid ":meth:`Pixmap.warp`" +msgstr "" + +#: ../../pixmap.rst:49 6b5e465bd6d247cebcdf5944e094d2b7 +msgid "return a pixmap made from a quad inside" +msgstr "内部の四角形から作成されたピクセルマップを返します。" + +#: ../../pixmap.rst:50 6ba96e42ecd541fdaa95d63b609c3f41 +msgid ":attr:`Pixmap.alpha`" +msgstr "" + +#: ../../pixmap.rst:50 3b7c2ea159d14c73b409a6a1dc1b8534 +msgid "transparency indicator" +msgstr "透明度指示子" + +#: ../../pixmap.rst:51 fa9053d32e4c45e8a8eccc4f38ca49a9 +msgid ":attr:`Pixmap.colorspace`" +msgstr "" + +#: ../../pixmap.rst:51 33f9f6c061f842418df3b7806886b98b +msgid "pixmap's :ref:`Colorspace`" +msgstr "ピクセルマップの :ref:`Colorspace`" + +#: ../../pixmap.rst:52 20cad8a8d6484916abe2f4ba0d63ace9 +msgid ":attr:`Pixmap.digest`" +msgstr "" + +#: ../../pixmap.rst:52 489c06ce324645d88ed77246e571757e +msgid "MD5 hashcode of the pixmap" +msgstr "ピクセルマップのMD5ハッシュコード" + +#: ../../pixmap.rst:53 234a88bff92c4e49846b2048d469c54c +msgid ":attr:`Pixmap.height`" +msgstr "" + +#: ../../pixmap.rst:53 18a053aba3ec43c8afd44bf4be96c026 +msgid "pixmap height" +msgstr "ピクセルマップの高さ" + +#: ../../pixmap.rst:54 398b74503d5b49879f02786a35330b11 +msgid ":attr:`Pixmap.interpolate`" +msgstr "" + +#: ../../pixmap.rst:54 91977f5e1b7543f8a2b03b19e18d1501 +msgid "interpolation method indicator" +msgstr "補間メソッド指示子" + +#: ../../pixmap.rst:55 54b207f4eab2405b816d884877ad9c4c +msgid ":attr:`Pixmap.is_monochrome`" +msgstr "" + +#: ../../pixmap.rst:55 fb08d786db114396b984c5d030c45d02 +msgid "check if only black and white occur" +msgstr "黒と白だけが存在するか確認します。" + +#: ../../pixmap.rst:56 3832489ec6eb4347921772d35d4cfd59 +msgid ":attr:`Pixmap.is_unicolor`" +msgstr "" + +#: ../../pixmap.rst:56 ff68226cd8014c249ae8fdd8c2cff414 +msgid "check if only one color occurs" +msgstr "単一の色しか存在しないか確認します。" + +#: ../../pixmap.rst:57 fa0770af685443109d3f4640946c8159 +msgid ":attr:`Pixmap.irect`" +msgstr "" + +#: ../../pixmap.rst:57 f8e324284a7d40e6955f1b8ddd6f2315 +msgid ":ref:`IRect` of the pixmap" +msgstr "ピクセルマップの :ref:`IRect` " + +#: ../../pixmap.rst:58 7c2c5f3990c44ab0968a07d38671855d +msgid ":attr:`Pixmap.n`" +msgstr "" + +#: ../../pixmap.rst:58 428d23880e74446f9a312a5881b14f3f +msgid "bytes per pixel" +msgstr "ピクセルごとのバイト数" + +#: ../../pixmap.rst:59 c49a0d0f062c42668552f8ecb5da0b9e +msgid ":attr:`Pixmap.samples_mv`" +msgstr "" + +#: ../../pixmap.rst:59 f30d3be26197416bb0fb72016cd0d7ec +msgid "`memoryview` of pixel area" +msgstr "ピクセル領域の `memoryview` " + +#: ../../pixmap.rst:60 07c58756a4814330a53d2862c3dc9eb0 +msgid ":attr:`Pixmap.samples_ptr`" +msgstr "" + +#: ../../pixmap.rst:60 72404d375527475bb8ba045f0a4ff0d6 +msgid "Python pointer to pixel area" +msgstr "ピクセル領域へのPythonポインタ" + +#: ../../pixmap.rst:61 6c6478b2c83846a09684bd5f600e8915 +msgid ":attr:`Pixmap.samples`" +msgstr "" + +#: ../../pixmap.rst:61 5822f1f53ba74e128e75ce40d18e9440 +msgid "`bytes` copy of pixel area" +msgstr "ピクセル領域の `bytes` コピー" + +#: ../../pixmap.rst:62 94a4d840ade34b14a05adda1fcc8e4c4 +msgid ":attr:`Pixmap.size`" +msgstr "" + +#: ../../pixmap.rst:62 89478a5e4ab546798cd82132e9309e3d +msgid "pixmap's total length" +msgstr "ピクセルマップの合計長さ" + +#: ../../pixmap.rst:63 73c0d5608362402887a7c810885470e0 +msgid ":attr:`Pixmap.stride`" +msgstr "" + +#: ../../pixmap.rst:63 62b855ce82e24e63b8d146ac28be7489 +msgid "size of one image row" +msgstr "1つの画像行のサイズ" + +#: ../../pixmap.rst:64 d1467286769b4d3ea7798bffedc07b8e +msgid ":attr:`Pixmap.width`" +msgstr "" + +#: ../../pixmap.rst:64 dbb3847a398f49c786d38b316353a732 +msgid "pixmap width" +msgstr "ピクセルマップの幅" + +#: ../../pixmap.rst:65 7a3a9ef923b3497592409c890367e1ad +msgid ":attr:`Pixmap.x`" +msgstr "" + +#: ../../pixmap.rst:65 e00e9a4607a14feead94f7ba401671f0 +msgid "X-coordinate of top-left corner" +msgstr "左上隅のX座標" + +#: ../../pixmap.rst:66 1337a114bbf34fbfa97d3fea51fe5ee2 +msgid ":attr:`Pixmap.xres`" +msgstr "" + +#: ../../pixmap.rst:66 af03d65619b640d2aafc512876ee5094 +msgid "resolution in X-direction" +msgstr "X方向の解像度" + +#: ../../pixmap.rst:67 a8d8d3f044dd41fdb06c38ba95356e2b +msgid ":attr:`Pixmap.y`" +msgstr "" + +#: ../../pixmap.rst:67 88ca23844fdd4a13a0c82946ea9d9604 +msgid "Y-coordinate of top-left corner" +msgstr "左上隅のY座標" + +#: ../../pixmap.rst:68 937853506029460e923b4840f23b6172 +msgid ":attr:`Pixmap.yres`" +msgstr "" + +#: ../../pixmap.rst:68 eca2b563beb946958293e50aba010c60 +msgid "resolution in Y-direction" +msgstr "Y方向の解像度" + +#: ../../pixmap.rst:71 3831cc44d77c448483fb36609c531af8 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../pixmap.rst:77 e82689d4f22140fda01d0a1f1f961166 +msgid "" +"**New empty pixmap:** Create an empty pixmap of size and origin given by " +"the rectangle. So, *irect.top_left* designates the top left corner of the" +" pixmap, and its width and height are *irect.width* resp. *irect.height*." +" Note that the image area is **not initialized** and will contain crap " +"data -- use eg. :meth:`clear_with` or :meth:`set_rect` to be sure." +msgstr "" +"**新しい空のピクマップ:** 指定された矩形のサイズと原点を持つ空のピクマップを作成します。したがって、*irect.top_left* " +"はピクマップの左上隅を示し、その幅と高さは *irect.width* および *irect.height* です。イメージ領域は " +"**初期化されず** 、データが格納されます。データを初期化するには、:meth:`clear_with` や :meth:`set_rect` " +"などを使用してください。" + +#: ../../pixmap.rst 04c82acbb0704deb9b8e5c18165b6bf8 +#: 3727ed9732064cdbbdf44c77c088bed4 43ec6c91b1d64ca8974534981e3cb2fe +#: 46501be6a08942d4ba0f524dd9f99336 598ec9d89b3844acaa2768e506e1ef84 +#: 649475c8ecfb464c9a6ae739de94f281 6b985c0bf351436a9cfa625a54c1663b +#: 744515809c7f4c24be498f9282730a42 89e68a41c4ab45c7ba0df17aa9e45060 +#: 8b28e1c83aa64bdc848974dc2794c508 8f78a6d5a5404e2cb7a1664e7dfbe25c +#: 97edca6f7e0d4f258235e00ff34dcfc2 9940b74d0d8b4c70bb697f48c9e9759d +#: 9ce878d378ba480d9776dfa61598d068 a0e92ef5f10048548aab0373bdbeac5c +#: a19bfcafbcfa4894833112ca5e9c4b14 a68d91085636432db6ebe4af6b1d1eed +#: ae311133670f4224a80bdacc16dd9421 b2e3cbd6ddeb4711961585117e6bdf75 +#: c2f1342870c0459a8f8f4548dec2e90e d3f6dc68b14b4fd7a9992ed78ef26d74 +#: daba744043af44fe9f3519358e39f5b5 de7213366a444b92a8e0027f86b8ed23 +#: df96a79ef0f04acf842a1d57511a32d3 e3c085f3fcd64c96bb4c6c637182193f +#: e4c967fecb7c4109b1bd470f975a0b97 f48cc9d057a74523a74a4fb856788ddb +#: fd47620a6c93482dbb9a81e5019c6d94 +msgid "Parameters" +msgstr "" + +#: ../../pixmap.rst:79 d6bdcdbc91ee45fb94a2a9582c61fc21 +msgid "colorspace." +msgstr "カラースペース。" + +#: ../../pixmap.rst:82 5402c078cd664e5da159da173e297681 +msgid "The pixmap's position and dimension." +msgstr "ピクマップの位置と寸法。" + +#: ../../pixmap.rst:84 d9476783cc2e430a87b9e0fbdd157a2d +msgid "" +"Specifies whether transparency bytes should be included. Default is " +"``False``." +msgstr "透明度バイトを含めるかどうかを指定します。デフォルトは ``False`` です。" + +#: ../../pixmap.rst:88 4eb4292d85c2465492ae894a48bdd47c +msgid "" +"**Copy and set colorspace:** Copy *source* pixmap converting colorspace. " +"Any colorspace combination is possible, but source colorspace must not be" +" ``None``." +msgstr "" +"**コピーとカラースペースの設定:** カラースペースを変換しながら *ソース* " +"ピクマップをコピーします。どのカラースペースの組み合わせでも可能ですが、ソースカラースペースは ``None`` であってはいけません。" + +#: ../../pixmap.rst:90 d46ac4b8cb6448e3b6b89babd13d2f7c +msgid "" +"desired **target** colorspace. This **may also be** ``None``. In this " +"case, a \"masking\" pixmap is created: its :attr:`Pixmap.samples` will " +"consist of the source's alpha bytes only." +msgstr "" +"**ターゲット** となるカラースペース。これは ``None`` **である場合もあります** 。この場合、 \"マスク\" " +"ピクマップが作成されます。その :attr:`Pixmap.samples` は、ソースのアルファバイトだけで構成されます。" + +#: ../../pixmap.rst:93 ../../pixmap.rst:112 37a3a54715b24a98ae2d3e3568ab220b +#: d14a39f52b72446da45629a3f0106813 +msgid "the source pixmap." +msgstr "ソースピクマップ。" + +#: ../../pixmap.rst:98 3d9371c0866044dbb696026132fda243 +msgid "New in v1.18.18" +msgstr "バージョン 1.18.18 で新規追加" + +#: ../../pixmap.rst:100 9adab13dd6aa458eb5007fa2d87356be +msgid "" +"**Copy and add image mask:** Copy *source* pixmap, add an alpha channel " +"with transparency data from a mask pixmap." +msgstr "**コピーとイメージマスクの追加:** ソースピクマップをコピーし、マスクピクマップから透明度データを持つアルファチャネルを追加します。" + +#: ../../pixmap.rst:102 9032a54c3bbe423889497b70db24173e +msgid "pixmap without alpha channel." +msgstr "アルファチャネルを持たないピクマップ。" + +#: ../../pixmap.rst:105 b3628e7ef37d4f59bd58c1706097c9d4 +msgid "a mask pixmap. Must be a graysale pixmap." +msgstr "マスクピクマップ。グレースケールのピクマップである必要があります。" + +#: ../../pixmap.rst:110 b2fa248a54904ef7bf47c4b6485fa524 +msgid "" +"**Copy and scale:** Copy *source* pixmap, scaling new width and height " +"values -- the image will appear stretched or shrunk accordingly. Supports" +" partial copying. The source colorspace may be ``None``." +msgstr "" +"**コピーとスケーリング:** " +"ソースピクマップをコピーし、新しい幅と高さの値にスケーリングします。イメージはそれに応じてストレッチまたは縮小されます。部分的なコピーをサポートしています。ソースカラースペースは" +" ``None`` であってもかまいません。" + +#: ../../pixmap.rst:115 fb20f915c5c34d8eb95a37f556c276db +msgid "desired target width." +msgstr "ターゲットの幅。" + +#: ../../pixmap.rst:117 369c74bc223b4d638f825842f16c1702 +msgid "desired target height." +msgstr "ターゲットの高さ。" + +#: ../../pixmap.rst:119 317c6cb2f5b94e99946548f9f90c0874 +msgid "restrict the resulting pixmap to this region of the **scaled** pixmap." +msgstr "**スケーリングされた** ピクマップのこの領域に制限します。" + +#: ../../pixmap.rst:121 8567b7f81bb14779aa9cef3108556966 +msgid "" +"If width or height do not *represent* integers (i.e. `value.is_integer() " +"!= True`), then the resulting pixmap **will have an alpha channel**." +msgstr "" +"幅または高さが整数を *表していない* 場合(つまり、`value.is_integer() != True` の場合)、結果のピクマップには " +"**アルファチャンネルが含まれます** 。" + +#: ../../pixmap.rst:125 76c709b8b48042c58ce3db57a8f83ddf +msgid "" +"**Copy and add or drop alpha:** Copy *source* and add or drop its alpha " +"channel. Identical copy if *alpha* equals *source.alpha*. If an alpha " +"channel is added, its values will be set to 255." +msgstr "" +"**コピーしてアルファの追加または削除:** *ソース* をコピーし、そのアルファチャンネルを追加または削除します。*alpha* が " +"*source.alpha* と等しい場合、同一のコピーになります。アルファチャンネルが追加される場合、その値は255に設定されます。" + +#: ../../pixmap.rst:127 ../../pixmap.rst:305 cf7368bac4e641b1a0cf7dd4a638d7c6 +#: d49c796034f54eb4b7f1e43679eb0288 +msgid "source pixmap." +msgstr "ソースのピクマップ。" + +#: ../../pixmap.rst:130 efb80ba0754740939eaab8d979f6b613 +msgid "" +"whether the target will have an alpha channel, default and mandatory if " +"source colorspace is ``None``." +msgstr "対象にアルファチャンネルがあるかどうか、デフォルトで、ソースのcolorspaceが ``None`` の場合は必須です。" + +#: ../../pixmap.rst:132 79e713cd359b428f85275cf68ed67f18 +msgid "" +"A typical use includes separation of color and transparency bytes in " +"separate pixmaps. Some applications require this like e.g. " +"*wx.Bitmap.FromBufferAndAlpha()* of *wxPython*:" +msgstr "" +"典型的な使用例には、カラーと透明バイトを別々のピクマップに分離することが含まれます。一部のアプリケーションでは、*wxPython* の " +"*wx.Bitmap.FromBufferAndAlpha()* など、これが必要です。" + +#: ../../pixmap.rst:142 46408f56f93c41a092af425f5ed67231 +msgid "" +"**From a file:** Create a pixmap from *filename*. All properties are " +"inferred from the input. The origin of the resulting pixmap is *(0, 0)*." +msgstr "" +"**ファイルから:** ファイル名から pixmap を作成します。すべてのプロパティは入力から推測されます。生成される pixmap の原点は " +"*(0, 0)* です。" + +#: ../../pixmap.rst:144 d3fbb1369429407bbceaf1dbeccd451f +msgid "Path of the image file." +msgstr "画像ファイルのパス。" + +#: ../../pixmap.rst:148 d6a2b16b581a44f4bbc0f0f4b98cd818 +msgid "" +"**From memory:** Create a pixmap from a memory area. All properties are " +"inferred from the input. The origin of the resulting pixmap is *(0, 0)*." +msgstr "" +"**メモリから:** メモリ領域から pixmap を作成します。すべてのプロパティは入力から推測されます。生成される pixmap の原点は " +"*(0, 0)* です。" + +#: ../../pixmap.rst:150 0284b771e4d64adb8514224b52cdbb59 +msgid "" +"Data containing a complete, valid image. Could have been created by e.g. " +"*stream = bytearray(open('image.file', 'rb').read())*. Type *bytes* is " +"supported in **Python 3 only**, because *bytes == str* in Python 2 and " +"the method will interpret the stream as a filename. *Changed in version " +"1.14.13:* *io.BytesIO* is now also supported." +msgstr "" + +#: ../../pixmap.rst:150 e85d94e7e7724fed90ad33c9a06b8970 +msgid "" +"Data containing a complete, valid image. Could have been created by e.g. " +"*stream = bytearray(open('image.file', 'rb').read())*. Type *bytes* is " +"supported in **Python 3 only**, because *bytes == str* in Python 2 and " +"the method will interpret the stream as a filename." +msgstr "" +"完全で有効な画像を含むデータ。例えば、*stream = bytearray(open('image.file', 'rb').read())* " +"などで作成できます。Python 2 では *bytes* はサポートされていないため、**Python 3 のみ** " +"対応しています。なぜなら、Python 2 では *bytes == str* " +"となり、このメソッドはストリームをファイル名と解釈する可能性があるからです。" + +#: ../../pixmap.rst:152 8f9e9f6db15b440ab212e1a2626983a5 +msgid "*Changed in version 1.14.13:* *io.BytesIO* is now also supported." +msgstr "*バージョン 1.14.13 で変更:* *io.BytesIO* もサポートされるようになりました。" + +#: ../../pixmap.rst:157 c8766adeba194cd0aa61ddff1e42b371 +msgid "" +"**From plain pixels:** Create a pixmap from *samples*. Each pixel must be" +" represented by a number of bytes as controlled by the *colorspace* and " +"*alpha* parameters. The origin of the resulting pixmap is *(0, 0)*. This " +"method is useful when raw image data are provided by some other program " +"-- see :ref:`FAQ`." +msgstr "" +"**生のピクセルから:** *サンプル* から pixmap を作成します。各ピクセルは、*カラースペース* と *alpha* " +"パラメーターによって制御されるバイト数で表現される必要があります。生成される pixmap の原点は *(0, 0)* " +"です。このメソッドは、他のプログラムによって生の画像データが提供される場合に有用です - :ref:`FAQ` を参照してください。" + +#: ../../pixmap.rst:159 748eae75794c4310900a51b65cc9be9d +msgid "Colorspace of image." +msgstr "画像のカラースペース。" + +#: ../../pixmap.rst:162 b9049341bb784d6391fd8770cfe62e9b +msgid "image width" +msgstr "画像の幅" + +#: ../../pixmap.rst:164 bbf94fa288d949db8b7c860097c779e0 +msgid "image height" +msgstr "画像の高さ" + +#: ../../pixmap.rst:166 49f8235d84c4479baa637c94f5801efe +msgid "" +"an area containing all pixels of the image. Must include alpha values if " +"specified. *Changed in version 1.14.13:* (1) *io.BytesIO* can now also " +"be used. (2) Data are now **copied** to the pixmap, so may safely be " +"deleted or become unavailable." +msgstr "" + +#: ../../pixmap.rst:166 fdfde73a7ead42b98a6f0ba2474a0302 +msgid "" +"an area containing all pixels of the image. Must include alpha values if " +"specified." +msgstr "画像のすべてのピクセルを含む領域。指定されている場合はアルファ値を含める必要があります。" + +#: ../../pixmap.rst:168 8db70acf22fe4db7922f7ac5d3710a37 +msgid "" +"*Changed in version 1.14.13:* (1) *io.BytesIO* can now also be used. (2) " +"Data are now **copied** to the pixmap, so may safely be deleted or become" +" unavailable." +msgstr "" +"*バージョン 1.14.13 で変更:* (1) *io.BytesIO* も使用できるようになりました。 (2) データは pixmap に " +"**コピーされる** ようになり、安全に削除または利用不可能になります。" + +#: ../../pixmap.rst:170 3dc4a28f2cb64370bf2b37f89021c8d4 +msgid "whether a transparency channel is included." +msgstr "透明チャネルを含めるかどうか。" + +#: ../../pixmap.rst:174 4572fc41779d42daab5dc8f513e35d38 +msgid "" +"The following equation **must be true**: *(colorspace.n + alpha) * width " +"* height == len(samples)*." +msgstr "" +"以下の式が **成り立つ必要があります** : *(colorspace.n + alpha) * width * height == " +"len(samples)*。" + +#: ../../pixmap.rst:175 91c653a957194cf9aad7b1c2ec50024d +msgid "" +"Starting with version 1.14.13, the samples data are **copied** to the " +"pixmap." +msgstr "バージョン 1.14.13 以降、サンプルデータは pixmap に **コピーされます** 。" + +#: ../../pixmap.rst:180 fd23748ab8c64e67b31168fc0f996533 +msgid "" +"**From a PDF image:** Create a pixmap from an image **contained in PDF** " +"*doc* identified by its :data:`xref`. All pimap properties are set by the" +" image. Have a look at `extract-img1.py " +"`_ " +"and `extract-img2.py `_ to see how this can be used to recover all of a PDF's" +" images." +msgstr "" +"**PDFイメージから:** PDFドキュメント内の :data:`xref` " +"で識別されるイメージからピクスマップを作成します。ピクスマップのすべてのプロパティはイメージによって設定されます。これがどのように使用されるかを確認するには" +"、`extract-img1.py `_ と `extract-img2.py " +"`_ " +"をご覧ください。これにより、PDFのすべてのイメージを復元できます。" + +#: ../../pixmap.rst:182 e57129f36fc442d7baabec0f51abc872 +msgid "an opened |PDF| document." +msgstr "開かれた |PDF| ドキュメント。" + +#: ../../pixmap.rst:185 9384428260114615b87eaa0f8f724b7c +msgid "" +"the :data:`xref` of an image object. For example, you can make a list of " +"images used on a particular page with :meth:`Document.get_page_images`, " +"which also shows the :data:`xref` numbers of each image." +msgstr "" +"画像オブジェクトの :data:`xref`。たとえば、:meth:`Document.get_page_images` " +"を使用して特定のページで使用されるすべてのイメージのリストを作成し、各イメージの :data:`xref` 番号も表示できます。" + +#: ../../pixmap.rst:189 1981e302f46a4610b7a2e501f8e6239c +msgid "Initialize the samples area." +msgstr "サンプル領域を初期化します。" + +#: ../../pixmap.rst:191 5e1dbe6c76e94fd9a2abfa5fe50a04fb +msgid "" +"if specified, values from 0 to 255 are valid. Each color byte of each " +"pixel will be set to this value, while alpha will be set to 255 (non-" +"transparent) if present. If omitted, then all bytes (including any alpha)" +" are cleared to *0x00*." +msgstr "" +"指定された場合、0から255の値が有効です。各ピクセルの各カラーバイトはこの値に設定され、存在する場合はアルファが255(非透明)に設定されます。省略された場合、すべてのバイト(アルファを含む)が" +" *0x00* にクリアされます。" + +#: ../../pixmap.rst:193 9d7e7d36e32f479e80f5409d40483a8c +msgid "" +"the area to be cleared. Omit to clear the whole pixmap. Can only be " +"specified, if *value* is also specified." +msgstr "クリアする領域。ピクスマップ全体をクリアするには省略します。*value* も指定されている場合のみ指定できます。" + +#: ../../pixmap.rst:197 1867b419449b4b1b809f327a5dc3a5a2 +msgid "" +"Colorize a pixmap by replacing black and / or white with colors given as " +"**sRGB integer** values. Only colorspaces :data:`CS_GRAY` and " +":data:`CS_RGB` are supported, others are ignored with a warning." +msgstr "" +"ピクスマップを色付けして、黒と/または白を **sRGB整数値** として指定された色で置き換えます。:data:`CS_GRAY` と " +":data:`CS_RGB` のカラースペースのみサポートされており、他のカラースペースは警告付きで無視されます。" + +#: ../../pixmap.rst:199 ee0538d709654cafb0022a5998e893c5 +msgid "" +"If the colorspace is :data:`CS_GRAY`, the average *(red + green + " +"blue)/3* will be taken. The pixmap will be changed in place." +msgstr "カラースペースが :data:`CS_GRAY` の場合、平均(赤+緑+青)/3が取得されます。ピクスマップはその場で変更されます。" + +#: ../../pixmap.rst:201 c70e75dfe0c3421eb54e30276f0284f5 +msgid "replace black with this value. Specifying 0x000000 makes no changes." +msgstr "黒をこの値で置き換えます。0x000000を指定しても変更はありません。" + +#: ../../pixmap.rst:202 f27cad4bd7904458a7a1fe6ba0ed6956 +msgid "replace white with this value. Specifying 0xFFFFFF makes no changes." +msgstr "白をこの値で置き換えます。0xFFFFFFを指定しても変更はありません。" + +#: ../../pixmap.rst:204 4177fb0e3b154e05ab526555cd5cd17b +msgid "Examples:" +msgstr "例:" + +#: ../../pixmap.rst:206 e5c1b2331953426b92f104ec7b4eefea +msgid "`tint_with(0x000000, 0xFFFFFF)` is a no-op." +msgstr "`tint_with(0x000000, 0xFFFFFF)` は操作なしです。" + +#: ../../pixmap.rst:207 484345c4ad054003bdd3963f19c34815 +msgid "" +"`tint_with(0x00FF00, 0xFFFFFF)` changes black to green, leaves white " +"intact." +msgstr "`tint_with(0x00FF00, 0xFFFFFF)` は黒を緑に変更し、白はそのままです。" + +#: ../../pixmap.rst:208 a66550a8ae304016b8440aadfc19ff9f +msgid "`tint_with(0xFF0000, 0x0000FF)` changes black to red and white to blue." +msgstr "`tint_with(0xFF0000, 0x0000FF)` は黒を赤に変更し、白を青に変更します。" + +#: ../../pixmap.rst:213 6f6c315eb07246bebf541517bef1c9af +msgid "" +"Apply a gamma factor to a pixmap, i.e. lighten or darken it. Pixmaps with" +" colorspace ``None`` are ignored with a warning." +msgstr "ピクセルマップにガンマ係数を適用し、つまり明るくしたり暗くしたりします。色空間が ``None`` のピクセルマップは警告とともに無視されます。" + +#: ../../pixmap.rst:215 ab0e5804c9284ea4b62d73c90c08e97c +msgid "" +"*gamma = 1.0* does nothing, *gamma < 1.0* lightens, *gamma > 1.0* darkens" +" the image." +msgstr "*gamma = 1.0* は何も行いません。*gamma < 1.0* は明るくし、*gamma > 1.0* は暗くします。" + +#: ../../pixmap.rst:219 dcf93c5b5fc74c65b0e7e7a38afb730e +#, fuzzy +msgid "" +"Shrink the pixmap by dividing both, its width and height by 2\\ " +":sup:``n``." +msgstr "Pixmapを2の :sup:`n` 乗で縮小します。" + +#: ../../pixmap.rst:221 ed3452ecabbb456a9a4125ec50244103 +msgid "" +"determines the new pixmap (samples) size. For example, a value of 2 " +"divides width and height by 4 and thus results in a size of one 16\\ " +":sup:`th` of the original. Values less than 1 are ignored with a warning." +msgstr "新しいPixmap(サンプル)のサイズを決定します。例えば、値が2の場合、幅と高さを4分の1に分割し、元のサイズの16分の1のサイズになります。1未満の値は警告として無視されます。" + +#: ../../pixmap.rst:223 622d36faf6a84ca784a5f39e4a6afecf +msgid "" +"Use this methods to reduce a pixmap's size retaining its proportion. The " +"pixmap is changed \"in place\". If you want to keep original and also " +"have more granular choices, use the resp. copy constructor above." +msgstr "これを使用して比を保持したままPixmapのサイズを縮小します。Pixmapは「その場で」変更されます。元のピクセルを保持し、より詳細な選択肢を持つ場合は、上記のコピーコンストラクタを使用してください。" + +#: ../../pixmap.rst:227 b41a23c5cbcf4851a4dfea41bebbd818 +msgid "" +"*New in version:: 1.14.5:* Return the value of the pixel at location (x, " +"y) (column, line)." +msgstr "*バージョン1.14.5* で新規追加:位置(x、y)(列、行)のピクセルの値を返します。" + +#: ../../pixmap.rst:229 ../../pixmap.rst:239 34e3f8a207914947b7ae864e627f476b +#: b5899296b9574931a00793fcbed8a228 +msgid "the column number of the pixel. Must be in `range(pix.width)`." +msgstr "ピクセルの列番号。範囲 `range(pix.width)` 内である必要があります。" + +#: ../../pixmap.rst:230 de7004ec9480408d817e9173d2aeec9a +msgid "the line number of the pixel, Must be in `range(pix.height)`." +msgstr "ピクセルの行番号、範囲 `range(pix.height)` 内である必要があります。" + +#: ../../pixmap.rst 0f003f56bf334c439f6f6b6a7574e806 +#: 39f61f7639f54885b80b47c9b688e4fd 4c2d78f00b5545afa93b0237d9d3f83c +#: 5b0451f6760948e99e3a479b036bd134 6199399248b547f8b31035934dea477b +#: 6ffebba666d245319137b3f60b558d14 ab7d935cf0014d7e923fde5f07eaf8c0 +msgid "Return type" +msgstr "" + +#: ../../pixmap.rst 525feb713e504326aa9bdf81dc095cf5 +#: 575a0ab6647f45edaadc1b402789c2dc 6c245aebb10e4bc68e2466b7ce643745 +#: 778f2bee2f0047eea3156192ee064a3a 82376bcf78494f7781128fcd3cf9c925 +#: e30edb3a24a94205b22864eec8be09dd f0c8fa3e3758421bbf20f90fd89c5d98 +msgid "Returns" +msgstr "" + +#: ../../pixmap.rst:233 e34cdb0ee52b442b8342283c7fd67240 +msgid "" +"a list of color values and, potentially the alpha value. Its length and " +"content depend on the pixmap's colorspace and the presence of an alpha. " +"For RGBA pixmaps the result would e.g. be *[r, g, b, a]*. All items are " +"integers in `range(256)`." +msgstr "" +"色の値と、必要に応じてアルファ値のリスト。その長さと内容は、Pixmap " +"の色空間とアルファの存在に依存します。RGBAピクセルマップの場合、結果は例えば *[r、g、b、a]* となります。すべてのアイテムは " +"`range(256)` の整数です。" + +#: ../../pixmap.rst:237 1214d4d289dd487e8e37d0008feabd3f +msgid "" +"*New in version 1.14.7:* Manipulate the pixel at location (x, y) (column," +" line)." +msgstr "*バージョン1.14.7で新規追加:* 位置(x、y)(列、行)のピクセルを操作します。" + +#: ../../pixmap.rst:240 415c7880cf0d47f99f160198e045cea4 +msgid "the line number of the pixel. Must be in `range(pix.height)`." +msgstr "ピクセルの行番号、`range(pix.height)` 内である必要があります。" + +#: ../../pixmap.rst:241 4e3a399db293443d8dcbf77d7fff982c +msgid "" +"the desired pixel value given as a sequence of integers in `range(256)`. " +"The length of the sequence must equal :attr:`Pixmap.n`, which includes " +"any alpha byte." +msgstr "" +"`range(256)` の整数で表されるシーケンスとして指定された所望のピクセル値。シーケンスの長さは :attr:`Pixmap.n` " +"に等しくなければならず、これにはアルファバイトも含まれます。" + +#: ../../pixmap.rst:245 da97d5dacdf44c3ca5094216f3abd5ef +msgid "*New in version 1.14.8:* Set the pixels of a rectangle to a value." +msgstr "*新しいバージョン1.14.8で導入されました:* 特定の値で長方形のピクセルを設定します。" + +#: ../../pixmap.rst:247 a72865e349784eb693a3f0fa93ad1e88 +msgid "" +"the rectangle to be filled with the value. The actual area is the " +"intersection of this parameter and :attr:`Pixmap.irect`. For an empty " +"intersection (or an invalid parameter), no change will happen." +msgstr "" +"値で埋める長方形。実際のエリアはこのパラメータと :attr:`Pixmap.irect` " +"の交差です。空の交差(または無効なパラメータ)の場合、変更は行われません。" + +#: ../../pixmap.rst:248 c5c5f7d02f6740ba8b971722477d6cb6 +msgid "" +"the desired value, given as a sequence of integers in `range(256)`. The " +"length of the sequence must equal :attr:`Pixmap.n`, which includes any " +"alpha byte." +msgstr "" +"`range(256)` 内の整数のシーケンスとして指定された所望の値。シーケンスの長さは :attr:`Pixmap.n` " +"と等しくなければならず、これにはアルファバイトも含まれます。" + +#: ../../pixmap.rst:251 8cdc83e7d0e345d7867dcbaba407f32d +msgid "" +"``False`` if the rectangle was invalid or had an empty intersection with " +":attr:`Pixmap.irect`, else ``True``." +msgstr "irectが無効であるか、:attr:`Pixmap.irect` と交差しない場合は ``False``、それ以外の場合は ``True`` 。" + +#: ../../pixmap.rst:255 6ba322bc83d8475eaa533372fdfd7a32 +msgid "" +"This method is equivalent to :meth:`Pixmap.set_pixel` executed for each " +"pixel in the rectangle, but is obviously **very much faster** if many " +"pixels are involved." +msgstr "" +"このメソッドは、多くのピクセルが関与する場合に **非常に高速** であるため、長方形内の各ピクセルに対して実行される " +":meth:`Pixmap.set_pixel` と同等です。" + +#: ../../pixmap.rst:256 1deb34875fc84aa495a7d4d8f851a8f8 +msgid "" +"This method can be used similar to :meth:`Pixmap.clear_with` to " +"initialize a pixmap with a certain color like this: " +"*pix.set_rect(pix.irect, (255, 255, 0))* (RGB example, colors the " +"complete pixmap with yellow)." +msgstr "" +"このメソッドは、:meth:`Pixmap.clear_with` " +"のように、次のようにして特定の色でピクセルマップを初期化するために使用できます。 *pix.set_rect(pix.irect, (255, " +"255, 0))* (RGBの例、ピクセルマップ全体を黄色で色付けします)。" + +#: ../../pixmap.rst:260 52fe6e363b2b455d9c586691ba4152db +msgid "New in v1.17.7" +msgstr "v1.17.7で新規導入" + +#: ../../pixmap.rst:262 0c090e71bae74209acd402089dcc45dd +msgid "Set the x and y values of the pixmap's top-left point." +msgstr "ピクセルマップの左上の点のxとyの値を設定します。" + +#: ../../pixmap.rst:264 dfef8fa4195648868405de776251cd14 +msgid "x coordinate" +msgstr "x座標" + +#: ../../pixmap.rst:265 f312425c39dc4a46a348ec79c872dc81 +msgid "y coordinate" +msgstr "y座標" + +#: ../../pixmap.rst:270 5ff12410d1164a1c9a9c7864cbbe4b54 +msgid "New in v1.16.17" +msgstr "v1.16.17で新規導入." + +#: ../../pixmap.rst:272 423bf464c80345aba348c96dc8bcb0e6 +msgid "" +"Changed in v1.18.0: When saving as a PNG image, these values will be " +"stored now." +msgstr "v1.18.0で変更:PNGイメージとして保存する場合、これらの値が保存されるようになりました。" + +#: ../../pixmap.rst:274 90228b29c1c540bbbf78a622f0227699 +msgid "Set the resolution (dpi) in x and y direction." +msgstr "xおよびy方向の解像度(dpi)を設定します。" + +#: ../../pixmap.rst:276 d61cbba5abb14f57906f7489c1c2d1dd +msgid "resolution in x direction." +msgstr "x方向の解像度。" + +#: ../../pixmap.rst:277 c20a6df47d304b2ba8d5138d14639369 +msgid "resolution in y direction." +msgstr "y方向の解像度。" + +#: ../../pixmap.rst:282 97a47c5d70e64ccd923e2b02e57d4c1c +msgid "Changed in v 1.18.13" +msgstr "変更内容:v1.18.13で変更" + +#: ../../pixmap.rst:284 031092ad74ad47b6980a495da9db7978 +msgid "Change the alpha values. The pixmap must have an alpha channel." +msgstr "アルファ値を変更します。ピクマップにはアルファチャンネルが必要です。" + +#: ../../pixmap.rst:286 a3a1d0280e85483da4e2198c4fe3d4c4 +msgid "" +"the new alpha values. If provided, its length must be at least *width * " +"height*. If omitted (`None`), all alpha values are set to 255 (no " +"transparency). *Changed in version 1.14.13:* *io.BytesIO* is now also " +"accepted." +msgstr "" +"新しいアルファ値。指定された場合、その長さは少なくとも *幅×高* " +"さでなければなりません。省略した場合(`None`)、すべてのアルファ値が255(透明でない)に設定されます。*バージョン1.14.13で変更:*" +" *io.BytesIO* も受け入れられるようになりました。" + +#: ../../pixmap.rst:287 be097376a4b64098bf04631d15d8d255 +msgid "" +"*New in v1.18.13:* whether to premultiply color components with the alpha" +" value." +msgstr "*v1.18.13で新登場:* カラーコンポーネントをアルファ値と乗算するかどうか。" + +#: ../../pixmap.rst:288 faeff545849749908ec45bb759eb4afc +msgid "" +"ignore the alpha value and set this color to fully transparent. A " +"sequence of integers in `range(256)` with a length of :attr:`Pixmap.n`. " +"Default is ``None``. For example, a typical choice for RGB would be " +"`opaque=(255, 255, 255)` (white)." +msgstr "" +"アルファ値を無視し、この色を完全に透明に設定します。長さが :attr:`Pixmap.n` で `range(256)` " +"内の整数のシーケンスです。デフォルトは ``None`` です。たとえば、RGBの典型的な選択肢は `opaque=(255, 255, " +"255)` (白)です。" + +#: ../../pixmap.rst:293 b21513eb6af641f6a87a82d6f13d64af +msgid "" +"Invert the color of all pixels in :ref:`IRect` *irect*. Will have no " +"effect if colorspace is ``None``." +msgstr "" +":ref:`IRect` *irect* 内のすべてのピクセルの色を反転させます。colorspaceが ``None`` " +"の場合は効果がありません。" + +#: ../../pixmap.rst:295 31dacab7f6084c39b9a628fedb660dd6 +msgid "The area to be inverted. Omit to invert everything." +msgstr "反転する領域。すべて反転するには省略します。" + +#: ../../pixmap.rst:299 16aece860cd84bf184bfdb1d5db2ab65 +msgid "" +"Copy the *irect* part of the *source* pixmap into the corresponding area " +"of this one. The two pixmaps may have different dimensions and can each " +"have :data:`CS_GRAY` or :data:`CS_RGB` colorspaces, but they currently " +"**must** have the same alpha property [#f2]_. The copy mechanism " +"automatically adjusts discrepancies between source and target like so:" +msgstr "" +"*ソース* ピクマップの *irect* " +"部分を、このピクマップの対応する領域にコピーします。2つのピクマップは異なる寸法を持つことができ、それぞれが :data:`CS_GRAY` " +"または :data:`CS_RGB` カラースペースを持つことができますが、現在は同じアルファプロパティ [#f2]_ " +"を持っている必要があります。コピー機構は、次のようにソースとターゲットの間の不一致を自動的に調整します。" + +#: ../../pixmap.rst:301 73f9ee74e599424998d07298e0817340 +msgid "" +"If copying from :data:`CS_GRAY` to :data:`CS_RGB`, the source gray-shade " +"value will be put into each of the three rgb component bytes. If the " +"other way round, *(r + g + b) / 3* will be taken as the gray-shade value " +"of the target." +msgstr "" +":data:`CS_GRAY` から :data:`CS_RGB` " +"にコピーする場合、ソースのグレーシェード値は、3つのRGBコンポーネントバイトの各々に配置されます。逆の場合、*(r + g + b)/ 3* " +"がターゲットのグレーシェード値として取られます。" + +#: ../../pixmap.rst:303 7dd1e495b0d348aab71b77210668e6b4 +msgid "" +"Between *irect* and the target pixmap's rectangle, an \"intersection\" is" +" calculated at first. This takes into account the rectangle coordinates " +"and the current attribute values :attr:`Pixmap.x` and :attr:`Pixmap.y` " +"(which you are free to modify for this purpose via " +":meth:`Pixmap.set_origin`). Then the corresponding data of this " +"intersection are copied. If the intersection is empty, nothing will " +"happen." +msgstr "" +"*irect* とターゲットピクマップの長方形の間で、まず「交差」を計算します。これは、長方形の座標と現在の属性値 " +":attr:`Pixmap.x` および :attr:`Pixmap.y` (これを目的のために " +":meth:`Pixmap.set_origin` " +"を介して自由に変更できます)を考慮に入れます。その後、この交差のデータがコピーされます。交差が空の場合、何も起こりません。" + +#: ../../pixmap.rst:308 dd184b119e5248d5920ac872de33ada1 +msgid "The area to be copied." +msgstr "コピーする領域。" + +#: ../../pixmap.rst:310 d4e8ddacfc5a40b49f665af44c36940a +msgid "" +"Example: Suppose you have two pixmaps, `pix1` and `pix2` and you want to " +"copy the lower right quarter of `pix2` to `pix1` such that it starts at " +"the top-left point of `pix1`. Use the following snippet::" +msgstr "" +"例: `pix1` と `pix2` という2つのピクマップがあるとし、`pix2` の右下の四半期を `pix1` にコピーし、それが " +"`pix1` の左上の点から開始するようにしたい場合、次のスニペットを使用します::" + +#: ../../pixmap.rst:329 c6bd487346be451f86926fcdc1cd0c70 +msgid "" +"Changed in v1.22.0: Added **direct support of JPEG** images. Image " +"quality can be controlled via parameter \"jpg_quality\"." +msgstr "v1.22.0で変更:**JPEG画像の直接サポート** が追加されました。画像の品質は「jpg_quality」パラメータを使用して制御できます。" + +#: ../../pixmap.rst:331 590ed99d62e2415ba0874b02ba3aacbc +msgid "" +"Save pixmap as an image file. Depending on the output chosen, only some " +"or all colorspaces are supported and different file extensions can be " +"chosen. Please see the table below." +msgstr "Pixmapを画像ファイルとして保存します。選択した出力に応じて、一部またはすべてのカラースペースがサポートされ、異なるファイル拡張子を選択できます。詳細については以下の表をご覧ください。" + +#: ../../pixmap.rst:333 0f576e9f35364fc19691c25253f80a05 +msgid "" +"The file to save to. May be provided as a string, as a ``pathlib.Path`` " +"or as a Python file object. In the latter two cases, the filename is " +"taken from the resp. object. The filename's extension determines the " +"image format, which can be overruled by the output parameter." +msgstr "" +"保存先のファイル。文字列、``pathlib.Path`` " +"、またはPythonファイルオブジェクトとして提供できます。後の2つの場合、ファイル名は対応するオブジェクトから取得されます。ファイル名の拡張子は画像フォーマットを決定し、出力パラメータで上書きできます。" + +#: ../../pixmap.rst:335 be2871f35d3642ffbc1bd779953e07e2 +msgid "" +"The desired image format. The default is the filename's extension. If " +"both, this value and the file extension are unsupported, an exception is " +"raised. For possible values see :ref:`PixmapOutput`." +msgstr "" +"望ましい画像フォーマット。デフォルトはファイル名の拡張子です。この値とファイル拡張子の両方がサポートされていない場合、例外が発生します。:ref:`PixmapOutput`" +" を参照してください。" + +#: ../../pixmap.rst:336 ../../pixmap.rst:346 1e7816aef8254a35a92b20bbc9bae993 +#: b6b1cfa84bb1478fba319137e9812cfb +msgid "" +"The desired image quality, default 95. Only applies to JPEG images, else " +"ignored. This parameter trades quality against file size. A value of 98 " +"is close to lossless. Higher values should not lead to better quality." +msgstr "望ましい画像品質、デフォルトは95です。JPEG画像にのみ適用され、それ以外の場合は無視されます。このパラメータは品質とファイルサイズをトレードオフにします。値が98の場合、ほぼロスレスです。より高い値は品質を向上させることはありません。" + +#: ../../pixmap.rst 0049f315abdb4f7ebc9ae0144d80cfc6 +#: 03e24b9c91d045aabfa1397d65835db9 203b2007acbe498383e4c61b7bd3c90d +#: 9ac540976ca14e449083f2459c8e1023 a4be174bfa2f490c960f79c93a378b50 +msgid "Raises" +msgstr "例外" + +#: ../../pixmap.rst:338 ../../pixmap.rst:348 21ee933f9485440588050de142bf7403 +#: 754a7250ef82449d803a1defc433b02f +msgid "For unsupported image formats." +msgstr "サポートされていない画像フォーマットの場合。" + +#: ../../pixmap.rst:342 be739c75501040f2a079dfbdb48f4cc9 +msgid "" +"New in version 1.14.5: Return the pixmap as a *bytes* memory object of " +"the specified format -- similar to :meth:`save`." +msgstr "" +"新機能(バージョン1.14.5):指定されたフォーマットのピクマップをバイトメモリオブジェクトとして返します。これは :meth:`save` " +"と似ています。" + +#: ../../pixmap.rst:343 9dab17ea3bef4aa1ac4db2370bd6acc5 +msgid "" +"Changed in v1.22.0: Added **direct JPEG support**. Image quality can be " +"influenced via new parameter \"jpg_quality\"." +msgstr "" +"v1.22.0で変更: **JPEG画像の直接サポート** " +"が追加されました。画像の品質は「jpg_quality」パラメータを使用して制御できます。" + +#: ../../pixmap.rst:345 f867093c639347c6beaf8ee355647985 +msgid "" +"The desired image format. The default is \"png\". For possible values see" +" :ref:`PixmapOutput`." +msgstr "望ましい画像フォーマット。デフォルトは \"png\" です。:ref:`PixmapOutput` を参照してください。" + +#: ../../pixmap.rst:351 50414751cdad4f979734b28d6ea70d38 +msgid "" +"The requested image format. The default is \"png\". For other possible " +"values see :ref:`PixmapOutput`." +msgstr "リクエストされた画像フォーマットです。デフォルトは \"png\" です。:ref:`PixmapOutput` を参照してください。" + +#: ../../pixmap.rst:355 ../../pixmap.rst:370 5d835438e5e14ea183646da2c545e117 +#: 8aa55ac28fb04c2eb37abffc6662f510 +msgid "New in v1.19.0" +msgstr "v1.19.0 で新規追加" + +#: ../../pixmap.rst:357 ../../pixmap.rst:372 0d701da722c848208fb1fdca1ef0cbc3 +#: 5741b6f174484e07881ec1c6e310293c +msgid "Changed in v1.22.5: Support of new parameter for Tesseract's tessdata." +msgstr "v1.22.5 で変更:Tesseract の tessdata に関する新しいパラメータのサポート。" + +#: ../../pixmap.rst:359 20e9cb25cc134e988d4ee64fb854485d +msgid "" +"Perform text recognition using Tesseract and save the image as a 1-page " +"PDF with an OCR text layer." +msgstr "Tesseract を使用してテキスト認識を実行し、OCR テキスト レイヤーを持つ 1 ページの PDF として画像を保存します。" + +#: ../../pixmap.rst:361 eed00e99897b441594fad5c8b5410f4a +msgid "" +"identifies the file to save to. May be either a string or a pointer to a " +"file opened with \"wb\" (includes `io.BytesIO()` objects)." +msgstr "" +"保存先のファイルを識別します。文字列または \"wb\" で開かれたファイルへのポインタ (`io.BytesIO()` " +"オブジェクトを含む)のいずれかである必要があります。" + +#: ../../pixmap.rst:362 cd441e3bfe8c4037af7676f03e04caf0 +msgid "whether to compress the resulting PDF, default is `True`." +msgstr "結果の PDF を圧縮するかどうか。デフォルトは `True` です。" + +#: ../../pixmap.rst:363 96b9a6ac7dd04d56a1a8d2950c1415f2 +msgid "" +"the languages occurring in the image. This must be specified in Tesseract" +" format. Default is \"eng\" for English. Use \"+\"-separated Tesseract " +"language codes for multiple languages, like \"eng+spa\" for English and " +"Spanish." +msgstr "" +"画像内で使用される言語。Tesseract の形式で指定する必要があります。デフォルトは " +"\"eng\"(英語)です。複数の言語を使用する場合、\"eng+spa\" のように \"+\" で区切った Tesseract " +"言語コードを使用します(英語とスペイン語の場合など)。" + +#: ../../pixmap.rst:364 da78e16ab51946d2a085c5e5f1292d45 +#, fuzzy +msgid "" +"folder name of Tesseract's language support. If omitted, this information" +" must be present as environment variable `TESSDATA_PREFIX`." +msgstr "" +":arg str tessdata: Tesseractの言語サポートフォルダーの名前です。省略した場合、この情報は環境変数 " +"`TESSDATA_PREFIX` として存在している必要があります。" + +#: ../../pixmap.rst:366 2701e093822949dcad9f7e0bcbc5d043 +msgid "" +"**Will fail** if Tesseract is not installed or if the environment " +"variable \"TESSDATA_PREFIX\" is not set to the `tessdata` folder name and" +" not provided as parameter." +msgstr "" +"Tesseract がインストールされていない場合や、環境変数 \"TESSDATA_PREFIX\" が `tessdata` " +"フォルダ名に設定されておらず、またはパラメータとして提供されていない場合、この関数は **失敗します** 。" + +#: ../../pixmap.rst:374 f06b9cc64aad479c87a29ada5e866651 +msgid "" +"Perform text recognition using Tesseract and convert the image to a " +"1-page PDF with an OCR text layer. Internally invokes " +":meth:`Pixmap.pdfocr_save`." +msgstr "" +"Tesseractを使用してテキスト認識を実行し、画像をOCRテキストレイヤーを持つ1ページのPDFに変換します。内部的には " +":meth:`Pixmap.pdfocr_save` を呼び出します" + +#: ../../pixmap.rst:376 680223ff3239437a81388de854f6edcd +msgid "" +"A 1-page PDF file in memory. Could be opened like " +"`doc=pymupdf.open(\"pdf\", pix.pdfocr_tobytes())`, and text extractions " +"could be performed on its `page=doc[0]`. .. note:: Another possible " +"use is insertion into some pdf. The following snippet reads the images of" +" a folder and stores them as pages in a new PDF that contain an OCR text " +"layer:: doc = pymupdf.open() for imgfile in " +"os.listdir(folder): pix = pymupdf.Pixmap(imgfile) " +"imgpdf = pymupdf.open(\"pdf\", pix.pdfocr_tobytes()) " +"doc.insert_pdf(imgpdf) pix = None imgpdf.close() " +"doc.save(\"ocr-images.pdf\")" +msgstr "" + +#: ../../pixmap.rst:376 035e29c89b8947a4938f0def16d20c20 +msgid "" +"A 1-page PDF file in memory. Could be opened like " +"`doc=pymupdf.open(\"pdf\", pix.pdfocr_tobytes())`, and text extractions " +"could be performed on its `page=doc[0]`." +msgstr "" +"メモリ内の1ページのPDFファイル。次のようにして開くことができます: `doc=pymupdf.open(\"pdf\", " +"pix.pdfocr_tobytes())` 、そしてそのページ=doc[0]でテキストの抽出が行えます。" + +#: ../../pixmap.rst:380 d5488ec9ed704e57806aab8bb8151fa0 +msgid "" +"Another possible use is insertion into some pdf. The following snippet " +"reads the images of a folder and stores them as pages in a new PDF that " +"contain an OCR text layer::" +msgstr "" +"別の可能性として、PDF に挿入することが考えられます。次のスニペットは、フォルダ内の画像を読み取り、OCR テキスト レイヤーを含む新しい " +"PDF ページとして保存します::" + +#: ../../pixmap.rst:394 745b1318fcc7495cb968031ca6bbcf1e +msgid "Create a Pillow Image from the pixmap. PIL / Pillow must be installed." +msgstr "" + +#: ../../pixmap.rst:396 ../../pixmap.rst:416 ../../pixmap.rst:424 +#: 3b635a6f7f364e478d4844dd4120de50 c7b56b17e4394785a408110c3609519d +#: d56c7e5220b440128dbe66467e450e13 +msgid "if Pillow is not installed." +msgstr "Pillow がインストールされていない場合" + +#: ../../pixmap.rst:397 82d1388ec50c4f7486bb6d61d8b119a4 +msgid "a ˇˇPIL.Imageˇˇ object" +msgstr "" + +#: ../../pixmap.rst:401 d1bff7b145cf49259879515c8961e6d2 +msgid "" +"Write the pixmap as an image file using Pillow. Use this method for " +"output unsupported by MuPDF. Examples are" +msgstr "" +"Pillow を使用して pixmap を画像ファイルとして書き込みます。これは MuPDF " +"でサポートされていない出力に使用します。例として、以下が挙げられます。" + +#: ../../pixmap.rst:403 01f026e6ce964eddab44d42bc5e97ffb +msgid "Formats JPX, J2K, WebP, etc." +msgstr "JPX、J2K、WebP などの形式" + +#: ../../pixmap.rst:404 ea133fada434413d89120b525a33b8ac +msgid "Storing EXIF information." +msgstr "EXIF 情報の保存" + +#: ../../pixmap.rst:405 cb2a01d50a48427badd26e679d9c0943 +msgid "" +"If you do not provide dpi information, the values *xres*, *yres* stored " +"with the pixmap are automatically used." +msgstr "dpi 情報を提供しない場合、pixmap に格納されている *xres*、*yres* の値が自動的に使用されます。" + +#: ../../pixmap.rst:407 148e41347b5b4b0a8a811716d7543ac6 +#, fuzzy +msgid "" +"A simple example: `pix.pil_save(\"some.webp\", optimize=True, dpi=(150, " +"150))`." +msgstr "" +"簡単な例: `pix.pil_save(\"some.webp\", optimize=True, dpi=(150, 150))` " +"。他のパラメータの詳細については、Pillow のドキュメンテーションを参照してください。" + +#: ../../pixmap.rst:409 c98713fd3fb24a9d81551f753eba41a3 +msgid "" +"If the pixmap's colorspace is RGB with transparency, the alpha values may" +" or may not already be multiplied into the color components " +"ref/green/blue (called \"premultiplied\"). To enforce undoing " +"premultiplication, set this parameter to `True`. To learn about some " +"background, e.g. look for \"Premultiplied alpha\" `here " +"`_." +msgstr "" + +#: ../../pixmap.rst:412 9e01652a4c8143718d808823e0196d13 +msgid "For details on other parameters see the Pillow documentation." +msgstr "" + +#: ../../pixmap.rst:414 6a52588a2f3a43e8b86982c3f13c8fbe +msgid "" +"Since v1.22.0, PyMuPDF supports JPEG output directly. We recommended to " +"no longer use this method for JPEG output -- for performance reasons and " +"for avoiding unnecessary external dependencies." +msgstr "" + +#: ../../pixmap.rst:420 915be2ac4cef41b187533b3ba635d674 +msgid "New in v1.17.3" +msgstr "v1.17.3 で新しく追加されました" + +#: ../../pixmap.rst:422 d0b1024f3b884c3fa578196e425b8d43 +#, fuzzy +msgid "" +"Return an image as a bytes object in the specified format using Pillow. " +"For example `stream = pix.pil_tobytes(format=\"WEBP\", optimize=True, " +"dpi=(150, 150))`. Also see above. For details on other parameters see the" +" Pillow documentation." +msgstr "" +"Pillow を使用して指定された形式の画像として bytes オブジェクトとして画像を返します。例: `stream = " +"pix.pil_tobytes(format=\"WEBP\", optimize=True)` 。詳細なパラメータについては、Pillow " +"のドキュメンテーションを参照してください。" + +#: ../../pixmap.rst:431 ../../pixmap.rst:466 db4bc8ea97bd4e009133c4e33f686f46 +#: e5cc0fd9f6484bbc86f3395950e5c7c9 +msgid "New in v1.19.3" +msgstr "v1.19.3 で新しく追加されました" + +#: ../../pixmap.rst:433 d4188d9ac11948548ba79cd4118136c1 +msgid "" +"Return a new pixmap by \"warping\" the quad such that the quad corners " +"become the new pixmap's corners. The target pixmap's `irect` will be `(0," +" 0, width, height)`." +msgstr "" +"四角形を \"ワープ\" して、四角形の角が新しい pixmap の角になるようにします。対象 pixmap の `irect` は `(0, " +"0, width, height)` になります。" + +#: ../../pixmap.rst:435 b16ac0435a794bbba491838cd086698c +msgid "" +"a convex quad with coordinates inside :attr:`Pixmap.irect` (including the" +" border points)." +msgstr ":attr:`Pixmap.irect` の内部にある座標を持つ凸四角形(境界点も含む)" + +#: ../../pixmap.rst:436 07826d60127a4d3a9f2dc58f845323e5 +msgid "desired resulting width." +msgstr "望ましい幅" + +#: ../../pixmap.rst:437 ee90eecd90714bf7a889988bf2f0f5ed +msgid "desired resulting height." +msgstr "望ましい高さ" + +#: ../../pixmap.rst:438 e86d8fee5e064e7ca21a864637a15e0a +msgid "" +"A new pixmap where the quad corners are mapped to the pixmap corners in a" +" clockwise fashion: `quad.ul -> irect.tl`, `quad.ur -> irect.tr`, etc." +msgstr "" +"新しいピクスマップで、四角形の角が時計回りにピクスマップの角にマップされます: `quad.ul -> irect.tl`、`quad.ur ->" +" irect.tr` など。" + +#: ../../pixmap.rst:439 734dc253be574678afcc4555204015a8 +msgid "" +":ref:`Pixmap` .. image:: images/img-warp.* :scale: 40 :align: " +"center" +msgstr "" + +#: ../../pixmap.rst:439 b610c28724094330bcf1554cabdb88d9 +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../pixmap.rst:448 ../../pixmap.rst:505 ../../pixmap.rst:514 +#: 61f8c546e87a48c0a8e6da6d8f649f8e bbf5f7b837184e65a1f543ff2f40aec3 +#: d0456b5401584cf99608ad02462eedc0 +msgid "New in v1.19.2" +msgstr "v1.19.2で導入" + +#: ../../pixmap.rst:449 4e0e845959da4fff83068ba57b335e6c +msgid "Changed in v1.19.3" +msgstr "v1.19.3で変更" + +#: ../../pixmap.rst:451 fe27ad1c40bf403babc8d30e3af2ac21 +msgid "Determine the pixmap's unique colors and their count." +msgstr "Pixmapのユニークな色とそのカウントを特定します。" + +#: ../../pixmap.rst:453 7ee90af6b2ae4386a6c7553bb618eebd +msgid "" +"*(changed in v1.19.3)* If `True` return a dictionary of color pixels and " +"their usage count, else just the number of unique colors." +msgstr "*(v1.19.3で変更)* `True` の場合、色ピクセルとその使用回数の辞書を返し、それ以外の場合はユニークな色の数だけを返します。" + +#: ../../pixmap.rst:454 0dfa03e10a9b45a8ab24b74c8e8fc5ab +msgid "" +"a rectangle inside :attr:`Pixmap.irect`. If provided, only those pixels " +"are considered. This allows inspecting sub-rectangles of a given pixmap " +"directly -- instead of building sub-pixmaps." +msgstr "" +":attr:`Pixmap.irect` " +"内の四角形。指定した場合、そのピクセルのみが考慮されます。これにより、指定されたPixmapのサブ四角形を直接調査できます。" + +#: ../../pixmap.rst:456 4c326412d6c0448985b8e87ce4638409 +msgid "" +"either the number of colors, or a dictionary with the items `pixel: " +"count`. The pixel key is a `bytes` object of length :attr:`Pixmap.n`. .." +" note:: To recover the **tuple** of a pixel, use " +"`tuple(colors.keys()[i])` for the i-th item. * The response time " +"depends on the pixmap's samples size and may be more than a second for " +"very large pixmaps. * Where applicable, pixels with different alpha " +"values will be treated as different colors." +msgstr "" + +#: ../../pixmap.rst:456 7db22ff7cf05486982b356633b77da57 +msgid "" +"either the number of colors, or a dictionary with the items `pixel: " +"count`. The pixel key is a `bytes` object of length :attr:`Pixmap.n`." +msgstr "" +"色の数、または `pixel: count` の項目を持つ辞書。pixelキーは :attr:`Pixmap.n` の長さの `bytes` " +"オブジェクトです。" + +#: ../../pixmap.rst:458 bd48043357324c3897234a7b756ba1e5 +msgid "" +"To recover the **tuple** of a pixel, use `tuple(colors.keys()[i])` for " +"the i-th item." +msgstr "ピクセルのタプルを復元するには、i番目の項目に対して `tuple(colors.keys()[i])` を使用します。" + +#: ../../pixmap.rst:460 a13116d89eb9439faa675e352ace52fc +msgid "" +"The response time depends on the pixmap's samples size and may be more " +"than a second for very large pixmaps." +msgstr "応答時間はPixmapのsamplesサイズに依存し、非常に大きなPixmapの場合は1秒以上かかることがあります。" + +#: ../../pixmap.rst:461 2664635a30434ebe86e382e0611358ad +msgid "" +"Where applicable, pixels with different alpha values will be treated as " +"different colors." +msgstr "該当する場合、異なるアルファ値を持つピクセルは異なる色として扱われます。" + +#: ../../pixmap.rst:468 656ee402dc71457bbba7b9ef2aeb0021 +msgid "Return the most frequently used color and its relative frequency." +msgstr "最も頻繁に使用される色とその相対頻度を返します。" + +#: ../../pixmap.rst:470 f6d914f0fcd947929c311e3e02c6ad91 +msgid "" +"A rectangle inside :attr:`Pixmap.irect`. If provided, only those pixels " +"are considered. This allows inspecting sub-rectangles of a given pixmap " +"directly -- instead of building sub-pixmaps." +msgstr "" +":attr:`Pixmap.irect` " +"内の四角形。指定した場合、そのピクセルのみが考慮されます。これにより、指定されたPixmapのサブ四角形を直接調査できます。" + +#: ../../pixmap.rst:472 9964a69b1bdf469f90fb138cb7640f67 +#, python-format +msgid "" +"A tuple `(ratio, pixel)` where `0 < ratio <= 1` and *pixel* is the pixel " +"value of the color. Use this to decide if the image is \"almost\" " +"unicolor: a response `(0.95, b\"\\x00\\x00\\x00\")` means that 95% of all" +" pixels are black. See an example here :ref:`RecipesImages_P`." +msgstr "" +"比率 `0 < ratio <= 1` および色のピクセル値を持つタプル。これを使用して画像が「ほぼ」単一の色であるかどうかを判断します。応答 " +"`(0.95, b\"\\x00\\x00\\x00\")` " +"は、すべてのピクセルの95%が黒であることを示します。こちらの例を参照してください:「Pixmapsの使用方法:テキストの可視性の確認」" + +#: ../../pixmap.rst:477 5616759facc741ffb03d12a74ed68b50 +msgid "Indicates whether the pixmap contains transparency information." +msgstr "Pixmapに透明情報が含まれているかどうかを示します。" + +#: ../../pixmap.rst 09caf267bf0c4357b1d649f5223f0568 +#: 0a138b7070ab4b32be0424aecfd9db4f 100b6ea398d843d3a06b27c024d52b65 +#: 17ac16b6b1ee49a2ae806e8b755d2c0a 2feafeac0755436793da55055a5a913d +#: 31c5f6e06c4b4ee48c1288e3ef6e1c29 332118753d2b48b1bd58d13b3cf2a606 +#: 34f5bb9bf1354edc89835fecb1053a19 36498f0405d54e41a96ac186b2ba68cf +#: 4e3f7658060449c08c1ae6538550b1ab 8c19fc922781430d84ddbaf21fe68a52 +#: 929eb43a19974fe1937ab13e00ba01f3 931f0353c59a4a4aa0870b827533484c +#: a4275da98b5341abbdd6518cf95c282a d555ab3229a14a38a8ce8e47be6d6d21 +#: dc660e5a536f43078522d21269142def e7cb73487d0a48d885810f976fd00e7b +#: f29ab1c336f341b69dc8d8d4a7a5c50f ffc816d2b9b8492c9ac1e6826265fd47 +msgid "type" +msgstr "" + +#: ../../pixmap.rst:479 ../../pixmap.rst:509 ../../pixmap.rst:518 +#: ../../pixmap.rst:633 097f8175747d43b4b4cc6268485d439b +#: 3301afce71724277906a27e29996ca0b 906a5684d73f429aa18a5cef6b00c69a +#: cdd7b921fc1f4f559e75ac5b81d65d9a +msgid "bool" +msgstr "" + +#: ../../pixmap.rst:483 e70a00b3dcb84a4599d6d0b697bd2687 +msgid "" +"The MD5 hashcode (16 bytes) of the pixmap. This is a technical value used" +" for unique identifications." +msgstr "PixmapのMD5ハッシュコード(16バイト)。これは一意の識別に使用される技術的な値です。" + +#: ../../pixmap.rst:485 ../../pixmap.rst:537 b6c9900d9c1a4a6b9515506c0f668a22 +#: f48e7f527cae4721a3094b4c7abb52a4 +msgid "bytes" +msgstr "" + +#: ../../pixmap.rst:489 660ed52ce4964afba3b92ff355adc428 +msgid "" +"The colorspace of the pixmap. This value may be ``None`` if the image is " +"to be treated as a so-called *image mask* or *stencil mask* (currently " +"happens for extracted PDF document images only)." +msgstr "" +"Pixmapのカラースペース。この値は、イメージが *イメージマスク* または *ステンシルマスク* として扱われる場合、``None`` " +"になることがあります(現在、抽出されたPDFドキュメントイメージのみが該当)。" + +#: ../../pixmap.rst:491 a4ece184f5c7444c8e2a416ae04b77b0 +msgid ":ref:`Colorspace`" +msgstr "" + +#: ../../pixmap.rst:495 692b9515c569482d912774bf56826035 +msgid "" +"Contains the length of one row of image data in :attr:`Pixmap.samples`. " +"This is primarily used for calculation purposes. The following " +"expressions are true:" +msgstr ":attr:`Pixmap.samples` 内の画像データの1行の長さを含みます。これは主に計算目的で使用されます。次の式が真です:" + +#: ../../pixmap.rst:497 ceaed632b88e4fa49ceeed66aa285cef +msgid "`len(samples) == height * stride`" +msgstr "" + +#: ../../pixmap.rst:498 6ae73ce100ff468ea52549907fae423e +msgid "`width * n == stride`" +msgstr "" + +#: ../../pixmap.rst:500 ../../pixmap.rst:575 ../../pixmap.rst:581 +#: ../../pixmap.rst:589 ../../pixmap.rst:597 ../../pixmap.rst:603 +#: ../../pixmap.rst:609 ../../pixmap.rst:615 ../../pixmap.rst:621 +#: ../../pixmap.rst:627 14a8ec2123834004bd050c5b71fc43e2 +#: 4e1c7461f5294586954882c7d881ad27 69a7ebcad393482b8639888cc114b2e6 +#: 7e59cf4ded1a4cf2a190bcb6bdce063b 83a375d7ea0c4debac94399f760a51cb +#: d9aa2f62a5a14398952c00f62e342eb7 db17dced3d7e409089921c9861c4f6f2 +#: dc8282e0ad8249658e7762e577b3c60f ecfbf5db4ab54047b7430bc9ace8ea93 +#: fa05ddd43a3a4f878fcd48c6f02d6aa1 +msgid "int" +msgstr "" + +#: ../../pixmap.rst:507 2d5d22c11dad4b19b5234e8072ba2eed +msgid "Is `True` for a gray pixmap which only has the colors black and white." +msgstr "灰色のピクマップで、黒と白の色しか持たない場合は `True` です。" + +#: ../../pixmap.rst:516 1febbc7fac8b4fb2a67b32cec3fdc940 +msgid "" +"Is `True` if all pixels are identical (any colorspace). Where applicable," +" pixels with different alpha values will be treated as different colors." +msgstr "" +"すべてのピクセルが同じ場合、`True` " +"です(どのカラースペースでも適用)。該当する場合、異なるアルファ値を持つピクセルは異なる色として扱われます。" + +#: ../../pixmap.rst:523 97ae6ecbc323433d8c92f114f45225f0 +msgid "Contains the :ref:`IRect` of the pixmap." +msgstr "ピクマップの :ref:`IRect` を含みます。" + +#: ../../pixmap.rst:525 c584049378344137a7e02b6f2b80c61d +msgid ":ref:`IRect`" +msgstr "" + +#: ../../pixmap.rst:529 a0770be995ff49a98077b54eda3622d8 +msgid "" +"The color and (if :attr:`Pixmap.alpha` is true) transparency values for " +"all pixels. It is an area of `width * height * n` bytes. Each n bytes " +"define one pixel. Each successive n bytes yield another pixel in scanline" +" order. Subsequent scanlines follow each other with no padding. E.g. for " +"an RGBA colorspace this means, *samples* is a sequence of bytes like " +"*..., R, G, B, A, ...*, and the four byte values R, G, B, A define one " +"pixel." +msgstr "" +"すべてのピクセルの色と( :attr:`Pixmap.alpha` がtrueの場合)透明度の値です。これは `width * height * " +"n` " +"バイトの領域です。各nバイトは1つのピクセルを定義します。各続くnバイトは、スキャンラインの順序で別のピクセルを生成します。連続するスキャンラインはパディングなしで続きます。たとえばRGBAカラースペースの場合、*samples*" +" は *…、R、G、B、A、…* のようなバイトのシーケンスで、4つのバイト値R、G、B、Aが1つのピクセルを定義します。" + +#: ../../pixmap.rst:531 4a9b9ac67b8449c7ab86e821fa247e3e +msgid "" +"This area can be passed to other graphics libraries like PIL (Python " +"Imaging Library) to do additional processing like saving the pixmap in " +"other image formats." +msgstr "" +"この領域は、PIL(Python Imaging " +"Library)などの他のグラフィックライブラリに渡すことができ、ピクマップを他の画像形式で保存するなどの追加の処理を行うのに使用できます。" + +#: ../../pixmap.rst:534 c3655424982d452eb78367c16d7e66b9 +msgid "" +"The underlying data is typically a **large** memory area, from which a " +"`bytes` copy is made for this attribute ... each time you access it: for " +"example an RGB-rendered letter page has a samples size of almost 1.4 MB. " +"So consider assigning a new variable to it or use the `memoryview` " +"version :attr:`Pixmap.samples_mv` (new in v1.18.17)." +msgstr "" +"基本データは通常、この属性にアクセスするたびに `bytes` " +"のコピーが作成される大きなメモリ領域です。たとえば、RGBAでレンダリングされた文字ページのsamplesサイズはほぼ1.4 " +"MBです。したがって、新しい変数に代入するか、`memoryview` バージョン :attr:`Pixmap.samples_mv` " +"(v1.18.17で新機能)を使用するか、などの検討が必要です。" + +#: ../../pixmap.rst:535 8c32c5335d7c447496e0e1d5be88c933 +msgid "" +"Any changes to the underlying data are available only after accessing " +"this attribute again. This is different from using the memoryview " +"version." +msgstr "基本データへの変更は、この属性に再度アクセスするまで利用できません。これは `memoryview` バージョンを使用する場合とは異なります。" + +#: ../../pixmap.rst:541 ../../pixmap.rst:563 3c9854d4dc0d4bf7909094bb8076d42f +#: beae6012398647cfa8be41387af7d018 +msgid "New in v1.18.17" +msgstr "新機能 v1.18.17" + +#: ../../pixmap.rst:543 2492575ba835437f8ef3c780ed587dcd +msgid "" +"Like :attr:`Pixmap.samples`, but in Python `memoryview` format. It is " +"built pointing to the memory in the pixmap -- not from a copy of it. So " +"its creation speed is independent from the pixmap size, and any changes " +"to pixels will be available immediately." +msgstr "" +":attr:`Pixmap.samples` と同様ですが、Pythonの `memoryview` " +"形式です。これはピクマップ内のメモリを指すように構築されており、コピーではありません。そのため、作成速度はピクマップのサイズに依存せず、ピクセルへの変更はすぐに利用可能です。" + +#: ../../pixmap.rst:545 83d376fd4a9e4ea5930ad4bfbb750db0 +msgid "" +"Copies like `bytearray(pix.samples_mv)`, or `bytes(pixmap.samples_mv)` " +"are equivalent to and can be used in place of `pix.samples`." +msgstr "" +"`bytearray(pix.samples_mv)` や `bytes(pixmap.samples_mv)` " +"などのコピーは、`pix.samples` の代わりに使用でき、同等です。" + +#: ../../pixmap.rst:547 5fe293540fee4e4a9327990904d98658 +msgid "We also have `len(pix.samples) == len(pix.samples_mv)`." +msgstr "また、`len(pix.samples) == len(pix.samples_mv)` です。" + +#: ../../pixmap.rst:549 6210a926a737475aa3dc6044387a1b72 +msgid "" +"Look at this example from a 2 MB JPEG: the memoryview is **ten thousand " +"times faster**::" +msgstr "この2 MBのJPEGからのこの例をご覧ください: `memoryview` は **10000倍高速** です::" + +#: ../../pixmap.rst:556 16ccebe16d504d85bbdac9f4ec187d39 +msgid "" +"After the Pixmap has been destroyed, any attempt to use the memoryview " +"will fail with ValueError." +msgstr "" + +#: ../../pixmap.rst:559 0f7b50383018415a90d003dec7b43fc8 +msgid "memoryview" +msgstr "" + +#: ../../pixmap.rst:565 0e5cda1c402b4f95b89aea3cb932e255 +msgid "" +"Python pointer to the pixel area. This is a special integer format, which" +" can be used by supporting applications (such as PyQt) to directly " +"address the samples area and thus build their images extremely fast. For " +"example::" +msgstr "ピクセル領域へのPythonポインターです。これは特別な整数形式で、サポートするアプリケーション(PyQtなど)がサンプル領域に直接アクセスし、非常に高速に画像を構築できるように使用できます。例えば::" + +#: ../../pixmap.rst:570 c273781241c7435680ec26d5f2e174b5 +msgid "" +"Both of the above lead to the same Qt image, but (2) can be **many " +"hundred times faster**, because it avoids an additional copy of the pixel" +" area." +msgstr "以下はQtイメージへの2つの方法ですが、(2)はピクセル領域の追加のコピーを回避するため、通常 **何百倍も高速** です。" + +#: ../../pixmap.rst:572 da5db87ce00549ef8c367574db178362 +msgid "" +"Warning: after the Pixmap has been destroyed, the Python pointer will be " +"invalid and attempting to use it may crash the Python interpreter." +msgstr "" + +#: ../../pixmap.rst:579 64910abafc9b4a16b94a32c6195d3796 +msgid "" +"Contains *len(pixmap)*. This will generally equal *len(pix.samples)* plus" +" some platform-specific value for defining other attributes of the " +"object." +msgstr "" +"これは *pixmap の長さ* を含んでいます。通常、これは *pix.samples の長さ* " +"にプラットフォーム固有の他の属性を定義するためのいくつかの値を加えたものです。" + +#: ../../pixmap.rst:587 1bd872d4e7fb42c6be71ce643ea628e5 +msgid "Width of the region in pixels." +msgstr "ピクセル単位の領域の幅。" + +#: ../../pixmap.rst:595 614de35cac294b5aa4a3cac39fb96f4a +msgid "Height of the region in pixels." +msgstr "ピクセル単位の領域の高さ。" + +#: ../../pixmap.rst:601 c18ef180c3d14a8f9e7893891ebe6488 +msgid "" +"X-coordinate of top-left corner in pixels. Cannot directly be changed -- " +"use :meth:`Pixmap.set_origin`." +msgstr "ピクセル単位での左上隅のX座標。直接変更できません。:meth:`Pixmap.set_origin` を使用してください。" + +#: ../../pixmap.rst:607 808f56c9f61b4a3fb2c275b735746bb6 +msgid "" +"Y-coordinate of top-left corner in pixels. Cannot directly be changed -- " +"use :meth:`Pixmap.set_origin`." +msgstr "ピクセル単位での左上隅のY座標。直接変更できません。:meth:`Pixmap.set_origin` を使用してください。" + +#: ../../pixmap.rst:613 458068ce6d7b4c7d9398081bd77874e7 +#, fuzzy +msgid "" +"Number of components per pixel. This number depends on colorspace and " +"alpha. If colorspace is not ``None`` (stencil masks), then *Pixmap.n - " +"Pixmap.alpha == pixmap.colorspace.n* is true. If colorspace is ``None``, " +"then *n == alpha == 1*." +msgstr "" +"ピクセルごとのコンポーネントの数。この数は色空間とアルファに依存します。色空間が ``None`` " +"でない場合(ステンシルマスク)、*Pixmap.n - Pixmap.alpha == pixmap.colorspace.n* " +"がtrueです。色空間が ``None`` の場合、*n == alpha == 1* です。" + +#: ../../pixmap.rst:619 4537136a8d5b482983ba8a93e445f390 +msgid "" +"Horizontal resolution in dpi (dots per inch). Please also see " +":data:`resolution`. Cannot directly be changed -- use " +":meth:`Pixmap.set_dpi`." +msgstr "" +"水平解像度(dpi単位)。:data:`resolution` " +"も参照してください。直接変更できません。:meth:`Pixmap.set_dpi` を使用してください。" + +#: ../../pixmap.rst:625 eefd9342350049919e25e277d6c3b3fb +msgid "" +"Vertical resolution in dpi (dots per inch). Please also see " +":data:`resolution`. Cannot directly be changed -- use " +":meth:`Pixmap.set_dpi`." +msgstr "" +"垂直解像度(dpi単位)。:data:`resolution` " +"も参照してください。直接変更できません。:meth:`Pixmap.set_dpi` を使用してください。" + +#: ../../pixmap.rst:631 2575f6edfd324a92bd5a447fec1e31df +msgid "" +"An information-only boolean flag set to ``True`` if the image will be " +"drawn using \"linear interpolation\". If ``False`` \"nearest neighbour " +"sampling\" will be used." +msgstr "" +"情報のみのブールフラグで、イメージが「線形補間」を使用して描画される場合に ``True`` に設定されます。``False`` " +"の場合、「最近傍サンプリング」が使用されます。" + +#: ../../pixmap.rst:638 fb19a5d83a8049128d4b83c24c65dbf6 +msgid "Supported Input Image Formats" +msgstr "サポートされている入力画像フォーマット" + +#: ../../pixmap.rst:639 6d47997ec58540749bc7a5f1eacaa3b6 +msgid "" +"The following file types are supported as **input** to construct pixmaps:" +" **BMP, JPEG, GIF, TIFF, JXR, JPX**, **PNG**, **PAM** and all of the " +"**Portable Anymap** family (**PBM, PGM, PNM, PPM**). This support is two-" +"fold:" +msgstr "" +"次のファイルタイプは、ピクスマップを構築するための **入力** " +"としてサポートされています:**BMP、JPEG、GIF、TIFF、JXR、JPX**、**PNG**、**PAM**、およびすべての " +"**Portable Anymap** ファミリー(**PBM、PGM、PNM、PPM**)。このサポートは二重の方法で提供されています:" + +#: ../../pixmap.rst:641 9ea7a063be4547c18c258b2d3177008e +msgid "" +"Directly create a pixmap with *Pixmap(filename)* or *Pixmap(byterray)*. " +"The pixmap will then have properties as determined by the image." +msgstr "" +"*Pixmap(ファイル名)* または *Pixmap(バイト配列)* " +"を使用してピクスマップを直接作成します。その後、ピクスマップには画像によって決定されるプロパティが含まれます。" + +#: ../../pixmap.rst:643 732ebf18eb7d4514920c5f29ae06e8fc +msgid "" +"Open such files with *pymupdf.open(...)*. The result will then appear as " +"a document containing one single page. Creating a pixmap of this page " +"offers all the options available in this context: apply a matrix, choose " +"colorspace and alpha, confine the pixmap to a clip area, etc." +msgstr "" +"*pymupdf.open(...)* " +"を使用してこのようなファイルを開きます。その結果、単一のページを含むドキュメントとして表示されます。このページのピクスマップを作成すると、このコンテキストで利用可能なすべてのオプションを使用できます:行列を適用、色空間とアルファを選択、ピクスマップをクリップエリアに制限などが含まれます。" + +#: ../../pixmap.rst:645 980d86630c2c44de8e6d19e5467a0ce6 +msgid "" +"**SVG images** are only supported via method 2 above, not directly as " +"pixmaps. But remember: the result of this is a **raster image** as is " +"always the case with pixmaps [#f1]_." +msgstr "" +"**SVG画像** は、直接ピクスマップとしてではなく、上記の方法2でのみサポートされています。ただし、ピクスマップの場合と同様、その結果は " +"**ラスターイメージ** です [#f1]_。" + +#: ../../pixmap.rst:650 a908d73cc347496b82241f304fa3fe2a +msgid "Supported Output Image Formats" +msgstr "サポートされている出力画像フォーマット" + +#: ../../pixmap.rst:651 73fac07184704c369cc728f98e3f5c2f +msgid "" +"A number of image **output** formats are supported. You have the option " +"to either write an image directly to a file (:meth:`Pixmap.save`), or to " +"generate a bytes object (:meth:`Pixmap.tobytes`). Both methods accept a " +"string identifying the desired format (**Format** column below). Please " +"note that not all combinations of pixmap colorspace, transparency support" +" (alpha) and image format are possible." +msgstr "" +"いくつかの画像 **出力** " +"フォーマットがサポートされています。画像を直接ファイルに書き込むオプション(:meth:`Pixmap.save`)またはバイトオブジェクトを生成するオプション(:meth:`Pixmap.tobytes`)があります。どちらのメソッドも、希望の" +" **フォーマット** " +"を識別する文字列を受け入れます(下のフォーマット列)。ただし、すべてのピクスマップの色空間、透明度サポート(アルファ)、および画像フォーマットの組み合わせが可能であるわけではないことに注意してください。" + +#: ../../pixmap.rst:654 a87afb44cc5e4b73ab588b5b11f9ccfd +msgid "**Format**" +msgstr "**フォーマット**" + +#: ../../pixmap.rst:654 1f2cded3b1b44c0b937fa0032e9cdae6 +msgid "**Colorspaces**" +msgstr "**カラースペース**" + +#: ../../pixmap.rst:654 49b0734f82764d6990cf44c77df6c570 +msgid "**alpha**" +msgstr "**アルファ**" + +#: ../../pixmap.rst:654 83f92d8a4a7a4760a8e40ed6c0f06f42 +msgid "**Extensions**" +msgstr "**拡張子**" + +#: ../../pixmap.rst:654 0fd30df7107b4f34935e701d014dbe14 +msgid "**Description**" +msgstr "**説明**" + +#: ../../pixmap.rst:656 5418f8bbb77c4279bb7e54b0467ed853 +msgid "jpg, jpeg" +msgstr "" + +#: ../../pixmap.rst:656 ../../pixmap.rst:657 ../../pixmap.rst:663 +#: ../../pixmap.rst:664 32dc7fd5ff4044589a06da6569985cbb +#: 446d676d4cb746debc50ec9080b3bd59 85b80fa1ebd1428896973689aeb0152f +#: f60599eee436479ea2ac04183c71f206 +msgid "gray, rgb, cmyk" +msgstr "" + +#: ../../pixmap.rst:656 ../../pixmap.rst:658 ../../pixmap.rst:659 +#: ../../pixmap.rst:661 ../../pixmap.rst:662 ../../pixmap.rst:663 +#: 1ff87a7445e842ef87f801374ff82433 5f411ac1900d4a828a2adc5f8d66e217 +#: 8189a51b7a194649810e0d7d577bd10d c8609d9e76144b8c95dc48aded05950e +#: d62197f6b05c43c8b9e1dec64faf9443 d70b4f54e80348028d6fae84243c3789 +msgid "no" +msgstr "なし" + +#: ../../pixmap.rst:656 ea88730047b74e13b318b9c914e5b2a3 +msgid ".jpg, .jpeg" +msgstr "" + +#: ../../pixmap.rst:656 4b3d4bb369ed4ff58e4947e367cf2da8 +msgid "Joint Photographic Experts Group" +msgstr "" + +#: ../../pixmap.rst:657 69193e09d372420faa98f2f58eb9ad36 +msgid "pam" +msgstr "" + +#: ../../pixmap.rst:657 ../../pixmap.rst:660 ../../pixmap.rst:664 +#: 6b1dd7ce634b4293a39d364858cc0a39 939ff0c9a94043c8bd74794b59b4f53e +#: fb225f76ad8f4ddd91435f0958c1ad46 +msgid "yes" +msgstr "あり" + +#: ../../pixmap.rst:657 6df33eaa96db4f4885c2c1ac36d3898b +msgid ".pam" +msgstr "" + +#: ../../pixmap.rst:657 a77956d10e0045d7938da7707a785c51 +msgid "Portable Arbitrary Map" +msgstr "" + +#: ../../pixmap.rst:658 56d719af8b2c472dbf9841d1a146f690 +msgid "pbm" +msgstr "" + +#: ../../pixmap.rst:658 ../../pixmap.rst:659 ../../pixmap.rst:660 +#: ../../pixmap.rst:661 ../../pixmap.rst:662 04315e33b77d48308ccfbc95ed57967a +#: 1f4054fd2a684269bb85dd145f27277b 67c49422d7ab4404bd940af064c350cc +#: c5b3279ea5204093993c92e0c7175e94 fd213587a48240199094d468ced8cb88 +msgid "gray, rgb" +msgstr "" + +#: ../../pixmap.rst:658 7efc47baf20c4d478cdd93f72ee19691 +msgid ".pbm" +msgstr "" + +#: ../../pixmap.rst:658 aac42bc50ded46558fa5a3a6dc28a2c9 +msgid "Portable Bitmap" +msgstr "" + +#: ../../pixmap.rst:659 ebae291f38a045c3918f18364e7a764c +msgid "pgm" +msgstr "" + +#: ../../pixmap.rst:659 207e7dda0de94db1bac522dbd9d144ad +msgid ".pgm" +msgstr "" + +#: ../../pixmap.rst:659 09c87d4fef0c43e4a030ac46f2866496 +msgid "Portable Graymap" +msgstr "" + +#: ../../pixmap.rst:660 44c71e50c1264ec8a777d16b34baa514 +msgid "png" +msgstr "" + +#: ../../pixmap.rst:660 9f7ea19523f842bb9952da3a40937a8d +msgid ".png" +msgstr "" + +#: ../../pixmap.rst:660 ecff212519374cc3b31feca734f5c127 +msgid "Portable Network Graphics" +msgstr "" + +#: ../../pixmap.rst:661 07ee9232b2f14f0f806ee8375c5bcdb6 +msgid "pnm" +msgstr "" + +#: ../../pixmap.rst:661 eaeeabaace0b40c78d19fcd2df7d76c6 +msgid ".pnm" +msgstr "" + +#: ../../pixmap.rst:661 7a9977ee828a437197675289f095f034 +msgid "Portable Anymap" +msgstr "" + +#: ../../pixmap.rst:662 c2cd3c5ddeaa476d91b20222d5d597b5 +msgid "ppm" +msgstr "" + +#: ../../pixmap.rst:662 b0c3f4646d8d4d8d83ba3c0744754ce8 +msgid ".ppm" +msgstr "" + +#: ../../pixmap.rst:662 54edec4bccd54b978d33acc89db3cd5b +msgid "Portable Pixmap" +msgstr "" + +#: ../../pixmap.rst:663 6ff874d1305245f2a5549175dce6ea0a +msgid "ps" +msgstr "" + +#: ../../pixmap.rst:663 8a5fbceab8824142abcedeb7cdca4d7a +msgid ".ps" +msgstr "" + +#: ../../pixmap.rst:663 c23c6e92c41d45f39702eb0ea2a4b820 +msgid "Adobe PostScript Image" +msgstr "" + +#: ../../pixmap.rst:664 26727475505643169225164d63ce07c1 +msgid "psd" +msgstr "" + +#: ../../pixmap.rst:664 147e407f9ce444a4a7d31d01b067a810 +msgid ".psd" +msgstr "" + +#: ../../pixmap.rst:664 3eac54f79f7844d7973d5597d36ff81a +msgid "Adobe Photoshop Document" +msgstr "" + +#: ../../pixmap.rst:668 db2ea52051144bffbd7d5732d79a7cb7 +msgid "" +"Not all image file types are supported (or at least common) on all OS " +"platforms. E.g. PAM and the Portable Anymap formats are rare or even " +"unknown on Windows." +msgstr "" +"すべての画像ファイル形式がすべてのOSプラットフォームでサポートされているわけではありません(または少なくとも一般的ではありません)。たとえば、PAMおよびPortable" +" Anymap形式はWindowsでは珍しいか、またはまったく知られていません。" + +#: ../../pixmap.rst:669 6fb758ded39b4629aa4b829ca8e18c06 +msgid "" +"Especially pertaining to CMYK colorspaces, you can always convert a CMYK " +"pixmap to an RGB pixmap with *rgb_pix = pymupdf.Pixmap(pymupdf.csRGB, " +"cmyk_pix)* and then save that in the desired format." +msgstr "" +"特にCMYKカラースペースに関連することについて、常にCMYK pixmapを *rgb_pix = " +"pymupdf.Pixmap(pymupdf.csRGB、cmyk_pix)* に変換し、その後、その形式で保存できます。" + +#: ../../pixmap.rst:670 21ad7f33d55e40579a6d2a8fe62ca5f9 +msgid "" +"As can be seen, MuPDF's image support range is different for input and " +"output. Among those supported both ways, PNG and JPEG are probably the " +"most popular." +msgstr "ご覧のように、MuPDFの画像サポート範囲は入力と出力で異なります。両方の方法でサポートされているものの中で、PNGとJPEGはおそらく最も人気があります。" + +#: ../../pixmap.rst:671 6d3a484e6d2249c29360c66b0455e207 +msgid "" +"We also recommend using \"ppm\" formats as input to tkinter's " +"*PhotoImage* method like this: *tkimg = " +"tkinter.PhotoImage(data=pix.tobytes(\"ppm\"))* (also see the tutorial). " +"This is **very** fast (**60 times** faster than PNG)." +msgstr "" +"また、tkinterの *PhotoImage* メソッドへの入力として「ppm」形式を使用することをお勧めします。*tkimg = " +"tkinter.PhotoImage(data=pix.tobytes(\"ppm\"))* のように(チュートリアルも参照してください)。これは" +" **非常に** 高速です(PNGよりも60倍速いです)。" + +#: ../../pixmap.rst:676 5d6b6387a4fe46f6b30d61a232534941 +msgid "Footnotes" +msgstr "脚注" + +#: ../../pixmap.rst:677 07abc9afd1ba4f3db4c1a2e0cb7be2f6 +msgid "" +"If you need a **vector image** from the SVG, you must first convert it to" +" a PDF. Try :meth:`Document.convert_to_pdf`. If this is not good enough, " +"look for other SVG-to-PDF conversion tools like the Python packages " +"`svglib `_, `CairoSVG " +"`_, `Uniconvertor " +"`_" +" or the Java solution `Apache Batik `_. " +"Have a look at our Wiki for more examples." +msgstr "" +"SVGから **ベクトル画像** " +"が必要な場合、まずそれをPDFに変換する必要があります。:meth:`Document.convert_to_pdf` " +"を試してみてください。これで十分でない場合、他のSVGからPDFへの変換ツールを探してみてください。Pythonパッケージ `svglib " +"`_ 、 `CairoSVG " +"`_ 、 `Uniconvertor " +"`_" +" 、またはJavaソリューションである `Apache Batik `_ " +"などがあります。詳細な例についてはWikiをご覧ください。" + +#: ../../pixmap.rst:679 3abb171feca64cf8a913cc117dd0b7e8 +msgid "" +"To also set the alpha property, add an additional step to this method by " +"dropping or adding an alpha channel to the result." +msgstr "アルファプロパティも設定する場合は、このメソッドに追加ステップを追加し、結果にアルファチャンネルを追加または削除してください。" + +#: ../../footer.rst:60 d8a58e5d5f2e49aebde3f03934825fdd +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "Since v1.22.0, PyMuPDF supports JPEG " +#~ "output directly. For both, performance " +#~ "reasons and for reducing external " +#~ "dependencies, the use of this method " +#~ "is no longer recommended when outputting" +#~ " JPEG images." +#~ msgstr "" +#~ "v1.22.0 以降、PyMuPDF はJPEG " +#~ "出力を直接サポートしています。パフォーマンスの理由と外部の依存関係を減らすために、JPEG " +#~ "画像を出力する場合にはこのメソッドの使用は推奨されなくなりました。" + +#~ msgid ".raises ImportError: if Pillow is not installed." +#~ msgstr "**ImportError** -- Pillow がインストールされていない場合" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/point.mo b/docs/locales/ja/LC_MESSAGES/point.mo new file mode 100644 index 000000000..bc78b1937 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/point.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/point.po b/docs/locales/ja/LC_MESSAGES/point.po new file mode 100644 index 000000000..5ccd76df0 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/point.po @@ -0,0 +1,279 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 0d500c9fea194e18bdf21282347590bd +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 e33c40891e6e4d32874b82f04358d0c3 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 66e5e2ad40694dd291575cb985997e52 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../point.rst:7 1a7b12dde1344c6b8af9cf30d960d3a2 +msgid "Point" +msgstr "Point (ポイント)" + +#: ../../point.rst:9 9373b96698944d69a97d4aa9025c1380 +msgid "" +"*Point* represents a point in the plane, defined by its x and y " +"coordinates." +msgstr "*Point* は、そのx座標とy座標で定義される平面上のポイントを表します。" + +#: ../../point.rst:12 ddd49ef47c8f4f308f1ddf551cc2e567 +msgid "**Attribute / Method**" +msgstr "**属性/メソッド**" + +#: ../../point.rst:12 ed814359d1e345869d70ff6ee4cdc137 +msgid "**Description**" +msgstr "**説明**" + +#: ../../point.rst:14 4032a33dd2b0453585391c0e32c93025 +msgid ":meth:`Point.distance_to`" +msgstr "" + +#: ../../point.rst:14 9b1b84ecc1484fa58a5bd82f7efcc3e0 +msgid "calculate distance to point or rect" +msgstr "ポイントまたは長方形までの距離を計算します" + +#: ../../point.rst:15 1d732af9b4494df9b35b7fc90450942a +msgid ":meth:`Point.norm`" +msgstr "" + +#: ../../point.rst:15 24cca31dc32d48ebb45024fe75817652 +msgid "the Euclidean norm" +msgstr "ユークリッドノルム" + +#: ../../point.rst:16 a4fdce716744491d81882b1974682151 +msgid ":meth:`Point.transform`" +msgstr "" + +#: ../../point.rst:16 0af15635b5b848668ada5ffbe0cbd6df +msgid "transform point with a matrix" +msgstr "行列でポイントを変換します" + +#: ../../point.rst:17 d888e3fab54a4373b8feaaf91ebd5ae1 +msgid ":attr:`Point.abs_unit`" +msgstr "" + +#: ../../point.rst:17 183111208bb2467cb4b0394b7b4ab584 +msgid "same as unit, but positive coordinates" +msgstr "ユニットと同じですが、座標が正です" + +#: ../../point.rst:18 9d60bb6c9d1c47ab98ee6fa40c9e579e +msgid ":attr:`Point.unit`" +msgstr "" + +#: ../../point.rst:18 3d36d8d93e024d4fb909fb12f00b7ff2 +msgid "point coordinates divided by *abs(point)*" +msgstr "座標を *abs(point)* で割ったもの" + +#: ../../point.rst:19 0f6cbe3e87d741b298d27c89be5c87cd +msgid ":attr:`Point.x`" +msgstr "" + +#: ../../point.rst:19 ed960716ff06486ba3d689e34b3796e3 +msgid "the X-coordinate" +msgstr "X座標" + +#: ../../point.rst:20 dafbd6c6033d4c93957636f1b93bbb34 +msgid ":attr:`Point.y`" +msgstr "" + +#: ../../point.rst:20 14468a77b47b4dd6befbf66696bd10e2 +msgid "the Y-coordinate" +msgstr "Y座標" + +#: ../../point.rst:23 d7c7ec15f0c64ec0bde77f8040a3949b +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../point.rst:35 4f945447bee6442cad5956d1a6846689 +msgid "Overloaded constructors." +msgstr "オーバーロードされたコンストラクタ。" + +#: ../../point.rst:37 67c4744696474b708d6c7b4519ea6369 +msgid "Without parameters, *Point(0, 0)* will be created." +msgstr "パラメーターなしで、*Point(0, 0)* が作成されます。" + +#: ../../point.rst:39 83a7b067d7034877898de81fa905a27b +msgid "" +"With another point specified, a **new copy** will be created, " +"\"sequence\" is a Python sequence of 2 numbers (see " +":ref:`SequenceTypes`)." +msgstr "" +"他のポイントが指定された場合、**新しいコピー** が作成され、 " +"\"sequence\"は2つの数値からなるPythonシーケンスです(:ref:`SequenceTypes`)。" + +#: ../../point.rst 019a616c168b4a2f861f9af0ee399ea7 +#: 2bca886d5a7047579d56cd8ca0b2d161 e9644af7e0554639a33a4f730ebd999f +msgid "Parameters" +msgstr "" + +#: ../../point.rst:41 016a723eaaa64a778d1da1aaff44c2c0 +msgid "x coordinate of the point" +msgstr "ポイントのx座標" + +#: ../../point.rst:43 043d8b3675e44b1b8a995cfffb851812 +msgid "y coordinate of the point" +msgstr "ポイントのy座標" + +#: ../../point.rst:47 bda3f4adf4774b85838e3f7624d90d0c +msgid "" +"Calculate the distance to *x*, which may be :data:`point_like` or " +":data:`rect_like`. The distance is given in units of either pixels " +"(default), inches, centimeters or millimeters." +msgstr "" +"*x* までの距離を計算します。 *x* は :data:`point_like` または :data:`rect_like` " +"である可能性があります。距離は、ピクセル(デフォルト)、インチ、センチメートル、ミリメートルのいずれかの単位で指定されます。" + +#: ../../point.rst:49 51eb5b1d201a4f12a44618b02b2f0ac7 +msgid "to which to compute the distance." +msgstr "距離を計算する対象" + +#: ../../point.rst:51 0ae910e1ebbb43498a2d28bc4183b6b8 +msgid "the unit to be measured in. One of \"px\", \"in\", \"cm\", \"mm\"." +msgstr "測定単位。 \"px\"、 \"in\"、 \"cm\"、 \"mm\"のいずれか" + +#: ../../point.rst 3a555709592f4392804556d4a3ec4e27 +#: 73ea2dd2fe974651a11d7cc2de129e1b +msgid "Return type" +msgstr "" + +#: ../../point.rst 477b136a22ab4265a3da36bf573fe39d +msgid "Returns" +msgstr "" + +#: ../../point.rst:54 2030d7a487e149959a154537312a572e +msgid "" +"the distance to *x*. If this is :data:`rect_like`, then the distance * " +"is the length of the shortest line connecting to one of the rectangle " +"sides * is calculated to the **finite version** of it * is zero if it " +"**contains** the point" +msgstr "" + +#: ../../point.rst:54 af0a3e229e7e43cebbe0f3ab85666636 +msgid "the distance to *x*. If this is :data:`rect_like`, then the distance" +msgstr "*x* までの距離。これが :data:`rect_like` の場合、距離" + +#: ../../point.rst:56 35dd44147c624976886941cec496f10d +msgid "" +"is the length of the shortest line connecting to one of the rectangle " +"sides" +msgstr "長方形の任意の辺に接続する最短線の長さ" + +#: ../../point.rst:57 b958d1db25694ade9997b6fb88065cb1 +msgid "is calculated to the **finite version** of it" +msgstr "その **有限バージョン** が計算されます" + +#: ../../point.rst:58 50b8b68670d44ab58ff2a5e1471a2375 +msgid "is zero if it **contains** the point" +msgstr "ポイントを **含む** 場合はゼロ" + +#: ../../point.rst:62 961b212deb1f4d2ab46a27116cacccab +msgid "New in version 1.16.0" +msgstr "バージョン1.16.0で新規追加" + +#: ../../point.rst:64 ebf5e01ee8414746b3bc426ee94ea1b3 +msgid "" +"Return the Euclidean norm (the length) of the point as a vector. Equals " +"result of function *abs()*." +msgstr "ベクトルとしてのポイントのユークリッドノルム(長さ)を返します。 *abs()* 関数の結果と等しいです。" + +#: ../../point.rst:68 74d77a8f32a14ab58d2ea6a9d9fbd224 +msgid "Apply a matrix to the point and replace it with the result." +msgstr "ポイントに行列を適用して、その結果で置き換えます。" + +#: ../../point.rst:70 781600b0f789421093c8d211a828214c +msgid "The matrix to be applied." +msgstr "適用する行列。" + +#: ../../point.rst:72 ../../point.rst:80 ../../point.rst:86 +#: 5bb59ebd897348deb9ddbeecd467559a 93624a23e8f049d89ba4d5f25f6d5ca5 +#: f42faeb785d84a6aa8ca09b294d45d72 +msgid ":ref:`Point`" +msgstr "" + +#: ../../point.rst:76 ffda14407cc94228ab7552a1ff1596ad +msgid "" +"Result of dividing each coordinate by *norm(point)*, the distance of the " +"point to (0,0). This is a vector of length 1 pointing in the same " +"direction as the point does. Its x, resp. y values are equal to the " +"cosine, resp. sine of the angle this vector (and the point itself) has " +"with the x axis." +msgstr "" +"各座標を *norm(point)* " +"、ポイントが(0、0)からの距離で割った結果です。これは、ポイント自体がx軸との角度を持つベクトルと同じ方向を指す長さ1のベクトルです。そのx、resp。" +" yの値は、このベクトル(およびポイント自体)がx軸とどのような角度を持っているかに等しいです。" + +#: ../../point.rst 365fd452be064ca7b35dd82343f61722 +#: 61aabb3fb7b242c483bcbc3dfbf244a5 ee619eddf0ff4ef4b1cb279d8fa7a6f3 +#: f4ef146bdd194e079d0bbdb3fe0a7a02 +msgid "type" +msgstr "" + +#: ../../point.rst:84 98dd11ae2ded46fba9aa5bc2f6b1667e +msgid "" +"Same as :attr:`unit` above, replacing the coordinates with their absolute" +" values." +msgstr "同じく、上記の :attr:`unit` と同様のもので、座標をそれぞれの絶対値に置き換えたものです。" + +#: ../../point.rst:90 19dceabb090d4f1db7c2c31ba2634756 +msgid "The x coordinate" +msgstr "x座標" + +#: ../../point.rst:92 ../../point.rst:98 3c19788c87c14335baeb6e261c4491a0 +#: d9f4c536626243e68f4e6c81133e0338 +msgid "float" +msgstr "" + +#: ../../point.rst:96 6300e816dea5405293aee117b39c4d79 +msgid "The y coordinate" +msgstr "y座標" + +#: ../../point.rst:102 c67aba539e2441e1b48edda3875e6285 +msgid "" +"This class adheres to the Python sequence protocol, so components can be " +"accessed via their index, too. Also refer to :ref:`SequenceTypes`." +msgstr "" +"このクラスはPythonのシーケンスプロトコルに従っており、コンポーネントはインデックスを使用してアクセスできます。また、PyMuPDFでの引数としてPythonのシーケンスを使用する方法については、:ref:`SequenceTypes`" +" を参照してください。" + +#: ../../point.rst:103 b49de5b5939f4ea2b4916b2303345e68 +msgid "" +"Rectangles can be used with arithmetic operators -- see chapter " +":ref:`Algebra`." +msgstr "長方形は算術演算子と共に使用できます - 幾 :ref:`Algebra` を参照してください。" + +#: ../../footer.rst:60 6e7a0e3f1a044447ac39d419d2f8145a +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf-pro.mo b/docs/locales/ja/LC_MESSAGES/pymupdf-pro.mo new file mode 100644 index 000000000..fbc03995d Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/pymupdf-pro.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf-pro.po b/docs/locales/ja/LC_MESSAGES/pymupdf-pro.po new file mode 100644 index 000000000..06b3959d5 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/pymupdf-pro.po @@ -0,0 +1,276 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.10\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 0dfe01c73aa0450484e10fe2c161a22f +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 b4ed4e93bf294eb098966f465bc797c5 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "" + +#: ../../header.rst:-1 ec46200e47134602a82ee60a1118c156 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "" + +#: ../../pymupdf-pro.rst:9 26812dee4da947c7918d490500c2067c +msgid "PyMuPDF Pro" +msgstr "" + +#: ../../pymupdf-pro.rst:12 aa17b9e367774bb89e40e6d34d5ec0fe +msgid "|PyMuPDF Pro| is a set of *commercial extensions* for |PyMuPDF|." +msgstr "" + +#: ../../pymupdf-pro.rst:14 11f22babd0e94a46b70d2de6efd179bc +msgid "" +"Enhance |PyMuPDF| capability with **Office** document support & " +"**RAG/LLM** integrations." +msgstr "" + +#: ../../pymupdf-pro.rst:16 38e3e965f4184ecc965744d00c2568d5 +msgid "" +"Enables Office document handling, including ``doc``, ``docx``, ``hwp``, " +"``hwpx``, ``ppt``, ``pptx``, ``xls``, ``xlsx``, and others." +msgstr "" + +#: ../../pymupdf-pro.rst:17 0a12a5f74ed941a9be6cef8798aa2cbd +msgid "Supports text and table extraction, document conversion and more." +msgstr "" + +#: ../../pymupdf-pro.rst:18 49a700600f224867985cf301867199d8 +msgid "Includes the commercial version of |PyMuPDF4LLM|." +msgstr "" + +#: ../../pymupdf-pro.rst:20 9450c9107df04fe5bc38e0324bf54c01 +msgid "" +"To enquire about obtaining a commercial license, then `use this contact " +"page `_." +msgstr "" + +#: ../../pymupdf-pro.rst:25 1321bc95b45541b48337e44f78b58020 +msgid "" +"A licensed version of |PyMuPDF Pro| also gives you a licensed version of " +"|PyMuPDF4LLM|. If you are interested in using the |PyMuPDF4LLM| package " +"you should install it separately." +msgstr "" + +#: ../../pymupdf-pro.rst:29 f655cf9fdd1f48dd9e890f8c5aef653f +msgid "Platform support" +msgstr "" + +#: ../../pymupdf-pro.rst:31 367befe5f1a34b7fb9665758548f6448 +msgid "Available for these platforms only:" +msgstr "" + +#: ../../pymupdf-pro.rst:33 a256db349f5242a38297eda82fd9ee71 +msgid "Windows x86_64." +msgstr "" + +#: ../../pymupdf-pro.rst:34 9e2ffb09f61f430f9594a418d1ee8ab0 +msgid "Linux x86_64 (glibc)." +msgstr "" + +#: ../../pymupdf-pro.rst:35 97423a8756fc40408f80993fce6a1fc6 +msgid "MacOS x86_64." +msgstr "" + +#: ../../pymupdf-pro.rst:36 1a59aca2bbc44194b2dde06a89fac54f +msgid "MacOS arm64." +msgstr "" + +#: ../../pymupdf-pro.rst:40 0abf65ed31ca4a949b4b81e0bb521522 +msgid "Office file support" +msgstr "" + +#: ../../pymupdf-pro.rst:42 b8f3023035f74b3dacbf2e3210f2c12f +msgid "" +"In addition to the `standard file types supported by PyMuPDF " +"`, |PyMuPDF Pro| supports:" +msgstr "" + +#: ../../pymupdf-pro.rst:47 45349c74355f48c0abf3ae8f486843a6 +msgid "**DOC/DOCX**" +msgstr "" + +#: ../../pymupdf-pro.rst:48 777b729313244ad298c7b6d5bb49c369 +msgid "**XLS/XLSX**" +msgstr "" + +#: ../../pymupdf-pro.rst:49 371c1c089e4c475caf8acc946169b878 +msgid "**PPT/PPTX**" +msgstr "" + +#: ../../pymupdf-pro.rst:50 d6ec7208ebdc4049815db59e7ec6641e +msgid "**HWP/HWPX**" +msgstr "" + +#: ../../pymupdf-pro.rst:67 097b80f124fa49fd92bab9375f53b975 +msgid "Usage" +msgstr "" + +#: ../../pymupdf-pro.rst:70 f5c7cbdc4f62456199de7159bd404d58 +msgid "Installation" +msgstr "" + +#: ../../pymupdf-pro.rst:72 dabefb8cfac54dabb7e1859dbc4578ed +msgid "Install via pip with:" +msgstr "" + +#: ../../pymupdf-pro.rst:80 47d8f70087754faa8e60ff7f7e0d1b2a +msgid "Loading an **Office** document" +msgstr "" + +#: ../../pymupdf-pro.rst:82 f5cf1df845294870b35e718fadf85b1f +msgid "" +"Import |PyMuPDF Pro| and you can then reference **Office** documents " +"directly, e.g.:" +msgstr "" + +#: ../../pymupdf-pro.rst:93 470f5d9c683947b2b97f3e90e42f8a5d +msgid "" +"All standard |PyMuPDF| functionality is exposed as expected - |PyMuPDF " +"Pro| handles the extended **Office** file types" +msgstr "" + +#: ../../pymupdf-pro.rst:96 72c50a703f314197b00a1d336392d8dc +msgid "" +"From then on you can work with document pages just as you would do " +"normally, but with respect to the `restrictions " +"`." +msgstr "" + +#: ../../pymupdf-pro.rst:100 5b447b0138f849efa8d4cb7a1cfd910b +msgid "Converting an **Office** document to |PDF|" +msgstr "" + +#: ../../pymupdf-pro.rst:102 b8e5b07700944b9da32ae37ca9175e00 +msgid "" +"The following code snippet can convert your **Office** document to |PDF| " +"format:" +msgstr "" + +#: ../../pymupdf-pro.rst:120 26c5a530a9a54f56bc28027850958f25 +msgid "Restrictions" +msgstr "" + +#: ../../pymupdf-pro.rst:123 2391a063f21c4de092b0beca7b94377e +msgid "" +"|PyMuPDF Pro| functionality is restricted without a license key as " +"follows:" +msgstr "" + +#: ../../pymupdf-pro.rst:125 1e6b67342d31405ab0f72646252650df +msgid "**Only the first 3 pages of any document will be available.**" +msgstr "" + +#: ../../pymupdf-pro.rst:127 c176b676614c4507959f495c339a3e71 +msgid "" +"To unlock full functionality you should `obtain a trial key " +"`_." +msgstr "" + +#: ../../pymupdf-pro.rst:133 9a01dcbd3af143b6bad7400a98e6b69d +msgid "Trial keys" +msgstr "" + +#: ../../pymupdf-pro.rst:135 89e312aa37bd4b749727cd14913e2e70 +msgid "" +"To obtain a license key `please fill out the form on this page " +"`_. You will then have the trial key " +"emailled to the address you submitted." +msgstr "" + +#: ../../pymupdf-pro.rst:139 371fa0ea70f44d9e8e6e9155a631426f +msgid "Using a key" +msgstr "" + +#: ../../pymupdf-pro.rst:142 25f50e294ef5429e86781bea1e1b1e29 +msgid "Initialize |PyMuPDF Pro| with a key as follows:" +msgstr "" + +#: ../../pymupdf-pro.rst:150 b01044f9b9f34927892044fa1c007b7f +msgid "" +"This will allow you to evaluate the product for a limited time. If you " +"want to use |PyMuPDF Pro| after this time you should then `enquire about " +"obtaining a commercial license `_." +msgstr "" + +#: ../../pymupdf-pro.rst:154 fcfee54b997f4b5a8368464ac33e8568 +msgid "Fonts" +msgstr "" + +#: ../../pymupdf-pro.rst:156 c22971d466c14a31aeb4f6e851c88144 +msgid "" +"By default `pymupdf.pro.unlock()` searches for all installed font " +"directories." +msgstr "" + +#: ../../pymupdf-pro.rst:158 c388a7575cdf4d709d86899170008a23 +msgid "This can be controlled with keyword-only args:" +msgstr "" + +#: ../../pymupdf-pro.rst:160 9c1f44585d134e12bb9b74ce4a83121b +msgid "" +"`fontpath`: specific font directories, either as a list/tuple or " +"`os.sep`-separated string. If None (the default), we use " +"`os.environ['PYMUPDFPRO_FONT_PATH']` if set." +msgstr "" + +#: ../../pymupdf-pro.rst:162 ae8654a3d7f14d329aa5caf0f2ccde78 +msgid "" +"`fontpath_auto`: Whether to append system font directories. If None (the " +"default) we use true if `os.environ['PYMUPDFPRO_FONT_PATH_AUTO']` is '1'." +" If true we append all system font directories." +msgstr "" + +#: ../../pymupdf-pro.rst:166 28aa600bf1164c318f946da4d23619db +msgid "" +"Function `pymupdf.pro.get_fontpath()` returns a tuple of all font " +"directories used by `unlock()`." +msgstr "" + +#: ../../footer.rst:60 fdcd80e27b7c41f882a85b921e2ee500 +msgid "This documentation covers all versions up to |version|." +msgstr "" + +#~ msgid "Enhance |PyMuPDF| capability with **Office** document support." +#~ msgstr "" + +#~ msgid "" +#~ "|PyMuPDF Pro| offers all the features" +#~ " of |PyMuPDF|, plus enhanced functionality" +#~ " to support **Office** documents." +#~ msgstr "" + +#~ msgid "Load, parse and extract text data from **Office** files" +#~ msgstr "" + +#~ msgid "Ablility to render **Office** files" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.mo b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.mo new file mode 100644 index 000000000..fbc03995d Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.po b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.po new file mode 100644 index 000000000..853d7abb6 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/api.po @@ -0,0 +1,689 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.10\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 161fa590288f4fd0b6ddb48d1fcf5000 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 2f1e24042a8a417186402dba23a28c1a +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "" + +#: ../../header.rst:-1 1ead176e0cf24a12aa54d5bd556c6eba +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "" + +#: ../../pymupdf4llm/api.rst:9 dd1b10222a314b44977b976d86511926 +msgid "API" +msgstr "" + +#: ../../pymupdf4llm/api.rst:12 308ebba9a9b747ec98873583cc86d573 +msgid "The |PyMuPDF4LLM| API" +msgstr "" + +#: ../../pymupdf4llm/api.rst:17 29230644e34341699ebb41eda2822f8b +msgid "Prints the version of the library." +msgstr "" + +#: ../../pymupdf4llm/api.rst:21 7a3872169c9342cd931823dbc9b0c9db +msgid "" +"Read the pages of the file and outputs the text of its pages in " +"|Markdown| format. How this should happen in detail can be influenced by " +"a number of parameters. Please note that there exists **support for " +"building page chunks** from the |Markdown| text." +msgstr "" + +#: ../../pymupdf4llm/api.rst 941ff3e5d21144598a204c133232c827 +#: f8a657b6752e41dba12869934f34b892 fd61c3859cb646a5a245a80966304da4 +msgid "Parameters" +msgstr "" + +#: ../../pymupdf4llm/api.rst:23 ../../pymupdf4llm/api.rst:117 +#: 34f6f50149e945258b44f54dd7171378 bf4d0f40f7ea4522bc4209d74f4d470c +msgid "" +"the file, to be specified either as a file path string, or as a |PyMuPDF|" +" Document (created via `pymupdf.open`). In order to use `pathlib.Path` " +"specifications, Python file-like objects, documents in memory etc. you " +"**must** use a |PyMuPDF| Document." +msgstr "" + +#: ../../pymupdf4llm/api.rst:25 6efdf54f275f45698e578a73fc418ff9 +msgid "" +"optional, the pages to consider for output (caution: specify 0-based page" +" numbers). If omitted all pages are processed." +msgstr "" + +#: ../../pymupdf4llm/api.rst:27 dfc90f44dd6d48dbaa8f4932cc65c126 +msgid "" +"optional. Use this if you want to provide your own header detection " +"logic. This may be a callable or an object having a method named " +"`get_header_id`. It must accept a text span (a span dictionary as " +"contained in :meth:`~.extractDICT`) and a keyword parameter \"page\" " +"(which is the owning :ref:`Page ` object). It must return a string " +"\"\" or up to 6 \"#\" characters followed by 1 space. If omitted, a full " +"document scan will be performed to find the most popular font sizes and " +"derive header levels based on them. To completely avoid this behavior " +"specify `hdr_info=lambda s, page=None: \"\"` or `hdr_info=False`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:29 df8708774f1d47c4a5c89b81744fd416 +msgid "" +"when encountering images or vector graphics, images will be created from " +"the respective page area and stored in the specified folder. Markdown " +"references will be generated pointing to these images. Any text contained" +" in these areas will not be included in the text output (but appear as " +"part of the images). Therefore, if for instance your document has text " +"written on full page images, make sure to set this parameter to `False`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:31 7d5b9c4adcdf4b978bdede054e4b208a +msgid "" +"like `write_images`, but images will be included in the markdown text as " +"base64-encoded strings. Ignores `write_images` and `image_path` if used. " +"This may drastically increase the size of your markdown text." +msgstr "" + +#: ../../pymupdf4llm/api.rst:33 13da05a3d8474a6da6180273be172342 +msgid "" +"(New in v.0.0.20) Disregard images on the page. This may help detecting " +"text correctly when pages are very crowded (often the case for documents " +"representing presentation slides). Also speeds up processing time." +msgstr "" + +#: ../../pymupdf4llm/api.rst:35 093a637de1f94e6b9662bb50238e3509 +msgid "" +"(New in v.0.0.20) Disregard vector graphics on the page. This may help " +"detecting text correctly when pages are very crowded (often the case for " +"documents representing presentation slides). Also speeds up processing " +"time. Vector graphics are still used for table detection." +msgstr "" + +#: ../../pymupdf4llm/api.rst:37 1b13e6911f7a493881726a6c198e6a6f +#, python-format +msgid "" +"this must be a positive value less than 1. Images are ignored if `width /" +" page.rect.width <= image_size_limit` or `height / page.rect.height <= " +"image_size_limit`. For instance, the default value 0.05 means that to be " +"considered for inclusion, an image's width and height must be larger than" +" 5% of the page's width and height, respectively." +msgstr "" + +#: ../../pymupdf4llm/api.rst:39 c90a083414394044a30a65e82b80b02b +msgid "" +"specify the desired image resolution in dots per inch. Relevant only if " +"`write_images=True`. Default value is 150." +msgstr "" + +#: ../../pymupdf4llm/api.rst:41 fb19d8fd4e6e44688a5677a3b7bfa5e1 +msgid "" +"store images in this folder. Relevant if `write_images=True`. Default is " +"the path of the script directory." +msgstr "" + +#: ../../pymupdf4llm/api.rst:43 9f8ca9ade66c4e6f84cc5e70869c3015 +msgid "" +"specify the desired image format via its extension. Default is \"png\" " +"(portable network graphics). Another popular format may be \"jpg\". " +"Possible values are all :ref:`supported output formats " +"`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:45 c2184864835d4f2f830f1d7d39fe25b8 +msgid "" +"generate text output even when overlapping images / graphics. This text " +"then appears after the respective image. If `write_images=True` this " +"parameter may be `False` to suppress repetition of text on images." +msgstr "" + +#: ../../pymupdf4llm/api.rst:47 75d9541669704af890db0d2712359ad3 +msgid "" +"a float or a sequence of 2 or 4 floats specifying page borders. Only " +"objects inside the margins will be considered for output. * `margin=f` " +"yields `(f, f, f, f)` for `(left, top, right, bottom)`. * `(top, bottom)`" +" yields `(0, top, 0, bottom)`. * To always read full pages " +"**(default)**, use `margins=0`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:47 4ee549281ffb40acac6450df14f68ad2 +msgid "" +"a float or a sequence of 2 or 4 floats specifying page borders. Only " +"objects inside the margins will be considered for output." +msgstr "" + +#: ../../pymupdf4llm/api.rst:49 68496561cfa84781b9daf4f2baf057b5 +msgid "`margin=f` yields `(f, f, f, f)` for `(left, top, right, bottom)`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:50 a0d0c48ee9f046bb8b64d202127a7386 +msgid "`(top, bottom)` yields `(0, top, 0, bottom)`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:51 a9b037ad3086480a99f975d52c507035 +msgid "To always read full pages **(default)**, use `margins=0`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:53 9cde949c00e44ababef22b54e7f08589 +msgid "" +"if `True` the output will be a list of `Document.page_count` dictionaries" +" (one per page). Each dictionary has the following structure: - " +"**\"metadata\"** - a dictionary consisting of the document's metadata " +":attr:`Document.metadata`, enriched with additional keys " +"**\"file_path\"** (the file name), **\"page_count\"** (number of pages in" +" document), and **\"page_number\"** (1-based page number). - " +"**\"toc_items\"** - a list of Table of Contents items pointing to this " +"page. Each item of this list has the format `[lvl, title, pagenumber]`, " +"where `lvl` is the hierarchy level, `title` a string and `pagenumber` as " +"a 1-based page number. - **\"tables\"** - a list of tables on this page." +" Each item is a dictionary with keys \"bbox\", \"row_count\" and " +"\"col_count\". Key \"bbox\" is a `pymupdf.Rect` in tuple format of the " +"table's position on the page. - **\"images\"** - a list of images on the" +" page. This a copy of page method :meth:`Page.get_image_info`. - " +"**\"graphics\"** - a list of vector graphics rectangles on the page. This" +" is a list of boundary boxes of clustered vector graphics as delivered by" +" method :meth:`Page.cluster_drawings`. - **\"text\"** - page content as " +"|Markdown| text. - **\"words\"** - if `extract_words=True` was used. " +"This is a list of tuples `(x0, y0, x1, y1, \"wordstring\", bno, lno, " +"wno)` as delivered by `page.get_text(\"words\")`. The **sequence** of " +"these tuples however is the same as produced in the markdown text string " +"and thus honors multi-column text. This is also true for text in tables: " +"words are extracted in the sequence of table row cells." +msgstr "" + +#: ../../pymupdf4llm/api.rst:53 abc8f7f8a91e4d97b93a9eab887bc295 +msgid "" +"if `True` the output will be a list of `Document.page_count` dictionaries" +" (one per page). Each dictionary has the following structure:" +msgstr "" + +#: ../../pymupdf4llm/api.rst:55 01581b16bd3849528107486df1e89416 +msgid "" +"**\"metadata\"** - a dictionary consisting of the document's metadata " +":attr:`Document.metadata`, enriched with additional keys " +"**\"file_path\"** (the file name), **\"page_count\"** (number of pages in" +" document), and **\"page_number\"** (1-based page number)." +msgstr "" + +#: ../../pymupdf4llm/api.rst:57 f3be96984c334a7b918bebbc9fb08cc4 +msgid "" +"**\"toc_items\"** - a list of Table of Contents items pointing to this " +"page. Each item of this list has the format `[lvl, title, pagenumber]`, " +"where `lvl` is the hierarchy level, `title` a string and `pagenumber` as " +"a 1-based page number." +msgstr "" + +#: ../../pymupdf4llm/api.rst:59 b7213edeefad4d1ea798afc49d6991e9 +msgid "" +"**\"tables\"** - a list of tables on this page. Each item is a dictionary" +" with keys \"bbox\", \"row_count\" and \"col_count\". Key \"bbox\" is a " +"`pymupdf.Rect` in tuple format of the table's position on the page." +msgstr "" + +#: ../../pymupdf4llm/api.rst:61 91dc6a17d4bc40ed9d06b1a8356b42b1 +msgid "" +"**\"images\"** - a list of images on the page. This a copy of page method" +" :meth:`Page.get_image_info`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:63 c54878d36f5146adb1fe1b606d4791bc +msgid "" +"**\"graphics\"** - a list of vector graphics rectangles on the page. This" +" is a list of boundary boxes of clustered vector graphics as delivered by" +" method :meth:`Page.cluster_drawings`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:65 3ac5b97ce14e432eb3a16cf5a66f7bbc +msgid "**\"text\"** - page content as |Markdown| text." +msgstr "" + +#: ../../pymupdf4llm/api.rst:67 a5cfe753b006454aa357cbbe832c210a +msgid "" +"**\"words\"** - if `extract_words=True` was used. This is a list of " +"tuples `(x0, y0, x1, y1, \"wordstring\", bno, lno, wno)` as delivered by " +"`page.get_text(\"words\")`. The **sequence** of these tuples however is " +"the same as produced in the markdown text string and thus honors multi-" +"column text. This is also true for text in tables: words are extracted in" +" the sequence of table row cells." +msgstr "" + +#: ../../pymupdf4llm/api.rst:69 133ebcf3b9294b2b8366a8aba0b23ff0 +msgid "" +"(New in v.0.0.19) Overwrites or sets the desired image file name of " +"written images. Useful when the document is provided as a memory object " +"(which has no inherent file name)." +msgstr "" + +#: ../../pymupdf4llm/api.rst:71 3940dc63e1554b3d9d74de2b17a78718 +msgid "" +"specify a desired page width. This is ignored for documents with a fixed " +"page width like PDF, XPS etc. **Reflowable** documents however, like " +"e-books, office [#f2]_ or text files have no fixed page dimensions and by" +" default are assumed to have Letter format width (612) and an " +"**\"infinite\"** page height. This means that the **full document is " +"treated as one large page.**" +msgstr "" + +#: ../../pymupdf4llm/api.rst:73 6acbf074eb9d4865a46a6022c941a389 +msgid "" +"specify a desired page height. For relevance see the `page_width` " +"parameter. If using the default `None`, the document will appear as one " +"large page with a width of `page_width`. Consequently in this case, no " +"markdown page separators will occur (except the final one), respectively " +"only one page chunk will be returned." +msgstr "" + +#: ../../pymupdf4llm/api.rst:75 b4f7575c9b304d5c85bb2720b91f43a1 +msgid "" +"`table detection strategy " +"`_. " +"Default is `\"lines_strict\"` which ignores background colors. In some " +"occasions, other strategies may be more successful, for example " +"`\"lines\"` which uses all vector graphics objects for detection. " +"**Changed in v0.0.19:** A value of `None` will not perform any table " +"detection at all. This may be useful when you know that your document " +"contains no tables. Execution time savings can be significant." +msgstr "" + +#: ../../pymupdf4llm/api.rst:77 551ad070a28c4c4bbef0cf2630c6e10c +msgid "" +"use this to limit dealing with excess amounts of vector graphics " +"elements. Scientific documents, or pages simulating text via graphics " +"commands may contain tens of thousands of these objects. As vector " +"graphics are analyzed for multiple purposes, runtime may quickly become " +"intolerable. With this parameter, all vector graphics will be ignored if " +"their count exceeds the threshold. **Changed in v0.0.19:** The page will " +"still be processed, and text, tables and images should be extracted." +msgstr "" + +#: ../../pymupdf4llm/api.rst:79 d0103d4e9c6946cf8321f2294f23fcf2 +msgid "" +"if `True` then mono-spaced text does not receive special formatting. Code" +" blocks will no longer be generated. This value is set to `True` if " +"`extract_words=True` is used." +msgstr "" + +#: ../../pymupdf4llm/api.rst:81 8f48ac78fc384c959709db63b5730a8f +msgid "" +"a value of `True` enforces `page_chunks=True` and adds key \"words\" to " +"each page dictionary. Its value is a list of words as delivered by " +"PyMuPDF's `Page` method `get_text(\"words\")`. The sequence of the words " +"in this list is the same as the extracted text." +msgstr "" + +#: ../../pymupdf4llm/api.rst:83 1779ec1e4ae04f0c804b23ba2da9cbec +msgid "" +"Default is `False`. A value of `True` displays a text-based progress bar " +"as pages are being converted to Markdown. It will look similar to the " +"following:: Processing input.pdf... [====================" +" ] (148/291)" +msgstr "" + +#: ../../pymupdf4llm/api.rst:83 c653a2e980b64aa785be22f126457523 +msgid "" +"Default is `False`. A value of `True` displays a text-based progress bar " +"as pages are being converted to Markdown. It will look similar to the " +"following::" +msgstr "" + +#: ../../pymupdf4llm/api.rst:85 60a64f73ce9e4e36b6104e04ab4832a6 +msgid "" +"Processing input.pdf... [==================== ] " +"(148/291)" +msgstr "" + +#: ../../pymupdf4llm/api.rst:88 6bb91dff3b59492082791509607679a4 +msgid "" +"(New in v.0.0.19) Default is `False`. A value of `True` will use the " +"glyph number of the characters instead of the character itself." +msgstr "" + +#: ../../pymupdf4llm/api.rst 7488db8324964ad488f9fafb1dd92f9c +#: 7587197f19fe4c238ed9e365ad329b65 9df1c3b59d134dd9be4aeae849c5c46e +#: b9e22ae716c742b2a0a6d7706e7d35c1 +msgid "Returns" +msgstr "" + +#: ../../pymupdf4llm/api.rst:90 d1f8b5ca3d924f048a6f59cde9e2be0d +msgid "" +"Either a string of the combined text of all selected document pages, or a" +" list of dictionaries." +msgstr "" + +#: ../../pymupdf4llm/api.rst:94 8220254dd0384d59ac22680bb9256286 +msgid "" +"Create a `pdf_markdown_reader.PDFMarkdownReader` using the `LlamaIndex`_ " +"package. Please note that this package will **not automatically be " +"installed** when installing **pymupdf4llm**." +msgstr "" + +#: ../../pymupdf4llm/api.rst:96 c58efce17afb4f5da0df5be15f065deb +msgid "" +"For details on the possible arguments, please consult the LlamaIndex " +"documentation [#f1]_." +msgstr "" + +#: ../../pymupdf4llm/api.rst ad546a9c3d0049e796fd7b47e8a1108d +msgid "raises" +msgstr "" + +#: ../../pymupdf4llm/api.rst:98 2c059472fef84384bd37e55710ca095c +msgid "`NotImplementedError`: Please install required `LlamaIndex`_ package." +msgstr "" + +#: ../../pymupdf4llm/api.rst:99 9118c323bb1448fc9adbafad8b6ffbad +msgid "" +"a `pdf_markdown_reader.PDFMarkdownReader` and issues message " +"\"Successfully imported LlamaIndex\". Please note that this method needs " +"several seconds to execute. For details on using the markdown reader " +"please see below." +msgstr "" + +#: ../../pymupdf4llm/api.rst:108 134f6dcec56b4f5a83b7fdc905a221bf +msgid "" +"Create an object which maps text font sizes to the respective number of " +"'#' characters which are used by Markdown syntax to indicate header " +"levels. The object is created by scanning the document for font size " +"\"popularity\". The most popular font size and all smaller sizes are used" +" for body text. Larger font sizes are mapped to the respective header " +"levels - which correspond to the HTML tags `

` to `

`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:110 f7c809d6de944ff9bcb30e6fbec94a4b +msgid "All font sizes are rounded to integer values." +msgstr "" + +#: ../../pymupdf4llm/api.rst:112 b499cbba0af94b69b8af0f94cd8863cc +msgid "" +"If more than 6 header levels would be required, then the largest number " +"smaller than the `
` font size is used for body text." +msgstr "" + +#: ../../pymupdf4llm/api.rst:114 50bd2d2ded7a490e820b5930fb174b45 +msgid "" +"Please note that creating the object will read and inspect the text of " +"the entire document - independently of reading the document again in the " +"`to_markdown()` method subsequently. Method `to_markdown()` by default " +"**will create this object** if you do not override its `hdr_info=None` " +"parameter." +msgstr "" + +#: ../../pymupdf4llm/api.rst:119 812f574b306a47c486655e86ff50596c +msgid "optional, the pages to consider. If omitted all pages are processed." +msgstr "" + +#: ../../pymupdf4llm/api.rst:121 8a5e1a0685b149b1af66ad795d69f955 +msgid "" +"the default font size limit for body text. Only used when the document " +"scan does not deliver valid information." +msgstr "" + +#: ../../pymupdf4llm/api.rst:123 564f318802954586965f8ec524f6af29 +msgid "" +"the maximum number of header levels to be used. Valid values are in " +"`range(1, 7)`. The default is 6, which corresponds to the HTML tags " +"`

` to `

`. A smaller value will limit the number of generated " +"header levels. For instance, a value of 3 will only generate header tags " +"\"#\", \"##\" and \"###\". Body text will be assumed for all font sizes " +"smaller than the one corresponding to \"###\"." +msgstr "" + +#: ../../pymupdf4llm/api.rst:128 ddb0ae41e91f44618b88dea05580fed3 +msgid "" +"Return appropriate markdown header prefix. This is either \"\" or a " +"string of \"#\" characters followed by a space." +msgstr "" + +#: ../../pymupdf4llm/api.rst:130 38868ba2c7784951a15fe8d1ef2011ae +msgid "" +"Given a text span from a \"dict\"\" extraction, determine the markdown " +"header prefix string of 0 to n concatenated '#' characters." +msgstr "" + +#: ../../pymupdf4llm/api.rst:133 96186998ca204d6b95c983eaec78baab +msgid "" +"a dictionary containing the text span information. This is the same " +"dictionary as returned by `page.get_text(\"dict\")`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:135 e91180aa37384fbd8d76c061b1b0b50c +msgid "" +"the owning page object. This can be used when additional information " +"needs to be extracted." +msgstr "" + +#: ../../pymupdf4llm/api.rst:137 876fd6dd009f40bb888c26996ad84fee +msgid "a string of \"#\" characters followed by a space." +msgstr "" + +#: ../../pymupdf4llm/api.rst:141 e9f0fbb3d9914a14b20acd8c93aecb8d +msgid "" +"A dictionary mapping (integer) font sizes to Markdown header strings like" +" ``{14: '# ', 12: '## '}``. The dictionary is created by the " +"`IdentifyHeaders` constructor. The keys are the font sizes of the text " +"spans in the document. The values are the respective header strings." +msgstr "" + +#: ../../pymupdf4llm/api.rst:145 a387e9ca8cf6422694f17f11b8900f8d +msgid "" +"An integer value indicating the font size limit for body text. This is " +"computed as ``min(header_id.keys()) - 1``. In the above example, " +"body_limit would be 11." +msgstr "" + +#: ../../pymupdf4llm/api.rst:148 12b6e9b778b142108500f8b3aca00951 +msgid "**How to limit header levels (example)**" +msgstr "" + +#: ../../pymupdf4llm/api.rst:150 97f840c665744d5aa56345bd771df406 +msgid "Limit the generated header levels to 3::" +msgstr "" + +#: ../../pymupdf4llm/api.rst:160 12e402ed008d42198663e949e45c9049 +msgid "**How to provide your own header logic (example 1)**" +msgstr "" + +#: ../../pymupdf4llm/api.rst:162 c2d130c42052420c80db16b810beb9c6 +msgid "Provide your own function which uses pre-determined, fixed font sizes::" +msgstr "" + +#: ../../pymupdf4llm/api.rst:189 89cb7ad3a36644e7934dc0bb4c12ee88 +msgid "**How to provide your own header logic (example 2)**" +msgstr "" + +#: ../../pymupdf4llm/api.rst:191 cf9a2b58e9ce4086a9ab3af9f16990f8 +msgid "" +"This user function uses the document's Table of Contents -- under the " +"assumption that the bookmark text is also present as a header line on the" +" page (which certainly need not be the case!)::" +msgstr "" + +#: ../../pymupdf4llm/api.rst:231 70402737f5cc4a82acff6c74929cbce2 +msgid "" +"This is the only method of the markdown reader you should currently use " +"to extract markdown data. Please in any case ignore methods " +"`aload_data()` and `lazy_load_data()`. Other methods like " +"`use_doc_meta()` may or may not make sense. For more information, please " +"consult the LlamaIndex documentation [#f1]_." +msgstr "" + +#: ../../pymupdf4llm/api.rst:233 6d6bfa39899e46da9ee69ca8102c340c +msgid "Under the hood the method will execute `to_markdown()`." +msgstr "" + +#: ../../pymupdf4llm/api.rst:235 089f629a239748b1a011c9cc4d45e47a +msgid "a list of `LlamaIndexDocument` documents - one for each page." +msgstr "" + +#: ../../pymupdf4llm/api.rst:239 96a7245167a64bc9937198ad512201c5 +msgid "" +"For a list of changes, please see file `CHANGES.md " +"`_." +msgstr "" + +#: ../../pymupdf4llm/api.rst:242 bd4edb68eae44ad8989bfb56e9320b2d +msgid "Footnotes" +msgstr "" + +#: ../../pymupdf4llm/api.rst:243 b746861d660d446ab994c8904d5d41e2 +msgid "`LlamaIndex documentation `_" +msgstr "" + +#: ../../pymupdf4llm/api.rst:245 6ac5aea47ace4dd795a865534c09ea86 +msgid "" +"When using PyMuPDF-Pro, supported office documents are converted " +"internally into a PDF-like format. Therefore, they **will have fixed page" +" dimensions** and be no longer \"reflowable\". Consequently, the page " +"width and page height specifications will be ignored as well in these " +"cases." +msgstr "" + +#: ../../footer.rst:60 9b34609a103941c9ba673a283a4b70b4 +msgid "This documentation covers all versions up to |version|." +msgstr "" + +#~ msgid "" +#~ "a float or a sequence of 2 " +#~ "or 4 floats specifying page borders. " +#~ "Only objects inside the margins will " +#~ "be considered for output. * `margin=f`" +#~ " yields `(f, f, f, f)` for " +#~ "`(left, top, right, bottom)`. * `(top," +#~ " bottom)` yields `(0, top, 0, " +#~ "bottom)`. * To always read full " +#~ "pages, use `margins=0`." +#~ msgstr "" + +#~ msgid "To always read full pages, use `margins=0`." +#~ msgstr "" + +#~ msgid "" +#~ "if `True` the output will be a " +#~ "list of `Document.page_count` dictionaries " +#~ "(one per page). Each dictionary has " +#~ "the following structure: - **\"metadata\"**" +#~ " - a dictionary consisting of the " +#~ "document's metadata :attr:`Document.metadata`, " +#~ "enriched with additional keys " +#~ "**\"file_path\"** (the file name), " +#~ "**\"page_count\"** (number of pages in " +#~ "document), and **\"page_number\"** (1-based " +#~ "page number). - **\"toc_items\"** - a" +#~ " list of Table of Contents items " +#~ "pointing to this page. Each item " +#~ "of this list has the format `[lvl," +#~ " title, pagenumber]`, where `lvl` is " +#~ "the hierarchy level, `title` a string" +#~ " and `pagenumber` as a 1-based page" +#~ " number. - **\"tables\"** - a list" +#~ " of tables on this page. Each " +#~ "item is a dictionary with keys " +#~ "\"bbox\", \"row_count\" and \"col_count\". Key" +#~ " \"bbox\" is a `pymupdf.Rect` in " +#~ "tuple format of the table's position " +#~ "on the page. - **\"images\"** - a" +#~ " list of images on the page. " +#~ "This a copy of page method " +#~ ":meth:`Page.get_image_info`. - **\"graphics\"** -" +#~ " a list of vector graphics rectangles" +#~ " on the page. This is a list" +#~ " of boundary boxes of clustered " +#~ "vector graphics as delivered by method" +#~ " :meth:`Page.cluster_drawings`. - **\"text\"** -" +#~ " page content as |Markdown| text." +#~ msgstr "" + +#~ msgid "" +#~ "specify a desired page width. This " +#~ "is ignored for documents with a " +#~ "fixed page width like PDF, XPS " +#~ "etc. **Reflowable** documents however, like" +#~ " e-books, office or text files have" +#~ " no fixed page dimensions and by " +#~ "default are assumed to have Letter " +#~ "format width (612) and an " +#~ "**\"infinite\"** page height. This means " +#~ "that the full document is treated " +#~ "as one large page." +#~ msgstr "" + +#~ msgid "" +#~ "table detection strategy. Default is " +#~ "`\"lines_strict\"` which ignores background " +#~ "colors. In some occasions, other " +#~ "strategies may be more successful, for" +#~ " example `\"lines\"` which uses all " +#~ "vector graphics objects for detection." +#~ msgstr "" + +#~ msgid "" +#~ "use this to limit dealing with " +#~ "excess amounts of vector graphics " +#~ "elements. Typically, scientific documents or" +#~ " pages simulating text using graphics " +#~ "commands may contain tens of thousands" +#~ " of these objects. As vector graphics" +#~ " are used for table detection mainly," +#~ " analyzing pages of this kind may " +#~ "result in excessive runtimes. You can" +#~ " exclude problematic pages via for " +#~ "instance `graphics_limit=5000` or even a " +#~ "smaller value as desired. The respective" +#~ " pages will then be ignored and " +#~ "be represented by one message line " +#~ "in the output text." +#~ msgstr "" + +#~ msgid "" +#~ "if `True` then mono-spaced text " +#~ "does not receive special formatting " +#~ "treatment. Code blocks will also no " +#~ "longer be generated." +#~ msgstr "" + +#~ msgid "" +#~ "a value of `True` (the default) " +#~ "displays a text-based progress bar " +#~ "as pages are being converted to " +#~ "Markdown. It will look similar to " +#~ "the following:: Processing input.pdf... " +#~ "[==================== ] (148/291)" +#~ msgstr "" + +#~ msgid "" +#~ "a value of `True` (the default) " +#~ "displays a text-based progress bar " +#~ "as pages are being converted to " +#~ "Markdown. It will look similar to " +#~ "the following::" +#~ msgstr "" + +#~ msgid "" +#~ "Either a string of the combined " +#~ "text of all selected document pages " +#~ "or a list of dictionaries." +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.mo b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.mo new file mode 100644 index 000000000..fb005278b Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.po b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.po new file mode 100644 index 000000000..4bceb515c --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/pymupdf4llm/index.po @@ -0,0 +1,277 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 a84d929820f24736805d1a33254caaa4 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 a1b9b769f9164850b5833a74b4609b4f +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "" + +#: ../../header.rst:-1 80467e59d59b4910b2d8cf9af1052bd1 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "" + +#: ../../pymupdf4llm/index.rst:8 703ddb8e7e9548e2b22644982881c8e8 +msgid "PyMuPDF4LLM" +msgstr "" + +#: ../../pymupdf4llm/index.rst:10 dae2ff70c8bd479fb23e2d60283f0739 +msgid "" +"|PyMuPDF4LLM| is aimed to make it easier to extract |PDF| content in the " +"format you need for **LLM** & **RAG** environments. It supports " +":ref:`Markdown extraction ` as well as :ref:`LlamaIndex" +" document output `." +msgstr "" +"|PyMuPDF4LLM| は、**LLM** や **RAG** 環境で必要な形式で |PDF| " +"コンテンツを簡単に抽出できるようにすることを目的としています。 **Markdown** 形式での抽出や、 **LlamaIndex** " +"ドキュメント出力もサポートしています。" + +#: ../../pymupdf4llm/index.rst:14 479be0d6bcbf423a8f21eaad1099d850 +msgid "" +"You can extend the supported file types to also include **Office** " +"document formats (DOC/DOCX, XLS/XLSX, PPT/PPTX, HWP/HWPX) by :ref:`using " +"PyMuPDF Pro with PyMuPDF4LLM `." +msgstr "" +":ref:`PyMuPDF ProをPyMuPDF4LLMと併用することで " +"`、対応するファイル形式を拡張し、 **Office** " +"ドキュメント形式(DOC/DOCX、XLS/XLSX、PPT/PPTX、HWP/HWPX)も含めることができます。" + +#: ../../pymupdf4llm/index.rst:17 88bc23fd6aab4c13999f607fcaa24fe8 +msgid "Features" +msgstr "特徴" + +#: ../../pymupdf4llm/index.rst:19 0e7d39d10d844ea190a65305e8a5383b +msgid "Support for multi-column pages" +msgstr "マルチカラムページのサポート" + +#: ../../pymupdf4llm/index.rst:20 e86a689a833942978ed6351570c186ae +msgid "" +"Support for image and vector graphics extraction (and inclusion of " +"references in the MD text)" +msgstr "画像およびベクターグラフィックスの抽出のサポート(MDテキストへの参照の挿入を含む)" + +#: ../../pymupdf4llm/index.rst:21 89e0c9f4fa2142d0a8d1601280872394 +msgid "Support for page chunking output." +msgstr "ページ分割出力のサポート" + +#: ../../pymupdf4llm/index.rst:22 eafc188f9e984c3a848b1c1a7e6f199f +msgid "" +"Direct support for output as :ref:`LlamaIndex Documents " +"`." +msgstr ":ref:`LlamaIndexドキュメント ` としての直接出力のサポート" + +#: ../../pymupdf4llm/index.rst:26 2ad1f7d1302746ae8e8f83b14dcdba8d +msgid "Functionality" +msgstr "機能" + +#: ../../pymupdf4llm/index.rst:28 3e1817178ea248cf8606c0aece719045 +msgid "" +"This package converts the pages of a file to text in **Markdown** format " +"using |PyMuPDF|." +msgstr "このパッケージは、|PyMuPDF| を使用して |PDF| のページを **Markdown** 形式のテキストに変換します。" + +#: ../../pymupdf4llm/index.rst:30 81a05398d1ca436485386081b8062dcd +msgid "" +"Standard text and tables are detected, brought in the right reading " +"sequence and then together converted to **GitHub**-compatible " +"**Markdown** text." +msgstr "" +"標準のテキストやテーブルが検出され、適切な読み取り順序で取り込まれ、その後一緒に **GitHub** 互換の **Markdown** " +"テキストに変換されます。" + +#: ../../pymupdf4llm/index.rst:32 593eab1fb0214b6294964584b13a0083 +msgid "" +"Header lines are identified via the font size and appropriately prefixed " +"with one or more `#` tags." +msgstr "ヘッダー行はフォントサイズで識別され、適切に1つまたは複数の#タグで接頭語が付けられます。" + +#: ../../pymupdf4llm/index.rst:34 bfbac5aee5e7496f852f4de536a8db8f +msgid "" +"Bold, italic, mono-spaced text and code blocks are detected and formatted" +" accordingly. Similar applies to ordered and unordered lists." +msgstr "太字、斜体、等幅テキスト、およびコードブロックが検出され、それに応じて書式が付けられます。順序付けられたリストと順不同リストにも同様のことが適用されます。" + +#: ../../pymupdf4llm/index.rst:36 740e76221d1448b38d46122f1776001d +msgid "" +"By default, all document pages are processed. If desired, a subset of " +"pages can be specified by providing a list of `0`-based page numbers." +msgstr "デフォルトでは、すべての文書ページが処理されます。必要に応じて、`0` から始まるページ番号のリストを指定してサブセットのページを指定できます。" + +#: ../../pymupdf4llm/index.rst:40 06d164779ff94a568128a9db39c9030a +msgid "Installation" +msgstr "インストール" + +#: ../../pymupdf4llm/index.rst:43 ea858ab3ed1f41b69849b46d74d8143c +msgid "Install the package via **pip** with:" +msgstr "パッケージを **pip** を使用してインストールするには、次のコマンドを使用します:" + +#: ../../pymupdf4llm/index.rst:54 513f20b7a04e437098ca66ee416eac9f +msgid "Extracting a file as **Markdown**" +msgstr "ファイルをMarkdown形式で抽出する" + +#: ../../pymupdf4llm/index.rst:56 39ff5fad6dd14f639b8373ad18f3c56a +msgid "" +"To retrieve your document content in **Markdown** simply install the " +"package and then use a couple of lines of **Python** code to get results." +msgstr "" +"ドキュメントの内容を **Markdown** で取得するには、パッケージをインストールし、数行の **Python** " +"コードを使用するだけで結果を得ることができます。" + +#: ../../pymupdf4llm/index.rst:60 faf1cd9fbd964a95a8f649660c300162 +msgid "Then in your **Python** script do:" +msgstr "**Python** スクリプトでは:" + +#: ../../pymupdf4llm/index.rst:71 539fd84130c04d1989f863d170eeaedd +msgid "" +"Instead of the filename string as above, one can also provide a " +":ref:`PyMuPDF Document `. A second parameter may be a list of " +"`0`-based page numbers, e.g. `[0,1]` would just select the first and " +"second pages of the document." +msgstr "" +"上記のファイル名文字列の代わりに、 :ref:`PyMuPDFの Document ` " +"を指定することもできます。2番目のパラメータは、`0` から始まるページ番号のリストであり、例えば `[0,1]` " +"はドキュメントの最初のページと2番目のページを選択します。" + +#: ../../pymupdf4llm/index.rst:74 6e32f37e871245878caa0a76588c0a22 +msgid "" +"If you want to store your **Markdown** file, e.g. store as a UTF8-encoded" +" file, then do:" +msgstr "**Markdown** ファイルを保存したい場合、例えばUTF8でエンコードされたファイルとして保存する場合は、次のようにします。" + +#: ../../pymupdf4llm/index.rst:87 13ab2d0739c946ae8bb9c7775c73eb63 +msgid "Extracting a file as a **LlamaIndex** document" +msgstr "ファイルをLlamaIndexドキュメントとして抽出する" + +#: ../../pymupdf4llm/index.rst:89 e385dbe676fa489b9173ab587b03cdaa +msgid "" +"|PyMuPDF4LLM| supports direct conversion to a **LLamaIndex** document. A " +"document is first converted into **Markdown** format and then a " +"**LlamaIndex** document is returned as follows:" +msgstr "" +"|PyMuPDF4LLM| は **LlamaIndex** ドキュメントへの直接変換をサポートしています。ドキュメントはまず " +"**Markdown** 形式に変換され、その後、以下のように **LlamaIndex** ドキュメントとして返されます。" + +#: ../../pymupdf4llm/index.rst:103 fe38b9668c864ec2b663fd24fc489e3a +msgid "Using with |PyMuPDF Pro|" +msgstr "PyMuPDF Proとの使用 " + +#: ../../pymupdf4llm/index.rst:106 ac7b2b319a75411bbed7864c904b30ec +msgid "" +"For **Office** document support, |PyMuPDF4LLM| works seamlessly with " +"|PyMuPDF Pro|. Assuming you have :doc:`../pymupdf-pro` installed you will" +" be able to work with **Office** documents as expected:" +msgstr "" +"**Office** ドキュメントのサポートのために、|PyMuPDF4LLM| は |PyMuPDF Pro| " +"とシームレスに動作します。:doc:`../pymupdf-pro` がインストールされている場合、期待通りに **Office** " +"ドキュメントを操作できます。" + +#: ../../pymupdf4llm/index.rst:117 e192eeb9c87d4c5c9bdfce1a07c20cee +msgid "" +"As you can see |PyMuPDF Pro| functionality will be available within the " +"|PyMuPDF4LLM| context!" +msgstr "ご覧のとおり、|PyMuPDF Pro| の機能は |PyMuPDF4LLM| のコンテキスト内で利用可能になります!" + +#: ../../pymupdf4llm/index.rst:122 d36b718aca944afe8346bd4e994359c6 +msgid "API" +msgstr "" + +#: ../../pymupdf4llm/index.rst:124 909e996df58542a2a27e35eb6cd018fd +msgid "See :ref:`the PyMuPDF4LLM API `." +msgstr ":ref:`PyMuPDF4LLM API ` をご覧ください。" + +#: ../../pymupdf4llm/index.rst:127 cd8ef0dca8034281a46603d2d4eb7fab +msgid "Further Resources" +msgstr "追加リソース" + +#: ../../pymupdf4llm/index.rst:131 b22cbb362d6d40f5b0948407ab7c9181 +msgid "Sample code" +msgstr "サンプルコード" + +#: ../../pymupdf4llm/index.rst:133 d7cfdfa3054c49a1bd12d190550076a0 +#, fuzzy +msgid "" +"`Command line RAG Chatbot with PyMuPDF " +"`_" +msgstr "" +"`PyMuPDFを使用したコマンドラインRAGチャットボット `_" + +#: ../../pymupdf4llm/index.rst:134 4e68994fede248e1ad26e816a6603f09 +#, fuzzy +msgid "" +"`Example of a Browser Application using Langchain and PyMuPDF " +"`_" +msgstr "" +"`LangchainとPyMuPDFを使用したブラウザアプリケーションの例 " +"`_" + +#: ../../pymupdf4llm/index.rst:138 70432248c1a84d729f6836dbf522d9e6 +msgid "Blogs" +msgstr "ブログ" + +#: ../../pymupdf4llm/index.rst:140 72e25633aa344939ae72cb881f754bfe +msgid "" +"`RAG/LLM and PDF: Enhanced Text Extraction `_" +msgstr "" +"`RAG/LLMとPDF: テキスト抽出の強化 `_" + +#: ../../pymupdf4llm/index.rst:141 2f216379d0ed46a682a7b58f7f31ac5d +msgid "" +"`Creating a RAG Chatbot with ChatGPT and PyMuPDF " +"`_" +msgstr "" +"`ChatGPTとPyMuPDFを使用したRAGチャットボットの作成 `_" + +#: ../../pymupdf4llm/index.rst:142 3ce05b35ac46423ebeaeee45e9fedebe +msgid "" +"`Building a RAG Chatbot GUI with the ChatGPT API and PyMuPDF " +"`_" +msgstr "" +"`ChatGPT APIとPyMuPDFを使用してRAGチャットボットGUIを構築 `_" + +#: ../../pymupdf4llm/index.rst:143 a9e4304031124d608de6572eba56224f +msgid "" +"`RAG/LLM and PDF: Conversion to Markdown Text with PyMuPDF " +"`_" +msgstr "" +"`RAG/LLMとPDF:PyMuPDFを使用したMarkdownテキストへの変換 `_" + +#: ../../footer.rst:60 d3e889c035a6497bbfee92a94a12fe1c +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/pyodide.mo b/docs/locales/ja/LC_MESSAGES/pyodide.mo new file mode 100644 index 000000000..c3c2bdeca Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/pyodide.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/pyodide.po b/docs/locales/ja/LC_MESSAGES/pyodide.po new file mode 100644 index 000000000..c32de47d8 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/pyodide.po @@ -0,0 +1,182 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 14998f025d1148cda63f25bfef057aa2 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 5c54bd9a0f054a82914a040bee536ad6 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF(およびその他の)ドキュメントのデータ抽出、解析、変換、および操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 ce438b15da5b4a318f4e36069383eb06 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDF テキスト抽出、PDF 画像抽出、PDF 変換、PDF テーブル、PDF 分割、PDF 作成、Pyodide、PyScript" + +#: ../../pyodide.rst:4 92e84ecde2114f28830f75ff139cc150 +msgid "Pyodide" +msgstr "" + +#: ../../pyodide.rst:8 3c7cff0a2616443fbc6d49b71a7d5ef9 +msgid "Overview" +msgstr "概要" + +#: ../../pyodide.rst:11 e962a3c031f2439688768b2b5d926e9a +msgid "" +"`Pyodide `_ is a client-side Python implementation " +"that runs in a web browser." +msgstr "`Pyodide `_ は、ウェブブラウザで実行されるクライアントサイドの Python 実装です" + +#: ../../pyodide.rst:14 e26bc674818d456fb786a4cc5b979023 +msgid "The Pyodide build of PyMuPDF is currently experimental." +msgstr "PyMuPDF の Pyodide ビルドは現在実験的な段階です。" + +#: ../../pyodide.rst:18 d8c36794d9a943e19a0cd695b98e4172 +msgid "Building a PyMuPDF wheel for Pyodide" +msgstr "Pyodide 用の PyMuPDF ホイールの構築" + +#: ../../pyodide.rst:20 1dcd6f33eb094fc284df0d904ce1cefa +msgid "" +"A PyMuPDF wheel for Pyodide can be built by running " +"`scripts/gh_release.py` with some environmental variable settings. This " +"is regularly tested on Github by `.github/workflows/test_pyodide.yml`." +msgstr "" +"Pyodide 用の PyMuPDF ホイールは、いくつかの環境変数設定を使用して `scripts/gh_release.py` " +"を実行することで構築できます。これは、`.github/workflows/test_pyodide.yml` によって定期的に GitHub " +"上でテストされています。" + +#: ../../pyodide.rst:24 04f1fabb002143fb9a715d8db14af45a +msgid "" +"Here is an example of this, a single Linux command (to be run with the " +"current directory set to a PyMuPDF checkout), that builds a Pyodide " +"wheel::" +msgstr "以下は、これの例です。現在のディレクトリが PyMuPDF のチェックアウトに設定されている場合に実行される単一の Linux コマンドです。" + +#: ../../pyodide.rst:33 eef89f5827ec4b709ca1d2e0ac5c097f +msgid "This does the following (all inside Python venv's):" +msgstr "これは以下の作業を行います(すべて Python venv 内で実行):" + +#: ../../pyodide.rst:35 12858012b88247c7bb3c683954628295 +msgid "" +"Download (git clone and pip install) and customise a Pyodide build " +"environment." +msgstr "Pyodide ビルド環境をダウンロード(git clone および pip install)し、カスタマイズします" + +#: ../../pyodide.rst:36 3e87e7f44f7545bbbb191af42283fe54 +msgid "Download (git clone) the latest MuPDF." +msgstr "最新の MuPDF をダウンロード(git clone)します。" + +#: ../../pyodide.rst:37 160cb2ec1c1c4a209a7fba7562c0d145 +msgid "Build MuPDF and PyMuPDF in the Pyodide build environment." +msgstr "MuPDF と PyMuPDF を Pyodide ビルド環境でビルドします。" + +#: ../../pyodide.rst:38 ccad3592e88243efab0d160923ed215a +msgid "Create a wheel in `dist/`." +msgstr "`dist/` にホイールを作成します" + +#: ../../pyodide.rst:40 5a3384a91a534d249a926eb839230cea +msgid "" +"For more information, see the comments for functions " +"`build_pyodide_wheel()` and `pyodide_setup()` in `scripts/gh_release.py`." +msgstr "" +"詳細については、`scripts/gh_release.py` 内の関数 `build_pyodide_wheel()` と " +"`pyodide_setup()` のコメントを参照してください。" + +#: ../../pyodide.rst:45 b46c76365b9046c195143674ad6f9337 +msgid "Using a Pyodide wheel" +msgstr "Pyodide ホイールの使用方法" + +#: ../../pyodide.rst:48 6a5b9ebd00244e88809b7cddb3efb929 +msgid "" +"Upload the wheel (for example " +"`PyMuPDF/dist/PyMuPDF-1.24.2-cp311-cp311-emscripten_3_1_32_wasm32.whl`) " +"to a webserver which has been configured to allow Cross-origin resource " +"sharing (https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)." +msgstr "" +"`PyMuPDF/dist/PyMuPDF-1.24.2-cp311-cp311-emscripten_3_1_32_wasm32.whl` " +"のようなホイールを、Cross-origin resource sharing(https://en.wikipedia.org/wiki" +"/Cross-origin_resource_sharing)を許可するように構成されたウェブサーバにアップロードします。" + +#: ../../pyodide.rst:54 12bd07e412b5464e9cd32cbc111e2a26 +msgid "" +"The wheel can be used in a Pyodide console running in a web browser, or a" +" JupyterLite notebook running in a web browser." +msgstr "このホイールは、ウェブブラウザで実行されている Pyodide コンソールまたは JupyterLite ノートブックで使用できます。" + +#: ../../pyodide.rst:57 fe96c467a1374441b725c5ed97d79a57 +msgid "To create a Pyodide console, go to:" +msgstr "Pyodide コンソールを作成するには、次のURLにアクセスしてください:" + +#: ../../pyodide.rst:59 910ab71af3894948a37af8412a6954fb +msgid "https://pyodide.org/en/stable/console.html" +msgstr "" + +#: ../../pyodide.rst:61 6433c259686c469bac6c3f3a824351f2 +msgid "To create a JupyterLite notebook, go to:" +msgstr "JupyterLite ノートブックを作成するには、次のURLにアクセスしてください:" + +#: ../../pyodide.rst:63 4903ffff457f434691fffa07bd2f73a5 +msgid "https://jupyterlite.readthedocs.io/en/latest/_static/lab/index.html" +msgstr "" + +#: ../../pyodide.rst:66 09c760f53c434ee6b3ce75b594a85cad +msgid "" +"In both these cases, one can use the following code to download the wheel" +" (replace `url` with the URL of the uploaded wheel) and import it::" +msgstr "れらの場合、以下のコードを使用してホイールをダウンロード(`url` をアップロードされたホイールの URL に置き換える)し、インポートできます:" + +#: ../../pyodide.rst:74 dbd55ab15c224387b85ff3b29f3fd2c7 +msgid "" +"Note that `micropip.install()` does not work, because of PyMuPDF's use of" +" shared libraries." +msgstr "`micropip.install()` は PyMuPDF が共有ライブラリを使用しているため機能しないことに注意してください。" + +#: ../../pyodide.rst:79 d9fce175d654431a9a455df1cddc005f +msgid "Loading a PDF document from a URL into PyMuPDF" +msgstr "PyMuPDF にURLからPDFドキュメントを読み込む" + +#: ../../pyodide.rst:82 5e286969bc7e464ab645cff0b5e85861 +msgid "" +"Pyodide browser console does not have generic network access, so for " +"example `urllib.request.urlopen(url)` fails. But Pyodide has a built-in " +"`pyodide.http` module that uses javascript internally, which one can use " +"to download into a `bytes` instance, which can be used to create a " +"PyMuPDF `Document` instance::" +msgstr "" +"Pyodide ブラウザコンソールには一般的なネットワークアクセスがないため、例えば `urllib.request.urlopen(url)` " +"は失敗します。しかし、Pyodide には JavaScript を内部で使用する組み込みの `pyodide.http` " +"モジュールがあります。これを使用して `bytes` インスタンスにダウンロードし、それを使用して PyMuPDF `Document` " +"インスタンスを作成できます:" + +#: ../../pyodide.rst:93 9022520dcaa9430fb6e28b813ef995c2 +msgid "It looks like this only works with `https://`, not `http://`." +msgstr "これは `https://` でのみ機能するようです。`http://` では機能しません。" + +#: ../../footer.rst:60 84f62677a793440aa7ea160b7c3c1a2f +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは、|version| までのすべてのバージョンをカバーしています" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/quad.mo b/docs/locales/ja/LC_MESSAGES/quad.mo new file mode 100644 index 000000000..5b47ba903 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/quad.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/quad.po b/docs/locales/ja/LC_MESSAGES/quad.po new file mode 100644 index 000000000..e4e4ec9bc --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/quad.po @@ -0,0 +1,394 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 3ad4b4dd8a9d444dbdb71d2f893d147d +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 8e281561019a467090b604cbf8c25baa +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 7a22fe4efbda4a6e9221d09c9a1e31f5 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../quad.rst:7 35e2527763ab458fa5ba06887a186967 +msgid "Quad" +msgstr "Quad (クアッド)" + +#: ../../quad.rst:9 7800e12444f748a6831e1efc77128fa3 +msgid "" +"Represents a four-sided mathematical shape (also called \"quadrilateral\"" +" or \"tetragon\") in the plane, defined as a sequence of four " +":ref:`Point` objects ul, ur, ll, lr (conveniently called upper left, " +"upper right, lower left, lower right)." +msgstr "" +"Quad(四角形または四辺形とも呼ばれる)は、平面上の数学的な四角形を表すもので、ul(上部左側)、ur(上部右側)、ll(下部左側)、lr(下部右側)の4つの" +" :ref:`Point` オブジェクトのシーケンスとして定義されます。" + +#: ../../quad.rst:11 4d17c9c46c584b239de386721cba1810 +msgid "" +"Quads can **be obtained** as results of text search methods " +"(:meth:`Page.search_for`), and they **are used** to define text marker " +"annotations (see e.g. :meth:`Page.add_squiggly_annot` and friends), and " +"in several draw methods (like :meth:`Page.draw_quad` / " +":meth:`Shape.draw_quad`, :meth:`Page.draw_oval`/ " +":meth:`Shape.draw_quad`)." +msgstr "" +"Quadはテキスト検索メソッド( :meth:`Page.search_for` など)の結果として **取得でき** " +"、テキストマーカーアノテーション :meth:`Page.add_squiggly_annot` " +"などを参照)の定義や、:meth:`Page.draw_oval` / :meth:`Shape.draw_quad` " +"などのいくつかの描画メソッドで使用されます。" + +#: ../../quad.rst:15 7b24b5db8f874de9a5211dce20991a59 +msgid "" +"If the corners of a rectangle are transformed with a **rotation**, " +"**scale** or **translation** :ref:`Matrix`, then the resulting quad is " +"**rectangular** (= congruent to a rectangle), i.e. all of its corners " +"again enclose angles of 90 degrees. Property :attr:`Quad.is_rectangular` " +"checks whether a quad can be thought of being the result of such an " +"operation." +msgstr "" +"四角形の角が **回転**、**スケール**、または **移動** :ref:`Matrix` で変換される場合、結果の四角形は **長方形** " +"です(長方形と合同の意味)、つまりそのすべての角は再び90度の角度を囲んでいます。:attr:`Quad.is_rectangular` " +"プロパティは、四角形がこのような操作の結果であるかどうかを確認します。" + +#: ../../quad.rst:17 688788bd21fd45f692790d0473acb161 +msgid "" +"This is not true for all matrices: e.g. shear matrices produce " +"parallelograms, and non-invertible matrices deliver \"degenerate\" " +"tetragons like triangles or lines." +msgstr "これはすべての行列に対して当てはまるわけではありません。例えば、シア行列は平行四辺形を生成し、非可逆行列は三角形や線のような「退化」した四角形を提供します。" + +#: ../../quad.rst:19 cf1183ff01fc48bf97106c5a287a5ff7 +msgid "" +"Attribute :attr:`Quad.rect` obtains the enveloping rectangle. Vice versa," +" rectangles now have attributes :attr:`Rect.quad`, resp. " +":attr:`IRect.quad` to obtain their respective tetragon versions." +msgstr "" +"属性 :attr:`Quad.rect` は包括的な長方形を取得します。その逆も成り立ち、長方形には " +":attr:`Rect.quad`、:attr:`IRect.quad` 属性が含まれ、それぞれそれらの四辺形バージョンを取得できます。" + +#: ../../quad.rst:23 50d7d842d9734fa599c188cc646d7b45 +msgid "**Methods / Attributes**" +msgstr "**メソッド / 属性**" + +#: ../../quad.rst:23 19f74f68bb13495c8ebf6c8b0b42f965 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../quad.rst:25 e538183055244dc08b0b15c4b8912add +msgid ":meth:`Quad.transform`" +msgstr "" + +#: ../../quad.rst:25 9f2dd03a77524dfd9b216fbb23b9cda3 +msgid "transform with a matrix" +msgstr "行列を使用して変換します" + +#: ../../quad.rst:26 6235ae5796024a74a6be5aa7991d9d4e +msgid ":meth:`Quad.morph`" +msgstr "" + +#: ../../quad.rst:26 ee610799d4d446bc8e8b0e6c5bbbb1d6 +msgid "transform with a point and matrix" +msgstr "ポイントと行列を使用して変換します" + +#: ../../quad.rst:27 046b52453a10469e8acd23a2744d436c +msgid ":attr:`Quad.ul`" +msgstr "" + +#: ../../quad.rst:27 6a03caf34312460985dc2fa98abdf184 +msgid "upper left point" +msgstr "左上のポイント" + +#: ../../quad.rst:28 4ccf2ab7cc924c399255bcdbd26f5233 +msgid ":attr:`Quad.ur`" +msgstr "" + +#: ../../quad.rst:28 be0476ca60a84fc08b7232ed236c4a76 +msgid "upper right point" +msgstr "右上のポイント" + +#: ../../quad.rst:29 a9835aa97d1b4882af33c92e9119563a +msgid ":attr:`Quad.ll`" +msgstr "" + +#: ../../quad.rst:29 b7fffd7413fc4d0cafb1f629cf052760 +msgid "lower left point" +msgstr "左下のポイント" + +#: ../../quad.rst:30 c28f7725993c42398e1efe0361e4cd0c +msgid ":attr:`Quad.lr`" +msgstr "" + +#: ../../quad.rst:30 928da6d5fb114af49b9f822994eeca25 +msgid "lower right point" +msgstr "右下のポイント" + +#: ../../quad.rst:31 b6205ef513fc4875b98f196a42177c78 +msgid ":attr:`Quad.is_convex`" +msgstr "" + +#: ../../quad.rst:31 5b826a02006c4d41b209de06b14a5189 +msgid "true if quad is a convex set" +msgstr "四角形が凸集合である場合は true" + +#: ../../quad.rst:32 51d60aeda5e54bcbaf2e39b528354d95 +msgid ":attr:`Quad.is_empty`" +msgstr "" + +#: ../../quad.rst:32 352c6bcfc0894aaeb7400e72b81e1e94 +msgid "true if quad is an empty set" +msgstr "四角形が空の集合である場合は true" + +#: ../../quad.rst:33 36eca873465943169ce2e899cbb1a5e6 +msgid ":attr:`Quad.is_rectangular`" +msgstr "" + +#: ../../quad.rst:33 bc110a7f43304b5f9cf4b9e5f348d237 +msgid "true if quad is congruent to a rectangle" +msgstr "四角形が長方形と合同である場合は true" + +#: ../../quad.rst:34 42b941820433460aa6e4f0939cbd93a7 +msgid ":attr:`Quad.rect`" +msgstr "" + +#: ../../quad.rst:34 6870fe5a0464469ab8425e8f89f7a530 +msgid "smallest containing :ref:`Rect`" +msgstr "最も小さい包含 :ref:`Rect`" + +#: ../../quad.rst:35 79aa77876e4d45a289d673fe00a95995 +msgid ":attr:`Quad.width`" +msgstr "" + +#: ../../quad.rst:35 1082a3b6b72b40888b3797974c6f1191 +msgid "the longest width value" +msgstr "最も長い幅の値" + +#: ../../quad.rst:36 7a4558a4427841528a563259add0a04b +msgid ":attr:`Quad.height`" +msgstr "" + +#: ../../quad.rst:36 a3081f95dafb42188a9b34f2e0664367 +msgid "the longest height value" +msgstr "最も長い高さの値" + +#: ../../quad.rst:39 b2cf4d9a187b48ff8960c68e29a2f5ce +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../quad.rst:51 5500a6189d9147a996038ae4dccde288 +msgid "" +"Overloaded constructors: \"ul\", \"ur\", \"ll\", \"lr\" stand for " +":data:`point_like` objects (the four corners), \"sequence\" is a Python " +"sequence with four :data:`point_like` objects." +msgstr "" +"オーバーロードされたコンストラクター: \"ul\"、\"ur\"、\"ll\"、\"lr\"は :data:`point_like` " +"オブジェクト(4つの角)を表し、\"sequence\" は4つの :data:`point_like` " +"オブジェクトを持つPythonのシーケンスです。" + +#: ../../quad.rst:53 93948e27f11149d3b2de812391df3cbe +msgid "If \"quad\" is specified, the constructor creates a **new copy** of it." +msgstr "\"quad\" が指定されている場合、コンストラクターはそれの **新しいコピー** を作成します。" + +#: ../../quad.rst:55 f2671c8b3169475cb8037f714c6b1f02 +msgid "" +"Without parameters, a quad consisting of 4 copies of *Point(0, 0)* is " +"created." +msgstr "パラメーターが指定されていない場合、4つの *Point(0, 0)* のコピーからなる四角形が作成されます。" + +#: ../../quad.rst:60 111c1f817d0f46f4a18dcfd6c663c18a +msgid "" +"Modify the quadrilateral by transforming each of its corners with a " +"matrix." +msgstr "四角形の各角を行列で変換して四角形を変更します。" + +#: ../../quad.rst 8266d43d9f56420bb2aa23d082c0b121 +#: 8b1eac95560f446b8e20b55dd12a6816 +msgid "Parameters" +msgstr "" + +#: ../../quad.rst:62 ../../quad.rst:69 0d634d4d35d54c30baabad227f22028f +#: 9a047b3f95b14bb7b64ad40042d33edf +msgid "the matrix." +msgstr "行列。" + +#: ../../quad.rst:66 3267067416df4dd8b8c8377a0d0e60c2 +msgid "" +"*(New in version 1.17.0)* \"Morph\" the quad with a matrix-like using a " +"point-like as fixed point." +msgstr "*(バージョン1.17.0で新たに導入)* マトリクスのようなものを使用して四角形を\"変形\"させ、固定ポイントとしてポイントライクを使用します。" + +#: ../../quad.rst:68 d59f47deb52f43bbaac3720001127772 +msgid "the point." +msgstr "ポイント。" + +#: ../../quad.rst aa094e5057b24cf88c8f90ad40d9a6cb +msgid "Returns" +msgstr "" + +#: ../../quad.rst:70 c0beb373c7644f0aa60225c4310d6f7e +msgid "a new quad (no operation if this is the infinite quad)." +msgstr "新しい四角形(これが無限の四角形の場合、操作は行われません)。" + +#: ../../quad.rst:75 dddc8689476548da8e50846d44537f2d +msgid "" +"The smallest rectangle containing the quad, represented by the blue area " +"in the following picture." +msgstr "四角形を含む最小の長方形で、以下の図の青い領域で表されます。" + +#: ../../quad.rst 0d91757c4c9c4ded9d29ce0f2a2c7b6e +#: 25ea7b95ca1c470bbd0c8c43d4724eb8 3608dfc13924419a90f0f72f4942d4bb +#: 46bbb10a82e94600b8fc797424ddd669 47c64c1542614109b19d7cfe7ff52b48 +#: 8c84322b6cca4f0aaeb2da2b9c139ad2 9c0e2fe975e94117ad5671c3ae223f92 +#: d3ab357c8c344e4fbbffbbf6e592d8f7 d6db0b6368ce47b28d082a226b28263e +#: ea8761efdea544869198ae33343519b9 +msgid "type" +msgstr "" + +#: ../../quad.rst:79 a447749ed60c435c945ea6d5ac0e1531 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../quad.rst:83 44f1f1d4404e42f0a16619c78d94e880 +msgid "Upper left point." +msgstr "左上のポイント" + +#: ../../quad.rst:85 ../../quad.rst:91 ../../quad.rst:97 ../../quad.rst:103 +#: 03736f05a8f5421189f98f0b2563c031 a6060101e92f44bf89658804e52b0c6c +#: cd76cb6712a84b878c58e674036c6b57 e745c97cd50f406d99f625378bc07267 +msgid ":ref:`Point`" +msgstr "" + +#: ../../quad.rst:89 a15d9aa488ac46398e1300d7a621789d +msgid "Upper right point." +msgstr "右上のポイント" + +#: ../../quad.rst:95 e58949b5a6124ea7b84c82246b331fa5 +msgid "Lower left point." +msgstr "左下のポイント" + +#: ../../quad.rst:101 459751b609574eee8992e1f4ed63a8be +msgid "Lower right point." +msgstr "右下のポイント" + +#: ../../quad.rst:107 42081dff926d4b2dbfc85eebd7c6b91c +msgid "New in version 1.16.1" +msgstr "新しいバージョン1.16.1で追加されました" + +#: ../../quad.rst:109 18727351228e45f18f66be1e6b1a0431 +msgid "" +"Checks if for any two points of the quad, all points on their connecting " +"line also belong to the quad." +msgstr "このクアッドの任意の2点について、それらを結ぶ直線上のすべての点もこのクアッドに属するかどうかをチェックします。" + +#: ../../quad.rst:114 ../../quad.rst:120 ../../quad.rst:126 +#: 269c1e4494824e2cb5936c9aeaae79e7 c41948d6f881462aac8ced961485b1ec +#: dea8b14143c94257a8c383e3f0c1f9a2 +msgid "bool" +msgstr "" + +#: ../../quad.rst:118 fb252aa17185476b93956d6738fff2e7 +msgid "" +"True if enclosed area is zero, which means that at least three of the " +"four corners are on the same line. If this is false, the quad may still " +"be degenerate or not look like a tetragon at all (triangles, " +"parallelograms, trapezoids, ...)." +msgstr "" +"Enclosed " +"areaがゼロの場合はTrueで、これは4つの角のうち少なくとも3つが同じ直線上にあることを意味します。これがfalseの場合、クアッドは依然として退化しているか、全くテトラゴンのようには見えないかもしれません(三角形、平行四辺形、台形など)。" + +#: ../../quad.rst:124 df1dab125bdf4e06b610448db4325b06 +msgid "" +"True if all corner angles are 90 degrees. This implies that the quad is " +"**convex and not empty**." +msgstr "4つの角のすべての角度が90度の場合はTrueです。これは、クアッドが **凸面であり、空ではない** ことを意味します。" + +#: ../../quad.rst:130 4f39bb3710a4402da4e1c2a2d8e7f5f4 +msgid "The maximum length of the top and the bottom side." +msgstr "上辺と下辺の最大の長さ。" + +#: ../../quad.rst:132 ../../quad.rst:138 dbf8525cf293442a8ac2170ef5d5830b +#: dc2fb941fd18465297cb72af2629fce0 +msgid "float" +msgstr "" + +#: ../../quad.rst:136 637d9c47fc5448c6b6b12ed59410f8cf +msgid "The maximum length of the left and the right side." +msgstr "上辺と下辺の最大の長さ。" + +#: ../../quad.rst:141 deee9fb98ab34d4494ac466fabaea29d +msgid "Remark" +msgstr "リマーク" + +#: ../../quad.rst:142 1b61ac0c866a4df28afb4a722645cf08 +msgid "" +"This class adheres to the sequence protocol, so components can be dealt " +"with via their indices, too. Also refer to :ref:`SequenceTypes`." +msgstr "" +"このクラスはシーケンスプロトコルに従っており、要素にはインデックスを使用できます。また、:ref:`SequenceTypes` " +"を参照してください。" + +#: ../../quad.rst:145 6215145a706f4cd4a73d3ab4595807a9 +msgid "Algebra and Containment Checks" +msgstr "代数と包含チェック" + +#: ../../quad.rst:146 fb0679a33ccb4fdb98427f9407938675 +msgid "" +"Starting with v1.19.6, quads can be used in algebraic expressions like " +"the other geometry object -- the respective restrictions have been " +"lifted. In particular, all the following combinations of containment " +"checking are now possible:" +msgstr "" +"v1.19.6から、クアッドは他のジオメトリオブジェクトと同様に代数的な式で使用できるようになりました - " +"対応する制限が解除されました。特に、次の包含チェックの組み合わせがすべて可能になりました。" + +#: ../../quad.rst:148 0e4401adcb5f4fc485e655c876a8ae28 +msgid "`{Point | IRect | Rect | Quad} in {IRect | Rect | Quad}`" +msgstr "" + +#: ../../quad.rst:150 7f8244052cb6402a939f0594f0b6a759 +msgid "Please note the following interesting detail:" +msgstr "次の興味深い詳細に注意してください:" + +#: ../../quad.rst:152 9671396ad46747c2ae802633663007ee +msgid "" +"For a rectangle, only its top-left point belongs to it. Since v1.19.0, " +"rectangles are defined to be \"open\", such that its bottom and its right" +" edge do not belong to it -- including the respective corners. But for " +"quads there exists no such notion like \"openness\", so we have the " +"following somewhat surprising implication:" +msgstr "" +"長方形の場合、その上辺のみがそれに属しています。v1.19.0以降、長方形は「オープン」であると定義されており、その底辺と右端はそれに属していません" +" - それには対応する角も含まれます。しかし、クアッドには「オープン」のような概念が存在しないため、次のようなやや驚くべき含意があります:" + +#: ../../footer.rst:60 cc3f6c4d45844fe8ad36b35c6e3bd3ab +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/rag.mo b/docs/locales/ja/LC_MESSAGES/rag.mo new file mode 100644 index 000000000..150a7d365 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/rag.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/rag.po b/docs/locales/ja/LC_MESSAGES/rag.po new file mode 100644 index 000000000..b381e8770 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/rag.po @@ -0,0 +1,232 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 d58a444fc92843a18660ede50d62d095 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 6e2254ee60a14d48a1348f2b46d484d2 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDFは、PDF(およびその他)ドキュメントのデータ抽出、解析、変換、および操作のための高性能なPythonライブラリです。" + +#: ../../header.rst:-1 bc86116b658f4d3fb0e20919e6179a1e +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDF画像抽出、PDF変換、PDFテーブル、PDF分割、PDF作成、Pyodide、PyScript" + +#: ../../rag.rst:6 1e69d82c08b049a6b539ae49ee8df24e +msgid "PyMuPDF, LLM & RAG" +msgstr "PyMuPDF、LLM、およびRAG" + +#: ../../rag.rst:9 056a5c68abb744c7863c87748f5cc0b1 +msgid "" +"Integrating |PyMuPDF| into your :title:`Large Language Model (LLM)` " +"framework and overall :title:`RAG (Retrieval-Augmented Generation`) " +"solution provides the fastest and most reliable way to deliver document " +"data." +msgstr "" +"|PyMuPDF| を :title:`大規模言語モデル(LLM)` フレームワークおよび全体的な :title:`RAG(Retrieval-" +"Augmented Generation)` ソリューションに統合することで、文書データを提供する最も高速かつ信頼性の高い方法が提供されます。" + +#: ../../rag.rst:11 fee93922167c4b2d9d9054f8565193f4 +msgid "" +"There are a few well known :title:`LLM` solutions which have their own " +"interfaces with |PyMuPDF| - it is a fast growing area, so please let us " +"know if you discover any more!" +msgstr "" +"いくつかのよく知られた :title:`LLM` ソリューションは、|PyMuPDF| " +"と独自のインターフェースを持っています。この分野は急速に成長しているため、もっと見つけた場合はお知らせください。" + +#: ../../rag.rst:13 87158dfd9b9a4174a93d56546e871f72 +msgid "" +"If you need to export to :title:`Markdown` or obtain a " +":title:`LlamaIndex` Document from a file:" +msgstr ":title:`Markdown` へのエクスポートやファイルから :title:`LlamaIndex` ドキュメントを取得する必要がある場合は:" + +#: ../../rag.rst:31 51971a71e3ca420db71fb2080da653ad +msgid "Integration with :title:`LangChain`" +msgstr ":title:`LangChain` との統合" + +#: ../../rag.rst:33 9e2065dc5185491abcf869e4a276c5be +msgid "" +"It is simple to integrate directly with :title:`LangChain` by using their" +" dedicated loader as follows:" +msgstr ":title:`LangChain` の専用ローダーを使用して直接統合するのは簡単です。以下のようにします:" + +#: ../../rag.rst:43 236dcfb83a264f6c83e921a7905a29bc +msgid "" +"See `LangChain Using PyMuPDF " +"`_ for full details." +msgstr "" +"`LangChainを完全に利用する詳細については、PyMuPDF " +"`_ を参照してください。" + +#: ../../rag.rst:47 f9ce52737bda4fc5b4075b6ad14b9e2e +msgid "Integration with :title:`LlamaIndex`" +msgstr ":title:`LlamaIndex` との統合" + +#: ../../rag.rst:50 1b7d2270a3c74a44b70efbcee8b0d37f +msgid "" +"Use the dedicated `PyMuPDFReader` from :title:`LlamaIndex` 🦙 to manage " +"your document loading." +msgstr ":title:`LlamaIndex` 🦙 の専用 `PyMuPDFReader` を使用して、文書の読み込みを管理します。" + +#: ../../rag.rst:58 40923d58ee41475bb0ee10e7014a578a +msgid "" +"See `Building RAG from Scratch " +"`_" +" for more." +msgstr "" +"詳細は、 `ゼロからRAGを構築する " +"`_" +" をご覧ください。" + +#: ../../rag.rst:62 3055cf26690d4e289489b37eaf7b5333 +msgid "Preparing Data for Chunking" +msgstr "データのチャンキングの準備" + +#: ../../rag.rst:64 e37920b16364455dad472a5c99218cda +msgid "" +"Chunking (or splitting) data is essential to give context to your " +":title:`LLM` data and with :title:`Markdown` output now supported by " +"|PyMuPDF| this means that `Level 3 chunking " +"`_ is " +"supported." +msgstr "" +"データのチャンキング(または分割)は、:title:`LLM` データに文脈を与えるために不可欠であり、|PyMuPDF| が " +":title:`Markdown` 出力をサポートするようになったことで、`レベル3のチャンキング " +"`_ " +"がサポートされることを意味します。" + +#: ../../rag.rst:71 9a8d957771b04869afd1441e12a280be +msgid "Outputting as :title:`Markdown`" +msgstr ":title:`Markdown` 形式で出力" + +#: ../../rag.rst:73 6076246f267449deace88b822ddabfe0 +#, fuzzy +msgid "" +"In order to export your document in :title:`Markdown` format you will " +"need a separate helper. Package :doc:`pymupdf4llm/index` is a high-level " +"wrapper of |PyMuPDF| functions which for each page outputs standard and " +"table text in an integrated Markdown-formatted string across all document" +" pages:" +msgstr "" +":title:`Markdown` 形式で文書をエクスポートするには、別途のヘルパーが必要です。パッケージ `pymupdf4llm " +"`_ " +"は、各ページに対して標準テキストとテーブルテキストを統合されたMarkdown形式の文字列で出力する、|PyMuPDF| " +"関数の高レベルなラッパーです。" + +#: ../../rag.rst:87 58590b8ebc244b9db1cc83c63adc2d1b +#, fuzzy +msgid "For further information please refer to: :doc:`pymupdf4llm/index`." +msgstr "" +"詳細については、次を参照してください: `pymupdf4llmのドキュメント " +"`_。" + +#: ../../rag.rst:91 b32a1b079e794491a5e021d26fb18453 +msgid "How to use :title:`Markdown` output" +msgstr ":title:`Markdown` 出力の使用方法" + +#: ../../rag.rst:93 8045470b5fa04886b83960e80fc71aae +msgid "" +"Once you have your data in :title:`Markdown` format you are ready to " +"chunk/split it and supply it to your :title:`LLM`, for example, if this " +"is :title:`LangChain` then do the following:" +msgstr "" +":title:`Markdown` 形式のデータが準備できたら、データをチャンク化/分割して :title:`LLM` " +"に供給する準備が整います。例えば、:title:`LangChain` の場合は、次の手順を行います。" + +#: ../../rag.rst:109 4d0c5d7c2dc04c059fbdb5ad0709c2af +msgid "" +"For more see `5 Levels of Text Splitting `_" +msgstr "" +"詳細は `「テキスト分割の5レベル」 `_" +" を参照してください。" + +#: ../../rag.rst:113 896b1dd74ff24363becbdac98144b86c +msgid "Related Blogs" +msgstr "関連ブログ" + +#: ../../rag.rst:115 a036501ebb68414192f47e189aec5f3d +msgid "" +"To find out more about |PyMuPDF|, :title:`LLM` & :title:`RAG` check out " +"our blogs for implementations & tutorials." +msgstr "" +"|PyMuPDF| 、 :title:`LLM` 、および :title:`RAG` " +"について詳しく知りたい場合は、実装やチュートリアルに関するブログをチェックしてください。" + +#: ../../rag.rst:119 a8b36bcfa4c746e4b21ffd62e4c38618 +msgid "Methodologies to Extract Text" +msgstr "テキストを抽出するための方法論" + +#: ../../rag.rst:121 f69b739e4b764e0b856de55aa7f97093 +msgid "" +"`Enhanced Text Extraction `_" +msgstr "`テキスト抽出の強化 `_ " + +#: ../../rag.rst:122 3ae24a0e46354d408bcdaf4deb16af33 +msgid "" +"`Conversion to Markdown Text with PyMuPDF `_" +msgstr "" +"`PyMuPDFを使用したMarkdownテキストへの変換 `_" + +#: ../../rag.rst:127 bbe057c5d511484c8b9bd0b656c6216c +msgid "Create a Chatbot to discuss your documents" +msgstr "文書を議論するためのチャットボットを作成する" + +#: ../../rag.rst:129 1255eb5a25064e6c8ae8ca5b8af9b0a4 +msgid "" +"`Make a simple command line Chatbot `_" +msgstr "" +"`シンプルなコマンドラインのチャットボットを作成するシンプルなコマンドラインのチャットボットを作成する `_" + +#: ../../rag.rst:130 569c7f488bc747a2801a4b6532a9c3dd +msgid "" +"`Make a Chatbot GUI `_" +msgstr "" +"`チャットボットGUIを作成する `_" + +#: ../../footer.rst:60 45a2c818f8ae4c1fa455751e51ab3e21 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは、|version| までのすべてのバージョンをカバーしています。" + +#~ msgid "If you need to export to :title:`Markdown`:" +#~ msgstr ":title:`Markdown` にエクスポートする必要がある場合:" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-annotations.mo b/docs/locales/ja/LC_MESSAGES/recipes-annotations.mo new file mode 100644 index 000000000..f2040cb9e Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-annotations.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-annotations.po b/docs/locales/ja/LC_MESSAGES/recipes-annotations.po new file mode 100644 index 000000000..a61441bca --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-annotations.po @@ -0,0 +1,156 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 d1ad502e2163461abd37b8e0afad7b09 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 9b5018bc6d0347e3ac0dca7c86ee8578 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 9215720d8e8043e38ff196c7e9cfe5f0 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-annotations.rst:7 43c8624c55e14be2aac3624caf95357e +msgid "Annotations" +msgstr "注釈" + +#: ../../recipes-annotations.rst:12 3eb67bbfa6584d6e8bfe087440a64e64 +msgid "How to Add and Modify Annotations" +msgstr "注釈の追加と変更方法" + +#: ../../recipes-annotations.rst:14 0e7efcdb24c74719b4c01251f4a59aa5 +msgid "" +"In |PyMuPDF|, new annotations can be added via :ref:`Page` methods. Once " +"an annotation exists, it can be modified to a large extent using methods " +"of the :ref:`Annot` class." +msgstr "" +"PyMuPDFでは、新しい注釈を :ref:`Page` メソッドを介して追加することができます。一度注釈が存在すると、 :ref:`Annot`" +" クラスのメソッドを使用して大幅に変更できます。" + +#: ../../recipes-annotations.rst:16 2bf001f41c6b451b9937901bb58f7e23 +msgid "" +"Annotations can **only** be inserted in |PDF| pages - other document " +"types do not support annotation insertion." +msgstr "注釈は PDF ページにのみ 挿入可能です。他のドキュメント形式では、注釈の挿入はサポートされていません。" + +#: ../../recipes-annotations.rst:18 ba516eb1a56945e5a9bac7e9fa24fed1 +msgid "" +"In contrast to many other tools, initial insert of annotations happens " +"with a minimum number of properties. We leave it to the programmer to " +"e.g. set attributes like author, creation date or subject." +msgstr "他の多くのツールとは異なり、注釈の初期挿入は最小限のプロパティで行われます。例えば、著者、作成日、サブジェクトなどの属性をプログラマーが設定することができます。" + +#: ../../recipes-annotations.rst:20 bc78178891f1440fbfa8e0aaff01e11c +msgid "" +"As an overview for these capabilities, look at the following script that " +"fills a PDF page with most of the available annotations. Look in the next" +" sections for more special situations:" +msgstr "これらの機能の概要については、次のスクリプトを参照してください。このスクリプトはPDFページに利用可能な注釈のほとんどを埋めるものです。より特殊な状況については、次のセクションをご覧ください。" + +#: ../../recipes-annotations.rst:26 c8504dce5bb44372b1c68fe8f9b98fb7 +msgid "This script should lead to the following output:" +msgstr "このスクリプトは次のような出力を生成します:" + +#: ../../recipes-annotations.rst:36 96b9bb9ba45a444c973060143fc425cf +msgid "How to Use FreeText" +msgstr "FreeTextの使用方法" + +#: ../../recipes-annotations.rst:37 e91fc84e2d79457190b40cbf621719b3 +msgid "" +"This script shows a couple of basic ways to deal with 'FreeText' " +"annotations:" +msgstr "このスクリプトは、'FreeText'注釈を扱ういくつかの方法を示しています::" + +#: ../../recipes-annotations.rst:41 ../../recipes-annotations.rst:50 +#: 2cdef3691b1341f4a6850d9d8de067bb 350586a50d5144a2b8d9db627bd71e24 +msgid "The result looks like this:" +msgstr "結果は次のようになります。" + +#: ../../recipes-annotations.rst:46 e86ede73d664430791e03da9a5d63702 +msgid "Here is an example for using rich text and call-out lines:" +msgstr "" + +#: ../../recipes-annotations.rst:63 f49b197275794adaa27d02d5ac0b32cf +msgid "How to Use Ink Annotations" +msgstr "インク注釈の使用方法" + +#: ../../recipes-annotations.rst:64 e0df9dc619c746da9593612a87cd3f0a +msgid "" +"Ink annotations are used to contain freehand scribbling. A typical " +"example may be an image of your signature consisting of first name and " +"last name. Technically an ink annotation is implemented as a **list of " +"lists of points**. Each point list is regarded as a continuous line " +"connecting the points. Different point lists represent independent line " +"segments of the annotation." +msgstr "インク注釈は、自由な手書きの落書きを含むために使用されます。典型的な例として、名前と姓からなる署名の画像が挙げられます。技術的には、インク注釈は点のリストのリストとして実装されます。各点のリストは、点を結んだ連続的な線と見なされます。異なる点のリストは、注釈の独立した線セグメントを表します。" + +#: ../../recipes-annotations.rst:66 dbdd441a25c44bd4aae5f2367cf85f3d +#, fuzzy +msgid "" +"The following script creates an ink annotation with two mathematical " +"curves (sine and cosine function graphs) as line segments:" +msgstr "以下のスクリプトは、二つの数学的な曲線(正弦関数と余弦関数のグラフ)を線セグメントとして持つインク注釈を作成します::" + +#: ../../recipes-annotations.rst:70 bb6171a96157475f875163c29bc8e76f +msgid "This is the result:" +msgstr "これが結果です:" + +#: ../../footer.rst:60 79b679f62bef466ea3444589a3f3ef6f +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Using Buttons and JavaScript" +#~ msgstr "ボタンとJavaScriptの使用" + +#~ msgid "" +#~ "Since MuPDF v1.16, 'FreeText' annotations " +#~ "no longer support bold or italic " +#~ "versions of the Times-Roman, Helvetica" +#~ " or Courier fonts." +#~ msgstr "" +#~ "MuPDF v1.16以降、'FreeText'注釈はもはやTimes-" +#~ "Roman、Helvetica、Courierフォントの太字や斜体バージョンをサポートしていません。" + +#~ msgid "" +#~ "A big **thank you** to our user" +#~ " `@kurokawaikki `_, " +#~ "who contributed the following script to" +#~ " **circumvent this restriction**." +#~ msgstr "" +#~ "この制限を回避するために、次のスクリプトを提供してくれたユーザー `@kurokawaikki " +#~ "`_ さんに心から感謝します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.mo b/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.mo new file mode 100644 index 000000000..df4c7069e Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.po b/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.po new file mode 100644 index 000000000..2018b70e5 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.po @@ -0,0 +1,614 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 4668aba35041418d9985c1a35575a6b3 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 1011e86add0c4e648dea40533c7fd1a2 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 10324b8854a4413791056120093832e9 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-common-issues-and-their-solutions.rst:7 +#: 20bc28cec0154c7c9136122bd912f369 +msgid "Common Issues and their Solutions" +msgstr "一般的な問題とその解決方法" + +#: ../../recipes-common-issues-and-their-solutions.rst:10 +#: 7f1290c0169445d78461fff755cfc034 +msgid "How To Dynamically Clean Up Corrupt :title:`PDFs`" +msgstr "壊れた |PDF| を動的にクリーンアップする方法" + +#: ../../recipes-common-issues-and-their-solutions.rst:12 +#: ce6827f92d194d24abb5efce34f05be9 +msgid "" +"This shows a potential use of |PyMuPDF| with another Python PDF library " +"(the excellent pure Python package `pdfrw " +"`_ is used here as an example)." +msgstr "" +"これは、別のPython PDFライブラリ(素晴らしい純粋なPythonパッケージである `pdfrw " +"`_ " +"を例として使用しています)と組み合わせてPyMuPDFを潜在的に使用する方法を示しています。" + +#: ../../recipes-common-issues-and-their-solutions.rst:14 +#: 4b79f85178e14c0bada317304b337db0 +msgid "" +"If a clean, non-corrupt / decompressed PDF is needed, one could " +"dynamically invoke PyMuPDF to recover from many problems like so::" +msgstr "" +"クリーンで壊れていない / " +"解凍されていないPDFが必要な場合、次のようにしてPyMuPDFを動的に呼び出して多くの問題から回復することができます::" + +#: ../../recipes-common-issues-and-their-solutions.rst:50 +#: e6d9c9e53bdc4e6ea4fff95948a77ce0 +msgid "" +"With the command line utility *pdftk* (`available " +"`_ for Windows " +"only, but reported to also run under `Wine `_) a" +" similar result can be achieved, see `here " +"`_. " +"However, you must invoke it as a separate process via *subprocess.Popen*," +" using stdin and stdout as communication vehicles." +msgstr "" +"コマンドラインユーティリティ `pdftk `_ (Windowsのみで利用可能ですが、 `Wine `_ " +"下でも動作すると報告されています)を使用すると、類似の結果を得ることができます。詳細は `こちら " +"`_ " +"をご覧ください。ただし、stdinとstdoutを通信手段として使用して別のプロセスとしてsubprocess.Popenを介して呼び出す必要があります。" + +#: ../../recipes-common-issues-and-their-solutions.rst:55 +#: 3f9ab9bf78a74d9781937269cf8ef3cb +msgid "How to Convert Any Document to |PDF|" +msgstr "どの文書も |PDF| に変換する方法" + +#: ../../recipes-common-issues-and-their-solutions.rst:57 +#: ca2811011d7b40808c8a55f65188b25f +msgid "" +"Here is a script that converts any |PyMuPDF| :ref:`supported " +"document` to a |PDF|. These include XPS, EPUB, FB2," +" CBZ and image formats, including multi-page TIFF images." +msgstr "" +"以下は、任意のPyMuPDFが :ref:`サポートされている ` " +"文書をPDFに変換するスクリプトです。これにはXPS、EPUB、FB2、CBZ、および複数ページのTIFFイメージを含む画像フォーマットが含まれます。" + +#: ../../recipes-common-issues-and-their-solutions.rst:59 +#: d7e2dfdcb75b40739daf46856a819f58 +msgid "" +"It features maintaining any metadata, table of contents and links " +"contained in the source document::" +msgstr "これにはソース文書に含まれるメタデータ、目次、リンクを保持する機能が備わっています::" + +#: ../../recipes-common-issues-and-their-solutions.rst:131 +#: a41bc630ff314f64bcfa4150a08c3c84 +msgid "Changing Annotations: Unexpected Behaviour" +msgstr "注釈の変更:予期しない動作" + +#: ../../recipes-common-issues-and-their-solutions.rst:134 +#: 9b87b714c12948aa94ae2868604c760c +msgid "Problem" +msgstr "問題" + +#: ../../recipes-common-issues-and-their-solutions.rst:135 +#: 5870f9a9b1184a3cb1849e71b71e5026 +msgid "There are two scenarios:" +msgstr "2つのシナリオがあります:" + +#: ../../recipes-common-issues-and-their-solutions.rst:137 +#: b360eb295e35468b9a48ce456cc717e7 +msgid "" +"**Updating** an annotation with PyMuPDF which was created by some other " +"software." +msgstr "他のソフトウェアで作成された注釈をPyMuPDFで**更新**する。" + +#: ../../recipes-common-issues-and-their-solutions.rst:138 +#: c2f4bd673f784318906b6982c2873fdf +msgid "" +"**Creating** an annotation with PyMuPDF and later changing it with some " +"other software." +msgstr "PyMuPDFで注釈を**作成**し、後で他のソフトウェアで変更する。" + +#: ../../recipes-common-issues-and-their-solutions.rst:140 +#: 3a2c5f9ca0da4058b2119d1e5ee6432c +msgid "" +"In both cases you may experience unintended changes, like a different " +"annotation icon or text font, the fill color or line dashing have " +"disappeared, line end symbols have changed their size or even have " +"disappeared too, etc." +msgstr "どちらの場合でも、異なる注釈アイコンやテキストフォント、塗りつぶしの色や線の破線が消えたり、線の端のシンボルのサイズが変わったり、さらには消えたりするなど、意図しない変更が発生する可能性があります。" + +#: ../../recipes-common-issues-and-their-solutions.rst:143 +#: ../../recipes-common-issues-and-their-solutions.rst:182 +#: ../../recipes-common-issues-and-their-solutions.rst:195 +#: 0ec3d7b289ed40d1b1c9a6e6509cb81c 3db222ba67d34ffe8b9c76049639a28f +#: 4947608029184f4eacde97cb22e6b0a0 +msgid "Cause" +msgstr "原因" + +#: ../../recipes-common-issues-and-their-solutions.rst:144 +#: 580b99ed7b294df89844c876e4930bb6 +msgid "" +"Annotation maintenance is handled differently by each PDF maintenance " +"application. Some annotation types may not be supported, or not be " +"supported fully or some details may be handled in a different way than in" +" another application. **There is no standard.**" +msgstr "注釈の保守は、各PDF保守アプリケーションごとに異なる方法で処理されます。一部の注釈タイプはサポートされないか、完全にはサポートされていない場合もあり、また、他のアプリケーションとは異なる方法で詳細が処理される場合もあります。標準規格は存在しません。" + +#: ../../recipes-common-issues-and-their-solutions.rst:146 +#: 0d39ed4b97424a2a87eb4bec722728b0 +msgid "" +"Almost always a PDF application also comes with its own icons (file " +"attachments, sticky notes and stamps) and its own set of supported text " +"fonts. For example:" +msgstr "ほとんどの場合、PDFアプリケーションには独自のアイコン(ファイル添付、付箋、スタンプなど)とサポートされるテキストフォントのセットが付属しています。例えば:" + +#: ../../recipes-common-issues-and-their-solutions.rst:148 +#: 65134846c0a240acb342d722b107de3f +msgid "" +"(Py-) MuPDF only supports these 5 basic fonts for 'FreeText' annotations:" +" Helvetica, Times-Roman, Courier, ZapfDingbats and Symbol -- no italics /" +" no bold variations. When changing a 'FreeText' annotation created by " +"some other app, its font will probably not be recognized nor accepted and" +" be replaced by Helvetica." +msgstr "" +"(Py-) MuPDFは、'FreeText'注釈に対してこれらの5つの基本フォントのみをサポートしています:Helvetica、Times-" +"Roman、Courier、ZapfDingbats、Symbol - 斜体や太字のバリエーションはありません。他のアプリで作成された " +"'FreeText' 注釈を変更する際、そのフォントはおそらく認識されず、Helveticaに置き換えられる可能性があります。" + +#: ../../recipes-common-issues-and-their-solutions.rst:150 +#: e2cbd8bfb3d14500a0612d1d8d2c5ec6 +msgid "" +"PyMuPDF supports all PDF text markers (highlight, underline, strikeout, " +"squiggly), but these types cannot be updated with Adobe Acrobat Reader." +msgstr "" +"PyMuPDFはすべてのPDFテキストマーカー(ハイライト、下線、取り消し線、波線)をサポートしていますが、これらのタイプはAdobe " +"Acrobat Readerで更新することはできません。" + +#: ../../recipes-common-issues-and-their-solutions.rst:152 +#: 5f42aed9c005492a92b5b6aa0cbcd03e +msgid "" +"In most cases there also exists limited support for line dashing which " +"causes existing dashes to be replaced by straight lines. For example:" +msgstr "ほとんどの場合、破線には限定的なサポートが存在し、既存の破線が直線に置き換えられることがあります。例えば:" + +#: ../../recipes-common-issues-and-their-solutions.rst:154 +#: 7651dba336694614afc84cba3418f5cc +msgid "" +"PyMuPDF fully supports all line dashing forms, while other viewers only " +"accept a limited subset." +msgstr "PyMuPDFはすべての線の破線形式を完全にサポートしていますが、他のビューアは一部の形式しか受け入れません。" + +#: ../../recipes-common-issues-and-their-solutions.rst:158 +#: 01eaf095c9b6491d88ab187baa0e5739 +msgid "Solutions" +msgstr "解決策" + +#: ../../recipes-common-issues-and-their-solutions.rst:159 +#: ecb162091bfd40ba807179842faf6a91 +msgid "Unfortunately there is not much you can do in most of these cases." +msgstr "残念ながら、これらの多くの場合、あまり対処できることはありません。" + +#: ../../recipes-common-issues-and-their-solutions.rst:161 +#: f4ff16b309524593aafb3a90fa47384c +msgid "Stay with the same software for **creating and changing** an annotation." +msgstr "注釈の作成と変更に同じソフトウェアを使用してください。" + +#: ../../recipes-common-issues-and-their-solutions.rst:162 +#: ddc8bae66d8249efbc79df236e51d0d0 +msgid "" +"When using PyMuPDF to change an \"alien\" annotation, try to **avoid** " +":meth:`Annot.update`. The following methods **can be used without it,** " +"so that the original appearance should be maintained:" +msgstr "" +"\"異なる\"注釈を変更する際にPyMuPDFを使用する場合、:meth:`Annot.update` " +"を避けるようにしてください。次のメソッドは、元の外観を維持するために、:meth:`Annot.update` を使用せずに使用できます:" + +#: ../../recipes-common-issues-and-their-solutions.rst:164 +#: 57c092ce6a7d45d4952de8c67279fa4f +msgid ":meth:`Annot.set_rect` (location changes)" +msgstr ":meth:`Annot.set_rect` (位置の変更)" + +#: ../../recipes-common-issues-and-their-solutions.rst:165 +#: d313ef9ae75449729e1849ab57d05a32 +msgid ":meth:`Annot.set_flags` (annotation behaviour)" +msgstr ":meth:`Annot.set_flags` (注釈の動作)" + +#: ../../recipes-common-issues-and-their-solutions.rst:166 +#: ff9f33feb2224cd69192417071637524 +msgid ":meth:`Annot.set_info` (meta information, except changes to *content*)" +msgstr ":meth:`Annot.set_info` (メタ情報、*コンテンツ* の変更以外)" + +#: ../../recipes-common-issues-and-their-solutions.rst:167 +#: 90c72eaaebfa4a848238a0cd487b74e6 +msgid ":meth:`Annot.set_popup` (create popup or change its rect)" +msgstr ":meth:`Annot.set_popup` (ポップアップの作成または位置の変更)" + +#: ../../recipes-common-issues-and-their-solutions.rst:168 +#: fea8fa06e33040f2b13ce9a7b5306f8f +#, fuzzy +msgid "" +":meth:`Annot.set_oc` (add / remove reference to optional content " +"information)" +msgstr ":meth:`Annot.set_optional_content` (オプションコンテンツ情報への参照の追加/削除)" + +#: ../../recipes-common-issues-and-their-solutions.rst:169 +#: cee27cf68bda482690d014a3468ad911 +msgid ":meth:`Annot.set_open`" +msgstr "" + +#: ../../recipes-common-issues-and-their-solutions.rst:170 +#: 85c766ba5c7c4ec9a875af07684f5f8f +msgid ":meth:`Annot.update_file` (file attachment changes)" +msgstr ":meth:`Annot.update_file` (ファイル添付の変更)" + +#: ../../recipes-common-issues-and-their-solutions.rst:174 +#: 6e6d7441feb444b08da4f309c51f2989 +msgid "Missing or Unreadable Extracted Text" +msgstr "欠落または読み取り不可能な抽出テキスト" + +#: ../../recipes-common-issues-and-their-solutions.rst:175 +#: 01ae9eb704e14a1e8f24cf021dc91c0e +msgid "" +"Fairly often, text extraction does not work text as you would expect: " +"text may be missing, or may not appear in the reading sequence visible on" +" your screen, or contain garbled characters (like a ? or a \"TOFU\" " +"symbol), etc. This can be caused by a number of different problems." +msgstr "非常にしばしば、テキスト抽出が期待通りに機能しないことがあります。テキストが欠落しているか、画面上で表示される読み取り順序に表示されないか、文字が文字化けしている(「?」や「TOFU」シンボルなど)ことがあります。これはさまざまな問題が原因で起こる可能性があります。" + +#: ../../recipes-common-issues-and-their-solutions.rst:178 +#: e80411b38d4845d9b2714609a65db94c +msgid "Problem: no text is extracted" +msgstr "問題:テキストが抽出されない" + +#: ../../recipes-common-issues-and-their-solutions.rst:179 +#: 29d32a1dea084e1886b86272d02868c8 +msgid "" +"Your PDF viewer does display text, but you cannot select it with your " +"cursor, and text extraction delivers nothing." +msgstr "PDFビューアはテキストを表示しているが、カーソルで選択できず、テキストの抽出結果が何も表示されない。" + +#: ../../recipes-common-issues-and-their-solutions.rst:183 +#: 407916e2cc0148868141e8b6e9837feb +msgid "" +"You may be looking at an image embedded in the PDF page (e.g. a scanned " +"PDF)." +msgstr "PDFページに埋め込まれた画像を表示している可能性がある(例:スキャンされたPDF)。" + +#: ../../recipes-common-issues-and-their-solutions.rst:184 +#: 94397a359e604251a6ce3e563fe0ef48 +msgid "" +"The PDF creator used no font, but **simulated** text by painting it, " +"using little lines and curves. E.g. a capital \"D\" could be painted by a" +" line \"|\" and a left-open semi-circle, an \"o\" by an ellipse, and so " +"on." +msgstr "PDFの作成者がフォントを使用せず、テキストをペイントして小さな線と曲線を使用して模倣している場合がある。たとえば、大文字の「D」は縦棒「|」と左開きの半円で描かれ、「o」は楕円で描かれるなど。" + +#: ../../recipes-common-issues-and-their-solutions.rst:187 +#: ../../recipes-common-issues-and-their-solutions.rst:200 +#: 4fedb4cf875041c2a5e70dae89f7991f fb8a38a5ca084321aac57be8d1ebb7bb +msgid "Solution" +msgstr "解決策" + +#: ../../recipes-common-issues-and-their-solutions.rst:188 +#: 3ab64c25a2be4f59907562b0f68b24dc +msgid "" +"Use an OCR software like `OCRmyPDF `_" +" to insert a hidden text layer underneath the visible page. The resulting" +" PDF should behave as expected." +msgstr "" +"`OCRmyPDF `_ " +"のようなOCRソフトウェアを使用して、表示ページの下に非表示のテキストレイヤーを挿入します。その結果のPDFは期待どおりに動作するはずです。" + +#: ../../recipes-common-issues-and-their-solutions.rst:191 +#: a8f6e51df1dc464fbb0f7475eba7a54b +msgid "Problem: unreadable text" +msgstr "問題:読み取れないテキスト" + +#: ../../recipes-common-issues-and-their-solutions.rst:192 +#: 7e6f3f7dc9964fac985a7fb4c533b3f2 +msgid "" +"Text extraction does not deliver the text in readable order, duplicates " +"some text, or is otherwise garbled." +msgstr "テキストの抽出が読みやすい順序で行われず、一部のテキストが重複しているか、その他の理由で文字化けしている。" + +#: ../../recipes-common-issues-and-their-solutions.rst:196 +#: 060884ca10f34a9fa6daba7b2794c592 +msgid "" +"The single characters are readable as such (no \"\" symbols), but the " +"sequence in which the text is **coded in the file** deviates from the " +"reading order. The motivation behind may be technical or protection of " +"data against unwanted copies." +msgstr "個々の文字はそのまま読み取れるが(「」シンボルがない)、テキストがファイル内でコード化された順序が読み取り順序と異なる場合がある。その背後には技術的な問題やデータの不正なコピーからの保護がある可能性がある。" + +#: ../../recipes-common-issues-and-their-solutions.rst:197 +#: 99dd9930809d4551bfe5d806b32e28bc +msgid "" +"Many \"\" symbols occur, indicating MuPDF could not interpret these " +"characters. The font may indeed be unsupported by MuPDF, or the PDF " +"creator may haved used a font that displays readable text, but on purpose" +" obfuscates the originating corresponding unicode character." +msgstr "多くの「」シンボルが表示され、MuPDFがこれらの文字を解釈できないことを示している場合。フォントがMuPDFでサポートされていない可能性があるか、PDFの作成者が読み取り可能なテキストを表示するフォントを使用しているが、意図的に元の対応するUnicode文字を曖昧にしている場合があります。" + +#: ../../recipes-common-issues-and-their-solutions.rst:201 +#: 7e9a1b033ba149c29732bf6d66164707 +msgid "Use layout preserving text extraction: `python -m fitz gettext file.pdf`." +msgstr "レイアウトを保持するテキスト抽出を使用します: `python -m fitz gettext file.pdf`。" + +#: ../../recipes-common-issues-and-their-solutions.rst:202 +#: 05bbf78e0c8c447794e360a1dda9f6df +msgid "" +"If other text extraction tools also don't work, then the only solution " +"again is OCRing the page." +msgstr "他のテキスト抽出ツールも機能しない場合は、再びOCRでページを処理するのが唯一の解決策です。" + +#: ../../footer.rst:60 8010577d7dfe46e7ab6edda3adc1976a +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Misplaced Item Insertions on PDF Pages" +#~ msgstr "PDFページ上の誤ったアイテムの挿入" + +#~ msgid "" +#~ "You inserted an item (like an " +#~ "image, an annotation or some text) " +#~ "on an existing PDF page, but later" +#~ " you find it being placed at a" +#~ " different location than intended. For " +#~ "example an image should be inserted " +#~ "at the top, but it unexpectedly " +#~ "appears near the bottom of the " +#~ "page." +#~ msgstr "既存のPDFページにアイテム(画像、注釈、テキストなど)を挿入しましたが、後で意図した場所とは異なる位置に配置されていることがあります。たとえば、画像はページの上部に挿入する予定でしたが、予想外にページの下部近くに表示されています。" + +#~ msgid "" +#~ "The creator of the PDF has " +#~ "established a non-standard page geometry" +#~ " without keeping it \"local\" (as " +#~ "they should!). Most commonly, the PDF" +#~ " standard point (0,0) at *bottom-" +#~ "left* has been changed to the " +#~ "*top-left* point. So top and bottom" +#~ " are reversed -- causing your " +#~ "insertion to be misplaced." +#~ msgstr "PDFの作成者が標準ではないページジオメトリを設定しており、それを「ローカル」に保持していないためです(正しい方法で保持すべきです)。最も一般的には、PDF標準の座標(0,0)が左下にある点から、左上の点に変更されています。したがって、上下が逆転し、挿入が誤って配置されてしまいます。" + +#~ msgid "" +#~ "The visible image of a PDF page" +#~ " is controlled by commands coded in" +#~ " a special mini-language. For an " +#~ "overview of this language consult " +#~ "\"Operator Summary\" on pp. 643 of " +#~ "the :ref:`AdobeManual`. These commands are " +#~ "stored in :data:`contents` objects as " +#~ "strings (*bytes* in PyMuPDF)." +#~ msgstr "" +#~ "PDFページの可視イメージは、特別なミニ言語でコード化されたコマンドによって制御されています。この言語の概要については、 " +#~ ":ref:`AdobeManual` のpp. " +#~ "643にある「オペレーターサマリー」を参照してください。これらのコマンドは、 :data:`contents` " +#~ "オブジェクトに文字列(PyMuPDFではバイト)として格納されます。" + +#~ msgid "" +#~ "There are commands in that language, " +#~ "which change the coordinate system of" +#~ " the page for all the following " +#~ "commands. In order to limit the " +#~ "scope of such commands to \"local\", " +#~ "they must be wrapped by the " +#~ "command pair *q* (\"save graphics " +#~ "state\", or \"stack\") and *Q* " +#~ "(\"restore graphics state\", or \"unstack\")." +#~ msgstr "この言語には、次に続くすべてのコマンドの座標系を変更するコマンドがあります。このようなコマンドのスコープを「ローカル」に制限するためには、コマンドペアq(「グラフィックスステートの保存」または「スタック」)およびQ(「グラフィックスステートの復元」または「アンスタック」)で囲む必要があります。" + +#~ msgid "So the PDF creator did this::" +#~ msgstr "したがって、PDFの作成者は次のように行いました::" + +#~ msgid "where they should have done this::" +#~ msgstr "正しくは、次のように行うべきでした::" + +#~ msgid "" +#~ "In the mini-language's syntax, spaces" +#~ " and line breaks are equally accepted" +#~ " token delimiters." +#~ msgstr "ミニ言語の構文では、スペースと改行は同様にトークンの区切りとして受け入れられます。" + +#~ msgid "Multiple consecutive delimiters are treated as one." +#~ msgstr "複数の連続した区切りは、1つとして扱われます。" + +#~ msgid "" +#~ "Keywords \"stream\" and \"endstream\" are " +#~ "inserted automatically -- not by the " +#~ "programmer." +#~ msgstr "キーワード「stream」と「endstream」は自動的に挿入されます - プログラマーによって挿入されるものではありません。" + +#~ msgid "" +#~ "Since v1.16.0, there is the property " +#~ ":attr:`Page.is_wrapped`, which lets you check" +#~ " whether a page's contents are " +#~ "wrapped in that string pair." +#~ msgstr "v1.16.0以降、プロパティPage.is_wrappedがあり、ページの内容がその文字列ペアでラップされているかどうかを確認できます。" + +#~ msgid "" +#~ "If it is ``False`` or if you " +#~ "want to be on the safe side, " +#~ "pick one of the following:" +#~ msgstr "それが `False` であるか、安全側に立ちたい場合、次のいずれかを選択してください:" + +#~ msgid "" +#~ "The easiest way: in your script, " +#~ "do a :meth:`Page.clean_contents` before you" +#~ " do your first item insertion." +#~ msgstr "最も簡単な方法:スクリプトで最初のアイテム挿入を行う前に :meth:`Page.clean_contents` を実行します。" + +#~ msgid "" +#~ "Pre-process your PDF with the " +#~ "MuPDF command line utility *mutool clean" +#~ " -c ...* and work with its " +#~ "output file instead." +#~ msgstr "PDFをMuPDFコマンドラインユーティリティ `mutool clean -c …` で前処理し、その出力ファイルで作業します。" + +#~ msgid "" +#~ "Directly wrap the page's :data:`contents` " +#~ "with the stacking commands before you" +#~ " do your first item insertion." +#~ msgstr "最初のアイテム挿入を行う前に、直接ページの内容をスタッキングコマンドでラップします。" + +#~ msgid "" +#~ "**Solutions 1. and 2.** use the " +#~ "same technical basis and **do a " +#~ "lot more** than what is required " +#~ "in this context: they also clean " +#~ "up other inconsistencies or redundancies " +#~ "that may exist, multiple */Contents* " +#~ "objects will be concatenated into one," +#~ " and much more." +#~ msgstr "" +#~ "**解決策1と2** " +#~ "は同じ技術的基盤を使用しており、この文脈で必要なもの以上のことを行います:他の不整合や冗長性もクリーンアップされ、複数の " +#~ "*/Contents* オブジェクトが1つに結合されるなど、さまざまな操作が行われます。" + +#~ msgid "" +#~ "For **incremental saves,** solution 1. " +#~ "has an unpleasant implication: it will" +#~ " bloat the update delta, because it" +#~ " changes so many things and, in " +#~ "addition, stores the **cleaned contents " +#~ "uncompressed**. So, if you use " +#~ ":meth:`Page.clean_contents` you should consider " +#~ "**saving to a new file** with (at" +#~ " least) *garbage=3* and *deflate=True*." +#~ msgstr "" +#~ "増分保存の場合、解決策1には不快な影響があります:多くの変更を行い、さらにクリーンされた内容を非圧縮で保存するため、更新デルタが膨れる可能性があります。したがって、" +#~ " :meth:`Page.clean_contents` を使用する場合は、(少なくとも) " +#~ "`garbage=3` および `deflate=True` " +#~ "を指定して新しいファイルに保存することを検討する必要があります。" + +#~ msgid "" +#~ "**Solution 3.** is completely under your" +#~ " control and only does the minimum" +#~ " corrective action. There is a handy" +#~ " utility method :meth:`Page.wrap_contents` which" +#~ " -- as twe name suggests -- " +#~ "**wraps** the page's :data:`contents` " +#~ "object(s) by the PDF commands `q` " +#~ "and `Q`." +#~ msgstr "" +#~ "**解決策3** は完全にあなたの制御下にあり、最小限の修正しか行いません。便利なユーティリティメソッド " +#~ ":meth:`Page.wrap_contents` があり、名前が示すように、ページの内容 " +#~ "(:data:`contents`) オブジェクトをPDFコマンド `q` と `Q`" +#~ " でラップします。" + +#~ msgid "" +#~ "This solution is extremely fast and " +#~ "the changes to the PDF are " +#~ "minimal. This is useful in situations" +#~ " where incrementally saving the file " +#~ "is desirable -- or even a must " +#~ "when the PDF has been digitally " +#~ "signed and you cannot change this " +#~ "status." +#~ msgstr "この解決策は非常に高速で、PDFへの変更は最小限です。これは、ファイルを増分保存することが望ましい場合、またはPDFがデジタル署名されており、このステータスを変更できない場合に便利です。" + +#~ msgid "We recommend the following snippet to get the situation under control:" +#~ msgstr "次のスニペットを使用して状況をコントロールすることをお勧めします:" + +#~ msgid "How to Deal with Messages Issued by :title:`MuPDF`" +#~ msgstr ":title:`MuPDF` から発行されるメッセージの処理方法" + +#~ msgid "" +#~ "Since |PyMuPDF| v1.16.0, **error messages**" +#~ " issued by the underlying :title:`MuPDF`" +#~ " library are being redirected to the" +#~ " Python standard device *sys.stderr*. So" +#~ " you can handle them like any " +#~ "other output going to this devices." +#~ msgstr "" +#~ "PyMuPDF v1.16.0以降、基盤となるMuPDFライブラリから発行されるエラーメッセージは、Python標準デバイス" +#~ " *sys.stderr* " +#~ "にリダイレクトされます。したがって、これらのメッセージはこのデバイスに出力される他の出力と同様に扱うことができます。" + +#~ msgid "" +#~ "In addition, these messages go to " +#~ "the internal buffer together with any" +#~ " :title:`MuPDF` warnings -- see below." +#~ msgstr "さらに、これらのメッセージはMuPDFの警告とともに内部バッファに送られます - 以下を参照してください。" + +#~ msgid "" +#~ "We always prefix these messages with " +#~ "an identifying string *\"mupdf:\"*. If " +#~ "you prefer to not see recoverable " +#~ "MuPDF errors at all, issue the " +#~ "command `pymupdf.TOOLS.mupdf_display_errors(False)`." +#~ msgstr "" +#~ "これらのメッセージは常に識別用の文字列 `\"mupdf:\"` " +#~ "で始まります。復旧可能なMuPDFエラーを全く表示したくない場合は、コマンド " +#~ "`pymupdf.TOOLS.mupdf_display_errors(False)` を発行してください。" + +#~ msgid "" +#~ "MuPDF warnings continue to be stored " +#~ "in an internal buffer and can be" +#~ " viewed using :meth:`Tools.mupdf_warnings`." +#~ msgstr "MuPDFの警告は引き続き内部バッファに保存され、 :meth:`Tools.mupdf_warnings` を使用して表示できます。" + +#~ msgid "" +#~ "Please note that MuPDF errors may " +#~ "or may not lead to Python " +#~ "exceptions. In other words, you may " +#~ "see error messages from which MuPDF " +#~ "can recover and continue processing." +#~ msgstr "MuPDFのエラーがPython例外につながる場合とつながらない場合があることに注意してください。言い換えれば、MuPDFが回復して処理を続けることができるエラーメッセージが表示される可能性があります。" + +#~ msgid "" +#~ "Example output for a **recoverable " +#~ "error**. We are opening a damaged " +#~ "PDF, but MuPDF is able to repair" +#~ " it and gives us a little " +#~ "information on what happened. Then we" +#~ " illustrate how to find out whether" +#~ " the document can later be saved " +#~ "incrementally. Checking the " +#~ ":attr:`Document.is_dirty` attribute at this " +#~ "point also indicates that during " +#~ "`pymupdf.open` the document had to be" +#~ " repaired:" +#~ msgstr "" +#~ "回復可能なエラーの例出力です。損傷したPDFを開いていますが、MuPDFはそれを修復し、何が起こったかに関する少しの情報を提供しています。その後、ドキュメントが後で増分保存できるかどうかを調べる方法も示しています。この時点で" +#~ " :attr:`Document.is_dirty` 属性をチェックすることも、 `pymupdf.open`" +#~ " の際にドキュメントを修復する必要があったことを示しています::" + +#~ msgid "Example output for an **unrecoverable error**:" +#~ msgstr "**復旧できないエラー**の例出力:" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.mo b/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.mo new file mode 100644 index 000000000..91b8e3020 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.po b/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.po new file mode 100644 index 000000000..d3271a98c --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-drawing-and-graphics.po @@ -0,0 +1,286 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 cb49434c487144e990e8778af438e31c +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 b51ec2c5bec0471caa6c704e84d440e8 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 5049a6512fcf48e592552896861b4264 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-drawing-and-graphics.rst:7 c0b1ffca482b4acc93f866d4c279b9d4 +msgid "Drawing and Graphics" +msgstr "描画とグラフィックス" + +#: ../../recipes-drawing-and-graphics.rst:11 cead69fa4d944be19c81d6669a194971 +msgid "" +"When the terms \"Drawings\" or \"Graphics\" are mentioned here we are " +"referring to \"Vector Graphics\" or \"Line Art\"." +msgstr "ここで「Drawings」や「Graphics」という用語が言及されている場合、それは「ベクトルグラフィックス」や「線画」を指しています。" + +#: ../../recipes-drawing-and-graphics.rst:13 688ad77eb281436f8d8c2e421a35ccae +msgid "Therefore please consider these terms as being synonymous!" +msgstr "したがって、これらの用語を同義語として考えてください。" + +#: ../../recipes-drawing-and-graphics.rst:16 d4e85709e4c54144a43b7930cee0c0b5 +msgid "" +"PDF files support elementary drawing operations as part of their syntax. " +"These are **vector graphics** and include basic geometrical objects like " +"lines, curves, circles, rectangles including specifying colors." +msgstr "PDFファイルは、その構文の一部として基本的な描画操作をサポートしています。これらは**ベクトルグラフィックス**であり、線、曲線、円、長方形などの基本的な幾何学的オブジェクトを含み、色の指定も可能です。" + +#: ../../recipes-drawing-and-graphics.rst:18 9cd89e5c62e34160bf2adcbf9cb4d393 +msgid "" +"The syntax for such operations is defined in \"A Operator Summary\" on " +"page 643 of the :ref:`AdobeManual`. Specifying these operators for a PDF " +"page happens in its :data:`contents` objects." +msgstr "" +"このような操作の構文は、:ref:`AdobeManual` の「A Operator " +"Summary」のページ643で定義されています。PDFページのためのこれらのオペレータは、その内容 " +"(:data:`contents`)オブジェクト内で指定されます。" + +#: ../../recipes-drawing-and-graphics.rst:20 4a3ebf96479040d49b72aa665e43f8a3 +msgid "" +"|PyMuPDF| implements a large part of the available features via its " +":ref:`Shape` class, which is comparable to notions like \"canvas\" in " +"other packages (e.g. `reportlab `_)." +msgstr "" +"PyMuPDFは、 :ref:`Shape` クラスを介して利用可能な多くの機能を実装しており、これは他のパッケージ(例: `reportlab " +"`_ など)の「キャンバス」のような概念と類似しています。" + +#: ../../recipes-drawing-and-graphics.rst:22 1bcec4ef8d0a4c69a61128221389abfa +msgid "" +"A shape is always created as a **child of a page**, usually with an " +"instruction like `shape = page.new_shape()`. The class defines numerous " +"methods that perform drawing operations on the page's area. For example, " +"`last_point = shape.draw_rect(rect)` draws a rectangle along the borders " +"of a suitably defined `rect = pymupdf.Rect(...)`." +msgstr "" +"シェイプは常にページの子として作成され、通常は `shape = page.new_shape()` " +"のような命令で行います。このクラスは、ページの領域に描画操作を実行するための多数のメソッドを定義しています。たとえば、 `last_point =" +" shape.draw_rect(rect)` は、適切に定義された `rect = pymupdf.Rect(…)` " +"の境界に沿って四角形を描画します。" + +#: ../../recipes-drawing-and-graphics.rst:24 522b5200167c42fc91bcda96ab482884 +msgid "" +"The returned *last_point* **always** is the :ref:`Point` where drawing " +"operation ended (\"last point\"). Every such elementary drawing requires " +"a subsequent :meth:`Shape.finish` to \"close\" it, but there may be " +"multiple drawings which have one common ``finish()`` method." +msgstr "" +"返されるlast_pointは常に描画操作が終了する :ref:`Point` " +"(「最後のポイント」)です。このような基本的な描画ごとに、それを「閉じる」ために :meth:`Shape.finish` " +"が必要ですが、1つの共通の``finish()``メソッドを持つ複数の描画があるかもしれません。" + +#: ../../recipes-drawing-and-graphics.rst:26 a1a6cf576e374185b575ac29ef915ceb +msgid "" +"In fact, :meth:`Shape.finish` *defines* a group of preceding draw " +"operations to form one -- potentially rather complex -- graphics object. " +"|PyMuPDF| provides several predefined graphics in `shapes_and_symbols.py " +"`_ which demonstrate " +"how this works." +msgstr "" +"実際には、 :meth:`Shape.finish` " +"は、1つの(潜在的に非常に複雑な)グラフィックオブジェクトを形成するための前の描画操作のグループを定義します。PyMuPDFは、これがどのように機能するかを示す" +" `shapes_and_symbols.py `_ " +"内のいくつかの事前定義されたグラフィックスを提供しています。" + +#: ../../recipes-drawing-and-graphics.rst:28 6682ec89ed0243e1a7c05a40ab825358 +msgid "" +"If you import this script, you can also directly use its graphics as in " +"the following example::" +msgstr "このスクリプトをインポートすると、次の例のようにそのグラフィックスを直接使用することもできます::" + +#: ../../recipes-drawing-and-graphics.rst:86 ccbd2af5bfaa46dcbe5f2f15e412fc23 +msgid "This is the script's outcome:" +msgstr "これがスクリプトの結果です:" + +#: ../../recipes-drawing-and-graphics.rst:97 65b162adbc52489e95a0170bada6afe9 +msgid "How to Extract Drawings" +msgstr "描画の抽出方法" + +#: ../../recipes-drawing-and-graphics.rst:99 8373533f008548a89798eddeba96f9d8 +msgid "New in v1.18.0" +msgstr "v1.18.0で新登場" + +#: ../../recipes-drawing-and-graphics.rst:101 0acd4de5d8554ebaa10b1347a50cf1de +msgid "" +"Drawing commands (**vector graphics**) issued by a page can be extracted " +"as a list of dictionaries. Interestingly, this is possible for :ref:`all " +"supported document types` -- not just PDF: so you " +"can use it for XPS, EPUB and others as well." +msgstr "" +"ページから発行された描画コマンドを抽出できます。興味深いことに、これはすべての :ref:`サポートされている " +"` ドキュメントタイプに対して可能です – PDFだけでなく、XPS、EPUBなどにも使用できます。" + +#: ../../recipes-drawing-and-graphics.rst:103 7a07bc7541854578ad35f95ad5dfc330 +msgid "" +"Page method, :meth:`Page.get_drawings()` accesses draw commands and " +"converts them into a list of Python dictionaries. Each dictionary -- " +"called a \"path\" -- represents a separate drawing -- it may be simple " +"like a single line, or a complex combination of lines and curves " +"representing one of the shapes of the previous section." +msgstr "" +":ref:`Page` メソッド、:meth:`Page.get_drawings()` " +"は描画コマンドにアクセスし、それらをPythonの辞書のリストに変換します。各辞書 – 「パス」と呼ばれる – は個別の描画を表します – " +"それは単純な単一の線であるか、前のセクションの形状の1つを表す線と曲線の複雑な組み合わせであるかもしれません。" + +#: ../../recipes-drawing-and-graphics.rst:105 7ffb5ff050964de093cfe20336fa283d +msgid "" +"The *path* dictionary has been designed such that it can easily be used " +"by the :ref:`Shape` class and its methods. Here is an example for a page " +"with one path, that draws a red-bordered yellow circle inside rectangle " +"`Rect(100, 100, 200, 200)`::" +msgstr "" +"パスの辞書は、 :ref:`Shape` " +"クラスとそのメソッドで簡単に使用できるように設計されています。以下は、1つのパスを持つページの例で、そのパスは `Rect(100, 100, " +"200, 200)` の内側に赤い境界線の黄色い円を描画します::" + +#: ../../recipes-drawing-and-graphics.rst:140 c1f4f56de6e1441f988e21f4184bed57 +msgid "" +"You need (at least) 4 Bézier curves (of 3rd order) to draw a circle with " +"acceptable precision. See this `Wikipedia article " +"`_ for some background." +msgstr "" +"適切な精度で円を描画するには、(少なくとも)4つの3次ベジエ曲線が必要です。背景については、この `Wikipediaの記事 " +"`_ を参照してください。" + +#: ../../recipes-drawing-and-graphics.rst:143 be59d541be214b669a9c6663ca6abcd1 +msgid "" +"The following is a code snippet which extracts the drawings of a page and" +" re-draws them on a new page::" +msgstr "以下は、ページの描画を抽出し、それらを新しいページに再描画するコードの断片です::" + +#: ../../recipes-drawing-and-graphics.rst:194 660c9c99b6864b26ad3a377cf1d1f343 +msgid "" +"As can be seen, there is a high congruence level with the :ref:`Shape` " +"class. With one exception: For technical reasons `lineCap` is a tuple of " +"3 numbers here, whereas it is an integer in :ref:`Shape` (and in PDF). So" +" we simply take the maximum value of that tuple." +msgstr "" +"ご覧の通り、 :ref:`Shape` クラスとの高い一致度があります。ただし1つ例外があります。技術的な理由から、ここでは `lineCap` " +"は3つの数字のタプルですが、 :ref:`Shape` クラス(およびPDF内)では整数です。そのため、そのタプルの最大値を単純に取得します。" + +#: ../../recipes-drawing-and-graphics.rst:196 d8e69210bfb24c3ebf97b7efbc549af6 +msgid "" +"Here is a comparison between input and output of an example page, created" +" by the previous script:" +msgstr "以下は、前のスクリプトで作成された例のページの入力と出力の比較です。" + +#: ../../recipes-drawing-and-graphics.rst:201 9fc8264dcf1f4bfd98a7c1ad1aa9aee3 +msgid "" +"The reconstruction of graphics, like shown here, is not perfect. The " +"following aspects will not be reproduced as of this version:" +msgstr "ここに示されているようなグラフィックスの再構築は完璧ではありません。次の点は、このバージョンでは再現されません:" + +#: ../../recipes-drawing-and-graphics.rst:203 8166cbf0fbaf40bb8343e4d7155fc0ce +msgid "" +"Page definitions can be complex and include instructions for not showing " +"/ hiding certain areas to keep them invisible. Things like this are " +"ignored by :meth:`Page.get_drawings` - it will always return all paths." +msgstr "" +"ページの定義は複雑になる可能性があり、特定の領域を表示しない/非表示にするための指示を含むことがあります。こうしたことは、 " +":meth:`Page.get_drawings` によって無視されます – このメソッドは常にすべてのパスを返します。" + +#: ../../recipes-drawing-and-graphics.rst:205 fd360aa336ff48d2b72658c63221a561 +msgid "" +"You can use the path list to make your own lists of e.g. all lines or all" +" rectangles on the page and subselect them by criteria, like color or " +"position on the page etc." +msgstr "パスのリストを使用して、ページ上のすべての線またはすべての四角形などのリストを作成し、色やページ上の位置などの基準でサブセレクトすることができます。" + +#: ../../recipes-drawing-and-graphics.rst:210 93b3944bb9e543f48ca901f90fb021e4 +msgid "How to Delete Drawings" +msgstr "描画の削除方法" + +#: ../../recipes-drawing-and-graphics.rst:212 094bb3cfc57b4461b8e1fea1652d63a5 +msgid "" +"To delete drawings/vector graphics we must use a :ref:`Redaction " +"Annotation ` with the bounding box of the drawing " +"and then **add and apply** a redaction to it to delete it." +msgstr "" +"描画やベクトルグラフィックスを削除するには、その描画の境界ボックスを持つ " +":ref:`赤字アノテーション` を使用し、それに赤字を **追加して適用する** 必要があります。" + +#: ../../recipes-drawing-and-graphics.rst:215 2f4f8d0e3e694181a5ce64ea3ccbe672 +msgid "" +"The following code shows an example of deleting the first drawing found " +"on the page::" +msgstr "以下のコードは、ページ上で最初に見つかった描画を削除する例を示しています:" + +#: ../../recipes-drawing-and-graphics.rst:225 edf0ff01d38248c8b9f6608213f47b52 +msgid "" +"See :meth:`Page.apply_redactions` for the parameter options which can be " +"sent - you are able to apply deletion options to image, drawing and text " +"objects which are bound by the annotation area." +msgstr "" +":meth:`Page.apply_redactions` " +"には送信できるパラメータオプションがあります。アノテーション領域によって境界付けられたイメージ、描画、テキストオブジェクトに削除オプションを適用できます。" + +#: ../../recipes-drawing-and-graphics.rst:229 ada7ddfefd1344ee94d178c3df118ae1 +msgid "How to Draw Graphics" +msgstr "グラフィックスを描画する方法" + +#: ../../recipes-drawing-and-graphics.rst:231 4606dbe6b222488fac1f4948be78ee37 +msgid "" +"Drawing graphics is as simple as calling the type of :meth:`Drawing " +"Method ` you may want. You can draw graphics directly on " +"pages or within shape objects." +msgstr "" +"グラフィックスを描画することは、必要な :meth:`描画方法 ` " +"の種類を呼び出すだけです。グラフィックスは、ページ上または形状オブジェクト内に直接描画できます。" + +#: ../../recipes-drawing-and-graphics.rst:234 55d0693f8fc14f2e8744839a6f7afe99 +msgid "For example, to draw a circle::" +msgstr "例えば、円を描画するには:" + +#: ../../recipes-drawing-and-graphics.rst:245 1426496584b041f18b185bc3ee064f75 +msgid "" +"The :ref:`Shape` object can be used to combine multiple drawings that " +"should receive common properties as specified by :meth:`Shape.finish`." +msgstr "" +":ref:`Shape` オブジェクトは、:meth:`Shape.finish` " +"で指定された共通のプロパティを受け取る必要がある複数の描画を組み合わせるために使用できます。" + +#: ../../footer.rst:60 15d9c97491bd46d9806555b60b5da2ed +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-images.mo b/docs/locales/ja/LC_MESSAGES/recipes-images.mo new file mode 100644 index 000000000..08b058801 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-images.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-images.po b/docs/locales/ja/LC_MESSAGES/recipes-images.po new file mode 100644 index 000000000..0a3f35745 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-images.po @@ -0,0 +1,1075 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 e08ff8c04b304218ac81d35f41292ac6 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 a1f1910f4f4b427c872afb309d8717ae +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 067d19d0feee4d3ea790680243bcadde +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-images.rst:7 57931bcbe871420b8981b5294574c1f6 +msgid "Images" +msgstr "画像" + +#: ../../recipes-images.rst:14 c30d7e40298f4ae28b6931514dcdda2d +msgid "How to Make Images from Document Pages" +msgstr "ドキュメントページから画像を作成する方法" + +#: ../../recipes-images.rst:16 a5b0953e89e6485ea0cc82a8785e31cf +msgid "" +"This little script will take a document filename and generate a PNG file " +"from each of its pages." +msgstr "この小さなスクリプトは、文書のファイル名を取得し、各ページからPNGファイルを生成します。" + +#: ../../recipes-images.rst:18 f1183a990e834decbf12969a97c76cf8 +msgid "The document can be any :ref:`supported type`." +msgstr "文書の種類は、:ref:`サポートされている ` どんな形式でも構いません。" + +#: ../../recipes-images.rst:20 86fd95ad8dba47ed87f53ab293cc7058 +msgid "" +"The script works as a command line tool which expects the filename being " +"supplied as a parameter. The generated image files (1 per page) are " +"stored in the directory of the script::" +msgstr "このスクリプトはコマンドラインツールとして動作し、ファイル名をパラメータとして指定することを期待しています。生成された画像ファイル(1ページごとに1つ)は、スクリプトが格納されているディレクトリに保存されます。::" + +#: ../../recipes-images.rst:29 3e69a1bf5a8d49978cfc9c8dabfc3648 +msgid "" +"The script directory will now contain PNG image files named *page-0.png*," +" *page-1.png*, etc. Pictures have the dimension of their pages with width" +" and height rounded to integers, e.g. 595 x 842 pixels for an A4 portrait" +" sized page. They will have a resolution of 96 dpi in x and y dimension " +"and have no transparency. You can change all that -- for how to do this, " +"read the next sections." +msgstr "" +"スクリプトのディレクトリには、これから *page-0.png* 、 *page-1.png* " +"などという名前のPNG画像ファイルが含まれるようになります。画像は各ページの寸法に合わせて整数に丸められた幅と高さを持ちます。例えば、A4縦向きのページであれば595" +" x 842ピクセルとなります。これらの画像は水平方向と垂直方向の解像度が96 " +"dpiで、透明度はありません。これらの設定を変更することもできます。詳細については、次のセクションをお読みください。" + +#: ../../recipes-images.rst:37 338b2dd52c564f7d9850839bb2b6e0c9 +msgid "How to Increase :index:`Image Resolution `" +msgstr "画像の解像度を上げる方法" + +#: ../../recipes-images.rst:39 4770c88fce1d4a0782d7b461b2a4c8b6 +msgid "" +"The image of a document page is represented by a :ref:`Pixmap`, and the " +"simplest way to create a pixmap is via method :meth:`Page.get_pixmap`." +msgstr "" +"文書ページの画像はPixmapによって表されます。 :ref:`Pixmap` を作成するもっとも簡単な方法は、メソッド " +":meth:`Page.get_pixmap` を使うことです。" + +#: ../../recipes-images.rst:41 fe71e979e90a4e36ab7e012b4e4cfbaf +msgid "" +"This method has many options to influence the result. The most important " +"among them is the :ref:`Matrix`, which lets you :index:`zoom`, rotate, " +"distort or mirror the outcome." +msgstr "このメソッドには結果に影響を与える多くのオプションがあります。その中でも最も重要なのは行列(Matrix)であり、これによって結果を拡大、回転、歪ませる、または反転することができます。" + +#: ../../recipes-images.rst:43 879c14c6b55746e184a6a3659ddb8624 +msgid "" +":meth:`Page.get_pixmap` by default will use the :ref:`Identity` matrix, " +"which does nothing." +msgstr ":meth:`Page.get_pixmap` はデフォルトで :ref:`Identity` 行列を使用しますが、これは何も行いません。" + +#: ../../recipes-images.rst:45 7ae61bca1b914ef58650814764c86556 +msgid "" +"In the following, we apply a :index:`zoom factor `" +" of 2 to each dimension, which will generate an image with a four times " +"better resolution for us (and also about 4 times the size)::" +msgstr "以下では、各次元に2倍のズームを適用し、結果として解像度が4倍向上した画像を生成します(そしてサイズも約4倍になります)。::" + +#: ../../recipes-images.rst:53 a015d3083e624dd0bb52595296bcda69 +msgid "" +"Since version 1.19.2 there is a more direct way to set the resolution: " +"Parameter `\"dpi\"` (dots per inch) can be used in place of `\"matrix\"`." +" To create a 300 dpi image of a page specify `pix = " +"page.get_pixmap(dpi=300)`. Apart from notation brevity, this approach has" +" the additional advantage that the **dpi value is saved with the image** " +"file -- which does not happen automatically when using the Matrix " +"notation." +msgstr "" +"バージョン1.19.2以降では、解像度を設定するより直接的な方法があります。 `\"dpi\"` (インチあたりのドット数)というパラメータを " +"`\"matrix\"` の代わりに使用することができます。ページの300 dpiの画像を作成するには、 `pix = " +"page.get_pixmap(dpi=300)` " +"と指定します。略記法の利便性に加えて、この方法の追加の利点は、dpiの値が画像ファイルとともに保存されることです。これはMatrixの記法を使用する場合に自動的に行われることはありません。" + +#: ../../recipes-images.rst:61 8e820a068673426d8b07c503b4b9b1eb +msgid "How to Create :index:`Partial Pixmaps` (Clips)" +msgstr "部分的なPixmap(クリップ)の作成方法" + +#: ../../recipes-images.rst:62 3575f660d7f34fcf8740261d5cc13939 +msgid "" +"You do not always need or want the full image of a page. This is the case" +" e.g. when you display the image in a GUI and would like to fill the " +"respective window with a zoomed part of the page." +msgstr "常にページの完全な画像が必要なわけではありませんし、必要ともしない場合があります。例えば、GUIで画像を表示し、ページのズームされた部分でウィンドウを埋めたい場合などが該当します。" + +#: ../../recipes-images.rst:64 91a6b74e53dc47bba254ef54f6903677 +msgid "" +"Let's assume your GUI window has room to display a full document page, " +"but you now want to fill this room with the bottom right quarter of your " +"page, thus using a four times better resolution." +msgstr "GUIウィンドウにフルの文書ページを表示するスペースがあると仮定しましょうが、現在はページの右下の四分の一でこのスペースを埋めたいとします。これにより、解像度が4倍向上します。" + +#: ../../recipes-images.rst:66 0c121baa24eb407eb7c91b5f481189c8 +msgid "" +"To achieve this, define a rectangle equal to the area you want to appear " +"in the GUI and call it \"clip\". One way of constructing rectangles in " +"PyMuPDF is by providing two diagonally opposite corners, which is what we" +" are doing here." +msgstr "これを実現するために、GUIに表示したい領域に等しい矩形を定義し、「クリップ」と呼びます。PyMuPDFでは、矩形を構築する方法の1つは、対角線上にある2つの角を指定することです。これがここで行っていることです。" + +#: ../../recipes-images.rst:79 355e4acbef614ba299f86cec24da6c88 +msgid "" +"In the above we construct *clip* by specifying two diagonally opposite " +"points: the middle point *mp* of the page rectangle, and its bottom " +"right, *rect.br*." +msgstr "" +"上記では、 `clip` を構築するために、2つの対角線上の点を指定しています:ページ矩形の中心点である `mp` と、その右下の点である " +"`rect.br` です。" + +#: ../../recipes-images.rst:87 39c83e942ce74cb9baddcc259d24777e +msgid "How to Zoom a Clip to a GUI Window" +msgstr "GUIウィンドウにクリップをズームする方法" + +#: ../../recipes-images.rst:88 5ffea1d8340c4a08939f161390fd42ae +msgid "" +"Please also read the previous section. This time we want to **compute the" +" zoom factor** for a clip, such that its image best fits a given GUI " +"window. This means, that the image's width or height (or both) will equal" +" the window dimension. For the following code snippet you need to provide" +" the WIDTH and HEIGHT of your GUI's window that should receive the page's" +" clip rectangle." +msgstr "前のセクションもお読みください。今回は、クリップのズームファクターを計算して、その画像が指定されたGUIウィンドウに最適にフィットするようにします。つまり、画像の幅または高さ(または両方)がウィンドウの寸法と等しくなります。次のコードスニペットでは、GUIウィンドウのWIDTHとHEIGHTを提供する必要があります。それらはページのクリップ矩形を受け取る必要があります。" + +#: ../../recipes-images.rst:105 61635a6d0bae4040986c0bd5b66441a2 +msgid "" +"For the other way round, now assume you **have** the zoom factor and need" +" to **compute the fitting clip**." +msgstr "逆の場合は、ズームファクターがあると仮定し、フィッティングクリップを計算する必要があります。" + +#: ../../recipes-images.rst:107 2903b44af458484da6825578d79b1da4 +msgid "" +"In this case we have `zoom = HEIGHT/clip.height = WIDTH/clip.width`, so " +"we must set `clip.height = HEIGHT/zoom` and, `clip.width = WIDTH/zoom`. " +"Choose the top-left point `tl` of the clip on the page to compute the " +"right pixmap::" +msgstr "" +"この場合、 `zoom = HEIGHT/clip.height = WIDTH/clip.width` となるので、 `clip.height" +" = HEIGHT/zoom` および `clip.width = WIDTH/zoom` " +"と設定する必要があります。クリップ内のページ上の左上の点 tl を選択して、適切なピクマップを計算します。" + +#: ../../recipes-images.rst:124 956b4a0c798e4b199a96b3f962fadf70 +msgid "How to Create or Suppress Annotation Images" +msgstr "注釈画像の作成または抑制方法" + +#: ../../recipes-images.rst:125 88238495026f4e289613d5e50b81a474 +msgid "" +"Normally, the pixmap of a page also shows the page's annotations. " +"Occasionally, this may not be desirable." +msgstr "通常、ページのピクマップにはページの注釈も表示されます。しかし、時にはこれが望ましくない場合があります。" + +#: ../../recipes-images.rst:127 7cec58430afd46ed9e720d3b6fa029db +msgid "" +"To suppress the annotation images on a rendered page, just specify " +"`annots=False` in :meth:`Page.get_pixmap`." +msgstr "描画されたページから注釈画像を抑制するには、 :meth:`Page.get_pixmap` で `annots=False` を指定します。" + +#: ../../recipes-images.rst:129 f75aaef8eaa54984a2d2fe045e60d421 +msgid "" +"You can also render annotations separately: they have their own " +":meth:`Annot.get_pixmap` method. The resulting pixmap has the same " +"dimensions as the annotation rectangle." +msgstr "" +"注釈を個別にレンダリングすることもできます。注釈には独自の :meth:`Annot.get_pixmap` " +"メソッドがあります。結果のピクマップは注釈の矩形と同じ寸法です。" + +#: ../../recipes-images.rst:141 9ff376603198474f97ccc539fa162925 +msgid "How to Extract Images: Non-PDF Documents" +msgstr "画像の抽出方法:非PDFドキュメント" + +#: ../../recipes-images.rst:143 e1ee913277e849ab932b9b97874133c3 +msgid "" +"In contrast to the previous sections, this section deals with " +"**extracting** images **contained** in documents, so they can be " +"displayed as part of one or more pages." +msgstr "前のセクションとは対照的に、このセクションではドキュメントに含まれる画像の抽出に取り組みます。これにより、これらの画像を1つ以上のページの一部として表示することができます。" + +#: ../../recipes-images.rst:145 a1a829c4fe444807b6db244124c9137d +msgid "" +"If you want to recreate the original image in file form or as a memory " +"area, you have basically two options:" +msgstr "元の画像をファイル形式またはメモリ領域として再作成したい場合、基本的に2つのオプションがあります:" + +#: ../../recipes-images.rst:147 773185000e944e7daeaf940ca6fda472 +msgid "" +"Convert your document to a PDF, and then use one of the PDF-only " +"extraction methods. This snippet will convert a document to PDF::" +msgstr "ドキュメントをPDFに変換し、その後PDF専用の抽出方法のいずれかを使用します。以下のスニペットはドキュメントをPDFに変換します::" + +#: ../../recipes-images.rst:153 6416afce3c7447f98cbcd8b2d98214dd +msgid "" +"Use :meth:`Page.get_text` with the \"dict\" parameter. This works for all" +" document types. It will extract all text and images shown on the page, " +"formatted as a Python dictionary. Every image will occur in an image " +"block, containing meta information and **the binary image data**. For " +"details of the dictionary's structure, see :ref:`TextPage`. The method " +"works equally well for PDF files. This creates a list of all images shown" +" on a page::" +msgstr "" +"「dict」パラメータを使って :meth:`Page.get_text` " +"を使用します。これはすべてのドキュメントタイプに対して機能します。これにより、ページに表示されているすべてのテキストと画像がPythonの辞書としてフォーマットされて抽出されます。各画像は、メタ情報とバイナリ画像データを含む画像ブロックに含まれます。辞書の構造の詳細については、" +" :ref:`TextPage` " +"を参照してください。この方法はPDFファイルにも同じくうまく機能します。これにより、ページに表示されているすべての画像のリストが作成されます::" + +#: ../../recipes-images.rst:182 28308796e3ad4f7abf4d6c2b939aca48 +msgid "How to Extract Images: PDF Documents" +msgstr "画像の抽出方法:PDFドキュメント" + +#: ../../recipes-images.rst:184 0fc3745f79b8438d9b922c7957f09651 +msgid "" +"Like any other \"object\" in a PDF, images are identified by a cross " +"reference number (:data:`xref`, an integer). If you know this number, you" +" have two ways to access the image's data:" +msgstr "" +"PDF内の他のオブジェクトと同様に、画像は交差参照番号( :data:`xref` " +"、整数)によって識別されます。この番号を知っていれば、画像のデータにアクセスする方法が2つあります:" + +#: ../../recipes-images.rst:186 f2a6f39609bb4bcb838f8d7a34a28da5 +msgid "" +"**Create** a :ref:`Pixmap` of the image with instruction *pix = " +"pymupdf.Pixmap(doc, xref)*. This method is **very** fast (single digit " +"micro-seconds). The pixmap's properties (width, height, ...) will reflect" +" the ones of the image. In this case there is no way to tell which image " +"format the embedded original has." +msgstr "" +"**画像の** :ref:`Pixmap` を作成します。指示: `pix = pymupdf.Pixmap(doc, xref)` " +"。この方法は非常に高速です(単桁のマイクロ秒)。 :ref:`Pixmap` " +"のプロパティ(幅、高さなど)は、画像のものと同じになります。この場合、埋め込まれたオリジナルの画像形式を判別する方法はありません。" + +#: ../../recipes-images.rst:188 903b6e9628bb4a0ea1ad4d2ff2f35557 +msgid "" +"**Extract** the image with *img = doc.extract_image(xref)*. This is a " +"dictionary containing the binary image data as *img[\"image\"]*. A number" +" of meta data are also provided -- mostly the same as you would find in " +"the pixmap of the image. The major difference is string *img[\"ext\"]*, " +"which specifies the image format: apart from \"png\", strings like " +"\"jpeg\", \"bmp\", \"tiff\", etc. can also occur. Use this string as the " +"file extension if you want to store to disk. The execution speed of this " +"method should be compared to the combined speed of the statements *pix = " +"pymupdf.Pixmap(doc, xref);pix.tobytes()*. If the embedded image is in PNG" +" format, the speed of :meth:`Document.extract_image` is about the same " +"(and the binary image data are identical). Otherwise, this method is " +"**thousands of times faster**, and the **image data is much smaller**." +msgstr "" +"**画像を抽出します。指示**: `img = doc.extract_image(xref)` " +"。これはバイナリ画像データを含む辞書です。多くのメタデータも提供されますが、主に画像の :ref:`Pixmap` " +"で見つけることができるものとほぼ同じです。主な違いは、文字列 img[\"ext\"] であり、画像形式を指定します。\"png\" 以外にも " +"\"jpeg\" 、 \"bmp\"、 \"tiff\" " +"などの文字列が出現することがあります。ディスクに保存する場合は、この文字列をファイル拡張子として使用します。このメソッドの実行速度は、以下のステートメントの組み合わせ速度" +" `pix = pymupdf.Pixmap(doc, xref);pix.tobytes()` " +"と比較する必要があります。埋め込まれた画像がPNG形式の場合、 :meth:`Document.extract_image` " +"の速度はほぼ同じで(バイナリ画像データも同じです)、それ以外の場合は、このメソッドは数千倍高速であり、画像データも小さくなります" + +#: ../../recipes-images.rst:190 110186a162f04f83955d7d59285d446b +msgid "" +"The question remains: **\"How do I know those 'xref' numbers of " +"images?\"**. There are two answers to this:" +msgstr "**「どのようにして画像の'xref'番号を知るのか?」** これには2つの答えがあります:" + +#: ../../recipes-images.rst:192 ad8338028ef944a19307f2547e05528f +msgid "" +"**\"Inspect the page objects:\"** Loop through the items of " +":meth:`Page.get_images`. It is a list of list, and its items look like " +"*[xref, smask, ...]*, containing the :data:`xref` of an image. This " +":data:`xref` can then be used with one of the above methods. Use this " +"method for **valid (undamaged)** documents. Be wary however, that the " +"same image may be referenced multiple times (by different pages), so you " +"might want to provide a mechanism avoiding multiple extracts." +msgstr "" +"**「ページオブジェクトを検査する」** : :meth:`Page.get_images` " +"の項目をループ処理します。これはリストのリストであり、項目は `[xref、smask、...]` のようになっており、画像の " +":data:`xref` を含んでいます。この :data:`xref` " +"を上記の方法の1つで使用できます。これは有効(損傷していない)なドキュメントに使用しますが、同じ画像が複数回(異なるページで)参照されることがあるため、複数回の抽出を避けるメカニズムを提供することが望ましいかもしれません。" + +#: ../../recipes-images.rst:193 afeb949615fb470d958fdfc35b1fe9dd +msgid "" +"**\"No need to know:\"** Loop through the list of **all xrefs** of the " +"document and perform a :meth:`Document.extract_image` for each one. If " +"the returned dictionary is empty, then continue -- this :data:`xref` is " +"no image. Use this method if the PDF is **damaged (unusable pages)**. " +"Note that a PDF often contains \"pseudo-images\" (\"stencil masks\") with" +" the special purpose of defining the transparency of some other image. " +"You may want to provide logic to exclude those from extraction. Also have" +" a look at the next section." +msgstr "" +"**「知る必要はありません」**:ドキュメントのすべての :data:`xref` のリストをループ処理し、各 :data:`xref` に対して" +" :meth:`Document.extract_image` を実行します。返される辞書が空であれば、続けて次の :data:`xref` " +"を処理します。この :data:`xref` " +"は画像ではありません。これはPDFが損傷している(使用できないページがある)場合に使用します。PDFにはしばしば他の画像の透明度を定義する特別な目的の「擬似画像」(ステンシルマスク)が含まれていることに注意してください。これらを抽出から除外するためのロジックを提供することがあるかもしれません。次のセクションも参照してください。" + +#: ../../recipes-images.rst:195 6d656f35aa69461f8e47b7f4bf499261 +msgid "" +"For both extraction approaches, there exist ready-to-use general purpose " +"scripts:" +msgstr "これらの抽出方法の両方に対して、一般的な用途のスクリプトが存在します。" + +#: ../../recipes-images.rst:197 490e93536036437fab679b20daac6f6a +msgid "" +"`extract-from-pages.py `_ " +"extracts images page by page:" +msgstr "" +"`extract-from-pages.py `_ " +"はページごとに画像を抽出します。" + +#: ../../recipes-images.rst:202 87adba5350a8467e87dc3f0e80cae1e4 +msgid "" +"and `extract-from-xref.py `_ " +"extracts images by xref table:" +msgstr "" +"`extract-from-xref.py `_ " +"はxrefテーブルによって画像を抽出します。" + +#: ../../recipes-images.rst:213 b53c02001e2f4f7391e6e9e880b0631f +msgid "How to Handle Image Masks" +msgstr "画像マスクの処理方法" + +#: ../../recipes-images.rst:214 32034dc3accf4622b51ead4c06f67509 +msgid "" +"Some images in PDFs are accompanied by **image masks**. In their simplest" +" form, masks represent alpha (transparency) bytes stored as separate " +"images. In order to reconstruct the original of an image, which has a " +"mask, it must be \"enriched\" with transparency bytes taken from its " +"mask." +msgstr "PDF内の一部の画像には画像マスクが付属しています。最も単純な形式では、マスクは別の画像として格納されたアルファ(透明度)バイトを表します。画像の元の形を復元するには、そのマスクから取得した透明度バイトを使用して画像を「補完」する必要があります。" + +#: ../../recipes-images.rst:216 e0828025961548d4a2ed0f2e5b9cedf6 +msgid "" +"Whether an image does have such a mask can be recognized in one of two " +"ways in PyMuPDF:" +msgstr "PyMuPDFでは、画像にそのようなマスクがあるかどうかは次の2つの方法で認識できます:" + +#: ../../recipes-images.rst:218 4c26a844692448d6818018bc8e19f652 +msgid "" +"An item of :meth:`Document.get_page_images` has the general format " +"`(xref, smask, ...)`, where :data:`xref` is the image's :data:`xref` and " +"*smask*, if positive, then it is the :data:`xref` of a mask." +msgstr "" +":meth:`Document.get_page_images` の項目は一般的な形式 `(xref、smask、...)` " +"を持ちます。ここで、xrefは画像の :data:`xref` であり、 「smask」 が正の場合、それはマスクの :data:`xref` " +"です。" + +#: ../../recipes-images.rst:219 8011ba0428cc4b87a64b8031ea34ea90 +msgid "" +"The (dictionary) results of :meth:`Document.extract_image` have a key " +"*\"smask\"*, which also contains any mask's :data:`xref` if positive." +msgstr "" +":meth:`Document.extract_image` の結果(辞書)には、キー「smask」があります。このキーには、マスクの " +":data:`xref` が含まれています。" + +#: ../../recipes-images.rst:221 65aadf1969e44334a7c86d7524a50702 +msgid "" +"If *smask == 0* then the image encountered via :data:`xref` can be " +"processed as it is." +msgstr "`smask == 0` の場合、 :data:`xref` を介して遭遇した画像はそのまま処理できます。" + +#: ../../recipes-images.rst:223 302ce99937d44ca995fcedb51c11aaae +msgid "" +"To recover the original image using PyMuPDF, the procedure depicted as " +"follows must be executed:" +msgstr "PyMuPDFを使用して元の画像を復元するためには、以下に示す手順を実行する必要があります:" + +#: ../../recipes-images.rst:232 df80dda6fd5a4c4db985e0cfa929ee0a +msgid "" +"Step (1) creates a pixmap of the basic image. Step (2) does the same with" +" the image mask. Step (3) adds an alpha channel and fills it with " +"transparency information." +msgstr "ステップ(1)では、基本画像のピクマップを作成します。ステップ(2)では、同じことを画像マスクで行います。ステップ(3)では、アルファチャンネルを追加し、透明情報で埋めます。" + +#: ../../recipes-images.rst:234 4a160a9c64ac45caa88c66099e2af1b1 +msgid "" +"The scripts `extract-from-pages.py `_, " +"and `extract-from-xref.py `_ " +"above also contain this logic." +msgstr "" +"また、上記の `extract-from-pages.py `_ " +"および `extract-from-xref.py `_ " +"というスクリプトにもこのロジックが含まれています。" + +#: ../../recipes-images.rst:250 0f2b285905974f1b99efded1652e0905 +msgid "How to Make one PDF of all your Pictures (or Files)" +msgstr "すべての写真(またはファイル)を1つのPDFにする方法" + +#: ../../recipes-images.rst:251 8d8107e9ab0942c88a4abccc8f7092dc +msgid "" +"We show here **three scripts** that take a list of (image and other) " +"files and put them all in one PDF." +msgstr "以下に、(画像およびその他の)ファイルのリストを受け取り、それらをすべて1つのPDFに結合する3つのスクリプトを示します。" + +#: ../../recipes-images.rst:253 3df4d9f329c4453f9ca02d580bd2b3ba +msgid "**Method 1: Inserting Images as Pages**" +msgstr "**方法1:画像をページとして挿入する方法**" + +#: ../../recipes-images.rst:255 0bbd32458c89450b9f8adc7638530473 +msgid "" +"The first one converts each image to a PDF page with the same dimensions." +" The result will be a PDF with one page per image. It will only work for " +":ref:`supported image` file formats::" +msgstr "" +"最初の方法では、各画像を同じ寸法のPDFページに変換します。結果は、1つの画像に1ページのPDFとなります。ただし、 " +":ref:`サポートされている ` 画像ファイル形式のみで動作します。::" + +#: ../../recipes-images.rst:278 959fd3ad51dc45daa6b1f1310ad95124 +msgid "" +"This will generate a PDF only marginally larger than the combined " +"pictures' size. Some numbers on performance:" +msgstr "これにより、結合された画像のサイズとほとんど変わらないPDFが生成されます。パフォーマンスに関するいくつかの数値:" + +#: ../../recipes-images.rst:280 4996fb148b334e01b2a6baaedf636b52 +msgid "" +"The above script needed about 1 minute on my machine for 149 pictures " +"with a total size of 514 MB (and about the same resulting PDF size)." +msgstr "上記のスクリプトは、149枚の画像で合計サイズが514 MBの場合、私のマシン上で約1分かかりました(生成されたPDFのサイズもほぼ同じです)。" + +#: ../../recipes-images.rst:285 b684aa875c574e47bc3fc5f6c15767d1 +msgid "" +"Look `here `_ for a more " +"complete source code: it offers a directory selection dialog and skips " +"unsupported files and non-file entries." +msgstr "" +"より完全なソースコードは `こちら `_ " +"をご覧ください:ディレクトリ選択ダイアログを提供し、サポートされていないファイルやファイルでないエントリをスキップします。" + +#: ../../recipes-images.rst:287 03c855c845f74ac1bcf9ce3a7ee420c1 +msgid "" +"We might have used :meth:`Page.insert_image` instead of " +":meth:`Page.show_pdf_page`, and the result would have been a similar " +"looking file. However, depending on the image type, it may store **images" +" uncompressed**. Therefore, the save option *deflate = True* must be used" +" to achieve a reasonable file size, which hugely increases the runtime " +"for large numbers of images. So this alternative **cannot be " +"recommended** here." +msgstr "" +":meth:`Page.insert_image` の代わりに :meth:`Page.show_pdf_page` " +"を使用することもできましたが、結果として似たような外観のファイルになります。ただし、画像の種類によっては、非圧縮で画像を保存する場合があります。そのため、適切なファイルサイズを得るためには、保存オプションとして" +" `deflate = True` " +"を使用する必要がありますが、これにより大量の画像の場合、実行時間が大幅に増加します。そのため、この代替方法はお勧めできません。" + +#: ../../recipes-images.rst:289 41c532d33e8f42a39bb7560b25d00787 +msgid "**Method 2: Embedding Files**" +msgstr "**方法2:ファイルの埋め込み**" + +#: ../../recipes-images.rst:291 490c3dd2b7d64db682e9ea9f27e723df +msgid "" +"The second script **embeds** arbitrary files -- not only images. The " +"resulting PDF will have just one (empty) page, required for technical " +"reasons. To later access the embedded files again, you would need a " +"suitable PDF viewer that can display and / or extract embedded files::" +msgstr "2つ目のスクリプトは、画像だけでなく、任意のファイルを埋め込みます。技術的な理由で必要なので、結果として得られるPDFには1つだけ(空の)ページがあります。埋め込まれたファイルに後でアクセスするためには、埋め込まれたファイルを表示または抽出できる適切なPDFビューアが必要です::" + +#: ../../recipes-images.rst:316 aa85ab6a4d7f4c068e437bc62ecde2f7 +msgid "" +"This is by far the fastest method, and it also produces the smallest " +"possible output file size. The above pictures needed 20 seconds on my " +"machine and yielded a PDF size of 510 MB. Look `here " +"`_ for a more complete source code: it offers a directory" +" selection dialog and skips non-file entries." +msgstr "" +"これは断然最も高速な方法であり、可能な限り最小の出力ファイルサイズを生成します。上記の画像は私のマシンで20秒かかり、PDFのサイズは510 " +"MBになりました。より完全なソースコードは `こちら `_ " +"をご覧ください:ディレクトリ選択ダイアログを提供し、ファイルでないエントリをスキップします。" + +#: ../../recipes-images.rst:318 a5e66a78eac4476db9b235257835f444 +msgid "**Method 3: Attaching Files**" +msgstr "**方法3:ファイルの添付**" + +#: ../../recipes-images.rst:320 25dd45dc2fc5483686500405d2daa76a +msgid "" +"A third way to achieve this task is **attaching files** via page " +"annotations see `here `_ for the " +"complete source code." +msgstr "" +"このタスクを達成する第3の方法は、ページの注釈を介してファイルを添付する方法です。完全なソースコードについては `こちら " +"`_ をご覧ください。" + +#: ../../recipes-images.rst:322 fde0ae6309414feb89541d4ee22f054a +msgid "" +"This has a similar performance as the previous script and it also " +"produces a similar file size. It will produce PDF pages which show a " +"'FileAttachment' icon for each attached file." +msgstr "これは前のスクリプトと同様のパフォーマンスを持ち、似たようなファイルサイズも生成します。それぞれの添付ファイルに対して「FileAttachment」アイコンが表示されるPDFページを生成します。" + +#: ../../recipes-images.rst:326 807309e9b6e0443c892b7a6d9f92f145 +msgid "" +"Both, the **embed** and the **attach** methods can be used for " +"**arbitrary files** -- not just images." +msgstr "埋め込みと添付の両方の方法は、画像だけでなく任意のファイルにも使用できます。" + +#: ../../recipes-images.rst:328 26ac29237c8d43398109384507fd51a8 +msgid "" +"We strongly recommend using the awesome package `PySimpleGUI " +"`_ to display a progress meter for" +" tasks that may run for an extended time span. It's pure Python, uses " +"Tkinter (no additional GUI package) and requires just one more line of " +"code!" +msgstr "" +"長時間にわたるタスクに対して進捗メーターを表示するために、素晴らしいパッケージ `PySimpleGUI " +"`_ " +"の使用を強くお勧めします。これは純粋なPythonであり、Tkinter(追加のGUIパッケージは不要)を使用し、たった1行のコードを追加するだけで使えます!" + +#: ../../recipes-images.rst:342 ebad9685f21841c2b8ee9ac84740ecf2 +msgid "How to Create Vector Images" +msgstr "ベクター画像の作成方法" + +#: ../../recipes-images.rst:343 40ce7c0f9b0044e3939ba507d3f936fd +msgid "" +"The usual way to create an image from a document page is " +":meth:`Page.get_pixmap`. A pixmap represents a raster image, so you must " +"decide on its quality (i.e. resolution) at creation time. It cannot be " +"changed later." +msgstr "" +"ドキュメントページから画像を作成する通常の方法は、 :meth:`Page.get_pixmap` " +"を使用することです。ピクマップはラスター画像を表しますので、作成時にその品質(つまり解像度)を決定する必要があります。後から変更することはできません。" + +#: ../../recipes-images.rst:345 35c05b06ad73440aa94313c3f527dc97 +msgid "" +"PyMuPDF also offers a way to create a **vector image** of a page in SVG " +"format (scalable vector graphics, defined in XML syntax). SVG images " +"remain precise across zooming levels (of course with the exception of any" +" raster graphic elements embedded therein)." +msgstr "PyMuPDFはまた、SVG形式(XML構文で定義されたスケーラブルベクターグラフィックス)でページのベクター画像を作成する方法を提供しています。SVG画像はズームレベルで正確性を保持します(もちろん、埋め込まれたラスターグラフィックス要素を除く)。" + +#: ../../recipes-images.rst:347 3664556134d441ee8ca7d943fe0e97b5 +msgid "" +"Instruction *svg = page.get_svg_image(matrix=pymupdf.Identity)* delivers " +"a UTF-8 string *svg* which can be stored with extension \".svg\"." +msgstr "" +"指示 `svg = page.get_svg_image(matrix=pymupdf.Identity)` はUTF-8文字列 *svg* " +"を提供します。これは \".svg\" の拡張子で保存できます。" + +#: ../../recipes-images.rst:363 2e788e908541456db558a6d1e638447c +msgid "How to Convert Images" +msgstr "画像の変換方法" + +#: ../../recipes-images.rst:364 eaecfbf3be504291831504c4b64f18ba +msgid "" +"Just as a feature among others, PyMuPDF's image conversion is easy. It " +"may avoid using other graphics packages like PIL/Pillow in many cases." +msgstr "PyMuPDFの画像変換も他の機能と同様に簡単です。多くの場合、PIL/Pillowなどの他のグラフィックスパッケージを使用する必要がないかもしれません。" + +#: ../../recipes-images.rst:366 c3f0f2ee44ad425e8ec5c957c779fbd5 +msgid "Notwithstanding that interfacing with Pillow is almost trivial." +msgstr "ただし、Pillowとの連携はほとんど自明です。" + +#: ../../recipes-images.rst:369 6b90048732484c0bb7bfd07609cd24ce +msgid "**Input Formats**" +msgstr "**入力フォーマット**" + +#: ../../recipes-images.rst:369 07a5c9395a4c45299d51e3bd83222984 +msgid "**Output Formats**" +msgstr "**出力フォーマット**" + +#: ../../recipes-images.rst:369 8350dc457b254b8291594699ce54839d +msgid "**Description**" +msgstr "**説明**" + +#: ../../recipes-images.rst:371 0979a1ef86ea4e20a05e557a7442a3b2 +msgid "BMP" +msgstr "" + +#: ../../recipes-images.rst:371 ../../recipes-images.rst:373 +#: ../../recipes-images.rst:374 ../../recipes-images.rst:375 +#: ../../recipes-images.rst:376 ../../recipes-images.rst:383 +#: ../../recipes-images.rst:384 0bacf30757bb4f319e65010549478182 +#: 3ad593408aa445ce99d7f7d9479bca6d 55dc892386e3401f95cd320419c8edc9 +#: 7df71f3343174cfb84397536f4af3a84 84d138cd65ae497681d5c605ce8f2af7 +#: 95f327688bc1431c9523ec2101e353b0 c96232bb6e7340e2a5eb85364348a2a0 +msgid "." +msgstr "" + +#: ../../recipes-images.rst:371 5897545255814fd5ba5a65241d15eced +msgid "Windows Bitmap" +msgstr "" + +#: ../../recipes-images.rst:372 41f6b3399986413abe65e2869c4053ea +#: d00191a3adf74e71b0459460b1c2e159 +msgid "JPEG" +msgstr "" + +#: ../../recipes-images.rst:372 d3b57018b2e3452080901854b19d1342 +msgid "Joint Photographic Experts Group" +msgstr "" + +#: ../../recipes-images.rst:373 a3bdd964789b43cbad6247e119a6c54a +msgid "JXR" +msgstr "" + +#: ../../recipes-images.rst:373 bbea31de8177475b8158cf220a94abcb +msgid "JPEG Extended Range" +msgstr "" + +#: ../../recipes-images.rst:374 809526ac7ac74d2fb28e089e82a225d7 +msgid "JPX/JP2" +msgstr "" + +#: ../../recipes-images.rst:374 9bf0ef68929647cd85f92ab8f862d849 +msgid "JPEG 2000" +msgstr "" + +#: ../../recipes-images.rst:375 c791169bcfe4452e80b2573539c06ef1 +msgid "GIF" +msgstr "" + +#: ../../recipes-images.rst:375 4a2960cefc3b47e496db085a39846137 +msgid "Graphics Interchange Format" +msgstr "" + +#: ../../recipes-images.rst:376 4dcf6574e1574eaf922b9851c56ab035 +msgid "TIFF" +msgstr "" + +#: ../../recipes-images.rst:376 df65730301314c1f95cb3c5315179281 +msgid "Tagged Image File Format" +msgstr "" + +#: ../../recipes-images.rst:377 492608579f5a466e93c3441942845056 +#: 671fbf03f46c46f6a82cc000cfea3b33 +msgid "PNG" +msgstr "" + +#: ../../recipes-images.rst:377 6800194509e449b699a2f117857a1789 +msgid "Portable Network Graphics" +msgstr "" + +#: ../../recipes-images.rst:378 12727b15f7504523b43a9f77cc9a9923 +#: 221349ddd94740f18dc2a986e2f63e53 +msgid "PNM" +msgstr "" + +#: ../../recipes-images.rst:378 7b091d14802241e4bba073bd5e0dc0d6 +msgid "Portable Anymap" +msgstr "" + +#: ../../recipes-images.rst:379 bf3bacb00fbe43da9d560e7fbe4f78f4 +#: ed77816ad90043ff838934fe85431860 +msgid "PGM" +msgstr "" + +#: ../../recipes-images.rst:379 37c68be55f8c46a3a92da90ed0894ded +msgid "Portable Graymap" +msgstr "" + +#: ../../recipes-images.rst:380 6f4d332905c643b9899a52fd96de577c +#: 8530d6f66aaf4ed8b8e74e8de3aaf662 +msgid "PBM" +msgstr "" + +#: ../../recipes-images.rst:380 78f4ada30b5940558d7575226b665930 +msgid "Portable Bitmap" +msgstr "" + +#: ../../recipes-images.rst:381 18ad666221cb4e268aac5b2adcb051bc +#: e4ac66013a6b40ef8f5f8dbcc44cbf3d +msgid "PPM" +msgstr "" + +#: ../../recipes-images.rst:381 46c1b8ed50c14c30b04706c0bcee355d +msgid "Portable Pixmap" +msgstr "" + +#: ../../recipes-images.rst:382 4eee3125b6b24045bf368e25ea1ccfe7 +#: 65190b879c2b49ce893b4dd5d72947a4 +msgid "PAM" +msgstr "" + +#: ../../recipes-images.rst:382 9cb3d8054e01459bad986a5ff4b72d76 +msgid "Portable Arbitrary Map" +msgstr "" + +#: ../../recipes-images.rst:383 172c0b24c3c74558b48cecb435b62a71 +msgid "PSD" +msgstr "" + +#: ../../recipes-images.rst:383 d957051043af4e16a05d1bab3b7846e0 +msgid "Adobe Photoshop Document" +msgstr "" + +#: ../../recipes-images.rst:384 61adad659533407383e3030a6a26682a +msgid "PS" +msgstr "" + +#: ../../recipes-images.rst:384 bd96837df2984e15953ba98f2ce8e7ca +msgid "Adobe Postscript" +msgstr "" + +#: ../../recipes-images.rst:387 2e9599f2f28e412f95d48fc8a90716c0 +msgid "The general scheme is just the following two lines::" +msgstr "一般的なスキームは以下の2行です::" + +#: ../../recipes-images.rst:392 6a1955099311437ea3d6c722779fe787 +msgid "**Remarks**" +msgstr "**コメント**" + +#: ../../recipes-images.rst:394 5e7c8ed9975341bb9b398e94977a1c35 +msgid "" +"The **input** argument of *pymupdf.Pixmap(arg)* can be a file or a bytes " +"/ io.BytesIO object containing an image." +msgstr "`pymupdf.Pixmap(arg)` の入力引数は、画像を含むファイルまたはbytes/io.BytesIOオブジェクトを指定できます。" + +#: ../../recipes-images.rst:395 29c03f633e3c4f85905a883594780a1a +msgid "" +"Instead of an output **file**, you can also create a bytes object via " +"*pix.tobytes(\"yyy\")* and pass this around." +msgstr "出力ファイルの代わりに、 `pix.tobytes(\"yyy\")` を使用してbytesオブジェクトを作成し、それを渡すこともできます。" + +#: ../../recipes-images.rst:396 08d941cbdcde49e7894c7cd54dceeb12 +msgid "" +"As a matter of course, input and output formats must be compatible in " +"terms of colorspace and transparency. The ``Pixmap`` class has batteries " +"included if adjustments are needed." +msgstr "" +"もちろん、入力と出力のフォーマットは、色空間と透過性の面で互換性が必要です。 :ref:`Pixmap` " +"クラスには、必要に応じて調整を行うための組み込みの機能が備わっています。" + +#: ../../recipes-images.rst:399 3264d7f4236e472d9acb18b03a6c378f +msgid "**Convert JPEG to Photoshop**::" +msgstr "**JPEGをPhotoshopに変換する**::" + +#: ../../recipes-images.rst:405 03352131b8ff486f8481486724d51437 +msgid "" +"Convert **JPEG to Tkinter PhotoImage**. Any **RGB / no-alpha** image " +"works exactly the same. Conversion to one of the **Portable Anymap** " +"formats (PPM, PGM, etc.) does the trick, because they are supported by " +"all Tkinter versions::" +msgstr "アルファ付きのPNGをTkinterのPhotoImageに変換してください。これには、PPMへの変換を行う前にアルファバイトを削除する必要があります。" + +#: ../../recipes-images.rst:412 1d8ba60c17ef4537a879f8880ddf842e +msgid "" +"Convert **PNG with alpha** to Tkinter PhotoImage. This requires " +"**removing the alpha bytes**, before we can do the PPM conversion::" +msgstr "アルファ付きのPNGをTkinterのPhotoImageに変換します。このためには、PPMへの変換を行う前にアルファバイトを取り除く必要があります。" + +#: ../../recipes-images.rst:429 e998a366000347d1b54bed12c09423fd +msgid "How to Use Pixmaps: Gluing Images" +msgstr "ピクスマップの使用方法:画像の結合" + +#: ../../recipes-images.rst:431 f97460e4da964505a4f7c1625d462e07 +msgid "" +"This shows how pixmaps can be used for purely graphical, non-document " +"purposes. The script reads an image file and creates a new image which " +"consist of 3 * 4 tiles of the original::" +msgstr "" +"これは、ピクスマップを純粋にグラフィカルで、文書ではない目的で使用する方法を示しています。スクリプトは画像ファイルを読み込み、元の画像の3 * " +"4タイルからなる新しい画像を作成します。" + +#: ../../recipes-images.rst:451 4e2ddaf362f641b79cccb12467620742 +msgid "This is the input picture:" +msgstr "これが入力画像です。" + +#: ../../recipes-images.rst:456 517480af4baf43568e79bdb13b6ab85c +msgid "Here is the output:" +msgstr "こちらが出力結果です。" + +#: ../../recipes-images.rst:473 172a0fbba4984e9cb6d93f6ed02742fa +msgid "How to Use Pixmaps: Making a Fractal" +msgstr "ピクスマップの使用方法:フラクタルの作成" + +#: ../../recipes-images.rst:475 18c71aef80304611bec96e98bcd38669 +msgid "" +"Here is another Pixmap example that creates **Sierpinski's Carpet** -- a " +"fractal generalizing the **Cantor Set** to two dimensions. Given a square" +" carpet, mark its 9 sub-suqares (3 times 3) and cut out the one in the " +"center. Treat each of the remaining eight sub-squares in the same way, " +"and continue *ad infinitum*. The end result is a set with area zero and " +"fractal dimension 1.8928..." +msgstr "" +"ここでは、もう一つのピクスマップの例を紹介します。シェルピンスキーのカーペット(Sierpinski's " +"Carpet)と呼ばれるフラクタルで、カントール集合を2次元に一般化したものです。正方形のカーペットにおいて、9つの部分正方形(3行3列)をマークし、中央の正方形を切り抜きます。残りの8つの部分正方形に対しても同じ操作を行い、無限に続けます。その結果、面積がゼロであり、フラクタル次元は1.8928...となります。" + +#: ../../recipes-images.rst:477 53c09705945d4ed3bec2d17b7c9f1581 +msgid "" +"This script creates an approximate image of it as a PNG, by going down to" +" one-pixel granularity. To increase the image precision, change the value" +" of n (precision)::" +msgstr "このスクリプトは、1ピクセルの精度にまで細かくなったPNGの近似画像を作成します。画像の精度を高めるには、n(精度)の値を変更してください::" + +#: ../../recipes-images.rst:531 68789bca8b624ff491e93d5a400ff3ac +msgid "The result should look something like this:" +msgstr "結果は以下のようになるはずです:" + +#: ../../recipes-images.rst:541 93a642a1f6fb4a578a82d7b7baf97ad3 +msgid "How to Interface with NumPy" +msgstr "NumPyとのインターフェース方法" + +#: ../../recipes-images.rst:543 8f95a3058d5348dfa20391d4596a676e +msgid "" +"This shows how to create a PNG file from a numpy array (several times " +"faster than most other methods)::" +msgstr "これは、NumPy配列からPNGファイルを作成する方法を示しています(他の方法よりも数倍高速です)。" + +#: ../../recipes-images.rst:570 fe1a53c3297d463785209cfc2a811f98 +msgid "How to Add Images to a PDF Page" +msgstr "PDFページに画像を追加する方法" + +#: ../../recipes-images.rst:572 c4977664b53940488eefb7c79db6760f +msgid "" +"There are two methods to add images to a PDF page: " +":meth:`Page.insert_image` and :meth:`Page.show_pdf_page`. Both methods " +"have things in common, but there are also differences." +msgstr "" +"PDFページに画像を追加するには、2つの方法があります: :meth:`Page.insert_image` と " +":meth:`Page.show_pdf_page` です。両方の方法には共通点がありますが、違いもあります。" + +#: ../../recipes-images.rst:575 05867b3cbf1e4cfbb8b56cd82480ca49 +msgid "**Criterion**" +msgstr "**基準**" + +#: ../../recipes-images.rst:575 1a4fb4ba40034ec697f3f519a1472c05 +msgid ":meth:`Page.insert_image`" +msgstr "" + +#: ../../recipes-images.rst:575 c77d016df1554917b7a351e1213d6d30 +msgid ":meth:`Page.show_pdf_page`" +msgstr "" + +#: ../../recipes-images.rst:577 adaf610a08404152879eecde101b3358 +msgid "displayable content" +msgstr "表示可能なコンテンツ" + +#: ../../recipes-images.rst:577 be7f0aa395a84ea0967b19ea32e118ad +msgid "image file, image in memory, pixmap" +msgstr "画像ファイル、メモリ内の画像、ピクスマップ " + +#: ../../recipes-images.rst:577 5dcf084c5bf14f44b42f33e83b39ffb0 +msgid "PDF page" +msgstr "PDFページ" + +#: ../../recipes-images.rst:578 b21f95bfb2da4d1da8dc1a8de8fcd793 +msgid "display resolution" +msgstr "表示解像度 " + +#: ../../recipes-images.rst:578 e5fb57d9b06e415496d5fdd4ad9d2860 +msgid "image resolution" +msgstr "画像の解像度 " + +#: ../../recipes-images.rst:578 f162cd0a2ec54a14bc65d19390efb3ab +msgid "vectorized (except raster page content)" +msgstr "ベクトル化(ラスターページコンテンツを除く)" + +#: ../../recipes-images.rst:579 8f71139fa4184451a269095eaf3b024d +msgid "rotation" +msgstr "回転" + +#: ../../recipes-images.rst:579 e07ae649fe5a43b98128e34d2b7295f4 +msgid "0, 90, 180 or 270 degrees" +msgstr "0度、90度、180度または270度" + +#: ../../recipes-images.rst:579 6f5358e1760c4bfca5a3a418b8c268ff +msgid "any angle" +msgstr "任意の角度" + +#: ../../recipes-images.rst:580 fd811b11808e4d7fa0b6e434c51cc9de +msgid "clipping" +msgstr "クリッピング" + +#: ../../recipes-images.rst:580 c2558fbbf1774081b28e0116ee882210 +msgid "no (full image only)" +msgstr "いいえ(全体の画像のみ)" + +#: ../../recipes-images.rst:580 ../../recipes-images.rst:585 +#: a907ff72bff14ccab44936c7756bc1a0 dc5568a67d5546399d10903c2198e7b1 +msgid "yes" +msgstr "はい" + +#: ../../recipes-images.rst:581 9be3b939efd74eb9b064149922973fd7 +msgid "keep aspect ratio" +msgstr "アスペクト比を保持" + +#: ../../recipes-images.rst:581 90e967a63c944ad2a4d5d5e1a1e1f586 +#: ae736e0e2d874c22a9ff0e8ec0eafd07 +msgid "yes (default option)" +msgstr "はい(デフォルトオプション)" + +#: ../../recipes-images.rst:582 149cd46d36314fd9983779124a4b00a5 +msgid "transparency (water marking)" +msgstr "透明性(ウォーターマーキング)" + +#: ../../recipes-images.rst:582 f5ee92e1f762447294b068b3151cf8ba +msgid "depends on the image" +msgstr "画像による" + +#: ../../recipes-images.rst:582 48bce5fbaea9451c8a0205e90189408b +msgid "depends on the page" +msgstr "ページによる" + +#: ../../recipes-images.rst:583 b0cc7970f86848258e39444054ac610b +msgid "location / placement" +msgstr "位置/配置 " + +#: ../../recipes-images.rst:583 3d965dc24e154178970ddf5ed7662f16 +#: 5a690d8091e446ac921b5a5e97de9884 +msgid "scaled to fit target rectangle" +msgstr "ターゲットの矩形にフィットするようにスケーリング" + +#: ../../recipes-images.rst:584 78fa2e33851d443584542c9de0f6ea73 +msgid "performance" +msgstr "パフォーマンス" + +#: ../../recipes-images.rst:584 07bc6beb376d4561bf3ed4427bfeada8 +#: 883bd897a4de4ed2a0f2af42a61dd792 +msgid "automatic prevention of duplicates;" +msgstr "重複の自動防止" + +#: ../../recipes-images.rst:585 7ef2987d846a48a9a38bb80ba9ee36be +msgid "multi-page image support" +msgstr "マルチページ画像のサポート" + +#: ../../recipes-images.rst:585 02be9acec59f4fa3adf501ba7f63438f +msgid "no" +msgstr "いいえ" + +#: ../../recipes-images.rst:586 018cb20575654c4aa10c951f189464f3 +msgid "ease of use" +msgstr "使いやすさ" + +#: ../../recipes-images.rst:586 c28d925902ab48b389c171a05aca19c6 +msgid "simple, intuitive;" +msgstr "シンプルで直感的" + +#: ../../recipes-images.rst:586 afc67eb6d74a495c9dc4e53eb661ae08 +msgid "" +"simple, intuitive; **usable for all document types** (including images!) " +"after conversion to PDF via :meth:`Document.convert_to_pdf`" +msgstr "" +"シンプルで直感的; :meth:`Document.convert_to_pdf` " +"を介してPDFに変換後、すべてのドキュメントタイプ(画像を含む!)に使用可能" + +#: ../../recipes-images.rst:592 e4d67c78bd67458c84faeb0c8e6ea5f5 +msgid "" +"Basic code pattern for :meth:`Page.insert_image`. **Exactly one** of the " +"parameters **filename / stream / pixmap** must be given, if not re-" +"inserting an existing image::" +msgstr "" +":meth:`Page.insert_image` の基本的なコードパターン。 **filename / stream / pixmap** " +"のうち、1つだけを指定する必要があります(既存の画像を再挿入しない場合)::" + +#: ../../recipes-images.rst:607 a7bbf44ff07e494bb6788af779c4f19f +msgid "" +"Basic code pattern for :meth:`Page.show_pdf_page`. Source and target PDF " +"must be different :ref:`Document` objects (but may be opened from the " +"same file)::" +msgstr "" +":meth:`Page.show_pdf_page` の基本的なコードパターン。ソースとターゲットのPDFは異なる :ref:`Document`" +" オブジェクトである必要があります(ただし、同じファイルから開くこともできます)::" + +#: ../../recipes-images.rst:623 247c3cb945de42e3977137d8975573b2 +msgid "How to Use Pixmaps: Checking Text Visibility" +msgstr "ピクスマップの使用方法:テキストの表示可否の確認" + +#: ../../recipes-images.rst:625 265feb286a04491a9466ccd75f125d7a +msgid "" +"Whether or not a given piece of text is actually visible on a page " +"depends on a number of factors:" +msgstr "特定のテキストが実際にページ上で表示されるかどうかは、いくつかの要因に依存します:" + +#: ../../recipes-images.rst:627 cb95f70cf5474c549b283bb06a5b637d +msgid "" +"Text is not covered by another object but may have the same color as the " +"background i.e., white-on-white etc." +msgstr "テキストは他のオブジェクトによって隠されていないが、背景と同じ色を持っている場合があります。例えば、白文字が白地になっているなどです。" + +#: ../../recipes-images.rst:628 d8c9eec28274456bbae7dfd39049394b +msgid "" +"Text may be covered by an image or vector graphics. Detecting this is an " +"important capability, for example to uncover badly anonymized legal " +"documents." +msgstr "テキストは画像やベクトルグラフィックスによって隠されている場合があります。これを検出することは重要な機能であり、例えば不適切に匿名化された法的文書を解明するために使用されます。" + +#: ../../recipes-images.rst:629 54573a8eea4e421ca5e82c476f55b0e9 +msgid "" +"Text is created hidden. This technique is usually used by OCR tools to " +"store the recognized text in an invisible layer on the page." +msgstr "テキストが非表示に作成される場合があります。これは通常、OCRツールが認識されたテキストをページ上の非表示レイヤーに保存するために使用されます。" + +#: ../../recipes-images.rst:631 421ec6799ba94504a184bb6003eeeac8 +msgid "" +"The following shows how to detect situation 1. above, or situation 2. if " +"the covering object is unicolor::" +msgstr "以下では、1.の状況を検出する方法、または2.の状況を検出する方法(カバーしているオブジェクトが単色である場合)を示します::" + +#: ../../recipes-images.rst:645 a958215274f14ffc9a1039c6445e57ec +msgid "" +"Method :meth:`Pixmap.color_topusage` returns a tuple `(ratio, pixel)` " +"where 0 < ratio <= 1 and *pixel* is the pixel value of the color. Please " +"note that we create a **pixmap only once**. This can save a lot of " +"processing time if there are multiple hit rectangles." +msgstr "" +"メソッド :meth:`Pixmap.color_topusage` は、タプル `(ratio, pixel)` " +"(比率、ピクセル)を返します。ここで、0 < ratio <= " +"1であり、pixelは色のピクセル値です。複数のヒット矩形がある場合、ピクスマップを1回だけ作成することに注意してください。これにより、処理時間を大幅に節約できます。" + +#: ../../recipes-images.rst:647 675626a41d9f4ee49cee5bb637261c3f +msgid "" +"The logic of the above code is: If the needle's rectangle is (\"almost\":" +" > 95%) unicolor, then the text cannot be visible. A typical result for " +"visible text returns the color of the background (mostly white) and a " +"ratio around 0.7 to 0.8, for example `(0.685, b'\\xff\\xff\\xff')`." +msgstr "" +"上記のコードのロジックは次の通りです:もしニードルの矩形が「ほぼ」(95%以上)単色であれば、テキストは表示されないと判断します。テキストが表示される場合の典型的な結果は、背景の色(主に白)と比率が0.7から0.8程度で返されることがあります。例えば、`(0.685," +" b'xffxffxff')` のような結果が得られます。" + +#: ../../footer.rst:60 20230f8c88394d47a41e985bb5942765 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-journalling.mo b/docs/locales/ja/LC_MESSAGES/recipes-journalling.mo new file mode 100644 index 000000000..93c897003 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-journalling.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-journalling.po b/docs/locales/ja/LC_MESSAGES/recipes-journalling.po new file mode 100644 index 000000000..152923ac0 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-journalling.po @@ -0,0 +1,187 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 1dec65ff7f144a209a6670156424f1bd +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 a2496778c1824e0d8507743948d5699d +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 d11779a821a640a9b5f1a505bdf860e4 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-journalling.rst:7 71340aa0c4f8460eb610827b5dd6a0e2 +msgid "Journalling" +msgstr "ジャーナリング" + +#: ../../recipes-journalling.rst:10 982db193623640f18388d6961fe5fc64 +msgid "" +"Starting with version 1.19.0, journalling is possible when updating PDF " +"documents." +msgstr "バージョン1.19.0から、PDFドキュメントの更新時にジャーナリングが可能になりました。" + +#: ../../recipes-journalling.rst:12 239c97bb5a8f4f329aa3e2fcd8a01db7 +msgid "" +"Journalling is a logging mechanism which permits either **reverting** or " +"**re-applying** changes to a PDF. Similar to LUWs \"Logical Units of " +"Work\" in modern database systems, one can group a set of updates into an" +" \"operation\". In MuPDF journalling, an operation plays the role of a " +"LUW." +msgstr "ジャーナリングは、PDFへの変更を元に戻したり再適用したりするためのログ記録メカニズムです。近代的なデータベースシステムにおける「論理ユニット・オブ・ワーク(LUW)」のように、一連の更新を「操作」としてグループ化することができます。MuPDFジャーナリングでは、操作がLUWの役割を果たします。" + +#: ../../recipes-journalling.rst:14 2e4f6dca544a400cb7dc16a559d9e182 +msgid "" +"In contrast to LUW implementations found in database systems, MuPDF " +"journalling happens on a **per document level**. There is no support for " +"simultaneous updates across multiple PDFs: one would have to establish " +"one's own logic here." +msgstr "データベースシステムで見られるLUWの実装とは異なり、MuPDFジャーナリングはドキュメント単位で行われます。複数のPDFで同時に更新するためのサポートはありません。それぞれのPDFに対して独自のロジックを確立する必要があります。" + +#: ../../recipes-journalling.rst:16 001589c65f8641d5a1bc838871c82e35 +msgid "" +"Journalling must be *enabled* via a document method. Journalling is " +"possible for existing or new documents. Journalling **can be disabled " +"only** by closing the file." +msgstr "ジャーナリングは、ドキュメントメソッドを介して有効にする必要があります。既存のドキュメントまたは新規ドキュメントのジャーナリングが可能です。ファイルを閉じることでのみジャーナリングを無効にすることができます。" + +#: ../../recipes-journalling.rst:17 a4f5223ab97d41c0a62d52cebe7f3f76 +msgid "" +"Once enabled, every change must happen inside an *operation* -- otherwise" +" an exception is raised. An operation is started and stopped via document" +" methods. Updates happening between these two calls form an LUW and can " +"thus collectively be rolled back or re-applied, or, in MuPDF terminology " +"\"undone\" resp. \"redone\"." +msgstr "有効になると、すべての変更は操作の内部で行われる必要があります。そうでない場合、例外が発生します。操作はドキュメントメソッドを介して開始および停止されます。これらの呼び出し間で行われる更新はLUWを形成し、集合的に元に戻すか再適用するために使用できます。MuPDFの用語で言えば、「元に戻す」または「やり直す」ことができます。" + +#: ../../recipes-journalling.rst:18 25ef245260a64a689f1ba395d0478ac2 +msgid "" +"At any point, the journalling status can be queried: whether journalling " +"is active, how many operations have been recorded, whether \"undo\" or " +"\"redo\" is possible, the current position inside the journal, etc." +msgstr "いつでも、ジャーナリングの状態をクエリできます。ジャーナリングがアクティブかどうか、いくつの操作が記録されたか、元に戻すかやり直すかが可能か、ジャーナル内の現在位置などがわかります。" + +#: ../../recipes-journalling.rst:19 892e3b69463b47138a6b723746d7696b +msgid "" +"The journal can be **saved to** or **loaded from** a file. These are " +"document methods." +msgstr "ジャーナルはファイルに保存またはロードできます。これらはドキュメントメソッドです。" + +#: ../../recipes-journalling.rst:20 cdc871b87c994a95b38a0893539bf950 +msgid "" +"When loading a journal file, compatibility with the document is checked " +"and journalling is automatically enabled upon success." +msgstr "ジャーナルファイルをロードする際には、ドキュメントとの互換性がチェックされ、成功した場合に自動的にジャーナリングが有効になります。" + +#: ../../recipes-journalling.rst:21 1196a86518ec42bca06599556dfdd95a +msgid "" +"For an **existing** PDF being journalled, a special new save method is " +"available: :meth:`Document.save_snapshot`. This performs a special " +"incremental save that includes all journalled updates so far. If its " +"journal is saved at the same time (immediately after the document " +"snapshot), then document and journal are in sync and can later on be used" +" together to undo or redo operations or to continue journalled updates --" +" just as if there had been no interruption." +msgstr "" +"ジャーナリングされている既存のPDFに対しては、特別な新しい保存メソッドが利用可能です。:meth:`Document.save_snapshot`" +" " +"これにより、これまでにジャーナリングされたすべての更新を含む特別なインクリメンタル保存が行われます。そのジャーナルも同時に保存される場合(ドキュメントスナップショットの直後に保存)、ドキュメントとジャーナルは同期され、後で操作を元に戻したりやり直したりするために一緒に使用できるようになります。まるで中断がなかったかのように。" + +#: ../../recipes-journalling.rst:22 9d858c95d29142a3a507fa019d4e9d37 +msgid "" +"The snapshot PDF is a valid PDF in every aspect and fully usable. If the " +"document is however changed in any way without using its journal file, " +"then a desynchronization will take place and the journal is rendered " +"unusable." +msgstr "スナップショットPDFは、あらゆる側面で有効なPDFであり、完全に使用可能です。ただし、ジャーナルファイルを使用せずにドキュメントが変更された場合、同期が取れなくなり、ジャーナルは利用できなくなります。" + +#: ../../recipes-journalling.rst:23 4e9981f3904e441abb451168eda31059 +msgid "" +"Snapshot files are structured like incremental updates. Nevertheless, the" +" internal journalling logic requires, that saving **must happen to a new " +"file**. So the user should develop a file naming convention to support " +"recognizable relationships between an original PDF, like `original.pdf` " +"and its snapshot sets, like `original-snap1.pdf` / `original-snap1.log`, " +"`original-snap2.pdf` / `original-snap2.log`, etc." +msgstr "" +"スナップショットファイルはインクリメンタルな更新のように構造化されています。ただし、内部のジャーナリングロジックでは、新しいファイルに保存する必要があります。したがって、ユーザーはオリジナルのPDF(例:" +" `original.pdf` )とそのスナップショットセット(例: `original-snap1.pdf` / `original-" +"snap1.log` 、 `original-snap2.pdf` / `original-snap2.log` " +"など)の間に認識可能な関係をサポートするためのファイル命名規則を開発する必要があります。" + +#: ../../recipes-journalling.rst:26 264672a5144c439aa2688cf062e5a892 +msgid "Example Session 1" +msgstr "例セッション1" + +#: ../../recipes-journalling.rst:27 ../../recipes-journalling.rst:98 +#: 8fc51b450a4641898c023ca78938b76c b070a7f183aa43c1952ff72bd0f73150 +msgid "Description:" +msgstr "説明:" + +#: ../../recipes-journalling.rst:29 47b36e04f5904ef6b8e643c6c2931dc9 +msgid "" +"Make a new PDF and enable journalling. Then add a page and some text " +"lines -- each as a separate operation." +msgstr "新しいPDFを作成し、ジャーナリングを有効にします。次に、ページを追加し、いくつかのテキスト行を別々の操作として追加します。" + +#: ../../recipes-journalling.rst:30 d39c4b27b3da4f7d87244024fe21133e +msgid "" +"Navigate within the journal, undoing and redoing these updates and " +"displaying status and file results::" +msgstr "ジャーナル内を移動し、これらの更新を元に戻したりやり直したりし、状態やファイルの結果を表示します::" + +#: ../../recipes-journalling.rst:97 7f4dff6cd221460f9f0864bb65418846 +msgid "Example Session 2" +msgstr "例セッション2" + +#: ../../recipes-journalling.rst:100 739153cf7c154739af31ab2abb5d842f +msgid "" +"Similar to previous, but after undoing some operations, we now add a " +"different update. This will cause:" +msgstr "前回と同様ですが、いくつかの操作を元に戻した後、異なる更新を追加します。これにより、次のことが起こります:" + +#: ../../recipes-journalling.rst:102 d1dfb6a4d5d948b6981dc9765882191b +msgid "permanent removal of the undone journal entries" +msgstr "元に戻されたジャーナルエントリが永久に削除されます。" + +#: ../../recipes-journalling.rst:103 19eab643d86a4d72b86f188ae3dd2592 +msgid "the new update operation will become the new last entry." +msgstr "新しい更新操作は新しい最後のエントリになります。" + +#: ../../footer.rst:60 187a959e74914bfe8f8d8164226852bd +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.mo b/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.mo new file mode 100644 index 000000000..d6f38323d Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.po b/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.po new file mode 100644 index 000000000..b9d33bef1 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-low-level-interfaces.po @@ -0,0 +1,584 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 7f129fba564145f8954ab3f7e226f0ec +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 c67d240759fd40d187a0308599adeca9 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 076a135f5e7c4cc5bd9192d710ce8cdb +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-low-level-interfaces.rst:7 4f7ca622e5254dcb820080c8ec29fe16 +msgid "Low-Level Interfaces" +msgstr "低レベルインターフェース" + +#: ../../recipes-low-level-interfaces.rst:10 1fea1e3b7f2f4f31a3c902bf61e962c9 +msgid "" +"Numerous methods are available to access and manipulate PDF files on a " +"fairly low level. Admittedly, a clear distinction between \"low level\" " +"and \"normal\" functionality is not always possible or subject to " +"personal taste." +msgstr "PDFファイルにアクセスして操作するための多くのメソッドが低レベルで利用可能です。正直なところ、「低レベル」機能と「通常」機能の明確な区別は常にできるわけではなく、個人の好みによるところもあります。" + +#: ../../recipes-low-level-interfaces.rst:12 1de25fa4e10d4576a94ca8b7096618bd +msgid "" +"It also may happen, that functionality previously deemed low-level is " +"later on assessed as being part of the normal interface. This has " +"happened in v1.14.0 for the class :ref:`Tools` - you now find it as an " +"item in the Classes chapter." +msgstr "" +"また、以前は低レベルと考えられていた機能が後に通常のインターフェースの一部として評価されることもあります。例えば、バージョン1.14.0ではクラス" +" :ref:`Tools` についてそのようなことが起きており、現在では「Classes」のセクションで見つけることができます。" + +#: ../../recipes-low-level-interfaces.rst:14 1007d8a4ff5c4147805c1dc892b9b29c +msgid "" +"It is a matter of documentation only in which chapter of the " +"documentation you find what you are looking for. Everything is available " +"and always via the same interface." +msgstr "何を探しているかに関しては、ドキュメントのどのセクションにあるかはドキュメンテーションのみの問題です。すべての情報は同じインターフェースを介して常に利用可能です。" + +#: ../../recipes-low-level-interfaces.rst:19 f087f3a96eec4b1a9dd86d6e8a3cffd7 +msgid "How to Iterate through the :data:`xref` Table" +msgstr ":data:`xref` テーブルのイテレーション方法" + +#: ../../recipes-low-level-interfaces.rst:20 0027f20fcd894b3f921d205dbf3c6fbb +msgid "" +"A PDF's :data:`xref` table is a list of all objects defined in the file. " +"This table may easily contain many thousands of entries -- the manual " +":ref:`AdobeManual` for example has 127,000 objects. Table entry \"0\" is " +"reserved and must not be touched. The following script loops through the " +":data:`xref` table and prints each object's definition::" +msgstr "" +"PDFの :data:`xref` " +"テーブルはファイル内で定義されているすべてのオブジェクトのリストです。このテーブルには非常に多くのエントリが含まれることがあります - 例えば " +":ref:`AdobeManual` " +"マニュアルには127,000のオブジェクトが含まれています。テーブルのエントリ「0」は予約されており、触れてはいけません" + +#: ../../recipes-low-level-interfaces.rst:32 bf35a4cef5db4592ba20e8879d737aff +msgid "This produces the following output::" +msgstr "これにより、以下の出力が生成されます::" + +#: ../../recipes-low-level-interfaces.rst:72 b83975ea49a84c1a8993fd6918a32c64 +msgid "A PDF object definition is an ordinary ASCII string." +msgstr "PDFオブジェクトの定義は通常のASCII文字列です。" + +#: ../../recipes-low-level-interfaces.rst:77 2cab525052174e5889a8b5eeea539330 +msgid "How to Handle Object Streams" +msgstr "オブジェクトストリームの処理方法" + +#: ../../recipes-low-level-interfaces.rst:78 658c13e1cc6947b09fc237301c2bc678 +msgid "" +"Some object types contain additional data apart from their object " +"definition. Examples are images, fonts, embedded files or commands " +"describing the appearance of a page." +msgstr "オブジェクトストリームの処理方法一部のオブジェクトタイプには、オブジェクト定義以外に追加のデータが含まれています。例として、画像、フォント、埋め込みファイル、またはページの外観を記述するコマンドが挙げられます。" + +#: ../../recipes-low-level-interfaces.rst:80 2796bcc0f1ed4c599541dd675171586e +msgid "" +"Objects of these types are called \"stream objects\". PyMuPDF allows " +"reading an object's stream via method :meth:`Document.xref_stream` with " +"the object's :data:`xref` as an argument. It is also possible to write " +"back a modified version of a stream using :meth:`Document.update_stream`." +msgstr "" +"これらのタイプのオブジェクトは「ストリームオブジェクト」と呼ばれます。PyMuPDFでは、メソッド " +":meth:`Document.xref_stream` を使用して、オブジェクトの :data:`xref` " +"を引数としてオブジェクトのストリームを読み取ることができます。また、:meth:`Document.update_stream` " +"を使用して、ストリームの変更されたバージョンを書き戻すことも可能です。" + +#: ../../recipes-low-level-interfaces.rst:82 3771d4779f8f40dba59160b01c9ee9cf +msgid "" +"Assume that the following snippet wants to read all streams of a PDF for " +"whatever reason::" +msgstr "次のスニペットが、PDFのすべてのストリームを読み取るためのものであると仮定します::" + +#: ../../recipes-low-level-interfaces.rst:91 9c27d6d70d6b40fba9999600e789863f +msgid "" +":meth:`Document.xref_stream` automatically returns a stream decompressed " +"as a bytes object -- and :meth:`Document.update_stream` automatically " +"compresses it if beneficial." +msgstr "" +":meth:`Document.xref_stream` " +"は自動的にバイトオブジェクトとして展開されたストリームを返し、:meth:`Document.update_stream` " +"は必要に応じて自動的に圧縮されます。" + +#: ../../recipes-low-level-interfaces.rst:96 8090153b26c14df4a507877668bc7624 +msgid "How to Handle Page Contents" +msgstr "ページ内容の処理方法" + +#: ../../recipes-low-level-interfaces.rst:97 848d7acefeb245dc8af34902fe0aff24 +msgid "" +"A PDF page can have zero or multiple :data:`contents` objects. These are " +"stream objects describing **what** appears **where** and **how** on a " +"page (like text and images). They are written in a special mini-language " +"described e.g. in chapter \"APPENDIX A - Operator Summary\" on page 643 " +"of the :ref:`AdobeManual`." +msgstr "" +"PDFページにはゼロまたは複数の :data:`contents` " +"オブジェクトが存在できます。これらは、ページ上に何がどこにどのように表示されるかを記述するストリームオブジェクト(テキストや画像など)です。これらは、:ref:`AdobeManual`" +" のページ643の「付録A - オペレーターサマリー」などで説明されている特別なミニ言語で記述されています。" + +#: ../../recipes-low-level-interfaces.rst:99 4188f5c77de44ced870411c31528cec5 +msgid "" +"Every PDF reader application must be able to interpret the contents " +"syntax to reproduce the intended appearance of the page." +msgstr "すべてのPDFリーダーアプリケーションは、コンテンツの構文を解釈してページの意図した表示を再現できる必要があります。" + +#: ../../recipes-low-level-interfaces.rst:101 1f87bc0453d24082b53750397e2d3f74 +msgid "" +"If multiple :data:`contents` objects are provided, they must be " +"interpreted in the specified sequence in exactly the same way as if they " +"were provided as a concatenation of the several." +msgstr "" +"複数の :data:`contents` " +"オブジェクトが提供される場合、それらは複数のコンテンツを連結した場合とまったく同じ方法で、指定された順序で解釈される必要があります。" + +#: ../../recipes-low-level-interfaces.rst:103 c8df9beb4ec84255a6972a53be5fca52 +msgid "" +"There are good technical arguments for having multiple :data:`contents` " +"objects:" +msgstr "複数の :data:`contents` オブジェクトを持つメリットには、次のような良い技術的理由があります:" + +#: ../../recipes-low-level-interfaces.rst:105 98cc88c334df4c1ebefdc905e195d627 +msgid "" +"It is a lot easier and faster to just add new :data:`contents` objects " +"than maintaining a single big one (which entails reading, decompressing, " +"modifying, recompressing, and rewriting it for each change)." +msgstr "" +"新しい :data:`contents` " +"オブジェクトを追加するだけで、単一の大きなコンテンツオブジェクトを維持するよりもはるかに簡単で高速です(各変更のたびに読み取り、展開、変更、再圧縮、書き直しが必要です)。" + +#: ../../recipes-low-level-interfaces.rst:106 f2458f2703db48d58afa2677cb1a6f0b +msgid "" +"When working with incremental updates, a modified big :data:`contents` " +"object will bloat the update delta and can thus easily negate the " +"efficiency of incremental saves." +msgstr "" +"増分更新を使用する場合、修正された大きな :data:`contents` " +"オブジェクトは更新デルタを膨らませ、増分保存の効率を簡単に打ち消す可能性があります。" + +#: ../../recipes-low-level-interfaces.rst:108 774608ae29314d87af2590cc55b0b96a +msgid "" +"For example, PyMuPDF adds new, small :data:`contents` objects in methods " +":meth:`Page.insert_image`, :meth:`Page.show_pdf_page` and the " +":ref:`Shape` methods." +msgstr "" +"例えば、PyMuPDFは :meth:`Page.insert_image` 、 :meth:`Page.show_pdf_page` 、および" +" :ref:`Shape` メソッドで新しい小さな :data:`contents` オブジェクトを追加します。" + +#: ../../recipes-low-level-interfaces.rst:110 5cc4b562e7f643678dec3abbd5c901af +msgid "" +"However, there are also situations when a **single** :data:`contents` " +"object is beneficial: it is easier to interpret and more compressible " +"than multiple smaller ones." +msgstr "" +"ただし、単一の :data:`contents` " +"オブジェクトが有益な状況もあります。それは複数の小さなオブジェクトよりも解釈が容易で、圧縮が効果的です。" + +#: ../../recipes-low-level-interfaces.rst:112 f8d8be1710d74bb9b417dc9948c862c9 +msgid "Here are two ways of combining multiple contents of a page::" +msgstr "以下は、ページの複数のコンテンツを組み合わせる2つの方法です:" + +#: ../../recipes-low-level-interfaces.rst:124 0016b0cc7db9493c9b5bfecfdd25e99c +msgid "" +"The clean function :meth:`Page.clean_contents` does a lot more than just " +"glueing :data:`contents` objects: it also corrects and optimizes the PDF " +"operator syntax of the page and removes any inconsistencies with the " +"page's object definition." +msgstr "" +":meth:`Page.clean_contents` は、:data:`contents` " +"オブジェクトを結合するだけでなく、ページのPDFオペレータ構文を修正し最適化し、ページのオブジェクト定義との整合性を保つためにも役立ちます。" + +#: ../../recipes-low-level-interfaces.rst:129 8fd84d6a4af441adb8fcf1c77aa5b0bd +msgid "How to Access the PDF Catalog" +msgstr "PDFカタログへのアクセス方法" + +#: ../../recipes-low-level-interfaces.rst:130 2a2e41fa0c34401ea22c6f2ed40a89e7 +msgid "" +"This is a central (\"root\") object of a PDF. It serves as a starting " +"point to reach important other objects and it also contains some global " +"options for the PDF::" +msgstr "これはPDFの中心的な(\"ルート\")オブジェクトです。これは重要な他のオブジェクトに到達するための出発点として機能し、PDFのいくつかのグローバルオプションも含まれています::" + +#: ../../recipes-low-level-interfaces.rst:146 5b118633d9a341a1a95aed016917f5d2 +msgid "" +"Indentation, line breaks and comments are inserted here for clarification" +" purposes only and will not normally appear. For more information on the " +"PDF catalog see section 7.7.2 on page 71 of the :ref:`AdobeManual`." +msgstr "" +"字下げ、改行、コメントは説明のために挿入されており、通常は表示されません。PDFカタログの詳細については、 :ref:`AdobeManual` " +"のページ71のセクション7.7.2を参照してください。" + +#: ../../recipes-low-level-interfaces.rst:151 f6b9f5c2736444a09a44be0af1efd3ef +msgid "How to Access the PDF File Trailer" +msgstr "PDFファイルトレーラーへのアクセス方法" + +#: ../../recipes-low-level-interfaces.rst:152 4cb33f7607ff4b7da3a368b706d7f85d +msgid "" +"The trailer of a PDF file is a :data:`dictionary` located towards the end" +" of the file. It contains special objects, and pointers to important " +"other information. See :ref:`AdobeManual` p. 42. Here is an overview:" +msgstr "" +"PDFファイルのトレーラーは、ファイルの終わりに位置する :data:`dictionary` " +"です。特別なオブジェクトと、重要な他の情報へのポインタが含まれています。:ref:`AdobeManual` を参照してください(p. " +"42)。以下に概要を示します:" + +#: ../../recipes-low-level-interfaces.rst:155 6a08c29e38b14b4f8780edbf2bbd57ed +msgid "**Key**" +msgstr "**キー**" + +#: ../../recipes-low-level-interfaces.rst:155 58913ef5bc0e42449995f9ddc470a9b6 +msgid "**Type**" +msgstr "**タイプ**" + +#: ../../recipes-low-level-interfaces.rst:155 6bd41f1c095b4146a4be333408ae4d28 +msgid "**Value**" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:157 4f3f1be3c87948899e54efc916c7ef18 +msgid "Size" +msgstr "**値**" + +#: ../../recipes-low-level-interfaces.rst:157 +#: ../../recipes-low-level-interfaces.rst:158 +#: ../../recipes-low-level-interfaces.rst:163 81a6a91f16444f15bdeeaaf278f1bcd1 +#: 8682c7b530344453ade63634c104ed4f f11a36ccb1b94cd3ae600b685375afff +msgid "int" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:157 c285913e7e4943f9867315f2d6712e7f +msgid "Number of entries in the cross-reference table + 1." +msgstr "クロスリファレンステーブル内のエントリ数 + 1 の数値。" + +#: ../../recipes-low-level-interfaces.rst:158 c49bb116e66c4fca9cdeb500ae675247 +msgid "Prev" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:158 28fbdab151c14d60acbe435e09d03829 +msgid "Offset to previous :data:`xref` section (indicates incremental updates)." +msgstr "前の :data:`xref` セクションへのオフセット(増分更新を示す)。" + +#: ../../recipes-low-level-interfaces.rst:159 96893c3d42a943ffaa753c2af598ddda +msgid "Root" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:159 +#: ../../recipes-low-level-interfaces.rst:160 +#: ../../recipes-low-level-interfaces.rst:161 17e32472677349bfa94d61bc793d8475 +#: 2d049d53c6cd4ae8a2bc07f1cbcf53bb ce33dca31774448c9bdd1e213de5d090 +msgid "dictionary" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:159 fecd0772f75c49fdb36a4962c490461e +msgid "(indirect) Pointer to the catalog. See previous section." +msgstr "(間接的) カタログへのポインタ。前のセクションを参照してください。" + +#: ../../recipes-low-level-interfaces.rst:160 b7b167190b0149ce9bdd9ef671d03ae2 +msgid "Encrypt" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:160 aa8f56b6637c4a7ca41846fa1ebf6eca +msgid "Pointer to encryption object (encrypted files only)." +msgstr "(暗号化されたファイルのみ) 暗号化オブジェクトへのポインタ。" + +#: ../../recipes-low-level-interfaces.rst:161 d9be1827baa242068f64cd1307f9bd58 +msgid "Info" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:161 597be1c0e13342c694e56f1faf8dcca5 +msgid "(indirect) Pointer to information (metadata)." +msgstr "(間接的) 情報(メタデータ)へのポインタ。" + +#: ../../recipes-low-level-interfaces.rst:162 66abcc5c41b044d7ba24bef4374f4cba +msgid "ID" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:162 e6d762fa8f38487992b7f2ddc8737388 +msgid "array" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:162 ae4de832cc2c4e1cb9943cb9a61596da +msgid "File identifier consisting of two byte strings." +msgstr "2つのバイト文字列からなるファイル識別子。" + +#: ../../recipes-low-level-interfaces.rst:163 fe39430132414ae0b7367e63724369e0 +msgid "XRefStm" +msgstr "" + +#: ../../recipes-low-level-interfaces.rst:163 2f5419e850a744d790f2b70a0e221c5a +msgid "Offset of a cross-reference stream. See :ref:`AdobeManual` p. 49." +msgstr "クロスリファレンスストリームのオフセット。:ref:`AdobeManual` を参照してください(p. 49)。" + +#: ../../recipes-low-level-interfaces.rst:166 f658e5b17a384d9c90104cba97aa85ff +msgid "" +"Access this information via PyMuPDF with :meth:`Document.pdf_trailer` or," +" equivalently, via :meth:`Document.xref_object` using -1 instead of a " +"valid :data:`xref` number." +msgstr "" +"これらの情報には、PyMuPDFを使用して :meth:`Document.pdf_trailer` または、同等の " +":meth:`Document.xref_object` を使用して -1 の代わりに有効な :data:`xref` " +"番号を指定することでアクセスします。" + +#: ../../recipes-low-level-interfaces.rst:187 98759bbd8d114604a80fbf742832adba +msgid "How to Access XML Metadata" +msgstr "XMLメタデータへのアクセス方法" + +#: ../../recipes-low-level-interfaces.rst:188 a4c560abb05646a6b21fccc292008ea0 +msgid "" +"A PDF may contain XML metadata in addition to the standard metadata " +"format. In fact, most PDF viewer or modification software adds this type " +"of information when saving the PDF (Adobe, Nitro PDF, PDF-XChange, etc.)." +msgstr "" +"PDFには、標準のメタデータ形式に加えてXMLメタデータが含まれている場合があります。実際、ほとんどのPDFビューアや編集ソフトウェアは、PDFを保存する際に(Adobe、Nitro" +" PDF、PDF-XChangeなど)この種の情報を追加します。" + +#: ../../recipes-low-level-interfaces.rst:190 a02f8f95ed834b4386f05f01b7baf217 +msgid "" +"PyMuPDF has no way to **interpret or change** this information directly, " +"because it contains no XML features. XML metadata is however stored as a " +":data:`stream` object, so it can be read, modified with appropriate " +"software and written back." +msgstr "" +"しかし、PyMuPDFはXMLの機能を持たないため、この情報を直接解釈または変更する方法はありません。ただし、XMLメタデータは " +":data:`stream` オブジェクトとして格納されているため、適切なソフトウェアで読み取り、変更し、書き戻すことができます。" + +#: ../../recipes-low-level-interfaces.rst:202 763734a1837d4847a3c820225381f4cb +msgid "" +"Using some XML package, the XML data can be interpreted and / or modified" +" and then stored back. The following also works, if the PDF previously " +"had no XML metadata::" +msgstr "あるXMLパッケージを使用して、XMLデータを解釈および/または変更し、それを保存し直すことができます。次の方法もPDFに以前にXMLメタデータがない場合にも機能します::" + +#: ../../recipes-low-level-interfaces.rst:213 b484e4babb6b4821a2d4ca50b936f40b +msgid "How to Extend PDF Metadata" +msgstr "PDFメタデータの拡張方法" + +#: ../../recipes-low-level-interfaces.rst:214 bd8ade0701494f0aa8d64389ba8c01db +msgid "" +"Attribute :attr:`Document.metadata` is designed so it works for all " +":ref:`supported document types` in the same way: it" +" is a Python dictionary with a **fixed set of key-value pairs**. " +"Correspondingly, :meth:`Document.set_metadata` only accepts standard " +"keys." +msgstr "" +"属性 :attr:`Document.metadata` は、すべての :ref:`サポートされている " +"` " +"ドキュメントタイプで同じ方法で機能するように設計されています。これは、固定されたキーと値のセットを持つPython辞書です。同様に、:meth:`Document.set_metadata`" +" は標準のキーのみを受け入れます。" + +#: ../../recipes-low-level-interfaces.rst:216 2ed9e5740749448898a94bedd85fa94f +msgid "" +"However, PDFs may contain items not accessible like this. Also, there may" +" be reasons to store additional information, like copyrights. Here is a " +"way to handle **arbitrary metadata items** by using PyMuPDF low-level " +"functions." +msgstr "しかし、PDFにはこのようにアクセスできない項目が含まれている場合があります。また、著作権などの追加情報を保存する理由もあるかもしれません。以下は、PyMuPDFの低レベル関数を使用して任意のメタデータ項目を処理する方法です。" + +#: ../../recipes-low-level-interfaces.rst:218 f2e1b634d30a481da6cb600b52f2dccc +msgid "As an example, look at this standard metadata output of some PDF::" +msgstr "例として、次のPDFの標準メタデータ出力をご覧ください::" + +#: ../../recipes-low-level-interfaces.rst:237 ae127729cd53409cb04f9ebc390699c4 +msgid "" +"Use the following code to see **all items** stored in the metadata " +"object::" +msgstr "以下のコードを使用して、メタデータオブジェクトに保存されているすべてのアイテムを表示します::" + +#: ../../recipes-low-level-interfaces.rst:265 f219d010f8094f62934815b489d0dc25 +msgid "" +"*Vice versa*, you can also **store private metadata items** in a PDF. It " +"is your responsibility to make sure that these items conform to PDF " +"specifications - especially they must be (unicode) strings. Consult " +"section 14.3 (p. 548) of the :ref:`AdobeManual` for details and caveats::" +msgstr "" +"逆に、PDFにはプライベートなメタデータアイテムを保存することもできます。これらのアイテムがPDF仕様に準拠していることを確認する責任はあなたにあります。特に、これらは(Unicode)文字列である必要があります。詳細や注意事項については、Adobe" +" PDFリファレンスのセクション14.3(p. 548)を参照してください::" + +#: ../../recipes-low-level-interfaces.rst:287 2dc3761486a541f6821711dfd609da05 +msgid "" +"To delete selected keys, use `doc.xref_set_key(xref, \"mykey\", " +"\"null\")`. As explained in the next section, string \"null\" is the PDF " +"equivalent to Python's `None`. A key with that value will be treated as " +"not being specified -- and physically removed in garbage collections." +msgstr "" +"選択したキーを削除するには、`doc.xref_set_key(xref, \"mykey\", \"null\")` " +"を使用します。次のセクションで説明されているように、文字列 `\"null\"` はPDFのバージョンでPythonの `None` " +"に相当します。その値のキーは指定されていないものとして扱われ、ガベージコレクションで物理的に削除されます。" + +#: ../../recipes-low-level-interfaces.rst:292 b8e1f01138dd4a638ffd1e3689508d1c +msgid "How to Read and Update PDF Objects" +msgstr "PDFオブジェクトの読み取りと更新方法" + +#: ../../recipes-low-level-interfaces.rst:297 c7c33727bafc478ca88a1cdae0ec859b +msgid "" +"There also exist granular, elegant ways to access and manipulate selected" +" PDF :data:`dictionary` keys." +msgstr "選択したPDF :data:`dictionary` キーにアクセスし、操作するための粒状で洗練された方法も存在します。" + +#: ../../recipes-low-level-interfaces.rst:299 04aa0e1d70b944b3a904ea382f4f0c30 +msgid "" +":meth:`Document.xref_get_keys` returns the PDF keys of the object at " +":data:`xref`::" +msgstr ":meth:`Document.xref_get_keys` は、:data:`xref` のオブジェクトのPDFキーを返します::" + +#: ../../recipes-low-level-interfaces.rst:308 ec3415a9ffc54434860c48bc58bd55d9 +msgid "Compare with the full object definition::" +msgstr "完全なオブジェクト定義と比較してください::" + +#: ../../recipes-low-level-interfaces.rst:319 4ee823aae30f4627b0f7c02f4267bfdc +msgid "" +"Single keys can also be accessed directly via " +":meth:`Document.xref_get_key`. The value **always is a string** together " +"with type information, that helps with interpreting it::" +msgstr "" +"単一のキーは、 :meth:`Document.xref_get_key` " +"を介して直接アクセスすることもできます。値は常に文字列であり、それを解釈するのに役立つタイプ情報が含まれています::" + +#: ../../recipes-low-level-interfaces.rst:324 5b603693f93c4e03a5111618579b46ce +msgid "Here is a full listing of the above page keys::" +msgstr "以下は、上記のページキーの完全な一覧です::" + +#: ../../recipes-low-level-interfaces.rst:335 f13dcde484e24d32bc88ea9b8cf371dd +msgid "" +"An undefined key inquiry returns `('null', 'null')` -- PDF object type " +"`null` corresponds to `None` in Python. Similar for the booleans `true` " +"and `false`." +msgstr "" +"未定義のキーの問い合わせは、`('null', 'null')` を返します - PDFオブジェクトタイプ `null` はPythonの " +"`None` に対応します。 `true` および `false` も同様です。" + +#: ../../recipes-low-level-interfaces.rst:336 14eee493b6bb4c06b2afcbdade3ca9e7 +msgid "" +"Let us add a new key to the page definition that sets its rotation to 90 " +"degrees (you are aware that there actually exists " +":meth:`Page.set_rotation` for this?)::" +msgstr "" +"ページ定義に新しいキーを追加して、その回転角を90度に設定しましょう(実際には :meth:`Page.set_rotation` " +"が存在することを知っているかと思いますが、そうですか?)::" + +#: ../../recipes-low-level-interfaces.rst:351 b9ecea1e87e7421faea4ae09003de94f +msgid "" +"This method can also be used to remove a key from the :data:`xref` " +"dictionary by setting its value to `null`: The following will remove the " +"rotation specification from the page: `doc.xref_set_key(page.xref, " +"\"Rotate\", \"null\")`. Similarly, to remove all links, annotations and " +"fields from a page, use `doc.xref_set_key(page.xref, \"Annots\", " +"\"null\")`. Because `Annots` by definition is an array, setting en empty " +"array with the statement `doc.xref_set_key(page.xref, \"Annots\", " +"\"[]\")` would do the same job in this case." +msgstr "" +"このメソッドは、値を `null` に設定することで :data:`xref` " +"辞書からキーを削除するためにも使用できます:次の方法は、ページから回転指定を削除します: `doc.xref_set_key(page.xref," +" \"Rotate\", \"null\")` 。同様に、ページからすべてのリンク、注釈、およびフィールドを削除するには、 " +"`doc.xref_set_key(page.xref, \"Annots\", \"null\")` を使用します。`Annots` " +"は定義上配列であるため、`doc.xref_set_key(page.xref, \"Annots\", \"[]\")` " +"という文で空の配列を設定すると、同じ操作が実行されます。" + +#: ../../recipes-low-level-interfaces.rst:353 25ee5ad120ca43a7af7fb238b8ee91cd +msgid "" +"PDF dictionaries can be hierarchically nested. In the following page " +"object definition both, `Font` and `XObject` are subdictionaries of " +"`Resources`::" +msgstr "" +"PDF辞書は階層的に入れ子にすることができます。次のページオブジェクト定義では、`Font` と `XObject` は両方とも " +"`Resources` のサブディクショナリです::" + +#: ../../recipes-low-level-interfaces.rst:373 166311ecb59d42579d5324506515d2e5 +msgid "" +"The above situation **is supported** by methods " +":meth:`Document.xref_set_key` and :meth:`Document.xref_get_key`: use a " +"path-like notation to point at the required key. For example, to retrieve" +" the value of key `Im1` above, specify the complete chain of dictionaries" +" \"above\" it in the key argument: `\"Resources/XObject/Im1\"`::" +msgstr "" +"上記の状況は、メソッド :meth:`Document.xref_set_key` と :meth:`Document.xref_get_key`" +" によってサポートされています。必要なキーを指すために、パスのような表記法を使用します。たとえば、上記の `Im1` " +"キーの値を取得するには、キー引数にその上位の辞書の完全なチェーン `\"Resources/XObject/Im1\"` を指定します::" + +#: ../../recipes-low-level-interfaces.rst:378 decc969788824b2f905d28ed9ea37c97 +msgid "" +"The path notation can also be used to **directly set a value**: use the " +"following to let `Im1` point to a different object::" +msgstr "パス表記法は、値を直接設定するためにも使用できます。以下を使用して、`Im1` を異なるオブジェクトを指すように設定します::" + +#: ../../recipes-low-level-interfaces.rst:399 8134074039144ebca0e928b428cf7ad4 +msgid "" +"Be aware, that **no semantic checks** whatsoever will take place here: if" +" the PDF has no xref 9999, it won't be detected at this point." +msgstr "ここでは、何の意味的なチェックも行われないことに注意してください。PDFにxref 9999が存在しない場合、この段階では検出されません。" + +#: ../../recipes-low-level-interfaces.rst:401 d717c7f21a174300a90bb992653ab939 +msgid "" +"If a key does not exist, it will be created by setting its value. " +"Moreover, if any intermediate keys do not exist either, they will also be" +" created as necessary. The following creates an array `D` several levels " +"below the existing dictionary `A`. Intermediate dictionaries `B` and `C` " +"are automatically created::" +msgstr "" +"キーが存在しない場合、その値を設定することで新しく作成されます。さらに、中間のキーが存在しない場合も、必要に応じて自動的に作成されます。次の例では、既存の辞書" +" `A` の下にいくつかの階層下に配列 `D` を作成しています。中間の辞書 `B` と `C` も自動的に作成されます::" + +#: ../../recipes-low-level-interfaces.rst:421 79d1fc2866c6405e852e7bc1d59c5f72 +msgid "" +"When setting key values, basic **PDF syntax checking** will be done by " +"MuPDF. For example, new keys can only be created **below a dictionary**. " +"The following tries to create some new string item `E` below the " +"previously created array `D`::" +msgstr "" +"キーの値を設定する際には、MuPDFによって基本的なPDF構文のチェックが行われます。たとえば、新しいキーは辞書の下にのみ作成できます。次の例では、以前に作成された配列" +" `D` の下に新しい文字列アイテム `E` を作成しようとしています:" + +#: ../../recipes-low-level-interfaces.rst:429 891aef9e0fe9425881312a1752d59e9f +msgid "" +"It is also **not possible**, to create a key if some higher level key is " +"an **\"indirect\"** object, i.e. an xref. In other words, xrefs can only " +"be modified directly and not implicitly via other objects referencing " +"them::" +msgstr "同様に、上位のキーが「間接」オブジェクト、つまりxrefである場合、キーを作成することはできません。言い換えれば、xrefは直接的に変更できるが、それを参照する他のオブジェクトを通じて暗黙的に変更することはできません::" + +#: ../../recipes-low-level-interfaces.rst:442 cb6c84fd491f4c3eaab967eb4ab22326 +msgid "" +"These are expert functions! There are no validations as to whether valid " +"PDF objects, xrefs, etc. are specified. As with other low-level methods " +"there is the risk to render the PDF, or parts of it unusable." +msgstr "これらは専門家向けの機能です!有効なPDFオブジェクトやxrefなどが指定されているかどうかの検証はありません。他の低レベルメソッドと同様に、PDF全体またはその一部を利用不能にする可能性があるため注意が必要です。" + +#: ../../footer.rst:60 4695c17edec440ea85382cf6f78ad072 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.mo b/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.mo new file mode 100644 index 000000000..c8868512e Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.po b/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.po new file mode 100644 index 000000000..048422138 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-multiprocessing.po @@ -0,0 +1,110 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 05b9195b227d4fbc825fc852831e4554 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d1146968098a44928b810551252f816f +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 0072b638d978499db19c4b4f4f7a3707 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-multiprocessing.rst:17 0f0b2d7d9f684b409ff0e5784ca866ac +msgid "Multiprocessing" +msgstr "マルチプロセッシング" + +#: ../../recipes-multiprocessing.rst:19 3062a6b9aec34db2aec0a9bb17a0ef61 +msgid "" +"|PyMuPDF| does not support running on multiple threads - doing so may " +"cause incorrect behaviour or even crash Python itself." +msgstr "" + +#: ../../recipes-multiprocessing.rst:21 74aa37e95d05468c982d841a2f5e2366 +msgid "" +"However, there is the option to use :title:`Python's` *multiprocessing* " +"module in a variety of ways." +msgstr "ただし、Pythonのmultiprocessingモジュールをさまざまな方法で使用するオプションがあります。" + +#: ../../recipes-multiprocessing.rst:23 83777a97e9db4e779e16756b9eb3ee39 +msgid "" +"If you are looking to speed up page-oriented processing for a large " +"document, use this script as a starting point. It should be at least " +"twice as fast as the corresponding sequential processing." +msgstr "大きなドキュメントのページ指向処理を高速化することを検討している場合は、このスクリプトを出発点として使用してください。対応する順次処理よりも少なくとも2倍の速さで動作するはずです。" + +#: ../../recipes-multiprocessing.rst:26 ../../recipes-multiprocessing.rst:37 +#: 2098918775344f8e9c7de44ba8423fb1 824c4f30793e4c428daf4a27269debd1 +msgid "|toggleStart|" +msgstr "" + +#: ../../recipes-multiprocessing.rst:31 ../../recipes-multiprocessing.rst:42 +#: dd13074c598d44c4b568775518218436 ea94b563d305488a93bcc8650477d412 +msgid "|toggleEnd|" +msgstr "" + +#: ../../recipes-multiprocessing.rst:34 9ad6e6b758c14434a47e7b271aebc806 +msgid "" +"Here is a more complex example involving inter-process communication " +"between a main process (showing a GUI) and a child process doing " +"|PyMuPDF| access to a document." +msgstr "以下は、メインプロセス(GUIを表示)とドキュメントへのPyMuPDFアクセスを行う子プロセスとの間のプロセス間通信を含む、より複雑な例です。" + +#: ../../footer.rst:60 fbfd1487311a41a4acc53de3122dc430 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ ":title:`MuPDF` has no integrated support " +#~ "for threading - calling itself " +#~ "\"thread-agnostic\". While there do exist" +#~ " tricky possibilities to still use " +#~ "threading with :title:`MuPDF`, the baseline" +#~ " consequence for |PyMuPDF| is:" +#~ msgstr "" +#~ ":title:`MuPDF` はスレッディングに対する統合サポートを持っておらず、「スレッドに対して無関心(thread-" +#~ "agnostic)」と呼ばれています。MuPDFとスレッディングを組み合わせて使用するトリッキーな可能性は存在しますが、PyMuPDFの基本的な影響は次のとおりです:" + +#~ msgid "**No Python threading support**." +#~ msgstr "**Pythonスレッディングのサポートはありません。**." + +#~ msgid "" +#~ "Using |PyMuPDF| in a :title:`Python` " +#~ "threading environment will lead to " +#~ "blocking effects for the main thread." +#~ msgstr "|PyMuPDF| を :title:`Python` スレッディング環境で使用すると、メインスレッドでのブロッキング効果が発生します。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-ocr.mo b/docs/locales/ja/LC_MESSAGES/recipes-ocr.mo new file mode 100644 index 000000000..7df4e1e20 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-ocr.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-ocr.po b/docs/locales/ja/LC_MESSAGES/recipes-ocr.po new file mode 100644 index 000000000..1ee2410e6 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-ocr.po @@ -0,0 +1,196 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 8c06718aa6d94a588549b1b5d04439ff +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 53d78f25612a42cea99758721dc13394 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDFは、PDF(およびその他)ドキュメントのデータ抽出、分析、変換、および操作のための高性能なPythonライブラリです。" + +#: ../../header.rst:-1 bffa3c55f0b648508f7e9ef104450bca +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキストの抽出、PDF画像の抽出、PDF変換、PDFテーブル、PDF分割、PDF作成、Pyodide、PyScript" + +#: ../../recipes-ocr.rst:17 c241da654a734566aa9837f159d775f0 +msgid "OCR - Optical Character Recognition" +msgstr "OCR - 光学文字認識" + +#: ../../recipes-ocr.rst:19 0aa275f26c85470caecaed5eecb9df0b +msgid "" +"|PyMuPDF| has integrated support for OCR (Optical Character Recognition)." +" It is possible to use OCR for both, images (via the :ref:`Pixmap` class)" +" and for document pages." +msgstr "" +"|PyMuPDF| にはOCR(光学文字認識)の統合サポートがあります。OCRは、画像( :ref:`Pixmap` " +"クラスを介して)および文書ページの両方に使用できます。" + +#: ../../recipes-ocr.rst:21 51aed55192864e8e92dfdbae828d1b05 + +msgid "" +"The feature is currently based on Tesseract-OCR which must be installed " +"as a separate application -- see the :ref:`installation_ocr`." +msgstr "" +"この機能は現在、 :ref:`installation_ocr` に基づいており、別途アプリケーションとしてインストールする必要があります。インストール手順は、インストールチャプターを参照してください。" + +#: ../../recipes-ocr.rst:24 b1216080a3f54b3c8f76397e1abb6bfb +msgid "How to OCR an Image" +msgstr "画像のOCR方法" + +#: ../../recipes-ocr.rst:25 3246894b688c4472b993c9cb737b849b +msgid "" +"A supported image must first be converted to a :ref:`Pixmap`. The Pixmap " +"can then be saved to a 1-page PDF. This page will look like the original " +"image with the same width and height. It will contain a layer of text as " +"recognized by Tesseract." +msgstr "" +"サポートされている画像はまず :ref:`Pixmap` " +"に変換される必要があります。次に、Pixmapを1ページのPDFに保存できます。このページは、元の画像と同じ幅と高さを持つように見えます。それには、Tesseractによって認識されたテキストのレイヤーが含まれています。" + +#: ../../recipes-ocr.rst:27 d4515bb735c54591a730fae2a142b782 +msgid "" +"The PDF can be generated via one of the methods " +":meth:`Pixmap.pdfocr_save` or :meth:`Pixmap.pdfocr_tobytes`, as a file on" +" disk or as a PDF in memory." +msgstr "" +"PDFは、次のいずれかの方法、つまり :meth:`Pixmap.pdfocr_save` または " +":meth:`Pixmap.pdfocr_tobytes` を使用して、ディスク上のファイルとして、またはメモリ内のPDFとして生成できます。" + +#: ../../recipes-ocr.rst:29 487a184cd0104d00b896c90725d548eb +msgid "" +"The text can be extracted and searched with the usual text extraction and" +" search methods (:meth:`Page.get_text`, :meth:`Page.search_for`, etc.). " +"Please also note the following important facts and prerequisites:" +msgstr "" +"通常のテキスト抽出および検索方法( :meth:`Page.get_text` 、 :meth:`Page.search_for` " +"など)を使用して、テキストを抽出および検索することができます。また、次の重要な事実と前提条件にも注意してください:" + +#: ../../recipes-ocr.rst:31 fea2a323626648cf8e2f358bf60c9790 +msgid "" +"When converting the image to a Pixmap, please confirm that the color " +"space is RGB and alpha is `False` (no transparency). Convert the original" +" Pixmap if necessary." +msgstr "" +"画像をPixmapに変換する際に、カラースペースがRGBであり、alphaが `False` " +"(透明度なし)であることを確認してください。必要に応じて元のPixmapを変換してください。" + +#: ../../recipes-ocr.rst:32 602c256e951449f1a5d1de19052b4e81 +msgid "" +"All text is written as \"hidden\" with Tesseract's own `GlyphLessFont`, a" +" mono-spaced font with metrics comparable to Courier." +msgstr "" +"すべてのテキストは、Tesseractの独自の `GlyphLessFont` " +"で「非表示」として書かれています。これは、Courierに類似したメトリクスを持つ等幅フォントです。" + +#: ../../recipes-ocr.rst:33 cec1c1656b124ccb8c05d4e5faabe5fe +msgid "" +"All text has the properties regular and black (i.e. no bold, no italic, " +"no information about the original fonts)." +msgstr "すべてのテキストは、regularとblackのプロパティを持ちます(太字や斜体はなく、元のフォントに関する情報はありません)。" + +#: ../../recipes-ocr.rst:34 40decd5ed0f641c5a1a1df6f0e46b543 +msgid "" +"Tesseract does not recognize vector graphics (i.e. no drawings / line-" +"art)." +msgstr "Tesseractはベクトルグラフィックスを認識しません(つまり、図面や線画はありません)。" + +#: ../../recipes-ocr.rst:36 92a869d875f440a1823e69cb418dd13c +msgid "This approach is also recommended to OCR a complete scanned PDF:" +msgstr "スキャンされた完全なPDFをOCRするためにも、この手法が推奨されています:" + +#: ../../recipes-ocr.rst:38 66094238af8143488b9393b6174ad73b +msgid "Render each page to a :ref:`Pixmap` with desired resolution" +msgstr "各ページを所望の解像度で :ref:`Pixmap` にレンダリングします" + +#: ../../recipes-ocr.rst:39 117254e7b2ba43c181367c88db16205e +msgid "Append the resulting 1-page PDF to the output PDF" +msgstr "得られた1ページのPDFを出力PDFに追加します" + +#: ../../recipes-ocr.rst:42 d7dece4adeb4437b99e1820d71befa7c +msgid "How to OCR a Document Page" +msgstr "ドキュメントページのOCR方法" + +#: ../../recipes-ocr.rst:43 7c3e4697da8f4603b29acd1ad05093d5 +msgid "" +"Any supported document page can be OCR-ed -- either the complete page or " +"only the image areas on it." +msgstr "サポートされているどのドキュメントページでもOCR処理が可能です。ページ全体、またはその上の画像領域のみを対象にすることができます。" + +#: ../../recipes-ocr.rst:45 ba6c9f5cda8043bf858b00e93aa23ded +msgid "" +"Because optical character recognition is about one thousand times slower " +"than standard text extraction, we make sure to do OCR only once per page " +"and store the result in a :ref:`TextPage`. Using this TextPage for all " +"subsequent extractions and text searches will then happen with " +"|PyMuPDF|'s usual top speed." +msgstr "" +"光学文字認識は通常のテキスト抽出よりも約1000倍遅いため、1ページにつき1回だけOCR処理を行い、その結果をTextPageに保存します。この " +":ref:`TextPage` をすべての後続の抽出とテキスト検索に使用することで、PyMuPDFの通常の高速性で処理が行われます。" + +#: ../../recipes-ocr.rst:47 30ced681346a45b69d752411d4546ef8 +msgid "To OCR a document page, follow this approach:" +msgstr "ドキュメントページをOCRするには、この手順に従ってください:" + +#: ../../recipes-ocr.rst:49 d100d50f78564e60a3c001137881dbe0 +msgid "" +"Determine whether OCR is needed / beneficial at all. A number of criteria" +" can be used for this decision, like:" +msgstr "まず、OCRが全く必要であるか、または有益かどうかを判断します。この決定には、次のような基準を使用できます:" + +#: ../../recipes-ocr.rst:51 d822f6b5f27e4ec8985dabafd5355ded +msgid "page is completely covered by an image" +msgstr "ページが画像で完全にカバーされている" + +#: ../../recipes-ocr.rst:52 e2b83dfda8654688ace9b92549d12d49 +msgid "no text exists on the page" +msgstr "ページにテキストが存在しない" + +#: ../../recipes-ocr.rst:53 92db54fd9afa4d38a9032f18d836b8cf +msgid "thousands of small vector graphics (indicating *simulated* text)" +msgstr "数千の小さなベクトルグラフィックス( *模擬* テキストを示す)" + +#: ../../recipes-ocr.rst:55 83ba5b35db48464f8945a3dabc2e53f0 +msgid "" +"OCR the page and store result in a :ref:`TextPage` object using an " +"instruction like `tp = page.get_textpage_ocr(...)`." +msgstr "" +"ページをOCR処理し、結果を :ref:`TextPage` オブジェクトに保存します。この操作は、 `tp = " +"page.get_textpage_ocr(...)` のような命令を使用して行います。" + +#: ../../recipes-ocr.rst:57 58b836a92fe443749da93c2ff3c9a23e +msgid "" +"Refer to the produced :ref:`TextPage` in all subsequent text extractions " +"and searches via the `textpage=tp` parameter." +msgstr "" +"以降のすべてのテキスト抽出と検索では、 `textpage=tp` パラメータを使用して生成された :ref:`TextPage` " +"を参照してください。" + +#: ../../footer.rst:60 3baf39a2c3104ca786b8b27e9c52e1a8 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは、|version| までのすべてのバージョンをカバーしています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-optional-content.mo b/docs/locales/ja/LC_MESSAGES/recipes-optional-content.mo new file mode 100644 index 000000000..c5ff1f825 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-optional-content.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-optional-content.po b/docs/locales/ja/LC_MESSAGES/recipes-optional-content.po new file mode 100644 index 000000000..9b459fd42 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-optional-content.po @@ -0,0 +1,209 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 b9adf564010b470a971c6291f3680a9f +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 4f641f2c45c0425a8a232bd594ce5a8e +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 9aa457bbf5a14960b09296e9b2e285cc +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-optional-content.rst:7 f386f73d902d4f638a7820107f09b953 +msgid "Optional Content Support" +msgstr "オプションコンテンツのサポート" + +#: ../../recipes-optional-content.rst:9 fcfec01c6f83417597feed152e9cf82e +msgid "" +"This document explains PyMuPDF's support of the PDF concept **\"Optional " +"Content\"**." +msgstr "この文書では、PyMuPDFにおけるPDFコンセプト **「オプションコンテンツ」** のサポートについて説明します。" + +#: ../../recipes-optional-content.rst:12 3f7db4b17b0c4708ba190ca50f3cf65f +msgid "Introduction: The Optional Content Concept" +msgstr "はじめに:オプションコンテンツのコンセプト" + +#: ../../recipes-optional-content.rst:13 1a20a0429dbd4d22b3e438897b87f6ec +msgid "" +"*Optional Content* in PDF is a way to show or hide parts of a document " +"based on certain conditions: Parameters that can be set to ON or to OFF " +"when using a supporting PDF consumer (viewer), or programmatically." +msgstr "PDFのオプションコンテンツは、特定の条件に基づいてドキュメントの一部を表示または非表示にする方法です。これは、サポートするPDFコンシューマ(ビューア)を使用する際にONまたはOFFに設定できるパラメータ、またはプログラムを使用して行います。" + +#: ../../recipes-optional-content.rst:15 aaecb435f0b74040bb7788f47b75ef27 +msgid "" +"This capability is useful in items such as CAD drawings, layered artwork," +" maps, and multi-language documents. Typical uses include showing or " +"hiding details of complex vector graphics like geographical maps, " +"technical devices, architectural designs and similar, including " +"automatically switching between different zooming levels. Other use cases" +" may be to automatically show different detail levels when displaying a " +"document on screen as opposed to printing it." +msgstr "この機能は、CAD図面、レイヤー化されたアートワーク、地図、多言語ドキュメントなどのアイテムで有用です。一般的な用途には、地理的な地図、技術デバイス、建築設計などの複雑なベクトルグラフィックの詳細の表示または非表示、異なるズームレベル間の自動切り替えなどが含まれます。他の用途としては、画面上でドキュメントを表示する際と印刷する際で自動的に異なる詳細レベルを表示することが考えられます。" + +#: ../../recipes-optional-content.rst:17 f07e3ad64d7846e98be13b841e104284 +msgid "" +"Special PDF objects, so-called **Optional Content Groups** (OCGs) are " +"used to define these different *layers* of content." +msgstr "特別なPDFオブジェクトである「オプションコンテンツグループ(OCG)」を使用して、これらの異なるコンテンツのレイヤーを定義します。" + +#: ../../recipes-optional-content.rst:19 8e40a4dd32264311b329d5ef6d1b3feb +msgid "" +"Assigning an OCG to a \"normal\" PDF object (like a text or an image) " +"causes that object to be visible or hidden, depending on the current " +"state of the assigned OCG." +msgstr "「通常の」PDFオブジェクト(テキストや画像など)にOCGを割り当てると、そのオブジェクトは割り当てられたOCGの現在の状態に応じて表示または非表示になります。" + +#: ../../recipes-optional-content.rst:21 55f0e10ade9d4f99abf06a67d6897563 +msgid "" +"To ease definition of the overall configuration of a PDF's Optional " +"Content, OCGs can be organized in higher level groupings, called **OC " +"Configurations**. Each configuration being a collection of OCGs, together" +" with each OCG's desired initial visibility state. Selecting one of these" +" configurations (via the PDF viewer or programmatically) causes a " +"corresponding visibility change of all affected PDF objects throughout " +"the document." +msgstr "PDFのオプションコンテンツの全体的な構成を簡単に定義するために、OCGは高レベルのグループ化である「OC構成」に組織化できます。各構成は、OCGの望ましい初期表示状態とともに、OCGのコレクションです。これらの構成のいずれかを選択する(PDFビューアまたはプログラムを介して)と、ドキュメント全体で影響を受けるすべてのPDFオブジェクトの表示状態が対応するように変更されます。" + +#: ../../recipes-optional-content.rst:23 335985a556c34d438df38487d6cae48b +msgid "Except for the default one, OC Configurations are optional." +msgstr "デフォルト以外のOC構成はオプションです。" + +#: ../../recipes-optional-content.rst:25 fccc4cf720904ea79e6efd0b6034ca87 +msgid "" +"For more explanations and additional background please refer to PDF " +"specification manuals." +msgstr "詳細な説明や追加の背景情報については、PDF仕様マニュアルを参照してください。" + +#: ../../recipes-optional-content.rst:28 c00fd4817a4b47efbf6a64bfa9134f7e +msgid "PyMuPDF Support for PDF Optional Content" +msgstr "PDFオプショナルコンテンツのPyMuPDFサポート" + +#: ../../recipes-optional-content.rst:29 58da3cf082224fbab5a10febc9e9918c +msgid "" +"PyMuPDF offers full support for viewing, defining, changing and deleting " +"Option Content Groups, Configurations, maintaining the assignment of OCGs" +" to PDF objects and programmatically switching between OC Configurations " +"and the visibility states of each single OCG." +msgstr "PyMuPDFは、オプショナルコンテンツグループや構成の表示、定義、変更、削除、OCGのPDFオブジェクトへの割り当ての維持、OC構成間のプログラムによる切り替えや各単一OCGの可視性状態のサポートを完全に提供します。" + +#: ../../recipes-optional-content.rst:32 150d36f9de0441a599f6371982441f3d +msgid "How to Add Optional Content" +msgstr "オプショナルコンテンツの追加方法" + +#: ../../recipes-optional-content.rst:33 73887253f5d24ae7ba64f723a91eaf07 +msgid "" +"This is as simple as adding an Optional Content Group, OCG, to a PDF: " +":meth:`Document.add_ocg`." +msgstr "これは、PDFにオプショナルコンテンツグループ(OCG)を追加するだけの簡単な手順です::meth:`Document.add_ocg`。" + +#: ../../recipes-optional-content.rst:35 bc0c989ada934a9a9e91392eed44e3c6 +msgid "" +"If previously the PDF had no OC support at all, the required setup (like " +"defining the default OC Configuration) will be done at this point " +"automatically." +msgstr "以前にPDFがまったくOCサポートを持っていなかった場合、必要なセットアップ(デフォルトのOC構成の定義など)は自動的に行われます。" + +#: ../../recipes-optional-content.rst:37 cdfece8bcb5842e8a95237834b218484 +msgid "" +"The method returns an :data:`xref` of the created OCG. Use this xref to " +"associate (mark) any PDF object with it, that you want to make dependent " +"on this OCG's state. For example, you can insert an image on a page and " +"refer to the xref like this::" +msgstr "" +"このメソッドは、作成されたOCGの :data:`xref` " +"を返します。このxrefを使用して、このOCGの状態に依存するようにする任意のPDFオブジェクトを関連付け(マーク)します。例えば、ページに画像を挿入し、次のようにxrefを参照できます::" + +#: ../../recipes-optional-content.rst:41 6ffb9b4bd14d4579967827abb6fab656 +msgid "" +"If you want to put an **existing** image under the control of an OCG, you" +" must first find out the image's xref number (called `img_xref` here) and" +" then do `doc.set_oc(img_xref, xref)`. After this, the image will be " +"(in-) visible everywhere throughout the document if the OCG's state is " +"\"ON\", respectively \"OFF\". You can also assign a different OCG with " +"this method." +msgstr "" +"既存の画像をOCGの制御下に配置したい場合、まず画像のxref番号(ここでは `img_xref` と呼びます)を調べ、その後 " +"`doc.set_oc(img_xref, xref)` " +"を行う必要があります。これにより、OCGの状態が「ON」または「OFF」の場合、画像はドキュメント全体で(非)表示になります。また、このメソッドで異なるOCGを割り当てることもできます。" + +#: ../../recipes-optional-content.rst:43 57ddbd4662c64c7da80144e82353dd65 +msgid "To **remove** an OCG from an image, do `doc.set_oc(img_xref, 0)`." +msgstr "画像からOCGを削除するには、 `doc.set_oc(img_xref, 0)` を行います。" + +#: ../../recipes-optional-content.rst:45 7cf89450ea9246beaa5cc79c376ec93c +msgid "" +"One single OCG can be assigned to multiple PDF objects to control their " +"visibility." +msgstr "1つのOCGは複数のPDFオブジェクトに割り当てて、それらの可視性を制御することができます。" + +#: ../../recipes-optional-content.rst:48 7410b55143ef4843a28d68ebad34fd9a +msgid "How to Define Complex Optional Content Conditions" +msgstr "複雑なオプショナルコンテンツ条件の定義方法" + +#: ../../recipes-optional-content.rst:50 896d39eb29204989ade197882ff6a5ce +msgid "" +"Sophisticated logical conditions can be established to address complex " +"visibility needs." +msgstr "複雑な可視性のニーズに対応するために、洗練された論理条件を設定することができます。" + +#: ../../recipes-optional-content.rst:52 a5c986cc1555410ebbc392e3476e93fe +msgid "" +"For example, you might want to create a multi-language document, so the " +"user may switch between languages as required." +msgstr "例えば、ユーザーが必要に応じて言語を切り替えることができるような多言語ドキュメントを作成したいかもしれません。" + +#: ../../recipes-optional-content.rst:54 48cadb3f29334ff5aa265acd08dfdc06 +msgid "Please have a look at `this Jupyter Notebook`_ and execute it as desired." +msgstr "" +"`この Jupyter Notebook `_ " +"をご覧いただき、必要に応じて実行してみてください。" + +#: ../../recipes-optional-content.rst:56 49114e4ceda7440399518471d7dc8eb7 +msgid "" +"Certainly, your requirements may even be more complex and involve " +"multiple OCGs with ON/OFF states that are connected by some kind of " +"logical relationship -- but it should give you an impression of what is " +"possible and how to plan your next steps." +msgstr "もちろん、あなたの要件はさらに複雑で、論理的な関係で接続された複数のON/OFF状態を持つ複数のOCGが関与する可能性がありますが、これは可能なことと次のステップを計画する方法を示すものとなるでしょう。" + +#: ../../footer.rst:60 067fc234c31f4747987f53713303219a +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-stories.mo b/docs/locales/ja/LC_MESSAGES/recipes-stories.mo new file mode 100644 index 000000000..387387307 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-stories.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-stories.po b/docs/locales/ja/LC_MESSAGES/recipes-stories.po new file mode 100644 index 000000000..b0e7b2b4a --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-stories.po @@ -0,0 +1,726 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 c1cb378bf8d4476fb2cf4501913fbf5e +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 3c891d0a0fb04d97936ca58bbdece2de +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 f644d1bc2e234382901d316b4c0cb553 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-stories.rst:18 c45a9555e4e144b9af3b42d2801ac42d +msgid "Stories" +msgstr "ストーリーズ" + +#: ../../recipes-stories.rst:20 61fc6f658db84ae4ac5c035a17aaebab +msgid "" +"This document showcases some typical use cases for " +":ref:`Stories`." +msgstr "このドキュメントは、 :ref:`ストーリーズ` の典型的な使用例を示しています。" + +#: ../../recipes-stories.rst:22 8153b8f5ad474b97857309ad103d02ba +msgid "" +"As mentioned in the :ref:`tutorial`, stories may be " +"created using up to three input sources: HTML, CSS and Archives -- all of" +" which are optional and which, respectively, can be provided " +"programmatically." +msgstr "" +":ref:`チュートリアル` " +"で説明されているように、ストーリーズは最大3つの入力ソース(HTML、CSS、アーカイブ)を使用して作成できます。これらのソースはすべてオプションであり、それぞれプログラムで提供することができます。" + +#: ../../recipes-stories.rst:24 d57ad4f561994730aebf4f597f37bad9 +msgid "The following examples will showcase combinations for using these inputs." +msgstr "以下の例では、これらの入力を使用した組み合わせを示します。" + +#: ../../recipes-stories.rst:28 ac8b802e808e4084b58d55ce03509ad8 +msgid "" +"Many of these recipe's source code are included as examples in the `docs`" +" folder." +msgstr "これらのレシピのソースコードの多くは、ドキュメントフォルダ内の例として含まれています。" + +#: ../../recipes-stories.rst:34 f07dc61a03bf4989b1e6604b220aef91 +msgid "How to Add a Line of Text with Some Formatting" +msgstr "テキストの行をいくつかの書式で追加する方法" + +#: ../../recipes-stories.rst:36 89f52f47ec32483480e0cf60bd8494fb +msgid "Here is the inevitable \"Hello World\" example. We will show two variants:" +msgstr "以下は、避けられない「Hello World」の例です。2つのバリアントを示します:" + +#: ../../recipes-stories.rst:38 0f4aae8b064d445d907c6ca0008c498d +msgid "Create using existing HTML source [#f1]_, that may come from anywhere." +msgstr "どこからでも取得できる既存のHTMLソース [#f1]_ を使用して作成します。" + +#: ../../recipes-stories.rst:39 5cb05c255e5b48348d3ab83f3bd57f28 +msgid "Create using the Python API." +msgstr "Python APIを使用して作成します。" + +#: ../../recipes-stories.rst:43 e8ff5379e66c49ce8776461b904793ef +msgid "" +"Variant using an existing HTML source [#f1]_ -- which in this case is " +"defined as a constant in the script::" +msgstr "既存のHTMLソース [#f1]_ を使用したバリアント – この場合、スクリプト内で定数として定義されています::" + +#: ../../recipes-stories.rst:69 fa5eb25963534e29b44ef593d2a08afd +msgid "" +"The above effect (sans-serif and blue text) could have been achieved by " +"using a separate CSS source like so::" +msgstr "上記の効果(サンセリフ体および青いテキスト)は、次のように別のCSSソースを使用することで達成できます::" + +#: ../../recipes-stories.rst:90 f582657322b343a4a158ef19912473f1 +msgid "The Python API variant -- everything is created programmatically::" +msgstr "Python APIのバリアント - すべてがプログラムで作成されます::" + +#: ../../recipes-stories.rst:114 a47a4f8569b14eb893a5ce9aeb3db643 +msgid "Both variants will produce the same output PDF." +msgstr "どちらのバリアントも同じ出力のPDFを生成します。" + +#: ../../recipes-stories.rst:122 2748abc7a81f48868dc86feadeed7cdc +msgid "How to use Images" +msgstr "画像の使用方法" + +#: ../../recipes-stories.rst:124 ecce5ae8803442768c9c0c9dcbaa0b91 +msgid "" +"Images can be referenced in the provided HTML source, or the reference to" +" a desired image can also be stored via the Python API. In any case, this" +" requires using an :ref:`Archive`, which refers to the place where the " +"image can be found." +msgstr "" +"画像は提供されたHTMLソース内で参照することができます。また、Python " +"APIを介して必要な画像への参照も保存することができます。どちらの場合も、画像が見つかる場所を指す :ref:`Archive` " +"を使用する必要があります。" + +#: ../../recipes-stories.rst:126 db9731d3f4a84908b18bd2c0bac15515 +msgid "" +"Images with the binary content embedded in the HTML source are **not " +"supported** by stories." +msgstr "HTMLソースにバイナリコンテンツが埋め込まれた画像は、ストーリーズではサポートされていません。" + +#: ../../recipes-stories.rst:128 621a5032b00c42b780871bc2b317708f +msgid "" +"We extend our \"Hello World\" example from above and display an image of " +"our planet right after the text. Assuming the image has the name " +"\"world.jpg\" and is present in the script's folder, then this is the " +"modified version of the above Python API variant::" +msgstr "" +"前述の「Hello " +"World」の例を拡張し、テキストの直後に地球の画像を表示します。画像の名前が「world.jpg」であり、スクリプトのフォルダに存在すると仮定すると、次に示すようにPython" +" APIバリアントを修正したものとなります::" + +#: ../../recipes-stories.rst:168 0bd066e55e8d42828fb2a8874920b48f +msgid "How to Read External HTML and CSS for a Story" +msgstr "外部のHTMLとCSSをストーリーに読み込む方法" + +#: ../../recipes-stories.rst:170 1dd80d2a8aa54fb7b10356e199ee9a9b +msgid "These cases are fairly straightforward." +msgstr "これらのケースはかなり直接的です。" + +#: ../../recipes-stories.rst:172 9e74fe2a5497459c880103c0b8c72e49 +msgid "" +"As a general recommendation, HTML and CSS sources should be **read as " +"binary files** and decoded before using them in a story. The Python " +"`pathlib.Path` provides convenient ways to do this::" +msgstr "" +"一般的な推奨事項として、HTMLとCSSのソースはバイナリファイルとして読み込まれ、ストーリーで使用する前にデコードされるべきです。Pythonの" +" `pathlib.Path` は、これを便利に行うための方法を提供しています。::" + +#: ../../recipes-stories.rst:193 b15949b2628f4e559db2166cab03e684 +msgid "How to Output Database Content with Story Templates" +msgstr "データベースの内容をストーリーテンプレートで出力する方法" + +#: ../../recipes-stories.rst:195 ee70b3e26cb24cd281fd481f24b0f1f0 +msgid "" +"This script demonstrates how to report SQL database content using an " +"**HTML template**." +msgstr "このスクリプトは**HTMLテンプレート**を使用してSQLデータベースの内容を報告する方法を示しています。" + +#: ../../recipes-stories.rst:199 b38843035c074c12875aa488d121d745 +msgid "The example SQL database contains two tables:" +msgstr "この例のSQLデータベースには2つのテーブルが含まれています:" + +#: ../../recipes-stories.rst:201 2a5e075abb974afb8fabb4bc379b46cc +msgid "" +"Table \"films\" contains one row per film with the fields **\"title\"**, " +"**\"director\"** and (release) **\"year\"**." +msgstr "テーブル「films」には、フィールド「title」、「director」、および(公開)「year」を持つ1つの映画ごとの行が含まれています。" + +#: ../../recipes-stories.rst:202 3ca9978ff3384e669316aec51f82f4e3 +msgid "" +"Table \"actors\" contains one row per actor and film title (fields " +"(actor) **\"name\"** and (film) **\"title\"**)." +msgstr "テーブル「actors」には、1つの行に1人の俳優と映画のタイトルが含まれています(フィールド(actor)「name」と(film)「title」)。" + +#: ../../recipes-stories.rst:204 bcb64849fff74c60903737949f68225d +msgid "" +"The story DOM consists of a template for one film, which reports film " +"data together with a list of casted actors." +msgstr "ストーリーDOMは、映画の1つのテンプレートから成り、映画のデータと出演俳優のリストを報告します。" + +#: ../../recipes-stories.rst:206 ../../recipes-stories.rst:231 +#: ../../recipes-stories.rst:257 ../../recipes-stories.rst:309 +#: ../../recipes-stories.rst:343 ../../recipes-stories.rst:377 +#: ../../recipes-stories.rst:398 ../../recipes-stories.rst:433 +#: ../../recipes-stories.rst:467 ../../recipes-stories.rst:497 +#: ../../recipes-stories.rst:531 2b3dd500f0434c64b97cf8b801ea18e2 +#: 64fc1d7aed8a43cd92ff2d020bf13ec8 7eb60a8b9a42446594e53b2bf3c68871 +#: a16e34a298a6425880ddf4fb4371e66e a22398fc103f4437ac647a31dd0d5fbf +#: b70a18528557465e8f61dc1122eca0ed c000544d4c274bbb9db7672b53850bc0 +#: d1607e50f7864da5afac7f3a5c6cfb74 d7f83da6ed26402bbc3b8fe0bc8c3e6c +#: daf59f85090b4c6dac31f2a2498b1d40 f3388231edfd4b7a869bc003cbfe749a +msgid "**Files:**" +msgstr "**ファイル:**" + +#: ../../recipes-stories.rst:208 54585f66fd3d47c8b7fa6a18c5e6d437 +msgid "`docs/samples/filmfestival-sql.py`" +msgstr "" + +#: ../../recipes-stories.rst:209 a25f79113d4845688acf7e96fc25d405 +msgid "`docs/samples/filmfestival-sql.db`" +msgstr "" + +#: ../../recipes-stories.rst:212 ../../recipes-stories.rst:235 +#: ../../recipes-stories.rst:263 ../../recipes-stories.rst:316 +#: ../../recipes-stories.rst:347 ../../recipes-stories.rst:360 +#: ../../recipes-stories.rst:381 ../../recipes-stories.rst:402 +#: ../../recipes-stories.rst:437 ../../recipes-stories.rst:471 +#: ../../recipes-stories.rst:501 ../../recipes-stories.rst:535 +#: 0db5e50af58744b9a3fb1c616e67a684 28fc170e69fd4a44b4fa959ea9b3e32e +#: 33679f3b5997465888776dda77506957 36f0be1fd15b4a8c84694c64491bd2c7 +#: 37527330942949a0bd3be5dba9e785c4 5485b7bba46c4eda83ec8b43ca3d2ecf +#: 7e7b5ee4ed9b4cf090a10dcda115473d addec11282d24d079e19186339ab1b65 +#: c2ad827c824c44718c01801f503eb406 d8b52254ec3547f19f86eaca688e79bd +#: de7617178b054648a843a36e24659eab fd40019209fb42f487a8b1836bcd3638 +msgid "|toggleStart|" +msgstr "" + +#: ../../recipes-stories.rst:216 ../../recipes-stories.rst:239 +#: ../../recipes-stories.rst:267 ../../recipes-stories.rst:320 +#: ../../recipes-stories.rst:351 ../../recipes-stories.rst:364 +#: ../../recipes-stories.rst:385 ../../recipes-stories.rst:406 +#: ../../recipes-stories.rst:441 ../../recipes-stories.rst:475 +#: ../../recipes-stories.rst:505 ../../recipes-stories.rst:539 +#: 1f64c481b7224504a02af891e0682cb3 266ea01fca494309b1cd1c30895d31bc +#: 27da42d77a6b435bb286157e8930f759 52efc716cc3147a6a6486dfc0795eb8e +#: 532a65455e3d4cc5acae0770c9f6847d 5e487c5fb8fa43078450b2f3e6e90793 +#: 78f9acaef114448fb9edcb0e9b8ad611 918069a403554628bffb1412599598c8 +#: 92734f09b2ab4e3c80847c556c5ede0f 9d3b034ebb21493683dc0e7e2bb7f19b +#: a4115389a45c49f79c5235d1cab2feba f46a83317e2744c6979346649f812fde +msgid "|toggleEnd|" +msgstr "" + +#: ../../recipes-stories.rst:225 50ac8bd58ac94ca9bad75235813275b8 +msgid "How to Integrate with Existing PDFs" +msgstr "既存のPDFと統合する方法" + +#: ../../recipes-stories.rst:227 30768c0ff00d46c8acc3718442620f18 +msgid "" +"Because a :ref:`DocumentWriter` can only write to a new file, stories " +"cannot be placed on existing pages. This script demonstrates a " +"circumvention of this restriction." +msgstr "" +":ref:`DocumentWriter` " +"は新しいファイルにのみ書き込むことができるため、ストーリーは既存のページに配置することはできません。このスクリプトは、この制限を回避する方法を示しています。" + +#: ../../recipes-stories.rst:229 c4b4b44242604021bc2168807cae604d +msgid "" +"The basic idea is letting :ref:`DocumentWriter` output to a PDF in " +"memory. Once the story has finished, we re-open this memory PDF and put " +"its pages to desired locations on **existing** pages via method " +":meth:`Page.show_pdf_page`." +msgstr "" +"基本的な考え方は、 :ref:`DocumentWriter` " +"がメモリ内のPDFに出力することです。ストーリーが完了したら、このメモリPDFを再度開き、メソッド " +":meth:`Page.show_pdf_page` を使用してそのページを既存のページに必要な位置に配置します。" + +#: ../../recipes-stories.rst:233 0ee71e800fb14b2ea803f1438f5379e4 +msgid "`docs/samples/showpdf-page.py`" +msgstr "" + +#: ../../recipes-stories.rst:248 2464de777541472497c10c81706a8467 +msgid "" +"How to Make Multi-Columned Layouts and Access Fonts from Package " +"`pymupdf-fonts`_" +msgstr "多段組のレイアウトを作成し、パッケージ `pymupdf-fonts`_ からフォントにアクセスする方法" + +#: ../../recipes-stories.rst:250 8dfd6dd8ca784d958d2a901ee4009a15 +msgid "" +"This script outputs an article (taken from Wikipedia) that contains text " +"and multiple images and uses a 2-column page layout." +msgstr "このスクリプトは、テキストと複数の画像を含む記事(Wikipediaから取得)を出力し、2列のページレイアウトを使用します。" + +#: ../../recipes-stories.rst:252 96f19cd490e0468983187394fd4f755d +msgid "" +"In addition, two \"Ubuntu\" font families from package `pymupdf-fonts`_ " +"are used instead of defaulting to Base-14 fonts." +msgstr "" +"さらに、デフォルトではBase-14フォントにデフォルトでなく、パッケージpymupdf-" +"fontsから2つの「Ubuntu」フォントファミリが使用されています。" + +#: ../../recipes-stories.rst:254 d104d23aa09e4c20a51bd1765f6ec72d +msgid "" +"Yet another feature used here is that all data -- the images and the " +"article HTML -- are jointly stored in a ZIP file." +msgstr "ここで使用される別の機能は、すべてのデータ – 画像と記事のHTML – が共にZIPファイルに格納されていることです。" + +#: ../../recipes-stories.rst:259 aabca9ed8f1545efa8e6c35a8fa570ed +msgid "`docs/samples/quickfox.py`" +msgstr "" + +#: ../../recipes-stories.rst:260 ../../recipes-stories.rst:313 +#: 56254f7b3a3f48038ba0fec7307956e4 d0d48278d419432ea98f56ca84a85f66 +msgid "`docs/samples/quickfox.zip`" +msgstr "" + +#: ../../recipes-stories.rst:276 c5b48fe9ccc34243a9cafef7d92058ac +msgid "How to Make a Layout which Wraps Around a Predefined \"no go area\" Layout" +msgstr "あらかじめ定義された「禁止エリア」レイアウトに囲まれたレイアウトの作成方法" + +#: ../../recipes-stories.rst:279 9aeb4c6c24cf45918fc271c1bb22d655 +msgid "" +"This is a demo script using PyMuPDF's Story class to output text as a PDF" +" with a two-column page layout." +msgstr "これは、PyMuPDFのStoryクラスを使用してテキストを2列のページレイアウトでPDFとして出力するデモスクリプトです。" + +#: ../../recipes-stories.rst:282 9a9deed471b545109ffbf69ae0a63c13 +msgid "The script demonstrates the following features:" +msgstr "このスクリプトは、以下の機能を示しています:" + +#: ../../recipes-stories.rst:284 8325013303e24647a592be6d38854cfa +msgid "Layout text around images of an existing (\"target\") PDF." +msgstr "既存の(「ターゲット」)PDFの画像の周りにテキストをレイアウトします。" + +#: ../../recipes-stories.rst:285 12cd3a4970c942d28bc420575afefa73 +msgid "" +"Based on a few global parameters, areas on each page are identified, that" +" can be used to receive text layouted by a Story." +msgstr "各ページの特定の領域が、Storyによってレイアウトされたテキストを受け入れるために使用できるように、いくつかのグローバルパラメータに基づいて識別されます。" + +#: ../../recipes-stories.rst:287 5fb1ec0a189e438fbbb5d287d889152d +msgid "" +"These global parameters are not stored anywhere in the target PDF and " +"must therefore be provided in some way:" +msgstr "これらのグローバルパラメータは、ターゲットPDF内のどこにも保存されず、したがってどのような方法で提供される必要があります:" + +#: ../../recipes-stories.rst:290 2939a7b2c8ff40199db1fb38e01d1c41 +msgid "The width of the border(s) on each page." +msgstr "各ページのボーダーの幅。" + +#: ../../recipes-stories.rst:291 07ff55f6cd4b4ea3ada2d0100b667d23 +msgid "" +"The fontsize to use for text. This value determines whether the provided " +"text will fit in the empty spaces of the (fixed) pages of target PDF. It " +"cannot be predicted in any way. The script ends with an exception if " +"target PDF has not enough pages, and prints a warning message if not all " +"pages receive at least some text. In both cases, the FONTSIZE value can " +"be changed (a float value)." +msgstr "テキストに使用するフォントサイズ。この値は、提供されたテキストがターゲットPDFの(固定された)ページの空白スペースに収まるかどうかを決定します。これはどのように予測することもできません。ターゲットPDFに十分なページがない場合、スクリプトは例外をスローし、すべてのページが少なくとも一部のテキストを受け取らない場合は警告メッセージが表示されます。どちらの場合も、FONTSIZEの値を変更できます(浮動小数点数)。" + +#: ../../recipes-stories.rst:297 7f2693aa52f8413c957e3697cd159a39 +msgid "Use of a 2-column page layout for the text." +msgstr "テキストのための2列のページレイアウトの使用。" + +#: ../../recipes-stories.rst:298 74329206c6d84722884bd5ba5f248027 +msgid "" +"The layout creates a temporary (memory) PDF. Its produced page content " +"(the text) is used to overlay the corresponding target page. If text " +"requires more pages than are available in target PDF, an exception is " +"raised. If not all target pages receive at least some text, a warning is " +"printed." +msgstr "レイアウトは一時的な(メモリ)PDFを作成します。その生成されたページのコンテンツ(テキスト)は、対応するターゲットページに重ねて配置されます。テキストがターゲットPDFの利用可能なページよりも多くのページを必要とする場合、例外が発生します。すべてのターゲットページが少なくとも一部のテキストを受け取らない場合、警告が表示されます。" + +#: ../../recipes-stories.rst:302 d18a70eaed5c44b98873dc74a92721a1 +msgid "" +"The script reads \"image-no-go.pdf\" in its own folder. This is the " +"\"target\" PDF. It contains 2 pages with each 2 images (from the original" +" article), which are positioned at places that create a broad overall " +"test coverage. Otherwise the pages are empty." +msgstr "" +"スクリプトは、自分自身のフォルダ内の「image-no-" +"go.pdf」を読み込みます。これが「ターゲット」PDFです。オリジナルの記事から2つの画像(各2ページ)を含み、それらは広範なテストカバレッジを作成する場所に配置されています。それ以外の場合、ページは空です。" + +#: ../../recipes-stories.rst:306 f46ddb852a574bd5a7bac7520a202c10 +msgid "" +"The script produces \"quickfox-image-no-go.pdf\" which contains the " +"original pages and image positions, but with the original article text " +"laid out around them." +msgstr "" +"スクリプトは「quickfox-image-no-" +"go.pdf」を生成し、元のページと画像の位置を含みますが、元の記事のテキストがその周りにレイアウトされます。" + +#: ../../recipes-stories.rst:311 7df54784e06541489011541c3865adce +msgid "`docs/samples/quickfox-image-no-go.py`" +msgstr "" + +#: ../../recipes-stories.rst:312 06568562593e4d99b28e5de2e3b0939a +msgid "`docs/samples/quickfox-image-no-go.pdf`" +msgstr "" + +#: ../../recipes-stories.rst:329 71c7aa076de44ec38e26d2fb754afc9b +msgid "How to Output an HTML Table" +msgstr "HTMLテーブルの出力方法" + +#: ../../recipes-stories.rst:331 3301f44c71164017b224858b8757665b +msgid "Outputting HTML tables is supported as follows:" +msgstr "HTMLテーブルの出力は次のようにサポートされています:" + +#: ../../recipes-stories.rst:333 41e583af9b0542cdb2079c8ff6906052 +msgid "" +"Flat table layouts are supported (\"rows x columns\"), no support of the " +"\"colspan\" / \"rowspan\" attributes." +msgstr "フラットなテーブルレイアウト(「行 × 列」)がサポートされており、「colspan」/「rowspan」属性はサポートされていません。" + +#: ../../recipes-stories.rst:334 47d7baea9d4e4285881ba226de5af09d +msgid "" +"Table header tag :htmlTag:`th` supports attribute \"scope\" with values " +"\"row\" or \"col\". Applicable text will be bold by default." +msgstr "" +"テーブルヘッダータグ :htmlTag:`th` は、属性 “scope” をサポートし、値として “row” または “col” " +"を持ちます。適用されるテキストはデフォルトで太字になります。" + +#: ../../recipes-stories.rst:335 74d5008eb5a141a3a1cfc8a54b018574 +msgid "" +"Column widths are computed automatically based on column content. They " +"cannot be directly set." +msgstr "列の幅は、列のコンテンツに基づいて自動的に計算されます。直接設定することはできません。" + +#: ../../recipes-stories.rst:336 eced7f475d8d4d39b1c9a6daa98cb9e3 +msgid "" +"Table **cells may contain images** which will be considered in the column" +" width calculation magic." +msgstr "テーブルのセルには画像を含めることができ、これは列幅計算の際に考慮されます。" + +#: ../../recipes-stories.rst:337 2d09432449124bd29ad88bb9203b3eb7 +msgid "" +"Row heights are computed automatically based on row content - leading to " +"multi-line rows where needed." +msgstr "行の高さは、行のコンテンツに基づいて自動的に計算され、必要に応じて複数行の行が生成されます。" + +#: ../../recipes-stories.rst:338 99cd09b56b7a4038b94e8cc59054e98e +msgid "" +"The potentially multiple lines of a table row will always be kept " +"together on one page (respectively \"where\" rectangle) and not be split." +msgstr "テーブルの行の潜在的に複数行は、常に1つのページ(または “where” 矩形)にまとめて表示され、分割されることはありません。" + +#: ../../recipes-stories.rst:339 2b0dd155fb9b46abb1a3a34b5165e5cb +msgid "" +"Table header rows are only **shown on the first page / \"where\" " +"rectangle.**" +msgstr "テーブルのヘッダー行は、最初のページ / \"where\" 矩形のみに表示されます。" + +#: ../../recipes-stories.rst:340 9c971454188c471695037fe19b0b291f +msgid "" +"The \"style\" attribute is ignored when given directly in HTML table " +"elements. Styling for a table and its elements must happen separately, in" +" CSS source or within the :htmlTag:`style` tag." +msgstr "" +"直接HTMLテーブル要素に \"style\" 属性が与えられた場合、無視されます。テーブルとその要素のスタイリングは、CSSソース内または" +" :htmlTag:`style` タグ内で別途行う必要があります。" + +#: ../../recipes-stories.rst:341 b6be54c8961a40fca5e5624606c40804 +msgid "" +"Styling for :htmlTag:`tr` elements is not supported and ignored. " +"Therefore, a table-wide grid or alternating row background colors are not" +" supported. One of the following example scripts however shows an easy " +"way to deal with this limitation." +msgstr "" +":htmlTag:`tr` " +"要素のスタイリングはサポートされておらず、無視されます。したがって、テーブル全体のグリッドや交互の行の背景色はサポートされていません。ただし、以下の例スクリプトのいずれかは、この制限に対処する簡単な方法を示しています。" + +#: ../../recipes-stories.rst:345 3250a44850a04ea2b556eaa99626d432 +msgid "`docs/samples/table01.py` This script reflects basic features." +msgstr "`docs/samples/table01.py` このスクリプトは基本的な機能を反映しています。" + +#: ../../recipes-stories.rst:353 338c9d76f79b41df86b53304991fa447 +msgid "" +"`docs/samples/national-capitals.py` Advanced script extending table " +"output options using simple additional code:" +msgstr "" +"`docs/samples/national-capitals.py` " +"シンプルな追加コードを使用してテーブル出力オプションを拡張する高度なスクリプト:" + +#: ../../recipes-stories.rst:355 90d13dbe2506480f9d05c17929eb6886 +msgid "Multi-page output simulating **repeating header rows**" +msgstr "繰り返しヘッダー行をシミュレートするマルチページ出力" + +#: ../../recipes-stories.rst:356 cbf54ca5c2024e268d1186a280b3aa24 +msgid "Alternating table row background colors" +msgstr "交互のテーブル行の背景色" + +#: ../../recipes-stories.rst:357 2f63b307cf484d58a093efeda123e5fd +msgid "Table rows and columns delimited by gridlines" +msgstr "グリッドラインで区切られたテーブル行と列" + +#: ../../recipes-stories.rst:358 e5f6f8efbb6f4165b34f336502bd9f1c +msgid "Table rows dynamically generated / filled with data from an SQL database" +msgstr "SQLデータベースからデータを動的に生成/埋めるテーブル行" + +#: ../../recipes-stories.rst:373 fe9ae949732f4a8aa60268158ee88be6 +msgid "How to Create a Simple Grid Layout" +msgstr "シンプルなグリッドレイアウトの作成方法" + +#: ../../recipes-stories.rst:375 715590c3c78d4629ad6c5345990d29e2 +msgid "" +"By creating a sequence of :ref:`Story` objects within a grid created via " +"the :ref:`make_table` function a developer can " +"create grid layouts as required." +msgstr "" +":ref:`make_table` 関数を使用して作成されたグリッド内で :ref:`Story` " +"オブジェクトのシーケンスを作成することで、開発者は必要なグリッドレイアウトを作成できます。" + +#: ../../recipes-stories.rst:379 dd275a6adf8f4b8ea21b21fe0f4be667 +msgid "`docs/samples/simple-grid.py`" +msgstr "" + +#: ../../recipes-stories.rst:394 5e3cbf68e72d48f9a4d031caea74efe1 +msgid "How to Generate a Table of Contents" +msgstr "目次の生成方法" + +#: ../../recipes-stories.rst:396 eef825fca149402fa59fd4d03097fbcc +msgid "" +"This script lists the source code of all Python scripts that live in the " +"script's directory." +msgstr "このスクリプトは、スクリプトのディレクトリに存在するすべてのPythonスクリプトのソースコードをリスト表示します。" + +#: ../../recipes-stories.rst:400 f90b481f29e84be197d7b43765469464 +msgid "`docs/samples/code-printer.py`" +msgstr "" + +#: ../../recipes-stories.rst:409 c06dd956b971493bb3b3cbb5ad0a3e22 +msgid "It features the following capabilities:" +msgstr "次の機能が備わっています:" + +#: ../../recipes-stories.rst:411 b4d75405cd1b4d2bb534b59fb384119e +msgid "" +"Automatic generation of a Table of Contents (TOC) on separately numbered " +"pages at the start of the document - using a specialized :ref:`Story`." +msgstr "専用の :ref:`Story` を使用して、文書の冒頭に独立したページごとに自動的に目次(TOC)を生成します。" + +#: ../../recipes-stories.rst:413 6337def52daa43f9abdd769a10a39184 +msgid "" +"Use of 3 separate :ref:`Story` objects per page: header story, footer " +"story and the story for printing the Python sources." +msgstr "" +"ページごとに3つの別個の :ref:`Story` " +"オブジェクトを使用:ヘッダーストーリー、フッターストーリー、Pythonソースの印刷用ストーリー。" + +#: ../../recipes-stories.rst:415 4f7e17024ef64638a584ff82424ee1e1 +msgid "" +"The page **footer is automatically changed** to show the name of the " +"current Python file." +msgstr "ページフッターは自動的に変更され、現在のPythonファイル名が表示されます。" + +#: ../../recipes-stories.rst:417 13b96aa9ead945c698966dcda2ef5de1 +msgid "" +"Use of :meth:`Story.element_positions` to collect the data for the TOC " +"and for the dynamic adjustment of page footers. This is an example of a " +"**bidirectional communication** between the story output process and the " +"script." +msgstr "" +":meth:`Story.element_positions` " +"の使用により、TOCのデータの収集とページフッターの動的調整に使用されます。これは、ストーリー出力プロセスとスクリプト間の双方向コミュニケーションの例です。" + +#: ../../recipes-stories.rst:419 a1b7b48737734821b3d4cee8cb99deb6 +msgid "" +"The main PDF with the Python sources is being written to memory by its " +":ref:`DocumentWriter`. Another :ref:`Story` / :ref:`DocumentWriter` pair " +"is then used to create a (memory) PDF for the TOC pages. Finally, both " +"these PDFs are joined and the result stored to disk." +msgstr "" +"主なPythonソースをその :ref:`DocumentWriter` によってメモリに書き込みます。その後、もう一つの " +":ref:`Story` / :ref:`DocumentWriter` " +"のペアを使用して(メモリ)PDFをTOCページのために作成します。最後に、これらのPDFは結合されて結果がディスクに保存されます。" + +#: ../../recipes-stories.rst:428 99dd73162add4ea0afbb0634cf53dcf6 +msgid "How to Display a List from JSON Data" +msgstr "JSONデータからリストを表示する方法" + +#: ../../recipes-stories.rst:430 6fdc8240537845b5b007826459e137e2 +msgid "" +"This example takes some JSON data input which it uses to populate a " +":ref:`Story`. It also contains some visual text formatting and shows how " +"to add links." +msgstr "" +"この例では、JSONデータ入力を使用して :ref:`Story` " +"を生成し、いくつかの視覚的なテキスト書式設定を行い、リンクを追加する方法を示しています。" + +#: ../../recipes-stories.rst:435 af211242fc1e45d4935f0534d472ea83 +msgid "`docs/samples/json-example.py`" +msgstr "" + +#: ../../recipes-stories.rst:451 d1800a418d944e7bbe3af548c292e262 +msgid "Using the Alternative :meth:`Story.write*()` functions" +msgstr ":meth:`Story.write*()` 関数の使用方法" + +#: ../../recipes-stories.rst:453 a6aa8d8c6606441b896c25ed8b547c36 +msgid "" +"The :meth:`Story.write*()` functions provide a different way to use the " +":ref:`Story` functionality, removing the need for calling code to " +"implement a loop that calls :meth:`Story.place()` and " +":meth:`Story.draw()` etc, at the expense of having to provide at least a " +"`rectfn()` callback." +msgstr "" +":meth:`Story.write*()` 関数は、 :ref:`Story` " +"機能を異なる方法で使用するための方法を提供します。これにより、呼び出しコードが :meth:`Story.place()` や " +":meth:`Story.draw()` などを呼び出すループを実装する必要がなくなりますが、少なくとも `rectfn()` " +"コールバックを提供する必要があります。" + +#: ../../recipes-stories.rst:462 0b9c207cf6c34028bd59a17ba745a06b +msgid "How to do Basic Layout with :meth:`Story.write()`" +msgstr ":meth:`Story.write()` を使用した基本的なレイアウトの方法" + +#: ../../recipes-stories.rst:464 3afb41fae5c04c80bb274a6695783cf8 +msgid "" +"This script lays out multiple copies of its own source code, into four " +"rectangles per page." +msgstr "このスクリプトは、自身のソースコードの複数のコピーを1ページあたり4つの四角形にレイアウトします。" + +#: ../../recipes-stories.rst:469 eaa7c3914c6e4d10b06fc5e83aef6ef9 +msgid "`docs/samples/story-write.py`" +msgstr "" + +#: ../../recipes-stories.rst:484 254625a2c4634deb8a6e5fed8ffd181e +msgid "" +"How to do Iterative Layout for a Table of Contents with " +":meth:`Story.write_stabilized()`" +msgstr ":meth:`Story.write_stabilized()` を使用した目次付きの繰り返しレイアウトの方法" + +#: ../../recipes-stories.rst:486 f99a682ccb04406c9c3fb8d6add61ab6 +msgid "" +"This script creates html content dynamically, adding a contents section " +"based on :ref:`ElementPosition` items that have non-zero `.heading` " +"values." +msgstr "" +"このスクリプトは、要素の位置情報(:ref:`ElementPosition` アイテム)を基にコンテンツを動的に作成し、`.heading` " +"値がゼロでない要素に基づいて目次セクションを追加します。" + +#: ../../recipes-stories.rst:489 f707d1092f04401f906070fe2e82c373 +msgid "" +"The contents section is at the start of the document, so modifications to" +" the contents can change page numbers in the rest of the document, which " +"in turn can cause page numbers in the contents section to be incorrect." +msgstr "目次セクションは文書の先頭に配置されているため、目次への変更によって文書の他の部分のページ番号が変更され、それに伴い目次セクションのページ番号が正しくなくなる可能性があります。" + +#: ../../recipes-stories.rst:493 0fee61fd78ed4f9ab50188a4fd67cce7 +msgid "" +"So the script uses :meth:`Story.write_stabilized()` to repeatedly lay " +"things out until things are stable." +msgstr "したがって、スクリプトは :meth:`Story.write_stabilized()` を使用して、安定するまで繰り返しレイアウトを行います。" + +#: ../../recipes-stories.rst:499 42fb0df4e27948abb714018fac0417f5 +msgid "`docs/samples/story-write-stabilized.py`" +msgstr "" + +#: ../../recipes-stories.rst:514 17025d9951f14511b6e562a16ce16843 +msgid "" +"How to do Iterative Layout and Create PDF Links with " +":meth:`Story.write_stabilized_links()`" +msgstr "繰り返しレイアウトと PDF リンクの作成方法::meth:`Story.write_stabilized_links()` の使用" + +#: ../../recipes-stories.rst:516 fb00dcb0ee8d442495b1e4dff18aa8bc +msgid "" +"This script is similar to the one described in \"How to use " +":meth:`Story.write_stabilized()`\" above, except that the generated PDF " +"also contains links that correspond to the internal links in the original" +" html." +msgstr "" +"このスクリプトは、「:meth:`Story.write_stabilized()` " +"の使用方法」で説明されているものと類似していますが、生成される PDF には、元の HTML 内の内部リンクに対応するリンクも含まれています。" + +#: ../../recipes-stories.rst:520 6dc7e0e7857b4aa5a713ffcb6869f11c +msgid "" +"This is done by using :meth:`Story.write_stabilized_links()`; this is " +"slightly different from :meth:`Story.write_stabilized()`:" +msgstr "" +"これは、:meth:`Story.write_stabilized_links()` を使用して行われます。これは " +":meth:`Story.write_stabilized()` とわずかに異なります:" + +#: ../../recipes-stories.rst:523 77950ef72ea147ec8060ff1ce1609032 +msgid "It does not take a :ref:`DocumentWriter` `writer` arg." +msgstr ":ref:`DocumentWriter` の `writer` 引数は必要ありません。" + +#: ../../recipes-stories.rst:524 f61073b1496a472eb4ffa2abdafdbcdd +msgid "It returns a PDF :ref:`Document` instance." +msgstr "PDF :ref:`Document` のインスタンスを返します。" + +#: ../../recipes-stories.rst:526 8200eb3c7ddb4141beb9fd18576a6795 +msgid "" +"[The reasons for this are a little involved; for example a " +":ref:`DocumentWriter` is not necessarily a PDF writer, so doesn't really " +"work in a PDF-specific API.]" +msgstr "" +"[これに関する理由は少し複雑です。例えば、 :ref:`DocumentWriter` が必ずしも PDF ライターであるとは限らず、PDF " +"特有の API ではあまりうまく動作しないためです。]" + +#: ../../recipes-stories.rst:533 6cb38487f6e444d1a3092633504e9b1e +msgid "`docs/samples/story-write-stabilized-links.py`" +msgstr "" + +#: ../../recipes-stories.rst:547 7ab6a39468b647919043a79fb3f1c46b +msgid "Footnotes" +msgstr "脚注" + +#: ../../recipes-stories.rst:548 6b3a6c4d3e5341b0b7eb4ef2b8a275a9 +msgid "HTML & CSS support" +msgstr "" + +#: ../../recipes-stories.rst:552 a2b43d5c16a449989cd09987682f3446 +msgid "" +"At the time of writing the HTML engine for Stories is fairly basic and " +"supports a subset of CSS2 attributes." +msgstr "執筆時点では、ストーリーのHTMLエンジンはかなり基本的であり、一部のCSS2属性をサポートしています。" + +#: ../../recipes-stories.rst:554 97098e31431e469cb54b616f0cdfed52 +msgid "Some important CSS support to consider:" +msgstr "考慮すべき重要なCSSサポート:" + +#: ../../recipes-stories.rst:556 6aea20cf470f4b5b85843b4657ecc2c2 +msgid "The only available layout is relative layout." +msgstr "唯一利用可能なレイアウトは相対レイアウトです。" + +#: ../../recipes-stories.rst:557 569101dce0664d508b481c1cce2d5b73 +msgid "`background` is unavailable, use `background-color` instead." +msgstr "`background` は利用できませんが、代わりに `background-color` を使用してください。" + +#: ../../recipes-stories.rst:558 6d32b59649cf4d4ea1b9b964f070004f +msgid "`float` is unavailable." +msgstr "`float` は利用できません。" + +#: ../../footer.rst:60 bf323a0643e14a8fbf2f882c9ed3db27 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes-text.mo b/docs/locales/ja/LC_MESSAGES/recipes-text.mo new file mode 100644 index 000000000..20480d593 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes-text.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes-text.po b/docs/locales/ja/LC_MESSAGES/recipes-text.po new file mode 100644 index 000000000..925becb51 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes-text.po @@ -0,0 +1,862 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 db5f378b70004af590010a41f0163be1 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 148acec04cbf474bb41f5d5e3c1ec508 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 e6b3ab82dcbf41f687b68d59732a00a5 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../recipes-text.rst:7 70700f14953047898a918c1a07026ce4 +msgid "Text" +msgstr "テキスト" + +#: ../../recipes-text.rst:13 83030b51505549bba2596e8da94111a2 +msgid "How to Extract all Document Text" +msgstr "すべてのドキュメントテキストを抽出する方法" + +#: ../../recipes-text.rst:15 d8d7d26a27a346a5b03d28e400fed598 +msgid "" +"This script will take a document filename and generate a text file from " +"all of its text." +msgstr "このスクリプトは、ドキュメントのファイル名を受け取り、そのテキストからテキストファイルを生成します。" + +#: ../../recipes-text.rst:17 c4c73613ac86439c93304a2549dbad7f +msgid "The document can be any :ref:`supported type`." +msgstr "ドキュメントは、:ref:`サポートされている ` 任意のタイプのものが使用できます。" + +#: ../../recipes-text.rst:19 8a339400b648449eab4a10cf290842dc +msgid "" +"The script works as a command line tool which expects the document " +"filename supplied as a parameter. It generates one text file named " +"\"filename.txt\" in the script directory. Text of pages is separated by a" +" form feed character::" +msgstr "このスクリプトはコマンドラインツールとして機能し、ドキュメントのファイル名をパラメータとして受け取ります。スクリプトのディレクトリに「filename.txt」という名前のテキストファイルが生成されます。ページのテキストはフォームフィード文字で区切られます。" + +#: ../../recipes-text.rst:28 8a4f5ec777ce40e8a27b5ef06541f000 +msgid "" +"The output will be plain text as it is coded in the document. No effort " +"is made to prettify in any way. Specifically for PDF, this may mean " +"output not in usual reading order, unexpected line breaks and so forth." +msgstr "出力はドキュメント内でコード化された通りのプレーンテキストになるため、見栄えの調整は行いません。特にPDFの場合、通常の読み順にならない出力や予期せぬ改行などが発生するかもしれません。" + +#: ../../recipes-text.rst:30 fa3eea374680406b9a9fba35f66055ca +msgid "" +"You have many options to rectify this -- see chapter :ref:`Appendix2`. " +"Among them are:" +msgstr "" +"これを修正するための多くのオプションがあります。詳細は :ref:`Appendix2` " +"章「埋め込みファイルに関する考慮事項」を参照してください。以下の方法があります:" + +#: ../../recipes-text.rst:32 93321f1d110c45879711cddd65ff7248 +msgid "" +"Extract text in HTML format and store it as a HTML document, so it can be" +" viewed in any browser." +msgstr "テキストをHTML形式で抽出し、HTMLドキュメントとして保存することで、任意のブラウザで表示できるようにします。" + +#: ../../recipes-text.rst:33 d31ce8ccb61f4333b9485c37ae95f7a1 +msgid "" +"Extract text as a list of text blocks via *Page.get_text(\"blocks\")*. " +"Each item of this list contains position information for its text, which " +"can be used to establish a convenient reading order." +msgstr "Page.get_text(\"blocks\")を使ってテキストブロックのリストとして抽出します。リストの各アイテムにはテキストの位置情報が含まれており、便利な読み順を確立するのに使用できます。" + +#: ../../recipes-text.rst:34 97d77e0ecd494eb89e58bf1dfd6f89b0 +msgid "" +"Extract a list of single words via *Page.get_text(\"words\")*. Its items " +"are words with position information. Use it to determine text contained " +"in a given rectangle -- see next section." +msgstr "Page.get_text(\"words\")を使って単語のリストを抽出します。各アイテムには位置情報が含まれています。これを使用して特定の四角形に含まれるテキストを決定します。" + +#: ../../recipes-text.rst:36 f695a7c93e504149bb7de0f587892b2a +msgid "See the following two sections for examples and further explanations." +msgstr "以下の2つのセクションを見て、例と詳細な説明をご覧ください。" + +#: ../../recipes-text.rst:44 37809012042b408fab85b6b39de8e823 +msgid "How to Extract Text as Markdown" +msgstr "テキストをMarkdown形式で抽出する方法" + +#: ../../recipes-text.rst:46 5c0dcdf0ae704f9aa9fd1bc738b0c185 +msgid "" +"This is especially useful for :title:`RAG/LLM` environments - please see " +":ref:`Outputting as Markdown `." +msgstr "" +"これは、特に :title:`RAG/LLM` 環境にとって便利です - :ref:`Outputting as Markdown " +"` を参照してください。" + +#: ../../recipes-text.rst:52 442230f4e860484f83323c6a20d08e08 +msgid "How to Extract Key-Value Pairs from a Page" +msgstr "ページからキーと値のペアを抽出する方法" + +#: ../../recipes-text.rst:53 9555400172a34d69bae562156d51b80c +msgid "" +"If the layout of a page is *\"predictable\"* in some sense, then there is" +" a simple way to find the values for a given set of keywords fast and " +"easily -- without using regular expressions. Please see `this example " +"script `_." +msgstr "" +"もしページのレイアウトがある程度予測可能であれば、正規表現を使用せずに、特定のキーワードに対する値を迅速かつ簡単に見つける方法があります。 " +"`以下の例のスクリプト `_ を参照してください。" + +#: ../../recipes-text.rst:55 f8548ec2c56441c0aa02ed28f3607350 +msgid "\"Predictable\" in this context means:" +msgstr "ここでの「予測可能」とは、次のような意味です:" + +#: ../../recipes-text.rst:57 0e297df5b4ed4548956122fabdd7e963 +msgid "" +"Every keyword is followed by its value -- no other text is present in " +"between them." +msgstr "各キーワードの後にはその値が続きます。それらの間に他のテキストはありません。" + +#: ../../recipes-text.rst:58 f9375ed8400040619728b9f1d3031882 +msgid "" +"The bottom of the value's boundary box is **not above** the one of the " +"keyword." +msgstr "値の境界ボックスの下端は、キーワードの境界ボックスよりも上にありません。" + +#: ../../recipes-text.rst:59 be1d806d61424c179b103fbdfef14d11 +msgid "" +"There are **no other restrictions**: the page layout may or may not be " +"fixed, and the text may also have been stored as one string. Key and " +"value may have any distance from each other." +msgstr "他の制約はありません:ページのレイアウトが固定されているかどうかは問いませんし、テキストは1つの文字列として保存されている可能性もあります。キーと値はお互いに任意の距離を持つかもしれません。" + +#: ../../recipes-text.rst:61 05731f2a8f5b4e72a99dbd3afb4628ff +msgid "" +"For example, the following five key-value pairs will be correctly " +"identified::" +msgstr "例として、以下の5つのキーと値のペアが正しく識別されます::" + +#: ../../recipes-text.rst:77 635adbc1c75c4e019220ca2fafd16863 +msgid "How to Extract Text from within a Rectangle" +msgstr "四角形内のテキストを抽出する方法" + +#: ../../recipes-text.rst:78 c66f827c3b4e4d40937891f31bca0ed8 +msgid "" +"There is now (v1.18.0) more than one way to achieve this. We therefore " +"have created a `folder `_ in the PyMuPDF-Utilities " +"repository specifically dealing with this topic." +msgstr "" +"現在(v1.18.0)では、これを実現するための複数の方法があります。そのため、私たちは `PyMuPDF-Utilities " +"`_ リポジトリに、この特定のトピックに対応するフォルダを作成しました。" + +#: ../../recipes-text.rst:88 d80121123daf4bf2a3e069027d837c3f +msgid "How to Extract Text in Natural Reading Order" +msgstr "自然な読み順でテキストを抽出する方法" + +#: ../../recipes-text.rst:90 c1cc039bb3804acebad6d1f03565e6be +msgid "" +"One of the common issues with PDF text extraction is, that text may not " +"appear in any particular reading order." +msgstr "PDFのテキスト抽出によくある問題の1つは、テキストが特定の読み順に表示されないことです。" + +#: ../../recipes-text.rst:92 16fbf3e452294bcd88f4d82c83bc87d6 +msgid "" +"This is the responsibility of the PDF creator (software or a human). For " +"example, page headers may have been inserted in a separate step -- after " +"the document had been produced. In such a case, the header text will " +"appear at the end of a page text extraction (although it will be " +"correctly shown by PDF viewer software). For example, the following " +"snippet will add some header and footer lines to an existing PDF::" +msgstr "これはPDFの作成者(ソフトウェアまたは人間)の責任です。たとえば、ページヘッダーはドキュメントが作成された後の別のステップで挿入された可能性があります。そのような場合、ヘッダーテキストはページテキストの抽出の最後に表示されることがあります(ただし、PDFビューアソフトウェアでは正しく表示されます)。以下のスニペットは、既存のPDFにいくつかのヘッダーとフッターの行を追加します::" + +#: ../../recipes-text.rst:104 978688d53bd949cc898416980acbc003 +msgid "" +"The text sequence extracted from a page modified in this way will look " +"like this:" +msgstr "このように変更されたページから抽出されたテキストのシーケンスは次のようになります:" + +#: ../../recipes-text.rst:106 ef048f5a008646e991b0568de7841cb2 +msgid "original text" +msgstr "元のテキスト" + +#: ../../recipes-text.rst:107 dc14bb8d05724f89a619b2d3cceb4926 +msgid "header line" +msgstr "ヘッダーライン" + +#: ../../recipes-text.rst:108 babbbbad94c4402ea4effe7f7adbca5d +msgid "footer line" +msgstr "フッターライン" + +#: ../../recipes-text.rst:110 e694010b792d4eb0855e2f6564a46a67 +msgid "" +"PyMuPDF has several means to re-establish some reading sequence or even " +"to re-generate a layout close to the original:" +msgstr "PyMuPDFには、いくつかの方法で読み順を再確立したり、元のレイアウトに近い形で再生成する手段があります:" + +#: ../../recipes-text.rst:112 716e99c2ff9344fe81fafd78de2007c3 +msgid "" +"Use `sort` parameter of :meth:`Page.get_text`. It will sort the output " +"from top-left to bottom-right (ignored for XHTML, HTML and XML output)." +msgstr "" +":meth:`Page.get_text` の `sort` " +"パラメーターを使用します。これにより、出力が左上から右下に向かってソートされます(XHTML、HTML、XML出力には無効です)。" + +#: ../../recipes-text.rst:113 bcd2685affe8476281c9b4f6df0f6ff9 +msgid "" +"Use the `pymupdf` module in CLI: `python -m pymupdf gettext ...`, which " +"produces a text file where text has been re-arranged in layout-preserving" +" mode. Many options are available to control the output." +msgstr "" +"CLIで `pymupdf` モジュールを使用します: `python -m pymupdf gettext ...` " +"。これにより、テキストがレイアウトを保持するモードで再配置されたテキストファイルが生成されます。出力を制御するための多くのオプションが利用可能です。" + +#: ../../recipes-text.rst:115 b14412bcce2e4fabab0f9c2421c08934 +msgid "" +"You can also use the above mentioned `script " +"`_ with your modifications." +msgstr "" +"また、上記の `スクリプト `_ を自分の変更とともに使用することもできます。" + +#: ../../recipes-text.rst:122 6fb4548c9a0a42da9939b9ba59fee62a +msgid "" +"How to :index:`Extract Table Content ` from " +"Documents" +msgstr "ドキュメントから表の内容を抽出する方法" + +#: ../../recipes-text.rst:123 b9aeb958d874414a8dfbe817d7e491be +msgid "" +"If you see a table in a document, you are normally not looking at " +"something like an embedded Excel or other identifiable object. It usually" +" is just normal, standard text, formatted to appear as tabular data." +msgstr "文書で表を見る場合、通常は埋め込まれたExcelなどの識別可能なオブジェクトのようなものではありません。通常、単なる通常の標準テキストで、表のデータとして表示されるようにフォーマットされています。" + +#: ../../recipes-text.rst:125 48f2b8ac5dc54b469a4fac3b57e95083 +msgid "" +"Extracting tabular data from such a page area therefore means that you " +"must find a way to **identify** the table area (i.e. its boundary box), " +"then **(1)** graphically indicate table and column borders, and **(2)** " +"then extract text based on this information." +msgstr "したがって、そのようなページ領域から表のデータを抽出するには、まず表の領域(つまり、その境界ボックス)を特定する方法を見つける必要があり、その後(1)グラフィカルに表と列の境界を示し、(2)この情報に基づいてテキストを抽出する必要があります。" + +#: ../../recipes-text.rst:127 b55a6db346bb4df3adb269cd4da2d2a1 +msgid "" +"This can be a very complex task, depending on details like the presence " +"or absence of lines, rectangles or other supporting vector graphics." +msgstr "これは、線、四角形、またはその他のサポートベクトルグラフィックの存在または不在などの詳細に依存するため、非常に複雑なタスクになる可能性があります。" + +#: ../../recipes-text.rst:129 2ab382191b1e4c8dbcb05ab17b9a6f16 +msgid "" +"Method :meth:`Page.find_tables` does all that for you, with a high table " +"detection precision. Its great advantage is that there are no external " +"library dependencies, nor the need to employ artificial intelligence or " +"machine learning technologies. It also provides an integrated interface " +"to the well-known Python package for data analysis `pandas " +"`_." +msgstr "" +"Method :meth:`Page.find_tables` " +"は、高い表検出精度を備えて、すべてをあなたのために行います。その大きな利点は、外部ライブラリの依存関係がないこと、人工知能や機械学習技術を使用する必要がないことです。また、データ分析のためのPythonパッケージである" +" `pandas `_ のための統合されたインターフェースも提供します。" + +#: ../../recipes-text.rst:131 b620d2150671401698d7a280f56ee3cc +msgid "" +"Please have a look at example `Jupyter notebooks " +"`_, which cover standard situations like multiple tables on one " +"page or joining table fragments across multiple pages." +msgstr "" +"標準的な状況をカバーする例の `Jupyter `_ " +"ノートブックをご覧いただければ幸いです。これには、1つのページに複数の表や複数のページにまたがる表の断片を結合するなどの状況が含まれています。" + +#: ../../recipes-text.rst:138 259587f74c604a45bd83bdba19dbfdd9 +msgid "How to Mark Extracted Text" +msgstr "抽出したテキストをマークする方法" + +#: ../../recipes-text.rst:139 b7f64cd47ccb4f889b2a09f74c66564f +msgid "" +"There is a standard search function to search for arbitrary text on a " +"page: :meth:`Page.search_for`. It returns a list of :ref:`Rect` objects " +"which surround a found occurrence. These rectangles can for example be " +"used to automatically insert annotations which visibly mark the found " +"text." +msgstr "" +"ページ上で任意のテキストを検索するための標準的な検索機能があります: :meth:`Page.search_for` " +"です。これは、見つかったテキストを囲む :ref:`Rect` " +"オブジェクトのリストを返します。これらの四角形は、見つかったテキストを目に見えるようにマークするために自動的に注釈を挿入するのに使用できます。" + +#: ../../recipes-text.rst:141 febfc5dbb25f420dbf6c3ef3e88b24c4 +msgid "This method has advantages and drawbacks. Pros are:" +msgstr "この方法には利点と欠点があります。利点は次のとおりです:" + +#: ../../recipes-text.rst:143 101f473976a74f1c9ab424859e8862dd +msgid "The search string can contain blanks and wrap across lines" +msgstr "検索文字列には空白を含めることができ、行をまたぐことができます。" + +#: ../../recipes-text.rst:144 6ad25d7b6043421a8142042b3b9a3ec9 +msgid "Upper or lower case characters are treated equal" +msgstr "大文字と小文字は同じように扱われます。" + +#: ../../recipes-text.rst:145 b3efeff8f1c9463ab435a4742bc0c5bd +msgid "Word hyphenation at line ends is detected and resolved" +msgstr "行末での単語のハイフネーションが検出され、解決されます。" + +#: ../../recipes-text.rst:146 7a50b0a1eba54da5a0cbd913bde35367 +msgid "" +"Return may also be a list of :ref:`Quad` objects to precisely locate text" +" that is **not parallel** to either axis -- using :ref:`Quad` output is " +"also recommended, when page rotation is not zero." +msgstr "" +"返り値は :ref:`Quad` " +"オブジェクトのリストになる場合もあり、これにより軸に対して平行でないテキストを正確に位置付けることができます。ページの回転がゼロでない場合には、 " +":ref:`Quad` の出力を使用することも推奨されます。" + +#: ../../recipes-text.rst:148 9f60ccd5aee74a1bb2f791ed857906ac +msgid "But you also have other options::" +msgstr "ただし、他にも選択肢があります::" + +#: ../../recipes-text.rst:182 324f957419b2409787a7496e75b13dee +msgid "" +"This script uses `Page.get_text(\"words\")` to look for a string, handed " +"in via cli parameter. This method separates a page's text into \"words\" " +"using white spaces as delimiters. Further remarks:" +msgstr "このスクリプトは、cliパラメーターを介して渡された文字列を検索するためにPage.get_text(\"words\")を使用します。この方法では、ページのテキストがスペースと改行を区切りとして「単語」に分割されます。さらなる注釈:" + +#: ../../recipes-text.rst:184 e11b43b6869e46b89ed4021f9ca781ea +msgid "" +"If found, the **complete word containing the string** is marked " +"(underlined) -- not only the search string." +msgstr "文字列が見つかった場合、検索文字列だけでなく、その文字列を含む完全な単語がマークされます(アンダーラインが引かれます)。" + +#: ../../recipes-text.rst:185 04e60745b3b34c6b91dcf325e659bc11 +msgid "" +"The search string may **not contain word delimiters**. By default, word " +"delimiters are white spaces and the non-breaking space `chr(0xA0)`. If " +"you use extra delimiting characters like `page.get_text(\"words\", " +"delimiters=\"./,\")` then none of these characters should be included in " +"your search string either." +msgstr "" +"検索文字列には単語の区切り文字を含めることはできません。デフォルトでは、単語の区切り文字は空白と非改行空白 `chr(0xA0)` です。もし、 " +"`page.get_text(\"words\", delimiters=\"./,\")` " +"のような追加の区切り文字を使用する場合は、これらの文字を検索文字列に含めてはいけません。" + +#: ../../recipes-text.rst:186 d2efc0fb67834d26a4980492e4ba653b +msgid "" +"As shown here, upper / lower cases are **respected**. But this can be " +"changed by using the string method *lower()* (or even regular " +"expressions) in function *mark_word*." +msgstr "" +"ここで示したように、大文字と小文字は区別されますが、`mark_word` 関数で `lower()` " +"メソッド(または正規表現)を使用することで変更できます。" + +#: ../../recipes-text.rst:187 f1f92985f2ae49809306e7b57ba6c5b1 +msgid "There is **no upper limit**: all occurrences will be detected." +msgstr "上限はありません。すべての出現を検出します。" + +#: ../../recipes-text.rst:188 6639bc0b294042f3b7da287582d935d1 +msgid "" +"You can use **anything** to mark the word: 'Underline', 'Highlight', " +"'StrikeThrough' or 'Square' annotations, etc." +msgstr "単語をマークするために何を使用しても構いません:「アンダーライン」、「ハイライト」、「取り消し線」、「四角」の注釈などがあります。" + +#: ../../recipes-text.rst:189 f003fe97efd4436285d6bc3016b36180 +msgid "" +"Here is an example snippet of a page of this manual, where \"MuPDF\" has " +"been used as the search string. Note that all strings **containing " +"\"MuPDF\"** have been completely underlined (not just the search string)." +msgstr "以下は、このマニュアルのページの一部の例スニペットで、「MuPDF」が検索文字列として使用されています。注意:「MuPDF」を含むすべての文字列が完全にアンダーラインで引かれていることに注意してください(検索文字列だけでなく)。" + +#: ../../recipes-text.rst:200 07364170c89f43d7a469ec584d3f4f06 +msgid "How to Mark Searched Text" +msgstr "検索したテキストをマークする方法" + +#: ../../recipes-text.rst:204 11638cbd201f4578b319a8b04e773026 +msgid "This script searches for text and marks it::" +msgstr "このスクリプトはテキストを検索してマークします::" + +#: ../../recipes-text.rst:230 15b785bdfebf418aba157f4f78a77e4b +msgid "The result looks like this:" +msgstr "結果は以下のようになります:" + +#: ../../recipes-text.rst:241 080eb1dc215a426e9e7a03e134dc4fa2 +msgid "How to Mark Non-horizontal Text" +msgstr "非水平テキストをマークする方法" + +#: ../../recipes-text.rst:242 6fe2ff76b1d0421d8ab98c926c234cb0 +msgid "" +"The previous section already shows an example for marking non-horizontal " +"text, that was detected by text **searching**." +msgstr "前のセクションでは、テキスト検索によって検出された非水平テキストのマークの例が既に示されています。" + +#: ../../recipes-text.rst:244 9f3587b3ee80418e80f13b3ad91233ec +msgid "" +"But text **extraction** with the \"dict\" / \"rawdict\" options of " +":meth:`Page.get_text` may also return text with a non-zero angle to the " +"x-axis. This is indicated by the value of the line dictionary's `\"dir\"`" +" key: it is the tuple `(cosine, sine)` for that angle. If `line[\"dir\"] " +"!= (1, 0)`, then the text of all its spans is rotated by (the same) angle" +" != 0." +msgstr "" +"しかし、 :meth:`Page.get_text` " +"の「dict」/「rawdict」オプションを使用したテキスト抽出では、x軸に対してゼロでない角度のテキストも返される場合があります。これは、行の辞書の" +" \"dir\" キーの値によって示されます:それはその角度に対する `(cosine, sine)` のタプルです。 " +"`line[\"dir\"] != (1, 0)` であれば、すべてのスパンのテキストは (同じ) 角度 != 0 によって回転しています。" + +#: ../../recipes-text.rst:246 b5ac4e18122a4f8985e80cf88f3152b4 +msgid "" +"The \"bboxes\" returned by the method however are rectangles only -- not " +"quads. So, to mark span text correctly, its quad must be recovered from " +"the data contained in the line and span dictionary. Do this with the " +"following utility function (new in v1.18.9)::" +msgstr "ただし、このメソッドによって返される「bboxes」は四角形のみであり、クワッドではありません。したがって、スパンテキストを正しくマークするには、行とスパンの辞書に含まれるデータからクワッドを回復する必要があります。以下のユーティリティ関数を使用してください(v1.18.9で新しく追加されました)::" + +#: ../../recipes-text.rst:251 220c8fb8fa4b46c781ff32905bc0a24e +msgid "" +"If you want to **mark the complete line** or a subset of its spans in one" +" go, use the following snippet (works for v1.18.10 or later)::" +msgstr "一度に完全な行またはその一部のスパンをマークしたい場合は、以下のスニペットを使用してください(v1.18.10以降で動作します)" + +#: ../../recipes-text.rst:258 85d1dfa0969b4848b83f9b2f117d981c +msgid "" +"The `spans` argument above may specify any sub-list of `line[\"spans\"]`." +" In the example above, the second to second-to-last span are marked. If " +"omitted, the complete line is taken." +msgstr "" +"上記の `spans` 引数は、`line[\"spans\"]` " +"の任意の部分リストを指定できます。上記の例では、2番目から最後から2番目のスパンがマークされます。省略すると、完全な行が取得されます" + +#: ../../recipes-text.rst:265 b4aaece1bac342df88b2588a53b1488f +msgid "How to Analyze Font Characteristics" +msgstr "フォントの特性を分析する方法" + +#: ../../recipes-text.rst:266 3d03a051124b43d1bae8d901b0aecd28 +msgid "" +"To analyze the characteristics of text in a PDF use this elementary " +"script as a starting point:" +msgstr "PDF内のテキストの特性を分析するには、以下の初歩的なスクリプトを出発点として使用します::" + +#: ../../recipes-text.rst:271 f055a412baca41b5a95b0ecadafe2e3e +msgid "Here is the PDF page and the script output:" +msgstr "以下はPDFページとスクリプトの出力です。" + +#: ../../recipes-text.rst:282 66219ea8cab542688f83c1bd81f081c1 +msgid "How to Insert Text" +msgstr "テキストの挿入方法" + +#: ../../recipes-text.rst:283 4e0c636dd4ac4eb387bcde62a6a2a175 +msgid "" +"PyMuPDF provides ways to insert text on new or existing PDF pages with " +"the following features:" +msgstr "PyMuPDFは、以下の機能を備えて新しいまたは既存のPDFページにテキストを挿入する方法を提供しています:" + +#: ../../recipes-text.rst:285 155bbc5aa1454c93b980b30ac316c0b0 +msgid "" +"choose the font, including built-in fonts and fonts that are available as" +" files" +msgstr "フォントの選択:組み込みのフォントやファイルとして利用可能なフォントを選択できます。" + +#: ../../recipes-text.rst:286 b9dd50f8a43d417ea405cd07b235088d +msgid "choose text characteristics like bold, italic, font size, font color, etc." +msgstr "テキストの特性の選択:太字、斜体、フォントサイズ、フォントカラーなど、テキストの特性を選択できます。" + +#: ../../recipes-text.rst:287 f938ab7b656c436297d8120a887e4fcc +msgid "position the text in multiple ways:" +msgstr "テキストの配置方法:" + +#: ../../recipes-text.rst:289 96ff2b06077841bf82fb23104230f9ed +msgid "either as simple line-oriented output starting at a certain point," +msgstr "特定のポイントを起点として単純な行指向の出力として配置することができます。 " + +#: ../../recipes-text.rst:290 dcf40c10ea294dc8a656732166297259 +msgid "" +"or fitting text in a box provided as a rectangle, in which case text " +"alignment choices are also available," +msgstr "ボックスにテキストをフィットさせる場合は、テキストの配置を選択することもできます。この場合、テキストの整列オプションも利用できます。" + +#: ../../recipes-text.rst:291 056f2e92f23f4d41beb702386a246a80 +msgid "" +"choose whether text should be put in foreground (overlay existing " +"content)," +msgstr "テキストを前面に配置するか選択できます(既存のコンテンツをオーバーレイします)。" + +#: ../../recipes-text.rst:292 5999867be03a4be495cbfd225913c3d4 +msgid "" +"all text can be arbitrarily \"morphed\", i.e. its appearance can be " +"changed via a :ref:`Matrix`, to achieve effects like scaling, shearing or" +" mirroring," +msgstr "テキストは任意に「変形」されることができます。つまり、行列を使用して拡大、せん断、反転などの効果を得ることができます。" + +#: ../../recipes-text.rst:293 25c6fa1fb2344a02a3a693135a7b5ad4 +msgid "" +"independently from morphing and in addition to that, text can be rotated " +"by integer multiples of 90 degrees." +msgstr "変形とは別に、テキストを90度の整数倍で回転させることもできます。" + +#: ../../recipes-text.rst:295 2190abc125234360a3e1fab88ef34947 +msgid "" +"All of the above is provided by three basic :ref:`Page`, resp. " +":ref:`Shape` methods:" +msgstr "以上のすべては、それぞれの基本的な :ref:`Page` 、:ref:`Shape` メソッドによって提供されています。" + +#: ../../recipes-text.rst:297 4fb036ddb792472197216092b9eefddb +msgid "" +":meth:`Page.insert_font` -- install a font for the page for later " +"reference. The result is reflected in the output of " +":meth:`Document.get_page_fonts`. The font can be:" +msgstr "" +":meth:`Page.insert_font` - ページにフォントをインストールして後で参照できるようにします。その結果は、 " +":meth:`Document.get_page_fonts` の出力に反映されます。フォントは以下の方法で提供できます:" + +#: ../../recipes-text.rst:299 3c7f3dadc700412b9396227e3e8b9ff6 +msgid "provided as a file," +msgstr "ファイルとして提供する。" + +#: ../../recipes-text.rst:300 40d5ff2c32c44c6584f3112ed978abef +msgid "via :ref:`Font` (then use :attr:`Font.buffer`)" +msgstr ":ref:`Font` を使用して提供する(その場合、 :attr:`Font.buffer` を使用します)。" + +#: ../../recipes-text.rst:301 6267fd1e928e41658810a84d52de31c7 +msgid "already present somewhere in **this or another** PDF, or" +msgstr "既にこのPDFまたは別のPDFのどこかに存在する。" + +#: ../../recipes-text.rst:302 2ebf7edb3c7e4c4482334300df5af372 +msgid "be a **built-in** font." +msgstr "組み込みフォントである。" + +#: ../../recipes-text.rst:304 e6a884a850ad4cb49e2a74eeb2f5bc61 +msgid "" +":meth:`Page.insert_text` -- write some lines of text. Internally, this " +"uses :meth:`Shape.insert_text`." +msgstr "" +":meth:`Page.insert_text` - テキストの行を書き込みます。内部的には :meth:`Shape.insert_text`" +" を使用します。" + +#: ../../recipes-text.rst:306 5252e3a0b80e49a4af1e7a62b8a2cc00 +msgid "" +":meth:`Page.insert_textbox` -- fit text in a given rectangle. Here you " +"can choose text alignment features (left, right, centered, justified) and" +" you keep control as to whether text actually fits. Internally, this uses" +" :meth:`Shape.insert_textbox`." +msgstr "" +":meth:`Page.insert_textbox` - " +"指定された矩形にテキストをフィットさせます。ここでは、テキストの整列機能(左揃え、右揃え、中央揃え、両端揃え)を選択できます。また、テキストが実際にフィットするかどうかの制御もできます。内部的には" +" :meth:`Shape.insert_textbox` を使用します。" + +#: ../../recipes-text.rst:308 0d6285e91b3c45608b972b2de7612705 +msgid "Both text insertion methods automatically install the font as necessary." +msgstr "テキスト挿入の両方の方法は、必要に応じてフォントを自動的にインストールします。" + +#: ../../recipes-text.rst:314 4615115cc7ec417e9025309e87d5c912 +msgid "How to Write Text Lines" +msgstr "テキスト行を書く方法" + +#: ../../recipes-text.rst:315 c5ec955ab13243b49f36ee01f5a5e363 +msgid "Output some text lines on a page::" +msgstr "ページにいくつかのテキスト行を出力する方法::" + +#: ../../recipes-text.rst:336 8385d05c2aaf4baf89657017938f1841 +msgid "" +"With this method, only the **number of lines** will be controlled to not " +"go beyond page height. Surplus lines will not be written and the number " +"of actual lines will be returned. The calculation uses a line height " +"calculated from the :data:`fontsize` and 36 points (0.5 inches) as bottom" +" margin." +msgstr "" +"この方法では、ページの高さを超えないように行の数だけを制御します。余剰の行は書き込まれず、実際の行数が返されます。計算には、:data:`fontsize`" +" と36ポイント(0.5インチ)のボトムマージンから計算された行の高さが使用されます。" + +#: ../../recipes-text.rst:338 86eb433806094945b5d70882c088708a +msgid "" +"Line **width is ignored**. The surplus part of a line will simply be " +"invisible." +msgstr "行の幅は無視されます。行の余剰部分は単に見えなくなります。" + +#: ../../recipes-text.rst:340 a76488ed16d54e53b404c6f7a09d9313 +msgid "" +"However, for built-in fonts there are ways to calculate the line width " +"beforehand - see :meth:`get_text_length`." +msgstr "ただし、組み込みのフォントには、行の幅を事前に計算する方法があります。 :meth:`get_text_length` を参照してください。" + +#: ../../recipes-text.rst:342 b366c349b7114db78ed6f108144468f9 +msgid "" +"Here is another example. It inserts 4 text strings using the four " +"different rotation options, and thereby explains, how the text insertion " +"point must be chosen to achieve the desired result::" +msgstr "以下は別の例です。4つの異なる回転オプションを使用してテキスト文字列を挿入し、それにより、望む結果を得るためにどのようにテキスト挿入ポイントを選択すべきかを説明しています::" + +#: ../../recipes-text.rst:378 fb820c64521f4522a238d2ce77dcaf18 +msgid "This is the result:" +msgstr "これが結果です。" + +#: ../../recipes-text.rst:390 541c84709e8840f99fe3aba0b40495a5 +msgid "How to Fill a Text Box" +msgstr "テキストボックスの塗りつぶし方" + +#: ../../recipes-text.rst:391 c6dcf5cccb0044f8b35d759a5221dfae +msgid "" +"This script fills 4 different rectangles with text, each time choosing a " +"different rotation value::" +msgstr "このスクリプトは、異なる回転値を選択して、4つの異なる長方形にテキストを塗りつぶします。" + +#: ../../recipes-text.rst:428 2e957f827c5b40aaa4e166eb9499bfbb +msgid "" +"Some default values were used above: font size 11 and text alignment " +"\"left\". The result will look like this:" +msgstr "上記ではいくつかのデフォルト値が使用されました:フォント「Helvetica」、フォントサイズ11、テキストの配置は「左寄せ」です。結果は以下のようになります。" + +#: ../../recipes-text.rst:438 c7e2172b7c074d11b35164dc32c22e23 +msgid "How to Fill a Box with HTML Text" +msgstr "HTMLテキストでボックスを埋める方法" + +#: ../../recipes-text.rst:439 42df2d56d1f34d98823882e630fe1cf7 +msgid "" +"Method :meth:`Page.insert_htmlbox` offers a **much more powerful** way to" +" insert text in a rectangle." +msgstr "メソッド :meth:`Page.insert_htmlbox` は、矩形にテキストを挿入するための **より強力な** 方法を提供します。" + +#: ../../recipes-text.rst:441 d5759271e33d478eae11f568d17338fc +msgid "" +"Instead of simple, plain text, this method accepts HTML source, which may" +" not only contain HTML tags but also styling instructions to influence " +"things like font, font weight (bold) and style (italic), color and much " +"more." +msgstr "このメソッドは、単純なプレーンテキストではなく、HTMLソースを受け入れます。HTMLタグのみならず、フォント、フォントの太さ(太字)、スタイル(イタリック)、色などを含むスタイル指示も含まれます。" + +#: ../../recipes-text.rst:443 ba663f954c914a1a8d20788c385f392c +msgid "" +"It is also possible to mix multiple fonts and languages, to output HTML " +"tables and to insert images and URI links." +msgstr "複数のフォントや言語を混在させ、HTMLテーブルを出力し、画像やURIリンクを挿入することも可能です。" + +#: ../../recipes-text.rst:445 e592608d0a5348499aff8a592bb61837 +msgid "" +"For even more styling flexibility, an additional CSS source may also be " +"given." +msgstr "さらなるスタイリングの柔軟性を求める場合、追加のCSSソースを指定することもできます。" + +#: ../../recipes-text.rst:447 11d3134828ab455a9beb1bd08b3eedc9 +msgid "" +"The method is based on the :ref:`Story` class. Therefore, complex script " +"systems like Devanagari, Nepali, Tamil and many are supported and written" +" correctly thanks to using the HarfBuzz library - which provides this so-" +"called **\"text shaping\"** feature." +msgstr "" +"このメソッドは、 :ref:`Story` " +"(ストーリー)クラスに基づいています。そのため、デーヴァナーガリ、ネパール語、タミル語などの複雑な文字体系がサポートされ、HarfBuzzライブラリを使用して正しく書き込まれています" +" - これがいわゆる **「テキストの形成」** 機能を提供します。" + +#: ../../recipes-text.rst:449 702e632bbb1846448ff097d413da8fb7 +msgid "" +"Any required fonts to output characters are automatically pulled in from " +"the Google NOTO font library - as a fallback (when the -- optionally " +"supplied -- user font(s) do not contain some glyphs)." +msgstr "" +"文字を出力するために必要なフォントは、--オプションで提供される--" +"ユーザーフォントが一部のグリフを含んでいない場合のフォールバックとして、Google NOTOフォントライブラリから自動的に取得されます。" + +#: ../../recipes-text.rst:451 6c11c27cd47042e2974d9dad0680a2a9 +msgid "" +"As a small glimpse into the features offered here, we will output the " +"following HTML-enriched text::" +msgstr "ここで提供される機能の一端をご覧いただくために、以下のHTMLエンリッチされたテキストを出力します:" + +#: ../../recipes-text.rst:476 2ad55055659d456fabde0186e22d4e26 +msgid "" +"Please note how the \"css\" parameter is used to globally select the " +"default \"sans-serif\" font and a font size of 14." +msgstr "「css」パラメータが、デフォルトの「sans-serif」フォントとフォントサイズ14をグローバルに選択する方法に注意してください。" + +#: ../../recipes-text.rst:478 ../../recipes-text.rst:546 +#: 3c4cca1121d6405eb83e0cb2871b7570 8e7f4fe263f24763bbf69efa19489d99 +msgid "The result will look like this:" +msgstr "結果は以下のようになります:" + +#: ../../recipes-text.rst:483 b7f446683eca4524b8fc2bd988191712 +msgid "How to output HTML tables and images" +msgstr "HTMLテーブルや画像を出力する方法" + +#: ../../recipes-text.rst:485 ed6bb1a3fe524fbda24f9c97da319399 +msgid "" +"Here is another example that outputs a table with this method. This time," +" we are including all the styling in the HTML source itself. Please also " +"note, how it works to include an image - even within a table cell::" +msgstr "以下は、このメソッドを使用してテーブルを出力する別の例です。今回は、すべてのスタイリングをHTMLソース自体に含めています。また、テーブルセル内に画像を含める方法についても、ご注意ください::" + +#: ../../recipes-text.rst:552 4cd24c13bbba4d8d8b0887c53d530e7e +msgid "How to Output Languages of the World" +msgstr "世界の言語を出力する方法" + +#: ../../recipes-text.rst:554 5e1849ae29cd4ee787b3105c593def1a +msgid "" +"Our third example will demonstrate the automatic multi-language support. " +"It includes automatic **text shaping** for complex scripting systems like" +" Devanagari and right-to-left languages::" +msgstr "" +"3つ目の例では、自動多言語サポートを示します。これには、デーヴァナーガリや右から左への言語などの複雑なスクリプトシステムに対する自動 " +"**テキスト整形** も含まれます:" + +#: ../../recipes-text.rst:584 7f93ecf38f45410098070b9c478823f0 +msgid "And this is the output:" +msgstr "これが結果です。" + +#: ../../recipes-text.rst:589 0fc8a571f2164e8b8fa47da93210cc9b +msgid "How to Specify your Own Fonts" +msgstr "独自のフォントを指定する方法" + +#: ../../recipes-text.rst:591 f4ddad35adde440a9851a6e69fcfcf35 +msgid "" +"Define your font files in CSS syntax using the `@font-face` statement. " +"You need a separate `@font-face` for every combination of font weight and" +" font style (e.g. bold or italic) you want to be supported. The following" +" example uses the famous MS Comic Sans font in its four variants regular," +" bold, italic and bold-italic." +msgstr "" +"`@font-face` " +"ステートメントを使用して、CSS構文でフォントファイルを定義します。サポートされるフォントのウェイトとスタイル(太字や斜体など)の組み合わせごとに、個別の" +" `@font-face` が必要です。以下の例では、有名な MS Comic Sans フォントの 4 " +"つのバリアント(通常、太字、斜体、太字斜体)を使用しています。" + +#: ../../recipes-text.rst:593 f8779468c07c47ef9000fb2979f5214c +msgid "" +"As these four font files are located in the system's folder " +"`C:/Windows/Fonts` the method needs an :ref:`Archive` definition that " +"points to that folder::" +msgstr "" +"これらの 4 つのフォントファイルがシステムのフォルダ `C:/Windows/Fonts` にあるため、このメソッドには、そのフォルダを指す " +":ref:`Archive` (アーカイブ)の定義が必要です。" + +#: ../../recipes-text.rst:642 06ab66e236cc4a2ea7d4c5a99612f4dd +msgid "How to Request Text Alignment" +msgstr "テキストの配置をリクエストする方法" + +#: ../../recipes-text.rst:644 7795bb870b4f42f187a0efe85b6c761d +msgid "This example combines multiple requirements:" +msgstr "この例では、複数の要件を組み合わせています" + +#: ../../recipes-text.rst:646 ff142aa60660494db8d4272e60183ff1 +msgid "Rotate the text by 90 degrees anti-clockwise." +msgstr "テキストを90度反時計回りに回転させます。" + +#: ../../recipes-text.rst:647 c1627013d8474a4f82eb9fcc874f012c +msgid "" +"Use a font from package `pymupdf-fonts `_. You will see that the respective CSS definitions are a lot " +"easier in this case." +msgstr "" +"`pymupdf-fonts `_ " +"パッケージからフォントを使用します。この場合、該当するCSS定義がはるかに簡単であることに気付くでしょう。" + +#: ../../recipes-text.rst:648 827ab2e55bb64443b6dcbb706662051a +msgid "Align the text with the \"justify\" option." +msgstr "テキストを \"justify\" オプションで配置します。" + +#: ../../recipes-text.rst:698 424d197ff94d4b4f9c4c0cc4513a960d +msgid "How to Extract Text with Color" +msgstr "色付きのテキストを抽出する方法" + +#: ../../recipes-text.rst:700 a842d1070655423986ddbb7afdbb289b +msgid "" +"Iterate through your text blocks and find the spans of text you need for " +"this information." +msgstr "テキストブロックを繰り返し処理し、必要な情報のテキストスパンを見つけます。" + +#: ../../footer.rst:60 b02445d3686f4215969d2dff8b922af2 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "The search string may **not contain spaces** or other white space." +#~ msgstr "検索文字列にはスペースや他の空白文字を含めることはできません。" + +#~ msgid "How to Use Non-Standard Encoding" +#~ msgstr "非標準エンコーディングの使用方法" + +#~ msgid "" +#~ "Since v1.14, MuPDF allows Greek and " +#~ "Russian encoding variants for the " +#~ ":data:`Base14_Fonts`. In PyMuPDF this is " +#~ "supported via an additional *encoding* " +#~ "argument. Effectively, this is relevant " +#~ "for Helvetica, Times-Roman and Courier" +#~ " (and their bold / italic forms) " +#~ "and characters outside the ASCII code" +#~ " range only. Elsewhere, the argument " +#~ "is ignored. Here is how to request" +#~ " Russian encoding with the standard " +#~ "font Helvetica::" +#~ msgstr "" +#~ "v1.14以降、MuPDFでは :data:`Base14_Fonts` " +#~ "のギリシャ語およびロシア語のエンコーディングバリアントが許可されています。PyMuPDFでは、これは追加のエンコーディング引数を介してサポートされています。これは実際にはHelvetica" +#~ "、Times-" +#~ "Roman、Courier(およびそれらの太字/斜体形式)およびASCIIコード範囲外の文字にのみ影響します。他の場所では、この引数は無視されます。以下は、標準フォントHelveticaでロシア語のエンコーディングを要求する方法です::" + +#~ msgid "" +#~ "The valid encoding values are " +#~ "TEXT_ENCODING_LATIN (0), TEXT_ENCODING_GREEK (1)," +#~ " and TEXT_ENCODING_CYRILLIC (2, Russian) " +#~ "with Latin being the default. Encoding" +#~ " can be specified by all relevant " +#~ "font and text insertion methods." +#~ msgstr "有効なエンコーディング値は、TEXT_ENCODING_LATIN(0)、TEXT_ENCODING_GREEK(1)、TEXT_ENCODING_CYRILLIC(2、ロシア語)であり、デフォルトはLatinです。エンコーディングは、すべての関連するフォントおよびテキスト挿入メソッドで指定できます。" + +#~ msgid "" +#~ "By the above statement, the fontname " +#~ "*helv* is automatically connected to the" +#~ " Russian font variant of Helvetica. " +#~ "Any subsequent text insertion with " +#~ "**this fontname** will use the Russian" +#~ " Helvetica encoding." +#~ msgstr "上記の記述により、フォント名「helv」は自動的にHelveticaのロシア語バリアントに接続されます。このフォント名を使用して以降のテキスト挿入では、ロシア語のHelveticaエンコーディングが使用されます。" + +#~ msgid "" +#~ "If you change the fontname just " +#~ "slightly, you can also achieve an " +#~ "**encoding \"mixture\"** for the **same " +#~ "base font** on the same page::" +#~ msgstr "フォント名をわずかに変更することで、同じベースフォントの同じページ上でエンコーディングの「混合」を実現することもできます。" + +#~ msgid "The result:" +#~ msgstr "結果は以下の通りです。" + +#~ msgid "" +#~ "The snippet above indeed leads to " +#~ "three different copies of the Helvetica" +#~ " font in the PDF. Each copy is" +#~ " uniquely identified (and referenceable) by" +#~ " using the correct upper-lower case" +#~ " spelling of the reserved word " +#~ "\"helv\"::" +#~ msgstr "上記のスニペットは実際にPDF内にHelveticaフォントの3つの異なるコピーを生成します。各コピーは正しい大文字小文字のスペル(\"helv\")を使用して一意に識別され、参照が可能です。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/recipes.mo b/docs/locales/ja/LC_MESSAGES/recipes.mo new file mode 100644 index 000000000..8f61a3638 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/recipes.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/recipes.po b/docs/locales/ja/LC_MESSAGES/recipes.po new file mode 100644 index 000000000..2cc4769a1 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/recipes.po @@ -0,0 +1,43 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 4009185189884afa815f06437219b17a +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 0945b3eedd774aa1bf2ed67c92baa34e +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 5438d71f4a5f445c9c4929c4169009b8 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../footer.rst:60 8482813d3d6a47a39130184e645552b4 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/rect.mo b/docs/locales/ja/LC_MESSAGES/rect.mo new file mode 100644 index 000000000..86bd9bcb8 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/rect.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/rect.po b/docs/locales/ja/LC_MESSAGES/rect.po new file mode 100644 index 000000000..d35caffd2 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/rect.po @@ -0,0 +1,866 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 4f175165437e4f9ea93bfd4d27e34217 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 99738076d4db4a2991da7a843d446e51 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 6893e787481d4f359fabbf13b0e18208 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../rect.rst:7 b891156537654c3bb28aedf20605826e +msgid "Rect" +msgstr "Rect (矩形)" + +#: ../../rect.rst:9 b140b4533abb40b584506436f6da3db6 +msgid "" +"*Rect* represents a rectangle defined by four floating point numbers x0, " +"y0, x1, y1. They are treated as being coordinates of two diagonally " +"opposite points. The first two numbers are regarded as the \"top left\" " +"corner P\\ :sub:`(x0,y0)` and P\\ :sub:`(x1,y1)` as the \"bottom right\" " +"one. However, these two properties need not coincide with their intuitive" +" meanings -- read on." +msgstr "" +"*Rect* " +"は、4つの浮動小数点数x0、y0、x1、y1によって定義される矩形を表します。これらは対角線上の2つの点の座標と見なされます。最初の2つの数は「左上」のコーナー" +" P\\ :sub:`(x0,y0)` とし、 P\\ :sub:`(x1,y1)` " +"は「右下」のコーナーとします。ただし、これら2つのプロパティは直感的な意味と一致する必要はありません。以下を読んでください。" + +#: ../../rect.rst:11 e59a3c8c38054ea38c6fda34de445624 +msgid "The following remarks are also valid for :ref:`IRect` objects:" +msgstr ":ref:`IRect` オブジェクトにも以下の注釈は有効です。" + +#: ../../rect.rst:13 b1d032f546b54b47b004faf1504b22f5 +msgid "" +"A rectangle in the sense of (Py-) MuPDF **(and PDF)** always has " +"**borders parallel to the x- resp. y-axis**. A general orthogonal " +"tetragon **is not a rectangle** -- in contrast to the mathematical " +"definition." +msgstr "" +"(Py-)MuPDF **(およびPDF)** の意味での矩形は常に **x-またはy軸に平行な境界** を持ちます。一般的な直交四角形は " +"**矩形ではなく** 、数学的な定義とは対照的です。" + +#: ../../rect.rst:14 9f2f8432949d4e92a415120c726f8623 +msgid "" +"The constructing points can be (almost! -- see below) anywhere in the " +"plane -- they need not even be different, and e.g. \"top left\" need not " +"be the geometrical \"north-western\" point." +msgstr "構築ポイントは平面上のどこにでも配置できます。異なる必要すらなく、たとえば「左上」が幾何学的に「北西」の点である必要はありません。" + +#: ../../rect.rst:15 79fb7e4eb80841079d3d4b9878040471 +msgid "Units are in points, where 72 points is 1 inch." +msgstr "単位はポイントで、72ポイントが1インチです。" + +#: ../../rect.rst:20 4ea3829d9cab4b818f01b0f6b07536d9 +msgid "" +"For any given quadruple of numbers, the geometrically \"same\" rectangle " +"can be defined in four different ways:" +msgstr "与えられた4つの数値に対して、幾何学的に「同じ」矩形は4つの異なる方法で定義できます。" + +#: ../../rect.rst:17 45eff7ddd561491892f3a6f4d7cbc9f7 +msgid "Rect(P\\ :sub:`(x0,y0)`, P\\ :sub:`(x1,y1)`\\ )" +msgstr "" + +#: ../../rect.rst:18 4c61fbf1c62d4a56a5a187ee9a31fa35 +msgid "Rect(P\\ :sub:`(x1,y1)`, P\\ :sub:`(x0,y0)`\\ )" +msgstr "" + +#: ../../rect.rst:19 77dad531fc664ca0a7138d3a300a552b +msgid "Rect(P\\ :sub:`(x0,y1)`, P\\ :sub:`(x1,y0)`\\ )" +msgstr "" + +#: ../../rect.rst:20 fc0dfc403d104dd5a9383b9295374244 +msgid "Rect(P\\ :sub:`(x1,y0)`, P\\ :sub:`(x0,y1)`\\ )" +msgstr "" + +#: ../../rect.rst:22 ea2ef31b56d2471c90deb626bd3fdd01 +msgid "**(Changed in v1.19.0)** Hence some classification:" +msgstr "**(v1.19.0で変更)** したがって、いくつかの分類があります。" + +#: ../../rect.rst:24 b60f9c430985435c8e55ee36fbf79944 +msgid "" +"A rectangle is called **valid** if `x0 <= x1` and `y0 <= y1` (i.e. the " +"bottom right point is \"south-eastern\" to the top left one), otherwise " +"**invalid**. Of the four alternatives above, **only the first** is valid." +" Please take into account, that in MuPDF's coordinate system, the y-axis " +"is oriented from **top to bottom**. Invalid rectangles have been called " +"infinite in earlier versions." +msgstr "" +"矩形は、 `x0 <= x1` および `y0 <= y1` (つまり、右下の点が左上の点の「南東」にある)の場合にのみ **有効** " +"と呼ばれます。したがって、上記の4つの代替案のうち、**最初のものだけ** が有効です。MuPDFの座標系では、y軸は **上から下** " +"に向かっていますので、注意してください。無効な矩形は以前のバージョンでは 無限と呼ばれていました。" + +#: ../../rect.rst:26 5eb964bddb7342ae94128ac093e06625 +msgid "" +"A rectangle is called **empty** if `x0 >= x1` or `y0 >= y1`. This " +"implies, that **invalid rectangles are also always empty.** And `width` " +"(resp. `height`) is **set to zero** if `x0 > x1` (resp. `y0 > y1`). In " +"previous versions, a rectangle was empty only if one of width or height " +"was zero." +msgstr "" +"矩形は、 `x0 >= x1` または `y0 >= y1` の場合に **空** と呼ばれます。これは、**無効な矩形** " +"も常に空であることを意味します。また、 `x0 > x1` (または `y0 > y1` )の場合、幅(または高さ)は **ゼロに設定** " +"されます。以前のバージョンでは、矩形が空であるのは幅または高さのいずれかがゼロの場合に限られていました。" + +#: ../../rect.rst:28 06d2d707d1824887a24a33aa4d29ad97 +msgid "" +"Rectangle coordinates **cannot be outside** the number range from " +"`FZ_MIN_INF_RECT = -2147483648` to `FZ_MAX_INF_RECT = 2147483520`. Both " +"values have been chosen, because they are the smallest / largest 32bit " +"integers that survive C float conversion roundtrips. In previous versions" +" there was no limit for coordinate values." +msgstr "" +"矩形の座標は、`FZ_MIN_INF_RECT = -2147483648` から `FZ_MAX_INF_RECT = 2147483520`" +" " +"までの数値範囲内にある必要があります。これらの値は、C浮動小数点変換のラウンドトリップを生き残る最小/最大の32ビット整数であるため選ばれました。以前のバージョンでは、座標値の制限はありませんでした。" + +#: ../../rect.rst:30 6705a2cca07e4f829e3040f897ffb5b3 +msgid "" +"There is **exactly one \"infinite\" rectangle**, defined by `x0 = y0 = " +"FZ_MIN_INF_RECT` and `x1 = y1 = FZ_MAX_INF_RECT`. It contains every other" +" rectangle. It is mainly used for technical purposes -- e.g. when a " +"function call should ignore a formally required rectangle argument. This " +"rectangle is not empty." +msgstr "" +"**「無限」の矩形は** 、`x0 = y0 = FZ_MIN_INF_RECT` および `x1 = y1 = FZ_MAX_INF_RECT`" +" " +"で定義され、他のすべての矩形を含みます。これは主に技術的な目的で使用されます。たとえば、関数呼び出しで形式的に必要な矩形引数を無視する必要がある場合などです。この矩形は空ではありません。" + +#: ../../rect.rst:32 6887918d2f4b40a39daec63bc7f5a589 +msgid "" +"**Rectangles are (semi-) open:** The right and the bottom edges " +"(including the resp. corners) are not considered part of the rectangle. " +"This implies, that only the top-left corner `(x0, y0)` can ever belong to" +" the rectangle - the other three corners never do. An empty rectangle " +"contains no corners at all." +msgstr "" +"**矩形は(半)開いています**。右側と下側のエッジ(およびそれに含まれるコーナー)は矩形の一部とは見なされません。したがって、矩形に属することができるのは左上のコーナー" +" `(x0, y0)` のみです。他の3つのコーナーは常に含まれません。空の矩形にはまったくコーナーが含まれていません。" + +#: ../../rect.rst:38 8036b8594a7b41baa6e2d1ef16209cef +msgid "Here is an overview of the changes." +msgstr "以下は変更の概要です。" + +#: ../../rect.rst:41 4d3829e6a9254240ba2afdd58b72070c +msgid "Notion" +msgstr "概要" + +#: ../../rect.rst:41 e9add0c061d04d6fa452757810cf6550 +msgid "Versions < 1.19.0" +msgstr "バージョン < 1.19.0" + +#: ../../rect.rst:41 bd62c9a15c284380b98b4bbd65f93710 +msgid "Versions 1.19.*" +msgstr "バージョン 1.19.*" + +#: ../../rect.rst:43 fb8b381a9c874249bc8ef952f4eab354 +msgid "empty" +msgstr "空" + +#: ../../rect.rst:43 4a8cd26b92a94d5b9f70e49f36d4a021 +msgid "x0 = x1 or y0 = y1" +msgstr "x0 = x1 または y0 = y1" + +#: ../../rect.rst:43 58ed94a4b13a41d3a1527f6d367ecbde +msgid "x0 >= x1 or y0 >= y1 -- includes invalid rects" +msgstr "x0 >= x1 または y0 >= y1 – 無効な矩形も含む" + +#: ../../rect.rst:44 3fc33de6936b4b1780f7f7c36c95678d +msgid "valid" +msgstr "有効" + +#: ../../rect.rst:44 3884214dc7b34641ab739bf591af0083 +msgid "n/a" +msgstr "なし" + +#: ../../rect.rst:44 01046b8d592241a8a23a5e5714a2088d +msgid "x0 <= x1 and y0 <= y1" +msgstr "x0 <= x1 かつ y0 <= y1" + +#: ../../rect.rst:45 7d568031d88a46098a73ceb07d3c87db +msgid "infinite" +msgstr "無限" + +#: ../../rect.rst:45 6caca284a22c45ada46d42f41853d730 +msgid "all rects where x0 > x1 or y1 > y0" +msgstr "x0 > x1 または y1 > y0 のすべての矩形" + +#: ../../rect.rst:45 0d5d8be691d84ceb9d5ee1658228715f +msgid "**exactly one infinite rect / irect!**" +msgstr "**無限の矩形 / irectは1つだけです!**" + +#: ../../rect.rst:46 26942847b7af4257abd3527495c70837 +msgid "coordinate values" +msgstr "座標値" + +#: ../../rect.rst:46 f45611b6a50645cba718559b16ebab2c +msgid "all numbers" +msgstr "すべての数値" + +#: ../../rect.rst:46 c23643e3767a44f6a9765c8eb04c8682 +msgid "`FZ_MIN_INF_RECT <= number <= FZ_MAX_INF_RECT`" +msgstr "FZ_MIN_INF_RECT <= 数値 <= FZ_MAX_INF_RECT" + +#: ../../rect.rst:47 5ab8dbefea604252a3d886cc58f53f04 +msgid "borders, corners" +msgstr "境界、コーナー" + +#: ../../rect.rst:47 07909179c08c434bada8a34339bc4f1f +msgid "are parts of the rectangle" +msgstr "矩形の一部です" + +#: ../../rect.rst:47 ad24f776c93b4893b27a9ffc89a09e30 +msgid "right and bottom corners and edges **are outside**" +msgstr "右下の角とエッジは **外側にあります**" + +#: ../../rect.rst:50 64b6790f14a4436c84c2490872f0bf02 +msgid "" +"There are new top level functions defining infinite and standard empty " +"rectangles and quads, see :meth:`INFINITE_RECT` and friends." +msgstr "" +"新しいトップレベルの関数が追加され、無限と標準の空の四角形および四角形を定義します。:meth:`INFINITE_RECT` " +"などを参照してください。" + +#: ../../rect.rst:54 8b1e46fe5d5940ae916bad372aef9847 +msgid "**Methods / Attributes**" +msgstr "**メソッド / 属性**" + +#: ../../rect.rst:54 d6d28ff300c146f399cf21662676485c +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../rect.rst:56 32cbdd51f6984573910008d616626e1e +msgid ":meth:`Rect.contains`" +msgstr "" + +#: ../../rect.rst:56 a7a16aee03b242d099e29b157a9b4b17 +msgid "checks containment of point_likes and rect_likes" +msgstr "point_likesおよびrect_likesの包含をチェックします。" + +#: ../../rect.rst:57 e144062c3f124661ad5c5c4a32f1be7f +msgid ":meth:`Rect.get_area`" +msgstr "" + +#: ../../rect.rst:57 44971c8870934e5e89dd52b5192e1a79 +msgid "calculate rectangle area" +msgstr "四角形の面積を計算します。" + +#: ../../rect.rst:58 a623f54f39034dd6856f7b268b3ccc32 +msgid ":meth:`Rect.include_point`" +msgstr "" + +#: ../../rect.rst:58 36520814a04a4656bbf62f55fb24432f +msgid "enlarge rectangle to also contain a point" +msgstr "点も含むように四角形を拡大します。" + +#: ../../rect.rst:59 cf92cb27b5304db5852e19490f45268d +msgid ":meth:`Rect.include_rect`" +msgstr "" + +#: ../../rect.rst:59 ac1035fb19b34efa9fb0de1643a027eb +msgid "enlarge rectangle to also contain another one" +msgstr "別の四角形も含むように四角形を拡大します。" + +#: ../../rect.rst:60 68858515cc564906ac5c1cbf7c425185 +msgid ":meth:`Rect.intersect`" +msgstr "" + +#: ../../rect.rst:60 2effa8a9c2284b12bf11367ee6232ce1 +msgid "common part with another rectangle" +msgstr "別の四角形との共通部分です。" + +#: ../../rect.rst:61 e78842ba8a724c2986e34df95d02c502 +msgid ":meth:`Rect.intersects`" +msgstr "" + +#: ../../rect.rst:61 fc7ca5b01e824b86aa2e6808f69a0a36 +msgid "checks for non-empty intersections" +msgstr "非空の交差をチェックします。" + +#: ../../rect.rst:62 adf78cc1c43440dfaf89b76333de5611 +msgid ":meth:`Rect.morph`" +msgstr "" + +#: ../../rect.rst:62 a49711257f064e1588dc30ce9647c38e +msgid "transform with a point and a matrix" +msgstr "点と行列を使用して四角形を変形します。" + +#: ../../rect.rst:63 731ea321e2f245c389ce96d2baf8a1dd +msgid ":meth:`Rect.torect`" +msgstr "" + +#: ../../rect.rst:63 88f5cc601a804e67bd5d5527df1dc62c +msgid "the matrix that transforms to another rectangle" +msgstr "別の四角形に変換する行列です。" + +#: ../../rect.rst:64 ee4cdc8fd6cb4410bd4cb790f88d2002 +msgid ":meth:`Rect.norm`" +msgstr "" + +#: ../../rect.rst:64 1ba39b9e8b494dc688bc7ce9bcedb2e6 +msgid "the Euclidean norm" +msgstr "ユークリッドノルム" + +#: ../../rect.rst:65 1730541f1d994d46862f44ede7d2846f +msgid ":meth:`Rect.normalize`" +msgstr "" + +#: ../../rect.rst:65 2335f286efdb472d83a140038586bcf2 +msgid "makes a rectangle valid" +msgstr "四角形を有効にします" + +#: ../../rect.rst:66 2b7e251eb22141b7ba2942184913b727 +msgid ":meth:`Rect.round`" +msgstr "" + +#: ../../rect.rst:66 3d7b98e807dc45ce80bec5615acafd27 +msgid "create smallest :ref:`Irect` containing rectangle" +msgstr "最小の :ref:`Irect` を含む四角形を作成します。" + +#: ../../rect.rst:67 dffa9501a55c4a089143d424cb418a17 +msgid ":meth:`Rect.transform`" +msgstr "" + +#: ../../rect.rst:67 7e6886efabe24990b2c14e2e9cc94df4 +msgid "transform rectangle with a matrix" +msgstr "行列で四角形を変形します。" + +#: ../../rect.rst:68 9b9609a84c4b4e1da0f70464c2e8ce3a +msgid ":attr:`Rect.bottom_left`" +msgstr "" + +#: ../../rect.rst:68 1ddacc2a6e6f4ad9936a7ef64ea2ad17 +msgid "bottom left point, synonym *bl*" +msgstr "左下の点、シノニム *bl*" + +#: ../../rect.rst:69 9cedfd00eb9a436facd134f7119f2fc1 +msgid ":attr:`Rect.bottom_right`" +msgstr "" + +#: ../../rect.rst:69 4437891974524cd6be6ad68baa5ba513 +msgid "bottom right point, synonym *br*" +msgstr "右下の点、シノニム *br*" + +#: ../../rect.rst:70 1f78731833944580b5d74413e1d5b0ee +msgid ":attr:`Rect.height`" +msgstr "" + +#: ../../rect.rst:70 f47b82338e3f436f913bae3e2ff5e4dc +msgid "rectangle height" +msgstr "四角形の高さ" + +#: ../../rect.rst:71 0b6e47727811447ea07af99781b2e3b7 +msgid ":attr:`Rect.irect`" +msgstr "" + +#: ../../rect.rst:71 e82eab643bc046a784b949a6dffe8b67 +msgid "equals result of method *round()*" +msgstr "*round()* メソッドの結果と等しい" + +#: ../../rect.rst:72 3e90e5cf638447ca9d824c79145b3cad +msgid ":attr:`Rect.is_empty`" +msgstr "" + +#: ../../rect.rst:72 b7166f50e79c493f868a43091a462071 +msgid "whether rectangle is empty" +msgstr "四角形が空かどうか" + +#: ../../rect.rst:73 502df41843404077838e4eb5fafb1e83 +msgid ":attr:`Rect.is_valid`" +msgstr "" + +#: ../../rect.rst:73 d122a760ab4d4df194bc6c90bc99f961 +msgid "whether rectangle is valid" +msgstr "四角形が有効かどうか" + +#: ../../rect.rst:74 5646513259e346be95726ffadb021c57 +msgid ":attr:`Rect.is_infinite`" +msgstr "" + +#: ../../rect.rst:74 5d9ea98ae3364247ba6be8132cacd87b +msgid "whether rectangle is infinite" +msgstr "四角形が無限かどうか" + +#: ../../rect.rst:75 012fcf9ae00c4ea2a7412539968d760e +msgid ":attr:`Rect.top_left`" +msgstr "" + +#: ../../rect.rst:75 b295efcf4751475fb66614f940f4a464 +msgid "top left point, synonym *tl*" +msgstr "左上の点、シノニム *tl*" + +#: ../../rect.rst:76 af32d4918cfc4cfdaf0d6690270662ee +msgid ":attr:`Rect.top_right`" +msgstr "" + +#: ../../rect.rst:76 6c1326343d684df4be0b468d09054494 +msgid "top_right point, synonym *tr*" +msgstr "右上の点、シノニム *tr*" + +#: ../../rect.rst:77 b879ddb0bb7a40509b77711ea1714bc3 +msgid ":attr:`Rect.quad`" +msgstr "" + +#: ../../rect.rst:77 5f754c7d792942b89e39c9ca74ce3df1 +msgid ":ref:`Quad` made from rectangle corners" +msgstr "四角形の角から作成される :ref:`Quad`" + +#: ../../rect.rst:78 06754fd4f17c4308b8e1bcb8896053bd +msgid ":attr:`Rect.width`" +msgstr "" + +#: ../../rect.rst:78 896c2341e26c4bae8c698742629680f5 +msgid "rectangle width" +msgstr "四角形の幅" + +#: ../../rect.rst:79 c6ab34b27e85452cbb533f1ef1ac0749 +msgid ":attr:`Rect.x0`" +msgstr "" + +#: ../../rect.rst:79 14e0e826a2484f1fbb6ca777c4fda492 +msgid "left corners' x coordinate" +msgstr "左上のx座標" + +#: ../../rect.rst:80 67be2aa5256546cfa1539022f668a46e +msgid ":attr:`Rect.x1`" +msgstr "" + +#: ../../rect.rst:80 eb15ef70313f44b3b5aa6aba21d5956f +msgid "right corners' x -coordinate" +msgstr "右上のx座標" + +#: ../../rect.rst:81 62ce0e154b914073ab1720edca8df46e +msgid ":attr:`Rect.y0`" +msgstr "" + +#: ../../rect.rst:81 9aca9bfad3ca4d518abfb7c6eb614698 +msgid "top corners' y coordinate" +msgstr "上部のy座標" + +#: ../../rect.rst:82 9932a7086b7a4bcc99b31fd6469b537c +msgid ":attr:`Rect.y1`" +msgstr "" + +#: ../../rect.rst:82 46fc40dbcb4e4a7fb1abf3180401d6eb +msgid "bottom corners' y coordinate" +msgstr "下部のy座標" + +#: ../../rect.rst:85 24ef39c027ae4fe7acc5db742440c26b +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../rect.rst:103 876107cdef3940b1a84c137c4c048fdb +msgid "" +"Overloaded constructors: *top_left*, *bottom_right* stand for " +":data:`point_like` objects, \"sequence\" is a Python sequence type of 4 " +"numbers (see :ref:`SequenceTypes`), \"rect\" means another " +":data:`rect_like`, while the other parameters mean coordinates." +msgstr "" +"オーバーロードされたコンストラクター:*top_left*、*bottom_right* は :data:`point_like` " +"オブジェクトを表し、「sequence」は4つの数値からなるPythonシーケンス型です(:ref:`SequenceTypes` " +"を参照)、rectは別の :data:`rect_like` を意味し、他のパラメーターは座標を意味します。" + +#: ../../rect.rst:105 dc9e11c9f0794970bc13f1c2b0d14862 +msgid "If \"rect\" is specified, the constructor creates a **new copy** of it." +msgstr "\"rect\" が指定されている場合、コンストラクターはそれの **新しいコピー** を作成します。" + +#: ../../rect.rst:107 8a8e5207467c4429adce97d79872a891 +#, fuzzy +msgid "" +"Without parameters, the empty rectangle ``Rect(0.0, 0.0, 0.0, 0.0)`` is " +"created." +msgstr "パラメーターなしで、空の長方形 *Rect(0.0, 0.0, 0.0, 0.0)* が作成されます。" + +#: ../../rect.rst:111 d0389764b67b469dacca2e806465eb38 +#, fuzzy +msgid "" +"Creates the smallest containing :ref:`IRect`. This is **not the same** as" +" simply rounding the rectangle's edges: The top left corner is rounded " +"upwards and to the left while the bottom right corner is rounded " +"downwards and to the right." +msgstr "" +"最小の :ref:`IRect` を作成します。これは単に長方形のエッジを四捨五入することとは " +"**異なります**。左上のコーナーは上方向および左方向に丸められ、右下のコーナーは下方向および右方向に丸められます。" + +#: ../../rect.rst:116 6f10ab8e587a4f8da3bd39fab6c21272 +msgid "If the rectangle is **empty**, the result is also empty." +msgstr "もし矩形が空の場合、結果も **空** です。" + +#: ../../rect.rst:117 0466199751f24e1694fe4da6c03b978f +msgid "" +"**Possible paradox:** The result may be empty, **even if** the rectangle " +"is **not** empty! In such cases, the result obviously does **not** " +"contain the rectangle. This is because MuPDF's algorithm allows for a " +"small tolerance (1e-3). Example:" +msgstr "" +"**可能性のある逆説:** 矩形が空で **ない** 場合 **でも** 、結果が空になることがあります!このような場合、結果は明らかに矩形を " +"**含みません**。これは、MuPDFのアルゴリズムがわずかな許容差(1e-3)を許容しているためです。例:" + +#: ../../rect.rst 0a7269b7d8f74c71a481ce1ef76290d2 +#: 0ac0ade2c6234906971545973bc3addb 1fa4e22a1efa49afb9db1a2e8acc84f6 +#: 287dfc79dae24ac5bdc23bd6140536f8 8281a9f73e0c420caf0268e3432135b7 +#: a5f0a2eede5d431b838c556fb19cd1c7 a8e78634da5b4b248dc91a4da9bcf0b0 +#: e909480691a64745947dfcc581389e71 +msgid "Return type" +msgstr "" + +#: ../../rect.rst:127 61d0b53003974020a0ee6c6b732ec205 +msgid ":ref:`IRect`" +msgstr "" + +#: ../../rect.rst:131 a74add066ed74ab7b932fa24a6c7b876 +msgid "" +"Transforms the rectangle with a matrix and **replaces the original**. If " +"the rectangle is empty or infinite, this is a no-operation." +msgstr "次の条件を満たす場合、行列を使用して長方形を変換し、**元の長方形を置き換えます** :長方形が空であるか無限である場合、これは操作が行われません。" + +#: ../../rect.rst 214f6f23d11049ceae1322d54b3bd0d5 +#: 369735ccc9d4497baf0ca59a08bb44a2 382fbdea207d466080b5538a9899d047 +#: 42e5275efde045f395c7491842614edb 4f151c7a3ccf4060abccd8f626ff8bbb +#: 5a93f342cc8b4530a8376e2ae713c551 8152452aa16e4a58afd85b6ee8a8deaf +#: bc314e573ce848d0846a16250afa5f50 c5c886dd49b44f79b2f0f66d357bcd4e +msgid "Parameters" +msgstr "" + +#: ../../rect.rst:133 5ccb7999734e45cbb89a3752dd1f9590 +msgid "The matrix for the transformation." +msgstr "m(行列)–変換用の行列。" + +#: ../../rect.rst:136 ddaca91bf0ac4621bd5be07017e53f19 +msgid "*Rect*" +msgstr "" + +#: ../../rect.rst 51c0f6d4e59140aebf4c99812b0c636d +#: c959d184f9b449078aa2c376641b5644 ca6981d6e36a46f893861e291e3c5b0d +msgid "Returns" +msgstr "" + +#: ../../rect.rst:137 d0c8f8613d314bba942409850b3a9c21 +msgid "the smallest rectangle that contains the transformed original." +msgstr "変換された元の長方形を含む最小の長方形。" + +#: ../../rect.rst:141 59ece54a490e49169bd9ae0b55720c46 +msgid "" +"The intersection (common rectangular area, largest rectangle contained in" +" both) of the current rectangle and *r* is calculated and **replaces the " +"current** rectangle. If either rectangle is empty, the result is also " +"empty. If *r* is infinite, this is a no-operation. If the rectangles are " +"(mathematically) disjoint sets, then the result is invalid. If the result" +" is valid but empty, then the rectangles touch each other in a corner or " +"(part of) a side." +msgstr "" +"現在の長方形とrの共通の長方形エリア(両方に含まれる最大の長方形)を計算し、**現在の長方形を置き換えます**。どちらかの長方形が空の場合、結果も空になります。*r*" +" " +"が無限である場合、これは操作が行われません。長方形が(数学的に)交差していない場合、結果は無効になります。結果が有効でも空の場合、長方形は互いに角または一部の側面に接触しています。" + +#: ../../rect.rst:143 ../../rect.rst:150 3c56583d71f44f1f9dfcc257a6b14da5 +#: 993ebcaf675348e4baf42058064f4af1 +msgid "Second rectangle" +msgstr "第二の長方形" + +#: ../../rect.rst:148 46cc81d886744a779064ff0e36d3d87e +msgid "" +"The smallest rectangle containing the current one and ``r`` is calculated" +" and **replaces the current** one. If either rectangle is infinite, the " +"result is also infinite. If ``r`` is empty, the current rectangle remains" +" unchanged. Else if the current rectangle is empty, it is replaced by " +"``r``." +msgstr "" + +#: ../../rect.rst:155 e269f594553d418089fed3cc89a03c85 +msgid "" +"The smallest rectangle containing the current one and :data:`point_like` " +"``p`` is calculated and **replaces the current** one. **The infinite " +"rectangle remains unchanged.** To create the rectangle that wraps a " +"sequence of points, start with :meth:`EMPTY_RECT` and successively " +"include the members of the sequence." +msgstr "" + +#: ../../rect.rst:157 4ce142e941c042c3a80e74cf46c850ec +msgid "Point to include." +msgstr "含めるポイント。" + +#: ../../rect.rst:163 a44ce35d4d4948dcbf2421c80a366961 +msgid "" +"Calculate the area of the rectangle and, with no parameter, equals " +"*abs(rect)*. Like an empty rectangle, the area of an infinite rectangle " +"is also zero. So, at least one of *pymupdf.Rect(p1, p2)* and " +"*pymupdf.Rect(p2, p1)* has a zero area." +msgstr "" +"長方形の面積を計算し、パラメーターなしで *abs(rect)* と同じです。空の長方形の面積はゼロであるため、少なくとも " +"*pymupdf.Rect(p1、p2)* と *pymupdf.Rect(p2、p1)* のいずれかがゼロの面積を持っている必要があります。" + +#: ../../rect.rst:165 1fa10486b170486e9fd1707061e8b621 +msgid "" +"Specify required unit: respective squares of *px* (pixels, default), *in*" +" (inches), *cm* (centimeters), or *mm* (millimeters)." +msgstr "" +"必要な単位を指定します: *px* (ピクセル、デフォルト)の平方、*in* (インチ)、*cm* (センチメートル)、または *mm* " +"(ミリメートル)の平方。" + +#: ../../rect.rst:170 540e1878a49a42a18ee2ada0192ae56a +msgid "" +"Checks whether *x* is contained in the rectangle. It may be an *IRect*, " +"*Rect*, *Point* or number. If *x* is an empty rectangle, this is always " +"true. If the rectangle is empty this is always ``False`` for all non-" +"empty rectangles and for all points. `x in rect` and `rect.contains(x)` " +"are equivalent." +msgstr "" +"*x* が四角形内に含まれているかどうかをチェックします。xは *IRect*、*Rect*、*Point*、または数値のいずれかです。もし " +"*x* " +"が空の四角形である場合、これは常にtrueです。四角形が空である場合、これはすべての空でない四角形とすべてのポイントに対して常にfalseです。`x" +" in rect` および `rect.contains(x)` は同等です。" + +#: ../../rect.rst:172 51eb679fd4ae436fbc6bf5c11a96dc06 +msgid "the object to check." +msgstr "チェックするオブジェクト。" + +#: ../../rect.rst:179 abe761296d1749f0b9151470d32f3a5c +msgid "" +"Checks whether the rectangle and a :data:`rect_like` \"r\" contain a " +"common non-empty :ref:`Rect`. This will always be ``False`` if either is " +"infinite or empty." +msgstr "" +"長方形と :data:`rect_like` \"r\" " +"が共通の非空のRectを含むかどうかをチェックします。どちらかが無限または空の場合、これは常に ``False`` になります。" + +#: ../../rect.rst:181 efbd1b668aaa4b83aea3f80ad48757f4 +msgid "the rectangle to check." +msgstr "チェックする長方形。" + +#: ../../rect.rst:187 d7073301e4604c1f9c424e96ea4f6888 +msgid "New in version 1.19.3" +msgstr "新機能(バージョン1.19.3)" + +#: ../../rect.rst:189 1a724a40a6fa4eb5b92009a72ec43d15 +msgid "Compute the matrix which transforms this rectangle to a given one." +msgstr "この長方形を指定した長方形に変換する行列を計算します。" + +#: ../../rect.rst:191 07496533a3f14d27bad85600cd6cb061 +msgid "the target rectangle. Must not be empty or infinite." +msgstr "ターゲットの長方形。空または無限であってはいけません。" + +#: ../../rect.rst:192 78c87bd6a08e46b392d2bec1b41651ef +msgid ":ref:`Matrix`" +msgstr "" + +#: ../../rect.rst:193 7b5151a9628744429575a2d457eeaddf +msgid "" +"a matrix `mat` such that `self * mat = rect`. Can for example be used to " +"transform between the page and the pixmap coordinates. See an example use" +" here :ref:`RecipesImages_P`." +msgstr "" +"`self * mat = rect` となるような行列 `mat` です。たとえば、ページとピクスマップの座標間を変換するのに使用できます。 " +"使用例はこちらを参照してください :ref:`RecipesImages_P` " + +#: ../../rect.rst:197 88ecaa3c547a47be8c1b7eddc6a4844b +msgid "New in version 1.17.0" +msgstr "新機能バージョン1.17.0" + +#: ../../rect.rst:199 9a6bd77c5b524450baf036b5e58f53f8 +msgid "" +"Return a new quad after applying a matrix to the rectangle using the " +"fixed point `fixpoint`." +msgstr "固定点 `fixpoint` を使用して、行列を長方形に適用した後の新しい四角形を返します。" + +#: ../../rect.rst:201 685ccabce2584daa8ca5cf065f916a6c +msgid "the fixed point." +msgstr "固定ポイント。" + +#: ../../rect.rst:202 305c5469cf3f497da01ea155e49b6cfa +msgid "the matrix." +msgstr "行列。" + +#: ../../rect.rst:203 f06b2d5320d9428b95c363b255ffd8ac +msgid "" +"a new :ref:`Quad`. This a wrapper for the same-named quad method. If " +"infinite, the infinite quad is returned." +msgstr "新しい :ref:`Quad` 。これは同じ名前の四角形メソッドのラッパーです。無限の場合、無限の四角形が返されます。" + +#: ../../rect.rst:207 b781e934cbc24f179d17436ec5ad4223 +msgid "New in version 1.16.0" +msgstr "新機能バージョン1.16.0" + +#: ../../rect.rst:209 cad110cf715a419a867b612210794baa +msgid "" +"Return the Euclidean norm of the rectangle treated as a vector of four " +"numbers." +msgstr "四角形を4つの数値のベクトルとして扱った場合のユークリッドノルムを返します。" + +#: ../../rect.rst:213 3d0c1479425d4e6aa4835eb4b76889c2 +msgid "" +"**Replace** the rectangle with its valid version. This is done by " +"shuffling the rectangle corners. After completion of this method, the " +"bottom right corner will indeed be south-eastern to the top left one (but" +" may still be empty)." +msgstr "" +"長方形をその有効なバージョンで " +"**置き換えます**。これは長方形のコーナーをシャッフルして行います。このメソッドの完了後、右下のコーナーは確かに左上のコーナーの南東になります(ただし、空である可能性があります)。" + +#: ../../rect.rst:217 addc9cb911fd43dfa6e84cce61e07201 +msgid "Equals result of method *round()*." +msgstr "*round()* メソッドの結果と同じです。" + +#: ../../rect.rst:223 6112dfaa9f5f49fcbdbf584273635d53 +msgid "Equals *Point(x0, y0)*." +msgstr "*Point(x0, y0)* と等しい。" + +#: ../../rect.rst 0862f90648e54766b61aef9d90f85030 +#: 22e8d621cd014016b3fbea1b3ff74403 3523354080154bddabe8999e6b9a4cf1 +#: 4cc565a45f394268904ca547347fc944 8973488d35c64d1187d569f6d1e97748 +#: 8a08c27d65304c2791b47f89c045e3c5 8bde25711e984775a0066263e55308c4 +#: 9c0d0af077684773a5b08f57fa98edae b05cc4800c194f438ef62f6227f6c261 +#: ba19d73da1b247bdbdebcd0977eaff37 f09c4322833140c3b46990aa3526a68a +#: ffaa1710013d49129985889459486d5e +msgid "type" +msgstr "" + +#: ../../rect.rst:225 ../../rect.rst:233 ../../rect.rst:241 ../../rect.rst:249 +#: 400bdc0960a642f9b5d8e38ca7c8e5a0 4ec860563cfb4b9090c51d9a177a83b0 +#: 7376dc7a087b491baa549627c8327f0b a7b5f69ee81649d8bddcfee6f86231e5 +msgid ":ref:`Point`" +msgstr "" + +#: ../../rect.rst:231 00e1b4539d6c43109b115c58ce11040e +msgid "Equals `Point(x1, y0)`." +msgstr "`Point(x1, y0)` と等しい。" + +#: ../../rect.rst:239 800d7a664dab4f8d9556d8404b3b34ba +msgid "Equals `Point(x0, y1)`." +msgstr "`Point(x0, y1)` と等しい。" + +#: ../../rect.rst:247 440272bccc064c36b74060e4ee822bd3 +msgid "Equals `Point(x1, y1)`." +msgstr "" + +#: ../../rect.rst:253 ee9c50a355474a2e8d42e16dd758d0ac +msgid "The quadrilateral `Quad(rect.tl, rect.tr, rect.bl, rect.br)`." +msgstr "四角形 `Quad(rect.tl, rect.tr, rect.bl, rect.br)`。" + +#: ../../rect.rst:255 4efc68bb37fd4c6d909baff77eab6b59 +msgid ":ref:`Quad`" +msgstr "" + +#: ../../rect.rst:259 43065cade528469eae1a424c00580bea +msgid "Width of the rectangle. Equals `max(x1 - x0, 0)`." +msgstr "長方形の幅。`max(x1 - x0, 0)` と等しい。" + +#: ../../rect.rst:265 ae121c0a14ad43c1926e5453975b9b0a +msgid "Height of the rectangle. Equals `max(y1 - y0, 0)`." +msgstr "長方形の高さ。`max(y1 - y0, 0)` と等しい。" + +#: ../../rect.rst:271 643991e87f0a4babb2714c9d06f2c0c7 +msgid "X-coordinate of the left corners." +msgstr "左上の x 座標。" + +#: ../../rect.rst:273 ../../rect.rst:279 ../../rect.rst:285 ../../rect.rst:291 +#: 7239c5df73f6410d8df2a83494348d32 72a34abfeb674143aefa0a1362bac0c8 +#: 79cb4aa5140a4444ae907f76099a6cfd 928e58362b6246f2ae849d1104d2c565 +msgid "float" +msgstr "" + +#: ../../rect.rst:277 e71a3472f7514dbcaab41d96d296c43a +msgid "Y-coordinate of the top corners." +msgstr "左上の y 座標。" + +#: ../../rect.rst:283 a7091ed52ca54a978ccd7db42afe4297 +msgid "X-coordinate of the right corners." +msgstr "右下の x 座標。" + +#: ../../rect.rst:289 d3fcfa61dfdc4d6c8600fb753d805adf +msgid "Y-coordinate of the bottom corners." +msgstr "右下の y 座標。" + +#: ../../rect.rst:295 7dbf9eb43ba940149e72afb047aaaae4 +msgid "`True` if this is the infinite rectangle." +msgstr "これが無限の長方形の場合は `True`。" + +#: ../../rect.rst:297 ../../rect.rst:303 ../../rect.rst:309 +#: 92cef39f9da84b3eb9530123a29f605f 954cb339b07543be86cf5b1a3cfc2381 +#: a5319dac00f143b69f9d26acc370ff63 +msgid "bool" +msgstr "" + +#: ../../rect.rst:301 3a630252ef604eaea03d67f2491405f6 +msgid "`True` if rectangle is empty." +msgstr "これが無限の長方形である場合は `True`。" + +#: ../../rect.rst:307 3f15c2b5cff9442a8023836872bafb93 +msgid "`True` if rectangle is valid." +msgstr "長方形が空である場合は `True`。" + +#: ../../rect.rst:313 3f1be5c4b0614a74b192aee4903e27c8 +msgid "" +"This class adheres to the Python sequence protocol, so components can be " +"accessed via their index, too. Also refer to :ref:`SequenceTypes`." +msgstr "" +"このクラスはPythonのシーケンスプロトコルに従っており、要素にはインデックスを使用できます。Using Python Sequences as" +" Arguments in PyMuPDFも参照してください" + +#: ../../rect.rst:314 908aa3bdd6f444b7b1112275de42ffde +msgid "" +"Rectangles can be used with arithmetic operators -- see chapter " +":ref:`Algebra`." +msgstr "四角形は算術演算子と組み合わせて使用できます - :ref:`Algebra` の演算子代数の章を参照してください。" + +#: ../../footer.rst:60 613ae6b506a5439397bdf5a852445f7d +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "The smallest rectangle containing the " +#~ "current one and *r* is calculated " +#~ "and **replaces the current** one. If " +#~ "either rectangle is infinite, the result" +#~ " is also infinite. If one is " +#~ "empty, the other one will be taken" +#~ " as the result." +#~ msgstr "" +#~ "現在の長方形と *r* " +#~ "を含む最小の長方形を計算し、**現在の長方形を置き換えます**。どちらかの長方形が無限の場合、結果も無限になります。1つが空である場合、もう1つが結果として取られます。" + +#~ msgid "" +#~ "The smallest rectangle containing the " +#~ "current one and point *p* is " +#~ "calculated and **replaces the current** " +#~ "one. **The infinite rectangle remains " +#~ "unchanged.** To create a rectangle " +#~ "containing a series of points, start " +#~ "with (the empty) *pymupdf.Rect(p1, p1)* " +#~ "and successively include the remaining " +#~ "points." +#~ msgstr "" +#~ "現在の長方形と点 *p* " +#~ "を含む最小の長方形を計算し、**現在の長方形を置き換えます**。無限の長方形は変更されません。一連の点を含む長方形を作成するには、(空の)" +#~ " *pymupdf.Rect(p1, p1)* から始め、残りの点を順次含めてください。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/resources.mo b/docs/locales/ja/LC_MESSAGES/resources.mo new file mode 100644 index 000000000..970768a27 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/resources.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/resources.po b/docs/locales/ja/LC_MESSAGES/resources.po new file mode 100644 index 000000000..423030542 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/resources.po @@ -0,0 +1,81 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.24.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 32a5d41df1df459eb1e9d04ab26ae6e9 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 f853d94bef1043f19d324049bb6fc247 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDFは、PDF(およびその他)ドキュメントのデータ抽出、分析、変換、および操作のための高性能なPythonライブラリです。" + +#: ../../header.rst:-1 ee9fba5239104891a698642d810eda59 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFのテキスト抽出、PDFの画像抽出、PDFの変換、PDFのテーブル、PDFの分割、PDFの作成、Pyodide、PyScript" + +#: ../../resources.rst:6 50c78b642e87465bab2c3916355a95a0 +msgid "Resources" +msgstr "リソース" + +#: ../../resources.rst:9 4ff3e7104cb64db5af2a616677e85c6e +msgid "**PyMuPDF Pro**" +msgstr "" + +#: ../../resources.rst:12 08c45bb03e394054bea50f41ce8a06d0 +msgid "For **Office** file support `try PyMuPDF Pro `." +msgstr "**Office** ファイルのサポートには、`PyMuPDF Pro ` をお試しください。" + +#: ../../resources.rst:20 5af5e4a006674fbf9c2bb32995565bc2 +msgid "Find out about **PyMuPDF Utilities**" +msgstr "**PyMuPDFユーティリティ** について調べる" + +#: ../../resources.rst:22 9107d5d9ff12463c8aaa340fe660b894 +msgid "" +"The :title:`GitHub` repository `PyMuPDF-Utilities " +"`_ contains a full range of" +" examples, demonstrations and use cases." +msgstr "" +":title:`GitHub` リポジトリ `PyMuPDF-Utilities `_ には、豊富な例、デモ、およびユースケースが含まれています。" + +#: ../../resources.rst:31 661997d9b5904871afc832b5d106ec7e +msgid "Do you need |PDF| to **DOCX** conversion?" +msgstr "|PDF| を **DOCX** に変換する必要がありますか?" + +#: ../../resources.rst:33 4d1c4a0b38be4825a6719886af0641bf +msgid "" +"We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-" +"docx** library to provide simple document conversion from |PDF| to " +"**DOCX** format." +msgstr "" +"pdf2docx_ ライブラリをお勧めします。これは、|PDF| から **DOCX** 形式への簡単なドキュメント変換を提供するために " +"|PyMuPDF| と **python-docx** ライブラリを使用しています。" + +#: ../../footer.rst:60 bd4e8b3283ae4bb3b28a33df47889855 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは、|version| までのすべてのバージョンをカバーしています" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/shape.mo b/docs/locales/ja/LC_MESSAGES/shape.mo new file mode 100644 index 000000000..1a5c68b28 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/shape.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/shape.po b/docs/locales/ja/LC_MESSAGES/shape.po new file mode 100644 index 000000000..dfeacacb6 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/shape.po @@ -0,0 +1,1675 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 018e1dece6a441ab94af6c3cf54fe2ad +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 128d7b863b2446d2aae91e7677b5e304 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 9fae52a78aa64d129df2f344f6ece1cb +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../shape.rst:6 381674330cea486491db36b97473d432 +msgid "Shape" +msgstr "Shape(シェイプ)" + +#: ../../shape.rst:8 bb103c2d3eac444c8d07a3f46988fd96 +msgid "|pdf_only_class|" +msgstr "" + +#: ../../shape.rst:10 9a755d924c2b43c58eedf195fb5900ba +msgid "" +"This class allows creating interconnected graphical elements on a PDF " +"page. Its methods have the same meaning and name as the corresponding " +":ref:`Page` methods." +msgstr "" +"このクラスは、PDFページ上で相互に接続されたグラフィカル要素を作成できるようにします。このクラスのメソッドは、対応する :ref:`Page` " +"クラスのメソッドと同じ意味と名前を持っています。" + +#: ../../shape.rst:12 4f64857a3599459884faf14f8b620778 +msgid "" +"In fact, each :ref:`Page` draw method is just a convenience wrapper for " +"(1) one shape draw method, (2) the :meth:`Shape.finish` method, and (3) " +"the :meth:`Shape.commit` method. For page text insertion, only the " +":meth:`Shape.commit` method is invoked. If many draw and text operations " +"are executed for a page, you should always consider using a Shape object." +msgstr "" +"実際には、各 :ref:`Page` クラスの描画メソッドは、(1)シェイプの描画メソッド、(2) :meth:`Shape.finish` " +"メソッド、(3) :meth:`Shape.commit` " +"メソッドの3つの要素の便利なラッパーです。ページテキストの挿入に関しては、:meth:`Shape.commit` " +"メソッドのみが呼び出されます。ページで多くの描画およびテキスト操作を実行する場合、常にShapeオブジェクトの使用を検討することをお勧めします。" + +#: ../../shape.rst:14 adf35b9492ef4032855dfaa78d362dd8 +msgid "" +"Several draw methods can be executed in a row and each one of them will " +"contribute to one drawing. Once the drawing is complete, the " +":meth:`Shape.finish` method must be invoked to apply color, dashing, " +"width, morphing and other attributes." +msgstr "" +"このクラスのいくつかの描画メソッドは、連続して実行でき、それぞれが1つの描画に貢献します。描画が完了したら、:meth:`Shape.finish`" +" メソッドを呼び出して色、破線、幅、変形などの属性を適用する必要があります。" + +#: ../../shape.rst:16 3494f9eb3ba04f4b9ffbf7ca0ba7d291 +msgid "" +"**Draw** methods of this class (and :meth:`Shape.insert_textbox`) are " +"logging the area they are covering in a rectangle (:attr:`Shape.rect`). " +"This property can for instance be used to set " +":attr:`Page.cropbox_position`." +msgstr "" +"このクラスの **描画** メソッド(および :meth:`Shape.insert_textbox` )は、カバーしている領域を矩形 " +"(:attr:`Shape.rect`)で記録します。このプロパティは、:attr:`Page.cropbox_position` " +"を設定するために使用できます。" + +#: ../../shape.rst:18 1704136564ac498fbd7ede0ae2550fd1 +msgid "" +"**Text insertions** :meth:`Shape.insert_text` and " +":meth:`Shape.insert_textbox` implicitly execute a \"finish\" and " +"therefore only require :meth:`Shape.commit` to become effective. As a " +"consequence, both include parameters for controlling properties like " +"colors, etc." +msgstr "" +"**テキストの挿入** 操作である :meth:`Shape.insert_text` および " +":meth:`Shape.insert_textbox` は、暗黙的に「finish」を実行し、効果を発揮するために " +":meth:`Shape.commit` " +"のみが必要です。その結果、両方のメソッドには色などのプロパティを制御するためのパラメータが含まれています。" + +#: ../../shape.rst:21 1a21a95d30f24b0799fbe87aa6d636ef +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../shape.rst:21 c391fd059ff34f45a5de99fe97c82269 +msgid "**Description**" +msgstr "**説明**" + +#: ../../shape.rst:23 cf628d248d1d4006b5255ffeec4c0cea +msgid ":meth:`Shape.commit`" +msgstr "" + +#: ../../shape.rst:23 2dfad511b70842e89e20a6e1cc9d14e5 +msgid "update the page's contents" +msgstr "ページの内容を更新します" + +#: ../../shape.rst:24 842848e63ed0452ba22c02b41cc1cfe6 +msgid ":meth:`Shape.draw_bezier`" +msgstr "" + +#: ../../shape.rst:24 7c32f78177d24c4a8d925e253c57f458 +msgid "draw a cubic Bezier curve" +msgstr "キュービックベジエ曲線を描画します" + +#: ../../shape.rst:25 f720cfd9b8c74cc9ac1aef6b3e9b1801 +msgid ":meth:`Shape.draw_circle`" +msgstr "" + +#: ../../shape.rst:25 7755ead8fb034deab2a273e832bc186d +msgid "draw a circle around a point" +msgstr "指定した点を中心に円を描画します" + +#: ../../shape.rst:26 2ad511029c0d4e0984e6b14e823614ad +msgid ":meth:`Shape.draw_curve`" +msgstr "" + +#: ../../shape.rst:26 60740e227103476b8fa72f47856dc277 +msgid "draw a cubic Bezier using one helper point" +msgstr "ヘルパーポイントを使用してキュービックベジエ曲線を描画します" + +#: ../../shape.rst:27 49ae1bd52d564b10bf2322d619fddf14 +msgid ":meth:`Shape.draw_line`" +msgstr "" + +#: ../../shape.rst:27 473af9b1e31842b18446ac550f383e61 +msgid "draw a line" +msgstr "直線を描画します" + +#: ../../shape.rst:28 8d046f2f961d46c2b650de78be591821 +msgid ":meth:`Shape.draw_oval`" +msgstr "" + +#: ../../shape.rst:28 ca9546441256448f86d74d91457126b3 +msgid "draw an ellipse" +msgstr "楕円を描画します" + +#: ../../shape.rst:29 9e37682670104047bf6c63de32f463a3 +msgid ":meth:`Shape.draw_polyline`" +msgstr "" + +#: ../../shape.rst:29 5125820ba65d44d681f9e47d501dabe7 +msgid "connect a sequence of points" +msgstr "一連の点を接続します" + +#: ../../shape.rst:30 71d42594aaab45cca67df700b7c9d7dc +msgid ":meth:`Shape.draw_quad`" +msgstr "" + +#: ../../shape.rst:30 d562d4d207ac4498a6ce6952f37bc599 +msgid "draw a quadrilateral" +msgstr "四角形を描画します" + +#: ../../shape.rst:31 1801c26fba4f4cacb3c33a7fa296e084 +msgid ":meth:`Shape.draw_rect`" +msgstr "" + +#: ../../shape.rst:31 680d48d35ccc4fc5a11064f76750828b +msgid "draw a rectangle" +msgstr "長方形を描画します" + +#: ../../shape.rst:32 600ce0ff98d4423b8f432d99d82d9c83 +msgid ":meth:`Shape.draw_sector`" +msgstr "" + +#: ../../shape.rst:32 6ced41fc2d9a46b5acb562a554b4d344 +msgid "draw a circular sector or piece of pie" +msgstr "円形セクターまたはパイの一部を描画します" + +#: ../../shape.rst:33 76a821083f544a3e87b3991d8069a957 +msgid ":meth:`Shape.draw_squiggle`" +msgstr "" + +#: ../../shape.rst:33 61922306fe554bb29612c3fda0a11dfc +msgid "draw a squiggly line" +msgstr "波線を描画します" + +#: ../../shape.rst:34 7db3c5e50b114c09b2c417d5caae6404 +msgid ":meth:`Shape.draw_zigzag`" +msgstr "" + +#: ../../shape.rst:34 7fa4a57c88bb447ba56795ec0fc2112f +msgid "draw a zigzag line" +msgstr "ジグザグ線を描画します" + +#: ../../shape.rst:35 c5ae0a703e38453da2b34e755b0dc2f7 +msgid ":meth:`Shape.finish`" +msgstr "" + +#: ../../shape.rst:35 1228c2a359044a3ebf7644c4ccfab931 +msgid "finish a set of draw commands" +msgstr "描画コマンドのセットを完了します" + +#: ../../shape.rst:36 3b28b7ad06bc4f0292c553e468e1b2c2 +msgid ":meth:`Shape.insert_text`" +msgstr "" + +#: ../../shape.rst:36 07d16e827e4a4f4b8d73876621a428c6 +msgid "insert text lines" +msgstr "テキスト行を挿入します" + +#: ../../shape.rst:37 d53402a799a14120992423087bb2af00 +msgid ":meth:`Shape.insert_textbox`" +msgstr "" + +#: ../../shape.rst:37 5f6f5071cc7f40f596c69d28b717ed49 +msgid "fit text into a rectangle" +msgstr "テキストを長方形に合わせて挿入します" + +#: ../../shape.rst:38 a0105f21813b4e81836706e03acc767d +msgid ":attr:`Shape.doc`" +msgstr "" + +#: ../../shape.rst:38 b2748d7f63764db5921266838f06413f +msgid "stores the page's document" +msgstr "ページのドキュメントを格納します" + +#: ../../shape.rst:39 f8c540dd347b4c47b43237da9aab7110 +msgid ":attr:`Shape.draw_cont`" +msgstr "" + +#: ../../shape.rst:39 33dfa4702c5348e0b0cf9ac37ff61738 +msgid "draw commands since last :meth:`Shape.finish`" +msgstr ":meth:`Shape.finish` 以降の描画コマンドを格納します" + +#: ../../shape.rst:40 1603283bcf0a426ca085ee2f1a809e91 +msgid ":attr:`Shape.height`" +msgstr "" + +#: ../../shape.rst:40 96c2da7c0993440eb76059affb3693e0 +msgid "stores the page's height" +msgstr "ページの高さを格納します" + +#: ../../shape.rst:41 f55f24eec7c749f9bf7982ea314ba9ec +msgid ":attr:`Shape.lastPoint`" +msgstr "" + +#: ../../shape.rst:41 4d00c8b41d2849c6a3b3d68d3288e080 +msgid "stores the current point" +msgstr "現在のポイントを格納します" + +#: ../../shape.rst:42 5e47ab8e6a58442f9e47e327459da2a7 +msgid ":attr:`Shape.page`" +msgstr "" + +#: ../../shape.rst:42 a75ddc791a444d8b8ef228726c938199 +msgid "stores the owning page" +msgstr "所有するページを格納します" + +#: ../../shape.rst:43 fafda77199424f20b6b1d16a0b4d8402 +msgid ":attr:`Shape.rect`" +msgstr "" + +#: ../../shape.rst:43 f5e628b8c6b74c178c91401e8bd52ece +msgid "rectangle surrounding drawings" +msgstr "描画を囲む長方形を格納します" + +#: ../../shape.rst:44 254fffba560b4e02a1e7679719cecc94 +msgid ":attr:`Shape.text_cont`" +msgstr "" + +#: ../../shape.rst:44 3e2d2ca68de146bba641dc74bf705109 +msgid "accumulated text insertions" +msgstr "累積テキスト挿入を格納します" + +#: ../../shape.rst:45 9e7aa22b548e46d9882662109cd1ac9c +msgid ":attr:`Shape.totalcont`" +msgstr "" + +#: ../../shape.rst:45 63bbd838c69543c9953fdcd21fe3e112 +msgid "accumulated string to be stored in :data:`contents`" +msgstr ":data:`contents` に格納する累積文字列を格納します" + +#: ../../shape.rst:46 c9c8e88b45954d1db13934b73256f642 +msgid ":attr:`Shape.width`" +msgstr "" + +#: ../../shape.rst:46 f996c217f46c4e4793158571db759310 +msgid "stores the page's width" +msgstr "ページの幅を格納します" + +#: ../../shape.rst:49 6ca9611372314acdb5c30f875dd234a7 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../shape.rst:55 daf39d44e6f842548c021bc6a003441f +msgid "" +"Create a new drawing. During importing PyMuPDF, the *pymupdf.Page* object" +" is being given the convenience method *new_shape()* to construct a " +"*Shape* object. During instantiation, a check will be made whether we do " +"have a PDF page. An exception is otherwise raised." +msgstr "" +"新しい描画を作成します。PyMuPDFをインポートする際、*pymupdf.Page* オブジェクトには *Shape* " +"オブジェクトを構築するための便利なメソッド *new_shape()* " +"が提供されます。インスタンス化中に、PDFページが存在するかどうかのチェックが行われます。それ以外の場合は例外が発生します。" + +#: ../../shape.rst 0344d8b03c4d43379aa934bb728d9e1b +#: 0c3d5dc281034fb4aabde8c4402644d0 29a450161f314c58b94c998bcc170ee6 +#: 45242d1e66a6407da6673aea690e52d5 58b0fbaa4eed4d74804c550bd6813a96 +#: 6c6055b5058f43df9bc6d919c9b74c72 71b15eb6c56a43aa8013ebc34adfbf63 +#: 730fed20d2494ff4a215150825fba66f 7b8d9e79156a46e28f6f22664cfe0859 +#: c8182f23ef594e359385b3462aa525c7 c9f6359e10c64c5aa0733b9523e6442d +#: d3ecdef82f7c4314a60e67cc498a9a35 dd592056d7cf44dbb93253e88e1d2c5f +#: f82939868248420e862d3e43dc459be1 +msgid "Parameters" +msgstr "パラメータ" + +#: ../../shape.rst:57 bf986ce0d1ab430a96705a1bd95fe932 +msgid "an existing page of a PDF document." +msgstr "PDFドキュメントの既存のページ。" + +#: ../../shape.rst:62 71430add258e419ea1149741ed7690a3 +msgid "Draw a line from :data:`point_like` objects *p1* to *p2*." +msgstr ":data:`point_like` オブジェクト *p1* から *p2* への直線を描画します。" + +#: ../../shape.rst:64 ../../shape.rst:78 ../../shape.rst:114 +#: 05744df1d30740cab116956faefcf28a 8e3c898deab04d17b0c8362dfc4d832b +#: eb9c481ddd5845278390e459cab0b266 +msgid "starting point" +msgstr "開始点" + +#: ../../shape.rst:66 ../../shape.rst:80 ../../shape.rst:116 +#: 2bb52c6f907f410b988c2a3e7b9cdc0c 3ced08a184a44628a97a8ad43a016890 +#: 530e250ba8fa42d885f4f8645ab52566 +msgid "end point" +msgstr "終了点" + +#: ../../shape.rst 0fc351e28681472b93d5403c94845815 +#: 15a686e8ba8f48ec8b3e8c41c8356651 23d60f0d51fb416faa9d0c32cf9dd5ef +#: 3a022a7f49d444178b6dd7d05c238830 5cec1881c1b14c39b3ea8960178c8f42 +#: 62dfc618ab0f45999cbede6ea19740f0 7742f78d3a014e54b3c46420e69630ca +#: 8a82a532073d4d8a83825e4859fcd9c7 a2c6102297ec4afeb996b63d69fdceb3 +#: b7093980b2d944c88cd01250efa4fb89 ccd2af6fd29640f0b5194a714cad88ac +#: d24385f8e70b46c399a986b2390d15a2 eeacd55380d142c3abdfb5cd469ae77a +msgid "Return type" +msgstr "戻り値の型" + +#: ../../shape.rst:68 ../../shape.rst:84 ../../shape.rst:120 +#: ../../shape.rst:129 ../../shape.rst:138 ../../shape.rst:157 +#: ../../shape.rst:173 ../../shape.rst:185 ../../shape.rst:207 +#: ../../shape.rst:224 ../../shape.rst:233 ../../shape.rst:448 +#: 1b58bc37f8764e6e834ed23f0917baea 63a12950e1864d6bab5898a1405013e8 +#: 6b5788f47d2947b5a8cccc7388c4a116 7ae1384504d344ba8baf4d53f1b08eb6 +#: 955ff44bdb004f4795edb890845548d1 a20cd8902a3a49b48cbca81535493ee4 +#: b6c767db7d0c4db098de09b934246f35 c96a0c6c232c43329c3b2b7ace3a3a6a +#: d18f9ceaa34f43ae86575fa8ade29263 e3808bd25c2746da9685fc8674b7176e +#: f14f9e7e1d9d4d719808c0c4b5ff2853 fec99b56f0d84a5198f8ef024548eae6 +msgid ":ref:`Point`" +msgstr "" + +#: ../../shape.rst 0caf95a0197a4276bc649b06c814929a +#: 314d073507b04cb0a45542dc10587e08 3bea676ac87b42babc6445e56e70221f +#: 82da73693cd447e7bc97d2d625f7187f 852289fa50af4dd79ed85778bb7a369b +#: 890e8d0b301841eeae0f9f0c1c2e810e 8bf31fda79bc4df58878e5081d3ad417 +#: 8e11618a17c04d7993c45564b4237aa1 93f72db01bd645a8b864d76e8c660ea2 +#: 9410c6908d7144d993187fd1f1ca15d3 aa4a0cb35653433e9ba8c5a636a98b96 +#: c58c58a8146e4245a722a05ee08910b7 d53f15e6b09b41cc8f688ea90a0e6c8e +msgid "Returns" +msgstr "戻り値" + +#: ../../shape.rst:69 ../../shape.rst:85 ../../shape.rst:121 +#: 421086fb619843a3b6b48409bd47cd14 7a5e9c7cd5264bd6a8b5d25b60f5fc11 +#: fecec1332c2b408387d3f674e818e6da +msgid "the end point, *p2*." +msgstr "終了点 *p2*。" + +#: ../../shape.rst:76 1a37e804242b49268bb7ed3c9d14ad91 +msgid "" +"Draw a squiggly (wavy, undulated) line from :data:`point_like` objects " +"*p1* to *p2*. An integer number of full wave periods will always be " +"drawn, one period having a length of *4 * breadth*. The breadth parameter" +" will be adjusted as necessary to meet this condition. The drawn line " +"will always turn \"left\" when leaving *p1* and always join *p2* from the" +" \"right\"." +msgstr "" +":data:`point_like` オブジェクト *p1* から *p2* " +"への波線(うねり、曲線)を描画します。常に整数回の完全な波の周期が描画され、1つの周期の長さは4 * " +"breadthです。必要に応じて幅パラメータが調整され、この条件を満たします。描かれる線は常に *p1* を離れる際に「左」に曲がり、*p2* " +"には常に「右」から接続します。" + +#: ../../shape.rst:82 923ab1d9473148a9ab31f184a6a584a8 +msgid "" +"the amplitude of each wave. The condition *2 * breadth < abs(p2 - p1)* " +"must be true to fit in at least one wave. See the following picture, " +"which shows two points connected by one full period." +msgstr "" +"各波の振幅。条件 *2 * breadth < abs(p2 - p1)* " +"が少なくとも1つの波に収まる必要があります。次の図は、1つの完全な周期で接続された2つの点を示しています。" + +#: ../../shape.rst:89 9b8e703ca91d45e5a834b3cd8a848ba0 +msgid "" +"Here is an example of three connected lines, forming a closed, filled " +"triangle. Little arrows indicate the stroking direction." +msgstr "以下は、閉じた塗りつぶされた三角形を形成する三つの連結した線の例です。小さな矢印はストロークの方向を示しています。" + +#: ../../shape.rst:105 2a9d364a83714aa4b2a4c721ced186c6 +msgid "" +"Waves drawn are **not** trigonometric (sine / cosine). If you need that, " +"have a look at `draw.py `_." +msgstr "" +"描かれる波は三角関数(正弦/余弦)**ではありません**。それが必要な場合は `draw.py " +"`_ を参照してください。" + +#: ../../shape.rst:112 79f8c49c2f764ee8b79884b6f50aeb64 +msgid "" +"Draw a zigzag line from :data:`point_like` objects *p1* to *p2*. " +"Otherwise works exactly like :meth:`Shape.draw_squiggle`." +msgstr "" +":data:`point_like` オブジェクト *p1* から *p2* へジグザグのラインを描画します。それ以外は " +":meth:`Shape.draw_squiggle` とまったく同じです。" + +#: ../../shape.rst:118 b5f51c99711e46e5b64ed2eed6958f4f +msgid "" +"the amplitude of the movement. The condition *2 * breadth < abs(p2 - p1)*" +" must be true to fit in at least one period." +msgstr "動きの振幅。1つの周期に収まるように振幅を調整します。条件は *2 * breadth < abs(p2 - p1)* である必要があります。" + +#: ../../shape.rst:125 52bf058662e74078983e8223772541b2 +msgid "" +"Draw several connected lines between points contained in the sequence " +"*points*. This can be used for creating arbitrary polygons by setting the" +" last item equal to the first one." +msgstr "" +"シーケンス *points* " +"に含まれるポイント間にいくつかの接続されたラインを描画します。これを使用して、最後のアイテムを最初のアイテムと等しく設定することで、任意の多角形を作成できます。" + +#: ../../shape.rst:127 e4a75e7d084541d29d7e53d3b3a85d40 +msgid "" +"a sequence of :data:`point_like` objects. Its length must at least be 2 " +"(in which case it is equivalent to *draw_line()*)." +msgstr "" +":data:`point_like` オブジェクトのシーケンス。その長さは少なくとも2である必要があります(この場合、*draw_line()* " +"と同等です)。" + +#: ../../shape.rst:130 dc706f23c2e642acb4d2b337efebdbcf +msgid "*points[-1]* -- the last point in the argument sequence." +msgstr "引数シーケンスの最後のポイント、*points[-1]*。" + +#: ../../shape.rst:134 94915cd7d656485ab712766fbefe65a0 +msgid "" +"Draw a standard cubic Bézier curve from *p1* to *p4*, using *p2* and *p3*" +" as control points." +msgstr "*p1* から *p4* への標準的な立方ベジエ曲線を *p2*と *p3* を制御ポイントとして描画します。" + +#: ../../shape.rst:136 abc194380eb0469ab698b73d44a6c7e1 +#, fuzzy +msgid "All arguments are :data:`point_like` objects." +msgstr "すべての引数は :data:`point_like` です。" + +#: ../../shape.rst:139 df3f4add09ad4d1b8476f30749a58870 +msgid "the end point, *p4*." +msgstr "終了点 *p4*。" + +#: ../../shape.rst:141 cfe360d7430b4ee49ab6d8e72ada1f60 +msgid "" +"The points do not need to be different -- experiment a bit with some of " +"them being equal!" +msgstr "ポイントは異なる必要はありません。一部のポイントが等しい場合も試してみてください!" + +#: ../../shape.rst:143 b2c7743deec640c9ad5acdf83ff1a1ac +msgid "Example:" +msgstr "例:" + +#: ../../shape.rst:149 c052a7321ee5405383af52c987124aaf +msgid "" +"Draw an \"ellipse\" inside the given tetragon (quadrilateral). If it is a" +" square, a regular circle is drawn, a general rectangle will result in an" +" ellipse. If a quadrilateral is used instead, a plethora of shapes can be" +" the result." +msgstr "指定された四角形(四辺形)内に「楕円」を描画します。正方形の場合、正円が描かれ、一般的な長方形は楕円になります。四角形が使用される場合、さまざまな形状が結果として得られる可能性があります。" + +#: ../../shape.rst:151 a268911c93934cbcb4f1633a0b218684 +msgid "" +"The drawing starts and ends at the middle point of the line `bottom-left " +"-> top-left` corners in an anti-clockwise movement." +msgstr "描画は、反時計回りの動きで、左下隅 -> 左上隅の中間点で開始し、終了します。" + +#: ../../shape.rst:153 6f2a67e6d6e54928babf23e41c6e99f9 +msgid "" +":data:`rect_like` or :data:`quad_like`. *Changed in version 1.14.5:* " +"Quads are now also supported." +msgstr "" + +#: ../../shape.rst:153 80ab603331a444e1b025cca8f98e3ecb +msgid ":data:`rect_like` or :data:`quad_like`." +msgstr ":data:`rect_like` または :data:`quad_like`。" + +#: ../../shape.rst:155 867809df376648158f4192f09600d70a +msgid "*Changed in version 1.14.5:* Quads are now also supported." +msgstr "*バージョン1.14.5で変更:* 四角形もサポートされるようになりました。" + +#: ../../shape.rst:158 0ab5658259fc4553ac0d8e6d5e00cf7d +msgid "" +"the middle point of line `rect.bl -> rect.tl`, or resp. `quad.ll -> " +"quad.ul`. Look at just a few examples here, or at the *quad-show?.py* " +"scripts in the PyMuPDF-Utilities repository." +msgstr "" +"`rect.bl -> rect.tl` 、または `quad.ll -> quad.ul` の線の中間点。いくつかの例はこちら" +"、またはPyMuPDF-Utilitiesリポジトリの *quad-show?.py* スクリプトをご覧ください。" + +#: ../../shape.rst:165 7bb6769fb24847eeb01c8e0ecaa2b717 +msgid "" +"Draw a circle given its center and radius. The drawing starts and ends at" +" point `center - (radius, 0)` in an **anti-clockwise** movement. This " +"point is the middle of the enclosing square's left side." +msgstr "" +"指定された中心と半径で円を描きます。描画は、**反時計回り** の動きで、ポイント `center - (radius, 0)` " +"で始まり、終了します。このポイントは、包括的な正方形の左側の中央です。" + +#: ../../shape.rst:167 ab0de5ea859b4c46bd7a1f2b2c268cae +msgid "" +"This is a shortcut for `draw_sector(center, start, 360, " +"fullSector=False)`. To draw the same circle in a **clockwise** movement, " +"use `-360` as degrees." +msgstr "" +"これは `draw_sector(center, start, 360, fullSector=False)` " +"のショートカットです。**時計回り** に同じ円を描画するには、度数として -360 を使用してください。" + +#: ../../shape.rst:169 ../../shape.rst:199 25184682bdee4a01915a3f613857c445 +#: 7b7b826458df4968be6d68ef9404267a +msgid "the center of the circle." +msgstr "円の中心。" + +#: ../../shape.rst:171 4ab6628f120348338cfd839e2caec97e +msgid "the radius of the circle. Must be positive." +msgstr "円の半径。正である必要があります。" + +#: ../../shape.rst:174 b3b715f97c744370ae6a8b33746f9e82 +msgid "" +"`Point(center.x - radius, center.y)`. .. image:: images/img-drawcircle.*" +" :scale: 60" +msgstr "" + +#: ../../shape.rst:174 df18f59ac1ae4314b11c396cb70c7aef +msgid "`Point(center.x - radius, center.y)`." +msgstr "" + +#: ../../shape.rst:181 511f66677f4e420882ae96776e278623 +msgid "" +"A special case of *draw_bezier()*: Draw a cubic Bezier curve from *p1* to" +" *p3*. On each of the two lines `p1 -> p2` and `p3 -> p2` one control " +"point is generated. Both control points will therefore be on the same " +"side of the line `p1 -> p3`. This guaranties that the curve's curvature " +"does not change its sign. If the lines to p2 intersect with an angle of " +"90 degrees, then the resulting curve is a quarter ellipse (resp. quarter " +"circle, if of same length)." +msgstr "" +"*draw_bezier()* の特殊なケース: *p1* から *p3* までの三次ベジエ曲線を描きます。各ライン `p1 -> p2` および" +" `p3 -> p2` に制御点が生成されます。したがって、両方の制御点はライン `p1 -> p3` " +"の同じ側にあります。これにより、曲線の曲率が符号を変更しないことが保証されます。p2 " +"へのラインが90度で交差する場合、結果の曲線は四分の楕円(同じ長さの場合は四半期の円)です。" + +#: ../../shape.rst:183 9748056723164ca58a74b33be5b6411a +msgid "All arguments are :data:`point_like`." +msgstr "すべての引数は :data:`point_like` です。" + +#: ../../shape.rst:186 234e9aef5be6430fb813b740638d577d +msgid "" +"the end point, *p3*. The following is a filled quarter ellipse segment. " +"The yellow area is oriented **clockwise:** .. image:: images/img-" +"drawCurve.png :align: center" +msgstr "" + +#: ../../shape.rst:186 ed67ce39c73141f7bef99dd5f7be27f8 +msgid "" +"the end point, *p3*. The following is a filled quarter ellipse segment. " +"The yellow area is oriented **clockwise:**" +msgstr "終点、p3。以下は塗りつぶされた四分の楕円セグメントです。黄色の領域は **時計回り** に向いています:" + +#: ../../shape.rst:197 ef03f28841984ffbba41b5e2fe20fdfa +msgid "" +"Draw a circular sector, optionally connecting the arc to the circle's " +"center (like a piece of pie)." +msgstr "円のセクタを描き、必要に応じて円の中心に弧を接続します(パイの一片のように)。" + +#: ../../shape.rst:201 afa0d50a3544433a9163b5c5ee3d99fb +msgid "" +"one of the two end points of the pie's arc segment. The other one is " +"calculated from the *angle*." +msgstr "パイの弧セグメントの2つの端点のうちの1つ。*角度* から他の端点が計算されます。" + +#: ../../shape.rst:203 2ba0911a96f24d80a6ac306a63eca16f +msgid "" +"the angle of the sector in degrees. Used to calculate the other end point" +" of the arc. Depending on its sign, the arc is drawn anti-clockwise " +"(positive) or clockwise." +msgstr "その符号に応じて、弧は反時計回り(正の値)または時計回りに描かれます。" + +#: ../../shape.rst:205 122b93568c5b450fa88163aefad21cbd +msgid "" +"whether to draw connecting lines from the ends of the arc to the circle " +"center. If a fill color is specified, the full \"pie\" is colored, " +"otherwise just the sector." +msgstr "弧の端点から円の中心に接続線を描くかどうか。塗りつぶし色が指定されている場合、完全な「パイ」が色付けされ、それ以外の場合はセクタのみが色付けされます。" + +#: ../../shape.rst:208 f9dcd56f8974458e9bf8d1a15d5ee000 +msgid "" +"the other end point of the arc. Can be used as starting point for a " +"following invocation to create logically connected pies charts. Examples:" +" .. image:: images/img-drawSector1.* .. image:: images/img-" +"drawSector2.*" +msgstr "" + +#: ../../shape.rst:208 68c46ca6da57482980a202d477b5fd12 +msgid "" +"the other end point of the arc. Can be used as starting point for a " +"following invocation to create logically connected pies charts. Examples:" +msgstr "弧のもう一方の端点。論理的に接続されたパイチャートを作成するための次の呼び出しの開始点として使用できます。以下は例です:" + +#: ../../shape.rst:217 0c89141549574512807fbd5c4e278bec +msgid "Changed in v1.22.0: Added parameter *radius*." +msgstr "変更された v1.22.0 で:パラメーター *radius* が追加されました。" + +#: ../../shape.rst:219 be1f7880d3ec44fca8aa6a24374c872f +msgid "" +"Draw a rectangle. The drawing starts and ends at the top-left corner in " +"an anti-clockwise movement." +msgstr "長方形を描画します。描画は、上部左隅から反時計回りの動きで始まり、終了します。" + +#: ../../shape.rst:221 b78e9eda24364a62af777390e749a6b9 +msgid "where to put the rectangle on the page." +msgstr "ページ上の長方形の配置先。" + +#: ../../shape.rst:222 0cc3ff19e41642c9937661c081a02ca6 +#, python-format +msgid "" +"draw rounded rectangle corners. If not `None`, specifies the radius of " +"the curvature as a percentage of a rectangle side length. This must one " +"or (a tuple of) two floats `0 < radius <= 0.5`, where 0.5 corresponds to " +"50% of the respective side. If a float, the radius of the curvature is " +"computed as `radius * min(width, height)`, drawing the corner's perimeter" +" as a quarter circle. If a tuple `(rx, ry)` is given, then the curvature " +"is asymmetric with respect to the horizontal and vertical directions. A " +"value of `radius=(0.5, 0.5)` draws an ellipse." +msgstr "" +"角の丸みを付けた長方形を描画します。`None` でない場合、曲線の半径を長方形の辺の長さのパーセンテージとして指定します。これは 1 つまたは " +"2 つの浮動小数点数、`0 < radius <= 0.5` でなければなりません。0.5 は、対応する辺の 50% " +"に相当します。浮動小数点数の場合、曲線の半径は `radius * min(width, height)` として計算され、角の周囲を 1/4 " +"円で描画します。タプル `(rx, ry)` が指定された場合、曲線は水平および垂直方向に対して非対称です。`radius=(0.5, 0.5)`" +" は楕円を描画します。" + +#: ../../shape.rst:225 02c20e94c630475e86d69b1eaad41a90 +msgid "top-left corner of the rectangle." +msgstr "長方形の上部左隅。" + +#: ../../shape.rst:229 bfc2aa459e2a4c03870479ad9826f5d9 +msgid "" +"Draw a quadrilateral. The drawing starts and ends at the top-left corner " +"(:attr:`Quad.ul`) in an anti-clockwise movement. It is a shortcut of " +":meth:`Shape.draw_polyline` with the argument `(ul, ll, lr, ur, ul)`." +msgstr "" +"四角形を描画します。描画は、上部左隅 (:attr:`Quad.ul`) から反時計回りの動きで始まり、終了します。これは " +":meth:`Shape.draw_polyline` の引数 `(ul, ll, lr, ur, ul)` を使用したショートカットです。" + +#: ../../shape.rst:231 6a0f2c584dcd41839d0c24fa7f7ece37 +msgid "where to put the tetragon on the page." +msgstr "ページ上の四角形の配置先。" + +#: ../../shape.rst:234 a5da49f4b1dd40e58397116799e42ee1 +msgid ":attr:`Quad.ul`." +msgstr "" + +#: ../../shape.rst:253 364f28a6b68a4d219e3f51a173d9726c +msgid "" +"Finish a set of *draw*()* methods by applying :ref:`CommonParms` to all " +"of them." +msgstr "次の *draw*()* メソッドの一連の描画を、:ref:`CommonParms` をそれらすべてに適用して終了します。" + +#: ../../shape.rst:255 a1ac1faf1bdd4405a81f62ad68fc6d3c +msgid "" +"It has **no effect on** :meth:`Shape.insert_text` and " +":meth:`Shape.insert_textbox`." +msgstr "" +":meth:`Shape.insert_text` および :meth:`Shape.insert_textbox` には " +"**影響を与えません**。" + +#: ../../shape.rst:257 6d42cc4751ff4c48bf35e9b50be52e4d +msgid "" +"The method also supports **morphing the compound drawing** using " +":ref:`Point` *fixpoint* and :ref:`matrix` *matrix*." +msgstr "" +"このメソッドは、:ref:`Point` *fixpoint* と :ref:`matrix` *matrix* を使用して、テキストまたは " +"**複合描画を変形させること** もサポートしています。" + +#: ../../shape.rst:259 7064330070ff4e5d91f9040e379f3789 +msgid "" +"morph the text or the compound drawing around some arbitrary :ref:`Point`" +" *fixpoint* by applying :ref:`Matrix` *matrix* to it. This implies that " +"*fixpoint* is a **fixed point** of this operation: it will not change its" +" position. Default is no morphing (``None``). The matrix can contain any " +"values in its first 4 components, *matrix.e == matrix.f == 0* must be " +"true, however. This means that any combination of scaling, shearing, " +"rotating, flipping, etc. is possible, but translations are not." +msgstr "" +"テキストまたは複合描画を、任意の :ref:`Point` *fixpoint* に :ref:`Matrix` *matrix* " +"を適用して変形します。これにより、*fixpoint* はこの操作の **固定ポイント** " +"であることを意味します。その位置は変わりません。デフォルトは変形なし(``None``)です。ただし、matrixの最初の4つの成分には任意の値を含めることができ、*matrix.e" +" == matrix.f == 0* " +"である必要があります。これは、スケーリング、シアリング、回転、反転など、移動以外の組み合わせが可能であることを意味しますが、移動はできません。" + +#: ../../shape.rst:261 a1d25d306f2b466c93662943e5d2e5a8 +msgid "" +"*(new in v1.18.1)* set transparency for stroke colors. Value < 0 or > 1 " +"will be ignored. Default is 1 (intransparent)." +msgstr "*(v1.18.1で新規追加)* ストロークの透明度を設定します。値が0未満または1を超える場合は無視されます。デフォルトは1(不透明)です。" + +#: ../../shape.rst:262 eed144cc0b8746d79c86a027b0a1bdf6 +msgid "" +"*(new in v1.18.1)* set transparency for fill colors. Default is 1 " +"(intransparent)." +msgstr "*(v1.18.1で新規追加)* 塗りつぶしの色の透明度を設定します。デフォルトは1(不透明)です。" + +#: ../../shape.rst:264 7b63bc7f52d0491d8d6933c6da2b991f +msgid "" +"request the **\"even-odd rule\"** for filling operations. Default is " +"``False``, so that the **\"nonzero winding number rule\"** is used. These" +" rules are alternative methods to apply the fill color where areas " +"overlap. Only with fairly complex shapes a different behavior is to be " +"expected with these rules. For an in-depth explanation, see " +":ref:`AdobeManual`, pp. 137 ff. Here is an example to demonstrate the " +"difference." +msgstr "" +"塗りつぶし操作に対して **「even-oddルール」** を要求します。デフォルトは ``False`` で、したがって " +"**「非ゼロのワインディング数ルール」** " +"が使用されます。これらのルールは、領域が重なる場所で塗りつぶし色を適用するための代替メソッドです。複雑な形状では、これらのルールに異なる振る舞いが期待されることはほとんどありません。詳細な説明については、:ref:`AdobeManual`" +" の137ページ以降を参照してください。以下はその違いを示す例です。" + +#: ../../shape.rst:266 7d4e6214fc3849a2985181d0b2e2058e +msgid "" +"*(new in v1.18.4)* the :data:`xref` number of an :data:`OCG` or " +":data:`OCMD` to make this drawing conditionally displayable." +msgstr "" +"*(v1.18.4で新規追加)* この描画を条件付きで表示可能にする :data:`OCG` または :data:`OCMD` の " +":data:`xref` 番号。" + +#: ../../shape.rst:270 a0093bcedd2c41a2a7f8a4c0506a33b8 +msgid "For each pixel in a shape, the following will happen:" +msgstr "各ピクセルの場合、以下のことが起こります:" + +#: ../../shape.rst:272 18afdc3a67564cd3855e90c36be5d1fc +msgid "" +"Rule **\"even-odd\"** counts, how many areas contain the pixel. If this " +"count is **odd,** the pixel is regarded **inside** the shape, if it is " +"**even**, the pixel is **outside**." +msgstr "" +"ルール **「even-odd」** は、ピクセルを含むエリアの数をカウントします。このカウントが **奇数** の場合、ピクセルは形状の " +"**内部** と見なされ、偶数の場合は **外部** と見なされます。" + +#: ../../shape.rst:274 ba4fbbd8420b44c286ecbac807b6b5ca +msgid "" +"The default rule **\"nonzero winding\"** in addition looks at the " +"*\"orientation\"* of each area containing the pixel: it **adds 1** if an " +"area is drawn anti-clockwise and it **subtracts 1** for clockwise areas. " +"If the result is zero, the pixel is regarded **outside,** pixels with a " +"non-zero count are **inside** the shape." +msgstr "" +"デフォルトのルール **「nonzero winding」** では、ピクセルを含む各エリアの *「方向」* " +"も考慮されます。エリアが反時計回りに描かれている場合は **1を追加し** 、時計回りのエリアは **1を引きます** " +"。結果がゼロの場合、ピクセルは **外部** と見なされ、非ゼロのカウントを持つピクセルは形状の **内部** と見なされます。" + +#: ../../shape.rst:276 1b87591000934e71bcba26ceb5aee7c3 +msgid "" +"Of the four shapes in above image, the top two each show three circles " +"drawn in standard manner (anti-clockwise, look at the arrows). The lower " +"two shapes contain one (the top-left) circle drawn clockwise. As can be " +"seen, area orientation is irrelevant for the right column (even-odd " +"rule)." +msgstr "" +"上記の画像の4つの形状のうち、上の2つはそれぞれ標準的な方法で描かれた3つの円を示しています(反時計回り、矢印を参照)。下の2つの形状には時計回りに描かれた1つ(左上)の円が含まれています。右の列では、エリアの方向は無視されることがわかります" +"(even-oddルール)。" + +#: ../../shape.rst:297 27b4efd177f04bd58c4f071741d5b26d +#, fuzzy +msgid "Insert text lines starting at ``point``." +msgstr "テキスト行を *point* で開始します。" + +#: ../../shape.rst:299 21d9cb7b2ed148a9b60492a9aedda427 +msgid "" +"the bottom-left position of the first character of *text* in pixels. It " +"is important to understand, how this works in conjunction with the " +"*rotate* parameter. Please have a look at the following picture. The " +"small red dots indicate the positions of *point* in each of the four " +"possible cases. .. image:: images/img-inserttext.* :scale: 33" +msgstr "" + +#: ../../shape.rst:299 c40df0e9c77d4e6e87a91795ff8f09c3 +msgid "" +"the bottom-left position of the first character of *text* in pixels. It " +"is important to understand, how this works in conjunction with the " +"*rotate* parameter. Please have a look at the following picture. The " +"small red dots indicate the positions of *point* in each of the four " +"possible cases." +msgstr "" +"テキストの最初の文字の下部左位置(ピクセル単位)。*rotate* " +"パラメータとの連携方法を理解することが重要です。以下の画像をご覧ください。小さな赤い点は、各ケースでの *point* の位置を示しています。" + +#: ../../shape.rst:304 6876d7284d7749eb8c8154d720a23813 +#, fuzzy +msgid "" +"the text to be inserted. May be specified as either a string type or as a" +" sequence type. For sequences, or strings containing line breaks ``\\n``," +" several lines will be inserted. No care will be taken if lines are too " +"wide, but the number of inserted lines will be limited by \"vertical\" " +"space on the page (in the sense of reading direction as established by " +"the *rotate* parameter). Any rest of *text* is discarded -- the return " +"code however contains the number of inserted lines." +msgstr "" +"挿入するテキスト。文字列型または文字列のシーケンス型として指定できます。シーケンスの場合、または改行文字 *\\n* " +"を含む文字列の場合、複数の行が挿入されます。ただし、行があまりにも幅が広い場合、挿入される行数はページ上の「垂直」スペース(*回転* " +"パラメータによって確立される読み取り方向の意味で)によって制限されます。*テキスト* " +"の残り部分は破棄されますが、戻りコードには挿入された行数が含まれます。" + +#: ../../shape.rst:306 ../../shape.rst:346 2ddd62ce504f4fdab2ec95f2e7b90943 +#: 74e3e3ef15434f58906434885ecf4282 +msgid "" +"a factor to override the line height calculated from font properties. If " +"not `None`, a line height of `fontsize * lineheight` will be used." +msgstr "" +"フォントのプロパティから計算された行の高さをオーバーライドするためのファクター。``None`` でない場合、`fontsize * " +"lineheight` の行の高さが使用されます。" + +#: ../../shape.rst:307 bcb42a1a18294564b272ac80287e3f5b +msgid "" +"*(new in v1.18.1)* set transparency for stroke colors (the **border " +"line** of a character). Only `0 <= value <= 1` will be considered. " +"Default is 1 (intransparent)." +msgstr "*(v1.18.1で新規追加)* ストロークの透明度を設定します。値が0未満または1を超える場合は無視されます。デフォルトは1(不透明)です。" + +#: ../../shape.rst:308 ../../shape.rst:351 78dc8d3116ba485c9d068583bc0deb49 +#: ed0aa08d38b64a99a38c604475e5ab10 +msgid "" +"*(new in v1.18.1)* set transparency for fill colors. Default is 1 " +"(intransparent). Use this value to control transparency of the text " +"color. Stroke opacity **only** affects the border line of characters." +msgstr "" +"*(v1.18.1で新たに追加)* " +"塗りつぶしカラーの透明度を設定します。デフォルトは1(不透明)です。テキストカラーの透明度を制御するためにこの値を使用します。ストロークの透明度は文字の境界線に" +" **のみ** 影響します。" + +#: ../../shape.rst:310 31070b0b7e5b4bcf835147bdee366b06 +msgid "" +"determines whether to rotate the text. Acceptable values are multiples of" +" 90 degrees. Default is 0 (no rotation), meaning horizontal text lines " +"oriented from left to right. 180 means text is shown upside down from " +"**right to left**. 90 means anti-clockwise rotation, text running " +"**upwards**. 270 (or -90) means clockwise rotation, text running " +"**downwards**. In any case, *point* specifies the bottom-left coordinates" +" of the first character's rectangle. Multiple lines, if present, always " +"follow the reading direction established by this parameter. So line 2 is " +"located **above** line 1 in case of `rotate = 180`, etc." +msgstr "" +"テキストを回転させるかどうかを決定します。許容される値は90度の倍数です。デフォルトは0(回転なし)で、左から右への水平テキストが表示されます。180は、**右から左**" +" への上下逆さテキストを意味し、90は反時計回りの回転で、テキストが **上向き** " +"に表示されます。270(または-90)は時計回りの回転で、テキストが **下向き** " +"に表示されます。いずれの場合も、pointは最初の文字の矩形の左下の座標を指定します。存在する場合、複数行は常にこのパラメータによって確立された読み取り方向に従います。したがって、" +" *rotate = 180* の場合、行2は行1の **上に** 配置されます。" + +#: ../../shape.rst:312 ../../shape.rst:355 00187385dc1f4841bfa53a9d811dd79a +#: 637e2ddab242428ea8f57d2059584df5 +msgid "" +"*(new in v1.18.4)* the :data:`xref` number of an :data:`OCG` or " +":data:`OCMD` to make this text conditionally displayable." +msgstr "" +"*(v1.18.4 新機能)* :data:`OCG` または :data:`OCMD` の :data:`xref` " +"番号、テキストを条件付きで表示可能にするためのものです。" + +#: ../../shape.rst:315 454a6660578a4891948c63e1c5b52d5c +msgid "number of lines inserted." +msgstr "挿入された行数。" + +#: ../../shape.rst:317 ../../shape.rst:367 9d122c97f6c14ba4b71b6ff10e4330d0 +#: e82c82e616404ab399b088e67162d600 +msgid "For a description of the other parameters see :ref:`CommonParms`." +msgstr "他のパラメータの説明については :ref:`CommonParms` を参照してください。" + +#: ../../shape.rst:338 f42ebad5ca1c429d86307b7f655ba863 +msgid "" +"PDF only: Insert text into the specified rectangle. The text will be " +"split into lines and words and then filled into the available space, " +"starting from one of the four rectangle corners, which depends on " +"`rotate`. Line feeds and multiple space will be respected." +msgstr "" +"PDFのみ:指定された矩形にテキストを挿入します。テキストは行と単語に分割され、*回転* " +"に応じて異なる矩形の角から始まり、利用可能なスペースに挿入されます。行送りと複数のスペースは尊重されます。" + +#: ../../shape.rst:340 bdb0bfe831814187852bc2c1cda74b81 +msgid "the area to use. It must be finite and not empty." +msgstr "使用する領域。有限でかつ空でなければなりません。" + +#: ../../shape.rst:342 09e7231558844204a9827cccc8d0e053 +msgid "" +"the text to be inserted. Must be specified as a string or a sequence of " +"strings. Line breaks are respected also when occurring in a sequence " +"entry." +msgstr "挿入するテキスト。文字列または文字列のシーケンスとして指定する必要があります。シーケンスエントリ内の行送りも尊重されます。" + +#: ../../shape.rst:344 c3d7974b47a64e6a9ce2e15fcc08da02 +msgid "" +"align each text line. Default is 0 (left). Centered, right and justified " +"are the other supported options, see :ref:`TextAlign`. Please note that " +"the effect of parameter value *TEXT_ALIGN_JUSTIFY* is only achievable " +"with \"simple\" (single-byte) fonts (including the :ref:`Base-14-Fonts`)." +msgstr "" +"各テキスト行を整列させます。デフォルトは0(左揃え)。センタリング、右揃え、ジャスティファイドがサポートされている他のオプションです。詳細は " +":ref:`TextAlign` を参照してください。なお、パラメータ値 *TEXT_ALIGN_JUSTIFY* " +"の効果は、「シンプル」(単一バイト)フォント(:ref:`Base-14-Fonts` を含む)でのみ達成できます。" + +#: ../../shape.rst:346 d614c8ee637141b8b8e8f6dc6ca7c7f3 +msgid "" +"a factor to override the line height calculated from font properties. If " +"not `None`, a line height of `fontsize * lineheight` will be used. :arg " +"int expandtabs: controls handling of tab characters ``\\t`` using the " +"`string.expandtabs()` method **per each line**." +msgstr "" +"フォントのプロパティから計算された行の高さをオーバーライドするためのファクター。``None`` でない場合、`fontsize * " +"lineheight` の行の高さが使用されます。" + +#: ../../shape.rst 48c5f8c0b32044499211dabdb7f87407 +msgid "arg int expandtabs" +msgstr "" + +#: ../../shape.rst:348 6028a7be9acd4b54896401c02aae1f59 +msgid "" +"controls handling of tab characters ``\\t`` using the " +"`string.expandtabs()` method **per each line**." +msgstr "タブ文字 *\\t* の処理を **行ごとに** *string.expandtabs()* メソッドを使用して制御します。" + +#: ../../shape.rst:350 bc65023b0ad24de89efdb0e3d5028fea +msgid "" +"*(new in v1.18.1)* set transparency for stroke colors. Negative values " +"and values > 1 will be ignored. Default is 1 (intransparent)." +msgstr "*(v1.18.1で新たに追加)* ストロークカラーの透明度を設定します。負の値や1より大きい値は無視されます。デフォルトは1(不透明)です。" + +#: ../../shape.rst:353 f97c73f6c3504030b80fd487a11fe8ee +msgid "" +"requests text to be rotated in the rectangle. This value must be a " +"multiple of 90 degrees. Default is 0 (no rotation). Effectively, the four" +" values `0`, `90`, `180` and `270` (= `-90`) are processed, each causing " +"the text to start in a different rectangle corner. Bottom-left is `90`, " +"bottom-right is `180`, and `-90 / 270` is top-right. See the example how " +"text is filled in a rectangle. This argument takes precedence over " +"morphing. See the second example, which shows text first rotated left by " +"`90` degrees and then the whole rectangle rotated clockwise around is " +"lower left corner." +msgstr "" +"テキストを矩形内で回転させるように要求します。受け入れ可能な値は90度の倍数です。デフォルトは0(回転なし)、つまり左から右への水平なテキスト行です。180は、右から左に逆さまに表示されることを意味します。90は反時計回りの回転で、テキストは上に向かって表示されます。270(または-90)は時計回りの回転で、テキストは下に向かって表示されます。いずれの場合も、pointは最初の文字の矩形の左下の座標を指定します。複数行がある場合は、常にこのパラメータで確立された読み取り方向に従います。したがって、rotate" +" = 180の場合、行2は行1の上に配置されますなど。" + +#: ../../shape.rst:358 ae409af6e5bf482d9ca9c1480823156f +msgid "" +"**If positive or zero**: successful execution. The value returned is the " +"unused rectangle line space in pixels. This may safely be ignored -- or " +"be used to optimize the rectangle, position subsequent items, etc. **If " +"negative**: no execution. The value returned is the space deficit to " +"store text lines. Enlarge rectangle, decrease *fontsize*, decrease text " +"amount, etc." +msgstr "" + +#: ../../shape.rst:359 10b55cdb24e441a881c9aa12c4beae3c +msgid "" +"**If positive or zero**: successful execution. The value returned is the " +"unused rectangle line space in pixels. This may safely be ignored -- or " +"be used to optimize the rectangle, position subsequent items, etc." +msgstr "" +"**正またはゼロの場合** " +":正常な実行。返される値は使用されていない矩形行のスペース(ピクセル単位)です。これを安全に無視するか、矩形を最適化したり、後続のアイテムの位置を調整したりするために使用できます。" + +#: ../../shape.rst:361 8dc52497077b4262840bf6aeb868aaf7 +msgid "" +"**If negative**: no execution. The value returned is the space deficit to" +" store text lines. Enlarge rectangle, decrease *fontsize*, decrease text " +"amount, etc." +msgstr "" +"**負の場合**: " +"実行なし。返される値はテキスト行を格納するためのスペース不足です。矩形を拡大したり、フォントサイズを減少させたり、テキストの量を減少させたりする必要があります。" + +#: ../../shape.rst:375 3da0ea4708304e5e885f32ac75cc200f +msgid "" +"Update the page's :data:`contents` with the accumulated drawings, " +"followed by any text insertions. If text overlaps drawings, it will be " +"written on top of the drawings." +msgstr "" +"ページの :data:`contents` " +"を蓄積された描画に更新し、その後テキスト挿入が続きます。テキストが描画と重なる場合、テキストは描画の上に書き込まれます。" + +#: ../../shape.rst:377 50890cf6faf0493a8045890157cad2f2 +msgid "**Do not forget to execute this method:**" +msgstr "**このメソッドを実行しないことは忘れないでください:**" + +#: ../../shape.rst:379 0f8cf27d8dba4f6e88f93d6eff91f99a +msgid "" +"If a shape is **not committed, it will be ignored and the page will not " +"be changed!**" +msgstr "シェイプが **確定されていない** 場合、無視され、ページは変更されません!" + +#: ../../shape.rst:381 560de3610cb3427d9412f8cc6853aa96 +msgid "" +"The method will reset attributes :attr:`Shape.rect`, :attr:`lastPoint`, " +":attr:`draw_cont`, :attr:`text_cont` and :attr:`totalcont`. Afterwards, " +"the shape object can be reused for the **same page**." +msgstr "" +"このメソッドは、属性 :attr:`Shape.rect`、:attr:`lastPoint` 、:attr:`draw_cont` " +"、:attr:`text_cont` 、および :attr:`totalcont` をリセットします。その後、シェイプオブジェクトは " +"**同じページ** で再利用できます。" + +#: ../../shape.rst:383 b541e686a7c946f88bf7c19567ce734c +msgid "" +"determine whether to put content in foreground (default) or background. " +"Relevant only, if the page already has a non-empty :data:`contents` " +"object." +msgstr "" +"コンテンツを前面(デフォルト)または背面に配置するかどうかを決定します。ページに既存の非空の :data:`contents` " +"オブジェクトがある場合のみ関連します。" + +#: ../../shape.rst:385 417137176c1747349405bb4c8fb61d9e +msgid "**---------- Attributes ----------**" +msgstr "**属性**" + +#: ../../shape.rst:389 c557cfbcf2f5406785ebfecd3ff1517d +msgid "For reference only: the page's document." +msgstr "参照専用:ページの文書。" + +#: ../../shape.rst 72dfcd7fdae74f0880ab6f19f03d79f3 +#: 75b9e1ec876b4c59b07c5dcbdff62368 92b486749825470baeb632a9e37cd261 +#: b4f1769a752a4ba6bd469beb607218b6 c74d2d1ea9114c13be6e38bcd1deaedd +#: cbb0122317594386aa3583478c51ccbb dc6264da761a4a01be4fee79a0ce1530 +#: f47148b8f0f54ef9814d48e915f313d3 faa5a76a189a44e1a494982fef691804 +msgid "type" +msgstr "" + +#: ../../shape.rst:391 8f30cd32df5144ecabb231437d577390 +msgid ":ref:`Document`" +msgstr "" + +#: ../../shape.rst:395 052570a11eaf408dba57d40ce4bd42e3 +msgid "For reference only: the owning page." +msgstr "参照専用:所属するページ。" + +#: ../../shape.rst:397 bd6c0fb240c3475f8d56114a6361aec6 +msgid ":ref:`Page`" +msgstr "" + +#: ../../shape.rst:401 186223be5f2746f2bd7a53ef45c0de4b +msgid "Copy of the page's height" +msgstr "ページの高さのコピー" + +#: ../../shape.rst:403 ../../shape.rst:409 02408bc4f3ee46d19b1416484e8f620f +#: de7be7acb7a2493ba3556bdcc870858e +msgid "float" +msgstr "" + +#: ../../shape.rst:407 ebae5c5e7d8e46b9945ee984eb26c40b +msgid "Copy of the page's width." +msgstr "ページの幅のコピー。" + +#: ../../shape.rst:413 114df7efef35486aaf928cdf01433f06 +msgid "" +"Accumulated command buffer for **draw methods** since last finish. Every " +"finish method will append its commands to :attr:`Shape.totalcont`." +msgstr "" +"最後のfinish以降の **描画メソッド** の蓄積コマンドバッファ。各finishメソッドはそのコマンドを " +":attr:`Shape.totalcont` に追加します。" + +#: ../../shape.rst:415 ../../shape.rst:421 ../../shape.rst:442 +#: 47b843e7f65b45599993773e6ac9633f 5b0ee611bc9944578ef034e8972116de +#: dd6362698b114520b7e7e5fde7c5c15b +msgid "str" +msgstr "" + +#: ../../shape.rst:419 502c94c6d1e44c3a86ac39a68eae9b6e +msgid "" +"Accumulated text buffer. All **text insertions** go here. This buffer " +"will be appended to :attr:`totalcont` :meth:`Shape.commit`, so that text " +"will never be covered by drawings in the same Shape." +msgstr "" +"蓄積されたテキストバッファ。すべての **テキスト挿入** はここに行われます。このバッファは :attr:`totalcont` " +":meth:`Shape.commit` に追加されるため、テキストは同じShape内の描画に覆われることはありません。" + +#: ../../shape.rst:425 4f0bb106df03415f9b6577b73441283a +msgid "" +"Rectangle surrounding drawings. This attribute is at your disposal and " +"may be changed at any time. Its value is set to ``None`` when a shape is " +"created or committed. Every *draw** method, and " +":meth:`Shape.insert_textbox` update this property (i.e. **enlarge** the " +"rectangle as needed). **Morphing** operations, however " +"(:meth:`Shape.finish`, :meth:`Shape.insert_textbox`) are ignored." +msgstr "" +"描画を囲む矩形。この属性はあなたの自由に使用でき、いつでも変更できます。シェイプが作成または確定されたとき、その値は ``None`` " +"に設定されます。すべての *draw** メソッド、および :meth:`Shape.insert_textbox` " +"はこのプロパティを更新します(必要に応じて矩形を **拡大します** )。ただし、**変形** 操作(:meth:`Shape.finish` " +"、:meth:`Shape.insert_textbox` )は無視されます。" + +#: ../../shape.rst:427 38a94eb2a3d1477297299e30ee252c32 +msgid "" +"A typical use of this attribute would be setting " +":attr:`Page.cropbox_position` to this value, when you are creating shapes" +" for later or external use. If you have not manipulated the attribute " +"yourself, it should reflect a rectangle that contains all drawings so " +"far." +msgstr "" +"この属性の典型的な使用法は、後でまたは外部で使用するためにシェイプを作成する場合、:attr:`Page.cropbox_position` " +"をこの値に設定することです。自分で属性を操作していない場合、これはこれまでのすべての描画を含む矩形を反映するはずです。" + +#: ../../shape.rst:429 a19cb45e3df843b3a6de4431f84f8b8b +msgid "" +"If you have used morphing and need a rectangle containing the morphed " +"objects, use the following code::" +msgstr "変形を使用し、変形されたオブジェクトを含む矩形が必要な場合、次のコードを使用します::" + +#: ../../shape.rst:436 b60123de550c459ebe674a33061d914d +msgid ":ref:`Rect`" +msgstr "" + +#: ../../shape.rst:440 639b459c8c9e4fc59336542f53e2dcf6 +msgid "" +"Total accumulated command buffer for draws and text insertions. This will" +" be used by :meth:`Shape.commit`." +msgstr "描画およびテキスト挿入のための累積コマンドバッファです。これは :meth:`Shape.commit` で使用されます。" + +#: ../../shape.rst:446 f379fa92dfa14418bbfb0066162c4c98 +msgid "" +"For reference only: the current point of the drawing path. It is ``None``" +" at *Shape* creation and after each *finish()* and *commit()*." +msgstr "" +"参照用: 描画パスの現在のポイントです。*Shape* の作成時、および各 *finish()* および *commit()* の後には " +"``None`` です。" + +#: ../../shape.rst:451 68e74a764b3e4f6588f5ebe441234fb5 +msgid "Usage" +msgstr "使用法" + +#: ../../shape.rst:452 23e835b27a204f62b14890d069805e61 +msgid "" +"A drawing object is constructed by *shape = page.new_shape()*. After " +"this, as many draw, finish and text insertions methods as required may " +"follow. Each sequence of draws must be finished before the drawing is " +"committed. The overall coding pattern looks like this::" +msgstr "" +"drawingオブジェクトは `shape = page.new_shape()` " +"によって構築されます。その後、必要なだけ多くのdraw、finish、およびテキスト挿入メソッドが続きます。描画をコミットする前に、各描画シーケンスを終了する必要があります。全体のコーディングパターンは次のようになります::" + +#: ../../shape.rst:471 d5d9c1a5034648cc8101ab7a0e65864c +msgid "" +"Each *finish()* combines the preceding draws into one logical shape, " +"giving it common colors, line width, morphing, etc. If *closePath* is " +"specified, it will also connect the end point of the last draw with the " +"starting point of the first one." +msgstr "" +"各 *finish()* メソッドは、前の描画を1つの論理的なシェイプに結合し、共通の色、線の幅、変形などを設定します。*closePath* " +"が指定されている場合、最後の描画のエンドポイントを最初の描画のスタートポイントと接続します。" + +#: ../../shape.rst:473 90e44675158d407da4757ba07618d637 +msgid "" +"To successfully create compound graphics, let each draw method use the " +"end point of the previous one as its starting point. In the above pseudo " +"code, *draw2* should hence use the returned :ref:`Point` of *draw1* as " +"its starting point. Failing to do so, would automatically start a new " +"path and *finish()* may not work as expected (but it won't complain " +"either)." +msgstr "" +"複合グラフィックを正常に作成するには、各 draw " +"メソッドが前の描画のエンドポイントを次の描画のスタートポイントとして使用してください。上記の疑似コードでは、*draw2* は *draw1* " +"の戻り値である :ref:`Point` " +"を次の描画のスタートポイントとして使用すべきです。そうしないと、自動的に新しいパスが開始され、*finish()* " +"が期待どおりに動作しないかもしれません(ただし、エラーは発生しません)。" + +#: ../../shape.rst:475 5a61a171cfc3491cb0fa6eeba8b2d5f6 +msgid "" +"Text insertions may occur anywhere before the commit (they neither touch " +":attr:`Shape.draw_cont` nor :attr:`Shape.lastPoint`). They are appended " +"to *Shape.totalcont* directly, whereas draws will be appended by " +"*Shape.finish*." +msgstr "" +"テキストの挿入は、コミット(commit)の前にどこでも発生できます :attr:`Shape.draw_cont` または " +":attr:`Shape.lastPoint` に触れません)。テキストは直接 *Shape.totalcont* に追加されますが、描画は " +"*Shape.finish* によって追加されます。" + +#: ../../shape.rst:477 670a1cb6aa2c4c2aa8b788488c536db9 +msgid "" +"Each *commit* takes all text insertions and shapes and places them in " +"foreground or background on the page -- thus providing a way to control " +"graphical layers." +msgstr "各 *コミット* は、すべてのテキスト挿入とシェイプをページの前面または背面に配置し、グラフィックのレイヤーを制御する方法を提供します。" + +#: ../../shape.rst:479 a4c94b4f025b4fadaeeeebd9edc9b98e +msgid "" +"**Only** *commit* **will update** the page's contents, the other methods " +"are basically string manipulations." +msgstr "*コミット* **のみ** がページのコンテンツを **更新します**。他のメソッドは基本的に文字列の操作です。" + +#: ../../shape.rst:482 8aacff8ea21148919dd04fa1d299c172 +msgid "Examples" +msgstr "例" + +#: ../../shape.rst:483 964370dcb4584792a7ae853f1d28eda4 +msgid "Create a full circle of pieces of pie in different colors::" +msgstr "異なる色のピースを使用して円形のパイを作成します::" + +#: ../../shape.rst:498 23300cd759d04ae6abe72244b0352624 +msgid "Here is an example for 5 colors:" +msgstr "以下は5つの色の例です。" + +#: ../../shape.rst:502 238f06ee4d584097a15bd69c62785b32 +msgid "" +"Create a regular n-edged polygon (fill yellow, red border). We use " +"*draw_sector()* only to calculate the points on the circumference, and " +"empty the draw command buffer again before drawing the polygon::" +msgstr "" +"正則なn角形を作成します(塗りつぶしは黄色、赤い境界線)。周囲の点を計算するために *draw_sector()* " +"を使用し、多角形を描く前に描画コマンドバッファを空にします::" + +#: ../../shape.rst:517 a3792a9bc841403395cc4d611b2812a3 +msgid "Here is the polygon for n = 7:" +msgstr "n = 7の場合の多角形は次のとおりです:" + +#: ../../shape.rst:524 986f4d507f434bbcb5352c0ed5a04c58 +msgid "Common Parameters" +msgstr "共通パラメータ" + +#: ../../shape.rst:526 5472904b466548fd95e4eaec7af7c2ae +msgid "**fontname** (*str*)" +msgstr "" + +#: ../../shape.rst:528 4f689138fcdc4c379a757bdaefa8f288 +msgid "In general, there are three options:" +msgstr "一般的に、3つのオプションがあります:" + +#: ../../shape.rst:530 734f639f27364f9193df03ba4e43a927 +msgid "" +"Use one of the standard :ref:`Base-14-Fonts`. In this case, *fontfile* " +"**must not** be specified and *\"Helvetica\"* is used if this parameter " +"is omitted, too." +msgstr "" +"標準の :ref:`Base-14-Fonts` のいずれかを使用します。この場合、*fontfile* を指定 " +"**してはいけず**、このパラメータを省略した場合は *「Helvetica」* が使用されます。" + +#: ../../shape.rst:531 4bb743b9400f4cb3b4c058155229d8b0 +msgid "" +"Choose a font already in use by the page. Then specify its **reference** " +"name prefixed with a slash \"/\", see example below." +msgstr "既にページで使用されているフォントを選択します。その場合、スラッシュ「/」で前置された **参照** 名を指定します。以下の例を参照してください。" + +#: ../../shape.rst:532 9a34626d11aa4bd99f76d99bb78fdf3d +msgid "" +"Specify a font file present on your system. In this case choose an " +"arbitrary, but new name for this parameter (without \"/\" prefix)." +msgstr "システムに存在するフォントファイルを指定します。この場合、任意の新しい名前をこのパラメータに選択します(「/」接頭辞なし)。" + +#: ../../shape.rst:534 918b82d861b2402f886d07411b40bc87 +msgid "" +"If inserted text should re-use one of the page's fonts, use its reference" +" name appearing in :meth:`Page.get_fonts` like so:" +msgstr "" +"挿入されるテキストがページのフォントの1つを再利用する場合は、:meth:`Page.get_fonts` " +"に表示される参照名を次のように使用します:" + +#: ../../shape.rst:536 dfe633ea91224f2aa9387d0763aaa162 +msgid "" +"Suppose the font list has the item *[1024, 0, 'Type1', 'NimbusMonL-Bold'," +" 'R366']*, then specify *fontname = \"/R366\", fontfile = None* to use " +"font *NimbusMonL-Bold*." +msgstr "" +"フォントリストに項目 *[1024, 0, 'Type1', 'NimbusMonL-Bold', 'R366']* がある場合、フォント " +"*NimbusMonL-Bold* を使用するには、*fontname = \"/R366\"、fontfile = None* と指定します。" + +#: ../../shape.rst:540 c39dca28731f4fd685c0d91492e3fab5 +msgid "**fontfile** (*str*)" +msgstr "" + +#: ../../shape.rst:542 cf3e42d6c7e44c25824830b2b65b5228 +msgid "" +"File path of a font existing on your computer. If you specify *fontfile*," +" make sure you use a *fontname* **not occurring** in the above list. This" +" new font will be embedded in the PDF upon *doc.save()*. Similar to new " +"images, a font file will be embedded only once. A table of MD5 codes for " +"the binary font contents is used to ensure this." +msgstr "" +"コンピューターに存在するフォントのファイルパス。*fontfile* を指定する場合、上記のリストに **存在しない** *フォント名* " +"を使用してください。この新しいフォントは、*doc.save()* " +"時にPDFに埋め込まれます。画像と同様に、フォントファイルは1度だけ埋め込まれます。バイナリフォントコンテンツのMD5コードのテーブルが使用され、これを確実にします。" + +#: ../../shape.rst:546 76ee746f05ec4244850466e311f99f6a +msgid "**set_simple** (*bool*)" +msgstr "" + +#: ../../shape.rst:548 39388630281e4d3f8da8c16273609972 +msgid "" +"Fonts installed from files are installed as **Type0** fonts by default. " +"If you want to use 1-byte characters only, set this to true. This setting" +" cannot be reverted. Subsequent changes are ignored." +msgstr "" +"ファイルからインストールされたフォントは、デフォルトで **Type0** " +"フォントとしてインストールされます。1バイト文字のみを使用する場合は、これをtrueに設定します。この設定は元に戻すことはできません。後続の変更は無視されます。" + +#: ../../shape.rst:552 af3166520c274206a13e1a0baec46874 +msgid "**fontsize** (*float*)" +msgstr "" + +#: ../../shape.rst:554 1ba19903918d47b38a64022ab307ad6e +msgid "Font size of text, see: :data:`fontsize`." +msgstr "テキストのフォントサイズ、参照: :data:`fontsize`。" + +#: ../../shape.rst:558 f957a4a2dc5d4c7a93af1aad1141e348 +msgid "**dashes** (*str*)" +msgstr "" + +#: ../../shape.rst:560 fbae07e8952e47748a176d1270430476 +#, fuzzy +msgid "" +"Causes lines to be drawn dashed. The general format is `\"[n m] p\"` of " +"(up to) 3 floats denoting pixel lengths. ``n`` is the dash length, ``m`` " +"(optional) is the subsequent gap length, and ``p`` (the \"phase\" - " +"**required**, even if 0!) specifies how many pixels should be skipped " +"before the dashing starts. If ``m`` is omitted, it defaults to ``n``." +msgstr "" +"線を破線で描画するようにします。一般的なフォーマットは、ピクセル長を示す(最大)3つの浮動小数点数の `\"[n m] p\"` です。``n``" +" は破線の長さ、``m`` (オプション)はその後のギャップの長さ、 ``p`` (「位相」 - 0であっても " +"**必須**!)は、破線が開始される前にスキップするべきピクセルの数を指定します。``m`` が省略された場合、``n`` " +"にデフォルト値が設定されます。" + +#: ../../shape.rst:562 6cc3d9bdb9f549d8a8ecc425ae21919b +msgid "" +"A continuous line (no dashes) is drawn with `\"[] 0\"` or ``None`` or " +"`\"\"`. Examples:" +msgstr "連続線(破線なし)は `\"[] 0\"` または ``None`` または `\"\"` で描画されます。例:" + +#: ../../shape.rst:564 5b2f1471a3364bce88db1d174bd4a4b7 +msgid "" +"Specifying `\"[3 4] 0\"` means dashes of 3 and gaps of 4 pixels following" +" each other." +msgstr "`\"[3 4] 0\"` を指定すると、3ピクセルの破線と4ピクセルのギャップが交互に続きます。" + +#: ../../shape.rst:565 1e6f67ef2c174d5286313f74014b814b +msgid "`\"[3 3] 0\"` and `\"[3] 0\"` do the same thing." +msgstr "`\"[3 3] 0\"` と `\"[3] 0\"` は同じことをします。" + +#: ../../shape.rst:567 a9823d7b0b0747d28c07e9f1eff08a97 +msgid "" +"For (the rather complex) details on how to achieve sophisticated dashing " +"effects, see :ref:`AdobeManual`, page 217." +msgstr "洗練された破線効果を実現する詳細な詳細については、:ref:`AdobeManual` の217ページを参照してください。" + +#: ../../shape.rst:571 d1a553e173a7429a857f65045ab0e437 +msgid "**color / fill** (*list, tuple*)" +msgstr "**色/塗りつぶし** (*リスト、タプル*)" + +#: ../../shape.rst:573 14330946fd1a4d8a996f6b2c28063ded +msgid "" +"Stroke and fill colors can be specified as tuples or list of of floats " +"from 0 to 1. These sequences must have a length of 1 (GRAY), 3 (RGB) or 4" +" (CMYK). For GRAY colorspace, a single float instead of the unwieldy " +"*(float,)* or *[float]* is also accepted. Accept (default) or use `None` " +"to not use the parameter." +msgstr "" +"線の色と塗りつぶしの色は、0から1までの浮動小数点数からなるリストまたはタプルとして指定できます。これらのシーケンスの長さは、1(GRAY)、3(RGB)、または4(CMYK)でなければなりません。GRAYカラースペースの場合、*[float]*" +" や*(float,)* " +"の代わりに単一の浮動小数点数を使用することもできます。このパラメータを受け入れるには、(デフォルト)を受け入れるか、または使用しないようにするには" +" `None` を使用します。" + +#: ../../shape.rst:575 e816f8b5d9ff430080ae997838786774 +msgid "" +"To simplify color specification, method *getColor()* in *pymupdf.utils* " +"may be used to get predefined RGB color triples by name. It accepts a " +"string as the name of the color and returns the corresponding triple. The" +" method knows over 540 color names -- see section :ref:`ColorDatabase`." +msgstr "" +"色の指定を簡略化するために、*pymupdf.utils* の *getColor()* " +"メソッドを使用して、名前に対応するRGBカラートリプルを取得することができます。これは色の名前を指定する文字列を受け入れ、対応するトリプルを返します。このメソッドは540以上のカラー名を認識します。:ref:`ColorDatabase`" +" のセクションを参照してください。" + +#: ../../shape.rst:577 4635acc9d7694796a3bc9ae29886a5a0 +msgid "" +"Please note that the term *color* usually means \"stroke\" color when " +"used in conjunction with fill color." +msgstr "ご注意ください。*色* という用語は、通常、塗りつぶし色と組み合わせて使用される場合に「線の」色を意味します" + +#: ../../shape.rst:579 36458f858b744fa89c1376aadea2059d +msgid "" +"If letting default a color parameter to `None`, then no resp. color " +"selection command will be generated. If *fill* and *color* are both " +"`None`, then the drawing will contain no color specification. But it will" +" still be \"stroked\", which causes PDF's default color \"black\" be used" +" by Adobe Acrobat and all other viewers." +msgstr "" +"色パラメータのデフォルト値を `None` にする場合、対応する色の選択コマンドは生成されません。*fill* と *color* がどちらも " +"`None` の場合、描画には色の指定が含まれません。ただし、まだ「ストローク」されているため、Adobe " +"Acrobatや他のビューアではPDFのデフォルトカラー「ブラック」が使用されます。" + +#: ../../shape.rst:583 76a445577e6041a8849d1d50652d57f6 +#, fuzzy +msgid "**width** (*float*)" +msgstr "**border_width** (*浮動小数点数*)" + +#: ../../shape.rst:585 537fca23f709435493eb732dae3268ee +msgid "" +"The stroke (\"border\") width of the elements in a shape (if applicable)." +" The default value is 1. The values width, color and fill have the " +"following relationship / dependency:" +msgstr "図形内の要素のストローク(\"枠線\")の幅(適用可能な場合)。デフォルト値は1です。幅、色、塗りつぶしの値は、以下の関係/依存関係を持ちます:" + +#: ../../shape.rst:587 fa7a5177ae0449e187b82327743f7456 +msgid "" +"If `fill=None` shape elements will always be drawn with a border - even " +"if `color=None` (in which case black is taken) or `width=0` (in which " +"case 1 is taken)." +msgstr "" +"`fill=None` の場合、shape要素は常に枠線で描画されます - `color=None` " +"の場合(その場合、黒が使用されます)、または `width=0` の場合(その場合、1が使用されます)でもです。" + +#: ../../shape.rst:588 cd5cc59d1028404fbfed2f24e33410ae +#, fuzzy +msgid "" +"Shapes without border can only be achieved if a fill color is specified " +"(which may be white of course). To achieve this, specify `width=0`. In " +"this case, the ``color`` parameter is ignored." +msgstr "" +"枠線のない図形は、塗りつぶしの色が指定されている場合のみ実現できます(もちろん白色であっても構いません)。これを実現するには、 `width=0`" +" を指定します。この場合、 `color` パラメータは無視されます。" + +#: ../../shape.rst:592 382ccf96c5aa4181a4572f714e062247 +msgid "**stroke_opacity / fill_opacity** (*floats*)" +msgstr "**stroke_opacity / fill_opacity** (*浮動小数点数*)" + +#: ../../shape.rst:594 ae70a471fc0d48c2a59637f8b31a6a24 +msgid "" +"Both values are floats in range [0, 1]. Negative values or values > 1 " +"will ignored (in most cases). Both set the transparency such that a value" +" 0.5 corresponds to 50% transparency, 0 means invisible and 1 means " +"intransparent. For e.g. a rectangle the stroke opacity applies to its " +"border and fill opacity to its interior." +msgstr "両方の値は[0、1]の範囲の浮動小数点数です。負の値または1を超える値は(ほとんどの場合)無視されます。両方の値は、透明度を設定し、たとえば値0.5は50%の透明度を示し、0は不可視を意味し、1は不透明を意味します。たとえば四角形の場合、stroke_opacityはその境界に、fill_opacityはその内部に適用されます。" + +#: ../../shape.rst:596 a8bb91c0072541a9b37e3e6a62caa30f +#, fuzzy +msgid "" +"For text insertions (:meth:`Shape.insert_text` and " +":meth:`Shape.insert_textbox`), use *fill_opacity* for the text. At first " +"sight this seems surprising, but it becomes obvious when you look further" +" down to `render_mode`: `fill_opacity` applies to the yellow and " +"`stroke_opacity` applies to the blue color." +msgstr "" +"テキストの挿入(:meth:`Shape.insert_text` および :meth:`Shape.insert_textbox` " +")の場合、テキストには *fill_opacity* を使用してください。最初は驚くかもしれませんが、*render_mode* " +"の詳細を見ると明らかになります。*fill_opacity* は黄色に適用され、*stroke_opacity* は青色に適用されます。" + +#: ../../shape.rst:600 641a78de051a437995282b57d04de9aa +msgid "**border_width** (*float*)" +msgstr "**border_width** (*浮動小数点数*)" + +#: ../../shape.rst:602 92a8ccb5633243f2ad24f12364e68cd8 +msgid "" +"Set the border width for text insertions. New in v1.14.9. Relevant only " +"if the render mode argument is used with a value greater zero." +msgstr "テキスト挿入の境界線の幅を設定します。v1.14.9で新しく追加されました。render_mode引数がゼロより大きい値で使用されている場合にのみ関連します。" + +#: ../../shape.rst:606 abbf4573c17a4e30ac45792ace4f3a6e +msgid "**render_mode** (*int*)" +msgstr "**render_mode** (*整数*)" + +#: ../../shape.rst:608 c584ddf8b2d84f64979c15dc82edee5e +msgid "" +"*New in version 1.14.9:* Integer in `range(8)` which controls the text " +"appearance (:meth:`Shape.insert_text` and :meth:`Shape.insert_textbox`). " +"See page 246 in :ref:`AdobeManual`. New in v1.14.9. These methods now " +"also differentiate between fill and stroke colors." +msgstr "" +"*バージョン1.14.9で新たに導入された* `range(8)` " +"内の整数。これはテキストの外観(:meth:`Shape.insert_text` および " +":meth:`Shape.insert_textbox` )を制御します。:ref:`AdobeManual` " +"のページ246を参照してください。バージョン1.14.9での新機能です。これらのメソッドは、塗りつぶしの色とストロークの色を区別するようになりました。" + +#: ../../shape.rst:610 59add67dd81245e68a70f2404603f6a6 +msgid "" +"For default 0, only the text fill color is used to paint the text. For " +"backward compatibility, using the *color* parameter instead also works." +msgstr "" +"デフォルト値0では、テキストの塗りつぶし色のみがテキストを塗りつぶすために使用されます。*color* " +"パラメーターを代わりに使用することも、後方互換性のために機能します。" + +#: ../../shape.rst:611 32bac19a29a941d3beb40151ca36f18a +msgid "" +"For render mode 1, only the border of each glyph (i.e. text character) is" +" drawn with a thickness as set in argument *border_width*. The color " +"chosen in the *color* argument is taken for this, the *fill* parameter is" +" ignored." +msgstr "" +"render_mode 1では、各グリフ(つまりテキスト文字)の境界線のみが指定された *border_width* " +"で描画されます。この際、*color* 引数で選択した色が使用され、*fill* パラメーターは無視されます。" + +#: ../../shape.rst:612 6777a1d7af8447e094630286981c6652 +msgid "" +"For render mode 2, the glyphs are filled and stroked, using both color " +"parameters and the specified border width. You can use this value to " +"simulate **bold text** without using another font: choose the same value " +"for *fill* and *color* and an appropriate value for *border_width*." +msgstr "" +"render_mode " +"2では、グリフは塗りつぶされ、ストロークされ、両方の色パラメーターと指定された境界幅が使用されます。別のフォントを使用せずに " +"**太字のテキスト** をシミュレートするためにこの値を使用できます。*fill* と *color* " +"に同じ値を選択し、*border_width* に適切な値を選択します。" + +#: ../../shape.rst:613 8a1b88179e55472dbef190b87e299a07 +msgid "" +"For render mode 3, the glyphs are neither stroked nor filled: the text " +"becomes invisible." +msgstr "render_mode 3では、グリフはストロークも塗りつぶしもされません。テキストは見えなくなります。" + +#: ../../shape.rst:615 c89121f9821c4152ae8c7942769dc98f +msgid "" +"The following examples use border_width=0.3, together with a fontsize of " +"15. Stroke color is blue and fill color is some yellow." +msgstr "以下の例では、border_width=0.3を使用し、フォントサイズは15です。ストロークの色は青で、塗りつぶしの色は黄色です。" + +#: ../../shape.rst:621 758ba1a3725545968a5bf8174fc0067f +#, fuzzy +msgid "**miter_limit** (*float*)" +msgstr "**border_width** (*浮動小数点数*)" + +#: ../../shape.rst:623 47f6cf2090874b34aa1000378a41c702 +msgid "" +"A float specifying the maximum acceptable value of the quotient `miter-" +"length / line-width` (\"miter quotient\"). Used in text output methods. " +"This is only relevant for non-zero render mode values -- then, characters" +" are written with border lines (i.e. \"stroked\")." +msgstr "" + +#: ../../shape.rst:625 23a44194e32f40238f56f9720992ea19 +msgid "" +"If two lines stroking some character meet at a sharp (<= 90°) angle and " +"the line width is large enough, then \"spikes\" may become visible -- " +"causing an ugly appearance as shown below. For more background, see page " +"126 of the :ref:`AdobeManual`." +msgstr "" + +#: ../../shape.rst:627 dbeec71e85254059b1b116e6bf53cab4 +msgid "" +"For instance, when joins meet at 90°, then the miter length is ``sqrt(2) " +"* line-width``, so the miter quotient is ``sqrt(2)``." +msgstr "" + +#: ../../shape.rst:629 6230069ffd5b4b55b23b39e3542c5df6 +msgid "" +"If ``miter_limit`` is exceeded, then all joins with a larger qotient will" +" appear as beveled (\"butt\" appearance)." +msgstr "" + +#: ../../shape.rst:631 709fe5f6dae74edf97ca361613799805 +msgid "" +"The default value 1 (and any smaller value) will ensure that all joins " +"are rendered as a butt. A value of ``None`` will use the PDF default " +"value." +msgstr "" + +#: ../../shape.rst:633 96300bafed9240e7a4726029c3dff673 +msgid "Example text showing spikes (``miter_limit=None``):" +msgstr "" + +#: ../../shape.rst:637 4057d5819930438fbd72dfb0ac5ec1fe +msgid "Example text suppressing spikes (``miter_limit=1``):" +msgstr "" + +#: ../../shape.rst:643 128861b7bd2c4122af369f3d66935014 +msgid "**overlay** (*bool*)" +msgstr "**オーバーレイ** (*bool*)" + +#: ../../shape.rst:645 0de1da78f296432caf149b9467906484 +msgid "Causes the item to appear in foreground (default) or background." +msgstr "アイテムを前景( デフォルト)または背景に表示させます。" + +#: ../../shape.rst:649 da7bfef6a119478d829a68086d3b2f02 +msgid "**morph** (*sequence*)" +msgstr "**morph** (*シーケンス*)" + +#: ../../shape.rst:651 f4f5cdf22e924385887227709cc48b65 +msgid "" +"Causes \"morphing\" of either a shape, created by the *draw*()* methods, " +"or the text inserted by page methods *insert_textbox()* / " +"*insert_text()*. If not ``None``, it must be a pair *(fixpoint, matrix)*," +" where *fixpoint* is a :ref:`Point` and *matrix* is a :ref:`Matrix`. The " +"matrix can be anything except translations, i.e. *matrix.e == matrix.f ==" +" 0* must be true. The point is used as a fixed point for the matrix " +"operation. For example, if *matrix* is a rotation or scaling, then " +"*fixpoint* is its center. Similarly, if *matrix* is a left-right or up-" +"down flip, then the mirroring axis will be the vertical, respectively " +"horizontal line going through *fixpoint*, etc." +msgstr "" +"*draw*()* メソッドによって作成されたシェイプまたはページメソッド *insert_textbox()*/ *insert_text()*" +" によって挿入されたテキストのいずれかの「変形」を引き起こします。 ``None`` でない場合、*(fixpoint、matrix)* " +"のペアでなければなりません。ここで、*fixpoint* は :ref:`Point` であり、*matrix* は :ref:`Matrix` " +"です。行列は、*matrix.e == matrix.f == 0* " +"でない限り、移動以外の任意のものにすることができます。ポイントは、行列操作の固定ポイントとして使用されます。たとえば、*行列* " +"が回転またはスケーリングである場合、*fixpoint* はその中心です。同様に、*行列* が左右または上下の反転である場合、鏡像化される軸は " +"*fixpoint* を介して通る垂直、または水平の線です。" + +#: ../../shape.rst:653 d7a99be19cae4b00bcef607b3a379087 +msgid "" +"Several methods contain checks whether the to be inserted items will " +"actually fit into the page (like :meth:`Shape.insert_text`, or " +":meth:`Shape.draw_rect`). For the result of a morphing operation there is" +" however no such guaranty: this is entirely the programmer's " +"responsibility." +msgstr "" +"いくつかのメソッドには、ページに挿入されるアイテムが実際に適合するかどうかをチェックする検証が含まれています(:meth:`Shape.insert_text`" +" 、または :meth:`Shape.draw_rect` " +"など)。変形操作の結果にはそのような保証はありません。これは完全にプログラマーの責任です。" + +#: ../../shape.rst:657 c3ca870a3e9544b89f78f868de17c0ef +msgid "**lineCap (deprecated: \"roundCap\")** (*int*)" +msgstr "**lineCap(非推奨:「roundCap」**)(*int*)" + +#: ../../shape.rst:659 fea4b9f8410b4003ae173a8603274a60 +msgid "" +"Controls the look of line ends. The default value 0 lets each line end at" +" exactly the given coordinate in a sharp edge. A value of 1 adds a semi-" +"circle to the ends, whose center is the end point and whose diameter is " +"the line width. Value 2 adds a semi-square with an edge length of line " +"width and a center of the line end." +msgstr "線の末端の外観を制御します。デフォルト値0は、各線の末端が鋭いエッジで正確な座標で終了することを許可します。値1は、末端に直径が線の幅で中心が末端ポイントである半円を追加します。値2は、線の幅と線の末端を中心とする辺の長さがある半正方形を追加します。" + +#: ../../shape.rst:661 37893d88e52d4b2e8bda93c32ae636d8 +msgid "*Changed in version 1.14.15*" +msgstr "" + +#: ../../shape.rst:665 22794a68bb13467b9047ce06d3abfcbf +msgid "**lineJoin** (*int*)" +msgstr "" + +#: ../../shape.rst:667 6552f46bbd804d51ba0bc8d53dba7892 +msgid "" +"*New in version 1.14.15:* Controls the way how line connections look " +"like. This may be either as a sharp edge (0), a rounded join (1), or a " +"cut-off edge (2, \"butt\")." +msgstr "" +"*バージョン1.14.15で新規追加:* " +"ラインの接続方法を制御します。これは、鋭いエッジ(0)、丸い接続(1)、または切り取られたエッジ(2、「butt」)のいずれかです。" + +#: ../../shape.rst:671 f99edc98fa524d40a92febeffc5c45c2 +msgid "**closePath** (*bool*)" +msgstr "" + +#: ../../shape.rst:673 36e9f0eb81d94b989bd969438537f326 +msgid "" +"Causes the end point of a drawing to be automatically connected with the " +"starting point (by a straight line)." +msgstr "描画の終点を自動的に始点に直線で接続させる原因となります。" + +#: ../../footer.rst:60 41e759d0552b4644a452e8186750d7d0 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/story-class.mo b/docs/locales/ja/LC_MESSAGES/story-class.mo new file mode 100644 index 000000000..cf8921f5a Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/story-class.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/story-class.po b/docs/locales/ja/LC_MESSAGES/story-class.po new file mode 100644 index 000000000..5a4a41780 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/story-class.po @@ -0,0 +1,969 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 5a64c03ac3304767805a34e5519dc643 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 987f101ad8fa44c9a85292f456737790 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 937bf2fcb1964c3f97071eb2764745fd +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../story-class.rst:7 b33db4381f96455f881c4cb8b48a9239 +msgid "Story" +msgstr "Story (ストーリー)" + +#: ../../story-class.rst:11 96383b7b49df4dc69e7545f0a3d37164 +msgid "New in v1.21.0" +msgstr "バージョン1.21.0で新規追加" + +#: ../../story-class.rst:14 7d3756e920e1487eb3e1b3bf28ed6f9b +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../story-class.rst:14 51a2ed231a8d4b16a5606ce08ce54e8b +msgid "**Short Description**" +msgstr "**簡単な説明**" + +#: ../../story-class.rst:16 65726e39b3794bb9a492d8c5ba15990f +msgid ":meth:`Story.reset`" +msgstr "" + +#: ../../story-class.rst:16 d9f0619b632e4bb582c9bbc227d889b5 +msgid "\"rewind\" story output to its beginning" +msgstr "ストーリーの出力を先頭に巻き戻す" + +#: ../../story-class.rst:17 7c94ba99f15b4f4d847c8263ca0b5132 +msgid ":meth:`Story.place`" +msgstr "" + +#: ../../story-class.rst:17 5ef6af8dda1c4704b193cb14b0e43f33 +msgid "compute story content to fit in provided rectangle" +msgstr "指定された長方形に収まるストーリーのコンテンツを計算" + +#: ../../story-class.rst:18 bb0b48f6450441f5a3dd54b788dee19a +msgid ":meth:`Story.draw`" +msgstr "" + +#: ../../story-class.rst:18 8570b10f54d345139cfe978f97d8c9b3 +msgid "write the computed content to current page" +msgstr "計算されたコンテンツを現在のページに書き込む" + +#: ../../story-class.rst:19 f57f0a347b814e60b8da011ae533e111 +msgid ":meth:`Story.element_positions`" +msgstr "" + +#: ../../story-class.rst:19 2e5a11cc2c1c430a878377cd43dbd28c +msgid "callback function logging currently processed story content" +msgstr "現在処理中のストーリーコンテンツを記録するコールバック関数" + +#: ../../story-class.rst:20 7d3e1c7cf395410d9a7653dac537daad +msgid ":attr:`Story.body`" +msgstr "" + +#: ../../story-class.rst:20 1979b2f75d824d3588dc32499dace02a +msgid "the story's underlying :htmlTag:`body`" +msgstr "ストーリーの基本となる :htmlTag:`body`" + +#: ../../story-class.rst:21 7ba33a6d21124637b218ce0b7f3d3ec1 +msgid ":meth:`Story.write`" +msgstr "" + +#: ../../story-class.rst:21 757b24965dee4f998578cefcb1a94b35 +msgid "places and draws Story to a DocumentWriter" +msgstr "ストーリーを :ref:`DocumentWriter` に配置して描画します" + +#: ../../story-class.rst:22 5005e48c2d114342997d30259781a86d +msgid ":meth:`Story.write_stabilized`" +msgstr "" + +#: ../../story-class.rst:22 12712f5deea24c88b77364960c58dbdb +msgid "iterative layout of html content to a DocumentWriter" +msgstr "HTMLコンテンツを :ref:`DocumentWriter` に反復的にレイアウトします" + +#: ../../story-class.rst:23 16d33507e0554249ba750773fb5cb53d +msgid ":meth:`Story.write_with_links`" +msgstr "" + +#: ../../story-class.rst:23 93c333b6d5f843cf8258a5274dfa22f0 +msgid "like `write()` but also creates PDF links" +msgstr "`write()` と同様ですが、PDFリンクも作成します" + +#: ../../story-class.rst:24 25e70986951c4b668e0a1704e2984855 +msgid ":meth:`Story.write_stabilized_with_links`" +msgstr "" + +#: ../../story-class.rst:24 2b185135aa8348fb825dded5295a7474 +msgid "like `write_stabilized()` but also creates PDF links" +msgstr "`write_stabilized()` と同様ですが、PDFリンクも作成します" + +#: ../../story-class.rst:25 f7ca87bbd93e492587ff63f31dc3d7d5 +msgid ":meth:`Story.fit`" +msgstr "" + +#: ../../story-class.rst:25 ../../story-class.rst:252 +#: 945d31669d104a67b4251a8a2ed5b133 c2cb9dc7bf694149806fb89443a430ba +msgid "Finds optimal rect that contains the story `self`." +msgstr "" + +#: ../../story-class.rst:26 57ee0022377e4fbbaf74f70142af7217 +msgid ":meth:`Story.fit_scale`" +msgstr "" + +#: ../../story-class.rst:27 f73eb5fd926e4dc5b66afcbd819851d6 +msgid ":meth:`Story.fit_height`" +msgstr "" + +#: ../../story-class.rst:28 c5a858a25f3c4e45adf3c65f77eb35b4 +msgid ":meth:`Story.fit_width`" +msgstr "" + +#: ../../story-class.rst:31 555f8dfcf2c6423696e0a8111f5a202f +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../story-class.rst:37 834595e0caee413fb07ac0888c96cf9c +msgid "" +"Create a **story**, optionally providing HTML and CSS source. The HTML is" +" parsed, and held within the Story as a DOM (Document Object Model)." +msgstr "" +"**ストーリー** " +"を作成します。オプションでHTMLとCSSのソースを提供できます。HTMLは解析され、ストーリー内でDOM(ドキュメントオブジェクトモデル)として保持されます。" + +#: ../../story-class.rst:40 d9a1893bb0f14ccf9425ee58f9e72dfc +msgid "" +"This structure may be modified: content (text, images) may be added, " +"copied, modified or removed by using methods of the :ref:`Xml` class." +msgstr "この構造は変更できます。テキスト、画像などのコンテンツは、:ref:`Xml` クラスのメソッドを使用して追加、コピー、変更、または削除できます。" + +#: ../../story-class.rst:43 f0a727b22fb84079ae4095e10e3c2373 +msgid "" +"When finished, the **story** can be written to any device; in typical " +"usage the device may be provided by a :ref:`DocumentWriter` to make new " +"pages." +msgstr "" +"完了したら、**ストーリー** を任意のデバイスに書き込むことができます。通常の使用法では、デバイスは新しいページを生成するために " +":ref:`DocumentWriter` によって提供されます。" + +#: ../../story-class.rst:46 8bd0a736bc6749f68af7bee1912f8244 +msgid "Here are some general remarks:" +msgstr "以下は一般的な注意事項です:" + +#: ../../story-class.rst:48 0f8c24eeaf79495db53a2614e831f91a +msgid "" +"The :ref:`Story` constructor parses and validates the provided HTML to " +"create the DOM." +msgstr ":ref:`Story` コンストラクタは提供されたHTMLを解析し、検証してDOMを作成します。" + +#: ../../story-class.rst:49 a8c9195a3cdf4ea9bc8acd4c4bc70ff2 +msgid "" +"PyMuPDF provides a number of ways to manipulate the HTML source by " +"providing access to the *nodes* of the underlying DOM. Documents can be " +"completely built from ground up programmatically, or the existing DOM can" +" be modified pretty arbitrarily. For details of this interface, please " +"see the :ref:`Xml` class." +msgstr "" +"PyMuPDFは、基本的なDOMのノードへのアクセスを提供することでHTMLソースを操作する方法をいくつか提供しています。文書は完全にプログラム的に構築することも、既存のDOMをかなり任意に変更することもできます。このインターフェースの詳細については、:ref:`Xml`" +" クラスをご覧ください。" + +#: ../../story-class.rst:54 9a32acd1613e44cd87d2f73e6e3790e4 +msgid "" +"If no (or no more) changes to the DOM are required, the story is ready to" +" be laid out and to be fed to a series of devices (typically devices " +"provided by a :ref:`DocumentWriter` to produce new pages)." +msgstr "" +"DOMへの変更が必要ない(またはもう必要ない)場合、ストーリーはレイアウトが可能であり、新しいページを生成するために通常は " +":ref:`DocumentWriter` によって提供されるデバイスに供給する準備ができています。" + +#: ../../story-class.rst:57 de9ba211086b47eca447de9b6b5356c1 +msgid "" +"The next step is to place the story and write it out. This can either be " +"done directly, by looping around calling `place()` and `draw()`, or " +"alternatively, the looping can handled for you using the `write()` or " +"`write_stabilised()` methods. Which method you choose is largely a matter" +" of taste." +msgstr "" +"次のステップは、ストーリーを配置して書き出すことです。これは、直接行うこともできます(`place()` と `draw()` " +"を呼び出すことでループ処理することで)、または代替として `write()` または `write_stabilised()` " +"メソッドを使用しても、ループ処理を自動化できます。どの方法を選択するかは主に好みの問題です。" + +#: ../../story-class.rst:63 9615fc9723fb4897a2817bb454bf3149 +msgid "To work in the first of these styles, the following loop should be used:" +msgstr "最初のスタイルで作業するには、次のループを使用する必要があります:" + +#: ../../story-class.rst:65 41867b8cd7bb4533b097c4aa2dd1a08e +msgid "" +"Obtain a suitable device to write to; typically by requesting a new, " +"empty page from a :ref:`DocumentWriter`." +msgstr "適切なデバイスを取得します。通常、:ref:`DocumentWriter` から新しい空のページを要求することで取得します。" + +#: ../../story-class.rst:68 6e11ba83740c42a0814219e6c7bb4f2f +msgid "" +"Determine one or more rectangles on the page, that should receive " +"**story** data. Note that not every page needs to have the same set of " +"rectangles." +msgstr "" +"ページ上に **ストーリー** " +"データを受け取る必要がある1つまたは複数の長方形を決定します。すべてのページが同じ長方形セットを持つ必要はないことに注意してください。" + +#: ../../story-class.rst:71 27521b4c124e42ad873a6b98785cc18d +msgid "" +"Pass each rectangle to the **story** to place it, learning what part of " +"that rectangle has been filled, and whether there is more story data that" +" did not fit. This step can be repeated several times with adjusted " +"rectangles until the caller is happy with the results." +msgstr "" +"各長方形を **ストーリー** " +"に渡して配置し、その長方形のどの部分が埋まったか、また収まらなかったストーリーデータがあるかを学びます。このステップは、調整された長方形で何度も繰り返すことができ、呼び出し元が結果に満足するまで続けることができます。" + +#: ../../story-class.rst:76 05e07028c7d3438d8b4564f851c3f6dd +msgid "" +"Optionally, at this point, we can request details of where interesting " +"items have been placed, by calling the `element_positions()` method. " +"Items are deemed to be interesting if their integer `heading` attribute " +"is a non-zero (corresponding to HTML tags :htmlTag:`h1` - :htmlTag:`h6`)," +" if their `id` attribute is not `None` (corresponding to HTML tag " +":htmlTag:`id`), or if their `href` attribute is not `None` (responding to" +" HTML tag :htmlTag:`href`). This can conveniently be used for automatic " +"generation of a Table of Contents, an index of images or the like." +msgstr "" +"任意で、この段階で `element_positions()` " +"メソッドを呼び出して、興味深いアイテムが配置された場所の詳細をリクエストすることができます。アイテムは、整数の `heading` " +"属性がゼロでない場合(:htmlTag:`h1` - :htmlTag:`h6`)、id属性がNoneでない場合(:htmlTag:`id` " +"に対応)、またはhref属性がNoneでない場合(:htmlTag:`href` " +"に対応)に興味深いと見なされます。これは、目次、画像の索引などの自動生成に便利に使用できます。" + +#: ../../story-class.rst:85 921092efac384debac3b3b54e0888f7f +msgid "Next, draw that rectangle out to the device with the `draw()` method." +msgstr "次に、`draw()` メソッドを使用してその長方形をデバイスに描画します。" + +#: ../../story-class.rst:86 d36b5912ed30437198b3cf8b6a589d81 +msgid "" +"If the most recent call to `place()` indicated that all the story data " +"had fitted, stop now." +msgstr "最も最近の `place()` の呼び出しが、すべてのストーリーデータが収まったことを示した場合、ここで停止します。" + +#: ../../story-class.rst:88 c97e2fc9a4d7461f87a1b0a14181de80 +msgid "" +"Otherwise, we can loop back. If there are more rectangles to be placed on" +" the current device (page), we jump back to step 3 - if not, we jump back" +" to step 1 to get a new device." +msgstr "それ以外の場合、ループを戻すことができます。現在のデバイス(ページ)に配置するためにさらに長方形がある場合は、ステップ3に戻ります。長方形がない場合は、新しいデバイスを取得するためにステップ1に戻ります。" + +#: ../../story-class.rst:91 58ca39dc2e03443689141076908acc02 +msgid "" +"Alternatively, in the case where you are using a :ref:`DocumentWriter`, " +"the `write()` or `write_stabilized()` methods can be used. These handle " +"all the looping for you, in exchange for being provided with callbacks " +"that control the behaviour (notably a callback that enumerates the " +"rectangles/pages to use)." +msgstr "" +"代わりに、:ref:`DocumentWriter` を使用している場合、`write()` または `write_stabilized()` " +"メソッドを使用できます。これらは、動作を制御するコールバックが提供される代わりに、すべてのループ処理を処理します(特に使用する長方形/ページを列挙するコールバックが含まれます)。" + +#: ../../story-class.rst:96 d2f8dc8f478942b69038e2d6d08a91fe +msgid "" +"Which part of the **story** will land on which rectangle / which page, is" +" fully under control of the :ref:`Story` object and cannot be predicted." +msgstr "" +"**ストーリー** オブジェクトがどの部分がどの長方形/ページに配置されるかは、完全に :ref:`Story` " +"オブジェクトの制御下にあり、予測することはできません。" + +#: ../../story-class.rst:98 a231e6de936e448a93e21ff261ff8b39 +msgid "" +"Images may be part of a **story**. They will be placed together with any " +"surrounding text." +msgstr "画像は **ストーリー** の一部となる可能性があります。画像は周囲のテキストと一緒に配置されます。" + +#: ../../story-class.rst:99 10d7d8aa6a1c47998853f3879c904e3a +msgid "" +"Multiple stories may - independently from each other - write to the same " +"page. For example, one may have separate stories for page header, page " +"footer, regular text, comment boxes, etc." +msgstr "複数のストーリーは、互いに独立して同じページに書き込むことができます。たとえば、ページヘッダー、ページフッター、通常のテキスト、コメントボックスなどの異なるストーリーを持つことができます。" + +#: ../../story-class.rst 0c16913df99f4028a3870cbbbd910379 +#: 104010970e1342a5a900ab45a655ed39 1d65623102054847865dc9b04f9af565 +#: 456fb972dc3a4159952dd56ac1654590 47b053df5e8a436b92de3d583ae08f39 +#: 4c8c1511d6ca4de3b1ff6c538979f186 6cb09318829e49438c6c48b4681dfc29 +#: b0223ffe432744529572c26fc11dab80 c448f77784b245c2b24ce441b2722ea0 +#: fe317c7a33064ffea7eca66586243546 +msgid "Parameters" +msgstr "パラメーター:" + +#: ../../story-class.rst:103 287c70d5ad0647e88290a1333ac631e0 +msgid "" +"HTML source code. If omitted, a basic minimum is generated (see below). " +"If provided, not a complete HTML document is needed. The in-built source " +"parser will forgive (many / most) HTML syntax errors and also accepts " +"HTML fragments like `\"Hello, World!\"`." +msgstr "" +"HTMLソースコード。省略した場合、基本的な最小限のHTMLが生成されます(以下参照)。指定した場合、完全なHTML文書は必要ありません。組み込みのソースパーサは(多くの)HTML構文エラーを許容し、また" +" `\"Hello, World!\"` のようなHTMLフラグメントも受け入れます。" + +#: ../../story-class.rst:108 63178e97fc4246158cbc7062597c4b93 +msgid "CSS source code. If provided, must contain valid CSS specifications." +msgstr "CSSソースコード。指定する場合、有効なCSS仕様を含む必要があります。" + +#: ../../story-class.rst:109 0cf5afcd55e544ad8724adfc4be7f3bb +msgid "the default text font size." +msgstr "デフォルトのテキストフォントサイズ。" + +#: ../../story-class.rst:110 049536668785410f8df0dbcb3a1feddc +msgid "" +"an :ref:`Archive` from which to load resources for rendering. Currently " +"supported resource types are images and text fonts. If omitted, the story" +" will not try to look up any such data and may thus produce incomplete " +"output. .. note:: Instead of an actual archive, valid arguments for " +"**creating** an :ref:`Archive` can also be provided -- in which case an " +"archive will temporarily be constructed. So, instead of `story = " +"pymupdf.Story(archive=pymupdf.Archive(\"myfolder\"))`, one can also " +"shorter write `story = pymupdf.Story(archive=\"myfolder\")`." +msgstr "" + +#: ../../story-class.rst:110 51aa7bb6ca3c42a58819b3c524960306 +msgid "" +"an :ref:`Archive` from which to load resources for rendering. Currently " +"supported resource types are images and text fonts. If omitted, the story" +" will not try to look up any such data and may thus produce incomplete " +"output." +msgstr "" +"レンダリングのためのリソースをロードするための :ref:`Archive` " +"。現在サポートされているリソースタイプは画像とテキストフォントです。省略した場合、ストーリーはそのようなデータを検索しようとはせず、不完全な出力を生成する可能性があります。" + +#: ../../story-class.rst:112 a1bdde42988b468c929fe9fb82452451 +msgid "" +"Instead of an actual archive, valid arguments for **creating** an " +":ref:`Archive` can also be provided -- in which case an archive will " +"temporarily be constructed. So, instead of `story = " +"pymupdf.Story(archive=pymupdf.Archive(\"myfolder\"))`, one can also " +"shorter write `story = pymupdf.Story(archive=\"myfolder\")`." +msgstr "" +"アーカイブの代わりに、:ref:`Archive` を一時的に **構築する** ための有効な引数も提供できます。したがって、`story = " +"pymupdf.Story(archive=pymupdf.Archive(\"myfolder\"))` の代わりに、`story = " +"pymupdf.Story(archive=\"myfolder\")` と短縮して記述することもできます。 " + +#: ../../story-class.rst:116 e956d2bfd28841d7a04df00b07957e4f +msgid "" +"Calculate that part of the story's content, that will fit in the provided" +" rectangle. The method maintains a pointer which part of the story's " +"content has already been written and upon the next invocation resumes " +"from that pointer's position." +msgstr "ストーリーのコンテンツのうち、指定された長方形に収まる部分を計算します。このメソッドは、ストーリーのコンテンツのどの部分が既に書き込まれたかを示すポインタを維持し、次回の呼び出し時にそのポインタの位置から再開します。" + +#: ../../story-class.rst:118 dc9976ea8ccc4d79bc216fce83ed6c9d +msgid "" +"layout the current part of the content to fit into this rectangle. This " +"must be a sub-rectangle of the page's :ref:`MediaBox`." +msgstr "" +"現在のコンテンツをこの長方形に収めるためのレイアウト。これはページの :ref:`MediaBox` " +"のサブ長方形でなければなりません。" + +#: ../../story-class.rst daaea6c19d22421da33214020526af4d +msgid "Return type" +msgstr "戻り値の型:" + +#: ../../story-class.rst 954873df303d499d861dc708a54e6c20 +msgid "Returns" +msgstr "戻り値:" + +#: ../../story-class.rst:121 4d9be8b010ca4b4b8dcc983949132f37 +msgid "" +"a bool (int) `more` and a rectangle `filled`. If `more == 0`, all content" +" of the story has been written, otherwise more is waiting to be written " +"to subsequent rectangles / pages. Rectangle `filled` is the part of " +"`where` that has actually been filled." +msgstr "" +"bool (int) `more` と、実際に `filled` 長方形を返します。`more == 0` " +"の場合、ストーリーのすべてのコンテンツが書き込まれたことを意味し、それ以外の場合、`more` " +"は次の長方形/ページに書き込むために待機しています。埋められた長方形は実際に埋められた `where` の一部です。" + +#: ../../story-class.rst:125 a836145f2a5a4a75b589eefba9b24211 +msgid "Write the content part prepared by :meth:`Story.place` to the page." +msgstr ":meth:`Story.place` によって準備されたコンテンツの一部をページに書き込みます。" + +#: ../../story-class.rst:127 152149a28a8b49acaaf7bfeadead892c +msgid "" +"the :ref:`Device` created by `dev = writer.begin_page(mediabox)`. The " +"device knows how to call all MuPDF functions needed to write the content." +msgstr "" +"`dev = writer.begin_page(mediabox)` で作成された " +":ref:`Device`。このデバイスは、コンテンツを書き込むために必要なすべてのMuPDF関数を呼び出す方法を知っています。" + +#: ../../story-class.rst:128 35b2e7568db140c9abf31a7ee3ef7f54 +msgid "" +"a matrix for transforming content when writing to the page. An example " +"may be writing rotated text. The default means no transformation (i.e. " +"the :ref:`Identity` matrix)." +msgstr "" +"ページに書き込む際にコンテンツを変形させるための行列。テキストを回転させるなどの例が考えられます。デフォルトでは変換は行われず(つまり " +":ref:`Identity` 行列)、コンテンツはそのまま書き込まれます。" + +#: ../../story-class.rst:132 e9b8bfcac05244199692b5afb6d90ed0 +msgid "" +"Let the Story provide positioning information about certain HTML elements" +" once their place on the current page has been computed - i.e. invoke " +"this method **directly after** :meth:`Story.place`." +msgstr "" +"ストーリーが現在のページ上で特定のHTML要素の配置情報を提供するようにします。つまり、:meth:`Story.place` の **直後に**" +" このメソッドを呼び出します。" + +#: ../../story-class.rst:134 e0021c2f17db417c8976fce48469b736 +msgid "" +"*Story* will pass position information to *function*. This information " +"can for example be used to generate a Table of Contents." +msgstr "*Story* は位置情報を *関数* に渡します。この情報は、目次の生成などに便利に使用できます。" + +#: ../../story-class.rst:136 27a5d6e27ace4870892e746111d4365b +msgid "" +"a Python function accepting an :class:`ElementPosition` object. It will " +"be invoked by the Story object to process positioning information. The " +"function **must** be a callable accepting exactly one argument." +msgstr "" +":class:`ElementPosition` " +"オブジェクトを受け入れるPython関数。この関数は、位置情報を処理するためにStoryオブジェクトによって呼び出されます。**関数** " +"は正確に1つの引数を受け入れるcallableである必要があります。" + +#: ../../story-class.rst:137 08bda4472e0b457faada19c08c19734e +msgid "" +"an optional dictionary with any **additional** information that should be" +" added to the :class:`ElementPosition` instance passed to `function`. " +"Like for example the current output page number. Every key in this " +"dictionary must be a string that conforms to the rules for a valid Python" +" identifier. The complete set of information is explained below." +msgstr "" +"`function` に渡される :class:`ElementPosition` インスタンスに **追加** " +"情報を追加するためのオプションの辞書。たとえば、現在の出力ページ番号などが含まれることがあります。この辞書のすべてのキーは、有効なPython識別子の規則に従う文字列である必要があります。情報の完全なセットは以下で説明されています。" + +#: ../../story-class.rst:146 98941c7d778c4d34a3d76bb76b5739dc +msgid "Rewind the story's document to the beginning for starting over its output." +msgstr "ストーリーのドキュメントを最初に戻して、出力を再開します。" + +#: ../../story-class.rst:150 2eaf3e53d3c34b20bd6b65526619e383 +msgid "" +"The :htmlTag:`body` part of the story's DOM. This attribute contains the " +":ref:`Xml` node of :htmlTag:`body`. All relevant content for PDF " +"production is contained between \"\" and \"\"." +msgstr "" +"ストーリーのDOMの :htmlTag:`body` 部分。この属性には :htmlTag:`body` の :ref:`Xml` " +"ノードが含まれています。PDFの制作に関連するすべてのコンテンツは、「」と「」の間に含まれています。" + +#: ../../story-class.rst:154 09a8579966ad4dd69d832b7cff35811b +msgid "" +"Places and draws Story to a `DocumentWriter`. Avoids the need for calling" +" code to implement a loop that calls `Story.place()` and `Story.draw()` " +"etc, at the expense of having to provide at least the `rectfn()` " +"callback." +msgstr "" +"Storyを :ref:`DocumentWriter` に配置し、描画します。これにより、`Story.place()` および " +"`Story.draw()` などを呼び出すループの実装が不要になりますが、`rectfn()` コールバックを少なくとも提供する必要があります。" + +#: ../../story-class.rst:159 5194586e662141618935bdf29a379921 +msgid "a `DocumentWriter` or None." +msgstr ":ref:`DocumentWriter` または `None`。" + +#: ../../story-class.rst:160 d62e010560f14bf1b7cd210ebf78b20c +#, fuzzy +msgid "" +"a callable taking `(rect_num: int, filled: Rect)` and returning " +"`(mediabox, rect, ctm)`: * mediabox: None or rect for new page. * rect: " +"The next rect into which content should be placed. * ctm: None or a " +"`Matrix`." +msgstr "" +"`(rect_num: int, filled: Rect)` を取り、`(mediabox, rect, ctm)` " +"を返すcallable:mediabox" + +#: ../../story-class.rst:160 ae5c5dc080b540f09ad256f10c2cf83d +msgid "" +"a callable taking `(rect_num: int, filled: Rect)` and returning " +"`(mediabox, rect, ctm)`:" +msgstr "`(rect_num: int, filled: Rect)` を取り、`(mediabox, rect, ctm)` を返すcallable:" + +#: ../../story-class.rst:163 ../../story-class.rst:204 +#: 193ce6bab9f74a3a9cf1bbeaf3b2dc88 991faab1db1a4e479714af5b2e511e0e +#, fuzzy +msgid "mediabox: None or rect for new page." +msgstr "新しいページ用の `None` またはrect。" + +#: ../../story-class.rst:164 ../../story-class.rst:205 +#: 2c523120813f4975b82ffb2efb008126 cbe16d25e8e24253b41d04316f0700a3 +#, fuzzy +msgid "rect: The next rect into which content should be placed." +msgstr "コンテンツを配置する次の矩形。" + +#: ../../story-class.rst:165 6068e757476744c2a2008edf19b650a1 +#, fuzzy +msgid "ctm: None or a `Matrix`." +msgstr "`None` または `Matrix`。" + +#: ../../story-class.rst:166 54470f3ad61e4531aca719c848ecebb4 +msgid "" +"None, or a callable taking `(position: ElementPosition)`: * position:" +" An `ElementPosition` with an extra `.page_num` member. Typically " +"called multiple times as we generate elements that are headings or have " +"an id." +msgstr "" + +#: ../../story-class.rst:166 0c1e37384309449d8a3f1aa31612af12 +#, fuzzy +msgid "None, or a callable taking `(position: ElementPosition)`:" +msgstr "" +"`None` または(ElementPosition)を取り、`(position: ElementPosition)`: " +"positionを返すcallable:" + +#: ../../story-class.rst:168 78f15911e52b455d87f6517e46edf76d +msgid "position:" +msgstr "" + +#: ../../story-class.rst:169 8ba40e35c2734c1d8c5eb72cc71c82f4 +msgid "An `ElementPosition` with an extra `.page_num` member." +msgstr "`.page_num` メンバーを持つ `ElementPosition`。" + +#: ../../story-class.rst:170 9d3b6f846744442c8f1ebd5c03e1a944 +msgid "" +"Typically called multiple times as we generate elements that are headings" +" or have an id." +msgstr "通常、見出しやIDを持つ要素を生成する際に複数回呼び出されます。" + +#: ../../story-class.rst:172 0d6deb6dd4d9405ab38dff88ad1fc98f +msgid "" +"None, or a callable taking `(page_num, mediabox, dev, after)`; called at " +"start (`after=0`) and end (`after=1`) of each page." +msgstr "" +"`None` または `(page_num, mediabox, dev, after)` " +"を取り、各ページの開始(`after=0`)および終了(`after=1`)時に呼び出されます。" + +#: ../../story-class.rst:178 a79665d597fb416d960734c83cfd6657 +msgid "" +"Static method that does iterative layout of html content to a " +"`DocumentWriter`." +msgstr "htmlコンテンツを :ref:`DocumentWriter` に対して反復的にレイアウトするための静的メソッド。" + +#: ../../story-class.rst:181 6fc706ddf1b648e68d59f87ae642ba3d +msgid "" +"For example this allows one to add a table of contents section while " +"ensuring that page numbers are patched up until stable." +msgstr "これにより、ページ番号が安定するまで目次セクションを追加したりすることができます。" + +#: ../../story-class.rst:184 624160a14d984361bd0974171f55c924 +msgid "" +"Repeatedly creates a new `Story` from `(contentfn(), user_css, em, " +"archive)` and lays it out with internal call to `Story.write()`; uses a " +"None writer and extracts the list of `ElementPosition`'s which is passed " +"to the next call of `contentfn()`." +msgstr "" +"`(contentfn()、user_css、em、archive)` から新しい `Story` " +"を繰り返し作成し、`Story.write()` への内部呼び出しでそれをレイアウトします。 `None` " +"のライターを使用し、`ElementPosition` のリストを次回の `contentfn()` 呼び出しに渡します。" + +#: ../../story-class.rst:190 4b831cab0cb845b08bcd23293dfacaff +msgid "" +"When the html from `contentfn()` becomes unchanged, we do a final " +"iteration using `writer`." +msgstr "`contentfn()` からのhtmlが変更されなくなると、`writer` を使用して最終的な反復処理を行います。" + +#: ../../story-class.rst:193 fc2b4f5717ce43d49be5f534a63440f9 +msgid "A `DocumentWriter`." +msgstr ":ref:`DocumentWriter`。" + +#: ../../story-class.rst:195 8156be2fda8e4081b5e9447ab8e0e84f +msgid "" +"A function taking a list of `ElementPositions` and returning a string " +"containing html. The returned html can depend on the list of positions, " +"for example with a table of contents near the start." +msgstr "" +"`ElementPositions` " +"のリストを取り、htmlを含む文字列を返す関数。返されるhtmlは、位置のリストに依存する場合があります。たとえば、最初の近くに目次がある場合です。" + +#: ../../story-class.rst:200 f67b52fd424e4dc68961c8310fcaa746 +#, fuzzy +msgid "" +"A callable taking `(rect_num: int, filled: Rect)` and returning " +"`(mediabox, rect, ctm)`: * mediabox: None or rect for new page. * rect: " +"The next rect into which content should be placed. * ctm: A `Matrix`." +msgstr "" +"`(rect_num: int, filled: Rect)` を取り、`(mediabox, rect, ctm)` " +"を返すcallable:mediabox" + +#: ../../story-class.rst:201 2619bfc1a11c4ae089bce95a0a0b388f +msgid "" +"A callable taking `(rect_num: int, filled: Rect)` and returning " +"`(mediabox, rect, ctm)`:" +msgstr "" + +#: ../../story-class.rst:206 893cb8063b414afaaced0a0058f0d3ab +#, fuzzy +msgid "ctm: A `Matrix`." +msgstr ":ref:`Matrix`。" + +#: ../../story-class.rst:207 fa079791f265424f89482cae4560278b +msgid "" +"None, or a callable taking `(page_num, medibox, dev, after)`; called at " +"start (`after=0`) and end (`after=1`) of each page." +msgstr "" +"`None` または `(page_num、medibox、dev、after)` " +"を取り、各ページの開始(`after=0`)および終了(`after=1`)時に呼び出されます。" + +#: ../../story-class.rst:212 1f50bce4bc934df3b5d624e1b672e8a7 +msgid "" +"If true, we add unique ids to all header tags that don't already have an " +"id. This can help automatic generation of tables of contents." +msgstr "Trueの場合、idを持たないすべての見出しタグに一意のidを追加します。これは目次の自動生成に役立ちます" + +#: ../../story-class.rst:216 4d3d89f8a84e4ddca5b6f49a19e7360f +msgid "Returns:" +msgstr "" + +#: ../../story-class.rst:217 67ed7ea6343840ce8ca2af472e723907 +msgid "None." +msgstr "" + +#: ../../story-class.rst:221 dc5ef0f369b64b56a861bebe37e7d740 +msgid "" +"Similar to `write()` except that we don't have a `writer` arg and we " +"return a PDF `Document` in which links have been created for each " +"internal html link." +msgstr "" +"`write()` に類似していますが、`writer` 引数がなく、内部のHTMLリンクごとにリンクが作成されたPDF `Document` " +"が返されます。" + +#: ../../story-class.rst:227 f60fb57103404e4487a18f27456b9ac5 +msgid "" +"Similar to `write_stabilized()` except that we don't have a `writer` arg " +"and instead return a PDF `Document` in which links have been created for " +"each internal html link." +msgstr "" +"`write_stabilized()` に類似していますが、`writer` " +"引数がなく、代わりに各内部のHTMLリンクにリンクが作成されたPDF `Document` が返されます。" + +#: ../../story-class.rst:233 06abc8ee0cef4e4998719c4d3848c318 +msgid "The result from a `Story.fit*()` method." +msgstr "" + +#: ../../story-class.rst:235 f882cf97ed5e42b69f1212bd5c0d3b3e +msgid "Members:" +msgstr "" + +#: ../../story-class.rst:237 05afdcff48b54e87a70244ac485320f6 +msgid "`big_enough`:" +msgstr "" + +#: ../../story-class.rst:238 296c3d062bc34a2f9826b3fe722423ed +msgid "`True` if the fit succeeded." +msgstr "" + +#: ../../story-class.rst:239 910160ef882e49209e123946968ad89d +msgid "`filled`:" +msgstr "" + +#: ../../story-class.rst:240 2be02fcee76042b3b1e67242108c02d8 +msgid "From the last call to `Story.place()`." +msgstr "" + +#: ../../story-class.rst:241 7f3007c064224426bdeffe2f30577d5a +msgid "`more`:" +msgstr "" + +#: ../../story-class.rst:242 14d98da06ba44fadacc3c05f8c88d916 +msgid "`False` if the fit succeeded." +msgstr "" + +#: ../../story-class.rst:243 bea5727ffc6f4a0f9b4816919098061b +msgid "`numcalls`:" +msgstr "" + +#: ../../story-class.rst:244 508bd595de3a4b6a87cf8cece9b67d36 +msgid "Number of calls made to `self.place()`." +msgstr "" + +#: ../../story-class.rst:245 0c4225d297644946b97fffd257cb3149 +#, fuzzy +msgid "`parameter`:" +msgstr "パラメーター:" + +#: ../../story-class.rst:246 a38a79d488cc413583a588ace44bad64 +msgid "The successful parameter value, or the largest failing value." +msgstr "" + +#: ../../story-class.rst:247 e5e7d478999f4d3aa75cc2e4a0bf8ed2 +msgid "`rect`:" +msgstr "" + +#: ../../story-class.rst:248 865655b3648b49e19a5b3683d5ac952c +msgid "The rect created from `parameter`." +msgstr "" + +#: ../../story-class.rst:254 ../../story-class.rst:282 +#: ../../story-class.rst:304 ../../story-class.rst:325 +#: 13889911bf6e4b4099ac3db022c63ba7 181f41eeee2248529bcc9f4180a0fd2f +#: 2a70154984d84fde9207702c6951f227 833e95a45bf24525a22b1bf7caedcfd7 +msgid "Returns a `Story.FitResult` instance." +msgstr "" + +#: ../../story-class.rst:256 9f2d407c6f7447a384ccc54acb6bbb6d +msgid "" +"On success, the last call to `self.place()` will have been with the " +"returned rectangle, so `self.draw()` can be used directly." +msgstr "" + +#: ../../story-class.rst:259 723d3c893ffb40979529d0c13998947c +msgid "" +"A callable taking a floating point `parameter` and returning a " +"`pymupdf.Rect()`. If the rect is empty, we assume the story will not fit " +"and do not call `self.place()`. Must guarantee that `self.place()` " +"behaves monotonically when given rect `fn(parameter`) as `parameter` " +"increases. This usually means that both width and height increase or stay" +" unchanged as `parameter` increases." +msgstr "" + +#: ../../story-class.rst:260 9507ed4935a0463eb2d74e2300c6e45c +msgid "" +"A callable taking a floating point `parameter` and returning a " +"`pymupdf.Rect()`. If the rect is empty, we assume the story will not fit " +"and do not call `self.place()`." +msgstr "" + +#: ../../story-class.rst:264 b8b210f2827f4137a23a52bb43be5a7d +msgid "" +"Must guarantee that `self.place()` behaves monotonically when given rect " +"`fn(parameter`) as `parameter` increases. This usually means that both " +"width and height increase or stay unchanged as `parameter` increases." +msgstr "" + +#: ../../story-class.rst:268 a4dba85757184a2f88067b0d0bc2a5ac +msgid "Minimum parameter to consider; `None` for -infinity." +msgstr "" + +#: ../../story-class.rst:270 c3041af96355455e9dd9c1d8b9001d02 +msgid "Maximum parameter to consider; `None` for +infinity." +msgstr "" + +#: ../../story-class.rst:272 50ba04ced3904450bbb765d580667a8a +msgid "Maximum error in returned `parameter`." +msgstr "" + +#: ../../story-class.rst:274 ../../story-class.rst:295 +#: ../../story-class.rst:317 ../../story-class.rst:338 +#: 4560cdaee7304e68af9c9515d9baeeab 853d4b4eccc64445b4dadae224277597 +#: 9a97ca12c0fd4e4cad25cd646c9fd3a1 c27eb2778a574ddd9cdbb5da9e952da1 +msgid "If true we output diagnostics." +msgstr "" + +#: ../../story-class.rst:279 4dca0fa3d9dc4d74a72be9a25b3042f5 +msgid "" +"Finds smallest value `scale` in range `scale_min..scale_max` where `scale" +" * rect` is large enough to contain the story `self`." +msgstr "" + +#: ../../story-class.rst:284 ../../story-class.rst:306 +#: 3e56c24499df4c63acde4cd29372549c c1836f4303104884b7f75d1c7d7f4c18 +msgid "width of rect." +msgstr "" + +#: ../../story-class.rst:286 ../../story-class.rst:327 +#: 599c4470f82444f093e40776f0fc3088 d107736639a3470a838f7ae926a0c493 +msgid "height of rect." +msgstr "" + +#: ../../story-class.rst:288 192ebb0b7d2c4255ae07cda187e1c04a +msgid "Minimum scale to consider; must be >= 0." +msgstr "" + +#: ../../story-class.rst:290 9becc562a2894913ac6b78b33fa618a8 +msgid "Maximum scale to consider, must be >= scale_min or `None` for infinite." +msgstr "" + +#: ../../story-class.rst:293 b80e9c9c5a024ab9b415c43f695db382 +msgid "Maximum error in returned scale." +msgstr "" + +#: ../../story-class.rst:300 20b042fb975b4ef38c6a275f58290800 +msgid "" +"Finds smallest height in range `height_min..height_max` where a rect with" +" size `(width, height)` is large enough to contain the story `self`." +msgstr "" + +#: ../../story-class.rst:308 de3278b094bd4069a6ade29bab864b2e +msgid "Minimum height to consider; must be >= 0." +msgstr "" + +#: ../../story-class.rst:310 d890a80ccfbc4767b155bf963080574b +msgid "Maximum height to consider, must be >= height_min or `None` for infinite." +msgstr "" + +#: ../../story-class.rst:313 ../../story-class.rst:334 +#: 345fc763c90942b3af9314cb53d3635c fb1f9d71eaf340dfb2d14dff3ccbaf8d +msgid "`(x0, y0)` of rect." +msgstr "" + +#: ../../story-class.rst:315 05ba2969245642aa9c5df4b5faa1c1c7 +msgid "Maximum error in returned height." +msgstr "" + +#: ../../story-class.rst:322 2c063f96e3d24728acc052323d414f48 +msgid "" +"Finds smallest width in range `width_min..width_max` where a rect with " +"size `(width, height)` is large enough to contain the story `self`." +msgstr "" + +#: ../../story-class.rst:329 755a76f8bcad4f9c9b17627fb2dd2b92 +msgid "Minimum width to consider; must be >= 0." +msgstr "" + +#: ../../story-class.rst:331 4a9bc91b4df64e0aae622c2fc204c366 +msgid "Maximum width to consider, must be >= width_min or `None` for infinite." +msgstr "" + +#: ../../story-class.rst:336 a4d66b18a35f42759e36401ac18b5919 +msgid "Maximum error in returned width." +msgstr "" + +#: ../../story-class.rst:343 d2d8c9c232984e92ab30e2dc905f7f7a +msgid "Element Positioning CallBack function" +msgstr "要素位置コールバック関数" + +#: ../../story-class.rst:345 a81639ecc64041979b5a57ccef510f75 +msgid "" +"The callback function can be used to log information about story output. " +"The function's access to the information is read-only: it has no way to " +"influence the story's output." +msgstr "コールバック関数は、ストーリーの出力に関する情報を記録するために使用できます。この関数は情報への読み取り専用アクセスを持ち、ストーリーの出力に影響を与える方法はありません。" + +#: ../../story-class.rst:347 d63a34b1197048c0a8e7128565d99b11 +msgid "" +"A typical loop for executing a story with using this method would look " +"like this::" +msgstr "このメソッドを使用してストーリーを実行する典型的なループは次のようになります:" + +#: ../../story-class.rst:379 022c24b9ecb142e4b995435e43d0abba +msgid "Attributes of the ElementPosition class" +msgstr "ElementPosition クラスの属性" + +#: ../../story-class.rst:380 eb9fd926dd144458ad966437c52bc9b3 +msgid "" +"Exactly one parameter must be passed to the function provided by " +":meth:`Story.element_positions`. It is an object with the following " +"attributes:" +msgstr ":meth:`Story.element_positions` で提供される関数に渡すパラメータは、次の属性を持つオブジェクトである必要があります。" + +#: ../../story-class.rst:382 51f35390ddf549c49309c8e41b08fc0d +msgid "" +"The parameter passed to the `recorder` function is an object with the " +"following attributes:" +msgstr "`recorder` 関数に渡されるパラメータは、次の属性を持つオブジェクトです。" + +#: ../../story-class.rst:384 ea7a7e801d004da2946b291daf693c79 +msgid "`elpos.depth` (int) -- depth of this element in the box structure." +msgstr "`elpos.depth` (int)– ボックス構造内でのこの要素の深さ。" + +#: ../../story-class.rst:386 eedd651708ad4d08ab6358db8492fe86 +msgid "" +"`elpos.heading` (int) -- the header level, 0 if no header, 1-6 for " +":htmlTag:`h1` - :htmlTag:`h6`." +msgstr "" +"`elpos.heading` (int)– ヘッダーレベル、ヘッダーがない場合は0、 :htmlTag:`h1` - :htmlTag:`h6`" +" に対して1-6。" + +#: ../../story-class.rst:388 58c1d6c18dbc4defa9fdccf70fb9f5a1 +#, fuzzy +msgid "" +"`elpos.href` (str) -- value of the `href` attribute, or None if not " +"defined." +msgstr "`elpos.href` (str)– `href` 属性の値、または未定義の場合は `None`。" + +#: ../../story-class.rst:390 085ae77bd6a0482e9bfed8909e66cc63 +msgid "`elpos.id` (str) -- value of the `id` attribute, or None if not defined." +msgstr "`elpos.id` (str)– `id` 属性の値、または未定義の場合は `None`。" + +#: ../../story-class.rst:392 6148773811f84de3837f6e84482b5a6f +msgid "`elpos.rect` (tuple) -- element position on page." +msgstr "`elpos.rect` (tuple)– ページ上の要素の位置。" + +#: ../../story-class.rst:394 e46b041d8944429f8ec58070b71170c3 +msgid "`elpos.text` (str) -- immediate text of the element." +msgstr "`elpos.text` (str)– 要素の直接のテキスト。" + +#: ../../story-class.rst:396 10ae79dfd5114378b05c394821db2139 +msgid "" +"`elpos.open_close` (int bit field) -- bit 0 set: opens element, bit 1 " +"set: closes element. Relevant for elements that may contain other " +"elements and thus may not immediately be closed after being created / " +"opened." +msgstr "" +"`elpos.open_close` (int ビットフィールド)– " +"ビット0がセットされている場合、要素を開く。ビット1がセットされている場合、要素を閉じる。他の要素を含む可能性がある要素に対して、即座に作成/開始された後にすぐに閉じられない要素に関連します。" + +#: ../../story-class.rst:398 d06166cffa24405cbbc118345b121dc7 +msgid "`elpos.rect_num` (int) -- count of rectangles filled by the story so far." +msgstr "`elpos.rect_num` (int)– これまでにストーリーで埋められた長方形の数。" + +#: ../../story-class.rst:400 1559da9c2ea247b99415efc378836399 +msgid "" +"`elpos.page_num` (int) -- page number; only present when using " +"`pymupdf.Story.write*()` functions." +msgstr "`elpos.page_num` (int)– ページ番号。`pymupdf.Story.write*()` 関数を使用する場合にのみ存在します。" + +#: ../../footer.rst:60 c25ec66bf75249a58ca8ecc2e8d0184a +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "a callable taking `(rect_num: int, " +#~ "filled: Rect)` and returning `(mediabox, " +#~ "rect, ctm)`: mediabox: None or " +#~ "rect for new page. rect: The" +#~ " next rect into which content should" +#~ " be placed. ctm: None or " +#~ "a `Matrix`." +#~ msgstr "" + +#~ msgid "mediabox:" +#~ msgstr "" + +#~ msgid "rect:" +#~ msgstr "" + +#~ msgid "ctm:" +#~ msgstr "" + +#~ msgid "" +#~ "None, or a callable taking `(position:" +#~ " ElementPosition)`: position: An " +#~ "`ElementPosition` with an extra `.page_num`" +#~ " member. Typically called multiple times" +#~ " as we generate elements that are " +#~ "headings or have an id." +#~ msgstr "" + +#~ msgid "." +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/supported-files-table.mo b/docs/locales/ja/LC_MESSAGES/supported-files-table.mo new file mode 100644 index 000000000..7f5c93dd1 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/supported-files-table.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/supported-files-table.po b/docs/locales/ja/LC_MESSAGES/supported-files-table.po new file mode 100644 index 000000000..64ff90887 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/supported-files-table.po @@ -0,0 +1,20 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2024, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2024. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.8\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2023-12-21 13:53+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" diff --git a/docs/locales/ja/LC_MESSAGES/textpage.mo b/docs/locales/ja/LC_MESSAGES/textpage.mo new file mode 100644 index 000000000..02de5f0a6 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/textpage.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/textpage.po b/docs/locales/ja/LC_MESSAGES/textpage.po new file mode 100644 index 000000000..0329e6238 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/textpage.po @@ -0,0 +1,1333 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 9ca6145cb97145af9cdf5ef76dc0de3c +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 0600b90839df40e9ac23cb738c57c29a +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 1f6d63ced97943c9b613b299ec031e9c +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../textpage.rst:7 9966d819027e453abbb1adeb4d1b2bfb +msgid "TextPage" +msgstr "TextPage (テキストページ)" + +#: ../../textpage.rst:9 8297e05841c84513ba479147993e07bc +msgid "" +"This class represents text and images shown on a document page. All " +":ref:`MuPDF document types` are supported." +msgstr "" +"このクラスは、文書ページに表示されるテキストと画像を表します。すべての :ref:`MuPDF document " +"types` がサポートされています。" + +#: ../../textpage.rst:11 305a69e6d1cc4efe929bb3b1e01c338f +msgid "" +"The usual ways to create a textpage are :meth:`DisplayList.get_textpage` " +"and :meth:`Page.get_textpage`. Because there is a limited set of methods " +"in this class, there exist wrappers in :ref:`Page` which are handier to " +"use. The last column of this table shows these corresponding :ref:`Page` " +"methods." +msgstr "" +"テキストページを作成する通常の方法は、:meth:`DisplayList.get_textpage` および " +":meth:`Page.get_textpage` です。このクラスにはメソッドのセットが制限されているため、:ref:`Page` " +"内にはより使いやすいラッパーが存在します。この表の最後の列には、対応する :ref:`Page` メソッドが示されています。" + +#: ../../textpage.rst:13 b28048eebfef4686a5b9cce8aa9a9242 +msgid "For a description of what this class is all about, see Appendix 2." +msgstr "このクラスに関する詳細な説明については、付録2を参照してください。" + +#: ../../textpage.rst:16 69b717897dfc419ea428dbd7a0de9662 +msgid "**Method**" +msgstr "**メソッド**" + +#: ../../textpage.rst:16 b015eb61d04f419e8b7fbb496471574b +msgid "**Description**" +msgstr "**説明**" + +#: ../../textpage.rst:16 a44519ebf5904319bc4a59d55cfd8e73 +msgid "page get_text or search method" +msgstr "page get_textまたはsearchメソッド" + +#: ../../textpage.rst:18 8008e72f7f374921bfa7c04521f8e0c3 +msgid ":meth:`~.extractText`" +msgstr "" + +#: ../../textpage.rst:18 77846738c3614d43b519d0e00d3d96a4 +msgid "extract plain text" +msgstr "プレーンテキストを抽出します" + +#: ../../textpage.rst:18 ../../textpage.rst:19 3f6e2c97c46146adb7c925645b823ddd +#: d689ad9d5aaa4bdb8029dd8b22b70b0f +msgid "\"text\"" +msgstr "" + +#: ../../textpage.rst:19 b33cc2a2a3ef4e318acbfd19152b414b +msgid ":meth:`~.extractTEXT`" +msgstr "" + +#: ../../textpage.rst:19 c5c23844e38a4a5e8d5bde717d12f1f6 +msgid "synonym of previous" +msgstr "前述の同義語" + +#: ../../textpage.rst:20 d0ed007250bf40e39cec0ea8758af8da +msgid ":meth:`~.extractBLOCKS`" +msgstr "" + +#: ../../textpage.rst:20 814dcd360d82450599f24cdfbdef17c5 +msgid "plain text grouped in blocks" +msgstr "ブロックにグループ化されたプレーンテキスト" + +#: ../../textpage.rst:20 629bdb6414654b9f9cb94ce40354164c +msgid "\"blocks\"" +msgstr "" + +#: ../../textpage.rst:21 3ddf85d5fb49495cbea91a11e1607ae2 +msgid ":meth:`~.extractWORDS`" +msgstr "" + +#: ../../textpage.rst:21 edfdc32c2fc74cd68e555cad8e96868b +msgid "all words with their bbox" +msgstr "すべての単語とそのバウンディングボックス" + +#: ../../textpage.rst:21 4bc95410c6ee4ff2bd0b600ecf44f4b9 +msgid "\"words\"" +msgstr "" + +#: ../../textpage.rst:22 07c485e7d0f7479faa4a87fd71b2f5e5 +msgid ":meth:`~.extractHTML`" +msgstr "" + +#: ../../textpage.rst:22 6d2468734a6640d2bb0382734ebd79e1 +msgid "page content in HTML format" +msgstr "HTML形式のページコンテンツ" + +#: ../../textpage.rst:22 2adb8031ce96459cb3f20269010cca59 +msgid "\"html\"" +msgstr "" + +#: ../../textpage.rst:23 f5c7d3a6ff554e678c0b20e089c51a5b +msgid ":meth:`~.extractXHTML`" +msgstr "" + +#: ../../textpage.rst:23 2be43ebbad204613afa2dac47ec43dab +msgid "page content in XHTML format" +msgstr "XHTML形式のページコンテンツ" + +#: ../../textpage.rst:23 f622b44d00444653961d91b40c500a36 +msgid "\"xhtml\"" +msgstr "" + +#: ../../textpage.rst:24 73a3a84cb7044222b3e3d3f13e95e717 +msgid ":meth:`~.extractXML`" +msgstr "" + +#: ../../textpage.rst:24 275a3c930f7d49e28dd68902c6f67f1b +msgid "page text in XML format" +msgstr "XML形式のページテキスト" + +#: ../../textpage.rst:24 683535736405464a8c545be3161fc1b3 +msgid "\"xml\"" +msgstr "" + +#: ../../textpage.rst:25 e682781270e94299b8ecc57e7673923a +msgid ":meth:`~.extractDICT`" +msgstr "" + +#: ../../textpage.rst:25 ../../textpage.rst:27 5f983ba8bbcd49e6a3f48d06600fbc5f +#: 7cd7d12c082140efb13d14acb816f7fe +msgid "page content in *dict* format" +msgstr "*辞書* 形式のページコンテンツ" + +#: ../../textpage.rst:25 28f2f9408d05410e837a1fe3c511b4b2 +msgid "\"dict\"" +msgstr "" + +#: ../../textpage.rst:26 60dc6c6e5d1842348ed55e5d047da848 +msgid ":meth:`~.extractJSON`" +msgstr "" + +#: ../../textpage.rst:26 ../../textpage.rst:28 6c6f2c4c234d4bc796aef69138586877 +#: 8bba15457a094d60b34ebe13bf920b39 +msgid "page content in JSON format" +msgstr "JSON形式のページコンテンツ" + +#: ../../textpage.rst:26 80dc933b5e3a4420bd0d3a08d3d10d9e +msgid "\"json\"" +msgstr "" + +#: ../../textpage.rst:27 dcdb9d6944b54bcba9fa189b6c7f52cb +msgid ":meth:`~.extractRAWDICT`" +msgstr "" + +#: ../../textpage.rst:27 49c8d6da08d144e9a3442f06e332a627 +msgid "\"rawdict\"" +msgstr "" + +#: ../../textpage.rst:28 53dbbd0bbeab41d1920c1ffa81f80296 +msgid ":meth:`~.extractRAWJSON`" +msgstr "" + +#: ../../textpage.rst:28 b0c2313e501d45a384644ecae53cbd1d +msgid "\"rawjson\"" +msgstr "" + +#: ../../textpage.rst:29 4560d07e61fa48dfa46c970d50b0743e +msgid ":meth:`~.search`" +msgstr "" + +#: ../../textpage.rst:29 01146efdbcf54aaba30cc1173162f210 +msgid "Search for a string in the page" +msgstr "ページ内の文字列を検索します" + +#: ../../textpage.rst:29 f35dabe2e99843cca4604961ee9dffde +msgid ":meth:`Page.search_for`" +msgstr "" + +#: ../../textpage.rst:32 8aee7016e26240e2bfa9d18be19a98d5 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../textpage.rst:40 cd8df8359f9f4524a39869a7107a4368 +msgid "" +"Return a string of the page's complete text. The text is UTF-8 unicode " +"and in the same sequence as specified at the time of document creation." +msgstr "ページの完全なテキストの文字列を返します。テキストはUTF-8ユニコードで、文書作成時に指定された順序と同じです。" + +#: ../../textpage.rst 57da01987a7843fca5d739690f83f82a +#: 671578e8ecf845f083967ff38478d427 7e7dcf3e596f4f5ebc9039838dc9ebba +#: 8a8b6b3ca4e0408dbd33f8640e4ac26e ad6e9346d9d44984b434be3a8cd0b0a2 +#: d50290b47cbc4fc3b7fbf02c24036796 dc4e99ebeaaf4e068ef7cf6bc5915ac6 +msgid "Parameters" +msgstr "" + +#: ../../textpage.rst:42 ../../textpage.rst:85 ../../textpage.rst:93 +#: ../../textpage.rst:113 ../../textpage.rst:121 +#: 16fb4ac02785457b9710449f9df41f14 23ee8f97b7184497ad7bc905b9b13624 +#: 40321a9de1394c2199c23ff76e4ab458 89f2e9b0fea144eca048813faba0eeaa +#: b003361c0ff648a7b8e6b841e0e05999 +msgid "" +"(new in v1.19.1) sort the output by vertical, then horizontal " +"coordinates. In many cases, this should suffice to generate a \"natural\"" +" reading order." +msgstr "" + +#: ../../textpage.rst 0016111872404e169cbf1939b7c7d6a8 +#: 0998a86461f24855ab3f792dded2227c 49da96effe14424886d87bb9d6621b29 +#: 6a93913ee78c40f894b9df8f0547aa11 717ee5c982c24238844eda2ac1ee1a99 +#: 80a094079a8f42f38a882326261f71a6 8142ec710040424cb21597ecea1e53d9 +#: 96346247583745628dac24baa4a6639e a4150f10a5c0406a8df2d01a211754ad +#: af5da724d13e4f48991e381d99a03fdf ea64c177370c41e5884e95cf392dde1d +msgid "Return type" +msgstr "" + +#: ../../textpage.rst:49 6b33d35e93314ecfa37fa95c710500db +msgid "" +"Textpage content as a list of text lines grouped by block. Each list " +"items looks like this::" +msgstr "テキストページの内容を、ブロックごとにグループ化されたテキスト行のリストとして返します。各リストアイテムは次のようになります::" + +#: ../../textpage.rst:53 cfb5ee966a994a11a216e249dc6caadf +msgid "" +"The first four entries are the block's bbox coordinates, *block_type* is " +"1 for an image block, 0 for text. *block_no* is the block sequence " +"number. Multiple text lines are joined via line breaks." +msgstr "" +"最初の4つのエントリはブロックのbbox座標で、*ブblock_type* は画像ブロックの場合は1、テキストの場合は0です。*block_no*" +" はブロックのシーケンス番号です。複数のテキスト行は改行で結合されます。" + +#: ../../textpage.rst:55 ca9405c070eb4a92877a2ab6f30a3e12 +msgid "" +"For an image block, its bbox and a text line with some image meta " +"information is included -- **not the image content**." +msgstr "画像ブロックの場合、そのbboxと画像メタ情報を含むテキスト行が含まれますが、**画像の内容は含まれません**。" + +#: ../../textpage.rst:57 7e94ebbeaa0e491787ab47eb385add63 +msgid "" +"This is a high-speed method with just enough information to output plain " +"text in desired reading sequence." +msgstr "これは、必要な読み取り順序でプレーンテキストを出力するのに十分な情報を持つ高速なメソッドです。" + +#: ../../textpage.rst:63 dd5a25a964b24fd19590783019182acc +msgid "Changed in v1.23.5: added `delimiters` parameter" +msgstr "" + +#: ../../textpage.rst:65 7340df397af44343b4d15f029ae9bcfb +msgid "" +"Textpage content as a list of single words with bbox information. An item" +" of this list looks like this::" +msgstr "テキストページの内容を、bbox情報を持つ単語のリストとして返します。このリストのアイテムは次のようになります::" + +#: ../../textpage.rst:69 c6d5a4cd67194e5199a675aa248f0bd3 +msgid "" +"(new in v1.23.5) use these characters as *additional* word separators. By" +" default, all white spaces (including the non-breaking space `0xA0`) " +"indicate start and end of a word. Now you can specify more characters " +"causing this. For instance, the default will return " +"`\"john.doe@outlook.com\"` as **one** word. If you specify " +"`delimiters=\"@.\"` then the **four** words `\"john\"`, `\"doe\"`, " +"`\"outlook\"`, `\"com\"` will be returned. Other possible uses include " +"ignoring punctuation characters `delimiters=string.punctuation`. The " +"\"word\" strings will not contain any delimiting character." +msgstr "" + +#: ../../textpage.rst:71 7af81f97fcc2435ab536aebd6b7f34ab +#, fuzzy +msgid "" +"This is a high-speed method which e.g. allows extracting text from within" +" given areas or recovering the text reading sequence." +msgstr "" +"スペースで区切られたすべてが *「単語」* " +"として扱われます。これは、指定された領域内からテキストを抽出したり、テキストの読み取り順序を復元したりするのに適した高速なメソッドです。" + +#: ../../textpage.rst:77 1d2e06d47e9841048613550acba8bf61 +msgid "" +"Textpage content as a string in HTML format. This version contains " +"complete formatting and positioning information. Images are included " +"(encoded as base64 strings). You need an HTML package to interpret the " +"output in Python. Your internet browser should be able to adequately " +"display this information, but see :ref:`HTMLQuality`." +msgstr "" +"HTML形式の文字列としてのテキストページの内容を返します。このバージョンには完全なフォーマットと位置情報が含まれています。画像も含まれており(base64文字列としてエンコードされています)、Pythonで出力を解釈するためにはHTMLパッケージが必要です。インターネットブラウザはこの情報を適切に表示できるはずですが、:ref:`HTMLQuality`" +" を参照してください。" + +#: ../../textpage.rst:83 282b1d017a6e4a31ba6b7975680e0614 +msgid "" +"Textpage content as a Python dictionary. Provides same information detail" +" as HTML. See below for the structure." +msgstr "TextPageの内容をPythonの辞書として取得します。HTMLと同じ情報の詳細を提供します。構造については以下を参照してください。" + +#: ../../textpage.rst:91 da83463d890f4b8fa66eafaf62eea27e +msgid "" +"Textpage content as a JSON string. Created by " +"`json.dumps(TextPage.extractDICT())`. It is included for backlevel " +"compatibility. You will probably use this method ever only for outputting" +" the result to some file. The method detects binary image data and " +"converts them to base64 encoded strings." +msgstr "" +"TextPageの内容をJSON形式の文字列として取得します。`json.dumps(TextPage.extractDICT())` " +"によって作成されます。これは過去の互換性のために含まれています。おそらく、結果をファイルに出力する際にのみこのメソッドを使用するでしょう。このメソッドはバイナリ画像データを検出し、それらをBase64エンコードされた文字列に変換します。" + +#: ../../textpage.rst:99 df3e01a91f7c4ae790499d50c3e85f29 +msgid "" +"Textpage content as a string in XHTML format. Text information detail is " +"comparable with :meth:`extractTEXT`, but also contains images (base64 " +"encoded). This method makes no attempt to re-create the original visual " +"appearance." +msgstr "" +"TextPageの内容をXHTML形式の文字列として取得します。テキスト情報の詳細は :meth:`extractTEXT` " +"と比較できますが、画像も含まれます(Base64エンコードされています)。このメソッドは元の視覚的な外観を再作成しようとしません。" + +#: ../../textpage.rst:105 435edffbc686457dba05b981881c25a0 +msgid "" +"Textpage content as a string in XML format. This contains complete " +"formatting information about every single character on the page: font, " +"size, line, paragraph, location, color, etc. Contains no images. You need" +" an XML package to interpret the output in Python." +msgstr "TextPageの内容をXML形式の文字列として取得します。これにはページ上の各文字に関する完全なフォーマット情報が含まれています:フォント、サイズ、行、段落、位置、色など。画像は含まれていません。Pythonで出力を解釈するにはXMLパッケージが必要です。" + +#: ../../textpage.rst:111 87bd5187b0f44f89ab4104b82f56e019 +msgid "" +"Textpage content as a Python dictionary -- technically similar to " +":meth:`extractDICT`, and it contains that information as a subset " +"(including any images). It provides additional detail down to each " +"character, which makes using XML obsolete in many cases. See below for " +"the structure." +msgstr "" +"TextPageの内容をPythonの辞書として取得します。技術的には :meth:`extractDICT` " +"と類似しており、その情報をサブセットとして含みます(画像も含まれます)。詳細については以下を参照してください。" + +#: ../../textpage.rst:119 895cfa706afc446084e720ac3f93fd03 +msgid "" +"Textpage content as a JSON string. Created by " +"`json.dumps(TextPage.extractRAWDICT())`. You will probably use this " +"method ever only for outputting the result to some file. The method " +"detects binary image data and converts them to base64 encoded strings." +msgstr "" +"TextPageの内容をJSON形式の文字列として取得します。`json.dumps(TextPage.extractRAWDICT())` " +"によって作成されます。おそらく、結果をファイルに出力する際にのみこのメソッドを使用するでしょう。このメソッドはバイナリ画像データを検出し、それらをBase64エンコードされた文字列に変換します。" + +#: ../../textpage.rst:127 ee07f7cf801a4f8d95d545ad5bfc0560 +msgid "Changed in v1.18.2" +msgstr "変更内容 v1.18.2" + +#: ../../textpage.rst:129 4cd763790da84021813e300e32cb9cf6 +msgid "Search for *string* and return a list of found locations." +msgstr "*文字列* を検索し、見つかった位置のリストを返します。" + +#: ../../textpage.rst:131 cdac1bdde5e244499332db6777276c69 +msgid "" +"the string to search for. Upper and lower cases will all match if needle " +"consists of ASCII letters only -- it does not yet work for \"Ä\" versus " +"\"ä\", etc." +msgstr "" +"検索対象の文字列。ASCII文字だけで構成されている場合、大文字と小文字はすべて一致します。これはまだ、例えば \"Ä\" と \"ä\" " +"のような場合には機能しません。" + +#: ../../textpage.rst:132 4453db069b674068b294e21ec53118c5 +msgid "return quadrilaterals instead of rectangles." +msgstr "長方形の代わりに四辺形を返すかどうか。" + +#: ../../textpage.rst ed3dabd5dd09480d9af2c202991f0797 +msgid "Returns" +msgstr "" + +#: ../../textpage.rst:134 a9e6d6c3c1b244359142004ee7c7a2f6 +msgid "" +"a list of :ref:`Rect` or :ref:`Quad` objects, each surrounding a found " +"*needle* occurrence. As the search string may contain spaces, its parts " +"may be found on different lines. In this case, more than one rectangle " +"(resp. quadrilateral) are returned. **(Changed in v1.18.2)** The method " +"**now supports dehyphenation**, so it will find e.g. \"method\", even if " +"it was hyphenated in two parts \"meth-\" and \"od\" across two lines. The" +" two returned rectangles will contain \"meth\" (no hyphen) and \"od\"." +msgstr "" +"見つかった *needle* の出現箇所を囲む :ref:`Rect` または :ref:`Quad` " +"オブジェクトのリスト。検索文字列にはスペースが含まれる可能性があるため、その部分が異なる行に存在することがあります。この場合、複数の長方形(または四辺形)が返されます" +" **(v1.18.2 で変更)** 。このメソッドは **今ではハイフネーションに対応しており** 、例えば \"method\" が " +"\"meth-\" と \"od\" という2つの部分に分かれている場合でも、\"meth\"(ハイフンなし)と \"od\" " +"の2つの長方形が含まれます。" + +#: ../../textpage.rst:136 1406308a92e44433808a1f565e8f7e85 +msgid "**Overview of changes in v1.18.2:**" +msgstr "**v1.18.2 での変更内容の概要:**" + +#: ../../textpage.rst:138 eb8f8e4f7f4c49249c64dce6f62f72f7 +msgid "The `hit_max` parameter has been removed: all hits are always returned." +msgstr "`hit_max` パラメータが削除され、すべてのヒットが常に返されます。" + +#: ../../textpage.rst:139 aff4eacc24194b39abe53618b30ac986 +msgid "" +"The `rect` parameter of the :ref:`TextPage` is now respected: only text " +"inside this area is examined. Only characters with fully contained bboxes" +" are considered. The wrapper method :meth:`Page.search_for` " +"correspondingly supports a *clip* parameter." +msgstr "" +":ref:`TextPage` の `rect` パラメータが尊重され、この領域内のテキストのみが検査されます。完全に含まれる bboxes " +"を持つ文字のみが考慮されます。:meth:`Page.search_for` のラッパーメソッドも対応するクリップパラメータをサポートしています。" + +#: ../../textpage.rst:140 3d50c7047c7644298736ada65850b843 +msgid "**Hyphenated words** are now found." +msgstr "**ハイフネーションされた単語** も見つかります。" + +#: ../../textpage.rst:141 6323511b08df43c8a157fa38e3e750e8 +msgid "" +"**Overlapping rectangles** in the same line are now automatically joined." +" We assume that such separations are an artifact created by multiple " +"marked content groups, containing parts of the same search needle." +msgstr "" +"同じ行内の **重複する長方形** " +"は自動的に結合されます。このような分割は、同じ検索対象の一部を含む複数のマークコンテンツグループによって作成されたアーティファクトと仮定されます。" + +#: ../../textpage.rst:143 14e7f89d3d5c4489a8cbd68df8e257ec +msgid "" +"Example Quad versus Rect: when searching for needle \"pymupdf\", then the" +" corresponding entry will either be the blue rectangle, or, if *quads* " +"was specified, the quad *Quad(ul, ur, ll, lr)*." +msgstr "" +"例:Quad 対 Rect:needle \"pymupdf\" を検索する場合、対応するエントリは青い長方形であるか、*quads* " +"が指定された場合は四辺形 *Quad(ul, ur, ll, lr)* になります。" + +#: ../../textpage.rst:149 3a79ec0fc7fe4ccebd68324e457093ac +msgid "" +"The rectangle associated with the text page. This either equals the " +"rectangle of the creating page or the `clip` parameter of " +":meth:`Page.get_textpage` and text extraction / searching methods." +msgstr "" +"テキストページに関連付けられた矩形。これは、作成ページの矩形または :meth:`Page.get_textpage` " +"およびテキスト抽出/検索メソッドの `clip` パラメーターと一致する場合があります。" + +#: ../../textpage.rst:151 1bfbbebaedbb4052bb3fe3b6dd9c243a +msgid "" +"The output of text searching and most text extractions **is restricted to" +" this rectangle**. (X)HTML and XML output will however always extract the" +" full page." +msgstr "" +"テキスト検索およびほとんどのテキスト抽出の出力は " +"**この矩形に制限されます**。ただし、(X)HTMLおよびXMLの出力は常にフルページを抽出します。" + +#: ../../textpage.rst:156 b141c7f9a7cb42d4a4fcdcd03e611923 +msgid "Structure of Dictionary Outputs" +msgstr "辞書出力の構造" + +#: ../../textpage.rst:157 0ab389a241664dee85374659702e3186 +msgid "" +"Methods :meth:`TextPage.extractDICT`, :meth:`TextPage.extractJSON`, " +":meth:`TextPage.extractRAWDICT`, and :meth:`TextPage.extractRAWJSON` " +"return dictionaries, containing the page's text and image content. The " +"dictionary structures of all four methods are almost equal. They strive " +"to map the text page's information hierarchy of blocks, lines, spans and " +"characters as precisely as possible, by representing each of these by its" +" own sub-dictionary:" +msgstr "" +":meth:`TextPage.extractDICT`、:meth:`TextPage.extractJSON` " +"、:meth:`TextPage.extractRAWDICT` 、:meth:`TextPage.extractRAWJSON` " +"は、ページのテキストおよび画像コンテンツを含む辞書を返します。これらの4つのメソッドの辞書構造はほぼ同じです。これらは、ブロック、行、スパン、文字の情報階層をテキストページにできるだけ正確にマップし、各要素を独自のサブ辞書で表現することを目指しています。" + +#: ../../textpage.rst:159 ac8012d470e1498fbf322b84979d54fe +msgid "A **page** consists of a list of **block dictionaries**." +msgstr "**ページ** は **ブロック辞書** のリストから構成されます。" + +#: ../../textpage.rst:160 b669893b4d0d43018ce1ae7cf03c0cc4 +msgid "A (text) **block** consists of a list of **line dictionaries**." +msgstr "(テキスト) **ブロック** は、**行辞書** のリストから構成されます。" + +#: ../../textpage.rst:161 fadd7e5c79ff4ef7bb836f45bfa54d3e +msgid "A **line** consists of a list of **span dictionaries**." +msgstr "**行** は、**スパン辞書** のリストから構成されます。" + +#: ../../textpage.rst:162 36c8f65fcf1d4c619d438b8f01e187a7 +msgid "" +"A **span** either consists of the text itself or, for the RAW variants, a" +" list of **character dictionaries**." +msgstr "**スパン** は、テキスト自体またはRAWバリアントの場合、**文字辞書** のリストから構成されます。" + +#: ../../textpage.rst:163 ed3f04f51bcd4c31b3e3dd7fd03320c0 +msgid "" +"RAW variants: a **character** is a dictionary of its origin, bbox and " +"unicode." +msgstr "RAWバリアント:**文字** はその起源、bbox、およびUnicodeの辞書です。" + +#: ../../textpage.rst:165 495b9d336f004a809e9f71f332f21699 +msgid "" +"All PyMuPDF geometry objects herein (points, rectangles, matrices) are " +"represented by there **\"like\"** formats: a :data:`rect_like` *tuple* is" +" used instead of a :ref:`Rect`, etc. The reasons for this are performance" +" and memory considerations:" +msgstr "" +"ここでのすべてのPyMuPDFジオメトリオブジェクト(ポイント、矩形、行列)は、:data:`rect_like` *タプル* の形式で " +":ref:`Rect` などの代わりに使用されます。これは、パフォーマンスとメモリの考慮事項からです。" + +#: ../../textpage.rst:167 730e1437a5c243e0862e4cc1b85ddfae +msgid "" +"This code is written in C, where Python tuples can easily be generated. " +"The geometry objects on the other hand are defined in Python source only." +" A conversion of each Python tuple into its corresponding geometry object" +" would add significant -- and largely unnecessary -- execution time." +msgstr "" +"このコードはCで書かれており、Pythonタプルは簡単に生成できます。一方、ジオメトリオブジェクトはPythonソースでのみ定義されています。 " +"各Pythonタプルを対応するジオメトリオブジェクトに変換することは、実行時間を大幅に(かつ不必要に)増加させるでしょう。" + +#: ../../textpage.rst:168 6160bc818c6641d59712647ba09d6f2f +msgid "" +"A 4-tuple needs about 168 bytes, the corresponding :ref:`Rect` 472 bytes " +"- almost three times the size. A \"dict\" dictionary for a text-heavy " +"page contains 300+ bbox objects -- which thus require about 50 KB storage" +" as 4-tuples versus 140 KB as :ref:`Rect` objects. A \"rawdict\" output " +"for such a page will however contain **4 to 5 thousand** bboxes, so in " +"this case we talk about 750 KB versus 2 MB." +msgstr "" +"4つのタプルは約168バイト、対応する :ref:`Rect` は472バイトです - " +"サイズのほぼ3倍です。テキストが豊富なページの「dict」辞書には300以上のbboxオブジェクトが含まれているため、これらは4タプルとして約50" +" KBのストレージを必要としますが、:ref:`Rect` オブジェクトでは約140 " +"KBです。このようなページの「rawdict」出力は、**4,000から5,000** のbboxを含む場合がありますので、この場合は750 " +"KB対2 MBとなります。" + +#: ../../textpage.rst:170 553b7505dc654bce9acc63f7985e8ee7 +msgid "" +"Please also note, that only **bboxes** (= :data:`rect_like` 4-tuples) are" +" returned, whereas a :ref:`TextPage` actually has the **full position " +"information** -- in :ref:`Quad` format. The reason for this decision is " +"again a memory consideration: a :data:`quad_like` needs 488 bytes (3 " +"times the size of a :data:`rect_like`). Given the mentioned amounts of " +"generated bboxes, returning :data:`quad_like` information would have a " +"significant impact." +msgstr "" +"また、注意してください。 **bboxes** (rect_like 4タプル)のみが返され、:ref:`TextPage` には " +"**完全な位置情報** が含まれていることです - Quad形式で。これはメモリの考慮事項です。:data:`quad_like` " +"には488バイト(:data:`rect_like` " +"の3倍のサイズ)が必要です。言及した数の生成されたbboxを考えると、:data:`quad_like` " +"情報を返すことは重大な影響を与えるでしょう。" + +#: ../../textpage.rst:172 99cd9aa1d0454fccb81b7754e49fe731 +msgid "" +"In the vast majority of cases, we are dealing with **horizontal text " +"only**, where bboxes provide entirely sufficient information." +msgstr "ほとんどの場合、**水平テキストのみ** を扱っており、bboxは十分な情報を提供します。" + +#: ../../textpage.rst:174 887228756dd54500a2972ea8eaaae72b +msgid "" +"In addition, **the full quad information is not lost**: it can be " +"recovered as needed for lines, spans, and characters by using the " +"appropriate function from the following list:" +msgstr "さらに、**フルクワッド情報は失われていません**。必要に応じて、以下のリストから適切な関数を使用して行、スパン、および文字のクワッド情報を回復できます:" + +#: ../../textpage.rst:176 8bf018658e234a00bc0de17969dae3bb +msgid ":meth:`recover_quad` -- the quad of a complete span" +msgstr ":meth:`recover_quad` – 完全なスパンのクワッド" + +#: ../../textpage.rst:177 6c3eebc9418f44c7831b2ffcb508609f +msgid ":meth:`recover_span_quad` -- the quad of a character subset of a span" +msgstr ":meth:`recover_span_quad` – スパンの一部の文字のクワッド" + +#: ../../textpage.rst:178 424c09f4c5c348309525195aab757ed0 +msgid ":meth:`recover_line_quad` -- the quad of a line" +msgstr ":meth:`recover_line_quad` – 行のクワッド" + +#: ../../textpage.rst:179 400df8764f0e431dac567a85e4bbd78f +msgid ":meth:`recover_char_quad` -- the quad of a character" +msgstr ":meth:`recover_char_quad` – 文字のクワッド" + +#: ../../textpage.rst:181 96e1e1fa7b2e47cda3509c378547e0c3 +msgid "" +"As mentioned, using these functions is ever only needed, if the text is " +"**not written horizontally** -- `line[\"dir\"] != (1, 0)` -- and you need" +" the quad for text marker annotations (:meth:`Page.add_highlight_annot` " +"and friends)." +msgstr "" +"前述のように、これらの関数を使用する必要があるのは、テキストが **水平に書かれていない** 場合 - `line[\"dir\"] != (1," +" 0)` - およびテキストマーカーアノテーション(:meth:`Page.add_highlight_annot` " +"など)でクワッドが必要な場合のみです。" + +#: ../../textpage.rst:191 595f43998ba0436e97c27949a639e0cf +msgid "Page Dictionary" +msgstr "ページ辞書" + +#: ../../textpage.rst:194 ../../textpage.rst:208 ../../textpage.rst:251 +#: ../../textpage.rst:263 ../../textpage.rst:285 ../../textpage.rst:369 +#: 1028a325d80d4f058a4b0359d7d4598d 25712968c441436eba02cf3a671cc015 +#: b97697d8ddd34a02b36ff938c56fe5e3 cffcfc0ebdc14a359647c0a579bc33b5 +#: e1d612602b454ac8a1f8fc9f345c7780 f8641ab264d24089a5504d8795434eb1 +msgid "**Key**" +msgstr "**キー**" + +#: ../../textpage.rst:194 ../../textpage.rst:208 ../../textpage.rst:251 +#: ../../textpage.rst:263 ../../textpage.rst:285 ../../textpage.rst:369 +#: 0bf110ecaacf4aa28287fd0e005ffddd 289abda4fd474dcdb960b408ef88468b +#: 3bf64d336de0427991445adcd0168093 574109b9a38f400a914c84c0356caf88 +#: a983b778e0064d50b3eaea79e9f4f3b6 c1f614c73fff407b9086f838a02bdaa2 +msgid "**Value**" +msgstr "**値**" + +#: ../../textpage.rst:196 ../../textpage.rst:214 +#: eb90193de1ce4b5c9eb39d385278964b f5b723f222a64cc68a99030c73b60d8f +msgid "width" +msgstr "" + +#: ../../textpage.rst:196 14cc2215742c4da9bd200255c8f933c5 +msgid "width of the `clip` rectangle *(float)*" +msgstr "`clip` 矩形の幅 *(float)*" + +#: ../../textpage.rst:197 ../../textpage.rst:215 +#: 3aa7f8da25374d7296bbe5236ae369aa f91db5b834224a16ad9aeabc73e98e8e +msgid "height" +msgstr "" + +#: ../../textpage.rst:197 c231566c6b4c4cb3b4fe79f2f8253c3c +msgid "height of the `clip` rectangle *(float)*" +msgstr "`clip` 矩形の高さ *(float)*" + +#: ../../textpage.rst:198 31ed4b8f4f5541c3b722a5bdd6a6c34d +msgid "blocks" +msgstr "" + +#: ../../textpage.rst:198 a1079925560f4211bc1fb868e6b872a1 +msgid "*list* of block dictionaries" +msgstr "ブロック辞書の *list* " + +#: ../../textpage.rst:202 08e070c1094e4ccca4fc04700d353970 +msgid "Block Dictionaries" +msgstr "ブロック辞書" + +#: ../../textpage.rst:203 7d0906215b32466288832e1b6896048a +msgid "" +"Block dictionaries come in two different formats for **image blocks** and" +" for **text blocks**." +msgstr "ブロック辞書は、**画像ブロック** と **テキストブロック** の2つの異なるフォーマットで提供されます。" + +#: ../../textpage.rst:205 7e62f09f297148beb698e62d3afc4c36 +msgid "**Image block:**" +msgstr "**画像ブロック:**" + +#: ../../textpage.rst:210 ../../textpage.rst:253 +#: 7b13ee4d579245ceb12b6f3cf76c1fab d85e1acb01be4077b861168ee129767f +msgid "type" +msgstr "" + +#: ../../textpage.rst:210 efd2b31cf58548bebfe784580e65e609 +#, fuzzy +msgid "1 = image (``int``)" +msgstr "1 = 画像 *(int)*" + +#: ../../textpage.rst:211 ../../textpage.rst:254 ../../textpage.rst:265 +#: ../../textpage.rst:287 ../../textpage.rst:372 +#: 03d37603b3344af6b5e7da54db015b1b 319eb162bb77446380ad084c386e7880 +#: 489ec9a675314683b3e30183ac86b467 d66375110a74421c99ad70cba6cc18ef +#: f6aa14043a8a4b699002979f9b457101 +msgid "bbox" +msgstr "" + +#: ../../textpage.rst:211 51701c8e494240cb8459ca8170dd7511 +msgid "image bbox on page (:data:`rect_like`)" +msgstr "ページ上の画像の境界ボックス(:data:`rect_like`)" + +#: ../../textpage.rst:212 ../../textpage.rst:255 +#: dad8f7481ff04520890bf9540defc8d1 ee650f2cc2af439ebc7dfe5fcf770410 +msgid "number" +msgstr "" + +#: ../../textpage.rst:212 e77220a8a6de41c39d609dff474be846 +#, fuzzy +msgid "block count (``int``)" +msgstr "ブロック数 *(int)*" + +#: ../../textpage.rst:213 58b4e56c8c424eed8d03bfeaeef4ea93 +msgid "ext" +msgstr "" + +#: ../../textpage.rst:213 1583d8baf0cb4f9b88f96e64556d2f6c +#, fuzzy +msgid "image type (``str``), as file extension, see below" +msgstr "画像の種類 *(str)*、ファイル拡張子として、以下参照" + +#: ../../textpage.rst:214 3786a0ca184247ccbac8ce835f4b7dd6 +#, fuzzy +msgid "original image width (``int``)" +msgstr "元の画像の幅 *(int)*" + +#: ../../textpage.rst:215 d62202c977f4427897519364fff16fa0 +#, fuzzy +msgid "original image height (``int``)" +msgstr "元の画像の高さ *(int)*" + +#: ../../textpage.rst:216 e146c7ea5f6f460db3e8980b52c734cf +msgid "colorspace" +msgstr "" + +#: ../../textpage.rst:216 a2b2fbd44fc947adb67a432fe838d702 +#, fuzzy +msgid "colorspace component count (``int``)" +msgstr "カラースペースのコンポーネント数 *(int)*" + +#: ../../textpage.rst:217 b011e27b5b7f4071accc210533eeb410 +msgid "xres" +msgstr "" + +#: ../../textpage.rst:217 76876d5bff234745968c9b29c479abf0 +#, fuzzy +msgid "resolution in x-direction (``int``)" +msgstr "x方向の解像度 *(int)*" + +#: ../../textpage.rst:218 ca47a153418e437983d2d8df4624aaec +msgid "yres" +msgstr "" + +#: ../../textpage.rst:218 45276559019b42ac85ff614a8b179484 +#, fuzzy +msgid "resolution in y-direction (``int``)" +msgstr "y方向の解像度 *(int)*" + +#: ../../textpage.rst:219 5199ee0421fe41bb90ef866f83525686 +msgid "bpc" +msgstr "" + +#: ../../textpage.rst:219 3af264e6c2f44b12ac9f208644fceacd +#, fuzzy +msgid "bits per component (``int``)" +msgstr "コンポーネントごとのビット数 *(int)*" + +#: ../../textpage.rst:220 30a1799870544636840f2ca52f4213b6 +msgid "transform" +msgstr "" + +#: ../../textpage.rst:220 bb9357da9bf547baacb8163e91b5b9ed +msgid "matrix transforming image rect to bbox (:data:`matrix_like`)" +msgstr "画像矩形を境界ボックスに変換する行列(:data:`matrix_like`)" + +#: ../../textpage.rst:221 ../../textpage.rst:292 +#: d46d130394b74529afbeafcf2648b049 fec7c57a41bd4be0ad7387af0fbdc847 +msgid "size" +msgstr "" + +#: ../../textpage.rst:221 19a074e1bba545f0a6e1f4fa51b35bc8 +#, fuzzy +msgid "size of the image in bytes (``int``)" +msgstr "画像のサイズ(バイト単位)*(int)*" + +#: ../../textpage.rst:222 0ada30bc2c4c455ca8a2eccf8ec5645a +msgid "image" +msgstr "" + +#: ../../textpage.rst:222 299044835f19498b9364b83a4be51966 +#, fuzzy +msgid "image content (``bytes``)" +msgstr "画像コンテンツ *(bytes)*" + +#: ../../textpage.rst:223 0f80267e7b8a43e48e7438031afc13b7 +msgid "mask" +msgstr "" + +#: ../../textpage.rst:223 63331458b66141edb5d0d8d16c641424 +msgid "image mask content (``bytes``) for transparent images" +msgstr "" + +#: ../../textpage.rst:226 3c28b6119f1e4c05845e754c4e7762df +msgid "" +"Possible values of the \"ext\" key are \"bmp\", \"gif\", \"jpeg\", " +"\"jpx\" (JPEG 2000), \"jxr\" (JPEG XR), \"png\", \"pnm\", and \"tiff\"." +msgstr "" +"\"ext\"キーの可能な値は、\"bmp\"、\"gif\"、\"jpeg\"、\"jpx\"(JPEG 2000)、\"jxr\"(JPEG " +"XR)、\"png\"、\"pnm\"、および \"tiff\" です。" + +#: ../../textpage.rst:230 6cc71007ae5943aba16bc9df25bcbfc9 +msgid "" +"An image block is generated for **all and every image occurrence** on the" +" page. Hence there may be duplicates, if an image is shown at different " +"locations." +msgstr "ページ上の **すべての画像** が画像ブロックとして生成されます。したがって、画像が異なる場所で表示される場合、重複が発生する可能性があります。" + +#: ../../textpage.rst:232 968df807caf345efb4f5145afb265729 +msgid "" +":ref:`TextPage` and corresponding method :meth:`Page.get_text` are " +"**available for all document types**. Only for PDF documents, methods " +":meth:`Document.get_page_images` / :meth:`Page.get_images` offer some " +"overlapping functionality as far as image lists are concerned. But both " +"lists **may or may not** contain the same items. Any differences are most" +" probably caused by one of the following:" +msgstr "" +":ref:`TextPage` および対応するメソッド :meth:`Page.get_text` は **すべてのドキュメントタイプ** " +"で利用可能です。PDFドキュメントの場合、:meth:`Document.get_page_images` / " +":meth:`Page.get_images` は画像リストに関する機能が一部重複しています。ただし、これらのリストは同じアイテムを " +"**含むかどうかは必ずしも保証されません**。違いがある場合、その原因はおそらく次のいずれかです。" + +#: ../../textpage.rst:234 58c9e4620f5c45b5a55b38b92a6fd494 +msgid "" +"\"Inline\" images (see page 214 of the :ref:`AdobeManual`) of a PDF page " +"are contained in a textpage, but **do not appear** in " +":meth:`Page.get_images`." +msgstr "" +"PDFページの「インライン」画像(:ref:`AdobeManual` " +"のページ214を参照)はテキストページに含まれていますが、:meth:`Page.get_images` には **表示されません**。" + +#: ../../textpage.rst:235 1976d291ba2043ed8edc7a38a5dab528 +msgid "" +"Annotations may also contain images -- these will **not appear** in " +":meth:`Page.get_images`." +msgstr "アノテーションにも画像が含まれることがあります。これらは :meth:`Page.get_images` には **表示されません**。" + +#: ../../textpage.rst:236 06ed842b388847b7a7113443675b76cb +msgid "" +"Image blocks in a textpage are generated for **every** image location -- " +"whether or not there are any duplicates. This is in contrast to " +":meth:`Page.get_images`, which will list each image only once (per " +"reference name)." +msgstr "" +"テキストページの画像ブロックは、画像の場所 **ごとに** " +"生成されます。重複があるかどうかに関係なくです。これは、:meth:`Page.get_images` " +"では各画像が1回だけリストされる(参照名ごとに)のとは対照的です。" + +#: ../../textpage.rst:237 9f5942f2272b483590d6ed0a8a5f178e +msgid "" +"Images mentioned in the page's :data:`object` definition will **always** " +"appear in :meth:`Page.get_images` [#f1]_. But it may happen, that there " +"is no \"display\" command in the page's :data:`contents` (erroneously or " +"on purpose). In this case the image will **not appear** in the textpage." +msgstr "" +"ページの :data:`object` 定義で言及されている画像は、常に :meth:`Page.get_images` に表示されます " +"[#f1]_。ただし、ページの :data:`contents` に「表示」コマンドがない場合、画像はテキストページに表示されません。" + +#: ../../textpage.rst:239 b59961eabc8f46f486afc6f2c788b491 +msgid "" +"The image's \"transformation matrix\" is defined as the matrix, for which" +" the expression `bbox / transform == pymupdf.Rect(0, 0, 1, 1)` is true, " +"lookup details here: :ref:`ImageTransformation`." +msgstr "" +"画像の「変換行列」は、`bbox / transform == pymupdf.Rect(0, 0, 1, 1)` " +"という式が真である場合の行列です。詳細はこちらを参照してください: :ref:`ImageTransformation`。" + +#: ../../textpage.rst:241 43cad69a55ef4f20816d50cc8efbe1b6 +msgid "" +"A transparent image may be accompanied by a mask image. This is stored " +"under key `\"mask\"` and has the format of a `DeviceGray` PNG image. " +"Otherwise the value of this key is ``None``. If present, you may be able " +"to recover (an equivalent of) the original image -- i.e. with " +"transparency -- by creating :ref:`Pixmap` objects from the \"image\", " +"respectively \"mask\" values and overlay them. This is not guaranteed to " +"always work because mask images come in multiple formats, of which not " +"all qualify for the conditions under which overlaying Pixmaps are " +"supported. Here is a code snippet:" +msgstr "" + +#: ../../textpage.rst:248 a317df3004da43a6a184efa0266a463d +msgid "**Text block:**" +msgstr "**テキストブロック:**" + +#: ../../textpage.rst:253 030797816aa549989b47bbb0e2632ff6 +msgid "0 = text *(int)*" +msgstr "0 = テキスト *(int)*" + +#: ../../textpage.rst:254 5845dcf4d2ff4b8aa3b8696f11fd4a20 +msgid "block rectangle, :data:`rect_like`" +msgstr "ブロックの矩形、:data:`rect_like`" + +#: ../../textpage.rst:255 392754de728c42fcae42b1f9b5a1145e +msgid "block count *(int)*" +msgstr "ブロック数 *(int)*" + +#: ../../textpage.rst:256 3bbc359f17e345399daa6507bdeea828 +msgid "lines" +msgstr "" + +#: ../../textpage.rst:256 4042757af1954925a1cf44f2a1335dce +msgid "*list* of text line dictionaries" +msgstr "テキスト行の辞書の *list*" + +#: ../../textpage.rst:260 3d06aedad94149bc9fda2938390bc25f +msgid "Line Dictionary" +msgstr "" + +#: ../../textpage.rst:265 2aa6b4384d6a43ae8535097a2f4d7183 +msgid "line rectangle, :data:`rect_like`" +msgstr "行の矩形、:data:`rect_like`" + +#: ../../textpage.rst:266 057f881221e5432c812c73df38a30476 +msgid "wmode" +msgstr "" + +#: ../../textpage.rst:266 9b8a69aaf19842ae8147069b78b00dd1 +msgid "writing mode *(int)*: 0 = horizontal, 1 = vertical" +msgstr "書き込みモード *(int)*:0 = 水平、1 = 垂直" + +#: ../../textpage.rst:267 692c2dda2d01494db929c1c879dfbdba +msgid "dir" +msgstr "" + +#: ../../textpage.rst:267 9b0ca2dd38e24416955b894693d6ff52 +msgid "writing direction, :data:`point_like`" +msgstr "書き込み方向、:data:`point_like`" + +#: ../../textpage.rst:268 230317eadc6b4662a8970d6c2ba18495 +msgid "spans" +msgstr "" + +#: ../../textpage.rst:268 bf9fdcd40df14bcda53145f233e3ef56 +msgid "*list* of span dictionaries" +msgstr "スパン辞書の *list*" + +#: ../../textpage.rst:271 fa187d7adc9a47ecb61fbbdb00651da2 +#, fuzzy +msgid "" +"The value of key *\"dir\"* is the **unit vector** `dir = (cosine, -sine)`" +" of the angle, which the text has relative to the x-axis [#f2]_. See the " +"following picture: The word in each quadrant (counter-clockwise from top-" +"right to bottom-right) is rotated by 30, 120, 210 and 300 degrees " +"respectively." +msgstr "" +"キー *\"dir\"* の値は、テキストが x 軸に対してどの角度であるかを示す **ユニットベクトル** `dir = (cosine, " +"sine)` " +"です。次の図を参照してください:各象限内の単語(右上から反時計回りに、上から下まで)は、それぞれ30、120、210、300度回転しています。" + +#: ../../textpage.rst:277 9bb9215f24144aa98bdf2ba3bf53f511 +msgid "Span Dictionary" +msgstr "" + +#: ../../textpage.rst:279 ede5941b8dfa4244a8c0216c6f7e2cad +msgid "" +"Spans contain the actual text. A line contains **more than one span " +"only**, if it contains text with different font properties." +msgstr "スパンには実際のテキストが含まれています。フォントのプロパティが異なるテキストを含む場合を除き、1行には複数のスパンが含まれます。" + +#: ../../textpage.rst:281 6d53a255181a47a89ae1ffa5b80e2b1a +msgid "Changed in version 1.14.17 Spans now also have a *bbox* key (again)." +msgstr "バージョン1.14.17で変更されました:スパンには *bbox* キーが含まれています(再び)。" + +#: ../../textpage.rst:282 7181bf3c38f348e28414b10b1f15325a +msgid "Changed in version 1.17.6 Spans now also have an *origin* key." +msgstr "バージョン1.17.6で変更されました:スパンには *origin* キーも含まれています。" + +#: ../../textpage.rst:287 7a0c3e49b06244d5a3f9b08938753c70 +msgid "span rectangle, :data:`rect_like`" +msgstr "スパンの矩形、:data:`rect_like`" + +#: ../../textpage.rst:288 ../../textpage.rst:371 +#: 941eff0bda7848b1aded9b21d87508d4 fc611070d2e14a749a6523327a435ab1 +msgid "origin" +msgstr "" + +#: ../../textpage.rst:288 4e9cfcbb63dd4d3b96d02cac7a72d335 +msgid "the first character's origin, :data:`point_like`" +msgstr "最初の文字の原点、:data:`point_like`" + +#: ../../textpage.rst:289 ffe4ce11ff8e482d8125e561122e8a01 +msgid "font" +msgstr "" + +#: ../../textpage.rst:289 c193bd6161a1476a895ff5d05e36e414 +msgid "font name *(str)*" +msgstr "フォント名 *(str)*" + +#: ../../textpage.rst:290 0f4cf45ac40742c2bdbba91439182885 +msgid "ascender" +msgstr "" + +#: ../../textpage.rst:290 9d18fc7dfd794af3863c8617f6cca80b +msgid "ascender of the font *(float)*" +msgstr "フォントのアセンダー *(float)*" + +#: ../../textpage.rst:291 c8355e31a600438d9aa2f5c09ad08a32 +msgid "descender" +msgstr "" + +#: ../../textpage.rst:291 642a93c2137f4f22a445251b97134d53 +msgid "descender of the font *(float)*" +msgstr "フォントのディセンダー *(float)*" + +#: ../../textpage.rst:292 0f948a6afd6c40c2af7e4682ea3f0c41 +msgid "font size *(float)*" +msgstr "フォントサイズ *(float)*" + +#: ../../textpage.rst:293 3481c94bc1974e3e87ad16a48d7bee02 +msgid "flags" +msgstr "" + +#: ../../textpage.rst:293 7ad9d44635b2454b8676ec29468e86d6 +msgid "font characteristics *(int)*" +msgstr "フォントの特性 *(int)*" + +#: ../../textpage.rst:294 1bea765fcc0d440780a84e5ff7428ab5 +msgid "char_flags" +msgstr "" + +#: ../../textpage.rst:294 6bbef3a00ae64fe080a4f63e69c11277 +#, fuzzy +msgid "char characteristics *(int)*" +msgstr "フォントの特性 *(int)*" + +#: ../../textpage.rst:295 8eb70c16b7d646e48893ef568ff42402 +msgid "color" +msgstr "" + +#: ../../textpage.rst:295 59eabfe4fab04ba2affa939c5fde49b7 +#, fuzzy +msgid "text color in sRGB format 0xRRGGBB *(int)*." +msgstr "sRGB形式のテキストカラー *(int)*" + +#: ../../textpage.rst:296 fd9113d3e57447b5b4bdc780cde1dda4 +msgid "alpha" +msgstr "" + +#: ../../textpage.rst:296 d40a9c36fe9f4f0586641e3cf2350ff2 +msgid "text opacity 0..255 *(int)*." +msgstr "" + +#: ../../textpage.rst:297 94ac61d4a8494149a0be110b7b31fbb7 +msgid "text" +msgstr "" + +#: ../../textpage.rst:297 71f33f09b5dc4d26a232e868d6a71835 +msgid "(only for :meth:`extractDICT`) text *(str)*" +msgstr "(:meth:`extractDICT` のみ)テキスト *(str)* " + +#: ../../textpage.rst:298 badb479f9a6c4f8cb637a1fc27887492 +msgid "chars" +msgstr "" + +#: ../../textpage.rst:298 5f31662e0fd54663932aef0cf696bc81 +msgid "(only for :meth:`extractRAWDICT`) *list* of character dictionaries" +msgstr "(:meth:`extractRAWDICT` のみ)文字の辞書の*list*" + +#: ../../textpage.rst:301 2e5c684a3b98457dad2eeca01251bcbe +msgid "|history_begin|" +msgstr "" + +#: ../../textpage.rst:303 af2202bc94eb4986b35814e7062ea2af +msgid "*(New in version 1.25.3.0):* Added *\"alpha\"* item." +msgstr "" + +#: ../../textpage.rst:305 346a48e9dc164f95a383ca075ec44282 +msgid "" +"*(New in version 1.16.0):* *\"color\"* is the text color encoded in sRGB " +"(int) format, e.g. 0xFF0000 for red. There are functions for converting " +"this integer back to formats (r, g, b) (PDF with float values from 0 to " +"1) :meth:`sRGB_to_pdf`, or (R, G, B), :meth:`sRGB_to_rgb` (with integer " +"values from 0 to 255)." +msgstr "" +"*バージョン1.16.0で新しく追加された項目:*. *「color」* " +"はsRGB(int)形式でエンコードされたテキストカラーで、例えば赤の場合は0xFF0000です。この整数をフォーマット(r、g、b)(PDFでは0から1の範囲の浮動小数点値)" +" :meth:`sRGB_to_pdf` または(R、G、B)、:meth:`sRGB_to_rgb` " +"(整数値で0から255の範囲)に変換するための関数があります。" + +#: ../../textpage.rst:307 01cb891c462347beb23cc803ba6f273d +msgid "" +"*(New in v1.18.5):* *\"ascender\"* and *\"descender\"* are font " +"properties, provided relative to :data:`fontsize` 1. Note that descender " +"is a negative value. The following picture shows the relationship to " +"other values and properties." +msgstr "" +"*バージョン1.18.5で新しく追加された項目:* *「ascender」* と *「descender」* は、フォントのプロパティで、 " +":data:`fontsize` " +"1に対して提供されます。ディセンダーは負の値であることに注意してください。以下の画像は、他の値やプロパティとの関係を示しています。" + +#: ../../textpage.rst:309 38509ecdb88249a89b1f87e100af2794 +msgid "|history_end|" +msgstr "" + +#: ../../textpage.rst:314 2ba023fe0c7d4f3a84c985d6d42257e0 +msgid "" +"These numbers may be used to compute the minimum height of a character " +"(or span) -- as opposed to the standard height provided in the \"bbox\" " +"values (which actually represents the **line height**). The following " +"code recalculates the span bbox to have a height of **fontsize** exactly " +"fitting the text inside:" +msgstr "" +"これらの数字は、文字(またはスパン)の最小の高さを計算するために使用できます。これは、実際には行の高さを表す「bbox」値ではなく、フォントサイズに完全に合わせるための高さを表します。次のコードは、スパンのbboxを、内部のテキストに完全に合わせる高さが" +" **フォントサイズ** となるように再計算します:" + +#: ../../textpage.rst:324 23459ad4eef14d39af6467b841bb249d +msgid "" +"The above calculation may deliver a **larger** height! This may e.g. " +"happen for OCRed documents, where the risk of all sorts of text artifacts" +" is high. MuPDF tries to come up with a reasonable bbox height, " +"independently from the :data:`fontsize` found in the PDF. So please " +"ensure that the height of `span[\"bbox\"]` is **larger** than " +"`span[\"size\"]`." +msgstr "" +"上記の計算は、**より大きな** " +"高さをもたらす可能性があります。これは、OCRedドキュメントなどで発生する可能性があります。MuPDFは、PDFに存在する " +":data:`fontsize` から独立して、適切なbboxの高さを見つけ出そうとします。したがって、`span[\"bbox\"]` の高さが" +" `span[\"size\"]` よりも大きいことを確認してください。" + +#: ../../textpage.rst:326 8a9f78a3ca49476bb09d47f7c0037fd2 +msgid "" +"You may request PyMuPDF to do all of the above automatically by executing" +" `pymupdf.TOOLS.set_small_glyph_heights(True)`. This sets a global " +"parameter so that all subsequent text searches and text extractions are " +"based on reduced glyph heights, where meaningful." +msgstr "" +"PyMuPDFに対して、`pymupdf.TOOLS.set_small_glyph_heights(True)` " +"を実行して、上記のすべてを自動的に実行するように依頼することができます。これは、すべての後続のテキスト検索とテキスト抽出が、縮小されたグリフの高さを基に行われるようにするためのグローバルパラメータを設定します。" + +#: ../../textpage.rst:328 4c2b61999a4e4a13b8d42222a75f0b77 +msgid "" +"The following shows the original span rectangle in red and the rectangle " +"with re-computed height in blue." +msgstr "以下は、元のspanの矩形を赤で、再計算された高さを持つ矩形を青で示しています。" + +#: ../../textpage.rst:334 de6eebad5e374780b4087dafc5a35cea +msgid "" +"*\"flags\"* is an integer, which represents font properties except for " +"the first bit 0. They are to be interpreted like this:" +msgstr "*\"flags\"* は、フォントのプロパティを表す整数で、最初のビット 0 を除いて次のように解釈されます:" + +#: ../../textpage.rst:336 ba741a3fc759485985d49c19f01c9ef8 +#, fuzzy +msgid "" +"bit 0: superscripted (:data:`TEXT_FONT_SUPERSCRIPT`) -- not a font " +"property, detected by MuPDF code." +msgstr "bit 0: superscripted (2\\ :sup:`0`) - フォントのプロパティではなく、MuPDFコードによって検出されます。" + +#: ../../textpage.rst:337 78e75a3016a143eebba37096ad39d511 +msgid "bit 1: italic (:data:`TEXT_FONT_ITALIC`)" +msgstr "" + +#: ../../textpage.rst:338 dea957fa07fb47e58b4eaa819d064d78 +#, fuzzy +msgid "bit 2: serifed (:data:`TEXT_FONT_SERIFED`)" +msgstr "ビット 2: セリフ付き (2\\ :sup:`2`)" + +#: ../../textpage.rst:339 9af423dfcf754de499eec516291b3dfa +#, fuzzy +msgid "bit 3: monospaced (:data:`TEXT_FONT_MONOSPACED`)" +msgstr "ビット 3: モノスペース (2\\ :sup:`3`)" + +#: ../../textpage.rst:340 251453629796445893d21c28cc8124ea +msgid "bit 4: bold (:data:`TEXT_FONT_BOLD`)" +msgstr "" + +#: ../../textpage.rst:342 59a6d01576154fe9bf8e8b9051b33213 +msgid "Test these characteristics like so:" +msgstr "これらの特性は、次のようにテストできます:" + +#: ../../textpage.rst:348 bfc7d6a052ce4fce8790ede3a436fb69 +msgid "" +"Bits 1 thru 4 are font properties, i.e. encoded in the font program. " +"Please note, that this information is not necessarily correct or " +"complete: fonts quite often contain wrong data here." +msgstr "" +"ビット1からビット4までがフォントのプロパティであり、つまりフォントプログラムにエンコードされています。 " +"ただし、この情報は必ずしも正確または完全ではないことに注意してください。フォントにはしばしば誤ったデータが含まれていることがあります。" + +#: ../../textpage.rst:350 4d1498d57d2b4c69ba82c3d86493fb97 +msgid "" +"*\"char_flags\"* is an integer, which represents extra character " +"properties:" +msgstr "" + +#: ../../textpage.rst:352 3612fbd5dadd402d99e182ef3b916ea7 +msgid "bit 0: strikeout." +msgstr "" + +#: ../../textpage.rst:353 3fdaa90226ad4e3dbe95664df8eea0cf +msgid "bit 1: underline." +msgstr "" + +#: ../../textpage.rst:354 0c3f086196de4719af885a8be8c7e0d0 +msgid "bit 2: synthetic (always 0, see char dictionary)." +msgstr "" + +#: ../../textpage.rst:355 8168d259a6994df58b08b6a5bb6e528d +msgid "bit 3: filled." +msgstr "" + +#: ../../textpage.rst:356 b15cbf775d8242179c6218dcf678dfbc +msgid "bit 4: stroked." +msgstr "" + +#: ../../textpage.rst:357 fe26904a6aef423cb032d941a3f16e7f +msgid "bit 5: clipped." +msgstr "" + +#: ../../textpage.rst:359 cc08341dee4841cdb9efce8f8328c72c +msgid "" +"For example if not filled and not stroked (`if not (char_flags & 2**3 & " +"2**4): ...`) then the text will be invisible." +msgstr "" + +#: ../../textpage.rst:362 1a73125e6b5d4636bef571142a56d348 +msgid "(`char_flags` is new in v1.25.2.)" +msgstr "" + +#: ../../textpage.rst:366 fc0755117ea040d38d703aefe8a1d835 +msgid "Character Dictionary for :meth:`extractRAWDICT`" +msgstr "文字のディクショナリ、:meth:`extractRAWDICT`" + +#: ../../textpage.rst:371 d5cbb8a9af1f4428896b1b7fc4d8c549 +msgid "character's left baseline point, :data:`point_like`" +msgstr "文字の左ベースラインポイント、:data:`point_like`" + +#: ../../textpage.rst:372 a46239e4f25e455bb2cdf5860328b0d7 +msgid "character rectangle, :data:`rect_like`" +msgstr "文字の矩形、:data:`rect_like`" + +#: ../../textpage.rst:373 446a80c6a1ac4c898322c4e4c35aa223 +msgid "synthetic" +msgstr "" + +#: ../../textpage.rst:373 739878748b6447ef82df79dedaf0f65a +msgid "bool." +msgstr "" + +#: ../../textpage.rst:374 698361d0ea704ea78d1c12e2544b28b4 +msgid "c" +msgstr "" + +#: ../../textpage.rst:374 24b9cd869f7344bd90d6f40b1c3862d3 +msgid "the character (unicode)" +msgstr "文字(ユニコード)" + +#: ../../textpage.rst:377 9a4e847fd26a450ab6af91fa2f536982 +msgid "(`synthetic` is new in v1.25.3.)" +msgstr "" + +#: ../../textpage.rst:379 8ad57f9575ed4fa9bdfbde5224fb43a3 +msgid "" +"This image shows the relationship between a character's bbox and its " +"quad: |textpagechar|" +msgstr "この画像は、文字のbboxとそのquadの関係を示しています:|textpagechar|" + +#: ../../textpage.rst:381 1917647d6d434b7e9f9f59d21e6be2b1 +#: 3f06e78ea445472d92030debab4e3b04 +msgid "textpagechar" +msgstr "" + +#: ../../textpage.rst:386 c5dd21ec4aaf4022b18a1dec50c7908d +msgid "Footnotes" +msgstr "脚注" + +#: ../../textpage.rst:387 46ddf86e85364b05abc5c9ea6a706b17 +msgid "" +"Image specifications for a PDF page are done in a page's (sub-) " +":data:`dictionary`, called `/Resources`. Resource dictionaries can be " +"**inherited** from any of the page's parent objects (usually the " +":data:`catalog` -- the top-level parent). The PDF creator may e.g. define" +" one `/Resources` on file level, naming all images and / or all fonts " +"ever used by any page. In these cases, :meth:`Page.get_images` and " +":meth:`Page.get_fonts` will consequently return the same lists for all " +"pages. If desired, this situation can be reverted using " +":meth:`Page.clean_contents`. After execution, the page's object " +"definition will show fonts and images that are actually used." +msgstr "" + +#: ../../textpage.rst:389 63c6426e75ed40b49222361f407a4bec +msgid "" +"The coordinate systems of MuPDF and PDF are different in that MuPDF uses " +"the page's top-left point as `(0, 0)`. In PDF, this is the bottom-left " +"point. Therefore, the positive direction for MuPDF's y-axis is **from top" +" to bottom**. This causes the sign change for the sine value here: a " +"**negative** value indicates anti-clockwise rotation of the text." +msgstr "" + +#: ../../footer.rst:60 a56c8b8ce8ae4441b082d7b78de8ab79 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "Image specifications for a PDF page " +#~ "are done in a page's (sub-) " +#~ ":data:`dictionary`, called *\"/Resources\"*. " +#~ "Resource dictionaries can be **inherited** " +#~ "from the page's parent object (usually" +#~ " the :data:`catalog`). The PDF creator " +#~ "may e.g. define one */Resources* on " +#~ "file level, naming all images and " +#~ "all fonts ever used by any page." +#~ " In these cases, :meth:`Page.get_images` " +#~ "and :meth:`Page.get_fonts` will return the " +#~ "same lists for all pages." +#~ msgstr "" +#~ "PDFページの画像仕様は、ページの(サブ) :data:`dictionary` *「/Resources」*" +#~ " として行われます。リソースディクショナリは、通常 :data:`catalog` " +#~ "のページ親オブジェクトから継承されます。PDFの作成者は、ファイル全体で1つの/ */Resources* " +#~ "を定義し、すべてのページで使用されるすべての画像とフォントを名前で指定する場合があります。これらの場合、:meth:`Page.get_images`" +#~ " と :meth:`Page.get_fonts` はすべてのページに対して同じリストを返します。" + +#~ msgid "*(Changed in v1.18.0)* -- new dict key *number*, the block number." +#~ msgstr "*(v1.18.0で変更)* - 新しいdictキー *number* 、ブロック番号。" + +#~ msgid "" +#~ "*(Changed in v1.18.11)* -- new dict " +#~ "key *transform*, the image transformation " +#~ "matrix for image blocks." +#~ msgstr "*(v1.18.11で変更)* - 新しいdictキー *transform* 、画像ブロックの画像変換行列。" + +#~ msgid "" +#~ "*(Changed in v1.18.11)* -- new dict " +#~ "key *size*, the size of the image" +#~ " in bytes for image blocks." +#~ msgstr "*(v1.18.11で変更)* - 新しいdictキー *size* 、画像ブロックのバイト単位の画像サイズ。" + +#~ msgid "bit 1: italic (2\\ :sup:`1`)" +#~ msgstr "ビット 1: イタリック(2\\ :sup:`1`)" + +#~ msgid "bit 4: bold (2\\ :sup:`4`)" +#~ msgstr "ビット 4: ボールド (2\\ :sup:`4`)" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/textwriter.mo b/docs/locales/ja/LC_MESSAGES/textwriter.mo new file mode 100644 index 000000000..a5d1693c9 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/textwriter.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/textwriter.po b/docs/locales/ja/LC_MESSAGES/textwriter.po new file mode 100644 index 000000000..883f287eb --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/textwriter.po @@ -0,0 +1,710 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 e25f415be312493ea3ae1f28db48fb1a +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 dccdc4856fd544a882af2b070c10a666 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 6bd5a3c81bd94748ba267f88934f7963 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../textwriter.rst:7 c6975d5fbc08457291e33247e41b0c83 +msgid "TextWriter" +msgstr "TextWriter (テキストライター)" + +#: ../../textwriter.rst:9 1e02a43a34c840cb955678ee25bd31ce +msgid "|pdf_only_class|" +msgstr "PDFのみ。" + +#: ../../textwriter.rst:11 11e7bf38655543ffa60d8816581dbabf +msgid "New in v1.16.18" +msgstr "v1.16.18で新たに追加" + +#: ../../textwriter.rst:13 3c8bfbc5ff924a9eb9b6b94109ec34f9 +msgid "" +"This class represents a MuPDF *text* object. The basic idea is to " +"**decouple (1) text preparation, and (2) text output** to PDF pages." +msgstr "" +"このクラスはMuPDFの *text* " +"オブジェクトを表します。**基本的なアイデアは、(1)テキストの準備と(2)PDFページへのテキストの出力を切り離すことです** 。" + +#: ../../textwriter.rst:15 da54ba43f33e4678af8bf51e628f29df +msgid "" +"During **preparation**, a text writer stores any number of text pieces " +"(\"spans\") together with their positions and individual font " +"information. The **output** of the writer's prepared content may happen " +"multiple times to any PDF page with a compatible page size." +msgstr "" +"**準備**中、テキストライターはテキストピース(「スパン」)を任意の数保存し、それぞれの位置と個別のフォント情報とともに保存します。ライターの準備されたコンテンツの" +" **出力** は、互換性のあるページサイズを持つ任意のPDFページに複数回行われる可能性があります。" + +#: ../../textwriter.rst:17 f4c7ab10f0a34399b9c1dc1ad38e4600 +msgid "" +"A text writer is an elegant alternative to methods " +":meth:`Page.insert_text` and friends:" +msgstr "テキストライターは、:meth:`Page.insert_text` などと比較して、次の点で優れた代替手段です:" + +#: ../../textwriter.rst:19 42ac247fde9442d0b8e6a634d87a2a00 +msgid "" +"**Improved text positioning:** Choose any point where insertion of text " +"should start. Storing text returns the \"cursor position\" after the " +"*last character* of the span." +msgstr "" +"**改良されたテキスト配置:** テキストを挿入する開始点を任意に選択できます。テキストの保存はスパンの *最後の文字の後* " +"の「カーソル位置」を返します。" + +#: ../../textwriter.rst:20 f934f37c93a4447da9cfb972800a8e21 +msgid "" +"**Free font choice:** Each text span has its own font and " +":data:`fontsize`. This lets you easily switch when composing a larger " +"text." +msgstr "" +"**自由なフォントの選択:** 各テキストスパンには独自のフォントと :data:`fontsize` " +"があります。大きなテキストを作成する際に簡単に切り替えることができます。" + +#: ../../textwriter.rst:21 c3a1e5ba5ac246a0821a2de8b88cab5c +msgid "" +"**Automatic fallback fonts:** If a character is not supported by the " +"chosen font, alternative fonts are automatically searched. This " +"significantly reduces the risk of seeing unprintable symbols in the " +"output (\"TOFUs\" -- looking like a small rectangle). PyMuPDF now also " +"comes with the **universal font \"Droid Sans Fallback Regular\"**, which " +"supports **all Latin** characters (including Cyrillic and Greek), and " +"**all CJK** characters (Chinese, Japanese, Korean)." +msgstr "" +"**自動フォントのフォールバック:** " +"選択したフォントが文字をサポートしていない場合、代替フォントが自動的に検索されます。これにより、出力で印刷できない記号を見るリスク(「TOFUs」" +" - 小さな四角形に見える)が大幅に減少します。PyMuPDFはまた、**すべてのラテン**文字(キリル文字とギリシャ文字を含む)および " +"**すべてのCJK**文字(中国語、日本語、韓国語)をサポートする **ユニバーサルフォント「Droid Sans Fallback " +"Regular」** を提供します。" + +#: ../../textwriter.rst:22 4c231b43d07c45538b5adbc94bbc0cb1 +msgid "" +"**Cyrillic and Greek Support:** The :ref:`Base-14-fonts` have integrated " +"support of Cyrillic and Greek characters **without specifying encoding.**" +" Your text may be a mixture of Latin, Greek and Cyrillic." +msgstr "キリル文字とギリシャ文字のサポート:PDFベース14フォントには、エンコーディングを指定せずにキリル文字とギリシャ文字の統合サポートがあります。テキストはラテン文字、ギリシャ文字、キリル文字の混合である可能性があります。" + +#: ../../textwriter.rst:23 719a06928f8c4e278b7cc35cdb535838 +msgid "" +"**Transparency support:** Parameter *opacity* is supported. This offers a" +" handy way to create watermark-style text." +msgstr "" +"**透明度のサポート:** パラメータの *透明度* " +"がサポートされています。これはウォーターマークスタイルのテキストを作成する便利な方法を提供します。" + +#: ../../textwriter.rst:24 a43dc70b42144ecaac47bf7487da71ce +msgid "" +"**Justified text:** Supported for any font -- not just simple fonts as in" +" :meth:`Page.insert_textbox`." +msgstr "" +"**両端揃えのテキスト**: :meth:`Page.insert_textbox` " +"のような単純なフォントだけでなく、すべてのフォントでサポートされています。" + +#: ../../textwriter.rst:25 eeff1d730fb54a3aa436c9443c2ea821 +msgid "" +"**Reusability:** A TextWriter object exists independent from PDF pages. " +"It can be written multiple times, either to the same or to other pages, " +"in the same or in different PDFs, choosing different colors or " +"transparency." +msgstr "" +"**再利用性:** " +"テキストライターオブジェクトはPDFページとは独立して存在します。同じページまたは異なるページ、同じPDFまたは異なるPDFに、異なる色や透明度を選択して複数回書き込むことができます。" + +#: ../../textwriter.rst:27 6e5ca11e783d4ea3a50555b4a02fcd09 +msgid "Using this object entails three steps:" +msgstr "このオブジェクトを使用するには、次の3つのステップが必要です:" + +#: ../../textwriter.rst:29 314e49bf63ea43dc95427576b7fc80a4 +msgid "" +"When **created**, a TextWriter requires a fixed **page rectangle** in " +"relation to which it calculates text positions. A text writer can write " +"to pages of this size only." +msgstr "" +"**作成** 時に、テキストライターはテキストの位置を計算するために関連する固定 **ページの矩形** " +"を必要とします。テキストライターは、このサイズのページにのみ書き込むことができます。" + +#: ../../textwriter.rst:30 0d4841a7acae4e7bb3783b7074b74ff8 +msgid "" +"Store text in the TextWriter using methods :meth:`TextWriter.append`, " +":meth:`TextWriter.appendv` and :meth:`TextWriter.fill_textbox` as often " +"as is desired." +msgstr "" +"テキストライターを使用してテキストを " +":meth:`TextWriter.append`、:meth:`TextWriter.appendv`、および " +":meth:`TextWriter.fill_textbox` メソッドで必要な回数だけ保存します。" + +#: ../../textwriter.rst:31 c8a2949e8a4948538a9b5dc92a01e0d0 +msgid "Output the TextWriter object on some PDF page(s)." +msgstr "TextWriter オブジェクトをいくつかのPDFページに出力します。" + +#: ../../textwriter.rst:35 cf8b65d52fbb49a991fd273bb016bf4a +msgid "" +"Starting with version 1.17.0, TextWriters **do support** text rotation " +"via the *morph* parameter of :meth:`TextWriter.write_text`." +msgstr "" +"バージョン1.17.0から、TextWriterは :meth:`TextWriter.write_text` の *morph* " +"パラメータを介したテキストの回転を **サポートしています** 。" + +#: ../../textwriter.rst:37 508f78dbbb7447b1940f851703db0ab1 +msgid "" +"There also exists :meth:`Page.write_text` which combines one or more " +"TextWriters and jointly writes them to a given rectangle and with a given" +" rotation angle -- much like :meth:`Page.show_pdf_page`." +msgstr "" +"また、 :meth:`Page.write_text` " +"も存在し、1つ以上のTextWriterを組み合わせて、指定された矩形と回転角に共同で書き込みます - " +":meth:`Page.show_pdf_page` のように。" + +#: ../../textwriter.rst:41 b9dde294925e4ffa8949f1fcd7efc0d6 +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../textwriter.rst:41 6874cb44f5fb40189f0e82c957eb93c2 +msgid "**Short Description**" +msgstr "**短い説明**" + +#: ../../textwriter.rst:43 91a4db0d70e644b38347b983e8de2378 +msgid ":meth:`~TextWriter.append`" +msgstr "" + +#: ../../textwriter.rst:43 390f1d022b7c4d7eaa44addf02bd9541 +msgid "Add text in horizontal write mode" +msgstr "水平書き込みモードでテキストを追加します。" + +#: ../../textwriter.rst:44 0818893bc21040279507a817b4fe3e2f +msgid ":meth:`~TextWriter.appendv`" +msgstr "" + +#: ../../textwriter.rst:44 cc29cf869e3a4ddebaffec427cc86bfe +msgid "Add text in vertical write mode" +msgstr "垂直書き込みモードでテキストを追加します。" + +#: ../../textwriter.rst:45 ca1023883a45422f8c1b21b28b0e3044 +msgid ":meth:`~TextWriter.fill_textbox`" +msgstr "" + +#: ../../textwriter.rst:45 d5365f88499d403fb866da0ab00671c5 +msgid "Fill rectangle (horizontal write mode)" +msgstr "矩形を埋めます(水平書き込みモード)。" + +#: ../../textwriter.rst:46 5b5d070b02f44d9fb33bd428ccb17934 +msgid ":meth:`~TextWriter.write_text`" +msgstr "" + +#: ../../textwriter.rst:46 613eabdf09fd469184886211223d59e7 +msgid "Output TextWriter to a PDF page" +msgstr "TextWriterをPDFページに出力します。" + +#: ../../textwriter.rst:47 1a531a26cdb746f3857ef7fada50219e +msgid ":attr:`~TextWriter.color`" +msgstr "" + +#: ../../textwriter.rst:47 29ba5a45348247229302210dbc130067 +msgid "Text color (can be changed)" +msgstr "テキストの色(変更可能です)。" + +#: ../../textwriter.rst:48 188c20529cd945769c55331d971c9362 +msgid ":attr:`~TextWriter.last_point`" +msgstr "" + +#: ../../textwriter.rst:48 b4bc687b96754e70a6f3b3f0f2282776 +msgid "Last written character ends here" +msgstr "最後に書かれた文字がここで終了します。" + +#: ../../textwriter.rst:49 5ecb99aea0944b4cbea149dcdd9ca21a +msgid ":attr:`~TextWriter.opacity`" +msgstr "" + +#: ../../textwriter.rst:49 18659a4d57d74c8c8de94ce5ec865320 +msgid "Text opacity (can be changed)" +msgstr "テキストの透明度(変更可能です)。" + +#: ../../textwriter.rst:50 4480e5d7099243dda1ca52a74d125365 +msgid ":attr:`~TextWriter.rect`" +msgstr "" + +#: ../../textwriter.rst:50 8fcf6c35ee0e40d5bc4e36c04a44edd9 +msgid "Page rectangle used by this TextWriter" +msgstr "このTextWriterが使用するページの矩形。" + +#: ../../textwriter.rst:51 9693e129190248b583296d46677610e0 +msgid ":attr:`~TextWriter.text_rect`" +msgstr "" + +#: ../../textwriter.rst:51 69eb3e220ad148129ada9f5080199b7f +msgid "Area occupied so far" +msgstr "現在までに占有された領域。" + +#: ../../textwriter.rst:55 ecbf87c3092c40e88b1686b77c83e7d2 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../textwriter.rst 03eaec6ab7cf480595ca2a22f6965c0b +#: 286e269d3d7e43ec95f9bc1551929f6f 33df58e1dd004b55aa255fcdc31b691c +#: a60948696f864a7390141db86c5ebed8 f7f0df94f8fc4e4b88f8242397bdcbbe +msgid "Parameters" +msgstr "" + +#: ../../textwriter.rst:61 bbcff21e7dce4c01a49526e7bfacc9e3 +msgid "rectangle internally used for text positioning computations." +msgstr "テキストの配置計算に内部的に使用される矩形。" + +#: ../../textwriter.rst:62 0df6eb5234844aada8962468bee150af +msgid "" +"sets the transparency for the text to store here. Values outside the " +"interval `[0, 1)` will be ignored. A value of e.g. 0.5 means 50% " +"transparency." +msgstr "ここに保存されるテキストの透明度を設定します。区間 `[0, 1)` 外の値は無視されます。例:0.5の場合、50%の透明度を意味します。" + +#: ../../textwriter.rst:63 9aba40bdb8d542fb939849ac1ea1b1df +msgid "" +"the color of the text. All colors are specified as floats *0 <= color <= " +"1*. A single float represents some gray level, a sequence implies the " +"colorspace via its length." +msgstr "" +"テキストの色。すべての色は浮動小数点数 *0 <= color <= 1* " +"として指定されます。単一の浮動小数点数は一定の灰色レベルを表し、シーケンスはその長さを介して色空間を示します。" + +#: ../../textwriter.rst:68 4a2e21d3e62e4b87aefffc34a94e609a +msgid "*Changed in v1.18.9*" +msgstr "*バージョン1.18.9で変更*" + +#: ../../textwriter.rst:69 ../../textwriter.rst:100 +#: 1044467fdfcd441f9b5bfa8e0e0e8130 1d08a5d40d214ab680cf8793d311258d +msgid "*Changed in v1.18.15*" +msgstr "*バージョン1.18.15で変更*" + +#: ../../textwriter.rst:71 3ca6ee533b09435abe5ceddf7b7cb27a +msgid "Add some new text in horizontal writing." +msgstr "水平書き込みで新しいテキストを追加します。" + +#: ../../textwriter.rst:73 ../../textwriter.rst:104 +#: ab09fda39d3e4393a29ec59b69c0e1ea f5214f793d2744af9340193c95565897 +msgid "start position of the text, the bottom left point of the first character." +msgstr "テキストの開始位置、最初の文字の左下の点です。" + +#: ../../textwriter.rst:74 4a82511a99cb44e9bf3d1a6b21723548 +msgid "" +"a string of arbitrary length. It will be written starting at position " +"\"pos\"." +msgstr "任意の長さの文字列。位置「pos」から書き始められます。" + +#: ../../textwriter.rst:75 ../../textwriter.rst:106 +#: a5950a0164694f058048e1fe83c69aea ae30e02ec0124cb28f914a306f3c4a95 +msgid "a :ref:`Font`. If omitted, `pymupdf.Font(\"helv\")` will be used." +msgstr ":ref:`Font`。省略された場合、`pymupdf.Font(\"helv\")` が使用されます。" + +#: ../../textwriter.rst:76 d9a53946e7c24a4d99d8138582744c5b +msgid "the :data:`fontsize`, a positive number, default 11." +msgstr ":data:`fontsize`、正の数、デフォルトは11です。" + +#: ../../textwriter.rst:77 ../../textwriter.rst:108 +#: 3873988af95148b7801a88eb7c01984c 4e3b9506d8504e24985ba55c6ec9282c +msgid "" +"the language to use, e.g. \"en\" for English. Meaningful values should be" +" compliant with the ISO 639 standards 1, 2, 3 or 5. Reserved for future " +"use: currently has no effect as far as we know." +msgstr "" +"使用する言語、例:英語の場合は \"en\"。意味のある値はISO " +"639規格1、2、3、または5に準拠している必要があります。将来の使用を予約しています:現在はわかる限りでは何の効果もありません。" + +#: ../../textwriter.rst:78 55d4902125eb472292eb77593b90e7e5 +msgid "" +"*(New in v1.18.9)* whether the text should be written from right to left." +" Applicable for languages like Arabian or Hebrew. Default is ``False``. " +"If ``True``, any Latin parts within the text will automatically " +"converted. There are no other consequences, i.e. " +":attr:`TextWriter.last_point` will still be the rightmost character, and " +"there neither is any alignment taking place. Hence you may want to use " +":meth:`TextWriter.fill_textbox` instead." +msgstr "" +"*(バージョン1.18.9で新機能)* テキストを右から左に書くかどうか。アラビア語やヘブライ語などの言語に適用されます。デフォルトは " +"``False`` です。``True`` " +"の場合、テキスト内のラテン語部分は自動的に変換されます。他に何の影響もないため、:attr:`TextWriter.last_point` " +"は依然として最右の文字ですし、配置も行われません。そのため、代わりに :meth:`TextWriter.fill_textbox` " +"を使用することがあります。" + +#: ../../textwriter.rst:79 f750558225c04c6da0549d0056048f3d +msgid "" +"*(New in v1.18.15)* look for the character's Small Capital version in the" +" font. If present, take that value instead. Otherwise the original " +"character (this font or the fallback font) will be taken. The fallback " +"font will never return small caps. For example, this snippet:: >>> doc =" +" pymupdf.open() >>> page = doc.new_page() >>> text = \"PyMuPDF: the " +"Python bindings for MuPDF\" >>> font = pymupdf.Font(\"figo\") # choose a" +" font with small caps >>> tw = pymupdf.TextWriter(page.rect) >>> " +"tw.append((50,100), text, font=font, small_caps=True) >>> " +"tw.write_text(page) >>> doc.ez_save(\"x.pdf\") will produce this PDF " +"text: .. image:: images/img-smallcaps.*" +msgstr "" + +#: ../../textwriter.rst:79 432cbd7ed9054416a5191bfef24de2e8 +msgid "" +"*(New in v1.18.15)* look for the character's Small Capital version in the" +" font. If present, take that value instead. Otherwise the original " +"character (this font or the fallback font) will be taken. The fallback " +"font will never return small caps. For example, this snippet::" +msgstr "" +"*(バージョン1.18.15で新機能)* " +"フォント内の文字の小文字バージョンを探します。存在する場合、その値が使用されます。それ以外の場合、元の文字(このフォントまたはフォールバックフォント)が使用されます。フォールバックフォントは小文字バージョンを返しません。たとえば、次のスニペット::" + +#: ../../textwriter.rst:90 98e21ce1902441eeba6f19061e4f5493 +msgid "will produce this PDF text:" +msgstr "これにより、このPDFテキストが生成されます:" + +#: ../../textwriter.rst 06f4faa84b0348aeaef09e3bcae38a60 +#: 21653364344e49fa8c3e3f9dd7eef9a2 998669305307407c96429cca0cb28abc +msgid "Returns" +msgstr "返り値:" + +#: ../../textwriter.rst:95 ../../textwriter.rst:111 +#: cf5848704c4f45d5863be1781e9750c6 d5ee4999d6c340078760223118e71def +msgid "" +":attr:`text_rect` and :attr:`last_point`. *(Changed in v1.18.0:)* Raises " +"an exception for an unsupported font -- checked via " +":attr:`Font.is_writable`." +msgstr "" +":attr:`text_rect` および :attr:`last_point`。 *(v1.18.0 " +"で変更)*:サポートされていないフォントの場合、:attr:`Font.is_writable` を介してチェックされた例外が発生します。" + +#: ../../textwriter.rst:102 22fef7a0a45f4133bb03c80510af77a3 +msgid "Add some new text in vertical, top-to-bottom writing." +msgstr "縦書きの新しいテキストを追加します。" + +#: ../../textwriter.rst:105 475f2776fe6649fb848f5c4af040ce52 +msgid "a string. It will be written starting at position \"pos\"." +msgstr "文字列。 \"pos\" から開始して書き込まれます。" + +#: ../../textwriter.rst:107 d3f691c3b0a649a2a4f2f3c3dc3be7b5 +msgid "the :data:`fontsize`, a positive float, default 11." +msgstr ":data:`fontsize`、正の浮動小数点数、デフォルトは 11 です。" + +#: ../../textwriter.rst:109 ../../textwriter.rst:133 +#: 6c2e96448a5a4ca1859aaa376d058339 847d5cdaa04d419ebad9cc8d314008c3 +msgid "*(New in v1.18.15)* see :meth:`append`." +msgstr "*(v1.18.15 で新しい)* 詳細は :meth:`append` を参照してください。" + +#: ../../textwriter.rst:115 47fff506b0d7424997914794a0ff7b05 +msgid "" +"Changed in 1.17.3: New parameter `pos` to specify where to start writing " +"within rectangle." +msgstr "1.17.3で変更:新しいパラメーター `pos` を追加して、矩形内での書き込みを開始する位置を指定できるようにしました。" + +#: ../../textwriter.rst:116 a9c32b09c1a1455ab9d11b281da03e13 +msgid "" +"Changed in v1.18.9: Return list of lines which do not fit in rectangle. " +"Support writing right-to-left (e.g. Arabian, Hebrew)." +msgstr "v1.18.9で変更:矩形に収まらない行のリストを返すようにしました。右から左に書くサポートを追加(アラビア語、ヘブライ語など)。" + +#: ../../textwriter.rst:117 9650fa4cd27447cb858698c792a3ab60 +msgid "Changed in v1.18.15: Prefer small caps if supported by the font." +msgstr "v1.18.15で変更:フォントがサポートしている場合、小文字のテキストを優先します。" + +#: ../../textwriter.rst:119 f11687cd59084cd996b529b2ce12e6fc +msgid "" +"Fill a given rectangle with text in horizontal writing mode. This is a " +"convenience method to use as an alternative for :meth:`append`." +msgstr "指定された矩形を水平書きモードでテキストで埋めます。これは、:meth:`append` の代替手段として使用する便利な方法です。" + +#: ../../textwriter.rst:121 4bd2adc309254e8f89c23c963d1d2c8e +msgid "the area to fill. No part of the text will appear outside of this." +msgstr "埋める領域。テキストのいずれの部分もこれの外には表示されません。" + +#: ../../textwriter.rst:122 cf21ee09a6d347079e7e38b61a116f9e +msgid "" +"the text. Can be specified as a (UTF-8) string or a list / tuple of " +"strings. A string will first be converted to a list using *splitlines()*." +" Every list item will begin on a new line (forced line breaks)." +msgstr "" +"テキスト。UTF-8文字列または文字列のリスト/タプルとして指定できます。文字列は最初に *splitlines()* " +"を使用してリストに変換されます。各リストアイテムは新しい行で始まります(強制的な改行が行われます)。" + +#: ../../textwriter.rst:123 e58f8a40089740bb8f7e99547e3a52a9 +msgid "" +"*(new in v1.17.3)* start storing at this point. Default is a point near " +"rectangle top-left." +msgstr "*(v1.17.3で新規)* このポイントから記憶を開始します。デフォルトは矩形の左上近くのポイントです。" + +#: ../../textwriter.rst:124 1bba1b52fabb4feba87ed2e1a93046e5 +msgid "the :ref:`Font`, default `pymupdf.Font(\"helv\")`." +msgstr ":ref:`Font`、デフォルトは `pymupdf.Font(\"helv\")`。" + +#: ../../textwriter.rst:125 391f5afbac8343f4a221b7935e311d0c +msgid "the :data:`fontsize`." +msgstr ":data:`fontsize`" + +#: ../../textwriter.rst:126 a627dc7fe51349409a7bb26273a7309e +msgid "" +"text alignment. Use one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, " +"TEXT_ALIGN_RIGHT or TEXT_ALIGN_JUSTIFY." +msgstr "テキストの配置。TEXT_ALIGN_LEFT、TEXT_ALIGN_CENTER、TEXT_ALIGN_RIGHT、またはTEXT_ALIGN_JUSTIFYのいずれかを使用します。" + +#: ../../textwriter.rst:127 47a6880a61374d5b989b75ee244f39e0 +msgid "" +"*(New in v1.18.9)* whether the text should be written from right to left." +" Applicable for languages like Arabian or Hebrew. Default is ``False``. " +"If ``True``, any Latin parts are automatically reverted. You must still " +"set the alignment (if you want right alignment), it does not happen " +"automatically -- the other alignment options remain available as well." +msgstr "" +"*(v1.18.9で新規)* テキストを右から左に書くかどうか。アラビア語やヘブライ語などの言語に適用可能です。デフォルトは ``False`` " +"です。``True`` " +"の場合、ラテン文字部分は自動的に反転されます。右寄せを希望する場合、引き続き配置を設定する必要があります(他の配置オプションも利用可能)。" + +#: ../../textwriter.rst:128 338fd2cfbb9f43eebe7aac794d285947 +msgid "" +"on text overflow do nothing, warn, or raise an exception. Overflow text " +"will never be written. **Changed in v1.18.9:** * Default is ``None``. * " +"The list of overflow lines will be returned." +msgstr "" + +#: ../../textwriter.rst:128 40285c69fa6d4e098e7e7d65e3b57181 +msgid "" +"on text overflow do nothing, warn, or raise an exception. Overflow text " +"will never be written. **Changed in v1.18.9:**" +msgstr "テキストのオーバーフローがある場合、何もしない、警告する、または例外を発生させるかどうか。オーバーフローテキストは書き込まれません。**v1.18.9で変更:**" + +#: ../../textwriter.rst:130 5186e708cabe4ece8c4923a3be58faab +msgid "Default is ``None``." +msgstr "デフォルトは ``None`` です。" + +#: ../../textwriter.rst:131 c9cd6684f8c94000b9768021a448a07e +msgid "The list of overflow lines will be returned." +msgstr "オーバーフローラインのリストが返されます。" + +#: ../../textwriter.rst 02670889aed04b008cecdb3c16c81c4f +#: 7c2d0cb6d3334a7d9986dbc7d90386fd c372a91f4d4e402b9597012aa50ca7e8 +#: cccda959bb4042ddb338028cba7e741c e3365d51e8b74ffcb87857224abfc418 +#: fc009306fc80478fa00be99b10517dbf +msgid "Return type" +msgstr "" + +#: ../../textwriter.rst:136 c60d734f21084a21b312d32fad9869bd +msgid "" +"*New in v1.18.9* -- List of lines that did not fit in the rectangle. Each" +" item is a tuple `(text, length)` containing a string and its length (on " +"the page)." +msgstr "" +"*v1.18.9で新規* -矩形に収まらなかった行のリスト。各アイテムは、文字列とそのページ上の長さ(長さ)を含むタプル `(text, " +"length)` です。" + +#: ../../textwriter.rst:138 9d32a899d67a40d89e21fbfcbdcd9ed5 +msgid "" +"Use these methods as often as is required -- there is no technical limit " +"(except memory constraints of your system). You can also mix " +":meth:`append` and text boxes and have multiple of both. Text positioning" +" is exclusively controlled by the insertion point. Therefore there is no " +"need to adhere to any order. *(Changed in v1.18.0:)* Raise an exception " +"for an unsupported font -- checked via :attr:`Font.is_writable`." +msgstr "" +"これらのメソッドは必要な回数だけ使用できます。技術的な制限はありません(システムのメモリ制約を除く)。また、:meth:`append` " +"とテキストボックスを混在させ、それぞれ複数持つこともできます。テキストの配置は挿入ポイントによってのみ制御されます。したがって、特定の順序に従う必要はありません。*(v1.18.0で変更)*" +" :サポートされていないフォントに対して例外を発生させます - :attr:`Font.is_writable` をチェックします。" + +#: ../../textwriter.rst:143 9b878e0128ae4996b183a0f12430688d +msgid "" +"Write the TextWriter text to a page, which is the only mandatory " +"parameter. The other parameters can be used to temporarily override the " +"values used when the TextWriter was created." +msgstr "TextWriterのテキストをページに書き込みます。これは唯一の必須パラメータで、TextWriterが作成されたときに使用される値を一時的にオーバーライドするために、他のパラメータを使用できます。" + +#: ../../textwriter.rst:145 8a08795ab37642d68b081669bd19dbbc +msgid "write to this :ref:`Page`." +msgstr "この :ref:`Page` に書き込みます。" + +#: ../../textwriter.rst:146 ../../textwriter.rst:147 +#: 0c7ba4b4f32448bea8d4d9c48b58dd8a b66facab5aca402a89d3624b62c17c77 +msgid "override the value of the TextWriter for this output." +msgstr "この出力のためにTextWriterの値をオーバーライドします。" + +#: ../../textwriter.rst:148 5006c8e264ca46c9ae272adfcfad10e1 +msgid "" +"modify the text appearance by applying a matrix to it. If provided, this " +"must be a sequence *(fixpoint, matrix)* with a point-like *fixpoint* and " +"a matrix-like *matrix*. A typical example is rotating the text around " +"*fixpoint*." +msgstr "" +"テキストの外観を変更するために、それに行列を適用して修正します。提供される場合、これは *(fixpoint、matrix)* " +"としてシーケンスでなければなりません。point-like *fixpoint* とmatrix-like *matrix* " +"を持っています。典型的な例は、テキストを *fixpoint* の周りで回転させることです。" + +#: ../../textwriter.rst:149 96e7ac37266f44a58e745502da479403 +msgid "put in foreground (default) or background." +msgstr "前景(デフォルト)または背景に配置します。" + +#: ../../textwriter.rst:150 abbc850a979e43328c92e53ed0c00799 +msgid "*(new in v1.18.4)* the :data:`xref` of an :data:`OCG` or :data:`OCMD`." +msgstr "*(v1.18.4で新規)* :data:`OCG` または :data:`OCMD` の :data:`xref`。" + +#: ../../textwriter.rst:151 ce3186c3205140019f041e35d658b0da +msgid "" +"The PDF `Tr` operator value. Values: 0 (default), 1, 2, 3 (invisible). " +".. image:: images/img-rendermode.*" +msgstr "" + +#: ../../textwriter.rst:151 bdf45cf0c031489788a8ef24b687ca01 +msgid "The PDF `Tr` operator value. Values: 0 (default), 1, 2, 3 (invisible)." +msgstr "PDF `Tr` 演算子の値。値:0(デフォルト)、1、2、3(不可視)。" + +#: ../../textwriter.rst:158 924bce3b23354b8db175a1030136ee4f +msgid "The area currently occupied." +msgstr "現在占有されている領域。" + +#: ../../textwriter.rst:160 ../../textwriter.rst:184 +#: 597dadf1263f4100aa94f7e6fd0cac59 62859bc0002e476b909fd3314760c3c1 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../textwriter.rst:164 ebffb6e923b542319c374d9a3191c930 +msgid "" +"The \"cursor position\" -- a :ref:`Point` -- after the last written " +"character (its bottom-right)." +msgstr "最後に書かれた文字の後にある「カーソル位置」 - :ref:`Point` 形式。" + +#: ../../textwriter.rst:166 e627c94d443c4b15939dbb1da1fca6d0 +msgid ":ref:`Point`" +msgstr "" + +#: ../../textwriter.rst:170 638ab80764a4433eb9061f7de24a2339 +msgid "The text opacity (modifiable)." +msgstr "テキストの不透明度(変更可能)。" + +#: ../../textwriter.rst:176 fa5d197bd0d0412898c56c89dab163c3 +msgid "The text color (modifiable)." +msgstr "テキストの色(変更可能)。" + +#: ../../textwriter.rst:182 b8f24fda0c48472ea9bd3b6b36e082bb +msgid "" +"The page rectangle for which this TextWriter was created. Must not be " +"modified." +msgstr "このTextWriterが作成されたページの長方形。変更しないでください。" + +#: ../../textwriter.rst:187 f3460a6193964ea2b7e5342dbc10d518 +msgid "" +"To see some demo scripts dealing with TextWriter, have a look at `this " +"`_ " +"repository." +msgstr "" +"TextWriterを扱うデモスクリプトを見るには、`この `_ リポジトリをご覧ください。" + +#: ../../textwriter.rst:189 cbb1eaac8ab84ee0a53d167e003577b8 +msgid "Opacity and color apply to **all the text** in this object." +msgstr "不透明度と色は、このオブジェクト内の **すべてのテキスト** に適用されます。" + +#: ../../textwriter.rst:190 6972b609e1cd4b0c957644b0b952f6fc +msgid "" +"If you need different colors / transparency, you must create a separate " +"TextWriter. Whenever you determine the color should change, simply append" +" the text to the respective TextWriter using the previously returned " +":attr:`last_point` as position for the new text span." +msgstr "" +"異なる色/透明度が必要な場合、別々のTextWriterを作成する必要があります。色が変更されるべきと判断した場合、新しいテキストスパンの位置として前回返された" +" :attr:`last_point` を使用して、対応するTextWriterにテキストを追加するだけです。" + +#: ../../textwriter.rst:191 e58f72984d7449c0a879797f11ece31c +msgid "" +"Appending items or text boxes can occur in arbitrary order: only the " +"position parameter controls where text appears." +msgstr "アイテムまたはテキストボックスの追加は任意の順序で発生できます:テキストが表示される位置を制御するのは、位置パラメータのみです。" + +#: ../../textwriter.rst:192 6b3ca0a67cd044c7b212f261ae0e9c6a +msgid "" +"Font and :data:`fontsize` can freely vary within the same TextWriter. " +"This can be used to let text with different properties appear on the same" +" displayed line: just specify *pos* accordingly, and e.g. set it to " +":attr:`last_point` of the previously added item." +msgstr "" +"同じTextWriter内でフォントと :data:`fontsize` " +"を自由に変更できます。これは、異なるプロパティを持つテキストが同じ表示される行に表示されるようにするために使用できます。posを適切に指定し、前に追加したアイテムの" +" :attr:`last_point` に設定してください。" + +#: ../../textwriter.rst:193 29bd43da60cf48dabb0a63c140cd02b6 +msgid "" +"You can use the *pos* argument of :meth:`TextWriter.fill_textbox` to set " +"the position of the first text character. This allows filling the same " +"textbox with contents from different :ref:`TextWriter` objects, thus " +"allowing for multiple colors, opacities, etc." +msgstr "" +":meth:`TextWriter.fill_textbox` の *pos* " +"引数を使用して、最初のテキスト文字の位置を設定できます。これにより、異なる :ref:`TextWriter` " +"オブジェクトからのコンテンツを含む同じテキストボックスを埋めることができ、複数の色、不透明度などが可能になります。" + +#: ../../textwriter.rst:194 4657dfd571bd4f3bbc5bb5d038797d58 +msgid "" +"MuPDF does not support all fonts with this feature, e.g. no Type3 fonts. " +"Starting with v1.18.0 this can be checked via the font attribute " +":attr:`Font.is_writable`. This attribute is also checked when using " +":ref:`TextWriter` methods." +msgstr "" +"MuPDFは、この機能を持つすべてのフォントをサポートしているわけではありません。たとえば、Type3フォントはサポートされていません。v1.18.0以降、:attr:`Font.is_writable`" +" 属性を使用して、これを確認できます。この属性は、:ref:`TextWriter` メソッドを使用する際にもチェックされます。" + +#: ../../footer.rst:60 68cab4f97fcf4503932903f84bb59645 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "" +#~ "**Free font choice:** Each text span " +#~ "has its own font and fontsize. " +#~ "This lets you easily switch when " +#~ "composing a larger text." +#~ msgstr "" + +#~ msgid "the fontsize, a positive number, default 11." +#~ msgstr "" + +#~ msgid "the fontsize, a positive float, default 11." +#~ msgstr "" + +#~ msgid "the fontsize." +#~ msgstr "" + +#~ msgid "" +#~ "Font and fontsize can freely vary " +#~ "within the same TextWriter. This can " +#~ "be used to let text with different" +#~ " properties appear on the same " +#~ "displayed line: just specify *pos* " +#~ "accordingly, and e.g. set it to " +#~ ":attr:`last_point` of the previously added " +#~ "item." +#~ msgstr "" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/the-basics.mo b/docs/locales/ja/LC_MESSAGES/the-basics.mo new file mode 100644 index 000000000..c6fedcd25 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/the-basics.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/the-basics.po b/docs/locales/ja/LC_MESSAGES/the-basics.po new file mode 100644 index 000000000..8b0ffa358 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/the-basics.po @@ -0,0 +1,952 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 ebd72ea23c554adea55b860ac32cfd10 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d808fab239944edbb4a07604487c3d4d +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 32d34325b50d49718d4ae087152d121b +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../the-basics.rst:8 3ef401bbe47d4d928056b1ad88406df4 +msgid "The Basics" +msgstr "基本" + +#: ../../the-basics.rst:13 7ad7d2aeeb0949939d1edf110e9cde48 +msgid "Opening a File" +msgstr "ファイルを開く" + +#: ../../the-basics.rst:16 b3977994add44c128857ebe6778dbce8 +msgid "To open a file, do the following:" +msgstr "ファイルを開くには、以下の手順を実行してください:" + +#: ../../the-basics.rst:27 ../../the-basics.rst:58 ../../the-basics.rst:119 +#: ../../the-basics.rst:149 ../../the-basics.rst:199 ../../the-basics.rst:249 +#: ../../the-basics.rst:287 ../../the-basics.rst:377 ../../the-basics.rst:416 +#: ../../the-basics.rst:467 ../../the-basics.rst:553 ../../the-basics.rst:638 +#: ../../the-basics.rst:694 ../../the-basics.rst:886 ../../the-basics.rst:1070 +#: 2739866db7be4d6fbd854a42404c20bb 279266de676f42ee8ca372b72f1b50d3 +#: 42d583c1c82e4c1c9c61e4221d22858b 4bbc83d3c8014f94bca9c10402d0772f +#: 6eb5cd518be24829a8fea50dc936924e 6f01de37c57b45dd826d3e9df5dd3352 +#: 72e59350df88431bb0d56787d262e6f3 76f22999a4234db0a736d9b66557dd9a +#: 9aa232f9bff64aec84bc0061ae859fde a56f4151e39f4227823677221a30af73 +#: cb848559a7ed4bceb96742c75b6a8a26 d2d7c31ddd6a4481afad882faa434f0c +#: d8991f84c3e6479fa2c6050dd8b27e28 e70c606f8abd4024a11d97fb8f7cd006 +#: fabe4bd1a2d042da9318fbae362a32ad +msgid "**Taking it further**" +msgstr "**さらに進む**" + +#: ../../the-basics.rst:29 1ec0b9c37a314b2cb8dde60de149c32e +msgid "" +"See the :ref:`list of supported file types` and " +":ref:`The How to Guide on Opening Files ` for more " +"advanced options." +msgstr "" +"より高度なオプションについては、:ref:`サポートされている ` と " +":ref:`ファイルを開く方法ガイド` のリストを参照してください。" + +#: ../../the-basics.rst:38 cb07704fb73043b2827ddfe5306bb8eb +msgid "Extract text from a |PDF|" +msgstr "|PDF| からテキストを抽出する" + +#: ../../the-basics.rst:40 63223b54f3cc495aa83fddc584a650c9 +msgid "To extract all the text from a |PDF| file, do the following:" +msgstr "|PDF| ファイルからすべてのテキストを抽出するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:54 82cde87ae4a8477cb30daea5d72f5f54 +msgid "" +"Of course it is not just |PDF| which can have text extracted - all the " +":ref:`supported document file formats ` such as " +":title:`MOBI`, :title:`EPUB`, :title:`TXT` can have their text extracted." +msgstr "" +"もちろん、PDFだけでなく、MOBI、EPUB、TXTなどの :ref:`サポートされているドキュメントファイル形式 " +"` でもテキストを抽出できます。" + +#: ../../the-basics.rst:60 68a8a1314b8a474a9636d7ff5ebf2245 +msgid "" +"If your document contains image based text content the use OCR on the " +"page for subsequent text extraction:" +msgstr "もし文書に画像ベースのテキストコンテンツが含まれている場合は、後続のテキスト抽出のためにそのページでOCRを使用してください。" + +#: ../../the-basics.rst:67 ../../the-basics.rst:121 +#: 009d24e7339b46f7afe03c6afa62fbd2 b502b55793844906a001df204caff15f +msgid "" +"There are many more examples which explain how to extract text from " +"specific areas or how to extract tables from documents. Please refer to " +"the :ref:`How to Guide for Text`." +msgstr "特定の領域からテキストを抽出する方法や、文書から表を抽出する方法など、さらに多くの例があります。テキストの方法ガイドを参照してください。" + +#: ../../the-basics.rst:69 58a9cf1efe6a40b5ada05038e2d55412 +msgid "" +"You can now also :ref:`extract text in Markdown " +"format`." +msgstr ":ref:`Markdown形式でテキストを抽出 ` することもできます。" + +#: ../../the-basics.rst:71 ../../the-basics.rst:123 ../../the-basics.rst:153 +#: ../../the-basics.rst:208 ../../the-basics.rst:255 ../../the-basics.rst:291 +#: ../../the-basics.rst:318 ../../the-basics.rst:343 ../../the-basics.rst:383 +#: ../../the-basics.rst:420 ../../the-basics.rst:474 ../../the-basics.rst:500 +#: ../../the-basics.rst:527 ../../the-basics.rst:608 ../../the-basics.rst:659 +#: ../../the-basics.rst:698 ../../the-basics.rst:762 ../../the-basics.rst:821 +#: ../../the-basics.rst:898 ../../the-basics.rst:928 ../../the-basics.rst:964 +#: ../../the-basics.rst:990 ../../the-basics.rst:1074 +#: 06196b3b11364bd8ba72a68806270fbc 28f136db73ea4ff8b27a322e19a05299 +#: 2a2f674929bc4956b9fa645ec55231da 2a42d66f67ea469da857b3aefcefb111 +#: 34dfbca2e1ef487085d769c401e479da 4b5819a0804e42669c6c95bd78540195 +#: 57551550d33b446da752ec01b2d38be1 58193e34f85e4f3286969bb715d2ed1b +#: 758afc8b33924ed7afa93712789855ff 7c7a34a487ba49719ca245b940618040 +#: 926638700d944932adaab0f81e219328 99f090981eab4af2adb7c096da2ae2e3 +#: 9f4be351ab174a7fbfd5426ee671e7f8 ac69cd30aed24f97aa9c7c5eff88d4b3 +#: adf8863e8d274a1e9e4cd774f4b6c024 b6dfec45d83b42e68fc997fcb0213e92 +#: c34132ed22984098ac5b8bd1e9a4806b c8f4c6c1d0ee4228977d9294144fbc0e +#: cb4e3c70daa14279926090c987c7a950 e547959996fc49f483ba7574079782d7 +#: f8047ddd3ad0454c8c7eb0d0258590f1 fca50bef410a492eb5e9db5cd98453f0 +#: fd039b9c5e9e49b2a72044dabcf4d52c +msgid "**API reference**" +msgstr "**APIリファレンス**" + +#: ../../the-basics.rst:73 099a4007de224f26813b7e0c6086aba5 +msgid ":meth:`Page.get_text`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:85 3478a0ef399a4f90bdf04e54b4b848f8 +msgid "Extract images from a |PDF|" +msgstr "|PDF| から画像を抽出する" + +#: ../../the-basics.rst:87 2c137c284bae4ff3bd22402af36ebe06 +msgid "To extract all the images from a |PDF| file, do the following:" +msgstr "|PDF| すべての画像を抽出するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:125 58022a36615f4658a70db9b3d3cb08c0 +msgid ":meth:`Page.get_images`" +msgstr "" + +#: ../../the-basics.rst:126 887f2573340f469a8f4bb2e328eaf5a3 +msgid ":ref:`Pixmap`" +msgstr "" + +#: ../../the-basics.rst:133 734558bdb8694762a5d412b1e2faa7e9 +msgid "Extract vector graphics" +msgstr "ベクトルグラフィックスを抽出" + +#: ../../the-basics.rst:135 0377de0bb75a443c8d1266b4d5b21c9a +msgid "To extract all the vector graphics from a document page, do the following:" +msgstr "ドキュメントのページからすべてのベクトルグラフィックスを抽出するには、以下の手順に従います:" + +#: ../../the-basics.rst:145 495a59882911438aae6c87d4c09cc096 +msgid "" +"This will return a dictionary of paths for any vector drawings found on " +"the page." +msgstr "これにより、ページ上で見つかったすべてのベクター図形のパスの辞書が返されます。" + +#: ../../the-basics.rst:151 edb401df140347688dfb686967959746 +msgid "" +"Please refer to: :ref:`How to Extract " +"Drawings`." +msgstr "" +"次のページを参照してください: :ref:`「描画の抽出方法」 " +"`。" + +#: ../../the-basics.rst:155 99278f5d80fa49a1b1b55bb0d070fe0a +#, fuzzy +msgid ":meth:`Page.get_drawings`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:166 3f2a04b771b4465fba3841d4145c94eb +msgid "Merging |PDF| files" +msgstr "|PDF| ファイルの結合" + +#: ../../the-basics.rst:168 00fbed70866b44078ae8b3f72592c767 +msgid "To merge |PDF| files, do the following:" +msgstr "|PDF| ファイルを結合するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:182 2c3a5d3d3d264086993c70ed0f9610ad +msgid "Merging |PDF| files with other types of file" +msgstr "|PDF| ァイルと他の種類のファイルを結合する" + +#: ../../the-basics.rst:184 2a56bc4e77b14621bdc02e63f9456750 +msgid "" +"With :meth:`Document.insert_file` you can invoke the method to merge " +":ref:`supported files` with |PDF|. For example:" +msgstr "" +":meth:`Document.insert_file` " +"を使用すると、サポートされているファイルをPDFと結合するメソッドを呼び出すことができます。例えば:" + +#: ../../the-basics.rst:201 f2e00ac506914ac4b90cf6ba9b933452 +#, fuzzy +msgid "" +"It is easy to join PDFs with :meth:`Document.insert_pdf` & " +":meth:`Document.insert_file`. Given open |PDF| documents, you can copy " +"page ranges from one to the other. You can select the point where the " +"copied pages should be placed, you can revert the page sequence and also " +"change page rotation." +msgstr "" +":meth:`Document.insert_pdf` と :meth:`Document.insert_file` " +"を使用すれば、PDFを簡単に結合できます。開かれたPDF文書がある場合、片方のPDFから別のPDFへページの範囲をコピーすることができます。コピーされたページが配置される位置を選択することができ、ページの順序を元に戻すこともできます。さらに、ページの回転も変更することができます。詳細な説明はこの" +" `Wiki `_ の記事に記載されています。" + +#: ../../the-basics.rst:203 097d528e7b89483887fe9b103286246d +msgid "" +"The GUI script `join.py `_ uses this method" +" to join a list of files while also joining the respective table of " +"contents segments. It looks like this:" +msgstr "" +"GUIスクリプト `join.py `_ " +"では、この方法を使用してファイルのリストを結合し、それぞれの目次セグメントも結合します。スクリプトの見た目は以下のようになっています:" + +#: ../../the-basics.rst:210 1fbf6ae246834c7daa68bb87e30c563d +msgid ":meth:`Document.insert_pdf`" +msgstr "" + +#: ../../the-basics.rst:211 eb458481793c4cb1b1e232362c454d05 +msgid ":meth:`Document.insert_file`" +msgstr "" + +#: ../../the-basics.rst:218 b75e34a3e76543769aa5c7b08099b88e +msgid "Working with Coordinates" +msgstr "座標を扱う" + +#: ../../the-basics.rst:220 ba6915f5f3614cfc866c5d6a96dd7561 +msgid "" +"There is one *mathematical term* that you should feel comfortable with " +"when using |PyMuPDF| - **\"coordinates\"**. Please have a quick look at " +"the :ref:`Coordinates` section to understand the coordinate system to " +"help you with positioning objects and understand your document space." +msgstr "" +"|PyMuPDF| を使用する際に快適に感じるべき *数学用語* が1つあります - **「座標」** " +"です。オブジェクトの配置や文書空間の理解に役立つ座標系を理解するために、:ref:`Coordinates` セクションを簡単にご覧ください。" + +#: ../../the-basics.rst:229 1a7cee38451a4ac49f536db1c8867f23 +msgid "Adding a watermark to a |PDF|" +msgstr "|PDF| ファイルにウォータマークを追加する方法" + +#: ../../the-basics.rst:231 7a831a320dcb4d03b66a33b5656a1592 +msgid "To add a watermark to a |PDF| file, do the following:" +msgstr "|PDF| ファイルにウォータマークを追加するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:251 32163f8855f94b3da489e46235d0f84f +msgid "" +"Adding watermarks is essentially as simple as adding an image at the base" +" of each |PDF| page. You should ensure that the image has the required " +"opacity and aspect ratio to make it look the way you need it to." +msgstr "ウォータマークを追加することは、基本的には各PDFページの底辺に画像を追加するだけです。画像が必要な透明度とアスペクト比を持っていることを確認して、必要な見た目になるようにします。" + +#: ../../the-basics.rst:253 8b5f52f2d540405f8e19f3ff6efd6007 +msgid "" +"In the example above a new image is created from each file reference, but" +" to be more performant (by saving memory and file size) this image data " +"should be referenced only once - see the code example and explanation on " +":meth:`Page.insert_image` for the implementation." +msgstr "" +"前述の例では、各ファイル参照から新しい画像が作成されていますが、メモリとファイルサイズを節約するために、この画像データは1回だけ参照されるようにすると、パフォーマンスが向上します" +" - Page.insert_image()のコード例と説明を参照してください。" + +#: ../../the-basics.rst:257 fcfe127ad9414e8a9a9f6fe506eca749 +msgid ":meth:`Page.bound`" +msgstr "" + +#: ../../the-basics.rst:258 ../../the-basics.rst:294 +#: 2122203304f24c7e86364c4e44d8c515 5bd4a37b290b484ab12ffef1d743e039 +msgid ":meth:`Page.insert_image`" +msgstr "" + +#: ../../the-basics.rst:267 74156cf07c4c44abb86f01275234c22c +msgid "Adding an image to a |PDF|" +msgstr "|PDF| に画像を追加する" + +#: ../../the-basics.rst:269 e4ad47cff3e149b09cfcee436ce71ec0 +msgid "To add an image to a |PDF| file, for example a logo, do the following:" +msgstr "PDFファイルに画像を追加するためには、例えばロゴを追加する場合、以下の手順を実行してください:" + +#: ../../the-basics.rst:289 ef303cbd63ea4d6cae44d744a77253f8 +msgid "" +"As with the watermark example you should ensure to be more performant by " +"only referencing the image once if possible - see the code example and " +"explanation on :meth:`Page.insert_image`." +msgstr "" +"ウォータマークの例と同様に、できる限り画像を1回だけ参照することで、より効率的な処理を行うようにしてください。詳細は " +":meth:`Page.insert_image` のコード例と説明を参照してください。" + +#: ../../the-basics.rst:293 0a5e74d57a334f90b301d4f7266ae6a7 +msgid ":ref:`Rect`" +msgstr "" + +#: ../../the-basics.rst:303 97f7c41b4fcc40d297f51e89344ec00e +msgid "Rotating a |PDF|" +msgstr "|PDF| を回転させる" + +#: ../../the-basics.rst:305 a6bb30db140947ee94661859e458334a +msgid "To add a rotation to a page, do the following:" +msgstr "ページに回転を追加するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:320 5b2ac1a6f5284ffdbcf63f04a5bb87e3 +msgid ":meth:`Page.set_rotation`" +msgstr "" + +#: ../../the-basics.rst:328 5d0f43e3da814e9195c2500beb9b2aae +msgid "Cropping a |PDF|" +msgstr "|PDF| のトリミング" + +#: ../../the-basics.rst:330 ebcd5ad854a344fb8fdb76e0f157f822 +msgid "To crop a page to a defined :ref:`Rect`, do the following:" +msgstr "定義された矩形 :ref:`Rect` にページをトリミングするには、以下の手順を実行してください:" + +#: ../../the-basics.rst:345 10f26d2a0e9f428c877fe76eca3149b4 +msgid ":meth:`Page.set_cropbox`" +msgstr "" + +#: ../../the-basics.rst:354 37f2015ccbe843179609bdd2ce54ce25 +msgid ":index:`Attaching Files `" +msgstr ":index:`ファイルの添付 `" + +#: ../../the-basics.rst:356 73fa4a36d3114266a5b8c37898cdd520 +msgid "To attach another file to a page, do the following:" +msgstr "別のファイルをページに添付するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:379 fef41696e0104638bfa7e9e46f94d5da +msgid "" +"When adding the file with :meth:`Page.add_file_annot` note that the third" +" parameter for the `filename` should include the actual file extension. " +"Without this the attachment possibly will not be able to be recognized as" +" being something which can be opened. For example, if the `filename` is " +"just *\"attachment\"* when view the resulting PDF and attempting to open " +"the attachment you may well get an error. However, with " +"*\"attachment.pdf\"* this can be recognized and opened by PDF viewers as " +"a valid file type." +msgstr "" +":meth:`Page.add_file_annot` " +"でファイルを追加する際には、ファイル名を指定する第三引数には実際のファイルの拡張子を含める必要があります。これがないと、添付ファイルが開けるものとして認識されない可能性があります。例えば、ファイル名が単に「attachment」とだけ指定されている場合、生成されたPDFを見て添付ファイルを開こうとするとエラーが発生するかもしれません。しかし、「attachment.pdf」と指定されている場合、PDFビューアーで有効なファイルタイプとして認識され、開くことができます。" + +#: ../../the-basics.rst:381 2eb971ca49044acfb758808877174159 +msgid "" +"The default icon for the attachment is by default a \"push pin\", however" +" you can change this by setting the `icon` parameter." +msgstr "添付ファイルのデフォルトアイコンは「押しピン」ですが、 `icon` パラメータを設定することでこれを変更することができます。" + +#: ../../the-basics.rst:385 472cb94f410f45d389386b479f752c7d +msgid ":ref:`Point`" +msgstr "" + +#: ../../the-basics.rst:386 ../../the-basics.rst:422 +#: bc83ca635d004bbea323fc12889c3475 e3b71ff6dabe4e7092a149500b391650 +msgid ":meth:`Document.tobytes`" +msgstr "" + +#: ../../the-basics.rst:387 861dcb9381e943f982a5ff2233584d56 +msgid ":meth:`Page.add_file_annot`" +msgstr "" + +#: ../../the-basics.rst:396 36ca0a3374d84c2d914b97f4600f49bc +msgid ":index:`Embedding Files `" +msgstr ":index:`ファイルを埋め込む `" + +#: ../../the-basics.rst:398 3d3da8f629564876915a4356adb5a4c5 +msgid "To embed a file to a document, do the following:" +msgstr "ファイルを文書に埋め込むには、以下の手順を実行してください:" + +#: ../../the-basics.rst:418 522ee31eaa5c4be29e943f9e031733ce +msgid "" +"As with :ref:`attaching files`, when adding " +"the file with :meth:`Document.embfile_add` note that the first parameter " +"for the `filename` should include the actual file extension." +msgstr "" +"ファイルを添付する場合と同様に、 :meth:`Document.embfile_add` " +"でファイルを追加する際には、ファイル名を指定する第一引数には実際のファイルの拡張子を含める必要があります" + +#: ../../the-basics.rst:423 d105ed0d5499461a9a468719a7df3543 +msgid ":meth:`Document.embfile_add`" +msgstr "" + +#: ../../the-basics.rst:433 2182d5b6f29e40d39aab4f677fedfa2b +msgid "Deleting Pages" +msgstr "ページを削除する" + +#: ../../the-basics.rst:435 634d1c5913ad422281859654914d1268 +msgid "To delete a page from a document, do the following:" +msgstr "文書からページを削除するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:445 437764ad7c2747118b3feb3fa4f1367b +msgid "To delete a multiple pages from a document, do the following:" +msgstr "複数のページを文書から削除するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:459 7ffda49506894b2ca02ad9e6bc7e975a +msgid "What happens if I delete a page referred to by bookmarks or hyperlinks?" +msgstr "ブックマークやハイパーリンクで参照されているページを削除した場合、どうなりますか?" + +#: ../../the-basics.rst:461 9ccef7fc01e34b1f890333f741269c36 +msgid "" +"A bookmark (entry in the Table of Contents) will become inactive and will" +" no longer navigate to any page." +msgstr "ブックマーク(目次のエントリー)は無効になり、もはやどのページにもナビゲートしません" + +#: ../../the-basics.rst:463 2db9a64894ac4cc5926029b386cc8dbc +msgid "" +"A hyperlink will be removed from the page that contains it. The visible " +"content on that page will not otherwise be changed in any way." +msgstr "ハイパーリンクは、それを含むページから削除されます。そのページの可視コンテンツは、他の方法で変更されることはありません。" + +#: ../../the-basics.rst:469 a64aa50abd5848b8a15b0f27de5f2595 +msgid "" +"The page index is zero-based, so to delete page 10 of a document you " +"would do the following `doc.delete_page(9)`." +msgstr "ページのインデックスは0から始まるため、文書の10ページ目を削除するには、以下のようにします: `doc.delete_page(9)`。" + +#: ../../the-basics.rst:471 fb3cae6bf2b54a59b81ff59a5bb7563e +msgid "" +"Similarly, `doc.delete_pages(from_page=9, to_page=14)` will delete pages " +"10 - 15 inclusive." +msgstr "" +"同様に、 `doc.delete_pages(from_page=9, to_page=14)` " +"は、ページ10からページ15までを含む範囲のページを削除します。" + +#: ../../the-basics.rst:476 ed756abf6b3e413c9ffbb5a6caf6b88b +msgid ":meth:`Document.delete_page`" +msgstr "" + +#: ../../the-basics.rst:477 65198e829eef4bb381f47ddf30cc7e4f +msgid ":meth:`Document.delete_pages`" +msgstr "" + +#: ../../the-basics.rst:485 7bbd41be5ecb4d2e9c76a3a244cca14d +msgid "Re-Arranging Pages" +msgstr "ページを再配置する" + +#: ../../the-basics.rst:487 a1f98aa6a7714dab99cef5e054cfb3bd +msgid "To change the sequence of pages, i.e. re-arrange pages, do the following:" +msgstr "ページを再配置するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:502 f179f7f7f5794947ac4541278ba55b6e +msgid ":meth:`Document.move_page`" +msgstr "" + +#: ../../the-basics.rst:511 d21575e2370d4ad4ab02f2ab07dd150c +msgid "Copying Pages" +msgstr "ページをコピーする" + +#: ../../the-basics.rst:514 57a67ecb95784254803c922f7d08f6c6 +msgid "To copy pages, do the following:" +msgstr "ページをコピーするには、以下の手順を実行してください:" + +#: ../../the-basics.rst:529 5a496f287a5b41fbb7d2b54cb5c697c8 +msgid ":meth:`Document.copy_page`" +msgstr "" + +#: ../../the-basics.rst:537 892cc58284e0408d8b96e992e24544cb +msgid "Selecting Pages" +msgstr "ページを選択する" + +#: ../../the-basics.rst:540 3db8f868455c4422a22a8739d7651725 +msgid "To select pages, do the following:" +msgstr "ページを選択するには、以下の手順を実行してください:" + +#: ../../the-basics.rst:555 09491c46d0124346a4620dc928ac2a9e +msgid "" +"With |PyMuPDF| you have all options to copy, move, delete or re-arrange " +"the pages of a |PDF|. Intuitive methods exist that allow you to do this " +"on a page-by-page level, like the :meth:`Document.copy_page` method." +msgstr "" +"|PyMuPDF| を使用すると、 |PDF| のページをコピー、移動、削除、または再配置するためのオプションがすべて揃っています。 " +":meth:`Document.copy_page` メソッドのように、ページ単位で直感的なメソッドを使用してこれらの操作を行うことができます。" + +#: ../../the-basics.rst:557 f38fd21a8beb4136bb4eda2f7ed52873 +msgid "" +"Or you alternatively prepare a complete new page layout in form of a " +":title:`Python` sequence, that contains the page numbers you want, in the" +" sequence you want, and as many times as you want each page. The " +"following may illustrate what can be done with :meth:`Document.select`" +msgstr "" +"または、 :title:`Python` " +"のシーケンスとして完全な新しいページレイアウトを準備し、希望するページ番号を希望する順序で含め、必要なページ数だけ繰り返すこともできます。次の例は、" +" :meth:`Document.select` を使用した可能性を示しています。" + +#: ../../the-basics.rst:564 88e421ecb4fc4d56adaf214dccae4daa +msgid "" +"Now let's prepare a PDF for double-sided printing (on a printer not " +"directly supporting this):" +msgstr "以下のように、両面印刷用にPDFを準備しましょう(直接これをサポートしていないプリンターで):" + +#: ../../the-basics.rst:566 74d3cda3847c46b7af864e6bc7c19cf8 +msgid "" +"The number of pages is given by `len(doc)` (equal to `doc.page_count`). " +"The following lists represent the even and the odd page numbers, " +"respectively:" +msgstr "" +"ページ数は `len(doc)`( `doc.page_count` " +"と同じ)で与えられます。以下のリストは、それぞれ偶数ページと奇数ページの番号を表しています:" + +#: ../../the-basics.rst:573 2e020acac7f14b078d00def4cebc4fec +msgid "" +"This snippet creates the respective sub documents which can then be used " +"to print the document:" +msgstr "このスニペットは、それぞれのサブドキュメントを作成し、それらを使用してドキュメントを印刷することができます:" + +#: ../../the-basics.rst:585 a77fc70595bb4766ad8a0d5ffca2b8cf +msgid "" +"For more information also have a look at this Wiki `article " +"`_." +msgstr "" +"詳細については、この `ウィキの記事 `_ もご覧ください。" + +#: ../../the-basics.rst:588 6c87cc6c645e4408a02889aca746f748 +msgid "" +"The following example will reverse the order of all pages (**extremely " +"fast:** sub-second time for the 756 pages of the :ref:`AdobeManual`):" +msgstr "" +"次の例は、すべてのページの順序を逆にするものです(非常に高速: :ref:`AdobeManual` " +"リファレンスの756ページをサブセカンドの時間で処理します)。" + +#: ../../the-basics.rst:598 b9eda6c35a9b4b569375594839df0a97 +msgid "" +"This snippet duplicates the PDF with itself so that it will contain the " +"pages *0, 1, ..., n, 0, 1, ..., n* **(extremely fast and without " +"noticeably increasing the file size!)**:" +msgstr "このスニペットは、PDFを自身と重複させることで、ページ0、1、…、n、0、1、…、nを含むようにします(非常に高速で、ファイルサイズをほとんど増やさずに実現します!):" + +#: ../../the-basics.rst:610 db170cb4731c4d9fb983242366f6e186 +msgid ":meth:`Document.select`" +msgstr "" + +#: ../../the-basics.rst:621 11bf1fc8a6144c888028b25e240c5514 +msgid "Adding Blank Pages" +msgstr "空白のページを追加する" + +#: ../../the-basics.rst:623 e6ba26e2fe264930b67c0e9ff511017c +msgid "To add a blank page, do the following:" +msgstr "空白のページを追加するには、以下の手順を行います:" + +#: ../../the-basics.rst:640 c73c566766104c198225c46f3fa4b332 +msgid "Use this to create the page with another pre-defined paper format:" +msgstr "以下の方法を使用して、別の事前定義された用紙フォーマットを使ってページを作成します:" + +#: ../../the-basics.rst:648 fa99380598a74f60a839a06cb2b9f706 +msgid "" +"The convenience function :meth:`paper_size` knows over 40 industry " +"standard paper formats to choose from. To see them, inspect dictionary " +":attr:`paperSizes`. Pass the desired dictionary key to :meth:`paper_size`" +" to retrieve the paper dimensions. Upper and lower case is supported. If " +"you append \"-L\" to the format name, the landscape version is returned." +msgstr "" +"便利な関数 :meth:`paper_size` は、40以上の業界標準の用紙フォーマットを選択できます。それらを確認するには、辞書 " +"attr:`paperSizes` を調べてください。用紙の寸法を取得するために、希望する辞書キーを :meth:`paper_size` " +"に渡します。大文字と小文字の両方がサポートされています。フォーマット名に「-L」を追加すると、横長バージョンが返されます。" + +#: ../../the-basics.rst:650 7bb204302fe64b9c8102ce9b60e306f1 +msgid "" +"Here is a 3-liner that creates a |PDF|: with one empty page. Its file " +"size is 460 bytes:" +msgstr "以下は、1ページの空白の |PDF|: を作成するための3行のコードです。ファイルサイズは460バイトです。" + +#: ../../the-basics.rst:661 16708fa927574853b07cfbe391fa574a +msgid ":meth:`Document.new_page`" +msgstr "" + +#: ../../the-basics.rst:662 555dd6a7a4ca428ca05ffe585eb7dd98 +msgid ":attr:`paperSizes`" +msgstr "" + +#: ../../the-basics.rst:671 0c9d0f6b0af141bc92c8094e7d5d0fee +msgid "Inserting Pages with Text Content" +msgstr "ページにテキストコンテンツを挿入する" + +#: ../../the-basics.rst:673 8801a769c6764d358d2334b4dd80e5d6 +msgid "" +"Using the :meth:`Document.insert_page` method also inserts a new page and" +" accepts the same `width` and `height` parameters. But it lets you also " +"insert arbitrary text into the new page and returns the number of " +"inserted lines." +msgstr "" +":meth:`Document.insert_page` " +"メソッドを使用すると、新しいページが挿入され、同じ幅と高さのパラメーターが受け入れられます。しかし、このメソッドでは新しいページに任意のテキストを挿入することができ、挿入された行数が返されます。" + +#: ../../the-basics.rst:696 1f709a1e688c4c64ad9835a43df6b7e5 +msgid "" +"The text parameter can be a (sequence of) string (assuming UTF-8 " +"encoding). Insertion will start at :ref:`Point` (50, 72), which is one " +"inch below top of page and 50 points from the left. The number of " +"inserted text lines is returned." +msgstr "" +"テキストパラメーターは、UTF-8エンコーディングを想定している(シーケンスの)文字列です。挿入はページの上端から1インチ下であるポイント " +":ref:`Point` (50, 72) から始まり、左から50ポイントの位置です。挿入されたテキストの行数が返されます。" + +#: ../../the-basics.rst:700 5503e405b8a74c46ac510d142a942f92 +msgid ":meth:`Document.insert_page`" +msgstr "" + +#: ../../the-basics.rst:711 28cd42d51f99409a89dd644e5a331e95 +msgid "Splitting Single Pages" +msgstr "単一のページを分割する" + +#: ../../the-basics.rst:713 7caccc9bbcaa4fa2941b493e69be1447 +msgid "" +"This deals with splitting up pages of a |PDF| in arbitrary pieces. For " +"example, you may have a |PDF| with *Letter* format pages which you want " +"to print with a magnification factor of four: each page is split up in 4 " +"pieces which each going to a separate |PDF| page in *Letter* format " +"again." +msgstr "" +"この方法は、 |PDF| のページを任意の部分に分割することに関連しています。例えば、Letterフォーマットのページを含む |PDF| " +"を、4倍の拡大率で印刷したい場合を考えてみましょう。各ページは4つの部分に分割され、それぞれが再びLetterフォーマットの個別の |PDF| " +"ページになります。" + +#: ../../the-basics.rst:756 ../../the-basics.rst:814 +#: 3ad50277922f4b4c9ce6e2eb2d3b4350 650a82fa6a3444c7b4f1119637692c77 +msgid "Example:" +msgstr "例:" + +#: ../../the-basics.rst:764 ../../the-basics.rst:823 +#: 7f62edf5aa794474925340a623c05d4b a9ff4ea823fa4eaf80c415c485663e6b +msgid ":meth:`Page.cropbox_position`" +msgstr "" + +#: ../../the-basics.rst:765 ../../the-basics.rst:824 +#: 86564b77e5c3440dba3f909f25fed95e ffceed2bb2ac4e738336a3eb438cb502 +msgid ":meth:`Page.show_pdf_page`" +msgstr "" + +#: ../../the-basics.rst:775 02e8677a9ee44d47acfb3845cbeb17f3 +msgid "Combining Single Pages" +msgstr "単一のページを結合する" + +#: ../../the-basics.rst:777 2f61c36415a847069ab18578a22a28b3 +msgid "" +"This deals with joining |PDF| pages to form a new |PDF| with pages each " +"combining two or four original ones (also called \"2-up\", \"4-up\", " +"etc.). This could be used to create booklets or thumbnail-like overviews." +msgstr "" +"これは、2つまたは4つの元のページを組み合わせた新しい |PDF| " +"を作成することを意味します(または「2-up」、「4-up」などとも呼ばれます)。これは、ブックレットやサムネイルのような概要を作成するために使用できます。" + +#: ../../the-basics.rst:833 8a7f8d78c24e4f378fee6b2230783c26 +msgid "|PDF| Encryption & Decryption" +msgstr "|PDF| の暗号化と復号化" + +#: ../../the-basics.rst:836 71dc729191e040fd8118fee77ba6a2a8 +msgid "" +"Starting with version 1.16.0, |PDF| decryption and encryption (using " +"passwords) are fully supported. You can do the following:" +msgstr "バージョン1.16.0から、PDFの暗号化と復号化(パスワードを使用)が完全にサポートされています。以下のことができます:" + +#: ../../the-basics.rst:838 dd359c928f974ab5947e47392afdc2a9 +msgid "" +"Check whether a document is password protected / (still) encrypted " +"(:attr:`Document.needs_pass`, :attr:`Document.is_encrypted`)." +msgstr "" +"ドキュメントがパスワード保護されているか、(まだ)暗号化されているかを確認する(:attr:`Document.needs_pass`, " +":attr:`Document.is_encrypted`)。" + +#: ../../the-basics.rst:839 f16ee2a615fe4984bb226ff54db0fe8f +msgid "Gain access authorization to a document (:meth:`Document.authenticate`)." +msgstr "ドキュメントへのアクセス権を取得する(:meth:`Document.authenticate`)。" + +#: ../../the-basics.rst:840 6a05b415f16d40768f4e1564236d6231 +msgid "" +"Set encryption details for PDF files using :meth:`Document.save` or " +":meth:`Document.write` and" +msgstr ":meth:`Document.save` または :meth:`Document.write` を使用して、PDFファイルの暗号化詳細を設定する。" + +#: ../../the-basics.rst:842 a28670df924945ff88d6be01321e1a30 +msgid "decrypt or encrypt the content" +msgstr "内容の復号化または暗号化" + +#: ../../the-basics.rst:843 d73094fe4cc9466a9bf3378d56401b78 +msgid "set password(s)" +msgstr "パスワードの設定" + +#: ../../the-basics.rst:844 5aed08cd9fcc43ab8d5b06fb650a0ca6 +msgid "set the encryption method" +msgstr "暗号化方式の設定" + +#: ../../the-basics.rst:845 76a02a01b3e04059b3deb41a2a8ffb73 +msgid "set permission details" +msgstr "権限の詳細設定" + +#: ../../the-basics.rst:847 7fca3b11f52b4b0ca3876005bd500c57 +msgid "A PDF document may have two different passwords:" +msgstr "PDF文書には2つの異なるパスワードが存在する場合があります:" + +#: ../../the-basics.rst:849 73c3a5f74efb4167905a89f8f1383c0a +msgid "" +"The **owner password** provides full access rights, including changing " +"passwords, encryption method, or permission detail." +msgstr "オーナーパスワード:パスワードを変更したり、暗号化方法を変更したり、権限の詳細を含む完全なアクセス権を提供します。" + +#: ../../the-basics.rst:850 e2fb43f6fcdb49739207db257e3f97bb +msgid "" +"The **user password** provides access to document content according to " +"the established permission details. If present, opening the |PDF| in a " +"viewer will require providing it." +msgstr "ユーザーパスワード:文書の内容にアクセスするための権限詳細に基づいてアクセスを提供します。存在する場合、ビューアでPDFを開く際にユーザーパスワードを入力する必要があります。" + +#: ../../the-basics.rst:852 c66bf0cbf4934d79a253d8f6b43d51f4 +msgid "" +"Method :meth:`Document.authenticate` will automatically establish access " +"rights according to the password used." +msgstr "メソッド :meth:`Document.authenticate` は、使用されるパスワードに基づいて自動的にアクセス権を確立します。" + +#: ../../the-basics.rst:854 8d09fb5167d443ef9ee672510c3def18 +msgid "" +"The following snippet creates a new |PDF| and encrypts it with separate " +"user and owner passwords. Permissions are granted to print, copy and " +"annotate, but no changes are allowed to someone authenticating with the " +"user password." +msgstr "" +"以下のスニペットは新しい |PDF| " +"を作成し、別々のユーザーパスワードとオーナーパスワードで暗号化します。印刷、コピー、注釈付けの権限が与えられますが、ユーザーパスワードで認証するユーザーに対しては変更は許可されません" + +#: ../../the-basics.rst:888 10c5018907dd4496be681ebd8e664fd6 +msgid "" +"Opening this document with some viewer (Nitro Reader 5) reflects these " +"settings:" +msgstr "このドキュメントをいくつかのビューア(Nitro Reader 5など)で開くと、これらの設定が反映されます:" + +#: ../../the-basics.rst:892 03d88819ea404f0aaf81ffe5d60d7425 +msgid "" +"**Decrypting** will automatically happen on save as before when no " +"encryption parameters are provided." +msgstr "暗号化パラメータが提供されていない場合、保存時に自動的に復号化されます。" + +#: ../../the-basics.rst:894 f367fcac22c6414ab97981825f130856 +msgid "" +"To **keep the encryption method** of a PDF save it using " +"`encryption=pymupdf.PDF_ENCRYPT_KEEP`. If `doc.can_save_incrementally() " +"== True`, an incremental save is also possible." +msgstr "" +"PDFの暗号化方法を保持するには、`encryption=pymupdf.PDF_ENCRYPT_KEEP` を使用して保存します。また、 " +"`doc.can_save_incrementally() == True` であれば、増分保存も可能です" + +#: ../../the-basics.rst:896 beec27387fd7422bb3e063049ff22429 +msgid "" +"To **change the encryption method** specify the full range of options " +"above (`encryption`, `owner_pw`, `user_pw`, `permissions`). An " +"incremental save is **not possible** in this case." +msgstr "暗号化方法を変更する場合は、上記のすべてのオプション(`encryption`、`owner_pw`、`user_pw`、`permissions`)を指定します。この場合、増分保存はできません。" + +#: ../../the-basics.rst:900 20699b9f6451443d8b941958c154d7a7 +msgid ":meth:`Document.save`" +msgstr "" + +#: ../../the-basics.rst:909 d798da65fa514bd0aebc4068c084a7ca +msgid "Extracting Tables from a :title:`Page`" +msgstr "ページからのテーブルの抽出" + +#: ../../the-basics.rst:911 e4ef14dd9323461295fcdab920a24b7e +msgid "Tables can be found and extracted from any document :ref:`Page`." +msgstr "表はどのドキュメントの :ref:`Page` からも見つけて抽出できます。" + +#: ../../the-basics.rst:930 fc83ee37b6f443bc9104c718aa70c0ec +#, fuzzy +msgid ":meth:`Page.find_tables`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:935 ae116fead03d40f4ae9e9e1fa4dd5657 +msgid "" +"There is also the `pdf2docx extract tables method`_ which is capable of " +"table extraction if you prefer." +msgstr "お好みの場合、テーブル抽出が `可能なpdf2docxのextract tablesメソッド`_ もあります。" + +#: ../../the-basics.rst:944 09cbe8ebd73f4fdbae255f4eb4ac88d9 +msgid "Getting Page Links" +msgstr "ページリンクの取得" + +#: ../../the-basics.rst:946 8f8e76066ac34e6288f125de53c52324 +msgid "Links can be extracted from a :ref:`Page` to return :ref:`Link` objects." +msgstr ":ref:`Page` からリンクを抽出して :ref:`Link` オブジェクトを返すことができます。" + +#: ../../the-basics.rst:966 4b1b5f7afbde4704808ac3807d07675a +#, fuzzy +msgid ":meth:`Page.first_link`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:975 b3aade8e69434effb9c03ef1b44c1f10 +msgid "Getting All Annotations from a Document" +msgstr "ドキュメントからすべての注釈を取得する" + +#: ../../the-basics.rst:977 6801cadfb5354b0ab04f7979d5a356ba +msgid "" +"Annotations (:ref:`Annot`) on pages can be retrieved with the " +"`page.annots()` method." +msgstr "ページ上の注釈 (:ref:`Annot`) は、`page.annots()` メソッドを使用して取得できます。" + +#: ../../the-basics.rst:992 bf4d519f3c994f33b280f0de93018463 +#, fuzzy +msgid ":meth:`Page.annots`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:1002 c5273dfabc3043a8b9607b2a9d28ce43 +msgid "Redacting content from a |PDF|" +msgstr "|PDF| からコンテンツを塗りつぶす" + +#: ../../the-basics.rst:1004 e030189f77db4e8983c5a6528654fd34 +msgid "" +"Redactions are special types of annotations which can be marked onto a " +"document page to denote an area on the page which should be securely " +"removed. After marking an area with a rectangle then this area will be " +"marked for *redaction*, once the redaction is *applied* then the content " +"is securely removed." +msgstr "マーキングされた領域に矩形を使用して、ページ上の削除されるべき領域を示すために、赤字が付けられる特別なアノテーションです。領域に矩形をマークした後、その領域は赤字としてマークされ、赤字が適用されると、そのコンテンツは安全に削除されます。" + +#: ../../the-basics.rst:1006 576dffbcb6454f419fb44e03a6766f97 +msgid "" +"For example if we wanted to redact all instances of the name \"Jane Doe\"" +" from a document we could do the following:" +msgstr "たとえば、ドキュメントから名前「ジェーン・ドー」のすべてのインスタンスを塗りつぶしたい場合は、次のようにします:" + +#: ../../the-basics.rst:1034 512a331e2f6a4420af87d029a290261c +msgid "" +"Another example could be redacting an area of a page, but not to redact " +"any line art (i.e. vector graphics) within the defined area, by setting a" +" parameter flag as follows:" +msgstr "別の例として、ページの領域を塗りつぶすが、定義された領域内の線画(ベクトルグラフィックス)を塗りつぶさないように、パラメータフラグを設定することができます。" + +#: ../../the-basics.rst:1065 eed2e214f7454d77b9f11502e75e71b6 +msgid "" +"Once a redacted version of a document is saved then the redacted content " +"in the |PDF| is *irretrievable*. Thus, a redacted area in a document " +"removes text and graphics completely from that area." +msgstr "" +"文書の塗りつぶし済みバージョンを保存すると、|PDF| 内の塗りつぶされたコンテンツは *取り戻すことができなくなります* " +"。したがって、文書内の塗りつぶされた領域は、その領域からテキストとグラフィックスを完全に削除します。" + +#: ../../the-basics.rst:1072 24199c7a643e40f981bc9a2d6b9a76f5 +msgid "" +"The are a few options for creating and applying redactions to a page, for" +" the full API details to understand the parameters to control these " +"options refer to the API reference." +msgstr "ページに赤字を作成して適用するためのいくつかのオプションがあります。これらのオプションを制御するためのパラメータを理解するためには、完全なAPI詳細についてAPIリファレンスを参照してください。" + +#: ../../the-basics.rst:1076 5620f79d90484e0c84d9f1e6cf483ef2 +#, fuzzy +msgid ":meth:`Page.add_redact_annot`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:1078 2238e86dba64405e99bc6de9a1e29266 +#, fuzzy +msgid ":meth:`Page.apply_redactions`" +msgstr ":meth:`Page.get_text`" + +#: ../../the-basics.rst:1088 64e88b96ce2b4df99e9d8c97cae10c8f +msgid "Converting PDF Documents" +msgstr "PDF ドキュメントの変換" + +#: ../../the-basics.rst:1090 c9b689d1ca1242deaf0ecf74d670aa7b +msgid "" +"We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-" +"docx** library to provide simple document conversion from |PDF| to " +"**DOCX** format." +msgstr "" +"PyMuPDF を使用する pdf2docx_ ライブラリと、|PDF| から **DOCX** 形式への簡単なドキュメント変換を提供する " +"**python-docx** ライブラリをお勧めします。" + +#: ../../footer.rst:60 3aa3d5fcda7349e5a5c5339a71002610 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "Supported File Types" +#~ msgstr "サポートされているファイルタイプ" + +#~ msgid "|PyMuPDF| supports the following file types:" +#~ msgstr "|PyMuPDF| は以下のファイルタイプをサポートしています:" + +#~ msgid "" +#~ "Opening with :index:`a Wrong File " +#~ "Extension `" +#~ msgstr "間違ったファイル拡張子でファイルを開く" + +#~ msgid "" +#~ "If you have a document with a " +#~ "wrong file extension for its type, " +#~ "you can still correctly open it." +#~ msgstr "もしドキュメントのファイル拡張子がその種類に対して間違っていても、正しく開くことができます。" + +#~ msgid "Assume that *\"some.file\"* is actually an XPS. Open it like so:" +#~ msgstr "*\"some.file\"* が実際にXPSファイルだと仮定して、以下のようにして開いてください:" + +#~ msgid "" +#~ "There are many file types beyond " +#~ "|PDF| which can be opened by " +#~ "|PyMuPDF|, for more details see the " +#~ "list of :ref:`supported file " +#~ "types`." +#~ msgstr "" +#~ "「PyMuPDF」で開くことができるのは、|PDF| " +#~ "に限らず、多くのファイルタイプがあります。詳細については、:ref:`サポートされているファイルタイプのリスト`" +#~ " をご覧ください。" + +#~ msgid "" +#~ "|PyMuPDF| itself does not try to " +#~ "determine the file type from the " +#~ "file contents. **You** are responsible " +#~ "for supplying the filetype info in " +#~ "some way -- either implicitly via " +#~ "the file extension, or explicitly as " +#~ "shown. There are pure :title:`Python` " +#~ "packages like `filetype " +#~ "`_ that help you" +#~ " doing this. Also consult the " +#~ ":ref:`Document` chapter for a full " +#~ "description." +#~ msgstr "" +#~ "PyMuPDF自体は、ファイルの内容からファイルタイプを判断しようとはしません。ファイルの拡張子などを通じて暗黙的に、または明示的にファイルタイプ情報を提供する責任があります。`filetype" +#~ " `_ " +#~ "などの純粋なPythonパッケージがこのような操作をサポートしています。また、詳細な説明についてはドキュメントの章を参照してください。" + +#~ msgid "" +#~ "If |PyMuPDF| encounters a file with " +#~ "an unknown / missing extension, it " +#~ "will try to open it as a " +#~ "|PDF|. So in these cases there is" +#~ " no need for additional precautions. " +#~ "Similarly, for memory documents, you can" +#~ " just specify `doc=pymupdf.open(stream=mem_area)` " +#~ "to open it as a |PDF| document." +#~ msgstr "" +#~ "|PyMuPDF| " +#~ "が不明な/欠落している拡張子のファイルに遭遇した場合、それをPDFとして開こうと試みます。したがって、これらの場合は追加の注意が必要ありません。同様に、メモリ上のドキュメントの場合は、`doc=pymupdf.open(stream=mem_area)`" +#~ " と指定するだけでPDFドキュメントとして開くことができます" + +#~ msgid "" +#~ "If you attempt to open an " +#~ "unsupported file then |PyMuPDF| will " +#~ "throw a file data error." +#~ msgstr "サポートされていないファイルを開こうとした場合、PyMuPDFはファイルデータエラーをスローします。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/tools.mo b/docs/locales/ja/LC_MESSAGES/tools.mo new file mode 100644 index 000000000..6634f6fda Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/tools.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/tools.po b/docs/locales/ja/LC_MESSAGES/tools.po new file mode 100644 index 000000000..4f1b1fe6c --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/tools.po @@ -0,0 +1,794 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 fa4cb1c40839408c9c8cd07e9e01bb93 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 0eadbcdf62a345f492e6c5df071271b8 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 ac94c9f7137942e6a95a87c8a8567dd6 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../tools.rst:6 630133a66584427d8841e6ac73cd74b2 +msgid "Tools" +msgstr "Tools (ツール)" + +#: ../../tools.rst:8 9874cd63550144bdb2077af42f68c67f +msgid "" +"This class is a collection of utility methods and attributes, mainly " +"around memory management. To simplify and speed up its use, it is " +"automatically instantiated under the name *TOOLS* when PyMuPDF is " +"imported." +msgstr "" +"このクラスは、メモリ管理を中心としたユーティリティメソッドと属性のコレクションです。使用を簡素化し、高速化するために、PyMuPDFがインポートされると、自動的に" +" *TOOLS* という名前でインスタンス化されます。" + +#: ../../tools.rst:11 99d510145a8d481b814d6864371fd664 +msgid "**Method / Attribute**" +msgstr "**メソッド/属性**" + +#: ../../tools.rst:11 5a58a92212e44a278c9d89c56b3e1210 +msgid "**Description**" +msgstr "**説明**" + +#: ../../tools.rst:13 2b964d526bc34dbca89696e9a7f8a649 +msgid ":meth:`Tools.gen_id`" +msgstr "" + +#: ../../tools.rst:13 2e23b218a694403f803ca3d4641d1e46 +msgid "generate a unique identifier" +msgstr "一意の識別子を生成します。" + +#: ../../tools.rst:14 24f42327e36744579573d798b230bef0 +msgid ":meth:`Tools.store_shrink`" +msgstr "" + +#: ../../tools.rst:14 b4c3bfa744874fc3a556cbd6b8e9b6b8 +msgid "shrink the storables cache [#f1]_" +msgstr "格納可能キャッシュを縮小します [#f1]_。" + +#: ../../tools.rst:15 bad8f3d46e5b4afdb102c4e7bf50d072 +msgid ":meth:`Tools.mupdf_warnings`" +msgstr "" + +#: ../../tools.rst:15 a2b9a9e372e04cecb473e320064f1301 +msgid "return the accumulated MuPDF warnings" +msgstr "蓄積されたMuPDFの警告を返します。" + +#: ../../tools.rst:16 965bf5733d4a437c9bf6499bb67ce938 +msgid ":meth:`Tools.mupdf_display_errors`" +msgstr "" + +#: ../../tools.rst:16 95fb9fea93424795be84f9bccfc5a2c6 +msgid "control whether MuPDF errors are displayed as messages." +msgstr "" + +#: ../../tools.rst:17 aae25f7b27504eacb2dcb5df61f4be65 +msgid ":meth:`Tools.mupdf_display_warnings`" +msgstr "" + +#: ../../tools.rst:17 0ec8dcf6c973477abe4aa02ed3c8b605 +msgid "control whether MuPDF warnings are displayed as messages." +msgstr "" + +#: ../../tools.rst:18 53a94e9b9d1044b394e412402f3dcf71 +msgid ":meth:`Tools.reset_mupdf_warnings`" +msgstr "" + +#: ../../tools.rst:18 39ad9b132aad445c8698e388b85423ed +#, fuzzy +msgid "empty MuPDF warnings/errors message buffer." +msgstr "MuPDFの警告メッセージバッファを空にします。" + +#: ../../tools.rst:19 6719fb8425af4227a2a2717806cd2872 +msgid ":meth:`Tools.set_aa_level`" +msgstr "" + +#: ../../tools.rst:19 084c6bfe822c4d048572d1bb4db01b5d +msgid "set the anti-aliasing values" +msgstr "アンチエイリアシングの値を設定します。" + +#: ../../tools.rst:20 30023363ff6e4e8aa20333cd744dab27 +msgid ":meth:`Tools.set_annot_stem`" +msgstr "" + +#: ../../tools.rst:20 104ce28d382346ac85d8db2cfcb5da80 +msgid "set the prefix of new annotation / link ids" +msgstr "新しい注釈/リンクIDのプレフィックスを設定します。" + +#: ../../tools.rst:21 de68f470119b4a60b48b7054e2f46a0c +msgid ":meth:`Tools.set_small_glyph_heights`" +msgstr "" + +#: ../../tools.rst:21 f69f8f062c094ffcac57dd21b3725054 +msgid "search and extract using small bbox heights" +msgstr "小さなbbox高さを使用して検索および抽出します。" + +#: ../../tools.rst:22 a5f65f4e49c744f2b64fd47f8f963187 +msgid ":meth:`Tools.set_subset_fontnames`" +msgstr "" + +#: ../../tools.rst:22 d7a4ef524baa414c934afd124e2039c8 +msgid "control suppression of subset fontname tags" +msgstr "サブセットフォント名タグの抑制を制御します。" + +#: ../../tools.rst:23 f7f424f91b824b6fbc8d16f177210157 +msgid ":meth:`Tools.show_aa_level`" +msgstr "" + +#: ../../tools.rst:23 de220ac6db8f43b590219bfc76ada785 +msgid "return the anti-aliasing values" +msgstr "アンチエイリアシングの値を返します。" + +#: ../../tools.rst:24 9822bd4c433c4e73a4a62e8e2fd8d55d +msgid ":meth:`Tools.unset_quad_corrections`" +msgstr "" + +#: ../../tools.rst:24 1ebfd2086c444331b2bf1d386b71dc90 +msgid "disable PyMuPDF-specific code" +msgstr "PyMuPDF固有のコードを無効にします。" + +#: ../../tools.rst:25 b93af0b1adae41ab8ef6adfe0264b3d7 +msgid ":attr:`Tools.fitz_config`" +msgstr "" + +#: ../../tools.rst:25 4aabf943204d404e9bcd0659b004b61a +msgid "configuration settings of PyMuPDF" +msgstr "PyMuPDFの設定設定" + +#: ../../tools.rst:26 bda50dea0b1b426db8d540c41f8e225d +msgid ":attr:`Tools.store_maxsize`" +msgstr "" + +#: ../../tools.rst:26 d98d3a57e7f648caac4f8c85c2aa73dc +msgid "maximum storables cache size" +msgstr "最大格納可能キャッシュサイズ" + +#: ../../tools.rst:27 832df81af8a44bf994b2927588b125b4 +msgid ":attr:`Tools.store_size`" +msgstr "" + +#: ../../tools.rst:27 1e9ca667256f4ce88906ba326e637a59 +msgid "current storables cache size" +msgstr "現在の格納可能キャッシュサイズ" + +#: ../../tools.rst:30 876332768bb84673a877ceb4dece1450 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../tools.rst:36 65fde22db17c4647b856922e599073a4 +msgid "" +"A convenience method returning a unique positive integer which will " +"increase by 1 on every invocation. Example usages include creating unique" +" keys in databases - its creation should be faster than using timestamps " +"by an order of magnitude." +msgstr "1回の呼び出しごとに1ずつ増加する一意の正の整数を返す便利なメソッドです。使用例には、データベース内で一意のキーを作成することが含まれます。タイムスタンプを使用するよりも作成が10倍高速であるべきです。" + +#: ../../tools.rst:38 ab15f82a042e4ed3b078c757f4e63b86 +msgid "" +"MuPDF has dropped support for this in v1.14.0, so we have re-implemented " +"a similar function with the following differences:" +msgstr "MuPDFはv1.14.0でこれをサポートを中止したため、次の違いを持つ類似の関数を再実装しました。" + +#: ../../tools.rst:40 84cae01ccf524246ac7b4d9427698005 +msgid "" +"It is not part of MuPDF's global context and not threadsafe (not an issue" +" because we do not support threads in PyMuPDF anyway)." +msgstr "これはMuPDFのグローバルコンテキストの一部ではなく、スレッドセーフではありません(とにかくPyMuPDFではスレッドをサポートしていないため、これは問題ではありません)。" + +#: ../../tools.rst:41 f05a3f0eec6c47eaab32b3d7dbadfa32 +msgid "" +"It is implemented as *int*. This means that the maximum number is " +"*sys.maxsize*. Should this number ever be exceeded, the counter starts " +"over again at 1." +msgstr "" +"これはintとして実装されています。つまり、最大値は *sys.maxsize* " +"です。この数値がいつか超過される場合、カウンターは1から再開します。" + +#: ../../tools.rst 17bbc4e0e9b1480aab24e4c8d6b901fd +#: 2c7d5afc0c614f2e947541283319a8aa 3f0e5e23a1b3401fab7534a9ffddfb6f +#: 4bceb68ff0cf4c4db44a34b626d14422 4bfaf26ceea2422398c767ec7da4f944 +#: 656f7c4b2db04ef1928e253ee58fbda8 783c6110b0e84fa69ec1e999f0763115 +#: 886de849a3b644a3b7483f4f34d47ab1 9956690247394b5abfa7987d9cb2942f +#: ae9fe9f8edaa4afa9830772bb949887b +msgid "Return type" +msgstr "戻り値:" + +#: ../../tools.rst 05be31d115d84df1af6eec3998b6c813 +#: 1b38a72665554ad2b7b31cd24642d09f 322b991feded4473a556bdb214b03b1d +#: 47b093107003487e9ca47b7610cc6159 63fec7ceb4ea496eba269d589582ac36 +#: a18d16ae4a364a9d863acd8c74a5fa94 b4f07341f4ad48e897984538401987b3 +#: c6c09e9c896c4a13b41f9be7c7bd6ff6 db36bae168a54d46838b57f99adb8f1d +msgid "Returns" +msgstr "返り値:" + +#: ../../tools.rst:44 d04c5dc089994e268783aceaffad24d8 +msgid "a unique positive integer." +msgstr "一意の正の整数。" + +#: ../../tools.rst:49 5ebeca1825d746f39692eebb86cb9f42 +msgid "New in v1.18.6" +msgstr "v1.18.6で新登場" + +#: ../../tools.rst:51 8b6fd71202b24c318c21b52e9aabae8c +msgid "Set or inquire the prefix for the id of new annotations, fields or links." +msgstr "新しい注釈、フィールド、またはリンクのIDのプレフィックスを設定または問い合わせます。" + +#: ../../tools.rst 1ecb667b90fb4a4baa562619c4cf215f +#: 658ec43b52b04330bf7a40f38e909bbc 95eef4039ef94ba79c670022599c654c +#: b3ae48d8f1a449ddbe4b6a9ae98cf4d9 c299a5cad9db4f4c84dde48c1ef4fc58 +#: c4311092aae34e4691fb6e3995c50792 c79cc99a8ac340c2a6a9adc2b3fe239a +#: ca6497050d944cf788e55eb72bc9f7d1 f54117de12074455b85ab6c94ff7dd5b +msgid "Parameters" +msgstr "パラメータ" + +#: ../../tools.rst:53 3501d21aa8734541bc1aa99a8ed5190c +msgid "" +"if omitted, the current value is returned, default is \"fitz\". " +"Annotations, fields / widgets and links technically are subtypes of the " +"same type of object (`/Annot`) in PDF documents. An `/Annot` object may " +"be given a unique identifier within a page. For each of the applicable " +"subtypes, PyMuPDF generates identifiers \"stem-Annn\", \"stem-Wnnn\" or " +"\"stem-Lnnn\" respectively. The number \"nnn\" is used to enforce the " +"required uniqueness." +msgstr "" +"省略された場合、現在の値が返され、デフォルトは \"fitz\" " +"です。注釈、フィールド/ウィジェット、およびリンクは技術的にはPDF文書内の同じ種類のオブジェクト(`/Annot`)のサブタイプです。`/Annot`" +" オブジェクトは、ページ内で一意の識別子を持つことができます。適用可能なサブタイプごとに、PyMuPDFはそれぞれ \"stem-Annn" +"\"、\"stem-Wnnn\"、または \"stem-Lnnn\" の識別子を生成します。番号 \"nnn\" " +"は必要な一意性を強制するために使用されます。" + +#: ../../tools.rst:56 b611d59a32fa4723ae6eea8e46c1e4fc +msgid "the current value." +msgstr "現在の値。" + +#: ../../tools.rst:61 ed4a7b571b5840db82e999c073d641bb +msgid "New in v1.18.5" +msgstr "v1.18.5で新登場" + +#: ../../tools.rst:63 d110db042c104cd9a70ee2ba0ea17274 +msgid "" +"Set or inquire reduced bbox heights in text extract and text search " +"methods." +msgstr "テキスト抽出およびテキスト検索メソッドでのbboxの高さを縮小するかどうかを設定または問い合わせます。" + +#: ../../tools.rst:65 9dacdc2684a04823aaf2faa1c6a84582 +msgid "" +"if omitted or `None`, the current setting is returned. For other values " +"the *bool()* function is applied to set a global variable. If `True`, " +":meth:`Page.search_for` and :meth:`Page.get_text` methods return " +"character, span, line or block bboxes that have a height of *font size*. " +"If `False` (standard setting when PyMuPDF is imported), bbox height will " +"be based on font properties and normally equal *line height*." +msgstr "" +"省略された場合または `None` の場合、現在の設定が返されます。他の値の場合、*bool()* " +"関数が適用され、グローバル変数を設定します。`True` の場合、:meth:`Page.search_for` および " +":meth:`Page.get_text` メソッドは *フォントサイズ* " +"の高さを持つ文字、スパン、行、またはブロックのbboxを返します。`False` " +"の場合(PyMuPDFがインポートされたときの標準設定)、bboxの高さはフォントのプロパティに基づき、通常は *行の高さ* と同じです。" + +#: ../../tools.rst:68 ../../tools.rst:81 ../../tools.rst:95 +#: 3d9f991f869c4e3687694b4b5d83e20f 8fa94c128ef84d4db600a970cc431f03 +#: fb141d29fa904b54ac786f25cee529a0 +msgid "``True`` or ``False``." +msgstr "``True`` または ``False``。" + +#: ../../tools.rst:70 6ff2af93fffb41d1952b1726635074e7 +msgid "" +"Text extraction options \"xml\", \"xhtml\" and \"html\", which directly " +"wrap MuPDF code, are not influenced by this." +msgstr "直接MuPDFコードをラップするテキスト抽出オプション「xml」、「xhtml」、「html」は、これに影響を受けません。" + +#: ../../tools.rst:74 3e2f146855c24da3bc5620a6dbf0fe16 +msgid "New in v1.18.9" +msgstr "バージョン1.18.9で追加されました" + +#: ../../tools.rst:76 a47cf06efd8f42ed94738ff03d3423d2 +msgid "Control suppression of subset fontname tags in text extractions." +msgstr "テキスト抽出におけるサブセットフォント名のタグを抑制する制御を行います。" + +#: ../../tools.rst:78 c87edd954d4c42e6b4fdd9829d79cd37 +msgid "" +"if omitted / `None`, the current setting is returned. Arguments " +"evaluating to `True` or `False` set a global variable. If `True`, options" +" \"dict\", \"json\", \"rawdict\" and \"rawjson\" will return e.g. " +"`\"NOHSJV+Calibri-Light\"`, otherwise only `\"Calibri-Light\"` (the " +"default). The setting remains in effect until changed again." +msgstr "" +"省略された場合 `None`、現在の設定が返されます。`True` または `False` " +"の値を評価して、グローバル変数を設定します。`True` の場合、オプション " +"\"dict\"、\"json\"、\"rawdict\"、\"rawjson\" は例えば `\"NOHSJV+Calibri-Light\"`" +" のように返され、それ以外の場合はデフォルトの `\"Calibri-Light\"` のみが返されます。設定は再度変更されるまで有効です。" + +#: ../../tools.rst:83 82a85fb54bd7436c925ba3b349ae06ef +msgid "" +"Except mentioned above, no other text extraction variants are influenced " +"by this. This is especially true for the options \"xml\", \"xhtml\" and " +"\"html\", which are based on MuPDF code. They extract the font name " +"`\"Calibri-Light\"`, or even just the **family** name -- `Calibri` in " +"this example." +msgstr "" +"上記に記載されている以外のテキスト抽出バリアントには影響しません。特に、MuPDFコードに基づくオプション " +"\"xml\"、\"xhtml\"、\"html\" には影響しません。これらはフォント名 `\"Calibri-" +"Light\"`、またはこの例の場合はファミリー名 `Calibri` のみを抽出します。" + +#: ../../tools.rst:88 149b8e463c8f481c96e3b70333a749dc +msgid "New in v1.18.10" +msgstr "バージョン1.18.10で追加されました" + +#: ../../tools.rst:90 159ffb24af174c1f8c260dc38669bafc +msgid "" +"Enable / disable PyMuPDF-specific code, that tries to rebuild valid " +"character quads when encountering nonsense in :meth:`Page.get_text` text " +"extractions. This code depends on certain font properties (ascender and " +"descender), which do not exist in rare situations and cause segmentation " +"faults when trying to access them. This method sets a global parameter in" +" PyMuPDF, which suppresses execution of this code." +msgstr "" +":meth:`Page.get_text` " +"テキスト抽出で無意味なものを検出した際に、有効な文字の四角形を再構築しようとするPyMuPDF固有のコードの有効化/無効化を行います。このコードは、存在しない場合があるフォントの特性(ascenderおよびdescender)に依存しており、これらの特性にアクセスしようとするとセグメンテーション違反が発生します。このメソッドはPyMuPDF内のグローバルパラメータを設定し、このコードの実行を抑制します。" + +#: ../../tools.rst:92 e6745e3e0ce7442b9f04193751691b0a +msgid "" +"if omitted or `None`, the current setting is returned. For other values " +"the *bool()* function is applied to set a global variable. If `True`, " +"PyMuPDF will not try to access the resp. font properties and use values " +"`ascender=0.8` and `descender=-0.2` instead." +msgstr "" +"省略された場合、または `None` 、現在の設定が返されます。他の値の場合、*bool()* " +"関数が適用され、グローバル変数を設定します。`True` の場合、PyMuPDFは該当するフォントの特性にアクセスしようとせず、代わりに " +"`ascender=0.8` および `descender=-0.2` の値を使用します。" + +#: ../../tools.rst:100 4c85e3216c354121a73b16cd1b293077 +msgid "Reduce the storables cache by a percentage of its current size." +msgstr "現在のサイズを基準に、ストレージキャッシュのサイズを指定した割合で減少させます。" + +#: ../../tools.rst:102 7618f829b73f402e918ae962dfb6a9f0 +msgid "" +"the percentage of current size to free. If 100+ the store will be " +"emptied, if zero, nothing will happen. MuPDF's caching strategy is " +"\"least recently used\", so low-usage elements get deleted first." +msgstr "現在のサイズの何パーセントを解放するかを指定します。100以上の場合、ストアは空になります。むしろ、何も変更されません。MuPDFのキャッシング戦略は「最後に使用されたもの」なので、使用頻度の低い要素が最初に削除されます。" + +#: ../../tools.rst:105 b452872341864da4b7fbb34a1df97034 +msgid "" +"the new current store size. Depending on the situation, the size " +"reduction may be larger than the requested percentage." +msgstr "新しい現在のストアサイズ。状況に応じて、要求された割合よりもサイズが大幅に削減される場合があります。" + +#: ../../tools.rst:109 ../../tools.rst:119 b2c6c42229034879a13c08954bd588f3 +#: b6ae51cffc9c4a38a66643a43141bf46 +msgid "New in version 1.16.14" +msgstr "バージョン1.16.14で新たに追加" + +#: ../../tools.rst:111 e883965495954aed98f30f83e9079e4f +msgid "" +"Return the current anti-aliasing values. These values control the " +"rendering quality of graphics and text elements." +msgstr "現在のアンチエイリアシング値を返します。これらの値は、グラフィックスとテキスト要素のレンダリング品質を制御します。" + +#: ../../tools.rst:114 d4f138c5562e4a23a0da982a9c981f64 +msgid "" +"A dictionary with the following initial content: `{'graphics': 8, 'text':" +" 8, 'graphics_min_line_width': 0.0}`." +msgstr "" +"以下の初期内容を持つ辞書: `{'graphics': 8, 'text': 8, 'graphics_min_line_width': " +"0.0}`。" + +#: ../../tools.rst:121 debfdfadcaeb4efd8e0c271b9fdc0a06 +msgid "" +"Set the new number of bits to use for anti-aliasing. The same value is " +"taken currently for graphics and text rendering. This might change in a " +"future MuPDF release." +msgstr "アンチエイリアシングに使用するビット数を設定します。現在はグラフィックスとテキストのレンダリングに同じ値が使用されます。将来のMuPDFリリースで変更されるかもしれません。" + +#: ../../tools.rst:123 3afd6a391b304cf9a27f036dd2a8eaea +msgid "" +"an integer ranging between 0 and 8. Value outside this range will be " +"silently changed to valid values. The value will remain in effect " +"throughout the current session or until changed again." +msgstr "0から8までの範囲の整数。この範囲外の値は静かに有効な値に変更されます。この値は、現在のセッション全体または再度変更されるまで有効です。" + +#: ../../tools.rst:128 ../../tools.rst:167 6647accef2ce43b5b05bcd3953c5d30a +#: e0edaaaf95744a8f9baf9b72c5df725a +msgid "New in version 1.16.0" +msgstr "バージョン1.16.0で新たに追加" + +#: ../../tools.rst:130 1c026c1906724ca1b13646c1e61293e9 +msgid "Empty MuPDF warnings message buffer." +msgstr "MuPDFの警告メッセージバッファを空にします。" + +#: ../../tools.rst:135 df4c55d8bc2a4f6ca7f828393cd543b2 +#, fuzzy +msgid "Control whether MuPDF errors should be displayed as |PyMuPDF| messages." +msgstr "MuPDFのエラーを表示するかどうかを表示または設定します。" + +#: ../../tools.rst:138 ../../tools.rst:154 73102cae996e4e1ca4fda07841b0a537 +#: c281bd2c631d4437a13966f361de0044 +msgid "If `None`, the current setting is left unchanged." +msgstr "" + +#: ../../tools.rst:139 4465af6643354c54be13db87f1577e1c +msgid "" +"Otherwise changes the current setting to `bool(value)`; if ``True``, " +"future MuPDF errors will be shown as :ref:`Messages`." +msgstr "" + +#: ../../tools.rst:141 014bd4cfe67c4bb3bcc80ada14574ac7 +msgid "" +"Regardless of this setting, MuPDF errors will always be stored in the " +"warnings store." +msgstr "" + +#: ../../tools.rst:142 ../../tools.rst:158 8670777bc54b406d9b8d60d6f5053a03 +#: 8d3b08113ac444d1b78d9668a9fc41e5 +msgid "Upon import of |PyMuPDF| this value is ``True``." +msgstr "" + +#: ../../tools.rst:144 ../../tools.rst:160 83494dc367d6491db6461f478975266d +#: a414e7de4bda4732bfea1df4dd2a1c93 +msgid "The current setting as ``True`` or ``False``." +msgstr "" + +#: ../../tools.rst:146 ../../tools.rst:162 80a5617a5563495a9a4817dcf619cea1 +#: dddb4d52d6d14c31b7f962bd3d1b949d +msgid "New in version 1.16.8" +msgstr "バージョン1.16.8で新たに追加" + +#: ../../tools.rst:151 af5f9e3136c24a14964064e6626325cf +msgid "Control whether MuPDF warnings should be displayed as |PyMuPDF| messages." +msgstr "" + +#: ../../tools.rst:155 ac6aeba8d7af46f58aaa48643288ede1 +msgid "" +"Otherwise changes the current setting to `bool(value)`; if ``True``, " +"future MuPDF warnings will be shown as :ref:`Messages`." +msgstr "" + +#: ../../tools.rst:157 634e03217c9946ba97edc0b2a56d297f +msgid "" +"Regardless of this setting, MuPDF warnings will always be stored in the " +"warnings store." +msgstr "" + +#: ../../tools.rst:169 ef0995c7931b4d9c8a892c119669e82d +msgid "" +"Return all stored MuPDF messages as a string with interspersed line-" +"breaks." +msgstr "すべての保存されたMuPDFメッセージを、改行が挿入された文字列として返します。" + +#: ../../tools.rst:171 d3fae7bc59084382bd08ee62f17f5669 +msgid "*(new in version 1.16.7)* whether to automatically empty the store." +msgstr "*(バージョン1.16.7で新たに追加)* ストアを自動的に空にするかどうか。" + +#: ../../tools.rst:176 ed88482c9c404c6eb13f4e44e68a6402 +msgid "" +"A dictionary containing the actual values used for configuring PyMuPDF " +"and MuPDF. Also refer to the installation chapter. This is an overview of" +" the keys, each of which describes the status of a support aspect." +msgstr "PyMuPDFとMuPDFを設定するために使用される実際の値を含む辞書です。インストールの章も参照してください。これは各キーを概説したもので、それぞれがサポートの側面のステータスを記述しています。" + +#: ../../tools.rst:179 983aafaaf72f47258fa885a81ffc31de +msgid "**Key**" +msgstr "**キー**" + +#: ../../tools.rst:179 2f18cc5c3f894e9e8c20b1cf745c4d35 +msgid "**Support included for ...**" +msgstr "**サポートが含まれているのは...**" + +#: ../../tools.rst:181 67e338ca234a4c2aa997416d0d1c5e17 +msgid "plotter-g" +msgstr "" + +#: ../../tools.rst:181 84bd5e00e5604b07b62a4f670c9ce45f +msgid "Gray colorspace rendering" +msgstr "グレーカラースペースのレンダリング" + +#: ../../tools.rst:182 76b2f02c816740679adf790799635016 +msgid "plotter-rgb" +msgstr "" + +#: ../../tools.rst:182 79231c9485fd412994543d8f4d42c1fa +msgid "RGB colorspace rendering" +msgstr "RGBカラースペースのレンダリング" + +#: ../../tools.rst:183 91d85a25304d4436bfeed78b823f708d +msgid "plotter-cmyk" +msgstr "" + +#: ../../tools.rst:183 924491f03aaf43d8a889798e8268c631 +msgid "CMYK colorspcae rendering" +msgstr "CMYKカラースペースのレンダリング" + +#: ../../tools.rst:184 b0fe29a8de9a4503a0c5bd27e7fc2b1e +msgid "plotter-n" +msgstr "" + +#: ../../tools.rst:184 e2367e803bd544e0a704fa3f6f4a377f +msgid "overprint rendering" +msgstr "オーバープリントのレンダリング" + +#: ../../tools.rst:185 40de005f09c64d48a68030bc62f04a7a +msgid "pdf" +msgstr "" + +#: ../../tools.rst:185 017318bcef5043d3a24dc1ac259bd96d +msgid "PDF documents" +msgstr "PDFドキュメント" + +#: ../../tools.rst:186 7879df7588cf4b4881667d16ef9b4044 +msgid "xps" +msgstr "" + +#: ../../tools.rst:186 554de734100a44889b05ea16bbbb4801 +msgid "XPS documents" +msgstr "XPSドキュメント" + +#: ../../tools.rst:187 e899c76673eb4de781ea52b20f05d668 +msgid "svg" +msgstr "" + +#: ../../tools.rst:187 2dfe37a142164ae3acbc5c646b670f4f +msgid "SVG documents" +msgstr "SVGドキュメント" + +#: ../../tools.rst:188 5f181d975a094cc29464ac24fe074950 +msgid "cbz" +msgstr "" + +#: ../../tools.rst:188 0bc4106af7494ee88794d1ae4bb7ff79 +msgid "CBZ documents" +msgstr "CBZドキュメント" + +#: ../../tools.rst:189 61552041ec884b1bbe274dbbda088bd1 +msgid "img" +msgstr "" + +#: ../../tools.rst:189 d12314e92993424f83174027a6ec725b +msgid "IMG documents" +msgstr "IMGドキュメント" + +#: ../../tools.rst:190 dd29953e1c66455a9923bf816e2a3282 +msgid "html" +msgstr "" + +#: ../../tools.rst:190 127d85fe33354ac7971afa414123d63a +msgid "HTML documents" +msgstr "HTMLドキュメント" + +#: ../../tools.rst:191 10cbf4f7a1cd41b9a80e083b7c002bed +msgid "epub" +msgstr "" + +#: ../../tools.rst:191 9ec403b9146f43eea9153809fffae100 +msgid "EPUB documents" +msgstr "EPUBドキュメント" + +#: ../../tools.rst:192 481a896d6b84460e8c1282ef91422469 +msgid "jpx" +msgstr "" + +#: ../../tools.rst:192 22765c91abce4d04836e6ff2519678ef +msgid "JPEG2000 images" +msgstr "JPEG2000画像" + +#: ../../tools.rst:193 d46c0035bc044897ada86595b3d04bda +msgid "js" +msgstr "" + +#: ../../tools.rst:193 dfe437b8979a42939b5938fb1f481ed3 +msgid "JavaScript" +msgstr "" + +#: ../../tools.rst:194 8ddda388461a473c9736d53993c96bcf +msgid "tofu" +msgstr "" + +#: ../../tools.rst:194 7c1d3ad770de460f824862a33e06e231 +msgid "all TOFU fonts" +msgstr "すべてのTOFUフォント" + +#: ../../tools.rst:195 ac7fb6b9c8e445aeb8f522c60057cf2b +msgid "tofu-cjk" +msgstr "" + +#: ../../tools.rst:195 6285e0aecced48a88277034654e7e42d +msgid "CJK font subset (China, Japan, Korea)" +msgstr "CJKフォントのサブセット(中国、日本、韓国)" + +#: ../../tools.rst:196 2a0c3f09e54c410a975730ce5aaa00de +msgid "tofu-cjk-ext" +msgstr "" + +#: ../../tools.rst:196 32e4ef23a02f4c36a24fd31d49a5607e +msgid "CJK font extensions" +msgstr "CJKフォント拡張" + +#: ../../tools.rst:197 8cf92f3f79df450ebefd0fbca0b7d825 +msgid "tofu-cjk-lang" +msgstr "" + +#: ../../tools.rst:197 b7426777724d4f16b655a5c11d77f9dd +msgid "CJK font language extensions" +msgstr "CJKフォントの言語拡張" + +#: ../../tools.rst:198 703a769d7b434dcaa4cfa08f972492b2 +msgid "tofu-emoji" +msgstr "" + +#: ../../tools.rst:198 846d00d7a0a249ea87e0c1029b8e395c +msgid "TOFU emoji fonts" +msgstr "TOFU絵文字フォント" + +#: ../../tools.rst:199 66b289107a8e4e8abd1bd9d88d481d42 +msgid "tofu-historic" +msgstr "" + +#: ../../tools.rst:199 6f259e97e9bd497d9e9a1e025bcf9982 +msgid "TOFU historic fonts" +msgstr "TOFU歴史的フォント" + +#: ../../tools.rst:200 14977ce1ee544b9abfc8db96b0cbb68c +msgid "tofu-symbol" +msgstr "" + +#: ../../tools.rst:200 dd5d0777ea504428b2029606f420e098 +msgid "TOFU symbol fonts" +msgstr "TOFUシンボルフォント" + +#: ../../tools.rst:201 629d3ba30efd4345bbc69f56a1d9810c +msgid "tofu-sil" +msgstr "" + +#: ../../tools.rst:201 bfbb2fb374b64c6082355e8e493b429f +msgid "TOFU SIL fonts" +msgstr "TOFU SILフォント" + +#: ../../tools.rst:202 4f7a65512f0844ef818c1d9624c5c335 +msgid "icc" +msgstr "" + +#: ../../tools.rst:202 7fc4ac21a5104fac97ffb3afa1c5980d +msgid "ICC profiles" +msgstr "ICCプロファイル" + +#: ../../tools.rst:203 c40e5ca7f3024ac2b2434d42fd91fc52 +msgid "py-memory" +msgstr "" + +#: ../../tools.rst:203 9b71c6d69b014bf2b7b0c8fe41ce6e22 +msgid "using Python memory management [#f2]_" +msgstr "Pythonメモリ管理を使用 [#f2]_" + +#: ../../tools.rst:204 ab89c03d6ab44abfb553acb0f1bf378b +msgid "base14" +msgstr "" + +#: ../../tools.rst:204 002400f1661d403bb8cd28b30acbd9b1 +msgid "Base-14 fonts (should always be true)" +msgstr "Base-14フォント(常にtrueである必要があります)" + +#: ../../tools.rst:207 a7392b4d969e4c53a89f24a17bedd641 +#, fuzzy +msgid "" +"For an explanation of the term \"TOFU\" see `this Wikipedia article " +"`_::" +msgstr "" +"「TOFU」の用語の説明については、 `このウィキペディアの記事 " +"`_ を参照してください。::" + +#: ../../tools.rst:241 f0ca1b8827e0409b8d193598bd42bbb6 +msgid "" +"Maximum storables cache size in bytes. |PyMuPDF| is generated with a " +"value of 268'435'456 (256 MB, the default value), which you should " +"therefore always see here. If this value is zero, then an \"unlimited\" " +"growth is permitted." +msgstr "" +"ストアブルキャッシュの最大サイズ(バイト単位)。 |PyMuPDF| は、デフォルト値である268'435'456(256 " +"MB)で生成されており、したがって常にここで見るはずです。この値がゼロの場合、増加の「制限なし」が許可されています。" + +#: ../../tools.rst:247 c4a141a4e2f54f6c821d474393efda1b +msgid "" +"Current storables cache size in bytes. This value may change (and will " +"usually increase) with every use of a |PyMuPDF| function. It will " +"(automatically) decrease only when :attr:`Tools.store_maxsize` is going " +"to be exceeded: in this case, |MuPDF| will evict low-usage objects until " +"the value is again in range." +msgstr "" +"現在のストアブルキャッシュのサイズ(バイト単位)。この値は、 |PyMuPDF| " +"の関数を使用するたびに変更される可能性があります(通常は増加します)。この値は、:attr:`Tools.store_maxsize` " +"が超過される場合にのみ(自動的に)減少し、その場合、MuPDFは使用率の低いオブジェクトを削除して、値が再び範囲内に収まるようにします。" + +#: ../../tools.rst:252 081f0727dfcf40879ab8811e1fa325d3 +msgid "Example Session" +msgstr "セッションの例" + +#: ../../tools.rst:288 99b53db972bc49c58f0d4592323bf6c1 +msgid "Footnotes" +msgstr "脚注" + +#: ../../tools.rst:289 df2b4099a10547688c103f8b5777ecc7 +msgid "" +"This memory area is internally used by MuPDF, and it serves as a cache " +"for objects that have already been read and interpreted, thus improving " +"performance. The most bulky object types are images and also fonts. When " +"an application starts up the MuPDF library (in our case this happens as " +"part of *import pymupdf*), it must specify a maximum size for this area. " +"PyMuPDF's uses the default value (256 MB) to limit memory consumption. " +"Use the methods here to control or investigate store usage. For example: " +"even after a document has been closed and all related objects have been " +"deleted, the store usage may still not drop down to zero. So you might " +"want to enforce that before opening another document." +msgstr "" +"このメモリ領域はMuPDF内部で使用され、既に読み取られ解釈されたオブジェクトのキャッシュとして機能し、パフォーマンスを向上させます。最も重いオブジェクトのタイプは画像およびフォントです。アプリケーションがMuPDFライブラリを起動するとき(私たちの場合、これは" +" *import pymupdf* の一部として発生します)、この領域の最大サイズを指定する必要があります。 " +"PyMuPDFはメモリ消費を制限するためにデフォルト値(256 " +"MB)を使用します。ここで提供されているメソッドを使用してストアの使用状況を制御または調査できます。たとえば、文書が閉じられ、関連するすべてのオブジェクトが削除された後でも、ストアの使用状況がゼロにならないことがあります。したがって、別の文書を開く前にこれを強制したいかもしれません。" + +#: ../../tools.rst:291 cff1cb203bfb4dd0a6e7c5c76c91e337 +msgid "" +"By default PyMuPDF and MuPDF use `malloc()`/`free()` for dynamic memory " +"management. One can instead force them to use the Python allocation " +"functions `PyMem_New()`/`PyMem_Del()`, by modifying *fitz/fitz.i* to do " +"`#define JM_MEMORY 1` and rebuilding PyMuPDF." +msgstr "" +"デフォルトでは、PyMuPDFおよびMuPDFは動的メモリ管理に `malloc()`/`free()` " +"を使用します。代わりに、*fitz/fitz.i* を変更して `#define JM_MEMORY 1` " +"を行い、PyMuPDFを再構築することで、Pythonの割り当て関数 `PyMem_New()`/`PyMem_Del()` " +"を使用するように強制することができます。" + +#: ../../footer.rst:60 03615545ca9c49a09aa7623e8edafff5 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "::" +#~ msgstr "" + +#~ msgid "empty MuPDF messages on STDOUT" +#~ msgstr "STDOUTのMuPDFメッセージを空にします。" + +#~ msgid "" +#~ "if not a bool, the current setting" +#~ " is returned. If true, MuPDF errors" +#~ " will be shown on *sys.stderr*, " +#~ "otherwise suppressed. In any case, " +#~ "messages continue to be stored in " +#~ "the warnings store. Upon import of " +#~ "PyMuPDF this value is ``True``." +#~ msgstr "" +#~ "boolでない場合、現在の設定が返されます。Trueの場合、MuPDFのエラーは *sys.stderr* " +#~ "に表示され、それ以外の場合は抑制されます。いずれの場合も、メッセージは警告ストアに引き続き保存されます。PyMuPDFのインポート時にこの値は" +#~ " ``True`` です。" + +#~ msgid "``True`` or ``False``" +#~ msgstr "``True`` または ``False``" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/tutorial.mo b/docs/locales/ja/LC_MESSAGES/tutorial.mo new file mode 100644 index 000000000..14f55efb2 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/tutorial.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/tutorial.po b/docs/locales/ja/LC_MESSAGES/tutorial.po new file mode 100644 index 000000000..a41432951 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/tutorial.po @@ -0,0 +1,1231 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 05967661a4da47e68c009a247e2e85aa +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 ba8db4757b2445c58daa7ce09daa3966 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 edfb0c5405f04fbcbc0db3172f2657d9 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../tutorial.rst:8 e378dc60236d4b669bffb87e1333d47f +msgid "Tutorial" +msgstr "チュートリアル" + +#: ../../tutorial.rst:12 79164895cbdb4876ab62b8a34848f031 +msgid "" +"This tutorial will show you the use of |PyMuPDF|, |MuPDF| in " +"Python, step by step." +msgstr "このチュートリアルでは、PythonでのPyMuPDF、MuPDFのステップバイステップの使用方法を紹介します。" + +#: ../../tutorial.rst:14 f3586dfb42924abab88898f06f38cbcd +msgid "" +"Because |MuPDF| supports not only PDF, but also XPS, OpenXPS, CBZ," +" CBR, FB2 and EPUB formats, so does |PyMuPDF| [#f1]_. Nevertheless, for the" +" sake of brevity we will only talk about PDF files. At places where " +"indeed only PDF files are supported, this will be mentioned explicitly." +msgstr "MuPDFはPDFだけでなく、XPS、OpenXPS、CBZ、CBR、FB2、EPUB形式もサポートしており、PyMuPDFも同様です[1]。ただし、簡潔さのために、ここではPDFファイルについてのみ話を進めます。本当にPDFファイルのみがサポートされている場所では、明示的にその旨を記述します。" + +#: ../../tutorial.rst:16 e30cf64f80944d90ba7ccfbb03a3a9c1 +msgid "" +"In addition to this introduction, please do visit PyMuPDF's `YouTube Channel `_ which covers most of the following in the form of YouTube \"Shorts\" and longer videos." +msgstr "このイントロダクションに加えて、ぜひ `PyMuPDF の YouTube `_ チャンネル もご覧ください。以下の内容の多くが、YouTubeの「ショート動画」や長編動画の形式で紹介されています。" + +#: ../../tutorial.rst:19 071e51fe96454d0bbe2eb32b51f301b3 +msgid "Importing the Bindings" +msgstr "バインディングのインポート" + +#: ../../tutorial.rst:20 357ec7fa762f4147880f4a0936252038 +msgid "" +"The Python bindings to MuPDF are made available by this import statement." +" We also show here how your version can be checked::" +msgstr "MuPDFへのPythonバインディングは、このインポート文によって利用可能になります。また、ここではあなたのバージョンを確認する方法も示します::" + +#: ../../tutorial.rst:30 d6ab45b822f0475dbbb14ee5380a0998 +msgid "Note on the Name *fitz*" +msgstr "*fitz* という名前についての注意事項" + +#: ../../tutorial.rst:32 adb2b197702149509f7c2fbb9f6a165b +msgid "" +"Old versions of |PyMuPDF| had their **Python** import name as `fitz`. " +"Newer versions use `pymupdf` instead, and offer `fitz` as a fallback so " +"that old code will still work." +msgstr "このライブラリのPythonのトップレベルインポート名は **\"fitz\"** です。 これには歴史的な理由があります:" + +#: ../../tutorial.rst:34 5042f5b8984445b1bc0c884c052724d0 +msgid "The reason for the name `fitz` is a historical curiosity:" +msgstr "これには歴史的な理由があります:" + +#: ../../tutorial.rst:36 f16e7e4c92a34191b574e5067a982638 +msgid "The original rendering library for MuPDF was called *Libart*." +msgstr "MuPDFの元々のレンダリングライブラリは *Libart* と呼ばれていました。" + +#: ../../tutorial.rst:38 a713609310154ab681ad34dab4a1f930 +msgid "" +"*\"After Artifex Software acquired the MuPDF project, the development " +"focus shifted on writing a new modern graphics library called \"Fitz\". " +"Fitz was originally intended as an R&D project to replace the aging " +"Ghostscript graphics library, but has instead become the rendering engine" +" powering MuPDF.\"* (Quoted from `Wikipedia " +"`_)." +msgstr "" +"「 Artifex SoftwareがMuPDFプロジェクトを取得した後、開発の焦点は新しいモダンなグラフィックスライブラリ " +"\"Fitz\"の開発に移りました。 " +"Fitzの開発に移りました。Fitzは元々古くなったGhostscriptグラフィックスライブラリを置き換えるためのR&Dプロジェクトとして意図されていましたが、代わりにMuPDFを駆動するレンダリングエンジンになりました。」*" +" ( `Wikipedia `_ より引用 ) 。" + +#: ../../tutorial.rst:42 fc94f501fa3b4087ad6c2a2164191050 +msgid "" +"Use of legacy name `fitz` can fail if defunct pypi.org package `fitz` is " +"installed; see :ref:`problems-after-installation`." +msgstr "" + +#: ../../tutorial.rst:48 dd31bf6c9991404bb140492c93eada39 +msgid "Opening a Document" +msgstr "ドキュメントの開く" + +#: ../../tutorial.rst:50 de12fcbc4f284a9bbc276df2ee856cf3 +msgid "" +"To access a :ref:`supported document`, it must be " +"opened with the following statement::" +msgstr ":ref:`サポートされているドキュメント ` にアクセスするには、次の文で開く必要があります::" + +#: ../../tutorial.rst:54 d0c152720d6643c89f87490ed6a0d318 +msgid "" +"This creates the :ref:`Document` object *doc*. *filename* must be a " +"Python string (or a `pathlib.Path`) specifying the name of an existing " +"file." +msgstr "" +"これにより、 :ref:`Document` " +"オブジェクトdocが作成されます。filenameは、既存のファイルの名前を指定するPythonの文字列(または `pathlib.Path` " +")である必要があります。" + +#: ../../tutorial.rst:56 1bafacc42fd249d88fd507712ca0f967 +msgid "" +"It is also possible to open a document from memory data, or to create a " +"new, empty PDF. See :ref:`Document` for details. You can also use " +":ref:`Document` as a *context manager*." +msgstr "" +"また、メモリデータからドキュメントを開くことや、新しい空のPDFを作成することも可能です。詳細については、 :ref:`Document` " +"を参照してください。 :ref:`Document` はコンテキストマネージャとしても使用できます。" + +#: ../../tutorial.rst:58 bb20205b53c14774a64794e857274b39 +msgid "" +"A document contains many attributes and functions. Among them are meta " +"information (like \"author\" or \"subject\"), number of total pages, " +"outline and encryption information." +msgstr "ドキュメントには多くの属性と関数が含まれています。その中には、メタ情報(「author」や「subject」など)、総ページ数、アウトライン、暗号化情報などがあります。" + +#: ../../tutorial.rst:61 751d099725e1445597c4429e4a527b30 +msgid "Some :ref:`Document` Methods and Attributes" +msgstr "いくつかの :ref:`Document` メソッドと属" + +#: ../../tutorial.rst:64 23e9d02038de4d26b3f3046ff9290c2a +msgid "**Method / Attribute**" +msgstr "**メソッド / 属性**" + +#: ../../tutorial.rst:64 ea3bd02523914cd6ad04b24051580e86 +msgid "**Description**" +msgstr "説明" + +#: ../../tutorial.rst:66 d52833dbb20e4d63b865b2b38316a4f6 +msgid ":attr:`Document.page_count`" +msgstr "" + +#: ../../tutorial.rst:66 4eb8d47eece141dcaae4dc82fae01cf9 +msgid "the number of pages (*int*)" +msgstr "ページ数 (*int*)" + +#: ../../tutorial.rst:67 60de5a4a274c48eea29e36ad651c71ae +msgid ":attr:`Document.metadata`" +msgstr "" + +#: ../../tutorial.rst:67 6b75aa58187d4de7910e3ae60b594e6e +msgid "the metadata (*dict*)" +msgstr "メタデータ (*dict*)" + +#: ../../tutorial.rst:68 166a5fe4285449ddaf7070f3bff9a8f8 +msgid ":meth:`Document.get_toc`" +msgstr "" + +#: ../../tutorial.rst:68 b37c69de939840ebb9e4e3bb5cf3b35e +msgid "get the table of contents (*list*)" +msgstr "目次を取得する (*list*)" + +#: ../../tutorial.rst:69 4b9427462b01465d9f721e32c45441b3 +msgid ":meth:`Document.load_page`" +msgstr "" + +#: ../../tutorial.rst:69 55e0bf1b22c84de5a546ee4003634bde +msgid "read a :ref:`Page`" +msgstr ":ref:`Page` を読む" + +#: ../../tutorial.rst:73 0d0efe9358564831abcdae7b46d423ad +msgid "Accessing Meta Data" +msgstr "メタデータへのアクセス" + +#: ../../tutorial.rst:74 3bf0259c5d844a5ea33f28f009d1084c +msgid "" +"PyMuPDF fully supports standard metadata. :attr:`Document.metadata` is a " +"Python dictionary with the following keys. It is available for **all " +"document types**, though not all entries may always contain data. For " +"details of their meanings and formats consult the respective manuals, " +"e.g. :ref:`AdobeManual` for PDF. Further information can also be found in" +" chapter :ref:`Document`. The meta data fields are strings or ``None`` if" +" not otherwise indicated. Also be aware that not all of them always " +"contain meaningful data -- even if they are not ``None``." +msgstr "" +"PyMuPDFは標準的なメタデータを完全にサポートしています。 :attr:`Document.metadata` " +"はPythonの辞書で、次のキーが含まれています。これはすべてのドキュメントタイプで利用可能ですが、すべてのエントリが常にデータを含むわけではありません。それらの意味や形式の詳細については、対応するマニュアルを参照してください。例えば、PDFの場合は" +" :ref:`AdobeManual` を参照してください。さらなる情報は :ref:`Document` " +"の章でも見つけることができます。メタデータのフィールドは文字列または ``None`` " +"です(特に指定がない場合)。また、それらのすべてが常に意味のあるデータを含んでいるわけではないことにも注意してください。" + +#: ../../tutorial.rst:77 c50c4a8525e24079a77ac54b1fc4233c +msgid "Key" +msgstr "キー" + +#: ../../tutorial.rst:77 d80dc529b03e4fdcb3723e17909641ec +msgid "Value" +msgstr "値" + +#: ../../tutorial.rst:79 1270501ff746404d9a540ac46ca40289 +msgid "producer" +msgstr "" + +#: ../../tutorial.rst:79 1d7515f242594d6793585075ae2a1c3e +msgid "producer (producing software)" +msgstr "プロデューサー(生成ソフトウェア)" + +#: ../../tutorial.rst:80 468ddd00b3c144bd9b3e154204fbed15 +msgid "format" +msgstr "" + +#: ../../tutorial.rst:80 b1eb1e1957bc48a0851024ce094c662f +msgid "format: 'PDF-1.4', 'EPUB', etc." +msgstr "形式:'PDF-1.4'、'EPUB'など" + +#: ../../tutorial.rst:81 b1122070ff9b4c4494bedf4f3388d9b5 +msgid "encryption" +msgstr "" + +#: ../../tutorial.rst:81 91a989f6b4d84da0be125c5870b85629 +msgid "encryption method used if any" +msgstr "暗号化方法(適用されている場合)" + +#: ../../tutorial.rst:82 19480fa53c3544319c83fd3fad08e759 +#: 7f2a63267f694384a554dae16109df23 +msgid "author" +msgstr "" + +#: ../../tutorial.rst:83 f5b61af7d3294bdf9ce3dc42e2a1a8b0 +msgid "modDate" +msgstr "" + +#: ../../tutorial.rst:83 801fc48a09794a0fac77225f214744e9 +msgid "date of last modification" +msgstr "最終更新日" + +#: ../../tutorial.rst:84 5c1f0ad075c14921a33eff59aae70720 +#: 8bade788738649d8a71c0a402d4ee15e +msgid "keywords" +msgstr "" + +#: ../../tutorial.rst:85 32abcccab99941608fef4dea21fa26ef +#: a7b239e234ac4e738b57649e92f97b58 +msgid "title" +msgstr "" + +#: ../../tutorial.rst:86 43ebc843826848f08ba05a060612babd +msgid "creationDate" +msgstr "" + +#: ../../tutorial.rst:86 6df64ddf396941d9b363802e71a17d4e +msgid "date of creation" +msgstr "作成日" + +#: ../../tutorial.rst:87 8bb3abe30fe64d669b807607a52a2420 +msgid "creator" +msgstr "" + +#: ../../tutorial.rst:87 9db959512cba40f4a6390f8fa75faa93 +msgid "creating application" +msgstr "作成アプリケーション" + +#: ../../tutorial.rst:88 1b1f49a6b78841abb58585aa54af2bdc +#: 9ac5fc37fe5d4f25b601404e30ab2154 +msgid "subject" +msgstr "" + +#: ../../tutorial.rst:91 219ce7c6343d408ab8d80fbc0a05ec6e +msgid "" +"Apart from these standard metadata, **PDF documents** starting from PDF " +"version 1.4 may also contain so-called *\"metadata streams\"* (see also " +":data:`stream`). Information in such streams is coded in XML. PyMuPDF " +"deliberately contains no XML components for this purpose (the " +":ref:`PyMuPDF Xml class` is a helper class intended to access the " +"DOM content of a :ref:`Story` object), so we do not directly support " +"access to information contained therein. But you can extract the stream " +"as a whole, inspect or modify it using a package like `lxml`_ and then " +"store the result back into the PDF. If you want, you can also delete this" +" data altogether." +msgstr "" +"PDFバージョン1.4以降のPDFドキュメントには、これらの標準的なメタデータに加えて、いわゆる「メタデータストリーム」(streamも参照)が含まれていることがあります。このようなストリームに含まれる情報はXMLでコード化されています。PyMuPDFは意図的にこの目的のためにXMLコンポーネントを含んでいないため(" +" :ref:`PyMuPDF Xml class` クラスは :ref:`Story` " +"オブジェクトのDOMコンテンツにアクセスするためのヘルパークラスです)、その情報への直接的なアクセスをサポートしていません。しかし、 " +"`lxml`_ " +"などのパッケージを使用してストリーム全体を抽出し、検査または変更し、その結果をPDFに再格納することはできます。必要な場合は、このデータを完全に削除することもできます。" + +#: ../../tutorial.rst:93 1cdab6c40c754d80b0e856afad24a1db +msgid "" +"There are two utility scripts in the repository that `metadata import " +"(PDF only)`_ resp. `metadata export`_ metadata from resp. to CSV files." +msgstr "" +"リポジトリには、`metadata import(PDF only)`_ および `metadata export`_ " +"をCSVファイルに行うための2つのユーティリティスクリプトがあります" + +#: ../../tutorial.rst:96 b5cc3dbefd4d4224905521dd275497b2 +msgid "Working with Outlines" +msgstr "アウトラインと一緒に作業する" + +#: ../../tutorial.rst:97 436fc3a7e03f4a90bb900308d8dd7548 +msgid "" +"The easiest way to get all outlines (also called \"bookmarks\") of a " +"document, is by loading its *table of contents*::" +msgstr "アウトライン(または「ブックマーク」とも呼ばれる)を取得する最も簡単な方法は、そのドキュメントの目次を読み込むことです::" + +#: ../../tutorial.rst:101 3b688b47071e4d87b3d7cd95ced8dd08 +msgid "" +"This will return a Python list of lists *[[lvl, title, page, ...], ...]* " +"which looks much like a conventional table of contents found in books." +msgstr "" +"これにより、Pythonのリストであるリスト [[lvl, title, page, ...], ...] " +"が返されます。これは、本に見られる伝統的な目次に非常によく似たものです。" + +#: ../../tutorial.rst:103 71983635bbe745518b0e2bf0de62b01c +msgid "" +"*lvl* is the hierarchy level of the entry (starting from 1), *title* is " +"the entry's title, and *page* the page number (1-based!). Other " +"parameters describe details of the bookmark target." +msgstr "" +"lvl はエントリーの階層レベル(1から開始)を示し、title はエントリーのタイトル、page " +"はページ番号(1から始まる)を示します。その他のパラメータはブックマークのターゲットの詳細を説明します。" + +#: ../../tutorial.rst:105 4b50bc776e3842c5ad39eb2a80765944 +msgid "" +"There are two utility scripts in the repository that `toc import (PDF " +"only)`_ resp. `toc export`_ table of contents from resp. to CSV files." +msgstr "" +"リポジトリ内には、CSVファイルから目次をインポートおよびエクスポートするためのユーティリティスクリプトが2つあります。詳細は、 `toc " +"import (PDF only)`_ および `toc export`_ をご覧ください。" + +#: ../../tutorial.rst:108 e99a2b08c8ac4dbfaaada3a4d3febd23 +msgid "Working with Pages" +msgstr "ページの操作" + +#: ../../tutorial.rst:109 484157f129e74a108d83a0687d390d90 +msgid ":ref:`Page` handling is at the core of MuPDF's functionality." +msgstr ":ref:`Page` の処理はMuPDFの機能の中核です。" + +#: ../../tutorial.rst:111 39d19b40747a4209be020d521fab3d56 +msgid "" +"You can render a page into a raster or vector (SVG) image, optionally " +"zooming, rotating, shifting or shearing it." +msgstr "ページをラスターまたはベクター(SVG)イメージにレンダリングすることができます。オプションでズーム、回転、シフト、またはシアーを行うこともできます" + +#: ../../tutorial.rst:112 ee05f9cee64f40dbbd92d40df9081fc4 +msgid "" +"You can extract a page's text and images in many formats and search for " +"text strings." +msgstr "テキストと画像を多くの形式で抽出し、テキスト文字列を検索することができます。" + +#: ../../tutorial.rst:113 0a743e8513164292a9ed45182829305a +msgid "" +"For PDF documents many more methods are available to add text or images " +"to pages." +msgstr "PDFドキュメントの場合、さらに多くのメソッドが利用可能で、テキストや画像をページに追加することができます" + +#: ../../tutorial.rst:115 33b8b5b674bd42bfa3c10352d8386ded +msgid "" +"First, a :ref:`Page` must be created. This is a method of " +":ref:`Document`::" +msgstr "最初に :ref:`Page` を作成する必要があります。これは :ref:`Document` のメソッドです::" + +#: ../../tutorial.rst:120 0f62eb911c7a46b8b2f7d6b911449ad4 +msgid "" +"Any integer `-∞ < pno < page_count` is possible here. Negative numbers " +"count backwards from the end, so *doc[-1]* is the last page, like with " +"Python sequences." +msgstr "" +"ここでは、どんな整数 `-∞ < pno < page_count` でも可能です。負の数は末尾から逆に数えますので、doc[-1] " +"はPythonのシーケンスと同様に最後のページになります" + +#: ../../tutorial.rst:122 a6c304f33507429b9a30bd8e926fbf12 +msgid "" +"Some more advanced way would be using the document as an **iterator** " +"over its pages::" +msgstr "より高度な方法として、ドキュメントをそのページの **イテレータ** として使用することもできます::" + +#: ../../tutorial.rst:136 dd002e48ae7445049b3b31ec113b9e39 +msgid "Once you have your page, here is what you would typically do with it:" +msgstr "ページを取得したら、通常は次のようなことを行います:" + +#: ../../tutorial.rst:139 479b18109bc34625a53a33ca74bca74c +msgid "Inspecting the Links, Annotations or Form Fields of a Page" +msgstr "リンク、注釈、またはフォームフィールドをページで調査する" + +#: ../../tutorial.rst:140 93f1bc7076b84e53b34a0bf7b7752426 +msgid "" +"Links are shown as \"hot areas\" when a document is displayed with some " +"viewer software. If you click while your cursor shows a hand symbol, you " +"will usually be taken to the target that is encoded in that hot area. " +"Here is how to get all links::" +msgstr "リンクは、ドキュメントがビューアソフトウェアで表示されるときに「ホットエリア」として表示されます。カーソルが手のシンボルを示すときにクリックすると、通常、そのホットエリアにエンコードされたターゲットに移動します。以下はすべてのリンクを取得する方法です::" + +#: ../../tutorial.rst:145 a0f2daeae55d47ce99d8e84b319faece +msgid "" +"*links* is a Python list of dictionaries. For details see " +":meth:`Page.get_links`." +msgstr "`links` はPythonの辞書のリストです。詳細は :meth:`Page.get_links` を参照してください。" + +#: ../../tutorial.rst:147 b27868bc6ccb46f282f088bb49db681f +msgid "You can also use an iterator which emits one link at a time::" +msgstr "また、一度に1つのリンクを生成するイテレータを使用することもできます::" + +#: ../../tutorial.rst:152 f4ad01b38b1040bfa0d9b295acad14bd +msgid "" +"If dealing with a PDF document page, there may also exist annotations " +"(:ref:`Annot`) or form fields (:ref:`Widget`), each of which have their " +"own iterators::" +msgstr "PDFドキュメントのページを扱う場合、注釈(:ref:`Annot`)やフォームフィールド(:ref:`Widget`)も存在する場合があります。それぞれに独自のイテレータがあります::" + +#: ../../tutorial.rst:162 aa1e66c76cd84d98a396687a12122803 +msgid "Rendering a Page" +msgstr "ページのレンダリング" + +#: ../../tutorial.rst:163 7114d4014690481fbb394caeafe88cd0 +msgid "This example creates a **raster** image of a page's content::" +msgstr "以下の例は、ページの内容をラスター画像として作成します::" + +#: ../../tutorial.rst:167 5d490b2d2071456590b70ad02b2040a3 +msgid "" +"``pix`` is a :ref:`Pixmap` object which (in this case) contains an **RGB** " +"image of the page, ready to be used for many purposes. Method " +":meth:`Page.get_pixmap` offers lots of variations for controlling the " +"image: resolution / DPI, colorspace (e.g. to produce a grayscale image or" +" an image with a subtractive color scheme), transparency, rotation, " +"mirroring, shifting, shearing, etc. For example: to create an **RGBA** " +"image (i.e. containing an alpha channel), specify *pix = " +"page.get_pixmap(alpha=True)*." +msgstr "" +"`pix` はページのRGBイメージを含む :ref:`Pixmap` " +"オブジェクトです。このイメージは多くの目的に使用する準備ができています。:meth:`Page.get_pixmap` " +"メソッドには、画像を制御するためのさまざまなオプションが用意されています:解像度/DPI、カラースペース(例えば、グレースケールイメージや減色カラースキームのイメージを作成するために)、透明度、回転、ミラーリング、シフト、シアーなどがあります。例えば、RGBAイメージ(アルファチャネルを含むイメージ)を作成する場合は、`pix" +" = page.get_pixmap(alpha=True)` と指定します。" + +#: ../../tutorial.rst:169 d83c8c8ecf374c119c71005bd91511c7 +msgid "" +"A :ref:`Pixmap` contains a number of methods and attributes which are " +"referenced below. Among them are the integers ``width``, ``height`` (each in " +"pixels) and ``stride`` (number of bytes of one horizontal image line). " +"Attribute ``samples`` represents a rectangular area of bytes representing " +"the image data (a Python ``bytes`` object)." +msgstr "" +":ref:`Pixmap` " +"には、以下で参照されるいくつかのメソッドと属性が含まれています。その中には、ピクセル単位の整数である幅、高さ(それぞれピクセル単位)および " +"`stride` (1つの水平イメージラインのバイト数)があります。属性 `samples` " +"は、イメージデータを表すバイトの長方形領域(Pythonの `bytes` オブジェクト)を表します。" + +#: ../../tutorial.rst:171 33dc6238724b49c3a201a622c7289f3c +msgid "" +"You can also create a **vector** image of a page by using " +":meth:`Page.get_svg_image`. Refer to this `Vector Image Support page`_ " +"for details." +msgstr "" +":meth:`Page.get_svg_image` " +"を使用することで、ページのベクターイメージを作成することもできます。詳細については、`Vector Image Support page`_ " +"ページを参照してください。" + +#: ../../tutorial.rst:174 845ef517b9e143a1afe1950cb45677ee +msgid "Saving the Page Image in a File" +msgstr "ページのイメージをファイルに保存する" + +#: ../../tutorial.rst:175 bdeba468768e4188b09289122c78230f +msgid "We can simply store the image in a PNG file::" +msgstr "簡単にページのイメージをPNGファイルに保存できます::" + +#: ../../tutorial.rst:180 710babb269494a0db2dff3a59b541b75 +msgid "Displaying the Image in GUIs" +msgstr "GUIでイメージを表示する" + +#: ../../tutorial.rst:181 6f459b063b0e46e39a3a571d118d161c +msgid "" +"We can also use it in GUI dialog managers. :attr:`Pixmap.samples` " +"represents an area of bytes of all the pixels as a Python bytes object. " +"Here are some examples, find more in the `examples`_ directory." +msgstr "" +"GUIダイアログマネージャーでも使用できます。 :attr:`Pixmap.samples` " +"は、すべてのピクセルのバイトの領域をPythonのbytesオブジェクトとして表します。以下はいくつかの例ですが、さらに多くの例は " +"`examples`_ ディレクトリで見つけることができます。" + +#: ../../tutorial.rst:184 04c74142d3a5408cb2df43362abab4a9 +msgid "wxPython" +msgstr "" + +#: ../../tutorial.rst:185 8ae38c97a4d746519d5b07ea1f0c2915 +msgid "" +"Consult their documentation for adjustments to RGB(A) pixmaps and, " +"potentially, specifics for your wxPython release::" +msgstr "RGB(A)のピクセルマップに対する調整や、可能な場合はwxPythonのリリースに特有の詳細については、関連するドキュメントを参照してください。" + +#: ../../tutorial.rst:193 4ecff91251c346c1828b5c2212df148d +msgid "Tkinter" +msgstr "" + +#: ../../tutorial.rst:194 3937a8f51b8d431183782d33e29fdb0d +msgid "Please also see section 3.19 of the `Pillow documentation`_::" +msgstr "`Pillow documentation`_ ドキュメントのセクション3.19も参照してください。" + +#: ../../tutorial.rst:203 54cb07c2be2245af96a39fc1d108cbd6 +msgid "The following **avoids using Pillow**::" +msgstr "以下の方法では、Pillowを使用しないようにしています。" + +#: ../../tutorial.rst:210 278fa9ee08b341d5b6c8abb366fc16ec +msgid "" +"If you are looking for a complete Tkinter script paging through **any " +"supported** document, `here it is!`_. It can also zoom into pages, and it" +" runs under Python 2 or 3. It requires the extremely handy `PySimpleGUI`_" +" pure Python package." +msgstr "" +"もし、サポートされている任意のドキュメントをページ送りで表示する完全なTkinterスクリプトをお探しでしたら、`こちらがあります! " +"`_ また、このスクリプトではページをズームインすることもでき、Python " +"2または3で動作します。非常に便利な純粋なPythonパッケージである `PySimpleGUI`_ が必要です。" + +#: ../../tutorial.rst:213 c0267a22336c4e4da907533f35f9d341 +msgid "PyQt4, PyQt5, PySide" +msgstr "" + +#: ../../tutorial.rst:214 9a6ff2af3cb64216a96d828ef5117a0d +msgid "Please also see section 3.16 of the `Pillow documentation`_::" +msgstr "Pillowドキュメントのセクション3.16も参照してください。" + +#: ../../tutorial.rst:223 a7cc76e286584e9a867c24ef3add76a8 +msgid "" +"Again, you also can get along **without using Pillow.** Qt's `QImage` " +"luckily supports native Python pointers, so the following is the " +"recommended way to create Qt images::" +msgstr "Pillowを使用せずに進めることもできます。幸運なことに、QtのQImageはネイティブなPythonポインタをサポートしているので、以下はQtイメージを作成する推奨される方法です。" + +#: ../../tutorial.rst:233 10d5fdd6f131454eae7a2e63c574c5bd +msgid "Extracting Text and Images" +msgstr "テキストや画像を抽出する" + +#: ../../tutorial.rst:234 1943f89f643845f0b54db821bcc3e09d +msgid "" +"We can also extract all text, images and other information of a page in " +"many different forms, and levels of detail::" +msgstr "ページのすべてのテキスト、画像、およびその他の情報を、さまざまな形式や詳細レベルで抽出することもできます。::" + +#: ../../tutorial.rst:238 554e63dabc1248f783b976d361dcec69 +msgid "" +"Use one of the following strings for *opt* to obtain different formats " +"[#f2]_:" +msgstr "異なるフォーマットを得るために、以下の文字列のうち1つをoptに使用できます [#f2]_:" + +#: ../../tutorial.rst:240 f44036ad1ba24ece8a18ea2fc98c1f03 +msgid "" +"**\"text\"**: (default) plain text with line breaks. No formatting, no " +"text position details, no images." +msgstr "**\"text\"**: (デフォルト) 行送りのプレーンテキスト。書式設定やテキストの位置の詳細、画像は含まれません。" + +#: ../../tutorial.rst:242 a77470c3dd534162802cade7841cd2f0 +msgid "**\"blocks\"**: generate a list of text blocks (= paragraphs)." +msgstr "**\"blocks\"**: テキストブロック(段落)のリストを生成します。" + +#: ../../tutorial.rst:244 0203e41b56554819a6376440502f5b4b +msgid "**\"words\"**: generate a list of words (strings not containing spaces)." +msgstr "**\"words\"**: 単語(スペースを含まない文字列)のリストを生成します。" + +#: ../../tutorial.rst:246 04c3946009704057bae58608e81e89bc +msgid "" +"**\"html\"**: creates a full visual version of the page including any " +"images. This can be displayed with your internet browser." +msgstr "**\"html\"**: 画像を含むページの完全な視覚的バージョンを作成します。これはインターネットブラウザで表示できます。" + +#: ../../tutorial.rst:248 e96b199aabba4aca94d6b53b75aa27cd +msgid "" +"**\"dict\"** / **\"json\"**: same information level as HTML, but provided" +" as a Python dictionary or resp. JSON string. See " +":meth:`TextPage.extractDICT` for details of its structure." +msgstr "" +"**\"dict\"** / **\"json\"**: " +"HTMLと同じ情報レベルですが、Pythonの辞書またはJSON文字列として提供されます。その構造の詳細については、:meth:`TextPage.extractDICT`" +" を参照してください。" + +#: ../../tutorial.rst:250 69d52fc9b54b4109b44c122576994849 +msgid "" +"**\"rawdict\"** / **\"rawjson\"**: a super-set of **\"dict\"** / " +"**\"json\"**. It additionally provides character detail information like " +"XML. See :meth:`TextPage.extractRAWDICT` for details of its structure." +msgstr "" +"**\"rawdict\"** / **\"rawjson\"**: \"dict\" / " +"\"json\"のスーパーセットです。これにはXMLのような文字の詳細情報も含まれます。その構造の詳細については、:meth:`TextPage.extractRAWDICT`" +" を参照してください。" + +#: ../../tutorial.rst:252 a895c3b5c62848bf973a2b44b58097ef +msgid "" +"**\"xhtml\"**: text information level as the TEXT version but includes " +"images. Can also be displayed by internet browsers." +msgstr "**\"xhtml\"**: TEXTバージョンのテキスト情報レベルを含み、画像も含まれます。インターネットブラウザでも表示できます。" + +#: ../../tutorial.rst:254 7c2a9b1d406a472e990d81723f7546b9 +msgid "" +"**\"xml\"**: contains no images, but full position and font information " +"down to each single text character. Use an XML module to interpret." +msgstr "" +"**\"xml\"**: " +"画像は含まれませんが、各テキスト文字までの完全な位置とフォント情報を含みます。XMLモジュールを使用して解釈することができます。" + +#: ../../tutorial.rst:256 38ebfd1e60f2470db255fe5ce90e60cb + +msgid "" +"To give you an idea about the output of these alternatives, we did text " +"example extracts. See :ref:`Appendix1`." +msgstr "これらの代替方法の出力のイメージを示すために、テキストの例抽出を行いました。:ref:`Appendix1` を参照してください" + +#: ../../tutorial.rst:259 570661af75b040a2ab647ae2c7ed0b06 +msgid "Searching for Text" +msgstr "テキストを検索す" + +#: ../../tutorial.rst:260 bd492032074c47a88220162f66174e76 +msgid "You can find out, exactly where on a page a certain text string appears::" +msgstr "特定のテキスト文字列がページのどこに現れるかを正確に調べることができます。::" + +#: ../../tutorial.rst:264 d38076507d744dce945e696f5729c684 +msgid "" +"This delivers a list of rectangles (see :ref:`Rect`), each of which " +"surrounds one occurrence of the string \"mupdf\" (case insensitive). You " +"could use this information to e.g. highlight those areas (PDF only) or " +"create a cross reference of the document." +msgstr "" +"このコードは、\"mupdf\"(大文字と小文字を区別しない)という文字列が含まれる各領域を囲む長方形( :ref:`Rect` " +"参照)のリストを生成します。この情報を使って、それらの領域を強調表示したり(PDFのみ)、文書のクロスリファレンスを作成したりすることができます。" + +#: ../../tutorial.rst:266 64b9d94c27d2430a9a761da4084e0510 +msgid "" +"Please also do have a look at chapter :ref:`cooperation` and at demo " +"programs `demo.py`_ and `demo-lowlevel.py`_. Among other things they " +"contain details on how the :ref:`TextPage`, :ref:`Device` and " +":ref:`DisplayList` classes can be used for a more direct control, e.g. " +"when performance considerations suggest it." +msgstr "" +"また、:ref:`cooperation` という章、およびデモプログラム `demo.py`_ と `demo-lowlevel.py`_ " +"も参照してください。これらには、:ref:`TextPage` 、 :ref:`Device` 、:ref:`DisplayList` " +"クラスをより直接的に制御する方法に関する詳細な情報が含まれています。例えば、パフォーマンスを考慮する必要がある場合などに役立ちます" + +#: ../../tutorial.rst:273 2fcdc8a391004211a63c3682b7fb5f2b +msgid "Stories: Generating PDF from HTML Source" +msgstr "ストーリー: HTMLソースからPDFを生成する" + +#: ../../tutorial.rst:275 ab8a9b2cdb7641d881c73fc7abf43268 +msgid "" +"The :ref:`Story` class is a new feature of PyMuPDF version 1.21.0. It " +"represents support for MuPDF's **\"story\"** interface." +msgstr "" +"ストーリークラスはPyMuPDFバージョン1.21.0の新機能です。これはMuPDFの :ref:`Story` " +"インターフェースに対するサポートを表しています。" + +#: ../../tutorial.rst:277 d6963f89881a4fba95e7b1099865b22a +msgid "" +"The following is a quote from the book `\"MuPDF Explored\"`_ by Robin " +"Watts from `Artifex`_:" +msgstr "以下はArtifexのRobin Wattsによる書籍「MuPDF Explored」からの引用です。" + +#: ../../tutorial.rst:281 f4ecf26b2cd04a978bf74422d5fa8c30 +msgid "" +"*Stories provide a way to easily layout styled content for use with " +"devices, such as those offered by Document Writers (...). The concept of " +"a story comes from desktop publishing, which in turn (...) gets it from " +"newspapers. If you consider a traditional newspaper layout, it will " +"consist of various news articles (stories) that are laid out into " +"multiple columns, possibly across multiple pages.*" +msgstr "*ストーリーは、ドキュメントライターなどのデバイスで使用するためのスタイル付きコンテンツを簡単にレイアウトする方法を提供します。ストーリーという概念は、デスクトップパブリッシングから来ており、それ自体が新聞から取り入れられています。伝統的な新聞のレイアウトを考えると、複数の列や複数のページにわたって配置されたさまざまなニュース記事(ストーリー)で構成されています。*" + +#: ../../tutorial.rst:283 24ef55c29a4d468781822c3a0c6ae60e +msgid "" +"*Accordingly, MuPDF uses a story to represent a flow of text with styling" +" information. The user of the story can then supply a sequence of " +"rectangles into which the story will be laid out, and the positioned text" +" can then be drawn to an output device. This keeps the concept of the " +"text itself (the story) to be separated from the areas into which the " +"text should be flowed (the layout).*" +msgstr "*それに応じて、MuPDFでは、テキストのフローとスタイリング情報を表すためにストーリーを使用しています。ストーリーのユーザーは、ストーリーをレイアウトするための矩形のシーケンスを提供し、配置されたテキストを出力デバイスに描画できます。これにより、テキスト自体(ストーリー)がテキストを配置する領域(レイアウト)から分離されるという概念が保持されます*" + +#: ../../tutorial.rst:287 8a384a5047b544f5900574c11df5e1df +msgid "" +"A Story works somewhat similar to an internet browser: It faithfully " +"parses and renders HTML hypertext and also optional stylesheets (CSS). " +"But its **output is a PDF** -- not web pages." +msgstr "ストーリーは、インターネットブラウザといくつかの点で似ています。それは忠実にHTMLハイパーテキストとオプションのスタイルシート(CSS)を解析してレンダリングします。ただし、出力されるのはPDFであり、ウェブページではありません。" + +#: ../../tutorial.rst:290 42027a24ac6c46fc91baef0cd7076471 +msgid "" +"When creating a :ref:`Story`, the input from up to three different " +"information sources is taken into account. All these items are optional." +msgstr "ストーリーを作成する際には、最大3つの異なる情報源からの入力が考慮されます。これらのすべてのアイテムはオプションです。" + +#: ../../tutorial.rst:292 9adeb199229844bb83284b74fda7555e +msgid "" +"HTML source code, either a Python string or **created by the script** " +"using methods of :ref:`Xml`." +msgstr "HTMLソースコード:Pythonの文字列として提供されるか、 :ref:`Xml` のメソッドを使用してスクリプトによって作成されます。" + +#: ../../tutorial.rst:294 e48f3a50012241f0849b4d6077b00e10 +msgid "" +"CSS (Cascaded Style Sheet) source code, provided as a Python string. CSS " +"can be used to provide styling information (text font size, color, etc.) " +"like it would happen for web pages. Obviously, this string may also be " +"read from a file." +msgstr "CSS(カスケーディングスタイルシート)ソースコード:Pythonの文字列として提供されます。CSSは、ウェブページにおけるように、スタイリング情報(テキストフォントサイズ、色など)を提供するために使用できます。もちろん、この文字列はファイルから読み込むこともできます。" + +#: ../../tutorial.rst:296 cb2f2cd086c14b9894c55183dc6a8b80 +msgid "" +"An :ref:`Archive` **must be used** whenever the DOM references images, or" +" uses text fonts except the standard :ref:`Base-14-Fonts`, CJK fonts and " +"the NOTO fonts generated into the PyMuPDF binary." +msgstr "" +"DOMが画像を参照する場合や、標準の :ref:`Base-14-Fonts` " +"、CJKフォント、およびPyMuPDFバイナリに生成されたNOTOフォント以外のテキストフォントを使用する場合、 :ref:`Archive` " +"を使用する必要があります。" + +#: ../../tutorial.rst:299 92aa9219931649b5972b55f06acaba5f +msgid "" +"The :ref:`API` allows creating DOMs completely from scratch, " +"including desired styling information. It can also be used to modify or " +"extend **provided** HTML: text can be deleted or replaced, or its styling" +" can be changed. Text -- for example extracted from databases -- can also" +" be added and fill template-like HTML documents." +msgstr "" +"この :ref:`API` " +"では、望むスタイル情報を含めて、DOMを完全にゼロから作成することができます。また、提供されたHTMLを変更したり拡張したりするためにも使用できます。テキストは削除や置換ができ、スタイルも変更できます。データベースから抽出されたテキストを追加して、テンプレートのようなHTMLドキュメントを作成することもできます。" + +#: ../../tutorial.rst:301 bfef6bbfde0e4e9f95799418e6b840d0 +msgid "" +"It is **not required** to provide syntactically complete HTML documents: " +"snippets like `Hello World!` are fully accepted, and many /" +" most syntax errors are automatically corrected." +msgstr "" +"文法的に完全なHTMLドキュメントを提供する必要はありません。例えば ``HelloWorld!`` " +"のような断片も完全に受け入れられ、多くの場合、構文エラーは自動的に修正されます。" + +#: ../../tutorial.rst:303 043777ef2f3349c3a0173448f52cecbb +msgid "" +"After the HTML is considered complete, it can be used to create a PDF " +"document. This happens via the new :ref:`DocumentWriter` class. The " +"programmer calls its methods to create a new empty page, and passes " +"rectangles to the Story to fill them." +msgstr "" +"HTMLが完成したと見なされた後、それを使用してPDF文書を作成できます。これは新しい :ref:`DocumentWriter` " +"クラスを介して行われます。プログラマーはそのメソッドを呼び出して新しい空のページを作成し、:ref:`Story` " +"に矩形を渡してそれらを埋めることができます。" + +#: ../../tutorial.rst:305 23c7aafe13d34181a4d91db97ba388ce +msgid "" +"The story in turn will return completion codes indicating whether or not " +"more content is waiting to be written. Which part of the content will " +"land in which rectangle or on which page is automatically determined by " +"the story itself -- it cannot be influenced other than by providing the " +"rectangles." +msgstr "" +":ref:`Story` " +"は、書き込まれる待機中のコンテンツがあるかどうかを示す完了コードを返します。コンテンツのどの部分がどの矩形またはどのページに配置されるかは、:ref:`Story`" +" 自体によって自動的に決定されます。矩形を提供すること以外では影響を与えることはできません。" + +#: ../../tutorial.rst:307 4c261efdbac741c4982404ab01604e2b +msgid "" +"Please see the :ref:`Stories recipes` for a number of " +"typical use cases." +msgstr "典型的な使用例については、 :ref:`Stories recipes` レシピをご覧ください。" + +#: ../../tutorial.rst:311 1cfcd608ebe3470d989d732b1053ef15 +msgid "PDF Maintenance" +msgstr "PDFメンテナンス" + +#: ../../tutorial.rst:312 adb1c7c8a9924ac6ad2c05db95c0343a +msgid "" +"PDFs are the only document type that can be **modified** using PyMuPDF. " +"Other file types are read-only." +msgstr "PDFはPyMuPDFを使用して変更できる唯一のドキュメントタイプです。他のファイルタイプは読み取り専用です。" + +#: ../../tutorial.rst:314 9cd40dae8b664758b29583ab66394808 +msgid "" +"However, you can convert **any document** (including images) to a PDF and" +" then apply all PyMuPDF features to the conversion result. Find out more " +"here :meth:`Document.convert_to_pdf`, and also look at the demo script " +"`pdf-converter.py`_ which can convert any :ref:`supported " +"document` to PDF." +msgstr "" +"ただし、任意のドキュメント(画像を含む)をPDFに変換し、変換結果にすべてのPyMuPDF機能を適用することができます。 " +":meth:`Document.convert_to_pdf` " +"で詳細を確認できます。また、任意のサポートされているドキュメントをPDFに変換できるデモスクリプト `pdf-converter.py`_ " +"も確認してください。" + +#: ../../tutorial.rst:316 f2e2843ced0b43dca260d5fb7417e81a +msgid "" +":meth:`Document.save()` always stores a PDF in its current (potentially " +"modified) state on disk." +msgstr ":meth:`Document.save()` は常に現在の(変更された可能性のある)PDFをディスクに保存します。" + +#: ../../tutorial.rst:318 211603a41aaf4aa2bf96e1dcba2c8d1e +msgid "" +"You normally can choose whether to save to a new file, or just append " +"your modifications to the existing one (\"incremental save\"), which " +"often is very much faster." +msgstr "通常、新しいファイルに保存するか、既存のファイルに変更内容を追加するか(「増分保存」)を選択できます。増分保存は非常に高速な場合があります。" + +#: ../../tutorial.rst:320 f22382b263444329820b3e4b1c1f8ecc +msgid "" +"The following describes ways how you can manipulate PDF documents. This " +"description is by no means complete: much more can be found in the " +"following chapters." +msgstr "以下にPDFドキュメントを操作する方法を示しますが、これに限らず、詳細は次の章でさらに見つけることができます。" + +#: ../../tutorial.rst:323 d99528fe55cc49799b2ad8ee3460f8cd +msgid "Modifying, Creating, Re-arranging and Deleting Pages" +msgstr "ページの修正、作成、再配置、および削" + +#: ../../tutorial.rst:324 107ff92570a24cde96fad3fd09fef0fb +msgid "" +"There are several ways to manipulate the so-called **page tree** (a " +"structure describing all the pages) of a PDF:" +msgstr "PDFの**ページツリー** (すべてのページを記述する構造)を操作するためには、いくつかの方法があります。" + +#: ../../tutorial.rst:326 0003af108e2840da87372ff1fd5226f0 +msgid "" +":meth:`Document.delete_page` and :meth:`Document.delete_pages` delete " +"pages." +msgstr ":meth:`Document.delete_page` と :meth:`Document.delete_pages` はページを削除します。" + +#: ../../tutorial.rst:328 a944cc4bb64749afa365e43f0c2c95a3 +msgid "" +":meth:`Document.copy_page`, :meth:`Document.fullcopy_page` and " +":meth:`Document.move_page` copy or move a page to other locations within " +"the same document." +msgstr "" +":meth:`Document.copy_page` 、 :meth:`Document.fullcopy_page` 、 " +":meth:`Document.move_page` は、ページを同じドキュメント内の他の場所にコピーしたり移動したりします。" + +#: ../../tutorial.rst:330 d3c938ecfec045e88fcfd36f38d2a459 +msgid "" +":meth:`Document.select` shrinks a PDF down to selected pages. Parameter " +"is a sequence [#f3]_ of the page numbers that you want to keep. These " +"integers must all be in range *0 <= i < page_count*. When executed, all " +"pages **missing** in this list will be deleted. Remaining pages will " +"occur **in the sequence and as many times (!) as you specify them**." +msgstr "" +":meth:`Document.select` " +"はPDFを選択したページに縮小します。パラメータは[3]のページ番号のシーケンスで、保持したいページの番号を指定します。これらの整数はすべて範囲 " +"`0 <= i < page_count` " +"にある必要があります。実行されると、このリストに含まれていないすべてのページが削除されます。残ったページは指定した順序通りに(指定回数分!)現れます。" + +#: ../../tutorial.rst:332 e8d7536dada8429993f454c33ebe1bc6 +msgid "So you can easily create new PDFs with" +msgstr "したがって、次のような簡単な方法で新しいPDFを作成できます:" + +#: ../../tutorial.rst:334 500d467e659f464a9be86508595c8f8f +msgid "the first or last 10 pages," +msgstr "最初または最後の10ページ" + +#: ../../tutorial.rst:335 7ac997363c834974a15fe70f9cf887f3 +msgid "only the odd or only the even pages (for doing double-sided printing)," +msgstr "奇数ページのみまたは偶数ページのみ(両面印刷用)" + +#: ../../tutorial.rst:336 52d03385fca64465abb5a7a68be887b4 +msgid "pages that **do** or **don't** contain a given text," +msgstr "特定のテキストを含むまたは含まないページ" + +#: ../../tutorial.rst:337 312b16ee73bc4053a1765c3727eabba4 +msgid "reverse the page sequence, ..." +msgstr "ページの順序を逆にする" + +#: ../../tutorial.rst:339 0680eca8c5384e47a5c6af617fd6dbd4 +msgid "... whatever you can think of." +msgstr "... など、思いつく限りのことができます。" + +#: ../../tutorial.rst:341 8ffb226602e84b2d8f7b1aaa97cb6721 +msgid "" +"The saved new document will contain links, annotations and bookmarks that" +" are still valid (i.a.w. either pointing to a selected page or to some " +"external resource)." +msgstr "保存された新しいドキュメントには、まだ有効なリンク、注釈、およびブックマークが含まれています(選択したページを指すか、あるいは外部リソースを指すかを問わず)。" + +#: ../../tutorial.rst:343 d0b34cee85ee45e1ab3443c8523a5a4b +msgid "" +":meth:`Document.insert_page` and :meth:`Document.new_page` insert new " +"pages." +msgstr ":meth:`Document.insert_page` および :meth:`Document.new_page` は新しいページを挿入します。" + +#: ../../tutorial.rst:345 f2f4d69bc1a84523881267ef1a381d4f +msgid "" +"Pages themselves can moreover be modified by a range of methods (e.g. " +"page rotation, annotation and link maintenance, text and image " +"insertion)." +msgstr "さらに、ページ自体はさまざまな方法で変更できます(ページの回転、注釈とリンクの維持、テキストと画像の挿入など)" + +#: ../../tutorial.rst:348 92e85309c2254d6bb062cb77b4648dc2 +msgid "Joining and Splitting PDF Documents" +msgstr "PDF文書の結合と分割" + +#: ../../tutorial.rst:350 7b1f14d511a84c488a347bf0238d9901 +msgid "" +"Method :meth:`Document.insert_pdf` copies pages **between different** PDF" +" documents. Here is a simple **joiner** example (*doc1* and *doc2* being " +"opened PDFs)::" +msgstr "" +"メソッド :meth:`Document.insert_pdf` は異なるPDF文書間でページをコピーします。以下は簡単な結合の例です( " +"`doc1` と `doc2` は開かれたPDF文書です)::" + +#: ../../tutorial.rst:355 c79912677faf4c54a26a5189bc4b5b80 +msgid "" +"Here is a snippet that **splits** *doc1*. It creates a new document of " +"its first and its last 10 pages::" +msgstr "以下は、 `doc1` を分割するスニペットです。最初の10ページと最後の10ページを含む新しいドキュメントを作成します。" + +#: ../../tutorial.rst:362 b3a608ade2a3470186e036c291477c96 +msgid "" +"More can be found in the :ref:`Document` chapter. Also have a look at " +"`PDFjoiner.py`_." +msgstr ":ref:`Document` の章にはさらに詳細が記載されています。また、 `PDFjoiner.py`_ も確認してください。" + +#: ../../tutorial.rst:365 02b0c1a5f04b4dae8aa27371760eead5 +msgid "Embedding Data" +msgstr "データの埋め込み" + +#: ../../tutorial.rst:367 b28d4ebda3a14d069e6534c239bf2dac +msgid "" +"PDFs can be used as containers for arbitrary data (executables, other " +"PDFs, text or binary files, etc.) much like ZIP archives." +msgstr "PDFはZIPアーカイブのように、任意のデータ(実行可能ファイル、他のPDF、テキストファイル、バイナリファイルなど)をコンテナとして使用できます。" + +#: ../../tutorial.rst:369 04c4033a230c46ceab62df3ef213205b +msgid "" +"PyMuPDF fully supports this feature via :ref:`Document` *embfile_** " +"methods and attributes. For some detail read :ref:`Appendix 3`, consult " +"the Wiki on `dealing with embedding files`_, or the example scripts " +"`embedded-copy.py`_, `embedded-export.py`_, `embedded-import.py`_, and " +"`embedded-list.py`_." +msgstr "" +"PyMuPDFは :ref:`Document` *embfile_** " +"メソッドと属性を介して、この機能を完全にサポートしています。詳細については、 :ref:`Appendix3` " +"を読むか、ファイルの埋め込みに対処するためのウィキを参照してください。また、exampleスクリプトの `embedded-copy.py`_ " +"、`embedded-export.py`_ 、`embedded-import.py`_ 、および `embedded-list.py`_ " +"も参考になるでしょう。" + +#: ../../tutorial.rst:373 6c50eafa9b144cf3b3aeb36f946eed07 +msgid "Saving" +msgstr "保存" + +#: ../../tutorial.rst:375 1b094625b4f6439996e7f95040640fc7 +msgid "" +"As mentioned above, :meth:`Document.save` will **always** save the " +"document in its current state." +msgstr "前述のように、 :meth:`Document.save` は常にドキュメントを現在の状態で保存します。" + +#: ../../tutorial.rst:377 3884d37d66e24f51a21bf2c8d54682a0 +msgid "" +"You can write changes back to the **original PDF** by specifying option " +"``incremental=True``. This process is (usually) **extremely fast**, since " +"changes are **appended to the original file** without completely " +"rewriting it." +msgstr "" +"オプション `incremental=True` " +"を指定することで、変更を元のPDFに書き戻すことができます。このプロセスは(通常)非常に高速です。変更は元のファイルに追加されるため、完全に書き直す必要がありません。" + +#: ../../tutorial.rst:379 d82a59f0f1444f528e7b0b1e41005091 +msgid "" +":meth:`Document.save` options correspond to options of MuPDF's command " +"line utility *mutool clean*, see the following table." +msgstr "" +":meth:`Document.save` のオプションは、MuPDFのコマンドラインユーティリティmutool " +"cleanのオプションと対応しています。以下の表を参照してください。" + +#: ../../tutorial.rst:382 a2719cded81243c89f81bd304431e1f0 +msgid "**Save Option**" +msgstr "**保存オプション**" + +#: ../../tutorial.rst:382 b9882b9da2064022b9a963dd902e4653 +msgid "**mutool**" +msgstr "" + +#: ../../tutorial.rst:382 881334199d6f47a0adc238b7051b91c1 +msgid "**Effect**" +msgstr "**効果**" + +#: ../../tutorial.rst:384 c2182e3340414c48bab7b2e765f85e93 +msgid "garbage=1" +msgstr "" + +#: ../../tutorial.rst:384 dc388d3599fb43f999961f8e2fc8eb8e +msgid "g" +msgstr "" + +#: ../../tutorial.rst:384 b34589850ac84fe1b67edee04343ac0d +msgid "garbage collect unused objects" +msgstr "未使用のオブジェクトをガベージコレクションします" + +#: ../../tutorial.rst:385 92070a3202d74b5db0e7662c386310c9 +msgid "garbage=2" +msgstr "" + +#: ../../tutorial.rst:385 95a46fcf657b4a4991e56eb9755ba000 +msgid "gg" +msgstr "" + +#: ../../tutorial.rst:385 62e16e074c2448d8bc155dfdf6afdda5 +msgid "in addition to 1, compact :data:`xref` tables" +msgstr "1に加えて、xrefテーブルをコンパクトにします" + +#: ../../tutorial.rst:386 77f025b11a83409b84f2eee80fad402d +msgid "garbage=3" +msgstr "" + +#: ../../tutorial.rst:386 504485e1d6cb4d1d8786e62f8faa978e +msgid "ggg" +msgstr "" + +#: ../../tutorial.rst:386 c3e117b3d4be42728175ceb24ebbb432 +msgid "in addition to 2, merge duplicate objects" +msgstr "2に加えて、重複したオブジェクトをマージします" + +#: ../../tutorial.rst:387 386d21dd828449d48e07ec9006e3324c +msgid "garbage=4" +msgstr "" + +#: ../../tutorial.rst:387 1778e2d92f1945379bbbaaeb6d1d0a3f +msgid "gggg" +msgstr "" + +#: ../../tutorial.rst:387 2fb520a34ee84f6c917f2f946463f294 +msgid "in addition to 3, merge duplicate stream content" +msgstr "3に加えて、重複したストリームの内容をマージします" + +#: ../../tutorial.rst:388 057be323cb4046688e8d955e27729089 +msgid "clean=True" +msgstr "" + +#: ../../tutorial.rst:388 8bcf19b7dc0d4b4eafad5f7a4c84c22d +msgid "cs" +msgstr "" + +#: ../../tutorial.rst:388 68e84f8fcc4e4051be0ea61e6b72951c +msgid "clean and sanitize content streams" +msgstr "コンテンツストリームをクリーンアップしてサニタイズします" + +#: ../../tutorial.rst:389 bcb99a6d5341449f80aaa25be82c209c +msgid "deflate=True" +msgstr "" + +#: ../../tutorial.rst:389 f027ea4ce13144049bfd333a3e9a666f +msgid "z" +msgstr "" + +#: ../../tutorial.rst:389 6f2e9b1158804573acb4d20e2daca889 +msgid "deflate uncompressed streams" +msgstr "非圧縮のストリームをdeflate圧縮します" + +#: ../../tutorial.rst:390 53905bc3eb97437e8911ddbc5d100554 +msgid "deflate_images=True" +msgstr "" + +#: ../../tutorial.rst:390 34acaac300444bf08486a3959b81dece +msgid "i" +msgstr "" + +#: ../../tutorial.rst:390 9b6809eb583e495cb5a8a322074180f8 +msgid "deflate image streams" +msgstr "画像ストリームをdeflate圧縮します" + +#: ../../tutorial.rst:391 72d3e5a396dd4fb29d03b762a58b906a +msgid "deflate_fonts=True" +msgstr "" + +#: ../../tutorial.rst:391 a4e6652a6a14408f8335a8fe83b9bf96 +msgid "f" +msgstr "" + +#: ../../tutorial.rst:391 6ee5c61d2867436ba3dcf2f8ef83ab86 +msgid "deflate fontfile streams" +msgstr "フォントファイルストリームをdeflate圧縮します" + +#: ../../tutorial.rst:392 7a28d660d2b04da9badd9d56176db271 +msgid "ascii=True" +msgstr "" + +#: ../../tutorial.rst:392 15cdc88f7d8b45d2913cdac93cb0abba +msgid "a" +msgstr "" + +#: ../../tutorial.rst:392 3c7d9fd90dab4253a99aa01e0a6a51ff +msgid "convert binary data to ASCII format" +msgstr "バイナリデータをASCII形式に変換します" + +#: ../../tutorial.rst:393 76e5877279f846468b87ea5fcba8cb52 +msgid "linear=True" +msgstr "" + +#: ../../tutorial.rst:393 ef0deabe1236445f823821ccb49065c9 +msgid "l" +msgstr "" + +#: ../../tutorial.rst:393 5d6bef23e4d5440fa6c61fdaedc7e1e3 +msgid "create a linearized version" +msgstr "線形化バージョンを作成します" + +#: ../../tutorial.rst:394 564a5f2120f64c54aa1efdd8dded2a0d +msgid "expand=True" +msgstr "" + +#: ../../tutorial.rst:394 70de347592a0460bbfd4e06cdd95a0d4 +msgid "d" +msgstr "" + +#: ../../tutorial.rst:394 a00de27f23244fc49535777b88b9fc4a +msgid "decompress all streams" +msgstr "すべてのストリームを解凍します" + +#: ../../tutorial.rst:397 6c7e230d21b34ca7a5f0c7de6b5d5187 +msgid "" +"For an explanation of terms like *object, stream, xref* consult the " +":ref:`Glossary` chapter." +msgstr "オブジェクト、ストリーム、xrefなどの用語の詳細については、 :ref:`Glossary` の章を参照してください。" + +#: ../../tutorial.rst:399 dc0df99a528d4978997189a285ab567f +msgid "" +"For example, ``mutool clean -ggggz file.pdf`` yields excellent compression " +"results. It corresponds to ``doc.save(filename, garbage=4, deflate=True)``." +msgstr "" +"例えば、 `mutool clean -ggggz file.pdf` は優れた圧縮結果をもたらします。これは " +"`doc.save(filename, garbage=4, deflate=True)` に対応しています。" + +#: ../../tutorial.rst:402 77dd1b8c5ae8491eadd5ee69c5c969ca +msgid "Closing" +msgstr "クローズ" + +#: ../../tutorial.rst:403 f54a1c4a6b194905adcc2de9c957a901 +msgid "" +"It is often desirable to \"close\" a document to relinquish control of " +"the underlying file to the OS, while your program continues." +msgstr "プログラムが継続する間に、基になるファイルの制御をOSに戻すために、ドキュメントを「クローズ」することがしばしば望まれます。" + +#: ../../tutorial.rst:405 aba7fafb6d144278a587237e62031f63 +msgid "" +"This can be achieved by the :meth:`Document.close` method. Apart from " +"closing the underlying file, buffer areas associated with the document " +"will be freed." +msgstr "" +"これは :meth:`Document.close` " +"メソッドによって実現できます。基になるファイルをクローズするだけでなく、ドキュメントに関連するバッファ領域も解放されます。" + +#: ../../tutorial.rst:408 ef36cad35b2b43b3b4e6a75a3cad4061 +msgid "Further Reading" +msgstr "さらなる情報" + +#: ../../tutorial.rst:409 413e5c22ce7b4e7c946d398ff40ff7bc +msgid "" +"Also have a look at PyMuPDF's `Wiki`_ pages. Especially those named in " +"the sidebar under title **\"Recipes\"** cover over 15 topics written in " +"\"How-To\" style." +msgstr "" +"また、PyMuPDFの `Wiki`_ " +"ページもご覧ください。特に、サイドバーのタイトル「Recipes」の下に名前が挙げられているものは、15以上のトピックが「How-" +"To」スタイルで書かれています。" + +#: ../../tutorial.rst:411 cc93bd599e14470998e742dc7e454487 +msgid "" +"This document also contains a :ref:`FAQ`. This chapter has close " +"connection to the aforementioned recipes, and it will be extended with " +"more content over time." +msgstr ":ref:`Recipes: Table of Contents` をご覧ください。" + +#: ../../tutorial.rst:418 cb8fc0b5b1c848168a96624e65dd29fc +msgid "Footnotes" +msgstr "脚注" + +#: ../../tutorial.rst:419 b94228407b8545b2a40f21dedce5ff06 +msgid "" +"PyMuPDF lets you also open several image file types just like normal " +"documents. See section :ref:`ImageFiles` in chapter :ref:`Pixmap` for " +"more comments." +msgstr "" +"PyMuPDFは通常のドキュメントと同様に、いくつかの画像ファイル形式も開くことができます。詳細は、章 :ref:`Pixmap` のセクション " +":ref:`ImageFiles` を参照してください。" + +#: ../../tutorial.rst:421 0530b8abe40b49d793ce8d1b1741140a +msgid "" +":meth:`Page.get_text` is a convenience wrapper for several methods of " +"another PyMuPDF class, :ref:`TextPage`. The names of these methods " +"correspond to the argument string passed to :meth:`Page.get_text` \\: " +"*Page.get_text(\"dict\")* is equivalent to *TextPage.extractDICT()* \\." +msgstr "" +":meth:`Page.get_text` は、別のPyMuPDFクラス、:ref:`TextPage` " +"のいくつかのメソッドの便利なラッパーです。これらのメソッドの名前は、 :meth:`Page.get_text` " +"に渡された引数文字列に対応しています。例えば、 `Page.get_text(“dict”)` は " +"`TextPage.extractDICT()` と同等です。" + +#: ../../tutorial.rst:423 83003f91b1bd4cb183918e279dd80e30 +msgid "" +"\"Sequences\" are Python objects conforming to the sequence protocol. " +"These objects implement a method named *__getitem__()*. Best known " +"examples are Python tuples and lists. But *array.array*, *numpy.array* " +"and PyMuPDF's \"geometry\" objects (:ref:`Algebra`) are sequences, too. " +"Refer to :ref:`SequenceTypes` for details." +msgstr "\"シーケンス\"とは、シーケンスプロトコルに準拠したPythonオブジェクトのことを指します。これらのオブジェクトは、getitem()という名前のメソッドを実装しています。よく知られた例としては、Pythonのタプルやリストがあります。しかし、array.array、numpy.array、およびPyMuPDFの「geometry」オブジェクト(幾何オブジェクトのためのオペレーター代数)もシーケンスです。詳細については、「PyMuPDFにおけるPythonシーケンスの引数の使用」を参照してください" + +#: ../../footer.rst:60 2332c1f552f94af7ba9abc4470a1da83 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "Importing |PyMuPDF| as `fitz` still " +#~ "works however |PyMuPDF| **cannot coexist** " +#~ "with other packages named \"fitz\" in" +#~ " the same Python environment." +#~ msgstr "したがって、PyMuPDFは同じPython環境で「fitz」という名前のパッケージと共存することはできません" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/vars.mo b/docs/locales/ja/LC_MESSAGES/vars.mo new file mode 100644 index 000000000..e2354943e Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/vars.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/vars.po b/docs/locales/ja/LC_MESSAGES/vars.po new file mode 100644 index 000000000..04e054543 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/vars.po @@ -0,0 +1,882 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 fe9b5e25292345dabfb497bb19edb766 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 d2f8f08d7c614fd4a24526338f98cf81 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 b21de3ab3c9c435c854c3d2c35b872eb +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../vars.rst:5 1030466fe621484abb0ed898e9bd858e +msgid "Constants and Enumerations" +msgstr "定数と列挙型" + +#: ../../vars.rst:6 7b9e2d297c744f9896459d857124f8fe +#, fuzzy +msgid "" +"Constants and enumerations of :title:`MuPDF` as implemented by |PyMuPDF|." +" Each of the following values is accessible as `pymupdf.value`." +msgstr "PyMuPDFによって実装されたMuPDFの定数と列挙型。以下の各変数は、 *pymupdf.variable* としてアクセスできます。" + +#: ../../vars.rst:10 93b7c9094c1a4a71859329ac3585422e +msgid "Constants" +msgstr "定数" + +#: ../../vars.rst:14 7fb75480ccc840dfaa9e63651abc6c59 +msgid "Predefined Python list of valid :ref:`Base-14-Fonts`." +msgstr "PDFベース14フォントの事前定義されたPythonリスト。" + +#: ../../vars.rst 09c3b969b61646df8d91f86cb079ea60 +#: 10a6d73acab042d7bb131e133536db78 253394944f67424fb6f134c03c18972a +#: 295204de5ddd4acf94e9337133b6ab41 33b601da845b497abfda322b0e34d746 +#: 3d1a316a7eca42a09333ceae6757f67c 428bd79aa9784b51a67935ca4691fe71 +#: 46d1381f7d3444c7bd238119b586a4bf 6866fb6bcf9546da8f1a70dd2f70d55b +#: 6960f8df76ba4eb5a0b01f460d59356d 733a409d043344b5bd9e45771490c344 +#: 74e0236c1bf6493591c4237cf0483e57 8bab110d76ed4c47b11bb57ba1f80010 +#: 9ca5944592834b6495adbddf9e94d1de a09525b0a9b243989d16120571d0edc2 +#: a0ab45345d2d464dbc0138a582ba3cff bc579d3796e14cd99e7bd462936b9388 +#: c98d57fd103f4a13848a71827bda04aa d5b4892f3f1f415b8c2c31ae4268884c +#: d5cd42a0da88418d8c02c0c74ab59e6c dd0c5824dd7348748ee402755d931f64 +#: de9a3ce202f14edab37dc8e0288bc4e5 e540535dea624014bcefd48f86582d30 +#: f3c91524c9c7416ba8088fe263a3aa53 f6ec6d751f8d4b1eb04285b8b8cf1721 +#: f888476feadf4b77b29835b2400cbc94 +msgid "type" +msgstr "" + +#: ../../vars.rst:16 01a14e397bf64b1ca8867ae79611a594 +msgid "list" +msgstr "" + +#: ../../vars.rst:20 144b32ec81dd44a49ce81c0e827d05dd +msgid "Predefined RGB colorspace *pymupdf.Colorspace(pymupdf.CS_RGB)*." +msgstr "事前定義されたRGBカラースペース *pymupdf.Colorspace(pymupdf.CS_RGB)*。" + +#: ../../vars.rst:22 ../../vars.rst:28 ../../vars.rst:34 +#: 2ae706b11a0e42ec85227aca8df8b5da 2b900439e2dd42c48650b9c926a25ad2 +#: 6b0f05140f54460cbdeaf5ae880c66c2 +msgid ":ref:`Colorspace`" +msgstr ":ref:`Colorspace`" + +#: ../../vars.rst:26 453c0e5549bb436eabf89d23f4a39e20 +msgid "Predefined GRAY colorspace *pymupdf.Colorspace(pymupdf.CS_GRAY)*." +msgstr "事前定義されたGRAYカラースペース *pymupdf.Colorspace(pymupdf.CS_GRAY)*。" + +#: ../../vars.rst:32 f28115ecc14f4b3fa020ef83ae3de01f +msgid "Predefined CMYK colorspace *pymupdf.Colorspace(pymupdf.CS_CMYK)*." +msgstr "事前定義されたCMYKカラースペース *pymupdf.Colorspace(pymupdf.CS_CMYK)*。" + +#: ../../vars.rst:38 8644c9c449e64f5a83f23be98df5e837 +msgid "1 -- Type of :ref:`Colorspace` is RGBA" +msgstr "1 – :ref:`Colorspace` のタイプはRGBA" + +#: ../../vars.rst:40 ../../vars.rst:46 ../../vars.rst:52 ../../vars.rst:326 +#: ../../vars.rst:332 ../../vars.rst:341 ../../vars.rst:350 ../../vars.rst:356 +#: ../../vars.rst:362 09af83007b7d4e23b12cbc19243303f2 +#: 2edaf272dfa54f8fbc3adf2750b339ea 5c5e36e69bad471783a3dd49d330bd5e +#: 86f5471ae7d1470493fa5da69e5d6aa5 8d8542dcb26c46b9bb6086f15e992bbd +#: a44f189f2ecb4a258d02694763689f54 d835a815ac894c25927034021fda246a +#: dead95221beb45bf8aa3e244ec5e899d f20c657e0b534b5d9a2e87c1a21bb576 +msgid "int" +msgstr "" + +#: ../../vars.rst:44 dea4263081ba4994a2eeaa9d81eee74d +msgid "2 -- Type of :ref:`Colorspace` is GRAY" +msgstr "2 – :ref:`Colorspace` のタイプはGRAY" + +#: ../../vars.rst:50 d166778de3d24a18a747305e3d8fcb20 +msgid "3 -- Type of :ref:`Colorspace` is CMYK" +msgstr "3 – :ref:`Colorspace` のタイプはCMYK" + +#: ../../vars.rst:56 c49bb5d14a654854813f83d5439881cc +msgid "'x.xx.x' -- MuPDF version that is being used by PyMuPDF." +msgstr "" + +#: ../../vars.rst:58 ../../vars.rst:70 ../../vars.rst:82 +#: 21a24e7f678c41689906f6f3a44c4d9c 894451110d954f70b83b9c086ee80d49 +#: 956c3e03532b48a1b2d48a54f23ce584 +msgid "string" +msgstr "" + +#: ../../vars.rst:62 d7f7bbe56b91414984ad7b0319521c10 +msgid "MuPDF version as a tuple of integers, `(major, minor, patch)`." +msgstr "" + +#: ../../vars.rst:64 ../../vars.rst:76 ../../vars.rst:88 +#: 207f5eb93ed945aba7806632b6423da7 3f43e06044e444069fd8df47e546539c +#: 982cdc3ad1c54084ae82883fdf9be7c1 +msgid "tuple" +msgstr "" + +#: ../../vars.rst:68 329db1806650462baee132494f489e4f +#, fuzzy +msgid "'x.xx.x' -- PyMuPDF version." +msgstr "‘x.xxx’ – MuPDFのバージョン" + +#: ../../vars.rst:74 c124efc4e65240eb9be8b289a3d5b4e9 +msgid "PyMuPDF version as a tuple of integers, `(major, minor, patch)`." +msgstr "" + +#: ../../vars.rst:80 7f0fa59a351b4a1a86275db5260bb276 +msgid "ISO timestamp *YYYY-MM-DD HH:MM:SS* when these bindings were built." +msgstr "これらのバインディングがビルドされたISOタイムスタンプ YYYY-MM-DD HH:MM:SS。" + +#: ../../vars.rst:86 fbd22c55345c4f83935292c0f151c564 +#, fuzzy +msgid "" +"(pymupdf_version, mupdf_version, timestamp) -- combined version " +"information where `timestamp` is the generation point in time formatted " +"as \"YYYYMMDDhhmmss\"." +msgstr "" +"(VersionBind、VersionFitz、タイムスタンプ) – " +"タイムスタンプは「YYYYMMDDhhmmss」という形式で表される、生成時点の時間情報を結合したバージョン情報。" + +#: ../../vars.rst:92 ../../vars.rst:100 892fe0288aeb4e99b8b0d3600184ef2b +#: dfd9229f0ee444b3bdb04a3c04df53ae +msgid "Legacy equivalent to `mupdf_version`." +msgstr "" + +#: ../../vars.rst:96 3cd10f8741a94714b233fcdba42963ca +msgid "Legacy equivalent to `pymupdf_version`." +msgstr "" + +#: ../../vars.rst:106 7733f01a712e474a8dfadb43f1b97b26 +msgid "Document Permissions" +msgstr "ドキュメントの許可" + +#: ../../vars.rst:109 ../../vars.rst:127 ../../vars.rst:140 +#: 23c7da2ac88d4e8aa9d426ebb8cc564e 9b8d302385fe4503b9e770cfe5869d69 +#: aa709efa1b7e4ee2ba21a6a73f45b034 +msgid "Code" +msgstr "コード" + +#: ../../vars.rst:109 b9a1feca1e2247dd8fb3eb8bdf0f54a0 +msgid "Permitted Action" +msgstr "許可されたアクション" + +#: ../../vars.rst:111 3dd46d71a51a493b8aff660ddb32b6b7 +msgid "PDF_PERM_PRINT" +msgstr "" + +#: ../../vars.rst:111 409c61f885d4482f9a980d219762cb7d +msgid "Print the document" +msgstr "文書を印刷する" + +#: ../../vars.rst:112 6796bebae6f6460fb93342edb0f62341 +msgid "PDF_PERM_MODIFY" +msgstr "" + +#: ../../vars.rst:112 fd898736e37d4f528240b29014abd0e0 +msgid "Modify the document's contents" +msgstr "文書の内容を変更する" + +#: ../../vars.rst:113 d8a4d790db384119ac0305c207be337b +msgid "PDF_PERM_COPY" +msgstr "" + +#: ../../vars.rst:113 f7817d26bfdc4d14b265df25a694076a +msgid "Copy or otherwise extract text and graphics" +msgstr "テキストやグラフィックスをコピーしたりその他の抽出を行う" + +#: ../../vars.rst:114 8a02ac6b9a4d4399a2940a96e311a80b +msgid "PDF_PERM_ANNOTATE" +msgstr "" + +#: ../../vars.rst:114 4ca81a28be8448ce9469dde104486304 +msgid "Add or modify text annotations and interactive form fields" +msgstr "テキスト注釈やインタラクティブなフォームフィールドを追加または変更する" + +#: ../../vars.rst:115 8d256680b5a64635a6b4f5aa56432961 +msgid "PDF_PERM_FORM" +msgstr "" + +#: ../../vars.rst:115 257e185d25414ac6b8f8849455d459be +msgid "Fill in forms and sign the document" +msgstr "フォームに記入し、文書に署名する" + +#: ../../vars.rst:116 dab26bdc5a2c41a2825ced24a564d464 +msgid "PDF_PERM_ACCESSIBILITY" +msgstr "" + +#: ../../vars.rst:116 c175209ea21a457db4ac47920318e63e +msgid "Obsolete, always permitted" +msgstr "廃止されましたが、常に許可されています" + +#: ../../vars.rst:117 204559ad227f46f79d769d00108ab587 +msgid "PDF_PERM_ASSEMBLE" +msgstr "" + +#: ../../vars.rst:117 69c2e8a6caf2466396c7936e95acfde0 +msgid "Insert, rotate, or delete pages, bookmarks, thumbnail images" +msgstr "ページの挿入、回転、削除、ブックマーク、サムネイル画像の操作" + +#: ../../vars.rst:118 374d712b7da44ed195cd4c290e1c2716 +msgid "PDF_PERM_PRINT_HQ" +msgstr "" + +#: ../../vars.rst:118 7bd08c2438f7404c9edb7918cfa9dde4 +msgid "High quality printing" +msgstr "高品質印刷" + +#: ../../vars.rst:124 44c375dc62c84ea4aa0f893f7428fc97 +msgid "PDF Optional Content Codes" +msgstr "PDFオプショナルコンテンツコード" + +#: ../../vars.rst:127 ../../vars.rst:140 1f12957ffd304f15be7b4562fbb70af8 +#: 9fcfaed6b5d44c55a6666f7af39b6e3e +msgid "Meaning" +msgstr "意味" + +#: ../../vars.rst:129 2949b53ec89740089745a91a86f8f8bb +msgid "PDF_OC_ON" +msgstr "" + +#: ../../vars.rst:129 04bedb33af514509b6c30bedbe47b114 +msgid "Set an OCG to ON temporarily" +msgstr "一時的にOCGをONに設定します" + +#: ../../vars.rst:130 4c80348e5c3a4f648165764918eceaec +msgid "PDF_OC_TOGGLE" +msgstr "" + +#: ../../vars.rst:130 146b94ce0c5d4052b94ea728b7cf05cf +msgid "Toggle OCG status temporarily" +msgstr "OCGステータスを一時的に切り替えます" + +#: ../../vars.rst:131 88b629a7c41f4b9ba8ef72dfd6335841 +msgid "PDF_OC_OFF" +msgstr "" + +#: ../../vars.rst:131 977b6700e0a84fafa4bed9ea9115e2e3 +msgid "Set an OCG to OFF temporarily" +msgstr "一時的にOCGをOFFに設定します" + +#: ../../vars.rst:137 75fa410a0f1e43819577edc2279b177d +msgid "PDF encryption method codes" +msgstr "PDF暗号化方式コード" + +#: ../../vars.rst:142 0dc98fa47aeb477c850a169dfbc49787 +msgid "PDF_ENCRYPT_KEEP" +msgstr "" + +#: ../../vars.rst:142 ad93d01ac8cd40b48e540af48cae695a +msgid "do not change" +msgstr "変更しない" + +#: ../../vars.rst:143 e793fb9b2a3b451e80fd1cf718ac0a73 +msgid "PDF_ENCRYPT_NONE" +msgstr "" + +#: ../../vars.rst:143 1aa3620babfa401c97204c155b2d5897 +msgid "remove any encryption" +msgstr "暗号化を解除する" + +#: ../../vars.rst:144 a60217b47a0845d1a92460e1d91f56ec +msgid "PDF_ENCRYPT_RC4_40" +msgstr "" + +#: ../../vars.rst:144 6f4cdbdd522d462c947445aab0ce9afb +msgid "RC4 40 bit" +msgstr "RC4 40ビット" + +#: ../../vars.rst:145 16f6da906d544174ad7481a53cad1c77 +msgid "PDF_ENCRYPT_RC4_128" +msgstr "" + +#: ../../vars.rst:145 8d40afdfea4f45af888a67cced393b08 +msgid "RC4 128 bit" +msgstr "RC4 128ビット" + +#: ../../vars.rst:146 668eb7572ef0467f85299de98db772f9 +msgid "PDF_ENCRYPT_AES_128" +msgstr "" + +#: ../../vars.rst:146 0dead0f7d85343cf8806bf240e3be702 +msgid "*Advanced Encryption Standard* 128 bit" +msgstr "*Advanced Encryption Standard* 128ビット" + +#: ../../vars.rst:147 eb827f08580f41dfa6884be021ca8bfa +msgid "PDF_ENCRYPT_AES_256" +msgstr "" + +#: ../../vars.rst:147 86463cfed631417d97de185e6ede3875 +msgid "*Advanced Encryption Standard* 256 bit" +msgstr "*Advanced Encryption Standard* 256ビット" + +#: ../../vars.rst:148 7cc2750771844a20af313213e12466b9 +msgid "PDF_ENCRYPT_UNKNOWN" +msgstr "" + +#: ../../vars.rst:148 ecdf191ea39e4dcea3a7b49000707ca4 +msgid "unknown" +msgstr "不明" + +#: ../../vars.rst:154 9ba92b1cac044752afe8a086846edde4 +msgid "Font File Extensions" +msgstr "フォントファイルの拡張子" + +#: ../../vars.rst:155 d05c8692b98444f19d977acf4654e2c5 +msgid "" +"The table show file extensions you should use when saving fontfile " +"buffers extracted from a PDF. This string is returned by " +":meth:`Document.get_page_fonts`, :meth:`Page.get_fonts` and " +":meth:`Document.extract_font`." +msgstr "" +"このテーブルは、PDFから抽出されたフォントファイルバッファを保存する際に使用すべきファイル拡張子を示しています。この文字列は、:meth:`Document.get_page_fonts`、:meth:`Page.get_fonts`、および" +" :meth:`Document.extract_font` によって返されます。" + +#: ../../vars.rst:158 10036336cad34de79b475eedb0d89264 +msgid "Ext" +msgstr "拡張子" + +#: ../../vars.rst:158 286a37d16191417fa1ec3d30013b9905 +msgid "Description" +msgstr "説明" + +#: ../../vars.rst:160 2ec4cb1ec4a14555b74fbfed3290a14d +msgid "ttf" +msgstr "" + +#: ../../vars.rst:160 9a3d8a2bbdb74e3c9321c00f8fbb00ce +msgid "TrueType font" +msgstr "TrueTypeフォント" + +#: ../../vars.rst:161 d5bcadec8ad74c0493a5485e2c1cb3c3 +msgid "pfa" +msgstr "" + +#: ../../vars.rst:161 e0e30249099043e2a8a2d092ce908af5 +msgid "Postscript for ASCII font (various subtypes)" +msgstr "ASCII用のPostscriptフォント(さまざまなサブタイプ)" + +#: ../../vars.rst:162 dce1949516af496a88009678fa1bdbb6 +msgid "cff" +msgstr "" + +#: ../../vars.rst:162 6a979379a8bb493d9c0a471fb9948665 +msgid "Type1C font (compressed font equivalent to Type1)" +msgstr "Type1Cフォント(Type1と同等の圧縮フォント)" + +#: ../../vars.rst:163 bf67e41376a74b109bebe33cb2fa217a +msgid "cid" +msgstr "" + +#: ../../vars.rst:163 9630223a534d4703ac88c9eac2b049f1 +msgid "character identifier font (postscript format)" +msgstr "文字識別子フォント(Postscript形式)" + +#: ../../vars.rst:164 f6f324ea27cb4c1a9d9fb71e4ff86548 +msgid "otf" +msgstr "" + +#: ../../vars.rst:164 bbc8200b8cc241cf9232e2d7e9397e14 +msgid "OpenType font" +msgstr "OpenTypeフォント" + +#: ../../vars.rst:165 4fa13207e2c4485b9ee155461144bc1b +msgid "n/a" +msgstr "" + +#: ../../vars.rst:165 5bafd17a3f0841efb18a431d74469745 +msgid "not extractable, e.g. :ref:`Base-14-Fonts`, Type 3 fonts and others" +msgstr "抽出できない、 :ref:`PDFベース14フォント ` 、Type 3フォント、その他" + +#: ../../vars.rst:171 ec7137b3e4b84b3eb6805e102f418a3e +msgid "Text Alignment" +msgstr "テキストの配置" + +#: ../../vars.rst:174 3bbfd7ec062e46429c9b740758e870ff +msgid "0 -- align left." +msgstr "0 – 左揃え。" + +#: ../../vars.rst:178 7b46923a6c57440b9419e91d2a6b6c57 +msgid "1 -- align center." +msgstr "1 – 中央揃え。" + +#: ../../vars.rst:182 55454a7de8fa477582234dab9711284e +msgid "2 -- align right." +msgstr "2 – 右揃え。" + +#: ../../vars.rst:186 caeb62fad8aa4f2dba31742445127af1 +msgid "3 -- align justify." +msgstr "3 – 両端揃え。" + +#: ../../vars.rst:193 f2d396fbbf8241b2bbb96e710f39179c +msgid "Font Properties" +msgstr "" + +#: ../../vars.rst:194 79137841087d4b71b3618b71d459d012 +msgid "" +"Please note that the following bits are derived from what a font has to " +"say about its properties. It may not be (and quite often is not) correct." +msgstr "" + +#: ../../vars.rst:198 99be8995e30b46a8897ade1a103ce5a8 +msgid "" +"1 -- the character or span is a superscript. This property is computed by" +" MuPDF and not part of any font information." +msgstr "" + +#: ../../vars.rst:202 ac50ee48c40445d88e3170a486932cbe +msgid "2 -- the font is italic." +msgstr "" + +#: ../../vars.rst:206 a35ac9ea04ae469a817d3e6356c9edde +msgid "4 -- the font is serifed." +msgstr "" + +#: ../../vars.rst:210 5e36edef7a9a4061876b61d1552ad213 +msgid "8 -- the font is mono-spaced." +msgstr "" + +#: ../../vars.rst:214 793a358252de4e72b691114a0476651d +msgid "16 -- the font is bold." +msgstr "" + +#: ../../vars.rst:217 c7c29302bc3847fdb257f3abff34d9d9 +msgid "Text Extraction Flags" +msgstr "テキスト抽出フラグ" + +#: ../../vars.rst:218 f83abf5bbd40402890f0db614be6f339 +#, fuzzy +msgid "" +"Option bits controlling the amount of data, that are parsed into a " +":ref:`TextPage`." +msgstr ":ref:`TextPage` に解析されるデータの量を制御するオプションビット - このクラスは、主にPyMuPDF内部でのみ使用されます。" + +#: ../../vars.rst:220 43f62f5edc5644ef91134286b5ddbe14 +#, fuzzy +msgid "" +"For the PyMuPDF programmer, some combination (using Python's `|` " +"operator, or simply use `+`) of these values are aggregated in the " +"``flags`` integer, a parameter of all text search and text extraction " +"methods. Depending on the individual method, different default " +"combinations of the values are used. Please use a value that meets your " +"situation. Especially make sure to switch off image extraction unless you" +" really need them. The impact on performance and memory is significant!" +msgstr "" +"PyMuPDFプログラマーにとって、これらの値のいくつかの組み合わせ(Pythonの`|`演算子を使用するか、単純に`+`を使用する)が、すべてのテキスト検索およびテキスト抽出メソッドのパラメータである" +" `flags` " +"整数に集約されます。個々のメソッドによっては、異なる値のデフォルトの組み合わせが使用されます。ご自身の状況に合った値を使用してください。特に、必要がない限り画像抽出をオフにしてください。パフォーマンスとメモリに対する影響が大きいです!" + +#: ../../vars.rst:224 f745b0af916a4865868cb63378d7ed3b +msgid "" +"1 -- If set, ligatures are passed through to the application in their " +"original form. Otherwise ligatures are expanded into their constituent " +"parts, e.g. the ligature \"ffi\" is expanded into three eparate " +"characters f, f and i. Default is \"on\" in PyMuPDF. MuPDF supports the " +"following 7 ligatures: \"ff\", \"fi\", \"fl\", \"ffi\", \"ffl\", , " +"\"ft\", \"st\"." +msgstr "" +"1 – " +"設定されている場合、リガチャは元の形式のままアプリケーションに渡されます。それ以外の場合、リガチャは構成要素に展開されます。例:リガチャ「ffi」は、3つの個別の文字" +" f、f、および i " +"に展開されます。デフォルトはPyMuPDFで「オン」です。MuPDFは以下の7つのリガチャに対応しています:\"ff\"、\"fi\"、\"fl\"、\"ffi\"、\"ffl\"、\"ft\"、\"st\"。" + +#: ../../vars.rst:228 1beb177071964f40a7555dd9dcfb5201 +msgid "" +"2 -- If set, whitespace is passed through. Otherwise any type of " +"horizontal whitespace (including horizontal tabs) will be replaced with " +"space characters of variable width. Default is \"on\" in PyMuPDF." +msgstr "" +"2 – " +"設定されている場合、空白はそのまま渡されます。それ以外の場合、水平空白(水平タブを含む)のいずれかのタイプは可変幅のスペース文字に置き換えられます。デフォルトはPyMuPDFで「オン」です。" + +#: ../../vars.rst:232 68845437b4ce4cc99b2247bd79dcca13 +msgid "" +"4 -- If set, then images will be stored in the :ref:`TextPage`. This " +"causes the presence of (usually large!) binary image content in the " +"output of text extractions of types \"blocks\", \"dict\", \"json\", " +"\"rawdict\", \"rawjson\", \"html\", and \"xhtml\" and is the default " +"there. If used with \"blocks\" however, only image metadata will be " +"returned, not the image itself." +msgstr "" +"4 – 設定されている場合、画像は :ref:`TextPage` " +"に保存されます。これにより、テキスト抽出の出力に(通常は大きな)バイナリ画像コンテンツが含まれることになります。ただし、これはタイプ「blocks」、「dict」、「json」、「rawdict」、「rawjson」、「html」、および「xhtml」のテキスト抽出にのみ適用され、デフォルトです。ただし、「blocks」とともに使用される場合、画像メタデータのみが返され、画像自体は返されません。" + +#: ../../vars.rst:236 0a63f2880d4048bf985562e86a0eb94c +msgid "" +"8 -- If set, Mupdf will not try to add missing space characters where " +"there are large gaps between characters. In PDF, the creator often does " +"not insert spaces to point to the next character's position, but will " +"provide the direct location address. The default in PyMuPDF is \"off\" --" +" so spaces **will be generated**." +msgstr "" +"8 – " +"設定されている場合、Mupdfは文字間の大きな間隔に欠落したスペース文字を追加しようとしません。PDFでは、作成者はしばしば次の文字の位置を指し示すためにスペースを挿入しませんが、直接の場所のアドレスを提供します。PyMuPDFのデフォルトは「オフ」です" +" - したがって、スペースが生成されます。" + +#: ../../vars.rst:240 5cc0718f0ecb4c66aa6a721e7da94c4b +msgid "" +"16 -- Ignore hyphens at line ends and join with next line. Used " +"internally with the text search functions. However, it is generally " +"available: if on, text extractions will return joined text lines (or " +"spans) with the ending hyphen of the first line eliminated. So two " +"separate spans **\"first meth-\"** and **\"od leads to wrong results\"** " +"on different lines will be joined to one span **\"first method leads to " +"wrong results\"** and correspondingly updated bboxes: the characters of " +"the resulting span will no longer have identical y-coordinates." +msgstr "" +"16 – " +"行末のハイフンを無視し、次の行に結合します。テキスト検索関数と内部で使用されます。ただし、一般的に使用できます。ONの場合、テキスト抽出は結合されたテキスト行(またはスパン)を返します。最初の行のハイフンが除去されます。異なる行にある「first" +" meth-」と「od leads to wrong results」の2つの個別のスパンが「first method leads to " +"wrong " +"results」として結合され、それに応じて更新されたバウンディングボックス(bbox):結果のスパンの文字はもはや同じy座標を持ちません。" + +#: ../../vars.rst:244 17848384472243f7919897b9644924c4 +msgid "" +"32 -- Generate a new line for every span. Not used (\"off\") in PyMuPDF, " +"but available for your use. Every line in \"dict\", \"json\", " +"\"rawdict\", \"rawjson\" will contain exactly one span." +msgstr "" +"32 – " +"各スパンに対して新しい行を生成します。PyMuPDFでは使用されませんが(オフです)、使用可能です。\"dict\"、\"json\"、\"rawdict\"、\"rawjson\"の各行には正確に1つのスパンが含まれます。" + +#: ../../vars.rst:248 619c77ac3797401f8f15d48e9a869b8c +#, fuzzy +msgid "" +"64 -- Characters entirely outside a page's **mediabox** or contained in " +"other \"clipped\" areas will be ignored. This is default in PyMuPDF." +msgstr "64 – ページのメディアボックス外にある文字は無視されます。これはPyMuPDFのデフォルトです。" + +#: ../../vars.rst:252 c5bd44858fb643b29c90dcedbc9b97ce +msgid "" +"128 -- Use raw character codes instead of U+FFFD. This is the default for" +" **text extraction** in PyMuPDF. If you **want to detect** when encoding " +"information is missing or uncertain, toggle this flag and scan for the " +"presence of U+FFFD (= `chr(0xfffd)`) code points in the resulting text." +msgstr "" + +#: ../../vars.rst:256 20b2145b5f144c1fb2b16ae5beb54928 +msgid "256 -- Not supported." +msgstr "" + +#: ../../vars.rst:260 550ddd5ded6f4f60848443cd8d82d345 +msgid "" +"512 -- Ignore metric values of all fonts when computing character " +"boundary boxes -- most prominently the `ascender " +"`_ and `descender " +"`_ values. Instead, follow the " +"drawing commands of each character's glyph and compute its rectangle " +"hull. This is the smallest rectangle wrapping all points used for drawing" +" the visual appearance - see the :ref:`Shape` class for understanding the" +" background. This will especially result in individual character heights." +" For instance a (white) space will have a **bbox of height 0** (because " +"nothing is drawn) -- in contrast to the non-zero boundary box generated " +"when using font metrics. This option may be useful to cope with getting " +"meaningful boundary boxes even for fonts containing errors. Its use will " +"slow down text extraction somewhat because of the incurred computational " +"effort." +msgstr "" + +#: ../../vars.rst:262 29fa50a96c4b4750a61482a0361601b6 +msgid "" +"Note that this has no effect by default - one must also disable the " +"global quad corrections setting with " +"`pymupdf.TOOLS.unset_quad_corrections(True)`." +msgstr "" + +#: ../../vars.rst:267 7277865b5b5a42678c9305a006d97704 +msgid "1024 -- Not supported." +msgstr "" + +#: ../../vars.rst:271 4cb1830671184382bbc93ad044143ff6 +msgid "" +"2048 -- Ignore built-in differences between text appearing in e.g. PDF " +"viewers versus text stored in the PDF. See :ref:`AdobeManual`, page 615 " +"for background. If set, the **stored** (\"replacement\" text) is ignored " +"in favor of the displayed text." +msgstr "" + +#: ../../vars.rst:275 20bf6709d2844de080c1d5205cf728c6 +msgid "4096 -- Attempt to segment page into different regions." +msgstr "" + +#: ../../vars.rst:277 59ac067b7e44463d8f0fb19b1f87783b +msgid "" +"The following constants represent the default combinations of the above " +"for text extraction and searching:" +msgstr "以下の定数は、テキスト抽出と検索のための上記のデフォルトの組み合わせを表します:" + +#: ../../vars.rst:281 ../../vars.rst:285 ../../vars.rst:289 ../../vars.rst:309 +#: 380a7c5f0e5d42889c3a5d034281f461 74c462ca4834489b9a0da88c2c82c756 +#: b02c43245274409697af49792303311e b4051b129792462d9c73343baea60d95 +msgid "" +"`TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP " +"| TEXT_USE_CID_FOR_UNKNOWN_UNICODE`" +msgstr "" + +#: ../../vars.rst:293 ../../vars.rst:297 ../../vars.rst:301 ../../vars.rst:305 +#: 213ae84f9e9e4c3a9cf33fe4a858f208 746d8ce37fcf45ac8770b345347b8ee2 +#: c56d9271be584382aa7d77169d65806a ea42055961404c368f89d72e6ff24025 +msgid "" +"`TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP " +"| TEXT_PRESERVE_IMAGES | TEXT_USE_CID_FOR_UNKNOWN_UNICODE`" +msgstr "" + +#: ../../vars.rst:313 b4f425a22ba344cb95039fffc59d16e0 +msgid "`TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_DEHYPHENATE`" +msgstr "" + +#: ../../vars.rst:319 9f1e6c0231844a7995d5c2ac9d74aa18 +msgid "Link Destination Kinds" +msgstr "リンクの目的の種類" + +#: ../../vars.rst:320 52a8f0f028a1481f8143ba4f83fe96a1 +msgid "Possible values of :attr:`linkDest.kind` (link destination kind)." +msgstr ":attr:`linkDest.kind` (リンクの目的の種類)の可能な値。" + +#: ../../vars.rst:324 1f102b8b787148659c55c6aa63d05a45 +msgid "0 -- No destination. Indicates a dummy link." +msgstr "0 – 目的地なし。ダミーリンクを示します。" + +#: ../../vars.rst:330 628b01a5da0243be80ffa1f45fca5b0a +msgid "1 -- Points to a place in this document." +msgstr "1 – このドキュメント内の場所を指します。" + +#: ../../vars.rst:336 55a042b81e0247b692551330eec2ea72 +msgid "" +"2 -- Points to a URI -- typically a resource specified with internet " +"syntax." +msgstr "2 – URIを指します。通常はインターネット構文で指定されたリソースです。" + +#: ../../vars.rst:338 793a6dd92caf4a8f9c0d2f74ad716b30 +msgid "" +"PyMuPDF treats any external link that contains a colon and does not start" +" with `file:`, as `LINK_URI`." +msgstr "PyMuPDFは、コロンを含み、`file:` で始まらない任意の外部リンクを `LINK_URI` として扱います。" + +#: ../../vars.rst:345 153598660d514e7abf50c58200473ef0 +msgid "3 -- Launch (open) another file (of any \"executable\" type)." +msgstr "3 – 別のファイル(任意の「実行可能」タイプ)を開きます。" + +#: ../../vars.rst:347 2572f060a8be469a8f239543eee30634 +msgid "" +"|PyMuPDF| treats any external link that starts with `file:` or doesn't " +"contain a colon, as `LINK_LAUNCH`." +msgstr "PyMuPDFは、 `file:` で始まるかコロンを含まない外部リンクを `LINK_LAUNCH` として扱います。" + +#: ../../vars.rst:354 f99887ce5676409a941f3b146c835e70 +msgid "4 -- points to a named location." +msgstr "4 – 名前付きの場所を指します。" + +#: ../../vars.rst:360 4ca695a9178945d1a5c5edcec9340af9 +msgid "5 -- Points to a place in another PDF document." +msgstr "5 – 別のPDFドキュメント内の場所を指します。" + +#: ../../vars.rst:367 2447ac8f911043d48df9f7b68a833fa6 +msgid "Link Destination Flags" +msgstr "リンクの目的地フラグ" + +#: ../../vars.rst:369 426d4c32933f47529ea795d2cc9dd0d3 +msgid "" +"The rightmost byte of this integer is a bit field, so test the truth of " +"these bits with the *&* operator." +msgstr "この整数の最も右側のバイトはビットフィールドです。したがって、これらのビットの真偽を *&* 演算子でテストします。" + +#: ../../vars.rst:373 5feef71290cd4741ac4e24188b65bc22 +msgid "1 (bit 0) Top left x value is valid" +msgstr "1 (ビット 0) 左上の x 値が有効です" + +#: ../../vars.rst:375 ../../vars.rst:381 ../../vars.rst:387 ../../vars.rst:393 +#: ../../vars.rst:399 ../../vars.rst:405 ../../vars.rst:411 +#: 2fc468e70d634444b3316afec29e5d16 36f729a0d67040c5bddd732cd6807bf7 +#: 6925e6a9be744a508bf4526d1478805e bcddf9b8991c4f158a491f4ee81be612 +#: cc183df801ce45ba8b3c94d535cc0739 dfee08f9f29c45419a637f7814445640 +#: fcac1c72ce4d4d4389dda848e316c624 +msgid "bool" +msgstr "" + +#: ../../vars.rst:379 d090868c69104be492ebdd3ece8c4225 +msgid "2 (bit 1) Top left y value is valid" +msgstr "2 (ビット 1) 左上の y 値が有効です" + +#: ../../vars.rst:385 f20a0948e93043c4be3c860a7aea5b77 +msgid "4 (bit 2) Bottom right x value is valid" +msgstr "4 (ビット 2) 右下の x 値が有効です" + +#: ../../vars.rst:391 418e8b406e464b2a8a09ff48ace76f60 +msgid "8 (bit 3) Bottom right y value is valid" +msgstr "8 (ビット 3) 右下の y 値が有効です" + +#: ../../vars.rst:397 0b96102f9a934d99bffbd50d41626cb5 +msgid "16 (bit 4) Horizontal fit" +msgstr "16 (ビット 4) 水平フィット" + +#: ../../vars.rst:403 ba27d7c3b9944de1b5f9b9ca62246d5a +msgid "32 (bit 5) Vertical fit" +msgstr "32 (ビット 5) 垂直フィット" + +#: ../../vars.rst:409 2f533e61da7243638eeccd17238f192c +msgid "64 (bit 6) Bottom right x is a zoom figure" +msgstr "64 (ビット 6) 右下の x はズーム値です" + +#: ../../vars.rst:415 c55ea7fa1fd643a5b9a9544ebaa50fdb +msgid "Annotation Related Constants" +msgstr "注釈関連の定数" + +#: ../../vars.rst:416 3b0b11d864d442ec822894f7d0ae50fb +msgid "See chapter 8.4.5, pp. 615 of the :ref:`AdobeManual` for details." +msgstr "詳細については、:ref:`Adobe PDFリファレンス ` の第8.4.5章、615ページをご覧ください。" + +#: ../../vars.rst:421 1ecde3a9d7b64b169087ced43b15ad4c +msgid "Annotation Types" +msgstr "アノテーションタイプ" + +#: ../../vars.rst:422 bd5c20d426de46f9976ae12f4432d2f7 +msgid "" +"These identifiers also cover **links** and **widgets**: the PDF " +"specification technically handles them all in the same way, whereas " +"|MuPDF| (and PyMuPDF) treats them as three basically different types of " +"objects." +msgstr "" +"これらの識別子は **リンク** と **ウィジェット** " +"も含みます。PDF仕様では、技術的にはこれらをすべて同じ方法で処理しますが、|MuPDF| " +"(およびPyMuPDF)では、基本的には異なる3つのオブジェクトタイプとして扱われます。" + +#: ../../vars.rst:459 aa4a6bc505df440588c37777df64fd25 +msgid "Annotation Flag Bits" +msgstr "注釈フラグビット" + +#: ../../vars.rst:476 652eb560a8cd4828920bb3e96d1af25f +msgid "Annotation Line Ending Styles" +msgstr "注釈の線の終端スタイル" + +#: ../../vars.rst:492 91a6e109a521435ab0a009b049d52473 +msgid "Widget Constants" +msgstr "ウィジェットの定数" + +#: ../../vars.rst:497 79858be404974621bbdf883a55cdd335 +msgid "Widget Types (*field_type*)" +msgstr "ウィジェットのタイプ(*field_type*)" + +#: ../../vars.rst:510 007cf2a263a5470db51e41cb81182c72 +msgid "Text Widget Subtypes (*text_format*)" +msgstr "テキストウィジェットのサブタイプ(*text_format*)" + +#: ../../vars.rst:521 1b59a595b14a4b53b47ed18e14dd4955 +msgid "Widget flags (*field_flags*)" +msgstr "ウィジェットフラグ(*field_flags*)" + +#: ../../vars.rst:522 ae069f2b2f0142bbadf75a94ae527b29 +msgid "**Common to all field types**::" +msgstr "**すべてのフィールドタイプに共通**:" + +#: ../../vars.rst:528 931fc1b0c6ec4beab0c8defc28393b89 +msgid "**Text widgets**::" +msgstr "**テキストウィジェット**:" + +#: ../../vars.rst:538 10486d9f5a204298babec68cd00a5762 +msgid "**Button widgets**::" +msgstr "**ボタンウィジェット**:" + +#: ../../vars.rst:545 0a8ec85346a94b15bf76bbf26fabeae0 +msgid "**Choice widgets**::" +msgstr "**チョイスウィジェット**:" + +#: ../../vars.rst:558 7073861cbd3241e2b1ab16b85a75612a +msgid "PDF Standard Blend Modes" +msgstr "PDF標準ブレンドモード" + +#: ../../vars.rst:560 6ccb9f3bac5c45a4a9e4d727549d8c65 +msgid "For an explanation see :ref:`AdobeManual`, page 324::" +msgstr "詳細については、Adobe PDFリファレンスのページ324をご覧ください:" + +#: ../../vars.rst:583 8226db5de8bc4cecaae39937d19979a8 +msgid "Stamp Annotation Icons" +msgstr "スタンプ注釈アイコン" + +#: ../../vars.rst:584 ce323f567b174b05a5ee0de5dff013d7 +msgid "MuPDF has defined the following icons for **rubber stamp** annotations::" +msgstr "MuPDFは、ラバースタンプ注釈に次のアイコンを定義しています:" + +#: ../../footer.rst:60 ff648a3da94e4adaa3a122627e288a1e +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "`TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE " +#~ "| TEXT_MEDIABOX_CLIP`" +#~ msgstr "" + +#~ msgid "" +#~ "`TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE " +#~ "| TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES`" +#~ msgstr "" + +#~ msgid "Return type" +#~ msgstr "戻り値の型" + +#~ msgid "'x.xx.x' -- version of PyMuPDF (these bindings)" +#~ msgstr "‘x.xx.x’ – PyMuPDF(これらのバインディング)のバージョン" + +#~ msgid "" +#~ "The docstring of *fitz* contains " +#~ "information of the above which can " +#~ "be retrieved like so: *print(fitz.__doc__)*," +#~ " and should look like: *PyMuPDF " +#~ "1.10.0: Python bindings for the MuPDF" +#~ " 1.10 library, built on 2016-11-30 " +#~ "13:09:13*." +#~ msgstr "" +#~ "fitzのドキュストリングには、上記の情報が含まれており、次のようにして取得できます: " +#~ "print(fitz.__doc__)。以下のように表示されるべきです:PyMuPDF 1.10.0:MuPDF " +#~ "1.10ライブラリ用のPythonバインディング、ビルド日時2016-11-30 13:09:13。" + +#~ msgid "" +#~ "`TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE " +#~ "| TEXT_MEDIABOX_CLIP | TEXT_DEHYPHENATE`" +#~ msgstr "" + +#~ msgid "" +#~ "128 -- If set, use raw character" +#~ " codes instead of U+FFFD. This is " +#~ "the default for **text extraction** in" +#~ " PyMuPDF. If you **want to detect**" +#~ " when encoding information is missing " +#~ "or uncertain, toggle this flag and " +#~ "scan for the presence of U+FFFD (=" +#~ " `chr(0xfffd)`) code points in the " +#~ "resulting text." +#~ msgstr "" +#~ "128 -- 設定されている場合、U+FFFDの代わりに生の文字コードを使用します。これは、PyMuPDFにおける" +#~ " **テキスト抽出** " +#~ "のデフォルトです。エンコーディング情報が不足しているか不確かな場合に**検出したい**場合は、このフラグを切り替えて、結果のテキストにU+FFFD" +#~ " (= `chr(0xfffd)`)コードポイントが存在するかどうかをスキャンします。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/version.mo b/docs/locales/ja/LC_MESSAGES/version.mo new file mode 100644 index 000000000..8b5ba9746 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/version.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/version.po b/docs/locales/ja/LC_MESSAGES/version.po new file mode 100644 index 000000000..30485d1e3 --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/version.po @@ -0,0 +1,73 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# FIRST AUTHOR , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../version.rst:3 5da0b4d14ad6412dbaf1fbc368247a52 +#, fuzzy +msgid "" +"This documentation covers **PyMuPDF v1.25.5** features as of **2025-03-31" +" 00:00:01**." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#: ../../version.rst:5 65b61a5748c4422395214d229bc552fc +msgid "" +"The major and minor versions of |PyMuPDF| and |MuPDF| will always be the " +"same. Only the third qualifier (patch level) may deviate from that of " +"|MuPDF|." +msgstr "" +"|PyMuPDF| と |MuPDF| のメジャーおよびマイナーバージョンは常に同じになります。第三の修飾子(パッチレベル)のみが |MuPDF|" +" のものと異なる場合があります。" + +#: ../../version.rst:7 5996bf15f5474c8a808d6e5e0885062e +msgid "" +"Typically PyMuPDF is released more frequently than MuPDF so it will often" +" be the case that the patch level of PyMuPDF will be greater than the " +"embedded MuPDF." +msgstr "" + +#: ../../version.rst:11 1cac232b69264e9998c5745f46bc36b3 +msgid "For example PyMuPDF-1.24.5 contains MuPDF-1.24.2." +msgstr "" + +#: ../../version.rst:13 2e97259ae1cb4975976ce1bf01696d3c +msgid "Also see `pymupdf_version` and `mupdf_version`." +msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF " +#~ "v1.23.0rc1** features as of **2023-08-10 " +#~ "00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.4**" +#~ " features as of **2023-09-26 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.5**" +#~ " features as of **2023-10-11 00:00:01**." +#~ msgstr "" + +#~ msgid "" +#~ "This documentation covers **PyMuPDF v1.23.8**" +#~ " features as of **2023-12-19 00:00:01**." +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/widget.mo b/docs/locales/ja/LC_MESSAGES/widget.mo new file mode 100644 index 000000000..89bed599c Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/widget.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/widget.po b/docs/locales/ja/LC_MESSAGES/widget.po new file mode 100644 index 000000000..ff47e7c2f --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/widget.po @@ -0,0 +1,625 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 5a37e663e6e54990a2fdc7914c7b502d +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 6ce428f055fa497abf09a94986576135 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 a9ada920c19e4ec593e16e678f2a50a4 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../widget.rst:7 624076b86b264de091f0e582f6640f76 +msgid "Widget" +msgstr "Widget (ウィジェット)" + +#: ../../widget.rst:9 ffcd4034d2e34d2fac63824314c39935 +msgid "|pdf_only_class|" +msgstr "PDFのみ。" + +#: ../../widget.rst:11 b7bb6ad05187461ead0062be00f41fe6 +msgid "" +"This class represents a PDF Form field, also called a \"widget\". " +"Throughout this documentation, we are using these terms synonymously. " +"Fields technically are a special case of PDF annotations, which allow " +"users with limited permissions to enter information in a PDF. This is " +"primarily used for filling out forms." +msgstr "このクラスは、PDFフォームフィールド、または「ウィジェット」とも呼ばれるものを表します。このドキュメンテーション全体で、これらの用語を同義語として使用しています。フィールドは技術的にはPDF注釈の特殊なケースであり、制限付きの権限を持つユーザーがPDFに情報を入力することを可能にします。これは主にフォームの記入に使用されます。" + +#: ../../widget.rst:13 f7da4535f1244c46be953370d8b27709 +msgid "" +"Like annotations, widgets live on PDF pages. Similar to annotations, the " +"first widget on a page is accessible via :attr:`Page.first_widget` and " +"subsequent widgets can be accessed via the :attr:`Widget.next` property." +msgstr "" +"アノテーションと同様に、ウィジェットもPDFページ上に存在します。注釈と同様に、ページ上の最初のウィジェットは " +":attr:`Page.first_widget` 経由でアクセスでき、その後のウィジェットは :attr:`Widget.next` " +"プロパティ経由でアクセスできます。" + +#: ../../widget.rst:15 6a4c5c86f7e84180a8066861d64cc4f4 +msgid "" +"*(Changed in version 1.16.0)* MuPDF no longer treats widgets as a subset " +"of general annotations. Consequently, :attr:`Page.first_annot` and " +":meth:`Annot.next` will deliver **non-widget annotations exclusively**, " +"and be ``None`` if only form fields exist on a page. Vice versa, " +":attr:`Page.first_widget` and :meth:`Widget.next` will only show widgets." +" This design decision is purely internal to MuPDF; technically, links, " +"annotations and fields have a lot in common and also continue to share " +"the better part of their code within (Py-) MuPDF." +msgstr "" +"*(バージョン1.16.0で変更)* " +"MuPDFはウィジェットを一般的な注釈のサブセットとして扱わなくなりました。したがって、:attr:`Page.first_annot` および " +":meth:`Annot.next` " +"は非ウィジェット注釈のみを返し、ページにフォームフィールドのみが存在する場合はNoneを返します。逆に、:attr:`Page.first_widget`" +" および :meth:`Widget.next` " +"はウィジェットのみを表示します。この設計の決定はMuPDF内部におけるものであり、技術的にはリンク、注釈、およびフィールドは多くの共通点を持ち、また(Py-)MuPDF内でコードの大部分を共有し続けています。" + +#: ../../widget.rst:18 69c3ef7ceec34f689dd615ccd7d5d62e +msgid "**Class API**" +msgstr "クラスAPI" + +#: ../../widget.rst:24 3cdb2777c88743c1b8ca2765bde92b83 +msgid "*New in version 1.18.15*" +msgstr "*バージョン1.18.15で新しく追加*" + +#: ../../widget.rst:26 0057129f93684af88acefd31d78713cc +msgid "" +"Return the names of On / Off (i.e. selected / clicked or not) states a " +"button field may have. While the 'Off' state usually is also named like " +"so, the 'On' state is often given a name relating to the functional " +"context, for example 'Yes', 'Female', etc." +msgstr "" +"ボタンフィールドが持つOn / Off(選択/クリックしたかどうか)状態の名前を返します。通常、 'Off'状態も同様に名前が付けられていますが、" +" 'On'状態は機能的なコンテキストに関連する名前がよく付けられます。たとえば、「Yes」、「Female」などです。" + +#: ../../widget.rst:28 a2b4d38b7e6342619083617b782a21aa +msgid "" +"This method helps finding out the possible values of :attr:`field_value` " +"in these cases." +msgstr "このメソッドは、これらのケースで :attr:`field_value` の可能な値を調べるのに役立ちます。" + +#: ../../widget.rst 5639241c379d438fb8e160cc895126f9 +#: ec887bc94866419d829a41c6e01079bc +msgid "returns" +msgstr "戻り値:" + +#: ../../widget.rst:30 24a4790a279b4109b83baa31b0b5fdfb +msgid "" +"a dictionary with the names of 'On' and 'Off' for the *normal* and the " +"*pressed-down* appearance of button widgets. The following example shows " +"that the \"selected\" value is \"Male\":" +msgstr "" +"*normal* の 状態と *pressed-down* 状態のボタンウィジェットの 'On'と " +"'Off'の名前を持つ辞書。次の例では、「選択された」値は「Male」であることが示されています:" + +#: ../../widget.rst:38 ed578450970b4a2699c36e278392dc68 +msgid "New in version 1.22.2" +msgstr "新機能(バージョン1.22.2で追加)" + +#: ../../widget.rst:40 174182b4e4ca45e9a99be3a220b409f8 +msgid "" +"Return the value of the \"ON\" state of check boxes and radio buttons. " +"For check boxes this is always the value \"Yes\". For radio buttons, this" +" is the value to select / activate the button." +msgstr "チェックボックスとラジオボタンの「ON」状態の値を返します。チェックボックスの場合、これは常に「Yes」という値です。ラジオボタンの場合、これはボタンを選択/アクティブ化する値です。" + +#: ../../widget.rst:42 806816919ec246bfacec626a2c3f54cd +msgid "" +"the value that sets the button to \"selected\". For non-checkbox, non-" +"radiobutton fields, always `None` is returned. For check boxes the return" +" is `True`. For radio buttons this is the value \"Male\" in the following" +" example:" +msgstr "" +"ボタンを「選択」に設定する値が返されます。非チェックボックス、非ラジオボタンフィールドの場合、常に `None` " +"が返されます。チェックボックスの場合、戻り値は `True` です。ラジオボタンの場合、次の例では値が「Male」です。" + +#: ../../widget.rst:49 335eb13c74ea49ee8bce4bb87ca2c698 +msgid "" +"So for check boxes and radio buttons, the recommended method to set them " +"to \"selected\", or to check the state is the following:" +msgstr "したがって、チェックボックスとラジオボタンの場合、それらを「選択」または状態を確認するための推奨される方法は次のとおりです。" + +#: ../../widget.rst:58 98ebba26b9e34976bc273f6cb51ac715 +msgid "" +"After any changes to a widget, this method **must be used** to store them" +" in the PDF [#f1]_." +msgstr "ウィジェットに変更が加えられた後、これらの変更をPDFに保存するためにこのメソッドを **使用する必要があります** [#f1]_。" + +#: ../../widget.rst:62 fa156aa2c78e4f1a919837db96371466 +msgid "" +"Reset the field's value to its default -- if defined -- or remove it. Do " +"not forget to issue :meth:`update` afterwards." +msgstr "" +"フィールドの値をデフォルト値にリセットします。デフォルトが定義されている場合、それを削除します。その後、:meth:`update` " +"を実行するのを忘れないでください。" + +#: ../../widget.rst:66 25e74c983f2f4aa7ab7c2fa854c0c76e +msgid "" +"Point to the next form field on the page. The last widget returns " +"``None``." +msgstr "ページ上の次のフォームフィールドを指します。最後のウィジェットは ``None`` を返します。" + +#: ../../widget.rst:70 b76719c2b59249c2930f6fa33e4b2722 +msgid "" +"A list of up to 4 floats defining the field's border color. Default value" +" is ``None`` which causes border style and border width to be ignored." +msgstr "" +"フィールドの境界線の色を定義する最大4つの浮動小数点数のリストです。デフォルト値は ``None`` " +"で、これにより境界線スタイルと境界線の幅が無視されます。" + +#: ../../widget.rst:74 8b795f4b032f4c5aa84b676b9ea65d34 +msgid "" +"A string defining the line style of the field's border. See " +":attr:`Annot.border`. Default is \"s\" (\"Solid\") -- a continuous line. " +"Only the first character (upper or lower case) will be regarded when " +"creating a widget." +msgstr "" +"フィールドの境界線の線スタイルを定義する文字列です。:attr:`Annot.border` を参照してください。デフォルトは \"s\" " +"(\"Solid\") で、連続線です。ウィジェットを作成する際、最初の文字(大文字または小文字)のみが考慮されます。" + +#: ../../widget.rst:78 d09a065ab4ec4c1791b2960690804b09 +msgid "A float defining the width of the border line. Default is 1." +msgstr "境界線の幅を定義する浮動小数点数です。デフォルトは1です。" + +#: ../../widget.rst:82 7d99b528b90540d39b723d7f076791c3 +msgid "" +"A list/tuple of integers defining the dash properties of the border line." +" This is only meaningful if *border_style == \"D\"* and " +":attr:`border_color` is provided." +msgstr "" +"*border_style == \"D\"* であり、:attr:`border_color` " +"が指定されている場合にのみ意味があります。これは、境界線のダッシュプロパティを定義する整数のリスト/タプルです。" + +#: ../../widget.rst:86 7ed44198f8164b9e9c564dd6ffcb3082 +msgid "" +"Python sequence of strings defining the valid choices of list boxes and " +"combo boxes. For these widget types, this property is mandatory and must " +"contain at least two items. Ignored for other types." +msgstr "リストボックスとコンボボックスの有効な選択肢を定義するPythonシーケンスの文字列です。これらのウィジェットタイプでは、このプロパティが必須で、少なくとも2つのアイテムを含める必要があります。他のタイプでは無視されます。" + +#: ../../widget.rst:90 ffd9837719ff414d889670f7b2b970e7 +msgid "" +"A mandatory string defining the field's name. No checking for duplicates " +"takes place." +msgstr "フィールドの名前を定義する必須の文字列です。重複をチェックしません。" + +#: ../../widget.rst:94 6da2fbed18d64beab82cf5acdfa9b2b4 +msgid "" +"An optional string containing an \"alternate\" field name. Typically used" +" for any notes, help on field usage, etc. Default is the field name." +msgstr "「代替」フィールド名を含むオプションの文字列です。通常、フィールドの使用方法に関するメモ、ヘルプなどに使用されます。デフォルトはフィールド名です。" + +#: ../../widget.rst:98 3e5bf44c11944233a56e69ce9a4ad30c +msgid "The value of the field." +msgstr "フィールドの値です。" + +#: ../../widget.rst:102 57f036533748469793b18036e6015982 +msgid "" +"An integer defining a large amount of properties of a field. Be careful " +"when changing this attribute as this may change the field type." +msgstr "フィールドの多くのプロパティを定義する整数です。この属性を変更する際は注意してください。これはフィールドのタイプを変更する可能性があります。" + +#: ../../widget.rst:106 6c9d94075e8748a0a87c22c1dd4b42a9 +msgid "" +"A mandatory integer defining the field type. This is a value in the range" +" of 0 to 6. It cannot be changed when updating the widget." +msgstr "フィールドタイプを定義する必須の整数です。これは0から6の範囲の値です。ウィジェットを更新する際に変更できません。" + +#: ../../widget.rst:110 372318f71bef45158a4257ef3dc31fe3 +msgid "A string describing (and derived from) the field type." +msgstr "フィールドタイプを説明する文字列(フィールドタイプから派生)。" + +#: ../../widget.rst:114 6a4e2f4cd82547dcbdbb7ed1a032ce74 +msgid "A list of up to 4 floats defining the field's background color." +msgstr "フィールドの背景色を定義する、最大4つの浮動小数点数のリスト。" + +#: ../../widget.rst:118 cd5e1b967c5d420199d69a8f33daf101 +msgid "The caption string of a button-type field." +msgstr "ボタンタイプのフィールドのキャプション文字列。" + +#: ../../widget.rst:122 67815a711eb94148a961ce0704652c4d +msgid "A bool indicating the signing status of a signature field, else ``None``." +msgstr "署名フィールドの署名ステータスを示すブール値。それ以外の場合は ``None``。" + +#: ../../widget.rst:126 623d98faf9bc4cdc8df9b945f643e22a +msgid "The rectangle containing the field." +msgstr "フィールドを含む矩形。" + +#: ../../widget.rst:130 af99b2e634034ad191540968d0bd4ecb +msgid "" +"A list of **1, 3 or 4 floats** defining the text color. Default value is " +"black (`[0, 0, 0]`)." +msgstr "テキストの色を定義する、**1、3、または4つの浮動小数点数** のリスト。デフォルト値は黒 (`[0, 0, 0]`)です。" + +#: ../../widget.rst:134 cce6169ee717445782dd147893bbcaf5 +msgid "" +"A string defining the font to be used. Default and replacement for " +"invalid values is *\"Helv\"*. For valid font reference names see the " +"table below." +msgstr "" +"使用するフォントを定義する文字列。デフォルトおよび無効な値の置換は *「Helv」* " +"です。有効なフォント参照名については以下の表を参照してください。" + +#: ../../widget.rst:138 236002b2d59149c3a2cd68c3933f48b1 +msgid "" +"A float defining the text :data:`fontsize`. Default value is zero, which " +"causes PDF viewer software to dynamically choose a size suitable for the " +"annotation's rectangle and text amount." +msgstr "" +"テキストの :data:`fontsize` " +"を定義する浮動小数点数。デフォルト値はゼロで、PDFビューアソフトウェアが注釈の矩形とテキストの量に適したサイズを動的に選択します。" + +#: ../../widget.rst:142 1c34f05786ef46f7b714733936efd15f +msgid "" +"An integer defining the maximum number of text characters. PDF viewers " +"will (should) not accept a longer text." +msgstr "テキストの最大文字数を定義する整数。PDFビューアは(するはずです)より長いテキストを受け入れません。" + +#: ../../widget.rst:146 e1e9a9fe04dd4386ad599c2be783506b +msgid "" +"An integer defining acceptable text types (e.g. numeric, date, time, " +"etc.). For reference only for the time being -- will be ignored when " +"creating or updating widgets." +msgstr "許容可能なテキストタイプを定義する整数(例:数値、日付、時刻など)。現時点では参考用のみで、ウィジェットを作成または更新する際には無視されます。" + +#: ../../widget.rst:150 ae77d0438e6941818caa49a31e97dd57 +msgid "The PDF :data:`xref` of the widget." +msgstr "ウィジェットのPDF :data:`xref`。" + +#: ../../widget.rst:154 ../../widget.rst:160 ../../widget.rst:166 +#: ../../widget.rst:172 ../../widget.rst:178 1715f560673c4c1a8e500964e658112a +#: 30d474c2165a417084dbb883fa66a96f d42fde5c387c4049aeb6608a842ee8d0 +#: dae799231271447bb14fb4751a73e0a9 f1b5525cacb94ff0875ed93b03ba657c +msgid "New in version 1.16.12" +msgstr "バージョン1.16.12で新登場" + +#: ../../widget.rst:156 7e5d2653e4664291afdddb06b25b8d67 +msgid "" +"JavaScript text (unicode) for an action associated with the widget, or " +"``None``. This is the only script action supported for **button type** " +"widgets." +msgstr "" +"ウィジェットに関連付けられたアクション用のJavaScriptテキスト(Unicode)、または ``None``。これは **ボタンタイプ** " +"のウィジェットに対してサポートされる唯一のスクリプトアクションです。" + +#: ../../widget.rst:162 3066f59272d04b3badfb7216eb5a6f38 +msgid "" +"JavaScript text (unicode) to be performed when the user types a key-" +"stroke into a text field or combo box or modifies the selection in a " +"scrollable list box. This action can check the keystroke for validity and" +" reject or modify it. ``None`` if not present." +msgstr "" +"JavaScriptテキスト(Unicode)は、ユーザーがテキストフィールドまたはコンボボックスにキーストロークを入力するか、スクロール可能なリストボックスの選択を変更するときに実行されるアクションです。このアクションはキーストロークの妥当性をチェックし、拒否または変更することができます。存在しない場合は" +" ``None`` です。" + +#: ../../widget.rst:168 6f83648a153e4db2a5c303bfa80129f9 +msgid "" +"JavaScript text (unicode) to be performed before the field is formatted " +"to display its current value. This action can modify the field’s value " +"before formatting. ``None`` if not present." +msgstr "" +"このアクションは、フィールドが現在の値を表示するためにフォーマットされる前に、フィールドの値を変更するために実行されるJavaScriptテキスト(Unicode)です。存在しない場合は" +" ``None`` です。" + +#: ../../widget.rst:174 8c6c2b8b063243e289aa6bf1261a20cc +msgid "" +"JavaScript text (unicode) to be performed when the field’s value is " +"changed. This action can check the new value for validity. ``None`` if " +"not present." +msgstr "" +"このアクションは、フィールドの値が変更されたときに実行されるJavaScriptテキスト(Unicode)です。このアクションは新しい値の妥当性をチェックすることができます。存在しない場合は" +" ``None`` です。" + +#: ../../widget.rst:180 d577aad3bc0a48ff93c19a20bc6df794 +msgid "" +"JavaScript text (unicode) to be performed to recalculate the value of " +"this field when that of another field changes. ``None`` if not present." +msgstr "バージョン1.16.12で新規追" + +#: ../../widget.rst:184 ../../widget.rst:190 076a163723124ff695eabcbc6011c9ff +#: dd1a992a5bd445f0925b51350fe3612f +msgid "New in version 1.22.6" +msgstr "バージョン1.22.6で新規追加" + +#: ../../widget.rst:186 6a99da600742485683030ca0118a75ab +msgid "" +"JavaScript text (unicode) to be performed on losing the focus of this " +"field. ``None`` if not present." +msgstr "このフィールドからフォーカスを失ったときに実行されるJavaScriptテキスト(Unicode)です。存在しない場合は ``None`` です。" + +#: ../../widget.rst:192 bb4cdc46e4a14934b8c9d9b2df8de1e5 +msgid "" +"JavaScript text (unicode) to be performed on focusing this field. " +"``None`` if not present." +msgstr "このフィールドにフォーカスが当たったときに実行されるJavaScriptテキスト(Unicode)です。存在しない場合は ``None`` です。" + +#: ../../widget.rst:196 e061b8c3fae847aeb2b44446c046d044 +msgid "For **adding** or **changing** one of the above scripts," +msgstr "上記のいずれかのスクリプトを **追加** または **変更** するには、" + +#: ../../widget.rst:197 c8d6bd5cc57d4e7e9769fd05983533cf +msgid "" +"just put the appropriate JavaScript source code in the widget attribute. " +"To **remove** a script, set the respective attribute to ``None``." +msgstr "" +"適切なJavaScriptソースコードをウィジェット属性に配置するだけです。スクリプトを **削除する** には、該当する属性を ``None``" +" に設定します。" + +#: ../../widget.rst:200 90b6fe7fb9664746b9276138127be35f +msgid "Button fields only support :attr:`script`." +msgstr "ボタンフィールドは :attr:`script` をサポートしています" + +#: ../../widget.rst:201 f1c3f4a0456645b9bebdc6573503cc21 +msgid "Other script entries will automatically be set to ``None``." +msgstr "他のスクリプトエントリは自動的に ``None`` に設定されます。" + +#: ../../widget.rst:203 116ede6258f54afab42748bf7888b1c4 +msgid "" +"It is worthwhile to look at `this " +"`_ manual with lots of " +"information about Adobe's standard scripts for various field types. For " +"example, if you want to add a text field representing a date, you may " +"want to store the following scripts. They will ensure pattern-compatible " +"date formats and display date pickers in supporting viewers::" +msgstr "" +"Adobeの標準スクリプトに関する多くの情報が含まれている `この " +"`_ " +"マニュアルを確認する価値があります。たとえば、日付を表すテキストフィールドを追加する場合、次のスクリプトを保存することができます。これにより、パターン互換の日付形式が確保され、サポートされているビューアで日付ピッカーが表示されます。" + +#: ../../widget.rst:215 92aadfd39ff04ff992508c5287cf8f1e +msgid "Standard Fonts for Widgets" +msgstr "ウィジェット用の標準フォント" + +#: ../../widget.rst:216 4f3a9738e3bc448f90a65c8f40711117 +msgid "" +"Widgets use their own resources object */DR*. A widget resources object " +"must at least contain a */Font* object. Widget fonts are independent from" +" page fonts. We currently support the 14 PDF base fonts using the " +"following fixed reference names, or any name of an already existing field" +" font. When specifying a text font for new or changed widgets, **either**" +" choose one in the first table column (upper and lower case supported), " +"**or** one of the already existing form fonts. In the latter case, " +"spelling must exactly match." +msgstr "" +"ウィジェットは独自のリソースオブジェクト */DR* を使用します。ウィジェットのリソースオブジェクトには、少なくとも */Font* " +"オブジェクトを含める必要があります。ウィジェットフォントはページフォントとは独立しています。現在、以下の固定参照名を使用して、14のPDFベースフォントをサポートしています。また、既存のフィールドフォントの名前でもかまいません。新しいウィジェットまたは変更されたウィジェットのテキストフォントを指定する際には、最初の表の列(大文字と小文字がサポートされています)から1つを選択するか、既存のフォームフォントの1つを選択してください。後者の場合、スペルは厳密に一致する必要があります。" + +#: ../../widget.rst:218 925bed73c43340b182d6f65b8a574e47 +msgid "" +"To find out already existing field fonts, inspect the list " +":attr:`Document.FormFonts`." +msgstr "既存のフィールドフォントを見つけるには、リスト :attr:`Document.FormFonts` を調べてください。" + +#: ../../widget.rst:221 e3bba5d4620c4ccba5ebc11a7d9175bc +msgid "**Reference**" +msgstr "**参照**" + +#: ../../widget.rst:221 ea453b1dcd7b4399ac82cc89c98cb502 +msgid "**Base14 Fontname**" +msgstr "**Base14フォント名**" + +#: ../../widget.rst:223 4a65356cbe2b49f68b0b5304c86b75cd +msgid "CoBI" +msgstr "" + +#: ../../widget.rst:223 d219453aa72f4e40adc60ba3ed42b917 +msgid "Courier-BoldOblique" +msgstr "" + +#: ../../widget.rst:224 ed32a0ad7b904ecd9cc84adcd5062310 +msgid "CoBo" +msgstr "" + +#: ../../widget.rst:224 fa650a7e028f455c8f4763efa725b201 +msgid "Courier-Bold" +msgstr "" + +#: ../../widget.rst:225 00ec7658b5c14c338b9e0f2dbc500a6f +msgid "CoIt" +msgstr "" + +#: ../../widget.rst:225 58d29a1a4e82490d90700630059ff068 +msgid "Courier-Oblique" +msgstr "" + +#: ../../widget.rst:226 a6dd7d369fe148ecaf163600b06a35d7 +msgid "Cour" +msgstr "" + +#: ../../widget.rst:226 ee32dea5ed8c4814b490868e9066d9c9 +msgid "Courier" +msgstr "" + +#: ../../widget.rst:227 5b789b2cf2e84e10bde2056d4d1b3537 +msgid "HeBI" +msgstr "" + +#: ../../widget.rst:227 495ab1b7848f4554afdc4c8e35207c9a +msgid "Helvetica-BoldOblique" +msgstr "" + +#: ../../widget.rst:228 2c2db59156584b3191aea75dfb529469 +msgid "HeBo" +msgstr "" + +#: ../../widget.rst:228 659b01998b6145aeb08c312004abe053 +msgid "Helvetica-Bold" +msgstr "" + +#: ../../widget.rst:229 8e03594c420e4421a42a1d9425141257 +msgid "HeIt" +msgstr "" + +#: ../../widget.rst:229 e29e8c9d103a4790ade38a5873b33a4a +msgid "Helvetica-Oblique" +msgstr "" + +#: ../../widget.rst:230 9d04f0ae163e4e10ba656490ddb685b5 +msgid "Helv" +msgstr "" + +#: ../../widget.rst:230 7b85ed55f7b641f790198218bef9c00d +msgid "Helvetica **(default)**" +msgstr "Helvetica **(デフォルト)**" + +#: ../../widget.rst:231 06febf78ccf14635a504879611ab6c11 +msgid "Symb" +msgstr "" + +#: ../../widget.rst:231 8715177d19f74536aec49ff8dbd7542a +msgid "Symbol" +msgstr "" + +#: ../../widget.rst:232 52e1e542dad742bd896b23da17169e27 +msgid "TiBI" +msgstr "" + +#: ../../widget.rst:232 6a07cf98db214d95bec8c16b5555ef06 +msgid "Times-BoldItalic" +msgstr "" + +#: ../../widget.rst:233 ceccd48998684f63b5c60a5cd2f6b313 +msgid "TiBo" +msgstr "" + +#: ../../widget.rst:233 f103cbe541d24335990d01e7b36e7e3d +msgid "Times-Bold" +msgstr "" + +#: ../../widget.rst:234 ce170947a01b4642bd4f7f6672ae124f +msgid "TiIt" +msgstr "" + +#: ../../widget.rst:234 f475881097734d41bf682dfabdf1f532 +msgid "Times-Italic" +msgstr "" + +#: ../../widget.rst:235 bec22af268124b0b95857a3c54d8e206 +msgid "TiRo" +msgstr "" + +#: ../../widget.rst:235 4c111eb1e9d44136902781dd26427493 +msgid "Times-Roman" +msgstr "" + +#: ../../widget.rst:236 bcd602ef00304eaab0ceb263267a1472 +msgid "ZaDb" +msgstr "" + +#: ../../widget.rst:236 6db1a28d39f5427386471d41ca17e8e8 +msgid "ZapfDingbats" +msgstr "" + +#: ../../widget.rst:239 ab6b8a8825d54d52a87cb2bb36646531 +msgid "" +"You are generally free to use any font for every widget. However, we " +"recommend using *ZaDb* (\"ZapfDingbats\") and :data:`fontsize` 0 for " +"check boxes: typical viewers will put a correctly sized tickmark in the " +"field's rectangle, when it is clicked." +msgstr "" +"一般的に、すべてのウィジェットに任意のフォントを使用することができます。ただし、チェックボックスには *ZaDb* " +"(「ZapfDingbats」)フォントと :data:`fontsize` " +"0を使用することをお勧めします。典型的なビューアは、フィールドの四角形をクリックすると正しくサイズ調整されたチェックマークを配置します。" + +#: ../../widget.rst:242 55e2525d7b0b440698e50964bd51993a +msgid "Supported Widget Types" +msgstr "対応ウィジェットの種類" + +#: ../../widget.rst:243 5286f138c8a544d5adcc8d3a9f844238 +msgid "" +"PyMuPDF supports the creation and update of many, but not all widget " +"types." +msgstr "PyMuPDFは、多くのウィジェットタイプの作成および更新をサポートしていますが、すべてのウィジェットタイプには対応していません。" + +#: ../../widget.rst:245 11633720f7174322aaba6a6b80395861 +msgid "text (`PDF_WIDGET_TYPE_TEXT`)" +msgstr "テキスト(`PDF_WIDGET_TYPE_TEXT`)" + +#: ../../widget.rst:246 c7d256722a2840788fcc778a30b0e581 +msgid "push button (`PDF_WIDGET_TYPE_BUTTON`)" +msgstr "プッシュボタン(`PDF_WIDGET_TYPE_BUTTON`)" + +#: ../../widget.rst:247 e5c912ce96bc48299e4c40f6d77cc040 +msgid "check box (`PDF_WIDGET_TYPE_CHECKBOX`)" +msgstr "チェックボックス(`PDF_WIDGET_TYPE_CHECKBOX`)" + +#: ../../widget.rst:248 1b23be72c1d04f10b82aeddd9b414ffb +msgid "combo box (`PDF_WIDGET_TYPE_COMBOBOX`)" +msgstr "コンボボックス(`PDF_WIDGET_TYPE_COMBOBOX`)" + +#: ../../widget.rst:249 ecf203e0683d4b0aa6d5f5d1500555b0 +msgid "list box (`PDF_WIDGET_TYPE_LISTBOX`)" +msgstr "リストボックス(`PDF_WIDGET_TYPE_LISTBOX`)" + +#: ../../widget.rst:250 74fed154e4164d12bd5b68ca6a25e924 +#, fuzzy +msgid "" +"radio button (`PDF_WIDGET_TYPE_RADIOBUTTON`): PyMuPDF does not currently " +"support the **creation** of groups of (interconnected) radio buttons, " +"where setting one automatically unsets the other buttons in the group. " +"The widget object also does not reflect the presence of a button group. " +"However: consistently selecting (or unselecting) a radio button is " +"supported. This includes correctly setting the value maintained in the " +"owning button group. Selecting a radio button may be done by either " +"assigning `True` or `field.on_state()` to the field value. **De-" +"selecting** the button should be done assigning `False`." +msgstr "" +"ラジオボタン(`PDF_WIDGET_TYPE_RADIOBUTTON`):PyMuPDFは現在、ラジオボタンのグループ(相互に接続された)の " +"**作成** " +"をサポートしていません。つまり、1つを設定すると、グループ内の他のボタンが自動的に設定解除されるような状態を作成することはできません。また、ウィジェットオブジェクトはボタングループの存在を反映しません。ただし、ラジオボタンを一貫して選択(または選択解除)することはサポートされています。これには、所属するボタングループで維持される値を正しく設定することも含まれます。ラジオボタンを選択するには、フィールドの値に" +" `True` または `field.on_state()` を割り当てることができます。ボタンの選択解除は、`False` " +"を割り当てることによって行う必要があります。" + +#: ../../widget.rst:251 0b9e92d2051f4a8aaf27d9c3bf996b80 +msgid "signature (`PDF_WIDGET_TYPE_SIGNATURE`) **read only**." +msgstr "署名(`PDF_WIDGET_TYPE_SIGNATURE`)は **読み取り専用です** 。" + +#: ../../widget.rst:254 3e45d7c9b8ac4c2d99e22048c692e363 +msgid "Footnotes" +msgstr "脚注" + +#: ../../widget.rst:255 d512795cfb164d92907502576f225590 +msgid "" +"If you intend to re-access a new or updated field (e.g. for making a " +"pixmap), make sure to reload the page first. Either close and re-open the" +" document, or load another page first, or simply do `page = " +"doc.reload_page(page)`." +msgstr "" +"新しいまたは更新されたフィールドに再アクセスする意図がある場合(たとえば、ピクセルマップを作成する場合)、まずページを再読み込むことを確認してください。文書を閉じて再度開くか、別のページを読み込むか、または単に" +" `page = doc.reload_page(page)` を実行してください" + +#: ../../footer.rst:60 c0063d35541741e5b2e52915f1014212 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/xml-class.mo b/docs/locales/ja/LC_MESSAGES/xml-class.mo new file mode 100644 index 000000000..2974a169a Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/xml-class.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/xml-class.po b/docs/locales/ja/LC_MESSAGES/xml-class.po new file mode 100644 index 000000000..a15e9e7be --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/xml-class.po @@ -0,0 +1,1080 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 35b2e14fdbf9456f870ea5e07c6a1abe +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 9c383ee007cb459a8f5ce0a78b556be8 +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 7207641edba74fb79cd09bd543f88a77 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../xml-class.rst:7 5a1acd7449794938872c81139fa8acd0 +msgid "Xml" +msgstr "" + +#: ../../xml-class.rst:11 4ee590d80c2745afaad733bc8ba82e1d +msgid "New in v1.21.0" +msgstr "v1.21.0で新たに追加" + +#: ../../xml-class.rst:13 8fa4f25d9d5d4b7ea94cc3b663b67d4c +msgid "" +"This represents an HTML or an XML node. It is a helper class intended to " +"access the DOM (Document Object Model) content of a :ref:`Story` object." +msgstr "" +"Xmlクラスは、HTMLまたはXMLノードを表します。これは、:ref:`Story` オブジェクトのDOM(Document Object " +"Model)コンテンツにアクセスするためのヘルパークラスです。" + +#: ../../xml-class.rst:15 e5105e52125246098e3b455ef2776456 +msgid "" +"There is no need to ever directly construct an :ref:`Xml` object: after " +"creating a :ref:`Story`, simply take :attr:`Story.body` -- which is an " +"Xml node -- and use it to navigate your way through the story's DOM." +msgstr "" +":ref:`Xml` オブジェクトを直接構築する必要はありません。:ref:`Story` を作成した後、単に " +":attr:`Story.body` を取得し(これはXmlノードです)、それを使用してストーリーのDOMを操作できます。" + +#: ../../xml-class.rst:19 086ba76b6eaa4206b74c6b525b2ae70b +msgid "**Method / Attribute**" +msgstr "**メソッド/属性**" + +#: ../../xml-class.rst:19 9b744e4fa0694c9986e2250f2278784d +msgid "**Description**" +msgstr "**説明**" + +#: ../../xml-class.rst:21 f46174ffaec14c09984d82014f3fc9a4 +msgid ":meth:`~.add_bullet_list`" +msgstr "" + +#: ../../xml-class.rst:21 eefa1933e3b5485eb89f2e53127bcbf1 +msgid "Add a :htmlTag:`ul` tag - bulleted list, context manager." +msgstr ":htmlTag:`ul` タグを追加します - 箇条書きリスト、コンテキストマネージャ。" + +#: ../../xml-class.rst:22 16936bcd25534da28940c85397594734 +msgid ":meth:`~.add_codeblock`" +msgstr "" + +#: ../../xml-class.rst:22 fcf17dab1d8645fe9307bbaad85e9a78 +msgid "Add a :htmlTag:`pre` tag, context manager." +msgstr ":htmlTag:`pre` タグを追加します、コンテキストマネージャ。" + +#: ../../xml-class.rst:23 103a3a5af0f14cd1924448b542f62b6b +msgid ":meth:`~.add_description_list`" +msgstr "" + +#: ../../xml-class.rst:23 e2f66dd80053480e97b4eeeb1ff42a9c +msgid "Add a :htmlTag:`dl` tag, context manager." +msgstr ":htmlTag:`dl` タグを追加します、コンテキストマネージャ。" + +#: ../../xml-class.rst:24 b041d519f2b64e188188cddb8b2f4e9b +msgid ":meth:`~.add_division`" +msgstr "" + +#: ../../xml-class.rst:24 31129830737949a8bb370681ecfd4d06 +msgid "add a :htmlTag:`div` tag (renamed from “section”), context manager." +msgstr ":htmlTag:`div` タグを追加します(「section」と名前変更)、コンテキストマネージャ。" + +#: ../../xml-class.rst:25 da7da028486141f58cf9c5a7952e1ba0 +msgid ":meth:`~.add_header`" +msgstr "" + +#: ../../xml-class.rst:25 228124168ce64130ad9d0ff5b13646e0 +msgid "Add a header tag (one of :htmlTag:`h1` to :htmlTag:`h6`), context manager." +msgstr "headerタグ(:htmlTag:`h1` から :htmlTag:`h6` のいずれか)を追加します、コンテキストマネージャ。" + +#: ../../xml-class.rst:26 806d8bfb5b3b4ce397764e9d4a0dc1d8 +msgid ":meth:`~.add_horizontal_line`" +msgstr "" + +#: ../../xml-class.rst:26 c07169b784924633a5e1fd68c8af798c +msgid "Add a :htmlTag:`hr` tag." +msgstr ":htmlTag:`hr` タグを追加します。" + +#: ../../xml-class.rst:27 5a73b0a8f50a496691126bd9d77a5637 +msgid ":meth:`~.add_image`" +msgstr "" + +#: ../../xml-class.rst:27 340cf98f4f3f4e4a84c228fd7be2e4d6 +msgid "Add a :htmlTag:`img` tag." +msgstr ":htmlTag:`img` タグを追加します。" + +#: ../../xml-class.rst:28 d158603be675421a8b7f39bb5ff0a586 +msgid ":meth:`~.add_link`" +msgstr "" + +#: ../../xml-class.rst:28 339c28e41d7e44558a1e9b671ba9ef54 +msgid "Add a :htmlTag:`a` tag." +msgstr ":htmlTag:`a` タグを追加します。" + +#: ../../xml-class.rst:29 7f6b79bf740d49b1932cf6a6a6576758 +msgid ":meth:`~.add_number_list`" +msgstr "" + +#: ../../xml-class.rst:29 0c01f30a42cf4468b8213216a1b3297c +msgid "Add a :htmlTag:`ol` tag, context manager." +msgstr ":htmlTag:`ol` タグを追加します、コンテキストマネージャ。" + +#: ../../xml-class.rst:30 8f9f02ea8c414d29a83d318465a0ff48 +msgid ":meth:`~.add_paragraph`" +msgstr "" + +#: ../../xml-class.rst:30 253c26552e274fbeb3da81a3470eec14 +msgid "Add a :htmlTag:`p` tag." +msgstr ":htmlTag:`p` タグを追加します。" + +#: ../../xml-class.rst:31 b2d6acd206bb4cb2a8f6be0b5197c0d8 +msgid ":meth:`~.add_span`" +msgstr "" + +#: ../../xml-class.rst:31 98fe31d20e7a4ae8af4eba0928d53dd1 +msgid "Add a :htmlTag:`span` tag, context manager." +msgstr ":htmlTag:`span` タグを追加します。コンテキストマネージャーです。" + +#: ../../xml-class.rst:32 50600afc53a74e6c80d8b4eef6b2304e +msgid ":meth:`~.add_subscript`" +msgstr "" + +#: ../../xml-class.rst:32 c9cb764e916a4b27afb3c18ae5983896 +msgid "" +"Add subscript text(:htmlTag:`sub` tag) - inline element, treated like " +"text." +msgstr "添字テキスト(:htmlTag:`sub` タグ)を追加します。行内要素で、テキストのように扱われます。" + +#: ../../xml-class.rst:33 b72843ab592f40e8877c99fe4c941dcb +msgid ":meth:`~.add_superscript`" +msgstr "" + +#: ../../xml-class.rst:33 3630b21203814ea190918432f1ca2417 +msgid "" +"Add subscript text (:htmlTag:`sup` tag) - inline element, treated like " +"text." +msgstr "上付きテキスト(:htmlTag:`sup` タグ)を追加します。行内要素で、テキストのように扱われます。" + +#: ../../xml-class.rst:34 69e9ce07a81841c280b49fa28bd70c97 +msgid ":meth:`~.add_code`" +msgstr "" + +#: ../../xml-class.rst:34 ../../xml-class.rst:35 ../../xml-class.rst:36 +#: ../../xml-class.rst:37 5c6747963ef74d38b09f3a6fbd934145 +#: 8cd903b064c74812942fc2b54fcb182d aa212076d95d4d34aca0fedfba075d7c +#: bbf8c37a914547df93d38bbff33c7496 +msgid "Add code text (:htmlTag:`code` tag) - inline element, treated like text." +msgstr "コードテキスト(:htmlTag:`code` タグ)を追加します。行内要素で、テキストのように扱われます。" + +#: ../../xml-class.rst:35 ed457783f87b4432965ba520310e2912 +msgid ":meth:`~.add_var`" +msgstr "" + +#: ../../xml-class.rst:36 baa6b54b7dec48428d61ef058714e1c8 +msgid ":meth:`~.add_samp`" +msgstr "" + +#: ../../xml-class.rst:37 2e681286c7934f9e98e30aeed8f1e058 +msgid ":meth:`~.add_kbd`" +msgstr "" + +#: ../../xml-class.rst:38 c90afbfd1f2a4432aa57f469ce1aa9c3 +msgid ":meth:`~.add_text`" +msgstr "" + +#: ../../xml-class.rst:38 ../../xml-class.rst:162 +#: e05cf6e5226b4f768bfb698de671c8db fdddcc02af0b4c43a986178f7ab86439 +#, fuzzy +msgid "Add a text string. Line breaks ``\\n`` are honored as :htmlTag:`br` tags." +msgstr "テキスト文字列を追加します。改行 ``\\n`` は :htmlTag:`br` タグとして認識されます。" + +#: ../../xml-class.rst:39 5ae611e247354be1a46cad16dd7fdf40 +msgid ":meth:`~.append_child`" +msgstr "" + +#: ../../xml-class.rst:39 f501e0b055984ba48721852e2de6968d +msgid "Append a child node." +msgstr "子ノードを追加します。" + +#: ../../xml-class.rst:40 25340d7a99fc4ccf860933df50d0ebec +msgid ":meth:`~.clone`" +msgstr "" + +#: ../../xml-class.rst:40 c331f277124c4ee3b8a1e9037493a3db +msgid "Make a copy if this node." +msgstr "このノードのコピーを作成します。" + +#: ../../xml-class.rst:41 89bd772f48204020b60419c2db9d5bff +msgid ":meth:`~.create_element`" +msgstr "" + +#: ../../xml-class.rst:41 405cddf85d9c49dd8e5b9cd0fcfa855d +msgid "Make a new node with a given tag name." +msgstr "指定されたタグ名で新しいノードを作成します。" + +#: ../../xml-class.rst:42 b521f73a8f764b0f87ea954af8981b37 +msgid ":meth:`~.create_text_node`" +msgstr "" + +#: ../../xml-class.rst:42 ../../xml-class.rst:306 +#: 36b72c94c0534f59897e7d90525f00ab dfd89d799b2a49de8a00c1c68015628f +msgid "Create direct text for the current node." +msgstr "現在のノードに直接テキストを作成します。" + +#: ../../xml-class.rst:43 fe2c4c6a496e4c30a74892bf305be0bf +msgid ":meth:`~.find`" +msgstr "" + +#: ../../xml-class.rst:43 318cef4b3a694140908577091226c3e3 +msgid "Find a sub-node with given properties." +msgstr "指定されたプロパティを持つサブノードを検索します。" + +#: ../../xml-class.rst:44 a453fbff3b22420db8f0c51be152f8d6 +msgid ":meth:`~.find_next`" +msgstr "" + +#: ../../xml-class.rst:44 8a60922fc34041dfa5e21d0caa6fac06 +msgid "Repeat previous \"find\" with the same criteria." +msgstr "前回の「find」と同じ条件で繰り返します。" + +#: ../../xml-class.rst:45 e198f0bc22b944d48e40dc5c38223e9b +msgid ":meth:`~.insert_after`" +msgstr "" + +#: ../../xml-class.rst:45 8f66fccc674d4dd8ab65c02ac8ba24fc +msgid "Insert an element after current node." +msgstr "現在のノードの後に要素を挿入します。" + +#: ../../xml-class.rst:46 9d6b713c56884ee1b369e276ec5102a7 +msgid ":meth:`~.insert_before`" +msgstr "" + +#: ../../xml-class.rst:46 ae54a30b8bcf44daa6da5370718a075b +msgid "Insert an element before current node." +msgstr "現在のノードの前に要素を挿入します。" + +#: ../../xml-class.rst:47 e9116568aee3452182bda7d5810abf1e +msgid ":meth:`~.remove`" +msgstr "" + +#: ../../xml-class.rst:47 64bec43c81d140e9bb69363bbdaa051c +msgid "Remove this node." +msgstr "このノードを削除します。" + +#: ../../xml-class.rst:48 1ac7caa0aab541d2b460ac445585c6c7 +msgid ":meth:`~.set_align`" +msgstr "" + +#: ../../xml-class.rst:48 ac6a6fbe13044fd787cd15a2670459ef +msgid "Set the alignment using a CSS style spec. Only works for block-level tags." +msgstr "CSS スタイル仕様を使用して配置を設定します。ブロックレベルのタグにのみ適用されます。" + +#: ../../xml-class.rst:49 2c4b7b6457d4484a92058c1de4270fe9 +msgid ":meth:`~.set_attribute`" +msgstr "" + +#: ../../xml-class.rst:49 ../../xml-class.rst:172 +#: 804a76ddd5074a59b8d94661b6941469 e0a54f2af7784bddb2c2e0baf1536f26 +msgid "Set an arbitrary key to some value (which may be empty)." +msgstr "任意のキーに値(空である可能性があります)を設定します。" + +#: ../../xml-class.rst:50 fa0b42cca1304379b0bcfebdc442dd15 +msgid ":meth:`~.set_bgcolor`" +msgstr "" + +#: ../../xml-class.rst:50 ../../xml-class.rst:199 +#: 63b3f2fe208b457ab9f31e92c5353ce8 bdaa46d0f4da43fca01a888db16337c7 +msgid "Set the background color. Only works for block-level tags." +msgstr "背景色を設定します。ブロックレベルのタグにのみ適用されます。" + +#: ../../xml-class.rst:51 ec6d2db5fec44f689ad6be9b934f0a82 +msgid ":meth:`~.set_bold`" +msgstr "" + +#: ../../xml-class.rst:51 ../../xml-class.rst:205 +#: 9e58087d6ecb42dab3894c28babdec5b bdc6a779999c4686ac0ffe57b93a8bd7 +msgid "Set bold on or off or to some string value." +msgstr "太字をオンまたはオフに設定するか、ある文字列の値に設定します。" + +#: ../../xml-class.rst:52 de15508d8d384d9da5d0b8cd2b1b780c +msgid ":meth:`~.set_color`" +msgstr "" + +#: ../../xml-class.rst:52 9189331756f9457b9a92c047923b268d +msgid "Set text color." +msgstr "テキストの色を設定します。" + +#: ../../xml-class.rst:53 9ac5e040ad994771a04fa353c7930b81 +msgid ":meth:`~.set_columns`" +msgstr "" + +#: ../../xml-class.rst:53 5e49161f1d2d43c889352cd9ff8edfc8 +msgid "Set the number of columns. Argument may be any valid number or string." +msgstr "列数を設定します。引数は任意の有効な数値または文字列である必要があります。" + +#: ../../xml-class.rst:54 09f99796e853445c8dcf98822e1ac0f7 +msgid ":meth:`~.set_font`" +msgstr "" + +#: ../../xml-class.rst:54 cf00809415f04821b5afcfb274bc466e +msgid "Set the font-family, e.g. “sans-serif”." +msgstr "フォントファミリーを設定します。例: \"sans-serif\"。" + +#: ../../xml-class.rst:55 b6ea49c8641c4332b9398bef1a6359a3 +msgid ":meth:`~.set_fontsize`" +msgstr "" + +#: ../../xml-class.rst:55 9fe5840d1b4e4891aa77b55e25a5b1e2 +msgid "Set the font size. Either a float or a valid HTML/CSS string." +msgstr "フォントサイズを設定します。浮動小数点数または有効なHTML/CSS文字列のいずれかです。" + +#: ../../xml-class.rst:56 893eb791a848403381a5172bc82ba8a2 +msgid ":meth:`~.set_id`" +msgstr "" + +#: ../../xml-class.rst:56 f2aa328fbed64085b476f3b70fdf3881 +msgid "Set a :htmlTag:`id`. A check for uniqueness is performed." +msgstr ":htmlTag:`id` を設定します。一意性のチェックが実行されます。" + +#: ../../xml-class.rst:57 783861b6af6e44bc8733b76733ec5da5 +msgid ":meth:`~.set_italic`" +msgstr "" + +#: ../../xml-class.rst:57 0701120232e24ab7a942c75085ee4917 +msgid "Set italic on or off or to some string value." +msgstr "イタリック体をオンまたはオフ、または一部の文字列値に設定します。" + +#: ../../xml-class.rst:58 309c49d3870449faa199927e29c64b96 +msgid ":meth:`~.set_leading`" +msgstr "" + +#: ../../xml-class.rst:58 ../../xml-class.rst:249 +#: 040c50bebd594591ae1e863aa20683a6 dd67658b9d3349cc89ede4628205f1df +msgid "" +"Set inter-block text distance (`-mupdf-leading`), only works on block-" +"level nodes." +msgstr "インターブロックテキスト間の距離(`-mupdf-leading`)を設定します。ブロックレベルのノードでのみ機能します。" + +#: ../../xml-class.rst:59 2281f01a10cb44209c22340d5f31c661 +msgid ":meth:`~.set_lineheight`" +msgstr "" + +#: ../../xml-class.rst:59 5fd19f3c7efd41d6be0dd0d482afa714 +msgid "Set height of a line. Float like 1.5, which sets to `1.5 * fontsize`." +msgstr "行の高さを設定します。1.5のような浮動小数点数は、`1.5 * fontsize` に設定します。" + +#: ../../xml-class.rst:60 da10321480c547bd877d788cbbc17d19 +msgid ":meth:`~.set_margins`" +msgstr "" + +#: ../../xml-class.rst:60 562e9cf200d04d618d6ce2d9d0199a52 +msgid "Set the margin(s), float or string with up to 4 values." +msgstr "マージンを設定します。浮動小数点数または最大4つの値を持つ文字列です。" + +#: ../../xml-class.rst:61 7a9720be94704744b9ffc66d54860d47 +msgid ":meth:`~.set_pagebreak_after`" +msgstr "" + +#: ../../xml-class.rst:61 ../../xml-class.rst:267 +#: 2e26773bf354417d9a1e518e237918a0 b15db1fb04a94a9db2c92c13e5185080 +msgid "Insert a page break after this node." +msgstr "このノードの後に改ページを挿入します。" + +#: ../../xml-class.rst:62 6f6e3d153b3b4739bb09a561522007a6 +msgid ":meth:`~.set_pagebreak_before`" +msgstr "" + +#: ../../xml-class.rst:62 ../../xml-class.rst:271 +#: 7136cfd22a9d457a97a7e2513a45846f c2118e0dbcda4f5da4addfd12c35aa83 +msgid "Insert a page break before this node." +msgstr "このノードの前に改ページを挿入します。" + +#: ../../xml-class.rst:63 1bbac025080541ae80e078311ca65702 +msgid ":meth:`~.set_properties`" +msgstr "" + +#: ../../xml-class.rst:63 1b2e863de6b347dd9e0815f739335444 +msgid "Set any or all desired properties in one call." +msgstr "1つの呼び出しで任意またはすべての所望のプロパティを設定します。" + +#: ../../xml-class.rst:64 5b7ad8cc67534183884e12223b204bfb +msgid ":meth:`~.add_style`" +msgstr "" + +#: ../../xml-class.rst:64 b48568e9492448fea987302ce3977624 +msgid "Set (add) a “style” that is not supported by its own `set_` method." +msgstr "独自の `set_` メソッドでサポートされていない \"スタイル\" を設定(追加)します。" + +#: ../../xml-class.rst:65 eeab444cf1874576b113c84b0127b3e9 +msgid ":meth:`~.add_class`" +msgstr "" + +#: ../../xml-class.rst:65 c6c3f6a173aa4093ad84ff360356b54b +msgid "Set (add) a “class” attribute." +msgstr "\"クラス\" 属性を設定(追加)します。" + +#: ../../xml-class.rst:66 dacd474bcc8e488ab2843c5b8e98be3a +msgid ":meth:`~.set_text_indent`" +msgstr "" + +#: ../../xml-class.rst:66 2a9fa4287d5043b38c543cccd188d907 +msgid "" +"Set indentation for first textblock line. Only works for block-level " +"nodes." +msgstr "最初のテキストブロック行のインデントを設定します。ブロックレベルのノードでのみ機能します。" + +#: ../../xml-class.rst:67 42123ce0b6f24506853dac4a1af569fa +msgid ":attr:`~.tagname`" +msgstr "" + +#: ../../xml-class.rst:67 ../../xml-class.rst:370 +#: 36166f24a5d84c408d14f9f1a2dc2468 b8920a310a044b1987fdc6fed230bf95 +msgid "Either the HTML tag name like :htmlTag:`p` or `None` if a text node." +msgstr "HTMLタグ名(例: :htmlTag:`p`)またはテキストノードの場合は `None`。" + +#: ../../xml-class.rst:68 66c56dbdcc064ee483ccc6e18e6d9785 +msgid ":attr:`~.text`" +msgstr "" + +#: ../../xml-class.rst:68 ../../xml-class.rst:374 +#: 28401b8b52b2423a8c4aa49d5fc3591b e4b6e887c7244003973e168bacadf31e +msgid "Either the node's text or `None` if a tag node." +msgstr "ノードのテキストまたはテキストノードの場合は `None`。" + +#: ../../xml-class.rst:69 4a66c8e044cb475fa6fb2613a94f66e3 +msgid ":attr:`~.is_text`" +msgstr "" + +#: ../../xml-class.rst:69 ca0266179d7442349348788c2abfc7fa +msgid "Check if the node is a text." +msgstr "ノードがテキストかどうかを確認します。" + +#: ../../xml-class.rst:70 e5f96a9fc17b4e368cfce3ef779be88c +msgid ":attr:`~.first_child`" +msgstr "" + +#: ../../xml-class.rst:70 ../../xml-class.rst:382 +#: 2f21de8ea3a74f8e8afccd49d6b046bc 42e3ed28baa943f6a3b5b61eebe41952 +msgid "Contains the first node one level below this one (or `None`)." +msgstr "このノードの1レベル下の最初のノードを含みます(または `None`)。" + +#: ../../xml-class.rst:71 84b9f06f42b04e4782acf1d68068c5b4 +msgid ":attr:`~.last_child`" +msgstr "" + +#: ../../xml-class.rst:71 ../../xml-class.rst:386 +#: 2c94509d8b3b40b4b94c1944dca9f128 676901a092fc4735b224b379bb0c3f9f +msgid "Contains the last node one level below this one (or `None`)." +msgstr "このノードの1レベル下の最後のノードを含みます(または `None`)。" + +#: ../../xml-class.rst:72 3dd91ae4326f41568dfd754dde88c7a4 +msgid ":attr:`~.next`" +msgstr "" + +#: ../../xml-class.rst:72 ../../xml-class.rst:390 +#: cbabeaa3eefd40249b4dd37b701d59cc d36f34064bfa40a0a40827e73a241423 +msgid "The next node at the same level (or `None`)." +msgstr "同じレベルの次のノード(または `None`)。" + +#: ../../xml-class.rst:73 7a0463bd4ac7449dabbfab5a78bbbdd0 +msgid ":attr:`~.previous`" +msgstr "" + +#: ../../xml-class.rst:73 ../../xml-class.rst:394 +#: 21ccb8eec1544ac69539f5504e1c2572 3bc7b50ddc0d48a1b11a3a42ff6bf3ff +msgid "The previous node at the same level." +msgstr "同じレベルの前のノード。" + +#: ../../xml-class.rst:74 d61cc5bbd1f146c7972c2ebfc6200683 +msgid ":attr:`~.root`" +msgstr "" + +#: ../../xml-class.rst:74 ../../xml-class.rst:398 +#: a71d233f5e2e429eb0443f3879620f09 cd5e5bf55358464abc8f27b0b2c4c712 +msgid "The top node of the DOM, which hence has the tagname :htmlTag:`html`." +msgstr "DOMのトップノードで、したがって :htmlTag:`html` というタグ名を持っています。" + +#: ../../xml-class.rst:79 7b4df043605440d3bb688f84d119f814 +msgid "**Class API**" +msgstr "**クラスAPI**" + +#: ../../xml-class.rst:85 15f567e4f1754a959a432aa258e0d91d +msgid "" +"Add an :htmlTag:`ul` tag - bulleted list, context manager. See `ul " +"`_." +msgstr "" +":htmlTag:`ul` タグ(箇条書きリスト)を追加します。コンテキスト マネージャーです。詳細は `ul " +"`_ " +"を参照してください。" + +#: ../../xml-class.rst:89 3c04822b8860402a8b9573c54e5194e3 +msgid "" +"Add a :htmlTag:`pre` tag, context manager. See `pre " +"`_." +msgstr "" +":htmlTag:`pre` タグ(コードブロック)を追加します。コンテキスト マネージャーです。詳細は `pre " +"`_ " +"を参照してください。" + +#: ../../xml-class.rst:93 0ed1cc73e1274f0a9666e87961987c78 +msgid "" +"Add a :htmlTag:`dl` tag, context manager. See `dl " +"`_." +msgstr "" +":htmlTag:`dl` タグ(説明リスト)を追加します。コンテキスト マネージャーです。詳細は `dl " +"`_ " +"を参照してください。" + +#: ../../xml-class.rst:97 0b0a1eaefbc1441fbd61bce939f12692 +msgid "" +"Add a :htmlTag:`div` tag, context manager. See `div " +"`_." +msgstr "" +":htmlTag:`div` タグを追加します。コンテキスト マネージャーです。詳細は `div " +"`_ " +"を参照してください。" + +#: ../../xml-class.rst:101 8be2c66a14014d738a580d04d0c21248 +msgid "" +"Add a header tag (one of :htmlTag:`h1` to :htmlTag:`h6`), context " +"manager. See `headings `_." +msgstr "" +"ヘッダータグ(:htmlTag:`h1` から :htmlTag:`h6` のいずれか)を追加します。コンテキスト マネージャーです。詳細は " +"`見出し `_ を参照してください。" + +#: ../../xml-class.rst 06667ae7d009417a97c1ecf590b1a67f +#: 0aaa5b3f295942ebbbccb8d389cd898a 1aa126b038ed4ce781aecf808c57cdec +#: 1d2b1dade7d6423bb8e41f0972a59fb3 1d556e1aced24a5a8be36537a9f59e1b +#: 1e0cf1d1d2054403831bfb69810ee742 255c1340c66245b48f2a3cdf80e4214c +#: 2c0eea03634e486dabab3f54b424fad2 46aa4928a5604814a92adf164d7fa247 +#: 49455d15a82740859665ec07012100a3 61e61452d3f94579997370f901426bce +#: 67c4ccac4676478495f011032187051d 6e85e6b82e584e458e9f10f45aeaf176 +#: 9079b60e1ede4c158bdcc07baa224b19 a1450c78b0a74e7a9a133091ddb6e387 +#: a90aa7ae4f834d88b627c22105ee12ce cc4f042b90394b9eb95de870938d8c43 +#: d0e84706994a47c4aad671254c7a85e2 d1095b66b77e4a72a9793960df3fa674 +#: d23494315ac041c791e01f27f2af929e d96c7bfed45740ec810afcba0125a60a +#: dd18489673354a54a1e8b0d472e4d131 e1d5789d094643a8b45b8b3c59f908b1 +#: e6d0fce46e4a49ef89c56b78e0914a3f edfbbc1dbdd840519996cc595579ac32 +#: f5a2500942f844c3ba7e25042d206f66 f9e99192da7d40888802688752c7595e +msgid "Parameters" +msgstr "" + +#: ../../xml-class.rst:103 05de048c66ba4b459eb9539250326096 +msgid "a value 1 - 6." +msgstr "値(1 - 6)。" + +#: ../../xml-class.rst:107 2c297e02768c469799efc10e37822f0e +msgid "" +"Add a :htmlTag:`hr` tag. See `hr `_." +msgstr "" +":htmlTag:`hr` タグを追加します。詳細は `hr `_ を参照してください。" + +#: ../../xml-class.rst:111 b970bd7ba2b14e329f30ab7f38392367 +msgid "" +"Add an :htmlTag:`img` tag. This causes the inclusion of the named image " +"in the DOM." +msgstr ":htmlTag:`img` タグを追加します。これにより、指定された名前の画像が DOM に含まれます。" + +#: ../../xml-class.rst:113 9fae2f8598bf4a7b873ba77fe44e109d +msgid "" +"the filename of the image. This **must be the member name** of some entry" +" of the :ref:`Archive` parameter of the :ref:`Story` constructor." +msgstr "" +"画像のファイル名。これは :ref:`Story` コンストラクタの :ref:`Archive` パラメータのエントリの " +"**メンバー名である必要があります** 。" + +#: ../../xml-class.rst:114 f86a37a277f94897a82aefd1205cbb77 +msgid "" +"if provided, either an absolute (int) value, or a percentage string like " +"\"30%\". A percentage value refers to the width of the specified `where` " +"rectangle in :meth:`Story.place`. If this value is provided and `height` " +"is omitted, the image will be included keeping its aspect ratio." +msgstr "" +"提供された場合、絶対値(int)または「30%」などのパーセンテージ文字列。パーセンテージ値は :meth:`Story.place` " +"で指定された `where` の長方形の幅を指します。この値が提供され、`height` " +"が省略された場合、画像はアスペクト比を保持したまま含まれます。" + +#: ../../xml-class.rst:115 fb3d1d9e2800433b822fc09a4d7e5b38 +msgid "" +"if provided, either an absolute (int) value, or a percentage string like " +"\"30%\". A percentage value refers to the height of the specified `where`" +" rectangle in :meth:`Story.place`. If this value is provided and `width` " +"is omitted, the image's aspect ratio will be honored." +msgstr "" +"提供された場合、絶対値(int)または「30%」などのパーセンテージ文字列。パーセンテージ値は :meth:`Story.place` " +"で指定された `where` 長方形の高さを指します。この値が提供され、`width` が省略された場合、画像のアスペクト比が維持されます。" + +#: ../../xml-class.rst:119 7dc5d5cd96ba453683175419ccf0139d +msgid "Add an :htmlTag:`a` tag - inline element, treated like text." +msgstr ":htmlTag:`a` タグを追加します - インライン要素で、テキストのように扱われます。" + +#: ../../xml-class.rst:121 0e78457bb88a440b916bc48c1587500a +msgid "the URL target." +msgstr "URL の対象。" + +#: ../../xml-class.rst:122 29694751dcf94583ab50b52f45eac3c5 +msgid "the text to display. If omitted, the `href` text is shown instead." +msgstr "表示するテキスト。省略した場合、`href` のテキストが代わりに表示されます。" + +#: ../../xml-class.rst:126 b5e405fe3b834289ba24aee22e844046 +msgid "Add an :htmlTag:`ol` tag, context manager." +msgstr ":htmlTag:`ol` タグを追加します。コンテキストマネージャーです。" + +#: ../../xml-class.rst:130 76fa57288ce94e5f83f0c2179b72bfe6 +msgid "Add a :htmlTag:`p` tag, context manager." +msgstr ":htmlTag:`p` タグを追加します。コンテキストマネージャーです。" + +#: ../../xml-class.rst:134 ab34ee2cc1fd4256ad7020de606ebc05 +msgid "Add a :htmlTag:`span` tag, context manager. See `span`_" +msgstr ":htmlTag:`span` タグを追加します。コンテキストマネージャーです。span タグについては `span`_ を参照してください。" + +#: ../../xml-class.rst:138 07628e22868f42ffbcf7ddd9c0ac35d5 +msgid "" +"Add \"subscript\" text(:htmlTag:`sub` tag) - inline element, treated like" +" text." +msgstr "\"subscript\" テキスト(:htmlTag:`sub` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:142 dd9903a2ecf4404a9d6accc1e0749f79 +msgid "" +"Add \"superscript\" text (:htmlTag:`sup` tag) - inline element, treated " +"like text." +msgstr "\"superscript\" テキスト(:htmlTag:`sup` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:146 ed86a64559ee476a87e5777052bb75fe +msgid "" +"Add \"code\" text (:htmlTag:`code` tag) - inline element, treated like " +"text." +msgstr "\"code\" テキスト(:htmlTag:`code` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:150 23a0e0f64af1407a8d78e8b1fc6eba45 +msgid "" +"Add \"variable\" text (:htmlTag:`var` tag) - inline element, treated like" +" text." +msgstr "\"variable\" テキスト(:htmlTag:`var` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:154 16840c2eee994250a111a670c1982c07 +msgid "" +"Add \"sample output\" text (:htmlTag:`samp` tag) - inline element, " +"treated like text." +msgstr "\"sample output\" テキスト(:htmlTag:`samp` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:158 c12b71425d2847eb8bde80f536c4c5ca +msgid "" +"Add \"keyboard input\" text (:htmlTag:`kbd` tag) - inline element, " +"treated like text." +msgstr "\"keyboard input\" テキスト(:htmlTag:`kbd` タグ)を追加します。テキストと同様に扱われるインライン要素です。" + +#: ../../xml-class.rst:166 a0c1e8813cc34b0b905e64b8843960a1 +msgid "Set the text alignment. Only works for block-level tags." +msgstr "テキストの配置を設定します。ブロックレベルのタグにのみ対応しています。" + +#: ../../xml-class.rst:168 5c1f833338ba45238483066cd6ee050f +msgid "" +"either one of the :ref:`TextAlign` or the `text-align " +"`_ values." +msgstr "" +":ref:`TextAlign` または `text-align `_ のいずれかの値です。" + +#: ../../xml-class.rst:174 ../../xml-class.rst:187 ../../xml-class.rst:195 +#: 2fe24dbf6c8e491da8d3a7aca96d7db7 a41e1f3d0e3646c4848ce0ef0df0f734 +#: e6ab290bc6c6427aa52cd824b5abcf95 +msgid "the name of the attribute." +msgstr "属性の名前。" + +#: ../../xml-class.rst:175 d389ba2ac4a34151a91a26102e2b6460 +msgid "the (optional) value of the attribute." +msgstr "(オプションの) 属性の値。" + +#: ../../xml-class.rst:179 07c3f60cb5574e45abb857f481b45d17 +msgid "Retrieve all attributes of the current nodes as a dictionary." +msgstr "現在のノードのすべての属性を辞書として取得します。" + +#: ../../xml-class.rst 17192c246086475781cbc2a532fe77f7 +#: 316b287ac38d47998654b66e17120d3c 33b69c8e0c214f7e94b9e785d1d5acb0 +#: 397c2d06aeac40b0a74e6504b51ad4da e796210aabb44877bdc47c5faba1888b +#: f45ca78bcbb745fd8bf01620b26911ca f470a993feb34859b12a4343388d8e3d +msgid "Returns" +msgstr "戻り値:" + +#: ../../xml-class.rst:181 9a860899e73444138923297266a09e92 +msgid "a dictionary with the attributes and their values of the node." +msgstr "ノードの属性とその値を含む辞書。" + +#: ../../xml-class.rst:185 367b3d6b05fe484c8692a5b9fdf0888d +msgid "Get the attribute value of `key`." +msgstr "`key` ーの属性値を取得します。" + +#: ../../xml-class.rst:189 0c606e3db4aa43e4b82e63059d8b9056 +msgid "a string with the value of `key`." +msgstr "`key` ーの値を含む文字列。" + +#: ../../xml-class.rst:193 7ab7a2c9a37a49528c76d65040e92e55 +msgid "Remove the attribute `key` from the node." +msgstr "ノードから属性 `key` を削除します。" + +#: ../../xml-class.rst:201 4a286ef8c13b4a818402696f63abbbda +msgid "" +"either an RGB value like (255, 0, 0) (for \"red\") or a valid " +"`background-color `_ value." +msgstr "" +"RGB値(例:(255, 0, 0)、\"red\"の場合)または有効な `background-color " +"`_ 値。" + +#: ../../xml-class.rst:207 0c4471f9c34f45e3b376695ecd25ae47 +msgid "" +"`True`, `False` or a valid `font-weight `_ value." +msgstr "" +"`True`、`False`、または有効な `font-weight `_ 値。" + +#: ../../xml-class.rst:211 377d0e10d7b548c88d42311255d9ff02 +msgid "Set the color of the text following." +msgstr "後続するテキストの色を設定します。" + +#: ../../xml-class.rst:213 0596f8f1cc684e51ac8e6a2e58d3f646 +msgid "" +"either an RGB value like (255, 0, 0) (for \"red\") or a valid `color " +"`_ value." +msgstr "" +"RGB値(例:(255, 0, 0)、\"red\"の場合)または有効な `カラー `_ 値。" + +#: ../../xml-class.rst:217 9de3ba47655e45ea86e95f7806cc1761 +msgid "Set the number of columns." +msgstr "列の数を設定します。" + +#: ../../xml-class.rst:219 d767dfd34bba48c2bd824015aedbfce5 +msgid "" +"a valid `columns `_ value." +msgstr "" +"有効な `columns `_" +" 値。" + +#: ../../xml-class.rst:221 c5ee8d67ed604a779b356ad7a3beb3b6 +msgid "Currently ignored - supported in a future MuPDF version." +msgstr "現在は無視されており、将来のMuPDFバージョンでサポートされます。" + +#: ../../xml-class.rst:225 a3e16b087cf149fbbf6f316ad7d79880 +msgid "Set the font-family." +msgstr "フォントファミリーを設定します。" + +#: ../../xml-class.rst:227 56592161ded1480aa5c21631ec00e5ac +msgid "e.g. \"sans-serif\"." +msgstr "例:\"sans-serif\"など。" + +#: ../../xml-class.rst:231 e716de072f9544618374c016b382f40a +msgid "Set the font size for text following." +msgstr "後続するテキストのフォントサイズを設定します。" + +#: ../../xml-class.rst:233 4699b76ad7694bccbd943643ee021862 +msgid "" +"a float or a valid `font-size `_ value." +msgstr "" +"floatまたは有効な `font-size `_ 値。" + +#: ../../xml-class.rst:237 8cfd620b051940348ac0556e9ae65033 +msgid "" +"Set a :htmlTag:`id`. This serves as a unique identification of the node " +"within the DOM. Use it to easily locate the node to inspect or modify it." +" A check for uniqueness is performed." +msgstr "" +":htmlTag:`id` " +"を設定します。これはDOM内でノードを一意に識別するためのもので、ノードを簡単に見つけて調査または変更するために使用します。一意性のチェックが実行されます。" + +#: ../../xml-class.rst:239 b6042d6da5b14d338da93ff2ac9afdc3 +msgid "id string of the node." +msgstr "ノードのID文字列。" + +#: ../../xml-class.rst:243 83dcfa78795e4c169619f1b4203a95f7 +msgid "Set italic on or off or to some string value for the text following it." +msgstr "後続するテキストのイタリックをオンまたはオフ、または一部の有効なフォントスタイル値に設定します。" + +#: ../../xml-class.rst:245 e74b3e4a56714306840b8bd5cdcfb8b6 +msgid "" +"`True`, `False` or some valid `font-style `_ value." +msgstr "" +"`True`、`False`、または有効な `font-style `_ 値。" + +#: ../../xml-class.rst:251 b9a0bdede6124f1fbbde6722313e5c2c +msgid "the distance in points to the previous block." +msgstr "前のブロックからの距離(ポイント単位)。" + +#: ../../xml-class.rst:255 f13883ee98f146ca9a924b575b3cc51a +msgid "Set height of a line." +msgstr "行の高さを設定します。" + +#: ../../xml-class.rst:257 74dc61a744d94742a7eb18faf8251006 +msgid "" +"a float like 1.5 (which sets to `1.5 * fontsize`), or some valid `line-" +"height `_ " +"value." +msgstr "value – 1.5のような浮動小数点数(これは `1.5 * fontsize`に設定されます)または有効な行の高さの値。" + +#: ../../xml-class.rst:261 f4aeb6cd082e4b039f2ef1c241cffc4a +msgid "Set the margin(s)." +msgstr "マージン(マージン)を設定します。" + +#: ../../xml-class.rst:263 754876b7c7b84370b81e5f3f15bbae21 +msgid "" +"float or string with up to 4 values. See `CSS documentation " +"`_." +msgstr "" +"floatまたは最大4つの値を含む文字列。`CSSのドキュメンテーション `_ を参照してください。" + +#: ../../xml-class.rst:275 f2da4761cba1404d8c850cf58d8ba28c +msgid "" +"Set any or all desired properties in one call. The meaning of argument " +"values equal the values of the corresponding `set_` methods." +msgstr "一度の呼び出しで任意またはすべての所望のプロパティを設定します。引数の値の意味は、対応する `set_` メソッドの値と同じです。" + +#: ../../xml-class.rst:277 c1f136b0c23e4147aef9acbc054decd4 +msgid "" +"The properties set by this method are directly attached to the node, " +"whereas every `set_` method generates a new :htmlTag:`span` below the " +"current node that has the respective property. So to e.g. \"globally\" " +"set some property for the :htmlTag:`body`, this method must be used." +msgstr "" +"このメソッドで設定されるプロパティは、直接ノードにアタッチされますが、`set_` メソッドごとに現在のノードの下に対応するプロパティを持つ新しい" +" :htmlTag:`span` " +"が生成されます。したがって、本文のいくつかのプロパティを「グローバルに」設定するには、このメソッドを使用する必要があります。" + +#: ../../xml-class.rst:281 0b00e9cdb0ed42819aad271da3e10621 +msgid "Set (add) some style attribute not supported by its own `set_` method." +msgstr "独自の `set_` メソッドでサポートされていないスタイル属性を設定(追加)します。" + +#: ../../xml-class.rst:283 457e6d62d41841ee8741782c6bfd201f +msgid "any valid CSS style value." +msgstr "有効なCSSスタイル値。" + +#: ../../xml-class.rst:287 996f4ece67a841e59d001f1d260f6571 +msgid "Set (add) some \"class\" attribute." +msgstr "いくつかの「class」属性を設定(追加)します。" + +#: ../../xml-class.rst:289 0cd9ff6d9c4640ab90612ca481fb1692 +msgid "" +"the name of the class. Must have been defined in either the HTML or the " +"CSS source of the DOM." +msgstr "クラスの名前。DOMのHTMLまたはCSSソースで定義されている必要があります。" + +#: ../../xml-class.rst:293 d40d57c4852c46539d9bdf515833a465 +msgid "" +"Set indentation for the first textblock line. Only works for block-level " +"nodes." +msgstr "最初のテキストブロックの行のインデントを設定します。ブロックレベルのノードでのみ機能します。" + +#: ../../xml-class.rst:295 0d6bee0b55f24569a45c44d5e814b140 +msgid "" +"a valid `text-indent `_ value. Please note that negative values do not work." +msgstr "" +"有効な `text-indent `_ 値。負の値は機能しないことに注意してください。" + +#: ../../xml-class.rst:300 762e7d8fde1d4ad3ba5ae1bf4fa34033 +msgid "" +"Append a child node. This is a low-level method used by other methods " +"like :meth:`Xml.add_paragraph`." +msgstr "子ノードを追加します。これは、:meth:`Xml.add_paragraph` などの他のメソッドで使用される低レベルのメソッドです。" + +#: ../../xml-class.rst:302 8fe5eebdd2b1421fb687a21dfcea9fae +msgid "the :ref:`Xml` node to append." +msgstr "追加する :ref:`Xml` ノード。" + +#: ../../xml-class.rst:308 bb3c5252dd3542cc8f263ab35551dedd +msgid "the text to append." +msgstr "追加するテキスト。" + +#: ../../xml-class.rst 022bf50af61d4a8a854203e9ba18a1ce +#: 70b34bf9aef34db599ca63934a722c36 87a8f73cb5e84da286de4cacee92e249 +#: c5fc1a77039842b2a10f66a29dbdb38e +msgid "Return type" +msgstr "" + +#: ../../xml-class.rst:310 ../../xml-class.rst:319 +#: 14d3d45c8f9f4c94b76c6b34f9156283 e219f8801a92456dacd23c1bab7c5fa4 +msgid ":ref:`Xml`" +msgstr "" + +#: ../../xml-class.rst:311 0ed716b5c85d4de4a0457c85b58e194f +msgid "the created element." +msgstr "作成された要素。" + +#: ../../xml-class.rst:315 d82360adfca04cbeac23160c0a0c3afa +msgid "" +"Create a new node with a given tag. This a low-level method used by other" +" methods like :meth:`Xml.add_paragraph`." +msgstr "" +"指定されたタグで新しいノードを作成します。これは、:meth:`Xml.add_paragraph` " +"などの他のメソッドで使用される低レベルなメソッドです。" + +#: ../../xml-class.rst:317 9d7b1665fb704f00a0416ea1627c3c3a +msgid "the element tag." +msgstr "要素のタグ。" + +#: ../../xml-class.rst:320 ea5c614b041341e1be82c8015fbed2ef +msgid "" +"the created element. To actually bind it to the DOM, use " +":meth:`Xml.append_child`." +msgstr "作成された要素。実際にDOMにバインドするには、:meth:`Xml.append_child` を使用します。" + +#: ../../xml-class.rst:324 20a9a38c30524aa7bc65ec6cbe680c47 +msgid "Insert the given element `elem` before this node." +msgstr "指定された要素 `elem` をこのノードの前に挿入します。" + +#: ../../xml-class.rst:326 ../../xml-class.rst:332 +#: 44a6c046ddfd4baa968b358f7db78b0a 848c5fe3c4bb49ffbb4f55cf1fcd0d94 +msgid "some :ref:`Xml` element." +msgstr "いくつかの :ref:`Xml` 要素。" + +#: ../../xml-class.rst:330 31950c02ed55490db0883f1568fc5528 +msgid "Insert the given element `elem` after this node." +msgstr "指定された要素 `elem` をこのノードの後に挿入します。" + +#: ../../xml-class.rst:336 9b8dcc0c94654a25a10773d34514557e +msgid "" +"Make a copy of this node, which then may be appended (using " +":meth:`Xml.append_child`) or inserted (using one of " +":meth:`Xml.insert_before`, :meth:`Xml.insert_after`) in this DOM." +msgstr "" +"このノードのコピーを作成し、それをこのDOMに追加(:meth:`Xml.append_child` " +"を使用)または挿入(:meth:`Xml.insert_before` 、:meth:`Xml.insert_after` " +"のいずれかを使用)できるようにします。" + +#: ../../xml-class.rst:338 4f1ca41be752476f906e73d07f12fa64 +msgid "the clone (:ref:`Xml`) of the current node." +msgstr "現在のノードのクローン(:ref:`Xml`)。" + +#: ../../xml-class.rst:342 0cbed49f156d4211b12cb5bbe7060543 +msgid "Remove this node from the DOM." +msgstr "このノードをDOMから削除します。" + +#: ../../xml-class.rst:347 ef3d6f9aa85c47ca9abe630e29080e81 +msgid "For debugging purposes, print this node's structure in a simplified form." +msgstr "デバッグ目的で、このノードの構造を簡略化された形式で表示します。" + +#: ../../xml-class.rst:351 b77052465aff4e6c8134c42867815823 +msgid "" +"Under the current node, find the first node with the given `tag`, " +"attribute `att` and value `match`." +msgstr "現在のノードの下で、指定された `tag` 、属性 `att`、値 `match` を持つ最初のノードを検索します。" + +#: ../../xml-class.rst:353 c3b5cf81b4a8466aaf9a51d9aa91aed8 +msgid "restrict search to this tag. May be `None` for unrestricted searches." +msgstr "このタグに制限して検索します。無制限の検索の場合は `None` にできます。" + +#: ../../xml-class.rst:354 b3e60cf5c1694489b76c6b061502d815 +msgid "check this attribute. May be `None`." +msgstr "この属性を確認します。`None` にできます。" + +#: ../../xml-class.rst:355 445027aad8f349d5870b74bc71484e02 +msgid "the desired attribute value to match. May be `None`." +msgstr "致する属性値を指定します。`None` にできます。" + +#: ../../xml-class.rst:357 ../../xml-class.rst:364 +#: 458a0f17635a4ee3beaeda11ffc6cf06 d2fbb5102700450a865b3f902ceac384 +msgid ":ref:`Xml`." +msgstr "" + +#: ../../xml-class.rst:358 b5de014d478e4703b88e1c98ad4dd9dd +msgid "`None` if nothing found, otherwise the first matching node." +msgstr "何も見つからない場合は `None`、それ以外の場合は最初の一致するノード。" + +#: ../../xml-class.rst:362 bfac5489d037476889d72ff8311df153 +#, fuzzy +msgid "" +"Continue a previous :meth:`Xml.find` (or :meth:`find_next`) with the same" +" values." +msgstr "以前の :meth:`Xml.find`(または :meth:`find_next` )を同じ値で継続します。" + +#: ../../xml-class.rst:365 6e37f5e14b8849b1abe7bf97c7015bc2 +msgid "`None` if none more found, otherwise the next matching node." +msgstr "それ以上見つからない場合は `None`、それ以外の場合は次の一致するノード。" + +#: ../../xml-class.rst:378 b1a7309c8f0f4aebb4c722ef9ac56d8f +msgid "Check if a text node." +msgstr "テキストノードであるかどうかを確認します。" + +#: ../../xml-class.rst:402 277f48d6971041c6949eb351109c59a6 +msgid "Setting Text properties" +msgstr "テキストのプロパティを設定する" + +#: ../../xml-class.rst:404 e4aa8bc7dde5406984bba698ad774b6a +msgid "" +"In HTML tags can be nested such that innermost text **inherits " +"properties** from the tag enveloping its parent tag. For example " +"`

some bold textthis is bold and italicregular text

`." +msgstr "" +"HTMLでは、タグはネストされることがあり、最も内側のテキストは親タグに囲まれたタグから **プロパティを継承します** " +"。たとえば、`

some bold textthis is bold and italicregular " +"text

`。" + +#: ../../xml-class.rst:406 231ae74dcdca47a1af2c04684d880e45 +msgid "" +"To achieve the same effect, methods like :meth:`Xml.set_bold` and " +":meth:`Xml.set_italic` each open a temporary :htmlTag:`span` with the " +"desired property underneath the current node." +msgstr "" +"同じ効果を得るために、:meth:`Xml.set_bold` や :meth:`Xml.set_italic` " +"などのメソッドは、各々所望のプロパティを持つ一時的な :htmlTag:`span` を現在のノードの下に追加します。" + +#: ../../xml-class.rst:408 703c755313884c3e8e069b836f756d47 +msgid "" +"In addition, these methods return there parent node, so they can be " +"concatenated with each other." +msgstr "さらに、これらのメソッドは親ノードを返すため、それらを連結することができます。" + +#: ../../xml-class.rst:413 1317fdaa53304e249c7283355ae5ebb4 +msgid "Context Manager support" +msgstr "コンテキストマネージャのサポート" + +#: ../../xml-class.rst:414 66b960c3bdd444e0a671fd3e9b81e5a1 +msgid "The standard way to add nodes to a DOM is this::" +msgstr "DOMにノードを追加する標準的な方法は次のとおりです::" + +#: ../../xml-class.rst:427 945235d10eaf46368506481d321761b5 +msgid "" +"Methods that are flagged as \"context managers\" can conveniently be used" +" in this way::" +msgstr "コンテキストマネージャとしてフラグが付いているメソッドは、次のように便利に使用できます::" + +#: ../../footer.rst:60 1b8fa4a6d5c14ec9becef06082bce778 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/locales/ja/LC_MESSAGES/znames.mo b/docs/locales/ja/LC_MESSAGES/znames.mo new file mode 100644 index 000000000..c96b910b3 Binary files /dev/null and b/docs/locales/ja/LC_MESSAGES/znames.mo differ diff --git a/docs/locales/ja/LC_MESSAGES/znames.po b/docs/locales/ja/LC_MESSAGES/znames.po new file mode 100644 index 000000000..efdd351ab --- /dev/null +++ b/docs/locales/ja/LC_MESSAGES/znames.po @@ -0,0 +1,789 @@ +# PyMuPDF Japanese documentation +# Copyright (C) 2015-2023, Artifex +# This file is distributed under the same license as the PyMuPDF package. +# Jamie Lemon , 2023. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PyMuPDF 1.23.0rc1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2025-04-17 15:00+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: Suzan Sanver \n" +"Language: ja\n" +"Language-Team: ja \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.10.3\n" + +#: ../../header.rst:-1 0f9441fa95204b209542b6195fa33e94 +msgid "Artifex" +msgstr "" + +#: ../../header.rst:-1 b494c998c5e441c78282bc408dffff8c +msgid "" +"PyMuPDF is a high-performance Python library for data extraction, " +"analysis, conversion & manipulation of PDF (and other) documents." +msgstr "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" + +#: ../../header.rst:-1 6067a6dd9e5242e4b0ce1d3b3ce3b293 +msgid "" +"PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables, " +"PDF Splitting, PDF Creation, Pyodide, PyScript" +msgstr "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" + +#: ../../znames.rst:7 873beb2a37544c07b1a5e70eca5650fe +msgid "Deprecated Names" +msgstr "非推奨の名前" + +#: ../../znames.rst:9 78ca6b2b094c43dab0f3efb7a1340a0f +msgid "" +"The original naming convention for methods and properties has been " +"\"camelCase\". Since its creation around 2013, a tremendous increase of " +"functionality has happened in PyMuPDF -- and with it a corresponding " +"increase in classes, methods and properties. In too many cases, this has " +"led to non-intuitive, illogical and ugly names, difficult to memorize or " +"guess." +msgstr "メソッドやプロパティの元々の命名規則は「camelCase」でした。その創造以来、PyMuPDFでの機能の著しい増加がありました。それに伴い、クラス、メソッド、プロパティも対応するように増加しました。多くの場合、これにより直感に反した、論理的でなく見栄えのしない名前が生まれ、覚えたり推測したりが難しくなりました。" + +#: ../../znames.rst:11 96fde94d6cb6460bb374c4c18ab3c5f2 +msgid "" +"A few versions ago, I therefore decided to shift gears and switch to a " +"\"snake_cased\" naming standard. This was a major effort, which needed a " +"step-wise approach. I think am done with it now (version 1.18.14)." +msgstr "数バージョン前、私はゆえに、段階的なアプローチが必要な大規模な取り組みであったにもかかわらず、「snake_cased」の命名規則に切り替えることを決定しました。これは大きな努力が必要でした。今はそれが完了したと思っています(バージョン1.18.14)。" + +#: ../../znames.rst:14 f1aba321b49d4a50b23fd84d5435c403 +msgid "" +"The following list maps deprecated names to their new versions. For " +"example, property `pageCount` became `page_count` in the :ref:`Document` " +"class. There also are less obvious name changes, e.g. method `getPNGdata`" +" was renamed to `tobytes` in the :ref:`Pixmap` class." +msgstr "" +"以下のリストは、非推奨の名前とそれらの新しいバージョンを対応付けたものです。例えば、プロパティである `pageCount` は、 " +":ref:`Document` クラス内では `page_count` " +"となりました。また、より明らかでない名前の変更もあります。例えば、メソッドである `getPNGdata` は :ref:`Pixmap` " +"クラス内で `tobytes` という名前に変更されました。" + +#: ../../znames.rst:16 da72cacf26b1418ebcdd799307695e7f +msgid "" +"Names of classes (camel case) and package-wide constants (the majority is" +" upper case) remain untouched." +msgstr "クラスの名前(キャメルケース)やパッケージ全体の定数(大部分は大文字)については、触れずに残ります。" + +#: ../../znames.rst:18 8caf5f400d46465da41396c978125459 +msgid "" +"Old names will remain available as deprecated aliases through MuPDF " +"version 1.19.0 and **be removed** in the version that follows it - " +"probably version 1.20.0, but this depends on upstream decisions (MuPDF)." +msgstr "" +"古い名前は、MuPDFバージョン1.19.0まで非推奨のエイリアスとして利用可能であり、その後のバージョン(おそらくバージョン1.20.0ですが、これは上流の決定(MuPDF)に依存します)で" +" **削除されます** 。" + +#: ../../znames.rst:20 196848a51a5e49d58753a2b5715c07b8 +msgid "" +"Starting with version 1.19.0, we will issue deprecation warnings on " +"`sys.stderr` like `Deprecation: 'newPage' removed from class 'Document' " +"after v1.19.0 - use 'new_page'.` when aliased methods are being used. " +"Using a deprecated property will not cause this type of warning." +msgstr "" +"バージョン1.19.0以降、エイリアス化されたメソッドが使用されている場合、 `sys.stderr` に非推奨の警告が表示されます。例 " +"`Deprecation: 'newPage'はクラス 'Document'からv1.19.0以降で削除されました - 'new_page'` " +"を使用してください。非推奨のプロパティを使用した場合、このタイプの警告は発生しません。" + +#: ../../znames.rst:22 fd326a4ad3c94bfb8d5f9569d40f630d +msgid "" +"Starting immediately, all deprecated objects (methods and properties) " +"will show a copy of the original's docstring, **prefixed** with the " +"deprecation message, for example::" +msgstr "" +"直ちに、すべての非推奨のオブジェクト(メソッドとプロパティ)は、元のドキュメント文字列のコピーが表示され、その前に非推奨のメッセージが " +"**付加されます** 。例:" + +#: ../../znames.rst:39 7ba1507fe4c94e149d0b1642bd1f770f +msgid "" +"There is a utility script `alias-changer.py `_ which can be used to " +"do mass-renames in your scripts. It accepts either a single file or a " +"folder as argument. If a folder is supplied, all its Python files and " +"those of its subfolders are changed. Optionally, backups of the scripts " +"can be taken." +msgstr "" +"スクリプト `alias-changer.py ` " +"があり、スクリプト内で大量の名前変更を行うために使用できます。単一のファイルまたはフォルダを引数として受け入れます。フォルダが指定された場合、そのフォルダ内のすべてのPythonファイルおよびそのサブフォルダのファイルが変更されます。必要に応じて、スクリプトのバックアップを取ることもできます。" + +#: ../../footer.rst:60 317892866ae648e99963bb6a51525df6 +msgid "This documentation covers all versions up to |version|." +msgstr "このドキュメントは |version| までのすべてのバージョンを対象としています。" + +#~ msgid "PyMuPDF は、PDF (およびその他の)ドキュメントのデータ抽出、分析、変換、操作のための高性能な Python ライブラリです。" +#~ msgstr "" + +#~ msgid "PDFテキスト抽出、PDFイメージ抽出、PDF変換、PDFテーブル、PDF分割、PDF作成, Pyodide, PyScript" +#~ msgstr "" + +#~ msgid "" +#~ "Deprecated names are not separately " +#~ "documented. The following list will help" +#~ " you find the documentation of the" +#~ " original." +#~ msgstr "非推奨の名前は別途文書化されていません。以下のリストは、元のドキュメントの検索に役立ちます。" + +#~ msgid "" +#~ "This is automatically generated. One or" +#~ " two items refer to yet undocumented" +#~ " methods - please simply ignore them." +#~ msgstr "これは自動的に生成されたものです。1または2のアイテムはまだ文書化されていないメソッドを参照していますが、無視してください。" + +#~ msgid ":index:`_isWrapped` -- :attr:`Page.is_wrapped`" +#~ msgstr "" + +#~ msgid ":index:`addCaretAnnot` -- :meth:`Page.add_caret_annot`" +#~ msgstr "" + +#~ msgid ":index:`addCircleAnnot` -- :meth:`Page.add_circle_annot`" +#~ msgstr "" + +#~ msgid ":index:`addFileAnnot` -- :meth:`Page.add_file_annot`" +#~ msgstr "" + +#~ msgid ":index:`addFreetextAnnot` -- :meth:`Page.add_freetext_annot`" +#~ msgstr "" + +#~ msgid ":index:`addHighlightAnnot` -- :meth:`Page.add_highlight_annot`" +#~ msgstr "" + +#~ msgid ":index:`addInkAnnot` -- :meth:`Page.add_ink_annot`" +#~ msgstr "" + +#~ msgid ":index:`addLineAnnot` -- :meth:`Page.add_line_annot`" +#~ msgstr "" + +#~ msgid ":index:`addPolygonAnnot` -- :meth:`Page.add_polygon_annot`" +#~ msgstr "" + +#~ msgid ":index:`addPolylineAnnot` -- :meth:`Page.add_polyline_annot`" +#~ msgstr "" + +#~ msgid ":index:`addRectAnnot` -- :meth:`Page.add_rect_annot`" +#~ msgstr "" + +#~ msgid ":index:`addRedactAnnot` -- :meth:`Page.add_redact_annot`" +#~ msgstr "" + +#~ msgid ":index:`addSquigglyAnnot` -- :meth:`Page.add_squiggly_annot`" +#~ msgstr "" + +#~ msgid ":index:`addStampAnnot` -- :meth:`Page.add_stamp_annot`" +#~ msgstr "" + +#~ msgid ":index:`addStrikeoutAnnot` -- :meth:`Page.add_strikeout_annot`" +#~ msgstr "" + +#~ msgid ":index:`addTextAnnot` -- :meth:`Page.add_text_annot`" +#~ msgstr "" + +#~ msgid ":index:`addUnderlineAnnot` -- :meth:`Page.add_underline_annot`" +#~ msgstr "" + +#~ msgid ":index:`addWidget` -- :meth:`Page.add_widget`" +#~ msgstr "" + +#~ msgid ":index:`chapterCount` -- :attr:`Document.chapter_count`" +#~ msgstr "" + +#~ msgid ":index:`chapterPageCount` -- :meth:`Document.chapter_page_count`" +#~ msgstr "" + +#~ msgid ":index:`cleanContents` -- :meth:`Page.clean_contents`" +#~ msgstr "" + +#~ msgid ":index:`clearWith` -- :meth:`Pixmap.clear_with`" +#~ msgstr "" + +#~ msgid ":index:`convertToPDF` -- :meth:`Document.convert_to_pdf`" +#~ msgstr "" + +#~ msgid ":index:`copyPage` -- :meth:`Document.copy_page`" +#~ msgstr "" + +#~ msgid ":index:`copyPixmap` -- :meth:`Pixmap.copy`" +#~ msgstr "" + +#~ msgid ":index:`CropBox` -- :attr:`Page.cropbox`" +#~ msgstr "" + +#~ msgid ":index:`CropBoxPosition` -- :attr:`Page.cropbox_position`" +#~ msgstr "" + +#~ msgid ":index:`deleteAnnot` -- :meth:`Page.delete_annot`" +#~ msgstr "" + +#~ msgid ":index:`deleteLink` -- :meth:`Page.delete_link`" +#~ msgstr "" + +#~ msgid ":index:`deletePage` -- :meth:`Document.delete_page`" +#~ msgstr "" + +#~ msgid ":index:`deletePageRange` -- :meth:`Document.delete_pages`" +#~ msgstr "" + +#~ msgid ":index:`deleteWidget` -- :meth:`Page.delete_widget`" +#~ msgstr "" + +#~ msgid ":index:`derotationMatrix` -- :attr:`Page.derotation_matrix`" +#~ msgstr "" + +#~ msgid ":index:`drawBezier` -- :meth:`Page.draw_bezier`" +#~ msgstr "" + +#~ msgid ":index:`drawBezier` -- :meth:`Shape.draw_bezier`" +#~ msgstr "" + +#~ msgid ":index:`drawCircle` -- :meth:`Page.draw_circle`" +#~ msgstr "" + +#~ msgid ":index:`drawCircle` -- :meth:`Shape.draw_circle`" +#~ msgstr "" + +#~ msgid ":index:`drawCurve` -- :meth:`Page.draw_curve`" +#~ msgstr "" + +#~ msgid ":index:`drawCurve` -- :meth:`Shape.draw_curve`" +#~ msgstr "" + +#~ msgid ":index:`drawLine` -- :meth:`Page.draw_line`" +#~ msgstr "" + +#~ msgid ":index:`drawLine` -- :meth:`Shape.draw_line`" +#~ msgstr "" + +#~ msgid ":index:`drawOval` -- :meth:`Page.draw_oval`" +#~ msgstr "" + +#~ msgid ":index:`drawOval` -- :meth:`Shape.draw_oval`" +#~ msgstr "" + +#~ msgid ":index:`drawPolyline` -- :meth:`Page.draw_polyline`" +#~ msgstr "" + +#~ msgid ":index:`drawPolyline` -- :meth:`Shape.draw_polyline`" +#~ msgstr "" + +#~ msgid ":index:`drawQuad` -- :meth:`Page.draw_quad`" +#~ msgstr "" + +#~ msgid ":index:`drawQuad` -- :meth:`Shape.draw_quad`" +#~ msgstr "" + +#~ msgid ":index:`drawRect` -- :meth:`Page.draw_rect`" +#~ msgstr "" + +#~ msgid ":index:`drawRect` -- :meth:`Shape.draw_rect`" +#~ msgstr "" + +#~ msgid ":index:`drawSector` -- :meth:`Page.draw_sector`" +#~ msgstr "" + +#~ msgid ":index:`drawSector` -- :meth:`Shape.draw_sector`" +#~ msgstr "" + +#~ msgid ":index:`drawSquiggle` -- :meth:`Page.draw_squiggle`" +#~ msgstr "" + +#~ msgid ":index:`drawSquiggle` -- :meth:`Shape.draw_squiggle`" +#~ msgstr "" + +#~ msgid ":index:`drawZigzag` -- :meth:`Page.draw_zigzag`" +#~ msgstr "" + +#~ msgid ":index:`drawZigzag` -- :meth:`Shape.draw_zigzag`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileAdd` -- :meth:`Document.embfile_add`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileCount` -- :meth:`Document.embfile_count`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileDel` -- :meth:`Document.embfile_del`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileGet` -- :meth:`Document.embfile_get`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileInfo` -- :meth:`Document.embfile_info`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileNames` -- :meth:`Document.embfile_names`" +#~ msgstr "" + +#~ msgid ":index:`embeddedFileUpd` -- :meth:`Document.embfile_upd`" +#~ msgstr "" + +#~ msgid ":index:`extractFont` -- :meth:`Document.extract_font`" +#~ msgstr "" + +#~ msgid ":index:`extractImage` -- :meth:`Document.extract_image`" +#~ msgstr "" + +#~ msgid ":index:`fileGet` -- :meth:`Annot.get_file`" +#~ msgstr "" + +#~ msgid ":index:`fileUpd` -- :meth:`Annot.update_file`" +#~ msgstr "" + +#~ msgid ":index:`fillTextbox` -- :meth:`TextWriter.fill_textbox`" +#~ msgstr "" + +#~ msgid ":index:`findBookmark` -- :meth:`Document.find_bookmark`" +#~ msgstr "" + +#~ msgid ":index:`firstAnnot` -- :attr:`Page.first_annot`" +#~ msgstr "" + +#~ msgid ":index:`firstLink` -- :attr:`Page.first_link`" +#~ msgstr "" + +#~ msgid ":index:`firstWidget` -- :attr:`Page.first_widget`" +#~ msgstr "" + +#~ msgid ":index:`fullcopyPage` -- :meth:`Document.fullcopy_page`" +#~ msgstr "" + +#~ msgid ":index:`gammaWith` -- :meth:`Pixmap.gamma_with`" +#~ msgstr "" + +#~ msgid ":index:`getArea` -- :meth:`Rect.get_area`" +#~ msgstr "" + +#~ msgid ":index:`getArea` -- :meth:`IRect.get_area`" +#~ msgstr "" + +#~ msgid ":index:`getCharWidths` -- :meth:`Document.get_char_widths`" +#~ msgstr "" + +#~ msgid ":index:`getContents` -- :meth:`Page.get_contents`" +#~ msgstr "" + +#~ msgid ":index:`getDisplayList` -- :meth:`Page.get_displaylist`" +#~ msgstr "" + +#~ msgid ":index:`getDrawings` -- :meth:`Page.get_drawings`" +#~ msgstr "" + +#~ msgid ":index:`getFontList` -- :meth:`Page.get_fonts`" +#~ msgstr "" + +#~ msgid ":index:`getImageBbox` -- :meth:`Page.get_image_bbox`" +#~ msgstr "" + +#~ msgid ":index:`getImageData` -- :meth:`Pixmap.tobytes`" +#~ msgstr "" + +#~ msgid ":index:`getImageList` -- :meth:`Page.get_images`" +#~ msgstr "" + +#~ msgid ":index:`getLinks` -- :meth:`Page.get_links`" +#~ msgstr "" + +#~ msgid ":index:`getOCGs` -- :meth:`Document.get_ocgs`" +#~ msgstr "" + +#~ msgid ":index:`getPageFontList` -- :meth:`Document.get_page_fonts`" +#~ msgstr "" + +#~ msgid ":index:`getPageImageList` -- :meth:`Document.get_page_images`" +#~ msgstr "" + +#~ msgid ":index:`getPagePixmap` -- :meth:`Document.get_page_pixmap`" +#~ msgstr "" + +#~ msgid ":index:`getPageText` -- :meth:`Document.get_page_text`" +#~ msgstr "" + +#~ msgid ":index:`getPageXObjectList` -- :meth:`Document.get_page_xobjects`" +#~ msgstr "" + +#~ msgid ":index:`getPDFnow` -- :meth:`get_pdf_now`" +#~ msgstr "" + +#~ msgid ":index:`getPDFstr` -- :meth:`get_pdf_str`" +#~ msgstr "" + +#~ msgid ":index:`getPixmap` -- :meth:`Page.get_pixmap`" +#~ msgstr "" + +#~ msgid ":index:`getPixmap` -- :meth:`Annot.get_pixmap`" +#~ msgstr "" + +#~ msgid ":index:`getPixmap` -- :meth:`DisplayList.get_pixmap`" +#~ msgstr "" + +#~ msgid ":index:`getPNGData` -- :meth:`Pixmap.tobytes`" +#~ msgstr "" + +#~ msgid ":index:`getPNGdata` -- :meth:`Pixmap.tobytes`" +#~ msgstr "" + +#~ msgid ":index:`getRectArea` -- :meth:`Rect.get_area`" +#~ msgstr "" + +#~ msgid ":index:`getRectArea` -- :meth:`IRect.get_area`" +#~ msgstr "" + +#~ msgid ":index:`getSigFlags` -- :meth:`Document.get_sigflags`" +#~ msgstr "" + +#~ msgid ":index:`getSVGimage` -- :meth:`Page.get_svg_image`" +#~ msgstr "" + +#~ msgid ":index:`getText` -- :meth:`Page.get_text`" +#~ msgstr "" + +#~ msgid ":index:`getText` -- :meth:`Annot.get_text`" +#~ msgstr "" + +#~ msgid ":index:`getTextBlocks` -- :meth:`Page.get_text_blocks`" +#~ msgstr "" + +#~ msgid ":index:`getTextbox` -- :meth:`Page.get_textbox`" +#~ msgstr "" + +#~ msgid ":index:`getTextbox` -- :meth:`Annot.get_textbox`" +#~ msgstr "" + +#~ msgid ":index:`getTextLength` -- :meth:`get_text_length`" +#~ msgstr "" + +#~ msgid ":index:`getTextPage` -- :meth:`Page.get_textpage`" +#~ msgstr "" + +#~ msgid ":index:`getTextPage` -- :meth:`Annot.get_textpage`" +#~ msgstr "" + +#~ msgid ":index:`getTextPage` -- :meth:`DisplayList.get_textpage`" +#~ msgstr "" + +#~ msgid ":index:`getTextWords` -- :meth:`Page.get_text_words`" +#~ msgstr "" + +#~ msgid ":index:`getToC` -- :meth:`Document.get_toc`" +#~ msgstr "" + +#~ msgid ":index:`getXmlMetadata` -- :meth:`Document.get_xml_metadata`" +#~ msgstr "" + +#~ msgid ":index:`ImageProperties` -- :meth:`image_properties`" +#~ msgstr "" + +#~ msgid ":index:`includePoint` -- :meth:`Rect.include_point`" +#~ msgstr "" + +#~ msgid ":index:`includePoint` -- :meth:`IRect.include_point`" +#~ msgstr "" + +#~ msgid ":index:`includeRect` -- :meth:`Rect.include_rect`" +#~ msgstr "" + +#~ msgid ":index:`includeRect` -- :meth:`IRect.include_rect`" +#~ msgstr "" + +#~ msgid ":index:`insertFont` -- :meth:`Page.insert_font`" +#~ msgstr "" + +#~ msgid ":index:`insertImage` -- :meth:`Page.insert_image`" +#~ msgstr "" + +#~ msgid ":index:`insertLink` -- :meth:`Page.insert_link`" +#~ msgstr "" + +#~ msgid ":index:`insertPage` -- :meth:`Document.insert_page`" +#~ msgstr "" + +#~ msgid ":index:`insertPDF` -- :meth:`Document.insert_pdf`" +#~ msgstr "" + +#~ msgid ":index:`insertText` -- :meth:`Page.insert_text`" +#~ msgstr "" + +#~ msgid ":index:`insertText` -- :meth:`Shape.insert_text`" +#~ msgstr "" + +#~ msgid ":index:`insertTextbox` -- :meth:`Page.insert_textbox`" +#~ msgstr "" + +#~ msgid ":index:`insertTextbox` -- :meth:`Shape.insert_textbox`" +#~ msgstr "" + +#~ msgid ":index:`invertIRect` -- :meth:`Pixmap.invert_irect`" +#~ msgstr "" + +#~ msgid ":index:`isConvex` -- :attr:`Quad.is_convex`" +#~ msgstr "" + +#~ msgid ":index:`isDirty` -- :attr:`Document.is_dirty`" +#~ msgstr "" + +#~ msgid ":index:`isEmpty` -- :attr:`Rect.is_empty`" +#~ msgstr "" + +#~ msgid ":index:`isEmpty` -- :attr:`IRect.is_empty`" +#~ msgstr "" + +#~ msgid ":index:`isEmpty` -- :attr:`Quad.is_empty`" +#~ msgstr "" + +#~ msgid ":index:`isFormPDF` -- :attr:`Document.is_form_pdf`" +#~ msgstr "" + +#~ msgid ":index:`isInfinite` -- :attr:`Rect.is_infinite`" +#~ msgstr "" + +#~ msgid ":index:`isInfinite` -- :attr:`IRect.is_infinite`" +#~ msgstr "" + +#~ msgid ":index:`isPDF` -- :attr:`Document.is_pdf`" +#~ msgstr "" + +#~ msgid ":index:`isRectangular` -- :attr:`Quad.is_rectangular`" +#~ msgstr "" + +#~ msgid ":index:`isRectilinear` -- :attr:`Matrix.is_rectilinear`" +#~ msgstr "" + +#~ msgid ":index:`isReflowable` -- :attr:`Document.is_reflowable`" +#~ msgstr "" + +#~ msgid ":index:`isRepaired` -- :attr:`Document.is_repaired`" +#~ msgstr "" + +#~ msgid ":index:`isStream` -- :meth:`Document.is_stream`" +#~ msgstr "" + +#~ msgid ":index:`lastLocation` -- :attr:`Document.last_location`" +#~ msgstr "" + +#~ msgid ":index:`lineEnds` -- :attr:`Annot.line_ends`" +#~ msgstr "" + +#~ msgid ":index:`loadAnnot` -- :meth:`Page.load_annot`" +#~ msgstr "" + +#~ msgid ":index:`loadLinks` -- :meth:`Page.load_links`" +#~ msgstr "" + +#~ msgid ":index:`loadPage` -- :meth:`Document.load_page`" +#~ msgstr "" + +#~ msgid ":index:`makeBookmark` -- :meth:`Document.make_bookmark`" +#~ msgstr "" + +#~ msgid ":index:`MediaBox` -- :attr:`Page.mediabox`" +#~ msgstr "" + +#~ msgid ":index:`MediaBoxSize` -- :attr:`Page.mediabox_size`" +#~ msgstr "" + +#~ msgid ":index:`metadataXML` -- :meth:`Document.xref_xml_metadata`" +#~ msgstr "" + +#~ msgid ":index:`movePage` -- :meth:`Document.move_page`" +#~ msgstr "" + +#~ msgid ":index:`needsPass` -- :attr:`Document.needs_pass`" +#~ msgstr "" + +#~ msgid ":index:`newPage` -- :meth:`Document.new_page`" +#~ msgstr "" + +#~ msgid ":index:`newShape` -- :meth:`Page.new_shape`" +#~ msgstr "" + +#~ msgid ":index:`nextLocation` -- :meth:`Document.next_location`" +#~ msgstr "" + +#~ msgid ":index:`pageCount` -- :attr:`Document.page_count`" +#~ msgstr "" + +#~ msgid ":index:`pageCropBox` -- :meth:`Document.page_cropbox`" +#~ msgstr "" + +#~ msgid ":index:`pageXref` -- :meth:`Document.page_xref`" +#~ msgstr "" + +#~ msgid ":index:`PaperRect` -- :meth:`paper_rect`" +#~ msgstr "" + +#~ msgid ":index:`PaperSize` -- :meth:`paper_size`" +#~ msgstr "" + +#~ msgid ":index:`paperSizes` -- :attr:`paper_sizes`" +#~ msgstr "" + +#~ msgid ":index:`PDFCatalog` -- :meth:`Document.pdf_catalog`" +#~ msgstr "" + +#~ msgid ":index:`PDFTrailer` -- :meth:`Document.pdf_trailer`" +#~ msgstr "" + +#~ msgid ":index:`pillowData` -- :meth:`Pixmap.pil_tobytes`" +#~ msgstr "" + +#~ msgid ":index:`pillowWrite` -- :meth:`Pixmap.pil_save`" +#~ msgstr "" + +#~ msgid ":index:`planishLine` -- :meth:`planish_line`" +#~ msgstr "" + +#~ msgid ":index:`preRotate` -- :meth:`Matrix.prerotate`" +#~ msgstr "" + +#~ msgid ":index:`preScale` -- :meth:`Matrix.prescale`" +#~ msgstr "" + +#~ msgid ":index:`preShear` -- :meth:`Matrix.preshear`" +#~ msgstr "" + +#~ msgid ":index:`preTranslate` -- :meth:`Matrix.pretranslate`" +#~ msgstr "" + +#~ msgid ":index:`previousLocation` -- :meth:`Document.prev_location`" +#~ msgstr "" + +#~ msgid ":index:`readContents` -- :meth:`Page.read_contents`" +#~ msgstr "" + +#~ msgid ":index:`resolveLink` -- :meth:`Document.resolve_link`" +#~ msgstr "" + +#~ msgid ":index:`rotationMatrix` -- :attr:`Page.rotation_matrix`" +#~ msgstr "" + +#~ msgid ":index:`searchFor` -- :meth:`Page.search_for`" +#~ msgstr "" + +#~ msgid ":index:`searchPageFor` -- :meth:`Document.search_page_for`" +#~ msgstr "" + +#~ msgid ":index:`setAlpha` -- :meth:`Pixmap.set_alpha`" +#~ msgstr "" + +#~ msgid ":index:`setBlendMode` -- :meth:`Annot.set_blendmode`" +#~ msgstr "" + +#~ msgid ":index:`setBorder` -- :meth:`Annot.set_border`" +#~ msgstr "" + +#~ msgid ":index:`setColors` -- :meth:`Annot.set_colors`" +#~ msgstr "" + +#~ msgid ":index:`setCropBox` -- :meth:`Page.set_cropbox`" +#~ msgstr "" + +#~ msgid ":index:`setFlags` -- :meth:`Annot.set_flags`" +#~ msgstr "" + +#~ msgid ":index:`setInfo` -- :meth:`Annot.set_info`" +#~ msgstr "" + +#~ msgid ":index:`setLanguage` -- :meth:`Document.set_language`" +#~ msgstr "" + +#~ msgid ":index:`setLineEnds` -- :meth:`Annot.set_line_ends`" +#~ msgstr "" + +#~ msgid ":index:`setMediaBox` -- :meth:`Page.set_mediabox`" +#~ msgstr "" + +#~ msgid ":index:`setMetadata` -- :meth:`Document.set_metadata`" +#~ msgstr "" + +#~ msgid ":index:`setName` -- :meth:`Annot.set_name`" +#~ msgstr "" + +#~ msgid ":index:`setOC` -- :meth:`Annot.set_oc`" +#~ msgstr "" + +#~ msgid ":index:`setOpacity` -- :meth:`Annot.set_opacity`" +#~ msgstr "" + +#~ msgid ":index:`setOrigin` -- :meth:`Pixmap.set_origin`" +#~ msgstr "" + +#~ msgid ":index:`setPixel` -- :meth:`Pixmap.set_pixel`" +#~ msgstr "" + +#~ msgid ":index:`setRect` -- :meth:`Annot.set_rect`" +#~ msgstr "" + +#~ msgid ":index:`setRect` -- :meth:`Pixmap.set_rect`" +#~ msgstr "" + +#~ msgid ":index:`setResolution` -- :meth:`Pixmap.set_dpi`" +#~ msgstr "" + +#~ msgid ":index:`setRotation` -- :meth:`Page.set_rotation`" +#~ msgstr "" + +#~ msgid ":index:`setToC` -- :meth:`Document.set_toc`" +#~ msgstr "" + +#~ msgid ":index:`setXmlMetadata` -- :meth:`Document.set_xml_metadata`" +#~ msgstr "" + +#~ msgid ":index:`showPDFpage` -- :meth:`Page.show_pdf_page`" +#~ msgstr "" + +#~ msgid ":index:`soundGet` -- :meth:`Annot.get_sound`" +#~ msgstr "" + +#~ msgid ":index:`tintWith` -- :meth:`Pixmap.tint_with`" +#~ msgstr "" + +#~ msgid ":index:`transformationMatrix` -- :attr:`Page.transformation_matrix`" +#~ msgstr "" + +#~ msgid ":index:`updateLink` -- :meth:`Page.update_link`" +#~ msgstr "" + +#~ msgid ":index:`updateObject` -- :meth:`Document.update_object`" +#~ msgstr "" + +#~ msgid ":index:`updateStream` -- :meth:`Document.update_stream`" +#~ msgstr "" + +#~ msgid ":index:`wrapContents` -- :meth:`Page.wrap_contents`" +#~ msgstr "" + +#~ msgid ":index:`writeImage` -- :meth:`Pixmap.save`" +#~ msgstr "" + +#~ msgid ":index:`writePNG` -- :meth:`Pixmap.save`" +#~ msgstr "" + +#~ msgid ":index:`writeText` -- :meth:`Page.write_text`" +#~ msgstr "" + +#~ msgid ":index:`writeText` -- :meth:`TextWriter.write_text`" +#~ msgstr "" + +#~ msgid ":index:`xrefLength` -- :meth:`Document.xref_length`" +#~ msgstr "" + +#~ msgid ":index:`xrefObject` -- :meth:`Document.xref_object`" +#~ msgstr "" + +#~ msgid ":index:`xrefStream` -- :meth:`Document.xref_stream`" +#~ msgstr "" + +#~ msgid ":index:`xrefStreamRaw` -- :meth:`Document.xref_stream_raw`" +#~ msgstr "" + +#~ msgid "Discord logo" +#~ msgstr "" + diff --git a/docs/matrix.rst b/docs/matrix.rst index f07398d12..91c0bb1c7 100644 --- a/docs/matrix.rst +++ b/docs/matrix.rst @@ -9,7 +9,7 @@ Matrix Matrix is a row-major 3x3 matrix used by image transformations in MuPDF (which complies with the respective concepts laid down in the :ref:`AdobeManual`). With matrices you can manipulate the rendered image of a page in a variety of ways: (parts of) the page can be rotated, zoomed, flipped, sheared and shifted by setting some or all of just six float values. -Since all points or pixels live in a two-dimensional space, one column vector of that matrix is a constant unit vector, and only the remaining six elements are used for manipulations. These six elements are usually represented by *[a, b, c, d, e, f]*. Here is how they are positioned in the matrix: +Since all points or pixels live in a two-dimensional space, one column vector of that matrix is a constant unit vector, and only the remaining six elements are used for manipulations. These six elements are usually represented by `[a, b, c, d, e, f]`. Here is how they are positioned in the matrix: .. image:: images/img-matrix.* @@ -68,7 +68,7 @@ Please note: A "sequence" must be any Python sequence object with exactly 6 float entries (see :ref:`SequenceTypes`). - *fitz.Matrix(1, 1)*, *fitz.Matrix(0.0 and *fitz.Matrix(fitz.Identity)* create modifiable versions of the :ref:`Identity` matrix, which looks like *[1, 0, 0, 1, 0, 0]*. + *pymupdf.Matrix(1, 1)* and *pymupdf.Matrix(pymupdf.Identity)* create modifiable versions of the :ref:`Identity` matrix, which looks like *[1, 0, 0, 1, 0, 0]*. .. method:: norm() @@ -220,6 +220,34 @@ Here are examples that illustrate some of the achievable effects. All pictures s .. image:: images/img-matrix-7.* :scale: 66 +9. Show some effects on a rectangle:: + import pymupdf + + # just definitions and a temp PDF + RED = (1, 0, 0) + BLUE = (0, 0, 1) + GREEN = (0, 1, 0) + doc = pymupdf.open() + page = doc.new_page() + + # rectangle + r1 = pymupdf.Rect(100, 100, 200, 200) + + # scales down by 50% in x- and up by 50% in y-direction + mat1 = pymupdf.Matrix(0.5, 1.5) + + # shifts by 50 in both directions + mat2 = pymupdf.Matrix(1, 0, 0, 1, 50, 50) + + # draw corresponding rectangles + page.draw_rect(r1, color=RED) # original + page.draw_rect(r1 * mat1, color=GREEN) # scaled + page.draw_rect(r1 * mat2, color=BLUE) # shifted + doc.ez_save("matrix-effects.pdf") + + +.. image:: images/img-matrix-9.* + :scale: 66 .. include:: footer.rst diff --git a/docs/module.rst b/docs/module.rst index b14b60dd5..47b33d306 100644 --- a/docs/module.rst +++ b/docs/module.rst @@ -3,21 +3,28 @@ .. _Module: ============================ -Module *fitz* +Command line interface ============================ * New in version 1.16.8 -PyMuPDF can also be used in the command line as a **module** to perform utility functions. This feature should obsolete writing some of the most basic scripts. +PyMuPDF can also be used from the command line to perform utility functions. This feature should obsolete writing some of the most basic scripts. Admittedly, there is some functional overlap with the MuPDF CLI `mutool`. On the other hand, PDF embedded files are no longer supported by MuPDF, so PyMuPDF is offering something unique here. Invocation ----------- -Invoke the module like this:: +The command-line interface can be invoked in two ways. + +* Use the installed `pymupdf` command:: + + pymupdf + +* Or use Python's `-m` switch with PyMuPDF's `pymupdf` module:: + + python -m pymupdf - python -m fitz .. highlight:: python @@ -33,18 +40,18 @@ General remarks: * How to use the module inside your script:: - >>> from fitz.__main__ import main as fitz_command + >>> import pymupdf.__main__ >>> cmd = "clean input.pdf output.pdf -pages 1,N".split() # prepare command line >>> saved_parms = sys.argv[1:] # save original command line >>> sys.argv[1:] = cmd # store new command line - >>> fitz_command() # execute module + >>> pymupdf.__main__.() # execute module >>> sys.argv[1:] = saved_parms # restore original command line * Use the following 2-liner and compile it with `Nuitka `_ in standalone mode. This will give you a CLI executable with all the module's features, that can be used on all compatible platforms without Python, PyMuPDF or MuPDF being installed. :: - from fitz.__main__ import main + from pymupdf.__main__ import main main() @@ -55,8 +62,8 @@ Cleaning and Copying This command will optimize the PDF and store the result in a new file. You can use it also for encryption, decryption and creating sub documents. It is mostly similar to the MuPDF command line utility *"mutool clean"*:: - python -m fitz clean -h - usage: fitz clean [-h] [-password PASSWORD] + pymupdf clean -h + usage: pymupdf clean [-h] [-password PASSWORD] [-encryption {keep,none,rc4-40,rc4-128,aes-128,aes-256}] [-owner OWNER] [-user USER] [-garbage {0,1,2,3,4}] [-compress] [-ascii] [-linear] [-permission PERMISSION] @@ -95,8 +102,8 @@ Extracting Fonts and Images ---------------------------- Extract fonts or images from selected PDF pages to a desired directory:: - python -m fitz extract -h - usage: fitz extract [-h] [-images] [-fonts] [-output OUTPUT] [-password PASSWORD] + pymupdf extract -h + usage: pymupdf extract [-h] [-images] [-fonts] [-output OUTPUT] [-password PASSWORD] [-pages PAGES] input @@ -126,8 +133,8 @@ Joining PDF Documents ----------------------- To join several PDF files specify:: - python -m fitz join -h - usage: fitz join [-h] -output OUTPUT [input [input ...]] + pymupdf join -h + usage: pymupdf join [-h] -output OUTPUT [input [input ...]] ---------------------------- join PDF documents --------------------------- @@ -157,7 +164,7 @@ Example: To join the following files and store the result as **output.pdf** enter this command: -*python -m fitz join -o output.pdf file1.pdf,,N-1 file2.pdf,secret,N,1 file3.pdf,,5-N* +``pymupdf join -o output.pdf file1.pdf,,N-1 file2.pdf,secret,N,1 file3.pdf,,5-N`` Low Level Information @@ -165,8 +172,8 @@ Low Level Information Display PDF internal information. Again, there are similarities to *"mutool show"*:: - python -m fitz show -h - usage: fitz show [-h] [-password PASSWORD] [-catalog] [-trailer] [-metadata] + pymupdf show -h + usage: pymupdf show [-h] [-password PASSWORD] [-catalog] [-trailer] [-metadata] [-xrefs XREFS] [-pages PAGES] input @@ -186,18 +193,18 @@ Display PDF internal information. Again, there are similarities to *"mutool show Examples:: - python -m fitz show x.pdf + pymupdf show x.pdf PDF is password protected - python -m fitz show x.pdf -pass hugo + pymupdf show x.pdf -pass hugo authentication unsuccessful - python -m fitz show x.pdf -pass jorjmckie + pymupdf show x.pdf -pass jorjmckie authenticated as owner file 'x.pdf', pages: 1, objects: 19, 58 MB, PDF 1.4, encryption: Standard V5 R6 256-bit AES Document contains 15 embedded files. - python -m fitz show FDA-1572_508_R6_FINAL.pdf -tr -m + pymupdf show FDA-1572_508_R6_FINAL.pdf -tr -m 'FDA-1572_508_R6_FINAL.pdf', pages: 2, objects: 1645, 1.4 MB, PDF 1.6, encryption: Standard V4 R4 128-bit AES document contains 740 root form fields and is signed @@ -242,8 +249,8 @@ Information Show the embedded file names (long or short format):: - python -m fitz embed-info -h - usage: fitz embed-info [-h] [-name NAME] [-detail] [-password PASSWORD] input + pymupdf embed-info -h + usage: pymupdf embed-info [-h] [-name NAME] [-detail] [-password PASSWORD] input --------------------------- list embedded files --------------------------- @@ -258,7 +265,7 @@ Show the embedded file names (long or short format):: Example:: - python -m fitz embed-info some.pdf + pymupdf embed-info some.pdf 'some.pdf' contains the following 15 embedded files. 20110813_180956_0002.jpg @@ -291,8 +298,8 @@ Extraction Extract an embedded file like this:: - python -m fitz embed-extract -h - usage: fitz embed-extract [-h] -name NAME [-password PASSWORD] [-output OUTPUT] + pymupdf embed-extract -h + usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-output OUTPUT] input ---------------------- extract embedded file to disk ---------------------- @@ -308,15 +315,15 @@ Extract an embedded file like this:: For details consult :meth:`Document.embfile_get`. Example (refer to previous section):: - python -m fitz embed-extract some.pdf -name neue.datei + pymupdf embed-extract some.pdf -name neue.datei Saved entry 'neue.datei' as 'text-tester.pdf' Deletion ~~~~~~~~~~~~~~~~~~~~~~~~ Delete an embedded file like this:: - python -m fitz embed-del -h - usage: fitz embed-del [-h] [-password PASSWORD] [-output OUTPUT] -name NAME input + pymupdf embed-del -h + usage: pymupdf embed-del [-h] [-password PASSWORD] [-output OUTPUT] -name NAME input --------------------------- delete embedded file -------------------------- @@ -335,8 +342,8 @@ Insertion ~~~~~~~~~~~~~~~~~~~~~~~~ Add a new embedded file using this command:: - python -m fitz embed-add -h - usage: fitz embed-add [-h] [-password PASSWORD] [-output OUTPUT] -name NAME -path + pymupdf embed-add -h + usage: pymupdf embed-add [-h] [-password PASSWORD] [-output OUTPUT] -name NAME -path PATH [-desc DESC] input @@ -359,8 +366,8 @@ Updates ~~~~~~~~~~~~~~~~~~~~~~~ Update an existing embedded file using this command:: - python -m fitz embed-upd -h - usage: fitz embed-upd [-h] -name NAME [-password PASSWORD] [-output OUTPUT] + pymupdf embed-upd -h + usage: pymupdf embed-upd [-h] -name NAME [-password PASSWORD] [-output OUTPUT] [-path PATH] [-filename FILENAME] [-ufilename UFILENAME] [-desc DESC] input @@ -389,8 +396,8 @@ Copying ~~~~~~~~~~~~~~~~~~~~~~~ Copy embedded files between PDFs:: - python -m fitz embed-copy -h - usage: fitz embed-copy [-h] [-password PASSWORD] [-output OUTPUT] -source + pymupdf embed-copy -h + usage: pymupdf embed-copy [-h] [-password PASSWORD] [-output OUTPUT] -source SOURCE [-pwdsource PWDSOURCE] [-name [NAME [NAME ...]]] input @@ -418,7 +425,7 @@ Extract text from arbitrary :ref:`supported documents` to * **Simple** text extraction reproduces all text as it appears in the document pages -- no effort is made to rearrange in any particular reading order. * **Block sorting** sorts text blocks (as identified by MuPDF) by ascending vertical, then horizontal coordinates. This should be sufficient to establish a "natural" reading order for basic pages of text. -* **Layout** strives to reproduce the original appearance of the input pages. You can expect results like this (produced by the command `python -m fitz gettext -pages 1 demo1.pdf`): +* **Layout** strives to reproduce the original appearance of the input pages. You can expect results like this (produced by the command `pymupdf gettext -pages 1 demo1.pdf`): .. image:: images/img-layout-text.* :scale: 60 @@ -429,14 +436,14 @@ Extract text from arbitrary :ref:`supported documents` to After each page of the output file, a formfeed character, `hex(12)` is written -- even if the input page has no text at all. This behavior can be controlled via options. -.. note:: For "layout" mode, **only horizontal, left-to-right, top-to bottom** text is supported, other text is ignored. In this mode, text is also ignored, if its fontsize is too small. +.. note:: For "layout" mode, **only horizontal, left-to-right, top-to bottom** text is supported, other text is ignored. In this mode, text is also ignored, if its :data:`fontsize` is too small. "Simple" and "blocks" mode in contrast output **all text** for any text size or orientation. Command:: - python -m fitz gettext -h - usage: fitz gettext [-h] [-password PASSWORD] [-mode {simple,blocks,layout}] [-pages PAGES] [-noligatures] + pymupdf gettext -h + usage: pymupdf gettext [-h] [-password PASSWORD] [-mode {simple,blocks,layout}] [-pages PAGES] [-noligatures] [-convert-white] [-extra-spaces] [-noformfeed] [-skip-empty] [-output OUTPUT] [-grid GRID] [-fontsize FONTSIZE] input @@ -459,7 +466,7 @@ Command:: -skip-empty suppress pages with no text (default False) -output OUTPUT store text in this file (default inputfilename.txt) -grid GRID merge lines if closer than this (default 2) - -fontsize FONTSIZE only include text with a larger fontsize (default 3) + -fontsize FONTSIZE only include text with a larger :data:`fontsize` (default 3) .. note:: Command options may be abbreviated as long as no ambiguities are introduced. So the following do the same: @@ -472,10 +479,10 @@ Command:: * **noligatures:** (bool) corresponds to **not** :data:`TEXT_PRESERVE_LIGATURES`. If specified, ligatures (present in advanced fonts: glyphs combining multiple characters like "fi") are split up into their components (i.e. "f", "i"). Default is passing them through. * **convert-white:** corresponds to **not** :data:`TEXT_PRESERVE_WHITESPACE`. If specified, all white space characters (like tabs) are replaced with one or more spaces. Default is passing them through. * **extra-spaces:** (bool) corresponds to **not** :data:`TEXT_INHIBIT_SPACES`. If specified, large gaps between adjacent characters will be filled with one or more spaces. Default is off. -* **noformfeed:** (bool) instead of `hex(12)` (formfeed), write linebreaks `\n` at end of output pages. +* **noformfeed:** (bool) instead of `hex(12)` (formfeed), write linebreaks ``\n`` at end of output pages. * **skip-empty:** (bool) skip pages with no text. * **grid:** lines with a vertical coordinate difference of no more than this value (in points) will be merged into the same output line. Only relevant for "layout" mode. **Use with care:** 3 or the default 2 should be adequate in most cases. If **too large**, lines that are *intended* to be different in the original may be merged and will result in garbled and / or incomplete output. If **too low**, artifact separate output lines may be generated for some spans in the input line, just because they are coded in a different font with slightly deviating properties. -* **fontsize:** include text with fontsize larger than this value only (default 3). Only relevant for "layout" option. +* **fontsize:** include text with :data:`fontsize` larger than this value only (default 3). Only relevant for "layout" option. .. highlight:: python diff --git a/docs/outline.rst b/docs/outline.rst index 100dea47a..f37d6d331 100644 --- a/docs/outline.rst +++ b/docs/outline.rst @@ -6,7 +6,7 @@ Outline ================ -*outline* (or "bookmark"), is a property of *Document*. If not *None*, it stands for the first outline item of the document. Its properties in turn define the characteristics of this item and also point to other outline items in "horizontal" or downward direction. The full tree of all outline items for e.g. a conventional table of contents (TOC) can be recovered by following these "pointers". +The document outline (otherwise known as "bookmarks") is a property of :ref:`Document` (see :attr:`Document.outline`). If not ``None``, it stands for the first outline item of the document. Its properties in turn define the characteristics of this item and also point to other outline items in "horizontal" or downward direction. The full tree of all outline items for e.g. a conventional table of contents (TOC) can be recovered by following these "pointers". ============================ ================================================== **Method / Attribute** **Short Description** @@ -27,13 +27,13 @@ Outline .. attribute:: down - The next outline item on the next level down. Is *None* if the item has no kids. + The next outline item on the next level down. Is ``None`` if the item has no children. :type: :ref:`Outline` .. attribute:: next - The next outline item at the same level as this item. Is *None* if this is the last one in its level. + The next outline item at the same level as this item. Is ``None`` if this is the last one in its level. :type: `Outline` @@ -45,25 +45,40 @@ Outline .. attribute:: title - The item's title as a string or *None*. + The item's title as a string or ``None``. :type: str .. attribute:: is_open - Indicator showing whether any sub-outlines should be expanded (*True*) or be collapsed (*False*). This information is interpreted by PDF reader software. + Indicator showing whether any sub-outlines should be expanded (``True``) or be collapsed (``False``). This information is interpreted by PDF reader software. :type: bool .. attribute:: is_external - A bool specifying whether the target is outside (*True*) of the current document. + A bool specifying whether the target is outside (``True``) of the current document. :type: bool .. attribute:: uri - A string specifying the link target. The meaning of this property should be evaluated in conjunction with *isExternal*. The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_21_LNK_84'* for page number 21 (1-based) in an XPS document. + A string specifying the link target. The meaning of this property should + be evaluated in conjunction with property `is_external`: + + * + `is_external` is true: ``uri`` points to some target outside the current + PDF, which may be an internet resource (``uri`` starts with ``http://`` or + similar), another file (``uri`` starts with ``file:`` or ``file://``) or some + other service like an e-mail address (``uri`` starts with ``mailto:``). + + * + `is_external` is false: ``uri`` will be `None` or point to an + internal location. In case of PDF documents, this should either be + *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named + location. The format varies for other document types, for example + "../FixedDoc.fdoc#PG_2_LNK_1" for page number 2 (1-based) in an XPS + document. :type: str diff --git a/docs/packaging.rst b/docs/packaging.rst new file mode 100644 index 000000000..4339f3f69 --- /dev/null +++ b/docs/packaging.rst @@ -0,0 +1,93 @@ +.. include:: header.rst + + +Packaging for Linux distributions +================================= + + +Requirements +------------ + +* Python +* MuPDF checkout (including submodules). +* PyMuPDF checkout. +* System packages listed in `scripts/sysinstall.py:g_sys_packages`. +* Python packages listed in `pyproject.toml`. + +Extra requirements for running tests: + +* Python packages listed in `scripts/gh_release.py:test_packages`. + + +General steps +------------- + +* Build and install MuPDF: + + * Install required system packages. + * Run `make install-shared-python` on MuPDF's `Makefile` with at least + these make variables: + + * `DESTDIR` set to the install directory, e.g. `/`. + * + `prefix` set to location relative to DESTDIR, such as `/usr/local` or + `/usr`. Must start with `/`. + * `USE_SYSTEM_LIBS=yes`. + * `HAVE_LEPTONICA=yes`. + * `HAVE_TESSERACT=yes`. + +* Build and install PyMuPDF: + + * + Run `pip install ./PyMuPDF` or `pip wheel ./PyMuPDF` with at least these + environment variables: + + * + `PYMUPDF_SETUP_MUPDF_BUILD=` (empty string) to prevent download and build + of hard-coded MuPDF release. + * + `CFLAGS`, `CXXFLAGS` and `LDFLAGS` set to allow visibility of the + installed MuPDF headers and shared libraries. + +* Run PyMuPDF tests: + + * Ensure required Python packages are available. + * + Run `pytest -k "not test_color_count and not test_3050" PyMuPDF` + + * Test `test_color_count` is known fail if MuPDF is not built with PyMuPDF's custom config.h. + * Test `test_3050` is known to fail if MuPDF is built without its own third-party libraries. + + +Use of scripts/sysinstall.py +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`scripts/sysinstall.py` provides a useful example of build, install and test +commands that are known to to work, because it is run regularly by Github +action `.github/workflows/test_sysinstall.yml`. + +* Run with `-h` or look at the doc-string to see detailed usage information. +* It uses Debian-style `apt` commands to install system packages. +* By default it assumes local git checkouts `mupdf/` and `PyMuPDF/`. + +To run a full build, install and test for both a local fake root and the system +root: + +.. code-block:: shell + + ./PyMuPDF/scripts/sysinstall.py + ./PyMuPDF/scripts/sysinstall.py --root / + +To see what commands would be run without actually running them: + +.. code-block:: shell + + ./PyMuPDF/scripts/sysinstall.py -m 0 -p 0 -t 0 + + +See also +-------- + +* + `setup.py`'s initial doc-comment has detailed information about the + environment variables used when building PyMuPDF. diff --git a/docs/page.rst b/docs/page.rst index 051d939cf..12704deba 100644 --- a/docs/page.rst +++ b/docs/page.rst @@ -12,6 +12,8 @@ There is a parent-child relationship between a document and its pages. If the do Several page methods have a :ref:`Document` counterpart for convenience. At the end of this chapter you will find a synopsis. +.. note:: Many times in this chapter we are using the term **coordinate**. It is of high importance to have at least a basic understanding of what that is and that you feel comfortable with the section :ref:`Coordinates`. + Modifying Pages --------------- Changing page properties and adding or changing page content is available for PDF documents only. @@ -24,7 +26,7 @@ In a nutshell, this is what you can do with PyMuPDF: .. note:: - Methods require coordinates (points, rectangles) to put content in desired places. Please be aware that since v1.17.0 these coordinates **must always** be provided relative to the **unrotated** page. The reverse is also true: except :attr:`Page.rect`, resp. :meth:`Page.bound` (both *reflect* when the page is rotated), all coordinates returned by methods and attributes pertain to the unrotated page. + Methods require coordinates (points, rectangles) to put content in desired places. Please be aware that these coordinates **must always** be provided relative to the **unrotated** page (since v1.17.0). The reverse is also true: except :attr:`Page.rect`, resp. :meth:`Page.bound` (both *reflect* when the page is rotated), all coordinates returned by methods and attributes pertain to the unrotated page. So the returned value of e.g. :meth:`Page.get_image_bbox` will not change if you do a :meth:`Page.set_rotation`. The same is true for coordinates returned by :meth:`Page.get_text`, annotation rectangles, and so on. If you want to find out, where an object is located in **rotated coordinates**, multiply the coordinates with :attr:`Page.rotation_matrix`. There also is its inverse, :attr:`Page.derotation_matrix`, which you can use when interfacing with other readers, which may behave differently in this respect. @@ -60,7 +62,9 @@ In a nutshell, this is what you can do with PyMuPDF: :meth:`Page.annot_xrefs` PDF only: a list of annotation (and widget) xrefs :meth:`Page.annots` return a generator over the annots on the page :meth:`Page.apply_redactions` PDF only: process the redactions of the page +:meth:`Page.clip_to_rect` PDF only: remove page content outside a rectangle :meth:`Page.bound` rectangle of the page +:meth:`Page.cluster_drawings` PDF only: bounding boxes of vector graphics :meth:`Page.delete_annot` PDF only: delete an annotation :meth:`Page.delete_image` PDF only: delete an image :meth:`Page.delete_link` PDF only: delete a link @@ -76,6 +80,7 @@ In a nutshell, this is what you can do with PyMuPDF: :meth:`Page.draw_sector` PDF only: draw a circular sector :meth:`Page.draw_squiggle` PDF only: draw a squiggly line :meth:`Page.draw_zigzag` PDF only: draw a zig-zagged line +:meth:`Page.find_tables` locate tables on the page :meth:`Page.get_drawings` get vector graphics on page :meth:`Page.get_fonts` PDF only: get list of referenced fonts :meth:`Page.get_image_bbox` PDF only: get bbox and matrix of embedded image @@ -95,12 +100,15 @@ In a nutshell, this is what you can do with PyMuPDF: :meth:`Page.insert_image` PDF only: insert an image :meth:`Page.insert_link` PDF only: insert a link :meth:`Page.insert_text` PDF only: insert text +:meth:`Page.insert_htmlbox` PDF only: insert html text in a rectangle :meth:`Page.insert_textbox` PDF only: insert a text box :meth:`Page.links` return a generator of the links on the page :meth:`Page.load_annot` PDF only: load a specific annotation :meth:`Page.load_widget` PDF only: load a specific field :meth:`Page.load_links` return the first link on a page :meth:`Page.new_shape` PDF only: create a new :ref:`Shape` +:meth:`Page.recolor` PDF only: change the colorspace of objects +:meth:`Page.remove_rotation` PDF only: set page rotation to 0 :meth:`Page.replace_image` PDF only: replace an image :meth:`Page.search_for` search for a string :meth:`Page.set_artbox` PDF only: modify `/ArtBox` @@ -139,14 +147,12 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: bound() - Determine the rectangle of the page. Same as property :attr:`Page.rect` below. For PDF documents this **usually** also coincides with :data:`mediabox` and :data:`cropbox`, but not always. For example, if the page is rotated, then this is reflected by this method -- the :attr:`Page.cropbox` however will not change. + Determine the rectangle of the page. Same as property :attr:`Page.rect`. For PDF documents this **usually** also coincides with :data:`mediabox` and :data:`cropbox`, but not always. For example, if the page is rotated, then this is reflected by this method -- the :attr:`Page.cropbox` however will not change. :rtype: :ref:`Rect` .. method:: add_caret_annot(point) - * New in v1.16.0 - PDF only: Add a caret icon. A caret annotation is a visual symbol normally used to indicate the presence of text edits on the page. :arg point_like point: the top left point of a 20 x 20 rectangle containing the MuPDF-provided icon. @@ -157,6 +163,12 @@ In a nutshell, this is what you can do with PyMuPDF: .. image:: images/img-caret-annot.* :scale: 70 + |history_begin| + + * New in v1.16.0 + + |history_end| + .. method:: add_text_annot(point, text, icon="Note") PDF only: Add a comment icon ("sticky note") with accompanying text. Only the icon is visible, the accompanying text is hidden and can be visualized by many PDF viewers by hovering the mouse over the symbol. @@ -164,47 +176,77 @@ In a nutshell, this is what you can do with PyMuPDF: :arg point_like point: the top left point of a 20 x 20 rectangle containing the MuPDF-provided "note" icon. :arg str text: the commentary text. This will be shown on double clicking or hovering over the icon. May contain any Latin characters. - :arg str icon: *(new in v1.16.0)* choose one of "Note" (default), "Comment", "Help", "Insert", "Key", "NewParagraph", "Paragraph" as the visual symbol for the embodied text [#f4]_. + :arg str icon: choose one of "Note" (default), "Comment", "Help", "Insert", "Key", "NewParagraph", "Paragraph" as the visual symbol for the embodied text [#f4]_. (New in v1.16.0) :rtype: :ref:`Annot` :returns: the created annotation. Stroke color yellow = (1, 1, 0), no fill color support. .. index:: - pair: color; add_freetext_annot - pair: fontname; add_freetext_annot - pair: fontsize; add_freetext_annot pair: rect; add_freetext_annot - pair: rotate; add_freetext_annot - pair: align; add_freetext_annot + pair: fontsize; add_freetext_annot + pair: fontname; add_freetext_annot pair: text_color; add_freetext_annot - pair: border_color; add_freetext_annot pair: fill_color; add_freetext_annot + pair: border_width; add_freetext_annot + pair: dashes; add_freetext_annot + pair: callout; add_freetext_annot + pair: line_end; add_freetext_annot + pair: opacity; add_freetext_annot + pair: align; add_freetext_annot + pair: rotate; add_freetext_annot + pair: richtext; add_freetext_annot + pair: style; add_freetext_annot - .. method:: add_freetext_annot(rect, text, fontsize=12, fontname="helv", border_color=None, text_color=0, fill_color=1, rotate=0, align=TEXT_ALIGN_LEFT) + .. method:: add_freetext_annot(rect, text, *, fontsize=11, fontname="helv", text_color=0, fill_color=None, border_width=0, dashes=None, callout=None, line_end=PDF_ANNOT_LE_OPEN_ARROW, opacity=1, align=TEXT_ALIGN_LEFT, rotate=0, richtext=False, style=None) - * Changed in v1.19.6: add border color parameter + PDF only: Add text in a given rectangle. Optionally, the appearance of a "callout" shape can be requested by specifying two or three point-like objects -- see below. - PDF only: Add text in a given rectangle. + :arg rect_like rect: the rectangle into which the text should be inserted. Text is automatically wrapped to a new line at box width. Text portions not fitting into the rectangle will be invisible without warning. - :arg rect_like rect: the rectangle into which the text should be inserted. Text is automatically wrapped to a new line at box width. Lines not fitting into the box will be invisible. + :arg str text: the text. May contain any mixture of Latin, Greek, Cyrillic, Chinese, Japanese and Korean characters. If `richtext=True` (see below), the string is interpreted as HTML syntax. This adds a plethora of ways for attractive effects. - :arg str text: the text. *(New in v1.17.0)* May contain any mixture of Latin, Greek, Cyrillic, Chinese, Japanese and Korean characters. The respective required font is automatically determined. - :arg float fontsize: the font size. Default is 12. - :arg str fontname: the font name. Default is "Helv". Accepted alternatives are "Cour", "TiRo", "ZaDb" and "Symb". The name may be abbreviated to the first two characters, like "Co" for "Cour". Lower case is also accepted. *(Changed in v1.16.0)* Bold or italic variants of the fonts are **no longer accepted**. A user-contributed script provides a circumvention for this restriction -- see section *Using Buttons and JavaScript* in chapter :ref:`FAQ`. *(New in v1.17.0)* The actual font to use is now determined on a by-character level, and all required fonts (or sub-fonts) are automatically included. Therefore, you should rarely ever need to care about this parameter and let it default (except you insist on a serifed font for your non-CJK text parts). - :arg sequence,float text_color: *(new in v1.16.0)* the text color. Default is black. + :arg float fontsize: the :data:`fontsize`. Default is 11. Ignored if `richtext=True`. - :arg sequence,float fill_color: *(new in v1.16.0)* the fill color. Default is white. - :arg sequence,float text_color: the text color. Default is black. - :arg sequence,float border_color: *(new in v1.19.6)* the border color. Default is `None`. - :arg int align: *(new in v1.17.0)* text alignment, one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT - justify is **not supported**. + :arg str fontname: The font name. Default is "Helv". Ignored if `richtext=True`, otherwise the following **restritions apply:** + + * Accepted alternatives are "Helv" (Helvetica), "Cour" (Courier), "TiRo" (Timnes-Roman), "ZaDb" (ZapfDingBats) and "Symb" (Symbol). The name may be abbreviated to the first two characters, like "Co" for "Cour", lower case accepted. + * Bold or italic variants of the fonts are **not supported.** + + :arg list,tuple,float text_color: the text color. Default is black. Ignored if `richtext=True`. - :arg int rotate: the text orientation. Accepted values are 0, 90, 270, invalid entries are set to zero. + :arg list,tuple,float fill_color: the fill color. This is used for ``rect`` and the end point of the callout lines when applicable. Default is ``None``. + + :arg list,tuple,float border_color: This parameter **only has an effect** if `richtext=True`. Otherwise, ``text_color`` is used. + + :arg float border_width: the width of border and ``callout`` lines. Default is 0 (no border), in which case callout lines may still appear with some hairline width, depending on the PDF viewer used. In any case, this value must be positive to see a border line. + + :arg list,tuple dashes: a list of floats specifying how border and callout lines should be dashed. Default is ``None``. + + :arg list,tuple callout: a list / tuple of two or three :data:`point_like` objects, which will be interpreted as end point [, knee point] and start point (in this sequence) of up to two line segments, converting this annotation into a call-out shape. + + :arg int line_end: the line end symbol of the call-out line. It is drawn at the first point specified in the `callout` list. Default is an open arrow. For possible values see :ref:`AnnotationLineEnds`. + + :arg float opacity: a float `0 <= opacity < 1` turning the annotation transparent. Default is no transparency. + + :arg int align: text alignment, one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT - justify is **not supported**. Ignored if `richtext=True`. + + :arg int rotate: the text orientation. Accepted values are integer multiples of 90°. Invalid entries receive a rotation of 0. + + :arg bool richtext: treat ``text`` as HTML syntax. This allows to achieve **bold**, *italic*, arbitrary text colors, font sizes, text alignment including justify and more - as far as the PDF subset of HTML and styling instructions supports this. This is similar to what happens in :meth:`Page.insert_htmlbox`. The base library will for example pull in required fonts if it encounters characters not contained in the standard ones. Some parameters are ignored if this option is set, as mentioned above. Default is ``False``. + + :arg str style: supply optional HTML styling information in CSS syntax. Ignored if `richtext=False`. :rtype: :ref:`Annot` - :returns: the created annotation. Color properties **can only be changed** using special parameters of :meth:`Annot.update`. There, you can also set a border color different from the text color. + :returns: the created annotation. + + |history_begin| + + * Changed in v1.19.6: add border color parameter + + |history_end| - .. method:: add_file_annot(pos, buffer, filename, ufilename=None, desc=None, icon="PushPin") + .. method:: add_file_annot(point, buffer_, filename, ufilename=None, desc=None, icon="PushPin") PDF only: Add a file attachment annotation with a "PushPin" icon at the specified location. @@ -212,12 +254,12 @@ In a nutshell, this is what you can do with PyMuPDF: :arg bytes,bytearray,BytesIO buffer: the data to be stored (actual file content, any data, etc.). - Changed in v1.14.13 *io.BytesIO* is now also supported. + Changed in v1.14.13: *io.BytesIO* is now also supported. :arg str filename: the filename to associate with the data. :arg str ufilename: the optional PDF unicode version of filename. Defaults to filename. :arg str desc: an optional description of the file. Defaults to filename. - :arg str icon: *(new in v1.16.0)* choose one of "PushPin" (default), "Graph", "Paperclip", "Tag" as the visual symbol for the attached data [#f4]_. + :arg str icon: choose one of "PushPin" (default), "Graph", "Paperclip", "Tag" as the visual symbol for the attached data [#f4]_. (New in v1.16.0) :rtype: :ref:`Annot` :returns: the created annotation. Stroke color yellow = (1, 1, 0), no fill color support. @@ -253,45 +295,87 @@ In a nutshell, this is what you can do with PyMuPDF: :rtype: :ref:`Annot` :returns: the created annotation. It is drawn with line (stroke) color red = (1, 0, 0), line width 1, fill color is supported. - .. method:: add_redact_annot(quad, text=None, fontname=None, fontsize=11, align=TEXT_ALIGN_LEFT, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True) + --------- - * New in v1.16.11 + Redactions + ~~~~~~~~~~~ + + .. method:: add_redact_annot(quad, text=None, fontname=None, fontsize=11, align=TEXT_ALIGN_LEFT, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True) - PDF only: Add a redaction annotation. A redaction annotation identifies content to be removed from the document. Adding such an annotation is the first of two steps. It makes visible what will be removed in the subsequent step, :meth:`Page.apply_redactions`. + **PDF only**: Add a redaction annotation. A redaction annotation identifies an area whose content should be removed from the document. Adding such an annotation is the first of two steps. It makes visible what will be removed in the subsequent step, :meth:`Page.apply_redactions`. :arg quad_like,rect_like quad: specifies the (rectangular) area to be removed which is always equal to the annotation rectangle. This may be a :data:`rect_like` or :data:`quad_like` object. If a quad is specified, then the enveloping rectangle is taken. - :arg str text: *(New in v1.16.12)* text to be placed in the rectangle after applying the redaction (and thus removing old content). + :arg str text: text to be placed in the rectangle after applying the redaction (and thus removing old content). (New in v1.16.12) - :arg str fontname: *(New in v1.16.12)* the font to use when *text* is given, otherwise ignored. The same rules apply as for :meth:`Page.insert_textbox` -- which is the method :meth:`Page.apply_redactions` internally invokes. The replacement text will be **vertically centered**, if this is one of the CJK or :ref:`Base-14-Fonts`. + :arg str fontname: the font to use when ``text`` is given, otherwise ignored. Only CJK and the :ref:`Base-14-Fonts` are supported. Apart from this, the same rules apply as for :meth:`Page.insert_textbox` -- which is what the method :meth:`Page.apply_redactions` internally invokes. - .. note:: + :arg float fontsize: the :data:`fontsize` to use for the replacing text. If the text is too large to fit, several insertion attempts will be made, gradually reducing the :data:`fontsize` to no less than 4. If then the text will still not fit, no text insertion will take place at all. (New in v1.16.12) - * For an **existing** font of the page, use its reference name as *fontname* (this is *item[4]* of its entry in :meth:`Page.get_fonts`). - * For a **new, non-builtin** font, proceed as follows:: + :arg int align: the horizontal alignment for the replacing text. See :meth:`insert_textbox` for available values. The vertical alignment is (approximately) centered. - page.insert_text(point, # anywhere, but outside all redaction rectangles - "something", # some non-empty string - fontname="newname", # new, unused reference name - fontfile="...", # desired font file - render_mode=3, # makes the text invisible - ) - page.add_redact_annot(..., fontname="newname") + :arg sequence fill: the fill color of the rectangle **after applying** the redaction. The default is *white = (1, 1, 1)*, which is also taken if ``None`` is specified. To suppress a fill color altogether, specify ``False``. In this cases the rectangle remains transparent. (New in v1.16.12) - :arg float fontsize: *(New in v1.16.12)* the fontsize to use for the replacing text. If the text is too large to fit, several insertion attempts will be made, gradually reducing the fontsize to no less than 4. If then the text will still not fit, no text insertion will take place at all. + :arg sequence text_color: the color of the replacing text. Default is *black = (0, 0, 0)*. (New in v1.16.12) - :arg int align: *(New in v1.16.12)* the horizontal alignment for the replacing text. See :meth:`insert_textbox` for available values. The vertical alignment is (approximately) centered if a PDF built-in font is used (CJK or :ref:`Base-14-Fonts`). + :arg bool cross_out: add two diagonal lines to the annotation rectangle. (New in v1.17.2) - :arg sequence fill: *(New in v1.16.12)* the fill color of the rectangle **after applying** the redaction. The default is *white = (1, 1, 1)*, which is also taken if *None* is specified. *(Changed in v1.16.13)* To suppress a fill color altogether, specify *False*. In this cases the rectangle remains transparent. + :rtype: :ref:`Annot` + :returns: the created annotation. Its standard appearance looks like a red rectangle (no fill color), optionally showing two diagonal lines. Colors, line width, dashing, opacity and blend mode can now be set and applied via :meth:`Annot.update` like with other annotations. (Changed in v1.17.2) - :arg sequence text_color: *(New in v1.16.12)* the color of the replacing text. Default is *black = (0, 0, 0)*. + .. image:: images/img-redact.* - :arg bool cross_out: *(new in v1.17.2)* add two diagonal lines to the annotation rectangle. + |history_begin| - :rtype: :ref:`Annot` - :returns: the created annotation. *(Changed in v1.17.2)* Its standard appearance looks like a red rectangle (no fill color), optionally showing two diagonal lines. Colors, line width, dashing, opacity and blend mode can now be set and applied via :meth:`Annot.update` like with other annotations. + * New in v1.16.11 - .. image:: images/img-redact.* + |history_end| + + + .. method:: apply_redactions(images=PDF_REDACT_IMAGE_PIXELS|2, graphics=PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED|2, text=PDF_REDACT_TEXT_REMOVE|0) + + **PDF only**: Remove all **content** contained in any redaction rectangle on the page. + + **This method applies and then deletes all redactions from the page.** + + :arg int images: How to redact overlapping images. The default (2) blanks out overlapping pixels. `PDF_REDACT_IMAGE_NONE | 0` ignores, and `PDF_REDACT_IMAGE_REMOVE | 1` completely removes images overlapping any redaction annotation. Option `PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE | 3` only removes images that are actually visible. + + :arg int graphics: How to redact overlapping vector graphics (also called "line-art" or "drawings"). The default (2) removes any overlapping vector graphics. `PDF_REDACT_LINE_ART_NONE | 0` ignores, and `PDF_REDACT_LINE_ART_REMOVE_IF_COVERED | 1` removes graphics fully contained in a redaction annotation. When removing line-art, please be aware that **stroked** vector graphics (i.e. type "s" or "sf") have a **larger wrapping rectangle** than one might expect: first of all, at least 50% of the path's line width have to be added in each direction to truly include all of the drawing. If a so-called "miter limit" is provided (see page 121 of the PDF specification), the enlarging value is `miter * width / 2`. So, when letting everything default (width = 1, miter = 10), the redaction rectangle should be at least 5 points larger in every direction. + + :arg int text: Whether to redact overlapping text. The default `PDF_REDACT_TEXT_REMOVE | 0` removes all characters whose boundary box overlaps any redaction rectangle. This complies with the original legal / data protection intentions of redaction annotations. Other use cases however may require to **keep text** while redacting vector graphics or images. This can be achieved by setting `text=True|PDF_REDACT_TEXT_NONE | 1`. This does **not comply** with the data protection intentions of redaction annotations. **Do so at your own risk.** + + :returns: `True` if at least one redaction annotation has been processed, `False` otherwise. + + .. note:: + * Text contained in a redaction rectangle will be **physically** removed from the page (assuming :meth:`Document.save` with a suitable garbage option) and will no longer appear in e.g. text extractions or anywhere else. All redaction annotations will also be removed. Other annotations are unaffected. + + * All overlapping links will be removed. If the rectangle of the link was covering text, then only the overlapping part of the text is being removed. Similar applies to images covered by link rectangles. + + * The overlapping parts of **images** will be blanked-out for default option `PDF_REDACT_IMAGE_PIXELS` (changed in v1.18.0). Option 0 does not touch any images and 1 will remove any image with an overlap. + + * For option `images=PDF_REDACT_IMAGE_REMOVE` only this page's **references to the images** are removed - not necessarily the images themselves. Images are completely removed from the file only, if no longer referenced at all (assuming suitable garbage collection options). + + * For option `images=PDF_REDACT_IMAGE_PIXELS` a new image of format PNG is created, which the page will use in place of the original one. The original image is not deleted or replaced as part of this process, so other pages may still show the original. In addition, the new, modified PNG image currently is **stored uncompressed**. Do keep these aspects in mind when choosing the right garbage collection method and compression options during save. + + * **Text removal** is done by character: A character is removed if its bbox has a **non-empty overlap** with a redaction rectangle (changed in MuPDF v1.17). Depending on the font properties and / or the chosen line height, deletion may occur for undesired text parts. Using :meth:`Tools.set_small_glyph_heights` with a ``True`` argument before text search may help to prevent this. + + * Redactions are a simple way to replace single words in a PDF, or to just physically remove them. Locate the word "secret" using some text extraction or search method and insert a redaction using "xxxxxx" as replacement text for each occurrence. + + - Be wary if the replacement is longer than the original -- this may lead to an awkward appearance, line breaks or no new text at all. + + - For a number of reasons, the new text may not exactly be positioned on the same line like the old one -- especially true if the replacement font was not one of CJK or :ref:`Base-14-Fonts`. + + |history_begin| + + * New in v1.16.11 + * Changed in v1.16.12: The previous *mark* parameter is gone. Instead, the respective rectangles are filled with the individual *fill* color of each redaction annotation. If a *text* was given in the annotation, then :meth:`insert_textbox` is invoked to insert it, using parameters provided with the redaction. + * Changed in v1.18.0: added option for handling images that overlap redaction areas. + * Changed in v1.23.27: added option for removing graphics as well. + * Changed in v1.24.2: added option `keep_text` to leave text untouched. + + |history_end| + + --------- .. method:: add_polyline_annot(points) @@ -330,42 +414,171 @@ In a nutshell, this is what you can do with PyMuPDF: >>> page.add_highlight_annot(quads) .. note:: - Obviously, text marker annotations need to know what is the top, the bottom, the left, and the right side of the area(s) to be marked. If the arguments are quads, this information is given by the sequence of the quad points. In contrast, a rectangle delivers much less information -- this is illustrated by the fact, that 4! = 24 different quads can be constructed with the four corners of a reactangle. + Obviously, text marker annotations need to know what is the top, the bottom, the left, and the right side of the area(s) to be marked. If the arguments are quads, this information is given by the sequence of the quad points. In contrast, a rectangle delivers much less information -- this is illustrated by the fact, that 4! = 24 different quads can be constructed with the four corners of a rectangle. Therefore, we **strongly recommend** to use the `quads` option for text searches, to ensure correct annotations. A similar consideration applies to marking **text spans** extracted with the "dict" / "rawdict" options of :meth:`Page.get_text`. For more details on how to compute quadrilaterals in this case, see section "How to Mark Non-horizontal Text" of :ref:`FAQ`. - :arg rect_like,quad_like,list,tuple quads: *(Changed in v1.14.20)* the location(s) -- rectangle(s) or quad(s) -- to be marked. A list or tuple must consist of :data:`rect_like` or :data:`quad_like` items (or even a mixture of either). Every item must be finite, convex and not empty (as applicable). *(Changed in v1.16.14)* **Set this parameter to** *None* if you want to use the following arguments. And vice versa: if not *None*, the remaining parameters must be *None*. - :arg point_like start: *(New in v1.16.14)* start text marking at this point. Defaults to the top-left point of *clip*. Must be provided if `quads` is *None*. - :arg point_like stop: *(New in v1.16.14)* stop text marking at this point. Defaults to the bottom-right point of *clip*. Must be used if `quads` is *None*. - :arg rect_like clip: *(New in v1.16.14)* only consider text lines intersecting this area. Defaults to the page rectangle. Only use if `start` and `stop` are provided. + :arg rect_like,quad_like,list,tuple quads: + the location(s) -- rectangle(s) or quad(s) -- to be marked. (Changed in v1.14.20) + A list or tuple must consist of :data:`rect_like` or :data:`quad_like` items (or even a mixture of either). + Every item must be finite, convex and not empty (as applicable). + **Set this parameter to** ``None`` if you want to use the following arguments (Changed in v1.16.14). + And vice versa: if not ``None``, the remaining parameters must be ``None``. + + :arg point_like start: start text marking at this point. Defaults to the top-left point of *clip*. Must be provided if `quads` is ``None``. (New in v1.16.14) + :arg point_like stop: stop text marking at this point. Defaults to the bottom-right point of *clip*. Must be used if `quads` is ``None``. (New in v1.16.14) + :arg rect_like clip: only consider text lines intersecting this area. Defaults to the page rectangle. Only use if `start` and `stop` are provided. (New in v1.16.14) - :rtype: :ref:`Annot` or *(changed in v1.16.14)* *None* - :returns: the created annotation. *(Changed in v1.16.14)* If *quads* is an empty list, **no annotation** is created. + :rtype: :ref:`Annot` or ``None`` (changed in v1.16.14). + :returns: the created annotation. If *quads* is an empty list, **no annotation** is created (changed in v1.16.14). - .. note:: Starting with v1.16.14 you can use parameters *start*, *stop* and *clip* to highlight consecutive lines between the points *start* and *stop*. Make use of *clip* to further reduce the selected line bboxes and thus deal with e.g. multi-column pages. The following multi-line highlight on a page with three text columns was created by specifying the two red points and setting clip accordingly. + .. note:: + You can use parameters *start*, *stop* and *clip* to highlight consecutive lines between the points *start* and *stop* (starting with v1.16.14). + Make use of *clip* to further reduce the selected line bboxes and thus deal with e.g. multi-column pages. + The following multi-line highlight on a page with three text columns was created by specifying the two red points and setting clip accordingly. .. image:: images/img-markers.* :scale: 100 + .. method:: cluster_drawings(clip=None, drawings=None, x_tolerance=3, y_tolerance=3, final_filter=True) + + Cluster vector graphics (synonyms are line-art or drawings) based on their geometrical vicinity. The method walks through the output of :meth:`Page.get_drawings` and joins paths whose `path["rect"]` are closer to each other than some tolerance values (given in the arguments). The result is a list of rectangles that each wrap things like tables (with gridlines), pie charts, bar charts, etc. + + :arg rect_like clip: only consider paths inside this area. The default is the full page. + + :arg list drawings: (optional) provide a previously generated output of :meth:`Page.get_drawings`. If `None` the method will execute the method. + + :arg float x_tolerance / y_tolerance: Assume vector graphics to be close enough neighbors for belonging to the same rectangle. Default is 3 points. + + :arg bool final_filter: If `True` (default), the method will to remove rectangles having width or height smaller than the respective tolerance value. If `False` no such filtering is done. + + .. method:: find_tables(clip=None, strategy=None, vertical_strategy=None, horizontal_strategy=None, vertical_lines=None, horizontal_lines=None, snap_tolerance=None, snap_x_tolerance=None, snap_y_tolerance=None, join_tolerance=None, join_x_tolerance=None, join_y_tolerance=None, edge_min_length=3, min_words_vertical=3, min_words_horizontal=1, intersection_tolerance=None, intersection_x_tolerance=None, intersection_y_tolerance=None, text_tolerance=None, text_x_tolerance=None, text_y_tolerance=None, add_lines=None, add_boxes=None, paths=None) + + Find tables on the page and return an object with related information. Typically, the default values of the many parameters will be sufficient. Adjustments should ever only be needed in corner case situations. + + :arg rect_like clip: specify a region to consider within the page rectangle and ignore the rest. Default is the full page. + + :arg str strategy: Request a **table detection** strategy. Valid values are "lines", "lines_strict" and "text". + + Default is **"lines"** which uses all vector graphics on the page to detect grid lines. + + Strategy **"lines_strict"** ignores borderless rectangle vector graphics. Sometimes single text pieces have background colors which may lead to false columns or lines. This strategy ignores them and can thus increase detection precision. + + If **"text"** is specified, text positions are used to generate "virtual" column and / or row boundaries. Use `min_words_*` to request the number of words for considering their coordinates. + + Use parameters `vertical_strategy` and `horizontal_strategy` **instead** for a more fine-grained treatment of the dimensions. + + :arg sequence[floats] horizontal_lines: y-coordinates of rows. If provided, there will be no attempt to identify additional table rows. This influences table detection. + + :arg sequence[floats] vertical_lines: x-coordinates of columns. If provided, there will be no attempt to identify additional table columns. This influences table detection. + + :arg int min_words_vertical: relevant for vertical strategy option "text": at least this many words must coincide to establish a **virtual column** boundary. + + :arg int min_words_horizontal: relevant for horizontal strategy option "text": at least this many words must coincide to establish a **virtual row** boundary. + + :arg float snap_tolerance: Any two horizontal lines whose y-values differ by no more than this value will be **snapped** into one. Accordingly for vertical lines. Default is 3. Separate values can be specified instead for the dimensions, using `snap_x_tolerance` and `snap_y_tolerance`. + + :arg float join_tolerance: Any two lines will be **joined** to one if the end and the start points differ by no more than this value (in points). Default is 3. Instead of this value, separate values can be specified for the dimensions using `join_x_tolerance` and `join_y_tolerance`. + + :arg float edge_min_length: Ignore a line if its length does not exceed this value (points). Default is 3. + + :arg float intersection_tolerance: When combining lines into cell borders, orthogonal lines must be within this value (points) to be considered intersecting. Default is 3. Instead of this value, separate values can be specified for the dimensions using `intersection_x_tolerance` and `intersection_y_tolerance`. + + :arg float text_tolerance: Characters will be combined into words only if their distance is no larger than this value (points). Default is 3. Instead of this value, separate values can be specified for the dimensions using `text_x_tolerance` and `text_y_tolerance`. + + :arg tuple,list add_lines: Specify a list of "lines" (i.e. pairs of :data:`point_like` objects) as **additional**, "virtual" vector graphics. These lines may help with table and / or cell detection and will not otherwise influence the detection strategy. Especially, in contrast to parameters `horizontal_lines` and `vertical_lines`, they will not prevent detecting rows or columns in other ways. These lines will be treated exactly like "real" vector graphics in terms of joining, snapping, intersecting, minimum length and containment in the `clip` rectangle. Similarly, lines not parallel to any of the coordinate axes will be ignored. + + :arg tuple,list add_boxes: Specify a list of rectangles (:data:`rect_like` objects) as **additional**, "virtual" vector graphics. These rectangles may help with table and / or cell detection and will not otherwise influence the detection strategy. Especially, in contrast to parameters `horizontal_lines` and `vertical_lines`, they will not prevent detecting rows or columns in other ways. These rectangles will be treated exactly like "real" vector graphics in terms of joining, snapping, intersecting, minimum length and containment in the `clip` rectangle. + + :arg list paths: list of vector graphics in the format as returned be :meth:`Page.get_drawings`. Using this parameter will prevent the method to extract vector graphics itself. This is useful if the vector graphics are already available. This can save execution time significantly. + + .. image:: images/img-findtables.* + + :returns: a `TableFinder` object that has the following significant attributes: + + * `cells`: a list of **all bboxes** on the page, that have been identified as table cells (across all tables). Each cell is a :data:`rect_like` tuple `(x0, y0, x1, y1)` of coordinates or `None`. + * `tables`: a list of `Table` objects. This is `[]` if the page has no tables. Single tables can be found as items of this list. But the `TableFinder` object itself is also a sequence of its tables. This means that if `tabs` is a `TableFinder` object, then table "n" is delivered by `tabs.tables[n]` as well as by the shorter `tabs[n]`. + + + * The `Table` object has the following attributes: + + * ``bbox``: the bounding box of the table as a tuple `(x0, y0, x1, y1)`. + * ``cells``: bounding boxes of the table's cells (list of tuples). A cell may also be `None`. + * ``extract()``: this method returns the text content of each table cell as a list of list of strings. + * ``to_markdown()``: this method returns the table as a **string in markdown format** (compatible to Github). Markdown viewers can render the string as a table. This output is optimized for **small token** sizes, which is especially beneficial for LLM/RAG feeds. Pandas DataFrames (see method `to_pandas()` below) offer an equivalent markdown table output which however is better readable for the human eye. Any line breaks (``\n``) in cells are replaced by HTML line breaks tags `
`. + * `to_pandas()`: this method returns the table as a `pandas `_ `DataFrame `_. DataFrames are very versatile objects allowing a plethora of table manipulation methods and outputs to almost 20 well-known formats, among them Excel files, CSV, JSON, markdown-formatted tables and more. `DataFrame.to_markdown()` generates a Github-compatible markdown format optimized for human readability. This method however requires the package `tabulate `_ to be installed in addition to pandas itself. + * ``header``: a `TableHeader` object containing header information of the table. + * ``col_count``: an integer containing the number of table columns. + * ``row_count``: an integer containing the number of table rows. + * ``rows``: a list of `TableRow` objects containing two attributes, ``bbox`` is the boundary box of the row, and `cells` is a list of table cells contained in this row. + + * The `TableHeader` object has the following attributes: + + * ``bbox``: the bounding box of the header. + * `cells`: a list of bounding boxes containing the name of the respective column. + * `names`: a list of strings containing the text of each of the cell bboxes. They represent the column names -- which are used when exporting the table to pandas DataFrames, markdown, etc. + * `external`: a bool indicating whether the header bbox is outside the table body (`True`) or not. Table headers are never identified by the `TableFinder` logic. Therefore, if `external` is true, then the header cells are not part of any cell identified by `TableFinder`. If `external == False`, then the first table row is the header. + + Please have a look at these `Jupyter notebooks `_, which cover standard situations like multiple tables on one page or joining table fragments across multiple pages. + + .. caution:: The lifetime of the `TableFinder` object, as well as that of all its tables **equals the lifetime of the page**. If the page object is deleted or reassigned, all tables are no longer valid. + + The only way to keep table content beyond the page's availability is to **extract it** via methods `Table.to_markdown()`, `Table.to_pandas()` or a copy of `Table.extract()` (e.g. `Table.extract()[:]`). + + .. note:: + + Once a table has been extracted to a **Pandas DataFrame** with `to_pandas()` it is easy to convert to other file types with the **Pandas API**: + + - table to Markdown, use `to_markdown `_ + - table to JSON, use: `to_json `_ + - table to Excel, use: `to_excel `_ + - table to CSV, use: `to_csv `_ + - table to HTML, use: `to_html `_ + - table to SQL, use: `to_sql `_ + + + |history_begin| + + * New in version 1.23.0 + * Changed in version 1.23.19: new argument `add_lines`. + + |history_end| + + .. important:: + + There is also the `pdf2docx extract tables method`_ which is capable of table extraction if you prefer. + + .. method:: add_stamp_annot(rect, stamp=0) - PDF only: Add a "rubber stamp" like annotation to e.g. indicate the document's intended use ("DRAFT", "CONFIDENTIAL", etc.). + PDF only: Add a "rubber stamp" annotation to e.g. indicate the document's intended use ("DRAFT", "CONFIDENTIAL", etc.). The parameter may be either an integer to select text from a predefined array of standard texts or an image. :arg rect_like rect: rectangle where to place the annotation. + :arg multiple stamp: The following options are available: + + * The id number (int) of the stamp text. For available stamps see :ref:`StampIcons`. + + * A string specifying an image file path. - :arg int stamp: id number of the stamp text. For available stamps see :ref:`StampIcons`. + * A ``bytes``, ``bytearray`` or ``io.BytesIO`` object for an image in memory. - .. note:: + * A :ref:`Pixmap`. + + 1. **Text-based stamps** - * The stamp's text and its border line will automatically be sized and be put horizontally and vertically centered in the given rectangle. :attr:`Annot.rect` is automatically calculated to fit the given **width** and will usually be smaller than this parameter. + * :attr:`Annot.rect` is automatically calculated as the largest rectangle with an aspect ratio of ``width:height = 3.8`` that fits in the provided ``rect``. Its position is vertically and horizontally centered. * The font chosen is "Times Bold" and the text will be upper case. - * The appearance can be changed using :meth:`Annot.set_opacity` and by setting the "stroke" color (no "fill" color supported). - * This can be used to create watermark images: on a temporary PDF page create a stamp annotation with a low opacity value, make a pixmap from it with *alpha=True* (and potentially also rotate it), discard the temporary PDF page and use the pixmap with :meth:`insert_image` for your target PDF. + * The appearance can be modified using :meth:`Annot.set_opacity` and by setting the "stroke" color. By PDF specification, stamp annotations have no "fill" color. + .. image:: images/img-stampannot.* - .. image :: images/img-stampannot.* - :scale: 80 + 2. **Image-based stamps** + * The image is scaled to fit into the rectangle `rect` such that the image's center and the center of `rect` coincide. The aspect ratio of the image is preserved, so the image may not fill the entire rectangle. However, at least one of the given rectangle's width or height are fully covered. + * The annotation can be modified via :meth:`Annot.set_opacity`. This method therefore is a way to display images transparently even if no alpha channel is present. + * Setting colors has no effect on image stamps. + * Rotating image-based stamps **is not supported**. Setting the rotation may lead to unexpected results. + .. method:: add_widget(widget) PDF only: Add a PDF Form field ("widget") to a page. This also **turns the PDF into a Form PDF**. Because of the large amount of different options available for widgets, we have developed a new class :ref:`Widget`, which contains the possible PDF field attributes. It must be used for both, form field creation and updates. @@ -377,7 +590,7 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: delete_annot(annot) - * Changed in v1.16.6: The removal will now include any bound 'Popup' or response annotations and related objects. + * The removal will now include any bound 'Popup' or response annotations and related objects (changed in v1.16.6). PDF only: Delete annotation from the page and return the next one. @@ -389,8 +602,6 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: delete_widget(widget) - * New in v1.18.4 - PDF only: Delete field from the page and return the next one. :arg widget: the widget to be deleted. @@ -399,55 +610,28 @@ In a nutshell, this is what you can do with PyMuPDF: :rtype: :ref:`Widget` :returns: the widget following the deleted one. Please remember that physical removal requires saving to a new file with garbage > 0. - .. method:: apply_redactions(images=PDF_REDACT_IMAGE_PIXELS) - - * New in v1.16.11 - * Changed in v1.16.12: The previous *mark* parameter is gone. Instead, the respective rectangles are filled with the individual *fill* color of each redaction annotation. If a *text* was given in the annotation, then :meth:`insert_textbox` is invoked to insert it, using parameters provided with the redaction. - * Changed in v1.18.0: added option for handling images that overlap redaction areas. - - PDF only: Remove all **text content** contained in any redaction rectangle. - - **This method applies and then deletes all redactions from the page.** - - :arg int images: How to redact overlapping images. The default (2) blanks out overlapping pixels. *PDF_REDACT_IMAGE_NONE* (0) ignores, and *PDF_REDACT_IMAGE_REMOVE* (1) completely removes all overlapping images. - - - :returns: *True* if at least one redaction annotation has been processed, *False* otherwise. - - .. note:: - * Text contained in a redaction rectangle will be **physically** removed from the page (assuming :meth:`Document.save` with a suitable garbage option) and will no longer appear in e.g. text extractions or anywhere else. All redaction annotations will also be removed. Other annotations are unaffected. - - * All overlapping links will be removed. If the rectangle of the link was covering text, then only the overlapping part of the text is being removed. Similar applies to images covered by link rectangles. - - * *(Changed in v1.18.0)* The overlapping parts of **images** will be blanked-out for default option `PDF_REDACT_IMAGE_PIXELS`. Option 0 does not touch any images and 1 will remove any image with an overlap. Please be aware that there is a bug for option *PDF_REDACT_IMAGE_PIXELS = 2*: transparent images will be incorrectly handled! - - * For option `images=PDF_REDACT_IMAGE_REMOVE` only this page's **references to the images** are removed - not necessarily the images themselves. Images are completely removed from the file only, if no longer referenced at all (assuming suitable garbage collection options). - - * For option `images=PDF_REDACT_IMAGE_PIXELS` a new image of format PNG is created, which the page will use in place of the original one. The original image is not deleted or replaced as part of this process, so other pages may still show the original. In addition, the new, modified PNG image currently is **stored uncompressed**. Do keep these aspects in mind when choosing the right garbage collection method and compression options during save. - - * **Text removal** is done by character: A character is removed if its bbox has a **non-empty overlap** with a redaction rectangle *(changed in MuPDF v1.17)*. Depending on the font properties and / or the chosen line height, deletion may occur for undesired text parts. Using :meth:`Tools.set_small_glyph_heights` with a *True* argument before text search may help to prevent this. + |history_begin| - * Redactions are a simple way to replace single words in a PDF, or to just physically remove them. Locate the word "secret" using some text extraction or search method and insert a redaction using "xxxxxx" as replacement text for each occurrence. + (New in v1.18.4) - - Be wary if the replacement is longer than the original -- this may lead to an awkward appearance, line breaks or no new text at all. + |history_end| - - For a number of reasons, the new text may not exactly be positioned on the same line like the old one -- especially true if the replacement font was not one of CJK or :ref:`Base-14-Fonts`. .. method:: delete_link(linkdict) - PDF only: Delete the specified link from the page. The parameter must be an **original item** of :meth:`get_links()` (see below). The reason for this is the dictionary's *"xref"* key, which identifies the PDF object to be deleted. + PDF only: Delete the specified link from the page. The parameter must be an **original item** of :meth:`get_links()`, see :ref:`link_dict_description`. The reason for this is the dictionary's *"xref"* key, which identifies the PDF object to be deleted. :arg dict linkdict: the link to be deleted. .. method:: insert_link(linkdict) - PDF only: Insert a new link on this page. The parameter must be a dictionary of format as provided by :meth:`get_links()` (see below). + PDF only: Insert a new link on this page. The parameter must be a dictionary of format as provided by :meth:`get_links()`, see :ref:`link_dict_description`. :arg dict linkdict: the link to be inserted. .. method:: update_link(linkdict) - PDF only: Modify the specified link. The parameter must be a (modified) **original item** of :meth:`get_links()` (see below). The reason for this is the dictionary's *"xref"* key, which identifies the PDF object to be changed. + PDF only: Modify the specified link. The parameter must be a (modified) **original item** of :meth:`get_links()`, see :ref:`link_dict_description`. The reason for this is the dictionary's *"xref"* key, which identifies the PDF object to be changed. :arg dict linkdict: the link to be modified. @@ -456,41 +640,45 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_label() - * New in v1.18.6 - PDF only: Return the label for the page. :rtype: str :returns: the label string like "vii" for Roman numbering or "" if not defined. + |history_begin| + + * New in v1.18.6 + |history_end| .. method:: get_links() Retrieves **all** links of a page. :rtype: list - :returns: A list of dictionaries. For a description of the dictionary entries see below. Always use this or the :meth:`Page.links` method if you intend to make changes to the links of a page. + :returns: A list of dictionaries. For a description of the dictionary entries, see :ref:`link_dict_description`. Always use this or the :meth:`Page.links` method if you intend to make changes to the links of a page. .. method:: links(kinds=None) - * New in v1.16.4 - Return a generator over the page's links. The results equal the entries of :meth:`Page.get_links`. - :arg sequence kinds: a sequence of integers to down-select to one or more link kinds. Default is all links. Example: *kinds=(fitz.LINK_GOTO,)* will only return internal links. + :arg sequence kinds: a sequence of integers to down-select to one or more link kinds. Default is all links. Example: *kinds=(pymupdf.LINK_GOTO,)* will only return internal links. :rtype: generator :returns: an entry of :meth:`Page.get_links()` for each iteration. - .. method:: annots(types=None) + |history_begin| * New in v1.16.4 + |history_end| + + .. method:: annots(types=None) + Return a generator over the page's annotations. - :arg sequence types: a sequence of integers to down-select to one or more annotation types. Default is all annotations. Example: `types=(fitz.PDF_ANNOT_FREETEXT, fitz.PDF_ANNOT_TEXT)` will only return 'FreeText' and 'Text' annotations. + :arg sequence types: a sequence of integers to down-select to one or more annotation types. Default is all annotations. Example: `types=(pymupdf.PDF_ANNOT_FREETEXT, pymupdf.PDF_ANNOT_TEXT)` will only return 'FreeText' and 'Text' annotations. :rtype: generator :returns: an :ref:`Annot` for each iteration. @@ -505,21 +693,29 @@ In a nutshell, this is what you can do with PyMuPDF: ...: page = doc.reload_page(page) In [6]: - .. method:: widgets(types=None) + |history_begin| * New in v1.16.4 + |history_end| + + .. method:: widgets(types=None) + Return a generator over the page's form fields. - :arg sequence types: a sequence of integers to down-select to one or more widget types. Default is all form fields. Example: `types=(fitz.PDF_WIDGET_TYPE_TEXT,)` will only return 'Text' fields. + :arg sequence types: a sequence of integers to down-select to one or more widget types. Default is all form fields. Example: `types=(pymupdf.PDF_WIDGET_TYPE_TEXT,)` will only return 'Text' fields. :rtype: generator :returns: a :ref:`Widget` for each iteration. + |history_begin| - .. method:: write_text(rect=None, writers=None, overlay=True, color=None, opacity=None, keep_proportion=True, rotate=0, oc=0) + * New in v1.16.4 - * New in v1.16.18 + |history_end| + + + .. method:: write_text(rect=None, writers=None, overlay=True, color=None, opacity=None, keep_proportion=True, rotate=0, oc=0) PDF only: Write the text of one or more :ref:`Textwriter` objects to the page. @@ -530,10 +726,16 @@ In a nutshell, this is what you can do with PyMuPDF: :arg bool overlay: put the text in foreground or background. :arg bool keep_proportion: maintain the aspect ratio. :arg float rotate: rotate the text by an arbitrary angle. - :arg int oc: *(new in v1.18.4)* the :data:`xref` of an :data:`OCG` or :data:`OCMD`. + :arg int oc: the :data:`xref` of an :data:`OCG` or :data:`OCMD`. (New in v1.18.4) .. note:: Parameters *overlay, keep_proportion, rotate* and *oc* have the same meaning as in :meth:`Page.show_pdf_page`. + |history_begin| + + * New in v1.16.18 + + |history_end| + .. index:: pair: border_width; insert_text @@ -546,16 +748,21 @@ In a nutshell, this is what you can do with PyMuPDF: pair: morph; insert_text pair: overlay; insert_text pair: render_mode; insert_text + pair: miter_limit; insert_text pair: rotate; insert_text pair: stroke_opacity; insert_text pair: fill_opacity; insert_text pair: oc; insert_text - .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=1, encoding=TEXT_ENCODING_LATIN, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, overlay=True, oc=0) + .. method:: insert_text(point, text, *, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, miter_limit=1, border_width=0.05, encoding=TEXT_ENCODING_LATIN, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, overlay=True, oc=0) + + PDF only: Insert text lines starting at :data:`point_like` ``point``. See :meth:`Shape.insert_text`. + + |history_begin| * Changed in v1.18.4 - PDF only: Insert text starting at :data:`point_like` *point*. See :meth:`Shape.insert_text`. + |history_end| .. index:: pair: align; insert_textbox @@ -570,16 +777,100 @@ In a nutshell, this is what you can do with PyMuPDF: pair: morph; insert_textbox pair: overlay; insert_textbox pair: render_mode; insert_textbox + pair: miter_limit; insert_textbox pair: rotate; insert_textbox pair: stroke_opacity; insert_textbox pair: fill_opacity; insert_textbox pair: oc; insert_textbox - .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, border_width=1, encoding=TEXT_ENCODING_LATIN, expandtabs=8, align=TEXT_ALIGN_LEFT, charwidths=None, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0, overlay=True) + .. method:: insert_textbox(rect, buffer, *, fontsize=11, fontname="helv", fontfile=None, idx=0, color=None, fill=None, render_mode=0, miter_limit=1, border_width=1, encoding=TEXT_ENCODING_LATIN, expandtabs=8, align=TEXT_ALIGN_LEFT, charwidths=None, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0, overlay=True) + + PDF only: Insert text into the specified :data:`rect_like` *rect*. See :meth:`Shape.insert_textbox`. + + |history_begin| * Changed in v1.18.4 - PDF only: Insert text into the specified :data:`rect_like` *rect*. See :meth:`Shape.insert_textbox`. + |history_end| + + .. index:: + pair: rect; insert_htmlbox + pair: text; insert_htmlbox + pair: css; insert_htmlbox + pair: adjust; insert_htmlbox + pair: archive; insert_htmlbox + pair: overlay; insert_htmlbox + pair: rotate; insert_htmlbox + pair: oc; insert_htmlbox + pair: opacity; insert_htmlbox + pair: morph; insert_htmlbox + + .. method:: insert_htmlbox(rect, text, *, css=None, scale_low=0, archive=None, rotate=0, oc=0, opacity=1, overlay=True) + + **PDF only:** Insert text into the specified rectangle. The method has similarities with methods :meth:`Page.insert_textbox` and :meth:`TextWriter.fill_textbox`, but is **much more powerful**. This is achieved by letting a :ref:`Story` object do all the required processing. + + * Parameter ``text`` may be a string as in the other methods. But it will be **interpreted as HTML source** and may therefore also contain HTML language elements -- including styling. The `css` parameter may be used to pass in additional styling instructions. + + * Automatic line breaks are generated at word boundaries. The "soft hyphen" character `"­"` (or `­`) can be used to cause hyphenation and thus may also cause line breaks. **Forced** line breaks however are only achievable via the HTML tag ``
`` - ``\n`` is ignored and will be treated like a space. + + * With this method the following can be achieved: + + - Styling effects like bold, italic, text color, text alignment, font size or font switching. + - The text may include arbitrary languages -- **including right-to-left** languages. + - Scripts like `Devanagari `_ and several others in Asia have a highly complex system of ligatures, where two or more unicodes together yield one glyph. The Story uses the software package `HarfBuzz `_ , to deal with these things and produce correct output. + - One can also **include images** via HTML tag `` -- the Story will take care of the appropriate layout. This is an alternative option to insert images, compared to :meth:`Page.insert_image`. + - HTML tables (tag `
`) may be included in the text and will be handled appropriately. + - Links are automatically generated when present. + + * If content does not fit in the rectangle, the developer has two choices: + + - **either** only be informed about this (and accept a no-op, just like with the other textbox insertion methods), + - **or** (`scale_low=0` - the default) scale down the content until it fits. + + :arg rect_like rect: rectangle on page to receive the text. + :arg str,Story text: the text to be written. Can contain a mixture of plain text and HTML tags with styling instructions. Alternatively, a :ref:`Story` object may be specified (in which case the internal Story generation step will be omitted). A Story must have been generated with all required styling and Archive information. + :arg str css: optional string containing additional CSS instructions. This parameter is ignored if ``text`` is a Story. + :arg float scale_low: if necessary, scale down the content until it fits in the target rectangle. This sets the down scaling limit. Default is 0, no limit. A value of 1 means no down-scaling permitted. A value of e.g. 0.2 means maximum down-scaling by 80%. + :arg Archive archive: an Archive object that points to locations where to find images or non-standard fonts. If ``text`` refers to images or non-standard fonts, this parameter is required. This parameter is ignored if ``text`` is a Story. + :arg int rotate: one of the values 0, 90, 180, 270. Depending on this, text will be filled: + + - 0: top-left to bottom-right. + - 90: bottom-left to top-right. + - 180: bottom-right to top-left. + - 270: top-right to bottom-left. + + .. image:: images/img-rotate.* + + :arg int oc: the xref of an :data:`OCG` / :data:`OCMD` or 0. Please refer to :meth:`Page.show_pdf_page` for details. + :arg float opacity: set the fill and stroke opacity of the content. Only values `0 <= opacity < 1` are considered. + :arg bool overlay: put the text in front of other content. Please refer to :meth:`Page.show_pdf_page` for details. + + :returns: A tuple of floats `(spare_height, scale)`. + + - spare_height: The (positive) height of the remaining space in `rect` below the + text, or -1 if we failed to fit. + - scale: The scaling required; `0 < scale <= 1`. Will be `scale_low` + if we failed to fit. + + Please refer to examples in this section of the recipes: :ref:`RecipesText_I_c`. + + |history_begin| + + * New in v1.26.5: + + * do additional scaling to fit long words. + * + If we succeeded and scaled down, the returned `spare_height` is now + generally positive instead of being fixed to zero, because the final + rect's height is usually not an exact multiple of the font line + height. + * New in v1.23.8: rebased-only. + * New in v1.23.9: `opacity` parameter. + + |history_end| + + + **Drawing Methods** .. index:: pair: closePath; draw_line @@ -596,11 +887,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_line pair: oc; draw_line - .. method:: draw_line(p1, p2, color=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_line(p1, p2, color=(0,), width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_line`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_line`. + |history_end| .. index:: pair: breadth; draw_zigzag @@ -617,11 +912,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_zigzag pair: oc; draw_zigzag - .. method:: draw_zigzag(p1, p2, breadth=2, color=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_zigzag(p1, p2, breadth=2, color=(0,), width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a zigzag line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_zigzag`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a zigzag line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_zigzag`. + |history_end| .. index:: pair: breadth; draw_squiggle @@ -638,11 +937,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_squiggle pair: oc; draw_squiggle - .. method:: draw_squiggle(p1, p2, breadth=2, color=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_squiggle(p1, p2, breadth=2, color=(0,), width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a squiggly (wavy, undulated) line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_squiggle`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a squiggly (wavy, undulated) line from *p1* to *p2* (:data:`point_like` \s). See :meth:`Shape.draw_squiggle`. + |history_end| .. index:: pair: closePath; draw_circle @@ -658,11 +961,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_circle pair: oc; draw_circle - .. method:: draw_circle(center, radius, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_circle(center, radius, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a circle around *center* (:data:`point_like`) with a radius of *radius*. See :meth:`Shape.draw_circle`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a circle around *center* (:data:`point_like`) with a radius of *radius*. See :meth:`Shape.draw_circle`. + |history_end| .. index:: pair: closePath; draw_oval @@ -678,11 +985,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_oval pair: oc; draw_oval - .. method:: draw_oval(quad, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_oval(quad, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw an oval (ellipse) within the given :data:`rect_like` or :data:`quad_like`. See :meth:`Shape.draw_oval`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw an oval (ellipse) within the given :data:`rect_like` or :data:`quad_like`. See :meth:`Shape.draw_oval`. + |history_end| .. index:: pair: closePath; draw_sector @@ -699,11 +1010,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_sector pair: oc; draw_sector - .. method:: draw_sector(center, point, angle, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, fullSector=True, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_sector(center, point, angle, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, fullSector=True, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a circular sector, optionally connecting the arc to the circle's center (like a piece of pie). See :meth:`Shape.draw_sector`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a circular sector, optionally connecting the arc to the circle's center (like a piece of pie). See :meth:`Shape.draw_sector`. + |history_end| .. index:: pair: closePath; draw_polyline @@ -719,11 +1034,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_polyline pair: oc; draw_polyline - .. method:: draw_polyline(points, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_polyline(points, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw several connected lines defined by a sequence of :data:`point_like` \s. See :meth:`Shape.draw_polyline`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw several connected lines defined by a sequence of :data:`point_like` \s. See :meth:`Shape.draw_polyline`. + |history_end| .. index:: @@ -740,11 +1059,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_bezier pair: oc; draw_bezier - .. method:: draw_bezier(p1, p2, p3, p4, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_bezier(p1, p2, p3, p4, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a cubic Bézier curve from *p1* to *p4* with the control points *p2* and *p3* (all are :data:`point_like` \s). See :meth:`Shape.draw_bezier`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a cubic Bézier curve from *p1* to *p4* with the control points *p2* and *p3* (all are :data:`point_like` \s). See :meth:`Shape.draw_bezier`. + |history_end| .. index:: pair: closePath; draw_curve @@ -760,11 +1083,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_curve pair: oc; draw_curve - .. method:: draw_curve(p1, p2, p3, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_curve(p1, p2, p3, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, closePath=False, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: This is a special case of *draw_bezier()*. See :meth:`Shape.draw_curve`. + + |history_begin| * Changed in v1.18.4 - PDF only: This is a special case of *draw_bezier()*. See :meth:`Shape.draw_curve`. + |history_end| .. index:: pair: closePath; draw_rect @@ -781,12 +1108,16 @@ In a nutshell, this is what you can do with PyMuPDF: pair: radius; draw_rect pair: oc; draw_rect - .. method:: draw_rect(rect, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, radius=None, oc=0) + .. method:: draw_rect(rect, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, radius=None, oc=0) + + PDF only: Draw a rectangle. See :meth:`Shape.draw_rect`. + + |history_begin| * Changed in v1.18.4 * Changed in v1.22.0: Added parameter *radius*. - PDF only: Draw a rectangle. See :meth:`Shape.draw_rect`. + |history_end| .. index:: pair: closePath; draw_quad @@ -802,11 +1133,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: fill_opacity; draw_quad pair: oc; draw_quad - .. method:: draw_quad(quad, color=None, fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: draw_quad(quad, color=(0,), fill=None, width=1, dashes=None, lineCap=0, lineJoin=0, overlay=True, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + + PDF only: Draw a quadrilateral. See :meth:`Shape.draw_quad`. + + |history_begin| * Changed in v1.18.4 - PDF only: Draw a quadrilateral. See :meth:`Shape.draw_quad`. + |history_end| .. index:: @@ -822,9 +1157,9 @@ In a nutshell, this is what you can do with PyMuPDF: :arg str fontname: The name by which this font shall be referenced when outputting text on this page. In general, you have a "free" choice here (but consult the :ref:`AdobeManual`, page 16, section 7.3.5 for a formal description of building legal PDF names). However, if it matches one of the :data:`Base14_Fonts` or one of the CJK fonts, *fontfile* and *fontbuffer* **are ignored**. - In other words, you cannot insert a font via *fontfile* / *fontbuffer* and also give it a reserved *fontname*. + In other words, you cannot insert a font via *fontfile* / *fontbuffer* and also give it a reserved *fontname*. - .. note:: A reserved fontname can be specified in any mixture of upper or lower case and still match the right built-in font definition: fontnames "helv", "Helv", "HELV", "Helvetica", etc. all lead to the same font definition "Helvetica". But from a :ref:`Page` perspective, these are **different references**. You can exploit this fact when using different *encoding* variants (Latin, Greek, Cyrillic) of the same font on a page. + .. note:: A reserved fontname can be specified in any mixture of upper or lower case and still match the right built-in font definition: fontnames "helv", "Helv", "HELV", "Helvetica", etc. all lead to the same font definition "Helvetica". But from a :ref:`Page` perspective, these are **different references**. You can exploit this fact when using different *encoding* variants (Latin, Greek, Cyrillic) of the same font on a page. :arg str fontfile: a path to a font file. If used, *fontname* must be **different from all reserved names**. @@ -886,52 +1221,57 @@ In a nutshell, this is what you can do with PyMuPDF: pair: oc; insert_image pair: xref; insert_image - .. method:: insert_image(rect, filename=None, pixmap=None, stream=None, mask=None, rotate=0, alpha=-1, oc=0, xref=0, keep_proportion=True, overlay=True) + .. method:: insert_image(rect, *, alpha=-1, filename=None, height=0, keep_proportion=True, mask=None, oc=0, overlay=True, pixmap=None, rotate=0, stream=None, width=0, xref=0) - PDF only: Put an image inside the given rectangle. The image may already exist in the PDF or be taken from a pixmap, a file, or a memory area. - - * Changed in v1.14.1: By default, the image keeps its aspect ratio. - * Changed in v1.14.13: The image is now always placed **centered** in the rectangle, i.e. the centers of image and rectangle are equal. - * Changed in v1.17.6: Insertion rectangle no longer needs to have a non-empty intersection with the page's :attr:`Page.cropbox` [#f5]_. - * Changed in v1.18.13: Allow providing the image as the xref of an existing one. + PDF only: Put an image inside the given rectangle. The image may already + exist in the PDF or be taken from a pixmap, a file, or a memory area. :arg rect_like rect: where to put the image. Must be finite and not empty. - :arg str filename: name of an image file (all formats supported by MuPDF -- see :ref:`ImageFiles`). - :arg bytes,bytearray,io.BytesIO stream: image in memory (all formats supported by MuPDF -- see :ref:`ImageFiles`). - - Changed in v1.14.13: *io.BytesIO* is now also supported. - + :arg int alpha: deprecated and ignored. + :arg str filename: + name of an image file (all formats supported by MuPDF -- see + :ref:`ImageFiles`). + :arg int height: + :arg bool keep_proportion: + maintain the aspect ratio of the image. + :arg bytes,bytearray,io.BytesIO mask: + image in memory -- to be used as image mask (alpha values) for the base + image. When specified, the base image must be provided as a filename or + a stream -- and must not be an image that already has a mask. + :arg int oc: + (:data:`xref`) make image visibility dependent on this :data:`OCG` + or :data:`OCMD`. Ignored after the first of multiple insertions. The + property is stored with the generated PDF image object and therefore + controls the image's visibility throughout the PDF. + :arg overlay: see :ref:`CommonParms`. :arg pixmap: a pixmap containing the image. - :type pixmap: :ref:`Pixmap` - - :arg bytes,bytearray,io.BytesIO mask: *(new in version v1.18.1)* image in memory -- to be used as image mask (alpha values) for the base image. When specified, the base image must be provided as a filename or a stream -- and must not be an image that already has a mask. - - :arg int xref: *(New in v1.18.13)* the :data:`xref` of an image already present in the PDF. If given, parameters `filename`, `pixmap`, `stream`, `alpha` and `mask` are ignored. The page will simply receive a reference to the existing image. - - :arg int alpha: *(Changed in v1.19.3)* deprecated. No longer needed -- ignored when given. - - :arg int rotate: *(new in version v1.14.11)* rotate the image. + :arg int rotate: rotate the image. Must be an integer multiple of 90 degrees. Positive values rotate anti-clockwise. If you need a rotation by an arbitrary angle, consider converting the image to a PDF (:meth:`Document.convert_to_pdf`) first and then use :meth:`Page.show_pdf_page` instead. + :arg bytes,bytearray,io.BytesIO stream: + image in memory (all formats supported by MuPDF -- see :ref:`ImageFiles`). + :arg int width: + :arg int xref: + the :data:`xref` of an image already present in the PDF. If given, + parameters `filename`, `pixmap`, `stream`, `alpha` and `mask` are + ignored. The page will simply receive a reference to the existing + image. - :arg int oc: *(new in v1.18.3)* (:data:`xref`) make image visibility dependent on this :data:`OCG` or :data:`OCMD`. Ignored after the first of multiple insertions. The property is stored with the generated PDF image object and therefore controls the image's visibility throughout the PDF. - :arg bool keep_proportion: *(new in version v1.14.11)* maintain the aspect ratio of the image. - - For a description of *overlay* see :ref:`CommonParms`. - - *Changed in v1.18.13:* Return xref of stored image. - - :rtype: int - :returns: The xref of the embedded image. This can be used as the `xref` argument for very significant performance boosts, if the image is inserted again. + :type pixmap: :ref:`Pixmap` + + :returns: + The `xref` of the embedded image. This can be used as the `xref` + argument for very significant performance boosts, if the image is + inserted again. This example puts the same image on every page of a document:: - >>> doc = fitz.open(...) - >>> rect = fitz.Rect(0, 0, 50, 50) # put thumbnail in upper left corner + >>> doc = pymupdf.open(...) + >>> rect = pymupdf.Rect(0, 0, 50, 50) # put thumbnail in upper left corner >>> img = open("some.jpg", "rb").read() # an image file >>> img_xref = 0 # first execution embeds the image >>> for page in doc: @@ -942,17 +1282,67 @@ In a nutshell, this is what you can do with PyMuPDF: .. note:: - 1. The method detects multiple insertions of the same image (like in above example) and will store its data only on the first execution. This is even true (although less performant), if using the default `xref=0`. - - 2. The method cannot detect if the same image had already been part of the file before opening it. - - 3. You can use this method to provide a background or foreground image for the page, like a copyright or a watermark. Please remember, that watermarks require a transparent image if put in foreground ... - - 4. The image may be inserted uncompressed, e.g. if a *Pixmap* is used or if the image has an alpha channel. Therefore, consider using *deflate=True* when saving the file. In addition, there exist effective ways to control the image size -- even if transparency comes into play. Have a look at `this `_ section of the documentation. - - 5. The image is stored in the PDF in its original quality. This may be much better than what you ever need for your display. Consider **decreasing the image size** before insertion -- e.g. by using the pixmap option and then shrinking it or scaling it down (see :ref:`Pixmap` chapter). The PIL method *Image.thumbnail()* can also be used for that purpose. The file size savings can be very significant. + 1. + The method detects multiple insertions of the same image (like + in the above example) and will store its data only on the first + execution. This is even true (although less performant), if using + the default `xref=0`. + 2. + The method cannot detect if the same image had already been part of + the file before opening it. + + 3. + You can use this method to provide a background or foreground image + for the page, like a copyright or a watermark. Please remember, that + watermarks require a transparent image if put in foreground ... + + 4. + The image may be inserted uncompressed, e.g. if a `Pixmap` is used + or if the image has an alpha channel. Therefore, consider using + `deflate=True` when saving the file. In addition, there are ways to + control the image size -- even if transparency comes into play. Have + a look at :ref:`RecipesImages_O`. + + 5. + The image is stored in the PDF at its original quality level. This + may be much better than what you need for your display. Consider + **decreasing the image size** before insertion -- e.g. by using + the pixmap option and then shrinking it or scaling it down (see + :ref:`Pixmap` chapter). The PIL method `Image.thumbnail()` can + also be used for that purpose. The file size savings can be very + significant. + + 6. + Another efficient way to display the same image on multiple + pages is another method: :meth:`show_pdf_page`. Consult + :meth:`Document.convert_to_pdf` for how to obtain intermediary PDFs + usable for that method. + + |history_begin| + + * Changed in v1.14.1: By default, the image keeps its aspect ratio. + * Changed in v1.14.11: Added args `keep_proportion`, `rotate`. + * Changed in v1.14.13: + + * + The image is now always placed **centered** in the rectangle, i.e. + the centers of image and rectangle are equal. + * Added support for `stream` as `io.BytesIO`. + + * Changed in v1.17.6: + Insertion rectangle no longer needs to have a non-empty intersection + with the page's :attr:`Page.cropbox` [#f5]_. + * Changed in v1.18.1: Added `mask` arg. + * Changed in v1.18.3: Added `oc` arg. + * Changed in v1.18.13: + + * Allow providing the image as the xref of an existing one. + * Added `xref` arg. + * Return `xref` of stored image. + + * Changed in v1.19.3: deprecate and ignore `alpha` arg. - 6. Another efficient way to display the same image on multiple pages is another method: :meth:`show_pdf_page`. Consult :meth:`Document.convert_to_pdf` for how to obtain intermediary PDFs usable for that method. Demo script `fitz-logo.py `_ implements a fairly complete approach. + |history_end| .. index:: @@ -963,8 +1353,6 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: replace_image(xref, filename=None, pixmap=None, stream=None) - * New in v1.21.0 - Replace the image at xref with another one. :arg int xref: the :data:`xref` of the image. @@ -977,6 +1365,12 @@ In a nutshell, this is what you can do with PyMuPDF: This is a **global replacement:** the new image will also be shown wherever the old one has been displayed throughout the file. This method mainly exists for technical purposes. Typical uses include replacing large images by smaller versions, like a lower resolution, graylevel instead of colored, etc., or changing transparency. + + |history_begin| + + * New in v1.21.0 + + |history_end| .. index:: @@ -984,8 +1378,6 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: delete_image(xref) - * New in v1.21.0 - Delete the image at xref. This is slightly misleading: actually the image is being replaced with a small transparent :ref:`Pixmap` using above :meth:`Page.replace_image`. The visible effect however is equivalent. :arg int xref: the :data:`xref` of the image. @@ -998,6 +1390,12 @@ In a nutshell, this is what you can do with PyMuPDF: `(45, 47, 1, 1, 8, 'DeviceGray', '', 'Im1', 'FlateDecode')` and also seem to "cover" the same boundary box on the page. + |history_begin| + + * New in v1.21.0 + + |history_end| + .. index:: pair: blocks; Page.get_text @@ -1013,37 +1411,34 @@ In a nutshell, this is what you can do with PyMuPDF: pair: xml; Page.get_text pair: textpage; Page.get_text pair: sort; Page.get_text + pair: delimiters; Page.get_text - .. method:: get_text(opt,*, clip=None, flags=None, textpage=None, sort=False) + .. method:: get_text(option,*, clip=None, flags=None, textpage=None, sort=False, delimiters=None) - * Changed in v1.19.0: added `textpage` parameter - * Changed in v1.19.1: added `sort` parameter - * Changed in v1.19.6: added new constants for defining default flags per method. - - Retrieves the content of a page in a variety of formats. This is a wrapper for :ref:`TextPage` methods by choosing the output option as follows: + Retrieves the content of a page in a variety of formats. Depending on the ``flags`` value, this may include text, images and several other object types. The method is a wrapper for multiple :ref:`TextPage` methods by choosing the output option `opt` as follows: - * "text" -- :meth:`TextPage.extractTEXT`, default - * "blocks" -- :meth:`TextPage.extractBLOCKS` - * "words" -- :meth:`TextPage.extractWORDS` - * "html" -- :meth:`TextPage.extractHTML` - * "xhtml" -- :meth:`TextPage.extractXHTML` - * "xml" -- :meth:`TextPage.extractXML` - * "dict" -- :meth:`TextPage.extractDICT` - * "json" -- :meth:`TextPage.extractJSON` - * "rawdict" -- :meth:`TextPage.extractRAWDICT` - * "rawjson" -- :meth:`TextPage.extractRAWJSON` + * "text" -- :meth:`TextPage.extractTEXT`, default. Always includes **text only.** + * "blocks" -- :meth:`TextPage.extractBLOCKS`. Includes text and **may** include image meta information. + * "words" -- :meth:`TextPage.extractWORDS`. Always includes **text only.** + * "html" -- :meth:`TextPage.extractHTML`. May include text and images. + * "xhtml" -- :meth:`TextPage.extractXHTML`. May include text and images. + * "xml" -- :meth:`TextPage.extractXML`. Always includes **text only.** + * "dict" -- :meth:`TextPage.extractDICT`. May include text and images. + * "json" -- :meth:`TextPage.extractJSON`. May include text and images. + * "rawdict" -- :meth:`TextPage.extractRAWDICT`. May include text and images. + * "rawjson" -- :meth:`TextPage.extractRAWJSON`. May include text and images. - :arg str opt: A string indicating the requested format, one of the above. A mixture of upper and lower case is supported. + :arg str opt: A string indicating the requested format, one of the above. A mixture of upper and lower case is supported. If misspelled, option "text" is silently assumed. - Changed in v1.16.3 Values "words" and "blocks" are now also accepted. + :arg rect-like clip: restrict the extraction to this rectangle. If ``None`` (default), the visible part of the page is taken. Any content (text, images) that is **not fully contained** in ``clip`` will be completely omitted. To avoid clipping altogether use ``clip=pymupdf.INFINITE_RECT()``. Only then the extraction will contain all items. This parameter has **no effect** on options "html", "xhtml" and "xml". - :arg rect-like clip: *(new in v1.17.7)* restrict extracted text to this rectangle. If None, the full page is taken. Has **no effect** for options "html", "xhtml" and "xml". + :arg int flags: indicator bits to control whether to include images or how text should be handled with respect to white spaces and :data:`ligatures`. See :ref:`TextPreserve` for available indicators and :ref:`text_extraction_flags` for default settings. (New in v1.16.2) - :arg int flags: *(new in v1.16.2)* indicator bits to control whether to include images or how text should be handled with respect to white spaces and :data:`ligatures`. See :ref:`TextPreserve` for available indicators and :ref:`text_extraction_flags` for default settings. + :arg textpage: use a previously created :ref:`TextPage`. This reduces execution time **very significantly:** by more than 50% and up to 95%, depending on the extraction option. If specified, the 'flags' and 'clip' arguments are ignored, because they are textpage-only properties. If omitted, a new, temporary textpage will be created. - :arg textpage: (new in v1.19.0) use a previously created :ref:`TextPage`. This reduces execution time **very significantly:** by more than 50% and up to 95%, depending on the extraction option. If specified, the 'flags' and 'clip' arguments are ignored, because they are textpage-only properties. If omitted, a new, temporary textpage will be created. + :arg bool sort: sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. Has no effect on (X)HTML and XML. For options "blocks", "dict", "json", "rawdict", "rawjson", sorting happens by coordinates `(y1, x0)` of the respective block bbox. For options "words" and "text", the text lines are completely re-synthesized to follow the reading sequence and appearance in the document -- which even establishes the original layout to some extent. - :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. Has no effect on (X)HTML and XML. Output option **"words"** sorts by `(y1, x0)` of the words' bboxes. Similar is true for "blocks", "dict", "json", "rawdict", "rawjson": they all are sorted by `(y1, x0)` of the resp. block bbox. If specified for "text", then internally "blocks" is used. + :arg str delimiters: use these characters as *additional* word separators with the "words" output option (ignored otherwise). By default, all white spaces (including non-breaking space `0xA0`) indicate start and end of a word. Now you can specify more characters causing this. For instance, the default will return `"john.doe@outlook.com"` as **one** word. If you specify `delimiters="@."` then the **four** words `"john"`, `"doe"`, `"outlook"`, `"com"` will be returned. Other possible uses include ignoring punctuation characters `delimiters=string.punctuation`. The "word" strings will not contain any delimiting character. (New in v1.23.5) :rtype: *str, list, dict* :returns: The page's content as a string, a list or a dictionary. Refer to the corresponding :ref:`TextPage` method for details. @@ -1051,7 +1446,17 @@ In a nutshell, this is what you can do with PyMuPDF: .. note:: 1. You can use this method as a **document conversion tool** from :ref:`any supported document type` to one of TEXT, HTML, XHTML or XML documents. - 2. The inclusion of text via the *clip* parameter is decided on a by-character level: **(changed in v1.18.2)** a character becomes part of the output, if its bbox is contained in *clip*. This **deviates** from the algorithm used in redaction annotations: a character will be **removed if its bbox intersects** any redaction annotation. + 2. The inclusion of text via the *clip* parameter is decided on a by-character level: a character becomes part of the output, if its bbox is contained in `clip`. This **deviates** from the algorithm used in redaction annotations: a character will be **removed if its bbox intersects** any redaction annotation. + + |history_begin| + + * Changed in v1.19.0: added `textpage` parameter + * Changed in v1.19.1: added `sort` parameter + * Changed in v1.19.6: added new constants for defining default flags per method. + * Changed in v1.23.5: added `delimiters` parameter + * Changed in v1.24.11: changed the effect of `sort_True` for "text" and "words" to closely follow natural reading sequence. + + |history_end| .. index:: pair: rect; get_textbox @@ -1059,21 +1464,25 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_textbox(rect, textpage=None) - * New in v1.17.7 - * Changed in v1.19.0: add `textpage` parameter - Retrieve the text contained in a rectangle. :arg rect-like rect: rect-like. :arg textpage: a :ref:`TextPage` to use. If omitted, a new, temporary textpage will be created. - :returns: a string with interspersed linebreaks where necessary. Changed in v1.19.0: It is based on dedicated code. A tyical use is checking the result of :meth:`Page.search_for`: + :returns: a string with interspersed linebreaks where necessary. It is based on dedicated code (changed in v1.19.0). A typical use is checking the result of :meth:`Page.search_for`: >>> rl = page.search_for("currency:") >>> page.get_textbox(rl[0]) 'Currency:' >>> + |history_begin| + + * New in v1.17.7 + * Changed in v1.19.0: add `textpage` parameter + + |history_end| + .. index:: pair: flags; get_textpage @@ -1081,17 +1490,21 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_textpage(clip=None, flags=3) - * New in v1.16.5 - * Changed in v1.17.7: introduced `clip` parameter. - Create a :ref:`TextPage` for the page. - :arg in flags: indicator bits controlling the content available for subsequent text extractions and searches -- see the parameter of :meth:`Page.get_text`. + :arg int flags: indicator bits controlling the content available for subsequent text extractions and searches -- see the parameter of :meth:`Page.get_text`. - :arg rect-like clip: *(new in v1.17.7)* restrict extracted text to this area. + :arg rect-like clip: restrict extracted text to this area. (New in v1.17.7) :returns: :ref:`TextPage` + |history_begin| + + * New in v1.16.5 + * Changed in v1.17.7: introduced `clip` parameter. + + |history_end| + .. index:: pair: flags; get_textpage_ocr @@ -1102,12 +1515,11 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_textpage_ocr(flags=3, language="eng", dpi=72, full=False, tessdata=None) - * New in v.1.19.0 - * Changed in v1.19.1: support full and partial OCRing a page. + **Optical Character Recognition** (**OCR**) technology can be used to extract text data for documents where text is in a raster image format throughout the page. Use this method to **OCR** a page for text extraction. - Create a :ref:`TextPage` for the page that includes OCRed text. MuPDF will invoke Tesseract-OCR if this method is used. Otherwise this is a normal :ref:`TextPage` object. + This method returns a :ref:`TextPage` for the page that includes OCRed text. MuPDF will invoke Tesseract-OCR if this method is used. Otherwise this is a normal :ref:`TextPage` object. - :arg in flags: indicator bits controlling the content available for subsequent test extractions and searches -- see the parameter of :meth:`Page.get_text`. + :arg int flags: indicator bits controlling the content available for subsequent test extractions and searches -- see the parameter of :meth:`Page.get_text`. :arg str language: the expected language(s). Use "+"-separated values if multiple languages are expected, "eng+spa" for English and Spanish. :arg int dpi: the desired resolution in dots per inch. Influences recognition quality (and execution time). :arg bool full: whether to OCR the full page, or just the displayed images. @@ -1125,18 +1537,17 @@ In a nutshell, this is what you can do with PyMuPDF: **OCRed text is only available** to PyMuPDF's text extractions and searches if their `textpage` parameter specifies the output of this method. - `This `_ Jupyter notebook walks through an example for using OCR textpages. + `This Jupyter notebook `_ walks through an example for using OCR textpages. + |history_begin| - .. method:: get_drawings(extended=False) + * New in v.1.19.0 + * Changed in v1.19.1: support full and partial OCRing a page. - * New in v1.18.0 - * Changed in v1.18.17 - * Changed in v1.19.0: add "seqno" key, remove "clippings" key - * Changed in v1.19.1: "color" / "fill" keys now always are either are RGB tuples or `None`. This resolves issues caused by exotic colorspaces. - * Changed in v1.19.2: add an indicator for the *"orientation"* of the area covered by an "re" item. - * Changed in v1.22.0: add new key `"layer"` which contains the name of the Optional Content Group of the path (or `None`). - * Changed in v1.22.0: add parameter `extended` to also return clipping and group paths. + |history_end| + + + .. method:: get_drawings(extended=False) Return the vector graphics of the page. These are instructions which draw lines, rectangles, quadruples or curves, including properties like colors, transparency, line width and dashing, etc. Alternative terms are "line art" and "drawings". @@ -1144,50 +1555,50 @@ In a nutshell, this is what you can do with PyMuPDF: The path dictionary for fill, stroke and fill-stroke paths has been designed to be compatible with class :ref:`Shape`. There are the following keys: - ============== ============================================================================ - Key Value - ============== ============================================================================ - closePath Same as the parameter in :ref:`Shape`. - color Stroke color (see :ref:`Shape`). - dashes Dashed line specification (see :ref:`Shape`). - even_odd Fill colors of area overlaps -- same as the parameter in :ref:`Shape`. - fill Fill color (see :ref:`Shape`). - items List of draw commands: lines, rectangles, quads or curves. - lineCap Number 3-tuple, use its max value on output with :ref:`Shape`. - lineJoin Same as the parameter in :ref:`Shape`. - fill_opacity (new in v1.18.17) fill color transparency (see :ref:`Shape`). - stroke_opacity (new in v1.18.17) stroke color transparency (see :ref:`Shape`). - rect Page area covered by this path. Information only. - layer (new in v1.22.0) name of applicable Optional Content Group - level (new in v1.22.0) the hierarchy level if `extended=True` - seqno (new in v1.19.0) command number when building page appearance - type (new in v1.18.17) type of this path. - width Stroke line width (see :ref:`Shape`). - ============== ============================================================================ - - * *(Changed in v1.18.17)* Key `"opacity"` has been replaced by the new keys `"fill_opacity"` and `"stroke_opacity"`. This is now compatible with the corresponding parameters of :meth:`Shape.finish`. + ============== ============================================================================ + Key Value + ============== ============================================================================ + closePath Same as the parameter in :ref:`Shape`. + color Stroke color (see :ref:`Shape`). + dashes Dashed line specification (see :ref:`Shape`). + even_odd Fill colors of area overlaps -- same as the parameter in :ref:`Shape`. + fill Fill color (see :ref:`Shape`). + items List of draw commands: lines, rectangles, quads or curves. + lineCap Number 3-tuple, use its max value on output with :ref:`Shape`. + lineJoin Same as the parameter in :ref:`Shape`. + fill_opacity fill color transparency (see :ref:`Shape`). (New in v1.18.17) + stroke_opacity stroke color transparency (see :ref:`Shape`). (New in v1.18.17) + rect Page area covered by this path. Information only. + layer name of applicable Optional Content Group. (New in v1.22.0) + level the hierarchy level if `extended=True`. (New in v1.22.0) + seqno command number when building page appearance. (New in v1.19.0) + type type of this path. (New in v1.18.17) + width Stroke line width. (see :ref:`Shape`). + ============== ============================================================================ + + Key `"opacity"` has been replaced by the new keys `"fill_opacity"` and `"stroke_opacity"`. This is now compatible with the corresponding parameters of :meth:`Shape.finish`. (Changed in v1.18.17) For paths other than groups or clips, key `"type"` takes one of the following values: - * **"f"** -- this is a *fill-only* path. Only key-values relevant for this operation have a meaning, not applicable ones are present with a value of *None*: `"color"`, `"lineCap"`, `"lineJoin"`, `"width"`, `"closePath"`, `"dashes"` and should be ignored. - * **"s"** -- this is a *stroke-only* path. Similar to previous, key `"fill"` is present with value *None*. + * **"f"** -- this is a *fill-only* path. Only key-values relevant for this operation have a meaning, not applicable ones are present with a value of ``None``: `"color"`, `"lineCap"`, `"lineJoin"`, `"width"`, `"closePath"`, `"dashes"` and should be ignored. + * **"s"** -- this is a *stroke-only* path. Similar to previous, key `"fill"` is present with value ``None``. * **"fs"** -- this is a path performing combined *fill* and *stroke* operations. Each item in `path["items"]` is one of the following: * `("l", p1, p2)` - a line from p1 to p2 (:ref:`Point` objects). * `("c", p1, p2, p3, p4)` - cubic Bézier curve **from p1 to p4** (p2 and p3 are the control points). All objects are of type :ref:`Point`. - * `("re", rect, orientation)` - a :ref:`Rect`. *Changed in v1.18.17:* Multiple rectangles within the same path are now detected. *Changed in v1.19.2:* added integer `orientation` which is 1 resp. -1 indicating whether the enclosed area is rotated left (1 = anti-clockwise), or resp. right [#f7]_. - * `("qu", quad)` - a :ref:`Quad`. *New in v1.18.17, changed in v1.19.2:* 3 or 4 consecutive lines are detected to actually represent a :ref:`Quad`. + * `("re", rect, orientation)` - a :ref:`Rect`. Multiple rectangles within the same path are now detected (changed in v1.18.17). Integer `orientation` is 1 resp. -1 indicating whether the enclosed area is rotated left (1 = anti-clockwise), or resp. right [#f7]_ (changed in v1.19.2). + * `("qu", quad)` - a :ref:`Quad`. 3 or 4 consecutive lines are detected to actually represent a :ref:`Quad` (changed in v1.19.2:). (New in v1.18.17) - .. note:: Starting with v1.19.2, quads and rectangles are more reliably recognized as such. + .. note::, quads and rectangles are more reliably recognized as such. (Starting with v1.19.2) - Using class :ref:`Shape`, you should be able to recreate the original drawings on a separate (PDF) page with high fidelity under normal, not too sophisticated circumstances. Please see the following comments on restrictions. A coding draft can be found in section "Extractings Drawings" of chapter :ref:`FAQ`. + Using class :ref:`Shape`, you should be able to recreate the original drawings on a separate (PDF) page with high fidelity under normal, not too sophisticated circumstances. Please see the following comments on restrictions. A coding draft can be found in :ref:`How to Extract Drawings `. - **New in v1.22.0:** Specifying `extended=True` significantly alters the output. Most importantly, new dictionary types are present: "clip" and "group". All paths will now be organized in a hierarchic structure which is encoded by the new integer key "level", the hierarchy level. Each group or clip establishes a new hierarchy, which applies to all subsequent paths having a *larger* level value. + Specifying `extended=True` significantly alters the output. Most importantly, new dictionary types are present: "clip" and "group". All paths will now be organized in a hierarchic structure which is encoded by the new integer key "level", the hierarchy level. Each group or clip establishes a new hierarchy, which applies to all subsequent paths having a *larger* level value. (New in v1.22.0) - Any path with a smaller level value than its predecessor will end the scope of (at least) the preceeding hierarchy level. A "clip" path with the same level as the preceding clip will end the scope of that clip. Same is true for groups. This is best explained by an example:: + Any path with a smaller level value than its predecessor will end the scope of (at least) the preceding hierarchy level. A "clip" path with the same level as the preceding clip will end the scope of that clip. Same is true for groups. This is best explained by an example:: +------+------+--------+------+--------+ | line | lvl0 | lvl1 | lvl2 | lvl3 | @@ -1209,75 +1620,68 @@ In a nutshell, this is what you can do with PyMuPDF: * **"clip"** dictionary. Its values (most importantly "scissor") remain valid / apply as long as following dictionaries have a **larger "level"** value. - ============== ============================================================================ - Key Value - ============== ============================================================================ - closePath Same as in "stroke" or "fill" dictionaries - even_odd Same as in "stroke" or "fill" dictionaries - items Same as in "stroke" or "fill" dictionaries - rect Same as in "stroke" or "fill" dictionaries - layer Same as in "stroke" or "fill" dictionaries - level Same as in "stroke" or "fill" dictionaries - scissor the clip rectangle - type "clip" - ============== ============================================================================ - - * "group" dictionary. Its values remain valid (apply) as long as following dictionaries have a **larger "level"** value. Any dictionary with an equal or lower level end this group. + ============== ============================================================================ + Key Value + ============== ============================================================================ + closePath Same as in "stroke" or "fill" dictionaries + even_odd Same as in "stroke" or "fill" dictionaries + items Same as in "stroke" or "fill" dictionaries + rect Same as in "stroke" or "fill" dictionaries + layer Same as in "stroke" or "fill" dictionaries + level Same as in "stroke" or "fill" dictionaries + scissor the clip rectangle + type "clip" + ============== ============================================================================ + + * "group" dictionary. Its values remain valid (apply) as long as following dictionaries have a **larger "level"** value. Any dictionary with an equal or lower level end this group. + + ============== ============================================================================ + Key Value + ============== ============================================================================ + rect Same as in "stroke" or "fill" dictionaries + layer Same as in "stroke" or "fill" dictionaries + level Same as in "stroke" or "fill" dictionaries + isolated (bool) Whether this group is isolated + knockout (bool) Whether this is a "Knockout Group" + blendmode Name of the BlendMode, default is "Normal" + opacity Float value in range [0, 1]. + type "group" + ============== ============================================================================ - ============== ============================================================================ - Key Value - ============== ============================================================================ - rect Same as in "stroke" or "fill" dictionaries - layer Same as in "stroke" or "fill" dictionaries - level Same as in "stroke" or "fill" dictionaries - isolated (bool) Whether this group is isolated - knockout (bool) Whether this is a "Knockout Group" - blendmode Name of the BlendMode, default is "Normal" - opacity Float value in range [0, 1]. - type "group" - ============== ============================================================================ - - - - * *(Changed in v1.18.17)* Key `"opacity"` has been replaced by the new keys `"fill_opacity"` and `"stroke_opacity"`. This is now compatible with the corresponding parameters of :meth:`Shape.finish`. - - - Key `"type"` takes one of the following values: - - * **"f"** -- this is a *fill-only* path. Only key-values relevant for this operation have a meaning, irrelevant ones have been added with default values for backward compatibility: `"color"`, `"lineCap"`, `"lineJoin"`, `"width"`, `"closePath"`, `"dashes"` and should be ignored. - * **"s"** -- this is a *stroke-only* path. Similar to previous, key `"fill"` is present with value `None`. - * **"fs"** -- this is a path performing combined *fill* and *stroke* operations. - - Each item in `path["items"]` is one of the following: + .. note:: The method is based on the output of :meth:`Page.get_cdrawings` -- which is much faster, but requires somewhat more attention processing its output. - * `("l", p1, p2)` - a line from p1 to p2 (:ref:`Point` objects). - * `("c", p1, p2, p3, p4)` - cubic Bézier curve **from p1 to p4** (p2 and p3 are the control points). All objects are of type :ref:`Point`. - * `("re", rect, orientation)` - a :ref:`Rect`. *Changed in v1.18.17:* Multiple rectangles within the same path are now detected. *Changed in v1.19.2:* added integer `orientation` which is 1 resp. -1 indicating whether the enclosed area is rotated left (1 = anti-clockwise), or resp. right [#f7]_. - * `("qu", quad)` - a :ref:`Quad`. *New in v1.18.17, changed in v1.19.2:* 3 or 4 consecutive lines are detected to actually represent a :ref:`Quad`. + |history_begin| + + * New in v1.18.0 + * Changed in v1.18.17 + * Changed in v1.19.0: add "seqno" key, remove "clippings" key + * Changed in v1.19.1: "color" / "fill" keys now always are either are RGB tuples or `None`. This resolves issues caused by exotic colorspaces. + * Changed in v1.19.2: add an indicator for the *"orientation"* of the area covered by an "re" item. + * Changed in v1.22.0: add new key `"layer"` which contains the name of the Optional Content Group of the path (or `None`). + * Changed in v1.22.0: add parameter `extended` to also return clipping and group paths. + + |history_end| - .. note:: Starting with v1.19.2, quads and rectangles are more reliably recognized as such. - Using class :ref:`Shape`, you should be able to recreate the original drawings on a separate (PDF) page with high fidelity under normal, not too sophisticated circumstances. Please see the following comments on restrictions. A coding draft can be found in section "How to Extract Drawings" of chapter :ref:`FAQ`. + .. method:: get_cdrawings(extended=False) - .. note:: The method is based on the output of :meth:`Page.get_cdrawings` -- which is much faster, but requires somewhat more attention processing its output. + Extract the vector graphics on the page. Apart from following technical differences, functionally equivalent to :meth:`Page.get_drawings`, but much faster: + * Every path type only contains the relevant keys, e.g. a stroke path has no `"fill"` color key. See comment in method :meth:`Page.get_drawings`. + * Coordinates are given as :data:`point_like`, :data:`rect_like` and :data:`quad_like` **tuples** -- not as :ref:`Point`, :ref:`Rect`, :ref:`Quad` objects. + If performance is a concern, consider using this method: Compared to versions earlier than 1.18.17, you should see much shorter response times. We have seen pages that required 2 seconds then, now only need 200 ms with this method. - .. method:: get_cdrawings(extended=False) + |history_begin| * New in v1.18.17 * Changed in v1.19.0: removed "clippings" key, added "seqno" key. * Changed in v1.19.1: always generate RGB color tuples. * Changed in v1.22.0: added new key `"layer"` which contains the name of the Optional Content Group of the path (or `None`). - * Changed in v1.22.0 added parameter `extended` to also return clipping paths. - - Extract the vector graphics on the page. Apart from following technical differences, functionally equivalent to :meth:`Page.get_drawings`, but much faster: - - * Every path type only contains the relevant keys, e.g. a stroke path has no `"fill"` color key. See comment in method :meth:`Page.get_drawings`. - * Coordinates are given as :data:`point_like`, :data:`rect_like` and :data:`quad_like` **tuples** -- not as :ref:`Point`, :ref:`Rect`, :ref:`Quad` objects. - - If performance is a concern, consider using this method: Compared to versions earlier than 1.18.17, you should see much shorter response times. We have seen pages that required 2 seconds then, now only need 200 ms with this method. + * Changed in v1.22.0: added parameter `extended` to also return clipping paths. + + |history_end| .. method:: get_fonts(full=False) @@ -1296,38 +1700,45 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_image_info(hashes=False, xrefs=False) - * *New in v1.18.11* - * *Changed in v1.18.13:* added image MD5 hashcode computation and :data:`xref` search. + Return a list of meta information dictionaries for all images displayed by the page. This works for all document types. - Return a list of meta information dictionaries for all images shown on the page. This works for all document types. Technically, this is a subset of the dictionary output of :meth:`Page.get_text`: the image binary content and any text on the page are ignored. + :arg bool hashes: Compute the MD5 hashcode for each encountered image, which allows identifying image duplicates. This adds the key `"digest"` to the output, whose value is a 16 byte `bytes` object. (New in v1.18.13) - :arg bool hashes: *New in v1.18.13:* Compute the MD5 hashcode for each encountered image, which allows identifying image duplicates. This adds the key `"digest"` to the output, whose value is a 16 byte `bytes` object. - - :arg bool xrefs: *New in v1.18.13:* **PDF only.** Try to find the :data:`xref` for each image. Implies `hashes=True`. Adds the `"xref"` key to the dictionary. If not found, the value is 0, which means, the image is either "inline" or otherwise undetectable. Please note that this option has an extended response time, because the MD5 hashcode will be computed at least two times for each image with an xref. + :arg bool xrefs: **PDF only.** Try to find the :data:`xref` for each image. Implies `hashes=True`. Adds the `"xref"` key to the dictionary. If not found, the value is 0, which means, the image is either "inline" or its xref is undetectable for some reason. Please note that this option has an extended response time, because the MD5 hashcode will be computed at least two times for each image with an xref. (New in v1.18.13) :rtype: list[dict] - :returns: A list of dictionaries. This includes information for **exactly those** images, that are shown on the page -- including *"inline images"*. In contrast to images included in :meth:`Page.get_text`, image **binary content** is not loaded, which drastically reduces memory usage. The dictionary layout is similar to that of image blocks in `page.get_text("dict")`. + :returns: A list of dictionaries. This includes information for **exactly those** images, that are shown on the page -- including *"inline images"*. The dictionary layout is similar to that of image blocks in `page.get_text("dict")`. + + In contrast to images included in :meth:`Page.get_text`, image **binary content** is not loaded by this method, which drastically reduces memory usage. Another difference is that image detection is not restricted to the visible part of the page or any ``clip`` parameter: method :meth:`Page.get_text` will only extract images **fully contained** in the provided ``clip``. =============== =============================================================== **Key** **Value** =============== =============================================================== - number block number *(int)* + number block number (``int``) bbox image bbox on page, :data:`rect_like` - width original image width *(int)* - height original image height *(int)* - cs-name colorspace name *(str)* - colorspace colorspace.n *(int)* - xres resolution in x-direction *(int)* - yres resolution in y-direction *(int)* - bpc bits per component *(int)* - size storage occupied by image *(int)* - digest MD5 hashcode *(bytes)*, if *hashes* is true + width original image width (``int``) + height original image height (``int``) + cs-name colorspace name (``str``) + colorspace colorspace.n (``int``) + xres resolution in x-direction (``int``) [#f10]_ + yres resolution in y-direction (``int``) [#f10]_ + bpc bits per component (``int``) + size storage occupied by image (``int``) + digest MD5 hashcode (``bytes``), if ``hashes`` is true xref image :data:`xref` or 0, if *xrefs* is true transform matrix transforming image rect to bbox, :data:`matrix_like` + has-mask whether the image is transparent and has a mask (``bool``) =============== =============================================================== Multiple occurrences of the same image are always reported. You can detect duplicates by comparing their `digest` values. + |history_begin| + + * New in v1.18.11 + * Changed in v1.18.13: added image MD5 hashcode computation and :data:`xref` search. + + |history_end| + .. method:: get_xobjects() @@ -1339,8 +1750,6 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: get_image_rects(item, transform=False) - *New in v1.18.13* - PDF only: Return boundary boxes and transformation matrices of an embedded image. This is an improved version of :meth:`Page.get_image_bbox` with the following differences: * There is no restriction on **how** the image is invoked (by the page or one of its Form XObjects). The result is always complete and correct. @@ -1353,42 +1762,56 @@ In a nutshell, this is what you can do with PyMuPDF: :rtype: list :returns: Boundary boxes and respective transformation matrices for each image occurrence on the page. If the item is not on the page, an empty list `[]` is returned. + |history_begin| + + New in v1.18.13 + + |history_end| + .. index:: pair: transform; get_image_bbox .. method:: get_image_bbox(item, transform=False) - * Changed in v1.18.11: return image transformation matrix - PDF only: Return boundary box and transformation matrix of an embedded image. :arg list,str item: an item of the list :meth:`Page.get_images` with *full=True* specified, or the reference **name** entry of such an item, which is item[-3] (or item[7] respectively). - :arg bool transform: *(new in v1.18.11)* also return the matrix used to transform the image rectangle to the bbox on the page. Default is just the bbox. If true, then a tuple `(bbox, matrix)` is returned. + :arg bool transform: return the matrix used to transform the image rectangle to the bbox on the page (new in v1.18.11). Default is just the bbox. If true, then a tuple `(bbox, matrix)` is returned. :rtype: :ref:`Rect` or (:ref:`Rect`, :ref:`Matrix`) :returns: the boundary box of the image -- optionally also its transformation matrix. - * *(Changed in v1.16.7)* -- If the page in fact does not display this image, an infinite rectangle is returned now. In previous versions, an exception was raised. Formally invalid parameters still raise exceptions. - * *(Changed in v1.17.0)* -- Only images referenced directly by the page are considered. This means that images occurring in embedded PDF pages are ignored and an exception is raised. - * *(Changed in v1.18.5)* -- Removed the restriction introduced in v1.17.0: any item of the page's image list may be specified. - * *(Changed in v1.18.11)* -- Partially re-instated a restriction: only those images are considered, that are either directly referenced by the page or by a Form XObject directly referenced by the page. - * *(Changed in v1.18.11)* -- Optionally also return the transformation matrix together with the bbox as the tuple `(bbox, transform)`. + |history_begin| + + * (Changed in v1.16.7): If the page in fact does not display this image, an infinite rectangle is returned now. In previous versions, an exception was raised. Formally invalid parameters still raise exceptions. + * (Changed in v1.17.0): Only images referenced directly by the page are considered. This means that images occurring in embedded PDF pages are ignored and an exception is raised. + * (Changed in v1.18.5): Removed the restriction introduced in v1.17.0: any item of the page's image list may be specified. + * (Changed in v1.18.11): Partially re-instated a restriction: only those images are considered, that are either directly referenced by the page or by a Form XObject directly referenced by the page. + * (Changed in v1.18.11): Optionally also return the transformation matrix together with the bbox as the tuple `(bbox, transform)`. + + |history_end| .. note:: 1. Be aware that :meth:`Page.get_images` may contain "dead" entries i.e. images, which the page **does not display**. This is no error, but intended by the PDF creator. No exception will be raised in this case, but an infinite rectangle is returned. You can avoid this from happening by executing :meth:`Page.clean_contents` before this method. - 2. The image's "transformation matrix" is defined as the matrix, for which the expression `bbox / transform == fitz.Rect(0, 0, 1, 1)` is true, lookup details here: :ref:`ImageTransformation`. + 2. The image's "transformation matrix" is defined as the matrix, for which the expression `bbox / transform == pymupdf.Rect(0, 0, 1, 1)` is true, lookup details here: :ref:`ImageTransformation`. + + |history_begin| + + * Changed in v1.18.11: return image transformation matrix + + |history_end| .. index:: pair: matrix; get_svg_image - .. method:: get_svg_image(matrix=fitz.Identity, text_as_path=True) + .. method:: get_svg_image(matrix=pymupdf.Identity, text_as_path=True) - Create an SVG image from the page. Only full page images are currently supported. + Create an SVG image from the page. Only full page images are currently supported. :arg matrix_like matrix: a matrix, default is :ref:`Identity`. - :arg bool text_as_path: *(new in v1.17.5)* -- controls how text is represented. *True* outputs each character as a series of elementary draw commands, which leads to a more precise text display in browsers, but a **very much larger** output for text-oriented pages. Display quality for *False* relies on the presence of the referenced fonts on the current system. For missing fonts, the internet browser will fall back to some default -- leading to unpleasant appearances. Choose *False* if you want to parse the text of the SVG. + :arg bool text_as_path: -- controls how text is represented. ``True`` outputs each character as a series of elementary draw commands, which leads to a more precise text display in browsers, but a **very much larger** output for text-oriented pages. Display quality for ``False`` relies on the presence of the referenced fonts on the current system. For missing fonts, the internet browser will fall back to some default -- leading to unpleasant appearances. Choose ``False`` if you want to parse the text of the SVG. (New in v1.17.5) :returns: a UTF-8 encoded string that contains the image. Because SVG has XML syntax it can be saved in a text file, the standard extension is `.svg`. @@ -1402,23 +1825,23 @@ In a nutshell, this is what you can do with PyMuPDF: pair: matrix; get_pixmap pair: dpi; get_pixmap - .. method:: get_pixmap(*, matrix=fitz.Identity, dpi=None, colorspace=fitz.csRGB, clip=None, alpha=False, annots=True) - - * Changed in v1.19.2: added support of parameter dpi. + .. method:: get_pixmap(*, matrix=pymupdf.Identity, dpi=None, colorspace=pymupdf.csRGB, clip=None, alpha=False, annots=True) Create a pixmap from the page. This is probably the most often used method to create a :ref:`Pixmap`. All parameters are *keyword-only.* :arg matrix_like matrix: default is :ref:`Identity`. - :arg int dpi: (new in v1.19.2) desired resolution in x and y direction. If not `None`, the `"matrix"` parameter is ignored. + :arg int dpi: desired resolution in x and y direction. If not `None`, the `"matrix"` parameter is ignored. (New in v1.19.2) :arg colorspace: The desired colorspace, one of "GRAY", "RGB" or "CMYK" (case insensitive). Or specify a :ref:`Colorspace`, ie. one of the predefined ones: :data:`csGRAY`, :data:`csRGB` or :data:`csCMYK`. :type colorspace: str or :ref:`Colorspace` :arg irect_like clip: restrict rendering to the intersection of this area with the page's rectangle. - :arg bool alpha: whether to add an alpha channel. Always accept the default *False* if you do not really need transparency. This will save a lot of memory (25% in case of RGB ... and pixmaps are typically **large**!), and also processing time. Also note an **important difference** in how the image will be rendered: with *True* the pixmap's samples area will be pre-cleared with *0x00*. This results in **transparent** areas where the page is empty. With *False* the pixmap's samples will be pre-cleared with *0xff*. This results in **white** where the page has nothing to show. + :arg bool alpha: whether to add an alpha channel. Always accept the default ``False`` if you do not really need transparency. This will save a lot of memory (25% in case of RGB ... and pixmaps are typically **large**!), and also processing time. Also note an **important difference** in how the image will be rendered: with ``True`` the pixmap's samples area will be pre-cleared with *0x00*. This results in **transparent** areas where the page is empty. With ``False`` the pixmap's samples will be pre-cleared with *0xff*. This results in **white** where the page has nothing to show. - Changed in v1.14.17 - The default alpha value is now *False*. + |history_begin| + + Changed in v1.14.17 + The default alpha value is now ``False``. * Generated with *alpha=True* @@ -1429,16 +1852,21 @@ In a nutshell, this is what you can do with PyMuPDF: .. image:: images/img-alpha-0.* + |history_end| + :arg bool annots: *(new in version 1.16.0)* whether to also render annotations or to suppress them. You can create pixmaps for annotations separately. :rtype: :ref:`Pixmap` :returns: Pixmap of the page. For fine-controlling the generated image, the by far most important parameter is **matrix**. E.g. you can increase or decrease the image resolution by using **Matrix(xzoom, yzoom)**. If zoom > 1, you will get a higher resolution: zoom=2 will double the number of pixels in that direction and thus generate a 2 times larger image. Non-positive values will flip horizontally, resp. vertically. Similarly, matrices also let you rotate or shear, and you can combine effects via e.g. matrix multiplication. See the :ref:`Matrix` section to learn more. .. note:: - The method will respect any page rotation and will not exceed the intersection of `clip` and :attr:`Page.cropbox`. If you need the page's mediabox (and if this is a different rectangle), you can use a snippet like the following to achieve this:: - In [1]: import fitz - In [2]: doc=fitz.open("demo1.pdf") + * The pixmap will have *"premultiplied"* pixels if `alpha=True`. To learn about some background, e.g. look for "Premultiplied alpha" `here `_. + + * The method will respect any page rotation and will not exceed the intersection of `clip` and :attr:`Page.cropbox`. If you need the page's mediabox (and if this is a different rectangle), you can use a snippet like the following to achieve this:: + + In [1]: import pymupdf + In [2]: doc=pymupdf.open("demo1.pdf") In [3]: page=doc[0] In [4]: rotation = page.rotation In [5]: cropbox = page.cropbox @@ -1451,59 +1879,81 @@ In a nutshell, this is what you can do with PyMuPDF: ...: In [11]: + |history_begin| + * Changed in v1.19.2: added support of parameter dpi. - .. method:: annot_names() + |history_end| - * New in v1.16.10 + + + .. method:: annot_names() PDF only: return a list of the names of annotations, widgets and links. Technically, these are the */NM* values of every PDF object found in the page's */Annots* array. :rtype: list + |history_begin| - .. method:: annot_xrefs() + * New in v1.16.10 - * New in v1.17.1 + |history_end| - PDF only: return a list of the :data`xref` numbers of annotations, widgets and links -- technically of all entries found in the page's */Annots* array. + + .. method:: annot_xrefs() + + PDF only: return a list of the :data:`xref` numbers of annotations, widgets and links -- technically of all entries found in the page's */Annots* array. :rtype: list :returns: a list of items *(xref, type)* where type is the annotation type. Use the type to tell apart links, fields and annotations, see :ref:`AnnotationTypes`. - - .. method:: load_annot(ident) + |history_begin| * New in v1.17.1 + |history_end| + + + .. method:: load_annot(ident) + PDF only: return the annotation identified by *ident*. This may be its unique name (PDF `/NM` key), or its :data:`xref`. :arg str,int ident: the annotation name or xref. :rtype: :ref:`Annot` - :returns: the annotation or *None*. + :returns: the annotation or ``None``. .. note:: Methods :meth:`Page.annot_names`, :meth:`Page.annot_xrefs` provide lists of names or xrefs, respectively, from where an item may be picked and loaded via this method. - .. method:: load_widget(xref) + |history_begin| - * New in v1.19.6 + * New in v1.17.1 - PDF only: return the field identified by *xref*. + |history_end| + + .. method:: load_widget(xref) + + PDF only: return the field identified by :data:`xref`. :arg int xref: the field's xref. :rtype: :ref:`Widget` - :returns: the field or *None*. + :returns: the field or ``None``. .. note:: This is similar to the analogous method :meth:`Page.load_annot` -- except that here only the xref is supported as identifier. + |history_begin| + + * New in v1.19.6 + + |history_end| + .. method:: load_links() Return the first link on a page. Synonym of property :attr:`first_link`. :rtype: :ref:`Link` - :returns: first link on the page (or *None*). + :returns: first link on the page (or ``None``). .. index:: pair: rotate; set_rotation @@ -1514,6 +1964,34 @@ In a nutshell, this is what you can do with PyMuPDF: :arg int rotate: An integer specifying the required rotation in degrees. Must be an integer multiple of 90. Values will be converted to one of 0, 90, 180, 270. + .. method:: recolor(components=1) + + PDF only: Change the colorspace components of all objects on page. + + :arg int components: The desired count of color components. Must be one of 1, 3 or 4, which results in color spaces DeviceGray, DeviceRGB or DeviceCMYK respectively. The method affects text, images and vector graphics. For instance, with the default value 1, a page will be converted to grayscale. If a page is already grayscale, the method will not cause visible changes -- independent of the value of ``components``. + + These changes are **permanent** and cannot be reverted. + + .. method:: clip_to_rect(rect) + + PDF only: Permanently remove page content outside the given rectangle. This is similar to :meth:`Page.set_cropbox`, but the page's rectangle will not be changed, only the content outside the rectangle will be removed. + + :arg rect_like rect: The rectangle to clip to. Must be finite and its intersection with the page must not be empty. + + The method works best for text: All text on the page will be removed (decided by single character) that has no intersection with the rectangle. For vector graphics, the method will remove all paths that have no intersection with the rectangle. For images, the method will remove all images that have no intersection with the rectangle. Vectors and images **having** an intersection with the rectangle, will be kept in their entirety. + + The method roughly has the same effect as if four redactions had been applied that cover the rectangle's outside. + + * New in v1.26.4. + + .. method:: remove_rotation() + + PDF only: Set page rotation to 0 while maintaining appearance and page content. + + :returns: The inverted matrix used to achieve this change. If the page was not rotated (rotation 0), :ref:`Identity` is returned. The method automatically recomputes the rectangles of any annotations, links and widgets present on the page. + + This method may come in handy when e.g. used with :meth:`Page.show_pdf_page`. + .. index:: pair: clip; show_pdf_page pair: keep_proportion; show_pdf_page @@ -1522,14 +2000,11 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: show_pdf_page(rect, docsrc, pno=0, keep_proportion=True, overlay=True, oc=0, rotate=0, clip=None) - * Changed in v1.14.11: Parameter *reuse_xref* has been deprecated. Position the source rectangle centered in target rectangle. Any rotation angle is now supported. - * Changed in v1.18.3: New parameter `oc`. - - PDF only: Display a page of another PDF as a **vector image** (otherwise similar to :meth:`Page.insert_image`). This is a multi-purpose method. For example, you can use it to + PDF only: Display a page of another PDF. This is similar to :meth:`Page.insert_image` but the source page will appear like a copy of itself and will not be rasterized. This is a multi-purpose method. For example, you can use it to: * create "n-up" versions of existing PDF files, combining several input pages into **one output page** (see example `combine.py `_), * create "posterized" PDF files, i.e. every input page is split up in parts which each create a separate output page (see `posterize.py `_), - * include PDF-based vector images like company logos, watermarks, etc., see `svg-logo.py `_, which puts an SVG-based logo on each page (requires additional packages to deal with SVG-to-PDF conversions). + * include PDF-based vector images like company logos, watermarks, etc., see `svg-logo.py `_, which puts an SVG-based logo on each page. :arg rect_like rect: where to place the image on current page. Must be finite and its intersection with the page must not be empty. :arg docsrc: source PDF document containing the page. Must be a different document object, but may be the same file. @@ -1541,25 +2016,25 @@ In a nutshell, this is what you can do with PyMuPDF: :arg bool overlay: put image in foreground (default) or background. - :arg int oc: *(new in v1.18.3)* (:data:`xref`) make visibility dependent on this :data:`OCG` / :data:`OCMD` (which must be defined in the target PDF) [#f9]_. - :arg float rotate: *(new in v1.14.10)* show the source rectangle rotated by some angle. *Changed in v1.14.11:* Any angle is now supported. + :arg int oc: (:data:`xref`) make visibility dependent on this :data:`OCG` / :data:`OCMD` (which must be defined in the target PDF) [#f9]_. (New in v1.18.3) + :arg float rotate: show the source rectangle rotated by some angle. Any angle is supported (changed in v1.14.11). (New in v1.14.10) :arg rect_like clip: choose which part of the source page to show. Default is the full page, else must be finite and its intersection with the source page must not be empty. - .. note:: In contrast to method :meth:`Document.insert_pdf`, this method does not copy annotations, widgets or links, so these are not included in the target [#f6]_. But all its **other resources (text, images, fonts, etc.)** will be imported into the current PDF. They will therefore appear in text extractions and in :meth:`get_fonts` and :meth:`get_images` lists -- even if they are not contained in the visible area given by *clip*. + .. note:: In contrast to method :meth:`Document.insert_pdf`, this method does not copy annotations, widgets or links, so these objects are not included in the target [#f6]_. But all its **other resources (text, images, fonts, etc.)** will be imported into the current PDF. They will therefore appear in text extractions and in :meth:`get_fonts` and :meth:`get_images` lists -- even if they are not contained in the visible area given by *clip*. Example: Show the same source page, rotated by 90 and by -90 degrees: - >>> doc = fitz.open() # new empty PDF + >>> doc = pymupdf.open() # new empty PDF >>> page=doc.new_page() # new page in A4 format >>> >>> # upper half page - >>> r1 = fitz.Rect(0, 0, page.rect.width, page.rect.height/2) + >>> r1 = pymupdf.Rect(0, 0, page.rect.width, page.rect.height/2) >>> >>> # lower half page >>> r2 = r1 + (0, page.rect.height/2, 0, page.rect.height/2) >>> - >>> src = fitz.open("PyMuPDF.pdf") # show page 0 of this + >>> src = pymupdf.open("PyMuPDF.pdf") # show page 0 of this >>> >>> page.show_pdf_page(r1, src, 0, rotate=90) >>> page.show_pdf_page(r2, src, 0, rotate=-90) @@ -1568,6 +2043,13 @@ In a nutshell, this is what you can do with PyMuPDF: .. image:: images/img-showpdfpage.* :scale: 70 + |history_begin| + + * Changed in v1.14.11: Parameter *reuse_xref* has been deprecated. Position the source rectangle centered in target rectangle. Any rotation angle is now supported. + * Changed in v1.18.3: New parameter `oc`. + + |history_end| + .. method:: new_shape() PDF only: Create a new :ref:`Shape` object for the page. @@ -1582,18 +2064,15 @@ In a nutshell, this is what you can do with PyMuPDF: pair: clip; search_for pair: textpage; search_for - .. method:: search_for(needle, *, clip=clip, quads=False, flags=TEXT_DEHYPHENATE | TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES, textpage=None) - - * Changed in v1.18.2: added `clip` parameter. Remove `hit_max` parameter. Add default "dehyphenate". - * Changed in v1.19.0: added `textpage` parameter. + .. method:: search_for(needle, *, clip=None, quads=False, flags=TEXT_DEHYPHENATE | TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES | TEXT_MEDIABOX_CLIP, textpage=None) Search for *needle* on a page. Wrapper for :meth:`TextPage.search`. :arg str needle: Text to search for. May contain spaces. Upper / lower case is ignored, but only works for ASCII characters: For example, "COMPÉTENCES" will not be found if needle is "compétences" -- "compÉtences" however will. Similar is true for German umlauts and the like. - :arg rect_like clip: *(New in v1.18.2)* only search within this area. + :arg rect_like clip: only search within this area. (New in v1.18.2) :arg bool quads: Return object type :ref:`Quad` instead of :ref:`Rect`. :arg int flags: Control the data extracted by the underlying :ref:`TextPage`. By default, ligatures and white spaces are kept, and hyphenation [#f8]_ is detected. - :arg textpage: (new in v1.19.0) use a previously created :ref:`TextPage`. This reduces execution time **significantly.** If specified, the 'flags' and 'clip' arguments are ignored. If omitted, a temporary textpage will be created. + :arg textpage: use a previously created :ref:`TextPage`. This reduces execution time **significantly.** If specified, the 'flags' and 'clip' arguments are ignored. If omitted, a temporary textpage will be created. (New in v1.19.0) :rtype: list @@ -1601,10 +2080,14 @@ In a nutshell, this is what you can do with PyMuPDF: A list of :ref:`Rect` or :ref:`Quad` objects, each of which -- **normally!** -- surrounds one occurrence of *needle*. **However:** if parts of *needle* occur on more than one line, then a separate item is generated for each these parts. So, if `needle = "search string"`, two rectangles may be generated. - **Changes in v1.18.2:** + |history_begin| + + Changes in v1.18.2: + + * There no longer is a limit on the list length (removal of the `hit_max` parameter). + * If a word is **hyphenated** at a line break, it will still be found. E.g. the needle "method" will be found even if hyphenated as "meth-od" at a line break, and two rectangles will be returned: one surrounding "meth" (without the hyphen) and another one surrounding "od". - * There no longer is a limit on the list length (removal of the `hit_max` parameter). - * If a word is **hyphenated** at a line break, it will still be found. E.g. the needle "method" will be found even if hyphenated as "meth-od" at a line break, and two rectangles will be returned: one surrounding "meth" (without the hyphen) and another one surrounding "od". + |history_end| .. note:: The method supports multi-line text marker annotations: you can use the full returned list as **one single** parameter for creating the annotation. @@ -1621,11 +2104,15 @@ In a nutshell, this is what you can do with PyMuPDF: The `matches` list will contain the words matching the given pattern. In the same way you can select `span["text"]` from the output of `page.get_text("dict")`. + |history_begin| - .. method:: set_mediabox(r) + * Changed in v1.18.2: added `clip` parameter. Remove `hit_max` parameter. Add default "dehyphenate". + * Changed in v1.19.0: added `textpage` parameter. - * New in v1.16.13 - * Changed in v1.19.4: remove all other rectangle definitions. + |history_end| + + + .. method:: set_mediabox(r) PDF only: Change the physical page dimension by setting :data:`mediabox` in the page's object definition. @@ -1635,6 +2122,13 @@ In a nutshell, this is what you can do with PyMuPDF: .. caution:: For non-empty pages this may have undesired effects, because the location of all content depends on this value and will therefore change position or even disappear. + |history_begin| + + * New in v1.16.13 + * Changed in v1.19.4: remove all other rectangle definitions. + + |history_end| + .. method:: set_cropbox(r) @@ -1646,20 +2140,20 @@ In a nutshell, this is what you can do with PyMuPDF: >>> page = doc.new_page() >>> page.rect - fitz.Rect(0.0, 0.0, 595.0, 842.0) + pymupdf.Rect(0.0, 0.0, 595.0, 842.0) >>> >>> page.cropbox # cropbox and mediabox still equal - fitz.Rect(0.0, 0.0, 595.0, 842.0) + pymupdf.Rect(0.0, 0.0, 595.0, 842.0) >>> >>> # now set cropbox to a part of the page - >>> page.set_cropbox(fitz.Rect(100, 100, 400, 400)) + >>> page.set_cropbox(pymupdf.Rect(100, 100, 400, 400)) >>> # this will also change the "rect" property: >>> page.rect - fitz.Rect(0.0, 0.0, 300.0, 300.0) + pymupdf.Rect(0.0, 0.0, 300.0, 300.0) >>> >>> # but mediabox remains unaffected >>> page.mediabox - fitz.Rect(0.0, 0.0, 595.0, 842.0) + pymupdf.Rect(0.0, 0.0, 595.0, 842.0) >>> >>> # revert CropBox change >>> # either set it to MediaBox @@ -1673,14 +2167,21 @@ In a nutshell, this is what you can do with PyMuPDF: .. method:: set_trimbox(r) - * New in v1.19.4 - PDF only: Set the resp. rectangle in the page object. For the meaning of these objects see :ref:`AdobeManual`, page 77. Parameter and restrictions are the same as for :meth:`Page.set_cropbox`. + |history_begin| + + * New in v1.19.4 + + |history_end| .. attribute:: rotation - Contains the rotation of the page in degrees (always 0 for non-PDF types). + Contains the rotation of the page in degrees (always 0 for non-PDF types). This is a copy of the value in the PDF file. The PDF documentation says: + + *"The number of degrees by which the page should be rotated clockwise when displayed or printed. The value must be a multiple of 90. Default value: 0."* + + In PyMuPDF, we make sure that this attribute is always one of 0, 90, 180 or 270. :type: int @@ -1737,7 +2238,7 @@ In a nutshell, this is what you can do with PyMuPDF: >>> page.set_rotation(90) # rotate an ISO A4 page >>> page.rect Rect(0.0, 0.0, 842.0, 595.0) - >>> p = fitz.Point(0, 0) # where did top-left point land? + >>> p = pymupdf.Point(0, 0) # where did top-left point land? >>> p * page.rotation_matrix Point(842.0, 0.0) >>> @@ -1746,19 +2247,19 @@ In a nutshell, this is what you can do with PyMuPDF: .. attribute:: first_link - Contains the first :ref:`Link` of a page (or *None*). + Contains the first :ref:`Link` of a page (or ``None``). :type: :ref:`Link` .. attribute:: first_annot - Contains the first :ref:`Annot` of a page (or *None*). + Contains the first :ref:`Annot` of a page (or ``None``). :type: :ref:`Annot` .. attribute:: first_widget - Contains the first :ref:`Widget` of a page (or *None*). + Contains the first :ref:`Widget` of a page (or ``None``). :type: :ref:`Widget` @@ -1789,6 +2290,8 @@ In a nutshell, this is what you can do with PyMuPDF: ----- +.. _link_dict_description: + Description of *get_links()* Entries ---------------------------------------- Each entry of the :meth:`Page.get_links` list is a dictionary with the following keys: @@ -1799,13 +2302,15 @@ Each entry of the :meth:`Page.get_links` list is a dictionary with the following * *page*: a 0-based integer indicating the destination page. Required for *LINK_GOTO* and *LINK_GOTOR*, else ignored. -* *to*: either a *fitz.Point*, specifying the destination location on the provided page, default is *fitz.Point(0, 0)*, or a symbolic (indirect) name. If an indirect name is specified, *page = -1* is required and the name must be defined in the PDF in order for this to work. Required for *LINK_GOTO* and *LINK_GOTOR*, else ignored. +* *to*: either a *pymupdf.Point*, specifying the destination location on the provided page, default is *pymupdf.Point(0, 0)*, or a symbolic (indirect) name. If an indirect name is specified, *page = -1* is required and the name must be defined in the PDF in order for this to work. Required for *LINK_GOTO* and *LINK_GOTOR*, else ignored. * *file*: a string specifying the destination file. Required for *LINK_GOTOR* and *LINK_LAUNCH*, else ignored. * *uri*: a string specifying the destination internet resource. Required for *LINK_URI*, else ignored. You should make sure to start this string with an unambiguous substring, that classifies the subtype of the URL, like `"http://"`, `"https://"`, `"file://"`, `"ftp://"`, `"mailto:"`, etc. Otherwise your browser will try to interpret the text and come to unwanted / unexpected conclusions about the intended URL type. -* *xref*: an integer specifying the PDF :data:`xref` of the link object. Do not change this entry in any way. Required for link deletion and update, otherwise ignored. For non-PDF documents, this entry contains *-1*. It is also *-1* for **all** entries in the *get_links()* list, if **any** of the links is not supported by MuPDF - see the note below. +* :data:`xref`: an integer specifying the PDF :data:`xref` of the link object. Do not change this entry in any way. Required for link deletion and update, otherwise ignored. For non-PDF documents, this entry contains *-1*. It is also *-1* for **all** entries in the *get_links()* list, if **any** of the links is not supported by MuPDF - see :ref:`notes_on_supporting_links`. + +.. _notes_on_supporting_links: Notes on Supporting Links --------------------------- @@ -1835,7 +2340,7 @@ Indirect *LINK_GOTOR* destinations can in general of course not be checked for v 2. Determine the target page number ("pno", 0-based) and a :ref:`Point` on it, where the link should be directed to. -3. Create a dictionary `d = {"kind": fitz.LINK_GOTO, "page": pno, "from": bbox, "to": point}`. +3. Create a dictionary `d = {"kind": pymupdf.LINK_GOTO, "page": pno, "from": bbox, "to": point}`. 4. Execute `page.insert_link(d)`. @@ -1847,20 +2352,171 @@ This is an overview of homologous methods on the :ref:`Document` and on the :ref ====================================== ===================================== **Document Level** **Page Level** ====================================== ===================================== -*Document.get_page_fonts(pno)* :meth:`Page.get_fonts` -*Document.get_page_images(pno)* :meth:`Page.get_images` -*Document.get_page_pixmap(pno, ...)* :meth:`Page.get_pixmap` -*Document.get_page_text(pno, ...)* :meth:`Page.get_text` -*Document.search_page_for(pno, ...)* :meth:`Page.search_for` +:meth:`Document.get_page_fonts` :meth:`Page.get_fonts` +:meth:`Document.get_page_images` :meth:`Page.get_images` +:meth:`Document.get_page_pixmap` :meth:`Page.get_pixmap` +:meth:`Document.get_page_text` :meth:`Page.get_text` +:meth:`Document.search_page_for` :meth:`Page.search_for` ====================================== ===================================== -The page number "pno" is a 0-based integer `-∞ < pno < page_count`. - .. note:: Most document methods (left column) exist for convenience reasons, and are just wrappers for: *Document[pno].*. So they **load and discard the page** on each execution. - However, the first two methods work differently. They only need a page's object definition statement - the page itself will **not** be loaded. So e.g. :meth:`Page.get_fonts` is a wrapper the other way round and defined as follows: *page.get_fonts == page.parent.get_page_fonts(page.number)*. + However, the first two methods work differently. They only need a page's object definition statement - the page itself will **not** be loaded. So e.g. :meth:`Page.get_fonts` is a wrapper the other way round and defined as follows: `page.get_fonts` == `page.parent.get_page_fonts(page.number)`. + + +When calling the :ref:`Document` equivalent methods then the page number is sent through as a parameter, e.g.: + +`Document.get_page_images(pno)` or `Document.get_page_text(pno)` + +.. tip:: + + The page number parameter, ``pno``, is a 0-based integer `-∞ < pno < page_count`. + + + + + +Tables and Related Classes +------------------------------------ + +The `TableFinder` class is returned by :meth:`Page.find_tables` and has related classes as follows: + + +.. class:: TableFinder + + An object always returned by :meth:`Page.find_tables`. Attributes of interest: + + .. attribute:: tables + + A list of :class:`Table` objects, each of which represents a table found on the page. An empty list if no tables are found. + + .. attribute:: page + + A reference to the :ref:`Page` object. + + :type: :ref:`Page` + + +.. class:: Table + + An object representing a table found on the page. + + + .. attribute:: page + + A back-reference to the owning page. + + :type: :ref:`Page` + + .. attribute:: cells + + An array of `Rect` objects for each cell in the table. + + :type: list + + + .. attribute:: header + + A `TableHeader` object. + + :type: `TableHeader` + + + .. attribute:: bbox + + The bounding box of all cells of the table header. + + + :type: :ref:`Rect` + + + + .. attribute:: row_count + + Number of rows in the table. + + :type: int + + + .. attribute:: col_count + + Number of columns in the table. + + :type: int + + + .. attribute:: rows + + An array of `TableRow` objects for each row in the table. + + :type: list + + + .. method:: extract() + + Extracts table cell text data into a list. + + :type: list + + .. method:: to_markdown(clean=False, fill_empty=True) + + Extracts table data into Markdown text format. + + + :arg bool clean: If ``True`` then markdown syntax is removed from cell content. + :arg bool fill_empty: If ``True`` then cell content `None` is replaced by the values above (columns) or left (rows) in an effort to approximate row and columns spans. + + + :type: string + + + .. method:: to_pandas() + + Return a `pandas DataFrame `_ `DataFrame `_ version of the table. + + :type: pandas DataFrame + + + +.. class:: TableHeader + + + Dedicated class for table headers. + + .. attribute:: bbox + + The bounding box of the union of cells belonging to the table header, given as a tuple (x0, y0, x1, y1). This rectangle contains all table header cells. + + :type: :ref:`Rect` + + .. attribute:: cells + + A list of tuples for each bbox of a column header. + + :type: list + + .. attribute:: names + + A list of strings with column header text. + + :type: list + + .. attribute:: external + + A boolean indicating whether the header is outside the table cells. + + :type: `bool` + + +.. class:: TableRow + + Dedicated class for table rows. + + +---- + .. rubric:: Footnotes @@ -1874,7 +2530,7 @@ The page number "pno" is a 0-based integer `-∞ < pno < page_count`. .. [#f5] The previous algorithm caused images to be **shrunk** to this intersection. Now the image can be anywhere on :attr:`Page.mediabox`, potentially being invisible or only partially visible if the cropbox (representing the visible page part) is smaller. -.. [#f6] If you need to also see annotations or fields in the target page, you can try and convert the source PDF to another PDF using :meth:`Document.convert_to_pdf`. The underlying MuPDF function of that method will convert these objects to normal page content. Then use :meth:`Page.show_pdf_page` with the converted PDF page. +.. [#f6] If you need to also see annotations or fields in the target page, you can convert the source PDF using :meth:`Document.bake`. The underlying MuPDF function of that method will convert these objects to normal page content. Then use :meth:`Page.show_pdf_page` with the converted PDF page. .. [#f7] In PDF, an area enclosed by some lines or curves can have a property called "orientation". This is significant for switching on or off the fill color of that area when there exist multiple area overlaps - see discussion in method :meth:`Shape.finish` using the "non-zero winding number" rule. While orientation of curves, quads, triangles and other shapes enclosed by lines always was detectable, this has been impossible for "re" (rectangle) items in the past. Adding the orientation parameter now delivers the missing information. @@ -1882,4 +2538,6 @@ The page number "pno" is a 0-based integer `-∞ < pno < page_count`. .. [#f9] Objects inside the source page, like images, text or drawings, are never aware of whether their owning page now is under OC control inside the target PDF. If source page objects are OC-controlled in the source PDF, then this will not be retained on the target: they will become unconditionally visible. +.. [#f10] This value is always 96, the default of the PDF interpreter. It **does not reflect** the resolution of the image itself. If you need the image's resolution, use the :meth:`Pixmap.xres` and :meth:`Pixmap.yres` attributes of the :ref:`Pixmap` created from the image binary. + .. include:: footer.rst diff --git a/docs/pixmap.rst b/docs/pixmap.rst index dccbbf0e5..679773622 100644 --- a/docs/pixmap.rst +++ b/docs/pixmap.rst @@ -33,8 +33,9 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :meth:`Pixmap.invert_irect` invert the pixels of a given area :meth:`Pixmap.pdfocr_save` save the pixmap as an OCRed 1-page PDF :meth:`Pixmap.pdfocr_tobytes` save the pixmap as an OCRed 1-page PDF -:meth:`Pixmap.pil_save` save as image using pillow -:meth:`Pixmap.pil_tobytes` write to `bytes` object using pillow +:meth:`Pixmap.pil_image` create a Pillow Image +:meth:`Pixmap.pil_save` save as a Pillow Image +:meth:`Pixmap.pil_tobytes` write to `bytes` as a Pillow Image :meth:`Pixmap.pixel` return the value of a pixel :meth:`Pixmap.save` save the pixmap in a variety of formats :meth:`Pixmap.set_alpha` set alpha values @@ -71,7 +72,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. class:: Pixmap - .. method:: __init__(self, colorspace, irect, alpha) + .. method:: __init__(self, colorspace, irect, alpha=False) **New empty pixmap:** Create an empty pixmap of size and origin given by the rectangle. So, *irect.top_left* designates the top left corner of the pixmap, and its width and height are *irect.width* resp. *irect.height*. Note that the image area is **not initialized** and will contain crap data -- use eg. :meth:`clear_with` or :meth:`set_rect` to be sure. @@ -80,17 +81,17 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg irect_like irect: The pixmap's position and dimension. - :arg bool alpha: Specifies whether transparency bytes should be included. Default is *False*. + :arg bool alpha: Specifies whether transparency bytes should be included. Default is ``False``. .. method:: __init__(self, colorspace, source) - **Copy and set colorspace:** Copy *source* pixmap converting colorspace. Any colorspace combination is possible, but source colorspace must not be *None*. + **Copy and set colorspace:** Copy *source* pixmap converting colorspace. Any colorspace combination is possible, but source colorspace must not be ``None``. - :arg colorspace: desired **target** colorspace. This **may also be** *None*. In this case, a "masking" pixmap is created: its :attr:`Pixmap.samples` will consist of the source's alpha bytes only. + :arg colorspace: desired **target** colorspace. This **may also be** ``None``. In this case, a "masking" pixmap is created: its :attr:`Pixmap.samples` will consist of the source's alpha bytes only. :type colorspace: :ref:`Colorspace` :arg source: the source pixmap. - :type source: *Pixmap* + :type source: :ref:`Pixmap`. .. method:: __init__(self, source, mask) @@ -99,17 +100,17 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". **Copy and add image mask:** Copy *source* pixmap, add an alpha channel with transparency data from a mask pixmap. :arg source: pixmap without alpha channel. - :type source: :ref:`Pixmap` + :type source: :ref:`Pixmap`. :arg mask: a mask pixmap. Must be a graysale pixmap. - :type mask: :ref:`Pixmap` + :type mask: :ref:`Pixmap`. .. method:: __init__(self, source, width, height, [clip]) - **Copy and scale:** Copy *source* pixmap, scaling new width and height values -- the image will appear stretched or shrunk accordingly. Supports partial copying. The source colorspace may be *None*. + **Copy and scale:** Copy *source* pixmap, scaling new width and height values -- the image will appear stretched or shrunk accordingly. Supports partial copying. The source colorspace may be ``None``. :arg source: the source pixmap. - :type source: *Pixmap* + :type source: :ref:`Pixmap`. :arg float width: desired target width. @@ -124,15 +125,15 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". **Copy and add or drop alpha:** Copy *source* and add or drop its alpha channel. Identical copy if *alpha* equals *source.alpha*. If an alpha channel is added, its values will be set to 255. :arg source: source pixmap. - :type source: *Pixmap* + :type source: :ref:`Pixmap`. - :arg bool alpha: whether the target will have an alpha channel, default and mandatory if source colorspace is *None*. + :arg bool alpha: whether the target will have an alpha channel, default and mandatory if source colorspace is ``None``. .. note:: A typical use includes separation of color and transparency bytes in separate pixmaps. Some applications require this like e.g. *wx.Bitmap.FromBufferAndAlpha()* of *wxPython*: >>> # 'pix' is an RGBA pixmap - >>> pixcolors = fitz.Pixmap(pix, 0) # extract the RGB part (drop alpha) - >>> pixalpha = fitz.Pixmap(None, pix) # extract the alpha part + >>> pixcolors = pymupdf.Pixmap(pix, 0) # extract the RGB part (drop alpha) + >>> pixalpha = pymupdf.Pixmap(None, pix) # extract the alpha part >>> bm = wx.Bitmap.FromBufferAndAlpha(pix.width, pix.height, pixcolors.samples, pixalpha.samples) @@ -178,7 +179,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". **From a PDF image:** Create a pixmap from an image **contained in PDF** *doc* identified by its :data:`xref`. All pimap properties are set by the image. Have a look at `extract-img1.py `_ and `extract-img2.py `_ to see how this can be used to recover all of a PDF's images. - :arg doc: an opened **PDF** document. + :arg doc: an opened |PDF| document. :type doc: :ref:`Document` :arg int xref: the :data:`xref` of an image object. For example, you can make a list of images used on a particular page with :meth:`Document.get_page_images`, which also shows the :data:`xref` numbers of each image. @@ -209,13 +210,13 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. method:: gamma_with(gamma) - Apply a gamma factor to a pixmap, i.e. lighten or darken it. Pixmaps with colorspace *None* are ignored with a warning. + Apply a gamma factor to a pixmap, i.e. lighten or darken it. Pixmaps with colorspace ``None`` are ignored with a warning. :arg float gamma: *gamma = 1.0* does nothing, *gamma < 1.0* lightens, *gamma > 1.0* darkens the image. .. method:: shrink(n) - Shrink the pixmap by dividing both, its width and height by 2\ :sup:`n`. + Shrink the pixmap by dividing both, its width and height by 2\ :sup:``n``. :arg int n: determines the new pixmap (samples) size. For example, a value of 2 divides width and height by 4 and thus results in a size of one 16\ :sup:`th` of the original. Values less than 1 are ignored with a warning. @@ -247,7 +248,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg sequence color: the desired value, given as a sequence of integers in `range(256)`. The length of the sequence must equal :attr:`Pixmap.n`, which includes any alpha byte. :rtype: bool - :returns: *False* if the rectangle was invalid or had an empty intersection with :attr:`Pixmap.irect`, else *True*. + :returns: ``False`` if the rectangle was invalid or had an empty intersection with :attr:`Pixmap.irect`, else ``True``. .. note:: @@ -284,12 +285,12 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg bytes,bytearray,BytesIO alphavalues: the new alpha values. If provided, its length must be at least *width * height*. If omitted (`None`), all alpha values are set to 255 (no transparency). *Changed in version 1.14.13:* *io.BytesIO* is now also accepted. :arg bool premultiply: *New in v1.18.13:* whether to premultiply color components with the alpha value. - :arg list,tuple opaque: ignore the alpha value and set this color to fully transparent. A sequence of integers in `range(256)` with a length of :attr:`Pixmap.n`. Default is *None*. For example, a typical choice for RGB would be `opaque=(255, 255, 255)` (white). + :arg list,tuple opaque: ignore the alpha value and set this color to fully transparent. A sequence of integers in `range(256)` with a length of :attr:`Pixmap.n`. Default is ``None``. For example, a typical choice for RGB would be `opaque=(255, 255, 255)` (white). .. method:: invert_irect([irect]) - Invert the color of all pixels in :ref:`IRect` *irect*. Will have no effect if colorspace is *None*. + Invert the color of all pixels in :ref:`IRect` *irect*. Will have no effect if colorspace is ``None``. :arg irect_like irect: The area to be inverted. Omit to invert everything. @@ -360,7 +361,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg str,fp filename: identifies the file to save to. May be either a string or a pointer to a file opened with "wb" (includes `io.BytesIO()` objects). :arg bool compress: whether to compress the resulting PDF, default is `True`. :arg str language: the languages occurring in the image. This must be specified in Tesseract format. Default is "eng" for English. Use "+"-separated Tesseract language codes for multiple languages, like "eng+spa" for English and Spanish. - : arg str tessdata: folder name of Tesseract's language support. If omitted, this information must be present as environment variable `TESSDATA_PREFIX`. + :arg str tessdata: folder name of Tesseract's language support. If omitted, this information must be present as environment variable `TESSDATA_PREFIX`. .. note:: **Will fail** if Tesseract is not installed or if the environment variable "TESSDATA_PREFIX" is not set to the `tessdata` folder name and not provided as parameter. @@ -372,25 +373,30 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". Perform text recognition using Tesseract and convert the image to a 1-page PDF with an OCR text layer. Internally invokes :meth:`Pixmap.pdfocr_save`. - :returns: A 1-page PDF file in memory. Could be opened like `doc=fitz.open("pdf", pix.pdfocr_tobytes())`, and text extractions could be performed on its `page=doc[0]`. + :returns: A 1-page PDF file in memory. Could be opened like `doc=pymupdf.open("pdf", pix.pdfocr_tobytes())`, and text extractions could be performed on its `page=doc[0]`. .. note:: Another possible use is insertion into some pdf. The following snippet reads the images of a folder and stores them as pages in a new PDF that contain an OCR text layer:: - doc = fitz.open() + doc = pymupdf.open() for imgfile in os.listdir(folder): - pix = fitz.Pixmap(imgfile) - imgpdf = fitz.open("pdf", pix.pdfocr_tobytes()) + pix = pymupdf.Pixmap(imgfile) + imgpdf = pymupdf.open("pdf", pix.pdfocr_tobytes()) doc.insert_pdf(imgpdf) pix = None imgpdf.close() doc.save("ocr-images.pdf") - .. method:: pil_save(*args, **kwargs) + .. method:: pil_image() - * New in v1.17.3 + Create a Pillow Image from the pixmap. PIL / Pillow must be installed. + + :raises ImportError: if Pillow is not installed. + :returns: a ``PIL.Image`` object + + .. method:: pil_save(*args, unmultiply=False, **kwargs) Write the pixmap as an image file using Pillow. Use this method for output unsupported by MuPDF. Examples are @@ -398,19 +404,24 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". * Storing EXIF information. * If you do not provide dpi information, the values *xres*, *yres* stored with the pixmap are automatically used. - A simple example: `pix.pil_save("some.webp", optimize=True, dpi=(150, 150))`. For details on other parameters see the Pillow documentation. + A simple example: `pix.pil_save("some.webp", optimize=True, dpi=(150, 150))`. + + :arg bool unmultiply: If the pixmap's colorspace is RGB with transparency, the alpha values may or may not already be multiplied into the color components ref/green/blue (called "premultiplied"). To enforce undoing premultiplication, set this parameter to `True`. To learn about some background, e.g. look for `"Premultiplied alpha" on this page `_. - Since v1.22.0, PyMuPDF supports JPEG output directly. For both, performance reasons and for reducing external dependencies, the use of this method is no longer recommended when outputting JPEG images. + + For details on other parameters see the Pillow documentation. + + Since v1.22.0, PyMuPDF supports JPEG output directly. We recommended to no longer use this method for JPEG output -- for performance reasons and for avoiding unnecessary external dependencies. :raises ImportError: if Pillow is not installed. - .. method:: pil_tobytes(*args, **kwargs) + .. method:: pil_tobytes(*args, unmultiply=False, **kwargs) * New in v1.17.3 - Return an image as a bytes object in the specified format using Pillow. For example `stream = pix.pil_tobytes(format="WEBP", optimize=True)`. Also see above. For details on other parameters see the Pillow documentation. + Return an image as a bytes object in the specified format using Pillow. For example `stream = pix.pil_tobytes(format="WEBP", optimize=True, dpi=(150, 150))`. Also see above. For details on other parameters see the Pillow documentation. - .raises ImportError: if Pillow is not installed. + :raises ImportError: if Pillow is not installed. :rtype: bytes @@ -475,7 +486,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. attribute:: colorspace - The colorspace of the pixmap. This value may be *None* if the image is to be treated as a so-called *image mask* or *stencil mask* (currently happens for extracted PDF document images only). + The colorspace of the pixmap. This value may be ``None`` if the image is to be treated as a so-called *image mask* or *stencil mask* (currently happens for extracted PDF document images only). :type: :ref:`Colorspace` @@ -541,6 +552,9 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". 367 ns ± 1.75 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each) In [4]: %timeit len(pix.samples) 3.52 ms ± 57.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + After the Pixmap has been destroyed, any attempt to use the memoryview + will fail with ValueError. :type: memoryview @@ -554,6 +568,9 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". img = QtGui.QImage(pix.samples_ptr, pix.width, pix.height, format) # (2) Both of the above lead to the same Qt image, but (2) can be **many hundred times faster**, because it avoids an additional copy of the pixel area. + + Warning: after the Pixmap has been destroyed, the Python pointer will be + invalid and attempting to use it may crash the Python interpreter. :type: int @@ -593,7 +610,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. attribute:: n - Number of components per pixel. This number depends on colorspace and alpha. If colorspace is not *None* (stencil masks), then *Pixmap.n - Pixmap.aslpha == pixmap.colorspace.n* is true. If colorspace is *None*, then *n == alpha == 1*. + Number of components per pixel. This number depends on colorspace and alpha. If colorspace is not ``None`` (stencil masks), then *Pixmap.n - Pixmap.alpha == pixmap.colorspace.n* is true. If colorspace is ``None``, then *n == alpha == 1*. :type: int @@ -611,7 +628,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. attribute:: interpolate - An information-only boolean flag set to *True* if the image will be drawn using "linear interpolation". If *False* "nearest neighbour sampling" will be used. + An information-only boolean flag set to ``True`` if the image will be drawn using "linear interpolation". If ``False`` "nearest neighbour sampling" will be used. :type: bool @@ -623,7 +640,7 @@ The following file types are supported as **input** to construct pixmaps: **BMP, 1. Directly create a pixmap with *Pixmap(filename)* or *Pixmap(byterray)*. The pixmap will then have properties as determined by the image. -2. Open such files with *fitz.open(...)*. The result will then appear as a document containing one single page. Creating a pixmap of this page offers all the options available in this context: apply a matrix, choose colorspace and alpha, confine the pixmap to a clip area, etc. +2. Open such files with *pymupdf.open(...)*. The result will then appear as a document containing one single page. Creating a pixmap of this page offers all the options available in this context: apply a matrix, choose colorspace and alpha, confine the pixmap to a clip area, etc. **SVG images** are only supported via method 2 above, not directly as pixmaps. But remember: the result of this is a **raster image** as is always the case with pixmaps [#f1]_. @@ -649,7 +666,7 @@ psd gray, rgb, cmyk yes .psd Adobe Photoshop Document .. note:: * Not all image file types are supported (or at least common) on all OS platforms. E.g. PAM and the Portable Anymap formats are rare or even unknown on Windows. - * Especially pertaining to CMYK colorspaces, you can always convert a CMYK pixmap to an RGB pixmap with *rgb_pix = fitz.Pixmap(fitz.csRGB, cmyk_pix)* and then save that in the desired format. + * Especially pertaining to CMYK colorspaces, you can always convert a CMYK pixmap to an RGB pixmap with *rgb_pix = pymupdf.Pixmap(pymupdf.csRGB, cmyk_pix)* and then save that in the desired format. * As can be seen, MuPDF's image support range is different for input and output. Among those supported both ways, PNG and JPEG are probably the most popular. * We also recommend using "ppm" formats as input to tkinter's *PhotoImage* method like this: *tkimg = tkinter.PhotoImage(data=pix.tobytes("ppm"))* (also see the tutorial). This is **very** fast (**60 times** faster than PNG). diff --git a/docs/pymupdf-pro.rst b/docs/pymupdf-pro.rst new file mode 100644 index 000000000..d7219844b --- /dev/null +++ b/docs/pymupdf-pro.rst @@ -0,0 +1,177 @@ + +.. include:: header.rst + + + +.. _pymupdf-pro + +PyMuPDF Pro +============= + + +|PyMuPDF Pro| is a set of *commercial extensions* for |PyMuPDF|. + +Enhance |PyMuPDF| capability with **Office** document support & **RAG/LLM** integrations. + +- Enables Office document handling, including ``doc``, ``docx``, ``hwp``, ``hwpx``, ``ppt``, ``pptx``, ``xls``, ``xlsx``, and others. +- Supports text and table extraction, document conversion and more. +- Includes the commercial version of |PyMuPDF4LLM|. + +To enquire about obtaining a commercial license, then `use this contact page `_. + + +.. note:: + + A licensed version of |PyMuPDF Pro| also gives you a licensed version of |PyMuPDF4LLM|. If you are interested in using the |PyMuPDF4LLM| package you should install it separately. + + +Platform support +-------------------- + +Available for these platforms only: + +- Windows x86_64. +- Linux x86_64 (glibc). +- MacOS x86_64. +- MacOS arm64. + + +Office file support +---------------------- + +In addition to the `standard file types supported by PyMuPDF `, |PyMuPDF Pro| supports: + +.. list-table:: + :header-rows: 1 + + * - **DOC/DOCX** + - **XLS/XLSX** + - **PPT/PPTX** + - **HWP/HWPX** + * - .. image:: images/icons/icon-docx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-xlsx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-pptx.svg + :width: 40 + :height: 40 + - .. image:: images/icons/icon-hangul.svg + :width: 40 + :height: 40 + + + +Usage +-------------- + +Installation +~~~~~~~~~~~~~~~~~~ + +Install via pip with: + +.. code-block:: bash + + pip install pymupdfpro + + +Loading an **Office** document +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Import |PyMuPDF Pro| and you can then reference **Office** documents directly, e.g.: + +.. code-block:: python + + import pymupdf.pro + pymupdf.pro.unlock() + # PyMuPDF has now been extended with PyMuPDF Pro features, with some restrictions. + doc = pymupdf.open("my-office-doc.xls") + +.. note:: + + All standard |PyMuPDF| functionality is exposed as expected - |PyMuPDF Pro| handles the extended **Office** file types + + +From then on you can work with document pages just as you would do normally, but with respect to the `restrictions `. + + +Converting an **Office** document to |PDF| +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following code snippet can convert your **Office** document to |PDF| format: + +.. code-block:: python + + import pymupdf.pro + pymupdf.pro.unlock() + + doc = pymupdf.open("my-office-doc.xlsx") + + pdfdata = doc.convert_to_pdf() + with open('output.pdf', 'wb') as f: + f.write(pdfdata) + + + +.. _PyMuPDFPro_Restrictions: + +Restrictions +~~~~~~~~~~~~~~~~~~~~ + + +|PyMuPDF Pro| functionality is restricted without a license key as follows: + + **Only the first 3 pages of any document will be available.** + +To unlock full functionality you should `obtain a trial key `_. + + +.. _PyMuPDFPro_TrialKeys: + +Trial keys +----------------------- + +To obtain a license key `please fill out the form on this page `_. You will then have the trial key emailled to the address you submitted. + + +Using a key +~~~~~~~~~~~~~~~~ + + +Initialize |PyMuPDF Pro| with a key as follows: + +.. code-block:: python + + import pymupdf.pro + pymupdf.pro.unlock(my_key) + # PyMuPDF has now been extended with PyMuPDF Pro features. + +This will allow you to evaluate the product for a limited time. If you want to use |PyMuPDF Pro| after this time you should then `enquire about obtaining a commercial license `_. + + +Fonts +----------------------- + +By default `pymupdf.pro.unlock()` searches for all installed font directories. + +This can be controlled with keyword-only args: + +* `fontpath`: specific font directories, either as a list/tuple or `os.sep`-separated string. + If None (the default), we use `os.environ['PYMUPDFPRO_FONT_PATH']` if set. +* `fontpath_auto`: Whether to append system font directories. + If None (the default) we use true if `os.environ['PYMUPDFPRO_FONT_PATH_AUTO']` is '1'. + If true we append all system font directories. + +Function `pymupdf.pro.get_fontpath()` returns a tuple of all font directories used by `unlock()`. + + +.. raw:: html + + + + + + + +.. include:: footer.rst diff --git a/docs/pymupdf4llm/api.rst b/docs/pymupdf4llm/api.rst new file mode 100644 index 000000000..5116bb704 --- /dev/null +++ b/docs/pymupdf4llm/api.rst @@ -0,0 +1,336 @@ +.. include:: ../header.rst + + + +.. _pymupdf4llm-api: + + +API +=========================================================================== + +The |PyMuPDF4LLM| API +-------------------------- + + +.. property:: version + + Prints the version of the library. + +.. method:: to_markdown(doc: pymupdf.Document | str, *, + detect_bg_color: bool = True, + dpi: int = 150, + embed_images: bool = False, + extract_words: bool = False, + filename: str | None = None, + fontsize_limit: float = 3, + force_text: bool = True, + graphics_limit: int = None, + hdr_info: Any = None, + ignore_alpha: bool = False, + ignore_code: bool = False, + ignore_graphics: bool = False, + ignore_images: bool = False, + image_format: str = "png", + image_path: str = "", + image_size_limit: float = 0.05, + margins: int = 0, + page_chunks: bool = False, + page_height: float = None, + page_separators: bool = False, + page_width: float = 612, + pages: list | range | None = None, + show_progress: bool = False, + table_strategy: str = "lines_strict", + use_glyphs: bool = False + write_images: bool = False, + ) -> str | list[dict] + + Read the pages of the file and outputs the text of its pages in |Markdown| format. How this should happen in detail can be influenced by a number of parameters. Please note that there exists **support for building page chunks** from the |Markdown| text. + + :arg Document,str doc: the file, to be specified either as a file path string, or as a |PyMuPDF| Document (created via `pymupdf.open`). In order to use `pathlib.Path` specifications, Python file-like objects, documents in memory etc. you **must** use a |PyMuPDF| Document. + + :arg bool detect_bg_color: does a simple check for the general background color of the pages (default is ``True``). If any text or vector has this color it will be ignored. May increase detection accuracy. + + :arg bool ignore_alpha: if ``True`` includes text even when completely transparent. Default is ``False``: transparent text will be ignored which usually increases detection accuracy. + + :arg list pages: optional, the pages to consider for output (caution: specify 0-based page numbers). If omitted all pages are processed. + + :arg hdr_info: optional. Use this if you want to provide your own header detection logic. This may be a callable or an object having a method named `get_header_id`. It must accept a text span (a span dictionary as contained in :meth:`~.extractDICT`) and a keyword parameter "page" (which is the owning :ref:`Page ` object). It must return a string "" or up to 6 "#" characters followed by 1 space. If omitted, a full document scan will be performed to find the most popular font sizes and derive header levels based on them. To completely avoid this behavior specify `hdr_info=lambda s, page=None: ""` or `hdr_info=False`. + + :arg bool write_images: when encountering images or vector graphics, images will be created from the respective page area and stored in the specified folder. Markdown references will be generated pointing to these images. Any text contained in these areas will not be included in the text output (but appear as part of the images). Therefore, if for instance your document has text written on full page images, make sure to set this parameter to `False`. + + :arg bool embed_images: like `write_images`, but images will be included in the markdown text as base64-encoded strings. Ignores `write_images` and `image_path` if used. This may drastically increase the size of your markdown text. + + :arg bool ignore_images: (New in v.0.0.20) Disregard images on the page. This may help detecting text correctly when pages are very crowded (often the case for documents representing presentation slides). Also speeds up processing time. + + :arg bool ignore_graphics: (New in v.0.0.20) Disregard vector graphics on the page. This may help detecting text correctly when pages are very crowded (often the case for documents representing presentation slides). Also speeds up processing time. This automatically prevents table detection. + + :arg float image_size_limit: this must be a ``0 <= value < 1``. Images are ignored if `width / page.rect.width <= image_size_limit` or `height / page.rect.height <= image_size_limit`. For instance, the default value 0.05 means that to be considered for inclusion, an image's width and height must be larger than 5% of the page's width and height, respectively. + + :arg int dpi: specify the desired image resolution in dots per inch. Relevant only if `write_images=True`. Default value is 150. + + :arg str image_path: store images in this folder. Relevant if `write_images=True`. Default is the path of the script directory. + + :arg str image_format: specify the desired image format via its extension. Default is "png" (portable network graphics). Another popular format may be "jpg". Possible values are all :ref:`supported output formats `. + + :arg bool force_text: generate text output even when overlapping images / graphics. This text then appears after the respective image. If `write_images=True` this parameter may be `False` to suppress repetition of text on images. + + :arg float,list margins: a float or a sequence of 2 or 4 floats specifying page borders. Only objects inside the margins will be considered for output. + + * `margin=f` yields `(f, f, f, f)` for `(left, top, right, bottom)`. + * `(top, bottom)` yields `(0, top, 0, bottom)`. + * To always read full pages **(default)**, use `margins=0`. + + :arg bool page_chunks: if `True` the output will be a list of `Document.page_count` dictionaries (one per page). Each dictionary has the following structure: + + - **"metadata"** - a dictionary consisting of the document's metadata :attr:`Document.metadata`, enriched with additional keys **"file_path"** (the file name), **"page_count"** (number of pages in document), and **"page_number"** (1-based page number). + + - **"toc_items"** - a list of Table of Contents items pointing to this page. Each item of this list has the format `[lvl, title, pagenumber]`, where `lvl` is the hierarchy level, `title` a string and `pagenumber` as a 1-based page number. + + - **"tables"** - a list of tables on this page. Each item is a dictionary with keys "bbox", "row_count" and "col_count". Key "bbox" is a `pymupdf.Rect` in tuple format of the table's position on the page. + + - **"images"** - a list of images on the page. This a copy of page method :meth:`Page.get_image_info`. + + - **"graphics"** - a list of vector graphics rectangles on the page. This is a list of boundary boxes of clustered vector graphics as delivered by method :meth:`Page.cluster_drawings`. + + - **"text"** - page content as |Markdown| text. + + - **"words"** - if `extract_words=True` was used. This is a list of tuples `(x0, y0, x1, y1, "wordstring", bno, lno, wno)` as delivered by `page.get_text("words")`. The **sequence** of these tuples however is the same as produced in the markdown text string and thus honors multi-column text. This is also true for text in tables: words are extracted in the sequence of table row cells. + + :arg bool page_separators: if ``True`` inserts a string ``--- end of page=n ---`` at the end of each page output. Intended for debugging purposes. The page number if 0-based. The separator string is wrapped with line breaks. Default is ``False``. + + :arg str filename: (New in v.0.0.19) Overwrites or sets the desired image file name of written images. Useful when the document is provided as a memory object (which has no inherent file name). + + :arg float page_width: specify a desired page width. This is ignored for documents with a fixed page width like PDF, XPS etc. **Reflowable** documents however, like e-books, office [#f2]_ or text files have no fixed page dimensions and by default are assumed to have Letter format width (612) and an **"infinite"** page height. This means that the **full document is treated as one large page.** + + :arg float page_height: specify a desired page height. For relevance see the `page_width` parameter. If using the default `None`, the document will appear as one large page with a width of `page_width`. Consequently in this case, no markdown page separators will occur (except the final one), respectively only one page chunk will be returned. + + :arg str table_strategy: `table detection strategy `_. Default is `"lines_strict"` which ignores background colors. In some occasions, other strategies may be more successful, for example `"lines"` which uses all vector graphics objects for detection. **Changed in v0.0.19:** A value of `None` will not perform any table detection at all. This may be useful when you know that your document contains no tables. Execution time savings can be significant. + + :arg int graphics_limit: use this to limit dealing with excess amounts of vector graphics elements. Scientific documents, or pages simulating text via graphics commands may contain tens of thousands of these objects. As vector graphics are analyzed for multiple purposes, runtime may quickly become intolerable. With this parameter, all vector graphics will be ignored if their count exceeds the threshold. **Changed in v0.0.19:** The page will still be processed, and text, tables and images should be extracted. + + :arg bool ignore_code: if `True` then mono-spaced text does not receive special formatting. Code blocks will no longer be generated. This value is set to `True` if `extract_words=True` is used. + + :arg bool extract_words: a value of `True` enforces `page_chunks=True` and adds key "words" to each page dictionary. Its value is a list of words as delivered by PyMuPDF's `Page` method `get_text("words")`. The sequence of the words in this list is the same as the extracted text. + + :arg bool show_progress: Default is `False`. A value of `True` displays a text-based progress bar as pages are being converted to Markdown. It will look similar to the following:: + + Processing input.pdf... + [==================== ] (148/291) + + :arg bool use_glyphs: (New in v.0.0.19) Default is `False`. A value of `True` will use the glyph number of the characters instead of the character itself if the font does not store the Unicode value. + + :returns: Either a string of the combined text of all selected document pages, or a list of dictionaries. + +.. method:: LlamaMarkdownReader(*args, **kwargs) + + Create a `pdf_markdown_reader.PDFMarkdownReader` using the `LlamaIndex`_ package. Please note that this package will **not automatically be installed** when installing **pymupdf4llm**. + + For details on the possible arguments, please consult the LlamaIndex documentation [#f1]_. + + :raises: `NotImplementedError`: Please install required `LlamaIndex`_ package. + :returns: a `pdf_markdown_reader.PDFMarkdownReader` and issues message "Successfully imported LlamaIndex". Please note that this method needs several seconds to execute. For details on using the markdown reader please see below. + +---- + + +.. class:: IdentifyHeaders + + .. method:: __init__(self, doc: pymupdf.Document | str, *, pages: list | range | None = None, body_limit: float = 11, max_levels: int = 6) + + Create an object which maps text font sizes to the respective number of '#' characters which are used by Markdown syntax to indicate header levels. The object is created by scanning the document for font size "popularity". The most popular font size and all smaller sizes are used for body text. Larger font sizes are mapped to the respective header levels - which correspond to the HTML tags `

` to `

`. + + All font sizes are rounded to integer values. + + If more than 6 header levels would be required, then the largest number smaller than the `
` font size is used for body text. + + Please note that creating the object will read and inspect the text of the entire document - independently of reading the document again in the `to_markdown()` method subsequently. Method `to_markdown()` by default **will create this object** if you do not override its `hdr_info=None` parameter. + + + :arg Document,str doc: the file, to be specified either as a file path string, or as a |PyMuPDF| Document (created via `pymupdf.open`). In order to use `pathlib.Path` specifications, Python file-like objects, documents in memory etc. you **must** use a |PyMuPDF| Document. + + :arg list pages: optional, the pages to consider. If omitted all pages are processed. + + :arg float body_limit: the default font size limit for body text. Only used when the document scan does not deliver valid information. + + :arg int max_levels: the maximum number of header levels to be used. Valid values are in `range(1, 7)`. The default is 6, which corresponds to the HTML tags `

` to `

`. A smaller value will limit the number of generated header levels. For instance, a value of 3 will only generate header tags "#", "##" and "###". Body text will be assumed for all font sizes smaller than the one corresponding to "###". + + + .. method:: get_header_id(self, span: dict, page=None) -> str + + Return appropriate markdown header prefix. This is either "" or a string of "#" characters followed by a space. + + Given a text span from a "dict"" extraction, determine the + markdown header prefix string of 0 to n concatenated '#' characters. + + :arg dict span: a dictionary containing the text span information. This is the same dictionary as returned by `page.get_text("dict")`. + + :arg Page page: the owning page object. This can be used when additional information needs to be extracted. + + :returns: a string of "#" characters followed by a space. + + .. attribute:: header_id + + A dictionary mapping (integer) font sizes to Markdown header strings like ``{14: '# ', 12: '## '}``. The dictionary is created by the :class:`IdentifyHeaders` constructor. The keys are the font sizes of the text spans in the document. The values are the respective header strings. + + .. attribute:: body_limit + + An integer value indicating the font size limit for body text. This is computed as ``min(header_id.keys()) - 1``. In the above example, body_limit would be 11. + + +---- + + +**How to limit header levels (example)** + +Limit the generated header levels to 3:: + + import pymupdf, pymupdf4llm + + filename = "input.pdf" + doc = pymupdf.open(filename) # use a Document for subsequent processing + my_headers = pymupdf4llm.IdentifyHeaders(doc, max_levels=3) # generate header info + md_text = pymupdf4llm.to_markdown(doc, hdr_info=my_headers) + + +**How to provide your own header logic (example 1)** + +Provide your own function which uses pre-determined, fixed font sizes:: + + import pymupdf, pymupdf4llm + + filename = "input.pdf" + doc = pymupdf.open(filename) # use a Document for subsequent processing + + def my_headers(span, page=None): + """ + Provide some custom header logic. + This is a callable which accepts a text span and the page. + Could be extended to check for other properties of the span, for + instance the font name, text color and other attributes. + """ + # header level is h1 if font size is larger than 14 + # header level is h2 if font size is larger than 10 + # otherwise it is body text + if span["size"] > 14: + return "# " + elif span["size"] > 10: + return "## " + else: + return "" + + # this will *NOT* scan the document for font sizes! + md_text = pymupdf4llm.to_markdown(doc, hdr_info=my_headers) + +**How to provide your own header logic (example 2)** + +This user function uses the document's Table of Contents -- under the assumption that the bookmark text is also present as a header line on the page (which certainly need not be the case!):: + + import pymupdf, pymupdf4llm + + filename = "input.pdf" + doc = pymupdf.open(filename) # use a Document for subsequent processing + TOC = doc.get_toc() # use the table of contents for determining headers + + def my_headers(span, page=None): + """ + Provide some custom header logic (experimental!). + This callable checks whether the span text matches any of the + TOC titles on this page. + If so, use TOC hierarchy level as header level. + """ + # TOC items on this page: + toc = [t for t in TOC if t[-1] == page.number + 1] + + if not toc: # no TOC items on this page + return "" + + # look for a match in the TOC items + for lvl, title, _ in toc: + if span["text"].startswith(title): + return "#" * lvl + " " + if title.startswith(span["text"]): + return "#" * lvl + " " + + return "" + + # this will *NOT* scan the document for font sizes! + md_text = pymupdf4llm.to_markdown(doc, hdr_info=my_headers) + +---- + + +.. class:: TocHeaders + + .. method:: __init__(self, doc: pymupdf.Document | str) + + Create an object which uses the document's Table of Contents (TOC) to determine header levels. Upon object creation, the table of contents is read via the `Document.get_toc()` method. The TOC data is then used to determine header levels in the `to_markdown()` method. + + This is an alternative to :class:`IdentifyHeaders`. Instead of running through the full document to identify font sizes, it uses the document's Table Of + Contents (TOC) to identify headers on pages. Like :class:`IdentifyHeaders`, this also is no guarantee to find headers, but for well-built Table of Contents, there is a good chance for more correctly identifying header lines on document pages than the font-size-based approach. + + It also has the advantage of being much faster than the font-size-based approach, as it does not execute a full document scan or even access any of the document pages. + + Examples where this approach works very well are the Adobe's files on PDF documentation. + + Please note that this feature **does not read document pages** where the table of contents may exist as normal standard text. It only accesses data as provided by the `Document.get_toc()` method. It will not identify any headers for documents where the table of contents is not available as a collection of bookmarks. + + .. method:: get_header_id(self, span: dict, page=None) -> str + + Return appropriate markdown header prefix. This is either an empty string or a string of "#" characters followed by a space. + + Given a text span from a "dict" extraction variant, determine the markdown header prefix string of 0 to n concatenated "#" characters. + + :arg dict span: a dictionary containing the text span information. This is the same dictionary as returned by `page.get_text("dict")`. + + :arg Page page: the owning page object. This can be used when additional information needs to be extracted. + + :returns: a string of "#" characters followed by a space. + + + +**How to use class TocHeaders** + +This is a version of previous **example 2** that uses :class:`TocHeaders` for header identification:: + + import pymupdf, pymupdf4llm + + filename = "input.pdf" + + doc = pymupdf.open(filename) # use a Document for subsequent processing + my_headers = pymupdf4llm.TocHeaders(doc) # use the table of contents for determining headers + + # this will *NOT* scan the document for font sizes! + md_text = pymupdf4llm.to_markdown(doc, hdr_info=my_headers) + +----- + +.. class:: pdf_markdown_reader.PDFMarkdownReader + + .. method:: load_data(file_path: Union[Path, str], extra_info: Optional[Dict] = None, **load_kwargs: Any) -> List[LlamaIndexDocument] + + This is the only method of the markdown reader you should currently use to extract markdown data. Please in any case ignore methods `aload_data()` and `lazy_load_data()`. Other methods like `use_doc_meta()` may or may not make sense. For more information, please consult the LlamaIndex documentation [#f1]_. + + Under the hood the method will execute `to_markdown()`. + + :returns: a list of `LlamaIndexDocument` documents - one for each page. + +----- + +For a list of changes, please see file `CHANGES.md `_. + +.. rubric:: Footnotes + +.. [#f1] `LlamaIndex documentation `_ + +.. [#f2] When using PyMuPDF-Pro, supported office documents are converted internally into a PDF-like format. Therefore, they **will have fixed page dimensions** and be no longer "reflowable". Consequently, the page width and page height specifications will be ignored as well in these cases. + + + + +.. include:: ../footer.rst + +.. _LlamaIndex: https://pypi.org/project/llama-index/ + + + diff --git a/docs/pymupdf4llm/index.rst b/docs/pymupdf4llm/index.rst new file mode 100644 index 000000000..a4cf6d97b --- /dev/null +++ b/docs/pymupdf4llm/index.rst @@ -0,0 +1,145 @@ + +.. include:: ../header.rst + +.. _pymupdf4llm + + +PyMuPDF4LLM +=========================================================================== + +|PyMuPDF4LLM| is aimed to make it easier to extract |PDF| content in the format you need for **LLM** & **RAG** environments. It supports :ref:`Markdown extraction ` as well as :ref:`LlamaIndex document output `. + +.. important:: + + You can extend the supported file types to also include **Office** document formats (DOC/DOCX, XLS/XLSX, PPT/PPTX, HWP/HWPX) by :ref:`using PyMuPDF Pro with PyMuPDF4LLM `. + +Features +------------------------------- + + - Support for multi-column pages + - Support for image and vector graphics extraction (and inclusion of references in the MD text) + - Support for page chunking output. + - Direct support for output as :ref:`LlamaIndex Documents `. + + +Functionality +-------------------- + +- This package converts the pages of a file to text in **Markdown** format using |PyMuPDF|. + +- Standard text and tables are detected, brought in the right reading sequence and then together converted to **GitHub**-compatible **Markdown** text. + +- Header lines are identified via the font size and appropriately prefixed with one or more `#` tags. + +- Bold, italic, mono-spaced text and code blocks are detected and formatted accordingly. Similar applies to ordered and unordered lists. + +- By default, all document pages are processed. If desired, a subset of pages can be specified by providing a list of `0`-based page numbers. + + +Installation +---------------- + + +Install the package via **pip** with: + + +.. code-block:: bash + + pip install pymupdf4llm + + +.. _extracting_as_md: + +Extracting a file as **Markdown** +-------------------------------------------------------------- + +To retrieve your document content in **Markdown** simply install the package and then use a couple of lines of **Python** code to get results. + + + +Then in your **Python** script do: + + +.. code-block:: python + + import pymupdf4llm + md_text = pymupdf4llm.to_markdown("input.pdf") + + +.. note:: + + Instead of the filename string as above, one can also provide a :ref:`PyMuPDF Document `. A second parameter may be a list of `0`-based page numbers, e.g. `[0,1]` would just select the first and second pages of the document. + + +If you want to store your **Markdown** file, e.g. store as a UTF8-encoded file, then do: + + +.. code-block:: python + + import pathlib + pathlib.Path("output.md").write_bytes(md_text.encode()) + + + +.. _extracting_as_llamaindex: + +Extracting a file as a **LlamaIndex** document +-------------------------------------------------------------- + +|PyMuPDF4LLM| supports direct conversion to a **LLamaIndex** document. A document is first converted into **Markdown** format and then a **LlamaIndex** document is returned as follows: + + + +.. code-block:: python + + import pymupdf4llm + llama_reader = pymupdf4llm.LlamaMarkdownReader() + llama_docs = llama_reader.load_data("input.pdf") + + +.. _using_pymupdf4llm_withpymupdfpro: + +Using with |PyMuPDF Pro| +--------------------------- + + +For **Office** document support, |PyMuPDF4LLM| works seamlessly with |PyMuPDF Pro|. Assuming you have :doc:`../pymupdf-pro` installed you will be able to work with **Office** documents as expected: + + +.. code-block:: python + + import pymupdf4llm + import pymupdf.pro + pymupdf.pro.unlock() + md_text = pymupdf4llm.to_markdown("sample.doc") + + +As you can see |PyMuPDF Pro| functionality will be available within the |PyMuPDF4LLM| context! + + + +API +------- + +See :ref:`the PyMuPDF4LLM API `. + +Further Resources +------------------- + + +Sample code +~~~~~~~~~~~~~~~ + +- `Command line RAG Chatbot with PyMuPDF `_ +- `Example of a Browser Application using Langchain and PyMuPDF `_ + + +Blogs +~~~~~~~~~~~~~~ + +- `RAG/LLM and PDF: Enhanced Text Extraction `_ +- `Creating a RAG Chatbot with ChatGPT and PyMuPDF `_ +- `Building a RAG Chatbot GUI with the ChatGPT API and PyMuPDF `_ +- `RAG/LLM and PDF: Conversion to Markdown Text with PyMuPDF `_ + +.. include:: ../footer.rst diff --git a/docs/pyodide.rst b/docs/pyodide.rst new file mode 100644 index 000000000..a3dd8342a --- /dev/null +++ b/docs/pyodide.rst @@ -0,0 +1,96 @@ +.. include:: header.rst + +Pyodide +======= + + +Overview +-------- + +* + `Pyodide `_ is a client-side Python implementation that + runs in a web browser. + +* The Pyodide build of PyMuPDF is currently experimental. + + +Building a PyMuPDF wheel for Pyodide +------------------------------------ + +A PyMuPDF wheel for Pyodide can be built by running `scripts/gh_release.py` +with some environmental variable settings. This is regularly tested on Github +by `.github/workflows/test_pyodide.yml`. + +Here is an example of this, a single Linux command (to be run with the current +directory set to a PyMuPDF checkout), that builds a Pyodide wheel:: + + inputs_sdist=0 \ + inputs_PYMUPDF_SETUP_MUPDF_BUILD="git:--recursive --depth 1 --shallow-submodules --branch master https://github.com/ArtifexSoftware/mupdf.git" \ + inputs_wheels_default=0 \ + inputs_wheels_linux_pyodide=1 \ + ./scripts/gh_release.py build + +This does the following (all inside Python venv's): + +* Download (git clone and pip install) and customise a Pyodide build environment. +* Download (git clone) the latest MuPDF. +* Build MuPDF and PyMuPDF in the Pyodide build environment. +* Create a wheel in `dist/`. + +For more information, see the comments for functions `build_pyodide_wheel()` +and `pyodide_setup()` in `scripts/gh_release.py`. + + +Using a Pyodide wheel +--------------------- + +* + Upload the wheel (for example + `PyMuPDF/dist/PyMuPDF-1.24.2-cp311-cp311-emscripten_3_1_32_wasm32.whl`) to a + webserver which has been configured to allow Cross-origin resource sharing + (https://en.wikipedia.org/wiki/Cross-origin_resource_sharing). + +* + The wheel can be used in a Pyodide console running in a web browser, or a + JupyterLite notebook running in a web browser. + + * To create a Pyodide console, go to: + + https://pyodide.org/en/stable/console.html + + * To create a JupyterLite notebook, go to: + + https://jupyterlite.readthedocs.io/en/latest/_static/lab/index.html + +* + In both these cases, one can use the following code to download the wheel + (replace `url` with the URL of the uploaded wheel) and import it:: + + import pyodide_js + await pyodide_js.loadPackage(url) + import pymupdf + + * + Note that `micropip.install()` does not work, because of PyMuPDF's use of + shared libraries. + + +Loading a PDF document from a URL into PyMuPDF +---------------------------------------------- + +* + Pyodide browser console does not have generic network access, so for + example `urllib.request.urlopen(url)` fails. But Pyodide has a built-in + `pyodide.http` module that uses javascript internally, which one can use + to download into a `bytes` instance, which can be used to create a PyMuPDF + `Document` instance:: + + import pyodide.http + r = await pyodide.http.pyfetch('https://...') + data = await r.bytes() + doc = pymupdf.Document(stream=data) + +* It looks like this only works with `https://`, not `http://`. + + +.. include:: footer.rst diff --git a/docs/rag.rst b/docs/rag.rst new file mode 100644 index 000000000..29082fa69 --- /dev/null +++ b/docs/rag.rst @@ -0,0 +1,139 @@ + +.. include:: header.rst + + +PyMuPDF, LLM & RAG +============================ + + +Integrating |PyMuPDF| into your :title:`Large Language Model (LLM)` framework and overall :title:`RAG (Retrieval-Augmented Generation`) solution provides the fastest and most reliable way to deliver document data. + +There are a few well known :title:`LLM` solutions which have their own interfaces with |PyMuPDF| - it is a fast growing area, so please let us know if you discover any more! + +If you need to export to :title:`Markdown` or obtain a :title:`LlamaIndex` Document from a file: + +.. raw:: html + + +

+ + + + +Integration with :title:`LangChain` +------------------------------------- + +It is simple to integrate directly with :title:`LangChain` by using their dedicated loader as follows: + + +.. code-block:: python + + from langchain_community.document_loaders import PyMuPDFLoader + loader = PyMuPDFLoader("example.pdf") + data = loader.load() + + +See `LangChain Using PyMuPDF `_ for full details. + + +Integration with :title:`LlamaIndex` +--------------------------------------- + + +Use the dedicated `PyMuPDFReader` from :title:`LlamaIndex` 🦙 to manage your document loading. + +.. code-block:: python + + from llama_index.readers.file import PyMuPDFReader + loader = PyMuPDFReader() + documents = loader.load(file_path="example.pdf") + +See `Building RAG from Scratch `_ for more. + + +Preparing Data for Chunking +----------------------------- + +Chunking (or splitting) data is essential to give context to your :title:`LLM` data and with :title:`Markdown` output now supported by |PyMuPDF| this means that `Level 3 chunking `_ is supported. + + + +.. _rag_outputting_as_md: + +Outputting as :title:`Markdown` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to export your document in :title:`Markdown` format you will need a separate helper. Package :doc:`pymupdf4llm/index` is a high-level wrapper of |PyMuPDF| functions which for each page outputs standard and table text in an integrated Markdown-formatted string across all document pages: + + +.. code-block:: python + + # convert the document to markdown + import pymupdf4llm + md_text = pymupdf4llm.to_markdown("input.pdf") + + # Write the text to some file in UTF8-encoding + import pathlib + pathlib.Path("output.md").write_bytes(md_text.encode()) + + +For further information please refer to: :doc:`pymupdf4llm/index`. + + +How to use :title:`Markdown` output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you have your data in :title:`Markdown` format you are ready to chunk/split it and supply it to your :title:`LLM`, for example, if this is :title:`LangChain` then do the following: + +.. code-block:: python + + import pymupdf4llm + from langchain.text_splitter import MarkdownTextSplitter + + # Get the MD text + md_text = pymupdf4llm.to_markdown("input.pdf") # get markdown for all pages + + splitter = MarkdownTextSplitter(chunk_size=40, chunk_overlap=0) + + splitter.create_documents([md_text]) + + + +For more see `5 Levels of Text Splitting `_ + + +Related Blogs +-------------------- + +To find out more about |PyMuPDF|, :title:`LLM` & :title:`RAG` check out our blogs for implementations & tutorials. + + +Methodologies to Extract Text +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- `Enhanced Text Extraction `_ +- `Conversion to Markdown Text with PyMuPDF `_ + + + +Create a Chatbot to discuss your documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- `Make a simple command line Chatbot `_ +- `Make a Chatbot GUI `_ + + + + + + + + +.. include:: footer.rst \ No newline at end of file diff --git a/docs/recipes-annotations.rst b/docs/recipes-annotations.rst index 3756779c0..84e42f62e 100644 --- a/docs/recipes-annotations.rst +++ b/docs/recipes-annotations.rst @@ -11,7 +11,9 @@ Annotations How to Add and Modify Annotations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In PyMuPDF, new annotations can be added via :ref:`Page` methods. Once an annotation exists, it can be modified to a large extent using methods of the :ref:`Annot` class. +In |PyMuPDF|, new annotations can be added via :ref:`Page` methods. Once an annotation exists, it can be modified to a large extent using methods of the :ref:`Annot` class. + +Annotations can **only** be inserted in |PDF| pages - other document types do not support annotation insertion. In contrast to many other tools, initial insert of annotations happens with a minimum number of properties. We leave it to the programmer to e.g. set attributes like author, creation date or subject. @@ -32,59 +34,27 @@ This script should lead to the following output: How to Use FreeText ~~~~~~~~~~~~~~~~~~~~~ -This script shows a couple of ways to deal with 'FreeText' annotations:: - - # -*- coding: utf-8 -*- - import fitz - - # some colors - blue = (0,0,1) - green = (0,1,0) - red = (1,0,0) - gold = (1,1,0) - - # a new PDF with 1 page - doc = fitz.open() - page = doc.new_page() - - # 3 rectangles, same size, above each other - r1 = fitz.Rect(100,100,200,150) - r2 = r1 + (0,75,0,75) - r3 = r2 + (0,75,0,75) - - # the text, Latin alphabet - t = "¡Un pequeño texto para practicar!" - - # add 3 annots, modify the last one somewhat - a1 = page.add_freetext_annot(r1, t, color=red) - a2 = page.add_freetext_annot(r2, t, fontname="Ti", color=blue) - a3 = page.add_freetext_annot(r3, t, fontname="Co", color=blue, rotate=90) - a3.set_border(width=0) - a3.update(fontsize=8, fill_color=gold) +This script shows a couple of basic ways to deal with 'FreeText' annotations: - # save the PDF - doc.save("a-freetext.pdf") +.. literalinclude:: samples/annotations-freetext1.py The result looks like this: -.. image:: images/img-freetext.* +.. image:: images/img-freetext1.* :scale: 80 ------------------------------- - +Here is an example for using rich text and call-out lines: +.. literalinclude:: samples/annotations-freetext2.py +The result looks like this: -Using Buttons and JavaScript -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Since MuPDF v1.16, 'FreeText' annotations no longer support bold or italic versions of the Times-Roman, Helvetica or Courier fonts. +.. image:: images/img-freetext2.* + :scale: 80 -A big **thank you** to our user `@kurokawaikki `_, who contributed the following script to **circumvent this restriction**. -.. literalinclude:: samples/make-bold.py - :language: python +------------------------------ --------------------------- .. _RecipesAnnotations_C: @@ -93,50 +63,9 @@ How to Use Ink Annotations ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ink annotations are used to contain freehand scribbling. A typical example may be an image of your signature consisting of first name and last name. Technically an ink annotation is implemented as a **list of lists of points**. Each point list is regarded as a continuous line connecting the points. Different point lists represent independent line segments of the annotation. -The following script creates an ink annotation with two mathematical curves (sine and cosine function graphs) as line segments:: - - import math - import fitz - - #------------------------------------------------------------------------------ - # preliminary stuff: create function value lists for sine and cosine - #------------------------------------------------------------------------------ - w360 = math.pi * 2 # go through full circle - deg = w360 / 360 # 1 degree as radians - rect = fitz.Rect(100,200, 300, 300) # use this rectangle - first_x = rect.x0 # x starts from left - first_y = rect.y0 + rect.height / 2. # rect middle means y = 0 - x_step = rect.width / 360 # rect width means 360 degrees - y_scale = rect.height / 2. # rect height means 2 - sin_points = [] # sine values go here - cos_points = [] # cosine values go here - for x in range(362): # now fill in the values - x_coord = x * x_step + first_x # current x coordinate - y = -math.sin(x * deg) # sine - p = (x_coord, y * y_scale + first_y) # corresponding point - sin_points.append(p) # append - y = -math.cos(x * deg) # cosine - p = (x_coord, y * y_scale + first_y) # corresponding point - cos_points.append(p) # append - - #------------------------------------------------------------------------------ - # create the document with one page - #------------------------------------------------------------------------------ - doc = fitz.open() # make new PDF - page = doc.new_page() # give it a page - - #------------------------------------------------------------------------------ - # add the Ink annotation, consisting of 2 curve segments - #------------------------------------------------------------------------------ - annot = page.addInkAnnot((sin_points, cos_points)) - # let it look a little nicer - annot.set_border(width=0.3, dashes=[1,]) # line thickness, some dashing - annot.set_colors(stroke=(0,0,1)) # make the lines blue - annot.update() # update the appearance - - page.draw_rect(rect, width=0.3) # only to demonstrate we did OK - - doc.save("a-inktest.pdf") +The following script creates an ink annotation with two mathematical curves (sine and cosine function graphs) as line segments: + +.. literalinclude:: samples/annotations-ink.py This is the result: diff --git a/docs/recipes-common-issues-and-their-solutions.rst b/docs/recipes-common-issues-and-their-solutions.rst index 616a98299..8f8c1a590 100644 --- a/docs/recipes-common-issues-and-their-solutions.rst +++ b/docs/recipes-common-issues-and-their-solutions.rst @@ -9,14 +9,14 @@ Common Issues and their Solutions How To Dynamically Clean Up Corrupt :title:`PDFs` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This shows a potential use of :title:`PyMuPDF` with another Python PDF library (the excellent pure Python package `pdfrw `_ is used here as an example). +This shows a potential use of |PyMuPDF| with another Python PDF library (the excellent pure Python package `pdfrw `_ is used here as an example). If a clean, non-corrupt / decompressed PDF is needed, one could dynamically invoke PyMuPDF to recover from many problems like so:: import sys from io import BytesIO from pdfrw import PdfReader - import fitz + import pymupdf #--------------------------------------- # 'Tolerant' PDF reader @@ -32,7 +32,7 @@ If a clean, non-corrupt / decompressed PDF is needed, one could dynamically invo # either we need a password or it is a problem-PDF # create a repaired / decompressed / decrypted version - doc = fitz.open("pdf", ibuffer) + doc = pymupdf.open("pdf", ibuffer) if password is not None: # decrypt if password provided rc = doc.authenticate(password) if not rc > 0: @@ -51,10 +51,10 @@ With the command line utility *pdftk* (`available ` to a :title:`PDF`. These include XPS, EPUB, FB2, CBZ and image formats, including multi-page TIFF images. +Here is a script that converts any |PyMuPDF| :ref:`supported document` to a |PDF|. These include XPS, EPUB, FB2, CBZ and image formats, including multi-page TIFF images. It features maintaining any metadata, table of contents and links contained in the source document:: @@ -81,27 +81,27 @@ It features maintaining any metadata, table of contents and links contained in t PyMuPDF v1.14.0+ """ import sys - import fitz - if not (list(map(int, fitz.VersionBind.split("."))) >= [1,14,0]): + import pymupdf + if not (list(map(int, pymupdf.VersionBind.split("."))) >= [1,14,0]): raise SystemExit("need PyMuPDF v1.14.0+") fn = sys.argv[1] print("Converting '%s' to '%s.pdf'" % (fn, fn)) - doc = fitz.open(fn) + doc = pymupdf.open(fn) b = doc.convert_to_pdf() # convert to pdf - pdf = fitz.open("pdf", b) # open as pdf + pdf = pymupdf.open("pdf", b) # open as pdf - toc= doc.het_toc() # table of contents of input + toc= doc.get_toc() # table of contents of input pdf.set_toc(toc) # simply set it for output meta = doc.metadata # read and set metadata if not meta["producer"]: - meta["producer"] = "PyMuPDF v" + fitz.VersionBind + meta["producer"] = "PyMuPDF v" + pymupdf.VersionBind if not meta["creator"]: meta["creator"] = "PyMuPDF PDF converter" - meta["modDate"] = fitz.get_pdf_now() + meta["modDate"] = pymupdf.get_pdf_now() meta["creationDate"] = meta["modDate"] pdf.set_metadata(meta) @@ -113,7 +113,7 @@ It features maintaining any metadata, table of contents and links contained in t link_cnti += len(links) # count how many pout = pdf[pinput.number] # read corresp. output page for l in links: # iterate though the links - if l["kind"] == fitz.LINK_NAMED: # we do not handle named links + if l["kind"] == pymupdf.LINK_NAMED: # we do not handle named links print("named link page", pinput.number, l) link_skip += 1 # count them continue @@ -127,56 +127,6 @@ It features maintaining any metadata, table of contents and links contained in t -How to Deal with Messages Issued by :title:`MuPDF` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since :title:`PyMuPDF` v1.16.0, **error messages** issued by the underlying :title:`MuPDF` library are being redirected to the Python standard device *sys.stderr*. So you can handle them like any other output going to this devices. - -In addition, these messages go to the internal buffer together with any :title:`MuPDF` warnings -- see below. - -We always prefix these messages with an identifying string *"mupdf:"*. -If you prefer to not see recoverable MuPDF errors at all, issue the command `fitz.TOOLS.mupdf_display_errors(False)`. - -MuPDF warnings continue to be stored in an internal buffer and can be viewed using :meth:`Tools.mupdf_warnings`. - -Please note that MuPDF errors may or may not lead to Python exceptions. In other words, you may see error messages from which MuPDF can recover and continue processing. - -Example output for a **recoverable error**. We are opening a damaged PDF, but MuPDF is able to repair it and gives us a little information on what happened. Then we illustrate how to find out whether the document can later be saved incrementally. Checking the :attr:`Document.is_dirty` attribute at this point also indicates that during `fitz.open` the document had to be repaired: - ->>> import fitz ->>> doc = fitz.open("damaged-file.pdf") # leads to a sys.stderr message: -mupdf: cannot find startxref ->>> print(fitz.TOOLS.mupdf_warnings()) # check if there is more info: -cannot find startxref -trying to repair broken xref -repairing PDF document -object missing 'endobj' token ->>> doc.can_save_incrementally() # this is to be expected: -False ->>> # the following indicates whether there are updates so far ->>> # this is the case because of the repair actions: ->>> doc.is_dirty -True ->>> # the document has nevertheless been created: ->>> doc -fitz.Document('damaged-file.pdf') ->>> # we now know that any save must occur to a new file - -Example output for an **unrecoverable error**: - ->>> import fitz ->>> doc = fitz.open("does-not-exist.pdf") -mupdf: cannot open does-not-exist.pdf: No such file or directory -Traceback (most recent call last): - File "", line 1, in - doc = fitz.open("does-not-exist.pdf") - File "C:\Users\Jorj\AppData\Local\Programs\Python\Python37\lib\site-packages\fitz\fitz.py", line 2200, in __init__ - _fitz.Document_swiginit(self, _fitz.new_Document(filename, stream, filetype, rect, width, height, fontsize)) -RuntimeError: cannot open does-not-exist.pdf: No such file or directory ->>> - - - Changing Annotations: Unexpected Behaviour ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -215,79 +165,10 @@ Unfortunately there is not much you can do in most of these cases. * :meth:`Annot.set_flags` (annotation behaviour) * :meth:`Annot.set_info` (meta information, except changes to *content*) * :meth:`Annot.set_popup` (create popup or change its rect) - * :meth:`Annot.set_optional_content` (add / remove reference to optional content information) + * :meth:`Annot.set_oc` (add / remove reference to optional content information) * :meth:`Annot.set_open` * :meth:`Annot.update_file` (file attachment changes) -Misplaced Item Insertions on PDF Pages -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Problem -^^^^^^^^^ - -You inserted an item (like an image, an annotation or some text) on an existing PDF page, but later you find it being placed at a different location than intended. For example an image should be inserted at the top, but it unexpectedly appears near the bottom of the page. - -Cause -^^^^^^ - -The creator of the PDF has established a non-standard page geometry without keeping it "local" (as they should!). Most commonly, the PDF standard point (0,0) at *bottom-left* has been changed to the *top-left* point. So top and bottom are reversed -- causing your insertion to be misplaced. - -The visible image of a PDF page is controlled by commands coded in a special mini-language. For an overview of this language consult "Operator Summary" on pp. 643 of the :ref:`AdobeManual`. These commands are stored in :data:`contents` objects as strings (*bytes* in PyMuPDF). - -There are commands in that language, which change the coordinate system of the page for all the following commands. In order to limit the scope of such commands to "local", they must be wrapped by the command pair *q* ("save graphics state", or "stack") and *Q* ("restore graphics state", or "unstack"). - -.. highlight:: text - -So the PDF creator did this:: - - stream - 1 0 0 -1 0 792 cm % <=== change of coordinate system: - ... % letter page, top / bottom reversed - ... % remains active beyond these lines - endstream - -where they should have done this:: - - stream - q % put the following in a stack - 1 0 0 -1 0 792 cm % <=== scope of this is limited by Q command - ... % here, a different geometry exists - Q % after this line, geometry of outer scope prevails - endstream - -.. note:: - - * In the mini-language's syntax, spaces and line breaks are equally accepted token delimiters. - * Multiple consecutive delimiters are treated as one. - * Keywords "stream" and "endstream" are inserted automatically -- not by the programmer. - -.. highlight:: python - -Solutions -^^^^^^^^^^ - -Since v1.16.0, there is the property :attr:`Page.is_wrapped`, which lets you check whether a page's contents are wrapped in that string pair. - -If it is *False* or if you want to be on the safe side, pick one of the following: - -1. The easiest way: in your script, do a :meth:`Page.clean_contents` before you do your first item insertion. -2. Pre-process your PDF with the MuPDF command line utility *mutool clean -c ...* and work with its output file instead. -3. Directly wrap the page's :data:`contents` with the stacking commands before you do your first item insertion. - -**Solutions 1. and 2.** use the same technical basis and **do a lot more** than what is required in this context: they also clean up other inconsistencies or redundancies that may exist, multiple */Contents* objects will be concatenated into one, and much more. - -.. note:: For **incremental saves,** solution 1. has an unpleasant implication: it will bloat the update delta, because it changes so many things and, in addition, stores the **cleaned contents uncompressed**. So, if you use :meth:`Page.clean_contents` you should consider **saving to a new file** with (at least) *garbage=3* and *deflate=True*. - -**Solution 3.** is completely under your control and only does the minimum corrective action. There is a handy utility method :meth:`Page.wrap_contents` which -- as twe name suggests -- **wraps** the page's :data:`contents` object(s) by the PDF commands `q` and `Q`. - -This solution is extremely fast and the changes to the PDF are minimal. This is useful in situations where incrementally saving the file is desirable -- or even a must when the PDF has been digitally signed and you cannot change this status. - -We recommend the following snippet to get the situation under control: - - >>> if not page.is_wrapped: - page.wrap_contents() - >>> # start inserting text, images and other objects here - Missing or Unreadable Extracted Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/recipes-drawing-and-graphics.rst b/docs/recipes-drawing-and-graphics.rst index 1819dbba2..4b31a7877 100644 --- a/docs/recipes-drawing-and-graphics.rst +++ b/docs/recipes-drawing-and-graphics.rst @@ -6,18 +6,24 @@ Drawing and Graphics ============================== +.. note:: -PDF files support elementary drawing operations as part of their syntax. This includes basic geometrical objects like lines, curves, circles, rectangles including specifying colors. + When the terms "Drawings" or "Graphics" are mentioned here we are referring to "Vector Graphics" or "Line Art". + + Therefore please consider these terms as being synonymous! + + +PDF files support elementary drawing operations as part of their syntax. These are **vector graphics** and include basic geometrical objects like lines, curves, circles, rectangles including specifying colors. The syntax for such operations is defined in "A Operator Summary" on page 643 of the :ref:`AdobeManual`. Specifying these operators for a PDF page happens in its :data:`contents` objects. -PyMuPDF implements a large part of the available features via its :ref:`Shape` class, which is comparable to notions like "canvas" in other packages (e.g. `reportlab `_). +|PyMuPDF| implements a large part of the available features via its :ref:`Shape` class, which is comparable to notions like "canvas" in other packages (e.g. `reportlab `_). -A shape is always created as a **child of a page**, usually with an instruction like *shape = page.new_shape()*. The class defines numerous methods that perform drawing operations on the page's area. For example, *last_point = shape.draw_rect(rect)* draws a rectangle along the borders of a suitably defined *rect = fitz.Rect(...)*. +A shape is always created as a **child of a page**, usually with an instruction like `shape = page.new_shape()`. The class defines numerous methods that perform drawing operations on the page's area. For example, `last_point = shape.draw_rect(rect)` draws a rectangle along the borders of a suitably defined `rect = pymupdf.Rect(...)`. -The returned *last_point* **always** is the :ref:`Point` where drawing operation ended ("last point"). Every such elementary drawing requires a subsequent :meth:`Shape.finish` to "close" it, but there may be multiple drawings which have one common *finish()* method. +The returned *last_point* **always** is the :ref:`Point` where drawing operation ended ("last point"). Every such elementary drawing requires a subsequent :meth:`Shape.finish` to "close" it, but there may be multiple drawings which have one common ``finish()`` method. -In fact, :meth:`Shape.finish` *defines* a group of preceding draw operations to form one -- potentially rather complex -- graphics object. PyMuPDF provides several predefined graphics in `shapes_and_symbols.py `_ which demonstrate how this works. +In fact, :meth:`Shape.finish` *defines* a group of preceding draw operations to form one -- potentially rather complex -- graphics object. |PyMuPDF| provides several predefined graphics in `shapes_and_symbols.py `_ which demonstrate how this works. If you import this script, you can also directly use its graphics as in the following example:: @@ -35,7 +41,7 @@ If you import this script, you can also directly use its graphics as in the foll """ - import fitz + import pymupdf import shapes_and_symbols as sas # list of available symbol functions and their descriptions @@ -52,15 +58,15 @@ If you import this script, you can also directly use its graphics as in the foll (sas.smiley, "smiley (easy)"), ] - r = fitz.Rect(50, 50, 100, 100) # first rect to contain a symbol - d = fitz.Rect(0, r.height + 10, 0, r.height + 10) # displacement to next rect + r = pymupdf.Rect(50, 50, 100, 100) # first rect to contain a symbol + d = pymupdf.Rect(0, r.height + 10, 0, r.height + 10) # displacement to next rect p = (15, -r.height * 0.2) # starting point of explanation text rlist = [r] # rectangle list for i in range(1, len(tlist)): # fill in all the rectangles rlist.append(rlist[i-1] + d) - doc = fitz.open() # create empty PDF + doc = pymupdf.open() # create empty PDF page = doc.new_page() # create an empty page shape = page.new_shape() # start a Shape (canvas) @@ -84,12 +90,15 @@ This is the script's outcome: ------------------------------ + +.. _RecipesDrawingAndGraphics_Extract_Drawings: + How to Extract Drawings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * New in v1.18.0 -The drawing commands issued by a page can be extracted. Interestingly, this is possible for :ref:`all supported document types` -- not just PDF: so you can use it for XPS, EPUB and others as well. +Drawing commands (**vector graphics**) issued by a page can be extracted as a list of dictionaries. Interestingly, this is possible for :ref:`all supported document types` -- not just PDF: so you can use it for XPS, EPUB and others as well. Page method, :meth:`Page.get_drawings()` accesses draw commands and converts them into a list of Python dictionaries. Each dictionary -- called a "path" -- represents a separate drawing -- it may be simple like a single line, or a complex combination of lines and curves representing one of the shapes of the previous section. @@ -133,15 +142,15 @@ The *path* dictionary has been designed such that it can easily be used by the : The following is a code snippet which extracts the drawings of a page and re-draws them on a new page:: - import fitz - doc = fitz.open("some.file") + import pymupdf + doc = pymupdf.open("some.file") page = doc[0] paths = page.get_drawings() # extract existing drawings # this is a list of "paths", which can directly be drawn again using Shape # ------------------------------------------------------------------------- # # define some output page with the same dimensions - outpdf = fitz.open() + outpdf = pymupdf.open() outpage = outpdf.new_page(width=page.rect.width, height=page.rect.height) shape = outpage.new_shape() # make a drawing canvas for the output page # -------------------------------------- @@ -195,4 +204,48 @@ Here is a comparison between input and output of an example page, created by the .. note:: You can use the path list to make your own lists of e.g. all lines or all rectangles on the page and subselect them by criteria, like color or position on the page etc. +.. _RecipesDrawingAndGraphics_Delete_Drawings: + +How to Delete Drawings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To delete drawings/vector graphics we must use a :ref:`Redaction Annotation ` with the bounding box of the drawing and then **add and apply** a redaction to it to delete it. + + +The following code shows an example of deleting the first drawing found on the page:: + + paths = page.get_drawings() + rect = paths[0]["rect"] # rectangle of the 1st drawing + page.add_redact_annot(rect) + page.apply_redactions(0,2,1) # potentially set options for any of images, drawings, text + + +.. note:: + + See :meth:`Page.apply_redactions` for the parameter options which can be sent - you are able to apply deletion options to image, drawing and text objects which are bound by the annotation area. + + +How to Draw Graphics +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Drawing graphics is as simple as calling the type of :meth:`Drawing Method ` you may want. You can draw graphics directly on pages or within shape objects. + + +For example, to draw a circle:: + + # Draw a circle on the page using the Page method + page.draw_circle((center_x, center_y), radius, color=(1, 0, 0), width=2) + + # Draw a circle on the page using a Shape object + shape = page.new_shape() + shape.draw_circle((center_x, center_y), radius) + shape.finish(color=(1, 0, 0), width=2) + shape.commit(overlay=True) + +The :ref:`Shape` object can be used to combine multiple drawings that should receive common properties as specified by :meth:`Shape.finish`. + + + + + .. include:: footer.rst diff --git a/docs/recipes-images.rst b/docs/recipes-images.rst index 489355fc7..7eaa77a55 100644 --- a/docs/recipes-images.rst +++ b/docs/recipes-images.rst @@ -19,9 +19,9 @@ The document can be any :ref:`supported type`. The script works as a command line tool which expects the filename being supplied as a parameter. The generated image files (1 per page) are stored in the directory of the script:: - import sys, fitz # import the bindings + import sys, pymupdf # import the bindings fname = sys.argv[1] # get filename from command line - doc = fitz.open(fname) # open document + doc = pymupdf.open(fname) # open document for page in doc: # iterate through the pages pix = page.get_pixmap() # render page to an image pix.save("page-%i.png" % page.number) # store image as a PNG @@ -46,7 +46,7 @@ In the following, we apply a :index:`zoom factor ` of 2 t zoom_x = 2.0 # horizontal zoom zoom_y = 2.0 # vertical zoom - mat = fitz.Matrix(zoom_x, zoom_y) # zoom factor 2 in each dimension + mat = pymupdf.Matrix(zoom_x, zoom_y) # zoom factor 2 in each dimension pix = page.get_pixmap(matrix=mat) # use 'mat' instead of the identity matrix @@ -70,10 +70,10 @@ To achieve this, define a rectangle equal to the area you want to appear in the :: - mat = fitz.Matrix(2, 2) # zoom factor 2 in each direction + mat = pymupdf.Matrix(2, 2) # zoom factor 2 in each direction rect = page.rect # the page rectangle mp = (rect.tl + rect.br) / 2 # its middle point, becomes top-left of clip - clip = fitz.Rect(mp, rect.br) # the area we want + clip = pymupdf.Rect(mp, rect.br) # the area we want pix = page.get_pixmap(matrix=mat, clip=clip) In the above we construct *clip* by specifying two diagonally opposite points: the middle point *mp* of the page rectangle, and its bottom right, *rect.br*. @@ -99,7 +99,7 @@ Please also read the previous section. This time we want to **compute the zoom f zoom = HEIGHT / clip.height else: # clip is broader: zoom to window WIDTH zoom = WIDTH / clip.width - mat = fitz.Matrix(zoom, zoom) + mat = pymupdf.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat, clip=clip) For the other way round, now assume you **have** the zoom factor and need to **compute the fitting clip**. @@ -108,11 +108,11 @@ In this case we have `zoom = HEIGHT/clip.height = WIDTH/clip.width`, so we must width = WIDTH / zoom height = HEIGHT / zoom - clip = fitz.Rect(tl, tl.x + width, tl.y + height) + clip = pymupdf.Rect(tl, tl.x + width, tl.y + height) # ensure we still are inside the page clip &= page.rect - mat = fitz.Matrix(zoom, zoom) - pix = fitz.Pixmap(matrix=mat, clip=clip) + mat = pymupdf.Matrix(zoom, zoom) + pix = pymupdf.Pixmap(matrix=mat, clip=clip) ---------- @@ -147,7 +147,7 @@ If you want to recreate the original image in file form or as a memory area, you 1. Convert your document to a PDF, and then use one of the PDF-only extraction methods. This snippet will convert a document to PDF:: >>> pdfbytes = doc.convert_to_pdf() # this a bytes object - >>> pdf = fitz.open("pdf", pdfbytes) # open it as a PDF document + >>> pdf = pymupdf.open("pdf", pdfbytes) # open it as a PDF document >>> # now use 'pdf' like any PDF document 2. Use :meth:`Page.get_text` with the "dict" parameter. This works for all document types. It will extract all text and images shown on the page, formatted as a Python dictionary. Every image will occur in an image block, containing meta information and **the binary image data**. For details of the dictionary's structure, see :ref:`TextPage`. The method works equally well for PDF files. This creates a list of all images shown on a page:: @@ -183,9 +183,9 @@ How to Extract Images: PDF Documents Like any other "object" in a PDF, images are identified by a cross reference number (:data:`xref`, an integer). If you know this number, you have two ways to access the image's data: -1. **Create** a :ref:`Pixmap` of the image with instruction *pix = fitz.Pixmap(doc, xref)*. This method is **very** fast (single digit micro-seconds). The pixmap's properties (width, height, ...) will reflect the ones of the image. In this case there is no way to tell which image format the embedded original has. +1. **Create** a :ref:`Pixmap` of the image with instruction *pix = pymupdf.Pixmap(doc, xref)*. This method is **very** fast (single digit micro-seconds). The pixmap's properties (width, height, ...) will reflect the ones of the image. In this case there is no way to tell which image format the embedded original has. -2. **Extract** the image with *img = doc.extract_image(xref)*. This is a dictionary containing the binary image data as *img["image"]*. A number of meta data are also provided -- mostly the same as you would find in the pixmap of the image. The major difference is string *img["ext"]*, which specifies the image format: apart from "png", strings like "jpeg", "bmp", "tiff", etc. can also occur. Use this string as the file extension if you want to store to disk. The execution speed of this method should be compared to the combined speed of the statements *pix = fitz.Pixmap(doc, xref);pix.tobytes()*. If the embedded image is in PNG format, the speed of :meth:`Document.extract_image` is about the same (and the binary image data are identical). Otherwise, this method is **thousands of times faster**, and the **image data is much smaller**. +2. **Extract** the image with *img = doc.extract_image(xref)*. This is a dictionary containing the binary image data as *img["image"]*. A number of meta data are also provided -- mostly the same as you would find in the pixmap of the image. The major difference is string *img["ext"]*, which specifies the image format: apart from "png", strings like "jpeg", "bmp", "tiff", etc. can also occur. Use this string as the file extension if you want to store to disk. The execution speed of this method should be compared to the combined speed of the statements *pix = pymupdf.Pixmap(doc, xref);pix.tobytes()*. If the embedded image is in PNG format, the speed of :meth:`Document.extract_image` is about the same (and the binary image data are identical). Otherwise, this method is **thousands of times faster**, and the **image data is much smaller**. The question remains: **"How do I know those 'xref' numbers of images?"**. There are two answers to this: @@ -215,7 +215,7 @@ Some images in PDFs are accompanied by **image masks**. In their simplest form, Whether an image does have such a mask can be recognized in one of two ways in PyMuPDF: -1. An item of :meth:`Document.get_page_images` has the general format `(xref, smask, ...)`, where *xref* is the image's :data:`xref` and *smask*, if positive, then it is the :data:`xref` of a mask. +1. An item of :meth:`Document.get_page_images` has the general format `(xref, smask, ...)`, where :data:`xref` is the image's :data:`xref` and *smask*, if positive, then it is the :data:`xref` of a mask. 2. The (dictionary) results of :meth:`Document.extract_image` have a key *"smask"*, which also contains any mask's :data:`xref` if positive. If *smask == 0* then the image encountered via :data:`xref` can be processed as it is. @@ -225,9 +225,9 @@ To recover the original image using PyMuPDF, the procedure depicted as follows m .. image:: images/img-stencil.* :scale: 60 ->>> pix1 = fitz.Pixmap(doc.extract_image(xref)["image"]) # (1) pixmap of image w/o alpha ->>> mask = fitz.Pixmap(doc.extract_image(smask)["image"]) # (2) mask pixmap ->>> pix = fitz.Pixmap(pix1, mask) # (3) copy of pix1, image mask added +>>> pix1 = pymupdf.Pixmap(doc.extract_image(xref)["image"]) # (1) pixmap of image w/o alpha +>>> mask = pymupdf.Pixmap(doc.extract_image(smask)["image"]) # (2) mask pixmap +>>> pix = pymupdf.Pixmap(pix1, mask) # (3) copy of pix1, image mask added Step (1) creates a pixmap of the basic image. Step (2) does the same with the image mask. Step (3) adds an alpha channel and fills it with transparency information. @@ -254,19 +254,19 @@ We show here **three scripts** that take a list of (image and other) files and p The first one converts each image to a PDF page with the same dimensions. The result will be a PDF with one page per image. It will only work for :ref:`supported image` file formats:: - import os, fitz + import os, pymupdf import PySimpleGUI as psg # for showing a progress bar - doc = fitz.open() # PDF with the pictures + doc = pymupdf.open() # PDF with the pictures imgdir = "D:/2012_10_05" # where the pics are imglist = os.listdir(imgdir) # list of them imgcount = len(imglist) # pic count for i, f in enumerate(imglist): - img = fitz.open(os.path.join(imgdir, f)) # open pic as document + img = pymupdf.open(os.path.join(imgdir, f)) # open pic as document rect = img[0].rect # pic dimension pdfbytes = img.convert_to_pdf() # make a PDF stream img.close() # no longer needed - imgPDF = fitz.open("pdf", pdfbytes) # open stream as PDF + imgPDF = pymupdf.open("pdf", pdfbytes) # open stream as PDF page = doc.new_page(width = rect.width, # new page with ... height = rect.height) # pic dimension page.show_pdf_page(rect, imgPDF, 0) # image fills the page @@ -290,9 +290,9 @@ Look `here = [1, 14, 8]: + import pymupdf, time + if not list(map(int, pymupdf.VersionBind.split("."))) >= [1, 14, 8]: raise SystemExit("need PyMuPDF v1.14.8 for this script") n = 6 # depth (precision) d = 3**n # edge length @@ -485,14 +485,14 @@ This script creates an approximate image of it as a PNG, by going down to one-pi t0 = time.perf_counter() ir = (0, 0, d, d) # the pixmap rectangle - pm = fitz.Pixmap(fitz.csRGB, ir, False) + pm = pymupdf.Pixmap(pymupdf.csRGB, ir, False) pm.set_rect(pm.irect, (255,255,0)) # fill it with some background color color = (0, 0, 255) # color to fill the punch holes # alternatively, define a 'fill' pixmap for the punch holes # this could be anything, e.g. some photo image ... - fill = fitz.Pixmap(fitz.csRGB, ir, False) # same size as 'pm' + fill = pymupdf.Pixmap(pymupdf.csRGB, ir, False) # same size as 'pm' fill.set_rect(fill.irect, (0, 255, 255)) # put some color in def punch(x, y, step): @@ -543,9 +543,9 @@ How to Interface with NumPy This shows how to create a PNG file from a numpy array (several times faster than most other methods):: import numpy as np - import fitz + import pymupdf #============================================================================== - # create a fun-colored width * height PNG with fitz and numpy + # create a fun-colored width * height PNG with pymupdf and numpy #============================================================================== height = 150 width = 100 @@ -557,7 +557,7 @@ This shows how to create a PNG file from a numpy array (several times faster tha bild[i, j] = [(i+j)%256, i%256, j%256] samples = bytearray(bild.tostring()) # get plain pixel data from numpy array - pix = fitz.Pixmap(fitz.csRGB, width, height, samples, alpha=False) + pix = pymupdf.Pixmap(pymupdf.csRGB, width, height, samples, alpha=False) pix.save("test.png") diff --git a/docs/recipes-journalling.rst b/docs/recipes-journalling.rst index ba0c02355..279df638d 100644 --- a/docs/recipes-journalling.rst +++ b/docs/recipes-journalling.rst @@ -29,8 +29,8 @@ Description: * Make a new PDF and enable journalling. Then add a page and some text lines -- each as a separate operation. * Navigate within the journal, undoing and redoing these updates and displaying status and file results:: - >>> import fitz - >>> doc=fitz.open() + >>> import pymupdf + >>> doc=pymupdf.open() >>> doc.journal_enable() >>> # try update without an operation: @@ -103,7 +103,7 @@ Description: - the new update operation will become the new last entry. - >>> doc=fitz.open() + >>> doc=pymupdf.open() >>> doc.journal_enable() >>> doc.journal_start_op("Page insert") >>> page=doc.new_page() diff --git a/docs/recipes-low-level-interfaces.rst b/docs/recipes-low-level-interfaces.rst index f649fe975..60d63df99 100644 --- a/docs/recipes-low-level-interfaces.rst +++ b/docs/recipes-low-level-interfaces.rst @@ -129,8 +129,8 @@ How to Access the PDF Catalog ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a central ("root") object of a PDF. It serves as a starting point to reach important other objects and it also contains some global options for the PDF:: - >>> import fitz - >>> doc=fitz.open("PyMuPDF.pdf") + >>> import pymupdf + >>> doc=pymupdf.open("PyMuPDF.pdf") >>> cat = doc.pdf_catalog() # get xref of the /Catalog >>> print(doc.xref_object(cat)) # print object definition << @@ -165,8 +165,8 @@ XRefStm int Offset of a cross-reference stream. See :ref:`AdobeManual` p Access this information via PyMuPDF with :meth:`Document.pdf_trailer` or, equivalently, via :meth:`Document.xref_object` using -1 instead of a valid :data:`xref` number. - >>> import fitz - >>> doc=fitz.open("PyMuPDF.pdf") + >>> import pymupdf + >>> doc=pymupdf.open("PyMuPDF.pdf") >>> print(doc.xref_object(-1)) # or: print(doc.pdf_trailer()) << /Type /XRef @@ -269,7 +269,7 @@ Use the following code to see **all items** stored in the metadata object:: raise ValueError("PDF has no metadata") xref = int(value.replace("0 R", "")) # extract the metadata xref # add some private information - doc.xref_set_key(xref, "mykey", fitz.get_pdf_str("北京 is Beijing")) + doc.xref_set_key(xref, "mykey", pymupdf.get_pdf_str("北京 is Beijing")) # # after executing the previous code snippet, we will see this: pprint(metadata) @@ -298,8 +298,8 @@ There also exist granular, elegant ways to access and manipulate selected PDF :d * :meth:`Document.xref_get_keys` returns the PDF keys of the object at :data:`xref`:: - In [1]: import fitz - In [2]: doc = fitz.open("pymupdf.pdf") + In [1]: import pymupdf + In [2]: doc = pymupdf.open("pymupdf.pdf") In [3]: page = doc[0] In [4]: from pprint import pprint In [5]: pprint(doc.xref_get_keys(page.xref)) diff --git a/docs/recipes-multiprocessing.rst b/docs/recipes-multiprocessing.rst index 8c4712c1c..e47530b1e 100644 --- a/docs/recipes-multiprocessing.rst +++ b/docs/recipes-multiprocessing.rst @@ -16,11 +16,7 @@ Multiprocessing ============================== -:title:`MuPDF` has no integrated support for threading - calling itself "thread-agnostic". While there do exist tricky possibilities to still use threading with :title:`MuPDF`, the baseline consequence for :title:`PyMuPDF` is: - -**No Python threading support**. - -Using :title:`PyMuPDF` in a :title:`Python` threading environment will lead to blocking effects for the main thread. +|PyMuPDF| does not support running on multiple threads - doing so may cause incorrect behaviour or even crash Python itself. However, there is the option to use :title:`Python's` *multiprocessing* module in a variety of ways. @@ -35,7 +31,7 @@ If you are looking to speed up page-oriented processing for a large document, us |toggleEnd| -Here is a more complex example involving inter-process communication between a main process (showing a GUI) and a child process doing :title:`PyMuPDF` access to a document. +Here is a more complex example involving inter-process communication between a main process (showing a GUI) and a child process doing |PyMuPDF| access to a document. |toggleStart| diff --git a/docs/recipes-ocr.rst b/docs/recipes-ocr.rst new file mode 100644 index 000000000..fca4455c6 --- /dev/null +++ b/docs/recipes-ocr.rst @@ -0,0 +1,60 @@ +.. include:: header.rst + +.. _RecipesOCR: + + +.. |toggleStart| raw:: html + +
+ See code + +.. |toggleEnd| raw:: html + +
+ +==================================== +OCR - Optical Character Recognition +==================================== + +|PyMuPDF| has integrated support for OCR (Optical Character Recognition). It is possible to use OCR for both, images (via the :ref:`Pixmap` class) and for document pages. + +The feature is currently based on Tesseract-OCR which must be installed as a separate application -- see the :ref:`installation_ocr`. + +How to OCR an Image +-------------------- +A supported image must first be converted to a :ref:`Pixmap`. The Pixmap can then be saved to a 1-page PDF. This page will look like the original image with the same width and height. It will contain a layer of text as recognized by Tesseract. + +The PDF can be generated via one of the methods :meth:`Pixmap.pdfocr_save` or :meth:`Pixmap.pdfocr_tobytes`, as a file on disk or as a PDF in memory. + +The text can be extracted and searched with the usual text extraction and search methods (:meth:`Page.get_text`, :meth:`Page.search_for`, etc.). Please also note the following important facts and prerequisites: + +* When converting the image to a Pixmap, please confirm that the color space is RGB and alpha is `False` (no transparency). Convert the original Pixmap if necessary. +* All text is written as "hidden" with Tesseract's own `GlyphLessFont`, a mono-spaced font with metrics comparable to Courier. +* All text has the properties regular and black (i.e. no bold, no italic, no information about the original fonts). +* Tesseract does not recognize vector graphics (i.e. no drawings / line-art). + +This approach is also recommended to OCR a complete scanned PDF: + +* Render each page to a :ref:`Pixmap` with desired resolution +* Append the resulting 1-page PDF to the output PDF + +How to OCR a Document Page +---------------------------- +Any supported document page can be OCR-ed -- either the complete page or only the image areas on it. + +Because optical character recognition is about one thousand times slower than standard text extraction, we make sure to do OCR only once per page and store the result in a :ref:`TextPage`. Using this TextPage for all subsequent extractions and text searches will then happen with |PyMuPDF|'s usual top speed. + +To OCR a document page, follow this approach: + +1. Determine whether OCR is needed / beneficial at all. A number of criteria can be used for this decision, like: + + * page is completely covered by an image + * no text exists on the page + * thousands of small vector graphics (indicating *simulated* text) + +2. OCR the page and store result in a :ref:`TextPage` object using an instruction like `tp = page.get_textpage_ocr(...)`. + +3. Refer to the produced :ref:`TextPage` in all subsequent text extractions and searches via the `textpage=tp` parameter. + + +.. include:: footer.rst diff --git a/docs/recipes-optional-content.rst b/docs/recipes-optional-content.rst index 17b5fae25..6e201e642 100644 --- a/docs/recipes-optional-content.rst +++ b/docs/recipes-optional-content.rst @@ -42,7 +42,7 @@ If you want to put an **existing** image under the control of an OCG, you must f To **remove** an OCG from an image, do `doc.set_oc(img_xref, 0)`. -One single OCG can be assigned to mutiple PDF objects to control their visibility. +One single OCG can be assigned to multiple PDF objects to control their visibility. How to Define Complex Optional Content Conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/recipes-stories.rst b/docs/recipes-stories.rst index 244ecd5a2..d633b9281 100644 --- a/docs/recipes-stories.rst +++ b/docs/recipes-stories.rst @@ -2,6 +2,7 @@ .. _RecipesStories: +.. role:: htmlTag(emphasis) .. |toggleStart| raw:: html @@ -41,17 +42,17 @@ Here is the inevitable "Hello World" example. We will show two variants: Variant using an existing HTML source [#f1]_ -- which in this case is defined as a constant in the script:: - import fitz + import pymupdf HTML = """

Hello World!

""" - MEDIABOX = fitz.paper_rect("letter") # output page format: Letter + MEDIABOX = pymupdf.paper_rect("letter") # output page format: Letter WHERE = MEDIABOX + (36, 36, -36, -36) # leave borders of 0.5 inches - story = fitz.Story(html=HTML) # create story from HTML - writer = fitz.DocumentWriter("output.pdf") # create the writer + story = pymupdf.Story(html=HTML) # create story from HTML + writer = pymupdf.DocumentWriter("output.pdf") # create the writer more = 1 # will indicate end of input once it is set to 0 @@ -67,7 +68,7 @@ Variant using an existing HTML source [#f1]_ -- which in this case is defined as The above effect (sans-serif and blue text) could have been achieved by using a separate CSS source like so:: - import fitz + import pymupdf CSS = """ body { @@ -81,24 +82,24 @@ Variant using an existing HTML source [#f1]_ -- which in this case is defined as """ # the story would then be created like this: - story = fitz.Story(html=HTML, user_css=CSS) + story = pymupdf.Story(html=HTML, user_css=CSS) ----- The Python API variant -- everything is created programmatically:: - import fitz + import pymupdf - MEDIABOX = fitz.paper_rect("letter") + MEDIABOX = pymupdf.paper_rect("letter") WHERE = MEDIABOX + (36, 36, -36, -36) - story = fitz.Story() # create an empty story + story = pymupdf.Story() # create an empty story body = story.body # access the body of its DOM with body.add_paragraph() as para: # store desired content para.set_font("sans-serif").set_color("blue").add_text("Hello World!") - writer = fitz.DocumentWriter("output.pdf") + writer = pymupdf.DocumentWriter("output.pdf") more = 1 @@ -126,13 +127,13 @@ Images can be referenced in the provided HTML source, or the reference to a desi We extend our "Hello World" example from above and display an image of our planet right after the text. Assuming the image has the name "world.jpg" and is present in the script's folder, then this is the modified version of the above Python API variant:: - import fitz + import pymupdf - MEDIABOX = fitz.paper_rect("letter") + MEDIABOX = pymupdf.paper_rect("letter") WHERE = MEDIABOX + (36, 36, -36, -36) # create story, let it look at script folder for resources - story = fitz.Story(archive=".") + story = pymupdf.Story(archive=".") body = story.body # access the body of its DOM with body.add_paragraph() as para: @@ -144,7 +145,7 @@ We extend our "Hello World" example from above and display an image of our plane # store image in another paragraph para.add_image("world.jpg") - writer = fitz.DocumentWriter("output.pdf") + writer = pymupdf.DocumentWriter("output.pdf") more = 1 @@ -171,7 +172,7 @@ These cases are fairly straightforward. As a general recommendation, HTML and CSS sources should be **read as binary files** and decoded before using them in a story. The Python `pathlib.Path` provides convenient ways to do this:: import pathlib - import fitz + import pymupdf htmlpath = pathlib.Path("myhtml.html") csspath = pathlib.Path("mycss.css") @@ -179,7 +180,7 @@ As a general recommendation, HTML and CSS sources should be **read as binary fil HTML = htmlpath.read_bytes().decode() CSS = csspath.read_bytes().decode() - story = fitz.Story(html=HTML, user_css=CSS) + story = pymupdf.Story(html=HTML, user_css=CSS) ----- @@ -334,7 +335,7 @@ Outputting HTML tables is supported as follows: * Column widths are computed automatically based on column content. They cannot be directly set. * Table **cells may contain images** which will be considered in the column width calculation magic. * Row heights are computed automatically based on row content - leading to multi-line rows where needed. -* The potentially multiple lines of a table row will always be kept together on one page (respectively "where" rectangle) and not be splitted. +* The potentially multiple lines of a table row will always be kept together on one page (respectively "where" rectangle) and not be split. * Table header rows are only **shown on the first page / "where" rectangle.** * The "style" attribute is ignored when given directly in HTML table elements. Styling for a table and its elements must happen separately, in CSS source or within the :htmlTag:`style` tag. * Styling for :htmlTag:`tr` elements is not supported and ignored. Therefore, a table-wide grid or alternating row background colors are not supported. One of the following example scripts however shows an easy way to deal with this limitation. diff --git a/docs/recipes-text.rst b/docs/recipes-text.rst index 667ebffcb..5bc1bcbc4 100644 --- a/docs/recipes-text.rst +++ b/docs/recipes-text.rst @@ -18,9 +18,9 @@ The document can be any :ref:`supported type`. The script works as a command line tool which expects the document filename supplied as a parameter. It generates one text file named "filename.txt" in the script directory. Text of pages is separated by a form feed character:: - import sys, pathlib, fitz + import sys, pathlib, pymupdf fname = sys.argv[1] # get document filename - with fitz.open(fname) as doc: # open document + with pymupdf.open(fname) as doc: # open document text = chr(12).join([page.get_text() for page in doc]) # write as a binary file to support non-ASCII characters pathlib.Path(fname + ".txt").write_bytes(text.encode()) @@ -40,11 +40,17 @@ See the following two sections for examples and further explanations. triple: lookup;text;key-value +How to Extract Text as Markdown +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is especially useful for :title:`RAG/LLM` environments - please see :ref:`Outputting as Markdown `. + + .. _RecipesText_A1: How to Extract Key-Value Pairs from a Page ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the layout of a page is *"predictable"* in some sense, then there is a simple way to find the values for a given set of keywords fast and easily -- without using regular expressions. Please see `this `_ example script. +If the layout of a page is *"predictable"* in some sense, then there is a simple way to find the values for a given set of keywords fast and easily -- without using regular expressions. Please see `this example script `_. "Predictable" in this context means: @@ -85,7 +91,7 @@ One of the common issues with PDF text extraction is, that text may not appear i This is the responsibility of the PDF creator (software or a human). For example, page headers may have been inserted in a separate step -- after the document had been produced. In such a case, the header text will appear at the end of a page text extraction (although it will be correctly shown by PDF viewer software). For example, the following snippet will add some header and footer lines to an existing PDF:: - doc = fitz.open("some.pdf") + doc = pymupdf.open("some.pdf") header = "Header" # text in header footer = "Page %i of %i" # text in footer for page in doc: @@ -104,7 +110,7 @@ The text sequence extracted from a page modified in this way will look like this PyMuPDF has several means to re-establish some reading sequence or even to re-generate a layout close to the original: 1. Use `sort` parameter of :meth:`Page.get_text`. It will sort the output from top-left to bottom-right (ignored for XHTML, HTML and XML output). -2. Use the `fitz` module in CLI: `python -m fitz gettext ...`, which produces a text file where text has been re-arranged in layout-preserving mode. Many options are available to control the output. +2. Use the `pymupdf` module in CLI: `python -m pymupdf gettext ...`, which produces a text file where text has been re-arranged in layout-preserving mode. Many options are available to control the output. You can also use the above mentioned `script `_ with your modifications. @@ -113,12 +119,16 @@ You can also use the above mentioned `script ` from Documents -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you see a table in a document, you are not normally looking at something like an embedded Excel or other identifiable object. It usually is just text, formatted to appear as appropriate. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you see a table in a document, you are normally not looking at something like an embedded Excel or other identifiable object. It usually is just normal, standard text, formatted to appear as tabular data. + +Extracting tabular data from such a page area therefore means that you must find a way to **identify** the table area (i.e. its boundary box), then **(1)** graphically indicate table and column borders, and **(2)** then extract text based on this information. + +This can be a very complex task, depending on details like the presence or absence of lines, rectangles or other supporting vector graphics. -Extracting a tabular data from such a page area therefore means that you must find a way to **(1)** graphically indicate table and column borders, and **(2)** then extract text based on this information. +Method :meth:`Page.find_tables` does all that for you, with a high table detection precision. Its great advantage is that there are no external library dependencies, nor the need to employ artificial intelligence or machine learning technologies. It also provides an integrated interface to the well-known Python package for data analysis `pandas `_. -The wxPython GUI script `extract.py `_ strives to exactly do that. You may want to have a look at it and adjust it to your liking. +Please have a look at example `Jupyter notebooks `_, which cover standard situations like multiple tables on one page or joining table fragments across multiple pages. ---------- @@ -138,23 +148,23 @@ This method has advantages and drawbacks. Pros are: But you also have other options:: import sys - import fitz + import pymupdf def mark_word(page, text): """Underline each word that contains 'text'. """ found = 0 - wlist = page.get_text("words") # make the word list + wlist = page.get_text("words", delimiters=None) # make the word list for w in wlist: # scan through all words on page if text in w[4]: # w[4] is the word's string found += 1 # count - r = fitz.Rect(w[:4]) # make rect from word bbox + r = pymupdf.Rect(w[:4]) # make rect from word bbox page.add_underline_annot(r) # underline return found fname = sys.argv[1] # filename text = sys.argv[2] # search string - doc = fitz.open(fname) + doc = pymupdf.open(fname) print("underlining words containing '%s' in document '%s'" % (word, doc.name)) @@ -169,10 +179,10 @@ But you also have other options:: if new_doc: doc.save("marked-" + doc.name) -This script uses `Page.get_text("words")` to look for a string, handed in via cli parameter. This method separates a page's text into "words" using spaces and line breaks as delimiters. Further remarks: +This script uses `Page.get_text("words")` to look for a string, handed in via cli parameter. This method separates a page's text into "words" using white spaces as delimiters. Further remarks: * If found, the **complete word containing the string** is marked (underlined) -- not only the search string. -* The search string may **not contain spaces** or other white space. +* The search string may **not contain word delimiters**. By default, word delimiters are white spaces and the non-breaking space `chr(0xA0)`. If you use extra delimiting characters like `page.get_text("words", delimiters="./,")` then none of these characters should be included in your search string either. * As shown here, upper / lower cases are **respected**. But this can be changed by using the string method *lower()* (or even regular expressions) in function *mark_word*. * There is **no upper limit**: all occurrences will be detected. * You can use **anything** to mark the word: 'Underline', 'Highlight', 'StrikeThrough' or 'Square' annotations, etc. @@ -188,23 +198,26 @@ This script uses `Page.get_text("words")` to look for a string, handed in via cl How to Mark Searched Text ~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. codespell:ignore-begin + This script searches for text and marks it:: # -*- coding: utf-8 -*- - import fitz + import pymupdf # the document to annotate - doc = fitz.open("tilted-text.pdf") + doc = pymupdf.open("tilted-text.pdf") # the text to be marked - t = "¡La práctica hace el campeón!" + needle = "¡La práctica hace el campeón!" # work with first page only page = doc[0] # get list of text locations # we use "quads", not rectangles because text may be tilted! - rl = page.search_for(t, quads = True) + rl = page.search_for(needle, quads=True) # mark all found quads with one annotation page.add_squiggly_annot(rl) @@ -212,6 +225,8 @@ This script searches for text and marks it:: # save to a new PDF doc.save("a-squiggly.pdf") +.. codespell:ignore-end + The result looks like this: .. image:: images/img-textmarker.* @@ -230,12 +245,12 @@ But text **extraction** with the "dict" / "rawdict" options of :meth:`Page.get_t The "bboxes" returned by the method however are rectangles only -- not quads. So, to mark span text correctly, its quad must be recovered from the data contained in the line and span dictionary. Do this with the following utility function (new in v1.18.9):: - span_quad = fitz.recover_quad(line["dir"], span) + span_quad = pymupdf.recover_quad(line["dir"], span) annot = page.add_highlight_annot(span_quad) # this will mark the complete span text If you want to **mark the complete line** or a subset of its spans in one go, use the following snippet (works for v1.18.10 or later):: - line_quad = fitz.recover_line_quad(line, spans=line["spans"][1:-1]) + line_quad = pymupdf.recover_line_quad(line, spans=line["spans"][1:-1]) page.add_highlight_annot(line_quad) .. image:: images/img-linequad.* @@ -299,10 +314,10 @@ How to Write Text Lines ^^^^^^^^^^^^^^^^^^^^^^^^^^ Output some text lines on a page:: - import fitz - doc = fitz.open(...) # new or existing PDF + import pymupdf + doc = pymupdf.open(...) # new or existing PDF page = doc.new_page() # new or existing page via doc[n] - p = fitz.Point(50, 72) # start point of 1st line + p = pymupdf.Point(50, 72) # start point of 1st line text = "Some text,\nspread across\nseveral lines." # the same result is achievable by @@ -318,7 +333,7 @@ Output some text lines on a page:: doc.save("text.pdf") -With this method, only the **number of lines** will be controlled to not go beyond page height. Surplus lines will not be written and the number of actual lines will be returned. The calculation uses a line height calculated from the fontsize and 36 points (0.5 inches) as bottom margin. +With this method, only the **number of lines** will be controlled to not go beyond page height. Surplus lines will not be written and the number of actual lines will be returned. The calculation uses a line height calculated from the :data:`fontsize` and 36 points (0.5 inches) as bottom margin. Line **width is ignored**. The surplus part of a line will simply be invisible. @@ -326,8 +341,8 @@ However, for built-in fonts there are ways to calculate the line width beforehan Here is another example. It inserts 4 text strings using the four different rotation options, and thereby explains, how the text insertion point must be chosen to achieve the desired result:: - import fitz - doc = fitz.open() + import pymupdf + doc = pymupdf.open() page = doc.new_page() # the text strings, each having 3 lines text1 = "rotate=0\nLine 2\nLine 3" @@ -336,10 +351,10 @@ Here is another example. It inserts 4 text strings using the four different rota text4 = "rotate=180\nLine 2\nLine 3" red = (1, 0, 0) # the color for the red dots # the insertion points, each with a 25 pix distance from the corners - p1 = fitz.Point(25, 25) - p2 = fitz.Point(page.rect.width - 25, 25) - p3 = fitz.Point(25, page.rect.height - 25) - p4 = fitz.Point(page.rect.width - 25, page.rect.height - 25) + p1 = pymupdf.Point(25, 25) + p2 = pymupdf.Point(page.rect.width - 25, 25) + p3 = pymupdf.Point(25, page.rect.height - 25) + p4 = pymupdf.Point(page.rect.width - 25, page.rect.height - 25) # create a Shape to draw on shape = page.new_shape() @@ -375,82 +390,327 @@ How to Fill a Text Box ^^^^^^^^^^^^^^^^^^^^^^^^^^ This script fills 4 different rectangles with text, each time choosing a different rotation value:: - import fitz - doc = fitz.open(...) # new or existing PDF + import pymupdf + + doc = pymupdf.open() # new or existing PDF page = doc.new_page() # new page, or choose doc[n] - r1 = fitz.Rect(50,100,100,150) # a 50x50 rectangle - disp = fitz.Rect(55, 0, 55, 0) # add this to get more rects - r2 = r1 + disp # 2nd rect - r3 = r1 + disp * 2 # 3rd rect - r4 = r1 + disp * 3 # 4th rect - t1 = "text with rotate = 0." # the texts we will put in + + # write in this overall area + rect = pymupdf.Rect(100, 100, 300, 150) + + # partition the area in 4 equal sub-rectangles + CELLS = pymupdf.make_table(rect, cols=4, rows=1) + + t1 = "text with rotate = 0." # these texts we will written t2 = "text with rotate = 90." - t3 = "text with rotate = -90." - t4 = "text with rotate = 180." - red = (1,0,0) # some colors - gold = (1,1,0) - blue = (0,0,1) - """We use a Shape object (something like a canvas) to output the text and + t3 = "text with rotate = 180." + t4 = "text with rotate = 270." + text = [t1, t2, t3, t4] + red = pymupdf.pdfcolor["red"] # some colors + gold = pymupdf.pdfcolor["gold"] + blue = pymupdf.pdfcolor["blue"] + """ + We use a Shape object (something like a canvas) to output the text and the rectangles surrounding it for demonstration. """ shape = page.new_shape() # create Shape - shape.draw_rect(r1) # draw rectangles - shape.draw_rect(r2) # giving them - shape.draw_rect(r3) # a yellow background - shape.draw_rect(r4) # and a red border - shape.finish(width = 0.3, color = red, fill = gold) - # Now insert text in the rectangles. Font "Helvetica" will be used - # by default. A return code rc < 0 indicates insufficient space (not checked here). - rc = shape.insert_textbox(r1, t1, color = blue) - rc = shape.insert_textbox(r2, t2, color = blue, rotate = 90) - rc = shape.insert_textbox(r3, t3, color = blue, rotate = -90) - rc = shape.insert_textbox(r4, t4, color = blue, rotate = 180) - shape.commit() # write all stuff to page /Contents - doc.save("...") - -Several default values were used above: font "Helvetica", font size 11 and text alignment "left". The result will look like this: - -.. image:: images/img-textbox.* + for i in range(len(CELLS[0])): + shape.draw_rect(CELLS[0][i]) # draw rectangle + shape.insert_textbox( + CELLS[0][i], text[i], fontname="hebo", color=blue, rotate=90 * i + ) + + shape.finish(width=0.3, color=red, fill=gold) + + shape.commit() # write all stuff to the page + doc.ez_save(__file__.replace(".py", ".pdf")) + +Some default values were used above: font size 11 and text alignment "left". The result will look like this: + +.. image:: images/img-rotate.* :scale: 50 ------------------------------------------ .. _RecipesText_I_c: -How to Use Non-Standard Encoding +How to Fill a Box with HTML Text ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Since v1.14, MuPDF allows Greek and Russian encoding variants for the :data:`Base14_Fonts`. In PyMuPDF this is supported via an additional *encoding* argument. Effectively, this is relevant for Helvetica, Times-Roman and Courier (and their bold / italic forms) and characters outside the ASCII code range only. Elsewhere, the argument is ignored. Here is how to request Russian encoding with the standard font Helvetica:: +Method :meth:`Page.insert_htmlbox` offers a **much more powerful** way to insert text in a rectangle. + +Instead of simple, plain text, this method accepts HTML source, which may not only contain HTML tags but also styling instructions to influence things like font, font weight (bold) and style (italic), color and much more. + +It is also possible to mix multiple fonts and languages, to output HTML tables and to insert images and URI links. + +For even more styling flexibility, an additional CSS source may also be given. - page.insert_text(point, russian_text, encoding=fitz.TEXT_ENCODING_CYRILLIC) +The method is based on the :ref:`Story` class. Therefore, complex script systems like Devanagari, Nepali, Tamil and many are supported and written correctly thanks to using the HarfBuzz library - which provides this so-called **"text shaping"** feature. -The valid encoding values are TEXT_ENCODING_LATIN (0), TEXT_ENCODING_GREEK (1), and TEXT_ENCODING_CYRILLIC (2, Russian) with Latin being the default. Encoding can be specified by all relevant font and text insertion methods. +Any required fonts to output characters are automatically pulled in from the Google NOTO font library - as a fallback (when the -- optionally supplied -- user font(s) do not contain some glyphs). -By the above statement, the fontname *helv* is automatically connected to the Russian font variant of Helvetica. Any subsequent text insertion with **this fontname** will use the Russian Helvetica encoding. +As a small glimpse into the features offered here, we will output the following HTML-enriched text:: -If you change the fontname just slightly, you can also achieve an **encoding "mixture"** for the **same base font** on the same page:: + import pymupdf + + + rect = pymupdf.Rect(100, 100, 400, 300) + + text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed + eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquid ex ea commodi consequat. Quis aute iure + reprehenderit + in voluptate velit + esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat + cupiditat non proident, sunt in culpa qui + officia deserunt mollit anim id + est laborum.""" + + doc = pymupdf.Document() - import fitz - doc=fitz.open() page = doc.new_page() - shape = page.new_shape() - t="Sômé tèxt wìth nöñ-Lâtîn characterß." - shape.insert_text((50,70), t, fontname="helv", encoding=fitz.TEXT_ENCODING_LATIN) - shape.insert_text((50,90), t, fontname="HElv", encoding=fitz.TEXT_ENCODING_GREEK) - shape.insert_text((50,110), t, fontname="HELV", encoding=fitz.TEXT_ENCODING_CYRILLIC) - shape.commit() - doc.save("t.pdf") + page.insert_htmlbox(rect, text, css="* {font-family: sans-serif;font-size:14px;}") + + doc.ez_save(__file__.replace(".py", ".pdf")) + +Please note how the "css" parameter is used to globally select the default "sans-serif" font and a font size of 14. + +The result will look like this: + +.. image:: images/img-htmlbox1.* + +How to output HTML tables and images +....................................... + +Here is another example that outputs a table with this method. This time, we are including all the styling in the HTML source itself. Please also note, how it works to include an image - even within a table cell:: + + import pymupdf + import os + + filedir = os.path.dirname(__file__) + + + text = """ + + + +

Some Colors

+
+ + + + + + + + + + + + +
LimeLemonImageMauve
GreenYellowBetween
Gray and Purple
+ + """ -The result: + doc = pymupdf.Document() -.. image:: images/img-encoding.* - :scale: 50 + page = doc.new_page() + rect = page.rect + (36, 36, -36, -36) + + # we must specify an Archive because of the image + page.insert_htmlbox(rect, text, archive=pymupdf.Archive(".")) + + doc.ez_save(__file__.replace(".py", ".pdf")) + + + +The result will look like this: + +.. image:: images/img-htmlbox2.* + + +How to Output Languages of the World +....................................... + +Our third example will demonstrate the automatic multi-language support. It includes automatic **text shaping** for complex scripting systems like Devanagari and right-to-left languages:: + + import pymupdf + + greetings = ( + "Hello, World!", # english + "Hallo, Welt!", # german + "سلام دنیا!", # persian + "வணக்கம், உலகம்!", # tamil + "สวัสดีชาวโลก!", # thai + "Привіт Світ!", # ucranian + "שלום עולם!", # hebrew + "ওহে বিশ্ব!", # bengali + "你好世界!", # chinese + "こんにちは世界!", # japanese + "안녕하세요, 월드!", # korean + "नमस्कार, विश्व !", # sanskrit + "हैलो वर्ल्ड!", # hindi + ) + doc = pymupdf.open() + page = doc.new_page() + rect = (50, 50, 200, 500) + + # join greetings into one text string + text = " ... ".join([t for t in greetings]) + + # the output of the above is simple: + page.insert_htmlbox(rect, text) + doc.save(__file__.replace(".py", ".pdf")) + +And this is the output: + +.. image:: images/img-htmlbox3.* + +How to Specify your Own Fonts +................................. + +Define your font files in CSS syntax using the `@font-face` statement. You need a separate `@font-face` for every combination of font weight and font style (e.g. bold or italic) you want to be supported. The following example uses the famous MS Comic Sans font in its four variants regular, bold, italic and bold-italic. + +As these four font files are located in the system's folder `C:/Windows/Fonts` the method needs an :ref:`Archive` definition that points to that folder:: + + """ + How to use your own fonts with method Page.insert_htmlbox(). + """ + import pymupdf + + # Example text + text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed + eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquid ex ea commodi consequat. Quis aute iure + reprehenderit + in voluptate velit + esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat + cupiditat non proident, sunt in culpa qui + officia deserunt mollit anim id + est laborum.""" + + """ + We need an Archive object to show where font files are located. + We intend to use the font family "MS Comic Sans". + """ + arch = pymupdf.Archive("C:/Windows/Fonts") + + # These statements define which font file to use for regular, bold, + # italic and bold-italic text. + # We assign an arbitrary common font-family for all 4 font files. + # The Story algorithm will select the right file as required. + # We request to use "comic" throughout the text. + css = """ + @font-face {font-family: comic; src: url(comic.ttf);} + @font-face {font-family: comic; src: url(comicbd.ttf);font-weight: bold;} + @font-face {font-family: comic; src: url(comicz.ttf);font-weight: bold;font-style: italic;} + @font-face {font-family: comic; src: url(comici.ttf);font-style: italic;} + * {font-family: comic;} + """ + + doc = pymupdf.Document() + page = doc.new_page(width=150, height=150) # make small page + + page.insert_htmlbox(page.rect, text, css=css, archive=arch) + + doc.subset_fonts(verbose=True) # build subset fonts to reduce file size + doc.ez_save(__file__.replace(".py", ".pdf")) + +.. image:: images/img-htmlbox4.* + +How to Request Text Alignment +................................ + +This example combines multiple requirements: + +* Rotate the text by 90 degrees anti-clockwise. +* Use a font from package `pymupdf-fonts `_. You will see that the respective CSS definitions are a lot easier in this case. +* Align the text with the "justify" option. + +:: + + """ + How to use a pymupdf font with method Page.insert_htmlbox(). + """ + import pymupdf + + # Example text + text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed + eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquid ex ea commodi consequat. Quis aute iure + reprehenderit + in voluptate velit + esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat + cupiditat non proident, sunt in culpa qui + officia deserunt mollit anim id + est laborum.""" + + """ + This is similar to font file support. However, we can use a convenience + function for creating required CSS definitions. + We still need an Archive for finding the font binaries. + """ + arch = pymupdf.Archive() + + # We request to use "myfont" throughout the text. + css = pymupdf.css_for_pymupdf_font("ubuntu", archive=arch, name="myfont") + css += "* {font-family: myfont;text-align: justify;}" + + doc = pymupdf.Document() + + page = doc.new_page(width=150, height=150) + + page.insert_htmlbox(page.rect, text, css=css, archive=arch, rotate=90) + + doc.subset_fonts(verbose=True) + doc.ez_save(__file__.replace(".py", ".pdf")) + +.. image:: images/img-htmlbox5.* + + +| + +.. _RecipesText_J: + + +How to Extract Text with Color +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Iterate through your text blocks and find the spans of text you need for this information. + +:: + + for page in doc: + text_blocks = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT)["blocks"] + for block in text_blocks: + for line in block["lines"]: + for span in line["spans"]: + text = span["text"] + color = pymupdf.sRGB_to_rgb(span["color"]) + print(f"Text: {text}, Color: {color}") -The snippet above indeed leads to three different copies of the Helvetica font in the PDF. Each copy is uniquely identified (and referenceable) by using the correct upper-lower case spelling of the reserved word "helv":: - for f in doc.get_page_fonts(0): print(f) - [6, 'n/a', 'Type1', 'Helvetica', 'helv', 'WinAnsiEncoding'] - [7, 'n/a', 'Type1', 'Helvetica', 'HElv', 'WinAnsiEncoding'] - [8, 'n/a', 'Type1', 'Helvetica', 'HELV', 'WinAnsiEncoding'] .. include:: footer.rst diff --git a/docs/recipes.rst b/docs/recipes.rst index 54bd4ee05..c0125d50b 100644 --- a/docs/recipes.rst +++ b/docs/recipes.rst @@ -8,15 +8,26 @@ .. toctree:: - recipes-text.rst + how-to-open-a-file.rst + +---- + +.. toctree:: + converting-files.rst ---- + .. toctree:: - recipes-images.rst + recipes-text.rst +---- + +.. toctree:: + + recipes-images.rst ---- @@ -24,7 +35,6 @@ recipes-annotations.rst - ---- .. toctree:: @@ -51,6 +61,12 @@ ---- +.. toctree:: + + recipes-ocr.rst + +---- + .. toctree:: recipes-optional-content.rst diff --git a/docs/rect.rst b/docs/rect.rst index 14b177be4..b5c2805b9 100644 --- a/docs/rect.rst +++ b/docs/rect.rst @@ -104,23 +104,23 @@ The following remarks are also valid for :ref:`IRect` objects: If "rect" is specified, the constructor creates a **new copy** of it. - Without parameters, the empty rectangle *Rect(0.0, 0.0, 0.0, 0.0)* is created. + Without parameters, the empty rectangle ``Rect(0.0, 0.0, 0.0, 0.0)`` is created. .. method:: round() - Creates the smallest containing :ref:`IRect`. This is **not** the same as simply rounding the rectangle's edges: The top left corner is rounded upwards and to the left while the bottom right corner is rounded downwards and to the right. + Creates the smallest containing :ref:`IRect`. This is **not the same** as simply rounding the rectangle's edges: The top left corner is rounded upwards and to the left while the bottom right corner is rounded downwards and to the right. - >>> fitz.Rect(0.5, -0.01, 123.88, 455.123456).round() + >>> pymupdf.Rect(0.5, -0.01, 123.88, 455.123456).round() IRect(0, -1, 124, 456) 1. If the rectangle is **empty**, the result is also empty. 2. **Possible paradox:** The result may be empty, **even if** the rectangle is **not** empty! In such cases, the result obviously does **not** contain the rectangle. This is because MuPDF's algorithm allows for a small tolerance (1e-3). Example: - >>> r = fitz.Rect(100, 100, 200, 100.001) + >>> r = pymupdf.Rect(100, 100, 200, 100.001) >>> r.is_empty # rect is NOT empty False >>> r.round() # but its irect IS empty! - fitz.IRect(100, 100, 200, 100) + pymupdf.IRect(100, 100, 200, 100) >>> r.round().is_empty True @@ -131,9 +131,9 @@ The following remarks are also valid for :ref:`IRect` objects: Transforms the rectangle with a matrix and **replaces the original**. If the rectangle is empty or infinite, this is a no-operation. :arg m: The matrix for the transformation. - :type m: :ref:`Matrix` + :type m: :data:`matrix_like` - :rtype: *Rect* + :rtype: ``Rect`` :returns: the smallest rectangle that contains the transformed original. .. method:: intersect(r) @@ -141,33 +141,33 @@ The following remarks are also valid for :ref:`IRect` objects: The intersection (common rectangular area, largest rectangle contained in both) of the current rectangle and *r* is calculated and **replaces the current** rectangle. If either rectangle is empty, the result is also empty. If *r* is infinite, this is a no-operation. If the rectangles are (mathematically) disjoint sets, then the result is invalid. If the result is valid but empty, then the rectangles touch each other in a corner or (part of) a side. :arg r: Second rectangle - :type r: :ref:`Rect` + :type r: :data:`rect_like` .. method:: include_rect(r) - The smallest rectangle containing the current one and *r* is calculated and **replaces the current** one. If either rectangle is infinite, the result is also infinite. If one is empty, the other one will be taken as the result. + The smallest rectangle containing the current one and ``r`` is calculated and **replaces the current** one. If either rectangle is infinite, the result is also infinite. If ``r`` is empty, the current rectangle remains unchanged. Else if the current rectangle is empty, it is replaced by ``r``. :arg r: Second rectangle - :type r: :ref:`Rect` + :type r: :data:`rect_like` .. method:: include_point(p) - The smallest rectangle containing the current one and point *p* is calculated and **replaces the current** one. **The infinite rectangle remains unchanged.** To create a rectangle containing a series of points, start with (the empty) *fitz.Rect(p1, p1)* and successively include the remaining points. + The smallest rectangle containing the current one and :data:`point_like` ``p`` is calculated and **replaces the current** one. **The infinite rectangle remains unchanged.** To create the rectangle that wraps a sequence of points, start with :meth:`EMPTY_RECT` and successively include the members of the sequence. :arg p: Point to include. - :type p: :ref:`Point` + :type p: :data:`point_like` .. method:: get_area([unit]) - Calculate the area of the rectangle and, with no parameter, equals *abs(rect)*. Like an empty rectangle, the area of an infinite rectangle is also zero. So, at least one of *fitz.Rect(p1, p2)* and *fitz.Rect(p2, p1)* has a zero area. + Calculate the area of the rectangle and, with no parameter, equals *abs(rect)*. Like an empty rectangle, the area of an infinite rectangle is also zero. So, at least one of *pymupdf.Rect(p1, p2)* and *pymupdf.Rect(p2, p1)* has a zero area. :arg str unit: Specify required unit: respective squares of *px* (pixels, default), *in* (inches), *cm* (centimeters), or *mm* (millimeters). :rtype: float .. method:: contains(x) - Checks whether *x* is contained in the rectangle. It may be an *IRect*, *Rect*, *Point* or number. If *x* is an empty rectangle, this is always true. If the rectangle is empty this is always *False* for all non-empty rectangles and for all points. `x in rect` and `rect.contains(x)` are equivalent. + Checks whether *x* is contained in the rectangle. It may be an *IRect*, *Rect*, *Point* or number. If *x* is an empty rectangle, this is always true. If the rectangle is empty this is always ``False`` for all non-empty rectangles and for all points. `x in rect` and `rect.contains(x)` are equivalent. :arg x: the object to check. :type x: :data:`rect_like` or :data:`point_like`. @@ -176,7 +176,7 @@ The following remarks are also valid for :ref:`IRect` objects: .. method:: intersects(r) - Checks whether the rectangle and a :data:`rect_like` "r" contain a common non-empty :ref:`Rect`. This will always be *False* if either is infinite or empty. + Checks whether the rectangle and a :data:`rect_like` "r" contain a common non-empty :ref:`Rect`. This will always be ``False`` if either is infinite or empty. :arg rect_like r: the rectangle to check. diff --git a/docs/requirements.txt b/docs/requirements.txt index 15bf52306..cb20fd944 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,7 @@ rst2pdf # define sphinx versioning sphinx==5.3.0 furo -readthedocs-sphinx-search==0.1.1 +readthedocs-sphinx-search==0.3.2 +sphinx_copybutton +sphinx-notfound-page +sphinxcontrib-googleanalytics diff --git a/docs/resources.rst b/docs/resources.rst new file mode 100644 index 000000000..37be56dec --- /dev/null +++ b/docs/resources.rst @@ -0,0 +1,39 @@ + +.. include:: header.rst + + +Resources +============= + +**PyMuPDF Pro** +-------------------- + + +For **Office** file support `try PyMuPDF Pro `. + + +| +---- + + +Find out about **PyMuPDF Utilities** +------------------------------------------------- + +The :title:`GitHub` repository `PyMuPDF-Utilities `_ contains a full range of examples, demonstrations and use cases. + +| +---- + + +.. _pdf2docx_conversion: + +Do you need |PDF| to **DOCX** conversion? +-------------------------------------------------- + +We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-docx** library to provide simple document conversion from |PDF| to **DOCX** format. + + + + + +.. include:: footer.rst \ No newline at end of file diff --git a/docs/samples/annotations-freetext1.py b/docs/samples/annotations-freetext1.py new file mode 100644 index 000000000..5d6f24a29 --- /dev/null +++ b/docs/samples/annotations-freetext1.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +import pymupdf + +# some colors +blue = (0, 0, 1) +green = (0, 1, 0) +red = (1, 0, 0) +gold = (1, 1, 0) + +# a new PDF with 1 page +doc = pymupdf.open() +page = doc.new_page() + +# 3 rectangles, same size, above each other +r1 = pymupdf.Rect(100, 100, 200, 150) +r2 = r1 + (0, 75, 0, 75) +r3 = r2 + (0, 75, 0, 75) + +# the text, Latin alphabet +t = "¡Un pequeño texto para practicar!" + +# add 3 annots, modify the last one somewhat +a1 = page.add_freetext_annot(r1, t, text_color=red) +a2 = page.add_freetext_annot(r2, t, fontname="Ti", text_color=blue) +a3 = page.add_freetext_annot(r3, t, fontname="Co", text_color=blue, rotate=90) +a3.set_border(width=0) +a3.update(fontsize=8, fill_color=gold) + +# save the PDF +doc.save("a-freetext.pdf") diff --git a/docs/samples/annotations-freetext2.py b/docs/samples/annotations-freetext2.py new file mode 100644 index 000000000..e48a3ae03 --- /dev/null +++ b/docs/samples/annotations-freetext2.py @@ -0,0 +1,50 @@ +import pymupdf + +"""Use rich text for FreeText annotations""" + +# define an overall styling +ds = """font-size: 11pt; font-family: sans-serif;""" + +# some special characters +bullet = chr(0x2610) + chr(0x2611) + chr(0x2612) + +# the annotation text with HTML and styling syntax +text = f"""

+PyMuPDF འདི་ ཡིག་ཆ་བཀྲམ་སྤེལ་གྱི་དོན་ལུ་ པའི་ཐོན་ཐུམ་སྒྲིལ་དྲག་ཤོས་དང་མགྱོགས་ཤོས་ཅིག་ཨིན། +Here is some bold and italic text, followed by bold-italic. Text-based check boxes: {bullet}. +

""" + +# here are some colors +gold = (1, 1, 0) +green = (0, 1, 0) + +# new/empty PDF +doc = pymupdf.open() + +# make a page in ISO-A4 format +page = doc.new_page() + +# text goes into this: +rect = pymupdf.Rect(100, 100, 350, 200) + +# define some points for callout lines +p2 = rect.tr + (50, 30) +p3 = p2 + (0, 30) + +# define the annotation +annot = page.add_freetext_annot( + rect, + text, + fill_color=gold, # fill color + opacity=1, # non-transparent + rotate=0, # no rotation + border_width=1, # border and callout line width + dashes=None, # no dashing + richtext=True, # this is rich text + style=ds, # my styling default + callout=(p3, p2, rect.tr), # define end, knee, start points + line_end=pymupdf.PDF_ANNOT_LE_OPEN_ARROW, # symbol shown at p3 + border_color=green, +) + +doc.save(__file__.replace(".py", ".pdf"), pretty=True) diff --git a/docs/samples/annotations-ink.py b/docs/samples/annotations-ink.py new file mode 100644 index 000000000..035147651 --- /dev/null +++ b/docs/samples/annotations-ink.py @@ -0,0 +1,42 @@ +import math +import pymupdf + +#------------------------------------------------------------------------------ +# preliminary stuff: create function value lists for sine and cosine +#------------------------------------------------------------------------------ +w360 = math.pi * 2 # go through full circle +deg = w360 / 360 # 1 degree as radians +rect = pymupdf.Rect(100,200, 300, 300) # use this rectangle +first_x = rect.x0 # x starts from left +first_y = rect.y0 + rect.height / 2. # rect middle means y = 0 +x_step = rect.width / 360 # rect width means 360 degrees +y_scale = rect.height / 2. # rect height means 2 +sin_points = [] # sine values go here +cos_points = [] # cosine values go here +for x in range(362): # now fill in the values + x_coord = x * x_step + first_x # current x coordinate + y = -math.sin(x * deg) # sine + p = (x_coord, y * y_scale + first_y) # corresponding point + sin_points.append(p) # append + y = -math.cos(x * deg) # cosine + p = (x_coord, y * y_scale + first_y) # corresponding point + cos_points.append(p) # append + +#------------------------------------------------------------------------------ +# create the document with one page +#------------------------------------------------------------------------------ +doc = pymupdf.open() # make new PDF +page = doc.new_page() # give it a page + +#------------------------------------------------------------------------------ +# add the Ink annotation, consisting of 2 curve segments +#------------------------------------------------------------------------------ +annot = page.add_ink_annot((sin_points, cos_points)) +# let it look a little nicer +annot.set_border(width=0.3, dashes=[1,]) # line thickness, some dashing +annot.set_colors(stroke=(0,0,1)) # make the lines blue +annot.update() # update the appearance + +page.draw_rect(rect, width=0.3) # only to demonstrate we did OK + +doc.save("a-inktest.pdf") diff --git a/docs/samples/code-printer.py b/docs/samples/code-printer.py index 8aa54b4a1..c66243cdb 100644 --- a/docs/samples/code-printer.py +++ b/docs/samples/code-printer.py @@ -6,7 +6,7 @@ source codes. The following features are included as a specialty: -1. HTML source for fitz.Story created via Python API exclusively +1. HTML source for pymupdf.Story created via Python API exclusively 2. Separate Story objects for page headers and footers 3. Use of HTML "id" elements for identifying source start pages 4. Generate a Table of Contents pointing to source file starts. This @@ -18,12 +18,12 @@ import os import time -import fitz +import pymupdf THISDIR = os.path.dirname(os.path.abspath(__file__)) TOC = [] # this will contain the TOC list items CURRENT_ID = "" # currently processed filename - stored by recorder func -MEDIABOX = fitz.paper_rect("a4-l") # chosen page size +MEDIABOX = pymupdf.paper_rect("a4-l") # chosen page size WHERE = MEDIABOX + (36, 50, -36, -36) # sub rectangle for source content # location of the header rectangle HDR_WHERE = (36, 5, MEDIABOX.width - 36, 40) @@ -59,10 +59,10 @@ def recorder(elpos): def header_story(text): """Make the page header""" - header = fitz.Story() + header = pymupdf.Story() hdr_body = header.body hdr_body.add_paragraph().set_properties( - align=fitz.fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, bgcolor="#eee", font="sans-serif", bold=True, @@ -74,11 +74,11 @@ def header_story(text): def footer_story(text): """Make the page footer""" - footer = fitz.Story() + footer = pymupdf.Story() ftr_body = footer.body ftr_body.add_paragraph().set_properties( bgcolor="#eee", - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, color="blue", fontsize=10, font="sans-serif", @@ -90,12 +90,12 @@ def code_printer(outfile): """Output the generated PDF to outfile.""" global MAX_TITLE_LEN where = +WHERE - writer = fitz.DocumentWriter(outfile, "") + writer = pymupdf.DocumentWriter(outfile, "") print_time = time.strftime("%Y-%m-%d %H:%M:%S (%z)") thispath = os.path.abspath(os.curdir) basename = os.path.basename(thispath) - story = fitz.Story() + story = pymupdf.Story() body = story.body body.set_properties(font="sans-serif") @@ -126,7 +126,7 @@ def code_printer(outfile): ).set_fontsize(10).add_text(text) # Indicate end of a source file - body.add_paragraph().set_align(fitz.TEXT_ALIGN_CENTER).add_text( + body.add_paragraph().set_align(pymupdf.TEXT_ALIGN_CENTER).add_text( f"---------- End of File '{code_file}' ----------" ) i += 1 # update file counter @@ -179,17 +179,17 @@ def code_printer(outfile): t0 = time.perf_counter() code_printer(fileptr1) # make the PDF t1 = time.perf_counter() - doc = fitz.open("pdf", fileptr1) + doc = pymupdf.open("pdf", fileptr1) old_count = doc.page_count # ----------------------------------------------------------------------------- # Post-processing step to make / insert the toc - # This also works using fitz.Story: + # This also works using pymupdf.Story: # - make a new PDF in memory which contains pages with the TOC text # - add these TOC pages to the end of the original file # - search item text on the inserted pages and cover each with a PDF link # - move the TOC pages to the front of the document # ----------------------------------------------------------------------------- - story = fitz.Story() + story = pymupdf.Story() body = story.body body.add_header(1).set_font("sans-serif").add_text("Table of Contents") # prefix TOC with an entry pointing to this page @@ -200,7 +200,7 @@ def code_printer(outfile): item[1] + f" - ({item[2]})" ) fileptr2 = io.BytesIO() # put TOC pages to a separate PDF initially - writer = fitz.DocumentWriter(fileptr2) + writer = pymupdf.DocumentWriter(fileptr2) i = 1 more = 1 while more: @@ -221,20 +221,22 @@ def code_printer(outfile): i += 1 writer.close() - doc2 = fitz.open("pdf", fileptr2) # open TOC pages as another PDF + doc2 = pymupdf.open("pdf", fileptr2) # open TOC pages as another PDF doc.insert_pdf(doc2) # and append to the main PDF new_range = range(old_count, doc.page_count) # the TOC page numbers pages = [doc[i] for i in new_range] # these are the TOC pages within main PDF for item in TOC: # search for TOC item text to get its rectangle for page in pages: - rl = page.search_for(item[1], flags=~fitz.TEXT_PRESERVE_LIGATURES) + rl = page.search_for(item[1], flags=pymupdf.TEXTFLAGS_SEARCH) if rl != []: # this text must be on next page break + else: + assert 0, f'Cannot find {item[1]=} in {len(pages)=}.' rect = rl[0] # rectangle of TOC item text link = { # make a link from it - "kind": fitz.LINK_GOTO, + "kind": pymupdf.LINK_GOTO, "from": rect, - "to": fitz.Point(0, item[3]), + "to": pymupdf.Point(0, item[3]), "page": item[2] - 1, } page.insert_link(link) diff --git a/docs/samples/filmfestival-sql.py b/docs/samples/filmfestival-sql.py index 66bce9d4d..ec949471b 100644 --- a/docs/samples/filmfestival-sql.py +++ b/docs/samples/filmfestival-sql.py @@ -31,7 +31,7 @@ import os import sqlite3 -import fitz +import pymupdf # ---------------------------------------------------------------------- # HTML template for the film report @@ -73,7 +73,7 @@ # ------------------------------------------------------------------- # define the HTML Story and fill it with database data # ------------------------------------------------------------------- -story = fitz.Story(festival_template) +story = pymupdf.Story(festival_template) body = story.body # access the HTML body detail template = body.find(None, "id", "filmtemplate") # find the template part @@ -100,8 +100,8 @@ # ------------------------------------------------------------------- # generate the PDF # ------------------------------------------------------------------- -writer = fitz.DocumentWriter(__file__.replace(".py", ".pdf"), "compress") -mediabox = fitz.paper_rect("a4") # use pages in ISO-A4 format +writer = pymupdf.DocumentWriter(__file__.replace(".py", ".pdf"), "compress") +mediabox = pymupdf.paper_rect("a4") # use pages in ISO-A4 format where = mediabox + (72, 36, -36, -72) # leave page borders more = 1 # end of output indicator diff --git a/docs/samples/json-example.py b/docs/samples/json-example.py index f6fe870f5..bc7b69866 100644 --- a/docs/samples/json-example.py +++ b/docs/samples/json-example.py @@ -1,4 +1,4 @@ -import fitz +import pymupdf import json my_json = """ @@ -164,11 +164,11 @@ # the result is a Python dictionary: my_dict = json.loads(my_json) -MEDIABOX = fitz.paper_rect("letter") # output page format: Letter +MEDIABOX = pymupdf.paper_rect("letter") # output page format: Letter WHERE = MEDIABOX + (36, 36, -36, -36) -writer = fitz.DocumentWriter("json-example.pdf") # create the writer +writer = pymupdf.DocumentWriter("json-example.pdf") # create the writer -story = fitz.Story() +story = pymupdf.Story() body = story.body for i, entry in enumerate(my_dict): diff --git a/docs/samples/make-bold.py b/docs/samples/make-bold.py deleted file mode 100644 index 330fe8bdd..000000000 --- a/docs/samples/make-bold.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Problem: Since MuPDF v1.16 a 'Freetext' annotation font is restricted to the -"normal" versions (no bold, no italics) of Times-Roman, Helvetica, Courier. -It is impossible to use PyMuPDF to modify this. - -Solution: Using Adobe's JavaScript API, it is possible to manipulate properties -of Freetext annotations. Check out these references: -https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf, -or https://www.adobe.com/devnet/acrobat/documentation.html. - -Function 'this.getAnnots()' will return all annotations as an array. We loop -over this array to set the properties of the text through the 'richContents' -attribute. -There is no explicit property to set text to bold, but it is possible to set -fontWeight=800 (400 is the normal size) of richContents. -Other attributes, like color, italics, etc. can also be set via richContents. - -If we have 'FreeText' annotations created with PyMuPDF, we can make use of this -JavaScript feature to modify the font - thus circumventing the above restriction. - -Use PyMuPDF v1.16.12 to create a push button that executes a Javascript -containing the desired code. This is what this program does. -Then open the resulting file with Adobe reader (!). -After clicking on the button, all Freetext annotations will be bold, and the -file can be saved. -If desired, the button can be removed again, using free tools like PyMuPDF or -PDF XChange editor. - -Note / Caution: ---------------- -The JavaScript will **only** work if the file is opened with Adobe Acrobat reader! -When using other PDF viewers, the reaction is unforeseeable. -""" -import sys - -import fitz - -# this JavaScript will execute when the button is clicked: -jscript = """ -var annt = this.getAnnots(); -annt.forEach(function (item, index) { - try { - var span = item.richContents; - span.forEach(function (it, dx) { - it.fontWeight = 800; - }) - item.richContents = span; - } catch (err) {} -}); -app.alert('Done'); -""" -i_fn = sys.argv[1] # input file name -o_fn = "bold-" + i_fn # output filename -doc = fitz.open(i_fn) # open input -page = doc[0] # get desired page - -# ------------------------------------------------ -# make a push button for invoking the JavaScript -# ------------------------------------------------ - -widget = fitz.Widget() # create widget - -# make it a 'PushButton' -widget.field_type = fitz.PDF_WIDGET_TYPE_BUTTON -widget.field_flags = fitz.PDF_BTN_FIELD_IS_PUSHBUTTON - -widget.rect = fitz.Rect(5, 5, 20, 20) # button position - -widget.script = jscript # fill in JavaScript source text -widget.field_name = "Make bold" # arbitrary name -widget.field_value = "Off" # arbitrary value -widget.fill_color = (0, 0, 1) # make button visible - -annot = page.add_widget(widget) # add the widget to the page -doc.save(o_fn) # output the file diff --git a/docs/samples/multiprocess-gui.py b/docs/samples/multiprocess-gui.py index 640330e08..3435e45f4 100644 --- a/docs/samples/multiprocess-gui.py +++ b/docs/samples/multiprocess-gui.py @@ -16,7 +16,7 @@ import time import multiprocessing as mp import queue -import fitz +import pymupdf ''' PyQt and PySide namespace unifier shim https://www.pythonguis.com/faq/pyqt6-vs-pyside6/ @@ -168,7 +168,7 @@ def closeEvent(self, event): def openDocInProcess(path, queNum, quePageInfo): start = my_timer() - doc = fitz.open(path) + doc = pymupdf.open(path) end = my_timer() quePageInfo.put(doc.page_count) while True: diff --git a/docs/samples/multiprocess-render.py b/docs/samples/multiprocess-render.py index 1174b02dd..761ea351c 100644 --- a/docs/samples/multiprocess-render.py +++ b/docs/samples/multiprocess-render.py @@ -15,7 +15,7 @@ import os import time from multiprocessing import Pool, cpu_count -import fitz +import pymupdf # choose a version specific timer function (bytes == str in Python 2) mytime = time.clock if str is bytes else time.perf_counter @@ -44,7 +44,7 @@ def render_page(vector): cpu = vector[1] # number of CPUs filename = vector[2] # document filename mat = vector[3] # the matrix for rendering - doc = fitz.open(filename) # open the document + doc = pymupdf.open(filename) # open the document num_pages = doc.page_count # get number of pages # pages per segment: make sure that cpu * seg_size >= num_pages! @@ -64,7 +64,7 @@ def render_page(vector): if __name__ == "__main__": t0 = mytime() # start a timer filename = sys.argv[1] - mat = fitz.Matrix(0.2, 0.2) # the rendering matrix: scale down to 20% + mat = pymupdf.Matrix(0.2, 0.2) # the rendering matrix: scale down to 20% cpu = cpu_count() # make vectors of arguments for the processes diff --git a/docs/samples/national-capitals.py b/docs/samples/national-capitals.py index 0b7ae52c2..d417ea85a 100644 --- a/docs/samples/national-capitals.py +++ b/docs/samples/national-capitals.py @@ -14,12 +14,13 @@ import sqlite3 import sys -import fitz +import pymupdf """ Table data. Used to populate a temporary SQL database, which will be processed by the script. Its only purpose is to avoid carrying around a separate database file. """ +# codespell:ignore-begin table_data = """China;Beijing;21542000;1.5%;2018 Japan;Tokyo;13921000;11.2%;2019 DR Congo;Kinshasa;12691000;13.2%;2017 @@ -260,6 +261,7 @@ Cocos (Keeling) Islands (Australia);West Island;134;24.6%;2011 Pitcairn Islands (UK);Adamstown;40;100.0%;2021 South Georgia and the South Sandwich Islands (UK);King Edward Point;22;73.3%;2018""" +# codespell:ignore-end # ------------------------------------------------------------------- # HTML template for the report. We define no table header items @@ -312,7 +314,7 @@ def recorder(elpos): if elpos.id not in ("row", "country", "capital", "population", "percent", "year"): return # only look at row / cell content - rect = fitz.Rect(elpos.rect) # cell rectangle + rect = pymupdf.Rect(elpos.rect) # cell rectangle if rect.y1 > elpos.filled: # ignore stuff below the filled rectangle return @@ -355,7 +357,7 @@ def recorder(elpos): # ------------------------------------------------------------------- # define the HTML Story and fill it with database data # ------------------------------------------------------------------- -story = fitz.Story(HTML, user_css=CSS) +story = pymupdf.Story(HTML, user_css=CSS) body = story.body # access the HTML body detail template = body.find(None, "id", "row") # find the template part @@ -384,8 +386,8 @@ def recorder(elpos): # generate the PDF and write it to memory # ------------------------------------------------------------------- fp = io.BytesIO() -writer = fitz.DocumentWriter(fp) -mediabox = fitz.paper_rect("letter") # use pages in Letter format +writer = pymupdf.DocumentWriter(fp) +mediabox = pymupdf.paper_rect("letter") # use pages in Letter format where = mediabox + (36, 36, -36, -72) # leave page borders more = True page = 0 @@ -405,7 +407,7 @@ def recorder(elpos): # ------------------------------------------------------------------- # re-open memory PDF for inserting gridlines and header rows # ------------------------------------------------------------------- -doc = fitz.open("pdf", fp) +doc = pymupdf.open("pdf", fp) for page in doc: page.wrap_contents() # ensure all "cm" commands are properly wrapped x, y, x1, y0 = coords[page.number] # read coordinates of the page @@ -435,11 +437,11 @@ def recorder(elpos): # Write page footer y0 = page.rect.height - 50 # top coordinate of footer bbox - bbox = fitz.Rect(0, y0, page.rect.width, y0 + 20) # footer bbox + bbox = pymupdf.Rect(0, y0, page.rect.width, y0 + 20) # footer bbox page.insert_textbox( bbox, f"World Capital Cities, Page {page.number+1} of {doc.page_count}", - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, ) shape.finish(width=0.3, color=0.5, fill=0.9) # rectangles and gray lines shape.commit(overlay=False) # put the drawings in background diff --git a/docs/samples/new-annots.py b/docs/samples/new-annots.py index 74d459d89..e879a4bfa 100644 --- a/docs/samples/new-annots.py +++ b/docs/samples/new-annots.py @@ -20,10 +20,10 @@ import gc import sys -import fitz +import pymupdf -print(fitz.__doc__) -if fitz.VersionBind.split(".") < ["1", "17", "0"]: +print(pymupdf.__doc__) +if pymupdf.VersionBind.split(".") < ["1", "17", "0"]: sys.exit("PyMuPDF v1.17.0+ is needed.") gc.set_debug(gc.DEBUG_UNCOLLECTABLE) @@ -37,8 +37,8 @@ gold = (1, 1, 0) green = (0, 1, 0) -displ = fitz.Rect(0, 50, 0, 50) -r = fitz.Rect(72, 72, 220, 100) +displ = pymupdf.Rect(0, 50, 0, 50) +r = pymupdf.Rect(72, 72, 220, 100) t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" @@ -49,7 +49,7 @@ def print_descr(annot): ) -doc = fitz.open() +doc = pymupdf.open() page = doc.new_page() page.set_rotation(0) @@ -65,7 +65,7 @@ def print_descr(annot): rotate=90, text_color=blue, fill_color=gold, - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, ) annot.set_border(width=0.3, dashes=[2]) annot.update(text_color=blue, fill_color=gold) @@ -81,36 +81,36 @@ def print_descr(annot): page.insert_text( pos, # insertion point highlight, # inserted text - morph=(pos, fitz.Matrix(-5)), # rotate around insertion point + morph=(pos, pymupdf.Matrix(-5)), # rotate around insertion point ) rl = page.search_for(highlight, quads=True) # need a quad b/o tilted text annot = page.add_highlight_annot(rl[0]) print_descr(annot) pos = annot.rect.bl # next insertion point -page.insert_text(pos, underline, morph=(pos, fitz.Matrix(-10))) +page.insert_text(pos, underline, morph=(pos, pymupdf.Matrix(-10))) rl = page.search_for(underline, quads=True) annot = page.add_underline_annot(rl[0]) print_descr(annot) pos = annot.rect.bl -page.insert_text(pos, strikeout, morph=(pos, fitz.Matrix(-15))) +page.insert_text(pos, strikeout, morph=(pos, pymupdf.Matrix(-15))) rl = page.search_for(strikeout, quads=True) annot = page.add_strikeout_annot(rl[0]) print_descr(annot) pos = annot.rect.bl -page.insert_text(pos, squiggled, morph=(pos, fitz.Matrix(-20))) +page.insert_text(pos, squiggled, morph=(pos, pymupdf.Matrix(-20))) rl = page.search_for(squiggled, quads=True) annot = page.add_squiggly_annot(rl[0]) print_descr(annot) pos = annot.rect.bl -r = fitz.Rect(pos, pos.x + 75, pos.y + 35) + (0, 20, 0, 20) +r = pymupdf.Rect(pos, pos.x + 75, pos.y + 35) + (0, 20, 0, 20) annot = page.add_polyline_annot([r.bl, r.tr, r.br, r.tl]) # 'Polyline' annot.set_border(width=0.3, dashes=[2]) annot.set_colors(stroke=blue, fill=green) -annot.set_line_ends(fitz.PDF_ANNOT_LE_CLOSED_ARROW, fitz.PDF_ANNOT_LE_R_CLOSED_ARROW) +annot.set_line_ends(pymupdf.PDF_ANNOT_LE_CLOSED_ARROW, pymupdf.PDF_ANNOT_LE_R_CLOSED_ARROW) annot.update(fill_color=(1, 1, 0)) print_descr(annot) @@ -118,7 +118,7 @@ def print_descr(annot): annot = page.add_polygon_annot([r.bl, r.tr, r.br, r.tl]) # 'Polygon' annot.set_border(width=0.3, dashes=[2]) annot.set_colors(stroke=blue, fill=gold) -annot.set_line_ends(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE) +annot.set_line_ends(pymupdf.PDF_ANNOT_LE_DIAMOND, pymupdf.PDF_ANNOT_LE_CIRCLE) annot.update() print_descr(annot) @@ -126,7 +126,7 @@ def print_descr(annot): annot = page.add_line_annot(r.tr, r.bl) # 'Line' annot.set_border(width=0.3, dashes=[2]) annot.set_colors(stroke=blue, fill=gold) -annot.set_line_ends(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE) +annot.set_line_ends(pymupdf.PDF_ANNOT_LE_DIAMOND, pymupdf.PDF_ANNOT_LE_CIRCLE) annot.update() print_descr(annot) @@ -161,7 +161,7 @@ def print_descr(annot): r, "This content will be removed upon applying the redaction.", color=blue, - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, ) annot = page.add_redact_annot(r) print_descr(annot) diff --git a/docs/samples/quickfox-image-no-go.py b/docs/samples/quickfox-image-no-go.py index c370dfb5c..952e2142d 100644 --- a/docs/samples/quickfox-image-no-go.py +++ b/docs/samples/quickfox-image-no-go.py @@ -47,7 +47,7 @@ import io import os import zipfile -import fitz +import pymupdf thisdir = os.path.dirname(os.path.abspath(__file__)) @@ -75,11 +75,11 @@ def analyze_page(page): """ prect = page.rect # page rectangle - will be our MEDIABOX later where = prect + (BORDER, BORDER, -BORDER, -BORDER) - TABLE = fitz.make_table(where, rows=1, cols=COLS) + TABLE = pymupdf.make_table(where, rows=1, cols=COLS) # extract rectangles covered by images on this page IMG_RECTS = sorted( # image rects on page (sort top-left to bottom-right) - [fitz.Rect(item["bbox"]) for item in page.get_image_info()], + [pymupdf.Rect(item["bbox"]) for item in page.get_image_info()], key=lambda b: (b.y1, b.x0), ) @@ -101,7 +101,7 @@ def free_cells(column): free_stripes.append((column.y0, column.y1)) # make available cells of this column - CELLS = [fitz.Rect(column.x0, y0, column.x1, y1) for (y0, y1) in free_stripes] + CELLS = [pymupdf.Rect(column.x0, y0, column.x1, y1) for (y0, y1) in free_stripes] return CELLS # collection of available Story rectangles on page @@ -117,7 +117,7 @@ def free_cells(column): # -------------------------------------------------------------- # Make the Story object # -------------------------------------------------------------- -story = fitz.Story(HTML) +story = pymupdf.Story(HTML) # modify the DOM somewhat body = story.body # access HTML body @@ -140,14 +140,14 @@ def free_cells(column): img = next_img page_info = {} # contains MEDIABOX and free CELLS per page -doc = fitz.open(docname) +doc = pymupdf.open(docname) for page in doc: pno, mediabox, cells = analyze_page(page) page_info[pno] = (mediabox, cells) doc.close() # close target PDF for now - re-open later fileobject = io.BytesIO() # let DocumentWriter write to memory -writer = fitz.DocumentWriter(fileobject) # define output writer +writer = pymupdf.DocumentWriter(fileobject) # define output writer more = 1 # stop if this ever becomes zero pno = 0 # count output pages @@ -169,8 +169,8 @@ def free_cells(column): # Re-open writer output, read its pages and overlay target pages with them. # The generated pages have same dimension as their targets. -src = fitz.open("pdf", fileobject) -doc = fitz.open(doc.name) +src = pymupdf.open("pdf", fileobject) +doc = pymupdf.open(doc.name) for page in doc: # overlay every target page with the prepared text if page.number >= src.page_count: print(f"Text only uses {src.page_count} target pages!") diff --git a/docs/samples/quickfox.py b/docs/samples/quickfox.py index 27ceab298..b765d94d1 100644 --- a/docs/samples/quickfox.py +++ b/docs/samples/quickfox.py @@ -18,17 +18,17 @@ import io import os import zipfile -import fitz +import pymupdf thisdir = os.path.dirname(os.path.abspath(__file__)) myzip = zipfile.ZipFile(os.path.join(thisdir, "quickfox.zip")) -arch = fitz.Archive(myzip) +arch = pymupdf.Archive(myzip) -if fitz.fitz_fontdescriptors: +if pymupdf.fitz_fontdescriptors: # we want to use the Ubuntu fonts for sans-serif and for monospace - CSS = fitz.css_for_pymupdf_font("ubuntu", archive=arch, name="sans-serif") - CSS = fitz.css_for_pymupdf_font("ubuntm", CSS=CSS, archive=arch, name="monospace") + CSS = pymupdf.css_for_pymupdf_font("ubuntu", archive=arch, name="sans-serif") + CSS = pymupdf.css_for_pymupdf_font("ubuntm", CSS=CSS, archive=arch, name="monospace") else: # No pymupdf-fonts available. CSS="" @@ -38,7 +38,7 @@ HTML = myzip.read("quickfox.html").decode() # make the Story object -story = fitz.Story(HTML, user_css=CSS, archive=arch) +story = pymupdf.Story(HTML, user_css=CSS, archive=arch) # -------------------------------------------------------------- # modify the DOM somewhat @@ -56,7 +56,7 @@ para = para.find_next("p", None, None) # choose PDF page size -MEDIABOX = fitz.paper_rect("letter") +MEDIABOX = pymupdf.paper_rect("letter") # text appears only within this subrectangle WHERE = MEDIABOX + (36, 36, -36, -36) @@ -65,12 +65,12 @@ # -------------------------------------------------------------- COLS = 2 # layout: 2 cols 1 row ROWS = 1 -TABLE = fitz.make_table(WHERE, cols=COLS, rows=ROWS) +TABLE = pymupdf.make_table(WHERE, cols=COLS, rows=ROWS) # fill the cells of each page in this sequence: CELLS = [TABLE[i][j] for i in range(ROWS) for j in range(COLS)] fileobject = io.BytesIO() # let DocumentWriter write to memory -writer = fitz.DocumentWriter(fileobject) # define the writer +writer = pymupdf.DocumentWriter(fileobject) # define the writer more = 1 while more: # loop until all input text has been written out @@ -85,5 +85,5 @@ writer.close() # close DocumentWriter output # for housekeeping work re-open from memory -doc = fitz.open("pdf", fileobject) +doc = pymupdf.open("pdf", fileobject) doc.ez_save(docname) diff --git a/docs/samples/showpdf-page.py b/docs/samples/showpdf-page.py index 44400ebbb..4f74dac7e 100644 --- a/docs/samples/showpdf-page.py +++ b/docs/samples/showpdf-page.py @@ -2,14 +2,14 @@ Demo of Story class in PyMuPDF ------------------------------- -This script demonstrates how to the results of a fitz.Story output can be +This script demonstrates how to the results of a pymupdf.Story output can be placed in a rectangle of an existing (!) PDF page. """ import io import os -import fitz +import pymupdf def make_pdf(fileptr, text, rect, font="sans-serif", archive=None): @@ -20,7 +20,7 @@ def make_pdf(fileptr, text, rect, font="sans-serif", archive=None): text: the text to output (HTML format) rect: the target rectangle. Will use its width / height as mediabox font: (str) font family name, default sans-serif - archive: fitz.Archive parameter. To be used if e.g. images or special + archive: pymupdf.Archive parameter. To be used if e.g. images or special fonts should be used. Returns: The matrix to convert page rectangles of the created PDF back @@ -31,14 +31,14 @@ def make_pdf(fileptr, text, rect, font="sans-serif", archive=None): changed parameters. """ # use input rectangle as the page dimension - mediabox = fitz.Rect(0, 0, rect.width, rect.height) + mediabox = pymupdf.Rect(0, 0, rect.width, rect.height) # this matrix converts mediabox back to input rect matrix = mediabox.torect(rect) - story = fitz.Story(text, archive=archive) + story = pymupdf.Story(text, archive=archive) body = story.body body.set_properties(font=font) - writer = fitz.DocumentWriter(fileptr) + writer = pymupdf.DocumentWriter(fileptr) while True: device = writer.begin_page(mediabox) more, _ = story.place(mediabox) @@ -61,10 +61,10 @@ def make_pdf(fileptr, text, rect, font="sans-serif", archive=None): # Make a PDF page for demo purposes root = os.path.abspath( f"{__file__}/..") -doc = fitz.open(f"{root}/mupdf-title.pdf") +doc = pymupdf.open(f"{root}/mupdf-title.pdf") page = doc[0] -WHERE = fitz.Rect(50, 100, 250, 500) # target rectangle on existing page +WHERE = pymupdf.Rect(50, 100, 250, 500) # target rectangle on existing page fileptr = io.BytesIO() # let DocumentWriter use this as its file @@ -72,7 +72,7 @@ def make_pdf(fileptr, text, rect, font="sans-serif", archive=None): # call DocumentWriter and Story to fill our rectangle matrix = make_pdf(fileptr, HTML, WHERE) # ------------------------------------------------------------------- -src = fitz.open("pdf", fileptr) # open DocumentWriter output PDF +src = pymupdf.open("pdf", fileptr) # open DocumentWriter output PDF if src.page_count > 1: # target rect was too small raise ValueError("target WHERE too small") diff --git a/docs/samples/simple-grid.py b/docs/samples/simple-grid.py index 84dbd279e..f8f25bf40 100644 --- a/docs/samples/simple-grid.py +++ b/docs/samples/simple-grid.py @@ -1,15 +1,15 @@ -import fitz +import pymupdf -MEDIABOX = fitz.paper_rect("letter") # output page format: Letter -GRIDSPACE = fitz.Rect(100, 100, 400, 400) -GRID = fitz.make_table(GRIDSPACE, rows=2, cols=2) +MEDIABOX = pymupdf.paper_rect("letter") # output page format: Letter +GRIDSPACE = pymupdf.Rect(100, 100, 400, 400) +GRID = pymupdf.make_table(GRIDSPACE, rows=2, cols=2) CELLS = [GRID[i][j] for i in range(2) for j in range(2)] text_table = ("A", "B", "C", "D") -writer = fitz.DocumentWriter(__file__.replace(".py", ".pdf")) # create the writer +writer = pymupdf.DocumentWriter(__file__.replace(".py", ".pdf")) # create the writer device = writer.begin_page(MEDIABOX) # make new page for i, text in enumerate(text_table): - story = fitz.Story(em=1) + story = pymupdf.Story(em=1) body = story.body with body.add_paragraph() as para: para.set_bgcolor("#ecc") diff --git a/docs/samples/story-write-stabilized-links.py b/docs/samples/story-write-stabilized-links.py index 6532d1605..47b7f86b0 100644 --- a/docs/samples/story-write-stabilized-links.py +++ b/docs/samples/story-write-stabilized-links.py @@ -1,22 +1,22 @@ """ -Demo script for PyMuPDF's `fitz.Story.write_stabilized_with_links()`. +Demo script for PyMuPDF's `pymupdf.Story.write_stabilized_with_links()`. -`fitz.Story.write_stabilized_links()` is similar to -`fitz.Story.write_stabilized()` except that it creates a PDF `fitz.Document` +`pymupdf.Story.write_stabilized_links()` is similar to +`pymupdf.Story.write_stabilized()` except that it creates a PDF `pymupdf.Document` that contains PDF links generated from all internal links in the original html. """ import textwrap -import fitz +import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' - rect = fitz.Rect(10, 20, 290, 380) - mediabox = fitz.Rect(0, 0, 300, 400) + rect = pymupdf.Rect(10, 20, 290, 380) + mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None @@ -61,6 +61,7 @@ def contentfn(positions):

First section

Contents of first section.

@@ -85,5 +86,5 @@ def contentfn(positions): out_path = __file__.replace('.py', '.pdf') -document = fitz.Story.write_stabilized_with_links(contentfn, rectfn) +document = pymupdf.Story.write_stabilized_with_links(contentfn, rectfn) document.save(out_path) diff --git a/docs/samples/story-write-stabilized.py b/docs/samples/story-write-stabilized.py index 205ab0582..334449456 100644 --- a/docs/samples/story-write-stabilized.py +++ b/docs/samples/story-write-stabilized.py @@ -1,10 +1,10 @@ """ -Demo script for PyMuPDF's `fitz.Story.write_stabilized()`. +Demo script for PyMuPDF's `pymupdf.Story.write_stabilized()`. -`fitz.Story.write_stabilized()` is similar to `fitz.Story.write()`, +`pymupdf.Story.write_stabilized()` is similar to `pymupdf.Story.write()`, except instead of taking a fixed html document, it does iterative layout of dynamically-generated html content (provided by a callback) to a -`fitz.DocumentWriter`. +`pymupdf.DocumentWriter`. For example this allows one to add a dynamically-generated table of contents section while ensuring that page numbers are patched up until stable. @@ -12,15 +12,15 @@ import textwrap -import fitz +import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' - rect = fitz.Rect(10, 20, 290, 380) - mediabox = fitz.Rect(0, 0, 300, 400) + rect = pymupdf.Rect(10, 20, 290, 380) + mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None @@ -83,6 +83,6 @@ def contentfn(positions): out_path = __file__.replace('.py', '.pdf') -writer = fitz.DocumentWriter(out_path) -fitz.Story.write_stabilized(writer, contentfn, rectfn) +writer = pymupdf.DocumentWriter(out_path) +pymupdf.Story.write_stabilized(writer, contentfn, rectfn) writer.close() diff --git a/docs/samples/story-write.py b/docs/samples/story-write.py index 005853cf1..18c9fe9fa 100644 --- a/docs/samples/story-write.py +++ b/docs/samples/story-write.py @@ -10,7 +10,7 @@ import html -import fitz +import pymupdf # Create html containing multiple copies of our own source code. @@ -62,20 +62,20 @@ def rectfn(rect_num, filled): if rect_num % 4 == 0: # New page. - mediabox = fitz.Rect(0, 0, page_w, page_h) + mediabox = pymupdf.Rect(0, 0, page_w, page_h) else: mediabox = None # Return one of four rects in turn. rect_x = margin + (rect_w+margin) * ((rect_num // 2) % 2) rect_y = margin + (rect_h+margin) * (rect_num % 2) - rect = fitz.Rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h) + rect = pymupdf.Rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h) #print(f'rectfn(): rect_num={rect_num} filled={filled}. Returning: rect={rect}') return mediabox, rect, None -story = fitz.Story(html, em=8) +story = pymupdf.Story(html, em=8) out_path = __file__.replace('.py', '.pdf') -writer = fitz.DocumentWriter(out_path) +writer = pymupdf.DocumentWriter(out_path) story.write(writer, rectfn) writer.close() diff --git a/docs/samples/table01.py b/docs/samples/table01.py index 4faa8619b..9cdad9da3 100644 --- a/docs/samples/table01.py +++ b/docs/samples/table01.py @@ -15,7 +15,7 @@ ------------- PyMuPDF v1.22.0 or later """ -import fitz +import pymupdf table_text = ( # the content of each table row ( @@ -96,7 +96,7 @@ } """ -story = fitz.Story(HTML, user_css=CSS) # define the Story +story = pymupdf.Story(HTML, user_css=CSS) # define the Story body = story.body # access the HTML of it template = body.find(None, "id", "row") # find the template with name "row" parent = template.parent # access its parent i.e., the @@ -111,8 +111,8 @@ template.remove() # remove the template # Story is ready - output it via a writer -writer = fitz.DocumentWriter(__file__.replace(".py", ".pdf"), "compress") -mediabox = fitz.paper_rect("letter") # size of one output page +writer = pymupdf.DocumentWriter(__file__.replace(".py", ".pdf"), "compress") +mediabox = pymupdf.paper_rect("letter") # size of one output page where = mediabox + (36, 36, -36, -36) # use this sub-area for the content more = True # detects end of output diff --git a/docs/samples/text-lister.py b/docs/samples/text-lister.py index 2b6dbe169..cc5b651b3 100644 --- a/docs/samples/text-lister.py +++ b/docs/samples/text-lister.py @@ -1,6 +1,6 @@ import sys -import fitz +import pymupdf def flags_decomposer(flags): @@ -23,7 +23,7 @@ def flags_decomposer(flags): return ", ".join(l) -doc = fitz.open(sys.argv[1]) +doc = pymupdf.open(sys.argv[1]) page = doc[0] # read page text as a dictionary, suppressing extra spaces in CJK fonts diff --git a/docs/shape.rst b/docs/shape.rst index 1e895e489..c3a3cb585 100644 --- a/docs/shape.rst +++ b/docs/shape.rst @@ -5,6 +5,8 @@ Shape ================ +|pdf_only_class| + This class allows creating interconnected graphical elements on a PDF page. Its methods have the same meaning and name as the corresponding :ref:`Page` methods. In fact, each :ref:`Page` draw method is just a convenience wrapper for (1) one shape draw method, (2) the :meth:`Shape.finish` method, and (3) the :meth:`Shape.commit` method. For page text insertion, only the :meth:`Shape.commit` method is invoked. If many draw and text operations are executed for a page, you should always consider using a Shape object. @@ -50,7 +52,7 @@ Several draw methods can be executed in a row and each one of them will contribu .. method:: __init__(self, page) - Create a new drawing. During importing PyMuPDF, the *fitz.Page* object is being given the convenience method *new_shape()* to construct a *Shape* object. During instantiation, a check will be made whether we do have a PDF page. An exception is otherwise raised. + Create a new drawing. During importing PyMuPDF, the *pymupdf.Page* object is being given the convenience method *new_shape()* to construct a *Shape* object. During instantiation, a check will be made whether we do have a PDF page. An exception is otherwise raised. :arg page: an existing page of a PDF document. :type page: :ref:`Page` @@ -86,10 +88,10 @@ Several draw methods can be executed in a row and each one of them will contribu Here is an example of three connected lines, forming a closed, filled triangle. Little arrows indicate the stroking direction. - >>> import fitz - >>> doc=fitz.open() + >>> import pymupdf + >>> doc=pymupdf.open() >>> page=doc.new_page() - >>> r = fitz.Rect(100, 100, 300, 200) + >>> r = pymupdf.Rect(100, 100, 300, 200) >>> shape=page.new_shape() >>> shape.draw_squiggle(r.tl, r.tr) >>> shape.draw_squiggle(r.tr, r.br) @@ -131,7 +133,7 @@ Several draw methods can be executed in a row and each one of them will contribu Draw a standard cubic Bézier curve from *p1* to *p4*, using *p2* and *p3* as control points. - All arguments are :data:`point_like` \s. + All arguments are :data:`point_like` objects. :rtype: :ref:`Point` :returns: the end point, *p4*. @@ -231,21 +233,6 @@ Several draw methods can be executed in a row and each one of them will contribu :rtype: :ref:`Point` :returns: :attr:`Quad.ul`. - .. index:: - pair: border_width; insert_text - pair: color; insert_text - pair: encoding; insert_text - pair: fill; insert_text - pair: fontfile; insert_text - pair: fontname; insert_text - pair: fontsize; insert_text - pair: morph; insert_text - pair: render_mode; insert_text - pair: rotate; insert_text - pair: stroke_opacity; insert_text - pair: fill_opacity; insert_text - pair: oc; insert_text - .. index:: pair: closePath; finish pair: color; finish @@ -261,7 +248,7 @@ Several draw methods can be executed in a row and each one of them will contribu pair: oc; finish - .. method:: finish(width=1, color=None, fill=None, lineCap=0, lineJoin=0, dashes=None, closePath=True, even_odd=False, morph=(fixpoint, matrix), stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: finish(width=1, color=(0,), fill=None, lineCap=0, lineJoin=0, dashes=None, closePath=True, even_odd=False, morph=(fixpoint, matrix), stroke_opacity=1, fill_opacity=1, oc=0) Finish a set of *draw*()* methods by applying :ref:`CommonParms` to all of them. @@ -269,12 +256,12 @@ Several draw methods can be executed in a row and each one of them will contribu The method also supports **morphing the compound drawing** using :ref:`Point` *fixpoint* and :ref:`matrix` *matrix*. - :arg sequence morph: morph the text or the compound drawing around some arbitrary :ref:`Point` *fixpoint* by applying :ref:`Matrix` *matrix* to it. This implies that *fixpoint* is a **fixed point** of this operation: it will not change its position. Default is no morphing (*None*). The matrix can contain any values in its first 4 components, *matrix.e == matrix.f == 0* must be true, however. This means that any combination of scaling, shearing, rotating, flipping, etc. is possible, but translations are not. + :arg sequence morph: morph the text or the compound drawing around some arbitrary :ref:`Point` *fixpoint* by applying :ref:`Matrix` *matrix* to it. This implies that *fixpoint* is a **fixed point** of this operation: it will not change its position. Default is no morphing (``None``). The matrix can contain any values in its first 4 components, *matrix.e == matrix.f == 0* must be true, however. This means that any combination of scaling, shearing, rotating, flipping, etc. is possible, but translations are not. :arg float stroke_opacity: *(new in v1.18.1)* set transparency for stroke colors. Value < 0 or > 1 will be ignored. Default is 1 (intransparent). :arg float fill_opacity: *(new in v1.18.1)* set transparency for fill colors. Default is 1 (intransparent). - :arg bool even_odd: request the **"even-odd rule"** for filling operations. Default is *False*, so that the **"nonzero winding number rule"** is used. These rules are alternative methods to apply the fill color where areas overlap. Only with fairly complex shapes a different behavior is to be expected with these rules. For an in-depth explanation, see :ref:`AdobeManual`, pp. 137 ff. Here is an example to demonstrate the difference. + :arg bool even_odd: request the **"even-odd rule"** for filling operations. Default is ``False``, so that the **"nonzero winding number rule"** is used. These rules are alternative methods to apply the fill color where areas overlap. Only with fairly complex shapes a different behavior is to be expected with these rules. For an in-depth explanation, see :ref:`AdobeManual`, pp. 137 ff. Here is an example to demonstrate the difference. :arg int oc: *(new in v1.18.4)* the :data:`xref` number of an :data:`OCG` or :data:`OCMD` to make this drawing conditionally displayable. @@ -288,23 +275,39 @@ Several draw methods can be executed in a row and each one of them will contribu Of the four shapes in above image, the top two each show three circles drawn in standard manner (anti-clockwise, look at the arrows). The lower two shapes contain one (the top-left) circle drawn clockwise. As can be seen, area orientation is irrelevant for the right column (even-odd rule). + .. index:: + pair: border_width; insert_text + pair: color; insert_text + pair: encoding; insert_text + pair: fill; insert_text + pair: fontfile; insert_text + pair: fontname; insert_text + pair: fontsize; insert_text + pair: lineheight; insert_text + pair: morph; insert_text + pair: render_mode; insert_text + pair: miter_limit; insert_text + pair: rotate; insert_text + pair: stroke_opacity; insert_text + pair: fill_opacity; insert_text + pair: oc; insert_text - .. method:: insert_text(point, text, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, lineheight=None, fill=None, render_mode=0, border_width=1, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: insert_text(point, text, *, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, lineheight=None, fill=None, render_mode=0, miter_limit=1, border_width=1, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) - Insert text lines start at *point*. + Insert text lines starting at ``point``. :arg point_like point: the bottom-left position of the first character of *text* in pixels. It is important to understand, how this works in conjunction with the *rotate* parameter. Please have a look at the following picture. The small red dots indicate the positions of *point* in each of the four possible cases. .. image:: images/img-inserttext.* :scale: 33 - :arg str/sequence text: the text to be inserted. May be specified as either a string type or as a sequence type. For sequences, or strings containing line breaks *\n*, several lines will be inserted. No care will be taken if lines are too wide, but the number of inserted lines will be limited by "vertical" space on the page (in the sense of reading direction as established by the *rotate* parameter). Any rest of *text* is discarded -- the return code however contains the number of inserted lines. + :arg str/sequence text: the text to be inserted. May be specified as either a string type or as a sequence type. For sequences, or strings containing line breaks ``\n``, several lines will be inserted. No care will be taken if lines are too wide, but the number of inserted lines will be limited by "vertical" space on the page (in the sense of reading direction as established by the *rotate* parameter). Any rest of *text* is discarded -- the return code however contains the number of inserted lines. - :arg float lineheight: a factor to override the line height calculated from font properties. If not *None*, a line height of `fontsize * lineheight` will be used. - :arg float stroke_opacity: *(new in v1.18.1)* set transparency for stroke colors. Negative values and values > 1 will be ignored. Default is 1 (intransparent). + :arg float lineheight: a factor to override the line height calculated from font properties. If not `None`, a line height of `fontsize * lineheight` will be used. + :arg float stroke_opacity: *(new in v1.18.1)* set transparency for stroke colors (the **border line** of a character). Only `0 <= value <= 1` will be considered. Default is 1 (intransparent). :arg float fill_opacity: *(new in v1.18.1)* set transparency for fill colors. Default is 1 (intransparent). Use this value to control transparency of the text color. Stroke opacity **only** affects the border line of characters. - :arg int rotate: determines whether to rotate the text. Acceptable values are multiples of 90 degrees. Default is 0 (no rotation), meaning horizontal text lines oriented from left to right. 180 means text is shown upside down from **right to left**. 90 means anti-clockwise rotation, text running **upwards**. 270 (or -90) means clockwise rotation, text running **downwards**. In any case, *point* specifies the bottom-left coordinates of the first character's rectangle. Multiple lines, if present, always follow the reading direction established by this parameter. So line 2 is located **above** line 1 in case of *rotate = 180*, etc. + :arg int rotate: determines whether to rotate the text. Acceptable values are multiples of 90 degrees. Default is 0 (no rotation), meaning horizontal text lines oriented from left to right. 180 means text is shown upside down from **right to left**. 90 means anti-clockwise rotation, text running **upwards**. 270 (or -90) means clockwise rotation, text running **downwards**. In any case, *point* specifies the bottom-left coordinates of the first character's rectangle. Multiple lines, if present, always follow the reading direction established by this parameter. So line 2 is located **above** line 1 in case of `rotate = 180`, etc. :arg int oc: *(new in v1.18.4)* the :data:`xref` number of an :data:`OCG` or :data:`OCMD` to make this text conditionally displayable. @@ -323,14 +326,16 @@ Several draw methods can be executed in a row and each one of them will contribu pair: fontfile; insert_textbox pair: fontname; insert_textbox pair: fontsize; insert_textbox + pair: lineheight; insert_textbox pair: morph; insert_textbox pair: render_mode; insert_textbox + pair: miter_limit; insert_textbox pair: rotate; insert_textbox pair: oc; insert_textbox - .. method:: insert_textbox(rect, buffer, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, fill=None, render_mode=0, border_width=1, expandtabs=8, align=TEXT_ALIGN_LEFT, rotate=0, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) + .. method:: insert_textbox(rect, buffer, *, fontsize=11, fontname="helv", fontfile=None, set_simple=False, encoding=TEXT_ENCODING_LATIN, color=None, fill=None, render_mode=0, miter_limit=1, border_width=1, expandtabs=8, align=TEXT_ALIGN_LEFT, rotate=0, lineheight=None, morph=None, stroke_opacity=1, fill_opacity=1, oc=0) - PDF only: Insert text into the specified rectangle. The text will be split into lines and words and then filled into the available space, starting from one of the four rectangle corners, which depends on *rotate*. Line feeds and multiple space will be respected. + PDF only: Insert text into the specified rectangle. The text will be split into lines and words and then filled into the available space, starting from one of the four rectangle corners, which depends on `rotate`. Line feeds and multiple space will be respected. :arg rect_like rect: the area to use. It must be finite and not empty. @@ -338,12 +343,14 @@ Several draw methods can be executed in a row and each one of them will contribu :arg int align: align each text line. Default is 0 (left). Centered, right and justified are the other supported options, see :ref:`TextAlign`. Please note that the effect of parameter value *TEXT_ALIGN_JUSTIFY* is only achievable with "simple" (single-byte) fonts (including the :ref:`Base-14-Fonts`). - :arg int expandtabs: controls handling of tab characters *\t* using the *string.expandtabs()* method **per each line**. + :arg float lineheight: a factor to override the line height calculated from font properties. If not `None`, a line height of `fontsize * lineheight` will be used. + + :arg int expandtabs: controls handling of tab characters ``\t`` using the `string.expandtabs()` method **per each line**. :arg float stroke_opacity: *(new in v1.18.1)* set transparency for stroke colors. Negative values and values > 1 will be ignored. Default is 1 (intransparent). :arg float fill_opacity: *(new in v1.18.1)* set transparency for fill colors. Default is 1 (intransparent). Use this value to control transparency of the text color. Stroke opacity **only** affects the border line of characters. - :arg int rotate: requests text to be rotated in the rectangle. This value must be a multiple of 90 degrees. Default is 0 (no rotation). Effectively, four different values are processed: 0, 90, 180 and 270 (= -90), each causing the text to start in a different rectangle corner. Bottom-left is 90, bottom-right is 180, and -90 / 270 is top-right. See the example how text is filled in a rectangle. This argument takes precedence over morphing. See the second example, which shows text first rotated left by 90 degrees and then the whole rectangle rotated clockwise around is lower left corner. + :arg int rotate: requests text to be rotated in the rectangle. This value must be a multiple of 90 degrees. Default is 0 (no rotation). Effectively, the four values `0`, `90`, `180` and `270` (= `-90`) are processed, each causing the text to start in a different rectangle corner. Bottom-left is `90`, bottom-right is `180`, and `-90 / 270` is top-right. See the example how text is filled in a rectangle. This argument takes precedence over morphing. See the second example, which shows text first rotated left by `90` degrees and then the whole rectangle rotated clockwise around is lower left corner. :arg int oc: *(new in v1.18.4)* the :data:`xref` number of an :data:`OCG` or :data:`OCMD` to make this text conditionally displayable. @@ -415,7 +422,7 @@ Several draw methods can be executed in a row and each one of them will contribu .. attribute:: rect - Rectangle surrounding drawings. This attribute is at your disposal and may be changed at any time. Its value is set to *None* when a shape is created or committed. Every *draw** method, and :meth:`Shape.insert_textbox` update this property (i.e. **enlarge** the rectangle as needed). **Morphing** operations, however (:meth:`Shape.finish`, :meth:`Shape.insert_textbox`) are ignored. + Rectangle surrounding drawings. This attribute is at your disposal and may be changed at any time. Its value is set to ``None`` when a shape is created or committed. Every *draw** method, and :meth:`Shape.insert_textbox` update this property (i.e. **enlarge** the rectangle as needed). **Morphing** operations, however (:meth:`Shape.finish`, :meth:`Shape.insert_textbox`) are ignored. A typical use of this attribute would be setting :attr:`Page.cropbox_position` to this value, when you are creating shapes for later or external use. If you have not manipulated the attribute yourself, it should reflect a rectangle that contains all drawings so far. @@ -424,7 +431,7 @@ Several draw methods can be executed in a row and each one of them will contribu >>> # assuming ... >>> morph = (point, matrix) >>> # ... recalculate the shape rectangle like so: - >>> shape.rect = (shape.rect - fitz.Rect(point, point)) * ~matrix + fitz.Rect(point, point) + >>> shape.rect = (shape.rect - pymupdf.Rect(point, point)) * ~matrix + pymupdf.Rect(point, point) :type: :ref:`Rect` @@ -436,7 +443,7 @@ Several draw methods can be executed in a row and each one of them will contribu .. attribute:: lastPoint - For reference only: the current point of the drawing path. It is *None* at *Shape* creation and after each *finish()* and *commit()*. + For reference only: the current point of the drawing path. It is ``None`` at *Shape* creation and after each *finish()* and *commit()*. :type: :ref:`Point` @@ -479,8 +486,8 @@ Examples cols = (...) # a sequence of RGB color triples pieces = len(cols) # number of pieces to draw beta = 360. / pieces # angle of each piece of pie - center = fitz.Point(...) # center of the pie - p0 = fitz.Point(...) # starting point + center = pymupdf.Point(...) # center of the pie + p0 = pymupdf.Point(...) # starting point for i in range(pieces): p0 = shape.draw_sector(center, p0, beta, fullSector=True) # draw piece @@ -496,8 +503,8 @@ Here is an example for 5 colors: shape = page.new_shape() # start a new shape beta = -360.0 / n # our angle, drawn clockwise - center = fitz.Point(...) # center of circle - p0 = fitz.Point(...) # start here (1st edge) + center = pymupdf.Point(...) # center of circle + p0 = pymupdf.Point(...) # start here (1st edge) points = [p0] # store polygon edges for i in range(n): # calculate the edges p0 = shape.draw_sector(center, p0, beta) @@ -544,15 +551,15 @@ Common Parameters **fontsize** (*float*) - Font size of text. + Font size of text, see: :data:`fontsize`. ---- **dashes** (*str*) - Causes lines to be drawn dashed. The general format is `"[n m] p"` of (up to) 3 floats denoting pixel lengths. `n` is the dash length, `m` (optional) is the subsequent gap length, and `p` (the "phase" - **required**, even if 0!) specifies how many pixels should be skipped before the dashing starts. If `m` is omitted, it defaults to `n`. + Causes lines to be drawn dashed. The general format is `"[n m] p"` of (up to) 3 floats denoting pixel lengths. ``n`` is the dash length, ``m`` (optional) is the subsequent gap length, and ``p`` (the "phase" - **required**, even if 0!) specifies how many pixels should be skipped before the dashing starts. If ``m`` is omitted, it defaults to ``n``. - A continuous line (no dashes) is drawn with `"[] 0"` or *None* or `""`. Examples: + A continuous line (no dashes) is drawn with `"[] 0"` or ``None`` or `""`. Examples: * Specifying `"[3 4] 0"` means dashes of 3 and gaps of 4 pixels following each other. * `"[3 3] 0"` and `"[3] 0"` do the same thing. @@ -565,7 +572,7 @@ Common Parameters Stroke and fill colors can be specified as tuples or list of of floats from 0 to 1. These sequences must have a length of 1 (GRAY), 3 (RGB) or 4 (CMYK). For GRAY colorspace, a single float instead of the unwieldy *(float,)* or *[float]* is also accepted. Accept (default) or use `None` to not use the parameter. - To simplify color specification, method *getColor()* in *fitz.utils* may be used to get predefined RGB color triples by name. It accepts a string as the name of the color and returns the corresponding triple. The method knows over 540 color names -- see section :ref:`ColorDatabase`. + To simplify color specification, method *getColor()* in *pymupdf.utils* may be used to get predefined RGB color triples by name. It accepts a string as the name of the color and returns the corresponding triple. The method knows over 540 color names -- see section :ref:`ColorDatabase`. Please note that the term *color* usually means "stroke" color when used in conjunction with fill color. @@ -573,11 +580,20 @@ Common Parameters ---- +**width** (*float*) + + The stroke ("border") width of the elements in a shape (if applicable). The default value is 1. The values width, color and fill have the following relationship / dependency: + + * If `fill=None` shape elements will always be drawn with a border - even if `color=None` (in which case black is taken) or `width=0` (in which case 1 is taken). + * Shapes without border can only be achieved if a fill color is specified (which may be white of course). To achieve this, specify `width=0`. In this case, the ``color`` parameter is ignored. + +---- + **stroke_opacity / fill_opacity** (*floats*) Both values are floats in range [0, 1]. Negative values or values > 1 will ignored (in most cases). Both set the transparency such that a value 0.5 corresponds to 50% transparency, 0 means invisible and 1 means intransparent. For e.g. a rectangle the stroke opacity applies to its border and fill opacity to its interior. - For text insertions (:meth:`Shape.insert_text` and :meth:`Shape.insert_textbox`), use *fill_opacity* for the text. At first sight this seems surprising, but it becomes obvious when you look further down to *render_mode*: *fill_opacity* applies to the yellow and *stroke_opacity* applies to the blue color. + For text insertions (:meth:`Shape.insert_text` and :meth:`Shape.insert_textbox`), use *fill_opacity* for the text. At first sight this seems surprising, but it becomes obvious when you look further down to `render_mode`: `fill_opacity` applies to the yellow and `stroke_opacity` applies to the blue color. ---- @@ -602,6 +618,28 @@ Common Parameters ---- +**miter_limit** (*float*) + + A float specifying the maximum acceptable value of the quotient `miter-length / line-width` ("miter quotient"). Used in text output methods. This is only relevant for non-zero render mode values -- then, characters are written with border lines (i.e. "stroked"). + + If two lines stroking some character meet at a sharp (<= 90°) angle and the line width is large enough, then "spikes" may become visible -- causing an ugly appearance as shown below. For more background, see page 126 of the :ref:`AdobeManual`. + + For instance, when joins meet at 90°, then the miter length is ``sqrt(2) * line-width``, so the miter quotient is ``sqrt(2)``. + + If ``miter_limit`` is exceeded, then all joins with a larger qotient will appear as beveled ("butt" appearance). + + The default value 1 (and any smaller value) will ensure that all joins are rendered as a butt. A value of ``None`` will use the PDF default value. + + Example text showing spikes (``miter_limit=None``): + + .. image:: images/spikes-yes.* + + Example text suppressing spikes (``miter_limit=1``): + + .. image:: images/spikes-no.* + +---- + **overlay** (*bool*) Causes the item to appear in foreground (default) or background. @@ -610,7 +648,7 @@ Common Parameters **morph** (*sequence*) - Causes "morphing" of either a shape, created by the *draw*()* methods, or the text inserted by page methods *insert_textbox()* / *insert_text()*. If not *None*, it must be a pair *(fixpoint, matrix)*, where *fixpoint* is a :ref:`Point` and *matrix* is a :ref:`Matrix`. The matrix can be anything except translations, i.e. *matrix.e == matrix.f == 0* must be true. The point is used as a fixed point for the matrix operation. For example, if *matrix* is a rotation or scaling, then *fixpoint* is its center. Similarly, if *matrix* is a left-right or up-down flip, then the mirroring axis will be the vertical, respectively horizontal line going through *fixpoint*, etc. + Causes "morphing" of either a shape, created by the *draw*()* methods, or the text inserted by page methods *insert_textbox()* / *insert_text()*. If not ``None``, it must be a pair *(fixpoint, matrix)*, where *fixpoint* is a :ref:`Point` and *matrix* is a :ref:`Matrix`. The matrix can be anything except translations, i.e. *matrix.e == matrix.f == 0* must be true. The point is used as a fixed point for the matrix operation. For example, if *matrix* is a rotation or scaling, then *fixpoint* is its center. Similarly, if *matrix* is a left-right or up-down flip, then the mirroring axis will be the vertical, respectively horizontal line going through *fixpoint*, etc. .. note:: Several methods contain checks whether the to be inserted items will actually fit into the page (like :meth:`Shape.insert_text`, or :meth:`Shape.draw_rect`). For the result of a morphing operation there is however no such guaranty: this is entirely the programmer's responsibility. diff --git a/docs/story-class.rst b/docs/story-class.rst index c3d0e18f3..cfa3d75b0 100644 --- a/docs/story-class.rst +++ b/docs/story-class.rst @@ -22,6 +22,10 @@ Story :meth:`Story.write_stabilized` iterative layout of html content to a DocumentWriter :meth:`Story.write_with_links` like `write()` but also creates PDF links :meth:`Story.write_stabilized_with_links` like `write_stabilized()` but also creates PDF links +:meth:`Story.fit` Finds optimal rect that contains the story `self`. +:meth:`Story.fit_scale` +:meth:`Story.fit_height` +:meth:`Story.fit_width` =========================================== ============================================================= **Class API** @@ -58,32 +62,32 @@ Story * To work in the first of these styles, the following loop should be used: - 1. Obtain a suitable device to write to; - typically by requesting a new, - empty page from a :ref:`DocumentWriter`. - 2. Determine one or more rectangles on the page, - that should receive **story** data. - Note that not every page needs to have the same set of rectangles. - 3. Pass each rectangle to the **story** to place it, - learning what part of that rectangle has been filled, - and whether there is more story data that did not fit. - This step can be repeated several times with adjusted rectangles - until the caller is happy with the results. - 4. Optionally, at this point, - we can request details of where interesting items have been placed, - by calling the `element_positions()` method. - Items are deemed to be interesting if their integer `heading` attribute is a non-zero - (corresponding to HTML tags :htmlTag:`h1` - :htmlTag:`h6`), - if their `id` attribute is not `None` (corresponding to HTML tag :htmlTag:`id`), - or if their `href` attribute is not `None` (responding to HTML tag :htmlTag:`href`). - This can conveniently be used for automatic generation of a Table of Contents, - an index of images or the like. - 5. Next, draw that rectangle out to the device with the `draw()` method. - 6. If the most recent call to `place()` indicated that all the story data had fitted, - stop now. - 7. Otherwise, we can loop back. - If there are more rectangles to be placed on the current device (page), - we jump back to step 3 - if not, we jump back to step 1 to get a new device. + 1. Obtain a suitable device to write to; + typically by requesting a new, + empty page from a :ref:`DocumentWriter`. + 2. Determine one or more rectangles on the page, + that should receive **story** data. + Note that not every page needs to have the same set of rectangles. + 3. Pass each rectangle to the **story** to place it, + learning what part of that rectangle has been filled, + and whether there is more story data that did not fit. + This step can be repeated several times with adjusted rectangles + until the caller is happy with the results. + 4. Optionally, at this point, + we can request details of where interesting items have been placed, + by calling the `element_positions()` method. + Items are deemed to be interesting if their integer `heading` attribute is a non-zero + (corresponding to HTML tags :htmlTag:`h1` - :htmlTag:`h6`), + if their `id` attribute is not `None` (corresponding to HTML tag :htmlTag:`id`), + or if their `href` attribute is not `None` (responding to HTML tag :htmlTag:`href`). + This can conveniently be used for automatic generation of a Table of Contents, + an index of images or the like. + 5. Next, draw that rectangle out to the device with the `draw()` method. + 6. If the most recent call to `place()` indicated that all the story data had fitted, + stop now. + 7. Otherwise, we can loop back. + If there are more rectangles to be placed on the current device (page), + we jump back to step 3 - if not, we jump back to step 1 to get a new device. * Alternatively, in the case where you are using a :ref:`DocumentWriter`, the `write()` or `write_stabilized()` methods can be used. These handle all the looping for you, @@ -105,7 +109,7 @@ Story :arg float em: the default text font size. :arg archive: an :ref:`Archive` from which to load resources for rendering. Currently supported resource types are images and text fonts. If omitted, the story will not try to look up any such data and may thus produce incomplete output. - .. note:: Instead of an actual archive, valid arguments for **creating** an :ref:`Archive` can also be provided -- in which case an archive will temporarily be constructed. So, instead of `story = fitz.Story(archive=fitz.Archive("myfolder"))`, one can also shorter write `story = fitz.Story(archive="myfolder")`. + .. note:: Instead of an actual archive, valid arguments for **creating** an :ref:`Archive` can also be provided -- in which case an archive will temporarily be constructed. So, instead of `story = pymupdf.Story(archive=pymupdf.Archive("myfolder"))`, one can also shorter write `story = pymupdf.Story(archive="myfolder")`. .. method:: place(where) @@ -147,88 +151,193 @@ Story .. method:: write(writer, rectfn, positionfn=None, pagefn=None) - Places and draws Story to a `DocumentWriter`. Avoids the need for - calling code to implement a loop that calls `Story.place()` and - `Story.draw()` etc, at the expense of having to provide at least the - `rectfn()` callback. + Places and draws Story to a `DocumentWriter`. Avoids the need for + calling code to implement a loop that calls `Story.place()` and + `Story.draw()` etc, at the expense of having to provide at least the + `rectfn()` callback. - :arg writer: a `DocumentWriter` or None. - :arg rectfn: a callable taking `(rect_num: int, filled: Rect)` and - returning `(mediabox, rect, ctm)`: - mediabox: - None or rect for new page. - rect: - The next rect into which content should be placed. - ctm: - None or a `Matrix`. - :arg positionfn: None, or a callable taking `(position: ElementPosition)`: - position: - An `ElementPosition` with an extra `.page_num` member. - Typically called multiple times as we generate elements that - are headings or have an id. - :arg pagefn: - None, or a callable taking `(page_num, mediabox, dev, after)`; - called at start (`after=0`) and end (`after=1`) of each page. + :arg writer: a `DocumentWriter` or None. + :arg rectfn: a callable taking `(rect_num: int, filled: Rect)` and + returning `(mediabox, rect, ctm)`: + + * mediabox: None or rect for new page. + * rect: The next rect into which content should be placed. + * ctm: None or a `Matrix`. + :arg positionfn: None, or a callable taking `(position: ElementPosition)`: + + * position: + An `ElementPosition` with an extra `.page_num` member. + Typically called multiple times as we generate elements that + are headings or have an id. + :arg pagefn: + None, or a callable taking `(page_num, mediabox, dev, after)`; + called at start (`after=0`) and end (`after=1`) of each page. .. staticmethod:: write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True) - Static method that does iterative layout of html content to a - `DocumentWriter`. - - For example this allows one to add a table of contents section - while ensuring that page numbers are patched up until stable. - - Repeatedly creates a new `Story` from `(contentfn(), - user_css, em, archive)` and lays it out with internal call - to `Story.write()`; uses a None writer and extracts the list - of `ElementPosition`'s which is passed to the next call of - `contentfn()`. - - When the html from `contentfn()` becomes unchanged, we do a - final iteration using `writer`. - - :arg writer: - A `DocumentWriter`. - :arg contentfn: - A function taking a list of `ElementPositions` and - returning a string containing html. The returned html - can depend on the list of positions, for example with a - table of contents near the start. - :arg rectfn: - A callable taking `(rect_num: int, filled: Rect)` and - returning `(mediabox, rect, ctm)`: - mediabox: - None or rect for new page. - rect: - The next rect into which content should be - placed. - ctm: - A `Matrix`. - :arg pagefn: - None, or a callable taking `(page_num, medibox, - dev, after)`; called at start (`after=0`) and end - (`after=1`) of each page. - :arg archive: - . - :arg add_header_ids: - If true, we add unique ids to all header tags that - don't already have an id. This can help automatic - generation of tables of contents. - Returns: - None. + Static method that does iterative layout of html content to a + `DocumentWriter`. + + For example this allows one to add a table of contents section + while ensuring that page numbers are patched up until stable. + + Repeatedly creates a new `Story` from `(contentfn(), + user_css, em, archive)` and lays it out with internal call + to `Story.write()`; uses a None writer and extracts the list + of `ElementPosition`'s which is passed to the next call of + `contentfn()`. + + When the html from `contentfn()` becomes unchanged, we do a + final iteration using `writer`. + + :arg writer: + A `DocumentWriter`. + :arg contentfn: + A function taking a list of `ElementPositions` and + returning a string containing html. The returned html + can depend on the list of positions, for example with a + table of contents near the start. + :arg rectfn: + A callable taking `(rect_num: int, filled: Rect)` and + returning `(mediabox, rect, ctm)`: + + * mediabox: None or rect for new page. + * rect: The next rect into which content should be placed. + * ctm: A `Matrix`. + :arg pagefn: + None, or a callable taking `(page_num, medibox, + dev, after)`; called at start (`after=0`) and end + (`after=1`) of each page. + :arg archive: + :arg add_header_ids: + If true, we add unique ids to all header tags that + don't already have an id. This can help automatic + generation of tables of contents. + Returns: + None. .. method:: write_with_links(rectfn, positionfn=None, pagefn=None) - Similar to `write()` except that we don't have a `writer` arg - and we return a PDF `Document` in which links have been created - for each internal html link. + Similar to `write()` except that we don't have a `writer` arg + and we return a PDF `Document` in which links have been created + for each internal html link. .. staticmethod:: write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True) - Similar to `write_stabilized()` except that we don't have a `writer` - arg and instead return a PDF `Document` in which links have been - created for each internal html link. + Similar to `write_stabilized()` except that we don't have a `writer` + arg and instead return a PDF `Document` in which links have been + created for each internal html link. + .. class:: Story.FitResult + + The result from a `Story.fit*()` method. + + Members: + + `big_enough`: + `True` if the fit succeeded. + `filled`: + From the last call to `Story.place()`. + `more`: + `False` if the fit succeeded. + `numcalls`: + Number of calls made to `self.place()`. + `parameter`: + The successful parameter value, or the largest failing value. + `rect`: + The rect created from `parameter`. + + .. method:: fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False) + + Finds optimal rect that contains the story `self`. + + Returns a `Story.FitResult` instance. + + On success, the last call to `self.place()` will have been with the + returned rectangle, so `self.draw()` can be used directly. + + :arg fn: + A callable taking a floating point `parameter` and returning a + `pymupdf.Rect()`. If the rect is empty, we assume the story will + not fit and do not call `self.place()`. + + Must guarantee that `self.place()` behaves monotonically when + given rect `fn(parameter`) as `parameter` increases. This + usually means that both width and height increase or stay + unchanged as `parameter` increases. + :arg pmin: + Minimum parameter to consider; `None` for -infinity. + :arg pmax: + Maximum parameter to consider; `None` for +infinity. + :arg delta: + Maximum error in returned `parameter`. + :arg verbose: + If true we output diagnostics. + + .. method:: fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False) + + Finds smallest value `scale` in range `scale_min..scale_max` where + `scale * rect` is large enough to contain the story `self`. + + Returns a `Story.FitResult` instance. + + :arg width: + width of rect. + :arg height: + height of rect. + :arg scale_min: + Minimum scale to consider; must be >= 0. + :arg scale_max: + Maximum scale to consider, must be >= scale_min or `None` for + infinite. + :arg delta: + Maximum error in returned scale. + :arg verbose: + If true we output diagnostics. + + .. method:: fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False) + + Finds smallest height in range `height_min..height_max` where a rect + with size `(width, height)` is large enough to contain the story + `self`. + + Returns a `Story.FitResult` instance. + + :arg width: + width of rect. + :arg height_min: + Minimum height to consider; must be >= 0. + :arg height_max: + Maximum height to consider, must be >= height_min or `None` for + infinite. + :arg origin: + `(x0, y0)` of rect. + :arg delta: + Maximum error in returned height. + :arg verbose: + If true we output diagnostics. + + .. method:: fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False) + + Finds smallest width in range `width_min..width_max` where a rect with size + `(width, height)` is large enough to contain the story `self`. + + Returns a `Story.FitResult` instance. + + :arg height: + height of rect. + :arg width_min: + Minimum width to consider; must be >= 0. + :arg width_max: + Maximum width to consider, must be >= width_min or `None` for + infinite. + :arg origin: + `(x0, y0)` of rect. + :arg delta: + Maximum error in returned width. + :arg verbose: + If true we output diagnostics. + Element Positioning CallBack function -------------------------------------- @@ -247,10 +356,10 @@ A typical loop for executing a story with using this method would look like this """ - MEDIABOX = fitz.paper_rect("letter") # size of a page + MEDIABOX = pymupdf.paper_rect("letter") # size of a page WHERE = MEDIABOX + (36, 36, -36, -36) # leave borders of 0.5 inches - story = fitz.Story(html=HTML) # make the story - writer = fitz.DocumentWriter("test.pdf") # make the writer + story = pymupdf.Story(html=HTML) # make the story + writer = pymupdf.DocumentWriter("test.pdf") # make the writer pno = 0 # current page number more = 1 # will be set to 0 when done while more: # loop until all story content is processed @@ -276,7 +385,7 @@ The parameter passed to the `recorder` function is an object with the following * `elpos.heading` (int) -- the header level, 0 if no header, 1-6 for :htmlTag:`h1` - :htmlTag:`h6`. -* `elpos.href` (str) -- value of the `href`attribute, or None if not defined. +* `elpos.href` (str) -- value of the `href` attribute, or None if not defined. * `elpos.id` (str) -- value of the `id` attribute, or None if not defined. @@ -288,6 +397,6 @@ The parameter passed to the `recorder` function is an object with the following * `elpos.rect_num` (int) -- count of rectangles filled by the story so far. -* `elpos.page_num` (int) -- page number; only present when using `fitz.Story.write*()` functions. +* `elpos.page_num` (int) -- page number; only present when using `pymupdf.Story.write*()` functions. .. include:: footer.rst diff --git a/docs/supported-files-table.rst b/docs/supported-files-table.rst new file mode 100644 index 000000000..588c4b8ce --- /dev/null +++ b/docs/supported-files-table.rst @@ -0,0 +1,168 @@ +.. raw:: html + + + + + +
+ + + + + + + + + + + + + + + +
+ PDF + XPS + EPUB + MOBI + FB2 + CBZ + SVG + TXT +
+ +
JPG/JPEG, PNG, BMP, GIF, TIFF, PNM, PGM, PBM, PPM, PAM, JXR, JPX/JP2, PSD
+
JPG/JPEG, PNG, PNM, PGM, PBM, PPM, PAM, PSD, PS
+
+ + diff --git a/docs/textpage.rst b/docs/textpage.rst index 8b8ac3a2f..60aa54812 100644 --- a/docs/textpage.rst +++ b/docs/textpage.rst @@ -33,12 +33,14 @@ For a description of what this class is all about, see Appendix 2. .. class:: TextPage - .. method:: extractText + .. method:: extractText(sort=False) - .. method:: extractTEXT + .. method:: extractTEXT(sort=False) Return a string of the page's complete text. The text is UTF-8 unicode and in the same sequence as specified at the time of document creation. + :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. + :rtype: str @@ -56,13 +58,17 @@ For a description of what this class is all about, see Appendix 2. :rtype: list - .. method:: extractWORDS + .. method:: extractWORDS(delimiters=None) + + * Changed in v1.23.5: added `delimiters` parameter Textpage content as a list of single words with bbox information. An item of this list looks like this:: (x0, y0, x1, y1, "word", block_no, line_no, word_no) - Everything delimited by spaces is treated as a *"word"*. This is a high-speed method which e.g. allows extracting text from within given areas or recovering the text reading sequence. + :arg str delimiters: (new in v1.23.5) use these characters as *additional* word separators. By default, all white spaces (including the non-breaking space `0xA0`) indicate start and end of a word. Now you can specify more characters causing this. For instance, the default will return `"john.doe@outlook.com"` as **one** word. If you specify `delimiters="@."` then the **four** words `"john"`, `"doe"`, `"outlook"`, `"com"` will be returned. Other possible uses include ignoring punctuation characters `delimiters=string.punctuation`. The "word" strings will not contain any delimiting character. + + This is a high-speed method which e.g. allows extracting text from within given areas or recovering the text reading sequence. :rtype: list @@ -72,16 +78,20 @@ For a description of what this class is all about, see Appendix 2. :rtype: str - .. method:: extractDICT + .. method:: extractDICT(sort=False) Textpage content as a Python dictionary. Provides same information detail as HTML. See below for the structure. + :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. + :rtype: dict - .. method:: extractJSON + .. method:: extractJSON(sort=False) Textpage content as a JSON string. Created by `json.dumps(TextPage.extractDICT())`. It is included for backlevel compatibility. You will probably use this method ever only for outputting the result to some file. The method detects binary image data and converts them to base64 encoded strings. + :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. + :rtype: str .. method:: extractXHTML @@ -96,16 +106,20 @@ For a description of what this class is all about, see Appendix 2. :rtype: str - .. method:: extractRAWDICT + .. method:: extractRAWDICT(sort=False) Textpage content as a Python dictionary -- technically similar to :meth:`extractDICT`, and it contains that information as a subset (including any images). It provides additional detail down to each character, which makes using XML obsolete in many cases. See below for the structure. + :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. + :rtype: dict - .. method:: extractRAWJSON + .. method:: extractRAWJSON(sort=False) Textpage content as a JSON string. Created by `json.dumps(TextPage.extractRAWDICT())`. You will probably use this method ever only for outputting the result to some file. The method detects binary image data and converts them to base64 encoded strings. + :arg bool sort: (new in v1.19.1) sort the output by vertical, then horizontal coordinates. In many cases, this should suffice to generate a "natural" reading order. + :rtype: str .. method:: search(needle, quads=False) @@ -168,8 +182,10 @@ As mentioned, using these functions is ever only needed, if the text is **not wr .. image:: images/img-textpage.* - :scale: 66 +.. + We used to do `:scale: 66` here, but current (2024-04-22) sphinx messes up + the aspect ratio. Page Dictionary ~~~~~~~~~~~~~~~~~ @@ -186,28 +202,25 @@ Block Dictionaries ~~~~~~~~~~~~~~~~~~ Block dictionaries come in two different formats for **image blocks** and for **text blocks**. -* *(Changed in v1.18.0)* -- new dict key *number*, the block number. -* *(Changed in v1.18.11)* -- new dict key *transform*, the image transformation matrix for image blocks. -* *(Changed in v1.18.11)* -- new dict key *size*, the size of the image in bytes for image blocks. - **Image block:** =============== =============================================================== **Key** **Value** =============== =============================================================== -type 1 = image *(int)* +type 1 = image (``int``) bbox image bbox on page (:data:`rect_like`) -number block count *(int)* -ext image type *(str)*, as file extension, see below -width original image width *(int)* -height original image height *(int)* -colorspace colorspace component count *(int)* -xres resolution in x-direction *(int)* -yres resolution in y-direction *(int)* -bpc bits per component *(int)* +number block count (``int``) +ext image type (``str``), as file extension, see below +width original image width (``int``) +height original image height (``int``) +colorspace colorspace component count (``int``) +xres resolution in x-direction (``int``) [#f3]_ +yres resolution in y-direction (``int``) [#f3]_ +bpc bits per component (``int``) transform matrix transforming image rect to bbox (:data:`matrix_like`) -size size of the image in bytes *(int)* -image image content *(bytes)* +size size of the image in bytes (``int``) +image image content (``bytes``) +mask image mask content (``bytes``) for transparent images =============== =============================================================== Possible values of the "ext" key are "bmp", "gif", "jpeg", "jpx" (JPEG 2000), "jxr" (JPEG XR), "png", "pnm", and "tiff". @@ -223,7 +236,13 @@ Possible values of the "ext" key are "bmp", "gif", "jpeg", "jpx" (JPEG 2000), "j - Image blocks in a textpage are generated for **every** image location -- whether or not there are any duplicates. This is in contrast to :meth:`Page.get_images`, which will list each image only once (per reference name). - Images mentioned in the page's :data:`object` definition will **always** appear in :meth:`Page.get_images` [#f1]_. But it may happen, that there is no "display" command in the page's :data:`contents` (erroneously or on purpose). In this case the image will **not appear** in the textpage. - 3. The image's "transformation matrix" is defined as the matrix, for which the expression `bbox / transform == fitz.Rect(0, 0, 1, 1)` is true, lookup details here: :ref:`ImageTransformation`. + 3. The image's "transformation matrix" is defined as the matrix, for which the expression `bbox / transform == pymupdf.Rect(0, 0, 1, 1)` is true, lookup details here: :ref:`ImageTransformation`. + + 4. A transparent image may be accompanied by a mask image. This is stored under key `"mask"` and has the format of a `DeviceGray` PNG image. Otherwise the value of this key is ``None``. If present, you may be able to recover (an equivalent of) the original image -- i.e. with transparency -- by creating :ref:`Pixmap` objects from the "image", respectively "mask" values and overlay them. This is not guaranteed to always work because mask images come in multiple formats, of which not all qualify for the conditions under which overlaying Pixmaps are supported. Here is a code snippet: + + >>> base = pymupdf.Pixmap(block["image"]) + >>> mask = pymupdf.Pixmap(block["mask"]) + >>> result = pymupdf.Pixmap(base, mask) **Text block:** @@ -249,7 +268,7 @@ dir writing direction, :data:`point_like` spans *list* of span dictionaries =============== ===================================================== -The value of key *"dir"* is the **unit vector** `dir = (cosine, sine)` of the angle, which the text has relative to the x-axis. See the following picture: The word in each quadrant (counter-clockwise from top-right to bottom-right) is rotated by 30, 120, 210 and 300 degrees respectively. +The value of key *"dir"* is the **unit vector** `dir = (cosine, -sine)` of the angle, which the text has relative to the x-axis [#f2]_. See the following picture: The word in each quadrant (counter-clockwise from top-right to bottom-right) is rotated by 30, 120, 210 and 300 degrees respectively. .. image:: images/img-line-dir.* :scale: 100 @@ -272,14 +291,22 @@ ascender ascender of the font *(float)* descender descender of the font *(float)* size font size *(float)* flags font characteristics *(int)* -color text color in sRGB format *(int)* +char_flags char characteristics *(int)* +color text color in sRGB format 0xRRGGBB *(int)*. +alpha text opacity 0..255 *(int)*. text (only for :meth:`extractDICT`) text *(str)* chars (only for :meth:`extractRAWDICT`) *list* of character dictionaries =============== ===================================================================== +|history_begin| + +*(New in version 1.25.3.0):* Added *"alpha"* item. + *(New in version 1.16.0):* *"color"* is the text color encoded in sRGB (int) format, e.g. 0xFF0000 for red. There are functions for converting this integer back to formats (r, g, b) (PDF with float values from 0 to 1) :meth:`sRGB_to_pdf`, or (R, G, B), :meth:`sRGB_to_rgb` (with integer values from 0 to 255). -*(New in v1.18.5):* *"ascender"* and *"descender"* are font properties, provided relative to fontsize 1. Note that descender is a negative value. The following picture shows the relationship to other values and properties. +*(New in v1.18.5):* *"ascender"* and *"descender"* are font properties, provided relative to :data:`fontsize` 1. Note that descender is a negative value. The following picture shows the relationship to other values and properties. + +|history_end| .. image:: images/img-asc-desc.* :scale: 60 @@ -288,15 +315,15 @@ These numbers may be used to compute the minimum height of a character (or span) >>> a = span["ascender"] >>> d = span["descender"] ->>> r = fitz.Rect(span["bbox"]) ->>> o = fitz.Point(span["origin"]) # its y-value is the baseline +>>> r = pymupdf.Rect(span["bbox"]) +>>> o = pymupdf.Point(span["origin"]) # its y-value is the baseline >>> r.y1 = o.y - span["size"] * d / (a - d) >>> r.y0 = r.y1 - span["size"] >>> # r now is a rectangle of height 'fontsize' -.. caution:: The above calculation may deliver a **larger** height! This may e.g. happen for OCRed documents, where the risk of all sorts of text artifacts is high. MuPDF tries to come up with a reasonable bbox height, independently from the fontsize found in the PDF. So please ensure that the height of `span["bbox"]` is **larger** than `span["size"]`. +.. caution:: The above calculation may deliver a **larger** height! This may e.g. happen for OCRed documents, where the risk of all sorts of text artifacts is high. MuPDF tries to come up with a reasonable bbox height, independently from the :data:`fontsize` found in the PDF. So please ensure that the height of `span["bbox"]` is **larger** than `span["size"]`. -.. note:: You may request PyMuPDF to do all of the above automatically by executing `fitz.TOOLS.set_small_glyph_heights(True)`. This sets a global parameter so that all subsequent text searches and text extractions are based on reduced glyph heights, where meaningful. +.. note:: You may request PyMuPDF to do all of the above automatically by executing `pymupdf.TOOLS.set_small_glyph_heights(True)`. This sets a global parameter so that all subsequent text searches and text extractions are based on reduced glyph heights, where meaningful. The following shows the original span rectangle in red and the rectangle with re-computed height in blue. @@ -306,19 +333,35 @@ The following shows the original span rectangle in red and the rectangle with re *"flags"* is an integer, which represents font properties except for the first bit 0. They are to be interpreted like this: -* bit 0: superscripted (2\ :sup:`0`) -- not a font property, detected by MuPDF code. -* bit 1: italic (2\ :sup:`1`) -* bit 2: serifed (2\ :sup:`2`) -* bit 3: monospaced (2\ :sup:`3`) -* bit 4: bold (2\ :sup:`4`) +* bit 0: superscripted (:data:`TEXT_FONT_SUPERSCRIPT`) -- not a font property, detected by MuPDF code. +* bit 1: italic (:data:`TEXT_FONT_ITALIC`) +* bit 2: serifed (:data:`TEXT_FONT_SERIFED`) +* bit 3: monospaced (:data:`TEXT_FONT_MONOSPACED`) +* bit 4: bold (:data:`TEXT_FONT_BOLD`) Test these characteristics like so: ->>> if flags & 2**1: print("italic") ->>> # etc. +>>> if flags & pymupdf.TEXT_FONT_BOLD & pymupdf.TEXT_FONT_ITALIC: + print(f"{span['text']=} is bold and italic") + Bits 1 thru 4 are font properties, i.e. encoded in the font program. Please note, that this information is not necessarily correct or complete: fonts quite often contain wrong data here. +*"char_flags"* is an integer, which represents extra character properties: + +* bit 0: strikeout. +* bit 1: underline. +* bit 2: synthetic (always 0, see char dictionary). +* bit 3: filled. +* bit 4: stroked. +* bit 5: clipped. + +For example if not filled and not stroked (`if not (char_flags & 2**3 & 2**4): +...`) then the text will be invisible. + +(`char_flags` is new in v1.25.2.) + + Character Dictionary for :meth:`extractRAWDICT` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -327,18 +370,24 @@ Character Dictionary for :meth:`extractRAWDICT` =============== =========================================================== origin character's left baseline point, :data:`point_like` bbox character rectangle, :data:`rect_like` +synthetic bool. c the character (unicode) =============== =========================================================== +(`synthetic` is new in v1.25.3.) + This image shows the relationship between a character's bbox and its quad: |textpagechar| .. |textpagechar| image:: images/img-textpage-char.* :align: top :scale: 66 - .. rubric:: Footnotes -.. [#f1] Image specifications for a PDF page are done in a page's (sub-) :data:`dictionary`, called *"/Resources"*. Resource dictionaries can be **inherited** from the page's parent object (usually the :data:`catalog`). The PDF creator may e.g. define one */Resources* on file level, naming all images and all fonts ever used by any page. In these cases, :meth:`Page.get_images` and :meth:`Page.get_fonts` will return the same lists for all pages. +.. [#f1] Image specifications for a PDF page are done in a page's (sub-) :data:`dictionary`, called `/Resources`. Resource dictionaries can be **inherited** from any of the page's parent objects (usually the :data:`catalog` -- the top-level parent). The PDF creator may e.g. define one `/Resources` on file level, naming all images and / or all fonts ever used by any page. In these cases, :meth:`Page.get_images` and :meth:`Page.get_fonts` will consequently return the same lists for all pages. If desired, this situation can be reverted using :meth:`Page.clean_contents`. After execution, the page's object definition will show fonts and images that are actually used. + +.. [#f2] The coordinate systems of MuPDF and PDF are different in that MuPDF uses the page's top-left point as `(0, 0)`. In PDF, this is the bottom-left point. Therefore, the positive direction for MuPDF's y-axis is **from top to bottom**. This causes the sign change for the sine value here: a **negative** value indicates anti-clockwise rotation of the text. + +.. [#f3] This value is always 96, the default of the PDF interpreter. It **does not reflect** the resolution of the image itself. If you need the image's resolution, use the :meth:`Pixmap.xres` and :meth:`Pixmap.yres` attributes of the :ref:`Pixmap` created from the returned image binary. .. include:: footer.rst diff --git a/docs/textwriter.rst b/docs/textwriter.rst index 6cc326c01..782710f9a 100644 --- a/docs/textwriter.rst +++ b/docs/textwriter.rst @@ -6,6 +6,8 @@ TextWriter ================ +|pdf_only_class| + * New in v1.16.18 This class represents a MuPDF *text* object. The basic idea is to **decouple (1) text preparation, and (2) text output** to PDF pages. @@ -15,7 +17,7 @@ During **preparation**, a text writer stores any number of text pieces ("spans") A text writer is an elegant alternative to methods :meth:`Page.insert_text` and friends: * **Improved text positioning:** Choose any point where insertion of text should start. Storing text returns the "cursor position" after the *last character* of the span. -* **Free font choice:** Each text span has its own font and fontsize. This lets you easily switch when composing a larger text. +* **Free font choice:** Each text span has its own font and :data:`fontsize`. This lets you easily switch when composing a larger text. * **Automatic fallback fonts:** If a character is not supported by the chosen font, alternative fonts are automatically searched. This significantly reduces the risk of seeing unprintable symbols in the output ("TOFUs" -- looking like a small rectangle). PyMuPDF now also comes with the **universal font "Droid Sans Fallback Regular"**, which supports **all Latin** characters (including Cyrillic and Greek), and **all CJK** characters (Chinese, Japanese, Korean). * **Cyrillic and Greek Support:** The :ref:`Base-14-fonts` have integrated support of Cyrillic and Greek characters **without specifying encoding.** Your text may be a mixture of Latin, Greek and Cyrillic. * **Transparency support:** Parameter *opacity* is supported. This offers a handy way to create watermark-style text. @@ -70,17 +72,17 @@ Using this object entails three steps: :arg point_like pos: start position of the text, the bottom left point of the first character. :arg str text: a string of arbitrary length. It will be written starting at position "pos". - :arg font: a :ref:`Font`. If omitted, `fitz.Font("helv")` will be used. - :arg float fontsize: the fontsize, a positive number, default 11. + :arg font: a :ref:`Font`. If omitted, `pymupdf.Font("helv")` will be used. + :arg float fontsize: the :data:`fontsize`, a positive number, default 11. :arg str language: the language to use, e.g. "en" for English. Meaningful values should be compliant with the ISO 639 standards 1, 2, 3 or 5. Reserved for future use: currently has no effect as far as we know. - :arg bool right_to_left: *(New in v1.18.9)* whether the text should be written from right to left. Applicable for languages like Arabian or Hebrew. Default is *False*. If *True*, any Latin parts within the text will automatically converted. There are no other consequences, i.e. :attr:`TextWriter.last_point` will still be the rightmost character, and there neither is any alignment taking place. Hence you may want to use :meth:`TextWriter.fill_textbox` instead. + :arg bool right_to_left: *(New in v1.18.9)* whether the text should be written from right to left. Applicable for languages like Arabian or Hebrew. Default is ``False``. If ``True``, any Latin parts within the text will automatically converted. There are no other consequences, i.e. :attr:`TextWriter.last_point` will still be the rightmost character, and there neither is any alignment taking place. Hence you may want to use :meth:`TextWriter.fill_textbox` instead. :arg bool small_caps: *(New in v1.18.15)* look for the character's Small Capital version in the font. If present, take that value instead. Otherwise the original character (this font or the fallback font) will be taken. The fallback font will never return small caps. For example, this snippet:: - >>> doc = fitz.open() + >>> doc = pymupdf.open() >>> page = doc.new_page() >>> text = "PyMuPDF: the Python bindings for MuPDF" - >>> font = fitz.Font("figo") # choose a font with small caps - >>> tw = fitz.TextWriter(page.rect) + >>> font = pymupdf.Font("figo") # choose a font with small caps + >>> tw = pymupdf.TextWriter(page.rect) >>> tw.append((50,100), text, font=font, small_caps=True) >>> tw.write_text(page) >>> doc.ez_save("x.pdf") @@ -101,8 +103,8 @@ Using this object entails three steps: :arg point_like pos: start position of the text, the bottom left point of the first character. :arg str text: a string. It will be written starting at position "pos". - :arg font: a :ref:`Font`. If omitted, `fitz.Font("helv")` will be used. - :arg float fontsize: the fontsize, a positive float, default 11. + :arg font: a :ref:`Font`. If omitted, `pymupdf.Font("helv")` will be used. + :arg float fontsize: the :data:`fontsize`, a positive float, default 11. :arg str language: the language to use, e.g. "en" for English. Meaningful values should be compliant with the ISO 639 standards 1, 2, 3 or 5. Reserved for future use: currently has no effect as far as we know. :arg bool small_caps: *(New in v1.18.15)* see :meth:`append`. @@ -119,13 +121,13 @@ Using this object entails three steps: :arg rect_like rect: the area to fill. No part of the text will appear outside of this. :arg str,sequ text: the text. Can be specified as a (UTF-8) string or a list / tuple of strings. A string will first be converted to a list using *splitlines()*. Every list item will begin on a new line (forced line breaks). :arg point_like pos: *(new in v1.17.3)* start storing at this point. Default is a point near rectangle top-left. - :arg font: the :ref:`Font`, default `fitz.Font("helv")`. - :arg float fontsize: the fontsize. + :arg font: the :ref:`Font`, default `pymupdf.Font("helv")`. + :arg float fontsize: the :data:`fontsize`. :arg int align: text alignment. Use one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT or TEXT_ALIGN_JUSTIFY. - :arg bool right_to_left: *(New in v1.18.9)* whether the text should be written from right to left. Applicable for languages like Arabian or Hebrew. Default is *False*. If *True*, any Latin parts are automatically reverted. You must still set the alignment (if you want right alignment), it does not happen automatically -- the other alignment options remain available as well. + :arg bool right_to_left: *(New in v1.18.9)* whether the text should be written from right to left. Applicable for languages like Arabian or Hebrew. Default is ``False``. If ``True``, any Latin parts are automatically reverted. You must still set the alignment (if you want right alignment), it does not happen automatically -- the other alignment options remain available as well. :arg bool warn: on text overflow do nothing, warn, or raise an exception. Overflow text will never be written. **Changed in v1.18.9:** - * Default is *None*. + * Default is ``None``. * The list of overflow lines will be returned. :arg bool small_caps: *(New in v1.18.15)* see :meth:`append`. @@ -182,12 +184,12 @@ Using this object entails three steps: :rtype: :ref:`Rect` -.. note:: To see some demo scripts dealing with TextWriter, have a look at `this `_ repository. +.. note:: To see some demo scripts dealing with TextWriter, have a look at `the TextWriter demo scripts `_. 1. Opacity and color apply to **all the text** in this object. 2. If you need different colors / transparency, you must create a separate TextWriter. Whenever you determine the color should change, simply append the text to the respective TextWriter using the previously returned :attr:`last_point` as position for the new text span. 3. Appending items or text boxes can occur in arbitrary order: only the position parameter controls where text appears. - 4. Font and fontsize can freely vary within the same TextWriter. This can be used to let text with different properties appear on the same displayed line: just specify *pos* accordingly, and e.g. set it to :attr:`last_point` of the previously added item. + 4. Font and :data:`fontsize` can freely vary within the same TextWriter. This can be used to let text with different properties appear on the same displayed line: just specify *pos* accordingly, and e.g. set it to :attr:`last_point` of the previously added item. 5. You can use the *pos* argument of :meth:`TextWriter.fill_textbox` to set the position of the first text character. This allows filling the same textbox with contents from different :ref:`TextWriter` objects, thus allowing for multiple colors, opacities, etc. 6. MuPDF does not support all fonts with this feature, e.g. no Type3 fonts. Starting with v1.18.0 this can be checked via the font attribute :attr:`Font.is_writable`. This attribute is also checked when using :ref:`TextWriter` methods. diff --git a/docs/the-basics.rst b/docs/the-basics.rst index e878f2e86..9bab2728b 100644 --- a/docs/the-basics.rst +++ b/docs/the-basics.rst @@ -7,144 +7,6 @@ The Basics ============================== -.. _Supported_File_Types: - -Supported File Types -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:title:`PyMuPDF` supports the following file types: - -.. raw:: html - - - - - - - - - - - - - - - - - - - -
File type
Document Formats - PDF - XPS - EPUB - MOBI - FB2 - CBZ - SVG -
Image Formats - -
Input formats JPG/JPEG, PNG, BMP, GIF, TIFF, PNM, PGM, PBM, PPM, PAM, JXR, JPX/JP2, PSD
-
Output formats JPG/JPEG, PNG, PNM, PGM, PBM, PPM, PAM, PSD, PS
-
- - .. _The_Basics_Opening_Files: Opening a File @@ -153,123 +15,103 @@ Opening a File To open a file, do the following: -.. raw:: html - -
-        
-            import fitz
-
-            doc = fitz.open("a.pdf") # open a document
-        
-    
+.. code-block:: python + import pymupdf -Opening with :index:`a Wrong File Extension ` -"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - -If you have a document with a wrong file extension for its type, you can still correctly open it. - -Assume that *"some.file"* is actually an XPS. Open it like so: - -.. raw:: html - -
-        
-            import fitz
-
-            doc = fitz.open("some.file", filetype="xps")
-        
-    
- + doc = pymupdf.open("a.pdf") # open a document .. note:: **Taking it further** - There are many file types beyond :title:`PDF` which can be opened by :title:`PyMuPDF`, for more details see the list of :ref:`supported file types`. + See the :ref:`list of supported file types` and :ref:`The How to Guide on Opening Files ` for more advanced options. - :title:`PyMuPDF` itself does not try to determine the file type from the file contents. **You** are responsible for supplying the filetype info in some way -- either implicitly via the file extension, or explicitly as shown. There are pure :title:`Python` packages like `filetype `_ that help you doing this. Also consult the :ref:`Document` chapter for a full description. - - If :title:`PyMuPDF` encounters a file with an unknown / missing extension, it will try to open it as a :title:`PDF`. So in these cases there is no need for additional precautions. Similarly, for memory documents, you can just specify `doc=fitz.open(stream=mem_area)` to open it as a :title:`PDF` document. - - If you attempt to open an unsupported file then :title:`PyMuPDF` will throw a file data error. ---------- .. _The_Basics_Extracting_Text: -Extract text from a :title:`PDF` +Extract text from a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To extract all the text from a :title:`PDF` file, do the following: +To extract all the text from a |PDF| file, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc = fitz.open("a.pdf") # open a document
-            out = open("output.txt", "wb") # create a text output
-            for page in doc: # iterate the document pages
-                text = page.get_text().encode("utf8") # get plain text (is in UTF-8)
-                out.write(text) # write text of page
-                out.write(bytes((12,))) # write page delimiter (form feed 0x0C)
-            out.close()
-        
-    
+ doc = pymupdf.open("a.pdf") # open a document + out = open("output.txt", "wb") # create a text output + for page in doc: # iterate the document pages + text = page.get_text().encode("utf8") # get plain text (is in UTF-8) + out.write(text) # write text of page + out.write(bytes((12,))) # write page delimiter (form feed 0x0C) + out.close() + +Of course it is not just |PDF| which can have text extracted - all the :ref:`supported document file formats ` such as :title:`MOBI`, :title:`EPUB`, :title:`TXT` can have their text extracted. .. note:: **Taking it further** + If your document contains image based text content the use OCR on the page for subsequent text extraction: + + .. code-block:: python + + tp = page.get_textpage_ocr() + text = page.get_text(textpage=tp) + There are many more examples which explain how to extract text from specific areas or how to extract tables from documents. Please refer to the :ref:`How to Guide for Text`. + You can now also :ref:`extract text in Markdown format`. + **API reference** - :meth:`Page.get_text` + + + + ---------- .. _The_Basics_Extracting_Images: -Extract images from a :title:`PDF` +Extract images from a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To extract all the images from a :title:`PDF` file, do the following: +To extract all the images from a |PDF| file, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc = fitz.open("test.pdf") # open a document
+    doc = pymupdf.open("test.pdf") # open a document
 
-            for page_index in range(len(doc)): # iterate over pdf pages
-                page = doc[page_index] # get the page
-                image_list = page.get_images()
+    for page_index in range(len(doc)): # iterate over pdf pages
+        page = doc[page_index] # get the page
+        image_list = page.get_images()
 
-                # print the number of images found on the page
-                if image_list:
-                    print(f"Found {len(image_list)} images on page {page_index}")
-                else:
-                    print("No images found on page", page_index)
+        # print the number of images found on the page
+        if image_list:
+            print(f"Found {len(image_list)} images on page {page_index}")
+        else:
+            print("No images found on page", page_index)
 
-                for image_index, img in enumerate(image_list, start=1): # enumerate the image list
-                    xref = img[0] # get the XREF of the image
-                    pix = fitz.Pixmap(doc, xref) # create a Pixmap
+        for image_index, img in enumerate(image_list, start=1): # enumerate the image list
+            xref = img[0] # get the XREF of the image
+            pix = pymupdf.Pixmap(doc, xref) # create a Pixmap
 
-                    if pix.n - pix.alpha > 3: # CMYK: convert to RGB first
-                        pix = fitz.Pixmap(fitz.csRGB, pix)
+            if pix.n - pix.alpha > 3: # CMYK: convert to RGB first
+                pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
+
+            pix.save("page_%s-image_%s.png" % (page_index, image_index)) # save the image as png
+            pix = None
 
-                    pix.save("page_%s-image_%s.png" % (page_index, image_index)) # save the image as png
-                    pix = None
-        
-    
.. note:: @@ -284,58 +126,79 @@ To extract all the images from a :title:`PDF` file, do the following: - :ref:`Pixmap` + +.. _The_Basics_Extracting_Vector_Graphics: + +Extract vector graphics +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To extract all the vector graphics from a document page, do the following: + + +.. code-block:: python + + doc = pymupdf.open("some.file") + page = doc[0] + paths = page.get_drawings() + + +This will return a dictionary of paths for any vector drawings found on the page. + +.. note:: + + **Taking it further** + + Please refer to: :ref:`How to Extract Drawings`. + + **API reference** + + - :meth:`Page.get_drawings` + + + ---------- .. _The_Basics_Merging_PDF: .. _merge PDF: .. _join PDF: -Merging :title:`PDF` files +Merging |PDF| files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To merge :title:`PDF` files, do the following: +To merge |PDF| files, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc_a = fitz.open("a.pdf") # open the 1st document
-            doc_b = fitz.open("b.pdf") # open the 2nd document
+    doc_a = pymupdf.open("a.pdf") # open the 1st document
+    doc_b = pymupdf.open("b.pdf") # open the 2nd document
 
-            doc_a.insert_pdf(doc_b) # merge the docs
-            doc_a.save("a+b.pdf") # save the merged document with a new filename
-        
-    
+ doc_a.insert_pdf(doc_b) # merge the docs + doc_a.save("a+b.pdf") # save the merged document with a new filename -Merging :title:`PDF` files with other types of file +Merging |PDF| files with other types of file """"""""""""""""""""""""""""""""""""""""""""""""""""" -With :meth:`Document.insert_file` you can invoke the method to merge :ref:`supported files` with :title:`PDF`. For example: +With :meth:`Document.insert_file` you can invoke the method to merge :ref:`supported files` with |PDF|. For example: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc_a = fitz.open("a.pdf") # open the 1st document
-            doc_b = fitz.open("b.svg") # open the 2nd document
-
-            doc_a.insert_file(doc_b) # merge the docs
-            doc_a.save("a+b.pdf") # save the merged document with a new filename
-        
-    
+ doc_a = pymupdf.open("a.pdf") # open the 1st document + doc_b = pymupdf.open("b.svg") # open the 2nd document + doc_a.insert_file(doc_b) # merge the docs + doc_a.save("a+b.pdf") # save the merged document with a new filename .. note:: **Taking it further** - It is easy to join PDFs with :meth:`Document.insert_pdf` & :meth:`Document.insert_file`. Given open :title:`PDF` documents, you can copy page ranges from one to the other. You can select the point where the copied pages should be placed, you can revert the page sequence and also change page rotation. This Wiki `article `_ contains a full description. + It is easy to join PDFs with :meth:`Document.insert_pdf` & :meth:`Document.insert_file`. Given open |PDF| documents, you can copy page ranges from one to the other. You can select the point where the copied pages should be placed, you can revert the page sequence and also change page rotation. The GUI script `join.py `_ uses this method to join a list of files while also joining the respective table of contents segments. It looks like this: @@ -351,38 +214,43 @@ With :meth:`Document.insert_file` you can invoke the method to merge :ref:`suppo ---------- +Working with Coordinates +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is one *mathematical term* that you should feel comfortable with when using |PyMuPDF| - **"coordinates"**. Please have a quick look at the :ref:`Coordinates` section to understand the coordinate system to help you with positioning objects and understand your document space. + + + +---------- + .. _The_Basics_Watermarks: -Adding a watermark to a :title:`PDF` +Adding a watermark to a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To add a watermark to a :title:`PDF` file, do the following: +To add a watermark to a |PDF| file, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc = fitz.open("document.pdf") # open a document
+    doc = pymupdf.open("document.pdf") # open a document
 
-            for page_index in range(len(doc)): # iterate over pdf pages
-                page = doc[page_index] # get the page
+    for page_index in range(len(doc)): # iterate over pdf pages
+        page = doc[page_index] # get the page
 
-                # insert an image watermark from a file name to fit the page bounds
-                page.insert_image(page.bound(),filename="watermark.png", overlay=False)
+        # insert an image watermark from a file name to fit the page bounds
+        page.insert_image(page.bound(),filename="watermark.png", overlay=False)
 
-            doc.save("watermarked-document.pdf") # save the document with a new filename
-        
-    
+ doc.save("watermarked-document.pdf") # save the document with a new filename .. note:: **Taking it further** - Adding watermarks is essentially as simple as adding an image at the base of each :title:`PDF` page. You should ensure that the image has the required opacity and aspect ratio to make it look the way you need it to. + Adding watermarks is essentially as simple as adding an image at the base of each |PDF| page. You should ensure that the image has the required opacity and aspect ratio to make it look the way you need it to. - In the example above a new image is created from each file reference, but to be more performant (by saving memory and file size) this image data should be referenced only once - see the code example and explanation on :meth:`Page.insert_image` for the implemetation. + In the example above a new image is created from each file reference, but to be more performant (by saving memory and file size) this image data should be referenced only once - see the code example and explanation on :meth:`Page.insert_image` for the implementation. **API reference** @@ -395,28 +263,24 @@ To add a watermark to a :title:`PDF` file, do the following: .. _The_Basics_Images: -Adding an image to a :title:`PDF` +Adding an image to a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To add an image to a :title:`PDF` file, for example a logo, do the following: +To add an image to a |PDF| file, for example a logo, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
+    import pymupdf
 
-            doc = fitz.open("document.pdf") # open a document
+    doc = pymupdf.open("document.pdf") # open a document
 
-            for page_index in range(len(doc)): # iterate over pdf pages
-                page = doc[page_index] # get the page
+    for page_index in range(len(doc)): # iterate over pdf pages
+        page = doc[page_index] # get the page
 
-                # insert an image logo from a file name at the top left of the document
-                page.insert_image(fitz.Rect(0,0,50,50),filename="my-logo.png")
+        # insert an image logo from a file name at the top left of the document
+        page.insert_image(pymupdf.Rect(0,0,50,50),filename="my-logo.png")
 
-            doc.save("logo-document.pdf") # save the document with a new filename
-        
-    
+ doc.save("logo-document.pdf") # save the document with a new filename .. note:: @@ -435,24 +299,19 @@ To add an image to a :title:`PDF` file, for example a logo, do the following: .. _The_Basics_Rotating: -Rotating a :title:`PDF` +Rotating a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To add a rotation to a page, do the following: -.. raw:: html +.. code-block:: python -
-        
-            import fitz
-
-            doc = fitz.open("test.pdf") # open document
-            page = doc[0] # get the 1st page of the document
-            page.set_rotation(90) # rotate the page
-            doc.save("rotated-page-1.pdf")
-        
-    
+ import pymupdf + doc = pymupdf.open("test.pdf") # open document + page = doc[0] # get the 1st page of the document + page.set_rotation(90) # rotate the page + doc.save("rotated-page-1.pdf") .. note:: @@ -465,24 +324,19 @@ To add a rotation to a page, do the following: .. _The_Basics_Cropping: -Cropping a :title:`PDF` +Cropping a |PDF| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To crop a page to a defined :ref:`Rect`, do the following: -.. raw:: html - -
-        
-            import fitz
+.. code-block:: python
 
-            doc = fitz.open("test.pdf") # open document
-            page = doc[0] # get the 1st page of the document
-            page.set_cropbox(fitz.Rect(100, 100, 400, 400)) # set a cropbox for the page
-            doc.save("cropped-page-1.pdf")
-        
-    
+ import pymupdf + doc = pymupdf.open("test.pdf") # open document + page = doc[0] # get the 1st page of the document + page.set_cropbox(pymupdf.Rect(100, 100, 400, 400)) # set a cropbox for the page + doc.save("cropped-page-1.pdf") .. note:: @@ -501,25 +355,22 @@ To crop a page to a defined :ref:`Rect`, do the following: To attach another file to a page, do the following: -.. raw:: html +.. code-block:: python + + import pymupdf -
-    
-        import fitz
+    doc = pymupdf.open("test.pdf") # open main document
+    attachment = pymupdf.open("my-attachment.pdf") # open document you want to attach
 
-        doc = fitz.open("test.pdf") # open main document
-        attachment = fitz.open("my-attachment.pdf") # open document you want to attach
+    page = doc[0] # get the 1st page of the document
+    point = pymupdf.Point(100, 100) # create the point where you want to add the attachment
+    attachment_data = attachment.tobytes() # get the document byte data as a buffer
 
-        page = doc[0] # get the 1st page of the document
-        point = fitz.Point(100, 100) # create the point where you want to add the attachment
-        attachment_data = attachment.tobytes() # get the document byte data as a buffer
+    # add the file annotation with the point, data and the file name
+    file_annotation = page.add_file_annot(point, attachment_data, "attachment.pdf")
 
-        # add the file annotation with the point, data and the file name
-        file_annotation = page.add_file_annot(point, attachment_data, "attachment.pdf")
+    doc.save("document-with-attachment.pdf") # save the document
 
-        doc.save("document-with-attachment.pdf") # save the document
-    
-  
.. note:: @@ -546,26 +397,19 @@ To attach another file to a page, do the following: To embed a file to a document, do the following: -.. raw:: html - -
-    
-        import fitz
+.. code-block:: python
 
-        doc = fitz.open("test.pdf") # open main document
-        embedded_doc = fitz.open("my-embed.pdf") # open document you want to embed
+    import pymupdf
 
-        embedded_data = embedded_doc.tobytes() # get the document byte data as a buffer
-
-        # embed with the file name and the data
-        doc.embfile_add("my-embedded_file.pdf", embedded_data)
-
-        doc.save("document-with-embed.pdf") # save the document
-    
-  
+ doc = pymupdf.open("test.pdf") # open main document + embedded_doc = pymupdf.open("my-embed.pdf") # open document you want to embed + embedded_data = embedded_doc.tobytes() # get the document byte data as a buffer + # embed with the file name and the data + doc.embfile_add("my-embedded_file.pdf", embedded_data) + doc.save("document-with-embed.pdf") # save the document .. note:: @@ -590,35 +434,33 @@ Deleting Pages To delete a page from a document, do the following: -.. raw:: html +.. code-block:: python -
-    
-        import fitz
-
-        doc = fitz.open("test.pdf") # open a document
-        doc.delete_page(0) # delete the 1st page of the document
-        doc.save("test-deleted-page-one.pdf") # save the document
-
-    
-  
+ import pymupdf + doc = pymupdf.open("test.pdf") # open a document + doc.delete_page(0) # delete the 1st page of the document + doc.save("test-deleted-page-one.pdf") # save the document To delete a multiple pages from a document, do the following: .. raw:: html -
-    
-        import fitz
+.. code-block:: python
+
+    import pymupdf
 
-        doc = fitz.open("test.pdf") # open a document
-        doc.delete_pages(from_page=9, to_page=14) # delete a page range from the document
-        doc.save("test-deleted-pages.pdf") # save the document
+    doc = pymupdf.open("test.pdf") # open a document
+    doc.delete_pages(from_page=9, to_page=14) # delete a page range from the document
+    doc.save("test-deleted-pages.pdf") # save the document
 
-    
-  
+What happens if I delete a page referred to by bookmarks or hyperlinks? +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +- A bookmark (entry in the Table of Contents) will become inactive and will no longer navigate to any page. + +- A hyperlink will be removed from the page that contains it. The visible content on that page will not otherwise be changed in any way. .. note:: @@ -642,19 +484,16 @@ To delete a multiple pages from a document, do the following: Re-Arranging Pages ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To re-arrange pages, do the following: +To change the sequence of pages, i.e. re-arrange pages, do the following: -.. raw:: html +.. code-block:: python -
-    
-        import fitz
+    import pymupdf
+
+    doc = pymupdf.open("test.pdf") # open a document
+    doc.move_page(1,0) # move the 2nd page of the document to the start of the document
+    doc.save("test-page-moved.pdf") # save the document
 
-        doc = fitz.open("test.pdf") # open a document
-        doc.move_page(1,0) # move the 2nd page of the document to the start of the document
-        doc.save("test-page-moved.pdf") # save the document
-    
-  
.. note:: @@ -675,17 +514,13 @@ Copying Pages To copy pages, do the following: -.. raw:: html +.. code-block:: python -
-    
-        import fitz
+    import pymupdf
 
-        doc = fitz.open("test.pdf") # open a document
-        doc.copy_page(0) # copy the 1st page and puts it at the end of the document
-        doc.save("test-page-copied.pdf") # save the document
-    
-  
+ doc = pymupdf.open("test.pdf") # open a document + doc.copy_page(0) # copy the 1st page and puts it at the end of the document + doc.save("test-page-copied.pdf") # save the document .. note:: @@ -704,91 +539,70 @@ Selecting Pages To select pages, do the following: -.. raw:: html +.. code-block:: python + + import pymupdf -
-    
-        import fitz
+    doc = pymupdf.open("test.pdf") # open a document
+    doc.select([0, 1]) # select the 1st & 2nd page of the document
+    doc.save("just-page-one-and-two.pdf") # save the document
 
-        doc = fitz.open("test.pdf") # open a document
-        doc.select([0, 1]) # select the 1st & 2nd page of the document
-        doc.save("just-page-one-and-two.pdf") # save the document
-    
-  
.. note:: **Taking it further** - With :title:`PyMuPDF` you have all options to copy, move, delete or re-arrange the pages of a :title:`PDF`. Intuitive methods exist that allow you to do this on a page-by-page level, like the :meth:`Document.copy_page` method. + With |PyMuPDF| you have all options to copy, move, delete or re-arrange the pages of a |PDF|. Intuitive methods exist that allow you to do this on a page-by-page level, like the :meth:`Document.copy_page` method. Or you alternatively prepare a complete new page layout in form of a :title:`Python` sequence, that contains the page numbers you want, in the sequence you want, and as many times as you want each page. The following may illustrate what can be done with :meth:`Document.select` - .. raw:: html + .. code-block:: python -
-        
-            doc.select([1, 1, 1, 5, 4, 9, 9, 9, 0, 2, 2, 2])
-        
-      
+ doc.select([1, 1, 1, 5, 4, 9, 9, 9, 0, 2, 2, 2]) Now let's prepare a PDF for double-sided printing (on a printer not directly supporting this): The number of pages is given by `len(doc)` (equal to `doc.page_count`). The following lists represent the even and the odd page numbers, respectively: - .. raw:: html - -
-        
-            p_even = [p in range(doc.page_count) if p % 2 == 0]
-            p_odd  = [p in range(doc.page_count) if p % 2 == 1]
-        
-      
+ .. code-block:: python + p_even = [p in range(doc.page_count) if p % 2 == 0] + p_odd = [p in range(doc.page_count) if p % 2 == 1] This snippet creates the respective sub documents which can then be used to print the document: - .. raw:: html + .. code-block:: python + + doc.select(p_even) # only the even pages left over + doc.save("even.pdf") # save the "even" PDF + doc.close() # recycle the file + doc = pymupdf.open(doc.name) # re-open + doc.select(p_odd) # and do the same with the odd pages + doc.save("odd.pdf") -
-        
-            doc.select(p_even) # only the even pages left over
-            doc.save("even.pdf") # save the "even" PDF
-            doc.close() # recycle the file
-            doc = fitz.open(doc.name) # re-open
-            doc.select(p_odd) # and do the same with the odd pages
-            doc.save("odd.pdf")
-        
-      
For more information also have a look at this Wiki `article `_. The following example will reverse the order of all pages (**extremely fast:** sub-second time for the 756 pages of the :ref:`AdobeManual`): - .. raw:: html + .. code-block:: python + + lastPage = doc.page_count - 1 + for i in range(lastPage): + doc.move_page(lastPage, i) # move current last page to the front -
-        
-            lastPage = doc.page_count - 1
-            for i in range(lastPage):
-                doc.move_page(lastPage, i) # move current last page to the front
-        
-      
This snippet duplicates the PDF with itself so that it will contain the pages *0, 1, ..., n, 0, 1, ..., n* **(extremely fast and without noticeably increasing the file size!)**: - .. raw:: html + .. code-block:: python + + page_count = len(doc) + for i in range(page_count): + doc.copy_page(i) # copy this page to after last page -
-        
-            page_count = len(doc)
-            for i in range(page_count):
-                doc.copy_page(i) # copy this page to after last page
-        
-      
**API reference** @@ -808,19 +622,16 @@ Adding Blank Pages To add a blank page, do the following: -.. raw:: html +.. code-block:: python + + import pymupdf -
-    
-        import fitz
+    doc = pymupdf.open(...) # some new or existing PDF document
+    page = doc.new_page(-1, # insertion point: end of document
+                        width = 595, # page dimension: A4 portrait
+                        height = 842)
+    doc.save("doc-with-new-blank-page.pdf") # save the document
 
-        doc = fitz.open(...) # some new or existing PDF document
-        page = doc.new_page(-1, # insertion point: end of document
-                            width = 595, # page dimension: A4 portrait
-                            height = 842)
-        doc.save("doc-with-new-blank-page.pdf") # save the document
-    
-  
.. note:: @@ -828,26 +639,21 @@ To add a blank page, do the following: Use this to create the page with another pre-defined paper format: - .. raw:: html -
-            
-                w, h = fitz.paper_size("letter-l")  # 'Letter' landscape
-                page = doc.new_page(width = w, height = h)
-            
-        
+ .. code-block:: python + + w, h = pymupdf.paper_size("letter-l") # 'Letter' landscape + page = doc.new_page(width = w, height = h) + The convenience function :meth:`paper_size` knows over 40 industry standard paper formats to choose from. To see them, inspect dictionary :attr:`paperSizes`. Pass the desired dictionary key to :meth:`paper_size` to retrieve the paper dimensions. Upper and lower case is supported. If you append "-L" to the format name, the landscape version is returned. - Here is a 3-liner that creates a :title:`PDF`: with one empty page. Its file size is 460 bytes: + Here is a 3-liner that creates a |PDF|: with one empty page. Its file size is 460 bytes: + + .. code-block:: python - .. raw:: html -
-            
-                doc = fitz.open()
-                doc.new_page()
-                doc.save("A4.pdf")
-            
-        
+ doc = pymupdf.open() + doc.new_page() + doc.save("A4.pdf") **API reference** @@ -866,23 +672,20 @@ Inserting Pages with Text Content Using the :meth:`Document.insert_page` method also inserts a new page and accepts the same `width` and `height` parameters. But it lets you also insert arbitrary text into the new page and returns the number of inserted lines. -.. raw:: html +.. code-block:: python + + import pymupdf -
-    
-        import fitz
+    doc = pymupdf.open(...)  # some new or existing PDF document
+    n = doc.insert_page(-1, # default insertion point
+                        text = "The quick brown fox jumped over the lazy dog",
+                        fontsize = 11,
+                        width = 595,
+                        height = 842,
+                        fontname = "Helvetica", # default font
+                        fontfile = None, # any font file name
+                        color = (0, 0, 0)) # text color (RGB)
 
-        doc = fitz.open(...)  # some new or existing PDF document
-        n = doc.insert_page(-1, # default insertion point
-                            text = "The quick brown fox jumped over the lazy dog",
-                            fontsize = 11,
-                            width = 595,
-                            height = 842,
-                            fontname = "Helvetica", # default font
-                            fontfile = None, # any font file name
-                            color = (0, 0, 0)) # text color (RGB)
-    
-  
@@ -907,51 +710,47 @@ Using the :meth:`Document.insert_page` method also inserts a new page and accept Splitting Single Pages ~~~~~~~~~~~~~~~~~~~~~~~~~~ -This deals with splitting up pages of a :title:`PDF` in arbitrary pieces. For example, you may have a :title:`PDF` with *Letter* format pages which you want to print with a magnification factor of four: each page is split up in 4 pieces which each going to a separate :title:`PDF` page in *Letter* format again. +This deals with splitting up pages of a |PDF| in arbitrary pieces. For example, you may have a |PDF| with *Letter* format pages which you want to print with a magnification factor of four: each page is split up in 4 pieces which each going to a separate |PDF| page in *Letter* format again. -.. raw:: html +.. code-block:: python -
-    
-        import fitz
-
-        src = fitz.open("test.pdf")
-        doc = fitz.open()  # empty output PDF
-
-        for spage in src:  # for each page in input
-            r = spage.rect  # input page rectangle
-            d = fitz.Rect(spage.cropbox_position,  # CropBox displacement if not
-                          spage.cropbox_position)  # starting at (0, 0)
-            #--------------------------------------------------------------------------
-            # example: cut input page into 2 x 2 parts
-            #--------------------------------------------------------------------------
-            r1 = r / 2  # top left rect
-            r2 = r1 + (r1.width, 0, r1.width, 0)  # top right rect
-            r3 = r1 + (0, r1.height, 0, r1.height)  # bottom left rect
-            r4 = fitz.Rect(r1.br, r.br)  # bottom right rect
-            rect_list = [r1, r2, r3, r4]  # put them in a list
-
-            for rx in rect_list:  # run thru rect list
-                rx += d  # add the CropBox displacement
-                page = doc.new_page(-1,  # new output page with rx dimensions
-                                   width = rx.width,
-                                   height = rx.height)
-                page.show_pdf_page(
-                        page.rect,  # fill all new page with the image
-                        src,  # input document
-                        spage.number,  # input page number
-                        clip = rx,  # which part to use of input page
-                    )
-
-        # that's it, save output file
-        doc.save("poster-" + src.name,
-                 garbage=3,  # eliminate duplicate objects
-                 deflate=True,  # compress stuff where possible
-        )
-    
-  
+ import pymupdf + + src = pymupdf.open("test.pdf") + doc = pymupdf.open() # empty output PDF + + for spage in src: # for each page in input + r = spage.rect # input page rectangle + d = pymupdf.Rect(spage.cropbox_position, # CropBox displacement if not + spage.cropbox_position) # starting at (0, 0) + #-------------------------------------------------------------------------- + # example: cut input page into 2 x 2 parts + #-------------------------------------------------------------------------- + r1 = r / 2 # top left rect + r2 = r1 + (r1.width, 0, r1.width, 0) # top right rect + r3 = r1 + (0, r1.height, 0, r1.height) # bottom left rect + r4 = pymupdf.Rect(r1.br, r.br) # bottom right rect + rect_list = [r1, r2, r3, r4] # put them in a list + + for rx in rect_list: # run thru rect list + rx += d # add the CropBox displacement + page = doc.new_page(-1, # new output page with rx dimensions + width = rx.width, + height = rx.height) + page.show_pdf_page( + page.rect, # fill all new page with the image + src, # input document + spage.number, # input page number + clip = rx, # which part to use of input page + ) + + # that's it, save output file + doc.save("poster-" + src.name, + garbage=3, # eliminate duplicate objects + deflate=True, # compress stuff where possible + ) Example: @@ -975,45 +774,42 @@ Example: Combining Single Pages ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This deals with joining :title:`PDF` pages to form a new :title:`PDF` with pages each combining two or four original ones (also called "2-up", "4-up", etc.). This could be used to create booklets or thumbnail-like overviews. +This deals with joining |PDF| pages to form a new |PDF| with pages each combining two or four original ones (also called "2-up", "4-up", etc.). This could be used to create booklets or thumbnail-like overviews. -.. raw:: html +.. code-block:: python -
-    
-        import fitz
+    import pymupdf
 
-        src = fitz.open("test.pdf")
-        doc = fitz.open()  # empty output PDF
+    src = pymupdf.open("test.pdf")
+    doc = pymupdf.open()  # empty output PDF
 
-        width, height = fitz.paper_size("a4")  # A4 portrait output page format
-        r = fitz.Rect(0, 0, width, height)
+    width, height = pymupdf.paper_size("a4")  # A4 portrait output page format
+    r = pymupdf.Rect(0, 0, width, height)
+
+    # define the 4 rectangles per page
+    r1 = r / 2  # top left rect
+    r2 = r1 + (r1.width, 0, r1.width, 0)  # top right
+    r3 = r1 + (0, r1.height, 0, r1.height)  # bottom left
+    r4 = pymupdf.Rect(r1.br, r.br)  # bottom right
+
+    # put them in a list
+    r_tab = [r1, r2, r3, r4]
+
+    # now copy input pages to output
+    for spage in src:
+        if spage.number % 4 == 0:  # create new output page
+            page = doc.new_page(-1,
+                          width = width,
+                          height = height)
+        # insert input page into the correct rectangle
+        page.show_pdf_page(r_tab[spage.number % 4],  # select output rect
+                         src,  # input document
+                         spage.number)  # input page number
+
+    # by all means, save new file using garbage collection and compression
+    doc.save("4up.pdf", garbage=3, deflate=True)
 
-        # define the 4 rectangles per page
-        r1 = r / 2  # top left rect
-        r2 = r1 + (r1.width, 0, r1.width, 0)  # top right
-        r3 = r1 + (0, r1.height, 0, r1.height)  # bottom left
-        r4 = fitz.Rect(r1.br, r.br)  # bottom right
-
-        # put them in a list
-        r_tab = [r1, r2, r3, r4]
-
-        # now copy input pages to output
-        for spage in src:
-            if spage.number % 4 == 0:  # create new output page
-                page = doc.new_page(-1,
-                              width = width,
-                              height = height)
-            # insert input page into the correct rectangle
-            page.show_pdf_page(r_tab[spage.number % 4],  # select output rect
-                             src,  # input document
-                             spage.number)  # input page number
-
-        # by all means, save new file using garbage collection and compression
-        doc.save("4up.pdf", garbage=3, deflate=True)
-    
-  
Example: @@ -1033,11 +829,11 @@ Example: .. _The_Basics_Encryption_and_Decryption: -:title:`PDF` Encryption & Decryption +|PDF| Encryption & Decryption ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Starting with version 1.16.0, :title:`PDF` decryption and encryption (using passwords) are fully supported. You can do the following: +Starting with version 1.16.0, |PDF| decryption and encryption (using passwords) are fully supported. You can do the following: * Check whether a document is password protected / (still) encrypted (:attr:`Document.needs_pass`, :attr:`Document.is_encrypted`). * Gain access authorization to a document (:meth:`Document.authenticate`). @@ -1051,41 +847,38 @@ Starting with version 1.16.0, :title:`PDF` decryption and encryption (using pass .. note:: A PDF document may have two different passwords: * The **owner password** provides full access rights, including changing passwords, encryption method, or permission detail. - * The **user password** provides access to document content according to the established permission details. If present, opening the :title:`PDF` in a viewer will require providing it. + * The **user password** provides access to document content according to the established permission details. If present, opening the |PDF| in a viewer will require providing it. Method :meth:`Document.authenticate` will automatically establish access rights according to the password used. -The following snippet creates a new :title:`PDF` and encrypts it with separate user and owner passwords. Permissions are granted to print, copy and annotate, but no changes are allowed to someone authenticating with the user password. +The following snippet creates a new |PDF| and encrypts it with separate user and owner passwords. Permissions are granted to print, copy and annotate, but no changes are allowed to someone authenticating with the user password. -.. raw:: html +.. code-block:: python + + import pymupdf + + text = "some secret information" # keep this data secret + perm = int( + pymupdf.PDF_PERM_ACCESSIBILITY # always use this + | pymupdf.PDF_PERM_PRINT # permit printing + | pymupdf.PDF_PERM_COPY # permit copying + | pymupdf.PDF_PERM_ANNOTATE # permit annotations + ) + owner_pass = "owner" # owner password + user_pass = "user" # user password + encrypt_meth = pymupdf.PDF_ENCRYPT_AES_256 # strongest algorithm + doc = pymupdf.open() # empty pdf + page = doc.new_page() # empty page + page.insert_text((50, 72), text) # insert the data + doc.save( + "secret.pdf", + encryption=encrypt_meth, # set the encryption method + owner_pw=owner_pass, # set the owner password + user_pw=user_pass, # set the user password + permissions=perm, # set permissions + ) -
-    
-        import fitz
-
-        text = "some secret information" # keep this data secret
-        perm = int(
-            fitz.PDF_PERM_ACCESSIBILITY # always use this
-            | fitz.PDF_PERM_PRINT # permit printing
-            | fitz.PDF_PERM_COPY # permit copying
-            | fitz.PDF_PERM_ANNOTATE # permit annotations
-        )
-        owner_pass = "owner" # owner password
-        user_pass = "user" # user password
-        encrypt_meth = fitz.PDF_ENCRYPT_AES_256 # strongest algorithm
-        doc = fitz.open() # empty pdf
-        page = doc.new_page() # empty page
-        page.insert_text((50, 72), text) # insert the data
-        doc.save(
-            "secret.pdf",
-            encryption=encrypt_meth, # set the encryption method
-            owner_pw=owner_pass, # set the owner password
-            user_pw=user_pass, # set the user password
-            permissions=perm, # set permissions
-        )
-    
-  
.. note:: @@ -1098,7 +891,7 @@ The following snippet creates a new :title:`PDF` and encrypts it with separate u **Decrypting** will automatically happen on save as before when no encryption parameters are provided. - To **keep the encryption method** of a PDF save it using `encryption=fitz.PDF_ENCRYPT_KEEP`. If `doc.can_save_incrementally() == True`, an incremental save is also possible. + To **keep the encryption method** of a PDF save it using `encryption=pymupdf.PDF_ENCRYPT_KEEP`. If `doc.can_save_incrementally() == True`, an incremental save is also possible. To **change the encryption method** specify the full range of options above (`encryption`, `owner_pw`, `user_pw`, `permissions`). An incremental save is **not possible** in this case. @@ -1106,7 +899,195 @@ The following snippet creates a new :title:`PDF` and encrypts it with separate u - :meth:`Document.save` +-------------------------- + + + +.. _The_Basics_Extracting_Tables: + +Extracting Tables from a :title:`Page` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tables can be found and extracted from any document :ref:`Page`. + +.. code-block:: python + + import pymupdf + from pprint import pprint + + doc = pymupdf.open("test.pdf") # open document + page = doc[0] # get the 1st page of the document + tabs = page.find_tables() # locate and extract any tables on page + print(f"{len(tabs.tables)} found on {page}") # display number of found tables + + if tabs.tables: # at least one table found? + pprint(tabs[0].extract()) # print content of first table + +.. note:: + + **API reference** + + - :meth:`Page.find_tables` + + +.. important:: + + There is also the `pdf2docx extract tables method`_ which is capable of table extraction if you prefer. + + +-------------------------- + + +.. _The_Basics_Get_Page_Links: + +Getting Page Links +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Links can be extracted from a :ref:`Page` to return :ref:`Link` objects. + + +.. code-block:: python + + import pymupdf + + for page in doc: # iterate the document pages + link = page.first_link # a `Link` object or `None` + + while link: # iterate over the links on page + # do something with the link, then: + link = link.next # get next link, last one has `None` in its `next` + + + +.. note:: + + **API reference** + + - :meth:`Page.first_link` + + +----------------------------- + + +.. _The_Basics_Get_All_Annotations: + +Getting All Annotations from a Document +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Annotations (:ref:`Annot`) on pages can be retrieved with the `page.annots()` method. + +.. code-block:: python + + import pymupdf + + for page in doc: + for annot in page.annots(): + print(f'Annotation on page: {page.number} with type: {annot.type} and rect: {annot.rect}') + + +.. note:: + + **API reference** + + - :meth:`Page.annots` + + +-------------------------- + + + +.. _The_Basics_Redacting: + +Redacting content from a |PDF| +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Redactions are special types of annotations which can be marked onto a document page to denote an area on the page which should be securely removed. After marking an area with a rectangle then this area will be marked for *redaction*, once the redaction is *applied* then the content is securely removed. + +For example if we wanted to redact all instances of the name "Jane Doe" from a document we could do the following: + +.. code-block:: python + + import pymupdf + + # Open the PDF document + doc = pymupdf.open('test.pdf') + + # Iterate over each page of the document + for page in doc: + # Find all instances of "Jane Doe" on the current page + instances = page.search_for("Jane Doe") + + # Redact each instance of "Jane Doe" on the current page + for inst in instances: + page.add_redact_annot(inst) + + # Apply the redactions to the current page + page.apply_redactions() + + # Save the modified document + doc.save('redacted_document.pdf') + + # Close the document + doc.close() + + +Another example could be redacting an area of a page, but not to redact any line art (i.e. vector graphics) within the defined area, by setting a parameter flag as follows: + + +.. code-block:: python + + import pymupdf + + # Open the PDF document + doc = pymupdf.open('test.pdf') + + # Get the first page + page = doc[0] + + # Add an area to redact + rect = [0,0,200,200] + + # Add a redacction annotation which will have a red fill color + page.add_redact_annot(rect, fill=(1,0,0)) + + # Apply the redactions to the current page, but ignore vector graphics + page.apply_redactions(graphics=0) + + # Save the modified document + doc.save('redactied_document.pdf') + + # Close the document + doc.close() + + +.. warning:: + + Once a redacted version of a document is saved then the redacted content in the |PDF| is *irretrievable*. Thus, a redacted area in a document removes text and graphics completely from that area. + + +.. note:: + + **Taking it further** + + The are a few options for creating and applying redactions to a page, for the full API details to understand the parameters to control these options refer to the API reference. + + **API reference** + + - :meth:`Page.add_redact_annot` + + - :meth:`Page.apply_redactions` + + +-------------------------- + + + +.. _The Basics_Coverting_PDF_Documents: + +Converting PDF Documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-docx** library to provide simple document conversion from |PDF| to **DOCX** format. diff --git a/docs/tools.rst b/docs/tools.rst index 0d6f72330..2347af6ec 100644 --- a/docs/tools.rst +++ b/docs/tools.rst @@ -10,11 +10,12 @@ This class is a collection of utility methods and attributes, mainly around memo ====================================== ================================================= **Method / Attribute** **Description** ====================================== ================================================= -:meth:`Tools.gen_id` generate a unique identifyer +:meth:`Tools.gen_id` generate a unique identifier :meth:`Tools.store_shrink` shrink the storables cache [#f1]_ :meth:`Tools.mupdf_warnings` return the accumulated MuPDF warnings -:meth:`Tools.mupdf_display_errors` return the accumulated MuPDF warnings -:meth:`Tools.reset_mupdf_warnings` empty MuPDF messages on STDOUT +:meth:`Tools.mupdf_display_errors` control whether MuPDF errors are displayed as messages. +:meth:`Tools.mupdf_display_warnings` control whether MuPDF warnings are displayed as messages. +:meth:`Tools.reset_mupdf_warnings` empty MuPDF warnings/errors message buffer. :meth:`Tools.set_aa_level` set the anti-aliasing values :meth:`Tools.set_annot_stem` set the prefix of new annotation / link ids :meth:`Tools.set_small_glyph_heights` search and extract using small bbox heights @@ -64,7 +65,7 @@ This class is a collection of utility methods and attributes, mainly around memo :arg bool on: if omitted or `None`, the current setting is returned. For other values the *bool()* function is applied to set a global variable. If `True`, :meth:`Page.search_for` and :meth:`Page.get_text` methods return character, span, line or block bboxes that have a height of *font size*. If `False` (standard setting when PyMuPDF is imported), bbox height will be based on font properties and normally equal *line height*. :rtype: bool - :returns: *True* or *False*. + :returns: ``True`` or ``False``. .. note:: Text extraction options "xml", "xhtml" and "html", which directly wrap MuPDF code, are not influenced by this. @@ -77,7 +78,7 @@ This class is a collection of utility methods and attributes, mainly around memo :arg bool on: if omitted / `None`, the current setting is returned. Arguments evaluating to `True` or `False` set a global variable. If `True`, options "dict", "json", "rawdict" and "rawjson" will return e.g. `"NOHSJV+Calibri-Light"`, otherwise only `"Calibri-Light"` (the default). The setting remains in effect until changed again. :rtype: bool - :returns: *True* or *False*. + :returns: ``True`` or ``False``. .. note:: Except mentioned above, no other text extraction variants are influenced by this. This is especially true for the options "xml", "xhtml" and "html", which are based on MuPDF code. They extract the font name `"Calibri-Light"`, or even just the **family** name -- `Calibri` in this example. @@ -91,7 +92,7 @@ This class is a collection of utility methods and attributes, mainly around memo :arg bool on: if omitted or `None`, the current setting is returned. For other values the *bool()* function is applied to set a global variable. If `True`, PyMuPDF will not try to access the resp. font properties and use values `ascender=0.8` and `descender=-0.2` instead. :rtype: bool - :returns: *True* or *False*. + :returns: ``True`` or ``False``. .. method:: store_shrink(percent) @@ -131,13 +132,34 @@ This class is a collection of utility methods and attributes, mainly around memo .. method:: mupdf_display_errors(value=None) + Control whether MuPDF errors should be displayed as |PyMuPDF| messages. + + :arg value: + * If `None`, the current setting is left unchanged. + * Otherwise changes the current setting to `bool(value)`; + if ``True``, future MuPDF errors will be shown as :ref:`Messages`. + * Regardless of this setting, MuPDF errors will always be stored in the warnings store. + * Upon import of |PyMuPDF| this value is ``True``. + + :returns: The current setting as ``True`` or ``False``. + * New in version 1.16.8 - Show or set whether MuPDF errors should be displayed. - :arg bool value: if not a bool, the current setting is returned. If true, MuPDF errors will be shown on *sys.stderr*, otherwise suppressed. In any case, messages continue to be stored in the warnings store. Upon import of PyMuPDF this value is *True*. + .. method:: mupdf_display_warnings(value=None) + + Control whether MuPDF warnings should be displayed as |PyMuPDF| messages. - :returns: *True* or *False* + :arg value: + * If `None`, the current setting is left unchanged. + * Otherwise changes the current setting to `bool(value)`; + if ``True``, future MuPDF warnings will be shown as :ref:`Messages`. + * Regardless of this setting, MuPDF warnings will always be stored in the warnings store. + * Upon import of |PyMuPDF| this value is ``True``. + + :returns: The current setting as ``True`` or ``False``. + + * New in version 1.16.8 .. method:: mupdf_warnings(reset=True) @@ -182,9 +204,9 @@ This class is a collection of utility methods and attributes, mainly around memo base14 Base-14 fonts (should always be true) ================= =================================================== - For an explanation of the term "TOFU" see `this Wikipedia article `_.:: + For an explanation of the term "TOFU" see `this Wikipedia article `_:: - In [1]: import fitz + In [1]: import pymupdf In [2]: TOOLS.fitz_config Out[2]: {'plotter-g': True, @@ -216,56 +238,55 @@ This class is a collection of utility methods and attributes, mainly around memo .. attribute:: store_maxsize - Maximum storables cache size in bytes. PyMuPDF is generated with a value of 268'435'456 (256 MB, the default value), which you should therefore always see here. If this value is zero, then an "unlimited" growth is permitted. + Maximum storables cache size in bytes. |PyMuPDF| is generated with a value of 268'435'456 (256 MB, the default value), which you should therefore always see here. If this value is zero, then an "unlimited" growth is permitted. :rtype: int .. attribute:: store_size - Current storables cache size in bytes. This value may change (and will usually increase) with every use of a PyMuPDF function. It will (automatically) decrease only when :attr:`Tools.store_maxize` is going to be exceeded: in this case, MuPDF will evict low-usage objects until the value is again in range. + Current storables cache size in bytes. This value may change (and will usually increase) with every use of a |PyMuPDF| function. It will (automatically) decrease only when :attr:`Tools.store_maxsize` is going to be exceeded: in this case, |MuPDF| will evict low-usage objects until the value is again in range. :rtype: int Example Session ---------------- -.. highlight:: python +.. code-block:: python -:: - >>> import fitz + >>> import pymupdf # print the maximum and current cache sizes - >>> fitz.TOOLS.store_maxsize + >>> pymupdf.TOOLS.store_maxsize 268435456 - >>> fitz.TOOLS.store_size + >>> pymupdf.TOOLS.store_size 0 - >>> doc = fitz.open("demo1.pdf") + >>> doc = pymupdf.open("demo1.pdf") # pixmap creation puts lots of object in cache (text, images, fonts), # apart from the pixmap itself >>> pix = doc[0].get_pixmap(alpha=False) - >>> fitz.TOOLS.store_size + >>> pymupdf.TOOLS.store_size 454519 # release (at least) 50% of the storage - >>> fitz.TOOLS.store_shrink(50) + >>> pymupdf.TOOLS.store_shrink(50) 13471 - >>> fitz.TOOLS.store_size + >>> pymupdf.TOOLS.store_size 13471 # get a few unique numbers - >>> fitz.TOOLS.gen_id() + >>> pymupdf.TOOLS.gen_id() 1 - >>> fitz.TOOLS.gen_id() + >>> pymupdf.TOOLS.gen_id() 2 - >>> fitz.TOOLS.gen_id() + >>> pymupdf.TOOLS.gen_id() 3 # close document and see how much cache is still in use >>> doc.close() - >>> fitz.TOOLS.store_size + >>> pymupdf.TOOLS.store_size 0 >>> .. rubric:: Footnotes -.. [#f1] This memory area is internally used by MuPDF, and it serves as a cache for objects that have already been read and interpreted, thus improving performance. The most bulky object types are images and also fonts. When an application starts up the MuPDF library (in our case this happens as part of *import fitz*), it must specify a maximum size for this area. PyMuPDF's uses the default value (256 MB) to limit memory consumption. Use the methods here to control or investigate store usage. For example: even after a document has been closed and all related objects have been deleted, the store usage may still not drop down to zero. So you might want to enforce that before opening another document. +.. [#f1] This memory area is internally used by MuPDF, and it serves as a cache for objects that have already been read and interpreted, thus improving performance. The most bulky object types are images and also fonts. When an application starts up the MuPDF library (in our case this happens as part of *import pymupdf*), it must specify a maximum size for this area. PyMuPDF's uses the default value (256 MB) to limit memory consumption. Use the methods here to control or investigate store usage. For example: even after a document has been closed and all related objects have been deleted, the store usage may still not drop down to zero. So you might want to enforce that before opening another document. .. [#f2] By default PyMuPDF and MuPDF use `malloc()`/`free()` for dynamic memory management. One can instead force them to use the Python allocation functions `PyMem_New()`/`PyMem_Del()`, by modifying *fitz/fitz.i* to do `#define JM_MEMORY 1` and rebuilding PyMuPDF. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 64da92c24..19ddb1968 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -2,22 +2,25 @@ .. _Tutorial: + ========= Tutorial ========= .. highlight:: python -This tutorial will show you the use of :title:`PyMuPDF`, :title:`MuPDF` in :title:`Python`, step by step. +This tutorial will show you the use of |PyMuPDF|, |MuPDF| in Python, step by step. + +Because |MuPDF| supports not only PDF, but also XPS, OpenXPS, CBZ, CBR, FB2 and EPUB formats, so does |PyMuPDF| [#f1]_. Nevertheless, for the sake of brevity we will only talk about PDF files. At places where indeed only PDF files are supported, this will be mentioned explicitly. -Because :title:`MuPDF` supports not only PDF, but also XPS, OpenXPS, CBZ, CBR, FB2 and EPUB formats, so does PyMuPDF [#f1]_. Nevertheless, for the sake of brevity we will only talk about PDF files. At places where indeed only PDF files are supported, this will be mentioned explicitly. +In addition to this introduction, please do visit PyMuPDF's `YouTube Channel `_ which covers most of the following in the form of YouTube "Shorts" and longer videos. Importing the Bindings ========================== The Python bindings to MuPDF are made available by this import statement. We also show here how your version can be checked:: - >>> import fitz - >>> print(fitz.__doc__) + >>> import pymupdf + >>> print(pymupdf.__doc__) PyMuPDF 1.16.0: Python bindings for the MuPDF 1.16.0 library. Version date: 2019-07-28 07:30:14. Built for Python 3.7 on win32 (64-bit). @@ -25,16 +28,18 @@ The Python bindings to MuPDF are made available by this import statement. We als Note on the Name *fitz* -------------------------- -The top level Python import name for this library is **"fitz"**. This has historical reasons: + +Old versions of |PyMuPDF| had their **Python** import name as `fitz`. Newer versions use `pymupdf` instead, and offer `fitz` as a fallback so that old code will still work. + +The reason for the name `fitz` is a historical curiosity: The original rendering library for MuPDF was called *Libart*. *"After Artifex Software acquired the MuPDF project, the development focus shifted on writing a new modern graphics library called "Fitz". Fitz was originally intended as an R&D project to replace the aging Ghostscript graphics library, but has instead become the rendering engine powering MuPDF."* (Quoted from `Wikipedia `_). - .. note:: - So :title:`PyMuPDF` **cannot coexist** with packages named "fitz" in the same Python environment. + Use of legacy name `fitz` can fail if defunct pypi.org package `fitz` is installed; see :ref:`problems-after-installation`. .. _Tutorial_Opening_a_Document: @@ -44,7 +49,7 @@ Opening a Document To access a :ref:`supported document`, it must be opened with the following statement:: - doc = fitz.open(filename) # or fitz.Document(filename) + doc = pymupdf.open(filename) # or pymupdf.Document(filename) This creates the :ref:`Document` object *doc*. *filename* must be a Python string (or a `pathlib.Path`) specifying the name of an existing file. @@ -66,7 +71,7 @@ Some :ref:`Document` Methods and Attributes Accessing Meta Data ======================== -PyMuPDF fully supports standard metadata. :attr:`Document.metadata` is a Python dictionary with the following keys. It is available for **all document types**, though not all entries may always contain data. For details of their meanings and formats consult the respective manuals, e.g. :ref:`AdobeManual` for PDF. Further information can also be found in chapter :ref:`Document`. The meta data fields are strings or *None* if not otherwise indicated. Also be aware that not all of them always contain meaningful data -- even if they are not *None*. +PyMuPDF fully supports standard metadata. :attr:`Document.metadata` is a Python dictionary with the following keys. It is available for **all document types**, though not all entries may always contain data. For details of their meanings and formats consult the respective manuals, e.g. :ref:`AdobeManual` for PDF. Further information can also be found in chapter :ref:`Document`. The meta data fields are strings or ``None`` if not otherwise indicated. Also be aware that not all of them always contain meaningful data -- even if they are not ``None``. ============== ================================= Key Value @@ -159,9 +164,9 @@ This example creates a **raster** image of a page's content:: pix = page.get_pixmap() -*pix* is a :ref:`Pixmap` object which (in this case) contains an **RGB** image of the page, ready to be used for many purposes. Method :meth:`Page.get_pixmap` offers lots of variations for controlling the image: resolution / DPI, colorspace (e.g. to produce a grayscale image or an image with a subtractive color scheme), transparency, rotation, mirroring, shifting, shearing, etc. For example: to create an **RGBA** image (i.e. containing an alpha channel), specify *pix = page.get_pixmap(alpha=True)*. +``pix`` is a :ref:`Pixmap` object which (in this case) contains an **RGB** image of the page, ready to be used for many purposes. Method :meth:`Page.get_pixmap` offers lots of variations for controlling the image: resolution / DPI, colorspace (e.g. to produce a grayscale image or an image with a subtractive color scheme), transparency, rotation, mirroring, shifting, shearing, etc. For example: to create an **RGBA** image (i.e. containing an alpha channel), specify *pix = page.get_pixmap(alpha=True)*. -A :ref:`Pixmap` contains a number of methods and attributes which are referenced below. Among them are the integers *width*, *height* (each in pixels) and *stride* (number of bytes of one horizontal image line). Attribute *samples* represents a rectangular area of bytes representing the image data (a Python *bytes* object). +A :ref:`Pixmap` contains a number of methods and attributes which are referenced below. Among them are the integers ``width``, ``height`` (each in pixels) and ``stride`` (number of bytes of one horizontal image line). Attribute ``samples`` represents a rectangular area of bytes representing the image data (a Python ``bytes`` object). .. note:: You can also create a **vector** image of a page by using :meth:`Page.get_svg_image`. Refer to this `Vector Image Support page`_ for details. @@ -198,7 +203,7 @@ Please also see section 3.19 of the `Pillow documentation`_:: The following **avoids using Pillow**:: # remove alpha if present - pix1 = fitz.Pixmap(pix, 0) if pix.alpha else pix # PPM does not support transparency + pix1 = pymupdf.Pixmap(pix, 0) if pix.alpha else pix # PPM does not support transparency imgdata = pix1.tobytes("ppm") # extremely fast! tkimg = tkinter.PhotoImage(data = imgdata) @@ -248,7 +253,7 @@ Use one of the following strings for *opt* to obtain different formats [#f2]_: * **"xml"**: contains no images, but full position and font information down to each single text character. Use an XML module to interpret. -To give you an idea about the output of these alternatives, we did text example extracts. See :ref:`Appendix2`. +To give you an idea about the output of these alternatives, we did text example extracts. See :ref:`Appendix1`. Searching for Text ------------------- @@ -349,7 +354,7 @@ Method :meth:`Document.insert_pdf` copies pages **between different** PDF docume Here is a snippet that **splits** *doc1*. It creates a new document of its first and its last 10 pages:: - doc2 = fitz.open() # new empty PDF + doc2 = pymupdf.open() # new empty PDF doc2.insert_pdf(doc1, to_page = 9) # first 10 pages doc2.insert_pdf(doc1, from_page = len(doc1) - 10) # last 10 pages doc2.save("first-and-last-10.pdf") @@ -361,7 +366,7 @@ Embedding Data PDFs can be used as containers for arbitrary data (executables, other PDFs, text or binary files, etc.) much like ZIP archives. -PyMuPDF fully supports this feature via :ref:`Document` *embfile_** methods and attributes. For some detail read :ref:`Appendix 3`, consult the Wiki on `dealing with embedding files`_, or the example scripts `embedded-copy.py`_, `embedded-export.py`_, `embedded-import.py`_, and `embedded-list.py`_. +PyMuPDF fully supports this feature via :ref:`Document` ``embfile_*`` methods and attributes. For some detail read :ref:`Appendix 3`, consult the Wiki on `dealing with embedding files`_, or the example scripts `embedded-copy.py`_, `embedded-export.py`_, `embedded-import.py`_, and `embedded-list.py`_. Saving @@ -369,7 +374,7 @@ Saving As mentioned above, :meth:`Document.save` will **always** save the document in its current state. -You can write changes back to the **original PDF** by specifying option *incremental=True*. This process is (usually) **extremely fast**, since changes are **appended to the original file** without completely rewriting it. +You can write changes back to the **original PDF** by specifying option ``incremental=True``. This process is (usually) **extremely fast**, since changes are **appended to the original file** without completely rewriting it. :meth:`Document.save` options correspond to options of MuPDF's command line utility *mutool clean*, see the following table. @@ -389,9 +394,9 @@ linear=True l create a linearized version expand=True d decompress all streams =================== =========== ================================================== -.. note:: For an explanation of terms like *object, stream, xref* consult the :ref:`Glossary` chapter. +.. note:: For an explanation of terms like ``object``, ``stream``, ``xref`` consult the :ref:`Glossary` chapter. -For example, *mutool clean -ggggz file.pdf* yields excellent compression results. It corresponds to *doc.save(filename, garbage=4, deflate=True)*. +For example, ``mutool clean -ggggz file.pdf`` yields excellent compression results. It corresponds to ``doc.save(filename, garbage=4, deflate=True)``. Closing ========= diff --git a/docs/vars.rst b/docs/vars.rst index cd86c6c74..cc0e8a5fc 100644 --- a/docs/vars.rst +++ b/docs/vars.rst @@ -3,7 +3,7 @@ =============================== Constants and Enumerations =============================== -Constants and enumerations of :title:`MuPDF` as implemented by :title:`PyMuPDF`. Each of the following variables is accessible as *fitz.variable*. +Constants and enumerations of :title:`MuPDF` as implemented by |PyMuPDF|. Each of the following values is accessible as `pymupdf.value`. Constants @@ -13,69 +13,89 @@ Constants Predefined Python list of valid :ref:`Base-14-Fonts`. - :rtype: list + :type: list .. py:data:: csRGB - Predefined RGB colorspace *fitz.Colorspace(fitz.CS_RGB)*. + Predefined RGB colorspace *pymupdf.Colorspace(pymupdf.CS_RGB)*. - :rtype: :ref:`Colorspace` + :type: :ref:`Colorspace` .. py:data:: csGRAY - Predefined GRAY colorspace *fitz.Colorspace(fitz.CS_GRAY)*. + Predefined GRAY colorspace *pymupdf.Colorspace(pymupdf.CS_GRAY)*. - :rtype: :ref:`Colorspace` + :type: :ref:`Colorspace` .. py:data:: csCMYK - Predefined CMYK colorspace *fitz.Colorspace(fitz.CS_CMYK)*. + Predefined CMYK colorspace *pymupdf.Colorspace(pymupdf.CS_CMYK)*. - :rtype: :ref:`Colorspace` + :type: :ref:`Colorspace` .. py:data:: CS_RGB 1 -- Type of :ref:`Colorspace` is RGBA - :rtype: int + :type: int .. py:data:: CS_GRAY 2 -- Type of :ref:`Colorspace` is GRAY - :rtype: int + :type: int .. py:data:: CS_CMYK 3 -- Type of :ref:`Colorspace` is CMYK - :rtype: int + :type: int -.. py:data:: VersionBind +.. py:data:: mupdf_version - 'x.xx.x' -- version of PyMuPDF (these bindings) + 'x.xx.x' -- MuPDF version that is being used by PyMuPDF. - :rtype: string + :type: string -.. py:data:: VersionFitz +.. py:data:: mupdf_version_tuple - 'x.xxx' -- version of MuPDF + MuPDF version as a tuple of integers, `(major, minor, patch)`. + + :type: tuple - :rtype: string +.. py:data:: pymupdf_version -.. py:data:: VersionDate + 'x.xx.x' -- PyMuPDF version. - ISO timestamp *YYYY-MM-DD HH:MM:SS* when these bindings were built. + :type: string - :rtype: string +.. py:data:: pymupdf_version_tuple -.. Note:: The docstring of *fitz* contains information of the above which can be retrieved like so: *print(fitz.__doc__)*, and should look like: *PyMuPDF 1.10.0: Python bindings for the MuPDF 1.10 library, built on 2016-11-30 13:09:13*. + PyMuPDF version as a tuple of integers, `(major, minor, patch)`. + + :type: tuple +.. py:data:: pymupdf_date + + Disabled (set to None) in 1.26.1. + .. py:data:: version - (VersionBind, VersionFitz, timestamp) -- combined version information where *timestamp* is the generation point in time formatted as "YYYYMMDDhhmmss". + (pymupdf_version, mupdf_version, timestamp) -- combined version information where `timestamp` is the generation point in time formatted as "YYYYMMDDhhmmss". + + :type: tuple - :rtype: tuple +.. py:data:: VersionBind + + Legacy equivalent to `mupdf_version`. + +.. py:data:: VersionFitz + + Legacy equivalent to `pymupdf_version`. + +.. py:data:: VersionDate + + Disabled (set to None) in 1.26.1. .. _PermissionCodes: @@ -165,11 +185,37 @@ Text Alignment .. _TextPreserve: +.. _FontProperties: + +Font Properties +----------------------- +Please note that the following bits are derived from what a font has to say about its properties. It may not be (and quite often is not) correct. + +.. py:data:: TEXT_FONT_SUPERSCRIPT + + 1 -- the character or span is a superscript. This property is computed by MuPDF and not part of any font information. + +.. py:data:: TEXT_FONT_ITALIC + + 2 -- the font is italic. + +.. py:data:: TEXT_FONT_SERIFED + + 4 -- the font is serifed. + +.. py:data:: TEXT_FONT_MONOSPACED + + 8 -- the font is mono-spaced. + +.. py:data:: TEXT_FONT_BOLD + + 16 -- the font is bold. + Text Extraction Flags --------------------- -Option bits controlling the amount of data, that are parsed into a :ref:`TextPage` -- this class is mainly used only internally in PyMuPDF. +Option bits controlling the amount of data, that are parsed into a :ref:`TextPage`. -For the PyMuPDF programmer, some combination (using Python's `|` operator, or simply use `+`) of these values are aggregated in the `flags` integer, a parameter of all text search and text extraction methods. Depending on the individual method, different default combinations of the values are used. Please use a value that meets your situation. Especially make sure to switch off image extraction unless you really need them. The impact on performance and memory is significant! +For the PyMuPDF programmer, some combination (using Python's `|` operator, or simply use `+`) of these values are aggregated in the ``flags`` integer, a parameter of all text search and text extraction methods. Depending on the individual method, different default combinations of the values are used. Please use a value that meets your situation. Especially make sure to switch off image extraction unless you really need them. The impact on performance and memory is significant! .. py:data:: TEXT_PRESERVE_LIGATURES @@ -197,45 +243,72 @@ For the PyMuPDF programmer, some combination (using Python's `|` operator, or si .. py:data:: TEXT_MEDIABOX_CLIP - 64 -- If set, characters entirely outside a page's **mediabox** will be ignored. This is default in PyMuPDF. + 64 -- Characters entirely outside a page's **mediabox** or contained in other "clipped" areas will be ignored. This is default in PyMuPDF. + +.. py:data:: TEXT_USE_CID_FOR_UNKNOWN_UNICODE + + 128 -- Use raw character codes instead of U+FFFD. This is the default for **text extraction** in PyMuPDF. If you **want to detect** when encoding information is missing or uncertain, toggle this flag and scan for the presence of U+FFFD (= `chr(0xfffd)`) code points in the resulting text. + +.. py:data:: TEXT_COLLECT_STRUCTURE + + 256 -- Not supported. + +.. py:data:: TEXT_ACCURATE_BBOXES + + 512 -- Ignore metric values of all fonts when computing character boundary boxes -- most prominently the `ascender `_ and `descender `_ values. Instead, follow the drawing commands of each character's glyph and compute its rectangle hull. This is the smallest rectangle wrapping all points used for drawing the visual appearance - see the :ref:`Shape` class for understanding the background. This will especially result in individual character heights. For instance a (white) space will have a **bbox of height 0** (because nothing is drawn) -- in contrast to the non-zero boundary box generated when using font metrics. This option may be useful to cope with getting meaningful boundary boxes even for fonts containing errors. Its use will slow down text extraction somewhat because of the incurred computational effort. + + Note that this has no effect by default - one must also disable the global + quad corrections setting with `pymupdf.TOOLS.unset_quad_corrections(True)`. + +.. py:data:: TEXT_COLLECT_VECTORS + + 1024 -- Not supported. + +.. py:data:: TEXT_IGNORE_ACTUALTEXT + + 2048 -- Ignore built-in differences between text appearing in e.g. PDF viewers versus text stored in the PDF. See :ref:`AdobeManual`, page 615 for background. If set, the **stored** ("replacement" text) is ignored in favor of the displayed text. + +.. py:data:: TEXT_SEGMENT + + 4096 -- Attempt to segment page into different regions. The following constants represent the default combinations of the above for text extraction and searching: .. py:data:: TEXTFLAGS_TEXT - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_WORDS - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_BLOCKS - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_DICT - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_RAWDICT - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_HTML - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_XHTML - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_XML - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP` + `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_USE_CID_FOR_UNKNOWN_UNICODE` .. py:data:: TEXTFLAGS_SEARCH - `TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_DEHYPHENATE` + `TEXT_PRESERVE_WHITESPACE | TEXT_MEDIABOX_CLIP | TEXT_DEHYPHENATE` .. _linkDest Kinds: @@ -248,37 +321,43 @@ Possible values of :attr:`linkDest.kind` (link destination kind). 0 -- No destination. Indicates a dummy link. - :rtype: int + :type: int .. py:data:: LINK_GOTO 1 -- Points to a place in this document. - :rtype: int + :type: int .. py:data:: LINK_URI 2 -- Points to a URI -- typically a resource specified with internet syntax. + + * PyMuPDF treats any external link that contains a colon and does not start + with `file:`, as `LINK_URI`. - :rtype: int + :type: int .. py:data:: LINK_LAUNCH 3 -- Launch (open) another file (of any "executable" type). + + * |PyMuPDF| treats any external link that starts with `file:` or doesn't + contain a colon, as `LINK_LAUNCH`. - :rtype: int + :type: int .. py:data:: LINK_NAMED 4 -- points to a named location. - :rtype: int + :type: int .. py:data:: LINK_GOTOR 5 -- Points to a place in another PDF document. - :rtype: int + :type: int .. _linkDest Flags: @@ -291,43 +370,43 @@ Link Destination Flags 1 (bit 0) Top left x value is valid - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_T_VALID 2 (bit 1) Top left y value is valid - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_R_VALID 4 (bit 2) Bottom right x value is valid - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_B_VALID 8 (bit 3) Bottom right y value is valid - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_FIT_H 16 (bit 4) Horizontal fit - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_FIT_V 32 (bit 5) Vertical fit - :rtype: bool + :type: bool .. py:data:: LINK_FLAG_R_IS_ZOOM 64 (bit 6) Bottom right x is a zoom figure - :rtype: bool + :type: bool Annotation Related Constants @@ -338,7 +417,7 @@ See chapter 8.4.5, pp. 615 of the :ref:`AdobeManual` for details. Annotation Types ~~~~~~~~~~~~~~~~~ -These identifiers also cover **links** and **widgets**: the PDF specification technically handles them all in the same way, whereas **MuPDF** (and PyMuPDF) treats them as three basically different types of objects. +These identifiers also cover **links** and **widgets**: the PDF specification technically handles them all in the same way, whereas |MuPDF| (and PyMuPDF) treats them as three basically different types of objects. :: diff --git a/docs/version.rst b/docs/version.rst index a9616dc77..f47538726 100644 --- a/docs/version.rst +++ b/docs/version.rst @@ -1,6 +1,15 @@ +.. include:: header.rst + ---- -This documentation covers **PyMuPDF v1.22.5** features as of **2023-06-21 00:00:01**. +This documentation covers PyMuPDF |version|. + +The major and minor versions of |PyMuPDF| and |MuPDF| will always be the same. Only the third qualifier (patch level) may deviate from that of |MuPDF|. + +Typically PyMuPDF is released more frequently than MuPDF so it will often be +the case that the patch level of PyMuPDF will be greater than the embedded +MuPDF. -The major and minor versions of **PyMuPDF** and **MuPDF** will always be the same. Only the third qualifier (patch level) may deviate from that of **MuPDF**. +For example PyMuPDF-1.24.5 contains MuPDF-1.24.2. +Also see `pymupdf_version` and `mupdf_version`. diff --git a/docs/widget.rst b/docs/widget.rst index d08ba0068..381c8c753 100644 --- a/docs/widget.rst +++ b/docs/widget.rst @@ -6,11 +6,15 @@ Widget ================ +|pdf_only_class| + This class represents a PDF Form field, also called a "widget". Throughout this documentation, we are using these terms synonymously. Fields technically are a special case of PDF annotations, which allow users with limited permissions to enter information in a PDF. This is primarily used for filling out forms. Like annotations, widgets live on PDF pages. Similar to annotations, the first widget on a page is accessible via :attr:`Page.first_widget` and subsequent widgets can be accessed via the :attr:`Widget.next` property. -*(Changed in version 1.16.0)* MuPDF no longer treats widgets as a subset of general annotations. Consequently, :attr:`Page.first_annot` and :meth:`Annot.next` will deliver **non-widget annotations exclusively**, and be *None* if only form fields exist on a page. Vice versa, :attr:`Page.first_widget` and :meth:`Widget.next` will only show widgets. This design decision is purely internal to MuPDF; technically, links, annotations and fields have a lot in common and also continue to share the better part of their code within (Py-) MuPDF. +Like annotations, widgets also lose connection to their page when the page becomes unavailable, please see `here `_ for details. This is relevant especially when updating the widget: this will fail if the original page object is no longer available. + +*(Changed in version 1.16.0)* MuPDF no longer treats widgets as a subset of general annotations. Consequently, :attr:`Page.first_annot` and :meth:`Annot.next` will deliver **non-widget annotations exclusively**, and be ``None`` if only form fields exist on a page. Vice versa, :attr:`Page.first_widget` and :meth:`Widget.next` will only show widgets. This design decision is purely internal to MuPDF; technically, links, annotations and fields have a lot in common and also continue to share the better part of their code within (Py-) MuPDF. **Class API** @@ -51,9 +55,11 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w True - .. method:: update + .. method:: update(sync_flags=False) - After any changes to a widget, this method **must be used** to store them in the PDF [#f1]_. + After any changes to a widget, this **method must be used** to reflect changes in the PDF [#f1]_. + + :arg bool sync_flags: if ``True``, the widget's :attr:`Widget.field_flags` are copied to the ``Parent`` object (if present) and all widgets named in its ``Kids`` array. This provides a convenient way to -- for example -- set all instances of the widget to read-only, no matter on which page they may occur [#f2]_. .. method:: reset @@ -61,11 +67,11 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w .. attribute:: next - Point to the next form field on the page. The last widget returns *None*. + Point to the next form field on the page. The last widget returns ``None``. .. attribute:: border_color - A list of up to 4 floats defining the field's border color. Default value is *None* which causes border style and border width to be ignored. + A list of up to 4 floats defining the field's border color. Default value is ``None`` which causes border style and border width to be ignored. .. attribute:: border_style @@ -117,7 +123,7 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w .. attribute:: is_signed - A bool indicating the signing status of a signature field, else *None*. + A bool indicating the signing status of a signature field, else ``None``. .. attribute:: rect @@ -133,7 +139,7 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w .. attribute:: text_fontsize - A float defining the text fontsize. Default value is zero, which causes PDF viewer software to dynamically choose a size suitable for the annotation's rectangle and text amount. + A float defining the text :data:`fontsize`. Default value is zero, which causes PDF viewer software to dynamically choose a size suitable for the annotation's rectangle and text amount. .. attribute:: text_maxlen @@ -151,52 +157,52 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w * New in version 1.16.12 - JavaScript text (unicode) for an action associated with the widget, or *None*. This is the only script action supported for **button type** widgets. + JavaScript text (unicode) for an action associated with the widget, or ``None``. This is the only script action supported for **button type** widgets. .. attribute:: script_stroke * New in version 1.16.12 - JavaScript text (unicode) to be performed when the user types a key-stroke into a text field or combo box or modifies the selection in a scrollable list box. This action can check the keystroke for validity and reject or modify it. *None* if not present. + JavaScript text (unicode) to be performed when the user types a key-stroke into a text field or combo box or modifies the selection in a scrollable list box. This action can check the keystroke for validity and reject or modify it. ``None`` if not present. .. attribute:: script_format * New in version 1.16.12 - JavaScript text (unicode) to be performed before the field is formatted to display its current value. This action can modify the field’s value before formatting. *None* if not present. + JavaScript text (unicode) to be performed before the field is formatted to display its current value. This action can modify the field’s value before formatting. ``None`` if not present. .. attribute:: script_change * New in version 1.16.12 - JavaScript text (unicode) to be performed when the field’s value is changed. This action can check the new value for validity. *None* if not present. + JavaScript text (unicode) to be performed when the field’s value is changed. This action can check the new value for validity. ``None`` if not present. .. attribute:: script_calc * New in version 1.16.12 - JavaScript text (unicode) to be performed to recalculate the value of this field when that of another field changes. *None* if not present. + JavaScript text (unicode) to be performed to recalculate the value of this field when that of another field changes. ``None`` if not present. .. attribute:: script_blur * New in version 1.22.6 - JavaScript text (unicode) to be performed on losing the focus of this field. *None* if not present. + JavaScript text (unicode) to be performed on losing the focus of this field. ``None`` if not present. .. attribute:: script_focus * New in version 1.22.6 - JavaScript text (unicode) to be performed on focusing this field. *None* if not present. + JavaScript text (unicode) to be performed on focusing this field. ``None`` if not present. .. note:: 1. For **adding** or **changing** one of the above scripts, just put the appropriate JavaScript source code in the widget attribute. - To **remove** a script, set the respective attribute to *None*. + To **remove** a script, set the respective attribute to ``None``. 2. Button fields only support :attr:`script`. - Other script entries will automatically be set to *None*. + Other script entries will automatically be set to ``None``. 3. It is worthwhile to look at `this `_ @@ -211,7 +217,7 @@ Like annotations, widgets live on PDF pages. Similar to annotations, the first w Standard Fonts for Widgets ---------------------------------- -Widgets use their own resources object */DR*. A widget resources object must at least contain a */Font* object. Widget fonts are independent from page fonts. We currently support the 14 PDF base fonts using the following fixed reference names, or any name of an already existing field font. When specifying a text font for new or changed widgets, **either** choose one in the first table column (upper and lower case supported), **or** one of the already existing form fonts. In the latter case, spelling must exactly match. +Widgets use their own resources object ``/DR``. A widget resources object must at least contain a ``/Font`` object. Widget fonts are independent from page fonts. We currently support the 14 PDF base fonts using the following fixed reference names, or any name of an already existing field font. When specifying a text font for new or changed widgets, **either** choose one in the first table column (upper and lower case supported), **or** one of the already existing form fonts. In the latter case, spelling must exactly match. To find out already existing field fonts, inspect the list :attr:`Document.FormFonts`. @@ -234,7 +240,7 @@ TiRo Times-Roman ZaDb ZapfDingbats ============= ======================= -You are generally free to use any font for every widget. However, we recommend using *ZaDb* ("ZapfDingbats") and fontsize 0 for check boxes: typical viewers will put a correctly sized tickmark in the field's rectangle, when it is clicked. +You are generally free to use any font for every widget. However, we recommend using ``ZaDb`` ("ZapfDingbats") and :data:`fontsize` 0 for check boxes: typical viewers will put a correctly sized tickmark in the field's rectangle, when it is clicked. Supported Widget Types ----------------------- @@ -245,11 +251,13 @@ PyMuPDF supports the creation and update of many, but not all widget types. * check box (`PDF_WIDGET_TYPE_CHECKBOX`) * combo box (`PDF_WIDGET_TYPE_COMBOBOX`) * list box (`PDF_WIDGET_TYPE_LISTBOX`) -* radio button (`PDF_WIDGET_TYPE_RADIOBUTTON`): PyMuPDF does not currently support the **creation** of groups of (interconnected) radio buttons, where setting one automatically unsets the other buttons in the group. The widget object also does not reflect the presence of a button group. However: consistently selecting (or unselecting) a radio button is supported. This includes correctly setting the value maintained in the owning button group. Selecting a radio button may be done by either assigning `True` or `field.on_sate()` to the field value. **De-selecting** the button should be done assigning `False`. -* signature (`PDF_WIDGET_TYPE_SIGNATURE`) **read only**. +* radio button (`PDF_WIDGET_TYPE_RADIOBUTTON`): PyMuPDF does not currently support the **creation** of groups of (interconnected) radio buttons, where setting one button automatically unsets the other buttons in the group. The widget object also does not reflect the presence of a button group. However: consistently selecting (or unselecting) a radio button is supported. This includes correctly setting the value maintained in the owning button group. Selecting a radio button may be done by either assigning `True` or `field.on_state()` to the field value. **De-selecting** the button should be done assigning `False`. +* signature (`PDF_WIDGET_TYPE_SIGNATURE`) **read only** -- no update or creation of signatures. .. rubric:: Footnotes .. [#f1] If you intend to re-access a new or updated field (e.g. for making a pixmap), make sure to reload the page first. Either close and re-open the document, or load another page first, or simply do `page = doc.reload_page(page)`. +.. [#f2] Among other purposes, ``Parent`` objects are also used to facilitate multiple occurrences of a field (on the same or on different pages). The ``Kids`` array in this ``Parent`` object contains the cross references of all widgets that are "copies" of the same field. Whenever the field value of any "kid" widget is changed, all the other kids are immediately updated too. This is a very efficient way to handle multiple copies of the same field, e.g. for filling out forms. This simultaneous update only happens for :attr:`Widget.field value`. The new parameter ``sync_flags`` extends this to :attr:`Widget.field_flags`. This cannot be automated in the same way as for the field value to allow for more flexibility. + .. include:: footer.rst diff --git a/docs/xml-class.rst b/docs/xml-class.rst index da65c9b53..bfe1e970c 100644 --- a/docs/xml-class.rst +++ b/docs/xml-class.rst @@ -35,7 +35,7 @@ There is no need to ever directly construct an :ref:`Xml` object: after creating :meth:`~.add_var` Add code text (:htmlTag:`code` tag) - inline element, treated like text. :meth:`~.add_samp` Add code text (:htmlTag:`code` tag) - inline element, treated like text. :meth:`~.add_kbd` Add code text (:htmlTag:`code` tag) - inline element, treated like text. -:meth:`~.add_text` Add a text string. Line breaks `\n` are honored as :htmlTag:`br` tags. +:meth:`~.add_text` Add a text string. Line breaks ``\n`` are honored as :htmlTag:`br` tags. :meth:`~.append_child` Append a child node. :meth:`~.clone` Make a copy if this node. :meth:`~.create_element` Make a new node with a given tag name. @@ -159,7 +159,7 @@ There is no need to ever directly construct an :ref:`Xml` object: after creating .. method:: add_text(text) - Add a text string. Line breaks `\n` are honored as :htmlTag:`br` tags. + Add a text string. Line breaks ``\n`` are honored as :htmlTag:`br` tags. .. method:: set_align(value) @@ -359,7 +359,7 @@ There is no need to ever directly construct an :ref:`Xml` object: after creating .. method:: find_next( tag, att, match) - Continue a previous :meth:`Xml.find` (or ::meth:`find_next`) with the same values. + Continue a previous :meth:`Xml.find` (or :meth:`find_next`) with the same values. :rtype: :ref:`Xml`. :returns: `None` if none more found, otherwise the next matching node. diff --git a/docs/znames.rst b/docs/znames.rst index e822d2f60..3f950fc30 100644 --- a/docs/znames.rst +++ b/docs/znames.rst @@ -21,10 +21,10 @@ Starting with version 1.19.0, we will issue deprecation warnings on `sys.stderr` Starting immediately, all deprecated objects (methods and properties) will show a copy of the original's docstring, **prefixed** with the deprecation message, for example:: - >>> print(fitz.Document.pageCount.__doc__) + >>> print(pymupdf.Document.pageCount.__doc__) *** Deprecated and removed in version following 1.19.0 - use 'page_count'. *** Number of pages. - >>> print(fitz.Document.newPage.__doc__) + >>> print(pymupdf.Document.newPage.__doc__) *** Deprecated and removed in version following 1.19.0 - use 'new_page'. *** Create and return a new page object. @@ -38,10 +38,5 @@ Starting immediately, all deprecated objects (methods and properties) will show There is a utility script `alias-changer.py `_ which can be used to do mass-renames in your scripts. It accepts either a single file or a folder as argument. If a folder is supplied, all its Python files and those of its subfolders are changed. Optionally, backups of the scripts can be taken. -Deprecated names are not separately documented. The following list will help you find the documentation of the original. - -.. note:: This is automatically generated. One or two items refer to yet undocumented methods - please simply ignore them. - -.. include:: deprecated.rst .. include:: footer.rst diff --git a/fitz/__init__.py b/fitz/__init__.py deleted file mode 100644 index 090233e1f..000000000 --- a/fitz/__init__.py +++ /dev/null @@ -1,509 +0,0 @@ -# ------------------------------------------------------------------------ -# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com -# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html -# -# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a -# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is -# maintained and developed by Artifex Software, Inc. https://artifex.com. -# ------------------------------------------------------------------------ -import sys - -import glob -import os -if os.path.exists( 'fitz/__init__.py'): - if not glob.glob( 'fitz/_fitz*'): - print( '#' * 40) - print( '# Warning: current directory appears to contain an incomplete') - print( '# fitz/ installation directory so "import fitz" may fail.') - print( '# This can happen if current directory is a PyMuPDF source tree.') - print( '# Suggest changing to a different current directory.') - print( '#' * 40) - -from fitz.fitz import * - -# define the supported colorspaces for convenience -fitz.csRGB = fitz.Colorspace(fitz.CS_RGB) -fitz.csGRAY = fitz.Colorspace(fitz.CS_GRAY) -fitz.csCMYK = fitz.Colorspace(fitz.CS_CMYK) -csRGB = fitz.csRGB -csGRAY = fitz.csGRAY -csCMYK = fitz.csCMYK - -# create the TOOLS object. -# -# Unfortunately it seems that this is never be destructed even if we use an -# atexit() handler, which makes MuPDF's Memento list it as a leak. In fitz.i -# we use Memento_startLeaking()/Memento_stopLeaking() when allocating -# the Tools instance so at least the leak is marked as known. -# -TOOLS = fitz.Tools() -TOOLS.thisown = True -fitz.TOOLS = TOOLS - -# This atexit handler runs, but doesn't cause ~Tools() to be run. -# -import atexit - - -def cleanup_tools(TOOLS): - # print(f'cleanup_tools: TOOLS={TOOLS} id(TOOLS)={id(TOOLS)}') - # print(f'TOOLS.thisown={TOOLS.thisown}') - del TOOLS - del fitz.TOOLS - - -atexit.register(cleanup_tools, TOOLS) - - -# Require that MuPDF matches fitz.TOOLS.mupdf_version(); also allow use with -# next minor version (e.g. 1.21.2 => 1.22), so we can test with mupdf master. -# -def v_str_to_tuple(s): - return tuple(map(int, s.split('.'))) - -def v_tuple_to_string(t): - return '.'.join(map(str, t)) - -mupdf_version_tuple = v_str_to_tuple(fitz.TOOLS.mupdf_version()) -mupdf_version_tuple_required = v_str_to_tuple(fitz.VersionFitz) -mupdf_version_tuple_required_prev = (mupdf_version_tuple_required[0], mupdf_version_tuple_required[1]-1) -mupdf_version_tuple_required_next = (mupdf_version_tuple_required[0], mupdf_version_tuple_required[1]+1) - -if mupdf_version_tuple[:2] not in ( - mupdf_version_tuple_required_prev[:2], - mupdf_version_tuple_required[:2], - mupdf_version_tuple_required_next[:2], - ): - raise ValueError( - f'MuPDF library {v_tuple_to_string(mupdf_version_tuple)!r} mismatch:' - f' require' - f' {v_tuple_to_string(mupdf_version_tuple_required_prev)!r}' - f' or {v_tuple_to_string(mupdf_version_tuple_required)!r}' - f' or {v_tuple_to_string(mupdf_version_tuple_required_next)!r}' - f'.' - ) - -# copy functions in 'utils' to their respective fitz classes -import fitz.utils - -# ------------------------------------------------------------------------------ -# General -# ------------------------------------------------------------------------------ -fitz.recover_quad = fitz.utils.recover_quad -fitz.recover_bbox_quad = fitz.utils.recover_bbox_quad -fitz.recover_line_quad = fitz.utils.recover_line_quad -fitz.recover_span_quad = fitz.utils.recover_span_quad -fitz.recover_char_quad = fitz.utils.recover_char_quad - -# ------------------------------------------------------------------------------ -# Document -# ------------------------------------------------------------------------------ -fitz.open = fitz.Document -fitz.Document._do_links = fitz.utils.do_links -fitz.Document.del_toc_item = fitz.utils.del_toc_item -fitz.Document.get_char_widths = fitz.utils.get_char_widths -fitz.Document.get_ocmd = fitz.utils.get_ocmd -fitz.Document.get_page_labels = fitz.utils.get_page_labels -fitz.Document.get_page_numbers = fitz.utils.get_page_numbers -fitz.Document.get_page_pixmap = fitz.utils.get_page_pixmap -fitz.Document.get_page_text = fitz.utils.get_page_text -fitz.Document.get_toc = fitz.utils.get_toc -fitz.Document.has_annots = fitz.utils.has_annots -fitz.Document.has_links = fitz.utils.has_links -fitz.Document.insert_page = fitz.utils.insert_page -fitz.Document.new_page = fitz.utils.new_page -fitz.Document.scrub = fitz.utils.scrub -fitz.Document.search_page_for = fitz.utils.search_page_for -fitz.Document.set_metadata = fitz.utils.set_metadata -fitz.Document.set_ocmd = fitz.utils.set_ocmd -fitz.Document.set_page_labels = fitz.utils.set_page_labels -fitz.Document.set_toc = fitz.utils.set_toc -fitz.Document.set_toc_item = fitz.utils.set_toc_item -fitz.Document.tobytes = fitz.Document.write -fitz.Document.subset_fonts = fitz.utils.subset_fonts -fitz.Document.get_oc = fitz.utils.get_oc -fitz.Document.set_oc = fitz.utils.set_oc -fitz.Document.xref_copy = fitz.utils.xref_copy - - -# ------------------------------------------------------------------------------ -# Page -# ------------------------------------------------------------------------------ -fitz.Page.apply_redactions = fitz.utils.apply_redactions -fitz.Page.delete_widget = fitz.utils.delete_widget -fitz.Page.draw_bezier = fitz.utils.draw_bezier -fitz.Page.draw_circle = fitz.utils.draw_circle -fitz.Page.draw_curve = fitz.utils.draw_curve -fitz.Page.draw_line = fitz.utils.draw_line -fitz.Page.draw_oval = fitz.utils.draw_oval -fitz.Page.draw_polyline = fitz.utils.draw_polyline -fitz.Page.draw_quad = fitz.utils.draw_quad -fitz.Page.draw_rect = fitz.utils.draw_rect -fitz.Page.draw_sector = fitz.utils.draw_sector -fitz.Page.draw_squiggle = fitz.utils.draw_squiggle -fitz.Page.draw_zigzag = fitz.utils.draw_zigzag -fitz.Page.get_links = fitz.utils.get_links -fitz.Page.get_pixmap = fitz.utils.get_pixmap -fitz.Page.get_text = fitz.utils.get_text -fitz.Page.get_image_info = fitz.utils.get_image_info -fitz.Page.get_text_blocks = fitz.utils.get_text_blocks -fitz.Page.get_text_selection = fitz.utils.get_text_selection -fitz.Page.get_text_words = fitz.utils.get_text_words -fitz.Page.get_textbox = fitz.utils.get_textbox -fitz.Page.insert_image = fitz.utils.insert_image -fitz.Page.insert_link = fitz.utils.insert_link -fitz.Page.insert_text = fitz.utils.insert_text -fitz.Page.insert_textbox = fitz.utils.insert_textbox -fitz.Page.new_shape = lambda x: fitz.utils.Shape(x) -fitz.Page.search_for = fitz.utils.search_for -fitz.Page.show_pdf_page = fitz.utils.show_pdf_page -fitz.Page.update_link = fitz.utils.update_link -fitz.Page.write_text = fitz.utils.write_text -fitz.Page.get_label = fitz.utils.get_label -fitz.Page.get_image_rects = fitz.utils.get_image_rects -fitz.Page.get_textpage_ocr = fitz.utils.get_textpage_ocr -fitz.Page.delete_image = fitz.utils.delete_image -fitz.Page.replace_image = fitz.utils.replace_image - -# ------------------------------------------------------------------------ -# Annot -# ------------------------------------------------------------------------ -fitz.Annot.get_text = fitz.utils.get_text -fitz.Annot.get_textbox = fitz.utils.get_textbox - -# ------------------------------------------------------------------------ -# Rect and IRect -# ------------------------------------------------------------------------ -fitz.Rect.get_area = fitz.utils.get_area -fitz.IRect.get_area = fitz.utils.get_area - -# ------------------------------------------------------------------------ -# TextWriter -# ------------------------------------------------------------------------ -fitz.TextWriter.fill_textbox = fitz.utils.fill_textbox - - -class FitzDeprecation(DeprecationWarning): - pass - - -def restore_aliases(): - import warnings - - warnings.filterwarnings( - "once", - category=FitzDeprecation, - ) - - def showthis(msg, cat, filename, lineno, file=None, line=None): - text = warnings.formatwarning(msg, cat, filename, lineno, line=line) - s = text.find("FitzDeprecation") - if s < 0: - print(text, file=sys.stderr) - return - text = text[s:].splitlines()[0][4:] - print(text, file=sys.stderr) - - warnings.showwarning = showthis - - def _alias(fitz_class, old, new): - fname = getattr(fitz_class, new) - r = str(fitz_class)[1:-1] - objname = " ".join(r.split()[:2]) - objname = objname.replace("fitz.fitz.", "") - objname = objname.replace("fitz.utils.", "") - if callable(fname): - - def deprecated_function(*args, **kw): - msg = "'%s' removed from %s after v1.19 - use '%s'." % ( - old, - objname, - new, - ) - if not VersionBind.startswith("1.18"): - warnings.warn(msg, category=FitzDeprecation) - return fname(*args, **kw) - - setattr(fitz_class, old, deprecated_function) - else: - if type(fname) is property: - setattr(fitz_class, old, property(fname.fget)) - else: - setattr(fitz_class, old, fname) - - eigen = getattr(fitz_class, old) - x = fname.__doc__ - if not x: - x = "" - try: - if callable(fname) or type(fname) is property: - eigen.__doc__ = ( - "*** Deprecated and removed after v1.19 - use '%s'. ***\n" % new + x - ) - except: - pass - - # deprecated Document aliases - _alias(fitz.Document, "chapterCount", "chapter_count") - _alias(fitz.Document, "chapterPageCount", "chapter_page_count") - _alias(fitz.Document, "convertToPDF", "convert_to_pdf") - _alias(fitz.Document, "copyPage", "copy_page") - _alias(fitz.Document, "deletePage", "delete_page") - _alias(fitz.Document, "deletePageRange", "delete_pages") - _alias(fitz.Document, "embeddedFileAdd", "embfile_add") - _alias(fitz.Document, "embeddedFileCount", "embfile_count") - _alias(fitz.Document, "embeddedFileDel", "embfile_del") - _alias(fitz.Document, "embeddedFileGet", "embfile_get") - _alias(fitz.Document, "embeddedFileInfo", "embfile_info") - _alias(fitz.Document, "embeddedFileNames", "embfile_names") - _alias(fitz.Document, "embeddedFileUpd", "embfile_upd") - _alias(fitz.Document, "extractFont", "extract_font") - _alias(fitz.Document, "extractImage", "extract_image") - _alias(fitz.Document, "findBookmark", "find_bookmark") - _alias(fitz.Document, "fullcopyPage", "fullcopy_page") - _alias(fitz.Document, "getCharWidths", "get_char_widths") - _alias(fitz.Document, "getOCGs", "get_ocgs") - _alias(fitz.Document, "getPageFontList", "get_page_fonts") - _alias(fitz.Document, "getPageImageList", "get_page_images") - _alias(fitz.Document, "getPagePixmap", "get_page_pixmap") - _alias(fitz.Document, "getPageText", "get_page_text") - _alias(fitz.Document, "getPageXObjectList", "get_page_xobjects") - _alias(fitz.Document, "getSigFlags", "get_sigflags") - _alias(fitz.Document, "getToC", "get_toc") - _alias(fitz.Document, "getXmlMetadata", "get_xml_metadata") - _alias(fitz.Document, "insertPage", "insert_page") - _alias(fitz.Document, "insertPDF", "insert_pdf") - _alias(fitz.Document, "isDirty", "is_dirty") - _alias(fitz.Document, "isFormPDF", "is_form_pdf") - _alias(fitz.Document, "isPDF", "is_pdf") - _alias(fitz.Document, "isReflowable", "is_reflowable") - _alias(fitz.Document, "isRepaired", "is_repaired") - _alias(fitz.Document, "isStream", "xref_is_stream") - _alias(fitz.Document, "is_stream", "xref_is_stream") - _alias(fitz.Document, "lastLocation", "last_location") - _alias(fitz.Document, "loadPage", "load_page") - _alias(fitz.Document, "makeBookmark", "make_bookmark") - _alias(fitz.Document, "metadataXML", "xref_xml_metadata") - _alias(fitz.Document, "movePage", "move_page") - _alias(fitz.Document, "needsPass", "needs_pass") - _alias(fitz.Document, "newPage", "new_page") - _alias(fitz.Document, "nextLocation", "next_location") - _alias(fitz.Document, "pageCount", "page_count") - _alias(fitz.Document, "pageCropBox", "page_cropbox") - _alias(fitz.Document, "pageXref", "page_xref") - _alias(fitz.Document, "PDFCatalog", "pdf_catalog") - _alias(fitz.Document, "PDFTrailer", "pdf_trailer") - _alias(fitz.Document, "previousLocation", "prev_location") - _alias(fitz.Document, "resolveLink", "resolve_link") - _alias(fitz.Document, "searchPageFor", "search_page_for") - _alias(fitz.Document, "setLanguage", "set_language") - _alias(fitz.Document, "setMetadata", "set_metadata") - _alias(fitz.Document, "setToC", "set_toc") - _alias(fitz.Document, "setXmlMetadata", "set_xml_metadata") - _alias(fitz.Document, "updateObject", "update_object") - _alias(fitz.Document, "updateStream", "update_stream") - _alias(fitz.Document, "xrefLength", "xref_length") - _alias(fitz.Document, "xrefObject", "xref_object") - _alias(fitz.Document, "xrefStream", "xref_stream") - _alias(fitz.Document, "xrefStreamRaw", "xref_stream_raw") - - # deprecated Page aliases - _alias(fitz.Page, "_isWrapped", "is_wrapped") - _alias(fitz.Page, "addCaretAnnot", "add_caret_annot") - _alias(fitz.Page, "addCircleAnnot", "add_circle_annot") - _alias(fitz.Page, "addFileAnnot", "add_file_annot") - _alias(fitz.Page, "addFreetextAnnot", "add_freetext_annot") - _alias(fitz.Page, "addHighlightAnnot", "add_highlight_annot") - _alias(fitz.Page, "addInkAnnot", "add_ink_annot") - _alias(fitz.Page, "addLineAnnot", "add_line_annot") - _alias(fitz.Page, "addPolygonAnnot", "add_polygon_annot") - _alias(fitz.Page, "addPolylineAnnot", "add_polyline_annot") - _alias(fitz.Page, "addRectAnnot", "add_rect_annot") - _alias(fitz.Page, "addRedactAnnot", "add_redact_annot") - _alias(fitz.Page, "addSquigglyAnnot", "add_squiggly_annot") - _alias(fitz.Page, "addStampAnnot", "add_stamp_annot") - _alias(fitz.Page, "addStrikeoutAnnot", "add_strikeout_annot") - _alias(fitz.Page, "addTextAnnot", "add_text_annot") - _alias(fitz.Page, "addUnderlineAnnot", "add_underline_annot") - _alias(fitz.Page, "addWidget", "add_widget") - _alias(fitz.Page, "cleanContents", "clean_contents") - _alias(fitz.Page, "CropBox", "cropbox") - _alias(fitz.Page, "CropBoxPosition", "cropbox_position") - _alias(fitz.Page, "deleteAnnot", "delete_annot") - _alias(fitz.Page, "deleteLink", "delete_link") - _alias(fitz.Page, "deleteWidget", "delete_widget") - _alias(fitz.Page, "derotationMatrix", "derotation_matrix") - _alias(fitz.Page, "drawBezier", "draw_bezier") - _alias(fitz.Page, "drawCircle", "draw_circle") - _alias(fitz.Page, "drawCurve", "draw_curve") - _alias(fitz.Page, "drawLine", "draw_line") - _alias(fitz.Page, "drawOval", "draw_oval") - _alias(fitz.Page, "drawPolyline", "draw_polyline") - _alias(fitz.Page, "drawQuad", "draw_quad") - _alias(fitz.Page, "drawRect", "draw_rect") - _alias(fitz.Page, "drawSector", "draw_sector") - _alias(fitz.Page, "drawSquiggle", "draw_squiggle") - _alias(fitz.Page, "drawZigzag", "draw_zigzag") - _alias(fitz.Page, "firstAnnot", "first_annot") - _alias(fitz.Page, "firstLink", "first_link") - _alias(fitz.Page, "firstWidget", "first_widget") - _alias(fitz.Page, "getContents", "get_contents") - _alias(fitz.Page, "getDisplayList", "get_displaylist") - _alias(fitz.Page, "getDrawings", "get_drawings") - _alias(fitz.Page, "getFontList", "get_fonts") - _alias(fitz.Page, "getImageBbox", "get_image_bbox") - _alias(fitz.Page, "getImageList", "get_images") - _alias(fitz.Page, "getLinks", "get_links") - _alias(fitz.Page, "getPixmap", "get_pixmap") - _alias(fitz.Page, "getSVGimage", "get_svg_image") - _alias(fitz.Page, "getText", "get_text") - _alias(fitz.Page, "getTextBlocks", "get_text_blocks") - _alias(fitz.Page, "getTextbox", "get_textbox") - _alias(fitz.Page, "getTextPage", "get_textpage") - _alias(fitz.Page, "getTextWords", "get_text_words") - _alias(fitz.Page, "insertFont", "insert_font") - _alias(fitz.Page, "insertImage", "insert_image") - _alias(fitz.Page, "insertLink", "insert_link") - _alias(fitz.Page, "insertText", "insert_text") - _alias(fitz.Page, "insertTextbox", "insert_textbox") - _alias(fitz.Page, "loadAnnot", "load_annot") - _alias(fitz.Page, "loadLinks", "load_links") - _alias(fitz.Page, "MediaBox", "mediabox") - _alias(fitz.Page, "MediaBoxSize", "mediabox_size") - _alias(fitz.Page, "newShape", "new_shape") - _alias(fitz.Page, "readContents", "read_contents") - _alias(fitz.Page, "rotationMatrix", "rotation_matrix") - _alias(fitz.Page, "searchFor", "search_for") - _alias(fitz.Page, "setCropBox", "set_cropbox") - _alias(fitz.Page, "setMediaBox", "set_mediabox") - _alias(fitz.Page, "setRotation", "set_rotation") - _alias(fitz.Page, "showPDFpage", "show_pdf_page") - _alias(fitz.Page, "transformationMatrix", "transformation_matrix") - _alias(fitz.Page, "updateLink", "update_link") - _alias(fitz.Page, "wrapContents", "wrap_contents") - _alias(fitz.Page, "writeText", "write_text") - - # deprecated Shape aliases - _alias(fitz.utils.Shape, "drawBezier", "draw_bezier") - _alias(fitz.utils.Shape, "drawCircle", "draw_circle") - _alias(fitz.utils.Shape, "drawCurve", "draw_curve") - _alias(fitz.utils.Shape, "drawLine", "draw_line") - _alias(fitz.utils.Shape, "drawOval", "draw_oval") - _alias(fitz.utils.Shape, "drawPolyline", "draw_polyline") - _alias(fitz.utils.Shape, "drawQuad", "draw_quad") - _alias(fitz.utils.Shape, "drawRect", "draw_rect") - _alias(fitz.utils.Shape, "drawSector", "draw_sector") - _alias(fitz.utils.Shape, "drawSquiggle", "draw_squiggle") - _alias(fitz.utils.Shape, "drawZigzag", "draw_zigzag") - _alias(fitz.utils.Shape, "insertText", "insert_text") - _alias(fitz.utils.Shape, "insertTextbox", "insert_textbox") - - # deprecated Annot aliases - _alias(fitz.Annot, "getText", "get_text") - _alias(fitz.Annot, "getTextbox", "get_textbox") - _alias(fitz.Annot, "fileGet", "get_file") - _alias(fitz.Annot, "fileUpd", "update_file") - _alias(fitz.Annot, "getPixmap", "get_pixmap") - _alias(fitz.Annot, "getTextPage", "get_textpage") - _alias(fitz.Annot, "lineEnds", "line_ends") - _alias(fitz.Annot, "setBlendMode", "set_blendmode") - _alias(fitz.Annot, "setBorder", "set_border") - _alias(fitz.Annot, "setColors", "set_colors") - _alias(fitz.Annot, "setFlags", "set_flags") - _alias(fitz.Annot, "setInfo", "set_info") - _alias(fitz.Annot, "setLineEnds", "set_line_ends") - _alias(fitz.Annot, "setName", "set_name") - _alias(fitz.Annot, "setOpacity", "set_opacity") - _alias(fitz.Annot, "setRect", "set_rect") - _alias(fitz.Annot, "setOC", "set_oc") - _alias(fitz.Annot, "soundGet", "get_sound") - - # deprecated TextWriter aliases - _alias(fitz.TextWriter, "writeText", "write_text") - _alias(fitz.TextWriter, "fillTextbox", "fill_textbox") - - # deprecated DisplayList aliases - _alias(fitz.DisplayList, "getPixmap", "get_pixmap") - _alias(fitz.DisplayList, "getTextPage", "get_textpage") - - # deprecated Pixmap aliases - _alias(fitz.Pixmap, "setAlpha", "set_alpha") - _alias(fitz.Pixmap, "gammaWith", "gamma_with") - _alias(fitz.Pixmap, "tintWith", "tint_with") - _alias(fitz.Pixmap, "clearWith", "clear_with") - _alias(fitz.Pixmap, "copyPixmap", "copy") - _alias(fitz.Pixmap, "getImageData", "tobytes") - _alias(fitz.Pixmap, "getPNGData", "tobytes") - _alias(fitz.Pixmap, "getPNGdata", "tobytes") - _alias(fitz.Pixmap, "writeImage", "save") - _alias(fitz.Pixmap, "writePNG", "save") - _alias(fitz.Pixmap, "pillowWrite", "pil_save") - _alias(fitz.Pixmap, "pillowData", "pil_tobytes") - _alias(fitz.Pixmap, "invertIRect", "invert_irect") - _alias(fitz.Pixmap, "setPixel", "set_pixel") - _alias(fitz.Pixmap, "setOrigin", "set_origin") - _alias(fitz.Pixmap, "setRect", "set_rect") - _alias(fitz.Pixmap, "setResolution", "set_dpi") - - # deprecated geometry aliases - _alias(fitz.Rect, "getArea", "get_area") - _alias(fitz.IRect, "getArea", "get_area") - _alias(fitz.Rect, "getRectArea", "get_area") - _alias(fitz.IRect, "getRectArea", "get_area") - _alias(fitz.Rect, "includePoint", "include_point") - _alias(fitz.IRect, "includePoint", "include_point") - _alias(fitz.Rect, "includeRect", "include_rect") - _alias(fitz.IRect, "includeRect", "include_rect") - _alias(fitz.Rect, "isInfinite", "is_infinite") - _alias(fitz.IRect, "isInfinite", "is_infinite") - _alias(fitz.Rect, "isEmpty", "is_empty") - _alias(fitz.IRect, "isEmpty", "is_empty") - _alias(fitz.Quad, "isEmpty", "is_empty") - _alias(fitz.Quad, "isRectangular", "is_rectangular") - _alias(fitz.Quad, "isConvex", "is_convex") - _alias(fitz.Matrix, "isRectilinear", "is_rectilinear") - _alias(fitz.Matrix, "preRotate", "prerotate") - _alias(fitz.Matrix, "preScale", "prescale") - _alias(fitz.Matrix, "preShear", "preshear") - _alias(fitz.Matrix, "preTranslate", "pretranslate") - - # deprecated other aliases - _alias(fitz.Outline, "isExternal", "is_external") - _alias(fitz.Outline, "isOpen", "is_open") - _alias(fitz.Link, "isExternal", "is_external") - _alias(fitz.Link, "setBorder", "set_border") - _alias(fitz.Link, "setColors", "set_colors") - _alias(fitz, "getPDFstr", "get_pdf_str") - _alias(fitz, "getPDFnow", "get_pdf_now") - _alias(fitz, "PaperSize", "paper_size") - _alias(fitz, "PaperRect", "paper_rect") - _alias(fitz, "paperSizes", "paper_sizes") - _alias(fitz, "ImageProperties", "image_profile") - _alias(fitz, "planishLine", "planish_line") - _alias(fitz, "getTextLength", "get_text_length") - _alias(fitz, "getTextlength", "get_text_length") - - -fitz.__doc__ = """ -PyMuPDF %s: Python bindings for the MuPDF %s library. -Version date: %s. -Built for Python %i.%i on %s (%i-bit). -""" % ( - fitz.VersionBind, - fitz.VersionFitz, - fitz.VersionDate, - sys.version_info[0], - sys.version_info[1], - sys.platform, - 64 if sys.maxsize > 2**32 else 32, -) - -if VersionBind.startswith("1.19"): # don't generate aliases after v1.19.* - restore_aliases() - -pdfcolor = dict( - [ - (k, (r / 255, g / 255, b / 255)) - for k, (r, g, b) in fitz.utils.getColorInfoDict().items() - ] -) diff --git a/fitz/helper-select.i b/fitz/helper-select.i deleted file mode 100644 index 33400dfb2..000000000 --- a/fitz/helper-select.i +++ /dev/null @@ -1,394 +0,0 @@ -%{ -/* -# ------------------------------------------------------------------------ -# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com -# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html -# -# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a -# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is -# maintained and developed by Artifex Software, Inc. https://artifex.com. -# ------------------------------------------------------------------------ -*/ -//---------------------------------------------------------------------------- -// Helpers for document page selection - main logic was imported -// from pdf_clean_file.c. But instead of analyzing a string-based spec of -// selected pages, we accept a Python sequence. -//---------------------------------------------------------------------------- -typedef struct globals_s -{ - pdf_document *doc; - fz_context *ctx; -} globals; - -int string_in_names_list(fz_context *ctx, pdf_obj *p, pdf_obj *names_list) -{ - int n = pdf_array_len(ctx, names_list); - int i; - const char *str = pdf_to_text_string(ctx, p); - - for (i = 0; i < n ; i += 2) - { - if (!strcmp(pdf_to_text_string(ctx, pdf_array_get(ctx, names_list, i)), str)) - return 1; - } - return 0; -} - -//---------------------------------------------------------------------------- -// Recreate page tree to only retain specified pages. -//---------------------------------------------------------------------------- -void retainpage(fz_context *ctx, pdf_document *doc, pdf_obj *parent, pdf_obj *kids, int page) -{ - pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, page); - - pdf_flatten_inheritable_page_items(ctx, pageref); - - pdf_dict_put(ctx, pageref, PDF_NAME(Parent), parent); - - /* Store page object in new kids array */ - pdf_array_push(ctx, kids, pageref); -} - -int dest_is_valid_page(fz_context *ctx, pdf_obj *obj, int *page_object_nums, int pagecount) -{ - int i; - int num = pdf_to_num(ctx, obj); - - if (num == 0) - return 0; - for (i = 0; i < pagecount; i++) - { - if (page_object_nums[i] == num) - return 1; - } - return 0; -} - -int dest_is_valid(fz_context *ctx, pdf_obj *o, int page_count, int *page_object_nums, pdf_obj *names_list) -{ - pdf_obj *p; - - p = pdf_dict_get(ctx, o, PDF_NAME(A)); - if (pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME(S)), PDF_NAME(GoTo)) && - !string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME(D)), names_list)) - return 0; - - p = pdf_dict_get(ctx, o, PDF_NAME(Dest)); - if (p == NULL) - {} - else if (pdf_is_string(ctx, p)) - { - return string_in_names_list(ctx, p, names_list); - } - else if (!dest_is_valid_page(ctx, pdf_array_get(ctx, p, 0), page_object_nums, page_count)) - return 0; - - return 1; -} - -int strip_outlines(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list); - -int strip_outline(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list, pdf_obj **pfirst, pdf_obj **plast) -{ - pdf_obj *prev = NULL; - pdf_obj *first = NULL; - pdf_obj *current; - int count = 0; - - for (current = outlines; current != NULL; ) - { - int nc; - - /*********************************************************************/ - // Strip any children to start with. This takes care of - // First / Last / Count for us. - /*********************************************************************/ - nc = strip_outlines(ctx, doc, current, page_count, page_object_nums, names_list); - - if (!dest_is_valid(ctx, current, page_count, page_object_nums, names_list)) - { - if (nc == 0) - { - /*************************************************************/ - // Outline with invalid dest and no children. Drop it by - // pulling the next one in here. - /*************************************************************/ - pdf_obj *next = pdf_dict_get(ctx, current, PDF_NAME(Next)); - if (next == NULL) - { - // There is no next one to pull in - if (prev != NULL) - pdf_dict_del(ctx, prev, PDF_NAME(Next)); - } - else if (prev != NULL) - { - pdf_dict_put(ctx, prev, PDF_NAME(Next), next); - pdf_dict_put(ctx, next, PDF_NAME(Prev), prev); - } - else - { - pdf_dict_del(ctx, next, PDF_NAME(Prev)); - } - current = next; - } - else - { - // Outline with invalid dest, but children. Just drop the dest. - pdf_dict_del(ctx, current, PDF_NAME(Dest)); - pdf_dict_del(ctx, current, PDF_NAME(A)); - current = pdf_dict_get(ctx, current, PDF_NAME(Next)); - } - } - else - { - // Keep this one - if (first == NULL) - first = current; - prev = current; - current = pdf_dict_get(ctx, current, PDF_NAME(Next)); - count++; - } - } - - *pfirst = first; - *plast = prev; - - return count; -} - -int strip_outlines(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list) -{ - int nc; - pdf_obj *first; - pdf_obj *last; - - if (outlines == NULL) - return 0; - - first = pdf_dict_get(ctx, outlines, PDF_NAME(First)); - if (first == NULL) - nc = 0; - else - nc = strip_outline(ctx, doc, first, page_count, page_object_nums, - names_list, &first, &last); - - if (nc == 0) - { - pdf_dict_del(ctx, outlines, PDF_NAME(First)); - pdf_dict_del(ctx, outlines, PDF_NAME(Last)); - pdf_dict_del(ctx, outlines, PDF_NAME(Count)); - } - else - { - int old_count = pdf_to_int(ctx, pdf_dict_get(ctx, outlines, PDF_NAME(Count))); - pdf_dict_put(ctx, outlines, PDF_NAME(First), first); - pdf_dict_put(ctx, outlines, PDF_NAME(Last), last); - pdf_dict_put_drop(ctx, outlines, PDF_NAME(Count), pdf_new_int(ctx, old_count > 0 ? nc : -nc)); - } - return nc; -} - -//---------------------------------------------------------------------------- -// This is called by PyMuPDF: -// liste = page numbers to retain -//---------------------------------------------------------------------------- -void retainpages(fz_context *ctx, globals *glo, PyObject *liste) -{ - pdf_obj *oldroot, *root, *pages, *kids, *countobj, *olddests; - Py_ssize_t argc = PySequence_Size(liste); - pdf_document *doc = glo->doc; - pdf_obj *names_list = NULL; - pdf_obj *outlines; - pdf_obj *ocproperties; - int pagecount = pdf_count_pages(ctx, doc); - - int i; - int *page_object_nums; - -/******************************************************************************/ -// Keep only pages/type and (reduced) dest entries to avoid -// references to dropped pages -/******************************************************************************/ - oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)); - pages = pdf_dict_get(ctx, oldroot, PDF_NAME(Pages)); - olddests = pdf_load_name_tree(ctx, doc, PDF_NAME(Dests)); - outlines = pdf_dict_get(ctx, oldroot, PDF_NAME(Outlines)); - ocproperties = pdf_dict_get(ctx, oldroot, PDF_NAME(OCProperties)); - - root = pdf_new_dict(ctx, doc, 3); - pdf_dict_put(ctx, root, PDF_NAME(Type), pdf_dict_get(ctx, oldroot, PDF_NAME(Type))); - pdf_dict_put(ctx, root, PDF_NAME(Pages), pdf_dict_get(ctx, oldroot, PDF_NAME(Pages))); - if (outlines) - pdf_dict_put(ctx, root, PDF_NAME(Outlines), outlines); - if (ocproperties) - pdf_dict_put(ctx, root, PDF_NAME(OCProperties), ocproperties); - - pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root); - - // Create a new kids array with only the pages we want to keep - kids = pdf_new_array(ctx, doc, 1); - - // Retain pages specified - Py_ssize_t page; - fz_try(ctx) { - for (page = 0; page < argc; page++) { - i = (int) PyInt_AsLong(PySequence_ITEM(liste, page)); - if (i < 0 || i >= pagecount) { - RAISEPY(ctx, MSG_BAD_PAGENO, PyExc_ValueError); - } - retainpage(ctx, doc, pages, kids, i); - } - } - fz_catch(ctx) { - fz_rethrow(ctx); - } - - // Update page count and kids array - countobj = pdf_new_int(ctx, pdf_array_len(ctx, kids)); - pdf_dict_put_drop(ctx, pages, PDF_NAME(Count), countobj); - pdf_dict_put_drop(ctx, pages, PDF_NAME(Kids), kids); - - pagecount = pdf_count_pages(ctx, doc); - page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums)); - for (i = 0; i < pagecount; i++) - { - pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i); - page_object_nums[i] = pdf_to_num(ctx, pageref); - } - -/******************************************************************************/ -// If we had an old Dests tree (now reformed as an olddests dictionary), -// keep any entries in there that point to valid pages. -// This may mean we keep more than we need, but it is safe at least. -/******************************************************************************/ - if (olddests) - { - pdf_obj *names = pdf_new_dict(ctx, doc, 1); - pdf_obj *dests = pdf_new_dict(ctx, doc, 1); - int len = pdf_dict_len(ctx, olddests); - - names_list = pdf_new_array(ctx, doc, 32); - - for (i = 0; i < len; i++) - { - pdf_obj *key = pdf_dict_get_key(ctx, olddests, i); - pdf_obj *val = pdf_dict_get_val(ctx, olddests, i); - pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME(D)); - - dest = pdf_array_get(ctx, dest ? dest : val, 0); - if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount)) - { - pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key))); - pdf_array_push_drop(ctx, names_list, key_str); - pdf_array_push(ctx, names_list, val); - } - } - - pdf_dict_put(ctx, dests, PDF_NAME(Names), names_list); - pdf_dict_put(ctx, names, PDF_NAME(Dests), dests); - pdf_dict_put(ctx, root, PDF_NAME(Names), names); - - pdf_drop_obj(ctx, names); - pdf_drop_obj(ctx, dests); - pdf_drop_obj(ctx, olddests); - } - -/*****************************************************************************/ -// Edit each pages /Annot list to remove any links pointing to nowhere. -/*****************************************************************************/ - for (i = 0; i < pagecount; i++) - { - pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i); - - pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots)); - - int len = pdf_array_len(ctx, annots); - int j; - - for (j = 0; j < len; j++) - { - pdf_obj *o = pdf_array_get(ctx, annots, j); - - if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link))) - continue; - - if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list)) - { - // Remove this annotation - pdf_array_delete(ctx, annots, j); - len--; - j--; - } - } - } - - if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0) - { - pdf_dict_del(ctx, root, PDF_NAME(Outlines)); - } - - fz_free(ctx, page_object_nums); - pdf_drop_obj(ctx, names_list); - pdf_drop_obj(ctx, root); -} - -void remove_dest_range(fz_context *ctx, pdf_document *pdf, PyObject *numbers) -{ - fz_try(ctx) { - int i, j, pno, len, pagecount = pdf_count_pages(ctx, pdf); - PyObject *n1 = NULL; - pdf_obj *target, *annots, *pageref, *o, *action, *dest; - for (i = 0; i < pagecount; i++) { - n1 = PyLong_FromLong((long) i); - if (PySet_Contains(numbers, n1)) { - Py_DECREF(n1); - continue; - } - Py_DECREF(n1); - - pageref = pdf_lookup_page_obj(ctx, pdf, i); - annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots)); - if (!annots) continue; - len = pdf_array_len(ctx, annots); - for (j = len - 1; j >= 0; j -= 1) { - o = pdf_array_get(ctx, annots, j); - if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link))) { - continue; - } - action = pdf_dict_get(ctx, o, PDF_NAME(A)); - dest = pdf_dict_get(ctx, o, PDF_NAME(Dest)); - if (action) { - if (!pdf_name_eq(ctx, pdf_dict_get(ctx, action, - PDF_NAME(S)), PDF_NAME(GoTo))) - continue; - dest = pdf_dict_get(ctx, action, PDF_NAME(D)); - } - pno = -1; - if (pdf_is_array(ctx, dest)) { - target = pdf_array_get(ctx, dest, 0); - pno = pdf_lookup_page_number(ctx, pdf, target); - } - else if (pdf_is_string(ctx, dest)) { - fz_location location = fz_resolve_link(ctx, &pdf->super, - pdf_to_text_string(ctx, dest), - NULL, NULL); - pno = location.page; - } - if (pno < 0) { // page number lookup did not work - continue; - } - n1 = PyLong_FromLong((long) pno); - if (PySet_Contains(numbers, n1)) { - pdf_array_delete(ctx, annots, j); - } - Py_DECREF(n1); - } - } - } - - fz_catch(ctx) { - fz_rethrow(ctx); - } - return; -} -%} diff --git a/fitz/version.i b/fitz/version.i deleted file mode 100644 index 7c028e28a..000000000 --- a/fitz/version.i +++ /dev/null @@ -1,6 +0,0 @@ -%pythoncode %{ -VersionFitz = "1.22.2" # MuPDF version. -VersionBind = "1.22.5" # PyMuPDF version. -VersionDate = "2023-06-21 00:00:01" -version = (VersionBind, VersionFitz, "20230621000001") -%} diff --git a/pipcl.py b/pipcl.py new file mode 100644 index 000000000..64348d8b2 --- /dev/null +++ b/pipcl.py @@ -0,0 +1,3418 @@ +''' +Python packaging operations, including PEP-517 support, for use by a `setup.py` +script. + +Overview: + + The intention is to take care of as many packaging details as possible so + that setup.py contains only project-specific information, while also giving + as much flexibility as possible. + + For example we provide a function `build_extension()` that can be used + to build a SWIG extension, but we also give access to the located + compiler/linker so that a `setup.py` script can take over the details + itself. + +Doctests: + Doctest strings are provided in some comments. + + Test in the usual way with: + python -m doctest pipcl.py + + Test specific functions/classes with: + python pipcl.py --doctest run_if ... + + If no functions or classes are specified, this tests everything. + +Graal: + For Graal we require that PIPCL_GRAAL_PYTHON is set to non-graal Python (we + build for non-graal except with Graal Python's include paths and library + directory). +''' + +import base64 +import codecs +import difflib +import glob +import hashlib +import inspect +import io +import os +import platform +import re +import shlex +import shutil +import site +import subprocess +import sys +import sysconfig +import tarfile +import textwrap +import time +import zipfile + +import wdev + + +class Package: + ''' + Our constructor takes a definition of a Python package similar to that + passed to `distutils.core.setup()` or `setuptools.setup()` (name, version, + summary etc) plus callbacks for building, getting a list of sdist + filenames, and cleaning. + + We provide methods that can be used to implement a Python package's + `setup.py` supporting PEP-517. + + We also support basic command line handling for use + with a legacy (pre-PEP-517) pip, as implemented + by legacy distutils/setuptools and described in: + https://pip.pypa.io/en/stable/reference/build-system/setup-py/ + + The file pyproject.toml must exist; this is checked if/when fn_build() is + called. + + Here is a `doctest` example of using pipcl to create a SWIG extension + module. Requires `swig`. + + Create an empty test directory: + + >>> import os + >>> import shutil + >>> shutil.rmtree('pipcl_test', ignore_errors=1) + >>> os.mkdir('pipcl_test') + + Create a `setup.py` which uses `pipcl` to define an extension module. + + >>> import textwrap + >>> with open('pipcl_test/setup.py', 'w') as f: + ... _ = f.write(textwrap.dedent(""" + ... import sys + ... import pipcl + ... + ... def build(): + ... so_leaf = pipcl.build_extension( + ... name = 'foo', + ... path_i = 'foo.i', + ... outdir = 'build', + ... ) + ... return [ + ... ('build/foo.py', 'foo/__init__.py'), + ... ('cli.py', 'foo/__main__.py'), + ... (f'build/{so_leaf}', f'foo/'), + ... ('README', '$dist-info/'), + ... (b'Hello world', 'foo/hw.txt'), + ... ] + ... + ... def sdist(): + ... return [ + ... 'foo.i', + ... 'bar.i', + ... 'setup.py', + ... 'pipcl.py', + ... 'wdev.py', + ... 'README', + ... (b'Hello word2', 'hw2.txt'), + ... ] + ... + ... p = pipcl.Package( + ... name = 'foo', + ... version = '1.2.3', + ... fn_build = build, + ... fn_sdist = sdist, + ... entry_points = ( + ... { 'console_scripts': [ + ... 'foo_cli = foo.__main__:main', + ... ], + ... }), + ... ) + ... + ... build_wheel = p.build_wheel + ... build_sdist = p.build_sdist + ... + ... # Handle old-style setup.py command-line usage: + ... if __name__ == '__main__': + ... p.handle_argv(sys.argv) + ... """)) + + Create the files required by the above `setup.py` - the SWIG `.i` input + file, the README file, and copies of `pipcl.py` and `wdev.py`. + + >>> with open('pipcl_test/foo.i', 'w') as f: + ... _ = f.write(textwrap.dedent(""" + ... %include bar.i + ... %{ + ... #include + ... #include + ... int bar(const char* text) + ... { + ... printf("bar(): text: %s\\\\n", text); + ... int len = (int) strlen(text); + ... printf("bar(): len=%i\\\\n", len); + ... fflush(stdout); + ... return len; + ... } + ... %} + ... int bar(const char* text); + ... """)) + + >>> with open('pipcl_test/bar.i', 'w') as f: + ... _ = f.write( '\\n') + + >>> with open('pipcl_test/README', 'w') as f: + ... _ = f.write(textwrap.dedent(""" + ... This is Foo. + ... """)) + + >>> with open('pipcl_test/cli.py', 'w') as f: + ... _ = f.write(textwrap.dedent(""" + ... def main(): + ... print('pipcl_test:main().') + ... if __name__ == '__main__': + ... main() + ... """)) + + >>> root = os.path.dirname(__file__) + >>> _ = shutil.copy2(f'{root}/pipcl.py', 'pipcl_test/pipcl.py') + >>> _ = shutil.copy2(f'{root}/wdev.py', 'pipcl_test/wdev.py') + + Use `setup.py`'s command-line interface to build and install the extension + module into root `pipcl_test/install`. + + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py --root install install', + ... shell=1, check=1) + + The actual install directory depends on `sysconfig.get_path('platlib')`: + + >>> if windows(): + ... install_dir = 'pipcl_test/install' + ... else: + ... install_dir = f'pipcl_test/install/{sysconfig.get_path("platlib").lstrip(os.sep)}' + >>> assert os.path.isfile( f'{install_dir}/foo/__init__.py') + + Create a test script which asserts that Python function call `foo.bar(s)` + returns the length of `s`, and run it with `PYTHONPATH` set to the install + directory: + + >>> with open('pipcl_test/test.py', 'w') as f: + ... _ = f.write(textwrap.dedent(""" + ... import sys + ... import foo + ... text = 'hello' + ... print(f'test.py: calling foo.bar() with text={text!r}') + ... sys.stdout.flush() + ... l = foo.bar(text) + ... print(f'test.py: foo.bar() returned: {l}') + ... assert l == len(text) + ... """)) + >>> r = subprocess.run( + ... f'{sys.executable} pipcl_test/test.py', + ... shell=1, check=1, text=1, + ... stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ... env=os.environ | dict(PYTHONPATH=install_dir), + ... ) + >>> print(r.stdout) + test.py: calling foo.bar() with text='hello' + bar(): text: hello + bar(): len=5 + test.py: foo.bar() returned: 5 + + + Check that building sdist and wheel succeeds. For now we don't attempt to + check that the sdist and wheel actually work. + + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py sdist', + ... shell=1, check=1) + + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py bdist_wheel', + ... shell=1, check=1) + + Check that rebuild does nothing. + + >>> t0 = os.path.getmtime('pipcl_test/build/foo.py') + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py bdist_wheel', + ... shell=1, check=1) + >>> t = os.path.getmtime('pipcl_test/build/foo.py') + >>> assert t == t0 + + Check that touching bar.i forces rebuild. + + >>> os.utime('pipcl_test/bar.i') + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py bdist_wheel', + ... shell=1, check=1) + >>> t = os.path.getmtime('pipcl_test/build/foo.py') + >>> assert t > t0 + + Check that touching foo.i.cpp does not run swig, but does recompile/link. + + >>> t0 = time.time() + >>> os.utime('pipcl_test/build/foo.i.cpp') + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} setup.py bdist_wheel', + ... shell=1, check=1) + >>> assert os.path.getmtime('pipcl_test/build/foo.py') <= t0 + >>> so = glob.glob('pipcl_test/build/*.so') + >>> assert len(so) == 1 + >>> so = so[0] + >>> assert os.path.getmtime(so) > t0 + + Check `entry_points` causes creation of command `foo_cli` when we install + from our wheel using pip. [As of 2024-02-24 using pipcl's CLI interface + directly with `setup.py install` does not support entry points.] + + >>> print('Creating venv.', file=sys.stderr) + >>> _ = subprocess.run( + ... f'cd pipcl_test && {sys.executable} -m venv pylocal', + ... shell=1, check=1) + + >>> print('Installing from wheel into venv using pip.', file=sys.stderr) + >>> _ = subprocess.run( + ... f'. pipcl_test/pylocal/bin/activate && pip install pipcl_test/dist/*.whl', + ... shell=1, check=1) + + >>> print('Running foo_cli.', file=sys.stderr) + >>> _ = subprocess.run( + ... f'. pipcl_test/pylocal/bin/activate && foo_cli', + ... shell=1, check=1) + + Wheels and sdists + + Wheels: + We generate wheels according to: + https://packaging.python.org/specifications/binary-distribution-format/ + + * `{name}-{version}.dist-info/RECORD` uses sha256 hashes. + * We do not generate other `RECORD*` files such as + `RECORD.jws` or `RECORD.p7s`. + * `{name}-{version}.dist-info/WHEEL` has: + + * `Wheel-Version: 1.0` + * `Root-Is-Purelib: false` + * No support for signed wheels. + + Sdists: + We generate sdist's according to: + https://packaging.python.org/specifications/source-distribution-format/ + ''' + def __init__(self, + name, + version, + *, + platform = None, + supported_platform = None, + summary = None, + description = None, + description_content_type = None, + keywords = None, + home_page = None, + download_url = None, + author = None, + author_email = None, + maintainer = None, + maintainer_email = None, + license = None, + classifier = None, + requires_dist = None, + requires_python = None, + requires_external = None, + project_url = None, + provides_extra = None, + + entry_points = None, + + root = None, + fn_build = None, + fn_clean = None, + fn_sdist = None, + tag_python = None, + tag_abi = None, + tag_platform = None, + py_limited_api = None, + + wheel_compression = zipfile.ZIP_DEFLATED, + wheel_compresslevel = None, + ): + ''' + The initial args before `entry_points` define the + package metadata and closely follow the definitions in: + https://packaging.python.org/specifications/core-metadata/ + + Args: + + name: + Used for metadata `Name`. + A string, the name of the Python package. + version: + Used for metadata `Version`. + A string, the version of the Python package. Also see PEP-440 + `Version Identification and Dependency Specification`. + platform: + Used for metadata `Platform`. + A string or list of strings. + supported_platform: + Used for metadata `Supported-Platform`. + A string or list of strings. + summary: + Used for metadata `Summary`. + A string, short description of the package. + description: + Used for metadata `Description`. + A string. If contains newlines, a detailed description of the + package. Otherwise the path of a file containing the detailed + description of the package. + description_content_type: + Used for metadata `Description-Content-Type`. + A string describing markup of `description` arg. For example + `text/markdown; variant=GFM`. + keywords: + Used for metadata `Keywords`. + A string containing comma-separated keywords. + home_page: + Used for metadata `Home-page`. + URL of home page. + download_url: + Used for metadata `Download-URL`. + Where this version can be downloaded from. + author: + Used for metadata `Author`. + Author. + author_email: + Used for metadata `Author-email`. + Author email. + maintainer: + Used for metadata `Maintainer`. + Maintainer. + maintainer_email: + Used for metadata `Maintainer-email`. + Maintainer email. + license: + Used for metadata `License`. + A string containing the license text. Written into metadata + file `COPYING`. Is also written into metadata itself if not + multi-line. + classifier: + Used for metadata `Classifier`. + A string or list of strings. Also see: + + * https://pypi.org/pypi?%3Aaction=list_classifiers + * https://pypi.org/classifiers/ + + requires_dist: + Used for metadata `Requires-Dist`. + A string or list of strings, Python packages required + at runtime. None items are ignored. + requires_python: + Used for metadata `Requires-Python`. + A string or list of strings. + requires_external: + Used for metadata `Requires-External`. + A string or list of strings. + project_url: + Used for metadata `Project-URL`. + A string or list of strings, each of the form: `{name}, + {url}`. + provides_extra: + Used for metadata `Provides-Extra`. + A string or list of strings. + + entry_points: + String or dict specifying *.dist-info/entry_points.txt, for + example: + + ``` + [console_scripts] + foo_cli = foo.__main__:main + ``` + + or: + + { 'console_scripts': [ + 'foo_cli = foo.__main__:main', + ], + } + + See: https://packaging.python.org/en/latest/specifications/entry-points/ + + root: + Root of package, defaults to current directory. + + fn_build: + A function taking no args, or a single `config_settings` dict + arg (as described in PEP-517), that builds the package. + + Should return a list of items; each item should be a tuple + `(from_, to_)`, or a single string `path` which is treated as + the tuple `(path, path)`. + + `from_` can be a string or a `bytes`. If a string it should + be the path to a file; a relative path is treated as relative + to `root`. If a `bytes` it is the contents of the file to be + added. + + `to_` identifies what the file should be called within a wheel + or when installing. If `to_` is empty or `/` we set it to the + leaf of `from_` (`from_` must not be a `bytes`) - i.e. we place + the file in the root directory of the wheel; otherwise if + `to_` ends with `/` the leaf of `from_` is appended to it (and + `from_` must not be a `bytes`). + + Initial `$dist-info/` in `_to` is replaced by + `{name}-{version}.dist-info/`; this is useful for license files + etc. + + Initial `$data/` in `_to` is replaced by + `{name}-{version}.data/`. We do not enforce particular + subdirectories, instead it is up to `fn_build()` to specify + specific subdirectories such as `purelib`, `headers`, + `scripts`, `data` etc. + + If we are building a wheel (e.g. `python setup.py bdist_wheel`, + or PEP-517 pip calls `self.build_wheel()`), we add file `from_` + to the wheel archive with name `to_`. + + If we are installing (e.g. `install` command in + the argv passed to `self.handle_argv()`), then + we copy `from_` to `{sitepackages}/{to_}`, where + `sitepackages` is the installation directory, the + default being `sysconfig.get_path('platlib')` e.g. + `myvenv/lib/python3.9/site-packages/`. + + When calling this function, we assert that the file + pyproject.toml exists in the current directory. (We do this + here rather than in pipcl.Package's constructor, as otherwise + importing setup.py from non-package-related code could fail.) + + fn_clean: + A function taking a single arg `all_` that cleans generated + files. `all_` is true iff `--all` is in argv. + + For safety and convenience, can also returns a list of + files/directory paths to be deleted. Relative paths are + interpreted as relative to `root`. All paths are asserted to be + within `root`. + + fn_sdist: + A function taking no args, or a single `config_settings` dict + arg (as described in PEP517), that returns a list of items to + be copied into the sdist. The list should be in the same format + as returned by `fn_build`. + + It can be convenient to use `pipcl.git_items()`. + + The specification for sdists requires that the list contains + `pyproject.toml`; we enforce this with a Python assert. + + tag_python: + First element of wheel tag defined in PEP-425. If None we use + `cp{version}`. + + For example if code works with any Python version, one can use + 'py3'. + + tag_abi: + Second element of wheel tag defined in PEP-425. If None we use + `none`. + + tag_platform: + Third element of wheel tag defined in PEP-425. Default + is `os.environ('AUDITWHEEL_PLAT')` if set, otherwise + derived from `sysconfig.get_platform()` (was + `setuptools.distutils.util.get_platform(), before that + `distutils.util.get_platform()` as specified in the PEP), e.g. + `openbsd_7_0_amd64`. + + For pure python packages use: `tag_platform=any` + + py_limited_api: + If true we build wheels that use the Python Limited API. We use + the version of `sys.executable` to define `Py_LIMITED_API` when + compiling extensions, and use ABI tag `abi3` in the wheel name + if argument `tag_abi` is None. + + wheel_compression: + Used as `zipfile.ZipFile()`'s `compression` parameter when + creating wheels. + + wheel_compresslevel: + Used as `zipfile.ZipFile()`'s `compresslevel` parameter when + creating wheels. + + Occurrences of `None` in lists are ignored. + ''' + assert name + assert version + + def assert_str( v): + if v is not None: + assert isinstance( v, str), f'Not a string: {v!r}' + def assert_str_or_multi( v): + if v is not None: + assert isinstance( v, (str, tuple, list)), f'Not a string, tuple or list: {v!r}' + + assert_str( name) + assert_str( version) + assert_str_or_multi( platform) + assert_str_or_multi( supported_platform) + assert_str( summary) + assert_str( description) + assert_str( description_content_type) + assert_str( keywords) + assert_str( home_page) + assert_str( download_url) + assert_str( author) + assert_str( author_email) + assert_str( maintainer) + assert_str( maintainer_email) + assert_str( license) + assert_str_or_multi( classifier) + assert_str_or_multi( requires_dist) + assert_str( requires_python) + assert_str_or_multi( requires_external) + assert_str_or_multi( project_url) + assert_str_or_multi( provides_extra) + + assert re.match('^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\\Z', name, re.IGNORECASE), ( + f'Invalid package name' + f' (https://packaging.python.org/en/latest/specifications/name-normalization/)' + f': {name!r}' + ) + + # https://packaging.python.org/en/latest/specifications/core-metadata/. + assert re.match('([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', name, re.IGNORECASE), \ + f'Bad name: {name!r}' + + _assert_version_pep_440(version) + + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/ + if tag_python: + assert '-' not in tag_python + if tag_abi: + assert '-' not in tag_abi + if tag_platform: + assert '-' not in tag_platform + + self.name = name + self.version = version + self.platform = platform + self.supported_platform = supported_platform + self.summary = summary + self.description = description + self.description_content_type = description_content_type + self.keywords = keywords + self.home_page = home_page + self.download_url = download_url + self.author = author + self.author_email = author_email + self.maintainer = maintainer + self.maintainer_email = maintainer_email + self.license = license + self.classifier = classifier + self.requires_dist = requires_dist + self.requires_python = requires_python + self.requires_external = requires_external + self.project_url = project_url + self.provides_extra = provides_extra + self.entry_points = entry_points + + self.root = os.path.abspath(root if root else os.getcwd()) + self.fn_build = fn_build + self.fn_clean = fn_clean + self.fn_sdist = fn_sdist + self.tag_python_ = tag_python + self.tag_abi_ = tag_abi + self.tag_platform_ = tag_platform + self.py_limited_api = py_limited_api + + self.wheel_compression = wheel_compression + self.wheel_compresslevel = wheel_compresslevel + + # If true and we are building for graal, we set PIPCL_PYTHON_CONFIG to + # a command that will print includes/libs from graal_py's sysconfig. + # + self.graal_legacy_python_config = True + + + def build_wheel(self, + wheel_directory, + config_settings=None, + metadata_directory=None, + ): + ''' + A PEP-517 `build_wheel()` function. + + Also called by `handle_argv()` to handle the `bdist_wheel` command. + + Returns leafname of generated wheel within `wheel_directory`. + ''' + log2( + f' wheel_directory={wheel_directory!r}' + f' config_settings={config_settings!r}' + f' metadata_directory={metadata_directory!r}' + ) + + if os.environ.get('CIBUILDWHEEL') == '1': + # Don't special-case graal builds when running under cibuildwheel. + pass + elif sys.implementation.name == 'graalpy': + # We build for Graal by building a native Python wheel with Graal + # Python's include paths and library directory. We then rename the + # wheel to contain graal's tag etc. + # + log0(f'### Graal build: deferring to cpython.') + python_native = os.environ.get('PIPCL_GRAAL_PYTHON') + assert python_native, f'Graal build requires that PIPCL_GRAAL_PYTHON is set.' + env_extra = dict( + PIPCL_SYSCONFIG_PATH_include = sysconfig.get_path('include'), + PIPCL_SYSCONFIG_PATH_platinclude = sysconfig.get_path('platinclude'), + PIPCL_SYSCONFIG_CONFIG_VAR_LIBDIR = sysconfig.get_config_var('LIBDIR'), + ) + # Tell native build to run pipcl.py itself to get python-config + # information about include paths etc. + if self.graal_legacy_python_config: + env_extra['PIPCL_PYTHON_CONFIG'] = f'{python_native} {os.path.abspath(__file__)} --graal-legacy-python-config' + + # Create venv. + venv_name = os.environ.get('PIPCL_GRAAL_NATIVE_VENV') + if venv_name: + log1(f'Graal using pre-existing {venv_name=}') + else: + venv_name = 'venv-pipcl-graal-native' + run(f'{shlex.quote(python_native)} -m venv {venv_name}') + log1(f'Graal using {venv_name=}') + + newfiles = NewFiles(f'{wheel_directory}/*.whl') + run( + f'. {venv_name}/bin/activate && python setup.py --dist-dir {shlex.quote(wheel_directory)} bdist_wheel', + env_extra = env_extra, + prefix = f'pipcl.py graal {python_native}: ', + ) + wheel = newfiles.get_one() + wheel_leaf = os.path.basename(wheel) + python_major_minor = run(f'{shlex.quote(python_native)} -c "import platform; import sys; sys.stdout.write(str().join(platform.python_version_tuple()[:2]))"', capture=1) + cpabi = f'cp{python_major_minor}-abi3' + assert cpabi in wheel_leaf, f'Expected wheel to be for {cpabi=}, but {wheel=}.' + graalpy_ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') + log1(f'{graalpy_ext_suffix=}') + m = re.match(r'\.graalpy(\d+[^\-]*)-(\d+)', graalpy_ext_suffix) + gpver = m[1] + cpver = m[2] + graalpy_wheel_tag = f'graalpy{cpver}-graalpy{gpver}_{cpver}_native' + name = wheel_leaf.replace(cpabi, graalpy_wheel_tag) + destination = f'{wheel_directory}/{name}' + log0(f'### Graal build: copying {wheel=} to {destination=}') + # Copying results in two wheels which appears to confuse pip, showing: + # Found multiple .whl files; unspecified behaviour. Will call build_wheel. + os.rename(wheel, destination) + log1(f'Returning {name=}.') + return name + + wheel_name = self.wheel_name() + path = f'{wheel_directory}/{wheel_name}' + + # Do a build and get list of files to copy into the wheel. + # + items = list() + if self.fn_build: + items = self._call_fn_build(config_settings) + + log2(f'Creating wheel: {path}') + os.makedirs(wheel_directory, exist_ok=True) + record = _Record() + with zipfile.ZipFile(path, 'w', self.wheel_compression, self.wheel_compresslevel) as z: + + def add(from_, to_): + if isinstance(from_, str): + z.write(from_, to_) + record.add_file(from_, to_) + elif isinstance(from_, bytes): + z.writestr(to_, from_) + record.add_content(from_, to_) + else: + assert 0 + + def add_str(content, to_): + add(content.encode('utf8'), to_) + + dist_info_dir = self._dist_info_dir() + + # Add the files returned by fn_build(). + # + for item in items: + from_, (to_abs, to_rel) = self._fromto(item) + add(from_, to_rel) + + # Add -.dist-info/WHEEL. + # + add_str( + f'Wheel-Version: 1.0\n' + f'Generator: pipcl\n' + f'Root-Is-Purelib: false\n' + f'Tag: {self.wheel_tag_string()}\n' + , + f'{dist_info_dir}/WHEEL', + ) + # Add -.dist-info/METADATA. + # + add_str(self._metainfo(), f'{dist_info_dir}/METADATA') + + # Add -.dist-info/COPYING. + if self.license: + add_str(self.license, f'{dist_info_dir}/COPYING') + + # Add -.dist-info/entry_points.txt. + entry_points_text = self._entry_points_text() + if entry_points_text: + add_str(entry_points_text, f'{dist_info_dir}/entry_points.txt') + + # Update -.dist-info/RECORD. This must be last. + # + z.writestr(f'{dist_info_dir}/RECORD', record.get(f'{dist_info_dir}/RECORD')) + + st = os.stat(path) + log1( f'Have created wheel size={st.st_size:,}: {path}') + if g_verbose >= 2: + with zipfile.ZipFile(path, compression=self.wheel_compression) as z: + log2(f'Contents are:') + for zi in sorted(z.infolist(), key=lambda z: z.filename): + log2(f' {zi.file_size: 10,d} {zi.filename}') + + return os.path.basename(path) + + + def build_sdist(self, + sdist_directory, + formats, + config_settings=None, + ): + ''' + A PEP-517 `build_sdist()` function. + + Also called by `handle_argv()` to handle the `sdist` command. + + Returns leafname of generated archive within `sdist_directory`. + ''' + assert self.fn_sdist, f'fn_sdist() not provided.' + log2( + f' sdist_directory={sdist_directory!r}' + f' formats={formats!r}' + f' config_settings={config_settings!r}' + ) + if formats and formats != 'gztar': + raise Exception( f'Unsupported: formats={formats}') + items = list() + if inspect.signature(self.fn_sdist).parameters: + items = self.fn_sdist(config_settings) + else: + items = self.fn_sdist() + + prefix = f'{_normalise2(self.name)}-{self.version}' + os.makedirs(sdist_directory, exist_ok=True) + tarpath = f'{sdist_directory}/{prefix}.tar.gz' + log2(f'Creating sdist: {tarpath}') + + with tarfile.open(tarpath, 'w:gz') as tar: + + names_in_tar = list() + def check_name(name): + if name in names_in_tar: + raise Exception(f'Name specified twice: {name}') + names_in_tar.append(name) + + def add(from_, name): + check_name(name) + if isinstance(from_, str): + log2( f'Adding file: {os.path.relpath(from_)} => {name}') + tar.add( from_, f'{prefix}/{name}', recursive=False) + elif isinstance(from_, bytes): + log2( f'Adding: {name}') + ti = tarfile.TarInfo(f'{prefix}/{name}') + ti.size = len(from_) + ti.mtime = time.time() + tar.addfile(ti, io.BytesIO(from_)) + else: + assert 0 + + def add_string(text, name): + textb = text.encode('utf8') + return add(textb, name) + + found_pyproject_toml = False + for item in items: + from_, (to_abs, to_rel) = self._fromto(item) + if isinstance(from_, bytes): + add(from_, to_rel) + else: + if from_.startswith(f'{os.path.abspath(sdist_directory)}/'): + # Source files should not be inside . + assert 0, f'Path is inside sdist_directory={sdist_directory}: {from_!r}' + assert os.path.exists(from_), f'Path does not exist: {from_!r}' + assert os.path.isfile(from_), f'Path is not a file: {from_!r}' + add(from_, to_rel) + if to_rel == 'pyproject.toml': + found_pyproject_toml = True + + assert found_pyproject_toml, f'Cannot create sdist because file not specified: pyproject.toml' + + # Always add a PKG-INFO file. + add_string(self._metainfo(), 'PKG-INFO') + + if self.license: + if 'COPYING' in names_in_tar: + log2(f'Not writing .license because file already in sdist: COPYING') + else: + add_string(self.license, 'COPYING') + + log1( f'Have created sdist: {tarpath}') + return os.path.basename(tarpath) + + def wheel_tag_string(self): + ''' + Returns --. + ''' + return f'{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}' + + def tag_python(self): + ''' + Get two-digit python version, e.g. 'cp3.8' for python-3.8.6. + ''' + if self.tag_python_: + ret = self.tag_python_ + else: + ret = 'cp' + ''.join(platform.python_version().split('.')[:2]) + assert '-' not in ret + return ret + + def tag_abi(self): + ''' + ABI tag. + ''' + if self.tag_abi_: + return self.tag_abi_ + elif self.py_limited_api: + return 'abi3' + else: + return 'none' + + def tag_platform(self): + ''' + Find platform tag used in wheel filename. + ''' + ret = self.tag_platform_ + log0(f'From self.tag_platform_: {ret=}.') + + if not ret: + # Prefer this to PEP-425. Appears to be undocumented, + # but set in manylinux docker images and appears + # to be used by cibuildwheel and auditwheel, e.g. + # https://github.com/rapidsai/shared-action-workflows/issues/80 + ret = os.environ.get( 'AUDITWHEEL_PLAT') + log0(f'From AUDITWHEEL_PLAT: {ret=}.') + + if not ret: + # Notes: + # + # PEP-425. On Linux gives `linux_x86_64` which is rejected by + # pypi.org. + # + # On local MacOS/arm64 mac-mini have seen sysconfig.get_platform() + # unhelpfully return `macosx-10.9-universal2` if `python3` is the + # system Python /usr/bin/python3; this happens if we source `. + # /etc/profile`. + # + ret = sysconfig.get_platform() + ret = ret.replace('-', '_').replace('.', '_').lower() + log0(f'From sysconfig.get_platform(): {ret=}.') + + ret = _macos_fixup_platform_tag(ret) + + log0( f'tag_platform(): returning {ret=}.') + assert '-' not in ret + return ret + + def wheel_name(self): + ret = f'{_normalise2(self.name)}-{self.version}-{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}.whl' + assert ret.count('-') == 4, f'Expected 4 dash characters in {ret=}.' + return ret + + def wheel_name_match(self, wheel): + ''' + Returns true if `wheel` matches our wheel. We basically require the + name to be the same, except that we accept platform tags that contain + extra items (see pep-0600/), for example we return true with: + + self: foo-cp38-none-manylinux2014_x86_64.whl + wheel: foo-cp38-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl + ''' + log2(f'{wheel=}') + assert wheel.endswith('.whl') + wheel2 = wheel[:-len('.whl')] + name, version, tag_python, tag_abi, tag_platform = wheel2.split('-') + + py_limited_api_compatible = False + if self.py_limited_api and tag_abi == 'abi3': + # Allow lower tag_python number. + m = re.match('cp([0-9]+)', tag_python) + tag_python_int = int(m.group(1)) + m = re.match('cp([0-9]+)', self.tag_python()) + tag_python_int_self = int(m.group(1)) + if tag_python_int <= tag_python_int_self: + # This wheel uses Python stable ABI same or older than ours, so + # we can use it. + log2(f'py_limited_api; {tag_python=} compatible with {self.tag_python()=}.') + py_limited_api_compatible = True + + log2(f'{_normalise2(self.name) == name=}') + log2(f'{self.version == version=}') + log2(f'{self.tag_python() == tag_python=} {self.tag_python()=} {tag_python=}') + log2(f'{py_limited_api_compatible=}') + log2(f'{self.tag_abi() == tag_abi=}') + log2(f'{self.tag_platform() in tag_platform.split(".")=}') + log2(f'{self.tag_platform()=}') + log2(f'{tag_platform.split(".")=}') + ret = (1 + and _normalise2(self.name) == name + and self.version == version + and (self.tag_python() == tag_python or py_limited_api_compatible) + and self.tag_abi() == tag_abi + and self.tag_platform() in tag_platform.split('.') + ) + log2(f'Returning {ret=}.') + return ret + + def _entry_points_text(self): + if self.entry_points: + if isinstance(self.entry_points, str): + return self.entry_points + ret = '' + for key, values in self.entry_points.items(): + ret += f'[{key}]\n' + for value in values: + ret += f'{value}\n' + return ret + + def _call_fn_build( self, config_settings=None): + assert self.fn_build + assert os.path.isfile('pyproject.toml'), ( + 'Cannot create package because file does not exist: pyproject.toml' + ) + log2(f'calling self.fn_build={self.fn_build}') + if inspect.signature(self.fn_build).parameters: + ret = self.fn_build(config_settings) + else: + ret = self.fn_build() + assert isinstance( ret, (list, tuple)), \ + f'Expected list/tuple from {self.fn_build} but got: {ret!r}' + + # Check that any extensions that we have built, have same + # py_limited_api value. If package is marked with py_limited_api=True + # then non-py_limited_api extensions seem to fail at runtime on + # Windows. + # + # (We could possibly allow package py_limited_api=False and extensions + # py_limited_api=True, but haven't tested this, and it seems simpler to + # be strict.) + for item in ret: + from_, (to_abs, to_rel) = self._fromto(item) + from_abs = os.path.abspath(from_) + is_py_limited_api = _extensions_to_py_limited_api.get(from_abs) + if is_py_limited_api is not None: + assert bool(self.py_limited_api) == bool(is_py_limited_api), ( + f'Extension was built with' + f' py_limited_api={is_py_limited_api} but pipcl.Package' + f' name={self.name!r} has' + f' py_limited_api={self.py_limited_api}:' + f' {from_abs!r}' + ) + + return ret + + + def _argv_clean(self, all_): + ''' + Called by `handle_argv()`. + ''' + if not self.fn_clean: + return + paths = self.fn_clean(all_) + if paths: + if isinstance(paths, str): + paths = paths, + for path in paths: + if not os.path.isabs(path): + path = ps.path.join(self.root, path) + path = os.path.abspath(path) + assert path.startswith(self.root+os.sep), \ + f'path={path!r} does not start with root={self.root+os.sep!r}' + log2(f'Removing: {path}') + shutil.rmtree(path, ignore_errors=True) + + + def install(self, record_path=None, root=None): + ''' + Called by `handle_argv()` to handle `install` command.. + ''' + log2( f'{record_path=} {root=}') + + # Do a build and get list of files to install. + # + items = list() + if self.fn_build: + items = self._call_fn_build( dict()) + + root2 = install_dir(root) + log2( f'{root2=}') + + log1( f'Installing into: {root2!r}') + dist_info_dir = self._dist_info_dir() + + if not record_path: + record_path = f'{root2}/{dist_info_dir}/RECORD' + record = _Record() + + def add_file(from_, to_abs, to_rel): + os.makedirs( os.path.dirname( to_abs), exist_ok=True) + if isinstance(from_, bytes): + log2(f'Copying content into {to_abs}.') + with open(to_abs, 'wb') as f: + f.write(from_) + record.add_content(from_, to_rel) + else: + log0(f'{from_=}') + log2(f'Copying from {os.path.relpath(from_, self.root)} to {to_abs}') + shutil.copy2( from_, to_abs) + record.add_file(from_, to_rel) + + def add_str(content, to_abs, to_rel): + log2( f'Writing to: {to_abs}') + os.makedirs( os.path.dirname( to_abs), exist_ok=True) + with open( to_abs, 'w') as f: + f.write( content) + record.add_content(content, to_rel) + + for item in items: + from_, (to_abs, to_rel) = self._fromto(item) + log0(f'{from_=} {to_abs=} {to_rel=}') + to_abs2 = f'{root2}/{to_rel}' + add_file( from_, to_abs2, to_rel) + + add_str( self._metainfo(), f'{root2}/{dist_info_dir}/METADATA', f'{dist_info_dir}/METADATA') + + if self.license: + add_str( self.license, f'{root2}/{dist_info_dir}/COPYING', f'{dist_info_dir}/COPYING') + + entry_points_text = self._entry_points_text() + if entry_points_text: + add_str( + entry_points_text, + f'{root2}/{dist_info_dir}/entry_points.txt', + f'{dist_info_dir}/entry_points.txt', + ) + + log2( f'Writing to: {record_path}') + with open(record_path, 'w') as f: + f.write(record.get()) + + log2(f'Finished.') + + + def _argv_dist_info(self, root): + ''' + Called by `handle_argv()`. There doesn't seem to be any documentation + for `setup.py dist_info`, but it appears to be like `egg_info` except + it writes to a slightly different directory. + ''' + if root is None: + root = f'{normalise2(self.name)}-{self.version}.dist-info' + self._write_info(f'{root}/METADATA') + if self.license: + with open( f'{root}/COPYING', 'w') as f: + f.write( self.license) + + + def _argv_egg_info(self, egg_base): + ''' + Called by `handle_argv()`. + ''' + if egg_base is None: + egg_base = '.' + self._write_info(f'{egg_base}/.egg-info') + + + def _write_info(self, dirpath=None): + ''' + Writes egg/dist info to files in directory `dirpath` or `self.root` if + `None`. + ''' + if dirpath is None: + dirpath = self.root + log2(f'Creating files in directory {dirpath}') + os.makedirs(dirpath, exist_ok=True) + with open(os.path.join(dirpath, 'PKG-INFO'), 'w') as f: + f.write(self._metainfo()) + + # These don't seem to be required? + # + #with open(os.path.join(dirpath, 'SOURCES.txt', 'w') as f: + # pass + #with open(os.path.join(dirpath, 'dependency_links.txt', 'w') as f: + # pass + #with open(os.path.join(dirpath, 'top_level.txt', 'w') as f: + # f.write(f'{self.name}\n') + #with open(os.path.join(dirpath, 'METADATA', 'w') as f: + # f.write(self._metainfo()) + + + def handle_argv(self, argv): + ''' + Attempt to handles old-style (pre PEP-517) command line passed by + old releases of pip to a `setup.py` script, and manual running of + `setup.py`. + + This is partial support at best. + ''' + global g_verbose + #log2(f'argv: {argv}') + + class ArgsRaise: + pass + + class Args: + ''' + Iterates over argv items. + ''' + def __init__( self, argv): + self.items = iter( argv) + def next( self, eof=ArgsRaise): + ''' + Returns next arg. If no more args, we return or raise an + exception if is ArgsRaise. + ''' + try: + return next( self.items) + except StopIteration: + if eof is ArgsRaise: + raise Exception('Not enough args') + return eof + + command = None + opt_all = None + opt_dist_dir = 'dist' + opt_egg_base = None + opt_formats = None + opt_install_headers = None + opt_record = None + opt_root = None + + args = Args(argv[1:]) + + while 1: + arg = args.next(None) + if arg is None: + break + + elif arg in ('-h', '--help', '--help-commands'): + log0(textwrap.dedent(''' + Usage: + [...] [...] + Commands: + bdist_wheel + Creates a wheel called + /--
.whl, where + is "dist" or as specified by --dist-dir, + and
encodes ABI and platform etc. + clean + Cleans build files. + dist_info + Creates files in -.dist-info/ or + directory specified by --egg-base. + egg_info + Creates files in .egg-info/ or directory + directory specified by --egg-base. + install + Builds and installs. Writes installation + information to if --record was + specified. + sdist + Make a source distribution: + /-.tar.gz + Options: + --all + Used by "clean". + --compile + Ignored. + --dist-dir | -d + Default is "dist". + --egg-base + Used by "egg_info". + --formats + Used by "sdist". + --install-headers + Ignored. + --python-tag + Ignored. + --record + Used by "install". + --root + Used by "install". + --single-version-externally-managed + Ignored. + --verbose -v + Extra diagnostics. + Other: + windows-vs [-y ] [-v ] [-g ] [--verbose] + Windows only; looks for matching Python. + ''')) + return + + elif arg in ('bdist_wheel', 'clean', 'dist_info', 'egg_info', 'install', 'sdist'): + assert command is None, 'Two commands specified: {command} and {arg}.' + command = arg + + elif arg in ('windows-vs', 'windows-python', 'show-sysconfig'): + assert command is None, 'Two commands specified: {command} and {arg}.' + command = arg + + elif arg == '--all': opt_all = True + elif arg == '--compile': pass + elif arg == '--dist-dir' or arg == '-d': opt_dist_dir = args.next() + elif arg == '--egg-base': opt_egg_base = args.next() + elif arg == '--formats': opt_formats = args.next() + elif arg == '--install-headers': opt_install_headers = args.next() + elif arg == '--python-tag': pass + elif arg == '--record': opt_record = args.next() + elif arg == '--root': opt_root = args.next() + elif arg == '--single-version-externally-managed': pass + elif arg == '--verbose' or arg == '-v': g_verbose += 1 + + else: + raise Exception(f'Unrecognised arg: {arg}') + + assert command, 'No command specified' + + log1(f'Handling command={command}') + if 0: pass + elif command == 'bdist_wheel': self.build_wheel(opt_dist_dir) + elif command == 'clean': self._argv_clean(opt_all) + elif command == 'dist_info': self._argv_dist_info(opt_egg_base) + elif command == 'egg_info': self._argv_egg_info(opt_egg_base) + elif command == 'install': self.install(opt_record, opt_root) + elif command == 'sdist': self.build_sdist(opt_dist_dir, opt_formats) + + elif command == 'windows-python': + version = None + while 1: + arg = args.next(None) + if arg is None: + break + elif arg == '-v': + version = args.next() + elif arg == '--verbose': + g_verbose += 1 + else: + assert 0, f'Unrecognised {arg=}' + python = wdev.WindowsPython(version=version) + print(f'Python is:\n{python.description_ml(" ")}') + + elif command == 'windows-vs': + grade = None + version = None + year = None + while 1: + arg = args.next(None) + if arg is None: + break + elif arg == '-g': + grade = args.next() + elif arg == '-v': + version = args.next() + elif arg == '-y': + year = args.next() + elif arg == '--verbose': + g_verbose += 1 + else: + assert 0, f'Unrecognised {arg=}' + vs = wdev.WindowsVS(year=year, grade=grade, version=version) + print(f'Visual Studio is:\n{vs.description_ml(" ")}') + + elif command == 'show-sysconfig': + show_sysconfig() + for mod in platform, sys: + log0(f'{mod.__name__}:') + for n in dir(mod): + if n.startswith('_'): + continue + log0(f'{mod.__name__}.{n}') + if mod is platform and n == 'uname': + continue + if mod is platform and n == 'pdb': + continue + if mod is sys and n in ('breakpointhook', 'exit'): + # We don't want to call these. + continue + v = getattr(mod, n) + if callable(v): + try: + v = v() + except Exception: + pass + else: + #print(f'{n=}', flush=1) + try: + print(f' {mod.__name__}.{n}()={v!r}') + except Exception: + print(f' Failed to print value of {mod.__name__}.{n}().') + else: + try: + print(f' {mod.__name__}.{n}={v!r}') + except Exception: + print(f' Failed to print value of {mod.__name__}.{n}.') + + else: + assert 0, f'Unrecognised command: {command}' + + log2(f'Finished handling command: {command}') + + + def __str__(self): + return ('{' + f'name={self.name!r}' + f' version={self.version!r}' + f' platform={self.platform!r}' + f' supported_platform={self.supported_platform!r}' + f' summary={self.summary!r}' + f' description={self.description!r}' + f' description_content_type={self.description_content_type!r}' + f' keywords={self.keywords!r}' + f' home_page={self.home_page!r}' + f' download_url={self.download_url!r}' + f' author={self.author!r}' + f' author_email={self.author_email!r}' + f' maintainer={self.maintainer!r}' + f' maintainer_email={self.maintainer_email!r}' + f' license={self.license!r}' + f' classifier={self.classifier!r}' + f' requires_dist={self.requires_dist!r}' + f' requires_python={self.requires_python!r}' + f' requires_external={self.requires_external!r}' + f' project_url={self.project_url!r}' + f' provides_extra={self.provides_extra!r}' + + f' root={self.root!r}' + f' fn_build={self.fn_build!r}' + f' fn_sdist={self.fn_sdist!r}' + f' fn_clean={self.fn_clean!r}' + f' tag_python={self.tag_python_!r}' + f' tag_abi={self.tag_abi_!r}' + f' tag_platform={self.tag_platform_!r}' + '}' + ) + + def _dist_info_dir( self): + return f'{_normalise2(self.name)}-{self.version}.dist-info' + + def _metainfo(self): + ''' + Returns text for `.egg-info/PKG-INFO` file, or `PKG-INFO` in an sdist + `.tar.gz` file, or `...dist-info/METADATA` in a wheel. + ''' + # 2021-04-30: Have been unable to get multiline content working on + # test.pypi.org so we currently put the description as the body after + # all the other headers. + # + ret = [''] + def add(key, value): + if value is None: + return + if isinstance( value, (tuple, list)): + for v in value: + if v is not None: + add( key, v) + return + if key == 'License' and '\n' in value: + # This is ok because we write `self.license` into + # *.dist-info/COPYING. + # + log1( f'Omitting license because contains newline(s).') + return + assert '\n' not in value, f'key={key} value contains newline: {value!r}' + if key == 'Project-URL': + assert value.count(',') == 1, f'For {key=}, should have one comma in {value!r}.' + ret[0] += f'{key}: {value}\n' + #add('Description', self.description) + add('Metadata-Version', '2.1') + + # These names are from: + # https://packaging.python.org/specifications/core-metadata/ + # + for name in ( + 'Name', + 'Version', + 'Platform', + 'Supported-Platform', + 'Summary', + 'Description-Content-Type', + 'Keywords', + 'Home-page', + 'Download-URL', + 'Author', + 'Author-email', + 'Maintainer', + 'Maintainer-email', + 'License', + 'Classifier', + 'Requires-Dist', + 'Requires-Python', + 'Requires-External', + 'Project-URL', + 'Provides-Extra', + ): + identifier = name.lower().replace( '-', '_') + add( name, getattr( self, identifier)) + + ret = ret[0] + + # Append description as the body + if self.description: + if '\n' in self.description: + description_text = self.description.strip() + else: + with open(self.description) as f: + description_text = f.read() + ret += '\n' # Empty line separates headers from body. + ret += description_text + ret += '\n' + return ret + + def _path_relative_to_root(self, path, assert_within_root=True): + ''' + Returns `(path_abs, path_rel)`, where `path_abs` is absolute path and + `path_rel` is relative to `self.root`. + + Interprets `path` as relative to `self.root` if not absolute. + + We use `os.path.realpath()` to resolve any links. + + if `assert_within_root` is true, assert-fails if `path` is not within + `self.root`. + ''' + if os.path.isabs(path): + p = path + else: + p = os.path.join(self.root, path) + p = os.path.realpath(os.path.abspath(p)) + if assert_within_root: + assert p.startswith(self.root+os.sep) or p == self.root, \ + f'Path not within root={self.root+os.sep!r}: {path=} {p=}' + p_rel = os.path.relpath(p, self.root) + return p, p_rel + + def _fromto(self, p): + ''' + Returns `(from_, (to_abs, to_rel))`. + + If `p` is a string we convert to `(p, p)`. Otherwise we assert that + `p` is a tuple `(from_, to_)` where `from_` is str/bytes and `to_` is + str. If `from_` is a bytes it is contents of file to add, otherwise the + path of an existing file; non-absolute paths are assumed to be relative + to `self.root`. + + If `to_` is empty or `/` we set it to the leaf of `from_` (which must + be a str) - i.e. we place the file in the root directory of the wheel; + otherwise if `to_` ends with `/` we append the leaf of `from_` (which + must be a str). + + If `to_` starts with `$dist-info/`, we replace this with + `self._dist_info_dir()`. + + If `to_` starts with `$data/`, we replace this with + `{self.name}-{self.version}.data/`. + + We assert that `to_abs` is `within self.root`. + + `to_rel` is derived from the `to_abs` and is relative to self.root`. + ''' + ret = None + if isinstance(p, str): + p = p, p + assert isinstance(p, tuple) and len(p) == 2 + + from_, to_ = p + assert isinstance(from_, (str, bytes)) + assert isinstance(to_, str) + if to_ == '/' or to_ == '': + to_ = os.path.basename(from_) + elif to_.endswith('/'): + to_ += os.path.basename(from_) + prefix = '$dist-info/' + if to_.startswith( prefix): + to_ = f'{self._dist_info_dir()}/{to_[ len(prefix):]}' + prefix = '$data/' + if to_.startswith( prefix): + to_ = f'{_normalise2(self.name)}-{self.version}.data/{to_[ len(prefix):]}' + if isinstance(from_, str): + from_, _ = self._path_relative_to_root( from_, assert_within_root=False) + to_ = self._path_relative_to_root(to_) + assert isinstance(from_, (str, bytes)) + log2(f'returning {from_=} {to_=}') + return from_, to_ + +_extensions_to_py_limited_api = dict() + +def build_extension( + name, + path_i, + outdir, + *, + builddir=None, + includes=None, + defines=None, + libpaths=None, + libs=None, + optimise=True, + debug=False, + compiler_extra='', + linker_extra='', + swig=None, + cpp=True, + source_extra=None, + prerequisites_swig=None, + prerequisites_compile=None, + prerequisites_link=None, + infer_swig_includes=True, + py_limited_api=False, + ): + ''' + Builds a Python extension module using SWIG. Works on Windows, Linux, MacOS + and OpenBSD. + + On Unix, sets rpath when linking shared libraries. + + Args: + name: + Name of generated extension module. + path_i: + Path of input SWIG `.i` file. Internally we use swig to generate a + corresponding `.c` or `.cpp` file. + outdir: + Output directory for generated files: + + * `{outdir}/{name}.py` + * `{outdir}/_{name}.so` # Unix + * `{outdir}/_{name}.*.pyd` # Windows + We return the leafname of the `.so` or `.pyd` file. + builddir: + Where to put intermediate files, for example the .cpp file + generated by swig and `.d` dependency files. Default is `outdir`. + includes: + A string, or a sequence of extra include directories to be prefixed + with `-I`. + defines: + A string, or a sequence of extra preprocessor defines to be + prefixed with `-D`. + libpaths + A string, or a sequence of library paths to be prefixed with + `/LIBPATH:` on Windows or `-L` on Unix. + libs + A string, or a sequence of library names. Each item is prefixed + with `-l` on non-Windows. + optimise: + Whether to use compiler optimisations and define NDEBUG. + debug: + Whether to build with debug symbols. + compiler_extra: + Extra compiler flags. Can be None. + linker_extra: + Extra linker flags. Can be None. + swig: + Swig command; if false we use 'swig'. + cpp: + If true we tell SWIG to generate C++ code instead of C. + source_extra: + Extra source files to build into the shared library, + prerequisites_swig: + prerequisites_compile: + prerequisites_link: + + [These are mainly for use on Windows. On other systems we + automatically generate dynamic dependencies using swig/compile/link + commands' `-MD` and `-MF` args.] + + Sequences of extra input files/directories that should force + running of swig, compile or link commands if they are newer than + any existing generated SWIG `.i` file, compiled object file or + shared library file. + + If present, the first occurrence of `True` or `False` forces re-run + or no re-run. Any occurrence of None is ignored. If an item is a + directory path we look for newest file within the directory tree. + + If not a sequence, we convert into a single-item list. + + prerequisites_swig + + We use swig's -MD and -MF args to generate dynamic dependencies + automatically, so this is not usually required. + + prerequisites_compile + prerequisites_link + + On non-Windows we use cc's -MF and -MF args to generate dynamic + dependencies so this is not usually required. + infer_swig_includes: + If true, we extract `-I` and `-I ` args from + `compile_extra` (also `/I` on windows) and use them with swig so + that it can see the same header files as C/C++. This is useful + when using enviromment variables such as `CC` and `CXX` to set + `compile_extra`. + py_limited_api: + If true we build for current Python's limited API / stable ABI. + + Note that we will assert false if this extension is added to a + pipcl.Package that has a different , because + on Windows importing a non-py_limited_api extension inside a + py_limited=True package fails. + + Returns the leafname of the generated library file within `outdir`, e.g. + `_{name}.so` on Unix or `_{name}.cp311-win_amd64.pyd` on Windows. + ''' + if compiler_extra is None: + compiler_extra = '' + if linker_extra is None: + linker_extra = '' + if builddir is None: + builddir = outdir + if not swig: + swig = 'swig' + + if source_extra is None: + source_extra = list() + if isinstance(source_extra, str): + source_extra = [source_extra] + + includes_text = _flags( includes, '-I') + defines_text = _flags( defines, '-D') + libpaths_text = _flags( libpaths, '/LIBPATH:', '"') if windows() else _flags( libpaths, '-L') + libs_text = _flags( libs, '' if windows() else '-l') + path_cpp = f'{builddir}/{os.path.basename(path_i)}' + path_cpp += '.cpp' if cpp else '.c' + os.makedirs( outdir, exist_ok=True) + + # Run SWIG. + # + if infer_swig_includes: + # Extract include flags from `compiler_extra`. + swig_includes_extra = '' + compiler_extra_items = shlex.split(compiler_extra) + i = 0 + while i < len(compiler_extra_items): + item = compiler_extra_items[i] + # Swig doesn't seem to like a space after `I`. + if item == '-I' or (windows() and item == '/I'): + swig_includes_extra += f' -I{compiler_extra_items[i+1]}' + i += 1 + elif item.startswith('-I') or (windows() and item.startswith('/I')): + swig_includes_extra += f' -I{compiler_extra_items[i][2:]}' + i += 1 + swig_includes_extra = swig_includes_extra.strip() + deps_path = f'{path_cpp}.d' + prerequisites_swig2 = _get_prerequisites( deps_path) + run_if( + f''' + {swig} + -Wall + {"-c++" if cpp else ""} + -python + -module {name} + -outdir {outdir} + -o {path_cpp} + -MD -MF {deps_path} + {includes_text} + {swig_includes_extra} + {path_i} + ''' + , + path_cpp, + path_i, + prerequisites_swig, + prerequisites_swig2, + ) + + if pyodide(): + so_suffix = '.so' + log0(f'pyodide: PEP-3149 suffix untested, so omitting. {_so_suffix()=}.') + else: + so_suffix = _so_suffix(use_so_versioning = not py_limited_api) + path_so_leaf = f'_{name}{so_suffix}' + path_so = f'{outdir}/{path_so_leaf}' + + py_limited_api2 = current_py_limited_api() if py_limited_api else None + + compiler_command, pythonflags = base_compiler(cpp=cpp) + linker_command, _ = base_linker(cpp=cpp) + # setuptools on Linux seems to use slightly different compile flags: + # + # -fwrapv -O3 -Wall -O2 -g0 -DPY_CALL_TRAMPOLINE + # + + general_flags = '' + if windows(): + permissive = '/permissive-' + EHsc = '/EHsc' + T = '/Tp' if cpp else '/Tc' + optimise2 = '/DNDEBUG /O2' if optimise else '/D_DEBUG' + debug2 = '/Zi' if debug else '' + py_limited_api3 = f'/DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else '' + + else: + if debug: + general_flags += '/Zi' if windows() else ' -g' + if optimise: + general_flags += ' /DNDEBUG /O2' if windows() else ' -O2 -DNDEBUG' + + py_limited_api3 = f'-DPy_LIMITED_API={py_limited_api2}' if py_limited_api2 else '' + + if windows(): + pass + elif darwin(): + # MacOS's linker does not like `-z origin`. + rpath_flag = "-Wl,-rpath,@loader_path/" + # Avoid `Undefined symbols for ... "_PyArg_UnpackTuple" ...'. + general_flags += ' -undefined dynamic_lookup' + elif pyodide(): + # Setting `-Wl,-rpath,'$ORIGIN',-z,origin` gives: + # emcc: warning: ignoring unsupported linker flag: `-rpath` [-Wlinkflags] + # wasm-ld: error: unknown -z value: origin + # + rpath_flag = "-Wl,-rpath,'$ORIGIN'" + else: + rpath_flag = "-Wl,-rpath,'$ORIGIN',-z,origin" + + # Fun fact - on Linux, if the -L and -l options are before '{path_cpp}' + # they seem to be ignored... + # + path_os = list() + + for path_source in [path_cpp] + source_extra: + path_o = f'{path_source}.obj' if windows() else f'{path_source}.o' + path_os.append(f' {path_o}') + + prerequisites_path = f'{path_o}.d' + + if windows(): + compiler_command2 = f''' + {compiler_command} + # General: + /c # Compiles without linking. + {EHsc} # Enable "Standard C++ exception handling". + + #/MD # Creates a multithreaded DLL using MSVCRT.lib. + {'/MDd' if debug else '/MD'} + + # Input/output files: + {T}{path_source} # /Tp specifies C++ source file. + /Fo{path_o} # Output file. codespell:ignore + + # Include paths: + {includes_text} + {pythonflags.includes} # Include path for Python headers. + + # Code generation: + {optimise2} + {debug2} + {permissive} # Set standard-conformance mode. + + # Diagnostics: + #/FC # Display full path of source code files passed to cl.exe in diagnostic text. + /W3 # Sets which warning level to output. /W3 is IDE default. + /diagnostics:caret # Controls the format of diagnostic messages. + /nologo # + + {defines_text} + {compiler_extra} + + {py_limited_api3} + ''' + + else: + compiler_command2 = f''' + {compiler_command} + -fPIC + {general_flags.strip()} + {pythonflags.includes} + {includes_text} + {defines_text} + -MD -MF {prerequisites_path} + -c {path_source} + -o {path_o} + {compiler_extra} + {py_limited_api3} + ''' + run_if( + compiler_command2, + path_o, + path_source, + [path_source] + _get_prerequisites(prerequisites_path), + ) + + # Link + prerequisites_path = f'{path_so}.d' + if windows(): + debug2 = '/DEBUG' if debug else '' + base, _ = os.path.splitext(path_so_leaf) + command2 = f''' + {linker_command} + /DLL # Builds a DLL. + /EXPORT:PyInit__{name} # Exports a function. + /IMPLIB:{base}.lib # Overrides the default import library name. + {libpaths_text} + {pythonflags.ldflags} + /OUT:{path_so} # Specifies the output file name. + {debug2} + /nologo + {libs_text} + {' '.join(path_os)} + {linker_extra} + ''' + elif pyodide(): + command2 = f''' + {linker_command} + -MD -MF {prerequisites_path} + -o {path_so} + {' '.join(path_os)} + {libpaths_text} + {libs_text} + {linker_extra} + {pythonflags.ldflags} + {rpath_flag} + ''' + else: + command2 = f''' + {linker_command} + -shared + {general_flags.strip()} + -MD -MF {prerequisites_path} + -o {path_so} + {' '.join(path_os)} + {libpaths_text} + {libs_text} + {linker_extra} + {pythonflags.ldflags} + {rpath_flag} + {py_limited_api3} + ''' + link_was_run = run_if( + command2, + path_so, + path_cpp, + *path_os, + *_get_prerequisites(f'{path_so}.d'), + ) + + if link_was_run and darwin(): + # We need to patch up references to shared libraries in `libs`. + sublibraries = list() + for lib in () if libs is None else libs: + for libpath in libpaths: + found = list() + for suffix in '.so', '.dylib': + path = f'{libpath}/lib{os.path.basename(lib)}{suffix}' + if os.path.exists( path): + found.append( path) + if found: + assert len(found) == 1, f'More than one file matches lib={lib!r}: {found}' + sublibraries.append( found[0]) + break + else: + log2(f'Warning: can not find path of lib={lib!r} in libpaths={libpaths}') + macos_patch( path_so, *sublibraries) + + #run(f'ls -l {path_so}', check=0) + #run(f'file {path_so}', check=0) + + _extensions_to_py_limited_api[os.path.abspath(path_so)] = py_limited_api + + return path_so_leaf + + +# Functions that might be useful. +# + + +def base_compiler(vs=None, pythonflags=None, cpp=False, use_env=True): + ''' + Returns basic compiler command and PythonFlags. + + Args: + vs: + Windows only. A `wdev.WindowsVS` instance or None to use default + `wdev.WindowsVS` instance. + pythonflags: + A `pipcl.PythonFlags` instance or None to use default + `pipcl.PythonFlags` instance. + cpp: + If true we return C++ compiler command instead of C. On Windows + this has no effect - we always return `cl.exe`. + use_env: + If true we return '$CC' or '$CXX' if the corresponding + environmental variable is set (without evaluating with `getenv()` + or `os.environ`). + + Returns `(cc, pythonflags)`: + cc: + C or C++ command. On Windows this is of the form + `{vs.vcvars}&&{vs.cl}`; otherwise it is typically `cc` or `c++`. + pythonflags: + The `pythonflags` arg or a new `pipcl.PythonFlags` instance. + ''' + if not pythonflags: + pythonflags = PythonFlags() + cc = None + if use_env: + if cpp: + if os.environ.get( 'CXX'): + cc = '$CXX' + else: + if os.environ.get( 'CC'): + cc = '$CC' + if cc: + pass + elif windows(): + if not vs: + vs = wdev.WindowsVS() + cc = f'"{vs.vcvars}"&&"{vs.cl}"' + elif wasm(): + cc = 'em++' if cpp else 'emcc' + else: + cc = 'c++' if cpp else 'cc' + cc = macos_add_cross_flags( cc) + return cc, pythonflags + + +def base_linker(vs=None, pythonflags=None, cpp=False, use_env=True): + ''' + Returns basic linker command. + + Args: + vs: + Windows only. A `wdev.WindowsVS` instance or None to use default + `wdev.WindowsVS` instance. + pythonflags: + A `pipcl.PythonFlags` instance or None to use default + `pipcl.PythonFlags` instance. + cpp: + If true we return C++ linker command instead of C. On Windows this + has no effect - we always return `link.exe`. + use_env: + If true we use `os.environ['LD']` if set. + + Returns `(linker, pythonflags)`: + linker: + Linker command. On Windows this is of the form + `{vs.vcvars}&&{vs.link}`; otherwise it is typically `cc` or `c++`. + pythonflags: + The `pythonflags` arg or a new `pipcl.PythonFlags` instance. + ''' + if not pythonflags: + pythonflags = PythonFlags() + linker = None + if use_env: + if os.environ.get( 'LD'): + linker = '$LD' + if linker: + pass + elif windows(): + if not vs: + vs = wdev.WindowsVS() + linker = f'"{vs.vcvars}"&&"{vs.link}"' + elif wasm(): + linker = 'em++' if cpp else 'emcc' + else: + linker = 'c++' if cpp else 'cc' + linker = macos_add_cross_flags( linker) + return linker, pythonflags + + +def git_info( directory): + ''' + Returns `(sha, comment, diff, branch)`, all items are str or None if not + available. + + directory: + Root of git checkout. + ''' + sha, comment, diff, branch = None, None, None, None + e, out = run( + f'cd {directory} && (PAGER= git show --pretty=oneline|head -n 1 && git diff)', + capture=1, + check=0 + ) + if not e: + sha, _ = out.split(' ', 1) + comment, diff = _.split('\n', 1) + e, out = run( + f'cd {directory} && git rev-parse --abbrev-ref HEAD', + capture=1, + check=0 + ) + if not e: + branch = out.strip() + log1(f'git_info(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}') + return sha, comment, diff, branch + + +def git_items( directory, submodules=False): + ''' + Returns list of paths for all files known to git within a `directory`. + + Args: + directory: + Must be somewhere within a git checkout. + submodules: + If true we also include git submodules. + + Returns: + A list of paths for all files known to git within `directory`. Each + path is relative to `directory`. `directory` must be somewhere within a + git checkout. + + We run a `git ls-files` command internally. + + This function can be useful for the `fn_sdist()` callback. + ''' + command = 'cd ' + directory + ' && git ls-files' + if submodules: + command += ' --recurse-submodules' + log1(f'Running {command=}') + text = subprocess.check_output( command, shell=True) + ret = [] + for path in text.decode('utf8').strip().split( '\n'): + path2 = os.path.join(directory, path) + # Sometimes git ls-files seems to list empty/non-existent directories + # within submodules. + # + if not os.path.exists(path2): + log2(f'Ignoring git ls-files item that does not exist: {path2}') + elif os.path.isdir(path2): + log2(f'Ignoring git ls-files item that is actually a directory: {path2}') + else: + ret.append(path) + return ret + + +def git_get( + local, + *, + remote=None, + branch=None, + tag=None, + text=None, + depth=1, + env_extra=None, + update=True, + submodules=True, + ): + ''' + Creates/updates local checkout of remote repository and returns + absolute path of . + + If is set but does not start with 'git:', it is assumed to be an up + to date local checkout, and we return absolute path of without doing + any git operations. + + Args: + local: + Local directory. Created and/or updated using `git clone` and `git + fetch` etc. + remote: + Remote git repostitory, for example + 'https://github.com/ArtifexSoftware/mupdf.git'. Can be overridden + by . + branch: + Branch to use; can be overridden by . + tag: + Tag to use; can be overridden by . + text: + If None or empty: + Ignored. + + If starts with 'git:': + The remaining text should be a command-line + style string containing some or all of these args: + --branch + --tag + + These overrides , and . + Otherwise: + is assumed to be a local directory, and we simply return + it as an absolute path without doing any git operations. + + For example these all clone/update/branch master of https://foo.bar/qwerty.git to local + checkout 'foo-local': + + git_get('foo-local', remote='https://foo.bar/qwerty.git', branch='master') + git_get('foo-local', text='git:--branch master https://foo.bar/qwerty.git') + git_get('foo-local', text='git:--branch master', remote='https://foo.bar/qwerty.git') + git_get('foo-local', text='git:', branch='master', remote='https://foo.bar/qwerty.git') + depth: + Depth of local checkout when cloning and fetching, or None. + env_extra: + Dict of extra name=value environment variables to use whenever we + run git. + update: + If false we do not update existing repository. Might be useful if + testing without network access. + submodules: + If true, we clone with `--recursive --shallow-submodules` and run + `git submodule update --init --recursive` before returning. + ''' + log0(f'{remote=} {local=} {branch=} {tag=} {text=}') + + if text: + if text.startswith('git:'): + args = iter(shlex.split(text[len('git:'):])) + while 1: + try: + arg = next(args) + except StopIteration: + break + if arg == '--branch': + branch = next(args) + tag = None + elif arg == '--tag': + tag = next(args) + branch = None + else: + remote = arg + assert remote, f' unset and no remote specified in {text=}.' + assert branch or tag, f' and unset and no branch/tag specified in {text=}.' + else: + log0(f'Using local directory {text!r}.') + return os.path.abspath(text) + + assert (branch and not tag) or (not branch and tag), f'Must specify exactly one of and ; {branch=} {tag=}.' + + depth_arg = f' --depth {depth}' if depth else '' + + def do_update(): + # This seems to pull in the entire repository. + log0(f'do_update(): attempting to update {local=}.') + # Remove any local changes. + run(f'cd {local} && git reset --hard', env_extra=env_extra) + if tag: + # `-u` avoids `fatal: Refusing to fetch into current branch`. + # Using '+' and `revs/tags/` prefix seems to avoid errors like: + # error: cannot update ref 'refs/heads/v3.16.44': + # trying to write non-commit object + # 06c4ae5fe39a03b37a25a8b95214d9f8f8a867b8 to branch + # 'refs/heads/v3.16.44' + # + run(f'cd {local} && git fetch -fuv{depth_arg} {remote} +refs/tags/{tag}:refs/tags/{tag}', env_extra=env_extra) + run(f'cd {local} && git checkout {tag}', env_extra=env_extra) + if branch: + # `-u` avoids `fatal: Refusing to fetch into current branch`. + run(f'cd {local} && git fetch -fuv{depth_arg} {remote} {branch}:{branch}', env_extra=env_extra) + run(f'cd {local} && git checkout {branch}', env_extra=env_extra) + + do_clone = True + if os.path.isdir(f'{local}/.git'): + if update: + # Try to update existing checkout. + try: + do_update() + do_clone = False + except Exception as e: + log0(f'Failed to update existing checkout {local}: {e}') + else: + do_clone = False + + if do_clone: + # No existing git checkout, so do a fresh clone. + #_fs_remove(local) + log0(f'Cloning to: {local}') + command = f'git clone --config core.longpaths=true{depth_arg}' + if submodules: + command += f' --recursive --shallow-submodules' + if branch: + command += f' -b {branch}' + if tag: + command += f' -b {tag}' + command += f' {remote} {local}' + run(command, env_extra=env_extra) + do_update() + + if submodules: + run(f'cd {local} && git submodule update --init --recursive', env_extra=env_extra) + + # Show sha of checkout. + run( f'cd {local} && git show --pretty=oneline|head -n 1', check=False) + return os.path.abspath(local) + + +def run( + command, + *, + capture=False, + check=1, + verbose=1, + env=None, + env_extra=None, + timeout=None, + caller=1, + prefix=None, + encoding=None, # System default. + errors='backslashreplace', + ): + ''' + Runs a command using `subprocess.run()`. + + Args: + command: + A string, the command to run. + + Multiple lines in `command` are treated as a single command. + + * If a line starts with `#` it is discarded. + * If a line contains ` #`, the trailing text is discarded. + + When running the command on Windows, newlines are replaced by + spaces; otherwise each line is terminated by a backslash character. + capture: + If true, we include the command's output in our return value. + check: + If true we raise an exception on error; otherwise we include the + command's returncode in our return value. + verbose: + If true we show the command. + env: + None or dict to use instead of . + env_extra: + None or dict to add to or . + timeout: + If not None, timeout in seconds; passed directly to + subprocess.run(). Note that on MacOS subprocess.run() seems to + leave processes running if timeout expires. + prefix: + String prefix for each line of output. + + If true: + * We run command with stdout=subprocess.PIPE and + stderr=subprocess.STDOUT, repetaedly reading the command's output + and writing it to stdout with . + * We do not support , which must be None. + Returns: + check capture Return + -------------------------- + false false returncode + false true (returncode, output) + true false None or raise exception + true true output or raise exception + ''' + if env is None: + env = os.environ + if env_extra: + env = env.copy() + if env_extra: + env.update(env_extra) + lines = _command_lines( command) + if verbose: + text = f'Running:' + nl = '\n ' + text += f' {nl.join(lines)}' + if env_extra: + text += f'\nwith:\n' + for k in sorted(env_extra.keys()): + text += f' {k}={shlex.quote(env_extra[k])}\n' + log1(text, caller=caller+1) + sep = ' ' if windows() else ' \\\n' + command2 = sep.join( lines) + + if prefix: + assert not timeout, f'Timeout not supported with prefix.' + child = subprocess.Popen( + command2, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + encoding=encoding, + errors=errors, + env=env, + ) + if capture: + capture_text = '' + decoder = codecs.getincrementaldecoder(child.stdout.encoding)(errors) + line_start = True + + while 1: + raw = os.read( child.stdout.fileno(), 10000) + text = decoder.decode(raw, final=not raw) + if capture: + capture_text += text + lines = text.split('\n') + for i, line in enumerate(lines): + if line_start: + sys.stdout.write(prefix) + line_start = False + sys.stdout.write(line) + if i < len(lines) - 1: + sys.stdout.write('\n') + line_start = True + sys.stdout.flush() + if not raw: + break + if not line_start: + sys.stdout.write('\n') + e = child.wait() + if check and e: + raise subprocess.CalledProcessError(e, command2, capture_text if capture else None) + if check: + return capture_text if capture else None + else: + return (e, capture_text) if capture else e + else: + cp = subprocess.run( + command2, + shell=True, + stdout=subprocess.PIPE if capture else None, + stderr=subprocess.STDOUT if capture else None, + check=check, + encoding=encoding, + errors=errors, + env=env, + timeout=timeout, + ) + if check: + return cp.stdout if capture else None + else: + return (cp.returncode, cp.stdout) if capture else cp.returncode + + +def darwin(): + return sys.platform.startswith( 'darwin') + +def windows(): + return platform.system() == 'Windows' + +def wasm(): + return os.environ.get( 'OS') in ('wasm', 'wasm-mt') + +def pyodide(): + return os.environ.get( 'PYODIDE') == '1' + +def linux(): + return platform.system() == 'Linux' + +def openbsd(): + return platform.system() == 'OpenBSD' + + +def show_system(): + ''' + Show useful information about the system plus argv and environ. + ''' + def log(text): + log0(text, caller=3) + + #log(f'{__file__=}') + #log(f'{__name__=}') + log(f'{os.getcwd()=}') + log(f'{platform.machine()=}') + log(f'{platform.platform()=}') + log(f'{platform.python_implementation()=}') + log(f'{platform.python_version()=}') + log(f'{platform.system()=}') + if sys.implementation.name != 'graalpy': + log(f'{platform.uname()=}') + log(f'{sys.executable=}') + log(f'{sys.version=}') + log(f'{sys.version_info=}') + log(f'{list(sys.version_info)=}') + + log(f'CPU bits: {cpu_bits()}') + + log(f'sys.argv ({len(sys.argv)}):') + for i, arg in enumerate(sys.argv): + log(f' {i}: {arg!r}') + + log(f'os.environ ({len(os.environ)}):') + for k in sorted( os.environ.keys()): + v = os.environ[ k] + log( f' {k}: {v!r}') + + +class PythonFlags: + ''' + Compile/link flags for the current python, for example the include path + needed to get `Python.h`. + + The 'PIPCL_PYTHON_CONFIG' environment variable allows to override + the location of the python-config executable. + + Members: + .includes: + String containing compiler flags for include paths. + .ldflags: + String containing linker flags for library paths. + ''' + def __init__(self): + + # Experimental detection of python flags from sysconfig.*() instead of + # python-config command. + includes_, ldflags_ = sysconfig_python_flags() + + if pyodide(): + _include_dir = os.environ[ 'PYO3_CROSS_INCLUDE_DIR'] + _lib_dir = os.environ[ 'PYO3_CROSS_LIB_DIR'] + self.includes = f'-I {_include_dir}' + self.ldflags = f'-L {_lib_dir}' + + elif 0: + + self.includes = includes_ + self.ldflags = ldflags_ + + elif windows(): + wp = wdev.WindowsPython() + self.includes = f'/I"{wp.include}"' + self.ldflags = f'/LIBPATH:"{wp.libs}"' + + elif pyodide(): + _include_dir = os.environ[ 'PYO3_CROSS_INCLUDE_DIR'] + _lib_dir = os.environ[ 'PYO3_CROSS_LIB_DIR'] + self.includes = f'-I {_include_dir}' + self.ldflags = f'-L {_lib_dir}' + + else: + python_config = os.environ.get("PIPCL_PYTHON_CONFIG") + if not python_config: + # We use python-config which appears to work better than pkg-config + # because it copes with multiple installed python's, e.g. + # manylinux_2014's /opt/python/cp*-cp*/bin/python*. + # + # But... on non-macos it seems that we should not attempt to specify + # libpython on the link command. The manylinux docker containers + # don't actually contain libpython.so, and it seems that this + # deliberate. And the link command runs ok. + # + python_exe = os.path.realpath( sys.executable) + if darwin(): + # Basic install of dev tools with `xcode-select --install` doesn't + # seem to provide a `python3-config` or similar, but there is a + # `python-config.py` accessible via sysconfig. + # + # We try different possibilities and use the last one that + # works. + # + python_config = None + for pc in ( + f'python3-config', + f'{sys.executable} {sysconfig.get_config_var("srcdir")}/python-config.py', + f'{python_exe}-config', + ): + e = subprocess.run( + f'{pc} --includes', + shell=1, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=0, + ).returncode + log2(f'{e=} from {pc!r}.') + if e == 0: + python_config = pc + assert python_config, f'Cannot find python-config' + else: + python_config = f'{python_exe}-config' + log2(f'Using {python_config=}.') + try: + self.includes = run( f'{python_config} --includes', capture=1, verbose=0).strip() + except Exception as e: + raise Exception('We require python development tools to be installed.') from e + self.ldflags = run( f'{python_config} --ldflags', capture=1, verbose=0).strip() + if linux(): + # It seems that with python-3.10 on Linux, we can get an + # incorrect -lcrypt flag that on some systems (e.g. WSL) + # causes: + # + # ImportError: libcrypt.so.2: cannot open shared object file: No such file or directory + # + ldflags2 = self.ldflags.replace(' -lcrypt ', ' ') + if ldflags2 != self.ldflags: + log2(f'### Have removed `-lcrypt` from ldflags: {self.ldflags!r} -> {ldflags2!r}') + self.ldflags = ldflags2 + + if 0: + log1(f'{self.includes=}') + log1(f' {includes_=}') + log1(f'{self.ldflags=}') + log1(f' {ldflags_=}') + + +def macos_add_cross_flags(command): + ''' + If running on MacOS and environment variables ARCHFLAGS is set + (indicating we are cross-building, e.g. for arm64), returns + `command` with extra flags appended. Otherwise returns unchanged + `command`. + ''' + if darwin(): + archflags = os.environ.get( 'ARCHFLAGS') + if archflags: + command = f'{command} {archflags}' + log2(f'Appending ARCHFLAGS to command: {command}') + return command + return command + + +def macos_patch( library, *sublibraries): + ''' + If running on MacOS, patches `library` so that all references to items in + `sublibraries` are changed to `@rpath/{leafname}`. Does nothing on other + platforms. + + library: + Path of shared library. + sublibraries: + List of paths of shared libraries; these have typically been + specified with `-l` when `library` was created. + ''' + log2( f'macos_patch(): library={library} sublibraries={sublibraries}') + if not darwin(): + return + if not sublibraries: + return + subprocess.run( f'otool -L {library}', shell=1, check=1) + command = 'install_name_tool' + names = [] + for sublibrary in sublibraries: + name = subprocess.run( + f'otool -D {sublibrary}', + shell=1, + check=1, + capture_output=1, + encoding='utf8', + ).stdout.strip() + name = name.split('\n') + assert len(name) == 2 and name[0] == f'{sublibrary}:', f'{name=}' + name = name[1] + # strip trailing so_name. + leaf = os.path.basename(name) + m = re.match('^(.+[.]((so)|(dylib)))[0-9.]*$', leaf) + assert m + log2(f'Changing {leaf=} to {m.group(1)}') + leaf = m.group(1) + command += f' -change {name} @rpath/{leaf}' + command += f' {library}' + log2( f'Running: {command}') + subprocess.run( command, shell=1, check=1) + subprocess.run( f'otool -L {library}', shell=1, check=1) + + +def _macos_fixup_platform_tag(tag): + ''' + Patch up platform tag on MacOS. + + E.g. `foo-1.2.3-cp311-none-macosx_13_x86_64.whl` causes `pip` to fail with: + `not a supported wheel on this platform`. We seem to need to add `_0` to + the OS version. (This is documented at + https://packaging.python.org/en/latest/specifications/platform-compatibility-tags/#macos). + + And with graal we need to replace trailing `universal2` with x86_64 + or arm64. On non-graal this causes problems because non-universal + platform tags seem more restricted than platform tags from + sysconfig.get_platform(). For example: + + pip install ...-macosx_10_13_arm64.whl + ERROR: ...-macosx_10_13_arm64.whl is not a supported wheel on this platform. + pip install ...-macosx_10_13_universal2.whl + Ok. + ''' + m = re.match( '^macosx_([0-9_]+)_([^0-9].+)$', tag) + if not m: + return tag + a = m.group(1) + if '_' not in a: + a += '_0' + b = m.group(2) + if sys.implementation.name == 'graalpy' and b == 'universal2': + # Replace 'universal2' with x86_64 or arm64. + b = platform.machine() + ret = f'macosx_{a}_{b}' + #log0(f'Changing from {tag=} to {ret=}.') + return ret + + +# Internal helpers. +# + +def _command_lines( command): + ''' + Process multiline command by running through `textwrap.dedent()`, removes + comments (lines starting with `#` or ` #` until end of line), removes + entirely blank lines. + + Returns list of lines. + ''' + command = textwrap.dedent( command) + lines = [] + for line in command.split( '\n'): + if line.startswith( '#'): + h = 0 + else: + h = line.find( ' #') + if h >= 0: + line = line[:h] + if line.strip(): + lines.append(line.rstrip()) + return lines + + +def cpu_bits(): + return int.bit_length(sys.maxsize+1) + + +def _cpu_name(): + ''' + Returns `x32` or `x64` depending on Python build. + ''' + #log(f'sys.maxsize={hex(sys.maxsize)}') + return f'x{32 if sys.maxsize == 2**31 - 1 else 64}' + + +def run_if( command, out, *prerequisites, caller=1): + ''' + Runs a command only if the output file is not up to date. + + Args: + command: + The command to run. We write this into a file .cmd so that we + know to run a command if the command itself has changed. + out: + Path of the output file. + + prerequisites: + List of prerequisite paths or true/false/None items. If an item + is None it is ignored, otherwise if an item is not a string we + immediately return it cast to a bool. + + Returns: + True if we ran the command, otherwise None. + + + If the output file does not exist, the command is run: + + >>> verbose(1) + 1 + >>> log_line_numbers(0) + >>> out = 'run_if_test_out' + >>> if os.path.exists( out): + ... os.remove( out) + >>> if os.path.exists( f'{out}.cmd'): + ... os.remove( f'{out}.cmd') + >>> run_if( f'touch {out}', out, caller=0) + pipcl.py:run_if(): Running command because: File does not exist: 'run_if_test_out' + pipcl.py:run_if(): Running: touch run_if_test_out + True + + If we repeat, the output file will be up to date so the command is not run: + + >>> run_if( f'touch {out}', out, caller=0) + pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out' + + If we change the command, the command is run: + + >>> run_if( f'touch {out};', out, caller=0) + pipcl.py:run_if(): Running command because: Command has changed: + pipcl.py:run_if(): @@ -1,2 +1,2 @@ + pipcl.py:run_if(): touch + pipcl.py:run_if(): -run_if_test_out + pipcl.py:run_if(): +run_if_test_out; + pipcl.py:run_if(): + pipcl.py:run_if(): Running: touch run_if_test_out; + True + + If we add a prerequisite that is newer than the output, the command is run: + + >>> time.sleep(1) + >>> prerequisite = 'run_if_test_prerequisite' + >>> run( f'touch {prerequisite}', caller=0) + pipcl.py:run(): Running: touch run_if_test_prerequisite + >>> run_if( f'touch {out}', out, prerequisite, caller=0) + pipcl.py:run_if(): Running command because: Command has changed: + pipcl.py:run_if(): @@ -1,2 +1,2 @@ + pipcl.py:run_if(): touch + pipcl.py:run_if(): -run_if_test_out; + pipcl.py:run_if(): +run_if_test_out + pipcl.py:run_if(): + pipcl.py:run_if(): Running: touch run_if_test_out + True + + If we repeat, the output will be newer than the prerequisite, so the + command is not run: + + >>> run_if( f'touch {out}', out, prerequisite, caller=0) + pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out' + ''' + doit = False + cmd_path = f'{out}.cmd' + + if not doit: + out_mtime = _fs_mtime( out) + if out_mtime == 0: + doit = f'File does not exist: {out!r}' + + if not doit: + if os.path.isfile( cmd_path): + with open( cmd_path) as f: + cmd = f.read() + else: + cmd = None + cmd_args = shlex.split(cmd or '') + command_args = shlex.split(command or '') + if command_args != cmd_args: + if cmd is None: + doit = 'No previous command stored' + else: + doit = f'Command has changed' + if 0: + doit += f':\n {cmd!r}\n {command!r}' + if 0: + doit += f'\nbefore:\n' + doit += textwrap.indent(cmd, ' ') + doit += f'\nafter:\n' + doit += textwrap.indent(command, ' ') + if 1: + # Show diff based on commands split into pseudo lines by + # shlex.split(). + doit += ':\n' + lines = difflib.unified_diff( + cmd.split(), + command.split(), + lineterm='', + ) + # Skip initial lines. + assert next(lines) == '--- ' + assert next(lines) == '+++ ' + for line in lines: + doit += f' {line}\n' + + if not doit: + # See whether any prerequisites are newer than target. + def _make_prerequisites(p): + if isinstance( p, (list, tuple)): + return list(p) + else: + return [p] + prerequisites_all = list() + for p in prerequisites: + prerequisites_all += _make_prerequisites( p) + if 0: + log2( 'prerequisites_all:', caller=caller+1) + for i in prerequisites_all: + log2( f' {i!r}', caller=caller+1) + pre_mtime = 0 + pre_path = None + for prerequisite in prerequisites_all: + if isinstance( prerequisite, str): + mtime = _fs_mtime_newest( prerequisite) + if mtime >= pre_mtime: + pre_mtime = mtime + pre_path = prerequisite + elif prerequisite is None: + pass + elif prerequisite: + doit = str(prerequisite) + break + if not doit: + if pre_mtime > out_mtime: + doit = f'Prerequisite is new: {os.path.abspath(pre_path)!r}' + + if doit: + # Remove `cmd_path` before we run the command, so any failure + # will force rerun next time. + # + try: + os.remove( cmd_path) + except Exception: + pass + log1( f'Running command because: {doit}', caller=caller+1) + + run( command, caller=caller+1) + + # Write the command we ran, into `cmd_path`. + with open( cmd_path, 'w') as f: + f.write( command) + return True + else: + log1( f'Not running command because up to date: {out!r}', caller=caller+1) + + if 0: + log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.' + f' pre_path={pre_path!r}: returning {ret!r}.' + ) + + +def _get_prerequisites(path): + ''' + Returns list of prerequisites from Makefile-style dependency file, e.g. + created by `cc -MD -MF `. + ''' + ret = list() + if os.path.isfile(path): + with open(path) as f: + for line in f: + for item in line.split(): + if item.endswith( (':', '\\')): + continue + ret.append( item) + return ret + + +def _fs_mtime_newest( path): + ''' + path: + If a file, returns mtime of the file. If a directory, returns mtime of + newest file anywhere within directory tree. Otherwise returns 0. + ''' + ret = 0 + if os.path.isdir( path): + for dirpath, dirnames, filenames in os.walk( path): + for filename in filenames: + path = os.path.join( dirpath, filename) + ret = max( ret, _fs_mtime( path)) + else: + ret = _fs_mtime( path) + return ret + + +def _flags( items, prefix='', quote=''): + ''' + Turns sequence into string, prefixing/quoting each item. + ''' + if not items: + return '' + if isinstance( items, str): + items = items, + ret = '' + for item in items: + if ret: + ret += ' ' + ret += f'{prefix}{quote}{item}{quote}' + return ret.strip() + + +def _fs_mtime( filename, default=0): + ''' + Returns mtime of file, or `default` if error - e.g. doesn't exist. + ''' + try: + return os.path.getmtime( filename) + except OSError: + return default + + +def _normalise(name): + # https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization + return re.sub(r"[-_.]+", "-", name).lower() + + +def _normalise2(name): + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/ + return _normalise(name).replace('-', '_') + + +def _assert_version_pep_440(version): + assert re.match( + r'^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$', + version, + ), \ + f'Bad version: {version!r}.' + + +g_verbose = int(os.environ.get('PIPCL_VERBOSE', '1')) + +def verbose(level=None): + ''' + Sets verbose level if `level` is not None. + Returns verbose level. + ''' + global g_verbose + if level is not None: + g_verbose = level + return g_verbose + +g_log_line_numbers = True + +def log_line_numbers(yes): + ''' + Sets whether to include line numbers; helps with doctest. + ''' + global g_log_line_numbers + g_log_line_numbers = bool(yes) + +def log(text='', caller=1): + _log(text, 0, caller+1) + +def log0(text='', caller=1): + _log(text, 0, caller+1) + +def log1(text='', caller=1): + _log(text, 1, caller+1) + +def log2(text='', caller=1): + _log(text, 2, caller+1) + +def _log(text, level, caller): + ''' + Logs lines with prefix, if is lower or equal to . + ''' + if level <= g_verbose: + fr = inspect.stack(context=0)[caller] + filename = relpath(fr.filename) + for line in text.split('\n'): + if g_log_line_numbers: + print(f'{filename}:{fr.lineno}:{fr.function}(): {line}', file=sys.stdout, flush=1) + else: + print(f'{filename}:{fr.function}(): {line}', file=sys.stdout, flush=1) + + +def relpath(path, start=None, allow_up=True): + ''' + A safe alternative to os.path.relpath(), avoiding an exception on Windows + if the drive needs to change - in this case we use os.path.abspath(). + + Args: + path: + Path to be processed. + start: + Start directory or current directory if None. + allow_up: + If false we return absolute path is is not within . + ''' + if windows(): + try: + ret = os.path.relpath(path, start) + except ValueError: + # os.path.relpath() fails if trying to change drives. + ret = os.path.abspath(path) + else: + ret = os.path.relpath(path, start) + if not allow_up and ret.startswith('../') or ret.startswith('..\\'): + ret = os.path.abspath(path) + return ret + + +def _so_suffix(use_so_versioning=True): + ''' + Filename suffix for shared libraries is defined in pep-3149. The + pep claims to only address posix systems, but the recommended + sysconfig.get_config_var('EXT_SUFFIX') also seems to give the + right string on Windows. + + If use_so_versioning is false, we return only the last component of + the suffix, which removes any version number, for example changing + `.cp312-win_amd64.pyd` to `.pyd`. + ''' + # Example values: + # linux: .cpython-311-x86_64-linux-gnu.so + # macos: .cpython-311-darwin.so + # openbsd: .cpython-310.so + # windows .cp311-win_amd64.pyd + # + # Only Linux and Windows seem to identify the cpu. For example shared + # libraries in numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl are called + # things like `numpy/core/_simd.cpython-311-darwin.so`. + # + ret = sysconfig.get_config_var('EXT_SUFFIX') + if not use_so_versioning: + # Use last component only. + ret = os.path.splitext(ret)[1] + return ret + + +def get_soname(path): + ''' + If we are on Linux and `path` is softlink and points to a shared library + for which `objdump -p` contains 'SONAME', return the pointee. Otherwise + return `path`. Useful if Linux shared libraries have been created with + `-Wl,-soname,...`, where we need to embed the versioned library. + ''' + if linux() and os.path.islink(path): + path2 = os.path.realpath(path) + if subprocess.run(f'objdump -p {path2}|grep SONAME', shell=1, check=0).returncode == 0: + return path2 + elif openbsd(): + # Return newest .so with version suffix. + sos = glob.glob(f'{path}.*') + log1(f'{sos=}') + sos2 = list() + for so in sos: + suffix = so[len(path):] + if not suffix or re.match('^[.][0-9.]*[0-9]$', suffix): + sos2.append(so) + sos2.sort(key=lambda p: os.path.getmtime(p)) + log1(f'{sos2=}') + return sos2[-1] + return path + + +def current_py_limited_api(): + ''' + Returns value of PyLIMITED_API to build for current Python. + ''' + a, b = map(int, platform.python_version().split('.')[:2]) + return f'0x{a:02x}{b:02x}0000' + + +def install_dir(root=None): + ''' + Returns install directory used by `install()`. + + This will be `sysconfig.get_path('platlib')`, modified by `root` if not + None. + ''' + # todo: for pure-python we should use sysconfig.get_path('purelib') ? + root2 = sysconfig.get_path('platlib') + if root: + if windows(): + # If we are in a venv, `sysconfig.get_path('platlib')` + # can be absolute, e.g. + # `C:\\...\\venv-pypackage-3.11.1-64\\Lib\\site-packages`, so it's + # not clear how to append it to `root`. So we just use `root`. + return root + else: + # E.g. if `root` is `install' and `sysconfig.get_path('platlib')` + # is `/usr/local/lib/python3.9/site-packages`, we set `root2` to + # `install/usr/local/lib/python3.9/site-packages`. + # + return os.path.join( root, root2.lstrip( os.sep)) + else: + return root2 + + +class _Record: + ''' + Internal - builds up text suitable for writing to a RECORD item, e.g. + within a wheel. + ''' + def __init__(self): + self.text = '' + + def add_content(self, content, to_, verbose=True): + if isinstance(content, str): + content = content.encode('utf8') + + # Specification for the line we write is supposed to be in + # https://packaging.python.org/en/latest/specifications/binary-distribution-format + # but it's not very clear. + # + h = hashlib.sha256(content) + digest = h.digest() + digest = base64.urlsafe_b64encode(digest) + digest = digest.rstrip(b'=') + digest = digest.decode('utf8') + + self.text += f'{to_},sha256={digest},{len(content)}\n' + if verbose: + log2(f'Adding {to_}') + + def add_file(self, from_, to_): + log1(f'Adding file: {os.path.relpath(from_)} => {to_}') + with open(from_, 'rb') as f: + content = f.read() + self.add_content(content, to_, verbose=False) + + def get(self, record_path=None): + ''' + Returns contents of the RECORD file. If `record_path` is + specified we append a final line `,,`; this can be + used to include the RECORD file itself in the contents, with + empty hash and size fields. + ''' + ret = self.text + if record_path: + ret += f'{record_path},,\n' + return ret + + +class NewFiles: + ''' + Detects new/modified/updated files matching a glob pattern. Useful for + detecting wheels created by pip or cubuildwheel etc. + ''' + def __init__(self, glob_pattern): + # Find current matches of . + self.glob_pattern = glob_pattern + self.items0 = self._items() + def get(self): + ''' + Returns list of new matches of - paths of files that + were not present previously, or have different mtimes or have different + contents. + ''' + ret = list() + items = self._items() + for path, id_ in items.items(): + id0 = self.items0.get(path) + if id0 != id_: + ret.append(path) + return ret + def get_n(self, n): + ''' + Returns new files matching , asserting that there are + exactly . + ''' + ret = self.get() + assert len(ret) == n, f'{len(ret)=}: {ret}' + return ret + def get_one(self): + ''' + Returns new match of , asserting that there is exactly + one. + ''' + return self.get_n(1)[0] + def _file_id(self, path): + mtime = os.stat(path).st_mtime + with open(path, 'rb') as f: + content = f.read() + hash_ = hashlib.md5(content).digest() + # With python >= 3.11 we can do: + #hash_ = hashlib.file_digest(f, hashlib.md5).digest() + return mtime, hash_ + def _items(self): + ret = dict() + for path in glob.glob(self.glob_pattern): + if os.path.isfile(path): + ret[path] = self._file_id(path) + return ret + + +def swig_get(swig, quick, swig_local='pipcl-swig-git'): + ''' + Returns or a new swig binary. + + If is true and starts with 'git:' (not Windows), the remaining text + is passed to git_get() and we clone/update/build swig, and return the built + binary. We default to the main swig repository, branch master, so for + example 'git:' will return the latest swig from branch master. + + Otherwise we simply return . + + Args: + swig: + If starts with 'git:', passed as arg to git_get(). + quick: + If true, we do not update/build local checkout if the binary is + already present. + swig_local: + path to use for checkout. + ''' + if swig and swig.startswith('git:'): + assert platform.system() != 'Windows', f'Cannot build swig on Windows.' + # Note that {swig_local}/install/bin/swig doesn't work on MacOS because + # {swig_local}/INSTALL is a file and the fs is case-insensitive. + swig_binary = f'{swig_local}/install-dir/bin/swig' + if quick and os.path.isfile(swig_binary): + log1(f'{quick=} and {swig_binary=} already exists, so not downloading/building.') + else: + # Clone swig. + swig_env_extra = None + swig_local = git_get( + swig_local, + text=swig, + remote='https://github.com/swig/swig.git', + branch='master', + ) + if darwin(): + run(f'brew install automake') + run(f'brew install pcre2') + run(f'brew install bison') + # Default bison doesn't work, and Brew's bison is not added to $PATH. + # + # > bison is keg-only, which means it was not symlinked into /opt/homebrew, + # > because macOS already provides this software and installing another version in + # > parallel can cause all kinds of trouble. + # > + # > If you need to have bison first in your PATH, run: + # > echo 'export PATH="/opt/homebrew/opt/bison/bin:$PATH"' >> ~/.zshrc + # + swig_env_extra = dict() + macos_add_brew_path('bison', swig_env_extra) + run(f'which bison') + run(f'which bison', env_extra=swig_env_extra) + # Build swig. + run(f'cd {swig_local} && ./autogen.sh', env_extra=swig_env_extra) + run(f'cd {swig_local} && ./configure --prefix={swig_local}/install-dir', env_extra=swig_env_extra) + run(f'cd {swig_local} && make', env_extra=swig_env_extra) + run(f'cd {swig_local} && make install', env_extra=swig_env_extra) + assert os.path.isfile(swig_binary) + return swig_binary + else: + return swig + + +def macos_add_brew_path(package, env=None, gnubin=True): + ''' + Adds path(s) for Brew 's binaries to env['PATH']. + + We assert-fail if the relevant directory does no exist. + + Args: + package: + Name of package. We get of installed package by + running `brew --prefix `. + env: + The environment dict to modify. If None we use os.environ. If PATH + is not in , we first copy os.environ['PATH'] into . + gnubin: + If true, we also add path to gnu binaries if it exists, + /libexe/gnubin. + ''' + if not darwin(): + return + if env is None: + env = os.environ + if 'PATH' not in env: + env['PATH'] = os.environ['PATH'] + package_root = run(f'brew --prefix {package}', capture=1).strip() + log(f'{package=} {package_root=}') + def add(path): + log(f'{path=}') + if os.path.isdir(path): + log(f'Prepending to $PATH: {path}') + PATH = env['PATH'] + env['PATH'] = f'{path}:{PATH}' + return 1 + else: + log(f'Not a directory: {path=}') + return 0 + n = 0 + n += add(f'{package_root}/bin') + if gnubin: + n += add(f'{package_root}/libexec/gnubin') + assert n, f'Failed to add to $PATH, {package=} {gnubin=}.' + + +def _show_dict(d): + ret = '' + for n in sorted(d.keys()): + v = d[n] + ret += f' {n}: {v!r}\n' + return ret + +def show_sysconfig(): + ''' + Shows contents of sysconfig.get_paths() and sysconfig.get_config_vars() dicts. + ''' + import sysconfig + paths = sysconfig.get_paths() + log0(f'show_sysconfig().') + log0(f'sysconfig.get_paths():\n{_show_dict(sysconfig.get_paths())}') + log0(f'sysconfig.get_config_vars():\n{_show_dict(sysconfig.get_config_vars())}') + + +def sysconfig_python_flags(): + ''' + Returns include paths and library directory for Python. + + Uses sysconfig.*(), overridden by environment variables + PIPCL_SYSCONFIG_PATH_include, PIPCL_SYSCONFIG_PATH_platinclude and + PIPCL_SYSCONFIG_CONFIG_VAR_LIBDIR if set. + ''' + include1_ = os.environ.get('PIPCL_SYSCONFIG_PATH_include') or sysconfig.get_path('include') + include2_ = os.environ.get('PIPCL_SYSCONFIG_PATH_platinclude') or sysconfig.get_path('platinclude') + ldflags_ = os.environ.get('PIPCL_SYSCONFIG_CONFIG_VAR_LIBDIR') or sysconfig.get_config_var('LIBDIR') + + includes_ = [include1_] + if include2_ != include1_: + includes_.append(include2_) + if windows(): + includes_ = [f'/I"{i}"' for i in includes_] + ldflags_ = f'/LIBPATH:"{ldflags_}"' + else: + includes_ = [f'-I {i}' for i in includes_] + ldflags_ = f'-L {ldflags_}' + includes_ = ' '.join(includes_) + return includes_, ldflags_ + + +def venv_in(path=None): + ''' + If path is None, returns true if we are in a venv. Otherwise returns true + only if we are in venv . + ''' + if path: + return os.path.abspath(sys.prefix) == os.path.abspath(path) + else: + return sys.prefix != sys.base_prefix + + +def venv_run(args, path, recreate=True, clean=False): + ''' + Runs Python command inside venv and returns termination code. + + Args: + args: + List of args or string command. + path: + Path of venv directory. + recreate: + If false we do not run ` -m venv ` if + already exists. This avoids a delay in the common case where + is already set up, but fails if exists but does not contain + a valid venv. + clean: + If true we first delete . + ''' + if clean: + log(f'Removing any existing venv {path}.') + assert path.startswith('venv-') + shutil.rmtree(path, ignore_errors=1) + if recreate or not os.path.isdir(path): + run(f'{sys.executable} -m venv {path}') + + if isinstance(args, str): + args_string = args + elif platform.system() == 'Windows': + # shlex not reliable on Windows so we use Use crude quoting with "...". + args_string = '' + for i, arg in enumerate(args): + assert '"' not in arg + if i: + args_string += ' ' + args_string += f'"{arg}"' + else: + args_string = shlex.join(args) + + if platform.system() == 'Windows': + command = f'{path}\\Scripts\\activate && python {args_string}' + else: + command = f'. {path}/bin/activate && python {args_string}' + e = run(command, check=0) + return e + + +if __name__ == '__main__': + # Internal-only limited command line support, used if + # graal_legacy_python_config is true. + # + includes, ldflags = sysconfig_python_flags() + if sys.argv[1] == '--doctest': + import doctest + if sys.argv[2:]: + for f in sys.argv[2:]: + ff = globals()[f] + doctest.run_docstring_examples(ff, globals()) + else: + doctest.testmod(None) + elif sys.argv[1:] == ['--graal-legacy-python-config', '--includes']: + print(includes) + elif sys.argv[1:] == ['--graal-legacy-python-config', '--ldflags']: + print(ldflags) + else: + assert 0, f'Expected `--graal-legacy-python-config --includes|--ldflags` but {sys.argv=}' diff --git a/pyproject.toml b/pyproject.toml index 2a62ff1ee..92bfa9009 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,8 @@ [build-system] -requires = ["setuptools", "swig"] -build-backend = "setuptools.build_meta" +# We define required packages in setup.py:get_requires_for_build_wheel(). +requires = [] + +# See pep-517. +# +build-backend = "setup" +backend-path = ["."] diff --git a/scripts/gh_release.py b/scripts/gh_release.py new file mode 100755 index 000000000..1cc836894 --- /dev/null +++ b/scripts/gh_release.py @@ -0,0 +1,625 @@ +#! /usr/bin/env python3 + +''' +Build+test script for PyMuPDF using cibuildwheel. Mostly for use with github +builds. + +We run cibuild manually, in order to build and test PyMuPDF wheels. + +As of 2024-10-08 we also support the old two wheel flavours that make up +PyMuPDF: + + PyMuPDFb + Not specific to particular versions of Python. Contains shared + libraries for the MuPDF C and C++ bindings. + PyMuPDF + Specific to particular versions of Python. Contains the rest of + the PyMuPDF implementation. + +Args: + build + Build using cibuildwheel. + build-devel + Build using cibuild with `--platform` set. + pip_install + For internal use. Runs `pip install -*.whl`, + where `platform_tag` will be things like 'win32', 'win_amd64', + 'x86_64`, depending on the python we are running on. + venv + Run with remaining args inside a venv. + test + Internal. + +We also look at specific items in the environment. This allows use with Github +action inputs, which can't be easily translated into command-line arguments. + + inputs_flavours + If '0' or unset, build complete PyMuPDF wheels. + If '1', build separate PyMuPDF and PyMuPDFb wheels. + inputs_sdist + inputs_skeleton + Build minimal wheel; for testing only. + inputs_wheels_cps: + Python versions to build for. E.g. 'cp39* cp313*'. + inputs_wheels_default + Default value for other inputs_wheels_* if unset. + inputs_wheels_linux_aarch64 + inputs_wheels_linux_auto + inputs_wheels_linux_pyodide + inputs_wheels_macos_arm64 + inputs_wheels_macos_auto + inputs_wheels_windows_auto + If '1' we build the relevant wheels. + inputs_PYMUPDF_SETUP_MUPDF_BUILD + Used to directly set PYMUPDF_SETUP_MUPDF_BUILD. + E.g. 'git:--recursive --depth 1 --shallow-submodules --branch master https://github.com/ArtifexSoftware/mupdf.git' + inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE + Used to directly set PYMUPDF_SETUP_MUPDF_BUILD_TYPE. Note that as of + 2024-09-10 .github/workflows/build_wheels.yml does not set this. + PYMUPDF_SETUP_PY_LIMITED_API + If not '0' we build a single wheel for all python versions using the + Python Limited API. + +Building for Pyodide + + If `inputs_wheels_linux_pyodide` is true and we are on Linux, we build a + Pyodide wheel, using scripts/test.py. + +Set up for use outside Github + + sudo apt install docker.io + sudo usermod -aG docker $USER + +Example usage: + + PYMUPDF_SETUP_MUPDF_BUILD=../mupdf py -3.9-32 PyMuPDF/scripts/gh_release.py venv build-devel +''' + +import glob +import inspect +import os +import platform +import re +import shlex +import subprocess +import sys +import textwrap + +import test as test_py + +pymupdf_dir = os.path.abspath( f'{__file__}/../..') + +sys.path.insert(0, pymupdf_dir) +import pipcl +del sys.path[0] + +log = pipcl.log0 +run = pipcl.run + + +def main(): + + log( '### main():') + log(f'{platform.platform()=}') + log(f'{platform.python_version()=}') + log(f'{platform.architecture()=}') + log(f'{platform.machine()=}') + log(f'{platform.processor()=}') + log(f'{platform.release()=}') + log(f'{platform.system()=}') + log(f'{platform.version()=}') + log(f'{platform.uname()=}') + log(f'{sys.executable=}') + log(f'{sys.maxsize=}') + log(f'sys.argv ({len(sys.argv)}):') + for i, arg in enumerate(sys.argv): + log(f' {i}: {arg!r}') + log(f'os.environ ({len(os.environ)}):') + for k in sorted( os.environ.keys()): + v = os.environ[ k] + log( f' {k}: {v!r}') + + if test_py.github_workflow_unimportant(): + return + + valgrind = False + if len( sys.argv) == 1: + args = iter( ['build']) + else: + args = iter( sys.argv[1:]) + while 1: + try: + arg = next(args) + except StopIteration: + break + if arg == 'build': + build(valgrind=valgrind) + elif arg == 'build-devel': + if platform.system() == 'Linux': + p = 'linux' + elif platform.system() == 'Windows': + p = 'windows' + elif platform.system() == 'Darwin': + p = 'macos' + else: + assert 0, f'Unrecognised {platform.system()=}' + build(platform_=p) + elif arg == 'pip_install': + prefix = next(args) + d = os.path.dirname(prefix) + log( f'{prefix=}') + log( f'{d=}') + for leaf in os.listdir(d): + log( f' {d}/{leaf}') + pattern = f'{prefix}-*{platform_tag()}.whl' + paths = glob.glob( pattern) + log( f'{pattern=} {paths=}') + # Follow pipcl.py and look at AUDITWHEEL_PLAT. This allows us to + # cope if building for both musl and normal linux. + awp = os.environ.get('AUDITWHEEL_PLAT') + if awp: + paths = [i for i in paths if awp in i] + log(f'After selecting AUDITWHEEL_PLAT={awp!r}, {paths=}.') + paths = ' '.join( paths) + run( f'pip install {paths}') + elif arg == 'venv': + command = ['python', sys.argv[0]] + for arg in args: + command.append( arg) + venv( command, packages = 'cibuildwheel') + elif arg == 'test': + project = next(args) + package = next(args) + test( project, package, valgrind=valgrind) + elif arg == '--valgrind': + valgrind = int(next(args)) + else: + assert 0, f'Unrecognised {arg=}' + + +def build( platform_=None, valgrind=False): + log( '### build():') + + platform_arg = f' --platform {platform_}' if platform_ else '' + + # Parameters are in os.environ, as that seems to be the only way that + # Github workflow .yml files can encode them. + # + def get_bool(name, default=0): + v = os.environ.get(name) + if v in ('1', 'true'): + return 1 + elif v in ('0', 'false'): + return 0 + elif v is None: + return default + else: + assert 0, f'Bad environ {name=} {v=}' + inputs_flavours = get_bool('inputs_flavours', 1) + inputs_sdist = get_bool('inputs_sdist') + inputs_skeleton = os.environ.get('inputs_skeleton') + inputs_wheels_default = get_bool('inputs_wheels_default', 1) + inputs_wheels_linux_aarch64 = get_bool('inputs_wheels_linux_aarch64', inputs_wheels_default) + inputs_wheels_linux_auto = get_bool('inputs_wheels_linux_auto', inputs_wheels_default) + inputs_wheels_linux_pyodide = get_bool('inputs_wheels_linux_pyodide', 0) + inputs_wheels_macos_arm64 = get_bool('inputs_wheels_macos_arm64', 0) + inputs_wheels_macos_auto = get_bool('inputs_wheels_macos_auto', inputs_wheels_default) + inputs_wheels_windows_auto = get_bool('inputs_wheels_windows_auto', inputs_wheels_default) + inputs_wheels_cps = os.environ.get('inputs_wheels_cps') + inputs_PYMUPDF_SETUP_MUPDF_BUILD = os.environ.get('inputs_PYMUPDF_SETUP_MUPDF_BUILD') + inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE = os.environ.get('inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE') + + PYMUPDF_SETUP_PY_LIMITED_API = os.environ.get('PYMUPDF_SETUP_PY_LIMITED_API') + + log( f'{inputs_flavours=}') + log( f'{inputs_sdist=}') + log( f'{inputs_skeleton=}') + log( f'{inputs_wheels_default=}') + log( f'{inputs_wheels_linux_aarch64=}') + log( f'{inputs_wheels_linux_auto=}') + log( f'{inputs_wheels_linux_pyodide=}') + log( f'{inputs_wheels_macos_arm64=}') + log( f'{inputs_wheels_macos_auto=}') + log( f'{inputs_wheels_windows_auto=}') + log( f'{inputs_wheels_cps=}') + log( f'{inputs_PYMUPDF_SETUP_MUPDF_BUILD=}') + log( f'{inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE=}') + log( f'{PYMUPDF_SETUP_PY_LIMITED_API=}') + + # Build Pyodide wheel if specified. + # + if platform.system() == 'Linux' and inputs_wheels_linux_pyodide: + # Pyodide wheels are built by running scripts/test.py, not + # cibuildwheel. + command = f'{sys.executable} scripts/test.py -P 1' + if inputs_PYMUPDF_SETUP_MUPDF_BUILD: + command += f' -m {shlex.quote(inputs_PYMUPDF_SETUP_MUPDF_BUILD)}' + command += ' pyodide_wheel' + run(command) + + # Build sdist(s). + # + if inputs_sdist: + if pymupdf_dir != os.path.abspath( os.getcwd()): + log( f'Changing dir to {pymupdf_dir=}') + os.chdir( pymupdf_dir) + # Create PyMuPDF sdist. + run(f'{sys.executable} setup.py sdist') + assert glob.glob('dist/pymupdf-*.tar.gz') + if inputs_flavours: + # Create PyMuPDFb sdist. + run( + f'{sys.executable} setup.py sdist', + env_extra=dict(PYMUPDF_SETUP_FLAVOUR='b'), + ) + assert glob.glob('dist/pymupdfb-*.tar.gz') + + # Build wheels. + # + if (0 + or inputs_wheels_linux_aarch64 + or inputs_wheels_linux_auto + or inputs_wheels_macos_arm64 + or inputs_wheels_macos_auto + or inputs_wheels_windows_auto + ): + env_extra = dict() + + def set_if_unset(name, value): + v = os.environ.get(name) + if v is None: + log( f'Setting environment {name=} to {value=}') + env_extra[ name] = value + else: + log( f'Not changing {name}={v!r} to {value!r}') + set_if_unset( 'CIBW_BUILD_VERBOSITY', '1') + # We exclude pp* because of `fitz_wrap.obj : error LNK2001: unresolved + # external symbol PyUnicode_DecodeRawUnicodeEscape`. + # 2024-06-05: musllinux on aarch64 fails because libclang cannot find + # libclang.so. + # + # Note that we had to disable cp313-win32 when 3.13 was experimental + # because there was no 64-bit Python-3.13 available via `py + # -3.13`. (Win32 builds need to use win64 Python because win32 + # libclang is broken.) + # + set_if_unset( 'CIBW_SKIP', 'pp* *i686 cp36* cp37* *musllinux*aarch64*') + + def make_string(*items): + ret = list() + for item in items: + if item: + ret.append(item) + return ' '.join(ret) + + cps = inputs_wheels_cps if inputs_wheels_cps else 'cp39* cp310* cp311* cp312* cp313*' + set_if_unset( 'CIBW_BUILD', cps) + for cp in cps.split(): + m = re.match('cp([0-9]+)[*]', cp) + assert m, f'{cps=} {cp=}' + v = int(m.group(1)) + if v == 314: + # Need to set CIBW_PRERELEASE_PYTHONS, otherwise cibuildwheel + # will refuse. + log(f'Setting CIBW_PRERELEASE_PYTHONS for Python version {cp=}.') + set_if_unset( 'CIBW_PRERELEASE_PYTHONS', '1') + + if platform.system() == 'Linux': + set_if_unset( + 'CIBW_ARCHS_LINUX', + make_string( + 'auto64' * inputs_wheels_linux_auto, + 'aarch64' * inputs_wheels_linux_aarch64, + ), + ) + if env_extra.get('CIBW_ARCHS_LINUX') == '': + log(f'Not running cibuildwheel because CIBW_ARCHS_LINUX is empty string.') + return + + if platform.system() == 'Windows': + set_if_unset( + 'CIBW_ARCHS_WINDOWS', + make_string( + 'auto' * inputs_wheels_windows_auto, + ), + ) + if env_extra.get('CIBW_ARCHS_WINDOWS') == '': + log(f'Not running cibuildwheel because CIBW_ARCHS_WINDOWS is empty string.') + return + + if platform.system() == 'Darwin': + set_if_unset( + 'CIBW_ARCHS_MACOS', + make_string( + 'auto' * inputs_wheels_macos_auto, + 'arm64' * inputs_wheels_macos_arm64, + ), + ) + if env_extra.get('CIBW_ARCHS_MACOS') == '': + log(f'Not running cibuildwheel because CIBW_ARCHS_MACOS is empty string.') + return + + def env_pass(name): + ''' + Adds `name` to CIBW_ENVIRONMENT_PASS_LINUX if required to be available + when building wheel with cibuildwheel. + ''' + if platform.system() == 'Linux': + v = env_extra.get('CIBW_ENVIRONMENT_PASS_LINUX', '') + if v: + v += ' ' + v += name + env_extra['CIBW_ENVIRONMENT_PASS_LINUX'] = v + + def env_set(name, value, pass_=False): + assert isinstance( value, str) + if not name.startswith('CIBW'): + assert pass_, f'Non-CIBW* name requires `pass_` to be true. {name=} {value=}.' + env_extra[ name] = value + if pass_: + env_pass(name) + + env_pass('PYMUPDF_SETUP_PY_LIMITED_API') + + if os.environ.get('PYMUPDF_SETUP_LIBCLANG'): + env_pass('PYMUPDF_SETUP_LIBCLANG') + + if inputs_skeleton: + env_set('PYMUPDF_SETUP_SKELETON', inputs_skeleton, pass_=1) + + if inputs_PYMUPDF_SETUP_MUPDF_BUILD not in ('-', None): + log(f'Setting PYMUPDF_SETUP_MUPDF_BUILD to {inputs_PYMUPDF_SETUP_MUPDF_BUILD!r}.') + env_set('PYMUPDF_SETUP_MUPDF_BUILD', inputs_PYMUPDF_SETUP_MUPDF_BUILD, pass_=True) + env_set('PYMUPDF_SETUP_MUPDF_TGZ', '', pass_=True) # Don't put mupdf in sdist. + + if inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE not in ('-', None): + log(f'Setting PYMUPDF_SETUP_MUPDF_BUILD_TYPE to {inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE!r}.') + env_set('PYMUPDF_SETUP_MUPDF_BUILD_TYPE', inputs_PYMUPDF_SETUP_MUPDF_BUILD_TYPE, pass_=True) + + def set_cibuild_test(): + log( f'set_cibuild_test(): {inputs_skeleton=}') + valgrind_text = '' + if valgrind: + valgrind_text = ' --valgrind 1' + env_set('CIBW_TEST_COMMAND', f'python {{project}}/scripts/gh_release.py{valgrind_text} test {{project}} {{package}}') + + if pymupdf_dir != os.path.abspath( os.getcwd()): + log( f'Changing dir to {pymupdf_dir=}') + os.chdir( pymupdf_dir) + + run('pip install cibuildwheel') + + # We include MuPDF build-time files. + flavour_d = True + + if PYMUPDF_SETUP_PY_LIMITED_API != '0': + # Build one wheel with oldest python, then fake build with other python + # versions so we test everything. + log(f'{PYMUPDF_SETUP_PY_LIMITED_API=}') + env_pass('PYMUPDF_SETUP_PY_LIMITED_API') + CIBW_BUILD_old = env_extra.get('CIBW_BUILD') + assert CIBW_BUILD_old is not None + cp = cps.split()[0] + env_set('CIBW_BUILD', cp) + log(f'Building single wheel.') + run( f'cibuildwheel{platform_arg}', env_extra=env_extra) + + # Fake-build with all python versions, using the wheel we have + # just created. This works by setting PYMUPDF_SETUP_URL_WHEEL + # which makes PyMuPDF's setup.py copy an existing wheel instead + # of building a wheel itself; it also copes with existing + # wheels having extra platform tags (from cibuildwheel's use of + # auditwheel). + # + env_set('PYMUPDF_SETUP_URL_WHEEL', f'file://wheelhouse/', pass_=True) + + set_cibuild_test() + env_set('CIBW_BUILD', CIBW_BUILD_old) + + # Disable cibuildwheels use of auditwheel. The wheel was repaired + # when it was created above so we don't need to do so again. This + # also avoids problems with musl wheels on a Linux glibc host where + # auditwheel fails with: `ValueError: Cannot repair wheel, because + # required library "libgcc_s-a3a07607.so.1" could not be located`. + # + env_set('CIBW_REPAIR_WHEEL_COMMAND', '') + + if platform.system() == 'Linux' and env_extra.get('CIBW_ARCHS_LINUX') == 'aarch64': + log(f'Testing all Python versions on linux-aarch64 is too slow and is killed by github after 6h.') + log(f'Testing on restricted python versions using wheels in wheelhouse/.') + # Testing only on first and last python versions. + cp1 = cps.split()[0] + cp2 = cps.split()[-1] + cp = cp1 if cp1 == cp2 else f'{cp1} {cp2}' + env_set('CIBW_BUILD', cp) + else: + log(f'Testing on all python versions using wheels in wheelhouse/.') + run( f'cibuildwheel{platform_arg}', env_extra=env_extra) + + elif inputs_flavours: + # Build and test PyMuPDF and PyMuPDFb wheels. + # + + # First build PyMuPDFb wheel. cibuildwheel will build a single wheel + # here, which will work with any python version on current OS. + # + flavour = 'b' + if flavour_d: + # Include MuPDF build-time files. + flavour += 'd' + env_set( 'PYMUPDF_SETUP_FLAVOUR', flavour, pass_=1) + run( f'cibuildwheel{platform_arg}', env_extra=env_extra) + run( 'echo after {flavour=}') + run( 'ls -l wheelhouse') + + # Now set environment to build PyMuPDF wheels. cibuildwheel will build + # one for each Python version. + # + + # Tell cibuildwheel not to use `auditwheel`, because it cannot cope + # with us deliberately putting required libraries into a different + # wheel. + # + # Also, `auditwheel addtag` says `No tags to be added` and terminates + # with non-zero. See: https://github.com/pypa/auditwheel/issues/439. + # + env_set('CIBW_REPAIR_WHEEL_COMMAND_LINUX', '') + env_set('CIBW_REPAIR_WHEEL_COMMAND_MACOS', '') + + # We tell cibuildwheel to test these wheels, but also set + # CIBW_BEFORE_TEST to make it first run ourselves with the + # `pip_install` arg to install the PyMuPDFb wheel. Otherwise + # installation of PyMuPDF would fail because it lists the + # PyMuPDFb wheel as a prerequisite. We need to use `pip_install` + # because wildcards do not work on Windows, and we want to be + # careful to avoid incompatible wheels, e.g. 32 vs 64-bit wheels + # coexist during Windows builds. + # + env_set('CIBW_BEFORE_TEST', f'python scripts/gh_release.py pip_install wheelhouse/pymupdfb') + + set_cibuild_test() + + # Build main PyMuPDF wheel. + flavour = 'p' + env_set( 'PYMUPDF_SETUP_FLAVOUR', flavour, pass_=1) + run( f'cibuildwheel{platform_arg}', env_extra=env_extra) + + else: + # Build and test wheels which contain everything. + # + flavour = 'pb' + if flavour_d: + flavour += 'd' + set_cibuild_test() + env_set( 'PYMUPDF_SETUP_FLAVOUR', flavour, pass_=1) + + run( f'cibuildwheel{platform_arg}', env_extra=env_extra) + + run( 'ls -lt wheelhouse') + + +def cpu_bits(): + return 32 if sys.maxsize == 2**31 - 1 else 64 + + +# Name of venv used by `venv()`. +# +venv_name = f'venv-pymupdf-{platform.python_version()}-{cpu_bits()}' + +def venv( command=None, packages=None, quick=False, system_site_packages=False): + ''' + Runs remaining args, or the specified command if present, in a venv. + + command: + Command as string or list of args. Should usually start with 'python' + to run the venv's python. + packages: + List of packages (or comma-separated string) to install. + quick: + If true and venv directory already exists, we don't recreate venv or + install Python packages in it. + ''' + command2 = '' + if platform.system() == 'OpenBSD': + # libclang not available from pypi.org, but system py3-llvm package + # works. `pip install` should be run with --no-build-isolation and + # explicit `pip install swig psutil`. + system_site_packages = True + #ssp = ' --system-site-packages' + log(f'OpenBSD: libclang not available from pypi.org.') + log(f'OpenBSD: system package `py3-llvm` must be installed.') + log(f'OpenBSD: creating venv with --system-site-packages.') + log(f'OpenBSD: `pip install .../PyMuPDF` must be preceded by install of swig etc.') + ssp = ' --system-site-packages' if system_site_packages else '' + if quick and os.path.isdir(venv_name): + log(f'{quick=}: Not creating venv because directory already exists: {venv_name}') + command2 += 'true' + else: + quick = False + command2 += f'{sys.executable} -m venv{ssp} {venv_name}' + if platform.system() == 'Windows': + command2 += f' && {venv_name}\\Scripts\\activate' + else: + command2 += f' && . {venv_name}/bin/activate' + if quick: + log(f'{quick=}: Not upgrading pip or installing packages.') + else: + command2 += ' && python -m pip install --upgrade pip' + if packages: + if isinstance(packages, str): + packages = packages.split(',') + command2 += ' && pip install ' + ' '.join(packages) + command2 += ' &&' + if isinstance( command, str): + command2 += ' ' + command + else: + for arg in command: + command2 += ' ' + shlex.quote(arg) + + run( command2) + + +def test( project, package, valgrind): + + run(f'pip install {test_packages}') + if valgrind: + log('Installing valgrind.') + run(f'sudo apt update') + run(f'sudo apt install valgrind') + run(f'valgrind --version') + + log('Running PyMuPDF tests under valgrind.') + # We ignore memory leaks. + run( + f'{sys.executable} {project}/tests/run_compound.py' + f' valgrind --suppressions={project}/valgrind.supp --error-exitcode=100 --errors-for-leak-kinds=none --fullpath-after=' + f' pytest {project}/tests' + , + env_extra=dict( + PYTHONMALLOC='malloc', + PYMUPDF_RUNNING_ON_VALGRIND='1', + ), + ) + else: + run(f'{sys.executable} {project}/tests/run_compound.py pytest {project}/tests') + + +if platform.system() == 'Windows': + def relpath(path, start=None): + try: + return os.path.relpath(path, start) + except ValueError: + # os.path.relpath() fails if trying to change drives. + return os.path.abspath(path) +else: + def relpath(path, start=None): + return os.path.relpath(path, start) + + +def platform_tag(): + bits = cpu_bits() + if platform.system() == 'Windows': + return 'win32' if bits==32 else 'win_amd64' + elif platform.system() in ('Linux', 'Darwin'): + assert bits == 64 + return platform.machine() + #return 'x86_64' + else: + assert 0, f'Unrecognised: {platform.system()=}' + + +test_packages = 'pytest fontTools pymupdf-fonts flake8 pylint codespell' +if platform.system() == 'Windows' and cpu_bits() == 32: + # No pillow wheel available, and doesn't build easily. + pass +else: + test_packages += ' pillow' +if platform.system().startswith('MSYS_NT-'): + # psutil not available on msys2. + pass +else: + test_packages += ' psutil' + + +if __name__ == '__main__': + main() diff --git a/scripts/sysinstall.py b/scripts/sysinstall.py new file mode 100755 index 000000000..d4f2fc14f --- /dev/null +++ b/scripts/sysinstall.py @@ -0,0 +1,430 @@ +#! /usr/bin/env python3 + +''' +Test for Linux system install of MuPDF and PyMuPDF. + +We build and install MuPDF and PyMuPDF into a root directory, then use +scripts/test.py to run PyMuPDF's pytest tests with LD_PRELOAD_PATH and +PYTHONPATH set. + +PyMuPDF itself is installed using `python -m install` with a wheel created with +`pip wheel`. + +We run install commands with `sudo` if `--root /` is used. + +Note that we run some commands with sudo; it's important that these use the +same python as non-sudo, otherwise things can be build and installed for +different python versions. For example when we are run from a github action, it +should not do `- uses: actions/setup-python@v5` but instead use whatever system +python is already defined. + +Args: + + --gdb 0|1 + --mupdf-dir + Path of MuPDF checkout; default is 'mupdf'. + --mupdf-do 0|1 + Whether to build and install mupdf. + --mupdf-git + Get or update `mupdf_dir` using git. If `mupdf_dir` already + exists we run `git pull` in it; otherwise we run `git + clone` with ` `. For example: + --mupdf-git "--branch master https://github.com/ArtifexSoftware/mupdf.git" + --mupdf-so-mode + Used with `install -m ...` when installing MuPDF. For example + `--mupdf-so-mode 744`. + --packages 0|1 + If 1 (the default) we install required system packages such as + `libfreetype-dev`. + --pip 0|venv|sudo + Whether/how to install Python packages. + If '0' we assume required packages are already available. + If 'sudo' we install required Python packages using `sudo pip install + ...`. + If 'venv' (the default) we install Python packages and run installer + and test commands inside venv's. + --prefix: + Directory within `root`; default is `/usr/local`. Must start with `/`. + --pymupdf-dir + Path of PyMuPDF checkout; default is 'PyMuPDF'. + --pymupdf-do 0|1 + Whether to build and install pymupdf. + --root + Root of install directory; default is 'pymupdf-sysinstall-test-root'. + --tesseract5 0|1 + If 1 (the default), we force installation of libtesseract-dev version + 5 (which is not available as a default package in Ubuntu-22.04) from + package repository ppa:alex-p/tesseract-ocr-devel. + --test-venv + Set the name of the venv in which we run tests (only with `--pip + venv`); the default is a hard-coded venv name. The venv will be + created, and required packages installed using `pip`. + --use-installer 0|1 + If 1 (the default), we use `python -m installer` to install PyMuPDF + from a generated wheel. [Otherwise we use `pip install`, which refuses + to do a system install with `--root /`, referencing PEP-668.] + -i + Passed through to scripts/test.py. Default is 'rR'. + -f + Passed through to scripts/test.py. Default is '1'. + -p + Passed through to scripts/test.py. + -t + Passed through to scripts/test.py. + +To only show what commands would be run, but not actually run them, specify `-m +0 -p 0 -t 0`. +''' + +import glob +import multiprocessing +import os +import platform +import shlex +import subprocess +import sys +import sysconfig + +import test as test_py + +pymupdf_dir = os.path.abspath( f'{__file__}/../..') + +sys.path.insert(0, pymupdf_dir) +import pipcl +del sys.path[0] + +log = pipcl.log0 + +# Requirements for a system build and install: +# +# system packages (Debian names): +# +g_sys_packages = [ + 'libfreetype-dev', + 'libgumbo-dev', + 'libharfbuzz-dev', + 'libjbig2dec-dev', + 'libjpeg-dev', + 'libleptonica-dev', + 'libopenjp2-7-dev', + ] +# We also need libtesseract-dev version 5. +# + + +def main(): + + if 1: + log(f'## {__file__}: Starting.') + log(f'{sys.executable=}') + log(f'{platform.python_version()=}') + log(f'{__file__=}') + log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE")=}') + log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST")=}') + log(f'{sys.argv=}') + log(f'{sysconfig.get_path("platlib")=}') + run_command(f'python -V', check=0) + run_command(f'python3 -V', check=0) + run_command(f'sudo python -V', check=0) + run_command(f'sudo python3 -V', check=0) + run_command(f'sudo PATH={os.environ["PATH"]} python -V', check=0) + run_command(f'sudo PATH={os.environ["PATH"]} python3 -V', check=0) + + if test_py.github_workflow_unimportant(): + return + + # Set default behaviour. + # + gdb = False + use_installer = True + mupdf_do = True + mupdf_dir = 'mupdf' + mupdf_git = None + mupdf_so_mode = None + packages = True + prefix = '/usr/local' + pymupdf_do = True + root = 'pymupdf-sysinstall-test-root' + tesseract5 = True + pytest_args = None + pytest_do = True + pytest_name = None + test_venv = 'venv-pymupdf-sysinstall-test' + pip = 'venv' + test_fitz = '1' + test_implementations = 'rR' + + # Parse command-line. + # + env_args_pre = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE', '')) + env_args_post = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST', '')) + args = iter(env_args_pre + sys.argv[1:] + env_args_post) + while 1: + try: + arg = next(args) + except StopIteration: + break + if arg in ('-h', '--help'): + log(__doc__) + return + elif arg == '--gdb': gdb = int(next(args)) + elif arg == '--mupdf-do': mupdf_do = int(next(args)) + elif arg == '--mupdf-dir': mupdf_dir = next(args) + elif arg == '--mupdf-git': mupdf_git = next(args) + elif arg == '--mupdf-so-mode': mupdf_so_mode = next(args) + elif arg == '--packages': packages = int(next(args)) + elif arg == '--prefix': prefix = next(args) + elif arg == '--pymupdf-do': pymupdf_do = int(next(args)) + elif arg == '--root': root = next(args) + elif arg == '--tesseract5': tesseract5 = int(next(args)) + elif arg == '--pytest-do': pytest_do = int(next(args)) + elif arg == '--test-venv': test_venv = next(args) + elif arg == '--use-installer': use_installer = int(next(args)) + elif arg == '--pip': pip = next(args) + elif arg == '-f': test_fitz = next(args) + elif arg == '-i': test_implementations = next(args) + elif arg == '-p': pytest_args = next(args) + elif arg == '-t': pytest_name = next(args) + else: + assert 0, f'Unrecognised arg: {arg!r}' + + assert prefix.startswith('/') + pip_values = ('0', 'sudo', 'venv') + assert pip in pip_values, f'Unrecognised --pip value {pip!r} should be one of: {pip_values!r}' + root = os.path.abspath(root) + root_prefix = f'{root}{prefix}'.replace('//', '/') + + sudo = '' + if root == '/': + sudo = f'sudo PATH={os.environ["PATH"]} ' + def run(command, env_extra=None): + return run_command(command, doit=mupdf_do, env_extra=env_extra) + # Get MuPDF from git if specified. + # + if mupdf_git: + # Update existing checkout or do `git clone`. + if os.path.exists(mupdf_dir): + log(f'## Update MuPDF checkout {mupdf_dir}.') + run(f'cd {mupdf_dir} && git pull && git submodule update --init') + else: + # No existing git checkout, so do a fresh clone. + log(f'## Clone MuPDF into {mupdf_dir}.') + run(f'git clone --recursive --depth 1 --shallow-submodules {mupdf_git} {mupdf_dir}') + + if packages: + # Install required system packages. We assume a Debian package system. + # + log('## Install system packages required by MuPDF.') + run(f'sudo apt update') + run(f'sudo apt install {" ".join(g_sys_packages)}') + # Ubuntu-22.04 has freeglut3-dev, not libglut-dev. + run(f'sudo apt install libglut-dev | sudo apt install freeglut3-dev') + if tesseract5: + log(f'## Force installation of libtesseract-dev version 5.') + # https://stackoverflow.com/questions/76834972/how-can-i-run-pytesseract-python-library-in-ubuntu-22-04 + # + run('sudo apt install -y software-properties-common') + run('sudo add-apt-repository ppa:alex-p/tesseract-ocr-devel') + run('sudo apt update') + run('sudo apt install -y libtesseract-dev') + else: + run('sudo apt install libtesseract-dev') + + # Build+install MuPDF. We use mupd:Makefile's install-shared-python target. + # + if pip == 'sudo': + log('## Installing Python packages required for building MuPDF and PyMuPDF.') + #run(f'sudo pip install --upgrade pip') # Breaks on Github see: https://github.com/pypa/get-pip/issues/226. + # We need to install psutil and pillow as system packages, otherwise things like `import psutil` + # fail, seemingly because of pip warning: + # + # WARNING: Running pip as the 'root' user can result in broken + # permissions and conflicting behaviour with the system package + # manager. It is recommended to use a virtual environment instead: + # https://pip.pypa.io/warnings/venv + # + names = test_py.wrap_get_requires_for_build_wheel(f'{__file__}/../..') + names = names.split(' ') + names = [n for n in names if n not in ('psutil', 'pillow')] + names = ' '.join(names) + run(f'sudo pip install {names}') + run(f'sudo apt install python3-psutil python3-pillow') + + log('## Build and install MuPDF.') + command = f'cd {mupdf_dir}' + command += f' && {sudo}make' + command += f' -j {multiprocessing.cpu_count()}' + #command += f' EXE_LDFLAGS=-Wl,--trace' # Makes linker generate diagnostics as it runs. + command += f' DESTDIR={root}' + command += f' HAVE_LEPTONICA=yes' + command += f' HAVE_TESSERACT=yes' + command += f' USE_SYSTEM_LIBS=yes' + # We need latest zxingcpp so system version not ok. + command += f' USE_SYSTEM_ZXINGCPP=no' + command += f' barcode=yes' + command += f' VENV_FLAG={"--venv" if pip == "venv" else ""}' + if mupdf_so_mode: + command += f' SO_INSTALL_MODE={mupdf_so_mode}' + command += f' build_prefix=system-libs-' + command += f' prefix={prefix}' + command += f' verbose=yes' + command += f' install-shared-python' + command += f' INSTALL_MODE=755' + run( command) + + # Build+install PyMuPDF. + # + log('## Build and install PyMuPDF.') + def run(command): + return run_command(command, doit=pymupdf_do) + flags_freetype2 = run_command('pkg-config --cflags freetype2', capture=1) + compile_flags = f'-I {root_prefix}/include {flags_freetype2}' + link_flags = f'-L {root_prefix}/lib' + env = '' + env += f'CFLAGS="{compile_flags}" ' + env += f'CXXFLAGS="{compile_flags}" ' + env += f'LDFLAGS="-L {root}/{prefix}/lib" ' + env += f'PYMUPDF_SETUP_MUPDF_BUILD= ' # Use system MuPDF. + if use_installer: + log(f'## Building wheel.') + if pip == 'venv': + venv_name = 'venv-pymupdf-sysinstall' + run(f'pwd') + run(f'rm dist/* || true') + if pip == 'venv': + run(f'{sys.executable} -m venv {venv_name}') + run(f'. {venv_name}/bin/activate && pip install --upgrade pip') + run(f'. {venv_name}/bin/activate && pip install --upgrade installer') + run(f'{env} {venv_name}/bin/python -m pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}') + elif pip == 'sudo': + #run(f'sudo pip install --upgrade pip') # Breaks on Github see: https://github.com/pypa/get-pip/issues/226. + run(f'sudo pip install installer') + run(f'{env} pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}') + else: + log(f'Not installing "installer" because {pip=}.') + wheel = glob.glob(f'dist/*') + assert len(wheel) == 1, f'{wheel=}' + wheel = wheel[0] + log(f'## Installing wheel using `installer`.') + pv = '.'.join(platform.python_version_tuple()[:2]) + p = f'{root_prefix}/lib/python{pv}' + # `python -m installer` fails to overwrite existing files. + run(f'{sudo}rm -r {p}/site-packages/pymupdf || true') + run(f'{sudo}rm -r {p}/site-packages/pymupdf.py || true') + run(f'{sudo}rm -r {p}/site-packages/fitz || true') + run(f'{sudo}rm -r {p}/site-packages/fitz.py || true') + run(f'{sudo}rm -r {p}/site-packages/pymupdf-*.dist-info || true') + run(f'{sudo}rm -r {root_prefix}/bin/pymupdf || true') + if pip == 'venv': + run(f'{sudo}{venv_name}/bin/python -m installer --destdir {root} --prefix {prefix} {wheel}') + else: + run(f'{sudo}{sys.executable} -m installer --destdir {root} --prefix {prefix} {wheel}') + # It seems that MuPDF Python bindings are installed into + # `.../dist-packages` (from mupdf:Mafile's call of `$(shell python3 + # -c "import sysconfig; print(sysconfig.get_path('platlib'))")` while + # `python -m installer` installs PyMuPDF into `.../site-packages`. + # + # This might be because `sysconfig.get_path('platlib')` returns + # `.../site-packages` if run in a venv, otherwise `.../dist-packages`. + # + # And on github ubuntu-latest, sysconfig.get_path("platlib") is + # /opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages + # + # So we set pythonpath (used later) to import from all + # `pythonX.Y/site-packages/` and `pythonX.Y/dist-packages` directories + # within `root_prefix`: + # + pv = platform.python_version().split('.') + pv = f'python{pv[0]}.{pv[1]}' + pythonpath = list() + for dirpath, dirnames, filenames in os.walk(root_prefix): + if os.path.basename(dirpath) == pv: + for leaf in 'site-packages', 'dist-packages': + if leaf in dirnames: + pythonpath.append(os.path.join(dirpath, leaf)) + pythonpath = ':'.join(pythonpath) + log(f'{pythonpath=}') + else: + command = f'{env} pip install -vv --root {root} {os.path.abspath(pymupdf_dir)}' + run( command) + pythonpath = pipcl.install_dir(root) + + # Show contents of installation directory. This is very slow on github, + # where /usr/local contains lots of things. + #run(f'find {root_prefix}|sort') + + # Run pytest tests. + # + log('## Run PyMuPDF pytest tests.') + def run(command, env_extra=None): + return run_command(command, doit=pytest_do, env_extra=env_extra, caller=1) + import gh_release + if pip == 'venv': + # Create venv. + run(f'{sys.executable} -m venv {test_venv}') + # Install required packages. + command = f'. {test_venv}/bin/activate' + command += f' && pip install --upgrade pip' + command += f' && pip install --upgrade {gh_release.test_packages}' + run(command) + elif pip == 'sudo': + names = gh_release.test_packages + names = names.split(' ') + names = [n for n in names if n not in ('psutil', 'pillow')] + names = ' '.join(names) + run(f'sudo pip install --upgrade {names}') + else: + log(f'Not installing packages for testing because {pip=}.') + # Run pytest. + # + # We need to set PYTHONPATH and LD_LIBRARY_PATH. In particular we + # use pipcl.install_dir() to find where pipcl will have installed + # PyMuPDF. + command = '' + if pip == 'venv': + command += f'. {test_venv}/bin/activate &&' + command += f' LD_LIBRARY_PATH={root_prefix}/lib PYTHONPATH={pythonpath} PATH=$PATH:{root_prefix}/bin' + run(f'ls -l {root_prefix}/bin/') + # 2024-03-20: Not sure whether/where `pymupdf` binary is installed, so we + # disable the test_cli* tests. + command += f' {pymupdf_dir}/scripts/test.py' + if gdb: + command += ' --gdb 1' + command += f' -v 0' + if pytest_name is None: + excluded_tests = ( + 'test_color_count', + 'test_3050', + 'test_cli', + 'test_cli_out', + 'test_pylint', + 'test_textbox3', + 'test_3493', + 'test_4180', + ) + excluded_tests = ' and not '.join(excluded_tests) + if not pytest_args: + pytest_args = '' + pytest_args += f' -k \'not {excluded_tests}\'' + else: + command += f' -t {pytest_name}' + if test_fitz: + command += f' -f {test_fitz}' + if test_implementations: + command += f' -i {test_implementations}' + if pytest_args: + command += f' -p {shlex.quote(pytest_args)}' + if pytest_do: + command += ' test' + run(command, env_extra=dict(PYMUPDF_SYSINSTALL_TEST='1')) + + +def run_command(command, capture=False, check=True, doit=True, env_extra=None, caller=0): + if doit: + return pipcl.run(command, capture=capture, check=check, caller=caller+2, env_extra=env_extra) + else: + log(f'## Would have run: {command}', caller=2) + + +if __name__ == '__main__': + main() diff --git a/scripts/test.py b/scripts/test.py new file mode 100755 index 000000000..986b8c7f4 --- /dev/null +++ b/scripts/test.py @@ -0,0 +1,1601 @@ +#! /usr/bin/env python3 + +'''Developer build/test script for PyMuPDF. + +Examples: + + ./PyMuPDF/scripts/test.py -m mupdf build test + Build and test with pre-existing local mupdf/ checkout. + + ./PyMuPDF/scripts/test.py build test + Build and test with default internal download of mupdf. + + ./PyMuPDF/scripts/test.py -m 'git:https://git.ghostscript.com/mupdf.git' build test + Build and test with internal checkout of MuPDF master. + + ./PyMuPDF/scripts/test.py -m ':1.26.x' build test + Build and test using internal checkout of mupdf 1.26.x branch from + Github. + + ./PyMuPDF/scripts/test.py install test -i 1.26.3 -k test_2596 + Install pymupdf-1.26.3 from pupi.org and test only test_2596. + +Usage: + +* Command line arguments are called parameters if they start with `-`, + otherwise they are called commands. +* Parameters are evaluated first in the order that they were specified. +* Then commands are run in the order in which they were specified. +* Usually command `test` would be specified after a `build`, `install` or + `wheel` command. +* Parameters and commands can be interleaved but it may be clearer to separate + them on the command line. + +Other: + +* If we are not already running inside a Python venv, we automatically create a + venv and re-run ourselves inside it (also see the -v option). +* Build/wheel/install commands always install into the venv. +* Tests use whatever PyMuPDF/MuPDF is currently installed in the venv. +* We run tests with pytest. + +* One can generate call traces by setting environment variables in debug + builds. For details see: + https://mupdf.readthedocs.io/en/latest/language-bindings.html#environmental-variables + +Command line args: + + -a + Read next space-separated argument(s) from environmental variable + . + * Does nothing if is unset. + * Useful when running via Github action. + + -b + Set build type for `build` commands. `` should be one of + 'release', 'debug', 'memento'. [This makes `build` set environment + variable `PYMUPDF_SETUP_MUPDF_BUILD_TYPE`, which is used by PyMuPDF's + `setup.py`.] + + --build-flavour + [Obsolete.] + Combination of 'p', 'b', 'd'. See ../setup.py's description of + PYMUPDF_SETUP_FLAVOUR. Default is 'pbd', i.e. self-contained PyMuPDF + wheels including MuPDF build-time files. + + --build-isolation 0|1 + If true (the default on non-OpenBSD systems), we let pip create and use + its own new venv to build PyMuPDF. Otherwise we force pip to use the + current venv. + + --cibw-archs-linux + Set CIBW_ARCHS_LINUX, e.g. to `auto64 aarch64`. Default is `auto64` so + this allows control over whether to build linux-aarch64 wheels. + + --cibw-name + Name to use when installing cibuildwheel, e.g.: + --cibw-name cibuildwheel==3.0.0b1 + --cibw-name git+https://github.com/pypa/cibuildwheel + Default is `cibuildwheel`, i.e. the current release. + + --cibw-pyodide 0|1 + Experimental, make `cibw` command build a pyodide wheel. + 2025-05-27: this fails when building mupdf C API - `ld -r -b binary + ...` fails with: + emcc: error: binary: No such file or directory ("binary" was expected to be an input file, based on the commandline arguments provided) + + --cibw-pyodide-version + Override default Pyodide version to use with `cibuildwheel` command. If + empty string we use cibuildwheel's default. + + --cibw-release-1 + Set up so that `cibw` builds all wheels except linux-aarch64, and sdist + if on Linux. + + --cibw-release-2 + Set up so that `cibw` builds only linux-aarch64 wheel. + + --cibw-skip-add-defaults 0|1 + If 1 (the default) we add defaults to CIBW_SKIP such as `pp*` (to + exclude pypy) and `cp3??t-*` (to exclude free-threading). + + --cibw-test-project 0|1 + If 1, command `cibw` will use a minimal test project instead of the + PyMuPDF directory itself. + + The test project uses setjmp/longjmp and C++ throw/catch. + + The test checks for current behaviour, so with `--cibw-pyodide 1` it + succeeds if the cibw command fails with the expected error message. + + 2025-08-22: + Builds ok on Linux. + + Fails at runtime with --cibw-pyodide 1: + + With compile/link flags ``: + (+45.0s): remote.py:233:main: jules-devuan: Pyodide has suffered a fatal error. Please report this to the Pyodide maintainers. + (+45.1s): remote.py:233:main: jules-devuan: Stack (most recent call first): + (+45.1s): remote.py:233:main: jules-devuan: File "/tmp/cibw-run-h_pfo0wf/cp312-pyodide_wasm32/venv-test/lib/python3.12/site-packages/foo/__init__.py", line 63 in bar + (+45.1s): remote.py:233:main: jules-devuan: File "The cause of the fatal error was: + (+45.1s): remote.py:233:main: jules-devuan: CppException std::runtime_error: deliberate exception + (+45.1s): remote.py:233:main: jules-devuan: at convertCppException (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:48959) + (+45.1s): remote.py:233:main: jules-devuan: at API.fatal_error (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:49253) + (+45.1s): remote.py:233:main: jules-devuan: at main (file:///home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/python_cli_entry.mjs:149:13) { + (+45.1s): remote.py:233:main: jules-devuan: ty: 'std::runtime_error', + (+45.1s): remote.py:233:main: jules-devuan: pyodide_fatal_error: true + (+45.1s): remote.py:233:main: jules-devuan: } + (+45.1s): remote.py:233:main: jules-devuan: ", line 1 in + (+45.1s): remote.py:233:main: jules-devuan: CppException std::runtime_error: deliberate exception + (+45.1s): remote.py:233:main: jules-devuan: at convertCppException (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:48959) + (+45.1s): remote.py:233:main: jules-devuan: at API.fatal_error (/home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/pyodide.asm.js:10:49253) + (+45.1s): remote.py:233:main: jules-devuan: at main (file:///home/jules/.cache/cibuildwheel/pyodide-build-0.30.7/0.27.7/xbuildenv/pyodide-root/dist/python_cli_entry.mjs:149:13) { + (+45.1s): remote.py:233:main: jules-devuan: ty: 'std::runtime_error', + (+45.1s): remote.py:233:main: jules-devuan: pyodide_fatal_error: true + (+45.1s): remote.py:233:main: jules-devuan: } + + With compile/link flags `-fwasm-exceptions`: + [LinkError: WebAssembly.instantiate(): Import #60 module="env" function="__c_longjmp": tag import requires a WebAssembly.Tag] + + With compile/link flags `-fwasm-exceptions -sSUPPORT_LONGJMP=wasm`: + [LinkError: WebAssembly.instantiate(): Import #60 module="env" function="__c_longjmp": tag import requires a WebAssembly.Tag] + + --cibw-test-project-setjmp 0|1 + If 1, --cibw-test-project builds a project that uses + setjmp/longjmp. Default is 0 (Windows builds fail when attempting to + compile the output from swig). + + -d + Equivalent to `-b debug`. + + --dummy + Sets PYMUPDF_SETUP_DUMMY=1 which makes setup.py build a dummy wheel + with no content. For internal testing only. + + -e = + Add to environment used in build and test commands. Can be specified + multiple times. + + -f 0|1 + If 1 we also test alias `fitz` as well as `pymupdf`. Default is '0'. + + --graal + Use graal - run inside a Graal VM instead of a Python venv. + + As of 2025-08-04 we: + * Clone the latest pyenv and build it. + * Use pyenv to install graalpy. + * Use graalpy to create venv. + + [After the first time, suggest `-v 1` to avoid delay from + updating/building pyenv and recreating the graal venv.] + + --help + -h + Show help. + + -I + Set PyMuPDF implementations to test. + must contain only these individual characters: + 'r' - rebased. + 'R' - rebased without optimisations. + Default is 'r'. Also see `PyMuPDF:tests/run_compound.py`. + + -i + Controls behaviour of `install` command: + + * If ends with `.whl` we use `pip install + `. + * If starts with == or >= or >, we use `pip install + pymupdf`. + * Otherwise we use `pip install pymupdf==`. + + -k + Specify which test(s) to run; passed straight through to pytest's `-k`. + For example `-k test_3354`. + + -m | --mupdf + Location of mupdf as local directory or remote git, to be used when + building PyMuPDF. + + This sets environment variable PYMUPDF_SETUP_MUPDF_BUILD, which is used + by PyMuPDF/setup.py. If not specified PyMuPDF will download its default + mupdf .tgz. + + Additionally if starts with ':' we use the remaining text as + the branch name and add https://github.com/ArtifexSoftware/mupdf.git. + + For example: + + -m "git:--branch master https://github.com/ArtifexSoftware/mupdf.git" + -m :master + + -m "git:--branch 1.26.x https://github.com/ArtifexSoftware/mupdf.git" + -m :1.26.x + + --mupdf-clean 0|1 + If 1 we do a clean MuPDF build. + + -M 0|1 + --build-mupdf 0|1 + Whether to rebuild mupdf when we build PyMuPDF. Default is 1. + + -o + Control whether we do nothing on the current platform. + * is a comma-separated list of names. + * If is empty (the default), we always run normally. + * Otherwise we only run if an item in matches (case + insensitive) platform.system(). + * For example `-o linux,darwin` will do nothing unless on Linux or + MacOS. + + -p + Set pytest options; default is ''. + + -P 0|1 + If 1, automatically install required system packages such as + Valgrind. Default is 1 if running as Github action, otherwise 0. + + --pybind 0|1 + Experimental, for investigating + https://github.com/pymupdf/PyMuPDF/issues/3869. Runs run basic code + inside C++ pybind. Requires `sudo apt install pybind11-dev` or similar. + + --pyodide-build-version + Version of Python package pyodide-build to use with `pyodide` command. + + If None (the default) `pyodide` uses the latest available version. + 2025-02-13: pyodide_build_version='0.29.3' works. + + -s 0 | 1 + If 1 (the default), build with Python Limited API/Stable ABI. + [This simply sSets $PYMUPDF_SETUP_PY_LIMITED_API, which is used by + PyMuPDF/setup.py.] + + --show-args: + Show sys.argv and exit. For debugging. + + --sync-paths + Do not run anything, instead write required files/directories/checkouts + to , one per line. This is to help with automated running on + remote machines. + + --system-site-packages 0|1 + If 1, use `--system-site-packages` when creating venv. Defaults is 0. + + --swig + Use instead of the `swig` command. + + Unix only: + Clone/update/build swig from a git repository using 'git:' prefix. + + We default to https://github.com/swig/swig.git branch master, so these + are all equivalent: + + --swig 'git:--branch master https://github.com/swig/swig.git' + --swig 'git:--branch master' + --swig git: + + 2025-08-18: This fixes building with py_limited_api on python-3.13. + + --swig-quick 0|1 + If 1 and `--swig` starts with 'git:', we do not update/build swig if + already present. + + See description of PYMUPDF_SETUP_SWIG_QUICK in setup.py. + + -t + Pytest test names, comma-separated. Should be relative to PyMuPDF + directory. For example: + -t tests/test_general.py + -t tests/test_general.py::test_subset_fonts + To specify multiple tests, use comma-separated list and/or multiple `-t + ` args. + + --timeout + Sets timeout when running tests. + + -T + Use specified prefix when running pytest, must be one of: + gdb + helgrind + valgrind + + -v + venv is: + 0 - do not use a venv. + 1 - Use venv. If it already exists, we assume the existing directory + was created by us earlier and is a valid venv containing all + necessary packages; this saves a little time. + 2 - Use venv. + 3 - Use venv but delete it first if it already exists. + The default is 2. + +Commands: + + build + Builds and installs PyMuPDF into venv, using `pip install .../PyMuPDF`. + + buildtest + Same as 'build test'. + + cibw + Build and test PyMuPDF wheel(s) using cibuildwheel. Wheels are placed + in directory `wheelhouse`. + * We do not attempt to install wheels. + * So it is generally not useful to do `cibw test`. + + If CIBW_BUILD is unset, we set it as follows: + * On Github we build and test all supported Python versions. + * Otherwise we build and test the current Python version only. + + If CIBW_ARCHS is unset we set $CIBW_ARCHS_WINDOWS, $CIBW_ARCHS_MACOS + and $CIBW_ARCHS_LINUX to auto64 if they are unset. + + install + Install with `pip install --force-reinstall `. + + pyodide + Build Pyodide wheel. We clone `emsdk.git`, set it up, and run + `pyodide build`. This runs our setup.py with CC etc set up + to create Pyodide binaries in a wheel called, for example, + `PyMuPDF-1.23.2-cp311-none-emscripten_3_1_32_wasm32.whl`. + + It seems that sys.version must match the Python version inside emsdk; + as of 2025-02-14 this is 3.12. Otherwise we get build errors such as: + [wasm-validator error in function 723] unexpected false: all used features should be allowed, on ... + + test + Runs PyMuPDF's pytest tests. Default is to test rebased and unoptimised + rebased; use `-i` to change this. + + wheel + Build and install wheel. + + +Environment: + PYMUDF_SCRIPTS_TEST_options + Is prepended to command line args. +''' + +import glob +import os +import platform +import re +import shlex +import shutil +import subprocess +import sys +import textwrap + + +pymupdf_dir_abs = os.path.abspath( f'{__file__}/../..') + +try: + sys.path.insert(0, pymupdf_dir_abs) + import pipcl +finally: + del sys.path[0] + +try: + sys.path.insert(0, f'{pymupdf_dir_abs}/scripts') + import gh_release +finally: + del sys.path[0] + + +pymupdf_dir = pipcl.relpath(pymupdf_dir_abs) + +log = pipcl.log0 +run = pipcl.run + + +# We build and test Python 3.x for x in this range. +python_versions_minor = range(9, 14+1) + +def cibw_cp(*version_minors): + ''' + Returns in 'cp39*' format, e.g. suitable for CIBW_BUILD. + ''' + ret = list() + for version_minor in version_minors: + ret.append(f'cp3{version_minor}*') + return ' '.join(ret) + + +def main(argv): + + if github_workflow_unimportant(): + return + + build_isolation = None + cibw_name = None + cibw_pyodide = None + cibw_pyodide_version = None + cibw_skip_add_defaults = True + cibw_test_project = None + cibw_test_project_setjmp = False + commands = list() + env_extra = dict() + graal = False + implementations = 'r' + install_version = None + mupdf_sync = None + os_names = list() + system_packages = True if os.environ.get('GITHUB_ACTIONS') == 'true' else False + pybind = False + pyodide_build_version = None + pytest_options = '' + pytest_prefix = None + cibw_sdist = None + show_args = False + show_help = False + sync_paths = False + system_site_packages = False + swig = None + swig_quick = None + test_fitz = False + test_names = list() + test_timeout = None + valgrind = False + warnings = list() + venv = 2 + + options = os.environ.get('PYMUDF_SCRIPTS_TEST_options', '') + options = shlex.split(options) + + # Parse args and update the above state. We do this before moving into a + # venv, partly so we can return errors immediately. + # + args = iter(options + argv[1:]) + i = 0 + while 1: + try: + arg = next(args) + except StopIteration: + arg = None + break + + if 0: + pass + + elif arg == '-a': + _name = next(args) + _value = os.environ.get(_name, '') + _args = shlex.split(_value) + list(args) + args = iter(_args) + + elif arg == '-b': + env_extra['PYMUPDF_SETUP_MUPDF_BUILD_TYPE'] = next(args) + + elif arg == '--build-flavour': + env_extra['PYMUPDF_SETUP_FLAVOUR'] = next(args) + + elif arg == '--build-isolation': + build_isolation = int(next(args)) + + elif arg == '--cibw-pyodide-version': + cibw_pyodide_version = next(args) + + elif arg == '--cibw-release-1': + cibw_sdist = True + env_extra['CIBW_ARCHS_LINUX'] = 'auto64' + env_extra['CIBW_ARCHS_MACOS'] = 'auto64' + env_extra['CIBW_ARCHS_WINDOWS'] = 'auto' # win32 and win64. + env_extra['CIBW_SKIP'] = '*i686 *musllinux*aarch64* cp3??t-*' + cibw_skip_add_defaults = 0 + + elif arg == '--cibw-release-2': + # Testing only first and last python versions because otherwise + # Github times out after 6h. + env_extra['CIBW_BUILD'] = cibw_cp(python_versions_minor[0], python_versions_minor[-1]) + env_extra['CIBW_ARCHS_LINUX'] = 'aarch64' + env_extra['CIBW_SKIP'] = '*i686 *musllinux*aarch64* cp3??t-*' + cibw_skip_add_defaults = 0 + os_names = ['linux'] + + elif arg == '--cibw-archs-linux': + env_extra['CIBW_ARCHS_LINUX'] = next(args) + + elif arg == '--cibw-name': + cibw_name = next(args) + + elif arg == '--cibw-pyodide': + cibw_pyodide = int(next(args)) + + elif arg == '--cibw-skip-add-defaults': + cibw_skip_add_defaults = int(next(args)) + + elif arg == '--cibw-test-project': + cibw_test_project = int(next(args)) + + elif arg == '--cibw-test-project-setjmp': + cibw_test_project_setjmp = int(next(args)) + + elif arg == '-d': + env_extra['PYMUPDF_SETUP_MUPDF_BUILD_TYPE'] = 'debug' + + elif arg == '--dummy': + env_extra['PYMUPDF_SETUP_DUMMY'] = '1' + env_extra['CIBW_TEST_COMMAND'] = '' + + elif arg == '-e': + _nv = next(args) + assert '=' in _nv, f'-e = does not contain "=": {_nv!r}' + _name, _value = _nv.split('=', 1) + env_extra[_name] = _value + + elif arg == '-f': + test_fitz = int(next(args)) + + elif arg == '--graal': + graal = True + + elif arg in ('-h', '--help'): + show_help = True + + elif arg == '-i': + install_version = next(args) + + elif arg == '-I': + implementations = next(args) + + elif arg == '-k': + pytest_options += f' -k {shlex.quote(next(args))}' + + elif arg in ('-m', '--mupdf'): + _mupdf = next(args) + if _mupdf == '-': + _mupdf = None + elif _mupdf.startswith(':'): + _branch = _mupdf[1:] + _mupdf = f'git:--branch {_branch} https://github.com/ArtifexSoftware/mupdf.git' + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf + elif _mupdf.startswith('git:') or '://' in _mupdf: + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = _mupdf + else: + assert os.path.isdir(_mupdf), f'Not a directory: {_mupdf=}' + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = os.path.abspath(_mupdf) + mupdf_sync = _mupdf + + elif arg == '--mupdf-clean': + env_extra['PYMUPDF_SETUP_MUPDF_CLEAN']=next(args) + + elif arg in ('-M', '--build-mupdf'): + env_extra['PYMUPDF_SETUP_MUPDF_REBUILD'] = next(args) + + elif arg == '-o': + os_names += next(args).split(',') + + elif arg == '-p': + pytest_options += f' {next(args)}' + + elif arg == '-P': + system_packages = int(next(args)) + + elif arg == '--pybind': + pybind = int(next(args)) + + elif arg == '--pyodide-build-version': + pyodide_build_version = next(args) + + elif arg == '-s': + _value = next(args) + assert _value in ('0', '1'), f'`-s` must be followed by `0` or `1`, not {_value=}.' + env_extra['PYMUPDF_SETUP_PY_LIMITED_API'] = _value + + elif arg == '--show-args': + show_args = 1 + elif arg == '--sync-paths': + sync_paths = next(args) + + elif arg == '--system-site-packages': + system_site_packages = int(next(args)) + + elif arg == '--swig': + swig = next(args) + + elif arg == '--swig-quick': + swig_quick = int(next(args)) + + elif arg == '-t': + test_names += next(args).split(',') + + elif arg == '--timeout': + test_timeout = float(next(args)) + + elif arg == '-T': + pytest_prefix = next(args) + assert pytest_prefix in ('gdb', 'helgrind', 'valgrind'), \ + f'Unrecognised {pytest_prefix=}, should be one of: gdb valgrind helgrind.' + + elif arg == '-v': + venv = int(next(args)) + assert venv in (0, 1, 2, 3), f'Invalid {venv=} should be 0, 1, 2 or 3.' + + elif arg in ('build', 'cibw', 'install', 'pyodide', 'test', 'wheel'): + commands.append(arg) + + elif arg == 'buildtest': + commands += ['build', 'test'] + + else: + assert 0, f'Unrecognised option/command: {arg=}.' + + # Handle special args --sync-paths, -h, -v, -o first. + # + if sync_paths: + # Print required files, directories and checkouts. + with open(sync_paths, 'w') as f: + print(pymupdf_dir, file=f) + if mupdf_sync: + print(mupdf_sync, file=f) + return + + if show_help: + print(__doc__) + return + + if show_args: + print(f'sys.argv ({len(sys.argv)}):') + for arg in sys.argv: + print(f' {arg!r}') + return + + if os_names: + if platform.system().lower() not in os_names: + log(f'Not running because {platform.system().lower()=} not in {os_names=}') + return + + if commands: + if venv: + # Rerun ourselves inside a venv if not already in a venv. + if not venv_in(): + if graal: + if 'cibw' in commands: + # We don't create graal/pyenv so wheel/build commands + # will not work. + assert 'wheel' not in commands + assert 'build' not in commands + if graal and 'cibw' not in commands: + # 2025-07-24: We need the latest pyenv. + graalpy = 'graalpy-24.2.1' + venv_name = f'venv-pymupdf-{graalpy}' + pyenv_dir = f'{pymupdf_dir_abs}/pyenv-git' + os.environ['PYENV_ROOT'] = pyenv_dir + os.environ['PATH'] = f'{pyenv_dir}/bin:{os.environ["PATH"]}' + os.environ['PIPCL_GRAAL_PYTHON'] = sys.executable + + if venv >= 3: + shutil.rmtree(venv_name, ignore_errors=1) + if venv == 1 and os.path.exists(pyenv_dir) and os.path.exists(venv_name): + log(f'{venv=} and {venv_name=} already exists so not building pyenv or creating venv.') + else: + pipcl.git_get(pyenv_dir, remote='https://github.com/pyenv/pyenv.git', branch='master') + run(f'cd {pyenv_dir} && src/configure && make -C src') + run(f'which pyenv') + run(f'pyenv install -v -s {graalpy}') + run(f'{pyenv_dir}/versions/{graalpy}/bin/graalpy -m venv {venv_name}') + e = run(f'. {venv_name}/bin/activate && python {shlex.join(sys.argv)}', + check=False, + ) + else: + venv_name = f'venv-pymupdf-{platform.python_version()}-{int.bit_length(sys.maxsize+1)}' + e = venv_run( + sys.argv, + venv_name, + recreate=(venv>=2), + clean=(venv>=3), + ) + sys.exit(e) + else: + log(f'Warning, no commands specified so nothing to do.') + + # Clone/update/build swig if specified. + swig_binary = pipcl.swig_get(swig, swig_quick) + if swig_binary: + os.environ['PYMUPDF_SETUP_SWIG'] = swig_binary + + # Handle commands. + # + have_installed = False + for command in commands: + log(f'### {command=}.') + if 0: + pass + + elif command in ('build', 'wheel'): + build( + env_extra, + build_isolation=build_isolation, + venv=venv, + wheel=(command=='wheel'), + ) + have_installed = True + + elif command == 'cibw': + # Build wheel(s) with cibuildwheel. + + if platform.system() == 'Linux': + PYMUPDF_SETUP_MUPDF_BUILD = env_extra.get('PYMUPDF_SETUP_MUPDF_BUILD') + if PYMUPDF_SETUP_MUPDF_BUILD and not PYMUPDF_SETUP_MUPDF_BUILD.startswith('git:'): + assert PYMUPDF_SETUP_MUPDF_BUILD.startswith('/') + env_extra['PYMUPDF_SETUP_MUPDF_BUILD'] = f'/host/{PYMUPDF_SETUP_MUPDF_BUILD}' + + cibuildwheel( + env_extra, + cibw_name or 'cibuildwheel', + cibw_pyodide, + cibw_pyodide_version, + cibw_sdist, + cibw_test_project, + cibw_test_project_setjmp, + cibw_skip_add_defaults, + graal, + ) + + elif command == 'install': + p = 'pymupdf' + if install_version: + if install_version.endswith('.whl'): + p = install_version + elif install_version.startswith(('==', '>=', '>')): + p = f'{p}{install_version}' + else: + p = f'{p}=={install_version}' + run(f'pip install --force-reinstall {p}') + have_installed = True + + elif command == 'test': + if not have_installed: + log(f'## Warning: have not built/installed PyMuPDF; testing whatever is already installed.') + test( + env_extra=env_extra, + implementations=implementations, + test_names=test_names, + pytest_options=pytest_options, + test_timeout=test_timeout, + pytest_prefix=pytest_prefix, + test_fitz=test_fitz, + pybind=pybind, + system_packages=system_packages, + venv=venv, + ) + + elif command == 'pyodide': + build_pyodide_wheel(pyodide_build_version=pyodide_build_version) + + else: + assert 0, f'{command=}' + + +def get_env_bool(name, default=0): + v = os.environ.get(name) + if v in ('1', 'true'): + return 1 + elif v in ('0', 'false'): + return 0 + elif v is None: + return default + else: + assert 0, f'Bad environ {name=} {v=}' + +def show_help(): + print(__doc__) + print(venv_info()) + + +def github_workflow_unimportant(): + ''' + Returns true if we are running a Github scheduled workflow but in a + repository not called 'PyMuPDF'. This can be used to avoid consuming + unnecessary Github minutes running workflows on non-main repositories such + as ArtifexSoftware/PyMuPDF-julian. + ''' + GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME') + GITHUB_REPOSITORY = os.environ.get('GITHUB_REPOSITORY') + if GITHUB_EVENT_NAME == 'schedule' and GITHUB_REPOSITORY != 'pymupdf/PyMuPDF': + log(f'## This is an unimportant Github workflow: a scheduled event, not in the main repository `pymupdf/PyMuPDF`.') + log(f'## {GITHUB_EVENT_NAME=}.') + log(f'## {GITHUB_REPOSITORY=}.') + return True + +def venv_info(pytest_args=None): + ''' + Returns string containing information about the venv we use and how to + run tests manually. If specified, `pytest_args` contains the pytest args, + otherwise we use an example. + ''' + pymupdf_dir_rel = gh_release.relpath(pymupdf_dir) + ret = f'Name of venv: {gh_release.venv_name}\n' + if pytest_args is None: + pytest_args = f'{pymupdf_dir_rel}/tests/test_general.py::test_subset_fonts' + if platform.system() == 'Windows': + ret += textwrap.dedent(f''' + Rerun tests manually with rebased implementation: + Enter venv: + {gh_release.venv_name}\\Scripts\\activate + Run specific test in venv: + {gh_release.venv_name}\\Scripts\\python -m pytest {pytest_args} + ''') + else: + ret += textwrap.dedent(f''' + Rerun tests manually with rebased implementation: + Enter venv and run specific test, also under gdb: + . {gh_release.venv_name}/bin/activate + python -m pytest {pytest_args} + gdb --args python -m pytest {pytest_args} + Run without explicitly entering venv, also under gdb: + ./{gh_release.venv_name}/bin/python -m pytest {pytest_args} + gdb --args ./{gh_release.venv_name}/bin/python -m pytest {pytest_args} + ''') + return ret + + +def build( + env_extra, + *, + build_isolation, + venv, + wheel, + ): + log(f'{build_isolation=}') + + if build_isolation is None: + # On OpenBSD libclang is not available on pypi.org, so we need to force + # use of system package py3-llvm with --no-build-isolation, manually + # installing other required packages. + build_isolation = False if platform.system() == 'OpenBSD' else True + + if build_isolation: + # This is the default on non-OpenBSD. + build_isolation_text = '' + else: + # Not using build isolation - i.e. pip will not be using its own clean + # venv, so we need to explicitly install required packages. Manually + # install required packages from pyproject.toml. + sys.path.insert(0, os.path.abspath(f'{__file__}/../..')) + import setup + names = setup.get_requires_for_build_wheel() + del sys.path[0] + if names: + names = ' '.join(names) + if venv == 2: + run( f'python -m pip install --upgrade {names}') + else: + log(f'{venv=}: Not installing packages with pip: {names}') + build_isolation_text = ' --no-build-isolation' + + if wheel: + new_files = pipcl.NewFiles(f'wheelhouse/*.whl') + run(f'pip wheel{build_isolation_text} -w wheelhouse -v {pymupdf_dir_abs}', env_extra=env_extra) + wheel = new_files.get_one() + run(f'pip install --force-reinstall {wheel}') + else: + run(f'pip install{build_isolation_text} -v --force-reinstall {pymupdf_dir_abs}', env_extra=env_extra) + + +def cibuildwheel( + env_extra, + cibw_name, + cibw_pyodide, + cibw_pyodide_version, + cibw_sdist, + cibw_test_project, + cibw_test_project_setjmp, + cibw_skip_add_defaults, + graal, + ): + + if cibw_sdist and platform.system() == 'Linux': + log(f'Building sdist.') + run(f'cd {pymupdf_dir_abs} && {sys.executable} setup.py -d wheelhouse sdist', env_extra=env_extra) + sdists = glob.glob(f'{pymupdf_dir_abs}/wheelhouse/pymupdf-*.tar.gz') + log(f'{sdists=}') + assert sdists + + run(f'pip install --upgrade --force-reinstall {cibw_name}') + + # Some general flags. + if 'CIBW_BUILD_VERBOSITY' not in env_extra: + env_extra['CIBW_BUILD_VERBOSITY'] = '1' + + # Add default flags to CIBW_SKIP. + # 2025-10-07: `cp3??t-*` excludes free-threading, which currently breaks + # some tests. + + if cibw_skip_add_defaults: + CIBW_SKIP = env_extra.get('CIBW_SKIP', '') + CIBW_SKIP += ' *i686 *musllinux* *-win32 *-aarch64 cp3??t-*' + CIBW_SKIP = CIBW_SKIP.split() + CIBW_SKIP = sorted(list(set(CIBW_SKIP))) + CIBW_SKIP = ' '.join(CIBW_SKIP) + env_extra['CIBW_SKIP'] = CIBW_SKIP + + # Set what wheels to build, if not already specified. + if 'CIBW_ARCHS' not in env_extra: + if 'CIBW_ARCHS_WINDOWS' not in env_extra: + env_extra['CIBW_ARCHS_WINDOWS'] = 'auto64' + + if 'CIBW_ARCHS_MACOS' not in env_extra: + env_extra['CIBW_ARCHS_MACOS'] = 'auto64' + + if 'CIBW_ARCHS_LINUX' not in env_extra: + env_extra['CIBW_ARCHS_LINUX'] = 'auto64' + + # Tell cibuildwheel not to use `auditwheel` on Linux and MacOS, + # because it cannot cope with us deliberately having required + # libraries in different wheel - specifically in the PyMuPDF wheel. + # + # We cannot use a subset of auditwheel's functionality + # with `auditwheel addtag` because it says `No tags + # to be added` and terminates with non-zero. See: + # https://github.com/pypa/auditwheel/issues/439. + # + env_extra['CIBW_REPAIR_WHEEL_COMMAND_LINUX'] = '' + env_extra['CIBW_REPAIR_WHEEL_COMMAND_MACOS'] = '' + + # Tell cibuildwheel how to test PyMuPDF. + if 'CIBW_TEST_COMMAND' not in env_extra: + env_extra['CIBW_TEST_COMMAND'] = f'python {{project}}/scripts/test.py test' + + # Specify python versions. + CIBW_BUILD = env_extra.get('CIBW_BUILD') + log(f'{CIBW_BUILD=}') + if CIBW_BUILD is None: + if graal: + CIBW_BUILD = 'gp*' + env_extra['CIBW_ENABLE'] = 'graalpy' + elif cibw_pyodide: + # Using python-3.13 fixes problems with MuPDF's setjmp/longjmp. + CIBW_BUILD = 'cp313*' + elif os.environ.get('GITHUB_ACTIONS') == 'true': + # Build/test all supported Python versions. + CIBW_BUILD = cibw_cp(*python_versions_minor) + else: + # Build/test current Python only. + v = platform.python_version_tuple()[:2] + log(f'{v=}') + CIBW_BUILD = f'cp{"".join(v)}*' + log(f'Defaulting to {CIBW_BUILD=}.') + + cibw_pyodide_args = '' + if cibw_pyodide: + cibw_pyodide_args = ' --platform pyodide' + env_extra['HAVE_LIBCRYPTO'] = 'no' + env_extra['PYMUPDF_SETUP_MUPDF_TESSERACT'] = '0' + if cibw_pyodide_version: + # 2025-07-21: there is no --pyodide-version option so we set + # CIBW_PYODIDE_VERSION. + env_extra['CIBW_PYODIDE_VERSION'] = cibw_pyodide_version + env_extra['CIBW_ENABLE'] = 'pyodide-prerelease' + + # Pass all the environment variables we have set, to Linux docker. Note + # that this will miss any settings in the original environment. We have to + # add CIBW_BUILD explicitly because we haven't set it yet. + CIBW_ENVIRONMENT_PASS_LINUX = set(env_extra.keys()) + CIBW_ENVIRONMENT_PASS_LINUX.add('CIBW_BUILD') + CIBW_ENVIRONMENT_PASS_LINUX = sorted(list(CIBW_ENVIRONMENT_PASS_LINUX)) + CIBW_ENVIRONMENT_PASS_LINUX = ' '.join(CIBW_ENVIRONMENT_PASS_LINUX) + env_extra['CIBW_ENVIRONMENT_PASS_LINUX'] = CIBW_ENVIRONMENT_PASS_LINUX + + if cibw_test_project: + cibw_do_test_project( + env_extra, + CIBW_BUILD, + cibw_pyodide, + cibw_pyodide_args, + cibw_test_project_setjmp, + ) + return + + env_extra['CIBW_BUILD'] = CIBW_BUILD + run(f'cd {pymupdf_dir} && cibuildwheel{cibw_pyodide_args}', env_extra=env_extra, prefix='cibw: ') + run(f'ls -ld {pymupdf_dir}/wheelhouse/*') + + +def cibw_do_test_project( + env_extra, + CIBW_BUILD, + cibw_pyodide, + cibw_pyodide_args, + cibw_test_project_setjmp, + ): + testdir = f'{pymupdf_dir_abs}/cibw_test' + shutil.rmtree(testdir, ignore_errors=1) + os.mkdir(testdir) + with open(f'{testdir}/setup.py', 'w') as f: + f.write(textwrap.dedent(f''' + import shutil + import sys + import os + import pipcl + + def build(): + so_leaf = pipcl.build_extension( + name = 'foo', + path_i = 'foo.i', + outdir = 'build', + source_extra = 'qwerty.cpp', + py_limited_api = True, + ) + + return [ + ('build/foo.py', 'foo/__init__.py'), + (f'build/{{so_leaf}}', f'foo/'), + ] + + p = pipcl.Package( + name = 'pymupdf-test', + version = '1.2.3', + fn_build = build, + py_limited_api=True, + ) + + def get_requires_for_build_wheel(config_settings=None): + return ['swig'] + + build_wheel = p.build_wheel + build_sdist = p.build_sdist + + # Handle old-style setup.py command-line usage: + if __name__ == '__main__': + p.handle_argv(sys.argv) + ''')) + with open(f'{testdir}/foo.i', 'w') as f: + if cibw_test_project_setjmp: + f.write(textwrap.dedent(''' + %{ + #include + + #include + #include + #include + #include + + int qwerty(void); + + static sigjmp_buf jmpbuf; + static int bar0(const char* text) + { + printf("bar0(): text: %s\\n", text); + + int q = qwerty(); + printf("bar0(): q=%i\\n", q); + + int len = (int) strlen(text); + printf("bar0(): len=%i\\n", len); + printf("bar0(): calling longjmp().\\n"); + fflush(stdout); + longjmp(jmpbuf, 1); + assert(0); + } + int bar1(const char* text) + { + int ret = 0; + if (setjmp(jmpbuf) == 0) + { + ret = bar0(text); + } + else + { + printf("bar1(): setjmp() returned non-zero.\\n"); + throw std::runtime_error("deliberate exception"); + } + assert(0); + } + int bar(const char* text) + { + int ret = 0; + try + { + ret = bar1(text); + } + catch(std::exception& e) + { + printf("bar1(): received exception: %s\\n", e.what()); + } + return ret; + } + %} + int bar(const char* text); + ''')) + else: + f.write(textwrap.dedent(''' + %{ + #include + + #include + #include + #include + + int qwerty(void); + + int bar(const char* text) + { + qwerty(); + return strlen(text); + } + %} + int bar(const char* text); + ''')) + + with open(f'{testdir}/qwerty.cpp', 'w') as f: + f.write(textwrap.dedent(''' + #include + int qwerty(void) + { + printf("qwerty()\\n"); + return 3; + } + ''')) + + with open(f'{testdir}/pyproject.toml', 'w') as f: + f.write(textwrap.dedent(''' + [build-system] + # We define required packages in setup.py:get_requires_for_build_wheel(). + requires = [] + + # See pep-517. + # + build-backend = "setup" + backend-path = ["."] + ''')) + + shutil.copy2(f'{pymupdf_dir_abs}/pipcl.py', f'{testdir}/pipcl.py') + shutil.copy2(f'{pymupdf_dir_abs}/wdev.py', f'{testdir}/wdev.py') + + env_extra['CIBW_BUILD'] = CIBW_BUILD + CIBW_TEST_COMMAND = '' + if cibw_pyodide: + CIBW_TEST_COMMAND += 'pyodide xbuildenv search --all; ' + CIBW_TEST_COMMAND += 'python -c "import foo; foo.bar(\\"some text\\")"' + env_extra['CIBW_TEST_COMMAND'] = CIBW_TEST_COMMAND + #env_extra['CIBW_TEST_COMMAND'] = '' + + run(f'cd {testdir} && cibuildwheel --output-dir ../wheelhouse{cibw_pyodide_args}', + env_extra=env_extra, + prefix='cibw: ', + ) + run(f'ls -ldt {pymupdf_dir_abs}/wheelhouse/*') + + +def build_pyodide_wheel(pyodide_build_version=None): + ''' + Build Pyodide wheel. + + This runs `pyodide build` inside the PyMuPDF directory, which in turn runs + setup.py in a Pyodide build environment. + ''' + log(f'## Building Pyodide wheel.') + + # Our setup.py does not know anything about Pyodide; we set a few + # required environmental variables here. + # + env_extra = dict() + + # Disable libcrypto because not available in Pyodide. + env_extra['HAVE_LIBCRYPTO'] = 'no' + + # Tell MuPDF to build for Pyodide. + env_extra['OS'] = 'pyodide' + + # Build a single wheel without a separate PyMuPDFb wheel. + env_extra['PYMUPDF_SETUP_FLAVOUR'] = 'pb' + + # 2023-08-30: We set PYMUPDF_SETUP_MUPDF_BUILD_TESSERACT=0 because + # otherwise mupdf thirdparty/tesseract/src/ccstruct/dppoint.cpp fails to + # build because `#include "errcode.h"` finds a header inside emsdk. This is + # pyodide bug https://github.com/pyodide/pyodide/issues/3839. It's fixed in + # https://github.com/pyodide/pyodide/pull/3866 but the fix has not reached + # pypi.org's pyodide-build package. E.g. currently in tag 0.23.4, but + # current devuan pyodide-build is pyodide_build-0.23.4. + # + env_extra['PYMUPDF_SETUP_MUPDF_TESSERACT'] = '0' + setup = pyodide_setup(pymupdf_dir, pyodide_build_version=pyodide_build_version) + command = f'{setup} && echo "### Running pyodide build" && pyodide build --exports whole_archive' + + command = command.replace(' && ', '\n && ') + + run(command, env_extra=env_extra) + + # Copy wheel into `wheelhouse/` so it is picked up as a workflow + # artifact. + # + run(f'ls -l {pymupdf_dir}/dist/') + run(f'mkdir -p {pymupdf_dir}/wheelhouse && cp -p {pymupdf_dir}/dist/* {pymupdf_dir}/wheelhouse/') + run(f'ls -l {pymupdf_dir}/wheelhouse/') + + +def pyodide_setup( + directory, + clean=False, + pyodide_build_version=None, + ): + ''' + Returns a command that will set things up for a pyodide build. + + Args: + directory: + Our command cd's into this directory. + clean: + If true we create an entirely new environment. Otherwise + we reuse any existing emsdk repository and venv. + pyodide_build_version: + Version of Python package pyodide-build; if None we use latest + available version. + 2025-02-13: pyodide_build_version='0.29.3' works. + + The returned command does the following: + + * Checkout latest emsdk from https://github.com/emscripten-core/emsdk.git: + * Clone emsdk repository to `emsdk` if not already present. + * Run `git pull -r` inside emsdk checkout. + * Create venv `venv_pyodide_` if not already present. + * Activate venv `venv_pyodide_`. + * Install/upgrade package `pyodide-build`. + * Run emsdk install scripts and enter emsdk environment. + + Example usage in a build function: + + command = pyodide_setup() + command += ' && pyodide build --exports pyinit' + subprocess.run(command, shell=1, check=1) + ''' + + pv = platform.python_version_tuple()[:2] + assert pv == ('3', '12'), f'Pyodide builds need to be run with Python-3.12 but current Python is {platform.python_version()}.' + command = f'cd {directory}' + + # Clone/update emsdk. We always use the latest emsdk with `git pull`. + # + # 2025-02-13: this works: 2514ec738de72cebbba7f4fdba0cf2fabcb779a5 + # + dir_emsdk = 'emsdk' + if clean: + shutil.rmtree(dir_emsdk, ignore_errors=1) + # 2024-06-25: old `.pyodide-xbuildenv` directory was breaking build, so + # important to remove it here. + shutil.rmtree('.pyodide-xbuildenv', ignore_errors=1) + if not os.path.exists(f'{directory}/{dir_emsdk}'): + command += f' && echo "### Cloning emsdk.git"' + command += f' && git clone https://github.com/emscripten-core/emsdk.git {dir_emsdk}' + command += f' && echo "### Updating checkout {dir_emsdk}"' + command += f' && (cd {dir_emsdk} && git pull -r)' + command += f' && echo "### Checkout {dir_emsdk} is:"' + command += f' && (cd {dir_emsdk} && git show -s --oneline)' + + # Create and enter Python venv. + # + python = sys.executable + venv_pyodide = f'venv_pyodide_{sys.version_info[0]}.{sys.version_info[1]}' + + if not os.path.exists( f'{directory}/{venv_pyodide}'): + command += f' && echo "### Creating venv {venv_pyodide}"' + command += f' && {python} -m venv {venv_pyodide}' + command += f' && . {venv_pyodide}/bin/activate' + command += f' && echo "### Installing Python packages."' + command += f' && python -m pip install --upgrade pip wheel pyodide-build' + if pyodide_build_version: + command += f'=={pyodide_build_version}' + + # Run emsdk install scripts and enter emsdk environment. + # + command += f' && cd {dir_emsdk}' + command += ' && PYODIDE_EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)' + command += ' && echo "### PYODIDE_EMSCRIPTEN_VERSION is: $PYODIDE_EMSCRIPTEN_VERSION"' + command += ' && echo "### Running ./emsdk install"' + command += ' && ./emsdk install ${PYODIDE_EMSCRIPTEN_VERSION}' + command += ' && echo "### Running ./emsdk activate"' + command += ' && ./emsdk activate ${PYODIDE_EMSCRIPTEN_VERSION}' + command += ' && echo "### Running ./emsdk_env.sh"' + command += ' && . ./emsdk_env.sh' # Need leading `./` otherwise weird 'Not found' error. + + command += ' && cd ..' + return command + + +def test( + *, + env_extra, + implementations, + venv=False, + test_names=None, + pytest_options=None, + test_timeout=None, + pytest_prefix=None, + test_fitz=True, + pytest_k=None, + pybind=False, + system_packages=False, + ): + if pybind: + cpp_path = 'pymupdf_test_pybind.cpp' + cpp_exe = 'pymupdf_test_pybind.exe' + cpp = textwrap.dedent(''' + #include + + int main() + { + pybind11::scoped_interpreter guard{}; + pybind11::exec(R"( + print('Hello world', flush=1) + import pymupdf + pymupdf.JM_mupdf_show_warnings = 1 + print(f'{pymupdf.version=}', flush=1) + doc = pymupdf.Document() + pymupdf.mupdf.fz_warn('Dummy warning.') + pymupdf.mupdf.fz_warn('Dummy warning.') + pymupdf.mupdf.fz_warn('Dummy warning.') + print(f'{doc=}', flush=1) + )"); + } + ''') + def fs_read(path): + try: + with open(path) as f: + return f.read() + except Exception: + return + def fs_remove(path): + try: + os.remove(path) + except Exception: + pass + cpp_existing = fs_read(cpp_path) + if cpp == cpp_existing: + log(f'Not creating {cpp_exe} because unchanged: {cpp_path}') + else: + with open(cpp_path, 'w') as f: + f.write(cpp) + def getmtime(path): + try: + return os.path.getmtime(path) + except Exception: + return 0 + python_config = f'{os.path.realpath(sys.executable)}-config' + # `--embed` adds `-lpython3.11` to the link command, which appears to + # be necessary when building an executable. + flags = run(f'{python_config} --cflags --ldflags --embed', capture=1) + build_command = f'c++ {cpp_path} -o {cpp_exe} -g -W -Wall {flags}' + build_path = f'{cpp_exe}.cmd' + build_command_prev = fs_read(build_path) + if build_command != build_command_prev or getmtime(cpp_path) >= getmtime(cpp_exe): + fs_remove(build_path) + run(build_command) + with open(build_path, 'w') as f: + f.write(build_command) + run(f'./{cpp_exe}') + return + + pymupdf_dir_rel = gh_release.relpath(pymupdf_dir) + if not pytest_options and pytest_prefix == 'valgrind': + pytest_options = '-sv' + if pytest_k: + pytest_options += f' -k {shlex.quote(pytest_k)}' + pytest_arg = '' + if test_names: + for test_name in test_names: + pytest_arg += f' {pymupdf_dir_rel}/{test_name}' + else: + pytest_arg += f' {pymupdf_dir_rel}/tests' + python = gh_release.relpath(sys.executable) + log('Running tests with tests/run_compound.py and pytest.') + + PYODIDE_ROOT = os.environ.get('PYODIDE_ROOT') + if PYODIDE_ROOT is not None: + # We can't install packages with `pip install`; setup.py will have + # specified pytest in the wheels's , so it will be + # already installed. + # + log(f'Not installing test packages because {PYODIDE_ROOT=}.') + command = f'{pytest_options} {pytest_arg}' + args = shlex.split(command) + log(f'{PYODIDE_ROOT=} so calling pytest.main(args).') + log(f'{command=}') + log(f'args are ({len(args)}):') + for arg in args: + log(f' {arg!r}') + import pytest + e = pytest.main(args) + assert e == 0, f'pytest.main() failed: {e=}' + return + + if venv >= 2: + run(f'pip install --upgrade {gh_release.test_packages}') + else: + log(f'{venv=}: Not installing test packages: {gh_release.test_packages}') + run_compound_args = '' + + if implementations: + run_compound_args += f' -i {implementations}' + + if test_timeout: + run_compound_args += f' -t {test_timeout}' + + if pytest_prefix in ('valgrind', 'helgrind'): + if system_packages: + log('Installing valgrind.') + run(f'sudo apt update') + run(f'sudo apt install --upgrade valgrind') + run(f'valgrind --version') + + command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args}' + + if pytest_prefix is None: + pass + elif pytest_prefix == 'gdb': + command += ' gdb --args' + elif pytest_prefix == 'valgrind': + env_extra['PYMUPDF_RUNNING_ON_VALGRIND'] = '1' + env_extra['PYTHONMALLOC'] = 'malloc' + command += ( + f' valgrind' + f' --suppressions={pymupdf_dir_abs}/valgrind.supp' + f' --trace-children=no' + f' --num-callers=20' + f' --error-exitcode=100' + f' --errors-for-leak-kinds=none' + f' --fullpath-after=' + ) + elif pytest_prefix == 'helgrind': + env_extra['PYMUPDF_RUNNING_ON_VALGRIND'] = '1' + env_extra['PYTHONMALLOC'] = 'malloc' + command = ( + f' valgrind' + f' --tool=helgrind' + f' --trace-children=no' + f' --num-callers=20' + f' --error-exitcode=100' + f' --fullpath-after=' + ) + else: + assert 0, f'Unrecognised {pytest_prefix=}' + + if platform.system() == 'Windows': + # `python -m pytest` doesn't seem to work. + command += ' pytest' + else: + # On OpenBSD `pip install pytest` doesn't seem to install the pytest + # command, so we use `python -m pytest ...`. + command += f' {python} -m pytest' + + command += f' {pytest_options} {pytest_arg}' + + # Always start by removing any test_*_fitz.py files. + for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*_fitz.py'): + log(f'Removing {p=}') + os.remove(p) + if test_fitz: + # Create copies of each test file, modified to use `pymupdf` + # instead of `fitz`. + for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*.py'): + if os.path.basename(p).startswith('test_fitz_'): + # Don't recursively generate test_fitz_fitz_foo.py, + # test_fitz_fitz_fitz_foo.py, ... etc. + continue + branch, leaf = os.path.split(p) + p2 = f'{branch}/{leaf[:5]}fitz_{leaf[5:]}' + log(f'Converting {p=} to {p2=}.') + with open(p, encoding='utf8') as f: + text = f.read() + text2 = re.sub("([^\'])\\bpymupdf\\b", '\\1fitz', text) + if p.replace(os.sep, '/') == f'{pymupdf_dir_rel}/tests/test_docs_samples.py'.replace(os.sep, '/'): + assert text2 == text + else: + assert text2 != text, f'Unexpectedly unchanged when creating {p!r} => {p2!r}' + with open(p2, 'w', encoding='utf8') as f: + f.write(text2) + try: + log(f'Running tests with tests/run_compound.py and pytest.') + run(command, env_extra=env_extra, timeout=test_timeout) + + except subprocess.TimeoutExpired as e: + log(f'Timeout when running tests.') + raise + finally: + log(f'\n' + f'[As of 2024-10-10 we get warnings from pytest/Python such as:\n' + f' DeprecationWarning: builtin type SwigPyPacked has no __module__ attribute\n' + f'This seems to be due to Swig\'s handling of Py_LIMITED_API.\n' + f'For details see https://github.com/swig/swig/issues/2881.\n' + f']' + ) + log('\n' + venv_info(pytest_args=f'{pytest_options} {pytest_arg}')) + + +def get_pyproject_required(ppt=None): + ''' + Returns space-separated names of required packages in pyproject.toml. We + do not do a proper parse and rely on the packages being in a single line. + ''' + if ppt is None: + ppt = os.path.abspath(f'{__file__}/../../pyproject.toml') + with open(ppt) as f: + for line in f: + m = re.match('^requires = \\[(.*)\\]$', line) + if m: + names = m.group(1).replace(',', ' ').replace('"', '') + return names + else: + assert 0, f'Failed to find "requires" line in {ppt}' + +def wrap_get_requires_for_build_wheel(dir_): + ''' + Returns space-separated list of required + packages. Looks at `dir_`/pyproject.toml and calls + `dir_`/setup.py:get_requires_for_build_wheel(). + ''' + dir_abs = os.path.abspath(dir_) + ret = list() + ppt = os.path.join(dir_abs, 'pyproject.toml') + if os.path.exists(ppt): + ret += get_pyproject_required(ppt) + if os.path.exists(os.path.join(dir_abs, 'setup.py')): + sys.path.insert(0, dir_abs) + try: + from setup import get_requires_for_build_wheel as foo + for i in foo(): + ret.append(i) + finally: + del sys.path[0] + return ' '.join(ret) + + +def venv_in(path=None): + ''' + If path is None, returns true if we are in a venv. Otherwise returns true + only if we are in venv . + ''' + if path: + return os.path.abspath(sys.prefix) == os.path.abspath(path) + else: + return sys.prefix != sys.base_prefix + + +def venv_run(args, path, recreate=True, clean=False): + ''' + Runs command inside venv and returns termination code. + + Args: + args: + List of args. + path: + Name of venv. + recreate: + If false we do not run ` -m venv ` if + already exists. This avoids a delay in the common case where + is already set up, but fails if exists but does not contain + a valid venv. + clean: + If true we first delete . + ''' + if clean: + log(f'Removing any existing venv {path}.') + assert path.startswith('venv-') + shutil.rmtree(path, ignore_errors=1) + if recreate or not os.path.isdir(path): + run(f'{sys.executable} -m venv {path}') + if platform.system() == 'Windows': + command = f'{path}\\Scripts\\activate && python' + # shlex not reliable on Windows. + # Use crude quoting with "...". Seems to work. + for arg in args: + assert '"' not in arg + command += f' "{arg}"' + else: + command = f'. {path}/bin/activate && python {shlex.join(args)}' + e = run(command, check=0) + return e + + +if __name__ == '__main__': + try: + sys.exit(main(sys.argv)) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + # Terminate relatively quietly, failed commands will usually have + # generated diagnostics. + log(f'{e}') + sys.exit(1) + # Other exceptions should not happen, and will generate a full Python + # backtrace etc here. diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 1a30ba225..43fc16431 --- a/setup.py +++ b/setup.py @@ -1,53 +1,107 @@ +#! /usr/bin/env python3 + ''' Overview: + Build script for PyMuPDF, supporting PEP-517 and simple command-line usage. + We hard-code the URL of the MuPDF .tar.gz file that we require. This generally points to a particular source release on mupdf.com. Default behaviour: Building an sdist: - We download the MuPDF .tar.gz file and embed within the sdist. + As of 2024-002-28 we no longer download the MuPDF .tar.gz file and + embed it within the sdist. Instead it will be downloaded at build + time. Building PyMuPDF: - If we are not in an sdist we first download the mupdf .tar.gz file. + We first download the hard-coded mupdf .tar.gz file. - Then we extract and build MuPDF locally, before setuptools builds - PyMuPDF. So PyMuPDF will always be built with the exact MuPDF + Then we extract and build MuPDF locally, before building PyMuPDF + itself. So PyMuPDF will always be built with the exact MuPDF release that we require. + Environmental variables: + + If building with system MuPDF (PYMUPDF_SETUP_MUPDF_BUILD is empty string): + + CFLAGS + CXXFLAGS + LDFLAGS + Added to c, c++, and link commands. + + PYMUPDF_INCLUDES + Colon-separated extra include paths. + + PYMUPDF_MUPDF_LIB + Directory containing MuPDF libraries, (libmupdf.so, + libmupdfcpp.so). PYMUPDF_SETUP_DEVENV - Location of devenv.com on Windows. If unset we search in some - hard-coded default locations; if that fails we use just 'devenv.com'. + Location of devenv.com on Windows. If unset we search for it - see + wdev.py. if that fails we use just 'devenv.com'. + + PYMUPDF_SETUP_DUMMY + If 1, we build dummy sdist and wheel with no files. + PYMUPDF_SETUP_FLAVOUR + Control building of separate wheels for PyMuPDF. + + Must be unset or a combination of 'p', 'b' and 'd'. + + Default is 'pbd'. + + 'p': + Generated wheel contains PyMuPDF code. + 'b': + Generated wheel contains MuPDF libraries; these are independent of + the Python version. + 'd': + Generated wheel contains includes and libraries for MuPDF. + + If 'p' is included, the generated wheel is called PyMuPDF. + Otherwise if 'b' is included the generated wheel is called PyMuPDFb. + Otherwise if 'd' is included the generated wheel is called PyMuPDFd. + + For example: + + 'pb': a `PyMuPDF` wheel with PyMuPDF runtime files and MuPDF + runtime shared libraries. + + 'b': a `PyMuPDFb` wheel containing MuPDF runtime shared libraries. + + 'pbd' a `PyMuPDF` wheel with PyMuPDF runtime files and MuPDF + runtime shared libraries, plus MuPDF build-time files (includes, + *.lib files on Windows). + + 'd': a `PyMuPDFd` wheel containing MuPDF build-time files + (includes, *.lib files on Windows). + + PYMUPDF_SETUP_LIBCLANG + For internal testing. + PYMUPDF_SETUP_MUPDF_BUILD - If set, overrides location of mupdf when building PyMuPDF: + If unset or '-', use internal hard-coded default MuPDF location. + Otherwise overrides location of MuPDF when building PyMuPDF: Empty string: - Build PyMuPDF with the system mupdf. + Build PyMuPDF with the system MuPDF. A string starting with 'git:': - Use `git clone` to get a mupdf directory. We use the string in - the git clone command; it must contain the git URL from which - to clone, and can also contain other `git clone` args, for - example: - PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master https://github.com/ArtifexSoftware/mupdf.git" + We use `git` commands to clone/update a local MuPDF checkout. + Should match `git:[--branch ][--tag ][]`. + If is omitted we use a default. + For example: + PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master" + Passed as arg to pipcl.git_get(). Otherwise: Location of mupdf directory. - - In addition if MuPDF is a git checkout and the branch is 'master', - PyMuPDF is configured to build with MuPDF master branch, which may - have a slightly different API from the current release banch. - PYMUPDF_SETUP_MUPDF_BUILD_BRANCH - If set to 'master', PyMuPDF is configured to build with MuPDF master - branch, which may have a slightly different API from the current - release banch. - - Other values are ignored. - - This is typically only useful if PYMUPDF_SETUP_MUPDF_BUILD is also set, - and not required if mupdf is a git checkout. + PYMUPDF_SETUP_MUPDF_BSYMBOLIC + If '0' we do not link libmupdf.so with -Bsymbolic. + + PYMUPDF_SETUP_MUPDF_TESSERACT + If '0' we build MuPDF without Tesseract. PYMUPDF_SETUP_MUPDF_BUILD_TYPE Unix only. Controls build type of MuPDF. Supported values are: @@ -58,21 +112,37 @@ PYMUPDF_SETUP_MUPDF_CLEAN Unix only. If '1', we do a clean MuPDF build. + PYMUPDF_SETUP_MUPDF_REFCHECK_IF + Should be preprocessor statement to enable MuPDF reference count + checking. + + As of 2024-09-27, MuPDF default is `#ifndef NDEBUG`. + + PYMUPDF_SETUP_MUPDF_TRACE_IF + Should be preprocessor statement to enable MuPDF runtime diagnostics in + response to environment variables such as MUPDF_trace. + + As of 2024-09-27, MuPDF default is `#ifndef NDEBUG`. + PYMUPDF_SETUP_MUPDF_THIRD If '0' and we are building on Linux with the system MuPDF (i.e. PYMUPDF_SETUP_MUPDF_BUILD=''), then don't link with `-lmupdf-third`. + PYMUPDF_SETUP_MUPDF_VS_UPGRADE + If '1' we run mupdf `scripts/mupdfwrap.py` with `--vs-upgrade 1` to + help Windows builds work with Visual Studio versions newer than 2019. + PYMUPDF_SETUP_MUPDF_TGZ If set, overrides location of MuPDF .tar.gz file: Empty string: Do not download MuPDF .tar.gz file. Sdist's will not contain MuPDF. - + A string containing '://': The URL from which to download the MuPDF .tar.gz file. Leaf must match mupdf-*.tar.gz. - + Otherwise: The path of local mupdf git checkout. We put all files in this checkout known to git into a local tar archive. @@ -83,65 +153,101 @@ PYMUPDF_SETUP_MUPDF_REBUILD If 0 we do not (re)build mupdf. + + PYMUPDF_SETUP_PY_LIMITED_API + If not '0', we build for current Python's stable ABI. + + However if unset and we are on Python-3.13 or later, we do + not build for the stable ABI because as of 2025-03-04 SWIG + generates incorrect stable ABI code with Python-3.13 - see: + https://github.com/swig/swig/issues/3059 + + PYMUPDF_SETUP_URL_WHEEL + If set, we use an existing wheel instead of building a new wheel. + + If starts with `http://` or `https://`: + If ends with '/', we append our wheel name and download. Otherwise + we download directly. + + If starts with `file://`: + If ends with '/' we look for a matching wheel name, `using + pipcl.wheel_name_match()` to cope with differing platform tags, + for example our `manylinux2014_x86_64` will match with an existing + wheel with `manylinux2014_x86_64.manylinux_2_17_x86_64`. + + Any other prefix is an error. -Known build failures: - Linux: - *musllinux*. - Windows: - pp*: - fitz_wrap.obj : error LNK2001: unresolved external symbol PyUnicode_DecodeRawUnicodeEscape + PYMUPDF_SETUP_SWIG + If set, we use this instead of `swig`. + + WDEV_VS_YEAR + If set, we use as Visual Studio year, for example '2019' or '2022'. - When using cibuildwheel, one can avoid building these failing wheels with: - CIBW_SKIP='*musllinux* pp*' + WDEV_VS_GRADE + If set, we use as Visual Studio grade, for example 'Community' or + 'Professional' or 'Enterprise'. ''' import glob -import json +import io import os +import textwrap +import time import platform import re +import shlex import shutil import stat import subprocess import sys import tarfile -import time +import traceback import urllib.request +import zipfile + +import pipcl -from setuptools import Extension, setup -from setuptools.command.build_py import build_py as build_py_orig +log = pipcl.log0 -_log_prefix = None -def log( text): - global _log_prefix - if not _log_prefix: - p = os.path.abspath( __file__) - p, p1 = os.path.split( p) - p, p0 = os.path.split( p) - _log_prefix = os.path.join( p0, p1) - print(f'{_log_prefix}: {text}', file=sys.stderr) - sys.stderr.flush() +run = pipcl.run if 1: # For debugging. - log(f'sys.argv: {sys.argv}') - log(f'os.getcwd(): {os.getcwd()}') - log(f'__file__: {__file__}') - log(f'$PYTHON_ARCH: {os.environ.get("PYTHON_ARCH")!r}') - log(f'os.environ ({len(os.environ)}):') - for k, v in os.environ.items(): - log( f' {k}: {v}') + log(f'### Starting.') + pipcl.show_system() -# setuptools seems to require current directory to be PyMuPDF. -# -assert os.path.abspath( os.getcwd()) == os.path.abspath( f'{__file__}/..'), \ - f'Current directory must be the PyMuPDF directory' +PYMUPDF_SETUP_FLAVOUR = os.environ.get( 'PYMUPDF_SETUP_FLAVOUR', 'pbd') +for i in PYMUPDF_SETUP_FLAVOUR: + assert i in 'pbd', f'Unrecognised flag "{i} in {PYMUPDF_SETUP_FLAVOUR=}. Should be one of "p", "b", "d"' + +g_root = os.path.abspath( f'{__file__}/..') + +# Name of file that identifies that we are in a PyMuPDF sdist. +g_pymupdfb_sdist_marker = 'pymupdfb_sdist' + +python_version_tuple = tuple(int(x) for x in platform.python_version_tuple()[:2]) +PYMUPDF_SETUP_PY_LIMITED_API = os.environ.get('PYMUPDF_SETUP_PY_LIMITED_API') +assert PYMUPDF_SETUP_PY_LIMITED_API in (None, '', '0', '1'), \ + f'Should be "", "0", "1" or undefined: {PYMUPDF_SETUP_PY_LIMITED_API=}.' +if PYMUPDF_SETUP_PY_LIMITED_API is None and python_version_tuple >= (3, 13): + log(f'Not defaulting to Python limited api because {platform.python_version_tuple()=}.') + g_py_limited_api = False +else: + g_py_limited_api = (PYMUPDF_SETUP_PY_LIMITED_API != '0') -def remove(path): +PYMUPDF_SETUP_URL_WHEEL = os.environ.get('PYMUPDF_SETUP_URL_WHEEL') +log(f'{PYMUPDF_SETUP_URL_WHEEL=}') + +PYMUPDF_SETUP_DUMMY = os.environ.get('PYMUPDF_SETUP_DUMMY') +log(f'{PYMUPDF_SETUP_DUMMY=}') + +PYMUPDF_SETUP_SWIG = os.environ.get('PYMUPDF_SETUP_SWIG') + +def _fs_remove(path): ''' Removes file or directory, without raising exception if it doesn't exist. @@ -168,6 +274,23 @@ def error_fn(fn, path, excinfo): assert not os.path.exists( path) +def _git_get_branch( directory): + command = f'cd {directory} && git branch --show-current' + log( f'Running: {command}') + p = subprocess.run( + command, + shell=True, + check=False, + text=True, + stdout=subprocess.PIPE, + ) + ret = None + if p.returncode == 0: + ret = p.stdout.strip() + log( f'Have found MuPDF git branch: ret={ret!r}') + return ret + + def tar_check(path, mode='r:gz', prefix=None, remove=False): ''' Checks items in tar file have same , or if not None. @@ -194,7 +317,7 @@ def tar_check(path, mode='r:gz', prefix=None, remove=False): else: prefix_actual = item[:s+1] if prefix: - assert prefix == prefix_actual, f'prefix={prefix} prefix_actual={prefix_actual}' + assert prefix == prefix_actual, f'{path=} {prefix=} {prefix_actual=}' for item in items[1:]: assert item.startswith( prefix_actual), f'prefix_actual={prefix_actual!r} != item={item!r}' return prefix_actual @@ -202,7 +325,7 @@ def tar_check(path, mode='r:gz', prefix=None, remove=False): def tar_extract(path, mode='r:gz', prefix=None, exists='raise'): ''' - Extracts tar file. + Extracts tar file into single local directory. We fail if items in tar file have different . @@ -239,164 +362,7 @@ def tar_extract(path, mode='r:gz', prefix=None, exists='raise'): return prefix_actual -def get_gitfiles( directory, submodules=False): - ''' - Returns list of all files known to git in ; must be - somewhere within a git checkout. - - Returned names are all relative to . - - If .git exists we use git-ls-files and write list of files to - /jtest-git-files. - - Otherwise we require that /jtest-git-files already exists. - ''' - def is_within_git_checkout( d): - while 1: - #log( 'd={d!r}') - if not d: - break - if os.path.isdir( f'{d}/.git'): - return True - d = os.path.dirname( d) - - if is_within_git_checkout( directory): - command = 'cd ' + directory + ' && git ls-files' - if submodules: - command += ' --recurse-submodules' - command += ' > jtest-git-files' - log( f'Running: {command}') - subprocess.run( command, shell=True, check=True) - - with open( '%s/jtest-git-files' % directory, 'r') as f: - text = f.read() - ret = text.strip().split( '\n') - return ret - - -def word_size(): - ''' - Returns integer word size (32 or 64) of build. - ''' - # Looks like on Windows, cibuildwheel runs us with a 64-bit Python - # interpreter even when building a 32-bit wheel. It appears to set - # PYTHON_ARCH to indicate word size (this isn't documented anywhere?). - # - a = os.environ.get( 'PYTHON_ARCH') - if a is None: - if sys.maxsize == 2**31-1: - return 32 - elif sys.maxsize == 2**63-1: - return 64 - else: - assert 0, 'Unrecognised sys.maxsize={sys.maxsize!r}' - else: - if a == '32': - return 32 - elif a == '64': - return 64 - else: - assert 0, f'Unrecognised $PYTHON_ARCH={a!r}' - - -class build_ext_first(build_py_orig): - """ - custom build_py command which runs build_ext first - this is necessary because build_py needs the fitz.py which is only generated - by SWIG in the build_ext step - """ - def run(self): - self.run_command("build_ext") - return super().run() - - -DEFAULT = ["mupdf"] -if os.environ.get( 'PYMUPDF_SETUP_MUPDF_THIRD') != '0': - DEFAULT.append("mupdf-third") - -ALPINE = DEFAULT + [ - "jbig2dec", - "jpeg", - "openjp2", - "harfbuzz", -] - -ARCH_LINUX = DEFAULT + [ - "jbig2dec", - "openjp2", - "jpeg", - "freetype", - "gumbo", -] - -NIX = ARCH_LINUX + [ - "harfbuzz", -] - -OPENSUSE = NIX + [ - "png16", -] - -DEBIAN = OPENSUSE + [ - "mujs", -] - -FEDORA = NIX + [ - "leptonica", - "tesseract", -] - -LIBRARIES = { - "default": DEFAULT, - "ubuntu": DEBIAN, - "arch": ARCH_LINUX, - "manjaro": ARCH_LINUX, - "artix": ARCH_LINUX, - "opensuse": OPENSUSE, - "fedora": FEDORA, - "alpine": ALPINE, - "nix": NIX, - "debian": DEBIAN, -} - - -def load_libraries(): - if os.getenv("NIX_STORE"): - return LIBRARIES["nix"] - - try: - import distro - - os_id = distro.id() - except: - os_id = "" - if os_id in list(LIBRARIES.keys()) + ["manjaro", "artix"]: - return LIBRARIES[os_id] - - filepath = "/etc/os-release" - if not os.path.exists(filepath): - return LIBRARIES["default"] - regex = re.compile("^([\\w]+)=(?:'|\")?(.*?)(?:'|\")?$") - with open(filepath) as os_release: - info = { - regex.match(line.strip()).group(1): re.sub( - r'\\([$"\'\\`])', r"\1", regex.match(line.strip()).group(2) - ) - for line in os_release - if regex.match(line.strip()) - } - - os_id = info["ID"] - if os_id.startswith("opensuse"): - os_id = "opensuse" - if os_id not in LIBRARIES.keys(): - return LIBRARIES["default"] - return LIBRARIES[os_id] - - - - -def get_git_id( directory): +def git_info( directory): ''' Returns `(sha, comment, diff, branch)`, all items are str or None if not available. @@ -422,486 +388,1090 @@ def get_git_id( directory): ) if cp.returncode == 0: branch = cp.stdout.strip() - log(f'get_git_id(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}') + log(f'git_info(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}') return sha, comment, diff, branch +def git_patch(directory, patch, hard=False): + ''' + Applies string with `git patch` in . + + If is true we clean the tree with `git checkout .` and then apply + the patch. + + Otherwise we apply patch only if it is not already applied; this might fail + if there are conflicting changes in the tree. + ''' + log(f'Applying patch in {directory}:\n{textwrap.indent(patch, " ")}') + if not patch: + return + # Carriage returns break `git apply` so we use `newline='\n'` in open(). + path = os.path.abspath(f'{directory}/pymupdf_patch.txt') + with open(path, 'w', newline='\n') as f: + f.write(patch) + log(f'Using patch file: {path}') + if hard: + run(f'cd {directory} && git checkout .') + run(f'cd {directory} && git apply {path}') + log(f'Have applied patch in {directory}.') + else: + e = run( f'cd {directory} && git apply --check --reverse {path}', check=0) + if e == 0: + log(f'Not patching {directory} because already patched.') + else: + run(f'cd {directory} && git apply {path}') + log(f'Have applied patch in {directory}.') + run(f'cd {directory} && git diff') + + mupdf_tgz = os.path.abspath( f'{__file__}/../mupdf.tgz') +def get_mupdf_internal(out, location=None, local_tgz=None): + ''' + Gets MuPDF as either a .tgz or a local directory. + + Args: + out: + Either 'dir' (we return name of local directory containing mupdf) or 'tgz' (we return + name of local .tgz file containing mupdf). + location: + First, if None we set to hard-coded default URL or git location. + If starts with 'git:', should be remote git location. + Otherwise if containing '://' should be URL for .tgz. + Otherwise should path of local mupdf checkout. + local_tgz: + If not None, must be local .tgz file. + Returns: + (path, location): + `path` is absolute path of local directory or .tgz containing + MuPDF, or None if we are to use system MuPDF. + + `location_out` is `location` if not None, else the hard-coded + default location. + + ''' + log(f'get_mupdf_internal(): {out=} {location=}') + assert out in ('dir', 'tgz') + if location is None: + location = f'https://mupdf.com/downloads/archive/mupdf-{version_mupdf}-source.tar.gz' + #location = 'git:--branch master https://github.com/ArtifexSoftware/mupdf.git' + + if location == '': + # Use system mupdf. + return None, location + + local_dir = None + if local_tgz: + assert os.path.isfile(local_tgz) + elif location.startswith( 'git:'): + local_dir = 'mupdf-git' + pipcl.git_get(local_dir, text=location, remote='https://github.com/ArtifexSoftware/mupdf.git') + + # Show sha of checkout. + run( + f'cd {local_dir} && git show --pretty=oneline|head -n 1', + check = False, + prefix = 'mupdf git id: ', + ) + elif '://' in location: + # Download .tgz. + local_tgz = os.path.basename( location) + suffix = '.tar.gz' + assert location.endswith(suffix), f'Unrecognised suffix in remote URL {location=}.' + name = local_tgz[:-len(suffix)] + log( f'Download {location=} {local_tgz=} {name=}') + if os.path.exists(local_tgz): + try: + tar_check(local_tgz, 'r:gz', prefix=f'{name}/') + except Exception as e: + log(f'Not using existing file {local_tgz} because invalid tar data: {e}') + _fs_remove( local_tgz) + if os.path.exists(local_tgz): + log(f'Not downloading from {location} because already present: {local_tgz!r}') + else: + log(f'Downloading from {location=} to {local_tgz=}.') + urllib.request.urlretrieve( location, local_tgz + '-') + os.rename(local_tgz + '-', local_tgz) + assert os.path.exists( local_tgz) + tar_check( local_tgz, 'r:gz', prefix=f'{name}/') + else: + assert os.path.isdir(location), f'Local MuPDF does not exist: {location=}' + local_dir = location + + assert bool(local_dir) != bool(local_tgz) + if out == 'dir': + if not local_dir: + assert local_tgz + local_dir = tar_extract( local_tgz, exists='return') + return os.path.abspath( local_dir), location + elif out == 'tgz': + if not local_tgz: + # Create .tgz containing git files in `local_dir`. + assert local_dir + if local_dir.endswith( '/'): + local_dir = local_dir[:-1] + top = os.path.basename(local_dir) + local_tgz = f'{local_dir}.tgz' + log( f'Creating .tgz from git files. {top=} {local_dir=} {local_tgz=}') + _fs_remove( local_tgz) + with tarfile.open( local_tgz, 'w:gz') as f: + for name in pipcl.git_items( local_dir, submodules=True): + path = os.path.join( local_dir, name) + if os.path.isfile( path): + path2 = f'{top}/{name}' + log(f'Adding {path=} {path2=}.') + f.add( path, path2, recursive=False) + return os.path.abspath( local_tgz), location + else: + assert 0, f'Unrecognised {out=}' + + + def get_mupdf_tgz(): ''' - Creates .tgz file containing MuPDF source, for inclusion in an sdist. + Creates .tgz file called containing MuPDF source, for inclusion in an + sdist. What we do depends on environmental variable PYMUPDF_SETUP_MUPDF_TGZ; see docs at start of this file for details. Returns name of top-level directory within the .tgz file. ''' - mupdf_url_or_local = os.environ.get( - 'PYMUPDF_SETUP_MUPDF_TGZ', - 'https://mupdf.com/downloads/archive/mupdf-1.22.2-source.tar.gz', - ) - log( f'mupdf_url_or_local={mupdf_url_or_local!r}') - if mupdf_url_or_local == '': - # No mupdf in sdist. - log( 'mupdf_url_or_local is empty string so removing any mupdf_tgz={mupdf_tgz}') - remove( mupdf_tgz) - return - - if '://' in mupdf_url_or_local: - # Download from URL into . - mupdf_url = mupdf_url_or_local - mupdf_url_leaf = os.path.basename( mupdf_url) - leaf = '.tar.gz' - assert mupdf_url_leaf.endswith(leaf), f'Unrecognised suffix in mupdf_url={mupdf_url!r}' - mupdf_local = mupdf_url_leaf[ : -len(leaf)] - assert mupdf_local.startswith( 'mupdf-') - log(f'Downloading from: {mupdf_url}') - remove( mupdf_url_leaf) - urllib.request.urlretrieve( mupdf_url, mupdf_url_leaf) - assert os.path.exists( mupdf_url_leaf) - tar_check( mupdf_url_leaf, 'r:gz', f'{mupdf_local}/') - if mupdf_url_leaf != mupdf_tgz: - remove( mupdf_tgz) - os.rename( mupdf_url_leaf, mupdf_tgz) - return mupdf_local - - else: - # Create archive contining local mupdf directory's git - # files. - mupdf_local = mupdf_url_or_local - if mupdf_local.endswith( '/'): - del mupdf_local[-1] - assert os.path.isdir( mupdf_local), f'Not a directory: {mupdf_local!r}' - log( f'Creating .tgz from git files in: {mupdf_local}') - remove( mupdf_tgz) - with tarfile.open( mupdf_tgz, 'w:gz') as f: - for name in get_gitfiles( mupdf_local, submodules=True): - path = os.path.join( mupdf_local, name) - if os.path.isfile( path): - f.add( path, f'mupdf/{name}', recursive=False) - return mupdf_local - - -def get_mupdf(): + name, location = get_mupdf_internal( 'tgz', os.environ.get('PYMUPDF_SETUP_MUPDF_TGZ')) + return name, location + + +def get_mupdf(path=None, sha=None): ''' - Downloads and/or extracts mupdf and returns location of mupdf directory. + Downloads and/or extracts mupdf and returns (path, location) where `path` + is the local mupdf directory and `location` is where it came from. Exact behaviour depends on environmental variable PYMUPDF_SETUP_MUPDF_BUILD; see docs at start of this file for details. ''' - path = os.environ.get( 'PYMUPDF_SETUP_MUPDF_BUILD') - log( f'PYMUPDF_SETUP_MUPDF_BUILD={path!r}') - if path is None: - # Default. - if os.path.exists( mupdf_tgz): - log( f'mupdf_tgz already exists: {mupdf_tgz}') + m = os.environ.get('PYMUPDF_SETUP_MUPDF_BUILD') + if m == '-': + # This allows easy specification in Github actions. + m = None + if m is None and os.path.isfile(mupdf_tgz): + # This makes us use tgz inside sdist. + log(f'Using local tgz: {mupdf_tgz=}') + return get_mupdf_internal('dir', local_tgz=mupdf_tgz) + return get_mupdf_internal('dir', m) + + +linux = sys.platform.startswith( 'linux') or 'gnu' in sys.platform +openbsd = sys.platform.startswith( 'openbsd') +freebsd = sys.platform.startswith( 'freebsd') +darwin = sys.platform.startswith( 'darwin') +windows = platform.system() == 'Windows' or platform.system().startswith('CYGWIN') +msys2 = platform.system().startswith('MSYS_NT-') + +if os.environ.get('PYODIDE') == '1': + if os.environ.get('OS') != 'pyodide': + log('PYODIDE=1, setting OS=pyodide.') + os.environ['OS'] = 'pyodide' + +pyodide = os.environ.get('OS') == 'pyodide' + +def build(): + ''' + pipcl.py `build_fn()` callback. + ''' + #pipcl.show_sysconfig() + + if PYMUPDF_SETUP_DUMMY == '1': + log(f'{PYMUPDF_SETUP_DUMMY=} Building dummy wheel with no files.') + return list() + + # Download MuPDF. + # + mupdf_local, mupdf_location = get_mupdf() + if mupdf_local: + mupdf_version_tuple = get_mupdf_version(mupdf_local) + # else we cannot determine version this way and do not use it + + build_type = os.environ.get( 'PYMUPDF_SETUP_MUPDF_BUILD_TYPE', 'release') + assert build_type in ('debug', 'memento', 'release'), \ + f'Unrecognised build_type={build_type!r}' + + overwrite_config = os.environ.get('PYMUPDF_SETUP_MUPDF_OVERWRITE_CONFIG', '1') == '1' + + PYMUPDF_SETUP_MUPDF_REFCHECK_IF = os.environ.get('PYMUPDF_SETUP_MUPDF_REFCHECK_IF') + PYMUPDF_SETUP_MUPDF_TRACE_IF = os.environ.get('PYMUPDF_SETUP_MUPDF_TRACE_IF') + + # Build MuPDF shared libraries. + # + if windows: + mupdf_build_dir = build_mupdf_windows( + mupdf_local, + build_type, + overwrite_config, + g_py_limited_api, + PYMUPDF_SETUP_MUPDF_REFCHECK_IF, + PYMUPDF_SETUP_MUPDF_TRACE_IF, + ) + else: + if 'p' not in PYMUPDF_SETUP_FLAVOUR and 'b' not in PYMUPDF_SETUP_FLAVOUR: + # We only need MuPDF headers, so no point building MuPDF. + log(f'Not building MuPDF because not Windows and {PYMUPDF_SETUP_FLAVOUR=}.') + mupdf_build_dir = None else: - get_mupdf_tgz() - return tar_extract( mupdf_tgz, exists='return') + mupdf_build_dir = build_mupdf_unix( + mupdf_local, + build_type, + overwrite_config, + g_py_limited_api, + PYMUPDF_SETUP_MUPDF_REFCHECK_IF, + PYMUPDF_SETUP_MUPDF_TRACE_IF, + PYMUPDF_SETUP_SWIG, + ) + log( f'build(): mupdf_build_dir={mupdf_build_dir!r}') - elif path == '': - # Use system mupdf. - log( f'PYMUPDF_SETUP_MUPDF_BUILD="", using system mupdf') - return None + # Build rebased `extra` module. + # + if 'p' in PYMUPDF_SETUP_FLAVOUR: + path_so_leaf = _build_extension( + mupdf_local, + mupdf_build_dir, + build_type, + g_py_limited_api, + ) + else: + log(f'Not building extension.') + path_so_leaf = None - git_prefix = 'git:' - if path.startswith( git_prefix): - # Get git clone of mupdf. - # - # `mupdf_url_or_local` is taken to be portion of a `git clone` command, - # for example: - # - # PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master git://git.ghostscript.com/mupdf.git" - # PYMUPDF_SETUP_MUPDF_BUILD="git:--branch 1.20.x https://github.com/ArtifexSoftware/mupdf.git" - # PYMUPDF_SETUP_MUPDF_BUILD="git:--branch master https://github.com/ArtifexSoftware/mupdf.git" - # - # One would usually also set PYMUPDF_SETUP_MUPDF_TGZ= (empty string) to - # avoid the need to download a .tgz into an sdist. - # - command_suffix = path[ len(git_prefix):] - path = 'mupdf' + # Generate list of (from, to) items to return to pipcl. What we add depends + # on PYMUPDF_SETUP_FLAVOUR. + # + ret = list() + def add(flavour, from_, to_): + assert flavour in 'pbd' + if flavour in PYMUPDF_SETUP_FLAVOUR: + ret.append((from_, to_)) + + to_dir = 'pymupdf/' + to_dir_d = f'{to_dir}/mupdf-devel' + + # Add implementation files. + add('p', f'{g_root}/src/__init__.py', to_dir) + add('p', f'{g_root}/src/__main__.py', to_dir) + add('p', f'{g_root}/src/pymupdf.py', to_dir) + add('p', f'{g_root}/src/table.py', to_dir) + add('p', f'{g_root}/src/utils.py', to_dir) + add('p', f'{g_root}/src/_wxcolors.py', to_dir) + add('p', f'{g_root}/src/_apply_pages.py', to_dir) + add('p', f'{g_root}/src/build/extra.py', to_dir) + if path_so_leaf: + add('p', f'{g_root}/src/build/{path_so_leaf}', to_dir) + + # Add support for `fitz` backwards compatibility. + add('p', f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py') + add('p', f'{g_root}/src/fitz_table.py', 'fitz/table.py') + add('p', f'{g_root}/src/fitz_utils.py', 'fitz/utils.py') + + if mupdf_local: + # Add MuPDF Python API. + add('p', f'{mupdf_build_dir}/mupdf.py', to_dir) + + # Add MuPDF shared libraries. + if windows: + wp = pipcl.wdev.WindowsPython() + add('p', f'{mupdf_build_dir}/_mupdf.pyd', to_dir) + add('b', f'{mupdf_build_dir}/mupdfcpp{wp.cpu.windows_suffix}.dll', to_dir) + + # Add Windows .lib files. + mupdf_build_dir2 = _windows_lib_directory(mupdf_local, build_type) + add('d', f'{mupdf_build_dir2}/mupdfcpp{wp.cpu.windows_suffix}.lib', f'{to_dir_d}/lib/') + if mupdf_version_tuple >= (1, 26): + # MuPDF-1.25+ language bindings build also builds libmuthreads. + add('d', f'{mupdf_build_dir2}/libmuthreads.lib', f'{to_dir_d}/lib/') + elif darwin: + add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir) + add('b', f'{mupdf_build_dir}/libmupdfcpp.so', to_dir) + add('b', f'{mupdf_build_dir}/libmupdf.dylib', to_dir) + add('d', f'{mupdf_build_dir}/libmupdf-threads.a', f'{to_dir_d}/lib/') + elif pyodide: + add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir) + add('b', f'{mupdf_build_dir}/libmupdfcpp.so', to_dir) + add('b', f'{mupdf_build_dir}/libmupdf.so', to_dir) + else: + add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir) + add('b', pipcl.get_soname(f'{mupdf_build_dir}/libmupdfcpp.so'), to_dir) + add('b', pipcl.get_soname(f'{mupdf_build_dir}/libmupdf.so'), to_dir) + add('d', f'{mupdf_build_dir}/libmupdf-threads.a', f'{to_dir_d}/lib/') + + if 'd' in PYMUPDF_SETUP_FLAVOUR: + # Add MuPDF C and C++ headers to `ret_d`. Would prefer to use + # pipcl.git_items() but hard-coded mupdf tree is not a git + # checkout. + # + for root in ( + f'{mupdf_local}/include', + f'{mupdf_local}/platform/c++/include', + ): + for dirpath, dirnames, filenames in os.walk(root): + for filename in filenames: + if not filename.endswith('.h'): + continue + header_abs = os.path.join(dirpath, filename) + assert header_abs.startswith(root) + header_rel = header_abs[len(root)+1:] + add('d', f'{header_abs}', f'{to_dir_d}/include/{header_rel}') + + # Add a .py file containing location of MuPDF. + try: + sha, comment, diff, branch = git_info(g_root) + except Exception as e: + log(f'Failed to get git information: {e}') + sha, comment, diff, branch = (None, None, None, None) + swig = PYMUPDF_SETUP_SWIG or 'swig' + swig_version_text = run(f'{swig} --version', capture=1) + m = re.search('\nSWIG Version ([^\n]+)', swig_version_text) + log(f'{swig_version_text=}') + assert m, f'Unrecognised {swig_version_text=}' + swig_version = m.group(1) + def int_or_0(text): + try: + return int(text) + except Exception: + return 0 + swig_version_tuple = tuple(int_or_0(i) for i in swig_version.split('.')) + version_p_tuple = tuple(int_or_0(i) for i in version_p.split('.')) + log(f'{swig_version=}') + text = '' + text += f'mupdf_location = {mupdf_location!r}\n' + text += f'pymupdf_version = {version_p!r}\n' + text += f'pymupdf_version_tuple = {version_p_tuple!r}\n' + text += f'pymupdf_git_sha = {sha!r}\n' + text += f'pymupdf_git_diff = {diff!r}\n' + text += f'pymupdf_git_branch = {branch!r}\n' + text += f'swig_version = {swig_version!r}\n' + text += f'swig_version_tuple = {swig_version_tuple!r}\n' + add('p', text.encode(), f'{to_dir}/_build.py') + + # Add single README file. + if 'p' in PYMUPDF_SETUP_FLAVOUR: + add('p', f'{g_root}/README.md', '$dist-info/README.md') + elif 'b' in PYMUPDF_SETUP_FLAVOUR: + add('b', f'{g_root}/READMEb.md', '$dist-info/README.md') + elif 'd' in PYMUPDF_SETUP_FLAVOUR: + add('d', f'{g_root}/READMEd.md', '$dist-info/README.md') + + return ret + + +def env_add(env, name, value, sep=' ', prepend=False, verbose=False): + ''' + Appends/prepends `` to `env[name]`. + + If `name` is not in `env`, we use os.environ[name] if it exists. + ''' + v = env.get(name) + if verbose: + log(f'Initally: {name}={v!r}') + if v is None: + v = os.environ.get(name) + if v is None: + env[ name] = value + else: + if prepend: + env[ name] = f'{value}{sep}{v}' + else: + env[ name] = f'{v}{sep}{value}' + if verbose: + log(f'Returning with {name}={env[name]!r}') + + +def build_mupdf_windows( + mupdf_local, + build_type, + overwrite_config, + g_py_limited_api, + PYMUPDF_SETUP_MUPDF_REFCHECK_IF, + PYMUPDF_SETUP_MUPDF_TRACE_IF, + ): + + assert mupdf_local + + if overwrite_config: + mupdf_config_h = f'{mupdf_local}/include/mupdf/fitz/config.h' + prefix = '#define TOFU_CJK_EXT 1 /* PyMuPDF override. */\n' + with open(mupdf_config_h) as f: + text = f.read() + if text.startswith(prefix): + print(f'Not modifying {mupdf_config_h} because already has prefix {prefix!r}.') + else: + print(f'Prefixing {mupdf_config_h} with {prefix!r}.') + text = prefix + text + st = os.stat(mupdf_config_h) + with open(mupdf_config_h, 'w') as f: + f.write(text) + os.utime(mupdf_config_h, (st.st_atime, st.st_mtime)) - # Remove any existing directory to avoid the clone failing. (We could - # assume any existing directory is a git checkout, and do `git pull` or - # similar, but that's complicated and fragile.) - # - remove(path) + wp = pipcl.wdev.WindowsPython() + tesseract = '' if os.environ.get('PYMUPDF_SETUP_MUPDF_TESSERACT') == '0' else 'tesseract-' + windows_build_tail = f'build\\shared-{tesseract}{build_type}' + if g_py_limited_api: + windows_build_tail += f'-Py_LIMITED_API_{pipcl.current_py_limited_api()}' + windows_build_tail += f'-x{wp.cpu.bits}-py{wp.version}' + windows_build_dir = f'{mupdf_local}\\{windows_build_tail}' + #log( f'Building mupdf.') + devenv = os.environ.get('PYMUPDF_SETUP_DEVENV') + if not devenv: + try: + # Prefer VS-2022 as that is what Github provide in windows-2022. + log(f'Looking for Visual Studio 2022.') + vs = pipcl.wdev.WindowsVS(year=2022) + except Exception as e: + log(f'Failed to find VS-2022:\n' + f'{textwrap.indent(traceback.format_exc(), " ")}' + ) + log(f'Looking for any Visual Studio.') + vs = pipcl.wdev.WindowsVS() + log(f'vs:\n{vs.description_ml(" ")}') + devenv = vs.devenv + if not devenv: + devenv = 'devenv.com' + log( f'Cannot find devenv.com in default locations, using: {devenv!r}') + command = f'cd "{mupdf_local}" && "{sys.executable}" ./scripts/mupdfwrap.py' + if os.environ.get('PYMUPDF_SETUP_MUPDF_VS_UPGRADE') == '1': + command += ' --vs-upgrade 1' - command = ('' - + f'git clone' - + f' --recursive' - #+ f' --single-branch' - #+ f' --recurse-submodules' - + f' --depth 1' - + f' --shallow-submodules' - #+ f' --branch {branch}' - #+ f' git://git.ghostscript.com/mupdf.git' - + f' {command_suffix}' - + f' {path}' - ) - log( f'Running: {command}') + # Would like to simply do f'... --devenv {shutil.quote(devenv)}', but + # it looks like if `devenv` has spaces then `shutil.quote()` puts it + # inside single quotes, which then appear to be ignored when run by + # subprocess.run(). + # + # So instead we strip any enclosing quotes and the enclose with + # double-quotes. + # + if len(devenv) >= 2: + for q in '"', "'": + if devenv.startswith( q) and devenv.endswith( q): + devenv = devenv[1:-1] + command += f' -d {windows_build_tail}' + command += f' -b' + if PYMUPDF_SETUP_MUPDF_REFCHECK_IF: + command += f' --refcheck-if "{PYMUPDF_SETUP_MUPDF_REFCHECK_IF}"' + if PYMUPDF_SETUP_MUPDF_TRACE_IF: + command += f' --trace-if "{PYMUPDF_SETUP_MUPDF_TRACE_IF}"' + command += f' --devenv "{devenv}"' + command += f' all' + if os.environ.get( 'PYMUPDF_SETUP_MUPDF_REBUILD') == '0': + log( f'PYMUPDF_SETUP_MUPDF_REBUILD is "0" so not building MuPDF; would have run: {command}') + else: + log( f'Building MuPDF by running: {command}') subprocess.run( command, shell=True, check=True) + log( f'Finished building mupdf.') + + return windows_build_dir - # Show sha of checkout. - command = f'cd {path} && git show --pretty=oneline|head -n 1' - log( f'Running: {command}') - subprocess.run( command, shell=True, check=False) - - if 1: - # Use custom mupdf directory. - log( f'Using custom mupdf directory from $PYMUPDF_SETUP_MUPDF_BUILD: {path}') - assert os.path.isdir( path), f'$PYMUPDF_SETUP_MUPDF_BUILD is not a directory: {path}' - return path - - -include_dirs = [] -library_dirs = [] -libraries = [] -extra_swig_args = [] -extra_link_args = [] -extra_compile_args = [] - -log( f'platform.system()={platform.system()!r}') -log( f'sys.platform={sys.platform!r}') - -linux = platform.system() == 'Linux' -openbsd = platform.system() == 'OpenBSD' -freebsd = platform.system() == 'FreeBSD' -darwin = platform.system() == 'Darwin' -windows = platform.system() == 'Windows' or platform.system().startswith('CYGWIN') + +def _windows_lib_directory(mupdf_local, build_type): + ret = f'{mupdf_local}/platform/win32/' + if _cpu_bits() == 64: + ret += 'x64/' + if build_type == 'release': + ret += 'Release/' + elif build_type == 'debug': + ret += 'Debug/' + else: + assert 0, f'Unrecognised {build_type=}.' + return ret -if 'sdist' in sys.argv: - # Create local mupdf.tgz, for inclusion in sdist. - get_mupdf_tgz() +def _cpu_bits(): + if sys.maxsize == 2**31 - 1: + return 32 + return 64 -if ('-h' not in sys.argv and '--help' not in sys.argv - and (0 - or 'bdist_wheel' in sys.argv - or 'build' in sys.argv - or 'bdist' in sys.argv - or 'install' in sys.argv - ) +def build_mupdf_unix( + mupdf_local, + build_type, + overwrite_config, + g_py_limited_api, + PYMUPDF_SETUP_MUPDF_REFCHECK_IF, + PYMUPDF_SETUP_MUPDF_TRACE_IF, + PYMUPDF_SETUP_SWIG, ): + ''' + Builds MuPDF. - # Build MuPDF before setuptools runs, so that it can link with the MuPDF - # libraries. - # - mupdf_local = get_mupdf() - if mupdf_local: - if mupdf_local.endswith( '/'): - mupdf_local = mupdf_local[:-1] - - log( f'mupdf_local={mupdf_local!r}') - unix_build_dir = None - - # Always force clean build of PyMuPDF SWIG files etc, because setuptools - # doesn't seem to notice when our mupdf headers etc are newer than the - # SWIG-generated files. - # - remove( os.path.abspath( f'{__file__}/../build/')) - remove( os.path.abspath( f'{__file__}/../install/')) + Args: + mupdf_local: + Path of MuPDF directory or None if we are using system MuPDF. - if mupdf_local: - # Build MuPDF before deferring to setuptools.setup(). - # - - log( f'Building mupdf.') - # Copy PyMuPDF's config file into mupdf. For example it #define's TOFU, - # which excludes various fonts in the MuPDF binaries. - if os.environ.get('PYMUPDF_SETUP_MUPDF_OVERWRITE_CONFIG') == '0': - # Use MuPDF default config. - log( f'Not copying fitz/_config.h to {mupdf_local}/include/mupdf/fitz/config.h.') - s = os.stat( f'{mupdf_local}/include/mupdf/fitz/config.h') - log( f'{mupdf_local}/include/mupdf/fitz/config.h: {s} mtime={time.strftime("%F-%T", time.gmtime(s.st_mtime))}') - else: - # Use our special config in MuPDF. - log( f'Copying fitz/_config.h to {mupdf_local}/include/mupdf/fitz/config.h') - shutil.copy2( 'fitz/_config.h', f'{mupdf_local}/include/mupdf/fitz/config.h') + Returns the absolute path of build directory within MuPDF, e.g. + `.../mupdf/build/pymupdf-shared-release`, or `None` if we are using the + system MuPDF. + ''' + if not mupdf_local: + log( f'Using system mupdf.') + return None + + env = dict() + if overwrite_config: + # By predefining TOFU_CJK_EXT here, we don't need to modify + # MuPDF's include/mupdf/fitz/config.h. + log( f'Setting XCFLAGS and XCXXFLAGS to predefine TOFU_CJK_EXT.') + env_add(env, 'XCFLAGS', '-DTOFU_CJK_EXT') + env_add(env, 'XCXXFLAGS', '-DTOFU_CJK_EXT') + + if openbsd or freebsd: + env_add(env, 'CXX', 'c++', ' ') - if windows: - # Windows build. - devenv = os.environ.get('PYMUPDF_SETUP_DEVENV') - log( 'PYMUPDF_SETUP_DEVENV={PYMUPDF_SETUP_DEVENV!r}') - if not devenv: - # Search for devenv in some known locations. - devenv = glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/2019/*/Common7/IDE/devenv.com') - if devenv: - devenv = devenv[0] - if not devenv: - devenv = 'devenv.com' - log( f'Cannot find devenv.com in default locations, using: {devenv!r}') - windows_config = 'Win32' if word_size()==32 else 'x64' - command = ( - f'cd {mupdf_local}&&' - f'"{devenv}"' - f' platform/win32/mupdf.sln' - f' /Build "ReleaseTesseract|{windows_config}"' - f' /Project mupdf' - ) - else: - # Unix build. + if darwin and os.environ.get('GITHUB_ACTIONS') == 'true': + if os.environ.get('ImageOS') == 'macos13': + # On Github macos13 we need to use Clang/LLVM (Homebrew) 15.0.7, + # otherwise mupdf:thirdparty/tesseract/src/api/baseapi.cpp fails to + # compile with: # - flags = 'HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no HAVE_LEPTONICA=yes HAVE_TESSERACT=yes' - flags += ' verbose=yes' - env = '' - make = 'make' - if linux: - env += ' CFLAGS="-fPIC"' - if openbsd or freebsd: - make = 'gmake' - env += ' CFLAGS="-fPIC" CXX=clang++' - - unix_build_type = os.environ.get( 'PYMUPDF_SETUP_MUPDF_BUILD_TYPE', 'release') - assert unix_build_type in ('debug', 'memento', 'release') - flags += f' build={unix_build_type}' - - # This is for MacOS cross-compilation, where ARCHFLAGS can be - # '-arch arm64'. + # thirdparty/tesseract/src/api/baseapi.cpp:150:25: error: 'recursive_directory_iterator' is unavailable: introduced in macOS 10.15 # - archflags = os.environ.get( 'ARCHFLAGS') - if archflags: - flags += f' XCFLAGS="{archflags}" XLIBS="{archflags}"' - - # We specify a build directory path containing 'pymupdf' so that we - # coexist with non-pymupdf builds (because pymupdf builds have a - # different config.h). + # See: + # https://github.com/actions/runner-images/blob/main/images/macos/macos-13-Readme.md # - # We also append further text to try to allow different builds to - # work if they reuse the mupdf directory. - # - # Using platform.machine() (e.g. 'amd64') ensures that different - # builds of mupdf on a shared filesystem can coexist. Using - # $_PYTHON_HOST_PLATFORM allows cross-compiled cibuildwheel builds - # to coexist, e.g. on github. - # - build_prefix = f'pymupdf-{platform.machine()}-' - build_prefix_extra = os.environ.get( '_PYTHON_HOST_PLATFORM') - if build_prefix_extra: - build_prefix += f'{build_prefix_extra}-' - flags += f' build_prefix={build_prefix}' - - unix_build_dir = f'{mupdf_local}/build/{build_prefix}{unix_build_type}' - - if os.environ.get( 'PYMUPDF_SETUP_MUPDF_CLEAN') == '1': - # Force clean build. - log(f'Removing {unix_build_dir} because PYMUPDF_SETUP_MUPDF_CLEAN=1') - assert '/build/' in unix_build_dir - remove(unix_build_dir) - - command = f'cd {mupdf_local} && {env} {make} {flags}' - command += f' && echo {unix_build_dir}:' - command += f' && ls -l build/{build_prefix}{unix_build_type}' - - if os.environ.get( 'PYMUPDF_SETUP_MUPDF_REBUILD') == '0': - log( f'PYMUPDF_SETUP_MUPDF_REBUILD is "0" so not building MuPDF; would have run: {command}') - else: - log( f'Building MuPDF by running: {command}') - subprocess.run( command, shell=True, check=True) - log( f'Finished building mupdf.') - else: - # Use installed MuPDF. - log( f'Using system mupdf.') - unix_build_type = '' + log(f'Using llvm@15 clang and clang++') + cl15 = pipcl.run(f'brew --prefix llvm@15', capture=1) + log(f'{cl15=}') + cl15 = cl15.strip() + pipcl.run(f'ls -lL {cl15}') + pipcl.run(f'ls -lL {cl15}/bin') + cc = f'{cl15}/bin/clang' + cxx = f'{cl15}/bin/clang++' + env['CC'] = cc + env['CXX'] = cxx - # Set include and library paths for building PyMuPDF. + # Show compiler versions. + cc = env.get('CC', 'cc') + cxx = env.get('CXX', 'c++') + pipcl.run(f'{cc} --version') + pipcl.run(f'{cxx} --version') + + # Add extra flags for MacOS cross-compilation, where ARCHFLAGS can be + # '-arch arm64'. + # + archflags = os.environ.get( 'ARCHFLAGS') + if archflags: + env_add(env, 'XCFLAGS', archflags) + env_add(env, 'XLIBS', archflags) + + mupdf_version_tuple = get_mupdf_version(mupdf_local) + + # We specify a build directory path containing 'pymupdf' so that we + # coexist with non-PyMuPDF builds (because PyMuPDF builds have a + # different config.h). + # + # We also append further text to try to allow different builds to + # work if they reuse the mupdf directory. # - # We also add MuPDF's include directory to include path for Swig so that - # fitz/fitz.i can do `%include "mupdf/fitz/version.h"` and .i code can use - # `#if` with FZ_VERSION_* macros. + # Using platform.machine() (e.g. 'amd64') ensures that different + # builds of mupdf on a shared filesystem can coexist. Using + # $_PYTHON_HOST_PLATFORM allows cross-compiled cibuildwheel builds + # to coexist, e.g. on github. # + # Have experimented with looking at getconf_ARG_MAX to decide whether to + # omit `PyMuPDF-` from the build directory, to avoid command-too-long + # errors with mupdf-1.26. But it seems that `getconf ARG_MAX` returns + # a system limit, not the actual limit of the current shell, and there + # doesn't seem to be a way to find the current shell's limit. + # + build_prefix = f'PyMuPDF-' + if mupdf_version_tuple >= (1, 26): + # Avoid link command length problems seen on musllinux. + build_prefix = '' + if pyodide: + build_prefix += 'pyodide-' + else: + build_prefix += f'{platform.machine()}-' + build_prefix_extra = os.environ.get( '_PYTHON_HOST_PLATFORM') + if build_prefix_extra: + build_prefix += f'{build_prefix_extra}-' + build_prefix += 'shared-' + if msys2: + # Error in mupdf/scripts/tesseract/endianness.h: + # #error "I don't know what architecture this is!" + log(f'msys2: building MuPDF without tesseract.') + elif os.environ.get('PYMUPDF_SETUP_MUPDF_TESSERACT') == '0': + log(f'PYMUPDF_SETUP_MUPDF_TESSERACT=0 so building mupdf without tesseract.') + else: + build_prefix += 'tesseract-' + if ( + linux + and os.environ.get('PYMUPDF_SETUP_MUPDF_BSYMBOLIC', '1') == '1' + ): + log(f'Appending `bsymbolic-` to MuPDF build path.') + build_prefix += 'bsymbolic-' + log(f'{g_py_limited_api=}') + if g_py_limited_api: + build_prefix += f'Py_LIMITED_API_{pipcl.current_py_limited_api()}-' + unix_build_dir = f'{mupdf_local}/build/{build_prefix}{build_type}' + PYMUPDF_SETUP_MUPDF_CLEAN = os.environ.get('PYMUPDF_SETUP_MUPDF_CLEAN') + if PYMUPDF_SETUP_MUPDF_CLEAN == '1': + log(f'{PYMUPDF_SETUP_MUPDF_CLEAN=}, deleting {unix_build_dir=}.') + shutil.rmtree(unix_build_dir, ignore_errors=1) + # We need MuPDF's Python bindings, so we build MuPDF with + # `mupdf/scripts/mupdfwrap.py` instead of running `make`. + # + command = f'cd {mupdf_local} &&' + for n, v in env.items(): + command += f' {n}={shlex.quote(v)}' + command += f' {sys.executable} ./scripts/mupdfwrap.py' + if PYMUPDF_SETUP_SWIG: + command += f' --swig {shlex.quote(PYMUPDF_SETUP_SWIG)}' + command += f' -d build/{build_prefix}{build_type} -b' + #command += f' --m-target libs' + if PYMUPDF_SETUP_MUPDF_REFCHECK_IF: + command += f' --refcheck-if "{PYMUPDF_SETUP_MUPDF_REFCHECK_IF}"' + if PYMUPDF_SETUP_MUPDF_TRACE_IF: + command += f' --trace-if "{PYMUPDF_SETUP_MUPDF_TRACE_IF}"' + if 'p' in PYMUPDF_SETUP_FLAVOUR: + command += ' all' + else: + command += ' m01' # No need for C++/Python bindings. + command += f' && echo {unix_build_dir}:' + command += f' && ls -l {unix_build_dir}' + + if os.environ.get( 'PYMUPDF_SETUP_MUPDF_REBUILD') == '0': + log( f'PYMUPDF_SETUP_MUPDF_REBUILD is "0" so not building MuPDF; would have run: {command}') + else: + log( f'Building MuPDF by running: {command}') + subprocess.run( command, shell=True, check=True) + log( f'Finished building mupdf.') + + return unix_build_dir + + +def get_mupdf_version(mupdf_dir): + path = f'{mupdf_dir}/include/mupdf/fitz/version.h' + with open(path) as f: + text = f.read() + v0 = re.search('#define FZ_VERSION_MAJOR ([0-9]+)', text) + v1 = re.search('#define FZ_VERSION_MINOR ([0-9]+)', text) + v2 = re.search('#define FZ_VERSION_PATCH ([0-9]+)', text) + assert v0 and v1 and v2, f'Cannot find MuPDF version numbers in {path=}.' + v0 = int(v0.group(1)) + v1 = int(v1.group(1)) + v2 = int(v2.group(1)) + return v0, v1, v2 + +def _fs_update(text, path): + try: + with open( path) as f: + text0 = f.read() + except OSError: + text0 = None + print(f'path={path!r} text==text0={text==text0!r}') + if text != text0: + with open( path, 'w') as f: + f.write( text) + + +def _build_extension( mupdf_local, mupdf_build_dir, build_type, g_py_limited_api): + ''' + Builds Python extension module `_extra`. + + Returns leafname of the generated shared libraries within mupdf_build_dir. + ''' + (compiler_extra, linker_extra, includes, defines, optimise, debug, libpaths, libs, libraries) \ + = _extension_flags( mupdf_local, mupdf_build_dir, build_type) + log(f'_build_extension(): {g_py_limited_api=} {defines=}') if mupdf_local: - assert os.path.isdir( mupdf_local), f'Not a directory: {mupdf_local!r}' - include_dirs.append( f'{mupdf_local}/include') - include_dirs.append( f'{mupdf_local}/include/mupdf') - include_dirs.append( f'{mupdf_local}/thirdparty/freetype/include') - if unix_build_dir: - library_dirs.append( unix_build_dir) - extra_swig_args.append(f'-I{mupdf_local}/include') - - if mupdf_local and (linux or openbsd or freebsd): - # setuptools' link command always seems to put '-L - # /usr/local/lib' before any that we specify, - # so '-l mupdf -l mupdf-third' will end up using the system - # libmupdf.so (if installed) instead of the one we've built in - # . - # - # So we force linking with our mupdf libraries by specifying - # them in . - # - extra_link_args.append( f'{unix_build_dir}/libmupdf.a') - extra_link_args.append( f'{unix_build_dir}/libmupdf-third.a') - library_dirs = [] - libraries = [] - if openbsd or freebsd: - if os.environ.get( 'PYMUPDF_SETUP_MUPDF_BUILD_TYPE') == 'memento': - extra_link_args.append( f'-lexecinfo') - - elif mupdf_local and darwin: - library_dirs.append(f'{unix_build_dir}') + includes = ( + f'{mupdf_local}/platform/c++/include', + f'{mupdf_local}/include', + ) + + # Build rebased extension module. + log('Building PyMuPDF rebased.') + compile_extra_cpp = '' + if darwin: + # Avoids `error: cannot pass object of non-POD type + # 'std::nullptr_t' through variadic function; call will abort at + # runtime` when compiling `mupdf::pdf_dict_getl(..., nullptr)`. + compile_extra_cpp += ' -Wno-non-pod-varargs' + # Avoid errors caused by mupdf's C++ bindings' exception classes + # not having `nothrow` to match the base exception class. + compile_extra_cpp += ' -std=c++14' + if windows: + wp = pipcl.wdev.WindowsPython() + libs = f'mupdfcpp{wp.cpu.windows_suffix}.lib' + else: + libs = ('mupdf', 'mupdfcpp') libraries = [ - f'mupdf', - f'mupdf-third', + f'{mupdf_build_dir}/libmupdf.so' + f'{mupdf_build_dir}/libmupdfcpp.so' ] - elif linux: - # Use system libraries. - include_dirs.append( '/usr/include/mupdf') - include_dirs.append( '/usr/local/include/mupdf') - include_dirs.append( '/usr/include/freetype2') - libraries = load_libraries() - extra_link_args = [] - extra_swig_args.append(f'-I/usr/local/include') - extra_swig_args.append(f'-I/usr/include') - - elif darwin or openbsd or freebsd: - # Use system libraries. - include_dirs.append("/usr/local/include/mupdf") - include_dirs.append("/usr/local/include") - include_dirs.append("/opt/homebrew/include/mupdf") - library_dirs.append("/usr/local/lib") - libraries = ["mupdf", "mupdf-third"] - library_dirs.append("/opt/homebrew/lib") - - include_dirs.append("/usr/include/freetype2") - include_dirs.append("/usr/local/include/freetype2") - include_dirs.append("/usr/X11R6/include/freetype2") - include_dirs.append("/opt/homebrew/include") - include_dirs.append("/opt/homebrew/include/freetype2") + path_so_leaf = pipcl.build_extension( + name = 'extra', + path_i = f'{g_root}/src/extra.i', + outdir = f'{g_root}/src/build', + includes = includes, + defines = defines, + libpaths = libpaths, + libs = libs, + compiler_extra = compiler_extra + compile_extra_cpp, + linker_extra = linker_extra, + optimise = optimise, + debug = debug, + prerequisites_swig = None, + prerequisites_compile = f'{mupdf_local}/include', + prerequisites_link = libraries, + py_limited_api = g_py_limited_api, + swig = PYMUPDF_SETUP_SWIG, + ) + + return path_so_leaf + + +def _extension_flags( mupdf_local, mupdf_build_dir, build_type): + ''' + Returns various flags to pass to pipcl.build_extension(). + ''' + compiler_extra = '' + linker_extra = '' + if build_type == 'memento': + compiler_extra += ' -DMEMENTO' + if mupdf_build_dir: + mupdf_build_dir_flags = os.path.basename( mupdf_build_dir).split( '-') + else: + mupdf_build_dir_flags = [build_type] + optimise = 'release' in mupdf_build_dir_flags + debug = 'debug' in mupdf_build_dir_flags + r_extra = '' + defines = list() + if windows: + defines.append('FZ_DLL_CLIENT') + wp = pipcl.wdev.WindowsPython() + if os.environ.get('PYMUPDF_SETUP_MUPDF_VS_UPGRADE') == '1': + # MuPDF C++ build uses a parallel build tree with updated VS files. + infix = 'win32-vs-upgrade' + else: + infix = 'win32' + build_type_infix = 'Debug' if debug else 'Release' + libpaths = ( + f'{mupdf_local}\\platform\\{infix}\\{wp.cpu.windows_subdir}{build_type_infix}', + f'{mupdf_local}\\platform\\{infix}\\{wp.cpu.windows_subdir}{build_type_infix}Tesseract', + ) + libs = f'mupdfcpp{wp.cpu.windows_suffix}.lib' + libraries = f'{mupdf_local}\\platform\\{infix}\\{wp.cpu.windows_subdir}{build_type_infix}\\{libs}' + compiler_extra = '' + else: + libs = ['mupdf'] + compiler_extra += ( + ' -Wall' + ' -Wno-deprecated-declarations' + ' -Wno-unused-const-variable' + ) + if mupdf_local: + libpaths = (mupdf_build_dir,) + libraries = f'{mupdf_build_dir}/{libs[0]}' + if openbsd: + compiler_extra += ' -Wno-deprecated-declarations' + else: + libpaths = os.environ.get('PYMUPDF_MUPDF_LIB') + libraries = None + if libpaths: + libpaths = libpaths.split(':') + + if mupdf_local: + includes = ( + f'{mupdf_local}/include', + f'{mupdf_local}/include/mupdf', + f'{mupdf_local}/thirdparty/freetype/include', + ) + else: + # Use system MuPDF. + includes = list() + pi = os.environ.get('PYMUPDF_INCLUDES') + if pi: + includes += pi.split(':') + pmi = os.environ.get('PYMUPDF_MUPDF_INCLUDE') + if pmi: + includes.append(pmi) + ldflags = os.environ.get('LDFLAGS') + if ldflags: + linker_extra += f' {ldflags}' + cflags = os.environ.get('CFLAGS') + if cflags: + compiler_extra += f' {cflags}' + cxxflags = os.environ.get('CXXFLAGS') + if cxxflags: + compiler_extra += f' {cxxflags}' + + return compiler_extra, linker_extra, includes, defines, optimise, debug, libpaths, libs, libraries, + + +def sdist(): + ret = list() + if PYMUPDF_SETUP_DUMMY == '1': + return ret + + if PYMUPDF_SETUP_FLAVOUR == 'b': + # Create a minimal sdist that will build/install a dummy PyMuPDFb. + for p in ( + 'setup.py', + 'pipcl.py', + 'wdev.py', + 'pyproject.toml', + ): + ret.append(p) + ret.append( + ( + b'This file indicates that we are a PyMuPDFb sdist and should build/install a dummy PyMuPDFb package.\n', + g_pymupdfb_sdist_marker, + ) + ) + return ret - extra_swig_args.append(f'-I/usr/local/include') - extra_swig_args.append(f'-I/opt/homebrew/include') - - library_dirs.append("/opt/homebrew/lib") - - if freebsd: - libraries += [ - 'freetype', - 'harfbuzz', - ] - - elif windows: - # Windows. - assert mupdf_local - if word_size() == 32: - library_dirs.append( f'{mupdf_local}/platform/win32/ReleaseTesseract') - library_dirs.append( f'{mupdf_local}/platform/win32/Release') + for p in pipcl.git_items( g_root): + if p.startswith( + ( + 'docs/', + 'signatures/', + '.', + ) + ): + pass else: - library_dirs.append( f'{mupdf_local}/platform/win32/x64/ReleaseTesseract') - library_dirs.append( f'{mupdf_local}/platform/win32/x64/Release') - libraries = [ - "libmupdf", - "libresources", - "libthirdparty", + ret.append(p) + if 0: + tgz, mupdf_location = get_mupdf_tgz() + if tgz: + ret.append((tgz, mupdf_tgz)) + else: + log(f'Not including MuPDF .tgz in sdist.') + return ret + + +classifier = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Information Technology', + 'Operating System :: MacOS', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: C', + 'Programming Language :: C++', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: Implementation :: CPython', + 'Topic :: Utilities', + 'Topic :: Multimedia :: Graphics', + 'Topic :: Software Development :: Libraries', ] - extra_link_args = ["/NODEFAULTLIB:MSVCRT"] + +# We generate different wheels depending on PYMUPDF_SETUP_FLAVOUR. +# + +# PyMuPDF version. +version_p = '1.26.6' + +version_mupdf = '1.26.10' + +# PyMuPDFb version. This is the PyMuPDF version whose PyMuPDFb wheels we will +# (re)use if generating separate PyMuPDFb wheels. Though as of PyMuPDF-1.24.11 +# (2024-10-03) we no longer use PyMuPDFb wheels so this is actually unused. +# +version_b = '1.26.3' + +if os.path.exists(f'{g_root}/{g_pymupdfb_sdist_marker}'): + + # We are in a PyMuPDFb sdist. We specify a dummy package so that pip builds + # from sdists work - pip's build using PyMuPDF's sdist will already create + # the required binaries, but pip will still see `requires_dist` set to + # 'PyMuPDFb', so will also download and build PyMuPDFb's sdist. + # + log(f'Specifying dummy PyMuPDFb wheel.') + + def get_requires_for_build_wheel(config_settings=None): + return list() + + p = pipcl.Package( + 'PyMuPDFb', + version_b, + summary = 'Dummy PyMuPDFb wheel', + description = '', + author = 'Artifex', + author_email = 'support@artifex.com', + license = 'GNU AFFERO GPL 3.0', + tag_python = 'py3', + ) + +else: + # A normal PyMuPDF package. + + with open( f'{g_root}/README.md', encoding='utf-8') as f: + readme_p = f.read() + + with open( f'{g_root}/READMEb.md', encoding='utf-8') as f: + readme_b = f.read() + + with open( f'{g_root}/READMEd.md', encoding='utf-8') as f: + readme_d = f.read() + + tag_python = None + requires_dist = list() + entry_points = None + if 'p' in PYMUPDF_SETUP_FLAVOUR: + version = version_p + name = 'PyMuPDF' + readme = readme_p + summary = 'A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents.' + if 'b' not in PYMUPDF_SETUP_FLAVOUR: + requires_dist.append(f'PyMuPDFb =={version_b}') + # Create a `pymupdf` command. + entry_points = textwrap.dedent(''' + [console_scripts] + pymupdf = pymupdf.__main__:main + ''') + elif 'b' in PYMUPDF_SETUP_FLAVOUR: + version = version_b + name = 'PyMuPDFb' + readme = readme_b + summary = 'MuPDF shared libraries for PyMuPDF.' + tag_python = 'py3' + elif 'd' in PYMUPDF_SETUP_FLAVOUR: + version = version_b + name = 'PyMuPDFd' + readme = readme_d + summary = 'MuPDF build-time files for PyMuPDF.' + tag_python = 'py3' else: - assert 0, 'Unrecognised OS' - - if linux or openbsd or freebsd or darwin: - extra_compile_args.append( '-Wno-incompatible-pointer-types') - extra_compile_args.append( '-Wno-pointer-sign') - extra_compile_args.append( '-Wno-sign-compare') - if unix_build_type == 'memento': - extra_compile_args.append( '-DMEMENTO') - if openbsd: - extra_compile_args.append( '-Wno-deprecated-declarations') - - # add any local include and library folders - pymupdf_dirs = os.environ.get("PYMUPDF_DIRS", None) - if pymupdf_dirs: - with open(pymupdf_dirs) as dirfile: - local_dirs = json.load(dirfile) - include_dirs += local_dirs.get("include_dirs", []) - library_dirs += local_dirs.get("library_dirs", []) - - with open(f'fitz/helper-git-versions.i', 'w') as f: - f.write('%pythoncode %{\n') - - def repr_escape(text): - text = repr(text) - text = text.replace('{', '{{') - text = text.replace('}', '}}') - text = text.replace('%', '{chr(37)})') # Avoid confusing swig. - return 'f' + text - def write_git(name, directory): - sha, comment, diff, branch = get_git_id(directory) - f.write(f'{name}_git_sha = \'{sha}\'\n') - f.write(f'{name}_git_comment = {repr_escape(comment)}\n') - f.write(f'{name}_git_diff = {repr_escape(diff)}\n') - f.write(f'{name}_git_branch = {repr_escape(branch)}\n') - f.write('\n') - - write_git('pymupdf', '.') - if mupdf_local: - write_git('mupdf', mupdf_local) - - f.write('%}\n') - -# Disable bogus SWIG warning 509, 'Overloaded method ... effectively ignored, -# as it is shadowed by ...'. -extra_swig_args.append( '-w509') - -log( f'include_dirs={include_dirs}') -log( f'library_dirs={library_dirs}') -log( f'libraries={libraries}') -log( f'extra_swig_args={extra_swig_args}') -log( f'extra_compile_args={extra_compile_args}') -log( f'extra_link_args={extra_link_args}') - -module = Extension( - "fitz._fitz", - ["fitz/fitz.i"], - language="c++", - include_dirs=include_dirs, - library_dirs=library_dirs, - libraries=libraries, - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - swig_opts=extra_swig_args, -) - - -setup_py_cwd = os.path.dirname(__file__) -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Operating System :: MacOS", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: C", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Utilities", - "Topic :: Multimedia :: Graphics", - "Topic :: Software Development :: Libraries", -] -with open(os.path.join(setup_py_cwd, "README.md"), encoding="utf-8") as f: - readme = f.read() - -setup( - name="PyMuPDF", - version="1.22.6", - description="Python bindings for the PDF toolkit and renderer MuPDF", - long_description=readme, - long_description_content_type="text/markdown", - classifiers=classifiers, - url="https://github.com/pymupdf/PyMuPDF", - author="Artifex", - author_email="support@artifex.com", - cmdclass={"build_py": build_ext_first}, - ext_modules=[module], - python_requires=">=3.7", - py_modules=["fitz.fitz", "fitz.utils", "fitz.__main__"], - license="GNU AFFERO GPL 3.0", - project_urls={ - "Documentation": "https://pymupdf.readthedocs.io/", - "Source": "https://github.com/pymupdf/pymupdf", - "Tracker": "https://github.com/pymupdf/PyMuPDF/issues", - "Changelog": "https://pymupdf.readthedocs.io/en/latest/changes.html", - }, -) + assert 0, f'Unrecognised {PYMUPDF_SETUP_FLAVOUR=}.' + + if os.environ.get('PYODIDE_ROOT'): + # We can't pip install pytest on pyodide, so specify it here. + requires_dist.append('pytest') + + p = pipcl.Package( + name, + version, + summary = summary, + description = readme, + description_content_type = 'text/markdown', + classifier = classifier, + author = 'Artifex', + author_email = 'support@artifex.com', + requires_dist = requires_dist, + requires_python = '>=3.9', + license = 'Dual Licensed - GNU AFFERO GPL 3.0 or Artifex Commercial License', + project_url = [ + ('Documentation, https://pymupdf.readthedocs.io/'), + ('Source, https://github.com/pymupdf/pymupdf'), + ('Tracker, https://github.com/pymupdf/PyMuPDF/issues'), + ('Changelog, https://pymupdf.readthedocs.io/en/latest/changes.html'), + ], + + entry_points = entry_points, + + fn_build=build, + fn_sdist=sdist, + + tag_python=tag_python, + py_limited_api=g_py_limited_api, + + # 30MB: 9 ZIP_DEFLATED + # 28MB: 9 ZIP_BZIP2 + # 23MB: 9 ZIP_LZMA + #wheel_compression = zipfile.ZIP_DEFLATED if (darwin or pyodide) else zipfile.ZIP_LZMA, + wheel_compresslevel = 9, + ) + + def get_requires_for_build_wheel(config_settings=None): + ''' + Adds to pyproject.toml:[build-system]:requires, allowing programmatic + control over what packages we require. + ''' + def platform_release_tuple(): + r = platform.release() + r = r.split('.') + r = tuple(int(i) for i in r) + log(f'platform_release_tuple() returning {r=}.') + return r + + ret = list() + libclang = os.environ.get('PYMUPDF_SETUP_LIBCLANG') + if libclang: + print(f'Overriding to use {libclang=}.') + ret.append(libclang) + elif openbsd: + print(f'OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.') + elif darwin and platform_release_tuple() < (18,): + # There are still of problems when building on old macos. + ret.append('libclang==14.0.6') + else: + ret.append('libclang') + if msys2: + print(f'msys2: pip install of swig does not build; assuming `pacman -S swig`.') + elif openbsd: + print(f'OpenBSD: pip install of swig does not build; assuming `pkg_add swig`.') + else: + ret.append( 'swig') + return ret + + +if PYMUPDF_SETUP_URL_WHEEL: + def build_wheel( + wheel_directory, + config_settings=None, + metadata_directory=None, + p=p, + ): + ''' + Instead of building wheel, we look for and copy a wheel from location + specified by PYMUPDF_SETUP_URL_WHEEL. + ''' + log(f'{PYMUPDF_SETUP_URL_WHEEL=}') + log(f'{p.wheel_name()=}') + url = PYMUPDF_SETUP_URL_WHEEL + if url.startswith(('http://', 'https://')): + leaf = p.wheel_name() + out_path = f'{wheel_directory}{leaf}' + out_path_temp = out_path + '-' + if url.endswith('/'): + url += leaf + log(f'Downloading from {url=} to {out_path_temp=}.') + urllib.request.urlretrieve(url, out_path_temp) + elif url.startswith(f'file://'): + in_path = url[len('file://'):] + log(f'{in_path=}') + if in_path.endswith('/'): + # Look for matching wheel within this directory. + wheels = glob.glob(f'{in_path}*.whl') + log(f'{len(wheels)=}') + for in_path in wheels: + log(f'{in_path=}') + leaf = os.path.basename(in_path) + if p.wheel_name_match(leaf): + log(f'Match: {in_path=}') + break + else: + message = f'Cannot find matching for {p.wheel_name()=} in ({len(wheels)=}):\n' + wheels_text = '' + for wheel in wheels: + wheels_text += f' {wheel}\n' + assert 0, f'Cannot find matching for {p.wheel_name()=} in:\n{wheels_text}' + else: + leaf = os.path.basename(in_path) + out_path = os.path.join(wheel_directory, leaf) + out_path_temp = out_path + '-' + log(f'Copying from {in_path=} to {out_path_temp=}.') + shutil.copy2(in_path, out_path_temp) + else: + assert 0, f'Unrecognised prefix in {PYMUPDF_SETUP_URL_WHEEL=}.' + + log(f'Renaming from:\n {out_path_temp}\nto:\n {out_path}.') + os.rename(out_path_temp, out_path) + return os.path.basename(out_path) +else: + build_wheel = p.build_wheel + +build_sdist = p.build_sdist + + +if __name__ == '__main__': + p.handle_argv(sys.argv) diff --git a/signatures/version1/cla.json b/signatures/version1/cla.json deleted file mode 100644 index a3d57209e..000000000 --- a/signatures/version1/cla.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "signedContributors": [ - { - "name": "jamie-lemon", - "id": 107279992, - "comment_id": 1346836521, - "created_at": "2022-12-12T16:30:10Z", - "repoId": 6105714, - "pullRequestNo": 2118 - }, - { - "name": "julian-smith-artifex-com", - "id": 83358719, - "comment_id": 1347087940, - "created_at": "2022-12-12T18:56:01Z", - "repoId": 6105714, - "pullRequestNo": 2120 - }, - { - "name": "JorjMcKie", - "id": 8290722, - "comment_id": 1347104970, - "created_at": "2022-12-12T19:02:45Z", - "repoId": 6105714, - "pullRequestNo": 2120 - }, - { - "name": "JorjMcKie", - "id": 8290722, - "comment_id": 1347107260, - "created_at": "2022-12-12T19:03:35Z", - "repoId": 6105714, - "pullRequestNo": 2120 - }, - { - "name": "arun-mani-j", - "id": 49952138, - "comment_id": 1374488387, - "created_at": "2023-01-07T13:50:31Z", - "repoId": 6105714, - "pullRequestNo": 2162 - }, - { - "name": "cbm755", - "id": 818622, - "comment_id": 1442341286, - "created_at": "2023-02-23T19:49:14Z", - "repoId": 6105714, - "pullRequestNo": 2234 - }, - { - "name": "kianmeng", - "id": 134518, - "comment_id": 1498410207, - "created_at": "2023-04-06T02:38:02Z", - "repoId": 6105714, - "pullRequestNo": 2315 - }, - { - "name": "hbghlyj", - "id": 53823634, - "comment_id": 1611808771, - "created_at": "2023-06-28T17:23:15Z", - "repoId": 6105714, - "pullRequestNo": 2502 - }, - { - "name": "ellacroix", - "id": 57907121, - "comment_id": 1623672777, - "created_at": "2023-07-06T13:20:38Z", - "repoId": 6105714, - "pullRequestNo": 2520 - }, - { - "name": "m42e", - "id": 2410802, - "comment_id": 1646548073, - "created_at": "2023-07-22T10:03:22Z", - "repoId": 6105714, - "pullRequestNo": 2554 - } - ] -} \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 000000000..7824eb522 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,25899 @@ +''' +PyMuPDF implemented on top of MuPDF Python bindings. + +License: + + SPDX-License-Identifier: GPL-3.0-only +''' + +# To reduce startup times, we don't import everything we require here. +# +import atexit +import binascii +import collections +import inspect +import io +import math +import os +import pathlib +import glob +import re +import string +import sys +import tarfile +import time +import typing +import warnings +import weakref +import zipfile + +from . import extra +import importlib.util + +# Set up g_out_log and g_out_message from environment variables. +# +# PYMUPDF_MESSAGE controls the destination of user messages (from function +# `pymupdf.message()`). +# +# PYMUPDF_LOG controls the destination of internal development logging (from +# function `pymupdf.log()`). +# +# For syntax, see _make_output()'s `text` arg. +# + +def _make_output( + *, + text=None, + fd=None, + stream=None, + path=None, + path_append=None, + pylogging=None, + pylogging_logger=None, + pylogging_level=None, + pylogging_name=None, + default=None, + ): + ''' + Returns a stream that writes to a specified destination, which can be a + file descriptor, a file, an existing stream or Python's `logging' system. + + Args: + text: text specification of destination. + fd: - write to file descriptor. + path: - write to file. + path+: - append to file. + logging: - write to Python `logging` module. + items: comma-separated pairs. + level= + name=. + Other names are ignored. + + fd: an int file descriptor. + stream: something with methods .write(text) and .flush(). + If specified we simply return . + path: a file path. + If specified we return a stream that writes to this file. + path_append: a file path. + If specified we return a stream that appends to this file. + pylogging*: + if any of these args is not None, we return a stream that writes to + Python's `logging` module. + + pylogging: + Unused other than to activate use of logging module. + pylogging_logger: + A logging.Logger; If None, set from . + pylogging_level: + An int log level, if None we use + pylogging_logger.getEffectiveLevel(). + pylogging_name: + Only used if is None: + If is None, we set it to 'pymupdf'. + Then we do: pylogging_logger = logging.getLogger(pylogging_name) + ''' + if text is not None: + # Textual specification, for example from from environment variable. + if text.startswith('fd:'): + fd = int(text[3:]) + elif text.startswith('path:'): + path = text[5:] + elif text.startswith('path+'): + path_append = text[5:] + elif text.startswith('logging:'): + pylogging = True + items_d = dict() + items = text[8:].split(',') + #items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]} + for item in items: + if not item: + continue + nv = item.split('=', 1) + assert len(nv) == 2, f'Need `=` in {item=}.' + n, v = nv + items_d[n] = v + pylogging_level = items_d.get('level') + if pylogging_level is not None: + pylogging_level = int(pylogging_level) + pylogging_name = items_d.get('name', 'pymupdf') + else: + assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.' + + if fd is not None: + ret = io.open(fd, mode='w', closefd=False) + elif stream is not None: + assert hasattr(stream, 'write') + assert hasattr(stream, 'flush') + ret = stream + elif path is not None: + ret = io.open(path, 'w') + elif path_append is not None: + ret = io.open(path_append, 'a') + elif (0 + or pylogging is not None + or pylogging_logger is not None + or pylogging_level is not None + or pylogging_name is not None + ): + import logging + if pylogging_logger is None: + if pylogging_name is None: + pylogging_name = 'pymupdf' + pylogging_logger = logging.getLogger(pylogging_name) + assert isinstance(pylogging_logger, logging.Logger) + if pylogging_level is None: + pylogging_level = pylogging_logger.getEffectiveLevel() + class Out: + def write(self, text): + # `logging` module appends newlines, but so does the `print()` + # functions in our caller message() and log() fns, so we need to + # remove them here. + text = text.rstrip('\n') + if text: + pylogging_logger.log(pylogging_level, text) + def flush(self): + pass + ret = Out() + else: + ret = default + return ret + +# Set steam used by PyMuPDF messaging. +_g_out_message = _make_output(text=os.environ.get('PYMUPDF_MESSAGE'), default=sys.stdout) + +# Set steam used by PyMuPDF development/debugging logging. +_g_out_log = _make_output(text=os.environ.get('PYMUPDF_LOG'), default=sys.stdout) + +# Things for testing logging. +_g_log_items = list() +_g_log_items_active = False + +def _log_items(): + return _g_log_items + +def _log_items_active(active): + global _g_log_items_active + _g_log_items_active = active + +def _log_items_clear(): + del _g_log_items[:] + + +def set_messages( + *, + text=None, + fd=None, + stream=None, + path=None, + path_append=None, + pylogging=None, + pylogging_logger=None, + pylogging_level=None, + pylogging_name=None, + ): + ''' + Sets destination of PyMuPDF messages. See _make_output() for details. + ''' + global _g_out_message + _g_out_message = _make_output( + text=text, + fd=fd, + stream=stream, + path=path, + path_append=path_append, + pylogging=pylogging, + pylogging_logger=pylogging_logger, + pylogging_level=pylogging_level, + pylogging_name=pylogging_name, + default=_g_out_message, + ) + +def set_log( + *, + text=None, + fd=None, + stream=None, + path=None, + path_append=None, + pylogging=None, + pylogging_logger=None, + pylogging_level=None, + pylogging_name=None, + ): + ''' + Sets destination of PyMuPDF development/debugging logging. See + _make_output() for details. + ''' + global _g_out_log + _g_out_log = _make_output( + text=text, + fd=fd, + stream=stream, + path=path, + path_append=path_append, + pylogging=pylogging, + pylogging_logger=pylogging_logger, + pylogging_level=pylogging_level, + pylogging_name=pylogging_name, + default=_g_out_log, + ) + +def log( text='', caller=1): + ''' + For development/debugging diagnostics. + ''' + try: + stack = inspect.stack(context=0) + except StopIteration: + pass + else: + frame_record = stack[caller] + try: + filename = os.path.relpath(frame_record.filename) + except Exception: # Can fail on windows. + filename = frame_record.filename + line = frame_record.lineno + function = frame_record.function + text = f'{filename}:{line}:{function}(): {text}' + if _g_log_items_active: + _g_log_items.append(text) + if _g_out_log: + print(text, file=_g_out_log, flush=1) + + +def message(text=''): + ''' + For user messages. + ''' + # It looks like `print()` does nothing if sys.stdout is None (without + # raising an exception), but we don't rely on this. + if _g_out_message: + print(text, file=_g_out_message, flush=1) + + +def exception_info(): + import traceback + log(f'exception_info:') + log(traceback.format_exc()) + + +# PDF names must not contain these characters: +INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0)) + +def get_env_bool( name, default): + ''' + Returns `True`, `False` or `default` depending on whether $ is '1', + '0' or unset. Otherwise assert-fails. + ''' + v = os.environ.get( name) + if v is None: + ret = default + elif v == '1': + ret = True + elif v == '0': + ret = False + else: + assert 0, f'Unrecognised value for {name}: {v!r}' + if ret != default: + log(f'Using non-default setting from {name}: {v!r}') + return ret + +def get_env_int( name, default): + ''' + Returns `True`, `False` or `default` depending on whether $ is '1', + '0' or unset. Otherwise assert-fails. + ''' + v = os.environ.get( name) + if v is None: + ret = default + else: + ret = int(v) + if ret != default: + log(f'Using non-default setting from {name}: {v}') + return ret + +# All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is +# true. +g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1) + +# $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`. +# +g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True) + + +# Global switches +# + +class _Globals: + def __init__(self): + self.no_device_caching = 0 + self.small_glyph_heights = 0 + self.subset_fontnames = 0 + self.skip_quad_corrections = 0 + +_globals = _Globals() + +_get_layout: typing.Optional[typing.Callable] = None + +# global switch ensuring that the recommendation message is shown at most once +_recommend_layout = True # must be referred to as "global" everywhere + + +def no_recommend_layout(): + """For users who never want to see the layout recommendation.""" + global _recommend_layout + _recommend_layout = False + + +def _warn_layout_once(): + """Check if we should recommend installing the layout package.""" + msg="""Consider using the pymupdf_layout package for a greatly improved page layout analysis.""" + + global _recommend_layout + if ( + 1 + and _recommend_layout # still True? + and _get_layout is None # no layout function stored here + + # client did not globally disable the recommendation + and os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") != "0" + + # layout is not available in this Python + and not importlib.util.find_spec("pymupdf.layout") + ): + print(msg) + _recommend_layout = False # never show the message again + + +# Optionally use MuPDF via cppyy bindings; experimental and not tested recently +# as of 2023-01-20 11:51:40 +# +mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY') +if mupdf_cppyy is not None: + # pylint: disable=all + log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.') + log( f'{__file__}: $PYTHONPATH={os.environ["PYTHONPATH"]}') + if mupdf_cppyy == '': + import mupdf_cppyy + else: + import importlib + mupdf_cppyy = importlib.machinery.SourceFileLoader( + 'mupdf_cppyy', + mupdf_cppyy + ).load_module() + mupdf = mupdf_cppyy.cppyy.gbl.mupdf +else: + # Use MuPDF Python SWIG bindings. We allow import from either our own + # directory for conventional wheel installs, or from separate place in case + # we are using a separately-installed system installation of mupdf. + # + try: + from . import mupdf + except Exception: + import mupdf + if hasattr(mupdf, 'internal_check_ndebug'): + mupdf.internal_check_ndebug() + mupdf.reinit_singlethreaded() + +def _int_rc(text): + ''' + Converts string to int, ignoring trailing 'rc...'. + ''' + rc = text.find('rc') + if rc >= 0: + text = text[:rc] + return int(text) + +# Basic version information. +# +# (We use `noqa F401` to avoid flake8 errors such as `F401 +# '._build.mupdf_location' imported but unused`. +# +from ._build import mupdf_location # noqa F401 +from ._build import pymupdf_git_branch # noqa F401 +from ._build import pymupdf_git_diff # noqa F401 +from ._build import pymupdf_git_sha # noqa F401 +from ._build import pymupdf_version # noqa F401 +from ._build import pymupdf_version_tuple # noqa F401 +from ._build import swig_version # noqa F401 +from ._build import swig_version_tuple # noqa F401 + +mupdf_version = mupdf.FZ_VERSION + +# Removed in PyMuPDF-1.26.1. +pymupdf_date = None + +# Versions as tuples; useful when comparing versions. +# +mupdf_version_tuple = tuple( [_int_rc(i) for i in mupdf_version.split('.')]) + +assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \ + f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}' + +# Legacy version information. +# +version = (pymupdf_version, mupdf_version, None) +VersionFitz = mupdf_version +VersionBind = pymupdf_version +VersionDate = None + + +# String formatting. + +def _format_g(value, *, fmt='%g'): + ''' + Returns `value` formatted with mupdf.fz_format_double() if available, + otherwise with Python's `%`. + + If `value` is a list or tuple, we return a space-separated string of + formatted values. + ''' + if isinstance(value, (list, tuple)): + ret = '' + for v in value: + if ret: + ret += ' ' + ret += _format_g(v, fmt=fmt) + return ret + else: + return mupdf.fz_format_double(fmt, value) + +format_g = _format_g + +# ByteString is gone from typing in 3.14. +# collections.abc.Buffer available from 3.12 only +try: + ByteString = typing.ByteString +except AttributeError: + ByteString = bytes | bytearray | memoryview + +# Names required by class method typing annotations. +OptBytes = typing.Optional[ByteString] +OptDict = typing.Optional[dict] +OptFloat = typing.Optional[float] +OptInt = typing.Union[int, None] +OptSeq = typing.Optional[typing.Sequence] +OptStr = typing.Optional[str] + +Page = 'Page_forward_decl' +Point = 'Point_forward_decl' + +matrix_like = 'matrix_like' +point_like = 'point_like' +quad_like = 'quad_like' +rect_like = 'rect_like' + + +def _as_fz_document(document): + ''' + Returns document as a mupdf.FzDocument, upcasting as required. Raises + 'document closed' exception if closed. + ''' + if isinstance(document, Document): + if document.is_closed: + raise ValueError('document closed') + document = document.this + if isinstance(document, mupdf.FzDocument): + return document + elif isinstance(document, mupdf.PdfDocument): + return document.super() + elif document is None: + assert 0, f'document is None' + else: + assert 0, f'Unrecognised {type(document)=}' + +def _as_pdf_document(document, required=True): + ''' + Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e. + `document` is not actually a `PdfDocument`) then we assert-fail if `required` + is true (the default) else return a `mupdf.PdfDocument` with `.m_internal` + false. + ''' + if isinstance(document, Document): + if document.is_closed: + raise ValueError('document closed') + document = document.this + if isinstance(document, mupdf.PdfDocument): + return document + elif isinstance(document, mupdf.FzDocument): + ret = mupdf.PdfDocument(document) + if required: + assert ret.m_internal + return ret + elif document is None: + assert 0, f'document is None' + else: + assert 0, f'Unrecognised {type(document)=}' + +def _as_fz_page(page): + ''' + Returns page as a mupdf.FzPage, upcasting as required. + ''' + if isinstance(page, Page): + page = page.this + if isinstance(page, mupdf.PdfPage): + return page.super() + elif isinstance(page, mupdf.FzPage): + return page + elif page is None: + assert 0, f'page is None' + else: + assert 0, f'Unrecognised {type(page)=}' + +def _as_pdf_page(page, required=True): + ''' + Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page` + is not actually a `PdfPage`) then we assert-fail if `required` is true (the + default) else return a `mupdf.PdfPage` with `.m_internal` false. + ''' + if isinstance(page, Page): + page = page.this + if isinstance(page, mupdf.PdfPage): + return page + elif isinstance(page, mupdf.FzPage): + ret = mupdf.pdf_page_from_fz_page(page) + if required: + assert ret.m_internal + return ret + elif page is None: + assert 0, f'page is None' + else: + assert 0, f'Unrecognised {type(page)=}' + + +def _pdf_annot_page(annot): + ''' + Wrapper for mupdf.pdf_annot_page() which raises an exception if + is not bound to a page instead of returning a mupdf.PdfPage with + `.m_internal=None`. + + [Some other MuPDF functions such as pdf_update_annot()` already raise a + similar exception if a pdf_annot's .page field is null.] + ''' + page = mupdf.pdf_annot_page(annot) + if not page.m_internal: + raise RuntimeError('Annot is not bound to a page') + return page + + +# Fixme: we don't support JM_MEMORY=1. +JM_MEMORY = 0 + +# Classes +# + +class Annot: + + def __init__(self, annot): + assert isinstance( annot, mupdf.PdfAnnot) + self.this = annot + + def __repr__(self): + parent = getattr(self, 'parent', '<>') + return "'%s' annotation on %s" % (self.type[1], str(parent)) + + def __str__(self): + return self.__repr__() + + def _erase(self): + if getattr(self, "thisown", False): + self.thisown = False + + def _get_redact_values(self): + annot = self.this + if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT: + return + + values = dict() + try: + obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "RO") + if obj.m_internal: + message_warning("Ignoring redaction key '/RO'.") + xref = mupdf.pdf_to_num(obj) + values[dictkey_xref] = xref + obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "OverlayText") + if obj.m_internal: + text = mupdf.pdf_to_text_string(obj) + values[dictkey_text] = JM_UnicodeFromStr(text) + else: + values[dictkey_text] = '' + obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Q')) + align = 0 + if obj.m_internal: + align = mupdf.pdf_to_int(obj) + values[dictkey_align] = align + except Exception: + if g_exceptions_verbose: exception_info() + return + val = values + + if not val: + return val + val["rect"] = self.rect + text_color, fontname, fontsize = TOOLS._parse_da(self) + val["text_color"] = text_color + val["fontname"] = fontname + val["fontsize"] = fontsize + fill = self.colors["fill"] + val["fill"] = fill + return val + + def _getAP(self): + if g_use_extra: + assert isinstance( self.this, mupdf.PdfAnnot) + ret = extra.Annot_getAP(self.this) + assert isinstance( ret, bytes) + return ret + else: + r = None + res = None + annot = self.this + assert isinstance( annot, mupdf.PdfAnnot) + annot_obj = mupdf.pdf_annot_obj( annot) + ap = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N')) + if mupdf.pdf_is_stream( ap): + res = mupdf.pdf_load_stream( ap) + if res and res.m_internal: + r = JM_BinFromBuffer(res) + return r + + def _setAP(self, buffer_, rect=0): + try: + annot = self.this + annot_obj = mupdf.pdf_annot_obj( annot) + page = _pdf_annot_page(annot) + apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N')) + if not apobj.m_internal: + raise RuntimeError( MSG_BAD_APN) + if not mupdf.pdf_is_stream( apobj): + raise RuntimeError( MSG_BAD_APN) + res = JM_BufferFromBytes( buffer_) + if not res.m_internal: + raise ValueError( MSG_BAD_BUFFER) + JM_update_stream( page.doc(), apobj, res, 1) + if rect: + bbox = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect')) + mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), bbox) + except Exception: + if g_exceptions_verbose: exception_info() + + def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1): + annot = self.this + assert annot.m_internal + annot_obj = mupdf.pdf_annot_obj( annot) + page = _pdf_annot_page(annot) + pdf = page.doc() + type_ = mupdf.pdf_annot_type( annot) + nfcol, fcol = JM_color_FromSequence(fill_color) + + try: + # remove fill color from unsupported annots + # or if so requested + if nfcol == 0 or type_ not in ( + mupdf.PDF_ANNOT_SQUARE, + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON + ): + mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC')) + elif nfcol > 0: + mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol]) + + insert_rot = 1 if rotate >= 0 else 0 + if type_ not in ( + mupdf.PDF_ANNOT_CARET, + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FREE_TEXT, + mupdf.PDF_ANNOT_FILE_ATTACHMENT, + mupdf.PDF_ANNOT_INK, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + mupdf.PDF_ANNOT_STAMP, + mupdf.PDF_ANNOT_TEXT, + ): + insert_rot = 0 + + if insert_rot: + mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate) + + # insert fill color + if type_ == mupdf.PDF_ANNOT_FREE_TEXT: + if nfcol > 0: + mupdf.pdf_set_annot_color(annot, fcol[:nfcol]) + elif nfcol > 0: + col = mupdf.pdf_new_array(page.doc(), nfcol) + for i in range( nfcol): + mupdf.pdf_array_push_real(col, fcol[i]) + mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col) + mupdf.pdf_dirty_annot(annot) + mupdf.pdf_update_annot(annot) # let MuPDF update + pdf.resynth_required = 0 + except Exception as e: + if g_exceptions_verbose: + exception_info() + message( f'cannot update annot: {e}') + raise + + if (opacity < 0 or opacity >= 1) and not blend_mode: # no opacity, no blend_mode + return True + + try: # create or update /ExtGState + ap = mupdf.pdf_dict_getl( + mupdf.pdf_annot_obj(annot), + PDF_NAME('AP'), + PDF_NAME('N') + ) + if not ap.m_internal: # should never happen + raise RuntimeError( MSG_BAD_APN) + + resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources')) + if not resources.m_internal: # no Resources yet: make one + resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2) + + alp0 = mupdf.pdf_new_dict( page.doc(), 3) + if opacity >= 0 and opacity < 1: + mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity) + mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity) + mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity) + + if blend_mode: + mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode) + mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode) + + extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState')) + if not extg.m_internal: # no ExtGState yet: make one + extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2) + + mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0) + + except Exception as e: + if g_exceptions_verbose: exception_info() + message( f'cannot set opacity or blend mode\n: {e}') + raise + + return True + + @property + def apn_bbox(self): + """annotation appearance bbox""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N')) + if not ap.m_internal: + val = JM_py_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)) + else: + rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox')) + val = JM_py_from_rect(rect) + + val = Rect(val) * self.get_parent().transformation_matrix + val *= self.get_parent().derotation_matrix + return val + + @property + def apn_matrix(self): + """annotation appearance matrix""" + try: + CheckParent(self) + annot = self.this + assert isinstance(annot, mupdf.PdfAnnot) + ap = mupdf.pdf_dict_getl( + mupdf.pdf_annot_obj(annot), + mupdf.PDF_ENUM_NAME_AP, + mupdf.PDF_ENUM_NAME_N + ) + if not ap.m_internal: + return JM_py_from_matrix(mupdf.FzMatrix()) + mat = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix) + val = JM_py_from_matrix(mat) + + val = Matrix(val) + + return val + except Exception: + if g_exceptions_verbose: exception_info() + raise + + @property + def blendmode(self): + """annotation BlendMode""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM')) + blend_mode = None + if obj.m_internal: + blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(obj)) + return blend_mode + # loop through the /AP/N/Resources/ExtGState objects + obj = mupdf.pdf_dict_getl( + annot_obj, + PDF_NAME('AP'), + PDF_NAME('N'), + PDF_NAME('Resources'), + PDF_NAME('ExtGState'), + ) + if mupdf.pdf_is_dict(obj): + n = mupdf.pdf_dict_len(obj) + for i in range(n): + obj1 = mupdf.pdf_dict_get_val(obj, i) + if mupdf.pdf_is_dict(obj1): + m = mupdf.pdf_dict_len(obj1) + for j in range(m): + obj2 = mupdf.pdf_dict_get_key(obj1, j) + if mupdf.pdf_objcmp(obj2, PDF_NAME('BM')) == 0: + blend_mode = JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(obj1, j))) + return blend_mode + return blend_mode + + @property + def border(self): + """Border information.""" + CheckParent(self) + atype = self.type[0] + if atype not in ( + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FREE_TEXT, + mupdf.PDF_ANNOT_INK, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + ): + return dict() + ao = mupdf.pdf_annot_obj(self.this) + ret = JM_annot_border(ao) + return ret + + def clean_contents(self, sanitize=1): + """Clean appearance contents stream.""" + CheckParent(self) + annot = self.this + pdf = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot)) + filter_ = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize) + mupdf.pdf_filter_annot_contents(pdf, annot, filter_) + + @property + def colors(self): + """Color definitions.""" + try: + CheckParent(self) + annot = self.this + assert isinstance(annot, mupdf.PdfAnnot) + return JM_annot_colors(mupdf.pdf_annot_obj(annot)) + except Exception: + if g_exceptions_verbose: exception_info() + raise + + def delete_responses(self): + """Delete 'Popup' and responding annotations.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + page = _pdf_annot_page(annot) + while 1: + irt_annot = JM_find_annot_irt(annot) + if not irt_annot: + break + mupdf.pdf_delete_annot(page, irt_annot) + mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup')) + + annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots')) + n = mupdf.pdf_array_len(annots) + found = 0 + for i in range(n-1, -1, -1): + o = mupdf.pdf_array_get(annots, i) + p = mupdf.pdf_dict_get(o, PDF_NAME('Parent')) + if not o.m_internal: + continue + if not mupdf.pdf_objcmp(p, annot_obj): + mupdf.pdf_array_delete(annots, i) + found = 1 + if found: + mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots) + + @property + def file_info(self): + """Attached file information.""" + CheckParent(self) + res = dict() + length = -1 + size = -1 + desc = None + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + type_ = mupdf.pdf_annot_type(annot) + if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT: + raise TypeError( MSG_BAD_ANNOT_TYPE) + stream = mupdf.pdf_dict_getl( + annot_obj, + PDF_NAME('FS'), + PDF_NAME('EF'), + PDF_NAME('F'), + ) + if not stream.m_internal: + RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError) + + fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS')) + + o = mupdf.pdf_dict_get(fs, PDF_NAME('UF')) + if o.m_internal: + filename = mupdf.pdf_to_text_string(o) + else: + o = mupdf.pdf_dict_get(fs, PDF_NAME('F')) + if o.m_internal: + filename = mupdf.pdf_to_text_string(o) + + o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc')) + if o.m_internal: + desc = mupdf.pdf_to_text_string(o) + + o = mupdf.pdf_dict_get(stream, PDF_NAME('Length')) + if o.m_internal: + length = mupdf.pdf_to_int(o) + + o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size')) + if o.m_internal: + size = mupdf.pdf_to_int(o) + + res[ dictkey_filename] = JM_EscapeStrFromStr(filename) + res[ dictkey_descr] = JM_UnicodeFromStr(desc) + res[ dictkey_length] = length + res[ dictkey_size] = size + return res + + @property + def flags(self): + """Flags field.""" + CheckParent(self) + annot = self.this + return mupdf.pdf_annot_flags(annot) + + def get_file(self): + """Retrieve attached file content.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + type = mupdf.pdf_annot_type(annot) + if type != mupdf.PDF_ANNOT_FILE_ATTACHMENT: + raise TypeError( MSG_BAD_ANNOT_TYPE) + stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F')) + if not stream.m_internal: + RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError) + buf = mupdf.pdf_load_stream(stream) + res = JM_BinFromBuffer(buf) + return res + + def get_oc(self): + """Get annotation optional content reference.""" + CheckParent(self) + oc = 0 + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC')) + if obj.m_internal: + oc = mupdf.pdf_to_num(obj) + return oc + + # PyMuPDF doesn't seem to have this .parent member, but removing it breaks + # 11 tests...? + #@property + def get_parent(self): + try: + ret = getattr( self, 'parent') + except AttributeError: + page = _pdf_annot_page(self.this) + assert isinstance( page, mupdf.PdfPage) + document = Document( page.doc()) if page.m_internal else None + ret = Page(page, document) + #self.parent = weakref.proxy( ret) + self.parent = ret + #log(f'No attribute .parent: {type(self)=} {id(self)=}: have set {id(self.parent)=}.') + #log( f'Have set self.parent') + return ret + + def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0): + """annotation Pixmap""" + + CheckParent(self) + cspaces = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK} + if type(colorspace) is str: + colorspace = cspaces.get(colorspace.lower(), None) + if dpi: + matrix = Matrix(dpi / 72, dpi / 72) + ctm = JM_matrix_from_py(matrix) + cs = colorspace + if not cs: + cs = mupdf.fz_device_rgb() + + pix = mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha) + ret = Pixmap(pix) + if dpi: + ret.set_dpi(dpi, dpi) + return ret + + def get_sound(self): + """Retrieve sound stream.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + type = mupdf.pdf_annot_type(annot) + sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound')) + if type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal: + raise TypeError( MSG_BAD_ANNOT_TYPE) + if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal: + RAISEPY( "unsupported sound stream", JM_Exc_FileDataError) + res = dict() + obj = mupdf.pdf_dict_get(sound, PDF_NAME('R')) + if obj.m_internal: + res['rate'] = mupdf.pdf_to_real(obj) + obj = mupdf.pdf_dict_get(sound, PDF_NAME('C')) + if obj.m_internal: + res['channels'] = mupdf.pdf_to_int(obj) + obj = mupdf.pdf_dict_get(sound, PDF_NAME('B')) + if obj.m_internal: + res['bps'] = mupdf.pdf_to_int(obj) + obj = mupdf.pdf_dict_get(sound, PDF_NAME('E')) + if obj.m_internal: + res['encoding'] = mupdf.pdf_to_name(obj) + obj = mupdf.pdf_dict_gets(sound, "CO") + if obj.m_internal: + res['compression'] = mupdf.pdf_to_name(obj) + buf = mupdf.pdf_load_stream(sound) + stream = JM_BinFromBuffer(buf) + res['stream'] = stream + return res + + def get_text(self, *args, **kwargs): + return utils.get_text(self, *args, **kwargs) + + def get_textbox(self, *args, **kwargs): + return utils.get_textbox(self, *args, **kwargs) + + def get_textpage(self, clip=None, flags=0): + """Make annotation TextPage.""" + CheckParent(self) + options = mupdf.FzStextOptions(flags) + if clip: + assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.' + clip2 = JM_rect_from_py(clip) + options.clip = clip2.internal() + options.flags |= mupdf.FZ_STEXT_CLIP_RECT + annot = self.this + stextpage = mupdf.FzStextPage(annot, options) + ret = TextPage(stextpage) + p = self.get_parent() + if isinstance(p, weakref.ProxyType): + ret.parent = p + else: + ret.parent = weakref.proxy(p) + return ret + + @property + def has_popup(self): + """Check if annotation has a Popup.""" + CheckParent(self) + annot = self.this + obj = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Popup')) + return True if obj.m_internal else False + + @property + def info(self): + """Various information details.""" + CheckParent(self) + annot = self.this + res = dict() + + res[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot)) + + o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('Name')) + res[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(o)) + + # Title (= author) + o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('T')) + res[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o)) + + # CreationDate + o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "CreationDate") + res[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o)) + + # ModDate + o = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('M')) + res[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o)) + + # Subj + o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "Subj") + res[dictkey_subject] = mupdf.pdf_to_text_string(o) + + # Identification (PDF key /NM) + o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM") + res[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(o)) + + return res + + @property + def irt_xref(self): + ''' + annotation IRT xref + ''' + annot = self.this + annot_obj = mupdf.pdf_annot_obj( annot) + irt = mupdf.pdf_dict_get( annot_obj, PDF_NAME('IRT')) + if not irt.m_internal: + return 0 + return mupdf.pdf_to_num( irt) + + @property + def is_open(self): + """Get 'open' status of annotation or its Popup.""" + CheckParent(self) + return mupdf.pdf_annot_is_open(self.this) + + @property + def language(self): + """annotation language""" + this_annot = self.this + lang = mupdf.pdf_annot_language(this_annot) + if lang == mupdf.FZ_LANG_UNSET: + return + assert hasattr(mupdf, 'fz_string_from_text_language2') + return mupdf.fz_string_from_text_language2(lang) + + @property + def line_ends(self): + """Line end codes.""" + CheckParent(self) + annot = self.this + # return nothing for invalid annot types + if not mupdf.pdf_annot_has_line_ending_styles(annot): + return + lstart = mupdf.pdf_annot_line_start_style(annot) + lend = mupdf.pdf_annot_line_end_style(annot) + return lstart, lend + + @property + def next(self): + """Next annotation.""" + CheckParent(self) + this_annot = self.this + assert isinstance(this_annot, mupdf.PdfAnnot) + assert this_annot.m_internal + type_ = mupdf.pdf_annot_type(this_annot) + if type_ != mupdf.PDF_ANNOT_WIDGET: + annot = mupdf.pdf_next_annot(this_annot) + else: + annot = mupdf.pdf_next_widget(this_annot) + + val = Annot(annot) if annot.m_internal else None + if not val: + return None + val.thisown = True + assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value() + val.parent._annot_refs[id(val)] = val + + if val.type[0] == mupdf.PDF_ANNOT_WIDGET: + widget = Widget() + TOOLS._fill_widget(val, widget) + val = widget + return val + + @property + def opacity(self): + """Opacity.""" + CheckParent(self) + annot = self.this + opy = -1 + ca = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_CA) + if mupdf.pdf_is_number(ca): + opy = mupdf.pdf_to_real(ca) + return opy + + @property + def popup_rect(self): + """annotation 'Popup' rectangle""" + CheckParent(self) + rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE) + annot = self.this + annot_obj = mupdf.pdf_annot_obj( annot) + obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Popup')) + if obj.m_internal: + rect = mupdf.pdf_dict_get_rect(obj, PDF_NAME('Rect')) + #log( '{rect=}') + val = JM_py_from_rect(rect) + #log( '{val=}') + + val = Rect(val) * self.get_parent().transformation_matrix + val *= self.get_parent().derotation_matrix + + return val + + @property + def popup_xref(self): + """annotation 'Popup' xref""" + CheckParent(self) + xref = 0 + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup')) + if obj.m_internal: + xref = mupdf.pdf_to_num(obj) + return xref + + @property + def rect(self): + """annotation rectangle""" + if g_use_extra: + val = extra.Annot_rect3( self.this) + else: + val = mupdf.pdf_bound_annot(self.this) + val = Rect(val) + + # Caching self.parent_() reduces 1000x from 0.07 to 0.04. + # + p = self.get_parent() + #p = getattr( self, 'parent', None) + #if p is None: + # p = self.parent + # self.parent = p + #p = self.parent_() + val *= p.derotation_matrix + return val + + @property + def rect_delta(self): + ''' + annotation delta values to rectangle + ''' + annot_obj = mupdf.pdf_annot_obj(self.this) + arr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('RD')) + if mupdf.pdf_array_len( arr) == 4: + return ( + mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 0)), + mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 1)), + -mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 2)), + -mupdf.pdf_to_real( mupdf.pdf_array_get( arr, 3)), + ) + + @property + def rotation(self): + """annotation rotation""" + CheckParent(self) + annot = self.this + rotation = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_Rotate) + if not rotation.m_internal: + return -1 + return mupdf.pdf_to_int( rotation) + + def set_apn_bbox(self, bbox): + """ + Set annotation appearance bbox. + """ + CheckParent(self) + page = self.get_parent() + rot = page.rotation_matrix + mat = page.transformation_matrix + bbox *= rot * ~mat + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N')) + if not ap.m_internal: + raise RuntimeError( MSG_BAD_APN) + rect = JM_rect_from_py(bbox) + mupdf.pdf_dict_put_rect(ap, PDF_NAME('BBox'), rect) + + def set_apn_matrix(self, matrix): + """Set annotation appearance matrix.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + ap = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AP'), PDF_NAME('N')) + if not ap.m_internal: + raise RuntimeError( MSG_BAD_APN) + mat = JM_matrix_from_py(matrix) + mupdf.pdf_dict_put_matrix(ap, PDF_NAME('Matrix'), mat) + + def set_blendmode(self, blend_mode): + """Set annotation BlendMode.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('BM'), blend_mode) + + def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1): + """Set border properties. + + Either a dict, or direct arguments width, style, dashes or clouds.""" + CheckParent(self) + atype, atname = self.type[:2] # annotation type + if atype not in ( + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FREE_TEXT, + mupdf.PDF_ANNOT_INK, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + ): + message(f"Cannot set border for '{atname}'.") + return None + if atype not in ( + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FREE_TEXT, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + ): + if clouds > 0: + message(f"Cannot set cloudy border for '{atname}'.") + clouds = -1 # do not set border effect + if type(border) is not dict: + border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds} + border.setdefault("width", -1) + border.setdefault("style", None) + border.setdefault("dashes", None) + border.setdefault("clouds", -1) + if border["width"] is None: + border["width"] = -1 + if border["clouds"] is None: + border["clouds"] = -1 + if hasattr(border["dashes"], "__getitem__"): # ensure sequence items are integers + border["dashes"] = tuple(border["dashes"]) + for item in border["dashes"]: + if not isinstance(item, int): + border["dashes"] = None + break + annot = self.this + annot_obj = mupdf.pdf_annot_obj( annot) + pdf = mupdf.pdf_get_bound_document( annot_obj) + return JM_annot_set_border( border, pdf, annot_obj) + + def set_colors(self, colors=None, stroke=None, fill=None): + """Set 'stroke' and 'fill' colors. + + Use either a dict or the direct arguments. + """ + if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT: + raise ValueError("cannot be used for FreeText annotations") + + CheckParent(self) + doc = self.get_parent().parent + if type(colors) is not dict: + colors = {"fill": fill, "stroke": stroke} + fill = colors.get("fill") + stroke = colors.get("stroke") + + fill_annots = (mupdf.PDF_ANNOT_CIRCLE, mupdf.PDF_ANNOT_SQUARE, mupdf.PDF_ANNOT_LINE, mupdf.PDF_ANNOT_POLY_LINE, mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_REDACT,) + + if stroke in ([], ()): + doc.xref_set_key(self.xref, "C", "[]") + elif stroke is not None: + if hasattr(stroke, "__float__"): + stroke = [float(stroke)] + CheckColor(stroke) + assert len(stroke) in (1, 3, 4) + s = f"[{_format_g(stroke)}]" + doc.xref_set_key(self.xref, "C", s) + + if fill and self.type[0] not in fill_annots: + message("Warning: fill color ignored for annot type '%s'." % self.type[1]) + return + if fill in ([], ()): + doc.xref_set_key(self.xref, "IC", "[]") + elif fill is not None: + if hasattr(fill, "__float__"): + fill = [float(fill)] + CheckColor(fill) + assert len(fill) in (1, 3, 4) + s = f"[{_format_g(fill)}]" + doc.xref_set_key(self.xref, "IC", s) + + def set_flags(self, flags): + """Set annotation flags.""" + CheckParent(self) + annot = self.this + mupdf.pdf_set_annot_flags(annot, flags) + + def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None): + """Set various properties.""" + CheckParent(self) + if type(info) is dict: # build the args from the dictionary + content = info.get("content", None) + title = info.get("title", None) + creationDate = info.get("creationDate", None) + modDate = info.get("modDate", None) + subject = info.get("subject", None) + info = None + annot = self.this + # use this to indicate a 'markup' annot type + is_markup = mupdf.pdf_annot_has_author(annot) + # contents + if content: + mupdf.pdf_set_annot_contents(annot, content) + if is_markup: + # title (= author) + if title: + mupdf.pdf_set_annot_author(annot, title) + # creation date + if creationDate: + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate) + # mod date + if modDate: + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate) + # subject + if subject: + mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject)) + + def set_irt_xref(self, xref): + ''' + Set annotation IRT xref + ''' + annot = self.this + annot_obj = mupdf.pdf_annot_obj( annot) + page = _pdf_annot_page(annot) + if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()): + raise ValueError( MSG_BAD_XREF) + irt = mupdf.pdf_new_indirect( page.doc(), xref, 0) + subt = mupdf.pdf_dict_get( irt, PDF_NAME('Subtype')) + irt_subt = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subt)) + if irt_subt < 0: + raise ValueError( MSG_IS_NO_ANNOT) + mupdf.pdf_dict_put( annot_obj, PDF_NAME('IRT'), irt) + + def set_language(self, language=None): + """Set annotation language.""" + CheckParent(self) + this_annot = self.this + if not language: + lang = mupdf.FZ_LANG_UNSET + else: + lang = mupdf.fz_text_language_from_string(language) + mupdf.pdf_set_annot_language(this_annot, lang) + + def set_line_ends(self, start, end): + """Set line end codes.""" + CheckParent(self) + annot = self.this + if mupdf.pdf_annot_has_line_ending_styles(annot): + mupdf.pdf_set_annot_line_ending_styles(annot, start, end) + else: + message_warning("bad annot type for line ends") + + def set_name(self, name): + """Set /Name (icon) of annotation.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('Name'), name) + + def set_oc(self, oc=0): + """Set / remove annotation OC xref.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + if not oc: + mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC')) + else: + JM_add_oc_object(mupdf.pdf_get_bound_document(annot_obj), annot_obj, oc) + + def set_opacity(self, opacity): + """Set opacity.""" + CheckParent(self) + annot = self.this + if not _INRANGE(opacity, 0.0, 1.0): + mupdf.pdf_set_annot_opacity(annot, 1) + return + mupdf.pdf_set_annot_opacity(annot, opacity) + if opacity < 1.0: + page = _pdf_annot_page(annot) + page.transparency = 1 + + def set_open(self, is_open): + """Set 'open' status of annotation or its Popup.""" + CheckParent(self) + annot = self.this + mupdf.pdf_set_annot_is_open(annot, is_open) + + def set_popup(self, rect): + ''' + Create annotation 'Popup' or update rectangle. + ''' + CheckParent(self) + annot = self.this + pdfpage = _pdf_annot_page(annot) + rot = JM_rotate_page_matrix(pdfpage) + r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot) + mupdf.pdf_set_annot_popup(annot, r) + + def set_rect(self, rect): + """Set annotation rectangle.""" + CheckParent(self) + annot = self.this + + pdfpage = _pdf_annot_page(annot) + rot = JM_rotate_page_matrix(pdfpage) + r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot) + if mupdf.fz_is_empty_rect(r) or mupdf.fz_is_infinite_rect(r): + raise ValueError( MSG_BAD_RECT) + try: + mupdf.pdf_set_annot_rect(annot, r) + except Exception as e: + message(f'cannot set rect: {e}') + return False + + def set_rotation(self, rotate=0): + """Set annotation rotation.""" + CheckParent(self) + + annot = self.this + type = mupdf.pdf_annot_type(annot) + if type not in ( + mupdf.PDF_ANNOT_CARET, + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FREE_TEXT, + mupdf.PDF_ANNOT_FILE_ATTACHMENT, + mupdf.PDF_ANNOT_INK, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + mupdf.PDF_ANNOT_STAMP, + mupdf.PDF_ANNOT_TEXT, + ): + return + rot = rotate + while rot < 0: + rot += 360 + while rot >= 360: + rot -= 360 + if type == mupdf.PDF_ANNOT_FREE_TEXT and rot % 90 != 0: + rot = 0 + annot_obj = mupdf.pdf_annot_obj(annot) + mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rot) + + @property + def type(self): + """annotation type""" + CheckParent(self) + if not self.this.m_internal: + return 'null' + type_ = mupdf.pdf_annot_type(self.this) + c = mupdf.pdf_string_from_annot_type(type_) + o = mupdf.pdf_dict_gets( mupdf.pdf_annot_obj(self.this), 'IT') + if not o.m_internal or mupdf.pdf_is_name(o): + return (type_, c) + it = mupdf.pdf_to_name(o) + return (type_, c, it) + + def update(self, + blend_mode: OptStr =None, + opacity: OptFloat =None, + fontsize: float =0, + fontname: OptStr =None, + text_color: OptSeq =None, + border_color: OptSeq =None, + fill_color: OptSeq =None, + cross_out: bool =True, + rotate: int =-1, + ): + """Update annot appearance. + + Notes: + Depending on the annot type, some parameters make no sense, + while others are only available in this method to achieve the + desired result. This is especially true for 'FreeText' annots. + Args: + blend_mode: set the blend mode, all annotations. + opacity: set the opacity, all annotations. + fontsize: set fontsize, 'FreeText' only. + fontname: set the font, 'FreeText' only. + border_color: set border color, 'FreeText' only. + text_color: set text color, 'FreeText' only. + fill_color: set fill color, all annotations. + cross_out: draw diagonal lines, 'Redact' only. + rotate: set rotation, 'FreeText' and some others. + """ + annot_obj = mupdf.pdf_annot_obj(self.this) + + if border_color: + is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC")) + if not is_rich_text: + raise ValueError("cannot set border_color if rich_text is False") + Annot.update_timing_test() + CheckParent(self) + def color_string(cs, code): + """Return valid PDF color operator for a given color sequence. + """ + cc = ColorCode(cs, code) + if not cc: + return b"" + return (cc + "\n").encode() + + annot_type = self.type[0] # get the annot type + + dt = self.border.get("dashes", None) # get the dashes spec + bwidth = self.border.get("width", -1) # get border line width + stroke = self.colors["stroke"] # get the stroke color + if fill_color is not None: + fill = fill_color + else: + fill = self.colors["fill"] + rect = None # self.rect # prevent MuPDF fiddling with it + apnmat = self.apn_matrix # prevent MuPDF fiddling with it + if rotate != -1: # sanitize rotation value + while rotate < 0: + rotate += 360 + while rotate >= 360: + rotate -= 360 + if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0: + rotate = 0 + + #------------------------------------------------------------------ + # handle opacity and blend mode + #------------------------------------------------------------------ + if blend_mode is None: + blend_mode = self.blendmode + if not hasattr(opacity, "__float__"): + opacity = self.opacity + + if 0 <= opacity < 1 or blend_mode: + opa_code = "/H gs\n" # then we must reference this 'gs' + else: + opa_code = "" + + if annot_type == mupdf.PDF_ANNOT_FREE_TEXT: + CheckColor(text_color) + CheckColor(fill_color) + tcol, fname, fsize = TOOLS._parse_da(self) + + # read and update default appearance as necessary + if fsize <= 0: + fsize = 12 + if text_color: + tcol = text_color + if fontname: + fname = fontname + if fontsize > 0: + fsize = fontsize + JM_make_annot_DA(self, len(tcol), tcol, fname, fsize) + blend_mode = None # not supported for free text annotations! + + #------------------------------------------------------------------ + # now invoke MuPDF to update the annot appearance + #------------------------------------------------------------------ + val = self._update_appearance( + opacity=opacity, + blend_mode=blend_mode, + fill_color=fill, + rotate=rotate, + ) + if val is False: + raise RuntimeError("Error updating annotation.") + + if annot_type == mupdf.PDF_ANNOT_FREE_TEXT: + # in absence of previous opacity, we may need to modify the AP + ap = self._getAP() + if 0 <= opacity < 1 and not ap.startswith(b"/H gs"): + self._setAP(b"/H gs\n" + ap) + return + + bfill = color_string(fill, "f") + bstroke = color_string(stroke, "c") + + p_ctm = self.get_parent().transformation_matrix + imat = ~p_ctm # inverse page transf. matrix + + if dt: + dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n" + dashes = dashes.encode("utf-8") + else: + dashes = None + + if self.line_ends: + line_end_le, line_end_ri = self.line_ends + else: + line_end_le, line_end_ri = 0, 0 # init line end codes + + # read contents as created by MuPDF + ap = self._getAP() + ap_tab = ap.splitlines() # split in single lines + ap_updated = False # assume we did nothing + + if annot_type == mupdf.PDF_ANNOT_REDACT: + if cross_out: # create crossed-out rect + ap_updated = True + ap_tab = ap_tab[:-1] + _, LL, LR, UR, UL = ap_tab + ap_tab.append(LR) + ap_tab.append(LL) + ap_tab.append(UR) + ap_tab.append(LL) + ap_tab.append(UL) + ap_tab.append(b"S") + + if bwidth > 0 or bstroke != b"": + ap_updated = True + ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else [] + for line in ap_tab: + if line.endswith(b"w"): + continue + if line.endswith(b"RG") and bstroke != b"": + line = bstroke[:-1] + ntab.append(line) + ap_tab = ntab + + ap = b"\n".join(ap_tab) + + if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE): + ap = b"\n".join(ap_tab[:-1]) + b"\n" + ap_updated = True + if bfill != b"": + if annot_type == mupdf.PDF_ANNOT_POLYGON: + ap = ap + bfill + b"b" # close, fill, and stroke + elif annot_type == mupdf.PDF_ANNOT_POLY_LINE: + ap = ap + b"S" # stroke + else: + if annot_type == mupdf.PDF_ANNOT_POLYGON: + ap = ap + b"s" # close and stroke + elif annot_type == mupdf.PDF_ANNOT_POLY_LINE: + ap = ap + b"S" # stroke + + if dashes is not None: # handle dashes + ap = dashes + ap + # reset dashing - only applies for LINE annots with line ends given + ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1) + ap_updated = True + + if opa_code: + ap = opa_code.encode("utf-8") + ap + ap_updated = True + + ap = b"q\n" + ap + b"\nQ\n" + #---------------------------------------------------------------------- + # the following handles line end symbols for 'Polygon' and 'Polyline' + #---------------------------------------------------------------------- + if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE): + + le_funcs = (None, TOOLS._le_square, TOOLS._le_circle, + TOOLS._le_diamond, TOOLS._le_openarrow, + TOOLS._le_closedarrow, TOOLS._le_butt, + TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow, + TOOLS._le_slash) + le_funcs_range = range(1, len(le_funcs)) + d = 2 * max(1, self.border["width"]) + rect = self.rect + (-d, -d, d, d) + ap_updated = True + points = self.vertices + if line_end_le in le_funcs_range: + p1 = Point(points[0]) * imat + p2 = Point(points[1]) * imat + left = le_funcs[line_end_le](self, p1, p2, False, fill_color) + ap += left.encode() + if line_end_ri in le_funcs_range: + p1 = Point(points[-2]) * imat + p2 = Point(points[-1]) * imat + left = le_funcs[line_end_ri](self, p1, p2, True, fill_color) + ap += left.encode() + + if ap_updated: + if rect: # rect modified here? + self.set_rect(rect) + self._setAP(ap, rect=1) + else: + self._setAP(ap, rect=0) + + #------------------------------- + # handle annotation rotations + #------------------------------- + if annot_type not in ( # only these types are supported + mupdf.PDF_ANNOT_CARET, + mupdf.PDF_ANNOT_CIRCLE, + mupdf.PDF_ANNOT_FILE_ATTACHMENT, + mupdf.PDF_ANNOT_INK, + mupdf.PDF_ANNOT_LINE, + mupdf.PDF_ANNOT_POLY_LINE, + mupdf.PDF_ANNOT_POLYGON, + mupdf.PDF_ANNOT_SQUARE, + mupdf.PDF_ANNOT_STAMP, + mupdf.PDF_ANNOT_TEXT, + ): + return + + rot = self.rotation # get value from annot object + if rot == -1: # nothing to change + return + + M = (self.rect.tl + self.rect.br) / 2 # center of annot rect + + if rot == 0: # undo rotations + if abs(apnmat - Matrix(1, 1)) < 1e-5: + return # matrix already is a no-op + quad = self.rect.morph(M, ~apnmat) # derotate rect + self.setRect(quad.rect) + self.set_apn_matrix(Matrix(1, 1)) # appearance matrix = no-op + return + + mat = Matrix(rot) + quad = self.rect.morph(M, mat) + self.set_rect(quad.rect) + self.set_apn_matrix(apnmat * mat) + + def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None): + """Update attached file.""" + CheckParent(self) + annot = self.this + annot_obj = mupdf.pdf_annot_obj(annot) + pdf = mupdf.pdf_get_bound_document(annot_obj) # the owning PDF + type = mupdf.pdf_annot_type(annot) + if type != mupdf.PDF_ANNOT_FILE_ATTACHMENT: + raise TypeError( MSG_BAD_ANNOT_TYPE) + stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F')) + # the object for file content + if not stream.m_internal: + RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError) + + fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS')) + + # file content given + res = JM_BufferFromBytes(buffer_) + if buffer_ and not res.m_internal: + raise ValueError( MSG_BAD_BUFFER) + if res: + JM_update_stream(pdf, stream, res, 1) + # adjust /DL and /Size parameters + len, _ = mupdf.fz_buffer_storage(res) + l = mupdf.pdf_new_int(len) + mupdf.pdf_dict_put(stream, PDF_NAME('DL'), l) + mupdf.pdf_dict_putl(stream, l, PDF_NAME('Params'), PDF_NAME('Size')) + + if filename: + mupdf.pdf_dict_put_text_string(stream, PDF_NAME('F'), filename) + mupdf.pdf_dict_put_text_string(fs, PDF_NAME('F'), filename) + mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), filename) + mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), filename) + mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename) + + if ufilename: + mupdf.pdf_dict_put_text_string(stream, PDF_NAME('UF'), ufilename) + mupdf.pdf_dict_put_text_string(fs, PDF_NAME('UF'), ufilename) + + if desc: + mupdf.pdf_dict_put_text_string(stream, PDF_NAME('Desc'), desc) + mupdf.pdf_dict_put_text_string(fs, PDF_NAME('Desc'), desc) + + @staticmethod + def update_timing_test(): + total = 0 + for i in range( 30*1000): + total += i + return total + + @property + def vertices(self): + """annotation vertex points""" + CheckParent(self) + annot = self.this + assert isinstance(annot, mupdf.PdfAnnot) + annot_obj = mupdf.pdf_annot_obj(annot) + page = _pdf_annot_page(annot) + page_ctm = mupdf.FzMatrix() # page transformation matrix + dummy = mupdf.FzRect() # Out-param for mupdf.pdf_page_transform(). + mupdf.pdf_page_transform(page, dummy, page_ctm) + derot = JM_derotate_page_matrix(page) + page_ctm = mupdf.fz_concat(page_ctm, derot) + + #---------------------------------------------------------------- + # The following objects occur in different annotation types. + # So we are sure that (!o) occurs at most once. + # Every pair of floats is one point, that needs to be separately + # transformed with the page transformation matrix. + #---------------------------------------------------------------- + o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices')) + if not o.m_internal: o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('L')) + if not o.m_internal: o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints')) + if not o.m_internal: o = mupdf.pdf_dict_gets(annot_obj, 'CL') + + if o.m_internal: + # handle lists with 1-level depth + # weiter + res = [] + for i in range(0, mupdf.pdf_array_len(o), 2): + x = mupdf.pdf_to_real(mupdf.pdf_array_get(o, i)) + y = mupdf.pdf_to_real(mupdf.pdf_array_get(o, i+1)) + point = mupdf.FzPoint(x, y) + point = mupdf.fz_transform_point(point, page_ctm) + res.append( (point.x, point.y)) + return res + + o = mupdf.pdf_dict_gets(annot_obj, 'InkList') + if o.m_internal: + # InkList has 2-level lists + #inklist: + res = [] + for i in range(mupdf.pdf_array_len(o)): + res1 = [] + o1 = mupdf.pdf_array_get(o, i) + for j in range(0, mupdf.pdf_array_len(o1), 2): + x = mupdf.pdf_to_real(mupdf.pdf_array_get(o1, j)) + y = mupdf.pdf_to_real(mupdf.pdf_array_get(o1, j+1)) + point = mupdf.FzPoint(x, y) + point = mupdf.fz_transform_point(point, page_ctm) + res1.append( (point.x, point.y)) + res.append(res1) + return res + + @property + def xref(self): + """annotation xref number""" + CheckParent(self) + annot = self.this + return mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)) + + +class Archive: + def __init__( self, *args): + ''' + Archive(dirname [, path]) - from folder + Archive(file [, path]) - from file name or object + Archive(data, name) - from memory item + Archive() - empty archive + Archive(archive [, path]) - from archive + ''' + self._subarchives = list() + self.this = mupdf.fz_new_multi_archive() + if args: + self.add( *args) + + def __repr__( self): + return f'Archive, sub-archives: {len(self._subarchives)}' + + def _add_arch( self, subarch, path=None): + mupdf.fz_mount_multi_archive( self.this, subarch, path) + + def _add_dir( self, folder, path=None): + sub = mupdf.fz_open_directory( folder) + mupdf.fz_mount_multi_archive( self.this, sub, path) + + def _add_treeitem( self, memory, name, path=None): + buff = JM_BufferFromBytes( memory) + sub = mupdf.fz_new_tree_archive( mupdf.FzTree()) + mupdf.fz_tree_archive_add_buffer( sub, name, buff) + mupdf.fz_mount_multi_archive( self.this, sub, path) + + def _add_ziptarfile( self, filepath, type_, path=None): + if type_ == 1: + sub = mupdf.fz_open_zip_archive( filepath) + else: + sub = mupdf.fz_open_tar_archive( filepath) + mupdf.fz_mount_multi_archive( self.this, sub, path) + + def _add_ziptarmemory( self, memory, type_, path=None): + buff = JM_BufferFromBytes( memory) + stream = mupdf.fz_open_buffer( buff) + if type_==1: + sub = mupdf.fz_open_zip_archive_with_stream( stream) + else: + sub = mupdf.fz_open_tar_archive_with_stream( stream) + mupdf.fz_mount_multi_archive( self.this, sub, path) + + def add( self, content, path=None): + ''' + Add a sub-archive. + + Args: + content: + The content to be added. May be one of: + `str` - must be path of directory or file. + `bytes`, `bytearray`, `io.BytesIO` - raw data. + `zipfile.Zipfile`. + `tarfile.TarFile`. + `pymupdf.Archive`. + A two-item tuple `(data, name)`. + List or tuple (but not tuple with length 2) of the above. + path: (str) a "virtual" path name, under which the elements + of content can be retrieved. Use it to e.g. cope with + duplicate element names. + ''' + def is_binary_data(x): + return isinstance(x, (bytes, bytearray, io.BytesIO)) + + def make_subarch(entries, mount, fmt): + subarch = dict(fmt=fmt, entries=entries, path=mount) + if fmt != "tree" or self._subarchives == []: + self._subarchives.append(subarch) + else: + ltree = self._subarchives[-1] + if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]: + self._subarchives.append(subarch) + else: + ltree["entries"].extend(subarch["entries"]) + self._subarchives[-1] = ltree + + if isinstance(content, pathlib.Path): + content = str(content) + + if isinstance(content, str): + if os.path.isdir(content): + self._add_dir(content, path) + return make_subarch(os.listdir(content), path, 'dir') + elif os.path.isfile(content): + assert isinstance(path, str) and path != '', \ + f'Need name for binary content, but {path=}.' + with io.open(content, 'rb') as f: + ff = f.read() + self._add_treeitem(ff, path) + return make_subarch([path], None, 'tree') + else: + raise ValueError(f'Not a file or directory: {content!r}') + + elif is_binary_data(content): + assert isinstance(path, str) and path != '' \ + f'Need name for binary content, but {path=}.' + self._add_treeitem(content, path) + return make_subarch([path], None, 'tree') + + elif isinstance(content, zipfile.ZipFile): + filename = getattr(content, "filename", None) + if filename is None: + fp = content.fp.getvalue() + self._add_ziptarmemory(fp, 1, path) + else: + self._add_ziptarfile(filename, 1, path) + return make_subarch(content.namelist(), path, 'zip') + + elif isinstance(content, tarfile.TarFile): + filename = getattr(content.fileobj, "name", None) + if filename is None: + fp = content.fileobj + if not isinstance(fp, io.BytesIO): + fp = fp.fileobj + self._add_ziptarmemory(fp.getvalue(), 0, path) + else: + self._add_ziptarfile(filename, 0, path) + return make_subarch(content.getnames(), path, 'tar') + + elif isinstance(content, Archive): + self._add_arch(content, path) + return make_subarch([], path, 'multi') + + if isinstance(content, tuple) and len(content) == 2: + # covers the tree item plus path + data, name = content + assert isinstance(name, str), f'Unexpected {type(name)=}' + if is_binary_data(data): + self._add_treeitem(data, name, path=path) + elif isinstance(data, str): + if os.path.isfile(data): + with io.open(data, 'rb') as f: + ff = f.read() + self._add_treeitem(ff, name, path=path) + else: + assert 0, f'Unexpected {type(data)=}.' + return make_subarch([name], path, 'tree') + + elif hasattr(content, '__getitem__'): + # Deal with sequence of disparate items. + for item in content: + self.add(item, path) + return + + else: + raise TypeError(f'Unrecognised type {type(content)}.') + assert 0 + + @property + def entry_list( self): + ''' + List of sub archives. + ''' + return self._subarchives + + def has_entry( self, name): + return mupdf.fz_has_archive_entry( self.this, name) + + def read_entry( self, name): + buff = mupdf.fz_read_archive_entry( self.this, name) + return JM_BinFromBuffer( buff) + + +class Xml: + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def __init__(self, rhs): + if isinstance(rhs, mupdf.FzXml): + self.this = rhs + elif isinstance(rhs, str): + buff = mupdf.fz_new_buffer_from_copied_data(rhs) + self.this = mupdf.fz_parse_xml_from_html5(buff) + else: + assert 0, f'Unsupported type for rhs: {type(rhs)}' + + def _get_node_tree( self): + def show_node(node, items, shift): + while node is not None: + if node.is_text: + items.append((shift, f'"{node.text}"')) + node = node.next + continue + items.append((shift, f"({node.tagname}")) + for k, v in node.get_attributes().items(): + items.append((shift, f"={k} '{v}'")) + child = node.first_child + if child: + items = show_node(child, items, shift + 1) + items.append((shift, f"){node.tagname}")) + node = node.next + return items + + shift = 0 + items = [] + items = show_node(self, items, shift) + return items + + def add_bullet_list(self): + """Add bulleted list ("ul" tag)""" + child = self.create_element("ul") + self.append_child(child) + return child + + def add_class(self, text): + """Set some class via CSS. Replaces complete class spec.""" + cls = self.get_attribute_value("class") + if cls is not None and text in cls: + return self + self.remove_attribute("class") + if cls is None: + cls = text + else: + cls += " " + text + self.set_attribute("class", cls) + return self + + def add_code(self, text=None): + """Add a "code" tag""" + child = self.create_element("code") + if type(text) is str: + child.append_child(self.create_text_node(text)) + prev = self.span_bottom() + if prev is None: + prev = self + prev.append_child(child) + return self + + def add_codeblock(self): + """Add monospaced lines ("pre" node)""" + child = self.create_element("pre") + self.append_child(child) + return child + + def add_description_list(self): + """Add description list ("dl" tag)""" + child = self.create_element("dl") + self.append_child(child) + return child + + def add_division(self): + """Add "div" tag""" + child = self.create_element("div") + self.append_child(child) + return child + + def add_header(self, level=1): + """Add header tag""" + if level not in range(1, 7): + raise ValueError("Header level must be in [1, 6]") + this_tag = self.tagname + new_tag = f"h{level}" + child = self.create_element(new_tag) + if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"): + self.append_child(child) + return child + self.parent.append_child(child) + return child + + def add_horizontal_line(self): + """Add horizontal line ("hr" tag)""" + child = self.create_element("hr") + self.append_child(child) + return child + + def add_image(self, name, width=None, height=None, imgfloat=None, align=None): + """Add image node (tag "img").""" + child = self.create_element("img") + if width is not None: + child.set_attribute("width", f"{width}") + if height is not None: + child.set_attribute("height", f"{height}") + if imgfloat is not None: + child.set_attribute("style", f"float: {imgfloat}") + if align is not None: + child.set_attribute("align", f"{align}") + child.set_attribute("src", f"{name}") + self.append_child(child) + return child + + def add_link(self, href, text=None): + """Add a hyperlink ("a" tag)""" + child = self.create_element("a") + if not isinstance(text, str): + text = href + child.set_attribute("href", href) + child.append_child(self.create_text_node(text)) + prev = self.span_bottom() + if prev is None: + prev = self + prev.append_child(child) + return self + + def add_list_item(self): + """Add item ("li" tag) under a (numbered or bulleted) list.""" + if self.tagname not in ("ol", "ul"): + raise ValueError("cannot add list item to", self.tagname) + child = self.create_element("li") + self.append_child(child) + return child + + def add_number_list(self, start=1, numtype=None): + """Add numbered list ("ol" tag)""" + child = self.create_element("ol") + if start > 1: + child.set_attribute("start", str(start)) + if numtype is not None: + child.set_attribute("type", numtype) + self.append_child(child) + return child + + def add_paragraph(self): + """Add "p" tag""" + child = self.create_element("p") + if self.tagname != "p": + self.append_child(child) + else: + self.parent.append_child(child) + return child + + def add_span(self): + child = self.create_element("span") + self.append_child(child) + return child + + def add_style(self, text): + """Set some style via CSS style. Replaces complete style spec.""" + style = self.get_attribute_value("style") + if style is not None and text in style: + return self + self.remove_attribute("style") + if style is None: + style = text + else: + style += ";" + text + self.set_attribute("style", style) + return self + + def add_subscript(self, text=None): + """Add a subscript ("sub" tag)""" + child = self.create_element("sub") + if type(text) is str: + child.append_child(self.create_text_node(text)) + prev = self.span_bottom() + if prev is None: + prev = self + prev.append_child(child) + return self + + def add_superscript(self, text=None): + """Add a superscript ("sup" tag)""" + child = self.create_element("sup") + if type(text) is str: + child.append_child(self.create_text_node(text)) + prev = self.span_bottom() + if prev is None: + prev = self + prev.append_child(child) + return self + + def add_text(self, text): + """Add text. Line breaks are honored.""" + lines = text.splitlines() + line_count = len(lines) + prev = self.span_bottom() + if prev is None: + prev = self + + for i, line in enumerate(lines): + prev.append_child(self.create_text_node(line)) + if i < line_count - 1: + prev.append_child(self.create_element("br")) + return self + + def append_child( self, child): + mupdf.fz_dom_append_child( self.this, child.this) + + def append_styled_span(self, style): + span = self.create_element("span") + span.add_style(style) + prev = self.span_bottom() + if prev is None: + prev = self + prev.append_child(span) + return prev + + def bodytag( self): + return Xml( mupdf.fz_dom_body( self.this)) + + def clone( self): + ret = mupdf.fz_dom_clone( self.this) + return Xml( ret) + + @staticmethod + def color_text(color): + if type(color) is str: + return color + if type(color) is int: + return f"rgb({sRGB_to_rgb(color)})" + if type(color) in (tuple, list): + return f"rgb{tuple(color)}" + return color + + def create_element( self, tag): + return Xml( mupdf.fz_dom_create_element( self.this, tag)) + + def create_text_node( self, text): + return Xml( mupdf.fz_dom_create_text_node( self.this, text)) + + def debug(self): + """Print a list of the node tree below self.""" + items = self._get_node_tree() + for item in items: + message(" " * item[0] + item[1].replace("\n", "\\n")) + + def find( self, tag, att, match): + ret = mupdf.fz_dom_find( self.this, tag, att, match) + if ret.m_internal: + return Xml( ret) + + def find_next( self, tag, att, match): + ret = mupdf.fz_dom_find_next( self.this, tag, att, match) + if ret.m_internal: + return Xml( ret) + + @property + def first_child( self): + if mupdf.fz_xml_text( self.this): + # text node, has no child. + return + ret = mupdf.fz_dom_first_child( self) + if ret.m_internal: + return Xml( ret) + + def get_attribute_value( self, key): + assert key + return mupdf.fz_dom_attribute( self.this, key) + + def get_attributes( self): + if mupdf.fz_xml_text( self.this): + # text node, has no attributes. + return + result = dict() + i = 0 + while 1: + val, key = mupdf.fz_dom_get_attribute( self.this, i) + if not val or not key: + break + result[ key] = val + i += 1 + return result + + def insert_after( self, node): + mupdf.fz_dom_insert_after( self.this, node.this) + + def insert_before( self, node): + mupdf.fz_dom_insert_before( self.this, node.this) + + def insert_text(self, text): + lines = text.splitlines() + line_count = len(lines) + for i, line in enumerate(lines): + self.append_child(self.create_text_node(line)) + if i < line_count - 1: + self.append_child(self.create_element("br")) + return self + + @property + def is_text(self): + """Check if this is a text node.""" + return self.text is not None + + @property + def last_child(self): + """Return last child node.""" + child = self.first_child + if child is None: + return None + while True: + next = child.next + if not next: + return child + child = next + + @property + def next( self): + ret = mupdf.fz_dom_next( self.this) + if ret.m_internal: + return Xml( ret) + + @property + def parent( self): + ret = mupdf.fz_dom_parent( self.this) + if ret.m_internal: + return Xml( ret) + + @property + def previous( self): + ret = mupdf.fz_dom_previous( self.this) + if ret.m_internal: + return Xml( ret) + + def remove( self): + mupdf.fz_dom_remove( self.this) + + def remove_attribute( self, key): + assert key + mupdf.fz_dom_remove_attribute( self.this, key) + + @property + def root( self): + return Xml( mupdf.fz_xml_root( self.this)) + + def set_align(self, align): + """Set text alignment via CSS style""" + text = "text-align: %s" + if isinstance( align, str): + t = align + elif align == TEXT_ALIGN_LEFT: + t = "left" + elif align == TEXT_ALIGN_CENTER: + t = "center" + elif align == TEXT_ALIGN_RIGHT: + t = "right" + elif align == TEXT_ALIGN_JUSTIFY: + t = "justify" + else: + raise ValueError(f"Unrecognised {align=}") + text = text % t + self.add_style(text) + return self + + def set_attribute( self, key, value): + assert key + mupdf.fz_dom_add_attribute( self.this, key, value) + + def set_bgcolor(self, color): + """Set background color via CSS style""" + text = f"background-color: %s" % self.color_text(color) + self.add_style(text) # does not work on span level + return self + + def set_bold(self, val=True): + """Set bold on / off via CSS style""" + if val: + val="bold" + else: + val="normal" + text = "font-weight: %s" % val + self.append_styled_span(text) + return self + + def set_color(self, color): + """Set text color via CSS style""" + text = f"color: %s" % self.color_text(color) + self.append_styled_span(text) + return self + + def set_columns(self, cols): + """Set number of text columns via CSS style""" + text = f"columns: {cols}" + self.append_styled_span(text) + return self + + def set_font(self, font): + """Set font-family name via CSS style""" + text = "font-family: %s" % font + self.append_styled_span(text) + return self + + def set_fontsize(self, fontsize): + """Set font size name via CSS style""" + if type(fontsize) is str: + px="" + else: + px="px" + text = f"font-size: {fontsize}{px}" + self.append_styled_span(text) + return self + + def set_id(self, unique): + """Set a unique id.""" + # check uniqueness + root = self.root + if root.find(None, "id", unique): + raise ValueError(f"id '{unique}' already exists") + self.set_attribute("id", unique) + return self + + def set_italic(self, val=True): + """Set italic on / off via CSS style""" + if val: + val="italic" + else: + val="normal" + text = "font-style: %s" % val + self.append_styled_span(text) + return self + + def set_leading(self, leading): + """Set inter-line spacing value via CSS style - block-level only.""" + text = f"-mupdf-leading: {leading}" + self.add_style(text) + return self + + def set_letter_spacing(self, spacing): + """Set inter-letter spacing value via CSS style""" + text = f"letter-spacing: {spacing}" + self.append_styled_span(text) + return self + + def set_lineheight(self, lineheight): + """Set line height name via CSS style - block-level only.""" + text = f"line-height: {lineheight}" + self.add_style(text) + return self + + def set_margins(self, val): + """Set margin values via CSS style""" + text = "margins: %s" % val + self.append_styled_span(text) + return self + + def set_opacity(self, opacity): + """Set opacity via CSS style""" + text = f"opacity: {opacity}" + self.append_styled_span(text) + return self + + def set_pagebreak_after(self): + """Insert a page break after this node.""" + text = "page-break-after: always" + self.add_style(text) + return self + + def set_pagebreak_before(self): + """Insert a page break before this node.""" + text = "page-break-before: always" + self.add_style(text) + return self + + def set_properties( + self, + align=None, + bgcolor=None, + bold=None, + color=None, + columns=None, + font=None, + fontsize=None, + indent=None, + italic=None, + leading=None, + letter_spacing=None, + lineheight=None, + margins=None, + pagebreak_after=None, + pagebreak_before=None, + word_spacing=None, + unqid=None, + cls=None, + ): + """Set any or all properties of a node. + + To be used for existing nodes preferably. + """ + root = self.root + temp = root.add_division() + if align is not None: + temp.set_align(align) + if bgcolor is not None: + temp.set_bgcolor(bgcolor) + if bold is not None: + temp.set_bold(bold) + if color is not None: + temp.set_color(color) + if columns is not None: + temp.set_columns(columns) + if font is not None: + temp.set_font(font) + if fontsize is not None: + temp.set_fontsize(fontsize) + if indent is not None: + temp.set_text_indent(indent) + if italic is not None: + temp.set_italic(italic) + if leading is not None: + temp.set_leading(leading) + if letter_spacing is not None: + temp.set_letter_spacing(letter_spacing) + if lineheight is not None: + temp.set_lineheight(lineheight) + if margins is not None: + temp.set_margins(margins) + if pagebreak_after is not None: + temp.set_pagebreak_after() + if pagebreak_before is not None: + temp.set_pagebreak_before() + if word_spacing is not None: + temp.set_word_spacing(word_spacing) + if unqid is not None: + self.set_id(unqid) + if cls is not None: + self.add_class(cls) + + styles = [] + top_style = temp.get_attribute_value("style") + if top_style is not None: + styles.append(top_style) + child = temp.first_child + while child: + styles.append(child.get_attribute_value("style")) + child = child.first_child + self.set_attribute("style", ";".join(styles)) + temp.remove() + return self + + def set_text_indent(self, indent): + """Set text indentation name via CSS style - block-level only.""" + text = f"text-indent: {indent}" + self.add_style(text) + return self + + def set_underline(self, val="underline"): + text = "text-decoration: %s" % val + self.append_styled_span(text) + return self + + def set_word_spacing(self, spacing): + """Set inter-word spacing value via CSS style""" + text = f"word-spacing: {spacing}" + self.append_styled_span(text) + return self + + def span_bottom(self): + """Find deepest level in stacked spans.""" + parent = self + child = self.last_child + if child is None: + return None + while child.is_text: + child = child.previous + if child is None: + break + if child is None or child.tagname != "span": + return None + + while True: + if child is None: + return parent + if child.tagname in ("a", "sub","sup","body") or child.is_text: + child = child.next + continue + if child.tagname == "span": + parent = child + child = child.first_child + else: + return parent + + @property + def tagname( self): + return mupdf.fz_xml_tag( self.this) + + @property + def text( self): + return mupdf.fz_xml_text( self.this) + + add_var = add_code + add_samp = add_code + add_kbd = add_code + + +class Colorspace: + + def __init__(self, type_): + """Supported are GRAY, RGB and CMYK.""" + if isinstance( type_, mupdf.FzColorspace): + self.this = type_ + elif type_ == CS_GRAY: + self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY) + elif type_ == CS_CMYK: + self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK) + elif type_ == CS_RGB: + self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB) + else: + self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB) + + def __repr__(self): + x = ("", "GRAY", "", "RGB", "CMYK")[self.n] + return "Colorspace(CS_%s) - %s" % (x, self.name) + + def _name(self): + return mupdf.fz_colorspace_name(self.this) + + @property + def n(self): + """Size of one pixel.""" + return mupdf.fz_colorspace_n(self.this) + + @property + def name(self): + """Name of the Colorspace.""" + return self._name() + + +class DeviceWrapper: + def __init__(self, *args): + if args_match( args, mupdf.FzDevice): + device, = args + self.this = device + elif args_match( args, Pixmap, None): + pm, clip = args + bbox = JM_irect_from_py( clip) + if mupdf.fz_is_infinite_irect( bbox): + self.this = mupdf.fz_new_draw_device( mupdf.FzMatrix(), pm) + else: + self.this = mupdf.fz_new_draw_device_with_bbox( mupdf.FzMatrix(), pm, bbox) + elif args_match( args, mupdf.FzDisplayList): + dl, = args + self.this = mupdf.fz_new_list_device( dl) + elif args_match( args, mupdf.FzStextPage, None): + tp, flags = args + opts = mupdf.FzStextOptions( flags) + self.this = mupdf.fz_new_stext_device( tp, opts) + else: + raise Exception( f'Unrecognised args for DeviceWrapper: {args!r}') + + +class DisplayList: + def __del__(self): + if not type(self) is DisplayList: return + self.thisown = False + + def __init__(self, *args): + if len(args) == 1 and isinstance(args[0], mupdf.FzRect): + self.this = mupdf.FzDisplayList(args[0]) + elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList): + self.this = args[0] + else: + assert 0, f'Unrecognised {args=}' + + def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None): + if isinstance(colorspace, Colorspace): + colorspace = colorspace.this + else: + colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB) + val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None) + val.thisown = True + return val + + def get_textpage(self, flags=3): + """Make a TextPage from a DisplayList.""" + stext_options = mupdf.FzStextOptions() + stext_options.flags = flags + val = mupdf.FzStextPage(self.this, stext_options) + val.thisown = True + return val + + @property + def rect(self): + val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this)) + val = Rect(val) + return val + + def run(self, dw, m, area): + mupdf.fz_run_display_list( + self.this, + dw.device, + JM_matrix_from_py(m), + JM_rect_from_py(area), + mupdf.FzCookie(), + ) + +if g_use_extra: + extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf + + +class Document: + + def __contains__(self, loc) -> bool: + if type(loc) is int: + if loc < self.page_count: + return True + return False + if type(loc) not in (tuple, list) or len(loc) != 2: + return False + chapter, pno = loc + if (0 + or not isinstance(chapter, int) + or chapter < 0 + or chapter >= self.chapter_count + ): + return False + if (0 + or not isinstance(pno, int) + or pno < 0 + or pno >= self.chapter_page_count(chapter) + ): + return False + return True + + def __delitem__(self, i)->None: + if not self.is_pdf: + raise ValueError("is no PDF") + if type(i) is int: + return self.delete_page(i) + if type(i) in (list, tuple, range): + return self.delete_pages(i) + if type(i) is not slice: + raise ValueError("bad argument type") + pc = self.page_count + start = i.start if i.start else 0 + stop = i.stop if i.stop else pc + step = i.step if i.step else 1 + while start < 0: + start += pc + if start >= pc: + raise ValueError("bad page number(s)") + while stop < 0: + stop += pc + if stop > pc: + raise ValueError("bad page number(s)") + return self.delete_pages(range(start, stop, step)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + @typing.overload + def __getitem__(self, i: int = 0) -> Page: + ... + + if sys.version_info >= (3, 9): + @typing.overload + def __getitem__(self, i: slice) -> list[Page]: + ... + + @typing.overload + def __getitem__(self, i: tuple[int, int]) -> Page: + ... + + def __getitem__(self, i=0): + if isinstance(i, slice): + return [self[j] for j in range(*i.indices(len(self)))] + assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)), \ + f'Invalid item number: {i=}.' + if i not in self: + raise IndexError(f"page {i} not in document") + return self.load_page(i) + + def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11): + """Creates a document. Use 'open' as a synonym. + + Notes: + Basic usages: + open() - new PDF document + open(filename) - string or pathlib.Path, must have supported + file extension. + open(type, buffer) - type: valid extension, buffer: bytes object. + open(stream=buffer, filetype=type) - keyword version of previous. + open(filename, fileype=type) - filename with unrecognized extension. + rect, width, height, fontsize: layout reflowable document + on open (e.g. EPUB). Ignored if n/a. + """ + # We temporarily set JM_mupdf_show_errors=0 while we are constructing, + # then restore its original value in a `finally:` block. + # + global JM_mupdf_show_errors + JM_mupdf_show_errors_old = JM_mupdf_show_errors + JM_mupdf_show_errors = 0 + + try: + self.is_closed = False + self.is_encrypted = False + self.is_encrypted = False + self.metadata = None + self.FontInfos = [] + self.Graftmaps = {} + self.ShownPages = {} + self.InsertedImages = {} + self._page_refs = weakref.WeakValueDictionary() + if isinstance(filename, mupdf.PdfDocument): + pdf_document = filename + self.this = pdf_document + self.this_is_pdf = True + return + + w = width + h = height + r = JM_rect_from_py(rect) + if not mupdf.fz_is_infinite_rect(r): + w = r.x1 - r.x0 + h = r.y1 - r.y0 + + self._name = filename + self.stream = stream + + if stream is not None: + if filename is not None and filetype is None: + # 2025-05-06: Use as the filetype. This is + # reversing precedence - we used to use if both + # were set. + filetype = filename + if isinstance(stream, (bytes, memoryview)): + pass + elif isinstance(stream, bytearray): + stream = bytes(stream) + elif isinstance(stream, io.BytesIO): + stream = stream.getvalue() + else: + raise TypeError(f"bad stream: {type(stream)=}.") + self.stream = stream + + assert isinstance(stream, (bytes, memoryview)) + if len(stream) == 0: + # MuPDF raise an exception for this but also generates + # warnings, which is not very helpful for us. So instead we + # raise a specific exception. + raise EmptyFileError('Cannot open empty stream.') + + stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream)) + try: + doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2) + except Exception as e: + if g_exceptions_verbose > 1: exception_info() + raise FileDataError('Failed to open stream') from e + + elif filename: + assert not stream + if isinstance(filename, str): + pass + elif hasattr(filename, "absolute"): + filename = str(filename) + elif hasattr(filename, "name"): + filename = filename.name + else: + raise TypeError(f"bad filename: {type(filename)=} {filename=}.") + self._name = filename + + # Generate our own specific exceptions. This avoids MuPDF + # generating warnings etc. + if not os.path.exists(filename): + raise FileNotFoundError(f"no such file: '{filename}'") + elif not os.path.isfile(filename): + raise FileDataError(f"'{filename}' is no file") + elif os.path.getsize(filename) == 0: + raise EmptyFileError(f'Cannot open empty file: {filename=}.') + + if filetype: + # Override the type implied by . MuPDF does not + # have a way to do this directly so we open via a stream. + try: + fz_stream = mupdf.fz_open_file(filename) + doc = mupdf.fz_open_document_with_stream(filetype, fz_stream) + except Exception as e: + if g_exceptions_verbose > 1: exception_info() + raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e + else: + try: + doc = mupdf.fz_open_document(filename) + except Exception as e: + if g_exceptions_verbose > 1: exception_info() + raise FileDataError(f'Failed to open file {filename!r}.') from e + + else: + pdf = mupdf.PdfDocument() + doc = mupdf.FzDocument(pdf) + + if w > 0 and h > 0: + mupdf.fz_layout_document(doc, w, h, fontsize) + elif mupdf.fz_is_document_reflowable(doc): + mupdf.fz_layout_document(doc, 400, 600, 11) + + self.this = doc + + # fixme: not sure where self.thisown gets initialised in PyMuPDF. + # + self.thisown = True + + if self.thisown: + self._graft_id = TOOLS.gen_id() + if self.needs_pass: + self.is_encrypted = True + else: # we won't init until doc is decrypted + self.init_doc() + # the following hack detects invalid/empty SVG files, which else may lead + # to interpreter crashes + if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower(): + try: + _ = self.convert_to_pdf() # this seems to always work + except Exception as e: + if g_exceptions_verbose > 1: exception_info() + raise FileDataError("cannot open broken document") from e + + if g_use_extra: + self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument) + if self.this_is_pdf: + self.page_count2 = extra.page_count_pdf + else: + self.page_count2 = extra.page_count_fz + finally: + JM_mupdf_show_errors = JM_mupdf_show_errors_old + + def __len__(self) -> int: + return self.page_count + + def __repr__(self) -> str: + m = "closed " if self.is_closed else "" + if self.stream is None: + if self.name == "": + return m + "Document()" % self._graft_id + return m + "Document('%s')" % (self.name,) + return m + "Document('%s', )" % (self.name, self._graft_id) + + def _addFormFont(self, name, font): + """Add new form font.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return + fonts = mupdf.pdf_dict_getl( + mupdf.pdf_trailer( pdf), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('DR'), + PDF_NAME('Font'), + ) + if not fonts.m_internal or not mupdf.pdf_is_dict( fonts): + raise RuntimeError( "PDF has no form fonts yet") + k = mupdf.pdf_new_name( name) + v = JM_pdf_obj_from_str( pdf, font) + mupdf.pdf_dict_put( fonts, k, v) + + def del_toc_item( + self, + idx: int, + ) -> None: + """Delete TOC / bookmark item by index.""" + xref = self.get_outline_xrefs()[idx] + self._remove_toc_item(xref) + + def _delToC(self): + """Delete the TOC.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + xrefs = [] # create Python list + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return xrefs # not a pdf + # get the main root + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + # get the outline root + olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines')) + if not olroot.m_internal: + return xrefs # no outlines or some problem + + first = mupdf.pdf_dict_get(olroot, PDF_NAME('First')) # first outline + + xrefs = JM_outline_xrefs(first, xrefs) + xref_count = len(xrefs) + + olroot_xref = mupdf.pdf_to_num(olroot) # delete OL root + mupdf.pdf_delete_object(pdf, olroot_xref) # delete OL root + mupdf.pdf_dict_del(root, PDF_NAME('Outlines')) # delete OL root + + for i in range(xref_count): + _, xref = JM_INT_ITEM(xrefs, i) + mupdf.pdf_delete_object(pdf, xref) # delete outline item + xrefs.append(olroot_xref) + val = xrefs + self.init_doc() + return val + + def _delete_page(self, pno): + pdf = _as_pdf_document(self) + mupdf.pdf_delete_page( pdf, pno) + if pdf.m_internal.rev_page_map: + mupdf.ll_pdf_drop_page_tree( pdf.m_internal) + + def _deleteObject(self, xref): + """Delete object.""" + pdf = _as_pdf_document(self) + if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1): + raise ValueError( MSG_BAD_XREF) + mupdf.pdf_delete_object(pdf, xref) + + def _do_links( + doc1: 'Document', + doc2: 'Document', + from_page: int = -1, + to_page: int = -1, + start_at: int = -1, + ) -> None: + """Insert links contained in copied page range into destination PDF. + + Parameter values **must** equal those of method insert_pdf(), which must + have been previously executed. + """ + #pymupdf.log( 'utils.do_links()') + # -------------------------------------------------------------------------- + # internal function to create the actual "/Annots" object string + # -------------------------------------------------------------------------- + def cre_annot(lnk, xref_dst, pno_src, ctm): + """Create annotation object string for a passed-in link.""" + + r = lnk["from"] * ctm # rect in PDF coordinates + rect = _format_g(tuple(r)) + if lnk["kind"] == LINK_GOTO: + txt = annot_skel["goto1"] # annot_goto + idx = pno_src.index(lnk["page"]) + p = lnk["to"] * ctm # target point in PDF coordinates + annot = txt(xref_dst[idx], p.x, p.y, lnk["zoom"], rect) + + elif lnk["kind"] == LINK_GOTOR: + if lnk["page"] >= 0: + txt = annot_skel["gotor1"] # annot_gotor + pnt = lnk.get("to", Point(0, 0)) # destination point + if type(pnt) is not Point: + pnt = Point(0, 0) + annot = txt( + lnk["page"], + pnt.x, + pnt.y, + lnk["zoom"], + lnk["file"], + lnk["file"], + rect, + ) + else: + txt = annot_skel["gotor2"] # annot_gotor_n + to = get_pdf_str(lnk["to"]) + to = to[1:-1] + f = lnk["file"] + annot = txt(to, f, rect) + + elif lnk["kind"] == LINK_LAUNCH: + txt = annot_skel["launch"] # annot_launch + annot = txt(lnk["file"], lnk["file"], rect) + + elif lnk["kind"] == LINK_URI: + txt = annot_skel["uri"] # annot_uri + annot = txt(lnk["uri"], rect) + + else: + annot = "" + + return annot + + # -------------------------------------------------------------------------- + + # validate & normalize parameters + if from_page < 0: + fp = 0 + elif from_page >= doc2.page_count: + fp = doc2.page_count - 1 + else: + fp = from_page + + if to_page < 0 or to_page >= doc2.page_count: + tp = doc2.page_count - 1 + else: + tp = to_page + + if start_at < 0: + raise ValueError("'start_at' must be >= 0") + sa = start_at + + incr = 1 if fp <= tp else -1 # page range could be reversed + + # lists of source / destination page numbers + pno_src = list(range(fp, tp + incr, incr)) + pno_dst = [sa + i for i in range(len(pno_src))] + + # lists of source / destination page xrefs + xref_src = [] + xref_dst = [] + for i in range(len(pno_src)): + p_src = pno_src[i] + p_dst = pno_dst[i] + old_xref = doc2.page_xref(p_src) + new_xref = doc1.page_xref(p_dst) + xref_src.append(old_xref) + xref_dst.append(new_xref) + + # create the links for each copied page in destination PDF + for i in range(len(xref_src)): + page_src = doc2[pno_src[i]] # load source page + links = page_src.get_links() # get all its links + #log( '{pno_src=}') + #log( '{type(page_src)=}') + #log( '{page_src=}') + #log( '{=i len(links)}') + if len(links) == 0: # no links there + page_src = None + continue + ctm = ~page_src.transformation_matrix # calc page transformation matrix + page_dst = doc1[pno_dst[i]] # load destination page + link_tab = [] # store all link definitions here + for l in links: + if l["kind"] == LINK_GOTO and (l["page"] not in pno_src): + continue # GOTO link target not in copied pages + annot_text = cre_annot(l, xref_dst, pno_src, ctm) + if annot_text: + link_tab.append(annot_text) + if link_tab != []: + page_dst._addAnnot_FromString( tuple(link_tab)) + #log( 'utils.do_links() returning.') + + def _do_widgets( + tar: 'Document', + src: 'Document', + graftmap, + from_page: int = -1, + to_page: int = -1, + start_at: int = -1, + join_duplicates=0, + ) -> None: + """Insert widgets of copied page range into target PDF. + + Parameter values **must** equal those of method insert_pdf() which + must have been previously executed. + """ + if not src.is_form_pdf: # nothing to do: source PDF has no fields + return + + def clean_kid_parents(acro_fields): + """ Make sure all kids have correct "Parent" pointers.""" + for i in range(acro_fields.pdf_array_len()): + parent = acro_fields.pdf_array_get(i) + kids = parent.pdf_dict_get(PDF_NAME("Kids")) + for j in range(kids.pdf_array_len()): + kid = kids.pdf_array_get(j) + kid.pdf_dict_put(PDF_NAME("Parent"), parent) + + def join_widgets(pdf, acro_fields, xref1, xref2, name): + """Called for each pair of widgets having the same name. + + Args: + pdf: target MuPDF document + acro_fields: object Root/AcroForm/Fields + xref1, xref2: widget xrefs having same names + name: (str) the name + + Result: + Defined or updated widget parent that points to both widgets. + """ + + def re_target(pdf, acro_fields, xref1, kids1, xref2, kids2): + """Merge widget in xref2 into "Kids" list of widget xref1. + + Args: + xref1, kids1: target widget and its "Kids" array. + xref2, kids2: source wwidget and its "Kids" array (may be empty). + """ + # make indirect objects from widgets + w1_ind = mupdf.pdf_new_indirect(pdf, xref1, 0) + w2_ind = mupdf.pdf_new_indirect(pdf, xref2, 0) + # find source widget in "Fields" array + idx = acro_fields.pdf_array_find(w2_ind) + acro_fields.pdf_array_delete(idx) + + if not kids2.pdf_is_array(): # source widget has no kids + widget = mupdf.pdf_load_object(pdf, xref2) + + # delete name from widget and insert target as parent + widget.pdf_dict_del(PDF_NAME("T")) + widget.pdf_dict_put(PDF_NAME("Parent"), w1_ind) + + # put in target Kids + kids1.pdf_array_push(w2_ind) + else: # copy source kids to target kids + for i in range(kids2.pdf_array_len()): + kid = kids2.pdf_array_get(i) + kid.pdf_dict_put(PDF_NAME("Parent"), w1_ind) + kid_ind = mupdf.pdf_new_indirect(pdf, kid.pdf_to_num(), 0) + kids1.pdf_array_push(kid_ind) + + def new_target(pdf, acro_fields, xref1, w1, xref2, w2, name): + """Make new "Parent" for two widgets with same name. + + Args: + xref1, w1: first widget + xref2, w2: second widget + name: field name + + Result: + Both widgets have no "Kids". We create a new object with the + name and a "Kids" array containing the widgets. + Original widgets must be removed from AcroForm/Fields. + """ + # make new "Parent" object + new = mupdf.pdf_new_dict(pdf, 5) + new.pdf_dict_put_text_string(PDF_NAME("T"), name) + kids = new.pdf_dict_put_array(PDF_NAME("Kids"), 2) + new_obj = mupdf.pdf_add_object(pdf, new) + new_obj_xref = new_obj.pdf_to_num() + new_ind = mupdf.pdf_new_indirect(pdf, new_obj_xref, 0) + + # copy over some required source widget properties + ft = w1.pdf_dict_get(PDF_NAME("FT")) + w1.pdf_dict_del(PDF_NAME("FT")) + new_obj.pdf_dict_put(PDF_NAME("FT"), ft) + + aa = w1.pdf_dict_get(PDF_NAME("AA")) + w1.pdf_dict_del(PDF_NAME("AA")) + new_obj.pdf_dict_put(PDF_NAME("AA"), aa) + + # remove name field, insert "Parent" field in source widgets + w1.pdf_dict_del(PDF_NAME("T")) + w1.pdf_dict_put(PDF_NAME("Parent"), new_ind) + w2.pdf_dict_del(PDF_NAME("T")) + w2.pdf_dict_put(PDF_NAME("Parent"), new_ind) + + # put source widgets in "kids" array + ind1 = mupdf.pdf_new_indirect(pdf, xref1, 0) + ind2 = mupdf.pdf_new_indirect(pdf, xref2, 0) + kids.pdf_array_push(ind1) + kids.pdf_array_push(ind2) + + # remove source widgets from "AcroForm/Fields" + idx = acro_fields.pdf_array_find(ind1) + acro_fields.pdf_array_delete(idx) + idx = acro_fields.pdf_array_find(ind2) + acro_fields.pdf_array_delete(idx) + + acro_fields.pdf_array_push(new_ind) + + w1 = mupdf.pdf_load_object(pdf, xref1) + w2 = mupdf.pdf_load_object(pdf, xref2) + kids1 = w1.pdf_dict_get(PDF_NAME("Kids")) + kids2 = w2.pdf_dict_get(PDF_NAME("Kids")) + + # check which widget has a suitable "Kids" array + if kids1.pdf_is_array(): + re_target(pdf, acro_fields, xref1, kids1, xref2, kids2) # pylint: disable=arguments-out-of-order + elif kids2.pdf_is_array(): + re_target(pdf, acro_fields, xref2, kids2, xref1, kids1) # pylint: disable=arguments-out-of-order + else: + new_target(pdf, acro_fields, xref1, w1, xref2, w2, name) # pylint: disable=arguments-out-of-order + + def get_kids(parent, kids_list): + """Return xref list of leaf kids for a parent. + + Call with an empty list. + """ + kids = mupdf.pdf_dict_get(parent, PDF_NAME("Kids")) + if not kids.pdf_is_array(): + return kids_list + for i in range(kids.pdf_array_len()): + kid = kids.pdf_array_get(i) + if mupdf.pdf_is_dict(mupdf.pdf_dict_get(kid, PDF_NAME("Kids"))): + kids_list = get_kids(kid, kids_list) + else: + kids_list.append(kid.pdf_to_num()) + return kids_list + + def kids_xrefs(widget): + """Get the xref of top "Parent" and the list of leaf widgets.""" + kids_list = [] + parent = mupdf.pdf_dict_get(widget, PDF_NAME("Parent")) + parent_xref = parent.pdf_to_num() + if parent_xref == 0: + return parent_xref, kids_list + kids_list = get_kids(parent, kids_list) + return parent_xref, kids_list + + def deduplicate_names(pdf, acro_fields, join_duplicates=False): + """Handle any widget name duplicates caused by the merge.""" + names = {} # key is a widget name, value a list of widgets having it. + + # extract all names and widgets in "AcroForm/Fields" + for i in range(mupdf.pdf_array_len(acro_fields)): + wobject = mupdf.pdf_array_get(acro_fields, i) + xref = wobject.pdf_to_num() + + # extract widget name and collect widget(s) using it + T = mupdf.pdf_dict_get_text_string(wobject, PDF_NAME("T")) + xrefs = names.get(T, []) + xrefs.append(xref) + names[T] = xrefs + + for name, xrefs in names.items(): + if len(xrefs) < 2: + continue + xref0, xref1 = xrefs[:2] # only exactly 2 should occur! + if join_duplicates: # combine fields with equal names + join_widgets(pdf, acro_fields, xref0, xref1, name) + else: # make field names unique + newname = name + f" [{xref1}]" # append this to the name + wobject = mupdf.pdf_load_object(pdf, xref1) + wobject.pdf_dict_put_text_string(PDF_NAME("T"), newname) + + clean_kid_parents(acro_fields) + + def get_acroform(doc): + """Retrieve the AcroForm dictionary form a PDF.""" + pdf = mupdf.pdf_document_from_fz_document(doc) + # AcroForm (= central form field info) + return mupdf.pdf_dict_getp(mupdf.pdf_trailer(pdf), "Root/AcroForm") + + tarpdf = mupdf.pdf_document_from_fz_document(tar) + srcpdf = mupdf.pdf_document_from_fz_document(src) + + if tar.is_form_pdf: + # target is a Form PDF, so use it to include source fields + acro = get_acroform(tar) + # Important arrays in AcroForm + acro_fields = acro.pdf_dict_get(PDF_NAME("Fields")) + tar_co = acro.pdf_dict_get(PDF_NAME("CO")) + if not tar_co.pdf_is_array(): + tar_co = acro.pdf_dict_put_array(PDF_NAME("CO"), 5) + else: + # target is no Form PDF, so copy over source AcroForm + acro = mupdf.pdf_deep_copy_obj(get_acroform(src)) # make a copy + + # Clear "Fields" and "CO" arrays: will be populated by page fields. + # This is required to avoid copying unneeded objects. + acro.pdf_dict_del(PDF_NAME("Fields")) + acro.pdf_dict_put_array(PDF_NAME("Fields"), 5) + acro.pdf_dict_del(PDF_NAME("CO")) + acro.pdf_dict_put_array(PDF_NAME("CO"), 5) + + # Enrich AcroForm for copying to target + acro_graft = mupdf.pdf_graft_mapped_object(graftmap, acro) + + # Insert AcroForm into target PDF + acro_tar = mupdf.pdf_add_object(tarpdf, acro_graft) + acro_fields = acro_tar.pdf_dict_get(PDF_NAME("Fields")) + tar_co = acro_tar.pdf_dict_get(PDF_NAME("CO")) + + # get its xref and insert it into target catalog + tar_xref = acro_tar.pdf_to_num() + acro_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(tarpdf), PDF_NAME("Root")) + root.pdf_dict_put(PDF_NAME("AcroForm"), acro_tar_ind) + + if from_page <= to_page: + src_range = range(from_page, to_page + 1) + else: + src_range = range(from_page, to_page - 1, -1) + + parents = {} # information about widget parents + + # remove "P" owning page reference from all widgets of all source pages + for i in src_range: + src_page = src[i] + for xref in [ + xref + for xref, wtype, _ in src_page.annot_xrefs() + if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member + ]: + w_obj = mupdf.pdf_load_object(srcpdf, xref) + w_obj.pdf_dict_del(PDF_NAME("P")) + + # get the widget's parent structure + parent_xref, old_kids = kids_xrefs(w_obj) + if parent_xref: + parents[parent_xref] = { + "new_xref": 0, + "old_kids": old_kids, + "new_kids": [], + } + # Copy over Parent widgets first - they are not page-dependent + for xref in parents.keys(): # pylint: disable=consider-using-dict-items + parent = mupdf.pdf_load_object(srcpdf, xref) + parent_graft = mupdf.pdf_graft_mapped_object(graftmap, parent) + parent_tar = mupdf.pdf_add_object(tarpdf, parent_graft) + kids_xrefs_new = get_kids(parent_tar, []) + parent_xref_new = parent_tar.pdf_to_num() + parent_ind = mupdf.pdf_new_indirect(tarpdf, parent_xref_new, 0) + acro_fields.pdf_array_push(parent_ind) + parents[xref]["new_xref"] = parent_xref_new + parents[xref]["new_kids"] = kids_xrefs_new + + for i in range(len(src_range)): + # read first copied over page in target + tar_page = tar[start_at + i] + + # read the original page in the source PDF + src_page = src[src_range[i]] + + # now walk through source page widgets and copy over + w_xrefs = [ # widget xrefs of the source page + xref + for xref, wtype, _ in src_page.annot_xrefs() + if wtype == mupdf.PDF_ANNOT_WIDGET # pylint: disable=no-member + ] + if not w_xrefs: # no widgets on this source page + continue + + # convert to formal PDF page + tar_page_pdf = mupdf.pdf_page_from_fz_page(tar_page) + + # extract annotations array + tar_annots = mupdf.pdf_dict_get(tar_page_pdf.obj(), PDF_NAME("Annots")) + if not mupdf.pdf_is_array(tar_annots): + tar_annots = mupdf.pdf_dict_put_array( + tar_page_pdf.obj(), PDF_NAME("Annots"), 5 + ) + + for xref in w_xrefs: + w_obj = mupdf.pdf_load_object(srcpdf, xref) + + # check if field takes part in inter-field validations + is_aac = mupdf.pdf_is_dict(mupdf.pdf_dict_getp(w_obj, "AA/C")) + + # check if parent of widget already in target + parent_xref = mupdf.pdf_to_num( + w_obj.pdf_dict_get(PDF_NAME("Parent")) + ) + if parent_xref == 0: # parent not in target yet + try: + w_obj_graft = mupdf.pdf_graft_mapped_object(graftmap, w_obj) + except Exception as e: + message_warning(f"cannot copy widget at {xref=}: {e}") + continue + w_obj_tar = mupdf.pdf_add_object(tarpdf, w_obj_graft) + tar_xref = w_obj_tar.pdf_to_num() + w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) + mupdf.pdf_array_push(tar_annots, w_obj_tar_ind) + mupdf.pdf_array_push(acro_fields, w_obj_tar_ind) + else: + parent = parents[parent_xref] + idx = parent["old_kids"].index(xref) # search for xref in parent + tar_xref = parent["new_kids"][idx] + w_obj_tar_ind = mupdf.pdf_new_indirect(tarpdf, tar_xref, 0) + mupdf.pdf_array_push(tar_annots, w_obj_tar_ind) + + # Into "AcroForm/CO" if a computation field. + if is_aac: + mupdf.pdf_array_push(tar_co, w_obj_tar_ind) + + deduplicate_names(tarpdf, acro_fields, join_duplicates=join_duplicates) + + def _embeddedFileGet(self, idx): + pdf = _as_pdf_document(self) + names = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + entry = mupdf.pdf_array_get(names, 2*idx+1) + filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F')) + buf = mupdf.pdf_load_stream(filespec) + cont = JM_BinFromBuffer(buf) + return cont + + def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int: + filenames = self.embfile_names() + msg = "'%s' not in EmbeddedFiles array." % str(item) + if item in filenames: + idx = filenames.index(item) + elif item in range(len(filenames)): + idx = item + else: + raise ValueError(msg) + return idx + + def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None): + pdf = _as_pdf_document(self) + data = JM_BufferFromBytes(buffer_) + if not data.m_internal: + raise TypeError( MSG_BAD_BUFFER) + + names = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + if not mupdf.pdf_is_array(names): + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + names = mupdf.pdf_new_array(pdf, 6) # an even number! + mupdf.pdf_dict_putl( + root, + names, + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + fileentry = JM_embed_file(pdf, data, filename, ufilename, desc, 1) + xref = mupdf.pdf_to_num( + mupdf.pdf_dict_getl(fileentry, PDF_NAME('EF'), PDF_NAME('F')) + ) + mupdf.pdf_array_push(names, mupdf.pdf_new_text_string(name)) + mupdf.pdf_array_push(names, fileentry) + return xref + + def _embfile_del(self, idx): + pdf = _as_pdf_document(self) + names = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + mupdf.pdf_array_delete(names, idx + 1) + mupdf.pdf_array_delete(names, idx) + + def _embfile_info(self, idx, infodict): + pdf = _as_pdf_document(self) + xref = 0 + ci_xref=0 + + trailer = mupdf.pdf_trailer(pdf) + + names = mupdf.pdf_dict_getl( + trailer, + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + o = mupdf.pdf_array_get(names, 2*idx+1) + ci = mupdf.pdf_dict_get(o, PDF_NAME('CI')) + if ci.m_internal: + ci_xref = mupdf.pdf_to_num(ci) + infodict["collection"] = ci_xref + name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('F'))) + infodict[dictkey_filename] = JM_EscapeStrFromStr(name) + + name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('UF'))) + infodict[dictkey_ufilename] = JM_EscapeStrFromStr(name) + + name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(o, PDF_NAME('Desc'))) + infodict[dictkey_descr] = JM_UnicodeFromStr(name) + + len_ = -1 + DL = -1 + fileentry = mupdf.pdf_dict_getl(o, PDF_NAME('EF'), PDF_NAME('F')) + xref = mupdf.pdf_to_num(fileentry) + o = mupdf.pdf_dict_get(fileentry, PDF_NAME('Length')) + if o.m_internal: + len_ = mupdf.pdf_to_int(o) + + o = mupdf.pdf_dict_get(fileentry, PDF_NAME('DL')) + if o.m_internal: + DL = mupdf.pdf_to_int(o) + else: + o = mupdf.pdf_dict_getl(fileentry, PDF_NAME('Params'), PDF_NAME('Size')) + if o.m_internal: + DL = mupdf.pdf_to_int(o) + infodict[dictkey_size] = DL + infodict[dictkey_length] = len_ + return xref + + def _embfile_names(self, namelist): + """Get list of embedded file names.""" + pdf = _as_pdf_document(self) + names = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + if mupdf.pdf_is_array(names): + n = mupdf.pdf_array_len(names) + for i in range(0, n, 2): + val = JM_EscapeStrFromStr( + mupdf.pdf_to_text_string( + mupdf.pdf_array_get(names, i) + ) + ) + namelist.append(val) + + def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None): + pdf = _as_pdf_document(self) + xref = 0 + names = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + entry = mupdf.pdf_array_get(names, 2*idx+1) + + filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F')) + if not filespec.m_internal: + RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError) + res = JM_BufferFromBytes(buffer_) + if buffer_ and buffer_.m_internal and not res.m_internal: + raise TypeError( MSG_BAD_BUFFER) + if res.m_internal and buffer_ and buffer_.m_internal: + JM_update_stream(pdf, filespec, res, 1) + # adjust /DL and /Size parameters + len, _ = mupdf.fz_buffer_storage(res) + l = mupdf.pdf_new_int(len) + mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), l) + mupdf.pdf_dict_putl(filespec, l, PDF_NAME('Params'), PDF_NAME('Size')) + xref = mupdf.pdf_to_num(filespec) + if filename: + mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename) + + if ufilename: + mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename) + + if desc: + mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc) + return xref + + def _extend_toc_items(self, items): + """Add color info to all items of an extended TOC list.""" + if self.is_closed: + raise ValueError("document closed") + if g_use_extra: + return extra.Document_extend_toc_items( self.this, items) + pdf = _as_pdf_document(self) + zoom = "zoom" + bold = "bold" + italic = "italic" + collapse = "collapse" + + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + if not root.m_internal: + return + olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines')) + if not olroot.m_internal: + return + first = mupdf.pdf_dict_get(olroot, PDF_NAME('First')) + if not first.m_internal: + return + xrefs = [] + xrefs = JM_outline_xrefs(first, xrefs) + n = len(xrefs) + m = len(items) + if not n: + return + if n != m: + raise IndexError( "internal error finding outline xrefs") + + # update all TOC item dictionaries + for i in range(n): + xref = int(xrefs[i]) + item = items[i] + itemdict = item[3] + if not isinstance(itemdict, dict): + raise ValueError( "need non-simple TOC format") + itemdict[dictkey_xref] = xrefs[i] + bm = mupdf.pdf_load_object(pdf, xref) + flags = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('F'))) + if flags == 1: + itemdict[italic] = True + elif flags == 2: + itemdict[bold] = True + elif flags == 3: + itemdict[italic] = True + itemdict[bold] = True + count = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('Count'))) + if count < 0: + itemdict[collapse] = True + elif count > 0: + itemdict[collapse] = False + col = mupdf.pdf_dict_get(bm, PDF_NAME('C')) + if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3: + color = ( + mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)), + mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)), + mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)), + ) + itemdict[dictkey_color] = color + z=0 + obj = mupdf.pdf_dict_get(bm, PDF_NAME('Dest')) + if not obj.m_internal or not mupdf.pdf_is_array(obj): + obj = mupdf.pdf_dict_getl(bm, PDF_NAME('A'), PDF_NAME('D')) + if mupdf.pdf_is_array(obj) and mupdf.pdf_array_len(obj) == 5: + z = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, 4)) + itemdict[zoom] = float(z) + item[3] = itemdict + items[i] = item + + def _forget_page(self, page: Page): + """Remove a page from document page dict.""" + pid = id(page) + if pid in self._page_refs: + #self._page_refs[pid] = None + del self._page_refs[pid] + + def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0): + pdf = _as_pdf_document(self) + mylimit = limit + if mylimit < 256: + mylimit = 256 + if ordering >= 0: + data, size, index = mupdf.fz_lookup_cjk_font(ordering) + font = mupdf.fz_new_font_from_memory(None, data, size, index, 0) + else: + data, size = mupdf.fz_lookup_base14_font(bfname) + if data: + font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0) + else: + buf = JM_get_fontbuffer(pdf, xref) + if not buf.m_internal: + raise Exception("font at xref %d is not supported" % xref) + + font = mupdf.fz_new_font_from_buffer(None, buf, idx, 0) + wlist = [] + for i in range(mylimit): + glyph = mupdf.fz_encode_character(font, i) + adv = mupdf.fz_advance_glyph(font, glyph, 0) + if ordering >= 0: + glyph = i + if glyph > 0: + wlist.append( (glyph, adv)) + else: + wlist.append( (glyph, 0.0)) + return wlist + + def _get_page_labels(self): + pdf = _as_pdf_document(self) + rc = [] + pagelabels = mupdf.pdf_new_name("PageLabels") + obj = mupdf.pdf_dict_getl( mupdf.pdf_trailer(pdf), PDF_NAME('Root'), pagelabels) + if not obj.m_internal: + return rc + # simple case: direct /Nums object + nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Nums'))) + if nums.m_internal: + JM_get_page_labels(rc, nums) + return rc + # case: /Kids/Nums + nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_getl(obj, PDF_NAME('Kids'), PDF_NAME('Nums'))) + if nums.m_internal: + JM_get_page_labels(rc, nums) + return rc + # case: /Kids is an array of multiple /Nums + kids = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Kids'))) + if not kids.m_internal or not mupdf.pdf_is_array(kids): + return rc + n = mupdf.pdf_array_len(kids) + for i in range(n): + nums = mupdf.pdf_resolve_indirect( + mupdf.pdf_dict_get( + mupdf.pdf_array_get(kids, i), + PDF_NAME('Nums'), + ) + ) + JM_get_page_labels(rc, nums) + return rc + + def _getMetadata(self, key): + """Get metadata.""" + try: + return mupdf.fz_lookup_metadata2( self.this, key) + except Exception: + if g_exceptions_verbose > 2: exception_info() + return '' + + def _getOLRootNumber(self): + """Get xref of Outline Root, create it if missing.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + # get main root + root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')) + # get outline root + olroot = mupdf.pdf_dict_get( root, PDF_NAME('Outlines')) + if not olroot.m_internal: + olroot = mupdf.pdf_new_dict( pdf, 4) + mupdf.pdf_dict_put( olroot, PDF_NAME('Type'), PDF_NAME('Outlines')) + ind_obj = mupdf.pdf_add_object( pdf, olroot) + mupdf.pdf_dict_put( root, PDF_NAME('Outlines'), ind_obj) + olroot = mupdf.pdf_dict_get( root, PDF_NAME('Outlines')) + return mupdf.pdf_to_num( olroot) + + def _getPDFfileid(self): + """Get PDF file id.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return + idlist = [] + identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID')) + if identity.m_internal: + n = mupdf.pdf_array_len(identity) + for i in range(n): + o = mupdf.pdf_array_get(identity, i) + text = mupdf.pdf_to_text_string(o) + hex_ = binascii.hexlify(text) + idlist.append(hex_) + return idlist + + def _getPageInfo(self, pno, what): + """List fonts, images, XObjects used on a page.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + doc = self.this + pageCount = mupdf.pdf_count_pages(doc) if isinstance(doc, mupdf.PdfDocument) else mupdf.fz_count_pages(doc) + n = pno # pno < 0 is allowed + while n < 0: + n += pageCount # make it non-negative + if n >= pageCount: + raise ValueError( MSG_BAD_PAGENO) + pdf = _as_pdf_document(self) + pageref = mupdf.pdf_lookup_page_obj(pdf, n) + rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources) + liste = [] + tracer = [] + if rsrc.m_internal: + JM_scan_resources(pdf, rsrc, liste, what, 0, tracer) + return liste + + def _insert_font(self, fontfile=None, fontbuffer=None): + ''' + Utility: insert font from file or binary. + ''' + pdf = _as_pdf_document(self) + if not fontfile and not fontbuffer: + raise ValueError( MSG_FILE_OR_BUFFER) + value = JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1) + return value + + def _loadOutline(self): + """Load first outline.""" + doc = self.this + assert isinstance( doc, mupdf.FzDocument) + try: + ol = mupdf.fz_load_outline( doc) + except Exception: + if g_exceptions_verbose > 1: exception_info() + return + return Outline( ol) + + def _make_page_map(self): + """Make an array page number -> page object.""" + if self.is_closed: + raise ValueError("document closed") + assert 0, f'_make_page_map() is no-op' + + def _move_copy_page(self, pno, nb, before, copy): + """Move or copy a PDF page reference.""" + pdf = _as_pdf_document(self) + same = 0 + # get the two page objects ----------------------------------- + # locate the /Kids arrays and indices in each + + page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno) + + kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids')) + + page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb) + kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids')) + if before: # calc index of source page in target /Kids + pos = i2 + else: + pos = i2 + 1 + + # same /Kids array? ------------------------------------------ + same = mupdf.pdf_objcmp( kids1, kids2) + + # put source page in target /Kids array ---------------------- + if not copy and same != 0: # update parent in page object + mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2) + mupdf.pdf_array_insert( kids2, page1, pos) + + if same != 0: # different /Kids arrays ---------------------- + parent = parent2 + while parent.m_internal: # increase /Count objects in parents + count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count')) + mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1) + parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent')) + if not copy: # delete original item + mupdf.pdf_array_delete( kids1, i1) + parent = parent1 + while parent.m_internal: # decrease /Count objects in parents + count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count')) + mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count - 1) + parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent')) + else: # same /Kids array + if copy: # source page is copied + parent = parent2 + while parent.m_internal: # increase /Count object in parents + count = mupdf.pdf_dict_get_int( parent, PDF_NAME('Count')) + mupdf.pdf_dict_put_int( parent, PDF_NAME('Count'), count + 1) + parent = mupdf.pdf_dict_get( parent, PDF_NAME('Parent')) + else: + if i1 < pos: + mupdf.pdf_array_delete( kids1, i1) + else: + mupdf.pdf_array_delete( kids1, i1 + 1) + if pdf.m_internal.rev_page_map: # page map no longer valid: drop it + mupdf.ll_pdf_drop_page_tree( pdf.m_internal) + + self._reset_page_refs() + + def _newPage(self, pno=-1, width=595, height=842): + """Make a new PDF page.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if g_use_extra: + extra._newPage( self.this, pno, width, height) + else: + pdf = _as_pdf_document(self) + mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) + mediabox.x1 = width + mediabox.y1 = height + contents = mupdf.FzBuffer() + if pno < -1: + raise ValueError( MSG_BAD_PAGENO) + # create /Resources and /Contents objects + #resources = pdf.add_object(pdf.new_dict(1)) + resources = mupdf.pdf_add_new_dict(pdf, 1) + page_obj = mupdf.pdf_add_page( pdf, mediabox, 0, resources, contents) + mupdf.pdf_insert_page( pdf, pno, page_obj) + # fixme: pdf->dirty = 1; + + self._reset_page_refs() + return self[pno] + + def _remove_links_to(self, numbers): + pdf = _as_pdf_document(self) + _remove_dest_range(pdf, numbers) + + def _remove_toc_item(self, xref): + # "remove" bookmark by letting it point to nowhere + pdf = _as_pdf_document(self) + item = mupdf.pdf_new_indirect(pdf, xref, 0) + mupdf.pdf_dict_del( item, PDF_NAME('Dest')) + mupdf.pdf_dict_del( item, PDF_NAME('A')) + color = mupdf.pdf_new_array( pdf, 3) + for i in range(3): + mupdf.pdf_array_push_real( color, 0.8) + mupdf.pdf_dict_put( item, PDF_NAME('C'), color) + + def _reset_page_refs(self): + """Invalidate all pages in document dictionary.""" + if getattr(self, "is_closed", True): + return + pages = [p for p in self._page_refs.values()] + for page in pages: + if page: + page._erase() + page = None + self._page_refs.clear() + + def _set_page_labels(self, labels): + pdf = _as_pdf_document(self) + pagelabels = mupdf.pdf_new_name("PageLabels") + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + mupdf.pdf_dict_del(root, pagelabels) + mupdf.pdf_dict_putl(root, mupdf.pdf_new_array(pdf, 0), pagelabels, PDF_NAME('Nums')) + + xref = self.pdf_catalog() + text = self.xref_object(xref, compressed=True) + text = text.replace("/Nums[]", "/Nums[%s]" % labels) + self.update_object(xref, text) + + def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None): + ''' + "update" bookmark by letting it point to nowhere + ''' + pdf = _as_pdf_document(self) + item = mupdf.pdf_new_indirect( pdf, xref, 0) + if title: + mupdf.pdf_dict_put_text_string( item, PDF_NAME('Title'), title) + if action: + mupdf.pdf_dict_del( item, PDF_NAME('Dest')) + obj = JM_pdf_obj_from_str( pdf, action) + mupdf.pdf_dict_put( item, PDF_NAME('A'), obj) + mupdf.pdf_dict_put_int( item, PDF_NAME('F'), flags) + if color: + c = mupdf.pdf_new_array( pdf, 3) + for i in range(3): + f = color[i] + mupdf.pdf_array_push_real( c, f) + mupdf.pdf_dict_put( item, PDF_NAME('C'), c) + elif color is not None: + mupdf.pdf_dict_del( item, PDF_NAME('C')) + if collapse is not None: + if mupdf.pdf_dict_get( item, PDF_NAME('Count')).m_internal: + i = mupdf.pdf_dict_get_int( item, PDF_NAME('Count')) + if (i < 0 and collapse is False) or (i > 0 and collapse is True): + i = i * (-1) + mupdf.pdf_dict_put_int( item, PDF_NAME('Count'), i) + + @property + def FormFonts(self): + """Get list of field font resource names.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return + fonts = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('DR'), + PDF_NAME('Font'), + ) + liste = list() + if fonts.m_internal and mupdf.pdf_is_dict(fonts): # fonts exist + n = mupdf.pdf_dict_len(fonts) + for i in range(n): + f = mupdf.pdf_dict_get_key(fonts, i) + liste.append(JM_UnicodeFromStr(mupdf.pdf_to_name(f))) + return liste + + def add_layer(self, name, creator=None, on=None): + """Add a new OC layer.""" + pdf = _as_pdf_document(self) + JM_add_layer_config( pdf, name, creator, on) + mupdf.ll_pdf_read_ocg( pdf.m_internal) + + def add_ocg(self, name, config=-1, on=1, intent=None, usage=None): + """Add new optional content group.""" + xref = 0 + pdf = _as_pdf_document(self) + + # make the OCG + ocg = mupdf.pdf_add_new_dict(pdf, 3) + mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG')) + mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name) + intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2) + if not intent: + mupdf.pdf_array_push(intents, PDF_NAME('View')) + elif not isinstance(intent, str): + assert 0, f'fixme: intent is not a str. {type(intent)=} {type=}' + #n = len(intent) + #for i in range(n): + # item = intent[i] + # c = JM_StrAsChar(item); + # if (c) { + # pdf_array_push(gctx, intents, pdf_new_name(gctx, c)); + # } + # Py_DECREF(item); + #} + else: + mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent)) + use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3) + ci_name = mupdf.pdf_new_name("CreatorInfo") + cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2) + mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF") + if usage: + mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage) + else: + mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), "Artwork") + indocg = mupdf.pdf_add_object(pdf, ocg) + + # Insert OCG in the right config + ocp = JM_ensure_ocproperties(pdf) + obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs')) + mupdf.pdf_array_push(obj, indocg) + + if config > -1: + obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs')) + if not mupdf.pdf_is_array(obj): + raise ValueError( MSG_BAD_OC_CONFIG) + cfg = mupdf.pdf_array_get(obj, config) + if not cfg.m_internal: + raise ValueError( MSG_BAD_OC_CONFIG) + else: + cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D')) + + obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order')) + if not obj.m_internal: + obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1) + mupdf.pdf_array_push(obj, indocg) + if on: + obj = mupdf.pdf_dict_get(cfg, PDF_NAME('ON')) + if not obj.m_internal: + obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('ON'), 1) + else: + obj =mupdf.pdf_dict_get(cfg, PDF_NAME('OFF')) + if not obj.m_internal: + obj =mupdf.pdf_dict_put_array(cfg, PDF_NAME('OFF'), 1) + mupdf.pdf_array_push(obj, indocg) + + # let MuPDF take note: re-read OCProperties + mupdf.ll_pdf_read_ocg(pdf.m_internal) + + xref = mupdf.pdf_to_num(indocg) + return xref + + def authenticate(self, password): + """Decrypt document.""" + if self.is_closed: + raise ValueError("document closed") + val = mupdf.fz_authenticate_password(self.this, password) + if val: # the doc is decrypted successfully and we init the outline + self.is_encrypted = False + self.is_encrypted = False + self.init_doc() + self.thisown = True + return val + + def can_save_incrementally(self): + """Check whether incremental saves are possible.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return False + return mupdf.pdf_can_be_saved_incrementally(pdf) + + def bake(self, *, annots: bool = True, widgets: bool = True) -> None: + """Convert annotations or fields to permanent content. + + Notes: + Converts annotations or widgets to permanent page content, like + text and vector graphics, as appropriate. + After execution, pages will still look the same, but no longer + have annotations, respectively no fields. + If widgets are selected the PDF will no longer be a Form PDF. + + Args: + annots: convert annotations + widgets: convert form fields + + """ + pdf = _as_pdf_document(self) + mupdf.pdf_bake_document(pdf, int(annots), int(widgets)) + + @property + def chapter_count(self): + """Number of chapters.""" + if self.is_closed: + raise ValueError("document closed") + return mupdf.fz_count_chapters( self.this) + + def chapter_page_count(self, chapter): + """Page count of chapter.""" + if self.is_closed: + raise ValueError("document closed") + chapters = mupdf.fz_count_chapters( self.this) + if chapter < 0 or chapter >= chapters: + raise ValueError( "bad chapter number") + pages = mupdf.fz_count_chapter_pages( self.this, chapter) + return pages + + def close(self): + """Close document.""" + if getattr(self, "is_closed", True): + raise ValueError("document closed") + # self._cleanup() + if hasattr(self, "_outline") and self._outline: + self._outline = None + self._reset_page_refs() + #self.metadata = None + #self.stream = None + self.is_closed = True + #self.FontInfos = [] + self.Graftmaps = {} # Fixes test_3140(). + #self.ShownPages = {} + #self.InsertedImages = {} + #self.this = None + self.this = None + + def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0): + """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + fz_doc = self.this + fp = from_page + tp = to_page + srcCount = mupdf.fz_count_pages(fz_doc) + if fp < 0: + fp = 0 + if fp > srcCount - 1: + fp = srcCount - 1 + if tp < 0: + tp = srcCount - 1 + if tp > srcCount - 1: + tp = srcCount - 1 + len0 = len(JM_mupdf_warnings_store) + doc = JM_convert_to_pdf(fz_doc, fp, tp, rotate) + len1 = len(JM_mupdf_warnings_store) + for i in range(len0, len1): + message(f'{JM_mupdf_warnings_store[i]}') + return doc + + def copy_page(self, pno: int, to: int =-1): + """Copy a page within a PDF document. + + This will only create another reference of the same page object. + Args: + pno: source page number + to: put before this page, '-1' means after last page. + """ + if self.is_closed: + raise ValueError("document closed") + + page_count = len(self) + if ( + pno not in range(page_count) + or to not in range(-1, page_count) + ): + raise ValueError("bad page number(s)") + before = 1 + copy = 1 + if to == -1: + to = page_count - 1 + before = 0 + + return self._move_copy_page(pno, to, before, copy) + + def del_xml_metadata(self): + """Delete XML metadata.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')) + if root.m_internal: + mupdf.pdf_dict_del( root, PDF_NAME('Metadata')) + + def delete_page(self, pno: int =-1): + """ Delete one page from a PDF. + """ + return self.delete_pages(pno) + + def delete_pages(self, *args, **kw): + """Delete pages from a PDF. + + Args: + Either keywords 'from_page'/'to_page', or two integers to + specify the first/last page to delete. + Or a list/tuple/range object, which can contain arbitrary + page numbers. + Or a single integer page number. + """ + if not self.is_pdf: + raise ValueError("is no PDF") + if self.is_closed: + raise ValueError("document closed") + + page_count = self.page_count # page count of document + f = t = -1 + if kw: # check if keywords were used + if args: # then no positional args are allowed + raise ValueError("cannot mix keyword and positional argument") + f = kw.get("from_page", -1) # first page to delete + t = kw.get("to_page", -1) # last page to delete + while f < 0: + f += page_count + while t < 0: + t += page_count + if not f <= t < page_count: + raise ValueError("bad page number(s)") + numbers = tuple(range(f, t + 1)) + else: + if len(args) > 2 or args == []: + raise ValueError("need 1 or 2 positional arguments") + if len(args) == 2: + f, t = args + if not (type(f) is int and type(t) is int): + raise ValueError("both arguments must be int") + if f > t: + f, t = t, f + if not f <= t < page_count: + raise ValueError("bad page number(s)") + numbers = tuple(range(f, t + 1)) + elif isinstance(args[0], int): + pno = args[0] + while pno < 0: + pno += page_count + numbers = (pno,) + else: + numbers = tuple(args[0]) + + numbers = list(map(int, set(numbers))) # ensure unique integers + if numbers == []: + message("nothing to delete") + return + numbers.sort() + if numbers[0] < 0 or numbers[-1] >= page_count: + raise ValueError("bad page number(s)") + frozen_numbers = frozenset(numbers) + toc = self.get_toc() + for i, xref in enumerate(self.get_outline_xrefs()): + if toc[i][2] - 1 in frozen_numbers: + self._remove_toc_item(xref) # remove target in PDF object + + self._remove_links_to(frozen_numbers) + + for i in reversed(numbers): # delete pages, last to first + self._delete_page(i) + + self._reset_page_refs() + + def embfile_add(self, + name: str, + buffer_: ByteString, + filename: OptStr =None, + ufilename: OptStr =None, + desc: OptStr =None, + ) -> None: + """Add an item to the EmbeddedFiles array. + + Args: + name: name of the new item, must not already exist. + buffer_: (binary data) the file content. + filename: (str) the file name, default: the name + ufilename: (unicode) the file name, default: filename + desc: (str) the description. + """ + filenames = self.embfile_names() + msg = "Name '%s' already exists." % str(name) + if name in filenames: + raise ValueError(msg) + + if filename is None: + filename = name + if ufilename is None: + ufilename = filename + if desc is None: + desc = name + xref = self._embfile_add( + name, + buffer_=buffer_, + filename=filename, + ufilename=ufilename, + desc=desc, + ) + date = get_pdf_now() + self.xref_set_key(xref, "Type", "/EmbeddedFile") + self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date)) + self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) + return xref + + def embfile_count(self) -> int: + """Get number of EmbeddedFiles.""" + return len(self.embfile_names()) + + def embfile_del(self, item: typing.Union[int, str]): + """Delete an entry from EmbeddedFiles. + + Notes: + The argument must be name or index of an EmbeddedFiles item. + Physical deletion of data will happen on save to a new + file with appropriate garbage option. + Args: + item: name or number of item. + Returns: + None + """ + idx = self._embeddedFileIndex(item) + return self._embfile_del(idx) + + def embfile_get(self, item: typing.Union[int, str]) -> bytes: + """Get the content of an item in the EmbeddedFiles array. + + Args: + item: number or name of item. + Returns: + (bytes) The file content. + """ + idx = self._embeddedFileIndex(item) + return self._embeddedFileGet(idx) + + def embfile_info(self, item: typing.Union[int, str]) -> dict: + """Get information of an item in the EmbeddedFiles array. + + Args: + item: number or name of item. + Returns: + Information dictionary. + """ + idx = self._embeddedFileIndex(item) + infodict = {"name": self.embfile_names()[idx]} + xref = self._embfile_info(idx, infodict) + t, date = self.xref_get_key(xref, "Params/CreationDate") + if t != "null": + infodict["creationDate"] = date + t, date = self.xref_get_key(xref, "Params/ModDate") + if t != "null": + infodict["modDate"] = date + t, md5 = self.xref_get_key(xref, "Params/CheckSum") + if t != "null": + infodict["checksum"] = binascii.hexlify(md5.encode()).decode() + return infodict + + def embfile_names(self) -> list: + """Get list of names of EmbeddedFiles.""" + filenames = [] + self._embfile_names(filenames) + return filenames + + def embfile_upd(self, + item: typing.Union[int, str], + buffer_: OptBytes =None, + filename: OptStr =None, + ufilename: OptStr =None, + desc: OptStr =None, + ) -> None: + """Change an item of the EmbeddedFiles array. + + Notes: + Only provided parameters are changed. If all are omitted, + the method is a no-op. + Args: + item: number or name of item. + buffer_: (binary data) the new file content. + filename: (str) the new file name. + ufilename: (unicode) the new filen ame. + desc: (str) the new description. + """ + idx = self._embeddedFileIndex(item) + xref = self._embfile_upd( + idx, + buffer_=buffer_, + filename=filename, + ufilename=ufilename, + desc=desc, + ) + date = get_pdf_now() + self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) + return xref + + def extract_font(self, xref=0, info_only=0, named=None): + ''' + Get a font by xref. Returns a tuple or dictionary. + ''' + #log( '{=xref info_only}') + pdf = _as_pdf_document(self) + obj = mupdf.pdf_load_object(pdf, xref) + type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type')) + subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')) + if (mupdf.pdf_name_eq(type_, PDF_NAME('Font')) + and not mupdf.pdf_to_name( subtype).startswith('CIDFontType') + ): + basefont = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont')) + if not basefont.m_internal or mupdf.pdf_is_null(basefont): + bname = mupdf.pdf_dict_get(obj, PDF_NAME('Name')) + else: + bname = basefont + ext = JM_get_fontextension(pdf, xref) + if ext != 'n/a' and not info_only: + buffer_ = JM_get_fontbuffer(pdf, xref) + bytes_ = JM_BinFromBuffer(buffer_) + else: + bytes_ = b'' + if not named: + rc = ( + JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)), + JM_UnicodeFromStr(ext), + JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)), + bytes_, + ) + else: + rc = { + dictkey_name: JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)), + dictkey_ext: JM_UnicodeFromStr(ext), + dictkey_type: JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)), + dictkey_content: bytes_, + } + else: + if not named: + rc = '', '', '', b'' + else: + rc = { + dictkey_name: '', + dictkey_ext: '', + dictkey_type: '', + dictkey_content: b'', + } + return rc + + def extract_image(self, xref): + """Get image by xref. Returns a dictionary.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + + pdf = _as_pdf_document(self) + + if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1): + raise ValueError( MSG_BAD_XREF) + + obj = mupdf.pdf_new_indirect(pdf, xref, 0) + subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')) + + if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')): + raise ValueError( "not an image") + + o = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask')) + if o.m_internal: + smask = mupdf.pdf_to_num(o) + else: + smask = 0 + + # load the image + img = mupdf.pdf_load_image(pdf, obj) + rc = dict() + _make_image_dict(img, rc) + rc[dictkey_smask] = smask + rc[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace()) + return rc + + def ez_save( + self, + filename, + garbage=3, + clean=False, + deflate=True, + deflate_images=True, + deflate_fonts=True, + incremental=False, + ascii=False, + expand=False, + linear=False, + pretty=False, + encryption=1, + permissions=4095, + owner_pw=None, + user_pw=None, + no_new_id=True, + preserve_metadata=1, + use_objstms=1, + compression_effort=0, + ): + ''' + Save PDF using some different defaults + ''' + return self.save( + filename, + garbage=garbage, + clean=clean, + deflate=deflate, + deflate_images=deflate_images, + deflate_fonts=deflate_fonts, + incremental=incremental, + ascii=ascii, + expand=expand, + linear=linear, + pretty=pretty, + encryption=encryption, + permissions=permissions, + owner_pw=owner_pw, + user_pw=user_pw, + no_new_id=no_new_id, + preserve_metadata=preserve_metadata, + use_objstms=use_objstms, + compression_effort=compression_effort, + ) + + def find_bookmark(self, bm): + """Find new location after layouting a document.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + location = mupdf.fz_lookup_bookmark2( self.this, bm) + return location.chapter, location.page + + def fullcopy_page(self, pno, to=-1): + """Make a full page duplicate.""" + pdf = _as_pdf_document(self) + page_count = mupdf.pdf_count_pages( pdf) + try: + if (not _INRANGE(pno, 0, page_count - 1) + or not _INRANGE(to, -1, page_count - 1) + ): + raise ValueError( MSG_BAD_PAGENO) + + page1 = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno)) + + page2 = mupdf.pdf_deep_copy_obj( page1) + old_annots = mupdf.pdf_dict_get( page2, PDF_NAME('Annots')) + + # copy annotations, but remove Popup and IRT types + if old_annots.m_internal: + n = mupdf.pdf_array_len( old_annots) + new_annots = mupdf.pdf_new_array( pdf, n) + for i in range(n): + o = mupdf.pdf_array_get( old_annots, i) + subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype')) + if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')): + continue + if mupdf.pdf_dict_gets( o, "IRT").m_internal: + continue + copy_o = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( o)) + xref = mupdf.pdf_create_object( pdf) + mupdf.pdf_update_object( pdf, xref, copy_o) + copy_o = mupdf.pdf_new_indirect( pdf, xref, 0) + mupdf.pdf_dict_del( copy_o, PDF_NAME('Popup')) + mupdf.pdf_dict_del( copy_o, PDF_NAME('P')) + mupdf.pdf_array_push( new_annots, copy_o) + mupdf.pdf_dict_put( page2, PDF_NAME('Annots'), new_annots) + + # copy the old contents stream(s) + res = JM_read_contents( page1) + + # create new /Contents object for page2 + if res and res.m_internal: + #contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" ", 1), NULL, 0) + contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0) + JM_update_stream( pdf, contents, res, 1) + mupdf.pdf_dict_put( page2, PDF_NAME('Contents'), contents) + + # now insert target page, making sure it is an indirect object + xref = mupdf.pdf_create_object( pdf) # get new xref + mupdf.pdf_update_object( pdf, xref, page2) # store new page + + page2 = mupdf.pdf_new_indirect( pdf, xref, 0) # reread object + mupdf.pdf_insert_page( pdf, to, page2) # and store the page + finally: + mupdf.ll_pdf_drop_page_tree( pdf.m_internal) + + self._reset_page_refs() + + def get_char_widths( + doc: 'Document', + xref: int, + limit: int = 256, + idx: int = 0, + fontdict: OptDict = None, + ) -> list: + """Get list of glyph information of a font. + + Notes: + Must be provided by its XREF number. If we already dealt with the + font, it will be recorded in doc.FontInfos. Otherwise we insert an + entry there. + Finally we return the glyphs for the font. This is a list of + (glyph, width) where glyph is an integer controlling the char + appearance, and width is a float controlling the char's spacing: + width * fontsize is the actual space. + For 'simple' fonts, glyph == ord(char) will usually be true. + Exceptions are 'Symbol' and 'ZapfDingbats'. We are providing data for these directly here. + """ + fontinfo = CheckFontInfo(doc, xref) + if fontinfo is None: # not recorded yet: create it + if fontdict is None: + name, ext, stype, asc, dsc = utils._get_font_properties(doc, xref) + fontdict = { + "name": name, + "type": stype, + "ext": ext, + "ascender": asc, + "descender": dsc, + } + else: + name = fontdict["name"] + ext = fontdict["ext"] + stype = fontdict["type"] + ordering = fontdict["ordering"] + simple = fontdict["simple"] + + if ext == "": + raise ValueError("xref is not a font") + + # check for 'simple' fonts + if stype in ("Type1", "MMType1", "TrueType"): + simple = True + else: + simple = False + + # check for CJK fonts + if name in ("Fangti", "Ming"): + ordering = 0 + elif name in ("Heiti", "Song"): + ordering = 1 + elif name in ("Gothic", "Mincho"): + ordering = 2 + elif name in ("Dotum", "Batang"): + ordering = 3 + else: + ordering = -1 + + fontdict["simple"] = simple + + if name == "ZapfDingbats": + glyphs = zapf_glyphs + elif name == "Symbol": + glyphs = symbol_glyphs + else: + glyphs = None + + fontdict["glyphs"] = glyphs + fontdict["ordering"] = ordering + fontinfo = [xref, fontdict] + doc.FontInfos.append(fontinfo) + else: + fontdict = fontinfo[1] + glyphs = fontdict["glyphs"] + simple = fontdict["simple"] + ordering = fontdict["ordering"] + + if glyphs is None: + oldlimit = 0 + else: + oldlimit = len(glyphs) + + mylimit = max(256, limit) + + if mylimit <= oldlimit: + return glyphs + + if ordering < 0: # not a CJK font + glyphs = doc._get_char_widths( + xref, fontdict["name"], fontdict["ext"], fontdict["ordering"], mylimit, idx + ) + else: # CJK fonts use char codes and width = 1 + glyphs = None + + fontdict["glyphs"] = glyphs + fontinfo[1] = fontdict + UpdateFontInfo(doc, fontinfo) + + return glyphs + + def get_layer(self, config=-1): + """Content of ON, OFF, RBGroups of an OC layer.""" + pdf = _as_pdf_document(self) + ocp = mupdf.pdf_dict_getl( + mupdf.pdf_trailer( pdf), + PDF_NAME('Root'), + PDF_NAME('OCProperties'), + ) + if not ocp.m_internal: + return + if config == -1: + obj = mupdf.pdf_dict_get( ocp, PDF_NAME('D')) + else: + obj = mupdf.pdf_array_get( + mupdf.pdf_dict_get( ocp, PDF_NAME('Configs')), + config, + ) + if not obj.m_internal: + raise ValueError( MSG_BAD_OC_CONFIG) + rc = JM_get_ocg_arrays( obj) + return rc + + def get_layers(self): + """Show optional OC layers.""" + pdf = _as_pdf_document(self) + n = mupdf.pdf_count_layer_configs( pdf) + if n == 1: + obj = mupdf.pdf_dict_getl( + mupdf.pdf_trailer( pdf), + PDF_NAME('Root'), + PDF_NAME('OCProperties'), + PDF_NAME('Configs'), + ) + if not mupdf.pdf_is_array( obj): + n = 0 + rc = [] + info = mupdf.PdfLayerConfig() + for i in range(n): + mupdf.pdf_layer_config_info( pdf, i, info) + item = { + "number": i, + "name": info.name, + "creator": info.creator, + } + rc.append( item) + return rc + + def get_new_xref(self): + """Make new xref.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + xref = 0 + ENSURE_OPERATION(pdf) + xref = mupdf.pdf_create_object(pdf) + return xref + + def get_oc(doc: 'Document', xref: int) -> int: + """Return optional content object xref for an image or form xobject. + + Args: + xref: (int) xref number of an image or form xobject. + """ + if doc.is_closed or doc.is_encrypted: + raise ValueError("document close or encrypted") + t, name = doc.xref_get_key(xref, "Subtype") + if t != "name" or name not in ("/Image", "/Form"): + raise ValueError("bad object type at xref %i" % xref) + t, oc = doc.xref_get_key(xref, "OC") + if t != "xref": + return 0 + rc = int(oc.replace("0 R", "")) + return rc + + def get_ocgs(self): + """Show existing optional content groups.""" + ci = mupdf.pdf_new_name( "CreatorInfo") + pdf = _as_pdf_document(self) + ocgs = mupdf.pdf_dict_getl( + mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')), + PDF_NAME('OCProperties'), + PDF_NAME('OCGs'), + ) + rc = dict() + if not mupdf.pdf_is_array( ocgs): + return rc + n = mupdf.pdf_array_len( ocgs) + for i in range(n): + ocg = mupdf.pdf_array_get( ocgs, i) + xref = mupdf.pdf_to_num( ocg) + name = mupdf.pdf_to_text_string( mupdf.pdf_dict_get( ocg, PDF_NAME('Name'))) + obj = mupdf.pdf_dict_getl( ocg, PDF_NAME('Usage'), ci, PDF_NAME('Subtype')) + usage = None + if obj.m_internal: + usage = mupdf.pdf_to_name( obj) + intents = list() + intent = mupdf.pdf_dict_get( ocg, PDF_NAME('Intent')) + if intent.m_internal: + if mupdf.pdf_is_name( intent): + intents.append( mupdf.pdf_to_name( intent)) + elif mupdf.pdf_is_array( intent): + m = mupdf.pdf_array_len( intent) + for j in range(m): + o = mupdf.pdf_array_get( intent, j) + if mupdf.pdf_is_name( o): + intents.append( mupdf.pdf_to_name( o)) + if mupdf_version_tuple >= (1, 26, 11): + resource_stack = mupdf.PdfResourceStack() + hidden = mupdf.pdf_is_ocg_hidden( pdf, resource_stack, usage, ocg) + else: + hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg) + item = { + "name": name, + "intent": intents, + "on": not hidden, + "usage": usage, + } + temp = xref + rc[ temp] = item + return rc + + def get_ocmd(doc: 'Document', xref: int) -> dict: + """Return the definition of an OCMD (optional content membership dictionary). + + Recognizes PDF dict keys /OCGs (PDF array of OCGs), /P (policy string) and + /VE (visibility expression, PDF array). Via string manipulation, this + info is converted to a Python dictionary with keys "xref", "ocgs", "policy" + and "ve" - ready to recycle as input for 'set_ocmd()'. + """ + + if xref not in range(doc.xref_length()): + raise ValueError("bad xref") + text = doc.xref_object(xref, compressed=True) + if "/Type/OCMD" not in text: + raise ValueError("bad object type") + textlen = len(text) + + p0 = text.find("/OCGs[") # look for /OCGs key + p1 = text.find("]", p0) + if p0 < 0 or p1 < 0: # no OCGs found + ocgs = None + else: + ocgs = text[p0 + 6 : p1].replace("0 R", " ").split() + ocgs = list(map(int, ocgs)) + + p0 = text.find("/P/") # look for /P policy key + if p0 < 0: + policy = None + else: + p1 = text.find("ff", p0) + if p1 < 0: + p1 = text.find("on", p0) + if p1 < 0: # some irregular syntax + raise ValueError("bad object at xref") + else: + policy = text[p0 + 3 : p1 + 2] + + p0 = text.find("/VE[") # look for /VE visibility expression key + if p0 < 0: # no visibility expression found + ve = None + else: + lp = rp = 0 # find end of /VE by finding last ']'. + p1 = p0 + while lp < 1 or lp != rp: + p1 += 1 + if not p1 < textlen: # some irregular syntax + raise ValueError("bad object at xref") + if text[p1] == "[": + lp += 1 + if text[p1] == "]": + rp += 1 + # p1 now positioned at the last "]" + ve = text[p0 + 3 : p1 + 1] # the PDF /VE array + ve = ( + ve.replace("/And", '"and",') + .replace("/Not", '"not",') + .replace("/Or", '"or",') + ) + ve = ve.replace(" 0 R]", "]").replace(" 0 R", ",").replace("][", "],[") + import json + try: + ve = json.loads(ve) + except Exception: + exception_info() + message(f"bad /VE key: {ve!r}") + raise + return {"xref": xref, "ocgs": ocgs, "policy": policy, "ve": ve} + + def get_outline_xrefs(self): + """Get list of outline xref numbers.""" + xrefs = [] + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return xrefs + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + if not root.m_internal: + return xrefs + olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines')) + if not olroot.m_internal: + return xrefs + first = mupdf.pdf_dict_get(olroot, PDF_NAME('First')) + if not first.m_internal: + return xrefs + xrefs = JM_outline_xrefs(first, xrefs) + return xrefs + + def get_page_fonts(self, pno: int, full: bool =False) -> list: + """Retrieve a list of fonts used on a page. + """ + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if not self.is_pdf: + return () + if type(pno) is not int: + try: + pno = pno.number + except Exception: + exception_info() + raise ValueError("need a Page or page number") + val = self._getPageInfo(pno, 1) + if not full: + return [v[:-1] for v in val] + return val + + def get_page_images(self, pno: int, full: bool =False) -> list: + """Retrieve a list of images used on a page. + """ + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if not self.is_pdf: + return () + val = self._getPageInfo(pno, 2) + if not full: + return [v[:-1] for v in val] + return val + + def get_page_labels(self): + """Return page label definitions in PDF document. + + Returns: + A list of dictionaries with the following format: + {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}. + """ + # Jorj McKie, 2021-01-10 + return [utils.rule_dict(item) for item in self._get_page_labels()] + + def get_page_numbers(doc, label, only_one=False): + """Return a list of page numbers with the given label. + + Args: + doc: PDF document object (resp. 'self'). + label: (str) label. + only_one: (bool) stop searching after first hit. + Returns: + List of page numbers having this label. + """ + # Jorj McKie, 2021-01-06 + + numbers = [] + if not label: + return numbers + labels = doc._get_page_labels() + if labels == []: + return numbers + for i in range(doc.page_count): + plabel = utils.get_label_pno(i, labels) + if plabel == label: + numbers.append(i) + if only_one: + break + return numbers + + def get_page_pixmap( + doc: 'Document', + pno: int, + *, + matrix: matrix_like = None, + dpi=None, + colorspace: Colorspace = None, + clip: rect_like = None, + alpha: bool = False, + annots: bool = True, + ) -> 'Pixmap': + """Create pixmap of document page by page number. + + Notes: + Convenience function calling page.get_pixmap. + Args: + pno: (int) page number + matrix: pymupdf.Matrix for transformation (default: pymupdf.Identity). + colorspace: (str,pymupdf.Colorspace) rgb, rgb, gray - case ignored, default csRGB. + clip: (irect-like) restrict rendering to this area. + alpha: (bool) include alpha channel + annots: (bool) also render annotations + """ + if matrix is None: + matrix = Identity + if colorspace is None: + colorspace = csRGB + return doc[pno].get_pixmap( + matrix=matrix, + dpi=dpi, colorspace=colorspace, + clip=clip, + alpha=alpha, + annots=annots + ) + + def get_page_text( + doc: 'Document', + pno: int, + option: str = "text", + clip: rect_like = None, + flags: OptInt = None, + textpage: 'TextPage' = None, + sort: bool = False, + ) -> typing.Any: + """Extract a document page's text by page number. + + Notes: + Convenience function calling page.get_text(). + Args: + pno: page number + option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml. + Returns: + output from page.TextPage(). + """ + return doc[pno].get_text(option, clip=clip, flags=flags, sort=sort) + + def get_page_xobjects(self, pno: int) -> list: + """Retrieve a list of XObjects used on a page. + """ + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if not self.is_pdf: + return () + val = self._getPageInfo(pno, 3) + return val + + def get_sigflags(self): + """Get the /SigFlags value.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return -1 # not a PDF + sigflags = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('SigFlags'), + ) + sigflag = -1 + if sigflags.m_internal: + sigflag = mupdf.pdf_to_int(sigflags) + return sigflag + + def get_toc( + doc: 'Document', + simple: bool = True, + ) -> list: + """Create a table of contents. + + Args: + simple: a bool to control output. Returns a list, where each entry consists of outline level, title, page number and link destination (if simple = False). For details see PyMuPDF's documentation. + """ + def recurse(olItem, liste, lvl): + """Recursively follow the outline item chain and record item information in a list.""" + while olItem and olItem.this.m_internal: + if olItem.title: + title = olItem.title + else: + title = " " + + if not olItem.is_external: + if olItem.uri: + if olItem.page == -1: + resolve = doc.resolve_link(olItem.uri) + page = resolve[0] + 1 + else: + page = olItem.page + 1 + else: + page = -1 + else: + page = -1 + + if not simple: + link = utils.getLinkDict(olItem, doc) + liste.append([lvl, title, page, link]) + else: + liste.append([lvl, title, page]) + + if olItem.down: + liste = recurse(olItem.down, liste, lvl + 1) + olItem = olItem.next + return liste + + # ensure document is open + if doc.is_closed: + raise ValueError("document closed") + doc.init_doc() + olItem = doc.outline + if not olItem: + return [] + lvl = 1 + liste = [] + toc = recurse(olItem, liste, lvl) + if doc.is_pdf and not simple: + doc._extend_toc_items(toc) + return toc + + def get_xml_metadata(self): + """Get document XML metadata.""" + xml = None + pdf = _as_pdf_document(self, required=0) + if pdf.m_internal: + xml = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('Metadata'), + ) + if xml is not None and xml.m_internal: + buff = mupdf.pdf_load_stream(xml) + rc = JM_UnicodeFromBuffer(buff) + else: + rc = '' + return rc + + def has_annots(doc: 'Document') -> bool: + """Check whether there are annotations on any page.""" + if doc.is_closed: + raise ValueError("document closed") + if not doc.is_pdf: + raise ValueError("is no PDF") + for i in range(doc.page_count): + for item in doc.page_annot_xrefs(i): + # pylint: disable=no-member + if not (item[1] == mupdf.PDF_ANNOT_LINK or item[1] == mupdf.PDF_ANNOT_WIDGET): # pylint: disable=no-member + return True + return False + + def has_links(doc: 'Document') -> bool: + """Check whether there are links on any page.""" + if doc.is_closed: + raise ValueError("document closed") + if not doc.is_pdf: + raise ValueError("is no PDF") + for i in range(doc.page_count): + for item in doc.page_annot_xrefs(i): + if item[1] == mupdf.PDF_ANNOT_LINK: # pylint: disable=no-member + return True + return False + + def init_doc(self): + if self.is_encrypted: + raise ValueError("cannot initialize - document still encrypted") + self._outline = self._loadOutline() + self.metadata = dict( + [ + (k,self._getMetadata(v)) for k,v in { + 'format':'format', + 'title':'info:Title', + 'author':'info:Author', + 'subject':'info:Subject', + 'keywords':'info:Keywords', + 'creator':'info:Creator', + 'producer':'info:Producer', + 'creationDate':'info:CreationDate', + 'modDate':'info:ModDate', + 'trapped':'info:Trapped' + }.items() + ] + ) + self.metadata['encryption'] = None if self._getMetadata('encryption')=='None' else self._getMetadata('encryption') + + def insert_file(self, + infile, + from_page=-1, + to_page=-1, + start_at=-1, + rotate=-1, + links=True, + annots=True, + show_progress=0, + final=1, + ): + ''' + Insert an arbitrary supported document to an existing PDF. + + The infile may be given as a filename, a Document or a Pixmap. Other + parameters - where applicable - equal those of insert_pdf(). + ''' + src = None + if isinstance(infile, Pixmap): + if infile.colorspace.n > 3: + infile = Pixmap(csRGB, infile) + src = Document("png", infile.tobytes()) + elif isinstance(infile, Document): + src = infile + else: + src = Document(infile) + if not src: + raise ValueError("bad infile parameter") + if not src.is_pdf: + pdfbytes = src.convert_to_pdf() + src = Document("pdf", pdfbytes) + return self.insert_pdf( + src, + from_page=from_page, + to_page=to_page, + start_at=start_at, + rotate=rotate, + links=links, + annots=annots, + show_progress=show_progress, + final=final, + ) + + def insert_page( + doc: 'Document', + pno: int, + text: typing.Union[str, list, None] = None, + fontsize: float = 11, + width: float = 595, + height: float = 842, + fontname: str = "helv", + fontfile: OptStr = None, + color: OptSeq = (0,), + ) -> int: + """Create a new PDF page and insert some text. + + Notes: + Function combining pymupdf.Document.new_page() and pymupdf.Page.insert_text(). + For parameter details see these methods. + """ + page = doc.new_page(pno=pno, width=width, height=height) + if not bool(text): + return 0 + rc = page.insert_text( + (50, 72), + text, + fontsize=fontsize, + fontname=fontname, + fontfile=fontfile, + color=color, + ) + return rc + + def insert_pdf( + self, + docsrc, + *, + from_page=-1, + to_page=-1, + start_at=-1, + rotate=-1, + links=1, + annots=1, + widgets=1, + join_duplicates=0, + show_progress=0, + final=1, + _gmap=None, + ): + """Insert a page range from another PDF. + + Args: + docsrc: PDF to copy from. Must be different object, but may be same file. + from_page: (int) first source page to copy, 0-based, default 0. + to_page: (int) last source page to copy, 0-based, default last page. + start_at: (int) from_page will become this page number in target. + rotate: (int) rotate copied pages, default -1 is no change. + links: (int/bool) whether to also copy links. + annots: (int/bool) whether to also copy annotations. + widgets: (int/bool) whether to also copy form fields. + join_duplicates: (int/bool) join or rename duplicate widget names. + show_progress: (int) progress message interval, 0 is no messages. + final: (bool) indicates last insertion from this source PDF. + _gmap: internal use only + + Copy sequence reversed if from_page > to_page.""" + + # Insert pages from a source PDF into this PDF. + # For reconstructing the links (_do_links method), we must save the + # insertion point (start_at) if it was specified as -1. + #log( 'insert_pdf(): start') + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if self._graft_id == docsrc._graft_id: + raise ValueError("source and target cannot be same object") + sa = start_at + if sa < 0: + sa = self.page_count + outCount = self.page_count + srcCount = docsrc.page_count + + # local copies of page numbers + fp = from_page + tp = to_page + sa = start_at + + # normalize page numbers + fp = max(fp, 0) # -1 = first page + fp = min(fp, srcCount - 1) # but do not exceed last page + + if tp < 0: + tp = srcCount - 1 # -1 = last page + tp = min(tp, srcCount - 1) # but do not exceed last page + + if sa < 0: + sa = outCount # -1 = behind last page + sa = min(sa, outCount) # but that is also the limit + + if len(docsrc) > show_progress > 0: + inname = os.path.basename(docsrc.name) + if not inname: + inname = "memory PDF" + outname = os.path.basename(self.name) + if not outname: + outname = "memory PDF" + message("Inserting '%s' at '%s'" % (inname, outname)) + + # retrieve / make a Graftmap to avoid duplicate objects + #log( 'insert_pdf(): Graftmaps') + isrt = docsrc._graft_id + _gmap = self.Graftmaps.get(isrt, None) + if _gmap is None: + #log( 'insert_pdf(): Graftmaps2') + _gmap = Graftmap(self) + self.Graftmaps[isrt] = _gmap + + if g_use_extra: + #log( 'insert_pdf(): calling extra_FzDocument_insert_pdf()') + extra_FzDocument_insert_pdf( + self.this, + docsrc.this, + from_page, + to_page, + start_at, + rotate, + links, + annots, + show_progress, + final, + _gmap, + ) + #log( 'insert_pdf(): extra_FzDocument_insert_pdf() returned.') + else: + pdfout = _as_pdf_document(self) + pdfsrc = _as_pdf_document(docsrc) + + if not pdfout.m_internal or not pdfsrc.m_internal: + raise TypeError( "source or target not a PDF") + ENSURE_OPERATION(pdfout) + JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap) + + #log( 'insert_pdf(): calling self._reset_page_refs()') + self._reset_page_refs() + if links: + #log( 'insert_pdf(): calling self._do_links()') + self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa) + if widgets: + self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates) + if final == 1: + self.Graftmaps[isrt] = None + #log( 'insert_pdf(): returning') + + @property + def is_dirty(self): + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return False + r = mupdf.pdf_has_unsaved_changes(pdf) + return True if r else False + + @property + def is_fast_webaccess(self): + ''' + Check whether we have a linearized PDF. + ''' + pdf = _as_pdf_document(self, required=0) + if pdf.m_internal: + return mupdf.pdf_doc_was_linearized(pdf) + return False # gracefully handle non-PDF + + @property + def is_form_pdf(self): + """Either False or PDF field count.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return False + count = -1 + try: + fields = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + mupdf.PDF_ENUM_NAME_Root, + mupdf.PDF_ENUM_NAME_AcroForm, + mupdf.PDF_ENUM_NAME_Fields, + ) + if mupdf.pdf_is_array(fields): + count = mupdf.pdf_array_len(fields) + except Exception: + if g_exceptions_verbose: exception_info() + return False + if count >= 0: + return count + return False + + @property + def is_pdf(self): + """Check for PDF.""" + if isinstance(self.this, mupdf.PdfDocument): + return True + # Avoid calling smupdf.pdf_specifics because it will end up creating + # a new PdfDocument which will call pdf_create_document(), which is ok + # but a little unnecessary. + # + if mupdf.ll_pdf_specifics(self.this.m_internal): + ret = True + else: + ret = False + return ret + + @property + def is_reflowable(self): + """Check if document is layoutable.""" + if self.is_closed: + raise ValueError("document closed") + return bool(mupdf.fz_is_document_reflowable(self)) + + @property + def is_repaired(self): + """Check whether PDF was repaired.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return False + r = mupdf.pdf_was_repaired(pdf) + if r: + return True + return False + + def journal_can_do(self): + """Show if undo and / or redo are possible.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + undo=0 + redo=0 + pdf = _as_pdf_document(self) + undo = mupdf.pdf_can_undo(pdf) + redo = mupdf.pdf_can_redo(pdf) + return {'undo': bool(undo), 'redo': bool(redo)} + + def journal_enable(self): + """Activate document journalling.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + mupdf.pdf_enable_journal(pdf) + + def journal_is_enabled(self): + """Check if journalling is enabled.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + enabled = pdf.m_internal and pdf.m_internal.journal + return enabled + + def journal_load(self, filename): + """Load a journal from a file.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + if isinstance(filename, str): + mupdf.pdf_load_journal(pdf, filename) + else: + res = JM_BufferFromBytes(filename) + stm = mupdf.fz_open_buffer(res) + mupdf.pdf_deserialise_journal(pdf, stm) + if not pdf.m_internal.journal: + RAISEPY( "Journal and document do not match", JM_Exc_FileDataError) + + def journal_op_name(self, step): + """Show operation name for given step.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + name = mupdf.pdf_undoredo_step(pdf, step) + return name + + def journal_position(self): + """Show journalling state.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + steps=0 + pdf = _as_pdf_document(self) + rc, steps = mupdf.pdf_undoredo_state(pdf) + return rc, steps + + def journal_redo(self): + """Move forward in the journal.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + mupdf.pdf_redo(pdf) + return True + + def journal_save(self, filename): + """Save journal to a file.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + if isinstance(filename, str): + mupdf.pdf_save_journal(pdf, filename) + else: + out = JM_new_output_fileptr(filename) + mupdf.pdf_write_journal(pdf, out) + out.fz_close_output() + + def journal_start_op(self, name=None): + """Begin a journalling operation.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + if not pdf.m_internal.journal: + raise RuntimeError( "Journalling not enabled") + if name: + mupdf.pdf_begin_operation(pdf, name) + else: + mupdf.pdf_begin_implicit_operation(pdf) + + def journal_stop_op(self): + """End a journalling operation.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + mupdf.pdf_end_operation(pdf) + + def journal_undo(self): + """Move backwards in the journal.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + mupdf.pdf_undo(pdf) + return True + + @property + def language(self): + """Document language.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return + lang = mupdf.pdf_document_language(pdf) + if lang == mupdf.FZ_LANG_UNSET: + return + return mupdf.fz_string_from_text_language2(lang) + + @property + def last_location(self): + """Id (chapter, page) of last page.""" + if self.is_closed: + raise ValueError("document closed") + last_loc = mupdf.fz_last_page(self.this) + return last_loc.chapter, last_loc.page + + def layer_ui_configs(self): + """Show OC visibility status modifiable by user.""" + pdf = _as_pdf_document(self) + info = mupdf.PdfLayerConfigUi() + n = mupdf.pdf_count_layer_config_ui( pdf) + rc = [] + for i in range(n): + mupdf.pdf_layer_config_ui_info( pdf, i, info) + if info.type == 1: + type_ = "checkbox" + elif info.type == 2: + type_ = "radiobox" + else: + type_ = "label" + item = { + "number": i, + "text": info.text, + "depth": info.depth, + "type": type_, + "on": info.selected, + "locked": info.locked, + } + rc.append(item) + return rc + + def layout(self, rect=None, width=0, height=0, fontsize=11): + """Re-layout a reflowable document.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + doc = self.this + if not mupdf.fz_is_document_reflowable( doc): + return + w = width + h = height + r = JM_rect_from_py(rect) + if not mupdf.fz_is_infinite_rect(r): + w = r.x1 - r.x0 + h = r.y1 - r.y0 + if w <= 0.0 or h <= 0.0: + raise ValueError( "bad page size") + mupdf.fz_layout_document( doc, w, h, fontsize) + + self._reset_page_refs() + self.init_doc() + + def load_page(self, page_id): + """Load a page. + + 'page_id' is either a 0-based page number or a tuple (chapter, pno), + with chapter number and page number within that chapter. + """ + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if page_id is None: + page_id = 0 + if page_id not in self: + raise ValueError("page not in document") + if type(page_id) is int and page_id < 0: + np = self.page_count + while page_id < 0: + page_id += np + if isinstance(page_id, int): + page = mupdf.fz_load_page(self.this, page_id) + else: + chapter, pagenum = page_id + page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum) + val = Page(page, self) + + val.thisown = True + val.parent = self + self._page_refs[id(val)] = val + val._annot_refs = weakref.WeakValueDictionary() + val.number = page_id + return val + + def location_from_page_number(self, pno): + """Convert pno to (chapter, page).""" + if self.is_closed: + raise ValueError("document closed") + this_doc = self.this + loc = mupdf.fz_make_location(-1, -1) + page_count = mupdf.fz_count_pages(this_doc) + while pno < 0: + pno += page_count + if pno >= page_count: + raise ValueError( MSG_BAD_PAGENO) + loc = mupdf.fz_location_from_page_number(this_doc, pno) + return loc.chapter, loc.page + + def make_bookmark(self, loc): + """Make a page pointer before layouting document.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + loc = mupdf.FzLocation(*loc) + mark = mupdf.ll_fz_make_bookmark2( self.this.m_internal, loc.internal()) + return mark + + @property + def markinfo(self) -> dict: + """Return the PDF MarkInfo value.""" + xref = self.pdf_catalog() + if xref == 0: + return None + rc = self.xref_get_key(xref, "MarkInfo") + if rc[0] == "null": + return {} + if rc[0] == "xref": + xref = int(rc[1].split()[0]) + val = self.xref_object(xref, compressed=True) + elif rc[0] == "dict": + val = rc[1] + else: + val = None + if val is None or not (val[:2] == "<<" and val[-2:] == ">>"): + return {} + valid = {"Marked": False, "UserProperties": False, "Suspects": False} + val = val[2:-2].split("/") + for v in val[1:]: + try: + key, value = v.split() + except Exception: + if g_exceptions_verbose > 1: exception_info() + return valid + if value == "true": + valid[key] = True + return valid + + def move_page(self, pno: int, to: int =-1): + """Move a page within a PDF document. + + Args: + pno: source page number. + to: put before this page, '-1' means after last page. + """ + if self.is_closed: + raise ValueError("document closed") + page_count = len(self) + if (pno not in range(page_count) or to not in range(-1, page_count)): + raise ValueError("bad page number(s)") + before = 1 + copy = 0 + if to == -1: + to = page_count - 1 + before = 0 + + return self._move_copy_page(pno, to, before, copy) + + @property + def name(self): + return self._name + + def need_appearances(self, value=None): + """Get/set the NeedAppearances value.""" + if not self.is_form_pdf: + return None + + pdf = _as_pdf_document(self) + oldval = -1 + appkey = "NeedAppearances" + + form = mupdf.pdf_dict_getp( + mupdf.pdf_trailer(pdf), + "Root/AcroForm", + ) + app = mupdf.pdf_dict_gets(form, appkey) + if mupdf.pdf_is_bool(app): + oldval = mupdf.pdf_to_bool(app) + if value: + mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_TRUE) + else: + mupdf.pdf_dict_puts(form, appkey, mupdf.PDF_FALSE) + if value is None: + return oldval >= 0 + return value + + @property + def needs_pass(self): + """Indicate password required.""" + if self.is_closed: + raise ValueError("document closed") + document = self.this if isinstance(self.this, mupdf.FzDocument) else self.this.super() + ret = mupdf.fz_needs_password( document) + return ret + + def new_page( + doc: 'Document', + pno: int = -1, + width: float = 595, + height: float = 842, + ) -> Page: + """Create and return a new page object. + + Args: + pno: (int) insert before this page. Default: after last page. + width: (float) page width in points. Default: 595 (ISO A4 width). + height: (float) page height in points. Default 842 (ISO A4 height). + Returns: + A pymupdf.Page object. + """ + doc._newPage(pno, width=width, height=height) + return doc[pno] + + def next_location(self, page_id): + """Get (chapter, page) of next page.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if type(page_id) is int: + page_id = (0, page_id) + if page_id not in self: + raise ValueError("page id not in document") + if tuple(page_id) == self.last_location: + return () + this_doc = _as_fz_document(self) + val = page_id[ 0] + if not isinstance(val, int): + RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError) + chapter = val + val = page_id[ 1] + pno = val + loc = mupdf.fz_make_location(chapter, pno) + next_loc = mupdf.fz_next_page( this_doc, loc) + return next_loc.chapter, next_loc.page + + def page_annot_xrefs(self, n): + if g_use_extra: + return extra.page_annot_xrefs( self.this, n) + + if isinstance(self.this, mupdf.PdfDocument): + page_count = mupdf.pdf_count_pages(self.this) + pdf_document = self.this + else: + page_count = mupdf.fz_count_pages(self.this) + pdf_document = _as_pdf_document(self) + while n < 0: + n += page_count + if n > page_count: + raise ValueError( MSG_BAD_PAGENO) + page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n) + annots = JM_get_annot_xref_list(page_obj) + return annots + + @property + def page_count(self): + """Number of pages.""" + if self.is_closed: + raise ValueError('document closed') + if g_use_extra: + return self.page_count2(self) + if isinstance( self.this, mupdf.FzDocument): + return mupdf.fz_count_pages( self.this) + else: + return mupdf.pdf_count_pages( self.this) + + def page_cropbox(self, pno): + """Get CropBox of page number (without loading page).""" + if self.is_closed: + raise ValueError("document closed") + this_doc = self.this + page_count = mupdf.fz_count_pages( this_doc) + n = pno + while n < 0: + n += page_count + pdf = _as_pdf_document(self) + if n >= page_count: + raise ValueError( MSG_BAD_PAGENO) + pageref = mupdf.pdf_lookup_page_obj( pdf, n) + cropbox = JM_cropbox(pageref) + val = JM_py_from_rect(cropbox) + + val = Rect(val) + + return val + + def page_number_from_location(self, page_id): + """Convert (chapter, pno) to page number.""" + if type(page_id) is int: + np = self.page_count + while page_id < 0: + page_id += np + page_id = (0, page_id) + if page_id not in self: + raise ValueError("page id not in document") + chapter, pno = page_id + loc = mupdf.fz_make_location( chapter, pno) + page_n = mupdf.fz_page_number_from_location( self.this, loc) + return page_n + + def page_xref(self, pno): + """Get xref of page number.""" + if g_use_extra: + return extra.page_xref( self.this, pno) + if self.is_closed: + raise ValueError("document closed") + page_count = mupdf.fz_count_pages(self.this) + n = pno + while n < 0: + n += page_count + pdf = _as_pdf_document(self) + xref = 0 + if n >= page_count: + raise ValueError( MSG_BAD_PAGENO) + xref = mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n)) + return xref + + @property + def pagelayout(self) -> str: + """Return the PDF PageLayout value. + """ + xref = self.pdf_catalog() + if xref == 0: + return None + rc = self.xref_get_key(xref, "PageLayout") + if rc[0] == "null": + return "SinglePage" + if rc[0] == "name": + return rc[1][1:] + return "SinglePage" + + @property + def pagemode(self) -> str: + """Return the PDF PageMode value. + """ + xref = self.pdf_catalog() + if xref == 0: + return None + rc = self.xref_get_key(xref, "PageMode") + if rc[0] == "null": + return "UseNone" + if rc[0] == "name": + return rc[1][1:] + return "UseNone" + + if sys.implementation.version < (3, 9): + # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`. + _pages_ret = collections.abc.Iterable + else: + _pages_ret = collections.abc.Iterable[Page] + + def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret: + """Return a generator iterator over a page range. + + Arguments have the same meaning as for the range() built-in. + """ + if not self.page_count: + return + # set the start value + start = start or 0 + while start < 0: + start += self.page_count + if start not in range(self.page_count): + raise ValueError("bad start page number") + + # set the stop value + stop = stop if stop is not None and stop <= self.page_count else self.page_count + + # set the step value + if step == 0: + raise ValueError("arg 3 must not be zero") + if step is None: + if start > stop: + step = -1 + else: + step = 1 + + for pno in range(start, stop, step): + yield (self.load_page(pno)) + + def pdf_catalog(self): + """Get xref of PDF catalog.""" + pdf = _as_pdf_document(self, required=0) + xref = 0 + if not pdf.m_internal: + return xref + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + xref = mupdf.pdf_to_num(root) + return xref + + def pdf_trailer(self, compressed=0, ascii=0): + """Get PDF trailer as a string.""" + return self.xref_object(-1, compressed=compressed, ascii=ascii) + + @property + def permissions(self): + """Document permissions.""" + if self.is_encrypted: + return 0 + doc =self.this + pdf = mupdf.pdf_document_from_fz_document(doc) + + # for PDF return result of standard function + if pdf.m_internal: + return mupdf.pdf_document_permissions(pdf) + + # otherwise simulate the PDF return value + perm = 0xFFFFFFFC # all permissions granted + # now switch off where needed + if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_PRINT): + perm = perm ^ mupdf.PDF_PERM_PRINT + if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_EDIT): + perm = perm ^ mupdf.PDF_PERM_MODIFY + if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_COPY): + perm = perm ^ mupdf.PDF_PERM_COPY + if not mupdf.fz_has_permission(doc, mupdf.FZ_PERMISSION_ANNOTATE): + perm = perm ^ mupdf.PDF_PERM_ANNOTATE + return perm + + def prev_location(self, page_id): + + """Get (chapter, page) of previous page.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if type(page_id) is int: + page_id = (0, page_id) + if page_id not in self: + raise ValueError("page id not in document") + if page_id == (0, 0): + return () + chapter, pno = page_id + loc = mupdf.fz_make_location(chapter, pno) + prev_loc = mupdf.fz_previous_page(self.this, loc) + return prev_loc.chapter, prev_loc.page + + def reload_page(self, page: Page) -> Page: + """Make a fresh copy of a page.""" + old_annots = {} # copy annot references to here + pno = page.number # save the page number + for k, v in page._annot_refs.items(): # save the annot dictionary + old_annots[k] = v + + # When we call `self.load_page()` below, it will end up in + # fz_load_chapter_page(), which will return any matching page in the + # document's list of non-ref-counted loaded pages, instead of actually + # reloading the page. + # + # We want to assert that we have actually reloaded the fz_page, and not + # simply returned the same `fz_page*` pointer from the document's list + # of non-ref-counted loaded pages. + # + # So we first remove our reference to the `fz_page*`. This will + # decrement .refs, and if .refs was 1, this is guaranteed to free the + # `fz_page*` and remove it from the document's list if it was there. So + # we are guaranteed that our returned `fz_page*` is from a genuine + # reload, even if it happens to reuse the original block of memory. + # + # However if the original .refs is greater than one, there must be + # other references to the `fz_page` somewhere, and we require that + # these other references are not keeping the page in the document's + # list. We check that we are returning a newly loaded page by + # asserting that our returned `fz_page*` is different from the original + # `fz_page*` - the original was not freed, so a new `fz_page` cannot + # reuse the same block of memory. + # + + refs_old = page.this.m_internal.refs + m_internal_old = page.this.m_internal_value() + + page.this = None + page._erase() # remove the page + page = None + TOOLS.store_shrink(100) + page = self.load_page(pno) # reload the page + + # copy annot refs over to the new dictionary + #page_proxy = weakref.proxy(page) + for k, v in old_annots.items(): + annot = old_annots[k] + #annot.parent = page_proxy # refresh parent to new page + page._annot_refs[k] = annot + if refs_old == 1: + # We know that `page.this = None` will have decremented the ref + # count to zero so we are guaranteed that the new `fz_page` is a + # new page even if it happens to have reused the same block of + # memory. + pass + else: + # Check that the new `fz_page*` is different from the original. + m_internal_new = page.this.m_internal_value() + assert m_internal_new != m_internal_old, \ + f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}' + return page + + def resolve_link(self, uri=None, chapters=0): + """Calculate internal link destination. + + Args: + uri: (str) some Link.uri + chapters: (bool) whether to use (chapter, page) format + Returns: + (page_id, x, y) where x, y are point coordinates on the page. + page_id is either page number (if chapters=0), or (chapter, pno). + """ + if not uri: + if chapters: + return (-1, -1), 0, 0 + return -1, 0, 0 + try: + loc, xp, yp = mupdf.fz_resolve_link(self.this, uri) + except Exception: + if g_exceptions_verbose: exception_info() + if chapters: + return (-1, -1), 0, 0 + return -1, 0, 0 + if chapters: + return (loc.chapter, loc.page), xp, yp + pno = mupdf.fz_page_number_from_location(self.this, loc) + return pno, xp, yp + + def rewrite_images( + self, + dpi_threshold=None, + dpi_target=0, + quality=0, + lossy=True, + lossless=True, + bitonal=True, + color=True, + gray=True, + set_to_gray=False, + options=None, + ): + """Rewrite images in a PDF document. + + The typical use case is to reduce the size of the PDF by recompressing + images. Default parameters will convert all images to JPEG where + possible, using the specified resolutions and quality. Exclude + undesired images by setting parameters to False. + Args: + dpi_threshold: look at images with a larger DPI only. + dpi_target: change eligible images to this DPI. + quality: Quality of the recompressed images (0-100). + lossy: process lossy image types (e.g. JPEG). + lossless: process lossless image types (e.g. PNG). + bitonal: process black-and-white images (e.g. FAX) + color: process colored images. + gray: process gray images. + set_to_gray: whether to change the PDF to gray at process start. + options: (PdfImageRewriterOptions) Custom options for image + rewriting (optional). Expert use only. If provided, other + parameters are ignored, except set_to_gray. + """ + quality_str = str(quality) + if not dpi_threshold: + dpi_threshold = dpi_target = 0 + if dpi_target > 0 and dpi_target >= dpi_threshold: + raise ValueError("{dpi_target=} must be less than {dpi_threshold=}") + template_opts = mupdf.PdfImageRewriterOptions() + dir1 = set(dir(template_opts)) # for checking that only existing options are set + if not options: + opts = mupdf.PdfImageRewriterOptions() + if bitonal: + opts.bitonal_image_recompress_method = mupdf.FZ_RECOMPRESS_FAX + opts.bitonal_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE + opts.bitonal_image_subsample_to = dpi_target + opts.bitonal_image_recompress_quality = quality_str + opts.bitonal_image_subsample_threshold = dpi_threshold + if color: + if lossless: + opts.color_lossless_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG + opts.color_lossless_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE + opts.color_lossless_image_subsample_to = dpi_target + opts.color_lossless_image_subsample_threshold = dpi_threshold + opts.color_lossless_image_recompress_quality = quality_str + if lossy: + opts.color_lossy_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG + opts.color_lossy_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE + opts.color_lossy_image_subsample_threshold = dpi_threshold + opts.color_lossy_image_subsample_to = dpi_target + opts.color_lossy_image_recompress_quality = quality_str + if gray: + if lossless: + opts.gray_lossless_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG + opts.gray_lossless_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE + opts.gray_lossless_image_subsample_to = dpi_target + opts.gray_lossless_image_subsample_threshold = dpi_threshold + opts.gray_lossless_image_recompress_quality = quality_str + if lossy: + opts.gray_lossy_image_recompress_method = mupdf.FZ_RECOMPRESS_JPEG + opts.gray_lossy_image_subsample_method = mupdf.FZ_SUBSAMPLE_AVERAGE + opts.gray_lossy_image_subsample_threshold = dpi_threshold + opts.gray_lossy_image_subsample_to = dpi_target + opts.gray_lossy_image_recompress_quality = quality_str + else: + opts = options + + dir2 = set(dir(opts)) # checking that only possible options were used + invalid_options = dir2 - dir1 + if invalid_options: + raise ValueError(f"Invalid options: {invalid_options}") + + if set_to_gray: + self.recolor(1) + pdf = _as_pdf_document(self) + mupdf.pdf_rewrite_images(pdf, opts) + + def recolor(self, components=1): + """Change the color component count on all pages. + + Args: + components: (int) desired color component count, one of 1, 3, 4. + + Invokes the same-named method for all pages. + """ + if not self.is_pdf: + raise ValueError("is no PDF") + for i in range(self.page_count): + self.load_page(i).recolor(components) + + def resolve_names(self): + """Convert the PDF's destination names into a Python dict. + + The only parameter is the pymupdf.Document. + All names found in the catalog under keys "/Dests" and "/Names/Dests" are + being included. + + Returns: + A dcitionary with the following layout: + - key: (str) the name + - value: (dict) with the following layout: + * "page": target page number (0-based). If no page number found -1. + * "to": (x, y) target point on page - currently in PDF coordinates, + i.e. point (0,0) is the bottom-left of the page. + * "zoom": (float) the zoom factor + * "dest": (str) only occurs if the target location on the page has + not been provided as "/XYZ" or if no page number was found. + Examples: + {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0}, + '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}} + + or + + '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ... + """ + if hasattr(self, "_resolved_names"): # do not execute multiple times! + return self._resolved_names + # this is a backward listing of page xref to page number + page_xrefs = {self.page_xref(i): i for i in range(self.page_count)} + + def obj_string(obj): + """Return string version of a PDF object definition.""" + buffer = mupdf.fz_new_buffer(512) + output = mupdf.FzOutput(buffer) + mupdf.pdf_print_obj(output, obj, 1, 0) + output.fz_close_output() + return JM_UnicodeFromBuffer(buffer) + + def get_array(val): + """Generate value of one item of the names dictionary.""" + templ_dict = {"page": -1, "dest": ""} # value template + if val.pdf_is_indirect(): + val = mupdf.pdf_resolve_indirect(val) + if val.pdf_is_array(): + array = obj_string(val) + elif val.pdf_is_dict(): + array = obj_string(mupdf.pdf_dict_gets(val, "D")) + else: # if all fails return the empty template + return templ_dict + + # replace PDF "null" by zero, omit the square brackets + array = array.replace("null", "0")[1:-1] + + # find stuff before first "/" + idx = array.find("/") + if idx < 1: # this has no target page spec + templ_dict["dest"] = array # return the orig. string + return templ_dict + + subval = array[:idx].strip() # stuff before "/" + array = array[idx:] # stuff from "/" onwards + templ_dict["dest"] = array + # if we start with /XYZ: extract x, y, zoom + # 1, 2 or 3 of these values may actually be supplied + if array.startswith("/XYZ"): + del templ_dict["dest"] # don't return orig string in this case + + # make a list of the 3 tokens following "/XYZ" + array_list = array.split()[1:4] # omit "/XYZ" + + # fill up missing tokens with "0" strings + while len(array_list) < 3: # fill up if too short + array_list.append("0") # add missing values + + # make list of 3 floats: x, y and zoom + t = list(map(float, array_list)) # the resulting x, y, z values + templ_dict["to"] = (t[0], t[1]) + templ_dict["zoom"] = t[2] + + # extract page number + if subval.endswith("0 R"): # page xref given? + templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1) + else: # naked page number given + templ_dict["page"] = int(subval) + return templ_dict + + def fill_dict(dest_dict, pdf_dict): + """Generate name resolution items for pdf_dict. + + This may be either "/Names/Dests" or just "/Dests" + """ + # length of the PDF dictionary + name_count = mupdf.pdf_dict_len(pdf_dict) + + # extract key-val of each dict item + for i in range(name_count): + key = mupdf.pdf_dict_get_key(pdf_dict, i) + val = mupdf.pdf_dict_get_val(pdf_dict, i) + if key.pdf_is_name(): # this should always be true! + dict_key = key.pdf_to_name() + else: + message(f"key {i} is no /Name") + dict_key = None + + if dict_key: + dest_dict[dict_key] = get_array(val) # store key/value in dict + + # access underlying PDF document of fz Document + pdf = mupdf.pdf_document_from_fz_document(self) + + # access PDF catalog + catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root") + + dest_dict = {} + + # make PDF_NAME(Dests) + dests = mupdf.pdf_new_name("Dests") + + # extract destinations old style (PDF 1.1) + old_dests = mupdf.pdf_dict_get(catalog, dests) + if old_dests.pdf_is_dict(): + fill_dict(dest_dict, old_dests) + + # extract destinations new style (PDF 1.2+) + tree = mupdf.pdf_load_name_tree(pdf, dests) + if tree.pdf_is_dict(): + fill_dict(dest_dict, tree) + + self._resolved_names = dest_dict # store result or reuse + return dest_dict + + def save( + self, + filename, + garbage=0, + clean=0, + deflate=0, + deflate_images=0, + deflate_fonts=0, + incremental=0, + ascii=0, + expand=0, + linear=0, + no_new_id=0, + appearance=0, + pretty=0, + encryption=1, + permissions=4095, + owner_pw=None, + user_pw=None, + preserve_metadata=1, + use_objstms=0, + compression_effort=0, + ): + # From %pythonprepend save + # + """Save PDF to file, pathlib.Path or file pointer.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if type(filename) is str: + pass + elif hasattr(filename, "open"): # assume: pathlib.Path + filename = str(filename) + elif hasattr(filename, "name"): # assume: file object + filename = filename.name + elif not hasattr(filename, "seek"): # assume file object + raise ValueError("filename must be str, Path or file object") + if filename == self.name and not incremental: + raise ValueError("save to original must be incremental") + if linear and use_objstms: + raise ValueError("'linear' and 'use_objstms' cannot both be requested") + if self.page_count < 1: + raise ValueError("cannot save with zero pages") + if incremental: + if self.name != filename or self.stream: + raise ValueError("incremental needs original file") + if user_pw and len(user_pw) > 40 or owner_pw and len(owner_pw) > 40: + raise ValueError("password length must not exceed 40") + + pdf = _as_pdf_document(self) + opts = mupdf.PdfWriteOptions() + opts.do_incremental = incremental + opts.do_ascii = ascii + opts.do_compress = deflate + opts.do_compress_images = deflate_images + opts.do_compress_fonts = deflate_fonts + opts.do_decompress = expand + opts.do_garbage = garbage + opts.do_pretty = pretty + opts.do_linear = linear + opts.do_clean = clean + opts.do_sanitize = clean + opts.dont_regenerate_id = no_new_id + opts.do_appearance = appearance + opts.do_encrypt = encryption + opts.permissions = permissions + if owner_pw is not None: + opts.opwd_utf8_set_value(owner_pw) + elif user_pw is not None: + opts.opwd_utf8_set_value(user_pw) + if user_pw is not None: + opts.upwd_utf8_set_value(user_pw) + opts.do_preserve_metadata = preserve_metadata + opts.do_use_objstms = use_objstms + opts.compression_effort = compression_effort + + out = None + pdf.m_internal.resynth_required = 0 + JM_embedded_clean(pdf) + if no_new_id == 0: + JM_ensure_identity(pdf) + if isinstance(filename, str): + #log( 'calling mupdf.pdf_save_document()') + mupdf.pdf_save_document(pdf, filename, opts) + else: + out = JM_new_output_fileptr(filename) + #log( f'{type(out)=} {type(out.this)=}') + mupdf.pdf_write_document(pdf, out, opts) + out.fz_close_output() + + def save_snapshot(self, filename): + """Save a file snapshot suitable for journalling.""" + if self.is_closed: + raise ValueError("doc is closed") + if type(filename) is str: + pass + elif hasattr(filename, "open"): # assume: pathlib.Path + filename = str(filename) + elif hasattr(filename, "name"): # assume: file object + filename = filename.name + else: + raise ValueError("filename must be str, Path or file object") + if filename == self.name: + raise ValueError("cannot snapshot to original") + pdf = _as_pdf_document(self) + mupdf.pdf_save_snapshot(pdf, filename) + + def saveIncr(self): + """ Save PDF incrementally""" + return self.save(self.name, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP) + + # ------------------------------------------------------------------------------ + # Remove potentially sensitive data from a PDF. Similar to the Adobe + # Acrobat 'sanitize' function + # ------------------------------------------------------------------------------ + def scrub( + doc: 'Document', + attached_files: bool = True, + clean_pages: bool = True, + embedded_files: bool = True, + hidden_text: bool = True, + javascript: bool = True, + metadata: bool = True, + redactions: bool = True, + redact_images: int = 0, + remove_links: bool = True, + reset_fields: bool = True, + reset_responses: bool = True, + thumbnails: bool = True, + xml_metadata: bool = True, + ) -> None: + + def remove_hidden(cont_lines): + """Remove hidden text from a PDF page. + + Args: + cont_lines: list of lines with /Contents content. Should have status + from after page.cleanContents(). + + Returns: + List of /Contents lines from which hidden text has been removed. + + Notes: + The input must have been created after the page's /Contents object(s) + have been cleaned with page.cleanContents(). This ensures a standard + formatting: one command per line, single spaces between operators. + This allows for drastic simplification of this code. + """ + out_lines = [] # will return this + in_text = False # indicate if within BT/ET object + suppress = False # indicate text suppression active + make_return = False + for line in cont_lines: + if line == b"BT": # start of text object + in_text = True # switch on + out_lines.append(line) # output it + continue + if line == b"ET": # end of text object + in_text = False # switch off + out_lines.append(line) # output it + continue + if line == b"3 Tr": # text suppression operator + suppress = True # switch on + make_return = True + continue + if line[-2:] == b"Tr" and line[0] != b"3": + suppress = False # text rendering changed + out_lines.append(line) + continue + if line == b"Q": # unstack command also switches off + suppress = False + out_lines.append(line) + continue + if suppress and in_text: # suppress hidden lines + continue + out_lines.append(line) + if make_return: + return out_lines + else: + return None + + if not doc.is_pdf: # only works for PDF + raise ValueError("is no PDF") + if doc.is_encrypted or doc.is_closed: + raise ValueError("closed or encrypted doc") + + if not clean_pages: + hidden_text = False + redactions = False + + if metadata: + doc.set_metadata({}) # remove standard metadata + + for page in doc: + if reset_fields: + # reset form fields (widgets) + for widget in page.widgets(): + widget.reset() + + if remove_links: + links = page.get_links() # list of all links on page + for link in links: # remove all links + page.delete_link(link) + + found_redacts = False + for annot in page.annots(): + if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files: + annot.update_file(buffer_=b" ") # set file content to empty + if reset_responses: + annot.delete_responses() + if annot.type[0] == mupdf.PDF_ANNOT_REDACT: # pylint: disable=no-member + found_redacts = True + + if redactions and found_redacts: + page.apply_redactions(images=redact_images) + + if not (clean_pages or hidden_text): + continue # done with the page + + page.clean_contents() + if not page.get_contents(): + continue + if hidden_text: + xrefs = page.get_contents() + assert len(xrefs) == 1 # only one because of cleaning. + xref = xrefs[0] + cont = doc.xref_stream(xref) + cont_lines = remove_hidden(cont.splitlines()) # remove hidden text + if cont_lines: # something was actually removed + cont = b"\n".join(cont_lines) + doc.update_stream(xref, cont) # rewrite the page /Contents + + if thumbnails: # remove page thumbnails? + if doc.xref_get_key(page.xref, "Thumb")[0] != "null": + doc.xref_set_key(page.xref, "Thumb", "null") + + # pages are scrubbed, now perform document-wide scrubbing + # remove embedded files + if embedded_files: + for name in doc.embfile_names(): + doc.embfile_del(name) + + if xml_metadata: + doc.del_xml_metadata() + if not (xml_metadata or javascript): + xref_limit = 0 + else: + xref_limit = doc.xref_length() + for xref in range(1, xref_limit): + if not doc.xref_object(xref): + msg = "bad xref %i - clean PDF before scrubbing" % xref + raise ValueError(msg) + if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript": + # a /JavaScript action object + obj = "<>" # replace with a null JavaScript + doc.update_object(xref, obj) # update this object + continue # no further handling + + if not xml_metadata: + continue + + if doc.xref_get_key(xref, "Type")[1] == "/Metadata": + # delete any metadata object directly + doc.update_object(xref, "<<>>") + doc.update_stream(xref, b"deleted", new=True) + continue + + if doc.xref_get_key(xref, "Metadata")[0] != "null": + doc.xref_set_key(xref, "Metadata", "null") + + def search_page_for( + doc: 'Document', + pno: int, + text: str, + quads: bool = False, + clip: rect_like = None, + flags: int = None, + textpage: 'TextPage' = None, + ) -> list: + """Search for a string on a page. + + Args: + pno: page number + text: string to be searched for + clip: restrict search to this rectangle + quads: (bool) return quads instead of rectangles + flags: bit switches, default: join hyphened words + textpage: reuse a prepared textpage + Returns: + a list of rectangles or quads, each containing an occurrence. + """ + if flags is None: + flags = (0 + | TEXT_DEHYPHENATE + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + ) + return doc[pno].search_for( + text, + quads=quads, + clip=clip, + flags=flags, + textpage=textpage, + ) + + def select(self, pyliste): + """Build sub-pdf with page numbers in the list.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if not self.is_pdf: + raise ValueError("is no PDF") + if not hasattr(pyliste, "__getitem__"): + raise ValueError("sequence required") + + valid_range = range(len(self)) + if (len(pyliste) == 0 + or min(pyliste) not in valid_range + or max(pyliste) not in valid_range + ): + raise ValueError("bad page number(s)") + + # get underlying pdf document, + pdf = _as_pdf_document(self) + # create page sub-pdf via pdf_rearrange_pages2(). + # + if mupdf_version_tuple >= (1, 25, 3): + # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree + # which, for example, breaks test_3705. + mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP) + else: + mupdf.pdf_rearrange_pages2(pdf, pyliste) + + # remove any existing pages with their kids + self._reset_page_refs() + + def set_language(self, language=None): + pdf = _as_pdf_document(self) + if not language: + lang = mupdf.FZ_LANG_UNSET + else: + lang = mupdf.fz_text_language_from_string(language) + mupdf.pdf_set_document_language(pdf, lang) + return True + + def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None): + """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.""" + if self.is_closed: + raise ValueError("document closed") + ocgs = set(self.get_ocgs().keys()) + if ocgs == set(): + raise ValueError("document has no optional content") + + if on: + if type(on) not in (list, tuple): + raise ValueError("bad type: 'on'") + s = set(on).difference(ocgs) + if s != set(): + raise ValueError("bad OCGs in 'on': %s" % s) + + if off: + if type(off) not in (list, tuple): + raise ValueError("bad type: 'off'") + s = set(off).difference(ocgs) + if s != set(): + raise ValueError("bad OCGs in 'off': %s" % s) + + if locked: + if type(locked) not in (list, tuple): + raise ValueError("bad type: 'locked'") + s = set(locked).difference(ocgs) + if s != set(): + raise ValueError("bad OCGs in 'locked': %s" % s) + + if rbgroups: + if type(rbgroups) not in (list, tuple): + raise ValueError("bad type: 'rbgroups'") + for x in rbgroups: + if not type(x) in (list, tuple): + raise ValueError("bad RBGroup '%s'" % x) + s = set(x).difference(ocgs) + if s != set(): + raise ValueError("bad OCGs in RBGroup: %s" % s) + + if basestate: + basestate = str(basestate).upper() + if basestate == "UNCHANGED": + basestate = "Unchanged" + if basestate not in ("ON", "OFF", "Unchanged"): + raise ValueError("bad 'basestate'") + pdf = _as_pdf_document(self) + ocp = mupdf.pdf_dict_getl( + mupdf.pdf_trailer( pdf), + PDF_NAME('Root'), + PDF_NAME('OCProperties'), + ) + if not ocp.m_internal: + return + if config == -1: + obj = mupdf.pdf_dict_get( ocp, PDF_NAME('D')) + else: + obj = mupdf.pdf_array_get( + mupdf.pdf_dict_get( ocp, PDF_NAME('Configs')), + config, + ) + if not obj.m_internal: + raise ValueError( MSG_BAD_OC_CONFIG) + JM_set_ocg_arrays( obj, basestate, on, off, rbgroups, locked) + mupdf.ll_pdf_read_ocg( pdf.m_internal) + + def set_layer_ui_config(self, number, action=0): + """Set / unset OC intent configuration.""" + # The user might have given the name instead of sequence number, + # so select by that name and continue with corresp. number + if isinstance(number, str): + select = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number] + if select == []: + raise ValueError(f"bad OCG '{number}'.") + number = select[0] # this is the number for the name + pdf = _as_pdf_document(self) + if action == 1: + mupdf.pdf_toggle_layer_config_ui(pdf, number) + elif action == 2: + mupdf.pdf_deselect_layer_config_ui(pdf, number) + else: + mupdf.pdf_select_layer_config_ui(pdf, number) + + def set_markinfo(self, markinfo: dict) -> bool: + """Set the PDF MarkInfo values.""" + xref = self.pdf_catalog() + if xref == 0: + raise ValueError("not a PDF") + if not markinfo or not isinstance(markinfo, dict): + return False + valid = {"Marked": False, "UserProperties": False, "Suspects": False} + + if not set(valid.keys()).issuperset(markinfo.keys()): + badkeys = f"bad MarkInfo key(s): {set(markinfo.keys()).difference(valid.keys())}" + raise ValueError(badkeys) + pdfdict = "<<" + valid.update(markinfo) + for key, value in valid.items(): + value=str(value).lower() + if value not in ("true", "false"): + raise ValueError(f"bad key value '{key}': '{value}'") + pdfdict += f"/{key} {value}" + pdfdict += ">>" + self.xref_set_key(xref, "MarkInfo", pdfdict) + return True + + def set_metadata(doc: 'Document', m: dict = None) -> None: + """Update the PDF /Info object. + + Args: + m: a dictionary like doc.metadata. + """ + if not doc.is_pdf: + raise ValueError("is no PDF") + if doc.is_closed or doc.is_encrypted: + raise ValueError("document closed or encrypted") + if m is None: + m = {} + elif type(m) is not dict: + raise ValueError("bad metadata") + keymap = { + "author": "Author", + "producer": "Producer", + "creator": "Creator", + "title": "Title", + "format": None, + "encryption": None, + "creationDate": "CreationDate", + "modDate": "ModDate", + "subject": "Subject", + "keywords": "Keywords", + "trapped": "Trapped", + } + valid_keys = set(keymap.keys()) + diff_set = set(m.keys()).difference(valid_keys) + if diff_set != set(): + msg = "bad dict key(s): %s" % diff_set + raise ValueError(msg) + + t, temp = doc.xref_get_key(-1, "Info") + if t != "xref": + info_xref = 0 + else: + info_xref = int(temp.replace("0 R", "")) + + if m == {} and info_xref == 0: # nothing to do + return + + if info_xref == 0: # no prev metadata: get new xref + info_xref = doc.get_new_xref() + doc.update_object(info_xref, "<<>>") # fill it with empty object + doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref) + elif m == {}: # remove existing metadata + doc.xref_set_key(-1, "Info", "null") + doc.init_doc() + return + + for key, val in [(k, v) for k, v in m.items() if keymap[k] is not None]: + pdf_key = keymap[key] + if not bool(val) or val in ("none", "null"): + val = "null" + else: + val = get_pdf_str(val) + doc.xref_set_key(info_xref, pdf_key, val) + doc.init_doc() + return + + def set_oc(doc: 'Document', xref: int, oc: int) -> None: + """Attach optional content object to image or form xobject. + + Args: + xref: (int) xref number of an image or form xobject + oc: (int) xref number of an OCG or OCMD + """ + if doc.is_closed or doc.is_encrypted: + raise ValueError("document close or encrypted") + t, name = doc.xref_get_key(xref, "Subtype") + if t != "name" or name not in ("/Image", "/Form"): + raise ValueError("bad object type at xref %i" % xref) + if oc > 0: + t, name = doc.xref_get_key(oc, "Type") + if t != "name" or name not in ("/OCG", "/OCMD"): + raise ValueError("bad object type at xref %i" % oc) + if oc == 0 and "OC" in doc.xref_get_keys(xref): + doc.xref_set_key(xref, "OC", "null") + return None + doc.xref_set_key(xref, "OC", "%i 0 R" % oc) + return None + + def set_ocmd( + doc: 'Document', + xref: int = 0, + ocgs: typing.Union[list, None] = None, + policy: OptStr = None, + ve: typing.Union[list, None] = None, + ) -> int: + """Create or update an OCMD object in a PDF document. + + Args: + xref: (int) 0 for creating a new object, otherwise update existing one. + ocgs: (list) OCG xref numbers, which shall be subject to 'policy'. + policy: one of 'AllOn', 'AllOff', 'AnyOn', 'AnyOff' (any casing). + ve: (list) visibility expression. Use instead of 'ocgs' with 'policy'. + + Returns: + Xref of the created or updated OCMD. + """ + + all_ocgs = set(doc.get_ocgs().keys()) + + def ve_maker(ve): + if type(ve) not in (list, tuple) or len(ve) < 2: + raise ValueError("bad 've' format: %s" % ve) + if ve[0].lower() not in ("and", "or", "not"): + raise ValueError("bad operand: %s" % ve[0]) + if ve[0].lower() == "not" and len(ve) != 2: + raise ValueError("bad 've' format: %s" % ve) + item = "[/%s" % ve[0].title() + for x in ve[1:]: + if type(x) is int: + if x not in all_ocgs: + raise ValueError("bad OCG %i" % x) + item += " %i 0 R" % x + else: + item += " %s" % ve_maker(x) + item += "]" + return item + + text = "<>". + """ + s = "%i<<" % label["startpage"] + if label.get("prefix", "") != "": + s += "/P(%s)" % label["prefix"] + if label.get("style", "") != "": + s += "/S/%s" % label["style"] + if label.get("firstpagenum", 1) > 1: + s += "/St %i" % label["firstpagenum"] + s += ">>" + return s + + def create_nums(labels): + """Return concatenated string of all labels rules. + + Args: + labels: (list) dictionaries as created by function 'rule_dict'. + Returns: + PDF compatible string for page label definitions, ready to be + enclosed in PDF array 'Nums[...]'. + """ + labels.sort(key=lambda x: x["startpage"]) + s = "".join([create_label_str(label) for label in labels]) + return s + + doc._set_page_labels(create_nums(labels)) + + def set_toc( + doc: 'Document', + toc: list, + collapse: int = 1, + ) -> int: + """Create new outline tree (table of contents, TOC). + + Args: + toc: (list, tuple) each entry must contain level, title, page and + optionally top margin on the page. None or '()' remove the TOC. + collapse: (int) collapses entries beyond this level. Zero or None + shows all entries unfolded. + Returns: + the number of inserted items, or the number of removed items respectively. + """ + if doc.is_closed or doc.is_encrypted: + raise ValueError("document closed or encrypted") + if not doc.is_pdf: + raise ValueError("is no PDF") + if not toc: # remove all entries + return len(doc._delToC()) + + # validity checks -------------------------------------------------------- + if type(toc) not in (list, tuple): + raise ValueError("'toc' must be list or tuple") + toclen = len(toc) + page_count = doc.page_count + t0 = toc[0] + if type(t0) not in (list, tuple): + raise ValueError("items must be sequences of 3 or 4 items") + if t0[0] != 1: + raise ValueError("hierarchy level of item 0 must be 1") + for i in list(range(toclen - 1)): + t1 = toc[i] + t2 = toc[i + 1] + if not -1 <= t1[2] <= page_count: + raise ValueError("row %i: page number out of range" % i) + if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4): + raise ValueError("bad row %i" % (i + 1)) + if (type(t2[0]) is not int) or t2[0] < 1: + raise ValueError("bad hierarchy level in row %i" % (i + 1)) + if t2[0] > t1[0] + 1: + raise ValueError("bad hierarchy level in row %i" % (i + 1)) + # no formal errors in toc -------------------------------------------------- + + # -------------------------------------------------------------------------- + # make a list of xref numbers, which we can use for our TOC entries + # -------------------------------------------------------------------------- + old_xrefs = doc._delToC() # del old outlines, get their xref numbers + + # prepare table of xrefs for new bookmarks + old_xrefs = [] + xref = [0] + old_xrefs + xref[0] = doc._getOLRootNumber() # entry zero is outline root xref number + if toclen > len(old_xrefs): # too few old xrefs? + for i in range((toclen - len(old_xrefs))): + xref.append(doc.get_new_xref()) # acquire new ones + + lvltab = {0: 0} # to store last entry per hierarchy level + + # ------------------------------------------------------------------------------ + # contains new outline objects as strings - first one is the outline root + # ------------------------------------------------------------------------------ + olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}] + # ------------------------------------------------------------------------------ + # build olitems as a list of PDF-like connected dictionaries + # ------------------------------------------------------------------------------ + for i in range(toclen): + o = toc[i] + lvl = o[0] # level + title = get_pdf_str(o[1]) # title + pno = min(doc.page_count - 1, max(0, o[2] - 1)) # page number + page_xref = doc.page_xref(pno) + page_height = doc.page_cropbox(pno).height + top = Point(72, page_height - 36) + dest_dict = {"to": top, "kind": LINK_GOTO} # fall back target + if o[2] < 0: + dest_dict["kind"] = LINK_NONE + if len(o) > 3: # some target is specified + if type(o[3]) in (int, float): # convert a number to a point + dest_dict["to"] = Point(72, page_height - o[3]) + else: # if something else, make sure we have a dict + # We make a copy of o[3] to avoid modifying our caller's data. + dest_dict = o[3].copy() if type(o[3]) is dict else dest_dict + if "to" not in dest_dict: # target point not in dict? + dest_dict["to"] = top # put default in + else: # transform target to PDF coordinates + page = doc[pno] + point = Point(dest_dict["to"]) + point.y = page.cropbox.height - point.y + point = point * page.rotation_matrix + dest_dict["to"] = (point.x, point.y) + d = {} + d["first"] = -1 + d["count"] = 0 + d["last"] = -1 + d["prev"] = -1 + d["next"] = -1 + d["dest"] = utils.getDestStr(page_xref, dest_dict) + d["top"] = dest_dict["to"] + d["title"] = title + d["parent"] = lvltab[lvl - 1] + d["xref"] = xref[i + 1] + d["color"] = dest_dict.get("color") + d["flags"] = dest_dict.get("italic", 0) + 2 * dest_dict.get("bold", 0) + lvltab[lvl] = i + 1 + parent = olitems[lvltab[lvl - 1]] # the parent entry + + if ( + dest_dict.get("collapse") or collapse and lvl > collapse + ): # suppress expansion + parent["count"] -= 1 # make /Count negative + else: + parent["count"] += 1 # positive /Count + + if parent["first"] == -1: + parent["first"] = i + 1 + parent["last"] = i + 1 + else: + d["prev"] = parent["last"] + prev = olitems[parent["last"]] + prev["next"] = i + 1 + parent["last"] = i + 1 + olitems.append(d) + + # ------------------------------------------------------------------------------ + # now create each outline item as a string and insert it in the PDF + # ------------------------------------------------------------------------------ + for i, ol in enumerate(olitems): + txt = "<<" + if ol["count"] != 0: + txt += "/Count %i" % ol["count"] + try: + txt += ol["dest"] + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose >= 2: exception_info() + pass + try: + if ol["first"] > -1: + txt += "/First %i 0 R" % xref[ol["first"]] + except Exception: + if g_exceptions_verbose >= 2: exception_info() + pass + try: + if ol["last"] > -1: + txt += "/Last %i 0 R" % xref[ol["last"]] + except Exception: + if g_exceptions_verbose >= 2: exception_info() + pass + try: + if ol["next"] > -1: + txt += "/Next %i 0 R" % xref[ol["next"]] + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose >= 2: exception_info() + pass + try: + if ol["parent"] > -1: + txt += "/Parent %i 0 R" % xref[ol["parent"]] + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose >= 2: exception_info() + pass + try: + if ol["prev"] > -1: + txt += "/Prev %i 0 R" % xref[ol["prev"]] + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose >= 2: exception_info() + pass + try: + txt += "/Title" + ol["title"] + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose >= 2: exception_info() + pass + + if ol.get("color") and len(ol["color"]) == 3: + txt += f"/C[ {_format_g(tuple(ol['color']))}]" + if ol.get("flags", 0) > 0: + txt += "/F %i" % ol["flags"] + + if i == 0: # special: this is the outline root + txt += "/Type/Outlines" # so add the /Type entry + txt += ">>" + doc.update_object(xref[i], txt) # insert the PDF object + + doc.init_doc() + return toclen + + def set_toc_item( + doc: 'Document', + idx: int, + dest_dict: OptDict = None, + kind: OptInt = None, + pno: OptInt = None, + uri: OptStr = None, + title: OptStr = None, + to: point_like = None, + filename: OptStr = None, + zoom: float = 0, + ) -> None: + """Update TOC item by index. + + It allows changing the item's title and link destination. + + Args: + idx: + (int) desired index of the TOC list, as created by get_toc. + dest_dict: + (dict) destination dictionary as created by get_toc(False). + Outrules all other parameters. If None, the remaining parameters + are used to make a dest dictionary. + kind: + (int) kind of link (pymupdf.LINK_GOTO, etc.). If None, then only + the title will be updated. If pymupdf.LINK_NONE, the TOC item will + be deleted. + pno: + (int) page number (1-based like in get_toc). Required if + pymupdf.LINK_GOTO. + uri: + (str) the URL, required if pymupdf.LINK_URI. + title: + (str) the new title. No change if None. + to: + (point-like) destination on the target page. If omitted, (72, 36) + will be used as target coordinates. + filename: + (str) destination filename, required for pymupdf.LINK_GOTOR and + pymupdf.LINK_LAUNCH. + name: + (str) a destination name for pymupdf.LINK_NAMED. + zoom: + (float) a zoom factor for the target location (pymupdf.LINK_GOTO). + """ + xref = doc.get_outline_xrefs()[idx] + page_xref = 0 + if type(dest_dict) is dict: + if dest_dict["kind"] == LINK_GOTO: + pno = dest_dict["page"] + page_xref = doc.page_xref(pno) + page_height = doc.page_cropbox(pno).height + to = dest_dict.get('to', Point(72, 36)) + to.y = page_height - to.y + dest_dict["to"] = to + action = utils.getDestStr(page_xref, dest_dict) + if not action.startswith("/A"): + raise ValueError("bad bookmark dest") + color = dest_dict.get("color") + if color: + color = list(map(float, color)) + if len(color) != 3 or min(color) < 0 or max(color) > 1: + raise ValueError("bad color value") + bold = dest_dict.get("bold", False) + italic = dest_dict.get("italic", False) + flags = italic + 2 * bold + collapse = dest_dict.get("collapse") + return doc._update_toc_item( + xref, + action=action[2:], + title=title, + color=color, + flags=flags, + collapse=collapse, + ) + + if kind == LINK_NONE: # delete bookmark item + return doc.del_toc_item(idx) + if kind is None and title is None: # treat as no-op + return None + if kind is None: # only update title text + return doc._update_toc_item(xref, action=None, title=title) + + if kind == LINK_GOTO: + if pno is None or pno not in range(1, doc.page_count + 1): + raise ValueError("bad page number") + page_xref = doc.page_xref(pno - 1) + page_height = doc.page_cropbox(pno - 1).height + if to is None: + to = Point(72, page_height - 36) + else: + to = Point(to) + to.y = page_height - to.y + + ddict = { + "kind": kind, + "to": to, + "uri": uri, + "page": pno, + "file": filename, + "zoom": zoom, + } + action = utils.getDestStr(page_xref, ddict) + if action == "" or not action.startswith("/A"): + raise ValueError("bad bookmark dest") + + return doc._update_toc_item(xref, action=action[2:], title=title) + + def set_xml_metadata(self, metadata): + """Store XML document level metadata.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')) + if not root.m_internal: + RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError) + res = mupdf.fz_new_buffer_from_copied_data( metadata.encode('utf-8')) + xml = mupdf.pdf_dict_get( root, PDF_NAME('Metadata')) + if xml.m_internal: + JM_update_stream( pdf, xml, res, 0) + else: + xml = mupdf.pdf_add_stream( pdf, res, mupdf.PdfObj(), 0) + mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata')) + mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML')) + mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml) + + def subset_fonts(doc: 'Document', verbose: bool = False, fallback: bool = False) -> OptInt: + """Build font subsets in a PDF. + + Eligible fonts are potentially replaced by smaller versions. Page text is + NOT rewritten and thus should retain properties like being hidden or + controlled by optional content. + + This method by default uses MuPDF's own internal feature to create subset + fonts. As this is a new function, errors may still occur. In this case, + please fall back to using the previous version by using "fallback=True". + Fallback mode requires the external package 'fontTools'. + + Args: + fallback: use the older deprecated implementation. + verbose: only used by fallback mode. + + Returns: + The new MuPDF-based code returns None. The deprecated fallback + mode returns 0 if there are no fonts to subset. Otherwise, it + returns the decrease in fontsize (the difference in fontsize), + measured in bytes. + """ + # Font binaries: - "buffer" -> (names, xrefs, (unicodes, glyphs)) + # An embedded font is uniquely defined by its fontbuffer only. It may have + # multiple names and xrefs. + # Once the sets of used unicodes and glyphs are known, we compute a + # smaller version of the buffer user package fontTools. + + if not fallback: # by default use MuPDF function + pdf = mupdf.pdf_document_from_fz_document(doc) + mupdf.pdf_subset_fonts2(pdf, list(range(doc.page_count))) + return + + font_buffers = {} + + def get_old_widths(xref): + """Retrieve old font '/W' and '/DW' values.""" + df = doc.xref_get_key(xref, "DescendantFonts") + if df[0] != "array": # only handle xref specifications + return None, None + df_xref = int(df[1][1:-1].replace("0 R", "")) + widths = doc.xref_get_key(df_xref, "W") + if widths[0] != "array": # no widths key found + widths = None + else: + widths = widths[1] + dwidths = doc.xref_get_key(df_xref, "DW") + if dwidths[0] != "int": + dwidths = None + else: + dwidths = dwidths[1] + return widths, dwidths + + def set_old_widths(xref, widths, dwidths): + """Restore the old '/W' and '/DW' in subsetted font. + + If either parameter is None or evaluates to False, the corresponding + dictionary key will be set to null. + """ + df = doc.xref_get_key(xref, "DescendantFonts") + if df[0] != "array": # only handle xref specs + return None + df_xref = int(df[1][1:-1].replace("0 R", "")) + if (type(widths) is not str or not widths) and doc.xref_get_key(df_xref, "W")[ + 0 + ] != "null": + doc.xref_set_key(df_xref, "W", "null") + else: + doc.xref_set_key(df_xref, "W", widths) + if (type(dwidths) is not str or not dwidths) and doc.xref_get_key( + df_xref, "DW" + )[0] != "null": + doc.xref_set_key(df_xref, "DW", "null") + else: + doc.xref_set_key(df_xref, "DW", dwidths) + return None + + def set_subset_fontname(new_xref): + """Generate a name prefix to tag a font as subset. + + We use a random generator to select 6 upper case ASCII characters. + The prefixed name must be put in the font xref as the "/BaseFont" value + and in the FontDescriptor object as the '/FontName' value. + """ + # The following generates a prefix like 'ABCDEF+' + import random + import string + prefix = "".join(random.choices(tuple(string.ascii_uppercase), k=6)) + "+" + font_str = doc.xref_object(new_xref, compressed=True) + font_str = font_str.replace("/BaseFont/", "/BaseFont/" + prefix) + df = doc.xref_get_key(new_xref, "DescendantFonts") + if df[0] == "array": + df_xref = int(df[1][1:-1].replace("0 R", "")) + fd = doc.xref_get_key(df_xref, "FontDescriptor") + if fd[0] == "xref": + fd_xref = int(fd[1].replace("0 R", "")) + fd_str = doc.xref_object(fd_xref, compressed=True) + fd_str = fd_str.replace("/FontName/", "/FontName/" + prefix) + doc.update_object(fd_xref, fd_str) + doc.update_object(new_xref, font_str) + + def build_subset(buffer, unc_set, gid_set): + """Build font subset using fontTools. + + Args: + buffer: (bytes) the font given as a binary buffer. + unc_set: (set) required glyph ids. + Returns: + Either None if subsetting is unsuccessful or the subset font buffer. + """ + try: + import fontTools.subset as fts + except ImportError: + if g_exceptions_verbose: exception_info() + message("This method requires fontTools to be installed.") + raise + import tempfile + with tempfile.TemporaryDirectory() as tmp_dir: + oldfont_path = f"{tmp_dir}/oldfont.ttf" + newfont_path = f"{tmp_dir}/newfont.ttf" + uncfile_path = f"{tmp_dir}/uncfile.txt" + args = [ + oldfont_path, + "--retain-gids", + f"--output-file={newfont_path}", + "--layout-features=*", + "--passthrough-tables", + "--ignore-missing-glyphs", + "--ignore-missing-unicodes", + "--symbol-cmap", + ] + + # store glyph ids or unicodes as file + with io.open(f"{tmp_dir}/uncfile.txt", "w", encoding='utf8') as unc_file: + if 0xFFFD in unc_set: # error unicode exists -> use glyphs + args.append(f"--gids-file={uncfile_path}") + gid_set.add(189) + unc_list = list(gid_set) + for unc in unc_list: + unc_file.write("%i\n" % unc) + else: + args.append(f"--unicodes-file={uncfile_path}") + unc_set.add(255) + unc_list = list(unc_set) + for unc in unc_list: + unc_file.write("%04x\n" % unc) + + # store fontbuffer as a file + with io.open(oldfont_path, "wb") as fontfile: + fontfile.write(buffer) + try: + os.remove(newfont_path) # remove old file + except Exception: + pass + try: # invoke fontTools subsetter + fts.main(args) + font = Font(fontfile=newfont_path) + new_buffer = font.buffer # subset font binary + if font.glyph_count == 0: # intercept empty font + new_buffer = None + except Exception: + exception_info() + new_buffer = None + return new_buffer + + def repl_fontnames(doc): + """Populate 'font_buffers'. + + For each font candidate, store its xref and the list of names + by which PDF text may refer to it (there may be multiple). + """ + + def norm_name(name): + """Recreate font name that contains PDF hex codes. + + E.g. #20 -> space, chr(32) + """ + while "#" in name: + p = name.find("#") + c = int(name[p + 1 : p + 3], 16) + name = name.replace(name[p : p + 3], chr(c)) + return name + + def get_fontnames(doc, item): + """Return a list of fontnames for an item of page.get_fonts(). + + There may be multiple names e.g. for Type0 fonts. + """ + fontname = item[3] + names = [fontname] + fontname = doc.xref_get_key(item[0], "BaseFont")[1][1:] + fontname = norm_name(fontname) + if fontname not in names: + names.append(fontname) + descendents = doc.xref_get_key(item[0], "DescendantFonts") + if descendents[0] != "array": + return names + descendents = descendents[1][1:-1] + if descendents.endswith(" 0 R"): + xref = int(descendents[:-4]) + descendents = doc.xref_object(xref, compressed=True) + p1 = descendents.find("/BaseFont") + if p1 >= 0: + p2 = descendents.find("/", p1 + 1) + p1 = min(descendents.find("/", p2 + 1), descendents.find(">>", p2 + 1)) + fontname = descendents[p2 + 1 : p1] + fontname = norm_name(fontname) + if fontname not in names: + names.append(fontname) + return names + + for i in range(doc.page_count): + for f in doc.get_page_fonts(i, full=True): + font_xref = f[0] # font xref + font_ext = f[1] # font file extension + basename = f[3] # font basename + + if font_ext not in ( # skip if not supported by fontTools + "otf", + "ttf", + "woff", + "woff2", + ): + continue + # skip fonts which already are subsets + if len(basename) > 6 and basename[6] == "+": + continue + + extr = doc.extract_font(font_xref) + fontbuffer = extr[-1] + names = get_fontnames(doc, f) + name_set, xref_set, subsets = font_buffers.get( + fontbuffer, (set(), set(), (set(), set())) + ) + xref_set.add(font_xref) + for name in names: + name_set.add(name) + font = Font(fontbuffer=fontbuffer) + name_set.add(font.name) + del font + font_buffers[fontbuffer] = (name_set, xref_set, subsets) + + def find_buffer_by_name(name): + for buffer, (name_set, _, _) in font_buffers.items(): + if name in name_set: + return buffer + return None + + # ----------------- + # main function + # ----------------- + repl_fontnames(doc) # populate font information + if not font_buffers: # nothing found to do + if verbose: + message(f'No fonts to subset.') + return 0 + + old_fontsize = 0 + new_fontsize = 0 + for fontbuffer in font_buffers.keys(): + old_fontsize += len(fontbuffer) + + # Scan page text for usage of subsettable fonts + for page in doc: + # go through the text and extend set of used glyphs by font + # we use a modified MuPDF trace device, which delivers us glyph ids. + for span in page.get_texttrace(): + if type(span) is not dict: # skip useless information + continue + fontname = span["font"][:33] # fontname for the span + buffer = find_buffer_by_name(fontname) + if buffer is None: + continue + name_set, xref_set, (set_ucs, set_gid) = font_buffers[buffer] + for c in span["chars"]: + set_ucs.add(c[0]) # unicode + set_gid.add(c[1]) # glyph id + font_buffers[buffer] = (name_set, xref_set, (set_ucs, set_gid)) + + # build the font subsets + for old_buffer, (name_set, xref_set, subsets) in font_buffers.items(): + new_buffer = build_subset(old_buffer, subsets[0], subsets[1]) + fontname = list(name_set)[0] + if new_buffer is None or len(new_buffer) >= len(old_buffer): + # subset was not created or did not get smaller + if verbose: + message(f'Cannot subset {fontname!r}.') + continue + if verbose: + message(f"Built subset of font {fontname!r}.") + val = doc._insert_font(fontbuffer=new_buffer) # store subset font in PDF + new_xref = val[0] # get its xref + set_subset_fontname(new_xref) # tag fontname as subset font + font_str = doc.xref_object( # get its object definition + new_xref, + compressed=True, + ) + # walk through the original font xrefs and replace each by the subset def + for font_xref in xref_set: + # we need the original '/W' and '/DW' width values + width_table, def_width = get_old_widths(font_xref) + # ... and replace original font definition at xref with it + doc.update_object(font_xref, font_str) + # now copy over old '/W' and '/DW' values + if width_table or def_width: + set_old_widths(font_xref, width_table, def_width) + # 'new_xref' remains unused in the PDF and must be removed + # by garbage collection. + new_fontsize += len(new_buffer) + + return old_fontsize - new_fontsize + + def switch_layer(self, config, as_default=0): + """Activate an OC layer.""" + pdf = _as_pdf_document(self) + cfgs = mupdf.pdf_dict_getl( + mupdf.pdf_trailer( pdf), + PDF_NAME('Root'), + PDF_NAME('OCProperties'), + PDF_NAME('Configs') + ) + if not mupdf.pdf_is_array( cfgs) or not mupdf.pdf_array_len( cfgs): + if config < 1: + return + raise ValueError( MSG_BAD_OC_LAYER) + if config < 0: + return + mupdf.pdf_select_layer_config( pdf, config) + if as_default: + mupdf.pdf_set_layer_config_as_default( pdf) + mupdf.ll_pdf_read_ocg( pdf.m_internal) + + def update_object(self, xref, text, page=None): + """Replace object definition source.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len(pdf) + if not _INRANGE(xref, 1, xreflen-1): + RAISEPY("bad xref", MSG_BAD_XREF) + ENSURE_OPERATION(pdf) + # create new object with passed-in string + new_obj = JM_pdf_obj_from_str(pdf, text) + mupdf.pdf_update_object(pdf, xref, new_obj) + if page: + JM_refresh_links( _as_pdf_page(page)) + + def update_stream(self, xref=0, stream=None, new=1, compress=1): + """Replace xref stream part.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len(pdf) + if xref < 1 or xref > xreflen: + raise ValueError( MSG_BAD_XREF) + # get the object + obj = mupdf.pdf_new_indirect(pdf, xref, 0) + if not mupdf.pdf_is_dict(obj): + raise ValueError( MSG_IS_NO_DICT) + res = JM_BufferFromBytes(stream) + if not res.m_internal: + raise TypeError( MSG_BAD_BUFFER) + JM_update_stream(pdf, obj, res, compress) + pdf.dirty = 1 + + @property + def version_count(self): + ''' + Count versions of PDF document. + ''' + pdf = _as_pdf_document(self, required=0) + if pdf.m_internal: + return mupdf.pdf_count_versions(pdf) + return 0 + + def write( + self, + garbage=False, + clean=False, + deflate=False, + deflate_images=False, + deflate_fonts=False, + incremental=False, + ascii=False, + expand=False, + linear=False, + no_new_id=False, + appearance=False, + pretty=False, + encryption=1, + permissions=4095, + owner_pw=None, + user_pw=None, + preserve_metadata=1, + use_objstms=0, + compression_effort=0, + ): + from io import BytesIO + bio = BytesIO() + self.save( + bio, + garbage=garbage, + clean=clean, + no_new_id=no_new_id, + appearance=appearance, + deflate=deflate, + deflate_images=deflate_images, + deflate_fonts=deflate_fonts, + incremental=incremental, + ascii=ascii, + expand=expand, + linear=linear, + pretty=pretty, + encryption=encryption, + permissions=permissions, + owner_pw=owner_pw, + user_pw=user_pw, + preserve_metadata=preserve_metadata, + use_objstms=use_objstms, + compression_effort=compression_effort, + ) + return bio.getvalue() + + def tobytes(self, *args, **kwargs): + return self.write(*args, **kwargs) + + @property + def xref(self): + """PDF xref number of page.""" + CheckParent(self) + return self.parent.page_xref(self.number) + + def xref_copy(doc: 'Document', source: int, target: int, *, keep: list = None) -> None: + """Copy a PDF dictionary object to another one given their xref numbers. + + Args: + doc: PDF document object + source: source xref number + target: target xref number, the xref must already exist + keep: an optional list of 1st level keys in target that should not be + removed before copying. + Notes: + This works similar to the copy() method of dictionaries in Python. The + source may be a stream object. + """ + if doc.xref_is_stream(source): + # read new xref stream, maintaining compression + stream = doc.xref_stream_raw(source) + doc.update_stream( + target, + stream, + compress=False, # keeps source compression + new=True, # in case target is no stream + ) + + # empty the target completely, observe exceptions + if keep is None: + keep = [] + for key in doc.xref_get_keys(target): + if key in keep: + continue + doc.xref_set_key(target, key, "null") + # copy over all source dict items + for key in doc.xref_get_keys(source): + item = doc.xref_get_key(source, key) + doc.xref_set_key(target, key, item[1]) + + def xref_get_key(self, xref, key): + """Get PDF dict key value of object at 'xref'.""" + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len(pdf) + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref > 0: + obj = mupdf.pdf_load_object(pdf, xref) + else: + obj = mupdf.pdf_trailer(pdf) + if not obj.m_internal: + return ("null", "null") + subobj = mupdf.pdf_dict_getp(obj, key) + if not subobj.m_internal: + return ("null", "null") + text = None + if mupdf.pdf_is_indirect(subobj): + type = "xref" + text = "%i 0 R" % mupdf.pdf_to_num(subobj) + elif mupdf.pdf_is_array(subobj): + type = "array" + elif mupdf.pdf_is_dict(subobj): + type = "dict" + elif mupdf.pdf_is_int(subobj): + type = "int" + text = "%i" % mupdf.pdf_to_int(subobj) + elif mupdf.pdf_is_real(subobj): + type = "float" + elif mupdf.pdf_is_null(subobj): + type = "null" + text = "null" + elif mupdf.pdf_is_bool(subobj): + type = "bool" + if mupdf.pdf_to_bool(subobj): + text = "true" + else: + text = "false" + elif mupdf.pdf_is_name(subobj): + type = "name" + text = "/%s" % mupdf.pdf_to_name(subobj) + elif mupdf.pdf_is_string(subobj): + type = "string" + text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj)) + else: + type = "unknown" + if text is None: + res = JM_object_to_buffer(subobj, 1, 0) + text = JM_UnicodeFromBuffer(res) + return (type, text) + + def xref_get_keys(self, xref): + """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.""" + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len( pdf) + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref > 0: + obj = mupdf.pdf_load_object( pdf, xref) + else: + obj = mupdf.pdf_trailer( pdf) + n = mupdf.pdf_dict_len( obj) + rc = [] + if n == 0: + return rc + for i in range(n): + key = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( obj, i)) + rc.append(key) + return rc + + def xref_is_font(self, xref): + """Check if xref is a font object.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if self.xref_get_key(xref, "Type")[1] == "/Font": + return True + return False + + def xref_is_image(self, xref): + """Check if xref is an image object.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if self.xref_get_key(xref, "Subtype")[1] == "/Image": + return True + return False + + def xref_is_stream(self, xref=0): + """Check if xref is a stream object.""" + pdf = _as_pdf_document(self, required=0) + if not pdf.m_internal: + return False # not a PDF + return bool(mupdf.pdf_obj_num_is_stream(pdf, xref)) + + def xref_is_xobject(self, xref): + """Check if xref is a form xobject.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if self.xref_get_key(xref, "Subtype")[1] == "/Form": + return True + return False + + def xref_length(self): + """Get length of xref table.""" + xreflen = 0 + pdf = _as_pdf_document(self, required=0) + if pdf.m_internal: + xreflen = mupdf.pdf_xref_len(pdf) + return xreflen + + def xref_object(self, xref, compressed=0, ascii=0): + """Get xref object source as a string.""" + if self.is_closed: + raise ValueError("document closed") + if g_use_extra: + ret = extra.xref_object( self.this, xref, compressed, ascii) + return ret + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len(pdf) + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref > 0: + obj = mupdf.pdf_load_object(pdf, xref) + else: + obj = mupdf.pdf_trailer(pdf) + res = JM_object_to_buffer(mupdf.pdf_resolve_indirect(obj), compressed, ascii) + text = JM_EscapeStrFromBuffer(res) + return text + + def xref_set_key(self, xref, key, value): + """Set the value of a PDF dictionary key.""" + if self.is_closed: + raise ValueError("document closed") + + if not key or not isinstance(key, str) or INVALID_NAME_CHARS.intersection(key) not in (set(), {"/"}): + raise ValueError("bad 'key'") + if not isinstance(value, str) or not value or value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set(): + raise ValueError("bad 'value'") + + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len(pdf) + #if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + # THROWMSG("bad xref") + #if len(value) == 0: + # THROWMSG("bad 'value'") + #if len(key) == 0: + # THROWMSG("bad 'key'") + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref != -1: + obj = mupdf.pdf_load_object(pdf, xref) + else: + obj = mupdf.pdf_trailer(pdf) + new_obj = JM_set_object_value(obj, key, value) + if not new_obj.m_internal: + return # did not work: skip update + if xref != -1: + mupdf.pdf_update_object(pdf, xref, new_obj) + else: + n = mupdf.pdf_dict_len(new_obj) + for i in range(n): + mupdf.pdf_dict_put( + obj, + mupdf.pdf_dict_get_key(new_obj, i), + mupdf.pdf_dict_get_val(new_obj, i), + ) + + def xref_stream(self, xref): + """Get decompressed xref stream.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len( pdf) + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref >= 0: + obj = mupdf.pdf_new_indirect( pdf, xref, 0) + else: + obj = mupdf.pdf_trailer( pdf) + r = None + if mupdf.pdf_is_stream( obj): + res = mupdf.pdf_load_stream_number( pdf, xref) + r = JM_BinFromBuffer( res) + return r + + def xref_stream_raw(self, xref): + """Get xref stream without decompression.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + pdf = _as_pdf_document(self) + xreflen = mupdf.pdf_xref_len( pdf) + if not _INRANGE(xref, 1, xreflen-1) and xref != -1: + raise ValueError( MSG_BAD_XREF) + if xref >= 0: + obj = mupdf.pdf_new_indirect( pdf, xref, 0) + else: + obj = mupdf.pdf_trailer( pdf) + r = None + if mupdf.pdf_is_stream( obj): + res = mupdf.pdf_load_raw_stream_number( pdf, xref) + r = JM_BinFromBuffer( res) + return r + + def xref_xml_metadata(self): + """Get xref of document XML metadata.""" + pdf = _as_pdf_document(self) + root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')) + if not root.m_internal: + RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError) + xml = mupdf.pdf_dict_get( root, PDF_NAME('Metadata')) + xref = 0 + if xml.m_internal: + xref = mupdf.pdf_to_num( xml) + return xref + + __slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__') + + outline = property(lambda self: self._outline) + is_stream = xref_is_stream + +open = Document + + +class DocumentWriter: + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def __init__(self, path, options=''): + if isinstance( path, str): + pass + elif hasattr( path, 'absolute'): + path = str( path) + elif hasattr( path, 'name'): + path = path.name + if isinstance( path, str): + self.this = mupdf.FzDocumentWriter( path, options, mupdf.FzDocumentWriter.PathType_PDF) + else: + # Need to keep the Python JM_new_output_fileptr_Output instance + # alive for the lifetime of this DocumentWriter, otherwise calls + # to virtual methods implemented in Python fail. So we make it a + # member of this DocumentWriter. + # + # Unrelated to this, mupdf.FzDocumentWriter will set + # self._out.m_internal to null because ownership is passed in. + # + out = JM_new_output_fileptr( path) + self.this = mupdf.FzDocumentWriter( out, options, mupdf.FzDocumentWriter.OutputType_PDF) + assert out.m_internal_value() == 0 + assert hasattr( self.this, '_out') + + def begin_page( self, mediabox): + mediabox2 = JM_rect_from_py(mediabox) + device = mupdf.fz_begin_page( self.this, mediabox2) + device_wrapper = DeviceWrapper( device) + return device_wrapper + + def close( self): + mupdf.fz_close_document_writer( self.this) + + def end_page( self): + mupdf.fz_end_page( self.this) + + +class Font: + + def __del__(self): + if type(self) is not Font: + return None + + def __init__( + self, + fontname=None, + fontfile=None, + fontbuffer=None, + script=0, + language=None, + ordering=-1, + is_bold=0, + is_italic=0, + is_serif=0, + embed=1, + ): + + if fontbuffer: + if hasattr(fontbuffer, "getvalue"): + fontbuffer = fontbuffer.getvalue() + elif isinstance(fontbuffer, bytearray): + fontbuffer = bytes(fontbuffer) + if not isinstance(fontbuffer, bytes): + raise ValueError("bad type: 'fontbuffer'") + + if isinstance(fontname, str): + fname_lower = fontname.lower() + if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower: + message("Warning: did you mean a fontfile?") + + if fname_lower in ("cjk", "china-t", "china-ts"): + ordering = 0 + + elif fname_lower.startswith("china-s"): + ordering = 1 + elif fname_lower.startswith("korea"): + ordering = 3 + elif fname_lower.startswith("japan"): + ordering = 2 + elif fname_lower in fitz_fontdescriptors.keys(): + import pymupdf_fonts # optional fonts + fontbuffer = pymupdf_fonts.myfont(fname_lower) # make a copy + fontname = None # ensure using fontbuffer only + del pymupdf_fonts # remove package again + + elif ordering < 0: + fontname = Base14_fontdict.get(fontname, fontname) + + lang = mupdf.fz_text_language_from_string(language) + font = JM_get_font(fontname, fontfile, + fontbuffer, script, lang, ordering, + is_bold, is_italic, is_serif, embed) + self.this = font + + def __repr__(self): + return "Font('%s')" % self.name + + @property + def ascender(self): + """Return the glyph ascender value.""" + return mupdf.fz_font_ascender(self.this) + + @property + def bbox(self): + return self.this.fz_font_bbox() + + @property + def buffer(self): + buffer_ = mupdf.FzBuffer( mupdf.ll_fz_keep_buffer( self.this.m_internal.buffer)) + return mupdf.fz_buffer_extract_copy( buffer_) + + def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0): + """Return tuple of char lengths of unicode 'text' under a fontsize.""" + lang = mupdf.fz_text_language_from_string(language) + rc = [] + for ch in text: + c = ord(ch) + if small_caps: + gid = mupdf.fz_encode_character_sc(self.this, c) + if gid >= 0: + font = self.this + else: + gid, font = mupdf.fz_encode_character_with_fallback(self.this, c, script, lang) + rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode)) + return rc + + @property + def descender(self): + """Return the glyph descender value.""" + return mupdf.fz_font_descender(self.this) + + @property + def flags(self): + f = mupdf.ll_fz_font_flags(self.this.m_internal) + if not f: + return + assert isinstance( f, mupdf.fz_font_flags_t) + #log( '{=f}') + if mupdf_cppyy: + # cppyy includes remaining higher bits. + v = [f.is_mono] + def b(bits): + ret = v[0] & ((1 << bits)-1) + v[0] = v[0] >> bits + return ret + is_mono = b(1) + is_serif = b(1) + is_bold = b(1) + is_italic = b(1) + ft_substitute = b(1) + ft_stretch = b(1) + fake_bold = b(1) + fake_italic = b(1) + has_opentype = b(1) + invalid_bbox = b(1) + cjk_lang = b(1) + embed = b(1) + never_embed = b(1) + return { + "mono": is_mono if mupdf_cppyy else f.is_mono, + "serif": is_serif if mupdf_cppyy else f.is_serif, + "bold": is_bold if mupdf_cppyy else f.is_bold, + "italic": is_italic if mupdf_cppyy else f.is_italic, + "substitute": ft_substitute if mupdf_cppyy else f.ft_substitute, + "stretch": ft_stretch if mupdf_cppyy else f.ft_stretch, + "fake-bold": fake_bold if mupdf_cppyy else f.fake_bold, + "fake-italic": fake_italic if mupdf_cppyy else f.fake_italic, + "opentype": has_opentype if mupdf_cppyy else f.has_opentype, + "invalid-bbox": invalid_bbox if mupdf_cppyy else f.invalid_bbox, + 'cjk': cjk_lang if mupdf_cppyy else f.cjk, + 'cjk-lang': cjk_lang if mupdf_cppyy else f.cjk_lang, + 'embed': embed if mupdf_cppyy else f.embed, + 'never-embed': never_embed if mupdf_cppyy else f.never_embed, + } + + def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0): + """Return the glyph width of a unicode (font size 1).""" + lang = mupdf.fz_text_language_from_string(language) + if small_caps: + gid = mupdf.fz_encode_character_sc(self.this, chr_) + if gid >= 0: + font = self.this + else: + gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr_, script, lang) + return mupdf.fz_advance_glyph(font, gid, wmode) + + def glyph_bbox(self, char, language=None, script=0, small_caps=0): + """Return the glyph bbox of a unicode (font size 1).""" + lang = mupdf.fz_text_language_from_string(language) + if small_caps: + gid = mupdf.fz_encode_character_sc( self.this, char) + if gid >= 0: + font = self.this + else: + gid, font = mupdf.fz_encode_character_with_fallback( self.this, char, script, lang) + return Rect(mupdf.fz_bound_glyph( font, gid, mupdf.FzMatrix())) + + @property + def glyph_count(self): + return self.this.m_internal.glyph_count + + def glyph_name_to_unicode(self, name): + """Return the unicode for a glyph name.""" + return glyph_name_to_unicode(name) + + def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0): + """Check whether font has a glyph for this unicode.""" + if fallback: + lang = mupdf.fz_text_language_from_string(language) + gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang) + else: + if small_caps: + gid = mupdf.fz_encode_character_sc(self.this, chr) + else: + gid = mupdf.fz_encode_character(self.this, chr) + return gid + + @property + def is_bold(self): + return mupdf.fz_font_is_bold( self.this) + + @property + def is_italic(self): + return mupdf.fz_font_is_italic( self.this) + + @property + def is_monospaced(self): + return mupdf.fz_font_is_monospaced( self.this) + + @property + def is_serif(self): + return mupdf.fz_font_is_serif( self.this) + + @property + def is_writable(self): + return True # see pymupdf commit ef4056ee4da2 + font = self.this + flags = mupdf.ll_fz_font_flags(font.m_internal) + if mupdf_cppyy: + # cppyy doesn't handle bitfields correctly. + import cppyy + ft_substitute = cppyy.gbl.mupdf_mfz_font_flags_ft_substitute( flags) + else: + ft_substitute = flags.ft_substitute + + if ( mupdf.ll_fz_font_t3_procs(font.m_internal) + or ft_substitute + or not mupdf.pdf_font_writing_supported(font) + ): + return False + return True + + @property + def name(self): + ret = mupdf.fz_font_name(self.this) + #log( '{ret=}') + return ret + + def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0): + """Return length of unicode 'text' under a fontsize.""" + thisfont = self.this + lang = mupdf.fz_text_language_from_string(language) + rc = 0 + if not isinstance(text, str): + raise TypeError( MSG_BAD_TEXT) + for ch in text: + c = ord(ch) + if small_caps: + gid = mupdf.fz_encode_character_sc(thisfont, c) + if gid >= 0: + font = thisfont + else: + gid, font = mupdf.fz_encode_character_with_fallback(thisfont, c, script, lang) + rc += mupdf.fz_advance_glyph(font, gid, wmode) + rc *= fontsize + return rc + + def unicode_to_glyph_name(self, ch): + """Return the glyph name for a unicode.""" + return unicode_to_glyph_name(ch) + + def valid_codepoints(self): + ''' + Returns sorted list of valid unicodes of a fz_font. + ''' + ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this) + ucss = [i.ucs for i in ucs_gids] + ucss_unique = set(ucss) + ucss_unique_sorted = sorted(ucss_unique) + return ucss_unique_sorted + + +class Graftmap: + + def __del__(self): + if not type(self) is Graftmap: + return + self.thisown = False + + def __init__(self, doc): + dst = _as_pdf_document(doc) + map_ = mupdf.pdf_new_graft_map(dst) + self.this = map_ + self.thisown = True + + +class Link: + def __del__(self): + self._erase() + + def __init__( self, this): + assert isinstance( this, mupdf.FzLink) + self.this = this + + def __repr__(self): + CheckParent(self) + return "link on " + str(self.parent) + + def __str__(self): + CheckParent(self) + return "link on " + str(self.parent) + + def _border(self, doc, xref): + pdf = _as_pdf_document(doc, required=0) + if not pdf.m_internal: + return + link_obj = mupdf.pdf_new_indirect(pdf, xref, 0) + if not link_obj.m_internal: + return + b = JM_annot_border(link_obj) + return b + + def _colors(self, doc, xref): + pdf = _as_pdf_document(doc, required=0) + if not pdf.m_internal: + return + link_obj = mupdf.pdf_new_indirect( pdf, xref, 0) + if not link_obj.m_internal: + raise ValueError( MSG_BAD_XREF) + b = JM_annot_colors( link_obj) + return b + + def _erase(self): + self.parent = None + self.thisown = False + + def _setBorder(self, border, doc, xref): + pdf = _as_pdf_document(doc, required=0) + if not pdf.m_internal: + return + link_obj = mupdf.pdf_new_indirect(pdf, xref, 0) + if not link_obj.m_internal: + return + b = JM_annot_set_border(border, pdf, link_obj) + return b + + @property + def border(self): + return self._border(self.parent.parent.this, self.xref) + + @property + def colors(self): + return self._colors(self.parent.parent.this, self.xref) + + @property + def dest(self): + """Create link destination details.""" + if hasattr(self, "parent") and self.parent is None: + raise ValueError("orphaned object: parent is None") + if self.parent.parent.is_closed or self.parent.parent.is_encrypted: + raise ValueError("document closed or encrypted") + doc = self.parent.parent + + if self.is_external or self.uri.startswith("#"): + uri = None + else: + uri = doc.resolve_link(self.uri) + + return linkDest(self, uri, doc) + + @property + def flags(self)->int: + CheckParent(self) + doc = self.parent.parent + if not doc.is_pdf: + return 0 + f = doc.xref_get_key(self.xref, "F") + if f[1] != "null": + return int(f[1]) + return 0 + + @property + def is_external(self): + """Flag the link as external.""" + CheckParent(self) + if g_use_extra: + return extra.Link_is_external( self.this) + this_link = self.this + if not this_link.m_internal or not this_link.m_internal.uri: + return False + return bool( mupdf.fz_is_external_link( this_link.m_internal.uri)) + + @property + def next(self): + """Next link.""" + if not self.this.m_internal: + return None + CheckParent(self) + if 0 and g_use_extra: + val = extra.Link_next( self.this) + else: + val = self.this.next() + if not val.m_internal: + return None + val = Link( val) + if val: + val.thisown = True + val.parent = self.parent # copy owning page from prev link + val.parent._annot_refs[id(val)] = val + if self.xref > 0: # prev link has an xref + link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK] + link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK] + idx = link_xrefs.index(self.xref) + val.xref = link_xrefs[idx + 1] + val.id = link_ids[idx + 1] + else: + val.xref = 0 + val.id = "" + return val + + @property + def rect(self): + """Rectangle ('hot area').""" + CheckParent(self) + # utils.py:getLinkDict() appears to expect exceptions from us, so we + # ensure that we raise on error. + if self.this is None or not self.this.m_internal: + raise Exception( 'self.this.m_internal not available') + val = JM_py_from_rect( self.this.rect()) + val = Rect(val) + return val + + def set_border(self, border=None, width=0, dashes=None, style=None): + if type(border) is not dict: + border = {"width": width, "style": style, "dashes": dashes} + return self._setBorder(border, self.parent.parent.this, self.xref) + + def set_colors(self, colors=None, stroke=None, fill=None): + """Set border colors.""" + CheckParent(self) + doc = self.parent.parent + if type(colors) is not dict: + colors = {"fill": fill, "stroke": stroke} + fill = colors.get("fill") + stroke = colors.get("stroke") + if fill is not None: + message("warning: links have no fill color") + if stroke in ([], ()): + doc.xref_set_key(self.xref, "C", "[]") + return + if hasattr(stroke, "__float__"): + stroke = [float(stroke)] + CheckColor(stroke) + assert len(stroke) in (1, 3, 4) + s = f"[{_format_g(stroke)}]" + doc.xref_set_key(self.xref, "C", s) + + def set_flags(self, flags): + CheckParent(self) + doc = self.parent.parent + if not doc.is_pdf: + raise ValueError("is no PDF") + if not type(flags) is int: + raise ValueError("bad 'flags' value") + doc.xref_set_key(self.xref, "F", str(flags)) + return None + + @property + def uri(self): + """Uri string.""" + #CheckParent(self) + if g_use_extra: + return extra.link_uri(self.this) + this_link = self.this + return this_link.m_internal.uri if this_link.m_internal else '' + + page = -1 + + +class Matrix: + + def __abs__(self): + return math.sqrt(sum([c*c for c in self])) + + def __add__(self, m): + if hasattr(m, "__float__"): + return Matrix(self.a + m, self.b + m, self.c + m, + self.d + m, self.e + m, self.f + m) + if len(m) != 6: + raise ValueError("Matrix: bad seq len") + return Matrix(self.a + m[0], self.b + m[1], self.c + m[2], + self.d + m[3], self.e + m[4], self.f + m[5]) + + def __bool__(self): + return not (max(self) == min(self) == 0) + + def __eq__(self, mat): + if not hasattr(mat, "__len__"): + return False + return len(mat) == 6 and not (self - mat) + + def __getitem__(self, i): + return (self.a, self.b, self.c, self.d, self.e, self.f)[i] + + def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None): + """ + Matrix() - all zeros + Matrix(a, b, c, d, e, f) + Matrix(zoom-x, zoom-y) - zoom + Matrix(shear-x, shear-y, 1) - shear + Matrix(degree) - rotate + Matrix(Matrix) - new copy + Matrix(sequence) - from 'sequence' + Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix. + + Explicit keyword args a, b, c, d, e, f override any earlier settings if + not None. + """ + if not args: + self.a = self.b = self.c = self.d = self.e = self.f = 0.0 + elif len(args) > 6: + raise ValueError("Matrix: bad seq len") + elif len(args) == 6: # 6 numbers + self.a, self.b, self.c, self.d, self.e, self.f = map(float, args) + elif len(args) == 1: # either an angle or a sequ + if isinstance(args[0], mupdf.FzMatrix): + self.a = args[0].a + self.b = args[0].b + self.c = args[0].c + self.d = args[0].d + self.e = args[0].e + self.f = args[0].f + elif hasattr(args[0], "__float__"): + theta = math.radians(args[0]) + c_ = round(math.cos(theta), 8) + s_ = round(math.sin(theta), 8) + self.a = self.d = c_ + self.b = s_ + self.c = -s_ + self.e = self.f = 0.0 + else: + self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0]) + elif len(args) == 2 or len(args) == 3 and args[2] == 0: + self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \ + 0.0, 0.0, float(args[1]), 0.0, 0.0 + elif len(args) == 3 and args[2] == 1: + self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \ + float(args[1]), float(args[0]), 1.0, 0.0, 0.0 + else: + raise ValueError("Matrix: bad args") + + # Override with explicit args if specified. + if a is not None: self.a = a + if b is not None: self.b = b + if c is not None: self.c = c + if d is not None: self.d = d + if e is not None: self.e = e + if f is not None: self.f = f + + def __invert__(self): + """Calculate inverted matrix.""" + m1 = Matrix() + m1.invert(self) + return m1 + + def __len__(self): + return 6 + + def __mul__(self, m): + if hasattr(m, "__float__"): + return Matrix(self.a * m, self.b * m, self.c * m, + self.d * m, self.e * m, self.f * m) + m1 = Matrix(1,1) + return m1.concat(self, m) + + def __neg__(self): + return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f) + + def __nonzero__(self): + return not (max(self) == min(self) == 0) + + def __pos__(self): + return Matrix(self) + + def __repr__(self): + return "Matrix" + str(tuple(self)) + + def __setitem__(self, i, v): + v = float(v) + if i == 0: self.a = v + elif i == 1: self.b = v + elif i == 2: self.c = v + elif i == 3: self.d = v + elif i == 4: self.e = v + elif i == 5: self.f = v + else: + raise IndexError("index out of range") + return + + def __sub__(self, m): + if hasattr(m, "__float__"): + return Matrix(self.a - m, self.b - m, self.c - m, + self.d - m, self.e - m, self.f - m) + if len(m) != 6: + raise ValueError("Matrix: bad seq len") + return Matrix(self.a - m[0], self.b - m[1], self.c - m[2], + self.d - m[3], self.e - m[4], self.f - m[5]) + + def __truediv__(self, m): + if hasattr(m, "__float__"): + return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m, + self.d * 1./m, self.e * 1./m, self.f * 1./m) + m1 = util_invert_matrix(m)[1] + if not m1: + raise ZeroDivisionError("matrix not invertible") + m2 = Matrix(1,1) + return m2.concat(self, m1) + + def concat(self, one, two): + """Multiply two matrices and replace current one.""" + if not len(one) == len(two) == 6: + raise ValueError("Matrix: bad seq len") + self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two) + return self + + def invert(self, src=None): + """Calculate the inverted matrix. Return 0 if successful and replace + current one. Else return 1 and do nothing. + """ + if src is None: + dst = util_invert_matrix(self) + else: + dst = util_invert_matrix(src) + if dst[0] == 1: + return 1 + self.a, self.b, self.c, self.d, self.e, self.f = dst[1] + return 0 + + @property + def is_rectilinear(self): + """True if rectangles are mapped to rectangles.""" + return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \ + (abs(self.a) < EPSILON and abs(self.d) < EPSILON) + + def prerotate(self, theta): + """Calculate pre rotation and replace current matrix.""" + theta = float(theta) + while theta < 0: theta += 360 + while theta >= 360: theta -= 360 + if abs(0 - theta) < EPSILON: + pass + + elif abs(90.0 - theta) < EPSILON: + a = self.a + b = self.b + self.a = self.c + self.b = self.d + self.c = -a + self.d = -b + + elif abs(180.0 - theta) < EPSILON: + self.a = -self.a + self.b = -self.b + self.c = -self.c + self.d = -self.d + + elif abs(270.0 - theta) < EPSILON: + a = self.a + b = self.b + self.a = -self.c + self.b = -self.d + self.c = a + self.d = b + + else: + rad = math.radians(theta) + s = math.sin(rad) + c = math.cos(rad) + a = self.a + b = self.b + self.a = c * a + s * self.c + self.b = c * b + s * self.d + self.c =-s * a + c * self.c + self.d =-s * b + c * self.d + + return self + + def prescale(self, sx, sy): + """Calculate pre scaling and replace current matrix.""" + sx = float(sx) + sy = float(sy) + self.a *= sx + self.b *= sx + self.c *= sy + self.d *= sy + return self + + def preshear(self, h, v): + """Calculate pre shearing and replace current matrix.""" + h = float(h) + v = float(v) + a, b = self.a, self.b + self.a += v * self.c + self.b += v * self.d + self.c += h * a + self.d += h * b + return self + + def pretranslate(self, tx, ty): + """Calculate pre translation and replace current matrix.""" + tx = float(tx) + ty = float(ty) + self.e += tx * self.a + ty * self.c + self.f += tx * self.b + ty * self.d + return self + + __inv__ = __invert__ + __div__ = __truediv__ + norm = __abs__ + + +class IdentityMatrix(Matrix): + """Identity matrix [1, 0, 0, 1, 0, 0]""" + + def __hash__(self): + return hash((1,0,0,1,0,0)) + + def __init__(self): + Matrix.__init__(self, 1.0, 1.0) + + def __repr__(self): + return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)" + + def __setattr__(self, name, value): + if name in "ad": + self.__dict__[name] = 1.0 + elif name in "bcef": + self.__dict__[name] = 0.0 + else: + self.__dict__[name] = value + + def checkargs(*args): + raise NotImplementedError("Identity is readonly") + +Identity = IdentityMatrix() + + +class linkDest: + """link or outline destination details""" + + def __init__(self, obj, rlink, document=None): + isExt = obj.is_external + isInt = not isExt + self.dest = "" + self.file_spec = "" + self.flags = 0 + self.is_map = False + self.is_uri = False + self.kind = LINK_NONE + self.lt = Point(0, 0) + self.named = dict() + self.new_window = "" + self.page = obj.page + self.rb = Point(0, 0) + self.uri = obj.uri + + def uri_to_dict(uri): + items = self.uri[1:].split('&') + ret = dict() + for item in items: + eq = item.find('=') + if eq >= 0: + ret[item[:eq]] = item[eq+1:] + else: + ret[item] = None + return ret + + def unescape(name): + """Unescape '%AB' substrings to chr(0xAB).""" + split = name.replace("%%", "%25") # take care of escaped '%' + split = split.split("%") + newname = split[0] + for item in split[1:]: + piece = item[:2] + newname += chr(int(piece, base=16)) + newname += item[2:] + return newname + + if rlink and not self.uri.startswith("#"): + self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}" + if obj.is_external: + self.page = -1 + self.kind = LINK_URI + if not self.uri: + self.page = -1 + self.kind = LINK_NONE + if isInt and self.uri: + self.uri = self.uri.replace("&zoom=nan", "&zoom=0") + if self.uri.startswith("#"): + self.kind = LINK_GOTO + m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri) + if m: + self.page = int(m.group(1)) - 1 + self.lt = Point(float((m.group(3))), float(m.group(4))) + self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID + else: + m = re.match('^#page=([0-9]+)$', self.uri) + if m: + self.page = int(m.group(1)) - 1 + else: + self.kind = LINK_NAMED + m = re.match('^#nameddest=(.*)', self.uri) + assert document + if document and m: + named = unescape(m.group(1)) + self.named = document.resolve_names().get(named) + if self.named is None: + # document.resolve_names() does not contain an + # entry for `named` so use an empty dict. + self.named = dict() + self.named['nameddest'] = named + else: + self.named = uri_to_dict(self.uri[1:]) + else: + self.kind = LINK_NAMED + self.named = uri_to_dict(self.uri) + if obj.is_external: + if not self.uri: + pass + elif self.uri.startswith("file:"): + self.file_spec = self.uri[5:] + if self.file_spec.startswith("//"): + self.file_spec = self.file_spec[2:] + self.is_uri = False + self.uri = "" + self.kind = LINK_LAUNCH + ftab = self.file_spec.split("#") + if len(ftab) == 2: + if ftab[1].startswith("page="): + self.kind = LINK_GOTOR + self.file_spec = ftab[0] + self.page = int(ftab[1].split("&")[0][5:]) - 1 + elif ":" in self.uri: + self.is_uri = True + self.kind = LINK_URI + else: + self.is_uri = True + self.kind = LINK_LAUNCH + assert isinstance(self.named, dict) + +class Widget: + ''' + Class describing a PDF form field ("widget") + ''' + + def __init__(self): + self.border_color = None + self.border_style = "S" + self.border_width = 0 + self.border_dashes = None + self.choice_values = None # choice fields only + self.rb_parent = None # radio buttons only: xref of owning parent + + self.field_name = None # field name + self.field_label = None # field label + self.field_value = None + self.field_flags = 0 + self.field_display = 0 + self.field_type = 0 # valid range 1 through 7 + self.field_type_string = None # field type as string + + self.fill_color = None + self.button_caption = None # button caption + self.is_signed = None # True / False if signature + self.text_color = (0, 0, 0) + self.text_font = "Helv" + self.text_fontsize = 0 + self.text_maxlen = 0 # text fields only + self.text_format = 0 # text fields only + self._text_da = "" # /DA = default appearance + + self.script = None # JavaScript (/A) + self.script_stroke = None # JavaScript (/AA/K) + self.script_format = None # JavaScript (/AA/F) + self.script_change = None # JavaScript (/AA/V) + self.script_calc = None # JavaScript (/AA/C) + self.script_blur = None # JavaScript (/AA/Bl) + self.script_focus = None # JavaScript (/AA/Fo) codespell:ignore + + self.rect = None # annot value + self.xref = 0 # annot value + + def __repr__(self): + #return "'%s' widget on %s" % (self.field_type_string, str(self.parent)) + # No self.parent. + return f'Widget:(field_type={self.field_type_string} script={self.script})' + return "'%s' widget" % (self.field_type_string) + + def _adjust_font(self): + """Ensure text_font is from our list and correctly spelled. + """ + if not self.text_font: + self.text_font = "Helv" + return + valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb") + for f in valid_fonts: + if self.text_font.lower() == f.lower(): + self.text_font = f + return + self.text_font = "Helv" + return + + def _checker(self): + """Any widget type checks. + """ + if self.field_type not in range(1, 8): + raise ValueError("bad field type") + + # if setting a radio button to ON, first set Off all buttons + # in the group - this is not done by MuPDF: + if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"): + # so we are about setting this button to ON/True + # check other buttons in same group and set them to 'Off' + doc = self.parent.parent + kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids") + if kids_type == "array": + xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split())) + for xref in xrefs: + if xref != self.xref: + doc.xref_set_key(xref, "AS", "/Off") + # the calling method will now set the intended button to on and + # will find everything prepared for correct functioning. + + def _parse_da(self): + """Extract font name, size and color from default appearance string (/DA object). + + Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'. + """ + if not self._text_da: + return + font = "Helv" + fsize = 0 + col = (0, 0, 0) + dat = self._text_da.split() # split on any whitespace + for i, item in enumerate(dat): + if item == "Tf": + font = dat[i - 2][1:] + fsize = float(dat[i - 1]) + dat[i] = dat[i-1] = dat[i-2] = "" + continue + if item == "g": # unicolor text + col = [(float(dat[i - 1]))] + dat[i] = dat[i-1] = "" + continue + if item == "rg": # RGB colored text + col = [float(f) for f in dat[i - 3:i]] + dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = "" + continue + self.text_font = font + self.text_fontsize = fsize + self.text_color = col + self._text_da = "" + return + + def _validate(self): + """Validate the class entries. + """ + if (self.rect.is_infinite + or self.rect.is_empty + ): + raise ValueError("bad rect") + + if not self.field_name: + raise ValueError("field name missing") + + if self.field_label == "Unnamed": + self.field_label = None + CheckColor(self.border_color) + CheckColor(self.fill_color) + if not self.text_color: + self.text_color = (0, 0, 0) + CheckColor(self.text_color) + + if not self.border_width: + self.border_width = 0 + + if not self.text_fontsize: + self.text_fontsize = 0 + + self.border_style = self.border_style.upper()[0:1] + + # standardize content of JavaScript entries + btn_type = self.field_type in ( + mupdf.PDF_WIDGET_TYPE_BUTTON, + mupdf.PDF_WIDGET_TYPE_CHECKBOX, + mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, + ) + if not self.script: + self.script = None + elif type(self.script) is not str: + raise ValueError("script content must be a string") + + # buttons cannot have the following script actions + if btn_type or not self.script_calc: + self.script_calc = None + elif type(self.script_calc) is not str: + raise ValueError("script_calc content must be a string") + + if btn_type or not self.script_change: + self.script_change = None + elif type(self.script_change) is not str: + raise ValueError("script_change content must be a string") + + if btn_type or not self.script_format: + self.script_format = None + elif type(self.script_format) is not str: + raise ValueError("script_format content must be a string") + + if btn_type or not self.script_stroke: + self.script_stroke = None + elif type(self.script_stroke) is not str: + raise ValueError("script_stroke content must be a string") + + if btn_type or not self.script_blur: + self.script_blur = None + elif type(self.script_blur) is not str: + raise ValueError("script_blur content must be a string") + + if btn_type or not self.script_focus: + self.script_focus = None + elif type(self.script_focus) is not str: + raise ValueError("script_focus content must be a string") + + self._checker() # any field_type specific checks + + def _sync_flags(self): + """Propagate the field flags. + + If this widget has a "/Parent", set its field flags and that of all + its /Kids widgets to the value of the current widget. + Only possible for widgets existing in the PDF. + + Returns True or False. + """ + if not self.xref: + return False # no xref: widget not in the PDF + doc = self.parent.parent # the owning document + assert doc + pdf = _as_pdf_document(doc) + # load underlying PDF object + pdf_widget = mupdf.pdf_load_object(pdf, self.xref) + Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent")) + if not Parent.pdf_is_dict(): + return False # no /Parent: nothing to do + + # put the field flags value into the parent field flags: + Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags) + + # also put that value into all kids of the Parent + kids = Parent.pdf_dict_get(PDF_NAME("Kids")) + if not kids.pdf_is_array(): + message("warning: malformed PDF, Parent has no Kids array") + return False # no /Kids: should never happen! + + for i in range(kids.pdf_array_len()): # walk through all kids + # access kid widget, and do some precautionary checks + kid = kids.pdf_array_get(i) + if not kid.pdf_is_dict(): + continue + xref = kid.pdf_to_num() # get xref of the kid + if xref == self.xref: # skip self widget + continue + subtype = kid.pdf_dict_get(PDF_NAME("Subtype")) + if not subtype.pdf_to_name() == "Widget": + continue + # put the field flags value into the kid field flags: + kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags) + + return True # all done + + def button_states(self): + """Return the on/off state names for button widgets. + + A button may have 'normal' or 'pressed down' appearances. While the 'Off' + state is usually called like this, the 'On' state is often given a name + relating to the functional context. + """ + if self.field_type not in (2, 5): + return None # no button type + if hasattr(self, "parent"): # field already exists on page + doc = self.parent.parent + else: + return + xref = self.xref + states = {"normal": None, "down": None} + APN = doc.xref_get_key(xref, "AP/N") + if APN[0] == "dict": + nstates = [] + APN = APN[1][2:-2] + apnt = APN.split("/")[1:] + for x in apnt: + nstates.append(x.split()[0]) + states["normal"] = nstates + if APN[0] == "xref": + nstates = [] + nxref = int(APN[1].split(" ")[0]) + APN = doc.xref_object(nxref) + apnt = APN.split("/")[1:] + for x in apnt: + nstates.append(x.split()[0]) + states["normal"] = nstates + APD = doc.xref_get_key(xref, "AP/D") + if APD[0] == "dict": + dstates = [] + APD = APD[1][2:-2] + apdt = APD.split("/")[1:] + for x in apdt: + dstates.append(x.split()[0]) + states["down"] = dstates + if APD[0] == "xref": + dstates = [] + dxref = int(APD[1].split(" ")[0]) + APD = doc.xref_object(dxref) + apdt = APD.split("/")[1:] + for x in apdt: + dstates.append(x.split()[0]) + states["down"] = dstates + return states + + @property + def next(self): + return self._annot.next + + def on_state(self): + """Return the "On" value for button widgets. + + This is useful for radio buttons mainly. Checkboxes will always return + "Yes". Radio buttons will return the string that is unequal to "Off" + as returned by method button_states(). + If the radio button is new / being created, it does not yet have an + "On" value. In this case, a warning is shown and True is returned. + """ + if self.field_type not in (2, 5): + return None # no checkbox or radio button + bstate = self.button_states() + if bstate is None: + bstate = dict() + for k in bstate.keys(): + for v in bstate[k]: + if v != "Off": + return v + message("warning: radio button has no 'On' value.") + return True + + def reset(self): + """Reset the field value to its default. + """ + TOOLS._reset_widget(self._annot) + + def update(self, sync_flags=False): + """Reflect Python object in the PDF.""" + self._validate() + + self._adjust_font() # ensure valid text_font name + + # now create the /DA string + self._text_da = "" + if len(self.text_color) == 3: + fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" + self._text_da + elif len(self.text_color) == 1: + fmt = "{:g} g /{f:s} {s:g} Tf" + self._text_da + elif len(self.text_color) == 4: + fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf" + self._text_da + self._text_da = fmt.format(*self.text_color, f=self.text_font, + s=self.text_fontsize) + # finally update the widget + + # if widget has a '/AA/C' script, make sure it is in the '/CO' + # array of the '/AcroForm' dictionary. + if self.script_calc: # there is a "calculation" script: + # make sure we are in the /CO array + util_ensure_widget_calc(self._annot) + + # finally update the widget + TOOLS._save_widget(self._annot, self) + self._text_da = "" + if sync_flags: + self._sync_flags() # propagate field flags to parent and kids + + +from . import _extra + + +class Outline: + + def __init__(self, ol): + self.this = ol + + @property + def dest(self): + '''outline destination details''' + return linkDest(self, None, None) + + def destination(self, document): + ''' + Like `dest` property but uses `document` to resolve destinations for + kind=LINK_NAMED. + ''' + return linkDest(self, None, document) + + @property + def down(self): + ol = self.this + down_ol = ol.down() + if not down_ol.m_internal: + return + return Outline(down_ol) + + @property + def is_external(self): + if g_use_extra: + # calling _extra.* here appears to save significant time in + # test_toc.py:test_full_toc, 1.2s=>0.94s. + # + return _extra.Outline_is_external( self.this) + ol = self.this + if not ol.m_internal: + return False + uri = ol.m_internal.uri if 1 else ol.uri() + if uri is None: + return False + return mupdf.fz_is_external_link(uri) + + @property + def is_open(self): + if 1: + return self.this.m_internal.is_open + return self.this.is_open() + + @property + def next(self): + ol = self.this + next_ol = ol.next() + if not next_ol.m_internal: + return + return Outline(next_ol) + + @property + def page(self): + if 1: + return self.this.m_internal.page.page + return self.this.page().page + + @property + def title(self): + return self.this.m_internal.title + + @property + def uri(self): + ol = self.this + if not ol.m_internal: + return None + return ol.m_internal.uri + + @property + def x(self): + return self.this.m_internal.x + + @property + def y(self): + return self.this.m_internal.y + + __slots__ = [ 'this'] + + +def _make_PdfFilterOptions( + recurse=0, + instance_forms=0, + ascii=0, + no_update=0, + sanitize=0, + sopts=None, + ): + ''' + Returns a mupdf.PdfFilterOptions instance. + ''' + + filter_ = mupdf.PdfFilterOptions() + filter_.recurse = recurse + filter_.instance_forms = instance_forms + filter_.ascii = ascii + + filter_.no_update = no_update + if sanitize: + # We want to use a PdfFilterFactory whose `.filter` fn pointer is + # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to + # get access to this raw fn in Python; and on Windows raw MuPDF + # functions are not even available to C++. + # + # So we use SWIG Director to implement our own + # PdfFilterFactory whose `filter()` method calls + # `mupdf.ll_pdf_new_sanitize_filter()`. + if sopts: + assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions) + else: + sopts = mupdf.PdfSanitizeFilterOptions() + class Factory(mupdf.PdfFilterFactory2): + def __init__(self): + super().__init__() + self.use_virtual_filter() + self.sopts = sopts + def filter(self, ctx, doc, chain, struct_parents, transform, options): + if 0: + log(f'sanitize filter.filter():') + log(f' {self=}') + log(f' {ctx=}') + log(f' {doc=}') + log(f' {chain=}') + log(f' {struct_parents=}') + log(f' {transform=}') + log(f' {options=}') + log(f' {self.sopts.internal()=}') + return mupdf.ll_pdf_new_sanitize_filter( + doc, + chain, + struct_parents, + transform, + options, + self.sopts.internal(), + ) + + factory = Factory() + filter_.add_factory(factory.internal()) + filter_._factory = factory + return filter_ + + +class Page: + + def __init__(self, page, document): + assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}' + self.this = page + self.thisown = True + self.last_point = None + self.draw_cont = '' + self._annot_refs = dict() + self.parent = document + if page.m_internal: + if isinstance( page, mupdf.PdfPage): + self.number = page.m_internal.super.number + else: + self.number = page.m_internal.number + else: + self.number = None + + def __repr__(self): + return self.__str__() + CheckParent(self) + x = self.parent.name + if self.parent.stream is not None: + x = "" % (self.parent._graft_id,) + if x == "": + x = "" % self.parent._graft_id + return "page %s of %s" % (self.number, x) + + def __str__(self): + #CheckParent(self) + parent = getattr(self, 'parent', None) + if isinstance(self.this.m_internal, mupdf.pdf_page): + number = self.this.m_internal.super.number + else: + number = self.this.m_internal.number + ret = f'page {number}' + if parent: + x = self.parent.name + if self.parent.stream is not None: + x = "" % (self.parent._graft_id,) + if x == "": + x = "" % self.parent._graft_id + ret += f' of {x}' + return ret + + def _add_caret_annot(self, point): + if g_use_extra: + annot = extra._add_caret_annot( self.this, JM_point_from_py(point)) + else: + page = self._pdf_page() + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_CARET) + if point: + p = JM_point_from_py(point) + r = mupdf.pdf_annot_rect(annot) + r = mupdf.FzRect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0) + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + return annot + + def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None): + page = self._pdf_page() + uf = ufilename if ufilename else filename + d = desc if desc else filename + p = JM_point_from_py(point) + filebuf = JM_BufferFromBytes(buffer_) + if not filebuf.m_internal: + raise TypeError( MSG_BAD_BUFFER) + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FILE_ATTACHMENT) + r = mupdf.pdf_annot_rect(annot) + r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0) + mupdf.pdf_set_annot_rect(annot, r) + flags = mupdf.PDF_ANNOT_IS_PRINT + mupdf.pdf_set_annot_flags(annot, flags) + + if icon: + mupdf.pdf_set_annot_icon_name(annot, icon) + + val = JM_embed_file(page.doc(), filebuf, filename, uf, d, 1) + mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), val) + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename) + mupdf.pdf_update_annot(annot) + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_set_annot_flags(annot, flags) + JM_add_annot_id(annot, "A") + return Annot(annot) + + def _add_freetext_annot( + self, rect, + text, + fontsize=11, + fontname=None, + text_color=None, + fill_color=None, + border_color=None, + border_width=0, + dashes=None, + callout=None, + line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW, + opacity=1, + align=0, + rotate=0, + richtext=False, + style=None, + ): + rc = f""" + + {text}""" + page = self._pdf_page() + if border_color and not richtext: + raise ValueError("cannot set border_color if rich_text is False") + if border_color and not text_color: + text_color = border_color + nfcol, fcol = JM_color_FromSequence(fill_color) + ntcol, tcol = JM_color_FromSequence(text_color) + r = JM_rect_from_py(rect) + if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r): + raise ValueError( MSG_BAD_RECT) + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT) + annot_obj = mupdf.pdf_annot_obj(annot) + + #insert text as 'contents' or 'RC' depending on 'richtext' + if not richtext: + mupdf.pdf_set_annot_contents(annot, text) + else: + mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc) + if style: + mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style) + + mupdf.pdf_set_annot_rect(annot, r) + + while rotate < 0: + rotate += 360 + while rotate >= 360: + rotate -= 360 + if rotate != 0: + mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate) + + mupdf.pdf_set_annot_quadding(annot, align) + + if nfcol > 0: + mupdf.pdf_set_annot_color(annot, fcol[:nfcol]) + + mupdf.pdf_set_annot_border_width(annot, border_width) + mupdf.pdf_set_annot_opacity(annot, opacity) + if dashes: + for d in dashes: + mupdf.pdf_add_annot_border_dash_item(annot, float(d)) + + # Insert callout information + if callout: + mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout")) + mupdf.pdf_set_annot_callout_style(annot, line_end) + point_count = len(callout) + extra.JM_set_annot_callout_line(annot, tuple(callout), point_count) + + # insert the default appearance string + if not richtext: + JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize) + + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + val = Annot(annot) + return val + + def _add_ink_annot(self, list): + page = _as_pdf_page(self.this) + if not PySequence_Check(list): + raise ValueError( MSG_BAD_ARG_INK_ANNOT) + ctm = mupdf.FzMatrix() + mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm) + inv_ctm = mupdf.fz_invert_matrix(ctm) + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK) + annot_obj = mupdf.pdf_annot_obj(annot) + n0 = len(list) + inklist = mupdf.pdf_new_array(page.doc(), n0) + + for j in range(n0): + sublist = list[j] + n1 = len(sublist) + stroke = mupdf.pdf_new_array(page.doc(), 2 * n1) + + for i in range(n1): + p = sublist[i] + if not PySequence_Check(p) or PySequence_Size(p) != 2: + raise ValueError( MSG_BAD_ARG_INK_ANNOT) + point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm) + mupdf.pdf_array_push_real(stroke, point.x) + mupdf.pdf_array_push_real(stroke, point.y) + + mupdf.pdf_array_push(inklist, stroke) + + mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + return Annot(annot) + + def _add_line_annot(self, p1, p2): + page = self._pdf_page() + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_LINE) + a = JM_point_from_py(p1) + b = JM_point_from_py(p2) + mupdf.pdf_set_annot_line(annot, a, b) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + assert annot.m_internal + return Annot(annot) + + def _add_multiline(self, points, annot_type): + page = self._pdf_page() + if len(points) < 2: + raise ValueError( MSG_BAD_ARG_POINTS) + annot = mupdf.pdf_create_annot(page, annot_type) + for p in points: + if (PySequence_Size(p) != 2): + raise ValueError( MSG_BAD_ARG_POINTS) + point = JM_point_from_py(p) + mupdf.pdf_add_annot_vertex(annot, point) + + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + return Annot(annot) + + def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None): + page = self._pdf_page() + fcol = [ 1, 1, 1, 0] + nfcol = 0 + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_REDACT) + q = JM_quad_from_py(quad) + r = mupdf.fz_rect_from_quad(q) + # TODO calculate de-rotated rect + mupdf.pdf_set_annot_rect(annot, r) + if fill: + nfcol, fcol = JM_color_FromSequence(fill) + arr = mupdf.pdf_new_array(page.doc(), nfcol) + for i in range(nfcol): + mupdf.pdf_array_push_real(arr, fcol[i]) + mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), arr) + if text: + assert da_str + mupdf.pdf_dict_puts( + mupdf.pdf_annot_obj(annot), + "OverlayText", + mupdf.pdf_new_text_string(text), + ) + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str) + mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + annot = mupdf.ll_pdf_keep_annot(annot.m_internal) + annot = mupdf.PdfAnnot( annot) + return Annot(annot) + + def _add_square_or_circle(self, rect, annot_type): + page = self._pdf_page() + r = JM_rect_from_py(rect) + if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r): + raise ValueError( MSG_BAD_RECT) + annot = mupdf.pdf_create_annot(page, annot_type) + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + assert annot.m_internal + return Annot(annot) + + def _add_stamp_annot(self, rect, stamp=0): + rect = Rect(rect) + r = JM_rect_from_py(rect) + if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r): + raise ValueError(MSG_BAD_RECT) + page = self._pdf_page() + stamp_id = [ + "Approved", + "AsIs", + "Confidential", + "Departmental", + "Experimental", + "Expired", + "Final", + "ForComment", + "ForPublicRelease", + "NotApproved", + "NotForPublicRelease", + "Sold", + "TopSecret", + "Draft", + ] + n = len(stamp_id) + buf = None + name = None + if stamp in range(n): + name = stamp_id[stamp] + elif isinstance(stamp, Pixmap): + buf = stamp.tobytes() + elif isinstance(stamp, str): + buf = pathlib.Path(stamp).read_bytes() + elif isinstance(stamp, (bytes, bytearray)): + buf = stamp + elif isinstance(stamp, io.BytesIO): + buf = stamp.getvalue() + else: + name = stamp_id[0] + + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_STAMP) + if buf: # image stamp + fzbuff = mupdf.fz_new_buffer_from_copied_data(buf) + img = mupdf.fz_new_image_from_buffer(fzbuff) + + # compute image boundary box on page + w, h = img.w(), img.h() + scale = min(rect.width / w, rect.height / h) + width = w * scale # bbox width + height = h * scale # bbox height + + # center of "rect" + center = (rect.tl + rect.br) / 2 + x0 = center.x - width / 2 + y0 = center.y - height / 2 + x1 = x0 + width + y1 = y0 + height + r = mupdf.fz_make_rect(x0, y0, x1, y1) + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_set_annot_stamp_image(annot, img) + mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp")) + mupdf.pdf_set_annot_contents(annot, "Image Stamp") + else: # text stamp + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name)) + mupdf.pdf_set_annot_contents(annot, name) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + return Annot(annot) + + def _add_text_annot(self, point, text, icon=None): + page = self._pdf_page() + p = JM_point_from_py( point) + annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_TEXT) + r = mupdf.pdf_annot_rect(annot) + r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0) + mupdf.pdf_set_annot_rect(annot, r) + mupdf.pdf_set_annot_contents(annot, text) + if icon: + mupdf.pdf_set_annot_icon_name(annot, icon) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + return Annot(annot) + + def _add_text_marker(self, quads, annot_type): + + CheckParent(self) + if not self.parent.is_pdf: + raise ValueError("is no PDF") + + val = Page__add_text_marker(self, quads, annot_type) + if not val: + return None + val.parent = weakref.proxy(self) + self._annot_refs[id(val)] = val + + return val + + def _addAnnot_FromString(self, linklist): + """Add links from list of object sources.""" + CheckParent(self) + if g_use_extra: + self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString + #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().') + return extra.Page_addAnnot_FromString( self.this, linklist) + page = _as_pdf_page(self.this) + lcount = len(linklist) # link count + if lcount < 1: + return + i = -1 + + # insert links from the provided sources + if not isinstance(linklist, tuple): + raise ValueError( "bad 'linklist' argument") + if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal: + mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount) + annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')) + assert annots.m_internal, f'{lcount=} {annots.m_internal=}' + for i in range(lcount): + txtpy = linklist[i] + text = JM_StrAsChar(txtpy) + if not text: + message("skipping bad link / annot item %i.", i) + continue + try: + annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text)) + ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0) + mupdf.pdf_array_push( annots, ind_obj) + except Exception: + if g_exceptions_verbose: exception_info() + message("skipping bad link / annot item %i.\n" % i) + + def _addWidget(self, field_type, field_name): + page = self._pdf_page() + pdf = page.doc() + annot = JM_create_widget(pdf, page, field_type, field_name) + if not annot.m_internal: + raise RuntimeError( "cannot create widget") + JM_add_annot_id(annot, "W") + return Annot(annot) + + def _apply_redactions(self, text, images, graphics): + page = self._pdf_page() + opts = mupdf.PdfRedactOptions() + opts.black_boxes = 0 # no black boxes + opts.text = text # how to treat text + opts.image_method = images # how to treat images + opts.line_art = graphics # how to treat vector graphics + success = mupdf.pdf_redact_page(page.doc(), page, opts) + return success + + def _erase(self): + self._reset_annot_refs() + try: + self.parent._forget_page(self) + except Exception: + exception_info() + pass + self.parent = None + self.thisown = False + self.number = None + self.this = None + + def _count_q_balance(self): + """Count missing graphic state pushs and pops. + + Returns: + A pair of integers (push, pop). Push is the number of missing + PDF "q" commands, pop is the number of "Q" commands. + A balanced graphics state for the page will be reached if its + /Contents is prepended with 'push' copies of string "q\n" + and appended with 'pop' copies of "\nQ". + """ + page = _as_pdf_page(self) # need the underlying PDF page + res = mupdf.pdf_dict_get( # access /Resources + page.obj(), + mupdf.PDF_ENUM_NAME_Resources, + ) + cont = mupdf.pdf_dict_get( # access /Contents + page.obj(), + mupdf.PDF_ENUM_NAME_Contents, + ) + pdf = _as_pdf_document(self.parent) # need underlying PDF document + + # return value of MuPDF function + return mupdf.pdf_count_q_balance_outparams_fn(pdf, res, cont) + + def _get_optional_content(self, oc: OptInt) -> OptStr: + if oc is None or oc == 0: + return None + doc = self.parent + check = doc.xref_object(oc, compressed=True) + if not ("/Type/OCG" in check or "/Type/OCMD" in check): + #log( 'raising "bad optional content"') + raise ValueError("bad optional content: 'oc'") + #log( 'Looking at self._get_resource_properties()') + props = {} + for p, x in self._get_resource_properties(): + props[x] = p + if oc in props.keys(): + return props[oc] + i = 0 + mc = "MC%i" % i + while mc in props.values(): + i += 1 + mc = "MC%i" % i + self._set_resource_property(mc, oc) + #log( 'returning {mc=}') + return mc + + def _get_resource_properties(self): + ''' + page list Resource/Properties + ''' + page = self._pdf_page() + rc = JM_get_resource_properties(page.obj()) + return rc + + def _get_textpage(self, clip=None, flags=0, matrix=None): + if 1 or g_use_extra: + ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix) + tpage = mupdf.FzStextPage(ll_tpage) + return tpage + page = self.this + options = mupdf.FzStextOptions(flags) + rect = JM_rect_from_py(clip) + # Default to page's rect if `clip` not specified, for #2048. + rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip) + ctm = JM_matrix_from_py(matrix) + tpage = mupdf.FzStextPage(rect) + dev = mupdf.fz_new_stext_device(tpage, options) + if _globals.no_device_caching: + mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE) + if isinstance(page, mupdf.FzPage): + pass + elif isinstance(page, mupdf.PdfPage): + page = page.super() + else: + assert 0, f'Unrecognised {type(page)=}' + mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie()) + mupdf.fz_close_device(dev) + return tpage + + def _insert_image(self, + filename=None, pixmap=None, stream=None, imask=None, clip=None, + overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0, + xref=0, alpha=-1, _imgname=None, digests=None + ): + maskbuf = mupdf.FzBuffer() + page = self._pdf_page() + # This will create an empty PdfDocument with a call to + # pdf_new_document() then assign page.doc()'s return value to it (which + # drop the original empty pdf_document). + pdf = page.doc() + w = width + h = height + img_xref = xref + rc_digest = 0 + + do_process_pixmap = 1 + do_process_stream = 1 + do_have_imask = 1 + do_have_image = 1 + do_have_xref = 1 + + if xref > 0: + ref = mupdf.pdf_new_indirect(pdf, xref, 0) + w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W'))) + h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H'))) + if w + h == 0: + raise ValueError( MSG_IS_NO_IMAGE) + #goto have_xref() + do_process_pixmap = 0 + do_process_stream = 0 + do_have_imask = 0 + do_have_image = 0 + + else: + if stream: + imgbuf = JM_BufferFromBytes(stream) + do_process_pixmap = 0 + else: + if filename: + imgbuf = mupdf.fz_read_file(filename) + #goto have_stream() + do_process_pixmap = 0 + + if do_process_pixmap: + #log( 'do_process_pixmap') + # process pixmap --------------------------------- + arg_pix = pixmap.this + w = arg_pix.w() + h = arg_pix.h() + digest = mupdf.fz_md5_pixmap2(arg_pix) + md5_py = digest + temp = digests.get(md5_py, None) + if temp is not None: + img_xref = temp + ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0) + #goto have_xref() + do_process_stream = 0 + do_have_imask = 0 + do_have_image = 0 + else: + if arg_pix.alpha() == 0: + image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage()) + else: + pm = mupdf.fz_convert_pixmap( + arg_pix, + mupdf.FzColorspace(), + mupdf.FzColorspace(), + mupdf.FzDefaultColorspaces(None), + mupdf.FzColorParams(), + 1, + ) + pm.alpha = 0 + pm.colorspace = None + mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage()) + image = mupdf.fz_new_image_from_pixmap(arg_pix, mask) + #goto have_image() + do_process_stream = 0 + do_have_imask = 0 + + if do_process_stream: + #log( 'do_process_stream') + # process stream --------------------------------- + state = mupdf.FzMd5() + if mupdf_cppyy: + mupdf.fz_md5_update_buffer( state, imgbuf) + else: + mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len) + if imask: + maskbuf = JM_BufferFromBytes(imask) + if mupdf_cppyy: + mupdf.fz_md5_update_buffer( state, maskbuf) + else: + mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len) + digest = mupdf.fz_md5_final2(state) + md5_py = bytes(digest) + temp = digests.get(md5_py, None) + if temp is not None: + img_xref = temp + ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0) + w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W'))) + h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H'))) + #goto have_xref() + do_have_imask = 0 + do_have_image = 0 + else: + image = mupdf.fz_new_image_from_buffer(imgbuf) + w = image.w() + h = image.h() + if not imask: + #goto have_image() + do_have_imask = 0 + + if do_have_imask: + # `fz_compressed_buffer` is reference counted and + # `mupdf.fz_new_image_from_compressed_buffer2()` + # is povided as a Swig-friendly wrapper for + # `fz_new_image_from_compressed_buffer()`, so we can do things + # straightfowardly. + # + cbuf1 = mupdf.fz_compressed_image_buffer( image) + if not cbuf1.m_internal: + raise ValueError( "uncompressed image cannot have mask") + bpc = image.bpc() + colorspace = image.colorspace() + xres, yres = mupdf.fz_image_resolution(image) + mask = mupdf.fz_new_image_from_buffer(maskbuf) + image = mupdf.fz_new_image_from_compressed_buffer2( + w, + h, + bpc, + colorspace, + xres, + yres, + 1, # interpolate + 0, # imagemask, + list(), # decode + list(), # colorkey + cbuf1, + mask, + ) + + if do_have_image: + #log( 'do_have_image') + ref = mupdf.pdf_add_image(pdf, image) + if oc: + JM_add_oc_object(pdf, ref, oc) + img_xref = mupdf.pdf_to_num(ref) + digests[md5_py] = img_xref + rc_digest = 1 + + if do_have_xref: + #log( 'do_have_xref') + resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources')) + if not resources.m_internal: + resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2) + xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject')) + if not xobject.m_internal: + xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2) + mat = calc_image_matrix(w, h, clip, rotate, keep_proportion) + mupdf.pdf_dict_puts(xobject, _imgname, ref) + nres = mupdf.fz_new_buffer(50) + s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n" + #s = s.replace('\n', '\r\n') + mupdf.fz_append_string(nres, s) + JM_insert_contents(pdf, page.obj(), nres, overlay) + + if rc_digest: + return img_xref, digests + else: + return img_xref, None + + def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering): + page = self._pdf_page() + pdf = page.doc() + + value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering) + # get the objects /Resources, /Resources/Font + resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources')) + if not resources.pdf_is_dict(): + resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME("Resources"), 5) + fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font')) + if not fonts.m_internal: # page has no fonts yet + fonts = mupdf.pdf_new_dict(pdf, 5) + mupdf.pdf_dict_putl(page.obj(), fonts, PDF_NAME('Resources'), PDF_NAME('Font')) + # store font in resources and fonts objects will contain named reference to font + _, xref = JM_INT_ITEM(value, 0) + if not xref: + raise RuntimeError( "cannot insert font") + font_obj = mupdf.pdf_new_indirect(pdf, xref, 0) + mupdf.pdf_dict_puts(fonts, fontname, font_obj) + return value + + def _load_annot(self, name, xref): + page = self._pdf_page() + if xref == 0: + annot = JM_get_annot_by_name(page, name) + else: + annot = JM_get_annot_by_xref(page, xref) + if annot.m_internal: + return Annot(annot) + + def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None): + pix = JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip) + return Pixmap(pix) + + def _other_box(self, boxtype): + rect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE) + page = _as_pdf_page(self.this, required=False) + if page.m_internal: + obj = mupdf.pdf_dict_gets( page.obj(), boxtype) + if mupdf.pdf_is_array(obj): + rect = mupdf.pdf_to_rect(obj) + if mupdf.fz_is_infinite_rect( rect): + return + return JM_py_from_rect(rect) + + def _pdf_page(self, required=True): + return _as_pdf_page(self.this, required=required) + + def _reset_annot_refs(self): + """Invalidate / delete all annots of this page.""" + self._annot_refs.clear() + + def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None): + + if CA >= 1 and ca >= 1 and blendmode is None: + return + tCA = int(round(max(CA , 0) * 100)) + if tCA >= 100: + tCA = 99 + tca = int(round(max(ca, 0) * 100)) + if tca >= 100: + tca = 99 + gstate = "fitzca%02i%02i" % (tCA, tca) + + if not gstate: + return + page = _as_pdf_page(self.this) + resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources')) + if not resources.m_internal: + resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2) + extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState')) + if not extg.m_internal: + extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2) + n = mupdf.pdf_dict_len(extg) + for i in range(n): + o1 = mupdf.pdf_dict_get_key(extg, i) + name = mupdf.pdf_to_name(o1) + if name == gstate: + return gstate + opa = mupdf.pdf_new_dict(page.doc(), 3) + mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA) + mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca) + mupdf.pdf_dict_puts(extg, gstate, opa) + return gstate + + def _set_pagebox(self, boxtype, rect): + doc = self.parent + if doc is None: + raise ValueError("orphaned object: parent is None") + + if not doc.is_pdf: + raise ValueError("is no PDF") + + valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox") + + if boxtype not in valid_boxes: + raise ValueError("bad boxtype") + + rect = Rect(rect) + mb = self.mediabox + rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) + if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1): + raise ValueError(f"{boxtype} not in MediaBox") + + doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]") + + def _set_resource_property(self, name, xref): + page = self._pdf_page() + JM_set_resource_property(page.obj(), name, xref) + + def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None): + cropbox = JM_rect_from_py(clip) + mat = JM_matrix_from_py(matrix) + rc_xref = xref + tpage = _as_pdf_page(self.this) + tpageref = tpage.obj() + pdfout = tpage.doc() # target PDF + ENSURE_OPERATION(pdfout) + #------------------------------------------------------------- + # convert the source page to a Form XObject + #------------------------------------------------------------- + xobj1 = JM_xobject_from_page(pdfout, fz_srcpage, xref, graftmap.this) + if not rc_xref: + rc_xref = mupdf.pdf_to_num(xobj1) + + #------------------------------------------------------------- + # create referencing XObject (controls display on target page) + #------------------------------------------------------------- + # fill reference to xobj1 into the /Resources + #------------------------------------------------------------- + subres1 = mupdf.pdf_new_dict(pdfout, 5) + mupdf.pdf_dict_puts(subres1, "fullpage", xobj1) + subres = mupdf.pdf_new_dict(pdfout, 5) + mupdf.pdf_dict_put(subres, PDF_NAME('XObject'), subres1) + + res = mupdf.fz_new_buffer(20) + mupdf.fz_append_string(res, "/fullpage Do") + + xobj2 = mupdf.pdf_new_xobject(pdfout, cropbox, mat, subres, res) + if oc > 0: + JM_add_oc_object(pdfout, mupdf.pdf_resolve_indirect(xobj2), oc) + + #------------------------------------------------------------- + # update target page with xobj2: + #------------------------------------------------------------- + # 1. insert Xobject in Resources + #------------------------------------------------------------- + resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources')) + if not resources.m_internal: + resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'), 5) + subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject')) + if not subres.m_internal: + subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5) + + mupdf.pdf_dict_puts(subres, _imgname, xobj2) + + #------------------------------------------------------------- + # 2. make and insert new Contents object + #------------------------------------------------------------- + nres = mupdf.fz_new_buffer(50) # buffer for Do-command + mupdf.fz_append_string(nres, " q /") # Do-command + mupdf.fz_append_string(nres, _imgname) + mupdf.fz_append_string(nres, " Do Q ") + + JM_insert_contents(pdfout, tpageref, nres, overlay) + return rc_xref + + def add_caret_annot(self, point: point_like) -> Annot: + """Add a 'Caret' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_caret_annot(point) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot = Annot( annot) + annot_postprocess(self, annot) + assert hasattr( annot, 'parent') + return annot + + def add_circle_annot(self, rect: rect_like) -> Annot: + """Add a 'Circle' (ellipse, oval) annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_file_annot( + self, + point: point_like, + buffer_: ByteString, + filename: str, + ufilename: OptStr =None, + desc: OptStr =None, + icon: OptStr =None + ) -> Annot: + """Add a 'FileAttachment' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_file_annot(point, + buffer_, + filename, + ufilename=ufilename, + desc=desc, + icon=icon, + ) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_freetext_annot( + self, + rect: rect_like, + text: str, + *, + fontsize: float =11, + fontname: OptStr =None, + text_color: OptSeq =None, + fill_color: OptSeq =None, + border_color: OptSeq =None, + border_width: float =0, + dashes: OptSeq =None, + callout: OptSeq =None, + line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW, + opacity: float =1, + align: int =0, + rotate: int =0, + richtext=False, + style=None, + ) -> Annot: + """Add a 'FreeText' annotation.""" + + old_rotation = annot_preprocess(self) + try: + annot = self._add_freetext_annot( + rect, + text, + fontsize=fontsize, + fontname=fontname, + text_color=text_color, + fill_color=fill_color, + border_color=border_color, + border_width=border_width, + dashes=dashes, + callout=callout, + line_end=line_end, + opacity=opacity, + align=align, + rotate=rotate, + richtext=richtext, + style=style, + ) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_highlight_annot(self, quads=None, start=None, + stop=None, clip=None) -> Annot: + """Add a 'Highlight' annotation.""" + if quads is None: + q = get_highlight_selection(self, start=start, stop=stop, clip=clip) + else: + q = CheckMarkerArg(quads) + ret = self._add_text_marker(q, mupdf.PDF_ANNOT_HIGHLIGHT) + return ret + + def add_ink_annot(self, handwriting: list) -> Annot: + """Add a 'Ink' ('handwriting') annotation. + + The argument must be a list of lists of point_likes. + """ + old_rotation = annot_preprocess(self) + try: + annot = self._add_ink_annot(handwriting) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_line_annot(self, p1: point_like, p2: point_like) -> Annot: + """Add a 'Line' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_line_annot(p1, p2) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_polygon_annot(self, points: list) -> Annot: + """Add a 'Polygon' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_polyline_annot(self, points: list) -> Annot: + """Add a 'PolyLine' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_rect_annot(self, rect: rect_like) -> Annot: + """Add a 'Square' (rectangle) annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_redact_annot( + self, + quad, + text: OptStr =None, + fontname: OptStr =None, + fontsize: float =11, + align: int =0, + fill: OptSeq =None, + text_color: OptSeq =None, + cross_out: bool =True, + ) -> Annot: + """Add a 'Redact' annotation.""" + da_str = None + if text and not set(string.whitespace).issuperset(text): + CheckColor(fill) + CheckColor(text_color) + if not fontname: + fontname = "Helv" + if not fontsize: + fontsize = 11 + if not text_color: + text_color = (0, 0, 0) + if hasattr(text_color, "__float__"): + text_color = (text_color, text_color, text_color) + if len(text_color) > 3: + text_color = text_color[:3] + fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" + da_str = fmt.format(*text_color, f=fontname, s=fontsize) + if fill is None: + fill = (1, 1, 1) + if fill: + if hasattr(fill, "__float__"): + fill = (fill, fill, fill) + if len(fill) > 3: + fill = fill[:3] + else: + text = None + + old_rotation = annot_preprocess(self) + try: + annot = self._add_redact_annot(quad, text=text, da_str=da_str, + align=align, fill=fill) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + #------------------------------------------------------------- + # change appearance to show a crossed-out rectangle + #------------------------------------------------------------- + if cross_out: + ap_tab = annot._getAP().splitlines()[:-1] # get the 4 commands only + _, LL, LR, UR, UL = ap_tab + ap_tab.append(LR) + ap_tab.append(LL) + ap_tab.append(UR) + ap_tab.append(LL) + ap_tab.append(UL) + ap_tab.append(b"S") + ap = b"\n".join(ap_tab) + annot._setAP(ap, 0) + return annot + + def add_squiggly_annot( + self, + quads=None, + start=None, + stop=None, + clip=None, + ) -> Annot: + """Add a 'Squiggly' annotation.""" + if quads is None: + q = get_highlight_selection(self, start=start, stop=stop, clip=clip) + else: + q = CheckMarkerArg(quads) + return self._add_text_marker(q, mupdf.PDF_ANNOT_SQUIGGLY) + + def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot: + """Add a ('rubber') 'Stamp' annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_stamp_annot(rect, stamp) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot: + """Add a 'StrikeOut' annotation.""" + if quads is None: + q = get_highlight_selection(self, start=start, stop=stop, clip=clip) + else: + q = CheckMarkerArg(quads) + return self._add_text_marker(q, mupdf.PDF_ANNOT_STRIKE_OUT) + + def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot: + """Add a 'Text' (sticky note) annotation.""" + old_rotation = annot_preprocess(self) + try: + annot = self._add_text_annot(point, text, icon=icon) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + annot_postprocess(self, annot) + return annot + + def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot: + """Add a 'Underline' annotation.""" + if quads is None: + q = get_highlight_selection(self, start=start, stop=stop, clip=clip) + else: + q = CheckMarkerArg(quads) + return self._add_text_marker(q, mupdf.PDF_ANNOT_UNDERLINE) + + def add_widget(self, widget: Widget) -> Annot: + """Add a 'Widget' (form field).""" + CheckParent(self) + doc = self.parent + if not doc.is_pdf: + raise ValueError("is no PDF") + widget._validate() + annot = self._addWidget(widget.field_type, widget.field_name) + if not annot: + return None + annot.thisown = True + annot.parent = weakref.proxy(self) # owning page object + self._annot_refs[id(annot)] = annot + widget.parent = annot.parent + widget._annot = annot + widget.update() + return annot + + def annot_names(self): + ''' + page get list of annot names + ''' + """List of names of annotations, fields and links.""" + CheckParent(self) + page = self._pdf_page(required=False) + if not page.m_internal: + return [] + return JM_get_annot_id_list(page) + + def annot_xrefs(self): + ''' + List of xref numbers of annotations, fields and links. + ''' + return JM_get_annot_xref_list2(self) + + def annots(self, types=None): + """ Generator over the annotations of a page. + + Args: + types: (list) annotation types to subselect from. If none, + all annotations are returned. E.g. types=[PDF_ANNOT_LINE] + will only yield line annotations. + """ + skip_types = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET) + if not hasattr(types, "__getitem__"): + annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types] + else: + annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types] + for xref in annot_xrefs: + annot = self.load_annot(xref) + annot._yielded=True + yield annot + + def apply_redactions( + page: 'Page', + images: int = 2, + graphics: int = 1, + text: int = 0, + ) -> bool: + """Apply the redaction annotations of the page. + + Args: + page: the PDF page. + images: + 0 - ignore images + 1 - remove all overlapping images + 2 - blank out overlapping image parts + 3 - remove image unless invisible + graphics: + 0 - ignore graphics + 1 - remove graphics if contained in rectangle + 2 - remove all overlapping graphics + text: + 0 - remove text + 1 - ignore text + """ + + def center_rect(annot_rect, new_text, font, fsize): + """Calculate minimal sub-rectangle for the overlay text. + + Notes: + Because 'insert_textbox' supports no vertical text centering, + we calculate an approximate number of lines here and return a + sub-rect with smaller height, which should still be sufficient. + Args: + annot_rect: the annotation rectangle + new_text: the text to insert. + font: the fontname. Must be one of the CJK or Base-14 set, else + the rectangle is returned unchanged. + fsize: the fontsize + Returns: + A rectangle to use instead of the annot rectangle. + """ + if not new_text or annot_rect.width <= EPSILON: + return annot_rect + try: + text_width = get_text_length(new_text, font, fsize) + except (ValueError, mupdf.FzErrorBase): # unsupported font + if g_exceptions_verbose: + exception_info() + return annot_rect + line_height = fsize * 1.2 + limit = annot_rect.width + h = math.ceil(text_width / limit) * line_height # estimate rect height + if h >= annot_rect.height: + return annot_rect + r = annot_rect + y = (annot_rect.tl.y + annot_rect.bl.y - h) * 0.5 + r.y0 = y + return r + + CheckParent(page) + doc = page.parent + if doc.is_encrypted or doc.is_closed: + raise ValueError("document closed or encrypted") + if not doc.is_pdf: + raise ValueError("is no PDF") + + redact_annots = [] # storage of annot values + for annot in page.annots( + types=(mupdf.PDF_ANNOT_REDACT,) # pylint: disable=no-member + ): + # loop redactions + redact_annots.append(annot._get_redact_values()) # save annot values + + if redact_annots == []: # any redactions on this page? + return False # no redactions + + rc = page._apply_redactions(text, images, graphics) # call MuPDF + if not rc: # should not happen really + raise ValueError("Error applying redactions.") + + # now write replacement text in old redact rectangles + shape = page.new_shape() + for redact in redact_annots: + annot_rect = redact["rect"] + fill = redact["fill"] + if fill: + shape.draw_rect(annot_rect) # colorize the rect background + shape.finish(fill=fill, color=fill) + if "text" in redact.keys(): # if we also have text + new_text = redact["text"] + align = redact.get("align", 0) + fname = redact["fontname"] + fsize = redact["fontsize"] + color = redact["text_color"] + # try finding vertical centered sub-rect + trect = center_rect(annot_rect, new_text, fname, fsize) + + rc = -1 + while rc < 0 and fsize >= 4: # while not enough room + # (re-) try insertion + rc = shape.insert_textbox( + trect, + new_text, + fontname=fname, + fontsize=fsize, + color=color, + align=align, + ) + fsize -= 0.5 # reduce font if unsuccessful + shape.commit() # append new contents object + return True + + def recolor(self, components=1): + """Convert colorspaces of objects on the page. + + Valid values are 1, 3 and 4. + """ + if components not in (1, 3, 4): + raise ValueError("components must be one of 1, 3, 4") + pdfdoc = _as_pdf_document(self.parent) + ropt = mupdf.pdf_recolor_options() + ropt.num_comp = components + ropts = mupdf.PdfRecolorOptions(ropt) + mupdf.pdf_recolor_page(pdfdoc, self.number, ropts) + + def clip_to_rect(self, rect): + """Clip away page content outside the rectangle.""" + clip = Rect(rect) + if clip.is_infinite or (clip & self.rect).is_empty: + raise ValueError("rect must not be infinite or empty") + clip *= self.transformation_matrix + pdfpage = _as_pdf_page(self) + pclip = JM_rect_from_py(clip) + mupdf.pdf_clip_page(pdfpage, pclip) + + def get_layout(self): + """Try to access layout information.""" + + if self.layout_information is not None: + # layout information already present + return + + if not _get_layout: + # no layout information available + return + + layout_info = _get_layout(self) + self.layout_information = layout_info + + @property + def artbox(self): + """The ArtBox""" + rect = self._other_box("ArtBox") + if rect is None: + return self.cropbox + mb = self.mediabox + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) + + @property + def bleedbox(self): + """The BleedBox""" + rect = self._other_box("BleedBox") + if rect is None: + return self.cropbox + mb = self.mediabox + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) + + def bound(self): + """Get page rectangle.""" + CheckParent(self) + page = _as_fz_page(self.this) + val = mupdf.fz_bound_page(page) + val = Rect(val) + + if val.is_infinite and self.parent.is_pdf: + cb = self.cropbox + w, h = cb.width, cb.height + if self.rotation not in (0, 180): + w, h = h, w + val = Rect(0, 0, w, h) + msg = TOOLS.mupdf_warnings(reset=False).splitlines()[-1] + message(msg) + + return val + + def clean_contents(self, sanitize=1): + if not sanitize and not self.is_wrapped: + self.wrap_contents() + page = _as_pdf_page( self.this, required=False) + if not page.m_internal: + return + filter_ = _make_PdfFilterOptions(recurse=1, sanitize=sanitize) + mupdf.pdf_filter_page_contents( page.doc(), page, filter_) + + @property + def cropbox(self): + """The CropBox.""" + CheckParent(self) + page = self._pdf_page(required=False) + if not page.m_internal: + val = mupdf.fz_bound_page(self.this) + else: + val = JM_cropbox(page.obj()) + val = Rect(val) + + return val + + @property + def cropbox_position(self): + return self.cropbox.tl + + def delete_annot(self, annot): + """Delete annot and return next one.""" + CheckParent(self) + CheckParent(annot) + + page = self._pdf_page() + while 1: + # first loop through all /IRT annots and remove them + irt_annot = JM_find_annot_irt(annot.this) + if not irt_annot: # no more there + break + mupdf.pdf_delete_annot(page, irt_annot.this) + nextannot = mupdf.pdf_next_annot(annot.this) # store next + mupdf.pdf_delete_annot(page, annot.this) + val = Annot(nextannot) + + if val: + val.thisown = True + val.parent = weakref.proxy(self) # owning page object + val.parent._annot_refs[id(val)] = val + annot._erase() + return val + + def delete_image(page: 'Page', xref: int): + """Delete the image referred to by xef. + + Actually replaces by a small transparent Pixmap using method Page.replace_image. + + Args: + xref: xref of the image to delete. + """ + # make a small 100% transparent pixmap (of just any dimension) + pix = Pixmap(csGRAY, (0, 0, 1, 1), 1) + pix.clear_with() # clear all samples bytes to 0x00 + page.replace_image(xref, pixmap=pix) + + def delete_link(self, linkdict): + """Delete a Link.""" + CheckParent(self) + if not isinstance( linkdict, dict): + return # have no dictionary + + def finished(): + if linkdict["xref"] == 0: return + try: + linkid = linkdict["id"] + linkobj = self._annot_refs[linkid] + linkobj._erase() + except Exception: + # Don't print this exception, to match classic. Issue #2841. + if g_exceptions_verbose > 1: exception_info() + pass + + page = _as_pdf_page(self.this, required=False) + if not page.m_internal: + return finished() # have no PDF + xref = linkdict[dictkey_xref] + if xref < 1: + return finished() # invalid xref + annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')) + if not annots.m_internal: + return finished() # have no annotations + len_ = mupdf.pdf_array_len( annots) + if len_ == 0: + return finished() + oxref = 0 + for i in range( len_): + oxref = mupdf.pdf_to_num( mupdf.pdf_array_get( annots, i)) + if xref == oxref: + break # found xref in annotations + + if xref != oxref: + return finished() # xref not in annotations + mupdf.pdf_array_delete( annots, i) # delete entry in annotations + mupdf.pdf_delete_object( page.doc(), xref) # delete link object + mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots) + JM_refresh_links( page) + + return finished() + + def delete_widget(page: 'Page', widget: Widget) -> Widget: + """Delete widget from page and return the next one.""" + CheckParent(page) + annot = getattr(widget, "_annot", None) + if annot is None: + raise ValueError("bad type: widget") + nextwidget = widget.next + page.delete_annot(annot) + widget._annot.parent = None + keylist = list(widget.__dict__.keys()) + for key in keylist: + del widget.__dict__[key] + return nextwidget + + @property + def derotation_matrix(self) -> Matrix: + """Reflects page de-rotation.""" + if g_use_extra: + return Matrix(extra.Page_derotate_matrix( self.this)) + pdfpage = self._pdf_page(required=False) + if not pdfpage.m_internal: + return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT)) + return Matrix(JM_derotate_page_matrix(pdfpage)) + + def draw_bezier( + page: 'Page', + p1: point_like, + p2: point_like, + p3: point_like, + p4: point_like, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + morph: OptStr = None, + closePath: bool = False, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a general cubic Bezier curve from p1 to p4 using control points p2 and p3.""" + img = page.new_shape() + Q = img.draw_bezier(Point(p1), Point(p2), Point(p3), Point(p4)) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + closePath=closePath, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_circle( + page: 'Page', + center: point_like, + radius: float, + color: OptSeq = (0,), + fill: OptSeq = None, + morph: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a circle given its center and radius.""" + img = page.new_shape() + Q = img.draw_circle(Point(center), radius) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + return Q + + def draw_curve( + page: 'Page', + p1: point_like, + p2: point_like, + p3: point_like, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + morph: OptSeq = None, + closePath: bool = False, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a special Bezier curve from p1 to p3, generating control points on lines p1 to p2 and p2 to p3.""" + img = page.new_shape() + Q = img.draw_curve(Point(p1), Point(p2), Point(p3)) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + closePath=closePath, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_line( + page: 'Page', + p1: point_like, + p2: point_like, + color: OptSeq = (0,), + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + morph: OptSeq = None, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc=0, + ) -> Point: + """Draw a line from point p1 to point p2.""" + img = page.new_shape() + p = img.draw_line(Point(p1), Point(p2)) + img.finish( + color=color, + dashes=dashes, + width=width, + closePath=False, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return p + + def draw_oval( + page: 'Page', + rect: typing.Union[rect_like, quad_like], + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + morph: OptSeq = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw an oval given its containing rectangle or quad.""" + img = page.new_shape() + Q = img.draw_oval(rect) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_polyline( + page: 'Page', + points: list, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + morph: OptSeq = None, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + closePath: bool = False, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw multiple connected line segments.""" + img = page.new_shape() + Q = img.draw_polyline(points) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + closePath=closePath, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_quad( + page: 'Page', + quad: quad_like, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + morph: OptSeq = None, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a quadrilateral.""" + img = page.new_shape() + Q = img.draw_quad(Quad(quad)) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_rect( + page: 'Page', + rect: rect_like, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + morph: OptSeq = None, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + radius=None, + ) -> Point: + ''' + Draw a rectangle. See Shape class method for details. + ''' + img = page.new_shape() + Q = img.draw_rect(Rect(rect), radius=radius) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_sector( + page: 'Page', + center: point_like, + point: point_like, + beta: float, + color: OptSeq = (0,), + fill: OptSeq = None, + dashes: OptStr = None, + fullSector: bool = True, + morph: OptSeq = None, + width: float = 1, + closePath: bool = False, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a circle sector given circle center, one arc end point and the angle of the arc. + + Parameters: + center -- center of circle + point -- arc end point + beta -- angle of arc (degrees) + fullSector -- connect arc ends with center + """ + img = page.new_shape() + Q = img.draw_sector(Point(center), Point(point), beta, fullSector=fullSector) + img.finish( + color=color, + fill=fill, + dashes=dashes, + width=width, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + closePath=closePath, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return Q + + def draw_squiggle( + page: 'Page', + p1: point_like, + p2: point_like, + breadth: float = 2, + color: OptSeq = (0,), + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + morph: OptSeq = None, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a squiggly line from point p1 to point p2.""" + img = page.new_shape() + p = img.draw_squiggle(Point(p1), Point(p2), breadth=breadth) + img.finish( + color=color, + dashes=dashes, + width=width, + closePath=False, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return p + + def draw_zigzag( + page: 'Page', + p1: point_like, + p2: point_like, + breadth: float = 2, + color: OptSeq = (0,), + dashes: OptStr = None, + width: float = 1, + lineCap: int = 0, + lineJoin: int = 0, + overlay: bool = True, + morph: OptSeq = None, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> Point: + """Draw a zigzag line from point p1 to point p2.""" + img = page.new_shape() + p = img.draw_zigzag(Point(p1), Point(p2), breadth=breadth) + img.finish( + color=color, + dashes=dashes, + width=width, + closePath=False, + lineCap=lineCap, + lineJoin=lineJoin, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + img.commit(overlay) + + return p + + def extend_textpage(self, tpage, flags=0, matrix=None): + page = self.this + tp = tpage.this + assert isinstance( tp, mupdf.FzStextPage) + options = mupdf.FzStextOptions() + options.flags = flags + ctm = JM_matrix_from_py(matrix) + dev = mupdf.FzDevice(tp, options) + mupdf.fz_run_page( page, dev, ctm, mupdf.FzCookie()) + mupdf.fz_close_device( dev) + + @property + def first_annot(self): + """First annotation.""" + CheckParent(self) + page = self._pdf_page(required=False) + if not page.m_internal: + return + annot = mupdf.pdf_first_annot(page) + if not annot.m_internal: + return + val = Annot(annot) + val.thisown = True + val.parent = weakref.proxy(self) # owning page object + self._annot_refs[id(val)] = val + return val + + @property + def first_link(self): + ''' + First link on page + ''' + return self.load_links() + + @property + def first_widget(self): + """First widget/field.""" + CheckParent(self) + annot = 0 + page = self._pdf_page(required=False) + if not page.m_internal: + return + annot = mupdf.pdf_first_widget(page) + if not annot.m_internal: + return + val = Annot(annot) + val.thisown = True + val.parent = weakref.proxy(self) # owning page object + self._annot_refs[id(val)] = val + widget = Widget() + TOOLS._fill_widget(val, widget) + val = widget + return val + + def get_bboxlog(self, layers=None): + CheckParent(self) + old_rotation = self.rotation + if old_rotation != 0: + self.set_rotation(0) + page = self.this + rc = [] + inc_layers = True if layers else False + dev = JM_new_bbox_device( rc, inc_layers) + mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + mupdf.fz_close_device( dev) + + if old_rotation != 0: + self.set_rotation(old_rotation) + return rc + + def get_cdrawings(self, extended=None, callback=None, method=None): + """Extract vector graphics ("line art") from the page.""" + CheckParent(self) + old_rotation = self.rotation + if old_rotation != 0: + self.set_rotation(0) + page = self.this + if isinstance(page, mupdf.PdfPage): + # Downcast pdf_page to fz_page. + page = mupdf.FzPage(page) + assert isinstance(page, mupdf.FzPage), f'{self.this=}' + clips = True if extended else False + prect = mupdf.fz_bound_page(page) + if 1 or g_use_extra: + rc = extra.get_cdrawings(page, extended, callback, method) + else: + rc = list() + if callable(callback) or method is not None: + dev = JM_new_lineart_device_Device(callback, clips, method) + else: + dev = JM_new_lineart_device_Device(rc, clips, method) + dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1) + mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + mupdf.fz_close_device(dev) + + if old_rotation != 0: + self.set_rotation(old_rotation) + if callable(callback) or method is not None: + return + return rc + + def get_contents(self): + """Get xrefs of /Contents objects.""" + CheckParent(self) + ret = [] + page = _as_pdf_page(self.this) + obj = page.obj() + contents = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Contents) + if mupdf.pdf_is_array(contents): + n = mupdf.pdf_array_len(contents) + for i in range(n): + icont = mupdf.pdf_array_get(contents, i) + xref = mupdf.pdf_to_num(icont) + ret.append(xref) + elif contents.m_internal: + xref = mupdf.pdf_to_num(contents) + ret.append( xref) + return ret + + def get_displaylist(self, annots=1): + ''' + Make a DisplayList from the page for Pixmap generation. + + Include (default) or exclude annotations. + ''' + CheckParent(self) + if annots: + dl = mupdf.fz_new_display_list_from_page(self.this) + else: + dl = mupdf.fz_new_display_list_from_page_contents(self.this) + return DisplayList(dl) + + def get_drawings(self, extended: bool=False) -> list: + """Retrieve vector graphics. The extended version includes clips. + + Note: + For greater comfort, this method converts point-likes, rect-likes, quad-likes + of the C version to respective Point / Rect / Quad objects. + It also adds default items that are missing in original path types. + """ + allkeys = ( + 'closePath', + 'fill', + 'color', + 'width', + 'lineCap', + 'lineJoin', + 'dashes', + 'stroke_opacity', + 'fill_opacity', + 'even_odd', + ) + val = self.get_cdrawings(extended=extended) + for i in range(len(val)): + npath = val[i] + if not npath["type"].startswith("clip"): + npath["rect"] = Rect(npath["rect"]) + else: + npath["scissor"] = Rect(npath["scissor"]) + if npath["type"]!="group": + items = npath["items"] + newitems = [] + for item in items: + cmd = item[0] + rest = item[1:] + if cmd == "re": + item = ("re", Rect(rest[0]).normalize(), rest[1]) + elif cmd == "qu": + item = ("qu", Quad(rest[0])) + else: + item = tuple([cmd] + [Point(i) for i in rest]) + newitems.append(item) + npath["items"] = newitems + if npath['type'] in ('f', 's'): + for k in allkeys: + npath[k] = npath.get(k) + + val[i] = npath + return val + + class Drawpath(object): + """Reflects a path dictionary from get_cdrawings().""" + def __init__(self, **args): + self.__dict__.update(args) + + class Drawpathlist(object): + """List of Path objects representing get_cdrawings() output.""" + def __getitem__(self, item): + return self.paths.__getitem__(item) + + def __init__(self): + self.paths = [] + self.path_count = 0 + self.group_count = 0 + self.clip_count = 0 + self.fill_count = 0 + self.stroke_count = 0 + self.fillstroke_count = 0 + + def __len__(self): + return self.paths.__len__() + + def append(self, path): + self.paths.append(path) + self.path_count += 1 + if path.type == "clip": + self.clip_count += 1 + elif path.type == "group": + self.group_count += 1 + elif path.type == "f": + self.fill_count += 1 + elif path.type == "s": + self.stroke_count += 1 + elif path.type == "fs": + self.fillstroke_count += 1 + + def clip_parents(self, i): + """Return list of parent clip paths. + + Args: + i: (int) return parents of this path. + Returns: + List of the clip parents.""" + if i >= self.path_count: + raise IndexError("bad path index") + while i < 0: + i += self.path_count + lvl = self.paths[i].level + clips = list( # clip paths before identified one + reversed( + [ + p + for p in self.paths[:i] + if p.type == "clip" and p.level < lvl + ] + ) + ) + if clips == []: # none found: empty list + return [] + nclips = [clips[0]] # init return list + for p in clips[1:]: + if p.level >= nclips[-1].level: + continue # only accept smaller clip levels + nclips.append(p) + return nclips + + def group_parents(self, i): + """Return list of parent group paths. + + Args: + i: (int) return parents of this path. + Returns: + List of the group parents.""" + if i >= self.path_count: + raise IndexError("bad path index") + while i < 0: + i += self.path_count + lvl = self.paths[i].level + groups = list( # group paths before identified one + reversed( + [ + p + for p in self.paths[:i] + if p.type == "group" and p.level < lvl + ] + ) + ) + if groups == []: # none found: empty list + return [] + ngroups = [groups[0]] # init return list + for p in groups[1:]: + if p.level >= ngroups[-1].level: + continue # only accept smaller group levels + ngroups.append(p) + return ngroups + + def get_lineart(self) -> object: + """Get page drawings paths. + + Note: + For greater comfort, this method converts point-like, rect-like, quad-like + tuples of the C version to respective Point / Rect / Quad objects. + Also adds default items that are missing in original path types. + In contrast to get_drawings(), this output is an object. + """ + + val = self.get_cdrawings(extended=True) + paths = self.Drawpathlist() + for path in val: + npath = self.Drawpath(**path) + if npath.type != "clip": + npath.rect = Rect(path["rect"]) + else: + npath.scissor = Rect(path["scissor"]) + if npath.type != "group": + items = path["items"] + newitems = [] + for item in items: + cmd = item[0] + rest = item[1:] + if cmd == "re": + item = ("re", Rect(rest[0]).normalize(), rest[1]) + elif cmd == "qu": + item = ("qu", Quad(rest[0])) + else: + item = tuple([cmd] + [Point(i) for i in rest]) + newitems.append(item) + npath.items = newitems + + if npath.type == "f": + npath.stroke_opacity = None + npath.dashes = None + npath.line_join = None + npath.line_cap = None + npath.color = None + npath.width = None + + paths.append(npath) + + val = None + return paths + + def get_image_info( + page: 'Page', + hashes: bool = False, + xrefs: bool = False + ) -> list: + """Extract image information only from a pymupdf.TextPage. + + Args: + hashes: (bool) include MD5 hash for each image. + xrefs: (bool) try to find the xref for each image. Sets hashes to true. + """ + doc = page.parent + if xrefs and doc.is_pdf: + hashes = True + if not doc.is_pdf: + xrefs = False + imginfo = getattr(page, "_image_info", None) + if imginfo and not xrefs: + return imginfo + if not imginfo: + tp = page.get_textpage(flags=TEXT_PRESERVE_IMAGES) + imginfo = tp.extractIMGINFO(hashes=hashes) + del tp + if hashes: + page._image_info = imginfo + if not xrefs or not doc.is_pdf: + return imginfo + imglist = page.get_images() + digests = {} + for item in imglist: + xref = item[0] + pix = Pixmap(doc, xref) + digests[pix.digest] = xref + del pix + for i in range(len(imginfo)): + item = imginfo[i] + xref = digests.get(item["digest"], 0) + item["xref"] = xref + imginfo[i] = item + return imginfo + + def get_image_rects(page: 'Page', name, transform=False) -> list: + """Return list of image positions on a page. + + Args: + name: (str, list, int) image identification. May be reference name, an + item of the page's image list or an xref. + transform: (bool) whether to also return the transformation matrix. + Returns: + A list of pymupdf.Rect objects or tuples of (pymupdf.Rect, pymupdf.Matrix) + for all image locations on the page. + """ + if type(name) in (list, tuple): + xref = name[0] + elif type(name) is int: + xref = name + else: + imglist = [i for i in page.get_images() if i[7] == name] + if imglist == []: + raise ValueError("bad image name") + elif len(imglist) != 1: + raise ValueError("multiple image names found") + xref = imglist[0][0] + pix = Pixmap(page.parent, xref) # make pixmap of the image to compute MD5 + digest = pix.digest + del pix + infos = page.get_image_info(hashes=True) + if not transform: + bboxes = [Rect(im["bbox"]) for im in infos if im["digest"] == digest] + else: + bboxes = [ + (Rect(im["bbox"]), Matrix(im["transform"])) + for im in infos + if im["digest"] == digest + ] + return bboxes + + def get_label(page): + """Return the label for this PDF page. + + Args: + page: page object. + Returns: + The label (str) of the page. Errors return an empty string. + """ + # Jorj McKie, 2021-01-06 + + labels = page.parent._get_page_labels() + if not labels: + return "" + labels.sort() + return utils.get_label_pno(page.number, labels) + + def get_links(page: 'Page') -> list: + """Create a list of all links contained in a PDF page. + + Notes: + see PyMuPDF ducmentation for details. + """ + + CheckParent(page) + ln = page.first_link + links = [] + while ln: + nl = utils.getLinkDict(ln, page.parent) + links.append(nl) + ln = ln.next + if links != [] and page.parent.is_pdf: + linkxrefs = [x for x in + #page.annot_xrefs() + JM_get_annot_xref_list2(page) + if x[1] == mupdf.PDF_ANNOT_LINK # pylint: disable=no-member + ] + if len(linkxrefs) == len(links): + for i in range(len(linkxrefs)): + links[i]["xref"] = linkxrefs[i][0] + links[i]["id"] = linkxrefs[i][2] + return links + + def get_pixmap( + page: 'Page', + *, + matrix: matrix_like=Identity, + dpi=None, + colorspace: Colorspace=None, + clip: rect_like=None, + alpha: bool=False, + annots: bool=True, + ) -> 'Pixmap': + """Create pixmap of page. + + Keyword args: + matrix: Matrix for transformation (default: Identity). + dpi: desired dots per inch. If given, matrix is ignored. + colorspace: (str/Colorspace) cmyk, rgb, gray - case ignored, default csRGB. + clip: (irect-like) restrict rendering to this area. + alpha: (bool) whether to include alpha channel + annots: (bool) whether to also render annotations + """ + if colorspace is None: + colorspace = csRGB + if dpi: + zoom = dpi / 72 + matrix = Matrix(zoom, zoom) + + if type(colorspace) is str: + if colorspace.upper() == "GRAY": + colorspace = csGRAY + elif colorspace.upper() == "CMYK": + colorspace = csCMYK + else: + colorspace = csRGB + if colorspace.n not in (1, 3, 4): + raise ValueError("unsupported colorspace") + + dl = page.get_displaylist(annots=annots) + pix = dl.get_pixmap(matrix=matrix, colorspace=colorspace, alpha=alpha, clip=clip) + dl = None + if dpi: + pix.set_dpi(dpi, dpi) + return pix + + def remove_rotation(self): + """Set page rotation to 0 while maintaining visual appearance.""" + rot = self.rotation # normalized rotation value + if rot == 0: + return Identity # nothing to do + + # need to derotate the page's content + mb = self.mediabox # current mediabox + + if rot == 90: + # before derotation, shift content horizontally + mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0) + elif rot == 270: + # before derotation, shift content vertically + mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0) + else: # rot = 180 + mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0) + + # prefix with derotation matrix + mat = mat0 * self.derotation_matrix + cmd = _format_g(tuple(mat)) + ' cm ' + cmd = cmd.encode('utf8') + _ = TOOLS._insert_contents(self, cmd, False) # prepend to page contents + + # swap x- and y-coordinates + if rot in (90, 270): + x0, y0, x1, y1 = mb + mb.x0 = y0 + mb.y0 = x0 + mb.x1 = y1 + mb.y1 = x1 + self.set_mediabox(mb) + + self.set_rotation(0) + rot = ~mat # inverse of the derotation matrix + + for annot in self.annots(): # modify rectangles of annotations + r = annot.rect * rot + # TODO: only try to set rectangle for applicable annot types + annot.set_rect(r) + for link in self.get_links(): # modify 'from' rectangles of links + r = link["from"] * rot + self.delete_link(link) + link["from"] = r + try: # invalid links remain deleted + self.insert_link(link) + except Exception: + pass + for widget in self.widgets(): # modify field rectangles + r = widget.rect * rot + widget.rect = r + widget.update() + return rot # the inverse of the generated derotation matrix + + def cluster_drawings( + self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3, + final_filter: bool = True, + ) -> list: + """Join rectangles of neighboring vector graphic items. + + Args: + clip: optional rect-like to restrict the page area to consider. + drawings: (optional) output of a previous "get_drawings()". + x_tolerance: horizontal neighborhood threshold. + y_tolerance: vertical neighborhood threshold. + + Notes: + Vector graphics (also called line-art or drawings) usually consist + of independent items like rectangles, lines or curves to jointly + form table grid lines or bar, line, pie charts and similar. + This method identifies rectangles wrapping these disparate items. + + Returns: + A list of Rect items, each wrapping line-art items that are close + enough to be considered forming a common vector graphic. + Only "significant" rectangles will be returned, i.e. having both, + width and height larger than the tolerance values. + """ + CheckParent(self) + parea = self.rect # the default clipping area + if clip is not None: + parea = Rect(clip) + delta_x = x_tolerance # shorter local name + delta_y = y_tolerance # shorter local name + if drawings is None: # if we cannot re-use a previous output + drawings = self.get_drawings() + + def are_neighbors(r1, r2): + """Detect whether r1, r2 are "neighbors". + + Items r1, r2 are called neighbors if the minimum distance between + their points is less-equal delta. + + Both parameters must be (potentially invalid) rectangles. + """ + # normalize rectangles as needed + rr1_x0, rr1_x1 = (r1.x0, r1.x1) if r1.x1 > r1.x0 else (r1.x1, r1.x0) + rr1_y0, rr1_y1 = (r1.y0, r1.y1) if r1.y1 > r1.y0 else (r1.y1, r1.y0) + rr2_x0, rr2_x1 = (r2.x0, r2.x1) if r2.x1 > r2.x0 else (r2.x1, r2.x0) + rr2_y0, rr2_y1 = (r2.y0, r2.y1) if r2.y1 > r2.y0 else (r2.y1, r2.y0) + if ( + 0 + or rr1_x1 < rr2_x0 - delta_x + or rr1_x0 > rr2_x1 + delta_x + or rr1_y1 < rr2_y0 - delta_y + or rr1_y0 > rr2_y1 + delta_y + ): + # Rects do not overlap. + return False + else: + # Rects overlap. + return True + + # exclude graphics not contained in the clip + paths = [ + p + for p in drawings + if 1 + and p["rect"].x0 >= parea.x0 + and p["rect"].x1 <= parea.x1 + and p["rect"].y0 >= parea.y0 + and p["rect"].y1 <= parea.y1 + ] + + # list of all vector graphic rectangles + prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0)) + + new_rects = [] # the final list of the joined rectangles + + # ------------------------------------------------------------------------- + # The strategy is to identify and join all rects that are neighbors + # ------------------------------------------------------------------------- + while prects: # the algorithm will empty this list + r = +prects[0] # copy of first rectangle + repeat = True + while repeat: + repeat = False + for i in range(len(prects) - 1, 0, -1): # from back to front + if are_neighbors(prects[i], r): + r |= prects[i].tl # include in first rect + r |= prects[i].br # include in first rect + del prects[i] # delete this rect + repeat = True + + new_rects.append(r) + del prects[0] + prects = sorted(set(prects), key=lambda r: (r.y1, r.x0)) + + new_rects = sorted(set(new_rects), key=lambda r: (r.y1, r.x0)) + if not final_filter: + return new_rects + return [r for r in new_rects if r.width > delta_x and r.height > delta_y] + + def get_fonts(self, full=False): + """List of fonts defined in the page object.""" + CheckParent(self) + return self.parent.get_page_fonts(self.number, full=full) + + def get_image_bbox(self, name, transform=0): + """Get rectangle occupied by image 'name'. + + 'name' is either an item of the image list, or the referencing + name string - elem[7] of the resp. item. + Option 'transform' also returns the image transformation matrix. + """ + CheckParent(self) + doc = self.parent + if doc.is_closed or doc.is_encrypted: + raise ValueError('document closed or encrypted') + + inf_rect = Rect(1, 1, -1, -1) + null_mat = Matrix() + if transform: + rc = (inf_rect, null_mat) + else: + rc = inf_rect + + if type(name) in (list, tuple): + if not type(name[-1]) is int: + raise ValueError('need item of full page image list') + item = name + else: + imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]] + if len(imglist) == 1: + item = imglist[0] + elif imglist == []: + raise ValueError('bad image name') + else: + raise ValueError("found multiple images named '%s'." % name) + xref = item[-1] + if xref != 0 or transform: + try: + return self.get_image_rects(item, transform=transform)[0] + except Exception: + exception_info() + return inf_rect + pdf_page = self._pdf_page() + val = JM_image_reporter(pdf_page) + + if not bool(val): + return rc + + for v in val: + if v[0] != item[-3]: + continue + q = Quad(v[1]) + bbox = q.rect + if transform == 0: + rc = bbox + break + + hm = Matrix(util_hor_matrix(q.ll, q.lr)) + h = abs(q.ll - q.ul) + w = abs(q.ur - q.ul) + m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0) + m = ~(hm * m0) + rc = (bbox, m) + break + val = rc + + return val + + def get_images(self, full=False): + """List of images defined in the page object.""" + CheckParent(self) + return self.parent.get_page_images(self.number, full=full) + + def get_oc_items(self) -> list: + """Get OCGs and OCMDs used in the page's contents. + + Returns: + List of items (name, xref, type), where type is one of "ocg" / "ocmd", + and name is the property name. + """ + rc = [] + for pname, xref in self._get_resource_properties(): + text = self.parent.xref_object(xref, compressed=True) + if "/Type/OCG" in text: + octype = "ocg" + elif "/Type/OCMD" in text: + octype = "ocmd" + else: + continue + rc.append((pname, xref, octype)) + return rc + + def get_svg_image(self, matrix=None, text_as_path=1): + """Make SVG image from page.""" + CheckParent(self) + mediabox = mupdf.fz_bound_page(self.this) + ctm = JM_matrix_from_py(matrix) + tbounds = mediabox + text_option = mupdf.FZ_SVG_TEXT_AS_PATH if text_as_path == 1 else mupdf.FZ_SVG_TEXT_AS_TEXT + tbounds = mupdf.fz_transform_rect(tbounds, ctm) + + res = mupdf.fz_new_buffer(1024) + out = mupdf.FzOutput(res) + dev = mupdf.fz_new_svg_device( + out, + tbounds.x1-tbounds.x0, # width + tbounds.y1-tbounds.y0, # height + text_option, + 1, + ) + mupdf.fz_run_page(self.this, dev, ctm, mupdf.FzCookie()) + mupdf.fz_close_device(dev) + out.fz_close_output() + text = JM_EscapeStrFromBuffer(res) + return text + + def get_textbox( + page: Page, + rect: rect_like, + textpage=None, #: TextPage = None, + ) -> str: + tp = textpage + if tp is None: + tp = page.get_textpage() + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + rc = tp.extractTextbox(rect) + if textpage is None: + del tp + return rc + + def get_text(self, *args, **kwargs): + return utils.get_text(self, *args, **kwargs) + + def get_text_blocks(self, *args, **kwargs): + return utils.get_text_blocks(self, *args, **kwargs) + + def get_text_selection(self, *args, **kwargs): + return utils.get_text_selection(self, *args, **kwargs) + + def get_text_words(self, *args, **kwargs): + return utils.get_text_words(self, *args, **kwargs) + + def get_textpage_ocr(self, *args, **kwargs): + return utils.get_textpage_ocr(self, *args, **kwargs) + + def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage": + CheckParent(self) + if matrix is None: + matrix = Matrix(1, 1) + old_rotation = self.rotation + if old_rotation != 0: + self.set_rotation(0) + try: + textpage = self._get_textpage(clip, flags=flags, matrix=matrix) + finally: + if old_rotation != 0: + self.set_rotation(old_rotation) + textpage = TextPage(textpage) + textpage.parent = weakref.proxy(self) + return textpage + + def get_texttrace(self): + + CheckParent(self) + old_rotation = self.rotation + if old_rotation != 0: + self.set_rotation(0) + page = self.this + rc = [] + if 1 or g_use_extra: + dev = extra.JM_new_texttrace_device(rc) + else: + dev = JM_new_texttrace_device(rc) + prect = mupdf.fz_bound_page(page) + dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1) + mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + mupdf.fz_close_device(dev) + + if old_rotation != 0: + self.set_rotation(old_rotation) + return rc + + def get_xobjects(self): + """List of xobjects defined in the page object.""" + CheckParent(self) + return self.parent.get_page_xobjects(self.number) + + def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None, + set_simple=False, wmode=0, encoding=0): + doc = self.parent + if doc is None: + raise ValueError("orphaned object: parent is None") + idx = 0 + + if fontname.startswith("/"): + fontname = fontname[1:] + inv_chars = INVALID_NAME_CHARS.intersection(fontname) + if inv_chars != set(): + raise ValueError(f"bad fontname chars {inv_chars}") + + font = CheckFont(self, fontname) + if font is not None: # font already in font list of page + xref = font[0] # this is the xref + if CheckFontInfo(doc, xref): # also in our document font list? + return xref # yes: we are done + # need to build the doc FontInfo entry - done via get_char_widths + doc.get_char_widths(xref) + return xref + + #-------------------------------------------------------------------------- + # the font is not present for this page + #-------------------------------------------------------------------------- + + bfname = Base14_fontdict.get(fontname.lower(), None) # BaseFont if Base-14 font + + serif = 0 + CJK_number = -1 + CJK_list_n = ["china-t", "china-s", "japan", "korea"] + CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"] + + try: + CJK_number = CJK_list_n.index(fontname) + serif = 0 + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose > 1: exception_info() + pass + + if CJK_number < 0: + try: + CJK_number = CJK_list_s.index(fontname) + serif = 1 + except Exception: + # Verbose in PyMuPDF/tests. + if g_exceptions_verbose > 1: exception_info() + pass + + if fontname.lower() in fitz_fontdescriptors.keys(): + import pymupdf_fonts + fontbuffer = pymupdf_fonts.myfont(fontname) # make a copy + del pymupdf_fonts + + # install the font for the page + if fontfile is not None: + if type(fontfile) is str: + fontfile_str = fontfile + elif hasattr(fontfile, "absolute"): + fontfile_str = str(fontfile) + elif hasattr(fontfile, "name"): + fontfile_str = fontfile.name + else: + raise ValueError("bad fontfile") + else: + fontfile_str = None + val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx, + wmode, serif, encoding, CJK_number) + + if not val: # did not work, error return + return val + + xref = val[0] # xref of installed font + fontdict = val[1] + + if CheckFontInfo(doc, xref): # check again: document already has this font + return xref # we are done + + # need to create document font info + doc.get_char_widths(xref, fontdict=fontdict) + return xref + + def insert_htmlbox( + page, + rect, + text, + *, + css=None, + scale_low=0, + archive=None, + rotate=0, + oc=0, + opacity=1, + overlay=True, + _scale_word_width=True, + _verbose=False, + ) -> tuple: + """Insert text with optional HTML tags and stylings into a rectangle. + + Args: + rect: (rect-like) rectangle into which the text should be placed. + text: (str) text with optional HTML tags and stylings. + css: (str) CSS styling commands. + scale_low: (float) force-fit content by scaling it down. Must be in + range [0, 1]. If 1, no scaling will take place. If 0, arbitrary + down-scaling is acceptable. A value of 0.1 would mean that content + may be scaled down by at most 90%. + archive: Archive object pointing to locations of used fonts or images + rotate: (int) rotate the text in the box by a multiple of 90 degrees. + oc: (int) the xref of an OCG / OCMD (Optional Content). + opacity: (float) set opacity of inserted content. + overlay: (bool) put text on top of page content. + _scale_word_width: internal, for testing only. + _verbose: internal, for testing only. + Returns: + A tuple of floats (spare_height, scale). + spare_height: + The height of the remaining space in below the + text, or -1 if we failed to fit. + scale: + The scaling required; `0 < scale <= 1`. + Will be less than `scale_low` if we failed to fit. + """ + # normalize rotation angle + if not rotate % 90 == 0: + raise ValueError("bad rotation angle") + while rotate < 0: + rotate += 360 + while rotate >= 360: + rotate -= 360 + + if not 0 <= scale_low <= 1: + raise ValueError("'scale_low' must be in [0, 1]") + + if css is None: + css = "" + + rect = Rect(rect) + if rotate in (90, 270): + temp_rect = Rect(0, 0, rect.height, rect.width) + else: + temp_rect = Rect(0, 0, rect.width, rect.height) + + # use a small border by default + mycss = "body {margin:1px;}" + css # append user CSS + + # either make a story, or accept a given one + if isinstance(text, str): # if a string, convert to a Story + story = Story(html=text, user_css=mycss, archive=archive) + elif isinstance(text, Story): + story = text + else: + raise ValueError("'text' must be a string or a Story") + + # ---------------------------------------------------------------- + # Find a scaling factor that lets our story fit in. Instead of scaling + # the text smaller, we instead look at how much bigger the rect needs + # to be to fit the text, then reverse the scaling to get how much we + # need to scale down the text. + # ---------------------------------------------------------------- + rect_scale_max = None if scale_low == 0 else 1 / scale_low + + fit = story.fit_scale( + temp_rect, + scale_min=1, + scale_max=rect_scale_max, + flags=mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW if _scale_word_width else 0, + verbose=_verbose, + ) + + if not fit.big_enough: # there was no fit + scale = 1 / fit.parameter + return (-1, scale) + + # fit.filled is a tuple; we convert it in place to a Rect for + # convenience. (fit.rect is already a Rect.) + fit.filled = Rect(fit.filled) + assert (fit.rect.x0, fit.rect.y0) == (0, 0) + assert (fit.filled.x0, fit.filled.y0) == (0, 0) + + scale = 1 / fit.parameter + assert scale >= scale_low, f'{scale_low=} {scale=}' + + spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0) + + def rect_function(*args): + return fit.rect, fit.rect, None + + # draw story on temp PDF page + doc = story.write_with_links(rect_function) + + # Insert opacity if requested. + # For this, we prepend a command to the /Contents. + if 0 <= opacity < 1: + tpage = doc[0] # load page + # generate /ExtGstate for the page + alp0 = tpage._set_opacity(CA=opacity, ca=opacity) + s = f"/{alp0} gs\n" # generate graphic state command + TOOLS._insert_contents(tpage, s.encode(), 0) + + # put result in target page + page.show_pdf_page(rect, doc, 0, rotate=rotate, oc=oc, overlay=overlay) + + # ------------------------------------------------------------------------- + # re-insert links in target rect (show_pdf_page cannot copy annotations) + # ------------------------------------------------------------------------- + # scaled center point of fit.rect + mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale + + # center point of target rect + mp2 = (rect.tl + rect.br) / 2 + + # compute link positioning matrix: + # - move center of scaled-down fit.rect to (0,0) + # - rotate + # - move (0,0) to center of target rect + mat = ( + Matrix(scale, 0, 0, scale, -mp1.x, -mp1.y) + * Matrix(-rotate) + * Matrix(1, 0, 0, 1, mp2.x, mp2.y) + ) + + # copy over links + for link in doc[0].get_links(): + link["from"] *= mat + page.insert_link(link) + + return spare_height, scale + + def insert_image( + page, + rect, + *, + alpha=-1, + filename=None, + height=0, + keep_proportion=True, + mask=None, + oc=0, + overlay=True, + pixmap=None, + rotate=0, + stream=None, + width=0, + xref=0, + ): + """Insert an image for display in a rectangle. + + Args: + rect: (rect_like) position of image on the page. + alpha: (int, optional) set to 0 if image has no transparency. + filename: (str, Path, file object) image filename. + height: (int) + keep_proportion: (bool) keep width / height ratio (default). + mask: (bytes, optional) image consisting of alpha values to use. + oc: (int) xref of OCG or OCMD to declare as Optional Content. + overlay: (bool) put in foreground (default) or background. + pixmap: (pymupdf.Pixmap) use this as image. + rotate: (int) rotate by 0, 90, 180 or 270 degrees. + stream: (bytes) use this as image. + width: (int) + xref: (int) use this as image. + + 'page' and 'rect' are positional, all other parameters are keywords. + + If 'xref' is given, that image is used. Other input options are ignored. + Else, exactly one of pixmap, stream or filename must be given. + + 'alpha=0' for non-transparent images improves performance significantly. + Affects stream and filename only. + + Optimum transparent insertions are possible by using filename / stream in + conjunction with a 'mask' image of alpha values. + + Returns: + xref (int) of inserted image. Re-use as argument for multiple insertions. + """ + CheckParent(page) + doc = page.parent + if not doc.is_pdf: + raise ValueError("is no PDF") + + if xref == 0 and (bool(filename) + bool(stream) + bool(pixmap) != 1): + raise ValueError("xref=0 needs exactly one of filename, pixmap, stream") + + if filename: + if type(filename) is str: + pass + elif hasattr(filename, "absolute"): + filename = str(filename) + elif hasattr(filename, "name"): + filename = filename.name + else: + raise ValueError("bad filename") + + if filename and not os.path.exists(filename): + raise FileNotFoundError("No such file: '%s'" % filename) + elif stream and type(stream) not in (bytes, bytearray, io.BytesIO): + raise ValueError("stream must be bytes-like / BytesIO") + elif pixmap and type(pixmap) is not Pixmap: + raise ValueError("pixmap must be a Pixmap") + if mask and not (stream or filename): + raise ValueError("mask requires stream or filename") + if mask and type(mask) not in (bytes, bytearray, io.BytesIO): + raise ValueError("mask must be bytes-like / BytesIO") + while rotate < 0: + rotate += 360 + while rotate >= 360: + rotate -= 360 + if rotate not in (0, 90, 180, 270): + raise ValueError("bad rotate value") + + r = Rect(rect) + if r.is_empty or r.is_infinite: + raise ValueError("rect must be finite and not empty") + clip = r * ~page.transformation_matrix + + # Create a unique image reference name. + ilst = [i[7] for i in doc.get_page_images(page.number)] + ilst += [i[1] for i in doc.get_page_xobjects(page.number)] + ilst += [i[4] for i in doc.get_page_fonts(page.number)] + n = "fzImg" # 'pymupdf image' + i = 0 + _imgname = n + "0" # first name candidate + while _imgname in ilst: + i += 1 + _imgname = n + str(i) # try new name + + if overlay: + page.wrap_contents() # ensure a balanced graphics state + digests = doc.InsertedImages + xref, digests = page._insert_image( + filename=filename, + pixmap=pixmap, + stream=stream, + imask=mask, + clip=clip, + overlay=overlay, + oc=oc, + xref=xref, + rotate=rotate, + keep_proportion=keep_proportion, + width=width, + height=height, + alpha=alpha, + _imgname=_imgname, + digests=digests, + ) + if digests is not None: + doc.InsertedImages = digests + + return xref + + def insert_link(page: 'Page', lnk: dict, mark: bool = True) -> None: + """Insert a new link for the current page.""" + CheckParent(page) + annot = utils.getLinkText(page, lnk) + if annot == "": + raise ValueError("link kind not supported") + page._addAnnot_FromString((annot,)) + + def insert_text( + page: 'Page', + point: point_like, + text: typing.Union[str, list], + *, + fontsize: float = 11, + lineheight: OptFloat = None, + fontname: str = "helv", + fontfile: OptStr = None, + set_simple: int = 0, + encoding: int = 0, + color: OptSeq = None, + fill: OptSeq = None, + border_width: float = 0.05, + miter_limit: float = 1, + render_mode: int = 0, + rotate: int = 0, + morph: OptSeq = None, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ): + + img = page.new_shape() + rc = img.insert_text( + point, + text, + fontsize=fontsize, + lineheight=lineheight, + fontname=fontname, + fontfile=fontfile, + set_simple=set_simple, + encoding=encoding, + color=color, + fill=fill, + border_width=border_width, + render_mode=render_mode, + miter_limit=miter_limit, + rotate=rotate, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + if rc >= 0: + img.commit(overlay) + return rc + + def insert_textbox( + page: 'Page', + rect: rect_like, + buffer: typing.Union[str, list], + *, + fontname: str = "helv", + fontfile: OptStr = None, + set_simple: int = 0, + encoding: int = 0, + fontsize: float = 11, + lineheight: OptFloat = None, + color: OptSeq = None, + fill: OptSeq = None, + expandtabs: int = 1, + align: int = 0, + rotate: int = 0, + render_mode: int = 0, + miter_limit: float = 1, + border_width: float = 0.05, + morph: OptSeq = None, + overlay: bool = True, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> float: + """Insert text into a given rectangle. + + Notes: + Creates a Shape object, uses its same-named method and commits it. + Parameters: + rect: (rect-like) area to use for text. + buffer: text to be inserted + fontname: a Base-14 font, font name or '/name' + fontfile: name of a font file + fontsize: font size + lineheight: overwrite the font property + color: RGB color triple + expandtabs: handles tabulators with string function + align: left, center, right, justified + rotate: 0, 90, 180, or 270 degrees + morph: morph box with a matrix and a fixpoint + overlay: put text in foreground or background + Returns: + unused or deficit rectangle area (float) + """ + img = page.new_shape() + rc = img.insert_textbox( + rect, + buffer, + fontsize=fontsize, + lineheight=lineheight, + fontname=fontname, + fontfile=fontfile, + set_simple=set_simple, + encoding=encoding, + color=color, + fill=fill, + expandtabs=expandtabs, + render_mode=render_mode, + miter_limit=miter_limit, + border_width=border_width, + align=align, + rotate=rotate, + morph=morph, + stroke_opacity=stroke_opacity, + fill_opacity=fill_opacity, + oc=oc, + ) + if rc >= 0: + img.commit(overlay) + return rc + + @property + def is_wrapped(self): + """Check if /Contents is in a balanced graphics state.""" + return self._count_q_balance() == (0, 0) + + @property + def language(self): + """Page language.""" + pdfpage = _as_pdf_page(self.this, required=False) + if not pdfpage.m_internal: + return + lang = mupdf.pdf_dict_get_inheritable(pdfpage.obj(), PDF_NAME('Lang')) + if not lang.m_internal: + return + return mupdf.pdf_to_str_buf(lang) + + def links(self, kinds=None): + """ Generator over the links of a page. + + Args: + kinds: (list) link kinds to subselect from. If none, + all links are returned. E.g. kinds=[LINK_URI] + will only yield URI links. + """ + all_links = self.get_links() + for link in all_links: + if kinds is None or link["kind"] in kinds: + yield (link) + + def load_annot(self, ident: typing.Union[str, int]) -> Annot: + """Load an annot by name (/NM key) or xref. + + Args: + ident: identifier, either name (str) or xref (int). + """ + CheckParent(self) + if type(ident) is str: + xref = 0 + name = ident + elif type(ident) is int: + xref = ident + name = None + else: + raise ValueError("identifier must be a string or integer") + val = self._load_annot(name, xref) + if not val: + return val + val.thisown = True + val.parent = weakref.proxy(self) + self._annot_refs[id(val)] = val + return val + + def load_links(self): + """Get first Link.""" + CheckParent(self) + val = mupdf.fz_load_links( self.this) + if not val.m_internal: + return + val = Link( val) + val.thisown = True + val.parent = weakref.proxy(self) # owning page object + self._annot_refs[id(val)] = val + val.xref = 0 + val.id = "" + if self.parent.is_pdf: + xrefs = self.annot_xrefs() + xrefs = [x for x in xrefs if x[1] == mupdf.PDF_ANNOT_LINK] + if xrefs: + link_id = xrefs[0] + val.xref = link_id[0] + val.id = link_id[2] + else: + val.xref = 0 + val.id = "" + return val + + #---------------------------------------------------------------- + # page load widget by xref + #---------------------------------------------------------------- + def load_widget( self, xref): + """Load a widget by its xref.""" + CheckParent(self) + + page = _as_pdf_page(self.this) + annot = JM_get_widget_by_xref( page, xref) + #log( '{=type(annot)}') + val = annot + if not val: + return val + val.thisown = True + val.parent = weakref.proxy(self) + self._annot_refs[id(val)] = val + widget = Widget() + TOOLS._fill_widget(val, widget) + val = widget + return val + + @property + def mediabox(self): + """The MediaBox.""" + CheckParent(self) + page = self._pdf_page(required=False) + if not page.m_internal: + rect = mupdf.fz_bound_page( self.this) + else: + rect = JM_mediabox( page.obj()) + return Rect(rect) + + @property + def mediabox_size(self): + return Point(self.mediabox.x1, self.mediabox.y1) + + def new_shape(self): + return Shape(self) + + #@property + #def parent( self): + # assert self._parent + # if self._parent: + # return self._parent + # return Document( self.this.document()) + + def read_contents(self): + """All /Contents streams concatenated to one bytes object.""" + return TOOLS._get_all_contents(self) + + def refresh(self): + """Refresh page after link/annot/widget updates.""" + CheckParent(self) + doc = self.parent + page = doc.reload_page(self) + # fixme this looks wrong. + self.this = page + + def replace_image( + page: 'Page', + xref: int, + *, + filename=None, + pixmap=None, + stream=None, + ): + """Replace the image referred to by xref. + + Replace the image by changing the object definition stored under xref. This + will leave the pages appearance instructions intact, so the new image is + being displayed with the same bbox, rotation etc. + By providing a small fully transparent image, an effect as if the image had + been deleted can be achieved. + A typical use may include replacing large images by a smaller version, + e.g. with a lower resolution or graylevel instead of colored. + + Args: + xref: the xref of the image to replace. + filename, pixmap, stream: exactly one of these must be provided. The + meaning being the same as in Page.insert_image. + """ + doc = page.parent # the owning document + if not doc.xref_is_image(xref): + raise ValueError("xref not an image") # insert new image anywhere in page + if bool(filename) + bool(stream) + bool(pixmap) != 1: + raise ValueError("Exactly one of filename/stream/pixmap must be given") + new_xref = page.insert_image( + page.rect, filename=filename, stream=stream, pixmap=pixmap + ) + doc.xref_copy(new_xref, xref) # copy over new to old + last_contents_xref = page.get_contents()[-1] + # new image insertion has created a new /Contents source, + # which we will set to spaces now + doc.update_stream(last_contents_xref, b" ") + page._image_info = None # clear cache of extracted image information + + @property + def rotation(self): + """Page rotation.""" + CheckParent(self) + page = _as_pdf_page(self.this, required=0) + if not page.m_internal: + return 0 + return JM_page_rotation(page) + + @property + def rotation_matrix(self) -> Matrix: + """Reflects page rotation.""" + return Matrix(TOOLS._rotate_matrix(self)) + + def run(self, dw, m): + """Run page through a device. + dw: DeviceWrapper + """ + CheckParent(self) + mupdf.fz_run_page(self.this, dw.device, JM_matrix_from_py(m), mupdf.FzCookie()) + + def search_for( + page, + text, + *, + clip=None, + quads=False, + flags=None, + textpage=None, + ) -> list: + """Search for a string on a page. + + Args: + text: string to be searched for + clip: restrict search to this rectangle + quads: (bool) return quads instead of rectangles + flags: bit switches, default: join hyphened words + textpage: a pre-created pymupdf.TextPage + Returns: + a list of rectangles or quads, each containing one occurrence. + """ + if flags is None: + flags=(0 + | TEXT_DEHYPHENATE + | TEXT_PRESERVE_WHITESPACE + | TEXT_PRESERVE_LIGATURES + | TEXT_MEDIABOX_CLIP + ) + if clip is not None: + clip = Rect(clip) + + CheckParent(page) + tp = textpage + if tp is None: + tp = page.get_textpage(clip=clip, flags=flags) # create pymupdf.TextPage + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + rlist = tp.search(text, quads=quads) + if textpage is None: + del tp + return rlist + + def set_artbox(self, rect): + """Set the ArtBox.""" + return self._set_pagebox("ArtBox", rect) + + def set_bleedbox(self, rect): + """Set the BleedBox.""" + return self._set_pagebox("BleedBox", rect) + + def set_contents(self, xref): + """Set object at 'xref' as the page's /Contents.""" + CheckParent(self) + doc = self.parent + if doc.is_closed: + raise ValueError("document closed") + if not doc.is_pdf: + raise ValueError("is no PDF") + if xref not in range(1, doc.xref_length()): + raise ValueError("bad xref") + if not doc.xref_is_stream(xref): + raise ValueError("xref is no stream") + doc.xref_set_key(self.xref, "Contents", "%i 0 R" % xref) + + def set_cropbox(self, rect): + """Set the CropBox. Will also change Page.rect.""" + return self._set_pagebox("CropBox", rect) + + def set_language(self, language=None): + """Set PDF page default language.""" + CheckParent(self) + pdfpage = _as_pdf_page(self.this) + if not language: + mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang')) + else: + lang = mupdf.fz_text_language_from_string(language) + assert hasattr(mupdf, 'fz_string_from_text_language2') + mupdf.pdf_dict_put_text_string( + pdfpage.obj, + PDF_NAME('Lang'), + mupdf.fz_string_from_text_language2(lang) + ) + + def set_mediabox(self, rect): + """Set the MediaBox.""" + CheckParent(self) + page = self._pdf_page() + mediabox = JM_rect_from_py(rect) + if (mupdf.fz_is_empty_rect(mediabox) + or mupdf.fz_is_infinite_rect(mediabox) + ): + raise ValueError( MSG_BAD_RECT) + mupdf.pdf_dict_put_rect( page.obj(), PDF_NAME('MediaBox'), mediabox) + mupdf.pdf_dict_del( page.obj(), PDF_NAME('CropBox')) + mupdf.pdf_dict_del( page.obj(), PDF_NAME('ArtBox')) + mupdf.pdf_dict_del( page.obj(), PDF_NAME('BleedBox')) + mupdf.pdf_dict_del( page.obj(), PDF_NAME('TrimBox')) + + def set_rotation(self, rotation): + """Set page rotation.""" + CheckParent(self) + page = _as_pdf_page(self.this) + rot = JM_norm_rotation(rotation) + mupdf.pdf_dict_put_int( page.obj(), PDF_NAME('Rotate'), rot) + + def set_trimbox(self, rect): + """Set the TrimBox.""" + return self._set_pagebox("TrimBox", rect) + + def show_pdf_page( + page, + rect, + docsrc, + pno=0, + keep_proportion=True, + overlay=True, + oc=0, + rotate=0, + clip=None, + ) -> int: + """Show page number 'pno' of PDF 'docsrc' in rectangle 'rect'. + + Args: + rect: (rect-like) where to place the source image + docsrc: (document) source PDF + pno: (int) source page number + keep_proportion: (bool) do not change width-height-ratio + overlay: (bool) put in foreground + oc: (xref) make visibility dependent on this OCG / OCMD (which must be defined in the target PDF) + rotate: (int) degrees (multiple of 90) + clip: (rect-like) part of source page rectangle + Returns: + xref of inserted object (for reuse) + """ + def calc_matrix(sr, tr, keep=True, rotate=0): + """Calculate transformation matrix from source to target rect. + + Notes: + The product of four matrices in this sequence: (1) translate correct + source corner to origin, (2) rotate, (3) scale, (4) translate to + target's top-left corner. + Args: + sr: source rect in PDF (!) coordinate system + tr: target rect in PDF coordinate system + keep: whether to keep source ratio of width to height + rotate: rotation angle in degrees + Returns: + Transformation matrix. + """ + # calc center point of source rect + smp = (sr.tl + sr.br) / 2.0 + # calc center point of target rect + tmp = (tr.tl + tr.br) / 2.0 + + # m moves to (0, 0), then rotates + m = Matrix(1, 0, 0, 1, -smp.x, -smp.y) * Matrix(rotate) + + sr1 = sr * m # resulting source rect to calculate scale factors + + fw = tr.width / sr1.width # scale the width + fh = tr.height / sr1.height # scale the height + if keep: + fw = fh = min(fw, fh) # take min if keeping aspect ratio + + m *= Matrix(fw, fh) # concat scale matrix + m *= Matrix(1, 0, 0, 1, tmp.x, tmp.y) # concat move to target center + return JM_TUPLE(m) + + CheckParent(page) + doc = page.parent + + if not doc.is_pdf or not docsrc.is_pdf: + raise ValueError("is no PDF") + + if rect.is_empty or rect.is_infinite: + raise ValueError("rect must be finite and not empty") + + while pno < 0: # support negative page numbers + pno += docsrc.page_count + src_page = docsrc[pno] # load source page + + tar_rect = rect * ~page.transformation_matrix # target rect in PDF coordinates + + src_rect = src_page.rect if not clip else src_page.rect & clip # source rect + if src_rect.is_empty or src_rect.is_infinite: + raise ValueError("clip must be finite and not empty") + src_rect = src_rect * ~src_page.transformation_matrix # ... in PDF coord + + matrix = calc_matrix(src_rect, tar_rect, keep=keep_proportion, rotate=rotate) + + # list of existing /Form /XObjects + ilst = [i[1] for i in doc.get_page_xobjects(page.number)] + ilst += [i[7] for i in doc.get_page_images(page.number)] + ilst += [i[4] for i in doc.get_page_fonts(page.number)] + + # create a name not in that list + n = "fzFrm" + i = 0 + _imgname = n + "0" + while _imgname in ilst: + i += 1 + _imgname = n + str(i) + + isrc = docsrc._graft_id # used as key for graftmaps + if doc._graft_id == isrc: + raise ValueError("source document must not equal target") + + # retrieve / make Graftmap for source PDF + gmap = doc.Graftmaps.get(isrc, None) + if gmap is None: + gmap = Graftmap(doc) + doc.Graftmaps[isrc] = gmap + + # take note of generated xref for automatic reuse + pno_id = (isrc, pno) # id of docsrc[pno] + xref = doc.ShownPages.get(pno_id, 0) + + if overlay: + page.wrap_contents() # ensure a balanced graphics state + xref = page._show_pdf_page( + src_page, + overlay=overlay, + matrix=matrix, + xref=xref, + oc=oc, + clip=src_rect, + graftmap=gmap, + _imgname=_imgname, + ) + doc.ShownPages[pno_id] = xref + + return xref + + @property + def transformation_matrix(self): + """Page transformation matrix.""" + CheckParent(self) + + ctm = mupdf.FzMatrix() + page = self._pdf_page(required=False) + if not page.m_internal: + return JM_py_from_matrix(ctm) + mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) # fixme: original code passed mediabox=NULL. + mupdf.pdf_page_transform(page, mediabox, ctm) + val = JM_py_from_matrix(ctm) + + if self.rotation % 360 == 0: + val = Matrix(val) + else: + val = Matrix(1, 0, 0, -1, 0, self.cropbox.height) + return val + + @property + def trimbox(self): + """The TrimBox""" + rect = self._other_box("TrimBox") + if rect is None: + return self.cropbox + mb = self.mediabox + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) + + def update_link(page: 'Page', lnk: dict) -> None: + """Update a link on the current page.""" + CheckParent(page) + annot = utils.getLinkText(page, lnk) + if annot == "": + raise ValueError("link kind not supported") + + page.parent.update_object(lnk["xref"], annot, page=page) + + def widgets(self, types=None): + """ Generator over the widgets of a page. + + Args: + types: (list) field types to subselect from. If none, + all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT] + will only yield text fields. + """ + #for a in self.annot_xrefs(): + # log( '{a=}') + widget_xrefs = [a[0] for a in self.annot_xrefs() if a[1] == mupdf.PDF_ANNOT_WIDGET] + #log(f'widgets(): {widget_xrefs=}') + for xref in widget_xrefs: + widget = self.load_widget(xref) + if types is None or widget.field_type in types: + yield (widget) + + def wrap_contents(self): + """Ensure page is in a balanced graphics state.""" + push, pop = self._count_q_balance() # count missing "q"/"Q" commands + if push > 0: # prepend required push commands + prepend = b"q\n" * push + TOOLS._insert_contents(self, prepend, False) + if pop > 0: # append required pop commands + append = b"\nQ" * pop + b"\n" + TOOLS._insert_contents(self, append, True) + + def write_text( + page: 'Page', + rect=None, + writers=None, + overlay=True, + color=None, + opacity=None, + keep_proportion=True, + rotate=0, + oc=0, + ) -> None: + """Write the text of one or more pymupdf.TextWriter objects. + + Args: + rect: target rectangle. If None, the union of the text writers is used. + writers: one or more pymupdf.TextWriter objects. + overlay: put in foreground or background. + keep_proportion: maintain aspect ratio of rectangle sides. + rotate: arbitrary rotation angle. + oc: the xref of an optional content object + """ + assert isinstance(page, Page) + if not writers: + raise ValueError("need at least one pymupdf.TextWriter") + if type(writers) is TextWriter: + if rotate == 0 and rect is None: + writers.write_text(page, opacity=opacity, color=color, overlay=overlay) + return None + else: + writers = (writers,) + clip = writers[0].text_rect + textdoc = Document() + tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height) + for writer in writers: + clip |= writer.text_rect + writer.write_text(tpage, opacity=opacity, color=color) + if rect is None: + rect = clip + page.show_pdf_page( + rect, + textdoc, + 0, + overlay=overlay, + keep_proportion=keep_proportion, + rotate=rotate, + clip=clip, + oc=oc, + ) + textdoc = None + tpage = None + + @property + def xref(self): + """PDF xref number of page.""" + CheckParent(self) + return self.parent.page_xref(self.number) + + rect = property(bound, doc="page rectangle") + + # any result of layout analysis is stored here + layout_information = None + + +class Pixmap: + + def __init__(self, *args): + """ + Pixmap(colorspace, irect, alpha) - empty pixmap. + Pixmap(colorspace, src) - copy changing colorspace. + Pixmap(src, width, height,[clip]) - scaled copy, float dimensions. + Pixmap(src, alpha=1) - copy and add or drop alpha channel. + Pixmap(filename) - from an image in a file. + Pixmap(image) - from an image in memory (bytes). + Pixmap(colorspace, width, height, samples, alpha) - from samples data. + Pixmap(PDFdoc, xref) - from an image at xref in a PDF document. + """ + # Cache for property `self.samples_mv`. Set here so __del_() sees it if + # we raise. + # + self._samples_mv = None + + # 2024-01-16: Experimental support for a memory-view of the underlying + # data. Doesn't seem to make much difference to Pixmap.set_pixel() so + # not currently used. + self._memory_view = None + + if 0: + pass + + elif args_match(args, + (Colorspace, mupdf.FzColorspace), + (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple) + ): + # create empty pixmap with colorspace and IRect + cs, rect = args + alpha = 0 + pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha) + self.this = pm + + elif args_match(args, + (Colorspace, mupdf.FzColorspace), + (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple), + (int, bool) + ): + # create empty pixmap with colorspace and IRect + cs, rect, alpha = args + pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha) + self.this = pm + + elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)): + # copy pixmap, converting colorspace + cs, spix = args + if isinstance(cs, Colorspace): + cs = cs.this + elif cs is None: + cs = mupdf.FzColorspace(None) + if isinstance(spix, Pixmap): + spix = spix.this + if not mupdf.fz_pixmap_colorspace(spix).m_internal: + raise ValueError( "source colorspace must not be None") + + if cs.m_internal: + self.this = mupdf.fz_convert_pixmap( + spix, + cs, + mupdf.FzColorspace(), + mupdf.FzDefaultColorspaces(None), + mupdf.FzColorParams(), + 1 + ) + else: + self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix) + if not self.this.m_internal: + raise RuntimeError( MSG_PIX_NOALPHA) + + elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)): + # add mask to a pixmap w/o alpha channel + spix, mpix = args + if isinstance(spix, Pixmap): + spix = spix.this + if isinstance(mpix, Pixmap): + mpix = mpix.this + spm = spix + mpm = mpix + if not spix.m_internal: # intercept NULL for spix: make alpha only pix + dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm) + if not dst.m_internal: + raise RuntimeError( MSG_PIX_NOALPHA) + else: + dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm) + self.this = dst + + elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or + args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))): + # create pixmap as scaled copy of another one + if len(args) == 3: + spix, w, h = args + bbox = mupdf.FzIrect(mupdf.fz_infinite_irect) + else: + spix, w, h, clip = args + bbox = JM_irect_from_py(clip) + + src_pix = spix.this if isinstance(spix, Pixmap) else spix + if not mupdf.fz_is_infinite_irect(bbox): + pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox) + else: + pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect)) + self.this = pm + + elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw': + # Special raw construction where we set .this directly. + _, pm = args + if isinstance(pm, Pixmap): + pm = pm.this + self.this = pm + + elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)): + # Pixmap(struct Pixmap *spix, int alpha=1) + # copy pixmap & add / drop the alpha channel + spix = args[0] + alpha = args[1] if len(args) == 2 else 1 + src_pix = spix.this if isinstance(spix, Pixmap) else spix + if not _INRANGE(alpha, 0, 1): + raise ValueError( "bad alpha value") + cs = mupdf.fz_pixmap_colorspace(src_pix) + if not cs.m_internal and not alpha: + raise ValueError( "cannot drop alpha for 'NULL' colorspace") + seps = mupdf.FzSeparations() + n = mupdf.fz_pixmap_colorants(src_pix) + w = mupdf.fz_pixmap_width(src_pix) + h = mupdf.fz_pixmap_height(src_pix) + pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha) + pm.m_internal.x = src_pix.m_internal.x + pm.m_internal.y = src_pix.m_internal.y + pm.m_internal.xres = src_pix.m_internal.xres + pm.m_internal.yres = src_pix.m_internal.yres + + # copy samples data ------------------------------------------ + if 1: + # We use our pixmap_copy() to get best performance. + # test_pixmap.py:test_setalpha(): 3.9s t=0.0062 + extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n) + elif 1: + # Use memoryview. + # test_pixmap.py:test_setalpha(): 4.6 t=0.51 + src_view = mupdf.fz_pixmap_samples_memoryview( src_pix) + pm_view = mupdf.fz_pixmap_samples_memoryview( pm) + if src_pix.alpha() == pm.alpha(): # identical samples + #memcpy(tptr, sptr, w * h * (n + alpha)); + size = w * h * (n + alpha) + pm_view[ 0 : size] = src_view[ 0 : size] + else: + tptr = 0 + sptr = 0 + # This is a little faster than calling + # pm.fz_samples_set(), but still quite slow. E.g. reduces + # test_pixmap.py:test_setalpha() from 6.7s to 4.5s. + # + # t=0.53 + pm_stride = pm.stride() + pm_n = pm.n() + pm_alpha = pm.alpha() + src_stride = src_pix.stride() + src_n = src_pix.n() + #log( '{=pm_stride pm_n src_stride src_n}') + for y in range( h): + for x in range( w): + pm_i = pm_stride * y + pm_n * x + src_i = src_stride * y + src_n * x + pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n] + if pm_alpha: + pm_view[ pm_i + n] = 255 + else: + # Copy individual bytes from Python. Very slow. + # test_pixmap.py:test_setalpha(): 6.89 t=2.601 + if src_pix.alpha() == pm.alpha(): # identical samples + #memcpy(tptr, sptr, w * h * (n + alpha)); + for i in range(w * h * (n + alpha)): + mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i)) + else: + # t=2.56 + tptr = 0 + sptr = 0 + src_pix_alpha = src_pix.alpha() + for i in range(w * h): + #memcpy(tptr, sptr, n); + for j in range(n): + mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j)) + tptr += n + if pm.alpha(): + mupdf.fz_samples_set(pm, tptr, 255) + tptr += 1 + sptr += n + src_pix_alpha + self.this = pm + + elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)): + # create pixmap from samples data + cs, w, h, samples, alpha = args + if isinstance(cs, Colorspace): + cs = cs.this + assert isinstance(cs, mupdf.FzColorspace) + n = mupdf.fz_colorspace_n(cs) + stride = (n + alpha) * w + seps = mupdf.FzSeparations() + pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha) + + if isinstance( samples, (bytes, bytearray)): + #log('using mupdf.python_buffer_data()') + samples2 = mupdf.python_buffer_data(samples) + size = len(samples) + else: + res = JM_BufferFromBytes(samples) + if not res.m_internal: + raise ValueError( "bad samples data") + size, c = mupdf.fz_buffer_storage(res) + samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`. + if stride * h != size: + raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}") + mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2) + self.this = pm + + elif args_match(args, None): + # create pixmap from filename, file object, pathlib.Path or memory + imagedata, = args + name = 'name' + if hasattr(imagedata, "resolve"): + fname = imagedata.__str__() + if fname: + img = mupdf.fz_new_image_from_file(fname) + elif hasattr(imagedata, name): + fname = imagedata.name + if fname: + img = mupdf.fz_new_image_from_file(fname) + elif isinstance(imagedata, str): + img = mupdf.fz_new_image_from_file(imagedata) + else: + res = JM_BufferFromBytes(imagedata) + if not res.m_internal or not res.m_internal.len: + raise ValueError( "bad image data") + img = mupdf.fz_new_image_from_buffer(res) + + # Original code passed null for subarea and ctm, but that's not + # possible with MuPDF's python bindings. The equivalent is an + # infinite rect and identify matrix scaled by img.w() and img.h(). + pm, w, h = mupdf.fz_get_pixmap_from_image( + img, + mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT), + mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0), + ) + xres, yres = mupdf.fz_image_resolution(img) + pm.m_internal.xres = xres + pm.m_internal.yres = yres + self.this = pm + + elif args_match(args, (Document, mupdf.FzDocument), int): + # Create pixmap from PDF image identified by XREF number + doc, xref = args + pdf = _as_pdf_document(doc) + xreflen = mupdf.pdf_xref_len(pdf) + if not _INRANGE(xref, 1, xreflen-1): + raise ValueError( MSG_BAD_XREF) + ref = mupdf.pdf_new_indirect(pdf, xref, 0) + type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype')) + if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image')) + and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha')) + and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity')) + ): + raise ValueError( MSG_IS_NO_IMAGE) + img = mupdf.pdf_load_image(pdf, ref) + # Original code passed null for subarea and ctm, but that's not + # possible with MuPDF's python bindings. The equivalent is an + # infinite rect and identify matrix scaled by img.w() and img.h(). + pix, w, h = mupdf.fz_get_pixmap_from_image( + img, + mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT), + mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0), + ) + self.this = pix + + else: + text = 'Unrecognised args for constructing Pixmap:\n' + for arg in args: + text += f' {type(arg)}: {arg}\n' + raise Exception( text) + + def __len__(self): + return self.size + + def __repr__(self): + if not type(self) is Pixmap: return + if self.colorspace: + return "Pixmap(%s, %s, %s)" % (self.colorspace.this.m_internal.name, self.irect, self.alpha) + else: + return "Pixmap(%s, %s, %s)" % ('None', self.irect, self.alpha) + + def _tobytes(self, format_, jpg_quality): + ''' + Pixmap._tobytes + ''' + pm = self.this + size = mupdf.fz_pixmap_stride(pm) * pm.h() + res = mupdf.fz_new_buffer(size) + out = mupdf.FzOutput(res) + if format_ == 1: mupdf.fz_write_pixmap_as_png(out, pm) + elif format_ == 2: mupdf.fz_write_pixmap_as_pnm(out, pm) + elif format_ == 3: mupdf.fz_write_pixmap_as_pam(out, pm) + elif format_ == 5: mupdf.fz_write_pixmap_as_psd(out, pm) + elif format_ == 6: mupdf.fz_write_pixmap_as_ps(out, pm) + elif format_ == 7: + mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality, 0) + else: + mupdf.fz_write_pixmap_as_png(out, pm) + out.fz_close_output() + barray = JM_BinFromBuffer(res) + return barray + + def _writeIMG(self, filename, format_, jpg_quality): + pm = self.this + if format_ == 1: mupdf.fz_save_pixmap_as_png(pm, filename) + elif format_ == 2: mupdf.fz_save_pixmap_as_pnm(pm, filename) + elif format_ == 3: mupdf.fz_save_pixmap_as_pam(pm, filename) + elif format_ == 5: mupdf.fz_save_pixmap_as_psd(pm, filename) + elif format_ == 6: mupdf.fz_save_pixmap_as_ps(pm, filename) + elif format_ == 7: mupdf.fz_save_pixmap_as_jpeg(pm, filename, jpg_quality) + else: mupdf.fz_save_pixmap_as_png(pm, filename) + + @property + def alpha(self): + """Indicates presence of alpha channel.""" + return mupdf.fz_pixmap_alpha(self.this) + + def clear_with(self, value=None, bbox=None): + """Fill all color components with same value.""" + if value is None: + mupdf.fz_clear_pixmap(self.this) + elif bbox is None: + mupdf.fz_clear_pixmap_with_value(self.this, value) + else: + JM_clear_pixmap_rect_with_value(self.this, value, JM_irect_from_py(bbox)) + + def color_count(self, colors=0, clip=None): + ''' + Return count of each color. + ''' + pm = self.this + rc = JM_color_count( pm, clip) + if not colors: + return len( rc) + return rc + + def color_topusage(self, clip=None): + """Return most frequent color and its usage ratio.""" + allpixels = 0 + cnt = 0 + if clip is not None and self.irect in Rect(clip): + clip = self.irect + for pixel, count in self.color_count(colors=True,clip=clip).items(): + allpixels += count + if count > cnt: + cnt = count + maxpixel = pixel + if not allpixels: + return (1, bytes([255] * self.n)) + return (cnt / allpixels, maxpixel) + + @property + def colorspace(self): + """Pixmap Colorspace.""" + cs = Colorspace(mupdf.fz_pixmap_colorspace(self.this)) + if cs.name == "None": + return None + return cs + + def copy(self, src, bbox): + """Copy bbox from another Pixmap.""" + pm = self.this + src_pix = src.this + if not mupdf.fz_pixmap_colorspace(src_pix): + raise ValueError( "cannot copy pixmap with NULL colorspace") + if pm.alpha() != src_pix.alpha(): + raise ValueError( "source and target alpha must be equal") + mupdf.fz_copy_pixmap_rect(pm, src_pix, JM_irect_from_py(bbox), mupdf.FzDefaultColorspaces(None)) + + @property + def digest(self): + """MD5 digest of pixmap (bytes).""" + ret = mupdf.fz_md5_pixmap2(self.this) + return bytes(ret) + + def gamma_with(self, gamma): + """Apply correction with some float. + gamma=1 is a no-op.""" + if not mupdf.fz_pixmap_colorspace( self.this): + message_warning("colorspace invalid for function") + return + mupdf.fz_gamma_pixmap( self.this, gamma) + + @property + def h(self): + """The height.""" + return mupdf.fz_pixmap_height(self.this) + + def invert_irect(self, bbox=None): + """Invert the colors inside a bbox.""" + pm = self.this + if not mupdf.fz_pixmap_colorspace(pm).m_internal: + message_warning("ignored for stencil pixmap") + return False + r = JM_irect_from_py(bbox) + if mupdf.fz_is_infinite_irect(r): + mupdf.fz_invert_pixmap(pm) + return True + mupdf.fz_invert_pixmap_rect(pm, r) + return True + + @property + def irect(self): + """Pixmap bbox - an IRect object.""" + val = mupdf.fz_pixmap_bbox(self.this) + return JM_py_from_irect( val) + + @property + def is_monochrome(self): + """Check if pixmap is monochrome.""" + return mupdf.fz_is_pixmap_monochrome( self.this) + + @property + def is_unicolor(self): + ''' + Check if pixmap has only one color. + ''' + pm = self.this + n = pm.n() + count = pm.w() * pm.h() * n + def _pixmap_read_samples(pm, offset, n): + ret = list() + for i in range(n): + ret.append(mupdf.fz_samples_get(pm, offset+i)) + return ret + for offset in range( 0, count, n): + if offset == 0: + sample0 = _pixmap_read_samples( pm, 0, n) + else: + sample = _pixmap_read_samples( pm, offset, n) + if sample != sample0: + return False + return True + + @property + def n(self): + """The size of one pixel.""" + if g_use_extra: + # Setting self.__class__.n gives a small reduction in overhead of + # test_general.py:test_2093, e.g. 1.4x -> 1.3x. + #return extra.pixmap_n(self.this) + def n2(self): + return extra.pixmap_n(self.this) + self.__class__.n = property(n2) + return self.n + return mupdf.fz_pixmap_components(self.this) + + def pdfocr_save(self, filename, compress=1, language=None, tessdata=None): + ''' + Save pixmap as an OCR-ed PDF page. + ''' + tessdata = get_tessdata(tessdata) + opts = mupdf.FzPdfocrOptions() + opts.compress = compress + if language: + opts.language_set2( language) + if tessdata: + opts.datadir_set2( tessdata) + pix = self.this + if isinstance(filename, str): + mupdf.fz_save_pixmap_as_pdfocr( pix, filename, 0, opts) + else: + out = JM_new_output_fileptr( filename) + try: + mupdf.fz_write_pixmap_as_pdfocr( out, pix, opts) + finally: + out.fz_close_output() # Avoid MuPDF warning. + + def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None): + """Save pixmap as an OCR-ed PDF page. + + Args: + compress: (bool) compress, default 1 (True). + language: (str) language(s) occurring on page, default "eng" (English), + multiples like "eng+ger" for English and German. + tessdata: (str) folder name of Tesseract's language support. If None + we use environment variable TESSDATA_PREFIX or search for + Tesseract installation. + Notes: + On failure, make sure Tesseract is installed and you have set + or environment variable "TESSDATA_PREFIX" to the folder + containing your Tesseract's language support data. + """ + tessdata = get_tessdata(tessdata) + from io import BytesIO + bio = BytesIO() + self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata) + return bio.getvalue() + + def pil_image(self): + """Create a Pillow Image from the Pixmap.""" + try: + from PIL import Image + except ImportError: + message("PIL/Pillow not installed") + raise + + cspace = self.colorspace + if not cspace: + mode = "L" + elif cspace.n == 1: + mode = "L" if not self.alpha else "LA" + elif cspace.n == 3: + mode = "RGB" if not self.alpha else "RGBA" + else: + mode = "CMYK" + + img = Image.frombytes(mode, (self.width, self.height), self.samples) + return img + + def pil_save(self, *args, **kwargs): + """Write to image file using Pillow. + + An intermediate PIL Image is created, and its "save" method is used + to store the image. See Pillow documentation to learn about the + meaning of possible positional and keyword parameters. + Use this when other output formats are desired. + """ + img = self.pil_image() + + if "dpi" not in kwargs.keys(): + kwargs["dpi"] = (self.xres, self.yres) + + img.save(*args, **kwargs) + + def pil_tobytes(self, *args, **kwargs): + """Convert to an image in memory using Pillow. + + An intermediate PIL Image is created, and its "save" method is used + to store the image. See Pillow documentation to learn about the + meaning of possible positional or keyword parameters. + Use this when other output formats are desired. + """ + bytes_out = io.BytesIO() + img = self.pil_image() + + if "dpi" not in kwargs.keys(): + kwargs["dpi"] = (self.xres, self.yres) + + img.save(bytes_out, *args, **kwargs) + return bytes_out.getvalue() + + def pixel(self, x, y): + """Get color tuple of pixel (x, y). + Last item is the alpha if Pixmap.alpha is true.""" + if g_use_extra: + return extra.pixmap_pixel(self.this.m_internal, x, y) + if (0 + or x < 0 + or x >= self.this.m_internal.w + or y < 0 + or y >= self.this.m_internal.h + ): + RAISEPY(MSG_PIXEL_OUTSIDE, PyExc_ValueError) + n = self.this.m_internal.n + stride = self.this.m_internal.stride + i = stride * y + n * x + ret = tuple( self.samples_mv[ i: i+n]) + return ret + + @property + def samples(self)->bytes: + mv = self.samples_mv + return bytes( mv) + + @property + def samples_mv(self): + ''' + Pixmap samples memoryview. + ''' + # We remember the returned memoryview so that our `__del__()` can + # release it; otherwise accessing it after we have been destructed will + # fail, possibly crashing Python; this is #4155. + # + if self._samples_mv is None: + self._samples_mv = mupdf.fz_pixmap_samples_memoryview(self.this) + return self._samples_mv + + def _samples_mv_release(self): + if self._samples_mv: + self._samples_mv.release() + + @property + def samples_ptr(self): + return mupdf.fz_pixmap_samples_int(self.this) + + def save(self, filename, output=None, jpg_quality=95): + """Output as image in format determined by filename extension. + + Args: + output: (str) only use to overrule filename extension. Default is PNG. + Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS. + """ + valid_formats = { + "png": 1, + "pnm": 2, + "pgm": 2, + "ppm": 2, + "pbm": 2, + "pam": 3, + "psd": 5, + "ps": 6, + "jpg": 7, + "jpeg": 7, + } + + if type(filename) is str: + pass + elif hasattr(filename, "absolute"): + filename = str(filename) + elif hasattr(filename, "name"): + filename = filename.name + if output is None: + _, ext = os.path.splitext(filename) + output = ext[1:] + + idx = valid_formats.get(output.lower(), None) + if idx is None: + raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}") + if self.alpha and idx in (2, 6, 7): + raise ValueError("'%s' cannot have alpha" % output) + if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4): + raise ValueError(f"unsupported colorspace for '{output}'") + if idx == 7: + self.set_dpi(self.xres, self.yres) + return self._writeIMG(filename, idx, jpg_quality) + + def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None): + """Set alpha channel to values contained in a byte array. + If omitted, set alphas to 255. + + Args: + alphavalues: (bytes) with length (width * height) or 'None'. + premultiply: (bool, True) premultiply colors with alpha values. + opaque: (tuple, length colorspace.n) this color receives opacity 0. + matte: (tuple, length colorspace.n)) preblending background color. + """ + pix = self.this + alpha = 0 + m = 0 + if pix.alpha() == 0: + raise ValueError( MSG_PIX_NOALPHA) + n = mupdf.fz_pixmap_colorants(pix) + w = mupdf.fz_pixmap_width(pix) + h = mupdf.fz_pixmap_height(pix) + balen = w * h * (n+1) + colors = [0, 0, 0, 0] # make this color opaque + bgcolor = [0, 0, 0, 0] # preblending background color + zero_out = 0 + bground = 0 + if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n: + for i in range(n): + colors[i] = opaque[i] + zero_out = 1 + if matte and isinstance( matte, (tuple, list)) and len(matte) == n: + for i in range(n): + bgcolor[i] = matte[i] + bground = 1 + data = bytes() + data_len = 0 + if alphavalues: + #res = JM_BufferFromBytes(alphavalues) + #data_len, data = mupdf.fz_buffer_storage(res) + #if data_len < w * h: + # THROWMSG("bad alpha values") + # fixme: don't seem to need to create an fz_buffer - can + # use directly? + if isinstance(alphavalues, (bytes, bytearray)): + data = alphavalues + data_len = len(alphavalues) + else: + assert 0, f'unexpected type for alphavalues: {type(alphavalues)}' + if data_len < w * h: + raise ValueError( "bad alpha values") + if 1: + # Use C implementation for speed. + mupdf.Pixmap_set_alpha_helper( + balen, + n, + data_len, + zero_out, + mupdf.python_buffer_data( data), + pix.m_internal, + premultiply, + bground, + colors, + bgcolor, + ) + else: + i = k = j = 0 + data_fix = 255 + while i < balen: + alpha = data[k] + if zero_out: + for j in range(i, i+n): + if mupdf.fz_samples_get(pix, j) != colors[j - i]: + data_fix = 255 + break + else: + data_fix = 0 + if data_len: + def fz_mul255( a, b): + x = a * b + 128 + x += x // 256 + return x // 256 + + if data_fix == 0: + mupdf.fz_samples_set(pix, i+n, 0) + else: + mupdf.fz_samples_set(pix, i+n, alpha) + if premultiply and not bground: + for j in range(i, i+n): + mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j), alpha)) + elif bground: + for j in range( i, i+n): + m = bgcolor[j - i] + mupdf.fz_samples_set(pix, j, fz_mul255( mupdf.fz_samples_get(pix, j) - m, alpha)) + else: + mupdf.fz_samples_set(pix, i+n, data_fix) + i += n+1 + k += 1 + + def tobytes(self, output="png", jpg_quality=95): + ''' + Convert to binary image stream of desired type. + ''' + valid_formats = { + "png": 1, + "pnm": 2, + "pgm": 2, + "ppm": 2, + "pbm": 2, + "pam": 3, + "tga": 4, + "tpic": 4, + "psd": 5, + "ps": 6, + 'jpg': 7, + 'jpeg': 7, + } + idx = valid_formats.get(output.lower(), None) + if idx is None: + raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}") + if self.alpha and idx in (2, 6, 7): + raise ValueError("'{output}' cannot have alpha") + if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4): + raise ValueError(f"unsupported colorspace for '{output}'") + if idx == 7: + self.set_dpi(self.xres, self.yres) + barray = self._tobytes(idx, jpg_quality) + return barray + + def set_dpi(self, xres, yres): + """Set resolution in both dimensions.""" + pm = self.this + pm.m_internal.xres = xres + pm.m_internal.yres = yres + + def set_origin(self, x, y): + """Set top-left coordinates.""" + pm = self.this + pm.m_internal.x = x + pm.m_internal.y = y + + def set_pixel(self, x, y, color): + """Set color of pixel (x, y).""" + if g_use_extra: + return extra.set_pixel(self.this.m_internal, x, y, color) + pm = self.this + if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1): + raise ValueError( MSG_PIXEL_OUTSIDE) + n = pm.n() + for j in range(n): + i = color[j] + if not _INRANGE(i, 0, 255): + raise ValueError( MSG_BAD_COLOR_SEQ) + stride = mupdf.fz_pixmap_stride( pm) + i = stride * y + n * x + if 0: + # Using a cached self._memory_view doesn't actually make much + # difference to speed. + if not self._memory_view: + self._memory_view = self.samples_mv + for j in range(n): + self._memory_view[i + j] = color[j] + else: + for j in range(n): + pm.fz_samples_set(i + j, color[j]) + + def set_rect(self, bbox, color): + """Set color of all pixels in bbox.""" + pm = self.this + n = pm.n() + c = [] + for j in range(n): + i = color[j] + if not _INRANGE(i, 0, 255): + raise ValueError( MSG_BAD_COLOR_SEQ) + c.append(i) + bbox = JM_irect_from_py(bbox) + i = JM_fill_pixmap_rect_with_color(pm, c, bbox) + rc = bool(i) + return rc + + def shrink(self, factor): + """Divide width and height by 2**factor. + E.g. factor=1 shrinks to 25% of original size (in place).""" + if factor < 1: + message_warning("ignoring shrink factor < 1") + return + mupdf.fz_subsample_pixmap( self.this, factor) + # Pixmap has changed so clear our memory view. + self._memory_view = None + self._samples_mv_release() + + @property + def size(self): + """Pixmap size.""" + return mupdf.fz_pixmap_size( self.this) + + @property + def stride(self): + """Length of one image line (width * n).""" + return self.this.stride() + + def tint_with(self, black, white): + """Tint colors with modifiers for black and white.""" + if not self.colorspace or self.colorspace.n > 3: + message("warning: colorspace invalid for function") + return + return mupdf.fz_tint_pixmap( self.this, black, white) + + @property + def w(self): + """The width.""" + return mupdf.fz_pixmap_width(self.this) + + def warp(self, quad, width, height): + """Return pixmap from a warped quad.""" + if not quad.is_convex: raise ValueError("quad must be convex") + q = JM_quad_from_py(quad) + points = [ q.ul, q.ur, q.lr, q.ll] + dst = mupdf.fz_warp_pixmap( self.this, points, width, height) + return Pixmap( dst) + + @property + def x(self): + """x component of Pixmap origin.""" + return mupdf.fz_pixmap_x(self.this) + + @property + def xres(self): + """Resolution in x direction.""" + return self.this.xres() + + @property + def y(self): + """y component of Pixmap origin.""" + return mupdf.fz_pixmap_y(self.this) + + @property + def yres(self): + """Resolution in y direction.""" + return self.this.yres() + + width = w + height = h + + def __del__(self): + if self._samples_mv: + self._samples_mv.release() + + +del Point +class Point: + + def __abs__(self): + return math.sqrt(self.x * self.x + self.y * self.y) + + def __add__(self, p): + if hasattr(p, "__float__"): + return Point(self.x + p, self.y + p) + if len(p) != 2: + raise ValueError("Point: bad seq len") + return Point(self.x + p[0], self.y + p[1]) + + def __bool__(self): + return not (max(self) == min(self) == 0) + + def __eq__(self, p): + if not hasattr(p, "__len__"): + return False + return len(p) == 2 and not (self - p) + + def __getitem__(self, i): + return (self.x, self.y)[i] + + def __hash__(self): + return hash(tuple(self)) + + def __init__(self, *args, x=None, y=None): + ''' + Point() - all zeros + Point(x, y) + Point(Point) - new copy + Point(sequence) - from 'sequence' + + Explicit keyword args x, y override earlier settings if not None. + ''' + if not args: + self.x = 0.0 + self.y = 0.0 + elif len(args) > 2: + raise ValueError("Point: bad seq len") + elif len(args) == 2: + self.x = float(args[0]) + self.y = float(args[1]) + elif len(args) == 1: + l = args[0] + if isinstance(l, (mupdf.FzPoint, mupdf.fz_point)): + self.x = l.x + self.y = l.y + else: + if not hasattr(l, "__getitem__"): + raise ValueError("Point: bad args") + if len(l) != 2: + raise ValueError("Point: bad seq len") + self.x = float(l[0]) + self.y = float(l[1]) + else: + raise ValueError("Point: bad seq len") + if x is not None: self.x = x + if y is not None: self.y = y + + def __len__(self): + return 2 + + def __mul__(self, m): + if hasattr(m, "__float__"): + return Point(self.x * m, self.y * m) + if hasattr(m, "__getitem__") and len(m) == 2: + # dot product + return self.x * m[0] + self.y * m[1] + p = Point(self) + return p.transform(m) + + def __neg__(self): + return Point(-self.x, -self.y) + + def __nonzero__(self): + return not (max(self) == min(self) == 0) + + def __pos__(self): + return Point(self) + + def __repr__(self): + return "Point" + str(tuple(self)) + + def __setitem__(self, i, v): + v = float(v) + if i == 0: self.x = v + elif i == 1: self.y = v + else: + raise IndexError("index out of range") + return None + + def __sub__(self, p): + if hasattr(p, "__float__"): + return Point(self.x - p, self.y - p) + if len(p) != 2: + raise ValueError("Point: bad seq len") + return Point(self.x - p[0], self.y - p[1]) + + def __truediv__(self, m): + if hasattr(m, "__float__"): + return Point(self.x * 1./m, self.y * 1./m) + m1 = util_invert_matrix(m)[1] + if not m1: + raise ZeroDivisionError("matrix not invertible") + p = Point(self) + return p.transform(m1) + + @property + def abs_unit(self): + """Unit vector with positive coordinates.""" + s = self.x * self.x + self.y * self.y + if s < EPSILON: + return Point(0,0) + s = math.sqrt(s) + return Point(abs(self.x) / s, abs(self.y) / s) + + def distance_to(self, *args): + """Return distance to rectangle or another point.""" + if not len(args) > 0: + raise ValueError("at least one parameter must be given") + + x = args[0] + if len(x) == 2: + x = Point(x) + elif len(x) == 4: + x = Rect(x) + else: + raise ValueError("arg1 must be point-like or rect-like") + + if len(args) > 1: + unit = args[1] + else: + unit = "px" + u = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.), + "mm": (25.4, 72.)} + f = u[unit][0] / u[unit][1] + + if type(x) is Point: + return abs(self - x) * f + + # from here on, x is a rectangle + # as a safeguard, make a finite copy of it + r = Rect(x.top_left, x.top_left) + r = r | x.bottom_right + if self in r: + return 0.0 + if self.x > r.x1: + if self.y >= r.y1: + return self.distance_to(r.bottom_right, unit) + elif self.y <= r.y0: + return self.distance_to(r.top_right, unit) + else: + return (self.x - r.x1) * f + elif r.x0 <= self.x <= r.x1: + if self.y >= r.y1: + return (self.y - r.y1) * f + else: + return (r.y0 - self.y) * f + else: + if self.y >= r.y1: + return self.distance_to(r.bottom_left, unit) + elif self.y <= r.y0: + return self.distance_to(r.top_left, unit) + else: + return (r.x0 - self.x) * f + + def transform(self, m): + """Replace point by its transformation with matrix-like m.""" + if len(m) != 6: + raise ValueError("Matrix: bad seq len") + self.x, self.y = util_transform_point(self, m) + return self + + @property + def unit(self): + """Unit vector of the point.""" + s = self.x * self.x + self.y * self.y + if s < EPSILON: + return Point(0,0) + s = math.sqrt(s) + return Point(self.x / s, self.y / s) + + __div__ = __truediv__ + norm = __abs__ + + +class Quad: + + def __abs__(self): + if self.is_empty: + return 0.0 + return abs(self.ul - self.ur) * abs(self.ul - self.ll) + + def __add__(self, q): + if hasattr(q, "__float__"): + return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q) + if len(q) != 4: + raise ValueError("Quad: bad seq len") + return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3]) + + def __bool__(self): + return not self.is_empty + + def __contains__(self, x): + try: + l = x.__len__() + except Exception: + if g_exceptions_verbose > 1: exception_info() + return False + if l == 2: + return util_point_in_quad(x, self) + if l != 4: + return False + if CheckRect(x): + if Rect(x).is_empty: + return True + return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self) + if CheckQuad(x): + for i in range(4): + if not util_point_in_quad(x[i], self): + return False + return True + return False + + def __eq__(self, quad): + if not hasattr(quad, "__len__"): + return False + return len(quad) == 4 and ( + self.ul == quad[0] and + self.ur == quad[1] and + self.ll == quad[2] and + self.lr == quad[3] + ) + + def __getitem__(self, i): + return (self.ul, self.ur, self.ll, self.lr)[i] + + def __hash__(self): + return hash(tuple(self)) + + def __init__(self, *args, ul=None, ur=None, ll=None, lr=None): + ''' + Quad() - all zero points + Quad(ul, ur, ll, lr) + Quad(quad) - new copy + Quad(sequence) - from 'sequence' + + Explicit keyword args ul, ur, ll, lr override earlier settings if not + None. + + ''' + if not args: + self.ul = self.ur = self.ll = self.lr = Point() + elif len(args) > 4: + raise ValueError("Quad: bad seq len") + elif len(args) == 4: + self.ul, self.ur, self.ll, self.lr = map(Point, args) + elif len(args) == 1: + l = args[0] + if isinstance(l, mupdf.FzQuad): + self.this = l + self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr) + elif not hasattr(l, "__getitem__"): + raise ValueError("Quad: bad args") + elif len(l) != 4: + raise ValueError("Quad: bad seq len") + else: + self.ul, self.ur, self.ll, self.lr = map(Point, l) + else: + raise ValueError("Quad: bad args") + if ul is not None: self.ul = Point(ul) + if ur is not None: self.ur = Point(ur) + if ll is not None: self.ll = Point(ll) + if lr is not None: self.lr = Point(lr) + + def __len__(self): + return 4 + + def __mul__(self, m): + q = Quad(self) + q = q.transform(m) + return q + + def __neg__(self): + return Quad(-self.ul, -self.ur, -self.ll, -self.lr) + + def __nonzero__(self): + return not self.is_empty + + def __pos__(self): + return Quad(self) + + def __repr__(self): + return "Quad" + str(tuple(self)) + + def __setitem__(self, i, v): + if i == 0: self.ul = Point(v) + elif i == 1: self.ur = Point(v) + elif i == 2: self.ll = Point(v) + elif i == 3: self.lr = Point(v) + else: + raise IndexError("index out of range") + return None + + def __sub__(self, q): + if hasattr(q, "__float__"): + return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q) + if len(q) != 4: + raise ValueError("Quad: bad seq len") + return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3]) + + def __truediv__(self, m): + if hasattr(m, "__float__"): + im = 1. / m + else: + im = util_invert_matrix(m)[1] + if not im: + raise ZeroDivisionError("Matrix not invertible") + q = Quad(self) + q = q.transform(im) + return q + + @property + def is_convex(self): + """Check if quad is convex and not degenerate. + + Notes: + Check that for the two diagonals, the other two corners are not + on the same side of the diagonal. + Returns: + True or False. + """ + m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis + p1 = self.ll * m # transform the + p2 = self.ur * m # other two points + if p1.y * p2.y > 0: + return False + m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis + p1 = self.lr * m # transform the + p2 = self.ul * m # remaining points + if p1.y * p2.y > 0: + return False + return True + + @property + def is_empty(self): + """Check whether all quad corners are on the same line. + + This is the case if width or height is zero. + """ + return self.width < EPSILON or self.height < EPSILON + + @property + def is_infinite(self): + """Check whether this is the infinite quad.""" + return self.rect.is_infinite + + @property + def is_rectangular(self): + """Check if quad is rectangular. + + Notes: + Some rotation matrix can thus transform it into a rectangle. + This is equivalent to three corners enclose 90 degrees. + Returns: + True or False. + """ + + sine = util_sine_between(self.ul, self.ur, self.lr) + if abs(sine - 1) > EPSILON: # the sine of the angle + return False + + sine = util_sine_between(self.ur, self.lr, self.ll) + if abs(sine - 1) > EPSILON: + return False + + sine = util_sine_between(self.lr, self.ll, self.ul) + if abs(sine - 1) > EPSILON: + return False + + return True + + def morph(self, p, m): + """Morph the quad with matrix-like 'm' and point-like 'p'. + + Return a new quad.""" + if self.is_infinite: + return INFINITE_QUAD() + delta = Matrix(1, 1).pretranslate(p.x, p.y) + q = self * ~delta * m * delta + return q + + @property + def rect(self): + r = Rect() + r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x) + r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y) + r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x) + r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y) + return r + + def transform(self, m): + """Replace quad by its transformation with matrix m.""" + if hasattr(m, "__float__"): + pass + elif len(m) != 6: + raise ValueError("Matrix: bad seq len") + self.ul *= m + self.ur *= m + self.ll *= m + self.lr *= m + return self + + __div__ = __truediv__ + width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr))) + height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr))) + + +class Rect: + + def __abs__(self): + if self.is_empty or self.is_infinite: + return 0.0 + return (self.x1 - self.x0) * (self.y1 - self.y0) + + def __add__(self, p): + if hasattr(p, "__float__"): + return Rect(self.x0 + p, self.y0 + p, self.x1 + p, self.y1 + p) + if len(p) != 4: + raise ValueError("Rect: bad seq len") + return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3]) + + def __and__(self, x): + if not hasattr(x, "__len__"): + raise ValueError("bad operand 2") + + r1 = Rect(x) + r = Rect(self) + return r.intersect(r1) + + def __bool__(self): + return not (max(self) == min(self) == 0) + + def __contains__(self, x): + if hasattr(x, "__float__"): + return x in tuple(self) + l = len(x) + if l == 2: + return util_is_point_in_rect(x, self) + if l == 4: + r = INFINITE_RECT() + try: + r = Rect(x) + except Exception: + if g_exceptions_verbose > 1: exception_info() + r = Quad(x).rect + return (self.x0 <= r.x0 <= r.x1 <= self.x1 and + self.y0 <= r.y0 <= r.y1 <= self.y1) + return False + + def __eq__(self, rect): + if not hasattr(rect, "__len__"): + return False + return len(rect) == 4 and not (self - rect) + + def __getitem__(self, i): + return (self.x0, self.y0, self.x1, self.y1)[i] + + def __hash__(self): + return hash(tuple(self)) + + def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None): + """ + Rect() - all zeros + Rect(x0, y0, x1, y1) + Rect(top-left, x1, y1) + Rect(x0, y0, bottom-right) + Rect(top-left, bottom-right) + Rect(Rect or IRect) - new copy + Rect(sequence) - from 'sequence' + + Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings + if not None. + """ + x0, y0, x1, y1 = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1) + self.x0 = float( x0) + self.y0 = float( y0) + self.x1 = float( x1) + self.y1 = float( y1) + + def __len__(self): + return 4 + + def __mul__(self, m): + if hasattr(m, "__float__"): + return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m) + r = Rect(self) + r = r.transform(m) + return r + + def __neg__(self): + return Rect(-self.x0, -self.y0, -self.x1, -self.y1) + + def __nonzero__(self): + return not (max(self) == min(self) == 0) + + def __or__(self, x): + if not hasattr(x, "__len__"): + raise ValueError("bad operand 2") + r = Rect(self) + if len(x) == 2: + return r.include_point(x) + if len(x) == 4: + return r.include_rect(x) + raise ValueError("bad operand 2") + + def __pos__(self): + return Rect(self) + + def __repr__(self): + return "Rect" + str(tuple(self)) + + def __setitem__(self, i, v): + v = float(v) + if i == 0: self.x0 = v + elif i == 1: self.y0 = v + elif i == 2: self.x1 = v + elif i == 3: self.y1 = v + else: + raise IndexError("index out of range") + return None + + def __sub__(self, p): + if hasattr(p, "__float__"): + return Rect(self.x0 - p, self.y0 - p, self.x1 - p, self.y1 - p) + if len(p) != 4: + raise ValueError("Rect: bad seq len") + return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3]) + + def __truediv__(self, m): + if hasattr(m, "__float__"): + return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m) + im = util_invert_matrix(m)[1] + if not im: + raise ZeroDivisionError(f"Matrix not invertible: {m}") + r = Rect(self) + r = r.transform(im) + return r + + @property + def bottom_left(self): + """Bottom-left corner.""" + return Point(self.x0, self.y1) + + @property + def bottom_right(self): + """Bottom-right corner.""" + return Point(self.x1, self.y1) + + def contains(self, x): + """Check if containing point-like or rect-like x.""" + return self.__contains__(x) + + @property + def height(self): + return max(0, self.y1 - self.y0) + + def get_area(self, *args) -> float: + """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'.""" + return _rect_area(self.width, self.height, args) + + def include_point(self, p): + """Extend to include point-like p.""" + if len(p) != 2: + raise ValueError("Point: bad seq len") + self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p) + return self + + def include_rect(self, r): + """Extend to include rect-like r.""" + if len(r) != 4: + raise ValueError("Rect: bad seq len") + r = Rect(r) + if r.is_infinite or self.is_infinite: + self.x0, self.y0, self.x1, self.y1 = FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT + elif r.is_empty: + return self + elif self.is_empty: + self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1 + else: + self.x0, self.y0, self.x1, self.y1 = util_union_rect(self, r) + return self + + def intersect(self, r): + """Restrict to common rect with rect-like r.""" + if not len(r) == 4: + raise ValueError("Rect: bad seq len") + r = Rect(r) + if r.is_infinite: + return self + elif self.is_infinite: + self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1 + elif r.is_empty: + self.x0, self.y0, self.x1, self.y1 = r.x0, r.y0, r.x1, r.y1 + elif self.is_empty: + return self + else: + self.x0, self.y0, self.x1, self.y1 = util_intersect_rect(self, r) + return self + + def intersects(self, x): + """Check if intersection with rectangle x is not empty.""" + rect2 = Rect(x) + return (1 + and not self.is_empty + and not self.is_infinite + and not rect2.is_empty + and not rect2.is_infinite + and self.x0 < rect2.x1 + and rect2.x0 < self.x1 + and self.y0 < rect2.y1 + and rect2.y0 < self.y1 + ) + + @property + def is_empty(self): + """True if rectangle area is empty.""" + return self.x0 >= self.x1 or self.y0 >= self.y1 + + @property + def is_infinite(self): + """True if this is the infinite rectangle.""" + return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT + + @property + def is_valid(self): + """True if rectangle is valid.""" + return self.x0 <= self.x1 and self.y0 <= self.y1 + + def morph(self, p, m): + """Morph with matrix-like m and point-like p. + + Returns a new quad.""" + if self.is_infinite: + return INFINITE_QUAD() + return self.quad.morph(p, m) + + def norm(self): + return math.sqrt(sum([c*c for c in self])) + + def normalize(self): + """Replace rectangle with its finite version.""" + if self.x1 < self.x0: + self.x0, self.x1 = self.x1, self.x0 + if self.y1 < self.y0: + self.y0, self.y1 = self.y1, self.y0 + return self + + @property + def quad(self): + """Return Quad version of rectangle.""" + return Quad(self.tl, self.tr, self.bl, self.br) + + def round(self): + """Return the IRect.""" + return IRect(util_round_rect(self)) + + @property + def top_left(self): + """Top-left corner.""" + return Point(self.x0, self.y0) + + @property + def top_right(self): + """Top-right corner.""" + return Point(self.x1, self.y0) + + def torect(self, r): + """Return matrix that converts to target rect.""" + + r = Rect(r) + if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty: + raise ValueError("rectangles must be finite and not empty") + return ( + Matrix(1, 0, 0, 1, -self.x0, -self.y0) + * Matrix(r.width / self.width, r.height / self.height) + * Matrix(1, 0, 0, 1, r.x0, r.y0) + ) + + def transform(self, m): + """Replace with the transformation by matrix-like m.""" + if not len(m) == 6: + raise ValueError("Matrix: bad seq len") + self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m) + return self + + @property + def width(self): + return max(0, self.x1 - self.x0) + + __div__ = __truediv__ + + bl = bottom_left + br = bottom_right + irect = property(round) + tl = top_left + tr = top_right + + +class Shape: + """Create a new shape.""" + + @staticmethod + def horizontal_angle(C, P): + """Return the angle to the horizontal for the connection from C to P. + This uses the arcus sine function and resolves its inherent ambiguity by + looking up in which quadrant vector S = P - C is located. + """ + S = Point(P - C).unit # unit vector 'C' -> 'P' + alfa = math.asin(abs(S.y)) # absolute angle from horizontal + if S.x < 0: # make arcsin result unique + if S.y <= 0: # bottom-left + alfa = -(math.pi - alfa) + else: # top-left + alfa = math.pi - alfa + else: + if S.y >= 0: # top-right + pass + else: # bottom-right + alfa = -alfa + return alfa + + def __init__(self, page: Page): + CheckParent(page) + self.page = page + self.doc = page.parent + if not self.doc.is_pdf: + raise ValueError("is no PDF") + self.height = page.mediabox_size.y + self.width = page.mediabox_size.x + self.x = page.cropbox_position.x + self.y = page.cropbox_position.y + + self.pctm = page.transformation_matrix # page transf. matrix + self.ipctm = ~self.pctm # inverted transf. matrix + + self.draw_cont = "" + self.text_cont = "" + self.totalcont = "" + self.last_point = None + self.rect = None + + def updateRect(self, x): + if self.rect is None: + if len(x) == 2: + self.rect = Rect(x, x) + else: + self.rect = Rect(x) + + else: + if len(x) == 2: + x = Point(x) + self.rect.x0 = min(self.rect.x0, x.x) + self.rect.y0 = min(self.rect.y0, x.y) + self.rect.x1 = max(self.rect.x1, x.x) + self.rect.y1 = max(self.rect.y1, x.y) + else: + x = Rect(x) + self.rect.x0 = min(self.rect.x0, x.x0) + self.rect.y0 = min(self.rect.y0, x.y0) + self.rect.x1 = max(self.rect.x1, x.x1) + self.rect.y1 = max(self.rect.y1, x.y1) + + def draw_line(self, p1: point_like, p2: point_like) -> Point: + """Draw a line between two points.""" + p1 = Point(p1) + p2 = Point(p2) + if not (self.last_point == p1): + self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n" + self.last_point = p1 + self.updateRect(p1) + + self.draw_cont += _format_g(JM_TUPLE(p2 * self.ipctm)) + " l\n" + self.updateRect(p2) + self.last_point = p2 + return self.last_point + + def draw_polyline(self, points: list) -> Point: + """Draw several connected line segments.""" + for i, p in enumerate(points): + if i == 0: + if not (self.last_point == Point(p)): + self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " m\n" + self.last_point = Point(p) + else: + self.draw_cont += _format_g(JM_TUPLE(Point(p) * self.ipctm)) + " l\n" + self.updateRect(p) + + self.last_point = Point(points[-1]) + return self.last_point + + def draw_bezier( + self, + p1: point_like, + p2: point_like, + p3: point_like, + p4: point_like, + ) -> Point: + """Draw a standard cubic Bezier curve.""" + p1 = Point(p1) + p2 = Point(p2) + p3 = Point(p3) + p4 = Point(p4) + if not (self.last_point == p1): + self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n" + args = JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm)) + self.draw_cont += _format_g(args) + " c\n" + self.updateRect(p1) + self.updateRect(p2) + self.updateRect(p3) + self.updateRect(p4) + self.last_point = p4 + return self.last_point + + def draw_oval(self, tetra: typing.Union[quad_like, rect_like]) -> Point: + """Draw an ellipse inside a tetrapod.""" + if len(tetra) != 4: + raise ValueError("invalid arg length") + if hasattr(tetra[0], "__float__"): + q = Rect(tetra).quad + else: + q = Quad(tetra) + + mt = q.ul + (q.ur - q.ul) * 0.5 + mr = q.ur + (q.lr - q.ur) * 0.5 + mb = q.ll + (q.lr - q.ll) * 0.5 + ml = q.ul + (q.ll - q.ul) * 0.5 + if not (self.last_point == ml): + self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n" + self.last_point = ml + self.draw_curve(ml, q.ll, mb) + self.draw_curve(mb, q.lr, mr) + self.draw_curve(mr, q.ur, mt) + self.draw_curve(mt, q.ul, ml) + self.updateRect(q.rect) + self.last_point = ml + return self.last_point + + def draw_circle(self, center: point_like, radius: float) -> Point: + """Draw a circle given its center and radius.""" + if not radius > EPSILON: + raise ValueError("radius must be positive") + center = Point(center) + p1 = center - (radius, 0) + return self.draw_sector(center, p1, 360, fullSector=False) + + def draw_curve( + self, + p1: point_like, + p2: point_like, + p3: point_like, + ) -> Point: + """Draw a curve between points using one control point.""" + kappa = 0.55228474983 + p1 = Point(p1) + p2 = Point(p2) + p3 = Point(p3) + k1 = p1 + (p2 - p1) * kappa + k2 = p3 + (p2 - p3) * kappa + return self.draw_bezier(p1, k1, k2, p3) + + def draw_sector( + self, + center: point_like, + point: point_like, + beta: float, + fullSector: bool = True, + ) -> Point: + """Draw a circle sector.""" + center = Point(center) + point = Point(point) + l3 = lambda a, b: _format_g((a, b)) + " m\n" + l4 = lambda a, b, c, d, e, f: _format_g((a, b, c, d, e, f)) + " c\n" + l5 = lambda a, b: _format_g((a, b)) + " l\n" + betar = math.radians(-beta) + w360 = math.radians(math.copysign(360, betar)) * (-1) + w90 = math.radians(math.copysign(90, betar)) + w45 = w90 / 2 + while abs(betar) > 2 * math.pi: + betar += w360 # bring angle below 360 degrees + if not (self.last_point == point): + self.draw_cont += l3(*JM_TUPLE(point * self.ipctm)) + self.last_point = point + Q = Point(0, 0) # just make sure it exists + C = center + P = point + S = P - C # vector 'center' -> 'point' + rad = abs(S) # circle radius + + if not rad > EPSILON: + raise ValueError("radius must be positive") + + alfa = self.horizontal_angle(center, point) + while abs(betar) > abs(w90): # draw 90 degree arcs + q1 = C.x + math.cos(alfa + w90) * rad + q2 = C.y + math.sin(alfa + w90) * rad + Q = Point(q1, q2) # the arc's end point + r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45) + r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45) + R = Point(r1, r2) # crossing point of tangents + kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q) + kappa = kappah * abs(P - Q) + cp1 = P + (R - P) * kappa # control point 1 + cp2 = Q + (R - Q) * kappa # control point 2 + self.draw_cont += l4(*JM_TUPLE( + list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) + )) + + betar -= w90 # reduce param angle by 90 deg + alfa += w90 # advance start angle by 90 deg + P = Q # advance to arc end point + # draw (remaining) arc + if abs(betar) > 1e-3: # significant degrees left? + beta2 = betar / 2 + q1 = C.x + math.cos(alfa + betar) * rad + q2 = C.y + math.sin(alfa + betar) * rad + Q = Point(q1, q2) # the arc's end point + r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2) + r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2) + R = Point(r1, r2) # crossing point of tangents + # kappa height is 4/3 of segment height + kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q) # kappa height + kappa = kappah * abs(P - Q) / (1 - math.cos(betar)) + cp1 = P + (R - P) * kappa # control point 1 + cp2 = Q + (R - Q) * kappa # control point 2 + self.draw_cont += l4(*JM_TUPLE( + list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) + )) + if fullSector: + self.draw_cont += l3(*JM_TUPLE(point * self.ipctm)) + self.draw_cont += l5(*JM_TUPLE(center * self.ipctm)) + self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm)) + self.last_point = Q + return self.last_point + + def draw_rect(self, rect: rect_like, *, radius=None) -> Point: + """Draw a rectangle. + + Args: + radius: if not None, the rectangle will have rounded corners. + This is the radius of the curvature, given as percentage of + the rectangle width or height. Valid are values 0 < v <= 0.5. + For a sequence of two values, the corners will have different + radii. Otherwise, the percentage will be computed from the + shorter side. A value of (0.5, 0.5) will draw an ellipse. + """ + r = Rect(rect) + if radius is None: # standard rectangle + self.draw_cont += _format_g(JM_TUPLE( + list(r.bl * self.ipctm) + [r.width, r.height] + )) + " re\n" + self.updateRect(r) + self.last_point = r.tl + return self.last_point + # rounded corners requested. This requires 1 or 2 values, each + # with 0 < value <= 0.5 + if hasattr(radius, "__float__"): + if radius <= 0 or radius > 0.5: + raise ValueError(f"bad radius value {radius}.") + d = min(r.width, r.height) * radius + px = (d, 0) + py = (0, d) + elif hasattr(radius, "__len__") and len(radius) == 2: + rx, ry = radius + px = (rx * r.width, 0) + py = (0, ry * r.height) + if min(rx, ry) <= 0 or max(rx, ry) > 0.5: + raise ValueError(f"bad radius value {radius}.") + else: + raise ValueError(f"bad radius value {radius}.") + + lp = self.draw_line(r.tl + py, r.bl - py) + lp = self.draw_curve(lp, r.bl, r.bl + px) + + lp = self.draw_line(lp, r.br - px) + lp = self.draw_curve(lp, r.br, r.br - py) + + lp = self.draw_line(lp, r.tr + py) + lp = self.draw_curve(lp, r.tr, r.tr - px) + + lp = self.draw_line(lp, r.tl + px) + self.last_point = self.draw_curve(lp, r.tl, r.tl + py) + + self.updateRect(r) + return self.last_point + + def draw_quad(self, quad: quad_like) -> Point: + """Draw a Quad.""" + q = Quad(quad) + return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul]) + + def draw_zigzag( + self, + p1: point_like, + p2: point_like, + breadth: float = 2, + ) -> Point: + """Draw a zig-zagged line from p1 to p2.""" + p1 = Point(p1) + p2 = Point(p2) + S = p2 - p1 # vector start - end + rad = abs(S) # distance of points + cnt = 4 * int(round(rad / (4 * breadth), 0)) # always take full phases + if cnt < 4: + raise ValueError("points too close") + mb = rad / cnt # revised breadth + matrix = Matrix(util_hor_matrix(p1, p2)) # normalize line to x-axis + i_mat = ~matrix # get original position + points = [] # stores edges + for i in range(1, cnt): + if i % 4 == 1: # point "above" connection + p = Point(i, -1) * mb + elif i % 4 == 3: # point "below" connection + p = Point(i, 1) * mb + else: # ignore others + continue + points.append(p * i_mat) + self.draw_polyline([p1] + points + [p2]) # add start and end points + return p2 + + def draw_squiggle( + self, + p1: point_like, + p2: point_like, + breadth=2, + ) -> Point: + """Draw a squiggly line from p1 to p2.""" + p1 = Point(p1) + p2 = Point(p2) + S = p2 - p1 # vector start - end + rad = abs(S) # distance of points + cnt = 4 * int(round(rad / (4 * breadth), 0)) # always take full phases + if cnt < 4: + raise ValueError("points too close") + mb = rad / cnt # revised breadth + matrix = Matrix(util_hor_matrix(p1, p2)) # normalize line to x-axis + i_mat = ~matrix # get original position + k = 2.4142135623765633 # y of draw_curve helper point + + points = [] # stores edges + for i in range(1, cnt): + if i % 4 == 1: # point "above" connection + p = Point(i, -k) * mb + elif i % 4 == 3: # point "below" connection + p = Point(i, k) * mb + else: # else on connection line + p = Point(i, 0) * mb + points.append(p * i_mat) + + points = [p1] + points + [p2] + cnt = len(points) + i = 0 + while i + 2 < cnt: + self.draw_curve(points[i], points[i + 1], points[i + 2]) + i += 2 + return p2 + + # ============================================================================== + # Shape.insert_text + # ============================================================================== + def insert_text( + self, + point: point_like, + buffer: typing.Union[str, list], + *, + fontsize: float = 11, + lineheight: OptFloat = None, + fontname: str = "helv", + fontfile: OptStr = None, + set_simple: bool = 0, + encoding: int = 0, + color: OptSeq = None, + fill: OptSeq = None, + render_mode: int = 0, + border_width: float = 0.05, + miter_limit: float = 1, + rotate: int = 0, + morph: OptSeq = None, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> int: + + # ensure 'text' is a list of strings, worth dealing with + if not bool(buffer): + return 0 + + if type(buffer) not in (list, tuple): + text = buffer.splitlines() + else: + text = buffer + + if not len(text) > 0: + return 0 + + point = Point(point) + try: + maxcode = max([ord(c) for c in " ".join(text)]) + except Exception: + exception_info() + return 0 + + # ensure valid 'fontname' + fname = fontname + if fname.startswith("/"): + fname = fname[1:] + + xref = self.page.insert_font( + fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple + ) + fontinfo = CheckFontInfo(self.doc, xref) + + fontdict = fontinfo[1] + ordering = fontdict["ordering"] + simple = fontdict["simple"] + bfname = fontdict["name"] + ascender = fontdict["ascender"] + descender = fontdict["descender"] + if lineheight: + lheight = fontsize * lineheight + elif ascender - descender <= 1: + lheight = fontsize * 1.2 + else: + lheight = fontsize * (ascender - descender) + + if maxcode > 255: + glyphs = self.doc.get_char_widths(xref, maxcode + 1) + else: + glyphs = fontdict["glyphs"] + + tab = [] + for t in text: + if simple and bfname not in ("Symbol", "ZapfDingbats"): + g = None + else: + g = glyphs + tab.append(getTJstr(t, g, simple, ordering)) + text = tab + + color_str = ColorCode(color, "c") + fill_str = ColorCode(fill, "f") + if not fill and render_mode == 0: # ensure fill color when 0 Tr + fill = color + fill_str = ColorCode(color, "f") + + morphing = CheckMorph(morph) + rot = rotate + if rot % 90 != 0: + raise ValueError("bad rotate value") + + while rot < 0: + rot += 360 + rot = rot % 360 # text rotate = 0, 90, 270, 180 + + templ1 = lambda a, b, c, d, e, f, g: f"\nq\n{a}{b}BT\n{c}1 0 0 1 {_format_g((d, e))} Tm\n/{f} {_format_g(g)} Tf " + templ2 = lambda a: f"TJ\n0 -{_format_g(a)} TD\n" + cmp90 = "0 1 -1 0 0 0 cm\n" # rotates 90 deg counter-clockwise + cmm90 = "0 -1 1 0 0 0 cm\n" # rotates 90 deg clockwise + cm180 = "-1 0 0 -1 0 0 cm\n" # rotates by 180 deg. + height = self.height + width = self.width + + # setting up for standard rotation directions + # case rotate = 0 + if morphing: + m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y) + mat = ~m1 * morph[1] * m1 + cm = _format_g(JM_TUPLE(mat)) + " cm\n" + else: + cm = "" + top = height - point.y - self.y # start of 1st char + left = point.x + self.x # start of 1. char + space = top # space available + #headroom = point.y + self.y # distance to page border + if rot == 90: + left = height - point.y - self.y + top = -point.x - self.x + cm += cmp90 + space = width - abs(top) + #headroom = point.x + self.x + + elif rot == 270: + left = -height + point.y + self.y + top = point.x + self.x + cm += cmm90 + space = abs(top) + #headroom = width - point.x - self.x + + elif rot == 180: + left = -point.x - self.x + top = -height + point.y + self.y + cm += cm180 + space = abs(point.y + self.y) + #headroom = height - point.y - self.y + + optcont = self.page._get_optional_content(oc) + if optcont is not None: + bdc = "/OC /%s BDC\n" % optcont + emc = "EMC\n" + else: + bdc = emc = "" + + alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity) + if alpha is None: + alpha = "" + else: + alpha = "/%s gs\n" % alpha + nres = templ1(bdc, alpha, cm, left, top, fname, fontsize) + + if render_mode > 0: + nres += "%i Tr " % render_mode + nres += _format_g(border_width * fontsize) + " w " + if miter_limit is not None: + nres += _format_g(miter_limit) + " M " + if color is not None: + nres += color_str + if fill is not None: + nres += fill_str + + # ========================================================================= + # start text insertion + # ========================================================================= + nres += text[0] + nlines = 1 # set output line counter + if len(text) > 1: + nres += templ2(lheight) # line 1 + else: + nres += 'TJ' + for i in range(1, len(text)): + if space < lheight: + break # no space left on page + if i > 1: + nres += "\nT* " + nres += text[i] + 'TJ' + space -= lheight + nlines += 1 + + nres += "\nET\n%sQ\n" % emc + + # ========================================================================= + # end of text insertion + # ========================================================================= + # update the /Contents object + self.text_cont += nres + return nlines + + # ============================================================================== + # Shape.insert_textbox + # ============================================================================== + def insert_textbox( + self, + rect: rect_like, + buffer: typing.Union[str, list], + *, + fontname: OptStr = "helv", + fontfile: OptStr = None, + fontsize: float = 11, + lineheight: OptFloat = None, + set_simple: bool = 0, + encoding: int = 0, + color: OptSeq = None, + fill: OptSeq = None, + expandtabs: int = 1, + border_width: float = 0.05, + miter_limit: float = 1, + align: int = 0, + render_mode: int = 0, + rotate: int = 0, + morph: OptSeq = None, + stroke_opacity: float = 1, + fill_opacity: float = 1, + oc: int = 0, + ) -> float: + """Insert text into a given rectangle. + + Args: + rect -- the textbox to fill + buffer -- text to be inserted + fontname -- a Base-14 font, font name or '/name' + fontfile -- name of a font file + fontsize -- font size + lineheight -- overwrite the font property + color -- RGB stroke color triple + fill -- RGB fill color triple + render_mode -- text rendering control + border_width -- thickness of glyph borders as percentage of fontsize + expandtabs -- handles tabulators with string function + align -- left, center, right, justified + rotate -- 0, 90, 180, or 270 degrees + morph -- morph box with a matrix and a fixpoint + Returns: + unused or deficit rectangle area (float) + """ + rect = Rect(rect) + if rect.is_empty or rect.is_infinite: + raise ValueError("text box must be finite and not empty") + + color_str = ColorCode(color, "c") + fill_str = ColorCode(fill, "f") + if fill is None and render_mode == 0: # ensure fill color for 0 Tr + fill = color + fill_str = ColorCode(color, "f") + + optcont = self.page._get_optional_content(oc) + if optcont is not None: + bdc = "/OC /%s BDC\n" % optcont + emc = "EMC\n" + else: + bdc = emc = "" + + # determine opacity / transparency + alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity) + if alpha is None: + alpha = "" + else: + alpha = "/%s gs\n" % alpha + + if rotate % 90 != 0: + raise ValueError("rotate must be multiple of 90") + + rot = rotate + while rot < 0: + rot += 360 + rot = rot % 360 + + # is buffer worth of dealing with? + if not bool(buffer): + return rect.height if rot in (0, 180) else rect.width + + cmp90 = "0 1 -1 0 0 0 cm\n" # rotates counter-clockwise + cmm90 = "0 -1 1 0 0 0 cm\n" # rotates clockwise + cm180 = "-1 0 0 -1 0 0 cm\n" # rotates by 180 deg. + height = self.height + + fname = fontname + if fname.startswith("/"): + fname = fname[1:] + + xref = self.page.insert_font( + fontname=fname, fontfile=fontfile, encoding=encoding, set_simple=set_simple + ) + fontinfo = CheckFontInfo(self.doc, xref) + + fontdict = fontinfo[1] + ordering = fontdict["ordering"] + simple = fontdict["simple"] + glyphs = fontdict["glyphs"] + bfname = fontdict["name"] + ascender = fontdict["ascender"] + descender = fontdict["descender"] + + if lineheight: + lheight_factor = lineheight + elif ascender - descender <= 1: + lheight_factor = 1.2 + else: + lheight_factor = ascender - descender + lheight = fontsize * lheight_factor + + # create a list from buffer, split into its lines + if type(buffer) in (list, tuple): + t0 = "\n".join(buffer) + else: + t0 = buffer + + maxcode = max([ord(c) for c in t0]) + # replace invalid char codes for simple fonts + if simple and maxcode > 255: + t0 = "".join([c if ord(c) < 256 else "?" for c in t0]) + + t0 = t0.splitlines() + + glyphs = self.doc.get_char_widths(xref, maxcode + 1) + if simple and bfname not in ("Symbol", "ZapfDingbats"): + tj_glyphs = None + else: + tj_glyphs = glyphs + + # ---------------------------------------------------------------------- + # calculate pixel length of a string + # ---------------------------------------------------------------------- + def pixlen(x): + """Calculate pixel length of x.""" + if ordering < 0: + return sum([glyphs[ord(c)][1] for c in x]) * fontsize + else: + return len(x) * fontsize + + # --------------------------------------------------------------------- + + if ordering < 0: + blen = glyphs[32][1] * fontsize # pixel size of space character + else: + blen = fontsize + + text = "" # output buffer + + if CheckMorph(morph): + m1 = Matrix( + 1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y + ) + mat = ~m1 * morph[1] * m1 + cm = _format_g(JM_TUPLE(mat)) + " cm\n" + else: + cm = "" + + # --------------------------------------------------------------------- + # adjust for text orientation / rotation + # --------------------------------------------------------------------- + progr = 1 # direction of line progress + c_pnt = Point(0, fontsize * ascender) # used for line progress + if rot == 0: # normal orientation + point = rect.tl + c_pnt # line 1 is 'lheight' below top + maxwidth = rect.width # pixels available in one line + maxheight = rect.height # available text height + + elif rot == 90: # rotate counter clockwise + c_pnt = Point(fontsize * ascender, 0) # progress in x-direction + point = rect.bl + c_pnt # line 1 'lheight' away from left + maxwidth = rect.height # pixels available in one line + maxheight = rect.width # available text height + cm += cmp90 + + elif rot == 180: # text upside down + # progress upwards in y direction + c_pnt = -Point(0, fontsize * ascender) + point = rect.br + c_pnt # line 1 'lheight' above bottom + maxwidth = rect.width # pixels available in one line + progr = -1 # subtract lheight for next line + maxheight =rect.height # available text height + cm += cm180 + + else: # rotate clockwise (270 or -90) + # progress from right to left + c_pnt = -Point(fontsize * ascender, 0) + point = rect.tr + c_pnt # line 1 'lheight' left of right + maxwidth = rect.height # pixels available in one line + progr = -1 # subtract lheight for next line + maxheight = rect.width # available text height + cm += cmm90 + + # ===================================================================== + # line loop + # ===================================================================== + just_tab = [] # 'justify' indicators per line + + for i, line in enumerate(t0): + line_t = line.expandtabs(expandtabs).split(" ") # split into words + num_words = len(line_t) + lbuff = "" # init line buffer + rest = maxwidth # available line pixels + # ================================================================= + # word loop + # ================================================================= + for j in range(num_words): + word = line_t[j] + pl_w = pixlen(word) # pixel len of word + if rest >= pl_w: # does it fit on the line? + lbuff += word + " " # yes, append word + rest -= pl_w + blen # update available line space + continue # next word + + # word doesn't fit - output line (if not empty) + if lbuff: + lbuff = lbuff.rstrip() + "\n" # line full, append line break + text += lbuff # append to total text + just_tab.append(True) # can align-justify + + lbuff = "" # re-init line buffer + rest = maxwidth # re-init avail. space + + if pl_w <= maxwidth: # word shorter than 1 line? + lbuff = word + " " # start the line with it + rest = maxwidth - pl_w - blen # update free space + continue + + # long word: split across multiple lines - char by char ... + if len(just_tab) > 0: + just_tab[-1] = False # cannot align-justify + for c in word: + if pixlen(lbuff) <= maxwidth - pixlen(c): + lbuff += c + else: # line full + lbuff += "\n" # close line + text += lbuff # append to text + just_tab.append(False) # cannot align-justify + lbuff = c # start new line with this char + + lbuff += " " # finish long word + rest = maxwidth - pixlen(lbuff) # long word stored + + if lbuff: # unprocessed line content? + text += lbuff.rstrip() # append to text + just_tab.append(False) # cannot align-justify + + if i < len(t0) - 1: # not the last line? + text += "\n" # insert line break + + # compute used part of the textbox + if text.endswith("\n"): + text = text[:-1] + lb_count = text.count("\n") + 1 # number of lines written + + # text height = line count * line height plus one descender value + text_height = lheight * lb_count - descender * fontsize + + more = text_height - maxheight # difference to height limit + if more > EPSILON: # landed too much outside rect + return (-1) * more # return deficit, don't output + + more = abs(more) + if more < EPSILON: + more = 0 # don't bother with epsilons + nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm # initialize output buffer + templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf " + # center, right, justify: output each line with its own specifics + text_t = text.splitlines() # split text in lines again + just_tab[-1] = False # never justify last line + for i, t in enumerate(text_t): + spacing = 0 + pl = maxwidth - pixlen(t) # length of empty line part + pnt = point + c_pnt * (i * lheight_factor) # text start of line + if align == 1: # center: right shift by half width + if rot in (0, 180): + pnt = pnt + Point(pl / 2, 0) * progr + else: + pnt = pnt - Point(0, pl / 2) * progr + elif align == 2: # right: right shift by full width + if rot in (0, 180): + pnt = pnt + Point(pl, 0) * progr + else: + pnt = pnt - Point(0, pl) * progr + elif align == 3: # justify + spaces = t.count(" ") # number of spaces in line + if spaces > 0 and just_tab[i]: # if any, and we may justify + spacing = pl / spaces # make every space this much larger + else: + spacing = 0 # keep normal space length + top = height - pnt.y - self.y + left = pnt.x + self.x + if rot == 90: + left = height - pnt.y - self.y + top = -pnt.x - self.x + elif rot == 270: + left = -height + pnt.y + self.y + top = pnt.x + self.x + elif rot == 180: + left = -pnt.x - self.x + top = -height + pnt.y + self.y + + nres += templ(left, top, fname, fontsize) + + if render_mode > 0: + nres += "%i Tr " % render_mode + nres += _format_g(border_width * fontsize) + " w " + if miter_limit is not None: + nres += _format_g(miter_limit) + " M " + + if align == 3: + nres += _format_g(spacing) + " Tw " + + if color is not None: + nres += color_str + if fill is not None: + nres += fill_str + nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering) + + nres += "ET\n%sQ\n" % emc + + self.text_cont += nres + self.updateRect(rect) + return more + + def finish( + self, + width: float = 1, + color: OptSeq = (0,), + fill: OptSeq = None, + lineCap: int = 0, + lineJoin: int = 0, + dashes: OptStr = None, + even_odd: bool = False, + morph: OptSeq = None, + closePath: bool = True, + fill_opacity: float = 1, + stroke_opacity: float = 1, + oc: int = 0, + ) -> None: + """Finish the current drawing segment. + + Notes: + Apply colors, opacity, dashes, line style and width, or + morphing. Also whether to close the path + by connecting last to first point. + """ + if self.draw_cont == "": # treat empty contents as no-op + return + + if width == 0: # border color makes no sense then + color = None + elif color is None: # vice versa + width = 0 + # if color == None and fill == None: + # raise ValueError("at least one of 'color' or 'fill' must be given") + color_str = ColorCode(color, "c") # ensure proper color string + fill_str = ColorCode(fill, "f") # ensure proper fill string + + optcont = self.page._get_optional_content(oc) + if optcont is not None: + self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont + emc = "EMC\n" + else: + emc = "" + + alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity) + if alpha is not None: + self.draw_cont = "/%s gs\n" % alpha + self.draw_cont + + if width != 1 and width != 0: + self.draw_cont += _format_g(width) + " w\n" + + if lineCap != 0: + self.draw_cont = "%i J\n" % lineCap + self.draw_cont + if lineJoin != 0: + self.draw_cont = "%i j\n" % lineJoin + self.draw_cont + + if dashes not in (None, "", "[] 0"): + self.draw_cont = "%s d\n" % dashes + self.draw_cont + + if closePath: + self.draw_cont += "h\n" + self.last_point = None + + if color is not None: + self.draw_cont += color_str + + if fill is not None: + self.draw_cont += fill_str + if color is not None: + if not even_odd: + self.draw_cont += "B\n" + else: + self.draw_cont += "B*\n" + else: + if not even_odd: + self.draw_cont += "f\n" + else: + self.draw_cont += "f*\n" + else: + self.draw_cont += "S\n" + + self.draw_cont += emc + if CheckMorph(morph): + m1 = Matrix( + 1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y + ) + mat = ~m1 * morph[1] * m1 + self.draw_cont = _format_g(JM_TUPLE(mat)) + " cm\n" + self.draw_cont + + self.totalcont += "\nq\n" + self.draw_cont + "Q\n" + self.draw_cont = "" + self.last_point = None + return + + def commit(self, overlay: bool = True) -> None: + """Update the page's /Contents object with Shape data. + + The argument controls whether data appear in foreground (default) + or background. + """ + CheckParent(self.page) # doc may have died meanwhile + self.totalcont += self.text_cont + self.totalcont = self.totalcont.encode() + + if self.totalcont: + if overlay: + self.page.wrap_contents() # ensure a balanced graphics state + # make /Contents object with dummy stream + xref = TOOLS._insert_contents(self.page, b" ", overlay) + # update it with potential compression + self.doc.update_stream(xref, self.totalcont) + + self.last_point = None # clean up ... + self.rect = None # + self.draw_cont = "" # for potential ... + self.text_cont = "" # ... + self.totalcont = "" # re-use + + +class Story: + + def __init__( self, html='', user_css=None, em=12, archive=None): + buffer_ = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8')) + if archive and not isinstance(archive, Archive): + archive = Archive(archive) + arch = archive.this if archive else mupdf.FzArchive( None) + if hasattr(mupdf, 'FzStoryS'): + self.this = mupdf.FzStoryS( buffer_, user_css, em, arch) + else: + self.this = mupdf.FzStory( buffer_, user_css, em, arch) + + def add_header_ids(self): + ''' + Look for `` items in `self` and adds unique `id` + attributes if not already present. + ''' + dom = self.body + i = 0 + x = dom.find(None, None, None) + while x: + name = x.tagname + if len(name) == 2 and name[0]=="h" and name[1] in "123456": + attr = x.get_attribute_value("id") + if not attr: + id_ = f"h_id_{i}" + #log(f"{name=}: setting {id_=}") + x.set_attribute("id", id_) + i += 1 + x = x.find_next(None, None, None) + + @staticmethod + def add_pdf_links(document_or_stream, positions): + """ + Adds links to PDF document. + Args: + document_or_stream: + A PDF `Document` or raw PDF content, for example an + `io.BytesIO` instance. + positions: + List of `ElementPosition`'s for `document_or_stream`, + typically from Story.element_positions(). We raise an + exception if two or more positions have same id. + Returns: + `document_or_stream` if a `Document` instance, otherwise a + new `Document` instance. + We raise an exception if an `href` in `positions` refers to an + internal position `#` but no item in `positions` has `id = + name`. + """ + if isinstance(document_or_stream, Document): + document = document_or_stream + else: + document = Document("pdf", document_or_stream) + + # Create dict from id to position, which we will use to find + # link destinations. + # + id_to_position = dict() + #log(f"positions: {positions}") + for position in positions: + #log(f"add_pdf_links(): position: {position}") + if (position.open_close & 1) and position.id: + #log(f"add_pdf_links(): position with id: {position}") + if position.id in id_to_position: + #log(f"Ignoring duplicate positions with id={position.id!r}") + pass + else: + id_to_position[ position.id] = position + + # Insert links for all positions that have an `href`. + # + for position_from in positions: + + if (position_from.open_close & 1) and position_from.href: + + #log(f"add_pdf_links(): position with href: {position}") + link = dict() + link['from'] = Rect(position_from.rect) + + if position_from.href.startswith("#"): + #`...` internal link. + target_id = position_from.href[1:] + try: + position_to = id_to_position[ target_id] + except Exception as e: + if g_exceptions_verbose > 1: exception_info() + raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e + # Make link from `position_from`'s rect to top-left of + # `position_to`'s rect. + if 0: + log(f"add_pdf_links(): making link from:") + log(f"add_pdf_links(): {position_from}") + log(f"add_pdf_links(): to:") + log(f"add_pdf_links(): {position_to}") + link["kind"] = LINK_GOTO + x0, y0, x1, y1 = position_to.rect + # This appears to work well with viewers which scroll + # to make destination point top-left of window. + link["to"] = Point(x0, y0) + link["page"] = position_to.page_num - 1 + + else: + # `...` external link. + if position_from.href.startswith('name:'): + link['kind'] = LINK_NAMED + link['name'] = position_from.href[5:] + else: + link['kind'] = LINK_URI + link['uri'] = position_from.href + + #log(f'Adding link: {position_from.page_num=} {link=}.') + document[position_from.page_num - 1].insert_link(link) + + return document + + @property + def body(self): + dom = self.document() + return dom.bodytag() + + def document( self): + dom = mupdf.fz_story_document( self.this) + return Xml( dom) + + def draw( self, device, matrix=None): + ctm2 = JM_matrix_from_py( matrix) + dev = device.this if device else mupdf.FzDevice( None) + mupdf.fz_draw_story( self.this, dev, ctm2) + + def element_positions( self, function, args=None): + ''' + Trigger a callback function to record where items have been placed. + ''' + if type(args) is dict: + for k in args.keys(): + if not (type(k) is str and k.isidentifier()): + raise ValueError(f"invalid key '{k}'") + else: + args = {} + if not callable(function) or function.__code__.co_argcount != 1: + raise ValueError("callback 'function' must be a callable with exactly one argument") + + def function2( position): + class Position2: + pass + position2 = Position2() + position2.depth = position.depth + position2.heading = position.heading + position2.id = position.id + position2.rect = JM_py_from_rect(position.rect) + position2.text = position.text + position2.open_close = position.open_close + position2.rect_num = position.rectangle_num + position2.href = position.href + if args: + for k, v in args.items(): + setattr( position2, k, v) + function( position2) + mupdf.fz_story_positions( self.this, function2) + + def place( self, where, flags=0): + ''' + Wrapper for fz_place_story_flags(). + ''' + where = JM_rect_from_py( where) + filled = mupdf.FzRect() + more = mupdf.fz_place_story_flags( self.this, where, filled, flags) + return more, JM_py_from_rect( filled) + + def reset( self): + mupdf.fz_reset_story( self.this) + + def write(self, writer, rectfn, positionfn=None, pagefn=None): + dev = None + page_num = 0 + rect_num = 0 + filled = Rect(0, 0, 0, 0) + while 1: + mediabox, rect, ctm = rectfn(rect_num, filled) + rect_num += 1 + if mediabox: + # new page. + page_num += 1 + more, filled = self.place( rect) + if positionfn: + def positionfn2(position): + # We add a `.page_num` member to the + # `ElementPosition` instance. + position.page_num = page_num + positionfn(position) + self.element_positions(positionfn2) + if writer: + if mediabox: + # new page. + if dev: + if pagefn: + pagefn(page_num, mediabox, dev, 1) + writer.end_page() + dev = writer.begin_page( mediabox) + if pagefn: + pagefn(page_num, mediabox, dev, 0) + self.draw( dev, ctm) + if not more: + if pagefn: + pagefn( page_num, mediabox, dev, 1) + writer.end_page() + else: + self.draw(None, ctm) + if not more: + break + + @staticmethod + def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True): + positions = list() + content = None + # Iterate until stable. + while 1: + content_prev = content + content = contentfn( positions) + stable = False + if content == content_prev: + stable = True + content2 = content + story = Story(content2, user_css, em, archive) + + if add_header_ids: + story.add_header_ids() + + positions = list() + def positionfn2(position): + #log(f"write_stabilized(): {stable=} {positionfn=} {position=}") + positions.append(position) + if stable and positionfn: + positionfn(position) + story.write( + writer if stable else None, + rectfn, + positionfn2, + pagefn, + ) + if stable: + break + + @staticmethod + def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True): + #log("write_stabilized_with_links()") + stream = io.BytesIO() + writer = DocumentWriter(stream) + positions = [] + def positionfn2(position): + #log(f"write_stabilized_with_links(): {position=}") + positions.append(position) + if positionfn: + positionfn(position) + Story.write_stabilized(writer, contentfn, rectfn, user_css, em, positionfn2, pagefn, archive, add_header_ids) + writer.close() + stream.seek(0) + return Story.add_pdf_links(stream, positions) + + def write_with_links(self, rectfn, positionfn=None, pagefn=None): + #log("write_with_links()") + stream = io.BytesIO() + writer = DocumentWriter(stream) + positions = [] + def positionfn2(position): + #log(f"write_with_links(): {position=}") + positions.append(position) + if positionfn: + positionfn(position) + self.write(writer, rectfn, positionfn=positionfn2, pagefn=pagefn) + writer.close() + stream.seek(0) + return Story.add_pdf_links(stream, positions) + + class FitResult: + ''' + The result from a `Story.fit*()` method. + + Members: + + `big_enough`: + `True` if the fit succeeded. + `filled`: + Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This + will be wider than .rect if any single word (which we never split) + was too wide for .rect. + `more`: + `False` if the fit succeeded. + `numcalls`: + Number of calls made to `self.place()`. + `parameter`: + The successful parameter value, or the largest failing value. + `rect`: + The pumupdf.Rect created from `parameter`. + ''' + def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None): + self.big_enough = big_enough + self.filled = filled + self.more = more + self.numcalls = numcalls + self.parameter = parameter + self.rect = rect + + def __repr__(self): + return ( + f' big_enough={self.big_enough}' + f' filled={self.filled}' + f' more={self.more}' + f' numcalls={self.numcalls}' + f' parameter={self.parameter}' + f' rect={self.rect}' + ) + + def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0): + ''' + Finds optimal rect that contains the story `self`. + + Returns a `Story.FitResult` instance. + + On success, the last call to `self.place()` will have been with the + returned rectangle, so `self.draw()` can be used directly. + + Args: + :arg fn: + A callable taking a floating point `parameter` and returning a + `pymupdf.Rect()`. If the rect is empty, we assume the story will + not fit and do not call `self.place()`. + + Must guarantee that `self.place()` behaves monotonically when + given rect `fn(parameter`) as `parameter` increases. This + usually means that both width and height increase or stay + unchanged as `parameter` increases. + :arg pmin: + Minimum parameter to consider; `None` for -infinity. + :arg pmax: + Maximum parameter to consider; `None` for +infinity. + :arg delta: + Maximum error in returned `parameter`. + :arg verbose: + If true we output diagnostics. + :arg flags: + Passed to mupdf.fz_place_story_flags(). e.g. + zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`. + ''' + def log(text): + assert verbose + message(f'fit(): {text}') + + assert isinstance(pmin, (int, float)) or pmin is None + assert isinstance(pmax, (int, float)) or pmax is None + + class State: + def __init__(self): + self.pmin = pmin + self.pmax = pmax + self.pmin_result = None + self.pmax_result = None + self.result = None + self.numcalls = 0 + if verbose: + self.pmin0 = pmin + self.pmax0 = pmax + state = State() + + if verbose: + log(f'starting. {state.pmin=} {state.pmax=}.') + + self.reset() + + def ret(): + if state.pmax is not None: + if state.last_p != state.pmax: + if verbose: + log(f'Calling update() with pmax, because was overwritten by later calls.') + big_enough = update(state.pmax) + assert big_enough + result = state.pmax_result + else: + result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls) + if verbose: + log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}') + return result + + def update(parameter): + ''' + Evaluates `more, _ = self.place(fn(parameter))`. If `more` is + false, then `rect` is big enough to contain `self` and we + set `state.pmax=parameter` and return True. Otherwise we set + `state.pmin=parameter` and return False. + ''' + rect = fn(parameter) + assert isinstance(rect, Rect), f'{type(rect)=} {rect=}' + if rect.is_empty: + big_enough = False + result = Story.FitResult(parameter=parameter, numcalls=state.numcalls) + if verbose: + log(f'update(): not calling self.place() because rect is empty.') + else: + more, filled = self.place(rect, flags) + state.numcalls += 1 + big_enough = not more + result = Story.FitResult( + filled=filled, + more=more, + numcalls=state.numcalls, + parameter=parameter, + rect=rect, + big_enough=big_enough, + ) + if verbose: + log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.') + if big_enough: + state.pmax = parameter + state.pmax_result = result + else: + state.pmin = parameter + state.pmin_result = result + state.last_p = parameter + return big_enough + + def opposite(p, direction): + ''' + Returns same sign as `direction`, larger or smaller than `p` if + direction is positive or negative respectively. + ''' + if p is None or p==0: + return direction + if direction * p > 0: + return 2 * p + return -p + + if state.pmin is None: + # Find an initial finite pmin value. + if verbose: log(f'finding pmin.') + parameter = opposite(state.pmax, -1) + while 1: + if not update(parameter): + break + parameter *= 2 + else: + if update(state.pmin): + if verbose: log(f'{state.pmin=} is big enough.') + return ret() + + if state.pmax is None: + # Find an initial finite pmax value. + if verbose: log(f'finding pmax.') + parameter = opposite(state.pmin, +1) + while 1: + if update(parameter): + break + parameter *= 2 + else: + if not update(state.pmax): + # No solution possible. + state.pmax = None + if verbose: log(f'No solution possible {state.pmax=}.') + return ret() + + # Do binary search in pmin..pmax. + if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.') + while 1: + if state.pmax - state.pmin < delta: + return ret() + parameter = (state.pmin + state.pmax) / 2 + update(parameter) + + def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0): + ''' + Finds smallest value `scale` in range `scale_min..scale_max` where + `scale * rect` is large enough to contain the story `self`. + + Returns a `Story.FitResult` instance with `.parameter` set to `scale`. + + :arg width: + width of rect. + :arg height: + height of rect. + :arg scale_min: + Minimum scale to consider; must be >= 0. + :arg scale_max: + Maximum scale to consider, must be >= scale_min or `None` for + infinite. + :arg delta: + Maximum error in returned scale. + :arg verbose: + If true we output diagnostics. + :arg flags: + Passed to Story.place(). + ''' + x0, y0, x1, y1 = rect + width = x1 - x0 + height = y1 - y0 + def fn(scale): + return Rect(x0, y0, x0 + scale*width, y0 + scale*height) + return self.fit(fn, scale_min, scale_max, delta, verbose, flags) + + def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False): + ''' + Finds smallest height in range `height_min..height_max` where a rect + with size `(width, height)` is large enough to contain the story + `self`. + + Returns a `Story.FitResult` instance. + + :arg width: + width of rect. + :arg height_min: + Minimum height to consider; must be >= 0. + :arg height_max: + Maximum height to consider, must be >= height_min or `None` for + infinite. + :arg origin: + `(x0, y0)` of rect. + :arg delta: + Maximum error in returned height. + :arg verbose: + If true we output diagnostics. + ''' + x0, y0 = origin + x1 = x0 + width + def fn(height): + return Rect(x0, y0, x1, y0+height) + return self.fit(fn, height_min, height_max, delta, verbose) + + def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False): + ''' + Finds smallest width in range `width_min..width_max` where a rect with size + `(width, height)` is large enough to contain the story `self`. + + Returns a `Story.FitResult` instance. + Returns a `FitResult` instance. + + :arg height: + height of rect. + :arg width_min: + Minimum width to consider; must be >= 0. + :arg width_max: + Maximum width to consider, must be >= width_min or `None` for + infinite. + :arg origin: + `(x0, y0)` of rect. + :arg delta: + Maximum error in returned width. + :arg verbose: + If true we output diagnostics. + ''' + x0, y0 = origin + y1 = y0 + height + def fn(width): + return Rect(x0, y0, x0+width, y1) + return self.fit(fn, width_min, width_max, delta, verbose) + + +class TextPage: + + def __init__(self, *args): + if args_match(args, mupdf.FzRect): + mediabox = args[0] + self.this = mupdf.FzStextPage( mediabox) + elif args_match(args, mupdf.FzStextPage): + self.this = args[0] + else: + raise Exception(f'Unrecognised args: {args}') + self.thisown = True + self.parent = None + + def _extractText(self, format_): + this_tpage = self.this + res = mupdf.fz_new_buffer(1024) + out = mupdf.FzOutput( res) + # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly + # because there is no .refs member visible and no fz_keep_output() fn, + # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer() + # doesn't convert the returned fz_output* into a mupdf.FzOutput. + #out = mupdf.FzOutput(out) + if format_ == 1: + mupdf.fz_print_stext_page_as_html(out, this_tpage, 0) + elif format_ == 3: + mupdf.fz_print_stext_page_as_xml(out, this_tpage, 0) + elif format_ == 4: + mupdf.fz_print_stext_page_as_xhtml(out, this_tpage, 0) + else: + JM_print_stext_page_as_text(res, this_tpage) + out.fz_close_output() + text = JM_EscapeStrFromBuffer(res) + return text + + def _getNewBlockList(self, page_dict, raw): + JM_make_textpage_dict(self.this, page_dict, raw) + + def _textpage_dict(self, raw=False): + page_dict = {"width": self.rect.width, "height": self.rect.height} + self._getNewBlockList(page_dict, raw) + return page_dict + + def extractBLOCKS(self): + """Return a list with text block information.""" + if 1 or g_use_extra: + return extra.extractBLOCKS(self.this) + block_n = -1 + this_tpage = self.this + tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox) + res = mupdf.fz_new_buffer(1024) + lines = [] + for block in this_tpage: + block_n += 1 + blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT: + mupdf.fz_clear_buffer(res) # set text buffer to empty + line_n = -1 + last_char = 0 + for line in block: + line_n += 1 + linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) + for ch in line: + cbbox = JM_char_bbox(line, ch) + if (not JM_rects_overlap(tp_rect, cbbox) + and not mupdf.fz_is_infinite_rect(tp_rect) + ): + continue + JM_append_rune(res, ch.m_internal.c) + last_char = ch.m_internal.c + linerect = mupdf.fz_union_rect(linerect, cbbox) + if last_char != 10 and not mupdf.fz_is_empty_rect(linerect): + mupdf.fz_append_byte(res, 10) + blockrect = mupdf.fz_union_rect(blockrect, linerect) + text = JM_EscapeStrFromBuffer(res) + elif (JM_rects_overlap(tp_rect, block.m_internal.bbox) + or mupdf.fz_is_infinite_rect(tp_rect) + ): + img = block.i_image() + cs = img.colorspace() + text = "" % ( + mupdf.fz_colorspace_name(cs), + img.w(), img.h(), img.bpc() + ) + blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox)) + if not mupdf.fz_is_empty_rect(blockrect): + litem = ( + blockrect.x0, + blockrect.y0, + blockrect.x1, + blockrect.y1, + text, + block_n, + block.m_internal.type, + ) + lines.append(litem) + return lines + + def extractDICT(self, cb=None, sort=False) -> dict: + """Return page content as a Python dict of images and text spans.""" + val = self._textpage_dict(raw=False) + if cb is not None: + val["width"] = cb.width + val["height"] = cb.height + if sort: + blocks = val["blocks"] + blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) + val["blocks"] = blocks + return val + + def extractHTML(self) -> str: + """Return page content as a HTML string.""" + return self._extractText(1) + + def extractIMGINFO(self, hashes=0): + """Return a list with image meta information.""" + block_n = -1 + this_tpage = self.this + rc = [] + for block in this_tpage: + block_n += 1 + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT: + continue + img = block.i_image() + img_size = 0 + mask = img.mask() + if mask.m_internal: + has_mask = True + else: + has_mask = False + compr_buff = mupdf.fz_compressed_image_buffer(img) + if compr_buff.m_internal: + img_size = compr_buff.fz_compressed_buffer_size() + compr_buff = None + if hashes: + r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT) + assert mupdf.fz_is_infinite_irect(r) + m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0) + pix, w, h = mupdf.fz_get_pixmap_from_image(img, r, m) + digest = mupdf.fz_md5_pixmap2(pix) + digest = bytes(digest) + if img_size == 0: + img_size = img.w() * img.h() * img.n() + cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace)) + block_dict = dict() + block_dict[dictkey_number] = block_n + block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox) + block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform()) + block_dict[dictkey_width] = img.w() + block_dict[dictkey_height] = img.h() + block_dict[dictkey_colorspace] = mupdf.fz_colorspace_n(cs) + block_dict[dictkey_cs_name] = mupdf.fz_colorspace_name(cs) + block_dict[dictkey_xres] = img.xres() + block_dict[dictkey_yres] = img.yres() + block_dict[dictkey_bpc] = img.bpc() + block_dict[dictkey_size] = img_size + if hashes: + block_dict["digest"] = digest + block_dict["has-mask"] = has_mask + rc.append(block_dict) + return rc + + def extractJSON(self, cb=None, sort=False) -> str: + """Return 'extractDICT' converted to JSON format.""" + import base64 + import json + val = self._textpage_dict(raw=False) + + class b64encode(json.JSONEncoder): + def default(self, s): + if type(s) in (bytes, bytearray): + return base64.b64encode(s).decode() + + if cb is not None: + val["width"] = cb.width + val["height"] = cb.height + if sort: + blocks = val["blocks"] + blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) + val["blocks"] = blocks + + val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1) + return val + + def extractRAWDICT(self, cb=None, sort=False) -> dict: + """Return page content as a Python dict of images and text characters.""" + val = self._textpage_dict(raw=True) + if cb is not None: + val["width"] = cb.width + val["height"] = cb.height + if sort: + blocks = val["blocks"] + blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) + val["blocks"] = blocks + return val + + def extractRAWJSON(self, cb=None, sort=False) -> str: + """Return 'extractRAWDICT' converted to JSON format.""" + import base64 + import json + val = self._textpage_dict(raw=True) + + class b64encode(json.JSONEncoder): + def default(self,s): + if type(s) in (bytes, bytearray): + return base64.b64encode(s).decode() + + if cb is not None: + val["width"] = cb.width + val["height"] = cb.height + if sort: + blocks = val["blocks"] + blocks.sort(key=lambda b: (b["bbox"][3], b["bbox"][0])) + val["blocks"] = blocks + val = json.dumps(val, separators=(",", ":"), cls=b64encode, indent=1) + return val + + def extractSelection(self, pointa, pointb): + a = JM_point_from_py(pointa) + b = JM_point_from_py(pointb) + found = mupdf.fz_copy_selection(self.this, a, b, 0) + return found + + def extractText(self, sort=False) -> str: + """Return simple, bare text on the page.""" + if not sort: + return self._extractText(0) + blocks = self.extractBLOCKS()[:] + blocks.sort(key=lambda b: (b[3], b[0])) + return "".join([b[4] for b in blocks]) + + def extractTextbox(self, rect): + this_tpage = self.this + assert isinstance(this_tpage, mupdf.FzStextPage) + area = JM_rect_from_py(rect) + found = JM_copy_rectangle(this_tpage, area) + rc = PyUnicode_DecodeRawUnicodeEscape(found) + return rc + + def extractWORDS(self, delimiters=None): + """Return a list with text word information.""" + if 1 or g_use_extra: + return extra.extractWORDS(self.this, delimiters) + buflen = 0 + last_char_rtl = 0 + block_n = -1 + wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word bbox + this_tpage = self.this + tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox) + + lines = None + buff = mupdf.fz_new_buffer(64) + lines = [] + for block in this_tpage: + block_n += 1 + if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT: + continue + line_n = -1 + for line in block: + line_n += 1 + word_n = 0 # word counter per line + mupdf.fz_clear_buffer(buff) # reset word buffer + buflen = 0 # reset char counter + for ch in line: + cbbox = JM_char_bbox(line, ch) + if (not JM_rects_overlap(tp_rect, cbbox) + and not mupdf.fz_is_infinite_rect(tp_rect) + ): + continue + + if buflen == 0 and ch.m_internal.c == 0x200d: + # ZERO WIDTH JOINER cannot start a word + continue + word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters) + this_char_rtl = JM_is_rtl_char(ch.m_internal.c) + if word_delimiter or this_char_rtl != last_char_rtl: + if buflen == 0 and word_delimiter: + continue # skip delimiters at line start + if not mupdf.fz_is_empty_rect(wbbox): + word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n) + mupdf.fz_clear_buffer(buff) + buflen = 0 # reset char counter + if word_delimiter: + continue + # append one unicode character to the word + JM_append_rune(buff, ch.m_internal.c) + last_char_rtl = this_char_rtl + buflen += 1 + # enlarge word bbox + wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch)) + if buflen and not mupdf.fz_is_empty_rect(wbbox): + word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n) + buflen = 0 + return lines + + def extractXHTML(self) -> str: + """Return page content as a XHTML string.""" + return self._extractText(4) + + def extractXML(self) -> str: + """Return page content as a XML string.""" + return self._extractText(3) + + def poolsize(self): + """TextPage current poolsize.""" + tpage = self.this + pool = mupdf.Pool(tpage.m_internal.pool) + size = mupdf.fz_pool_size( pool) + pool.m_internal = None # Ensure that pool's destructor does not free the pool. + return size + + @property + def rect(self): + """Page rectangle.""" + this_tpage = self.this + mediabox = this_tpage.m_internal.mediabox + val = JM_py_from_rect(mediabox) + val = Rect(val) + + return val + + def search(self, needle, hit_max=0, quads=1): + """Locate 'needle' returning rects or quads.""" + val = JM_search_stext_page(self.this, needle) + if not val: + return val + items = len(val) + for i in range(items): # change entries to quads or rects + q = Quad(val[i]) + if quads: + val[i] = q + else: + val[i] = q.rect + if quads: + return val + i = 0 # join overlapping rects on the same line + while i < items - 1: + v1 = val[i] + v2 = val[i + 1] + if v1.y1 != v2.y1 or (v1 & v2).is_empty: + i += 1 + continue # no overlap on same line + val[i] = v1 | v2 # join rectangles + del val[i + 1] # remove v2 + items -= 1 # reduce item count + return val + + extractTEXT = extractText + + +class TextWriter: + + def __init__(self, page_rect, opacity=1, color=None): + """Stores text spans for later output on compatible PDF pages.""" + self.this = mupdf.fz_new_text() + + self.opacity = opacity + self.color = color + self.rect = Rect(page_rect) + self.ctm = Matrix(1, 0, 0, -1, 0, self.rect.height) + self.ictm = ~self.ctm + self.last_point = Point() + self.last_point.__doc__ = "Position following last text insertion." + self.text_rect = Rect() + + self.text_rect.__doc__ = "Accumulated area of text spans." + self.used_fonts = set() + self.thisown = True + + @property + def _bbox(self): + val = JM_py_from_rect( mupdf.fz_bound_text( self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix())) + val = Rect(val) + return val + + def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0): + """Store 'text' at point 'pos' using 'font' and 'fontsize'.""" + pos = Point(pos) * self.ictm + #log( '{font=}') + if font is None: + font = Font("helv") + if not font.is_writable: + if 0: + log( '{font.this.m_internal.name=}') + log( '{font.this.m_internal.t3matrix=}') + log( '{font.this.m_internal.bbox=}') + log( '{font.this.m_internal.glyph_count=}') + log( '{font.this.m_internal.use_glyph_bbox=}') + log( '{font.this.m_internal.width_count=}') + log( '{font.this.m_internal.width_default=}') + log( '{font.this.m_internal.has_digest=}') + log( 'Unsupported font {font.name=}') + if mupdf_cppyy: + import cppyy + log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}') + raise ValueError("Unsupported font '%s'." % font.name) + if right_to_left: + text = self.clean_rtl(text) + text = "".join(reversed(text)) + right_to_left = 0 + + lang = mupdf.fz_text_language_from_string(language) + p = JM_point_from_py(pos) + trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y) + markup_dir = 0 + wmode = 0 + if small_caps == 0: + trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang) + else: + trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang) + val = JM_py_from_matrix(trm) + + self.last_point = Point(val[-2:]) * self.ctm + self.text_rect = self._bbox * self.ctm + val = self.text_rect, self.last_point + if font.flags["mono"] == 1: + self.used_fonts.add(font) + return val + + def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False): + lheight = fontsize * 1.2 + for c in text: + self.append(pos, c, font=font, fontsize=fontsize, + language=language, small_caps=small_caps) + pos.y += lheight + return self.text_rect, self.last_point + + def clean_rtl(self, text): + """Revert the sequence of Latin text parts. + + Text with right-to-left writing direction (Arabic, Hebrew) often + contains Latin parts, which are written in left-to-right: numbers, names, + etc. For output as PDF text we need *everything* in right-to-left. + E.g. an input like " ABCDE FG HIJ KL " will be + converted to " JIH GF EDCBA LK ". The Arabic + parts remain untouched. + + Args: + text: str + Returns: + Massaged string. + """ + if not text: + return text + # split into words at space boundaries + words = text.split(" ") + idx = [] + for i in range(len(words)): + w = words[i] + # revert character sequence for Latin only words + if not (len(w) < 2 or max([ord(c) for c in w]) > 255): + words[i] = "".join(reversed(w)) + idx.append(i) # stored index of Latin word + + # adjacent Latin words must revert their sequence, too + idx2 = [] # store indices of adjacent Latin words + for i in range(len(idx)): + if idx2 == []: # empty yet? + idx2.append(idx[i]) # store Latin word number + + elif idx[i] > idx2[-1] + 1: # large gap to last? + if len(idx2) > 1: # at least two consecutives? + words[idx2[0] : idx2[-1] + 1] = reversed( + words[idx2[0] : idx2[-1] + 1] + ) # revert their sequence + idx2 = [idx[i]] # re-initialize + + elif idx[i] == idx2[-1] + 1: # new adjacent Latin word + idx2.append(idx[i]) + + text = " ".join(words) + return text + + def fill_textbox( + writer: 'TextWriter', + rect: rect_like, + text: typing.Union[str, list], + pos: point_like = None, + font: typing.Optional[Font] = None, + fontsize: float = 11, + lineheight: OptFloat = None, + align: int = 0, + warn: bool = None, + right_to_left: bool = False, + small_caps: bool = False, + ) -> tuple: + """Fill a rectangle with text. + + Args: + writer: pymupdf.TextWriter object (= "self") + rect: rect-like to receive the text. + text: string or list/tuple of strings. + pos: point-like start position of first word. + font: pymupdf.Font object (default pymupdf.Font('helv')). + fontsize: the fontsize. + lineheight: overwrite the font property + align: (int) 0 = left, 1 = center, 2 = right, 3 = justify + warn: (bool) text overflow action: none, warn, or exception + right_to_left: (bool) indicate right-to-left language. + """ + rect = Rect(rect) + if rect.is_empty: + raise ValueError("fill rect must not empty.") + if type(font) is not Font: + font = Font("helv") + + def textlen(x): + """Return length of a string.""" + return font.text_length( + x, fontsize=fontsize, small_caps=small_caps + ) # abbreviation + + def char_lengths(x): + """Return list of single character lengths for a string.""" + return font.char_lengths(x, fontsize=fontsize, small_caps=small_caps) + + def append_this(pos, text): + ret = writer.append( + pos, text, font=font, fontsize=fontsize, small_caps=small_caps + ) + return ret + + tolerance = fontsize * 0.2 # extra distance to left border + space_len = textlen(" ") + std_width = rect.width - tolerance + std_start = rect.x0 + tolerance + + def norm_words(width, words): + """Cut any word in pieces no longer than 'width'.""" + nwords = [] + word_lengths = [] + for w in words: + wl_lst = char_lengths(w) + wl = sum(wl_lst) + if wl <= width: # nothing to do - copy over + nwords.append(w) + word_lengths.append(wl) + continue + + # word longer than rect width - split it in parts + n = len(wl_lst) + while n > 0: + wl = sum(wl_lst[:n]) + if wl <= width: + nwords.append(w[:n]) + word_lengths.append(wl) + w = w[n:] + wl_lst = wl_lst[n:] + n = len(wl_lst) + else: + n -= 1 + return nwords, word_lengths + + def output_justify(start, line): + """Justified output of a line.""" + # ignore leading / trailing / multiple spaces + words = [w for w in line.split(" ") if w != ""] + nwords = len(words) + if nwords == 0: + return + if nwords == 1: # single word cannot be justified + append_this(start, words[0]) + return + tl = sum([textlen(w) for w in words]) # total word lengths + gaps = nwords - 1 # number of word gaps + gapl = (std_width - tl) / gaps # width of each gap + for w in words: + _, lp = append_this(start, w) # output one word + start.x = lp.x + gapl # next start at word end plus gap + return + + asc = font.ascender + dsc = font.descender + if not lineheight: + if asc - dsc <= 1: + lheight = 1.2 + else: + lheight = asc - dsc + else: + lheight = lineheight + + LINEHEIGHT = fontsize * lheight # effective line height + width = std_width # available horizontal space + + # starting point of text + if pos is not None: + pos = Point(pos) + else: # default is just below rect top-left + pos = rect.tl + (tolerance, fontsize * asc) + if pos not in rect: + raise ValueError("Text must start in rectangle.") + + # calculate displacement factor for alignment + if align == TEXT_ALIGN_CENTER: + factor = 0.5 + elif align == TEXT_ALIGN_RIGHT: + factor = 1.0 + else: + factor = 0 + + # split in lines if just a string was given + if type(text) is str: + textlines = text.splitlines() + else: + textlines = [] + for line in text: + textlines.extend(line.splitlines()) + + max_lines = int((rect.y1 - pos.y) / LINEHEIGHT) + 1 + + new_lines = [] # the final list of textbox lines + no_justify = [] # no justify for these line numbers + for i, line in enumerate(textlines): + if line in ("", " "): + new_lines.append((line, space_len)) + width = rect.width - tolerance + no_justify.append((len(new_lines) - 1)) + continue + if i == 0: + width = rect.x1 - pos.x + else: + width = rect.width - tolerance + + if right_to_left: # reverses Arabic / Hebrew text front to back + line = writer.clean_rtl(line) + tl = textlen(line) + if tl <= width: # line short enough + new_lines.append((line, tl)) + no_justify.append((len(new_lines) - 1)) + continue + + # we need to split the line in fitting parts + words = line.split(" ") # the words in the line + + # cut in parts any words that are longer than rect width + words, word_lengths = norm_words(width, words) + + n = len(words) + while True: + line0 = " ".join(words[:n]) + wl = sum(word_lengths[:n]) + space_len * (n - 1) + if wl <= width: + new_lines.append((line0, wl)) + words = words[n:] + word_lengths = word_lengths[n:] + n = len(words) + line0 = None + else: + n -= 1 + + if len(words) == 0: + break + assert n + + # ------------------------------------------------------------------------- + # List of lines created. Each item is (text, tl), where 'tl' is the PDF + # output length (float) and 'text' is the text. Except for justified text, + # this is output-ready. + # ------------------------------------------------------------------------- + nlines = len(new_lines) + if nlines > max_lines: + msg = "Only fitting %i of %i lines." % (max_lines, nlines) + if warn is None: + pass + elif warn: + message("Warning: " + msg) + else: + raise ValueError(msg) + + start = Point() + no_justify += [len(new_lines) - 1] # no justifying of last line + for i in range(max_lines): + try: + line, tl = new_lines.pop(0) + except IndexError: + if g_exceptions_verbose >= 2: exception_info() + break + + if right_to_left: # Arabic, Hebrew + line = "".join(reversed(line)) + + if i == 0: # may have different start for first line + start = pos + + if align == TEXT_ALIGN_JUSTIFY and i not in no_justify and tl < std_width: + output_justify(start, line) + start.x = std_start + start.y += LINEHEIGHT + continue + + if i > 0 or pos.x == std_start: # left, center, right alignments + start.x += (width - tl) * factor + + append_this(start, line) + start.x = std_start + start.y += LINEHEIGHT + + return new_lines # return non-written lines + + def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0): + """Write the text to a PDF page having the TextWriter's page size. + + Args: + page: a PDF page having same size. + color: override text color. + opacity: override transparency. + overlay: put in foreground or background. + morph: tuple(Point, Matrix), apply a matrix with a fixpoint. + matrix: Matrix to be used instead of 'morph' argument. + render_mode: (int) PDF render mode operator 'Tr'. + """ + CheckParent(page) + if abs(self.rect - page.rect) > 1e-3: + raise ValueError("incompatible page rect") + if morph is not None: + if (type(morph) not in (tuple, list) + or type(morph[0]) is not Point + or type(morph[1]) is not Matrix + ): + raise ValueError("morph must be (Point, Matrix) or None") + if matrix is not None and morph is not None: + raise ValueError("only one of matrix, morph is allowed") + if getattr(opacity, "__float__", None) is None or opacity == -1: + opacity = self.opacity + if color is None: + color = self.color + + if 1: + pdfpage = page._pdf_page() + alpha = 1 + if opacity >= 0 and opacity < 1: + alpha = opacity + ncol = 1 + dev_color = [0, 0, 0, 0] + if color: + ncol, dev_color = JM_color_FromSequence(color) + if ncol == 3: + colorspace = mupdf.fz_device_rgb() + elif ncol == 4: + colorspace = mupdf.fz_device_cmyk() + else: + colorspace = mupdf.fz_device_gray() + + resources = mupdf.pdf_new_dict(pdfpage.doc(), 5) + contents = mupdf.fz_new_buffer(1024) + dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents) + #log( '=== {dev_color!r=}') + mupdf.fz_fill_text( + dev, + self.this, + mupdf.FzMatrix(), + colorspace, + dev_color, + alpha, + mupdf.FzColorParams(mupdf.fz_default_color_params), + ) + mupdf.fz_close_device( dev) + + # copy generated resources into the one of the page + max_nums = JM_merge_resources( pdfpage, resources) + cont_string = JM_EscapeStrFromBuffer( contents) + result = (max_nums, cont_string) + val = result + + max_nums = val[0] + content = val[1] + max_alp, max_font = max_nums + old_cont_lines = content.splitlines() + + optcont = page._get_optional_content(oc) + if optcont is not None: + bdc = "/OC /%s BDC" % optcont + emc = "EMC" + else: + bdc = emc = "" + + new_cont_lines = ["q"] + if bdc: + new_cont_lines.append(bdc) + + cb = page.cropbox_position + if page.rotation in (90, 270): + delta = page.rect.height - page.rect.width + else: + delta = 0 + mb = page.mediabox + if bool(cb) or mb.y0 != 0 or delta != 0: + new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm") + + if morph: + p = morph[0] * self.ictm + delta = Matrix(1, 1).pretranslate(p.x, p.y) + matrix = ~delta * morph[1] * delta + if morph or matrix: + new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm") + + for line in old_cont_lines: + if line.endswith(" cm"): + continue + if line == "BT": + new_cont_lines.append(line) + new_cont_lines.append("%i Tr" % render_mode) + continue + if line.endswith(" gs"): + alp = int(line.split()[0][4:]) + max_alp + line = "/Alp%i gs" % alp + elif line.endswith(" Tf"): + temp = line.split() + fsize = float(temp[1]) + if render_mode != 0: + w = fsize * 0.05 + else: + w = 1 + new_cont_lines.append(_format_g(w) + " w") + font = int(temp[0][2:]) + max_font + line = " ".join(["/F%i" % font] + temp[1:]) + elif line.endswith(" rg"): + new_cont_lines.append(line.replace("rg", "RG")) + elif line.endswith(" g"): + new_cont_lines.append(line.replace(" g", " G")) + elif line.endswith(" k"): + new_cont_lines.append(line.replace(" k", " K")) + new_cont_lines.append(line) + if emc: + new_cont_lines.append(emc) + new_cont_lines.append("Q\n") + content = "\n".join(new_cont_lines).encode("utf-8") + TOOLS._insert_contents(page, content, overlay=overlay) + val = None + for font in self.used_fonts: + repair_mono_font(page, font) + return val + + +class IRect: + """ + IRect() - all zeros + IRect(x0, y0, x1, y1) - 4 coordinates + IRect(top-left, x1, y1) - point and 2 coordinates + IRect(x0, y0, bottom-right) - 2 coordinates and point + IRect(top-left, bottom-right) - 2 points + IRect(sequ) - new from sequence or rect-like + """ + + def __add__(self, p): + return Rect.__add__(self, p).round() + + def __and__(self, x): + return Rect.__and__(self, x).round() + + def __contains__(self, x): + return Rect.__contains__(self, x) + + def __eq__(self, r): + if not hasattr(r, "__len__"): + return False + return len(r) == 4 and self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3] + + def __getitem__(self, i): + return (self.x0, self.y0, self.x1, self.y1)[i] + + def __hash__(self): + return hash(tuple(self)) + + def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None): + self.x0, self.y0, self.x1, self.y1 = util_make_irect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1) + + def __len__(self): + return 4 + + def __mul__(self, m): + return Rect.__mul__(self, m).round() + + def __neg__(self): + return IRect(-self.x0, -self.y0, -self.x1, -self.y1) + + def __or__(self, x): + return Rect.__or__(self, x).round() + + def __pos__(self): + return IRect(self) + + def __repr__(self): + return "IRect" + str(tuple(self)) + + def __setitem__(self, i, v): + v = int(v) + if i == 0: self.x0 = v + elif i == 1: self.y0 = v + elif i == 2: self.x1 = v + elif i == 3: self.y1 = v + else: + raise IndexError("index out of range") + return None + + def __sub__(self, p): + return Rect.__sub__(self, p).round() + + def __truediv__(self, m): + return Rect.__truediv__(self, m).round() + + @property + def bottom_left(self): + """Bottom-left corner.""" + return Point(self.x0, self.y1) + + @property + def bottom_right(self): + """Bottom-right corner.""" + return Point(self.x1, self.y1) + + @property + def height(self): + return max(0, self.y1 - self.y0) + + def contains(self, x): + """Check if x is in the rectangle.""" + return self.__contains__(x) + + def get_area(self, *args) -> float: + """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'.""" + return _rect_area(self.width, self.height, args) + + def include_point(self, p): + """Extend rectangle to include point p.""" + rect = self.rect.include_point(p) + return rect.irect + + def include_rect(self, r): + """Extend rectangle to include rectangle r.""" + rect = self.rect.include_rect(r) + return rect.irect + + def intersect(self, r): + """Restrict rectangle to intersection with rectangle r.""" + return Rect.intersect(self, r).round() + + def intersects(self, x): + return Rect.intersects(self, x) + + @property + def is_empty(self): + """True if rectangle area is empty.""" + return self.x0 >= self.x1 or self.y0 >= self.y1 + + @property + def is_infinite(self): + """True if rectangle is infinite.""" + return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT + + @property + def is_valid(self): + """True if rectangle is valid.""" + return self.x0 <= self.x1 and self.y0 <= self.y1 + + def morph(self, p, m): + """Morph with matrix-like m and point-like p. + + Returns a new quad.""" + if self.is_infinite: + return INFINITE_QUAD() + return self.quad.morph(p, m) + + def norm(self): + return math.sqrt(sum([c*c for c in self])) + + def normalize(self): + """Replace rectangle with its valid version.""" + if self.x1 < self.x0: + self.x0, self.x1 = self.x1, self.x0 + if self.y1 < self.y0: + self.y0, self.y1 = self.y1, self.y0 + return self + + @property + def quad(self): + """Return Quad version of rectangle.""" + return Quad(self.tl, self.tr, self.bl, self.br) + + @property + def rect(self): + return Rect(self) + + @property + def top_left(self): + """Top-left corner.""" + return Point(self.x0, self.y0) + + @property + def top_right(self): + """Top-right corner.""" + return Point(self.x1, self.y0) + + def torect(self, r): + """Return matrix that converts to target rect.""" + r = Rect(r) + if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty: + raise ValueError("rectangles must be finite and not empty") + return ( + Matrix(1, 0, 0, 1, -self.x0, -self.y0) + * Matrix(r.width / self.width, r.height / self.height) + * Matrix(1, 0, 0, 1, r.x0, r.y0) + ) + + def transform(self, m): + return Rect.transform(self, m).round() + + @property + def width(self): + return max(0, self.x1 - self.x0) + + br = bottom_right + bl = bottom_left + tl = top_left + tr = top_right + + +# Data +# + +if 1: + _self = sys.modules[__name__] + if 1: + for _name, _value in mupdf.__dict__.items(): + if _name.startswith(('PDF_', 'UCDN_SCRIPT_')): + if _name.startswith('PDF_ENUM_NAME_'): + # Not a simple enum. + pass + else: + #assert not inspect.isroutine(value) + #log(f'importing {_name=} {_value=}.') + setattr(_self, _name, _value) + #log(f'{getattr( self, name, None)=}') + else: + # This is slow due to importing inspect, e.g. 0.019 instead of 0.004. + for _name, _value in inspect.getmembers(mupdf): + if _name.startswith(('PDF_', 'UCDN_SCRIPT_')): + if _name.startswith('PDF_ENUM_NAME_'): + # Not a simple enum. + pass + else: + #assert not inspect.isroutine(value) + #log(f'importing {name}') + setattr(_self, _name, _value) + #log(f'{getattr( self, name, None)=}') + + # This is a macro so not preserved in mupdf C++/Python bindings. + # + PDF_SIGNATURE_DEFAULT_APPEARANCE = (0 + | mupdf.PDF_SIGNATURE_SHOW_LABELS + | mupdf.PDF_SIGNATURE_SHOW_DN + | mupdf.PDF_SIGNATURE_SHOW_DATE + | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME + | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME + | mupdf.PDF_SIGNATURE_SHOW_LOGO + ) + + #UCDN_SCRIPT_ADLAM = mupdf.UCDN_SCRIPT_ADLAM + #setattr(self, 'UCDN_SCRIPT_ADLAM', mupdf.UCDN_SCRIPT_ADLAM) + + assert mupdf.UCDN_EAST_ASIAN_H == 1 + + # Flake8 incorrectly fails next two lines because we've dynamically added + # items to self. + assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE # noqa: F821 + assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM # noqa: F821 + del _self, _name, _value + +AnyType = typing.Any + +Base14_fontnames = ( + "Courier", + "Courier-Oblique", + "Courier-Bold", + "Courier-BoldOblique", + "Helvetica", + "Helvetica-Oblique", + "Helvetica-Bold", + "Helvetica-BoldOblique", + "Times-Roman", + "Times-Italic", + "Times-Bold", + "Times-BoldItalic", + "Symbol", + "ZapfDingbats", + ) + +Base14_fontdict = {} +for f in Base14_fontnames: + Base14_fontdict[f.lower()] = f +Base14_fontdict["helv"] = "Helvetica" +Base14_fontdict["heit"] = "Helvetica-Oblique" +Base14_fontdict["hebo"] = "Helvetica-Bold" +Base14_fontdict["hebi"] = "Helvetica-BoldOblique" +Base14_fontdict["cour"] = "Courier" +Base14_fontdict["coit"] = "Courier-Oblique" +Base14_fontdict["cobo"] = "Courier-Bold" +Base14_fontdict["cobi"] = "Courier-BoldOblique" +Base14_fontdict["tiro"] = "Times-Roman" +Base14_fontdict["tibo"] = "Times-Bold" +Base14_fontdict["tiit"] = "Times-Italic" +Base14_fontdict["tibi"] = "Times-BoldItalic" +Base14_fontdict["symb"] = "Symbol" +Base14_fontdict["zadb"] = "ZapfDingbats" + +EPSILON = 1e-5 +FLT_EPSILON = 1e-5 + +# largest 32bit integers surviving C float conversion roundtrips +# used by MuPDF to define infinite rectangles +FZ_MIN_INF_RECT = -0x80000000 +FZ_MAX_INF_RECT = 0x7fffff80 + +JM_annot_id_stem = "fitz" +JM_mupdf_warnings_store = [] +JM_mupdf_show_errors = 1 +JM_mupdf_show_warnings = 0 + + +# ------------------------------------------------------------------------------ +# Image recompression constants +# ------------------------------------------------------------------------------ +FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER +FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME +FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS +FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG +FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K +FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX +FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE +FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC + +# ------------------------------------------------------------------------------ +# Various PDF Optional Content Flags +# ------------------------------------------------------------------------------ +PDF_OC_ON = 0 +PDF_OC_TOGGLE = 1 +PDF_OC_OFF = 2 + +# ------------------------------------------------------------------------------ +# link kinds and link flags +# ------------------------------------------------------------------------------ +LINK_NONE = 0 +LINK_GOTO = 1 +LINK_URI = 2 +LINK_LAUNCH = 3 +LINK_NAMED = 4 +LINK_GOTOR = 5 +LINK_FLAG_L_VALID = 1 +LINK_FLAG_T_VALID = 2 +LINK_FLAG_R_VALID = 4 +LINK_FLAG_B_VALID = 8 +LINK_FLAG_FIT_H = 16 +LINK_FLAG_FIT_V = 32 +LINK_FLAG_R_IS_ZOOM = 64 + +SigFlag_SignaturesExist = 1 +SigFlag_AppendOnly = 2 + +STAMP_Approved = 0 +STAMP_AsIs = 1 +STAMP_Confidential = 2 +STAMP_Departmental = 3 +STAMP_Experimental = 4 +STAMP_Expired = 5 +STAMP_Final = 6 +STAMP_ForComment = 7 +STAMP_ForPublicRelease = 8 +STAMP_NotApproved = 9 +STAMP_NotForPublicRelease = 10 +STAMP_Sold = 11 +STAMP_TopSecret = 12 +STAMP_Draft = 13 + +TEXT_ALIGN_LEFT = 0 +TEXT_ALIGN_CENTER = 1 +TEXT_ALIGN_RIGHT = 2 +TEXT_ALIGN_JUSTIFY = 3 + +TEXT_FONT_SUPERSCRIPT = 1 +TEXT_FONT_ITALIC = 2 +TEXT_FONT_SERIFED = 4 +TEXT_FONT_MONOSPACED = 8 +TEXT_FONT_BOLD = 16 + +TEXT_OUTPUT_TEXT = 0 +TEXT_OUTPUT_HTML = 1 +TEXT_OUTPUT_JSON = 2 +TEXT_OUTPUT_XML = 3 +TEXT_OUTPUT_XHTML = 4 + +TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES +TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE +TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES +TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES +TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE +TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS +TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP +TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE +TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE +TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES +TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS +TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT +TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT + +if mupdf_version_tuple >= (1, 26): + TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK + TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT + TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES + TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE + TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT + TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS + TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS + +# 2025-05-07: Non-standard names preserved for backwards compatibility. +TEXT_STEXT_SEGMENT = TEXT_SEGMENT +TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE + +TEXTFLAGS_WORDS = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_BLOCKS = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_DICT = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT + +TEXTFLAGS_SEARCH = (0 + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_DEHYPHENATE + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_HTML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_XHTML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_XML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_TEXT = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + +# Simple text encoding options +TEXT_ENCODING_LATIN = 0 +TEXT_ENCODING_GREEK = 1 +TEXT_ENCODING_CYRILLIC = 2 + +TOOLS_JM_UNIQUE_ID = 0 + +# colorspace identifiers +CS_RGB = 1 +CS_GRAY = 2 +CS_CMYK = 3 + +# PDF Blend Modes +PDF_BM_Color = "Color" +PDF_BM_ColorBurn = "ColorBurn" +PDF_BM_ColorDodge = "ColorDodge" +PDF_BM_Darken = "Darken" +PDF_BM_Difference = "Difference" +PDF_BM_Exclusion = "Exclusion" +PDF_BM_HardLight = "HardLight" +PDF_BM_Hue = "Hue" +PDF_BM_Lighten = "Lighten" +PDF_BM_Luminosity = "Luminosity" +PDF_BM_Multiply = "Multiply" +PDF_BM_Normal = "Normal" +PDF_BM_Overlay = "Overlay" +PDF_BM_Saturation = "Saturation" +PDF_BM_Screen = "Screen" +PDF_BM_SoftLight = "Softlight" + + +annot_skel = { + "goto1": lambda a, b, c, d, e: f"<>/Rect[{e}]/BS<>/Subtype/Link>>", + "goto2": lambda a, b: f"<>/Rect[{b}]/BS<>/Subtype/Link>>", + "gotor1": lambda a, b, c, d, e, f, g: f"<>>>/Rect[{g}]/BS<>/Subtype/Link>>", + "gotor2": lambda a, b, c: f"<>/Rect[{c}]/BS<>/Subtype/Link>>", + "launch": lambda a, b, c: f"<>>>/Rect[{c}]/BS<>/Subtype/Link>>", + "uri": lambda a, b: f"<>/Rect[{b}]/BS<>/Subtype/Link>>", + "named": lambda a, b: f"<>/Rect[{b}]/BS<>/Subtype/Link>>", + } + +class FileDataError(RuntimeError): + """Raised for documents with file structure issues.""" + pass + +class FileNotFoundError(RuntimeError): + """Raised if file does not exist.""" + pass + +class EmptyFileError(FileDataError): + """Raised when creating documents from zero-length data.""" + pass + +# propagate exception class to C-level code +#_set_FileDataError(FileDataError) + +csRGB = Colorspace(CS_RGB) +csGRAY = Colorspace(CS_GRAY) +csCMYK = Colorspace(CS_CMYK) + +# These don't appear to be visible in classic, but are used +# internally. +# +dictkey_align = "align" +dictkey_asc = "ascender" +dictkey_bidi = "bidi" +dictkey_bbox = "bbox" +dictkey_blocks = "blocks" +dictkey_bpc = "bpc" +dictkey_c = "c" +dictkey_chars = "chars" +dictkey_color = "color" +dictkey_colorspace = "colorspace" +dictkey_content = "content" +dictkey_creationDate = "creationDate" +dictkey_cs_name = "cs-name" +dictkey_da = "da" +dictkey_dashes = "dashes" +dictkey_descr = "description" +dictkey_desc = "descender" +dictkey_dir = "dir" +dictkey_effect = "effect" +dictkey_ext = "ext" +dictkey_filename = "filename" +dictkey_fill = "fill" +dictkey_flags = "flags" +dictkey_char_flags = "char_flags" +dictkey_font = "font" +dictkey_glyph = "glyph" +dictkey_height = "height" +dictkey_id = "id" +dictkey_image = "image" +dictkey_items = "items" +dictkey_length = "length" +dictkey_lines = "lines" +dictkey_matrix = "transform" +dictkey_modDate = "modDate" +dictkey_name = "name" +dictkey_number = "number" +dictkey_origin = "origin" +dictkey_rect = "rect" +dictkey_size = "size" +dictkey_smask = "smask" +dictkey_spans = "spans" +dictkey_stroke = "stroke" +dictkey_style = "style" +dictkey_subject = "subject" +dictkey_text = "text" +dictkey_title = "title" +dictkey_type = "type" +dictkey_ufilename = "ufilename" +dictkey_width = "width" +dictkey_wmode = "wmode" +dictkey_xref = "xref" +dictkey_xres = "xres" +dictkey_yres = "yres" + + +try: + from pymupdf_fonts import fontdescriptors, fontbuffers + + fitz_fontdescriptors = fontdescriptors.copy() + for k in fitz_fontdescriptors.keys(): + fitz_fontdescriptors[k]["loader"] = fontbuffers[k] + del fontdescriptors, fontbuffers +except ImportError: + fitz_fontdescriptors = {} + +symbol_glyphs = ( # Glyph list for the built-in font 'Symbol' + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (32, 0.25), + (33, 0.333), + (34, 0.713), + (35, 0.5), + (36, 0.549), + (37, 0.833), + (38, 0.778), + (39, 0.439), + (40, 0.333), + (41, 0.333), + (42, 0.5), + (43, 0.549), + (44, 0.25), + (45, 0.549), + (46, 0.25), + (47, 0.278), + (48, 0.5), + (49, 0.5), + (50, 0.5), + (51, 0.5), + (52, 0.5), + (53, 0.5), + (54, 0.5), + (55, 0.5), + (56, 0.5), + (57, 0.5), + (58, 0.278), + (59, 0.278), + (60, 0.549), + (61, 0.549), + (62, 0.549), + (63, 0.444), + (64, 0.549), + (65, 0.722), + (66, 0.667), + (67, 0.722), + (68, 0.612), + (69, 0.611), + (70, 0.763), + (71, 0.603), + (72, 0.722), + (73, 0.333), + (74, 0.631), + (75, 0.722), + (76, 0.686), + (77, 0.889), + (78, 0.722), + (79, 0.722), + (80, 0.768), + (81, 0.741), + (82, 0.556), + (83, 0.592), + (84, 0.611), + (85, 0.69), + (86, 0.439), + (87, 0.768), + (88, 0.645), + (89, 0.795), + (90, 0.611), + (91, 0.333), + (92, 0.863), + (93, 0.333), + (94, 0.658), + (95, 0.5), + (96, 0.5), + (97, 0.631), + (98, 0.549), + (99, 0.549), + (100, 0.494), + (101, 0.439), + (102, 0.521), + (103, 0.411), + (104, 0.603), + (105, 0.329), + (106, 0.603), + (107, 0.549), + (108, 0.549), + (109, 0.576), + (110, 0.521), + (111, 0.549), + (112, 0.549), + (113, 0.521), + (114, 0.549), + (115, 0.603), + (116, 0.439), + (117, 0.576), + (118, 0.713), + (119, 0.686), + (120, 0.493), + (121, 0.686), + (122, 0.494), + (123, 0.48), + (124, 0.2), + (125, 0.48), + (126, 0.549), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (183, 0.46), + (160, 0.25), + (161, 0.62), + (162, 0.247), + (163, 0.549), + (164, 0.167), + (165, 0.713), + (166, 0.5), + (167, 0.753), + (168, 0.753), + (169, 0.753), + (170, 0.753), + (171, 1.042), + (172, 0.713), + (173, 0.603), + (174, 0.987), + (175, 0.603), + (176, 0.4), + (177, 0.549), + (178, 0.411), + (179, 0.549), + (180, 0.549), + (181, 0.576), + (182, 0.494), + (183, 0.46), + (184, 0.549), + (185, 0.549), + (186, 0.549), + (187, 0.549), + (188, 1), + (189, 0.603), + (190, 1), + (191, 0.658), + (192, 0.823), + (193, 0.686), + (194, 0.795), + (195, 0.987), + (196, 0.768), + (197, 0.768), + (198, 0.823), + (199, 0.768), + (200, 0.768), + (201, 0.713), + (202, 0.713), + (203, 0.713), + (204, 0.713), + (205, 0.713), + (206, 0.713), + (207, 0.713), + (208, 0.768), + (209, 0.713), + (210, 0.79), + (211, 0.79), + (212, 0.89), + (213, 0.823), + (214, 0.549), + (215, 0.549), + (216, 0.713), + (217, 0.603), + (218, 0.603), + (219, 1.042), + (220, 0.987), + (221, 0.603), + (222, 0.987), + (223, 0.603), + (224, 0.494), + (225, 0.329), + (226, 0.79), + (227, 0.79), + (228, 0.786), + (229, 0.713), + (230, 0.384), + (231, 0.384), + (232, 0.384), + (233, 0.384), + (234, 0.384), + (235, 0.384), + (236, 0.494), + (237, 0.494), + (238, 0.494), + (239, 0.494), + (183, 0.46), + (241, 0.329), + (242, 0.274), + (243, 0.686), + (244, 0.686), + (245, 0.686), + (246, 0.384), + (247, 0.549), + (248, 0.384), + (249, 0.384), + (250, 0.384), + (251, 0.384), + (252, 0.494), + (253, 0.494), + (254, 0.494), + (183, 0.46), + ) + + +zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats' + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (32, 0.278), + (33, 0.974), + (34, 0.961), + (35, 0.974), + (36, 0.98), + (37, 0.719), + (38, 0.789), + (39, 0.79), + (40, 0.791), + (41, 0.69), + (42, 0.96), + (43, 0.939), + (44, 0.549), + (45, 0.855), + (46, 0.911), + (47, 0.933), + (48, 0.911), + (49, 0.945), + (50, 0.974), + (51, 0.755), + (52, 0.846), + (53, 0.762), + (54, 0.761), + (55, 0.571), + (56, 0.677), + (57, 0.763), + (58, 0.76), + (59, 0.759), + (60, 0.754), + (61, 0.494), + (62, 0.552), + (63, 0.537), + (64, 0.577), + (65, 0.692), + (66, 0.786), + (67, 0.788), + (68, 0.788), + (69, 0.79), + (70, 0.793), + (71, 0.794), + (72, 0.816), + (73, 0.823), + (74, 0.789), + (75, 0.841), + (76, 0.823), + (77, 0.833), + (78, 0.816), + (79, 0.831), + (80, 0.923), + (81, 0.744), + (82, 0.723), + (83, 0.749), + (84, 0.79), + (85, 0.792), + (86, 0.695), + (87, 0.776), + (88, 0.768), + (89, 0.792), + (90, 0.759), + (91, 0.707), + (92, 0.708), + (93, 0.682), + (94, 0.701), + (95, 0.826), + (96, 0.815), + (97, 0.789), + (98, 0.789), + (99, 0.707), + (100, 0.687), + (101, 0.696), + (102, 0.689), + (103, 0.786), + (104, 0.787), + (105, 0.713), + (106, 0.791), + (107, 0.785), + (108, 0.791), + (109, 0.873), + (110, 0.761), + (111, 0.762), + (112, 0.762), + (113, 0.759), + (114, 0.759), + (115, 0.892), + (116, 0.892), + (117, 0.788), + (118, 0.784), + (119, 0.438), + (120, 0.138), + (121, 0.277), + (122, 0.415), + (123, 0.392), + (124, 0.392), + (125, 0.668), + (126, 0.668), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (183, 0.788), + (161, 0.732), + (162, 0.544), + (163, 0.544), + (164, 0.91), + (165, 0.667), + (166, 0.76), + (167, 0.76), + (168, 0.776), + (169, 0.595), + (170, 0.694), + (171, 0.626), + (172, 0.788), + (173, 0.788), + (174, 0.788), + (175, 0.788), + (176, 0.788), + (177, 0.788), + (178, 0.788), + (179, 0.788), + (180, 0.788), + (181, 0.788), + (182, 0.788), + (183, 0.788), + (184, 0.788), + (185, 0.788), + (186, 0.788), + (187, 0.788), + (188, 0.788), + (189, 0.788), + (190, 0.788), + (191, 0.788), + (192, 0.788), + (193, 0.788), + (194, 0.788), + (195, 0.788), + (196, 0.788), + (197, 0.788), + (198, 0.788), + (199, 0.788), + (200, 0.788), + (201, 0.788), + (202, 0.788), + (203, 0.788), + (204, 0.788), + (205, 0.788), + (206, 0.788), + (207, 0.788), + (208, 0.788), + (209, 0.788), + (210, 0.788), + (211, 0.788), + (212, 0.894), + (213, 0.838), + (214, 1.016), + (215, 0.458), + (216, 0.748), + (217, 0.924), + (218, 0.748), + (219, 0.918), + (220, 0.927), + (221, 0.928), + (222, 0.928), + (223, 0.834), + (224, 0.873), + (225, 0.828), + (226, 0.924), + (227, 0.924), + (228, 0.917), + (229, 0.93), + (230, 0.931), + (231, 0.463), + (232, 0.883), + (233, 0.836), + (234, 0.836), + (235, 0.867), + (236, 0.867), + (237, 0.696), + (238, 0.696), + (239, 0.874), + (183, 0.788), + (241, 0.874), + (242, 0.76), + (243, 0.946), + (244, 0.771), + (245, 0.865), + (246, 0.771), + (247, 0.888), + (248, 0.967), + (249, 0.888), + (250, 0.831), + (251, 0.873), + (252, 0.927), + (253, 0.97), + (183, 0.788), + (183, 0.788), + ) + + +# Functions +# + +def _rect_area(width, height, args): + # Used by IRect.get_area() and Rect.get_area(). + unit = args[0] if args else 'px' + u = {"px": (1, 1), "in": (1.0, 72.0), "cm": (2.54, 72.0), "mm": (25.4, 72.0)} + f = (u[unit][0] / u[unit][1]) ** 2 + return f * width * height + +def _read_samples( pixmap, offset, n): + # fixme: need to be able to get a sample in one call, as a Python + # bytes or similar. + ret = [] + if not pixmap.samples(): + # mupdf.fz_samples_get() gives a segv if pixmap->samples is null. + return ret + for i in range( n): + ret.append( mupdf.fz_samples_get( pixmap, offset + i)) + return bytes( ret) + + +def _INRANGE(v, low, high): + return low <= v and v <= high + + +def _remove_dest_range(pdf, numbers): + pagecount = mupdf.pdf_count_pages(pdf) + for i in range(pagecount): + n1 = i + if n1 in numbers: + continue + + pageref = mupdf.pdf_lookup_page_obj( pdf, i) + annots = mupdf.pdf_dict_get( pageref, PDF_NAME('Annots')) + if not annots.m_internal: + continue + len_ = mupdf.pdf_array_len(annots) + for j in range(len_ - 1, -1, -1): + o = mupdf.pdf_array_get( annots, j) + if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( o, PDF_NAME('Subtype')), PDF_NAME('Link')): + continue + action = mupdf.pdf_dict_get( o, PDF_NAME('A')) + dest = mupdf.pdf_dict_get( o, PDF_NAME('Dest')) + if action.m_internal: + if not mupdf.pdf_name_eq( mupdf.pdf_dict_get( action, PDF_NAME('S')), PDF_NAME('GoTo')): + continue + dest = mupdf.pdf_dict_get( action, PDF_NAME('D')) + pno = -1 + if mupdf.pdf_is_array( dest): + target = mupdf.pdf_array_get( dest, 0) + pno = mupdf.pdf_lookup_page_number( pdf, target) + elif mupdf.pdf_is_string( dest): + location, _, _ = mupdf.fz_resolve_link( pdf.super(), mupdf.pdf_to_text_string( dest)) + pno = location.page + if pno < 0: # page number lookup did not work + continue + n1 = pno + if n1 in numbers: + mupdf.pdf_array_delete( annots, j) + + +def ASSERT_PDF(cond): + assert isinstance(cond, (mupdf.PdfPage, mupdf.PdfDocument)), f'{type(cond)=} {cond=}' + if not cond.m_internal: + raise Exception(MSG_IS_NO_PDF) + + +def EMPTY_IRECT(): + return IRect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) + + +def EMPTY_QUAD(): + return EMPTY_RECT().quad + + +def EMPTY_RECT(): + return Rect(FZ_MAX_INF_RECT, FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) + + +def ENSURE_OPERATION(pdf): + if not JM_have_operation(pdf): + raise Exception("No journalling operation started") + + +def INFINITE_IRECT(): + return IRect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT) + + +def INFINITE_QUAD(): + return INFINITE_RECT().quad + + +def INFINITE_RECT(): + return Rect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT) + + +def JM_BinFromBuffer(buffer_): + ''' + Turn fz_buffer into a Python bytes object + ''' + assert isinstance(buffer_, mupdf.FzBuffer) + ret = mupdf.fz_buffer_extract_copy(buffer_) + return ret + + +def JM_EscapeStrFromStr(c): + # `c` is typically from SWIG which will have converted a `const char*` from + # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast< + # Py_ssize_t >(size), "surrogateescape")`. This gives us a Python `str` + # with some characters encoded as a \0xdcXY sequence, where `XY` are hex + # digits for an invalid byte in the original `const char*`. + # + # This is actually a reasonable way of representing arbitrary + # strings from C, but we want to mimic what PyMuPDF does. It uses + # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")` + # which gives a string containing actual unicode characters for any invalid + # bytes. + # + # We mimic this by converting the `str` to a `bytes` with 'surrogateescape' + # to recognise \0xdcXY sequences, then convert the individual bytes into a + # `str` using `chr()`. + # + # Would be good to have a more efficient way to do this. + # + if c is None: + return '' + assert isinstance(c, str), f'{type(c)=}' + b = c.encode('utf8', 'surrogateescape') + ret = '' + for bb in b: + ret += chr(bb) + return ret + + +def JM_BufferFromBytes(stream): + ''' + Make fz_buffer from a PyBytes, PyByteArray or io.BytesIO object. If a text + io.BytesIO, we convert to binary by encoding as utf8. + ''' + if isinstance(stream, (bytes, bytearray)): + data = stream + elif hasattr(stream, 'getvalue'): + data = stream.getvalue() + if isinstance(data, str): + data = data.encode('utf-8') + if not isinstance(data, (bytes, bytearray)): + raise Exception(f'.getvalue() returned unexpected type: {type(data)}') + else: + return mupdf.FzBuffer() + return mupdf.fz_new_buffer_from_copied_data(data) + + +def JM_FLOAT_ITEM(obj, idx): + if not PySequence_Check(obj): + return None + return float(obj[idx]) + +def JM_INT_ITEM(obj, idx): + if idx < len(obj): + temp = obj[idx] + if isinstance(temp, (int, float)): + return 0, temp + return 1, None + + +def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip): + ''' + Pixmap creation directly using a short-lived displaylist, so we can support + separations. + ''' + SPOTS_NONE = 0 + SPOTS_OVERPRINT_SIM = 1 + SPOTS_FULL = 2 + + FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h. + if FZ_ENABLE_SPOT_RENDERING: + spots = SPOTS_OVERPRINT_SIM + else: + spots = SPOTS_NONE + + seps = None + colorspace = cs + + matrix = JM_matrix_from_py(ctm) + rect = mupdf.fz_bound_page(page) + rclip = JM_rect_from_py(clip) + rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given + rect = mupdf.fz_transform_rect(rect, matrix) + bbox = mupdf.fz_round_rect(rect) + + # Pixmap of the document's /OutputIntents ("output intents") + oi = mupdf.fz_document_output_intent(doc) + # if present and compatible, use it instead of the parameter + if oi.m_internal: + if mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs): + colorspace = mupdf.fz_keep_colorspace(oi) + + # check if spots rendering is available and if so use separations + if spots != SPOTS_NONE: + seps = mupdf.fz_page_separations(page) + if seps.m_internal: + n = mupdf.fz_count_separations(seps) + if spots == SPOTS_FULL: + for i in range(n): + mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_SPOT) + else: + for i in range(n): + mupdf.fz_set_separation_behavior(seps, i, mupdf.FZ_SEPARATION_COMPOSITE) + elif mupdf.fz_page_uses_overprint(page): + # This page uses overprint, so we need an empty + # sep object to force the overprint simulation on. + seps = mupdf.fz_new_separations(0) + elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace): + # We have an output intent, and it's incompatible + # with the colorspace our device needs. Force the + # overprint simulation on, because this ensures that + # we 'simulate' the output intent too. + seps = mupdf.fz_new_separations(0) + + pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha) + + if alpha: + mupdf.fz_clear_pixmap(pix) + else: + mupdf.fz_clear_pixmap_with_value(pix, 0xFF) + + dev = mupdf.fz_new_draw_device(matrix, pix) + if annots: + mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + else: + mupdf.fz_run_page_contents(page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + mupdf.fz_close_device(dev) + return pix + + +def JM_StrAsChar(x): + # fixme: should encode, but swig doesn't pass bytes to C as const char*. + return x + #return x.encode('utf8') + + +def JM_TUPLE(o: typing.Sequence) -> tuple: + return tuple(map(lambda x: round(x, 5) if abs(x) >= 1e-4 else 0, o)) + + +def JM_TUPLE3(o: typing.Sequence) -> tuple: + return tuple(map(lambda x: round(x, 3) if abs(x) >= 1e-3 else 0, o)) + + +def JM_UnicodeFromStr(s): + if s is None: + return '' + if isinstance(s, bytes): + s = s.decode('utf8') + assert isinstance(s, str), f'{type(s)=} {s=}' + return s + + +def JM_add_annot_id(annot, stem): + ''' + Add a unique /NM key to an annotation or widget. + Append a number to 'stem' such that the result is a unique name. + ''' + assert isinstance(annot, mupdf.PdfAnnot) + page = _pdf_annot_page(annot) + annot_obj = mupdf.pdf_annot_obj( annot) + names = JM_get_annot_id_list(page) + i = 0 + while 1: + stem_id = f'{JM_annot_id_stem}-{stem}{i}' + if stem_id not in names: + break + i += 1 + response = JM_StrAsChar(stem_id) + name = mupdf.pdf_new_string( response, len(response)) + mupdf.pdf_dict_puts(annot_obj, "NM", name) + page.doc().m_internal.resynth_required = 0 + + +def JM_add_oc_object(pdf, ref, xref): + ''' + Add OC object reference to a dictionary + ''' + indobj = mupdf.pdf_new_indirect(pdf, xref, 0) + if not mupdf.pdf_is_dict(indobj): + RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError) + type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type')) + if (mupdf.pdf_objcmp(type_, PDF_NAME('OCG')) == 0 + or mupdf.pdf_objcmp(type_, PDF_NAME('OCMD')) == 0 + ): + mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj) + else: + RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError) + + +def JM_annot_border(annot_obj): + dash_py = list() + style = None + width = -1 + clouds = -1 + obj = None + + obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Border')) + if mupdf.pdf_is_array( obj): + width = mupdf.pdf_to_real( mupdf.pdf_array_get( obj, 2)) + if mupdf.pdf_array_len( obj) == 4: + dash = mupdf.pdf_array_get( obj, 3) + for i in range( mupdf.pdf_array_len( dash)): + val = mupdf.pdf_to_int( mupdf.pdf_array_get( dash, i)) + dash_py.append( val) + + bs_o = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BS')) + if bs_o.m_internal: + width = mupdf.pdf_to_real( mupdf.pdf_dict_get( bs_o, PDF_NAME('W'))) + style = mupdf.pdf_to_name( mupdf.pdf_dict_get( bs_o, PDF_NAME('S'))) + if style == '': + style = None + obj = mupdf.pdf_dict_get( bs_o, PDF_NAME('D')) + if obj.m_internal: + for i in range( mupdf.pdf_array_len( obj)): + val = mupdf.pdf_to_int( mupdf.pdf_array_get( obj, i)) + dash_py.append( val) + + obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE')) + if obj.m_internal: + clouds = mupdf.pdf_to_int( mupdf.pdf_dict_get( obj, PDF_NAME('I'))) + + res = dict() + res[ dictkey_width] = width + res[ dictkey_dashes] = tuple( dash_py) + res[ dictkey_style] = style + res[ 'clouds'] = clouds + return res + + +def JM_annot_colors(annot_obj): + res = dict() + bc = list() # stroke colors + fc =list() # fill colors + o = mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C) + if mupdf.pdf_is_array(o): + n = mupdf.pdf_array_len(o) + for i in range(n): + col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i)) + bc.append(col) + res[dictkey_stroke] = bc + + o = mupdf.pdf_dict_gets(annot_obj, "IC") + if mupdf.pdf_is_array(o): + n = mupdf.pdf_array_len(o) + for i in range(n): + col = mupdf.pdf_to_real( mupdf.pdf_array_get(o, i)) + fc.append(col) + + res[dictkey_fill] = fc + return res + + +def JM_annot_set_border( border, doc, annot_obj): + assert isinstance(border, dict) + obj = None + dashlen = 0 + nwidth = border.get( dictkey_width) # new width + ndashes = border.get( dictkey_dashes) # new dashes + nstyle = border.get( dictkey_style) # new style + nclouds = border.get( 'clouds', -1) # new clouds value + + # get old border properties + oborder = JM_annot_border( annot_obj) + + # delete border-related entries + mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS')) + mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE')) + mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border')) + + # populate border items: keep old values for any omitted new ones + if nwidth < 0: + nwidth = oborder.get( dictkey_width) # no new width: keep current + if ndashes is None: + ndashes = oborder.get( dictkey_dashes) # no new dashes: keep old + if nstyle is None: + nstyle = oborder.get( dictkey_style) # no new style: keep old + if nclouds < 0: + nclouds = oborder.get( "clouds", -1) # no new clouds: keep old + + if isinstance( ndashes, tuple) and len( ndashes) > 0: + dashlen = len( ndashes) + darr = mupdf.pdf_new_array( doc, dashlen) + for d in ndashes: + mupdf.pdf_array_push_int( darr, d) + mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D')) + + mupdf.pdf_dict_putl( + annot_obj, + mupdf.pdf_new_real( nwidth), + PDF_NAME('BS'), + PDF_NAME('W'), + ) + + if dashlen == 0: + obj = JM_get_border_style( nstyle) + else: + obj = PDF_NAME('D') + mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S')) + + if nclouds > 0: + mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2) + obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE')) + mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C')) + mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds) + + +def make_escape(ch): + if ch == 92: + return "\\u005c" + elif 32 <= ch <= 127 or ch == 10: + return chr(ch) + elif 0xd800 <= ch <= 0xdfff: # orphaned surrogate + return "\\ufffd" + elif ch <= 0xffff: + return "\\u%04x" % ch + else: + return "\\U%08x" % ch + + +def JM_append_rune(buff, ch): + """ + APPEND non-ascii runes in unicode escape format to fz_buffer. + """ + mupdf.fz_append_string(buff, make_escape(ch)) + + +def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n): + ''' + Functions for wordlist output + ''' + s = JM_EscapeStrFromBuffer(buff) + litem = ( + wbbox.x0, + wbbox.y0, + wbbox.x1, + wbbox.y1, + s, + block_n, + line_n, + word_n, + ) + lines.append(litem) + return word_n + 1, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word counter + + +def JM_add_layer_config( pdf, name, creator, ON): + ''' + Add OC configuration to the PDF catalog + ''' + ocp = JM_ensure_ocproperties( pdf) + configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs')) + if not mupdf.pdf_is_array( configs): + configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1) + D = mupdf.pdf_new_dict( pdf, 5) + mupdf.pdf_dict_put_text_string( D, PDF_NAME('Name'), name) + if creator is not None: + mupdf.pdf_dict_put_text_string( D, PDF_NAME('Creator'), creator) + mupdf.pdf_dict_put( D, PDF_NAME('BaseState'), PDF_NAME('OFF')) + onarray = mupdf.pdf_dict_put_array( D, PDF_NAME('ON'), 5) + if not ON: + pass + else: + ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs')) + n = len(ON) + for i in range(n): + xref = 0 + e, xref = JM_INT_ITEM(ON, i) + if e == 1: + continue + ind = mupdf.pdf_new_indirect( pdf, xref, 0) + if mupdf.pdf_array_contains( ocgs, ind): + mupdf.pdf_array_push( onarray, ind) + mupdf.pdf_array_push( configs, D) + + +def JM_char_bbox(line, ch): + ''' + return rect of char quad + ''' + q = JM_char_quad(line, ch) + r = mupdf.fz_rect_from_quad(q) + if not line.m_internal.wmode: + return r + if r.y1 < r.y0 + ch.m_internal.size: + r.y0 = r.y1 - ch.m_internal.size + return r + + +def JM_char_font_flags(font, line, ch): + flags = 0 + if line and ch: + flags += detect_super_script(line, ch) + flags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC + flags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED + flags += mupdf.fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED + flags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD + return flags + + +def JM_char_quad(line, ch): + ''' + re-compute char quad if ascender/descender values make no sense + ''' + if 1 and g_use_extra: + # This reduces time taken to extract text from PyMuPDF.pdf from 20s to + # 15s. + return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal)) + + assert isinstance(line, mupdf.FzStextLine) + assert isinstance(ch, mupdf.FzStextChar) + if _globals.skip_quad_corrections: # no special handling + return ch.quad + if line.m_internal.wmode: # never touch vertical write mode + return ch.quad + font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)) + asc = JM_font_ascender(font) + dsc = JM_font_descender(font) + fsize = ch.m_internal.size + asc_dsc = asc - dsc + FLT_EPSILON + if asc_dsc >= 1 and _globals.small_glyph_heights == 0: # no problem + return mupdf.FzQuad(ch.m_internal.quad) + + # Re-compute quad with adjusted ascender / descender values: + # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, + # re-rotate and move back to ch->origin location. + fsize = ch.m_internal.size + bbox = mupdf.fz_font_bbox(font) + fwidth = bbox.x1 - bbox.x0 + if asc < 1e-3: # probably Tesseract glyphless font + dsc = -0.1 + asc = 0.9 + asc_dsc = 1.0 + + if _globals.small_glyph_heights or asc_dsc < 1: + dsc = dsc / asc_dsc + asc = asc / asc_dsc + asc_dsc = asc - dsc + asc = asc * fsize / asc_dsc + dsc = dsc * fsize / asc_dsc + + # Re-compute quad with the adjusted ascender / descender values: + # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, + # re-rotate and move back to ch->origin location. + c = line.m_internal.dir.x # cosine + s = line.m_internal.dir.y # sine + trm1 = mupdf.fz_make_matrix(c, -s, s, c, 0, 0) # derotate + trm2 = mupdf.fz_make_matrix(c, s, -s, c, 0, 0) # rotate + if (c == -1): # left-right flip + trm1.d = 1 + trm2.d = 1 + xlate1 = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y) + xlate2 = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y) + + quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), xlate1) # move origin to (0,0) + quad = mupdf.fz_transform_quad(quad, trm1) # de-rotate corners + + # adjust vertical coordinates + if c == 1 and quad.ul.y > 0: # up-down flip + quad.ul.y = asc + quad.ur.y = asc + quad.ll.y = dsc + quad.lr.y = dsc + else: + quad.ul.y = -asc + quad.ur.y = -asc + quad.ll.y = -dsc + quad.lr.y = -dsc + + # adjust horizontal coordinates that are too crazy: + # (1) left x must be >= 0 + # (2) if bbox width is 0, lookup char advance in font. + if quad.ll.x < 0: + quad.ll.x = 0 + quad.ul.x = 0 + + cwidth = quad.lr.x - quad.ll.x + if cwidth < FLT_EPSILON: + glyph = mupdf.fz_encode_character( font, ch.m_internal.c) + if glyph: + fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode) + quad.lr.x = quad.ll.x + fwidth * fsize + quad.ur.x = quad.lr.x + + quad = mupdf.fz_transform_quad(quad, trm2) # rotate back + quad = mupdf.fz_transform_quad(quad, xlate2) # translate back + return quad + + +def JM_choice_options(annot): + ''' + return list of choices for list or combo boxes + ''' + annot_obj = mupdf.pdf_annot_obj( annot.this) + + opts = mupdf.pdf_choice_widget_options2( annot, 0) + n = len( opts) + if n == 0: + return # wrong widget type + + optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt')) + liste = [] + + for i in range( n): + m = mupdf.pdf_array_len( mupdf.pdf_array_get( optarr, i)) + if m == 2: + val = ( + mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 0)), + mupdf.pdf_to_text_string( mupdf.pdf_array_get( mupdf.pdf_array_get( optarr, i), 1)), + ) + liste.append( val) + else: + val = mupdf.pdf_to_text_string( mupdf.pdf_array_get( optarr, i)) + liste.append( val) + return liste + + +def JM_clear_pixmap_rect_with_value(dest, value, b): + ''' + Clear a pixmap rectangle - my version also supports non-alpha pixmaps + ''' + b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest)) + w = b.x1 - b.x0 + y = b.y1 - b.y0 + if w <= 0 or y <= 0: + return 0 + + destspan = dest.stride() + destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x()) + + # CMYK needs special handling (and potentially any other subtractive colorspaces) + if mupdf.fz_colorspace_n(dest.colorspace()) == 4: + value = 255 - value + while 1: + s = destp + for x in range(0, w): + mupdf.fz_samples_set(dest, s, 0) + s += 1 + mupdf.fz_samples_set(dest, s, 0) + s += 1 + mupdf.fz_samples_set(dest, s, 0) + s += 1 + mupdf.fz_samples_set(dest, s, value) + s += 1 + if dest.alpha(): + mupdf.fz_samples_set(dest, s, 255) + s += 1 + destp += destspan + if y == 0: + break + y -= 1 + return 1 + + while 1: + s = destp + for x in range(w): + for k in range(dest.n()-1): + mupdf.fz_samples_set(dest, s, value) + s += 1 + if dest.alpha(): + mupdf.fz_samples_set(dest, s, 255) + s += 1 + else: + mupdf.fz_samples_set(dest, s, value) + s += 1 + destp += destspan + if y == 0: + break + y -= 1 + return 1 + + +def JM_color_FromSequence(color): + + if isinstance(color, (int, float)): # maybe just a single float + color = [color] + + if not isinstance( color, (list, tuple)): + return -1, [] + + if len(color) not in (0, 1, 3, 4): + return -1, [] + + ret = color[:] + for i in range(len(ret)): + if ret[i] < 0 or ret[i] > 1: + ret[i] = 1 + return len(ret), ret + + +def JM_color_count( pm, clip): + if 1 or g_use_extra: + return extra.ll_JM_color_count(pm.m_internal, clip) + + rc = dict() + cnt = 0 + irect = mupdf.fz_pixmap_bbox( pm) + irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip))) + stride = pm.stride() + width = irect.x1 - irect.x0 + height = irect.y1 - irect.y0 + n = pm.n() + substride = width * n + s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n + oldpix = _read_samples( pm, s, n) + cnt = 0 + if mupdf.fz_is_empty_irect(irect): + return rc + for i in range( height): + for j in range( 0, substride, n): + newpix = _read_samples( pm, s + j, n) + if newpix != oldpix: + pixel = oldpix + c = rc.get( pixel, None) + if c is not None: + cnt += c + rc[ pixel] = cnt + cnt = 1 + oldpix = newpix + else: + cnt += 1 + s += stride + pixel = oldpix + c = rc.get( pixel) + if c is not None: + cnt += c + rc[ pixel] = cnt + return rc + + +def JM_compress_buffer(inbuffer): + ''' + compress char* into a new buffer + ''' + data, compressed_length = mupdf.fz_new_deflated_data_from_buffer( + inbuffer, + mupdf.FZ_DEFLATE_BEST, + ) + #log( '{=data compressed_length}') + if not data or compressed_length == 0: + return None + buf = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(data, compressed_length)) + mupdf.fz_resize_buffer(buf, compressed_length) + return buf + + +def JM_copy_rectangle(page, area): + need_new_line = 0 + buffer = io.StringIO() + for block in page: + if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT: + continue + for line in block: + line_had_text = 0 + for ch in line: + r = JM_char_bbox(line, ch) + if JM_rects_overlap(area, r): + line_had_text = 1 + if need_new_line: + buffer.write("\n") + need_new_line = 0 + buffer.write(make_escape(ch.m_internal.c)) + if line_had_text: + need_new_line = 1 + + s = buffer.getvalue() # take over the data + return s + + +def JM_convert_to_pdf(doc, fp, tp, rotate): + ''' + Convert any MuPDF document to a PDF + Returns bytes object containing the PDF, created via 'write' function. + ''' + pdfout = mupdf.PdfDocument() + incr = 1 + s = fp + e = tp + if fp > tp: + incr = -1 # count backwards + s = tp # adjust ... + e = fp # ... range + rot = JM_norm_rotation(rotate) + i = fp + while 1: # interpret & write document pages as PDF pages + if not _INRANGE(i, s, e): + break + page = mupdf.fz_load_page(doc, i) + mediabox = mupdf.fz_bound_page(page) + dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox) + mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie()) + mupdf.fz_close_device(dev) + dev = None + page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents) + mupdf.pdf_insert_page(pdfout, -1, page_obj) + i += incr + # PDF created - now write it to Python bytearray + # prepare write options structure + opts = mupdf.PdfWriteOptions() + opts.do_garbage = 4 + opts.do_compress = 1 + opts.do_compress_images = 1 + opts.do_compress_fonts = 1 + opts.do_sanitize = 1 + opts.do_incremental = 0 + opts.do_ascii = 0 + opts.do_decompress = 0 + opts.do_linear = 0 + opts.do_clean = 1 + opts.do_pretty = 0 + + res = mupdf.fz_new_buffer(8192) + out = mupdf.FzOutput(res) + mupdf.pdf_write_document(pdfout, out, opts) + out.fz_close_output() + c = mupdf.fz_buffer_extract_copy(res) + assert isinstance(c, bytes) + return c + + +# Copied from MuPDF v1.14 +# Create widget +def JM_create_widget(doc, page, type, fieldname): + old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags")) + #log( '*** JM_create_widget()') + #log( f'{mupdf.pdf_create_annot_raw=}') + #log( f'{page=}') + #log( f'{mupdf.PDF_ANNOT_WIDGET=}') + annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET) + annot_obj = mupdf.pdf_annot_obj(annot) + try: + JM_set_field_type(doc, annot_obj, type) + mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname) + + if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE: + sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly) + mupdf.pdf_dict_putl( + mupdf.pdf_trailer(doc), + mupdf.pdf_new_int(sigflags), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('SigFlags'), + ) + # pdf_create_annot will have linked the new widget into the page's + # annot array. We also need it linked into the document's form + form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields") + if not form.m_internal: + form = mupdf.pdf_new_array(doc, 1) + mupdf.pdf_dict_putl( + mupdf.pdf_trailer(doc), + form, + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('Fields'), + ) + mupdf.pdf_array_push(form, annot_obj) # Cleanup relies on this statement being last + except Exception: + if g_exceptions_verbose: exception_info() + mupdf.pdf_delete_annot(page, annot) + + if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE: + mupdf.pdf_dict_putl( + mupdf.pdf_trailer(doc), + mupdf.pdf_new_int(old_sigflags), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('SigFlags'), + ) + raise + return annot + + +def JM_cropbox(page_obj): + ''' + return a PDF page's CropBox + ''' + if g_use_extra: + return extra.JM_cropbox(page_obj) + + mediabox = JM_mediabox(page_obj) + cropbox = mupdf.pdf_to_rect( + mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox')) + ) + if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox): + cropbox = mediabox + y0 = mediabox.y1 - cropbox.y1 + y1 = mediabox.y1 - cropbox.y0 + cropbox.y0 = y0 + cropbox.y1 = y1 + return cropbox + + +def JM_cropbox_size(page_obj): + rect = JM_cropbox(page_obj) + w = abs(rect.x1 - rect.x0) + h = abs(rect.y1 - rect.y0) + size = mupdf.fz_make_point(w, h) + return size + + +def JM_derotate_page_matrix(page): + ''' + just the inverse of rotation + ''' + mp = JM_rotate_page_matrix(page) + return mupdf.fz_invert_matrix(mp) + + +def JM_embed_file( + pdf, + buf, + filename, + ufilename, + desc, + compress, + ): + ''' + embed a new file in a PDF (not only /EmbeddedFiles entries) + ''' + len_ = 0 + val = mupdf.pdf_new_dict(pdf, 6) + mupdf.pdf_dict_put_dict(val, PDF_NAME('CI'), 4) + ef = mupdf.pdf_dict_put_dict(val, PDF_NAME('EF'), 4) + mupdf.pdf_dict_put_text_string(val, PDF_NAME('F'), filename) + mupdf.pdf_dict_put_text_string(val, PDF_NAME('UF'), ufilename) + mupdf.pdf_dict_put_text_string(val, PDF_NAME('Desc'), desc) + mupdf.pdf_dict_put(val, PDF_NAME('Type'), PDF_NAME('Filespec')) + bs = b' ' + f = mupdf.pdf_add_stream( + pdf, + #mupdf.fz_fz_new_buffer_from_copied_data(bs), + mupdf.fz_new_buffer_from_copied_data(bs), + mupdf.PdfObj(), + 0, + ) + mupdf.pdf_dict_put(ef, PDF_NAME('F'), f) + JM_update_stream(pdf, f, buf, compress) + len_, _ = mupdf.fz_buffer_storage(buf) + mupdf.pdf_dict_put_int(f, PDF_NAME('DL'), len_) + mupdf.pdf_dict_put_int(f, PDF_NAME('Length'), len_) + params = mupdf.pdf_dict_put_dict(f, PDF_NAME('Params'), 4) + mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), len_) + return val + + +def JM_embedded_clean(pdf): + ''' + perform some cleaning if we have /EmbeddedFiles: + (1) remove any /Limits if /Names exists + (2) remove any empty /Collection + (3) set /PageMode/UseAttachments + ''' + root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')) + + # remove any empty /Collection entry + coll = mupdf.pdf_dict_get(root, PDF_NAME('Collection')) + if coll.m_internal and mupdf.pdf_dict_len(coll) == 0: + mupdf.pdf_dict_del(root, PDF_NAME('Collection')) + + efiles = mupdf.pdf_dict_getl( + root, + PDF_NAME('Names'), + PDF_NAME('EmbeddedFiles'), + PDF_NAME('Names'), + ) + if efiles.m_internal: + mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments") + + +def JM_EscapeStrFromBuffer(buff): + if not buff.m_internal: + return '' + s = mupdf.fz_buffer_extract_copy(buff) + val = PyUnicode_DecodeRawUnicodeEscape(s, errors='replace') + return val + + +def JM_ensure_identity(pdf): + ''' + Store ID in PDF trailer + ''' + id_ = mupdf.pdf_dict_get( mupdf.pdf_trailer(pdf), PDF_NAME('ID')) + if not id_.m_internal: + rnd0 = mupdf.fz_memrnd2(16) + # Need to convert raw bytes into a str to send to + # mupdf.pdf_new_string(). chr() seems to work for this. + rnd = '' + for i in rnd0: + rnd += chr(i) + id_ = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2) + mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd))) + mupdf.pdf_array_push( id_, mupdf.pdf_new_string( rnd, len(rnd))) + +def JM_ensure_ocproperties(pdf): + ''' + Ensure OCProperties, return /OCProperties key + ''' + ocp = mupdf.pdf_dict_get(mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')), PDF_NAME('OCProperties')) + if ocp.m_internal: + return ocp + root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')) + ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2) + mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0) + D = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5) + mupdf.pdf_dict_put_array(D, PDF_NAME('ON'), 0) + mupdf.pdf_dict_put_array(D, PDF_NAME('OFF'), 0) + mupdf.pdf_dict_put_array(D, PDF_NAME('Order'), 0) + mupdf.pdf_dict_put_array(D, PDF_NAME('RBGroups'), 0) + return ocp + + +def JM_expand_fname(name): + ''' + Make /DA string of annotation + ''' + if not name: return "Helv" + if name.startswith("Co"): return "Cour" + if name.startswith("co"): return "Cour" + if name.startswith("Ti"): return "TiRo" + if name.startswith("ti"): return "TiRo" + if name.startswith("Sy"): return "Symb" + if name.startswith("sy"): return "Symb" + if name.startswith("Za"): return "ZaDb" + if name.startswith("za"): return "ZaDb" + return "Helv" + + +def JM_field_type_text(wtype): + ''' + String from widget type + ''' + if wtype == mupdf.PDF_WIDGET_TYPE_BUTTON: + return "Button" + if wtype == mupdf.PDF_WIDGET_TYPE_CHECKBOX: + return "CheckBox" + if wtype == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: + return "RadioButton" + if wtype == mupdf.PDF_WIDGET_TYPE_TEXT: + return "Text" + if wtype == mupdf.PDF_WIDGET_TYPE_LISTBOX: + return "ListBox" + if wtype == mupdf.PDF_WIDGET_TYPE_COMBOBOX: + return "ComboBox" + if wtype == mupdf.PDF_WIDGET_TYPE_SIGNATURE: + return "Signature" + return "unknown" + + +def JM_fill_pixmap_rect_with_color(dest, col, b): + assert isinstance(dest, mupdf.FzPixmap) + # fill a rect with a color tuple + b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest)) + w = b.x1 - b.x0 + y = b.y1 - b.y0 + if w <= 0 or y <= 0: + return 0 + destspan = dest.stride() + destp = destspan * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x()) + while 1: + s = destp + for x in range(w): + for i in range( dest.n()): + mupdf.fz_samples_set(dest, s, col[i]) + s += 1 + destp += destspan + y -= 1 + if y == 0: + break + return 1 + + +def JM_find_annot_irt(annot): + ''' + Return the first annotation whose /IRT key ("In Response To") points to + annot. Used to remove the response chain of a given annotation. + ''' + assert isinstance(annot, mupdf.PdfAnnot) + irt_annot = None # returning this + annot_obj = mupdf.pdf_annot_obj(annot) + found = 0 + # loop thru MuPDF's internal annots array + page = _pdf_annot_page(annot) + irt_annot = mupdf.pdf_first_annot(page) + while 1: + assert isinstance(irt_annot, mupdf.PdfAnnot) + if not irt_annot.m_internal: + break + irt_annot_obj = mupdf.pdf_annot_obj(irt_annot) + o = mupdf.pdf_dict_gets(irt_annot_obj, 'IRT') + if o.m_internal: + if not mupdf.pdf_objcmp(o, annot_obj): + found = 1 + break + irt_annot = mupdf.pdf_next_annot(irt_annot) + if found: + return irt_annot + + +def JM_font_ascender(font): + ''' + need own versions of ascender / descender + ''' + assert isinstance(font, mupdf.FzFont) + if _globals.skip_quad_corrections: + return 0.8 + return mupdf.fz_font_ascender(font) + + +def JM_font_descender(font): + ''' + need own versions of ascender / descender + ''' + assert isinstance(font, mupdf.FzFont) + if _globals.skip_quad_corrections: + return -0.2 + ret = mupdf.fz_font_descender(font) + return ret + + +def JM_is_word_delimiter(ch, delimiters): + """Check if ch is an extra word delimiting character. + """ + if (0 + or ch <= 32 + or ch == 160 + or 0x202a <= ch <= 0x202e + ): + # covers any whitespace plus unicodes that switch between + # right-to-left and left-to-right languages + return True + if not delimiters: # no extra delimiters provided + return False + char = chr(ch) + for d in delimiters: + if d == char: + return True + return False + + +def JM_is_rtl_char(ch): + if ch < 0x590 or ch > 0x900: + return False + return True + + +def JM_font_name(font): + assert isinstance(font, mupdf.FzFont) + name = mupdf.fz_font_name(font) + s = name.find('+') + if _globals.subset_fontnames or s == -1 or s != 6: + return name + return name[s + 1:] + + +def JM_gather_fonts(pdf, dict_, fontlist, stream_xref): + rc = 1 + n = mupdf.pdf_dict_len(dict_) + for i in range(n): + + refname = mupdf.pdf_dict_get_key(dict_, i) + fontdict = mupdf.pdf_dict_get_val(dict_, i) + if not mupdf.pdf_is_dict(fontdict): + mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)") + continue + + subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype) + basefont = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont) + if not basefont.m_internal or mupdf.pdf_is_null(basefont): + name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name) + else: + name = basefont + encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding) + if mupdf.pdf_is_dict(encoding): + encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding) + xref = mupdf.pdf_to_num(fontdict) + ext = "n/a" + if xref: + ext = JM_get_fontextension(pdf, xref) + entry = ( + xref, + ext, + mupdf.pdf_to_name(subtype), + JM_EscapeStrFromStr(mupdf.pdf_to_name(name)), + mupdf.pdf_to_name(refname), + mupdf.pdf_to_name(encoding), + stream_xref, + ) + fontlist.append(entry) + return rc + + +def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int): + ''' + Store info of a /Form xobject in Python list + ''' + assert isinstance(doc, mupdf.PdfDocument) + rc = 1 + n = mupdf.pdf_dict_len(dict_) + for i in range(n): + refname = mupdf.pdf_dict_get_key( dict_, i) + imagedict = mupdf.pdf_dict_get_val(dict_, i) + if not mupdf.pdf_is_dict(imagedict): + mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(imagedict)} 0 R)") + continue + + type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype')) + if not mupdf.pdf_name_eq(type_, PDF_NAME('Form')): + continue + + o = mupdf.pdf_dict_get(imagedict, PDF_NAME('BBox')) + m = mupdf.pdf_dict_get(imagedict, PDF_NAME('Matrix')) + if m.m_internal: + mat = mupdf.pdf_to_matrix(m) + else: + mat = mupdf.FzMatrix() + if o.m_internal: + bbox = mupdf.fz_transform_rect( mupdf.pdf_to_rect(o), mat) + else: + bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE) + xref = mupdf.pdf_to_num(imagedict) + + entry = ( + xref, + mupdf.pdf_to_name( refname), + stream_xref, + JM_py_from_rect(bbox), + ) + imagelist.append(entry) + return rc + + +def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int): + ''' + Store info of an image in Python list + ''' + rc = 1 + n = mupdf.pdf_dict_len( dict_) + for i in range(n): + refname = mupdf.pdf_dict_get_key(dict_, i) + imagedict = mupdf.pdf_dict_get_val(dict_, i) + if not mupdf.pdf_is_dict(imagedict): + mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)") + continue + + type_ = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype')) + if not mupdf.pdf_name_eq(type_, PDF_NAME('Image')): + continue + + xref = mupdf.pdf_to_num(imagedict) + gen = 0 + smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask')) + if smask.m_internal: + gen = mupdf.pdf_to_num(smask) + + filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F')) + if mupdf.pdf_is_array(filter_): + filter_ = mupdf.pdf_array_get(filter_, 0) + + altcs = mupdf.PdfObj(0) + cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS')) + if mupdf.pdf_is_array(cs): + cses = cs + cs = mupdf.pdf_array_get(cses, 0) + if (mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN')) + or mupdf.pdf_name_eq(cs, PDF_NAME('Separation')) + ): + altcs = mupdf.pdf_array_get(cses, 2) + if mupdf.pdf_is_array(altcs): + altcs = mupdf.pdf_array_get(altcs, 0) + width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W')) + height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H')) + bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC')) + + entry = ( + xref, + gen, + mupdf.pdf_to_int(width), + mupdf.pdf_to_int(height), + mupdf.pdf_to_int(bpc), + JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)), + JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)), + JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)), + JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)), + stream_xref, + ) + imagelist.append(entry) + return rc + + +def JM_get_annot_by_xref(page, xref): + ''' + retrieve annot by its xref + ''' + assert isinstance(page, mupdf.PdfPage) + found = 0 + # loop thru MuPDF's internal annots array + annot = mupdf.pdf_first_annot(page) + while 1: + if not annot.m_internal: + break + if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)): + found = 1 + break + annot = mupdf.pdf_next_annot( annot) + if not found: + raise Exception("xref %d is not an annot of this page" % xref) + return annot + + +def JM_get_annot_by_name(page, name): + ''' + retrieve annot by name (/NM key) + ''' + assert isinstance(page, mupdf.PdfPage) + if not name: + return + found = 0 + # loop thru MuPDF's internal annots and widget arrays + annot = mupdf.pdf_first_annot(page) + while 1: + if not annot.m_internal: + break + + response, len_ = mupdf.pdf_to_string(mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")) + if name == response: + found = 1 + break + annot = mupdf.pdf_next_annot(annot) + if not found: + raise Exception("'%s' is not an annot of this page" % name) + return annot + + +def JM_get_annot_id_list(page): + names = [] + annots = mupdf.pdf_dict_get( page.obj(), mupdf.PDF_ENUM_NAME_Annots) + if not annots.m_internal: + return names + for i in range( mupdf.pdf_array_len(annots)): + annot_obj = mupdf.pdf_array_get(annots, i) + name = mupdf.pdf_dict_gets(annot_obj, "NM") + if name.m_internal: + names.append( + mupdf.pdf_to_text_string(name) + ) + return names + +def JM_get_annot_xref_list( page_obj): + ''' + return the xrefs and /NM ids of a page's annots, links and fields + ''' + if g_use_extra: + names = extra.JM_get_annot_xref_list( page_obj) + return names + + names = [] + annots = mupdf.pdf_dict_get( page_obj, PDF_NAME('Annots')) + n = mupdf.pdf_array_len( annots) + for i in range( n): + annot_obj = mupdf.pdf_array_get( annots, i) + xref = mupdf.pdf_to_num( annot_obj) + subtype = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Subtype')) + if not subtype.m_internal: + continue # subtype is required + type_ = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subtype)) + if type_ == mupdf.PDF_ANNOT_UNKNOWN: + continue # only accept valid annot types + id_ = mupdf.pdf_dict_gets( annot_obj, "NM") + names.append( (xref, type_, mupdf.pdf_to_text_string( id_))) + return names + + +def JM_get_annot_xref_list2(page): + page = page._pdf_page(required=False) + if not page.m_internal: + return list() + return JM_get_annot_xref_list( page.obj()) + + +def JM_get_border_style(style): + ''' + return pdf_obj "border style" from Python str + ''' + val = mupdf.PDF_ENUM_NAME_S + if style is None: + return val + s = style + if s.startswith("b") or s.startswith("B"): val = mupdf.PDF_ENUM_NAME_B + elif s.startswith("d") or s.startswith("D"): val = mupdf.PDF_ENUM_NAME_D + elif s.startswith("i") or s.startswith("I"): val = mupdf.PDF_ENUM_NAME_I + elif s.startswith("u") or s.startswith("U"): val = mupdf.PDF_ENUM_NAME_U + elif s.startswith("s") or s.startswith("S"): val = mupdf.PDF_ENUM_NAME_S + return val + + +def JM_get_font( + fontname, + fontfile, + fontbuffer, + script, + lang, + ordering, + is_bold, + is_italic, + is_serif, + embed, + ): + ''' + return a fz_font from a number of parameters + ''' + def fertig(font): + if not font.m_internal: + raise RuntimeError(MSG_FONT_FAILED) + # if font allows this, set embedding + if not font.m_internal.flags.never_embed: + mupdf.fz_set_font_embedding(font, embed) + return font + + index = 0 + font = None + if fontfile: + #goto have_file; + font = mupdf.fz_new_font_from_file( None, fontfile, index, 0) + return fertig(font) + + if fontbuffer: + #goto have_buffer; + res = JM_BufferFromBytes(fontbuffer) + font = mupdf.fz_new_font_from_buffer( None, res, index, 0) + return fertig(font) + + if ordering > -1: + # goto have_cjk; + font = mupdf.fz_new_cjk_font(ordering) + return fertig(font) + + if fontname: + # goto have_base14; + # Base-14 or a MuPDF builtin font + font = mupdf.fz_new_base14_font(fontname) + if font.m_internal: + return fertig(font) + font = mupdf.fz_new_builtin_font(fontname, is_bold, is_italic) + return fertig(font) + + # Check for NOTO font + #have_noto:; + data, size, index = mupdf.fz_lookup_noto_font( script, lang) + font = None + if data: + font = mupdf.fz_new_font_from_memory( None, data, size, index, 0) + if font.m_internal: + return fertig(font) + font = mupdf.fz_load_fallback_font( script, lang, is_serif, is_bold, is_italic) + return fertig(font) + + +def JM_get_fontbuffer(doc, xref): + ''' + Return the contents of a font file, identified by xref + ''' + if xref < 1: + return + o = mupdf.pdf_load_object(doc, xref) + desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts')) + if desft.m_internal: + obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0)) + obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor')) + else: + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor')) + + if not obj.m_internal: + message(f"invalid font - FontDescriptor missing") + return + + o = obj + + stream = None + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile')) + if obj.m_internal: + stream = obj # ext = "pfa" + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2')) + if obj.m_internal: + stream = obj # ext = "ttf" + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3')) + if obj.m_internal: + stream = obj + + obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')) + if obj.m_internal and not mupdf.pdf_is_name(obj): + message("invalid font descriptor subtype") + return + + if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')): + pass # Prev code did: ext = "cff", but this has no effect. + elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')): + pass # Prev code did: ext = "cid", but this has no effect. + elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')): + pass # Prev code did: ext = "otf", but this has no effect. */ + else: + message('warning: unhandled font type {pdf_to_name(ctx, obj)!r}') + + if not stream: + message('warning: unhandled font type') + return + + return mupdf.pdf_load_stream(stream) + + +def JM_get_resource_properties(ref): + ''' + Return the items of Resources/Properties (used for Marked Content) + Argument may be e.g. a page object or a Form XObject + ''' + properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties')) + if not properties.m_internal: + return () + else: + n = mupdf.pdf_dict_len(properties) + if n < 1: + return () + rc = [] + for i in range(n): + key = mupdf.pdf_dict_get_key(properties, i) + val = mupdf.pdf_dict_get_val(properties, i) + c = mupdf.pdf_to_name(key) + xref = mupdf.pdf_to_num(val) + rc.append((c, xref)) + return rc + + +def JM_get_widget_by_xref( page, xref): + ''' + retrieve widget by its xref + ''' + found = False + annot = mupdf.pdf_first_widget( page) + while annot.m_internal: + annot_obj = mupdf.pdf_annot_obj( annot) + if xref == mupdf.pdf_to_num( annot_obj): + found = True + break + annot = mupdf.pdf_next_widget( annot) + if not found: + raise Exception( f"xref {xref} is not a widget of this page") + return Annot( annot) + + +def JM_get_widget_properties(annot, Widget): + ''' + Populate a Python Widget object with the values from a PDF form field. + Called by "Page.first_widget" and "Widget.next". + ''' + #log( '{type(annot)=}') + annot_obj = mupdf.pdf_annot_obj(annot.this) + #log( 'Have called mupdf.pdf_annot_obj()') + page = _pdf_annot_page(annot.this) + pdf = page.doc() + tw = annot + + def SETATTR(key, value): + setattr(Widget, key, value) + + def SETATTR_DROP(mod, key, value): + # Original C code for this function deletes if PyObject* is NULL. We + # don't have a representation for that in Python - e.g. None is not + # represented by NULL. + setattr(mod, key, value) + + #log( '=== + mupdf.pdf_widget_type(tw)') + field_type = mupdf.pdf_widget_type(tw.this) + #log( '=== - mupdf.pdf_widget_type(tw)') + Widget.field_type = field_type + if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE: + if mupdf.pdf_signature_is_signed(pdf, annot_obj): + SETATTR("is_signed", True) + else: + SETATTR("is_signed",False) + else: + SETATTR("is_signed", None) + SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj))) + SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type))) + + field_name = mupdf.pdf_load_field_name(annot_obj) + SETATTR_DROP(Widget, "field_name", field_name) + + def pdf_dict_get_inheritable_nonempty_label(node, key): + ''' + This is a modified version of MuPDF's pdf_dict_get_inheritable(), with + some changes: + * Returns string from pdf_to_text_string() or None if not found. + * Recurses to parent if current node exists but with empty string + value. + ''' + slow = node + halfbeat = 11 # Don't start moving slow pointer for a while. + while 1: + if not node.m_internal: + return + val = mupdf.pdf_dict_get(node, key) + if val.m_internal: + label = mupdf.pdf_to_text_string(val) + if label: + return label + node = mupdf.pdf_dict_get(node, PDF_NAME('Parent')) + if node.m_internal == slow.m_internal: + raise Exception("cycle in resources") + halfbeat -= 1 + if halfbeat == 0: + slow = mupdf.pdf_dict_get(slow, PDF_NAME('Parent')) + halfbeat = 2 + + # In order to address #3950, we use our modified pdf_dict_get_inheritable() + # to ignore empty-string child values. + label = pdf_dict_get_inheritable_nonempty_label(annot_obj, PDF_NAME('TU')) + if label is not None: + SETATTR_DROP(Widget, "field_label", label) + + fvalue = None + if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: + obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent')) # owning RB group + if obj.m_internal: + SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj)) + obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS')) + if obj.m_internal: + fvalue = mupdf.pdf_to_name(obj) + if not fvalue: + fvalue = mupdf.pdf_field_value(annot_obj) + SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue)) + + SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj)) + + border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W'))) + if border_width == 0: + border_width = 1 + SETATTR_DROP(Widget, "border_width", border_width) + + obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D')) + if mupdf.pdf_is_array(obj): + n = mupdf.pdf_array_len(obj) + d = [0] * n + for i in range(n): + d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i)) + SETATTR_DROP(Widget, "border_dashes", d) + + SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this)) + + SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this)) + + obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG')) + if mupdf.pdf_is_array(obj): + n = mupdf.pdf_array_len(obj) + col = [0] * n + for i in range(n): + col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i)) + SETATTR_DROP(Widget, "fill_color", col) + + obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC')) + if mupdf.pdf_is_array(obj): + n = mupdf.pdf_array_len(obj) + col = [0] * n + for i in range(n): + col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i)) + SETATTR_DROP(Widget, "border_color", col) + + SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot)) + + da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA'))) + SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da)) + + obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA')) + if obj.m_internal: + SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj))) + + SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj)) + + # call Py method to reconstruct text color, font name, size + Widget._parse_da() + + # extract JavaScript action texts + s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A')) + ss = JM_get_script(s) + SETATTR_DROP(Widget, "script", ss) + + SETATTR_DROP(Widget, "script_stroke", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K'))) + ) + + SETATTR_DROP(Widget, "script_format", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F'))) + ) + + SETATTR_DROP(Widget, "script_change", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V'))) + ) + + SETATTR_DROP(Widget, "script_calc", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C'))) + ) + + SETATTR_DROP(Widget, "script_blur", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'))) + ) + + SETATTR_DROP(Widget, "script_focus", + JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'))) + ) + + +def JM_get_fontextension(doc, xref): + ''' + Return the file extension of a font file, identified by xref + ''' + if xref < 1: + return "n/a" + o = mupdf.pdf_load_object(doc, xref) + desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts')) + if desft.m_internal: + obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0)) + obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor')) + else: + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor')) + if not obj.m_internal: + return "n/a" # this is a base-14 font + + o = obj # we have the FontDescriptor + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile')) + if obj.m_internal: + return "pfa" + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2')) + if obj.m_internal: + return "ttf" + + obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3')) + if obj.m_internal: + obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')) + if obj.m_internal and not mupdf.pdf_is_name(obj): + message("invalid font descriptor subtype") + return "n/a" + if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')): + return "cff" + elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')): + return "cid" + elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')): + return "otf" + else: + message("unhandled font type '%s'", mupdf.pdf_to_name(obj)) + + return "n/a" + + +def JM_get_ocg_arrays_imp(arr): + ''' + Get OCG arrays from OC configuration + Returns dict {"basestate":name, "on":list, "off":list, "rbg":list, "locked":list} + ''' + list_ = list() + if mupdf.pdf_is_array( arr): + n = mupdf.pdf_array_len( arr) + for i in range(n): + obj = mupdf.pdf_array_get( arr, i) + item = mupdf.pdf_to_num( obj) + if item not in list_: + list_.append(item) + return list_ + + +def JM_get_ocg_arrays(conf): + + rc = dict() + arr = mupdf.pdf_dict_get( conf, PDF_NAME('ON')) + list_ = JM_get_ocg_arrays_imp( arr) + if list_: + rc["on"] = list_ + arr = mupdf.pdf_dict_get( conf, PDF_NAME('OFF')) + list_ = JM_get_ocg_arrays_imp( arr) + if list_: + rc["off"] = list_ + arr = mupdf.pdf_dict_get( conf, PDF_NAME('Locked')) + list_ = JM_get_ocg_arrays_imp( arr) + if list_: + rc['locked'] = list_ + list_ = list() + arr = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups')) + if mupdf.pdf_is_array( arr): + n = mupdf.pdf_array_len( arr) + for i in range(n): + obj = mupdf.pdf_array_get( arr, i) + list1 = JM_get_ocg_arrays_imp( obj) + list_.append(list1) + if list_: + rc["rbgroups"] = list_ + obj = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState')) + + if obj.m_internal: + state = mupdf.pdf_to_name( obj) + rc["basestate"] = state + return rc + + +def JM_get_page_labels(liste, nums): + n = mupdf.pdf_array_len(nums) + for i in range(0, n, 2): + key = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i)) + pno = mupdf.pdf_to_int(key) + val = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, i + 1)) + res = JM_object_to_buffer(val, 1, 0) + c = mupdf.fz_buffer_extract(res) + assert isinstance(c, bytes) + c = c.decode('utf-8') + liste.append( (pno, c)) + + +def JM_get_script(key): + ''' + JavaScript extractor + Returns either the script source or None. Parameter is a PDF action + dictionary, which must have keys /S and /JS. The value of /S must be + '/JavaScript'. The value of /JS is returned. + ''' + if not key.m_internal: + return + + j = mupdf.pdf_dict_get(key, PDF_NAME('S')) + jj = mupdf.pdf_to_name(j) + if jj == "JavaScript": + js = mupdf.pdf_dict_get(key, PDF_NAME('JS')) + if not js.m_internal: + return + else: + return + + if mupdf.pdf_is_string(js): + script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js)) + elif mupdf.pdf_is_stream(js): + res = mupdf.pdf_load_stream(js) + script = JM_EscapeStrFromBuffer(res) + else: + return + if script: # do not return an empty script + return script + return + + +def JM_have_operation(pdf): + ''' + Ensure valid journalling state + ''' + if pdf.m_internal.journal and not mupdf.pdf_undoredo_step(pdf, 0): + return 0 + return 1 + + +def JM_image_extension(type_): + ''' + return extension for MuPDF image type + ''' + if type_ == mupdf.FZ_IMAGE_FAX: return "fax" + if type_ == mupdf.FZ_IMAGE_RAW: return "raw" + if type_ == mupdf.FZ_IMAGE_FLATE: return "flate" + if type_ == mupdf.FZ_IMAGE_LZW: return "lzw" + if type_ == mupdf.FZ_IMAGE_RLD: return "rld" + if type_ == mupdf.FZ_IMAGE_BMP: return "bmp" + if type_ == mupdf.FZ_IMAGE_GIF: return "gif" + if type_ == mupdf.FZ_IMAGE_JBIG2: return "jb2" + if type_ == mupdf.FZ_IMAGE_JPEG: return "jpeg" + if type_ == mupdf.FZ_IMAGE_JPX: return "jpx" + if type_ == mupdf.FZ_IMAGE_JXR: return "jxr" + if type_ == mupdf.FZ_IMAGE_PNG: return "png" + if type_ == mupdf.FZ_IMAGE_PNM: return "pnm" + if type_ == mupdf.FZ_IMAGE_TIFF: return "tiff" + #if type_ == mupdf.FZ_IMAGE_PSD: return "psd" + return "n/a" + + +# fixme: need to avoid using a global for this. +g_img_info = None + + +def JM_image_filter(opaque, ctm, name, image): + assert isinstance(ctm, mupdf.FzMatrix) + r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) + q = mupdf.fz_transform_quad( mupdf.fz_quad_from_rect(r), ctm) + q = mupdf.fz_transform_quad( q, g_img_info_matrix) + temp = name, JM_py_from_quad(q) + g_img_info.append(temp) + + +def JM_image_profile( imagedata, keep_image): + ''' + Return basic properties of an image provided as bytes or bytearray + The function creates an fz_image and optionally returns it. + ''' + if not imagedata: + return None # nothing given + + len_ = len( imagedata) + if len_ < 8: + message( "bad image data") + return None + c = imagedata + #log( 'calling mfz_recognize_image_format with {c!r=}') + type_ = mupdf.fz_recognize_image_format( c) + if type_ == mupdf.FZ_IMAGE_UNKNOWN: + return None + + if keep_image: + res = mupdf.fz_new_buffer_from_copied_data( c, len_) + else: + res = mupdf.fz_new_buffer_from_shared_data( c, len_) + image = mupdf.fz_new_image_from_buffer( res) + ctm = mupdf.fz_image_orientation_matrix( image) + xres, yres = mupdf.fz_image_resolution(image) + orientation = mupdf.fz_image_orientation( image) + cs_name = mupdf.fz_colorspace_name( image.colorspace()) + result = dict() + result[ dictkey_width] = image.w() + result[ dictkey_height] = image.h() + result[ "orientation"] = orientation + result[ dictkey_matrix] = JM_py_from_matrix(ctm) + result[ dictkey_xres] = xres + result[ dictkey_yres] = yres + result[ dictkey_colorspace] = image.n() + result[ dictkey_bpc] = image.bpc() + result[ dictkey_ext] = JM_image_extension(type_) + result[ dictkey_cs_name] = cs_name + + if keep_image: + result[ dictkey_image] = image + return result + + +def JM_image_reporter(page): + doc = page.doc() + global g_img_info_matrix + g_img_info_matrix = mupdf.FzMatrix() + mediabox = mupdf.FzRect() + mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix) + + class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2): + def __init__(self): + super().__init__() + self.use_virtual_image_filter() + def image_filter(self, ctx, ctm, name, image, scissor): + JM_image_filter(None, mupdf.FzMatrix(ctm), name, image) + + sanitize_filter_options = SanitizeFilterOptions() + + filter_options = _make_PdfFilterOptions( + instance_forms=1, + ascii=1, + no_update=1, + sanitize=1, + sopts=sanitize_filter_options, + ) + + global g_img_info + g_img_info = [] + + mupdf.pdf_filter_page_contents( doc, page, filter_options) + + rc = tuple(g_img_info) + g_img_info = [] + return rc + + +def JM_fitz_config(): + have_TOFU = not hasattr(mupdf, 'TOFU') + have_TOFU_BASE14 = not hasattr(mupdf, 'TOFU_BASE14') + have_TOFU_CJK = not hasattr(mupdf, 'TOFU_CJK') + have_TOFU_CJK_EXT = not hasattr(mupdf, 'TOFU_CJK_EXT') + have_TOFU_CJK_LANG = not hasattr(mupdf, 'TOFU_CJK_LANG') + have_TOFU_EMOJI = not hasattr(mupdf, 'TOFU_EMOJI') + have_TOFU_HISTORIC = not hasattr(mupdf, 'TOFU_HISTORIC') + have_TOFU_SIL = not hasattr(mupdf, 'TOFU_SIL') + have_TOFU_SYMBOL = not hasattr(mupdf, 'TOFU_SYMBOL') + + ret = dict() + ret["base14"] = have_TOFU_BASE14 + ret["cbz"] = bool(mupdf.FZ_ENABLE_CBZ) + ret["epub"] = bool(mupdf.FZ_ENABLE_EPUB) + ret["html"] = bool(mupdf.FZ_ENABLE_HTML) + ret["icc"] = bool(mupdf.FZ_ENABLE_ICC) + ret["img"] = bool(mupdf.FZ_ENABLE_IMG) + ret["jpx"] = bool(mupdf.FZ_ENABLE_JPX) + ret["js"] = bool(mupdf.FZ_ENABLE_JS) + ret["pdf"] = bool(mupdf.FZ_ENABLE_PDF) + ret["plotter-cmyk"] = bool(mupdf.FZ_PLOTTERS_CMYK) + ret["plotter-g"] = bool(mupdf.FZ_PLOTTERS_G) + ret["plotter-n"] = bool(mupdf.FZ_PLOTTERS_N) + ret["plotter-rgb"] = bool(mupdf.FZ_PLOTTERS_RGB) + ret["py-memory"] = bool(JM_MEMORY) + ret["svg"] = bool(mupdf.FZ_ENABLE_SVG) + ret["tofu"] = have_TOFU + ret["tofu-cjk"] = have_TOFU_CJK + ret["tofu-cjk-ext"] = have_TOFU_CJK_EXT + ret["tofu-cjk-lang"] = have_TOFU_CJK_LANG + ret["tofu-emoji"] = have_TOFU_EMOJI + ret["tofu-historic"] = have_TOFU_HISTORIC + ret["tofu-sil"] = have_TOFU_SIL + ret["tofu-symbol"] = have_TOFU_SYMBOL + ret["xps"] = bool(mupdf.FZ_ENABLE_XPS) + return ret + + +def JM_insert_contents(pdf, pageref, newcont, overlay): + ''' + Insert a buffer as a new separate /Contents object of a page. + 1. Create a new stream object from buffer 'newcont' + 2. If /Contents already is an array, then just prepend or append this object + 3. Else, create new array and put old content obj and this object into it. + If the page had no /Contents before, just create a 1-item array. + ''' + contents = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents')) + newconts = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0) + xref = mupdf.pdf_to_num(newconts) + if mupdf.pdf_is_array(contents): + if overlay: # append new object + mupdf.pdf_array_push(contents, newconts) + else: # prepend new object + mupdf.pdf_array_insert(contents, newconts, 0) + else: + carr = mupdf.pdf_new_array(pdf, 5) + if overlay: + if contents.m_internal: + mupdf.pdf_array_push(carr, contents) + mupdf.pdf_array_push(carr, newconts) + else: + mupdf.pdf_array_push(carr, newconts) + if contents.m_internal: + mupdf.pdf_array_push(carr, contents) + mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), carr) + return xref + + +def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering): + ''' + Insert a font in a PDF + ''' + font = None + res = None + data = None + ixref = 0 + index = 0 + simple = 0 + value=None + name=None + subt=None + exto = None + + ENSURE_OPERATION(pdf) + # check for CJK font + if ordering > -1: + data, size, index = mupdf.fz_lookup_cjk_font(ordering) + if data: + font = mupdf.fz_new_font_from_memory(None, data, size, index, 0) + font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif) + exto = "n/a" + simple = 0 + #goto weiter; + else: + + # check for PDF Base-14 font + if bfname: + data, size = mupdf.fz_lookup_base14_font(bfname) + if data: + font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0) + font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding) + exto = "n/a" + simple = 1 + #goto weiter; + + else: + if fontfile: + font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0) + else: + res = JM_BufferFromBytes(fontbuffer) + if not res.m_internal: + RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError) + font = mupdf.fz_new_font_from_buffer(None, res, idx, 0) + + if not set_simple: + font_obj = mupdf.pdf_add_cid_font(pdf, font) + simple = 0 + else: + font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding) + simple = 2 + #weiter: ; + ixref = mupdf.pdf_to_num(font_obj) + name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont')))) + + subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype')))) + + if not exto: + exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref)) + + asc = mupdf.fz_font_ascender(font) + dsc = mupdf.fz_font_descender(font) + value = [ + ixref, + { + "name": name, # base font name + "type": subt, # subtype + "ext": exto, # file extension + "simple": bool(simple), # simple font? + "ordering": ordering, # CJK font? + "ascender": asc, + "descender": dsc, + }, + ] + return value + +def JM_irect_from_py(r): + ''' + PySequence to mupdf.FzIrect. Default: infinite irect + ''' + if isinstance(r, mupdf.FzIrect): + return r + if isinstance(r, IRect): + r = mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1) + return r + if isinstance(r, Rect): + ret = mupdf.FzRect(r.x0, r.y0, r.x1, r.y1) + ret = mupdf.FzIrect(ret) # Uses fz_irect_from_rect(). + return ret + if isinstance(r, mupdf.FzRect): + ret = mupdf.FzIrect(r) # Uses fz_irect_from_rect(). + return ret + if not r or not PySequence_Check(r) or PySequence_Size(r) != 4: + return mupdf.FzIrect(mupdf.fz_infinite_irect) + f = [0, 0, 0, 0] + for i in range(4): + f[i] = r[i] + if f[i] is None: + return mupdf.FzIrect(mupdf.fz_infinite_irect) + if f[i] < FZ_MIN_INF_RECT: + f[i] = FZ_MIN_INF_RECT + if f[i] > FZ_MAX_INF_RECT: + f[i] = FZ_MAX_INF_RECT + return mupdf.fz_make_irect(f[0], f[1], f[2], f[3]) + +def JM_listbox_value( annot): + ''' + ListBox retrieve value + ''' + # may be single value or array + annot_obj = mupdf.pdf_annot_obj( annot) + optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('V')) + if mupdf.pdf_is_string( optarr): # a single string + return mupdf.pdf_to_text_string( optarr) + + # value is an array (may have len 0) + n = mupdf.pdf_array_len( optarr) + liste = [] + + # extract a list of strings + # each entry may again be an array: take second entry then + for i in range( n): + elem = mupdf.pdf_array_get( optarr, i) + if mupdf.pdf_is_array( elem): + elem = mupdf.pdf_array_get( elem, 1) + liste.append( JM_UnicodeFromStr( mupdf.pdf_to_text_string( elem))) + return liste + + +def JM_make_annot_DA(annot, ncol, col, fontname, fontsize): + # PyMuPDF uses a fz_buffer to build up the string, but it's non-trivial to + # convert the fz_buffer's `unsigned char*` into a `const char*` suitable + # for passing to pdf_dict_put_text_string(). So instead we build up the + # string directly in Python. + buf = '' + if ncol < 1: + buf += f'0 g ' + elif ncol == 1: + buf += f'{col[0]:g} g ' + elif ncol == 2: + assert 0 + elif ncol == 3: + buf += f'{col[0]:g} {col[1]:g} {col[2]:g} rg ' + else: + buf += f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k ' + buf += f'/{JM_expand_fname(fontname)} {fontsize} Tf' + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, buf) + + +def JM_make_spanlist(line_dict, line, raw, buff, tp_rect): + if 1 or g_use_extra: + return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect) + char_list = None + span_list = [] + mupdf.fz_clear_buffer(buff) + span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) + line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) + + class char_style: + def __init__(self, rhs=None): + if rhs: + self.size = rhs.size + self.flags = rhs.flags + if mupdf_version_tuple >= (1, 25, 2): + self.char_flags = rhs.char_flags + self.font = rhs.font + self.argb = rhs.argb + self.asc = rhs.asc + self.desc = rhs.desc + self.bidi = rhs.bidi + else: + self.size = -1 + self.flags = -1 + if mupdf_version_tuple >= (1, 25, 2): + self.char_flags = -1 + self.font = '' + self.argb = -1 + self.asc = 0 + self.desc = 0 + self.bidi = 0 + def __str__(self): + ret = f'{self.size} {self.flags}' + if mupdf_version_tuple >= (1, 25, 2): + ret += f' {self.char_flags}' + ret += f' {self.font} {self.color} {self.asc} {self.desc}' + return ret + + old_style = char_style() + style = char_style() + span = None + span_origin = None + + for ch in line: + # start-trace + r = JM_char_bbox(line, ch) + if (not JM_rects_overlap(tp_rect, r) + and not mupdf.fz_is_infinite_rect(tp_rect) + ): + continue + + # Info from: + # detect_super_script() + # fz_font_is_italic() + # fz_font_is_serif() + # fz_font_is_monospaced() + # fz_font_is_bold() + + flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch) + origin = mupdf.FzPoint(ch.m_internal.origin) + style.size = ch.m_internal.size + style.flags = flags + if mupdf_version_tuple >= (1, 25, 2): + # FZ_STEXT_SYNTHETIC is per-char, not per-span. + style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC + style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))) + style.argb = ch.m_internal.argb + style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))) + style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))) + style.bidi = ch.m_internal.bidi + + if (style.size != old_style.size + or style.flags != old_style.flags + or (mupdf_version_tuple >= (1, 25, 2) + and (style.char_flags != old_style.char_flags) + ) + or style.argb != old_style.argb + or style.font != old_style.font + or style.bidi != old_style.bidi + ): + if old_style.size >= 0: + # not first one, output previous + if raw: + # put character list in the span + span[dictkey_chars] = char_list + char_list = None + else: + # put text string in the span + span[dictkey_text] = JM_EscapeStrFromBuffer( buff) + mupdf.fz_clear_buffer(buff) + + span[dictkey_origin] = JM_py_from_point(span_origin) + span[dictkey_bbox] = JM_py_from_rect(span_rect) + line_rect = mupdf.fz_union_rect(line_rect, span_rect) + span_list.append( span) + span = None + + span = dict() + asc = style.asc + desc = style.desc + if style.asc < 1e-3: + asc = 0.9 + desc = -0.1 + + span[dictkey_size] = style.size + span[dictkey_flags] = style.flags + span[dictkey_bidi] = style.bidi + if mupdf_version_tuple >= (1, 25, 2): + span[dictkey_char_flags] = style.char_flags + span[dictkey_font] = JM_EscapeStrFromStr(style.font) + span[dictkey_color] = style.argb & 0xffffff + if mupdf_version_tuple >= (1, 25, 0): + span['alpha'] = style.argb >> 24 + span["ascender"] = asc + span["descender"] = desc + + # Need to be careful here - doing 'old_style=style' does a shallow + # copy, but we need to keep old_style as a distinct instance. + old_style = char_style(style) + span_rect = r + span_origin = origin + + span_rect = mupdf.fz_union_rect(span_rect, r) + + if raw: # make and append a char dict + char_dict = dict() + char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin) + char_dict[dictkey_bbox] = JM_py_from_rect(r) + char_dict[dictkey_c] = chr(ch.m_internal.c) + char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC) + + if char_list is None: + char_list = [] + char_list.append(char_dict) + else: # add character byte to buffer + JM_append_rune(buff, ch.m_internal.c) + + # all characters processed, now flush remaining span + if span: + if raw: + span[dictkey_chars] = char_list + char_list = None + else: + span[dictkey_text] = JM_EscapeStrFromBuffer(buff) + mupdf.fz_clear_buffer(buff) + span[dictkey_origin] = JM_py_from_point(span_origin) + span[dictkey_bbox] = JM_py_from_rect(span_rect) + + if not mupdf.fz_is_empty_rect(span_rect): + span_list.append(span) + line_rect = mupdf.fz_union_rect(line_rect, span_rect) + span = None + if not mupdf.fz_is_empty_rect(line_rect): + line_dict[dictkey_spans] = span_list + else: + line_dict[dictkey_spans] = span_list + return line_rect + +def _make_image_dict(img, img_dict): + """Populate a dictionary with information extracted from a given image. + + Used by 'Document.extract_image' and by 'JM_make_image_block'. + Both of these functions will add some more specific information. + """ + img_type = img.fz_compressed_image_type() + ext = JM_image_extension(img_type) + + # compressed image buffer if present, else None + ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal) + + if (0 + or not ll_cbuf + or img_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN) + or img_type < mupdf.FZ_IMAGE_BMP + ): + # not an image with a compressed buffer: convert to PNG + res = mupdf.fz_new_buffer_from_image_as_png( + img, + mupdf.FzColorParams(mupdf.fz_default_color_params), + ) + ext = "png" + elif ext == "jpeg" and img.n() == 4: + # JPEG with CMYK: invert colors + res = mupdf.fz_new_buffer_from_image_as_jpeg( + img, mupdf.FzColorParams(mupdf.fz_default_color_params), 95, 1) + else: + # copy the compressed buffer + res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer)) + + bytes_ = JM_BinFromBuffer(res) + img_dict[dictkey_width] = img.w() + img_dict[dictkey_height] = img.h() + img_dict[dictkey_ext] = ext + img_dict[dictkey_colorspace] = img.n() + img_dict[dictkey_xres] = img.xres() + img_dict[dictkey_yres] = img.yres() + img_dict[dictkey_bpc] = img.bpc() + img_dict[dictkey_size] = len(bytes_) + img_dict[dictkey_image] = bytes_ + +def JM_make_image_block(block, block_dict): + img = block.i_image() + _make_image_dict(img, block_dict) + # if the image has a mask, store it as a PNG buffer + mask = img.mask() + if mask.m_internal: + buff = mask.fz_new_buffer_from_image_as_png(mupdf.FzColorParams(mupdf.fz_default_color_params)) + block_dict["mask"] = buff.fz_buffer_extract() + else: + block_dict["mask"] = None + block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform()) + + +def JM_make_text_block(block, block_dict, raw, buff, tp_rect): + if 1 or g_use_extra: + return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal) + line_list = [] + block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) + #log(f'{block=}') + for line in block: + #log(f'{line=}') + if (mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox))) + and not mupdf.fz_is_infinite_rect(tp_rect) + ): + continue + line_dict = dict() + line_rect = JM_make_spanlist(line_dict, line, raw, buff, tp_rect) + block_rect = mupdf.fz_union_rect(block_rect, line_rect) + line_dict[dictkey_wmode] = line.m_internal.wmode + line_dict[dictkey_dir] = JM_py_from_point(line.m_internal.dir) + line_dict[dictkey_bbox] = JM_py_from_rect(line_rect) + line_list.append(line_dict) + block_dict[dictkey_bbox] = JM_py_from_rect(block_rect) + block_dict[dictkey_lines] = line_list + + +def JM_make_textpage_dict(tp, page_dict, raw): + if 1 or g_use_extra: + return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw) + text_buffer = mupdf.fz_new_buffer(128) + block_list = [] + tp_rect = mupdf.FzRect(tp.m_internal.mediabox) + block_n = -1 + #log( 'JM_make_textpage_dict {=tp}') + for block in tp: + block_n += 1 + if (not mupdf.fz_contains_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox)) + and not mupdf.fz_is_infinite_rect(tp_rect) + and block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE + ): + continue + if (not mupdf.fz_is_infinite_rect(tp_rect) + and mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(block.m_internal.bbox))) + ): + continue + + block_dict = dict() + block_dict[dictkey_number] = block_n + block_dict[dictkey_type] = block.m_internal.type + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE: + block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox) + JM_make_image_block(block, block_dict) + else: + JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect) + + block_list.append(block_dict) + page_dict[dictkey_blocks] = block_list + + +def JM_matrix_from_py(m): + a = [0, 0, 0, 0, 0, 0] + if isinstance(m, mupdf.FzMatrix): + return m + if isinstance(m, Matrix): + return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f) + if not m or not PySequence_Check(m) or PySequence_Size(m) != 6: + return mupdf.FzMatrix() + for i in range(6): + a[i] = JM_FLOAT_ITEM(m, i) + if a[i] is None: + return mupdf.FzRect() + return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5]) + + +def JM_mediabox(page_obj): + ''' + return a PDF page's MediaBox + ''' + page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) + mediabox = mupdf.pdf_to_rect( + mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox')) + ) + if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox): + mediabox.x0 = 0 + mediabox.y0 = 0 + mediabox.x1 = 612 + mediabox.y1 = 792 + + page_mediabox = mupdf.FzRect( + mupdf.fz_min(mediabox.x0, mediabox.x1), + mupdf.fz_min(mediabox.y0, mediabox.y1), + mupdf.fz_max(mediabox.x0, mediabox.x1), + mupdf.fz_max(mediabox.y0, mediabox.y1), + ) + + if (page_mediabox.x1 - page_mediabox.x0 < 1 + or page_mediabox.y1 - page_mediabox.y0 < 1 + ): + page_mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) + + return page_mediabox + + +def JM_merge_range( + doc_des, + doc_src, + spage, + epage, + apage, + rotate, + links, + annots, + show_progress, + graft_map, + ): + ''' + Copy a range of pages (spage, epage) from a source PDF to a specified + location (apage) of the target PDF. + If spage > epage, the sequence of source pages is reversed. + ''' + if g_use_extra: + return extra.JM_merge_range( + doc_des, + doc_src, + spage, + epage, + apage, + rotate, + links, + annots, + show_progress, + graft_map, + ) + afterpage = apage + counter = 0 # copied pages counter + total = mupdf.fz_absi(epage - spage) + 1 # total pages to copy + + if spage < epage: + page = spage + while page <= epage: + page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map) + counter += 1 + if show_progress > 0 and counter % show_progress == 0: + message(f"Inserted {counter} of {total} pages.") + page += 1 + afterpage += 1 + else: + page = spage + while page >= epage: + page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map) + counter += 1 + if show_progress > 0 and counter % show_progress == 0: + message(f"Inserted {counter} of {total} pages.") + page -= 1 + afterpage += 1 + + +def JM_merge_resources( page, temp_res): + ''' + Merge the /Resources object created by a text pdf device into the page. + The device may have created multiple /ExtGState/Alp? and /Font/F? objects. + These need to be renamed (renumbered) to not overwrite existing page + objects from previous executions. + Returns the next available numbers n, m for objects /Alp, /F. + ''' + # page objects /Resources, /Resources/ExtGState, /Resources/Font + resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources')) + if not resources.m_internal: + resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5) + main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState')) + main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font')) + + # text pdf device objects /ExtGState, /Font + temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState')) + temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font')) + + max_alp = -1 + max_fonts = -1 + + # Handle /Alp objects + if mupdf.pdf_is_dict(temp_extg): # any created at all? + n = mupdf.pdf_dict_len(temp_extg) + if mupdf.pdf_is_dict(main_extg): # does page have /ExtGState yet? + for i in range(mupdf.pdf_dict_len(main_extg)): + # get highest number of objects named /Alpxxx + alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(main_extg, i)) + if not alp.startswith('Alp'): + continue + j = mupdf.fz_atoi(alp[3:]) + if j > max_alp: + max_alp = j + else: # create a /ExtGState for the page + main_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), n) + + max_alp += 1 + for i in range(n): # copy over renumbered /Alp objects + alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_extg, i)) + j = mupdf.fz_atoi(alp[3:]) + max_alp + text = f'Alp{j}' + val = mupdf.pdf_dict_get_val( temp_extg, i) + mupdf.pdf_dict_puts(main_extg, text, val) + + if mupdf.pdf_is_dict(main_fonts): # has page any fonts yet? + for i in range(mupdf.pdf_dict_len(main_fonts)): # get max font number + font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( main_fonts, i)) + if not font.startswith("F"): + continue + j = mupdf.fz_atoi(font[1:]) + if j > max_fonts: + max_fonts = j + else: # create a Resources/Font for the page + main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2) + + max_fonts += 1 + for i in range(mupdf.pdf_dict_len(temp_fonts)): # copy renumbered fonts + font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_fonts, i)) + j = mupdf.fz_atoi(font[1:]) + max_fonts + text = f'F{j}' + val = mupdf.pdf_dict_get_val(temp_fonts, i) + mupdf.pdf_dict_puts(main_fonts, text, val) + return (max_alp, max_fonts) # next available numbers + + +def JM_mupdf_warning( text): + ''' + redirect MuPDF warnings + ''' + JM_mupdf_warnings_store.append(text) + if JM_mupdf_show_warnings: + message(f'MuPDF warning: {text}') + + +def JM_mupdf_error( text): + JM_mupdf_warnings_store.append(text) + if JM_mupdf_show_errors: + message(f'MuPDF error: {text}\n') + + +def JM_new_bbox_device(rc, inc_layers): + assert isinstance(rc, list) + return JM_new_bbox_device_Device( rc, inc_layers) + + +def JM_new_buffer_from_stext_page(page): + ''' + make a buffer from an stext_page's text + ''' + assert isinstance(page, mupdf.FzStextPage) + rect = mupdf.FzRect(page.m_internal.mediabox) + buf = mupdf.fz_new_buffer(256) + for block in page: + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT: + for line in block: + for ch in line: + if (not JM_rects_overlap(rect, JM_char_bbox(line, ch)) + and not mupdf.fz_is_infinite_rect(rect) + ): + continue + mupdf.fz_append_rune(buf, ch.m_internal.c) + mupdf.fz_append_byte(buf, ord('\n')) + mupdf.fz_append_byte(buf, ord('\n')) + return buf + + +def JM_new_javascript(pdf, value): + ''' + make new PDF action object from JavaScript source + Parameters are a PDF document and a Python string. + Returns a PDF action object. + ''' + if value is None: + # no argument given + return + data = JM_StrAsChar(value) + if data is None: + # not convertible to char* + return + + res = mupdf.fz_new_buffer_from_copied_data(data.encode('utf8')) + source = mupdf.pdf_add_stream(pdf, res, mupdf.PdfObj(), 0) + newaction = mupdf.pdf_add_new_dict(pdf, 4) + mupdf.pdf_dict_put(newaction, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript')) + mupdf.pdf_dict_put(newaction, PDF_NAME('JS'), source) + return newaction + + +def JM_new_output_fileptr(bio): + return JM_new_output_fileptr_Output( bio) + + +def JM_norm_rotation(rotate): + ''' + # return normalized /Rotate value:one of 0, 90, 180, 270 + ''' + while rotate < 0: + rotate += 360 + while rotate >= 360: + rotate -= 360 + if rotate % 90 != 0: + return 0 + return rotate + + +def JM_object_to_buffer(what, compress, ascii): + res = mupdf.fz_new_buffer(512) + out = mupdf.FzOutput(res) + mupdf.pdf_print_obj(out, what, compress, ascii) + out.fz_close_output() + mupdf.fz_terminate_buffer(res) + return res + + +def JM_outline_xrefs(obj, xrefs): + ''' + Return list of outline xref numbers. Recursive function. Arguments: + 'obj' first OL item + 'xrefs' empty Python list + ''' + if not obj.m_internal: + return xrefs + thisobj = obj + while thisobj.m_internal: + newxref = mupdf.pdf_to_num( thisobj) + if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal: + # circular ref or top of chain: terminate + break + xrefs.append( newxref) + first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First')) # try go down + if mupdf.pdf_is_dict( first): + xrefs = JM_outline_xrefs( first, xrefs) + thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next')) # try go next + parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent')) # get parent + if not mupdf.pdf_is_dict( thisobj): + thisobj = parent + return xrefs + + +def JM_page_rotation(page): + ''' + return a PDF page's /Rotate value: one of (0, 90, 180, 270) + ''' + rotate = 0 + + obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate) + rotate = mupdf.pdf_to_int(obj) + rotate = JM_norm_rotation(rotate) + return rotate + + +def JM_pdf_obj_from_str(doc, src): + ''' + create PDF object from given string (new in v1.14.0: MuPDF dropped it) + ''' + # fixme: seems inefficient to convert to bytes instance then make another + # copy inside fz_new_buffer_from_copied_data(), but no other way? + # + buffer_ = mupdf.fz_new_buffer_from_copied_data(bytes(src, 'utf8')) + stream = mupdf.fz_open_buffer(buffer_) + lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL) + result = mupdf.pdf_parse_stm_obj(doc, stream, lexbuf) + return result + + +def JM_pixmap_from_display_list( + list_, + ctm, + cs, + alpha, + clip, + seps, + ): + ''' + Version of fz_new_pixmap_from_display_list (util.c) to also support + rendering of only the 'clip' part of the displaylist rectangle + ''' + assert isinstance(list_, mupdf.FzDisplayList) + if seps is None: + seps = mupdf.FzSeparations() + assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}' + + rect = mupdf.fz_bound_display_list(list_) + matrix = JM_matrix_from_py(ctm) + rclip = JM_rect_from_py(clip) + rect = mupdf.fz_intersect_rect(rect, rclip) # no-op if clip is not given + + rect = mupdf.fz_transform_rect(rect, matrix) + irect = mupdf.fz_round_rect(rect) + + assert isinstance( cs, mupdf.FzColorspace) + + pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha) + if alpha: + mupdf.fz_clear_pixmap(pix) + else: + mupdf.fz_clear_pixmap_with_value(pix, 0xFF) + + if not mupdf.fz_is_infinite_rect(rclip): + dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect) + mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), rclip, mupdf.FzCookie()) + else: + dev = mupdf.fz_new_draw_device(matrix, pix) + mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE), mupdf.FzCookie()) + + mupdf.fz_close_device(dev) + # Use special raw Pixmap constructor so we don't set alpha to true. + return Pixmap( 'raw', pix) + + +def JM_point_from_py(p): + ''' + PySequence to fz_point. Default: (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) + ''' + if isinstance(p, mupdf.FzPoint): + return p + if isinstance(p, Point): + return mupdf.FzPoint(p.x, p.y) + if g_use_extra: + return extra.JM_point_from_py( p) + + p0 = mupdf.FzPoint(0, 0) + x = JM_FLOAT_ITEM(p, 0) + y = JM_FLOAT_ITEM(p, 1) + if x is None or y is None: + return p0 + x = max( x, FZ_MIN_INF_RECT) + y = max( y, FZ_MIN_INF_RECT) + x = min( x, FZ_MAX_INF_RECT) + y = min( y, FZ_MAX_INF_RECT) + return mupdf.FzPoint(x, y) + + +def JM_print_stext_page_as_text(res, page): + ''' + Plain text output. An identical copy of fz_print_stext_page_as_text, + but lines within a block are concatenated by space instead a new-line + character (which else leads to 2 new-lines). + ''' + if 1 and g_use_extra: + return extra.JM_print_stext_page_as_text(res, page) + + assert isinstance(res, mupdf.FzBuffer) + assert isinstance(page, mupdf.FzStextPage) + rect = mupdf.FzRect(page.m_internal.mediabox) + last_char = 0 + + n_blocks = 0 + n_lines = 0 + n_chars = 0 + for n_blocks2, block in enumerate( page): + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT: + for n_lines2, line in enumerate( block): + for n_chars2, ch in enumerate( line): + pass + n_chars += n_chars2 + n_lines += n_lines2 + n_blocks += n_blocks2 + + for block in page: + if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT: + for line in block: + last_char = 0 + for ch in line: + chbbox = JM_char_bbox(line, ch) + if (mupdf.fz_is_infinite_rect(rect) + or JM_rects_overlap(rect, chbbox) + ): + #raw += chr(ch.m_internal.c) + last_char = ch.m_internal.c + #log( '{=last_char!r utf!r}') + JM_append_rune(res, last_char) + if last_char != 10 and last_char > 0: + mupdf.fz_append_string(res, "\n") + + +def JM_put_script(annot_obj, key1, key2, value): + ''' + Create a JavaScript PDF action. + Usable for all object types which support PDF actions, even if the + argument name suggests annotations. Up to 2 key values can be specified, so + JavaScript actions can be stored for '/A' and '/AA/?' keys. + ''' + key1_obj = mupdf.pdf_dict_get(annot_obj, key1) + pdf = mupdf.pdf_get_bound_document(annot_obj) # owning PDF + + # if no new script given, just delete corresponding key + if not value: + if key2 is None or not key2.m_internal: + mupdf.pdf_dict_del(annot_obj, key1) + elif key1_obj.m_internal: + mupdf.pdf_dict_del(key1_obj, key2) + return + + # read any existing script as a PyUnicode string + if not key2.m_internal or not key1_obj.m_internal: + script = JM_get_script(key1_obj) + else: + script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2)) + + # replace old script, if different from new one + if value != script: + newaction = JM_new_javascript(pdf, value) + if not key2.m_internal: + mupdf.pdf_dict_put(annot_obj, key1, newaction) + else: + mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2) + + +def JM_py_from_irect(r): + return r.x0, r.y0, r.x1, r.y1 + + +def JM_py_from_matrix(m): + return m.a, m.b, m.c, m.d, m.e, m.f + + +def JM_py_from_point(p): + return p.x, p.y + + +def JM_py_from_quad(q): + ''' + PySequence from fz_quad. + ''' + return ( + (q.ul.x, q.ul.y), + (q.ur.x, q.ur.y), + (q.ll.x, q.ll.y), + (q.lr.x, q.lr.y), + ) + + +def JM_py_from_rect(r): + return r.x0, r.y0, r.x1, r.y1 + + +def JM_quad_from_py(r): + if isinstance(r, mupdf.FzQuad): + return r + # cover all cases of 4-float-sequences + if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"): + r = mupdf.FzRect(*tuple(r)) + if isinstance( r, mupdf.FzRect): + return mupdf.fz_quad_from_rect( r) + if isinstance( r, Quad): + return mupdf.fz_make_quad( + r.ul.x, r.ul.y, + r.ur.x, r.ur.y, + r.ll.x, r.ll.y, + r.lr.x, r.lr.y, + ) + q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0) + p = [0,0,0,0] + if not r or not isinstance(r, (tuple, list)) or len(r) != 4: + return q + + if JM_FLOAT_ITEM(r, 0) is None: + return mupdf.fz_quad_from_rect(JM_rect_from_py(r)) + + for i in range(4): + if i >= len(r): + return q # invalid: cancel the rest + obj = r[i] # next point item + if not PySequence_Check(obj) or PySequence_Size(obj) != 2: + return q # invalid: cancel the rest + + p[i].x = JM_FLOAT_ITEM(obj, 0) + p[i].y = JM_FLOAT_ITEM(obj, 1) + if p[i].x is None or p[i].y is None: + return q + p[i].x = max( p[i].x, FZ_MIN_INF_RECT) + p[i].y = max( p[i].y, FZ_MIN_INF_RECT) + p[i].x = min( p[i].x, FZ_MAX_INF_RECT) + p[i].y = min( p[i].y, FZ_MAX_INF_RECT) + q.ul = p[0] + q.ur = p[1] + q.ll = p[2] + q.lr = p[3] + return q + + +def JM_read_contents(pageref): + ''' + Read and concatenate a PDF page's /Contents object(s) in a buffer + ''' + assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}' + contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents) + if mupdf.pdf_is_array(contents): + res = mupdf.FzBuffer(1024) + for i in range(mupdf.pdf_array_len(contents)): + if i > 0: + mupdf.fz_append_byte(res, 32) + obj = mupdf.pdf_array_get(contents, i) + if mupdf.pdf_is_stream(obj): + nres = mupdf.pdf_load_stream(obj) + mupdf.fz_append_buffer(res, nres) + elif contents.m_internal: + res = mupdf.pdf_load_stream(contents) + else: + res = mupdf.FzBuffer(0) + return res + + +def JM_rect_from_py(r): + if isinstance(r, mupdf.FzRect): + return r + if isinstance(r, mupdf.FzIrect): + return mupdf.FzRect(r) + if isinstance(r, Rect): + return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1) + if isinstance(r, IRect): + return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1) + if not r or not PySequence_Check(r) or PySequence_Size(r) != 4: + return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE) + f = [0, 0, 0, 0] + for i in range(4): + f[i] = JM_FLOAT_ITEM(r, i) + if f[i] is None: + return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE) + if f[i] < FZ_MIN_INF_RECT: + f[i] = FZ_MIN_INF_RECT + if f[i] > FZ_MAX_INF_RECT: + f[i] = FZ_MAX_INF_RECT + return mupdf.fz_make_rect(f[0], f[1], f[2], f[3]) + + +def JM_rects_overlap(a, b): + if (0 + or a.x0 >= b.x1 + or a.y0 >= b.y1 + or a.x1 <= b.x0 + or a.y1 <= b.y0 + ): + return 0 + return 1 + + +def JM_refresh_links( page): + ''' + refreshes the link and annotation tables of a page + ''' + if page is None or not page.m_internal: + return + obj = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')) + if obj.m_internal: + pdf = page.doc() + number = mupdf.pdf_lookup_page_number( pdf, page.obj()) + page_mediabox = mupdf.FzRect() + page_ctm = mupdf.FzMatrix() + mupdf.pdf_page_transform( page, page_mediabox, page_ctm) + link = mupdf.pdf_load_link_annots( pdf, page, obj, number, page_ctm) + page.m_internal.links = mupdf.ll_fz_keep_link( link.m_internal) + + +def JM_rotate_page_matrix(page): + ''' + calculate page rotation matrices + ''' + if not page.m_internal: + return mupdf.FzMatrix() # no valid pdf page given + rotation = JM_page_rotation(page) + #log( '{rotation=}') + if rotation == 0: + return mupdf.FzMatrix() # no rotation + cb_size = JM_cropbox_size(page.obj()) + w = cb_size.x + h = cb_size.y + #log( '{=h w}') + if rotation == 90: + m = mupdf.fz_make_matrix(0, 1, -1, 0, h, 0) + elif rotation == 180: + m = mupdf.fz_make_matrix(-1, 0, 0, -1, w, h) + else: + m = mupdf.fz_make_matrix(0, -1, 1, 0, 0, w) + #log( 'returning {m=}') + return m + + +def JM_search_stext_page(page, needle): + if 1 or g_use_extra: + return extra.JM_search_stext_page(page.m_internal, needle) + + rect = mupdf.FzRect(page.m_internal.mediabox) + if not needle: + return + quads = [] + class Hits: + def __str__(self): + return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}' + hits = Hits() + hits.len = 0 + hits.quads = quads + hits.hfuzz = 0.2 # merge kerns but not large gaps + hits.vfuzz = 0.1 + + buffer_ = JM_new_buffer_from_stext_page(page) + haystack_string = mupdf.fz_string_from_buffer(buffer_) + haystack = 0 + begin, end = find_string(haystack_string[haystack:], needle) + if begin is None: + #goto no_more_matches; + return quads + + begin += haystack + end += haystack + inside = 0 + i = 0 + for block in page: + if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT: + continue + for line in block: + for ch in line: + i += 1 + if not mupdf.fz_is_infinite_rect(rect): + r = JM_char_bbox(line, ch) + if not JM_rects_overlap(rect, r): + #goto next_char; + continue + while 1: + #try_new_match: + if not inside: + if haystack >= begin: + inside = 1 + if inside: + if haystack < end: + on_highlight_char(hits, line, ch) + break + else: + inside = 0 + begin, end = find_string(haystack_string[haystack:], needle) + if begin is None: + #goto no_more_matches; + return quads + else: + #goto try_new_match; + begin += haystack + end += haystack + continue + break + haystack += 1 + #next_char:; + assert haystack_string[haystack] == '\n', \ + f'{haystack=} {haystack_string[haystack]=}' + haystack += 1 + assert haystack_string[haystack] == '\n', \ + f'{haystack=} {haystack_string[haystack]=}' + haystack += 1 + #no_more_matches:; + return quads + + +def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer): + ''' + Step through /Resources, looking up image, xobject or font information + ''' + if mupdf.pdf_mark_obj(rsrc): + mupdf.fz_warn('Circular dependencies! Consider page cleaning.') + return # Circular dependencies! + try: + xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject) + + if what == 1: # lookup fonts + font = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font) + JM_gather_fonts(pdf, font, liste, stream_xref) + elif what == 2: # look up images + JM_gather_images(pdf, xobj, liste, stream_xref) + elif what == 3: # look up form xobjects + JM_gather_forms(pdf, xobj, liste, stream_xref) + else: # should never happen + return + + # check if we need to recurse into Form XObjects + n = mupdf.pdf_dict_len(xobj) + for i in range(n): + obj = mupdf.pdf_dict_get_val(xobj, i) + if mupdf.pdf_is_stream(obj): + sxref = mupdf.pdf_to_num(obj) + else: + sxref = 0 + subrsrc = mupdf.pdf_dict_get(obj, mupdf.PDF_ENUM_NAME_Resources) + if subrsrc.m_internal: + sxref_t = sxref + if sxref_t not in tracer: + tracer.append(sxref_t) + JM_scan_resources( pdf, subrsrc, liste, what, sxref, tracer) + else: + mupdf.fz_warn('Circular dependencies! Consider page cleaning.') + return + finally: + mupdf.pdf_unmark_obj(rsrc) + + +def JM_set_choice_options(annot, liste): + ''' + set ListBox / ComboBox values + ''' + if not liste: + return + assert isinstance( liste, (tuple, list)) + n = len( liste) + if n == 0: + return + annot_obj = mupdf.pdf_annot_obj( annot) + pdf = mupdf.pdf_get_bound_document( annot_obj) + optarr = mupdf.pdf_new_array( pdf, n) + for i in range(n): + val = liste[i] + opt = val + if isinstance(opt, str): + mupdf.pdf_array_push_text_string( optarr, opt) + else: + assert isinstance( val, (tuple, list)) and len( val) == 2, 'bad choice field list' + opt1, opt2 = val + assert opt1 and opt2, 'bad choice field list' + optarrsub = mupdf.pdf_array_push_array( optarr, 2) + mupdf.pdf_array_push_text_string( optarrsub, opt1) + mupdf.pdf_array_push_text_string( optarrsub, opt2) + mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), optarr) + + +def JM_set_field_type(doc, obj, type): + ''' + Set the field type + ''' + setbits = 0 + clearbits = 0 + typename = None + if type == mupdf.PDF_WIDGET_TYPE_BUTTON: + typename = PDF_NAME('Btn') + setbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON + elif type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: + typename = PDF_NAME('Btn') + clearbits = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON + setbits = mupdf.PDF_BTN_FIELD_IS_RADIO + elif type == mupdf.PDF_WIDGET_TYPE_CHECKBOX: + typename = PDF_NAME('Btn') + clearbits = (mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON | mupdf.PDF_BTN_FIELD_IS_RADIO) + elif type == mupdf.PDF_WIDGET_TYPE_TEXT: + typename = PDF_NAME('Tx') + elif type == mupdf.PDF_WIDGET_TYPE_LISTBOX: + typename = PDF_NAME('Ch') + clearbits = mupdf.PDF_CH_FIELD_IS_COMBO + elif type == mupdf.PDF_WIDGET_TYPE_COMBOBOX: + typename = PDF_NAME('Ch') + setbits = mupdf.PDF_CH_FIELD_IS_COMBO + elif type == mupdf.PDF_WIDGET_TYPE_SIGNATURE: + typename = PDF_NAME('Sig') + + if typename is not None and typename.m_internal: + mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename) + + if setbits != 0 or clearbits != 0: + bits = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff')) + bits &= ~clearbits + bits |= setbits + mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), bits) + + +def JM_set_object_value(obj, key, value): + ''' + Set a PDF dict key to some value + ''' + eyecatcher = "fitz: replace me!" + pdf = mupdf.pdf_get_bound_document(obj) + # split PDF key at path seps and take last key part + list_ = key.split('/') + len_ = len(list_) + i = len_ - 1 + skey = list_[i] + + del list_[i] # del the last sub-key + len_ = len(list_) # remaining length + testkey = mupdf.pdf_dict_getp(obj, key) # check if key already exists + if not testkey.m_internal: + #No, it will be created here. But we cannot allow this happening if + #indirect objects are referenced. So we check all higher level + #sub-paths for indirect references. + while len_ > 0: + t = '/'.join(list_) # next high level + if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))): + raise Exception("path to '%s' has indirects", JM_StrAsChar(skey)) + del list_[len_ - 1] # del last sub-key + len_ = len(list_) # remaining length + # Insert our eyecatcher. Will create all sub-paths in the chain, or + # respectively remove old value of key-path. + mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher)) + testkey = mupdf.pdf_dict_getp(obj, key) + if not mupdf.pdf_is_string(testkey): + raise Exception("cannot insert value for '%s'", key) + temp = mupdf.pdf_to_text_string(testkey) + if temp != eyecatcher: + raise Exception("cannot insert value for '%s'", key) + # read the result as a string + res = JM_object_to_buffer(obj, 1, 0) + objstr = JM_EscapeStrFromBuffer(res) + + # replace 'eyecatcher' by desired 'value' + nullval = "/%s(%s)" % ( skey, eyecatcher) + newval = "/%s %s" % (skey, value) + newstr = objstr.replace(nullval, newval, 1) + + # make PDF object from resulting string + new_obj = JM_pdf_obj_from_str(pdf, newstr) + return new_obj + + +def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked): + if basestate: + mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate) + + if on is not None: + mupdf.pdf_dict_del( conf, PDF_NAME('ON')) + if on: + arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('ON'), 1) + JM_set_ocg_arrays_imp( arr, on) + if off is not None: + mupdf.pdf_dict_del( conf, PDF_NAME('OFF')) + if off: + arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('OFF'), 1) + JM_set_ocg_arrays_imp( arr, off) + if locked is not None: + mupdf.pdf_dict_del( conf, PDF_NAME('Locked')) + if locked: + arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('Locked'), 1) + JM_set_ocg_arrays_imp( arr, locked) + if rbgroups is not None: + mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups')) + if rbgroups: + arr = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1) + n =len(rbgroups) + for i in range(n): + item0 = rbgroups[i] + obj = mupdf.pdf_array_push_array( arr, 1) + JM_set_ocg_arrays_imp( obj, item0) + + +def JM_set_ocg_arrays_imp(arr, list_): + ''' + Set OCG arrays from dict of Python lists + Works with dict like {"basestate":name, "on":list, "off":list, "rbg":list} + ''' + pdf = mupdf.pdf_get_bound_document(arr) + for xref in list_: + obj = mupdf.pdf_new_indirect(pdf, xref, 0) + mupdf.pdf_array_push(arr, obj) + + +def JM_set_resource_property(ref, name, xref): + ''' + Insert an item into Resources/Properties (used for Marked Content) + Arguments: + (1) e.g. page object, Form XObject + (2) marked content name + (3) xref of the referenced object (insert as indirect reference) + ''' + pdf = mupdf.pdf_get_bound_document(ref) + ind = mupdf.pdf_new_indirect(pdf, xref, 0) + if not ind.m_internal: + RAISEPY(MSG_BAD_XREF, PyExc_ValueError) + resources = mupdf.pdf_dict_get(ref, PDF_NAME('Resources')) + if not resources.m_internal: + resources = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1) + properties = mupdf.pdf_dict_get(resources, PDF_NAME('Properties')) + if not properties.m_internal: + properties = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Properties'), 1) + mupdf.pdf_dict_put(properties, mupdf.pdf_new_name(name), ind) + + +def JM_set_widget_properties(annot, Widget): + ''' + Update the PDF form field with the properties from a Python Widget object. + Called by "Page.add_widget" and "Annot.update_widget". + ''' + if isinstance( annot, Annot): + annot = annot.this + assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}' + page = _pdf_annot_page(annot) + assert page.m_internal, 'Annot is not bound to a page' + annot_obj = mupdf.pdf_annot_obj(annot) + pdf = page.doc() + def GETATTR(name): + return getattr(Widget, name, None) + + value = GETATTR("field_type") + field_type = value + + # rectangle -------------------------------------------------------------- + value = GETATTR("rect") + rect = JM_rect_from_py(value) + rot_mat = JM_rotate_page_matrix(page) + rect = mupdf.fz_transform_rect(rect, rot_mat) + mupdf.pdf_set_annot_rect(annot, rect) + + # fill color ------------------------------------------------------------- + value = GETATTR("fill_color") + if value and PySequence_Check(value): + n = len(value) + fill_col = mupdf.pdf_new_array(pdf, n) + col = 0 + for i in range(n): + col = value[i] + mupdf.pdf_array_push_real(fill_col, col) + mupdf.pdf_field_set_fill_color(annot_obj, fill_col) + + # dashes ----------------------------------------------------------------- + value = GETATTR("border_dashes") + if value and PySequence_Check(value): + n = len(value) + dashes = mupdf.pdf_new_array(pdf, n) + for i in range(n): + mupdf.pdf_array_push_int(dashes, value[i]) + mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D')) + + # border color ----------------------------------------------------------- + value = GETATTR("border_color") + if value and PySequence_Check(value): + n = len(value) + border_col = mupdf.pdf_new_array(pdf, n) + col = 0 + for i in range(n): + col = value[i] + mupdf.pdf_array_push_real(border_col, col) + mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC')) + + # entry ignored - may be used later + # + #int text_format = (int) PyInt_AsLong(GETATTR("text_format")); + # + + # field label ----------------------------------------------------------- + value = GETATTR("field_label") + if value is not None: + label = JM_StrAsChar(value) + mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label) + + # field name ------------------------------------------------------------- + value = GETATTR("field_name") + if value is not None: + name = JM_StrAsChar(value) + old_name = mupdf.pdf_load_field_name(annot_obj) + if name != old_name: + mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name) + + # max text len ----------------------------------------------------------- + if field_type == mupdf.PDF_WIDGET_TYPE_TEXT: + value = GETATTR("text_maxlen") + text_maxlen = value + if text_maxlen: + mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen) + value = GETATTR("field_display") + d = value + mupdf.pdf_field_set_display(annot_obj, d) + + # choice values ---------------------------------------------------------- + if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX): + value = GETATTR("choice_values") + JM_set_choice_options(annot, value) + + # border style ----------------------------------------------------------- + value = GETATTR("border_style") + val = JM_get_border_style(value) + mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S')) + + # border width ----------------------------------------------------------- + value = GETATTR("border_width") + border_width = value + mupdf.pdf_dict_putl( + annot_obj, + mupdf.pdf_new_real(border_width), + PDF_NAME('BS'), + PDF_NAME('W'), + ) + + # /DA string ------------------------------------------------------------- + value = GETATTR("_text_da") + da = JM_StrAsChar(value) + mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da) + mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS')) # not supported by MuPDF + mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC')) # not supported by MuPDF + + # field flags ------------------------------------------------------------ + field_flags = GETATTR("field_flags") + if field_flags is not None: + if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX: + field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO + elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: + field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO + elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON: + field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON + mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags) + + # button caption --------------------------------------------------------- + value = GETATTR("button_caption") + ca = JM_StrAsChar(value) + if ca: + mupdf.pdf_field_set_button_caption(annot_obj, ca) + + # script (/A) ------------------------------------------------------- + value = GETATTR("script") + JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value) + + # script (/AA/K) ------------------------------------------------------- + value = GETATTR("script_stroke") + JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value) + + # script (/AA/F) ------------------------------------------------------- + value = GETATTR("script_format") + JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value) + + # script (/AA/V) ------------------------------------------------------- + value = GETATTR("script_change") + JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value) + + # script (/AA/C) ------------------------------------------------------- + value = GETATTR("script_calc") + JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value) + + # script (/AA/Bl) ------------------------------------------------------- + value = GETATTR("script_blur") + JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value) + + # script (/AA/Fo) codespell:ignore -------------------------------------- + value = GETATTR("script_focus") + JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value) + + # field value ------------------------------------------------------------ + value = GETATTR("field_value") # field value + text = JM_StrAsChar(value) # convert to text (may fail!) + if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON: + if not value: + mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off") + else: + # TODO check if another button in the group is ON and if so set it Off + onstate = mupdf.pdf_button_field_on_state(annot_obj) + if onstate.m_internal: + on = mupdf.pdf_to_name(onstate) + mupdf.pdf_set_field_value(pdf, annot_obj, on, 1) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on) + elif text: + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text) + elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX: + onstate = mupdf.pdf_button_field_on_state(annot_obj) + on = onstate.pdf_to_name() + if value in (True, on) or text == 'Yes': + mupdf.pdf_set_field_value(pdf, annot_obj, on, 1) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on) + mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on) + else: + mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off') + mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off') + else: + if text: + mupdf.pdf_set_field_value(pdf, annot_obj, text, 1) + if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX): + mupdf.pdf_dict_del(annot_obj, PDF_NAME('I')) + mupdf.pdf_dirty_annot(annot) + mupdf.pdf_set_annot_hot(annot, 1) + mupdf.pdf_set_annot_active(annot, 1) + mupdf.pdf_update_annot(annot) + + +def JM_show_string_cs( + text, + user_font, + trm, + s, + wmode, + bidi_level, + markup_dir, + language, + ): + i = 0 + while i < len(s): + l, ucs = mupdf.fz_chartorune(s[i:]) + i += l + gid = mupdf.fz_encode_character_sc(user_font, ucs) + if gid == 0: + gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language) + else: + font = user_font + mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language) + adv = mupdf.fz_advance_glyph(font, gid, wmode) + if wmode == 0: + trm = mupdf.fz_pre_translate(trm, adv, 0) + else: + trm = mupdf.fz_pre_translate(trm, 0, -adv) + return trm + + +def JM_UnicodeFromBuffer(buff): + buff_bytes = mupdf.fz_buffer_extract_copy(buff) + val = buff_bytes.decode(errors='replace') + z = val.find(chr(0)) + if z >= 0: + val = val[:z] + return val + + +def message_warning(text): + ''' + Generate a warning. + ''' + message(f'warning: {text}') + + +def JM_update_stream(doc, obj, buffer_, compress): + ''' + update a stream object + compress stream when beneficial + ''' + if compress: + length, _ = mupdf.fz_buffer_storage(buffer_) + if length > 30: # ignore small stuff + buffer_compressed = JM_compress_buffer(buffer_) + assert isinstance(buffer_compressed, mupdf.FzBuffer) + if buffer_compressed.m_internal: + length_compressed, _ = mupdf.fz_buffer_storage(buffer_compressed) + if length_compressed < length: # was it worth the effort? + mupdf.pdf_dict_put( + obj, + mupdf.PDF_ENUM_NAME_Filter, + mupdf.PDF_ENUM_NAME_FlateDecode, + ) + mupdf.pdf_update_stream(doc, obj, buffer_compressed, 1) + return + + mupdf.pdf_update_stream(doc, obj, buffer_, 0) + + +def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap): + ''' + Make an XObject from a PDF page + For a positive xref assume that its object can be used instead + ''' + assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}' + if xref > 0: + xobj1 = mupdf.pdf_new_indirect(pdfout, xref, 0) + else: + srcpage = _as_pdf_page(fsrcpage.this) + spageref = srcpage.obj() + mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('MediaBox'))) + # Deep-copy resources object of source page + o = mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('Resources')) + if gmap.m_internal: + # use graftmap when possible + resources = mupdf.pdf_graft_mapped_object(gmap, o) + else: + resources = mupdf.pdf_graft_object(pdfout, o) + + # get spgage contents source + res = JM_read_contents(spageref) + + #------------------------------------------------------------- + # create XObject representing the source page + #------------------------------------------------------------- + xobj1 = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), res) + # store spage contents + JM_update_stream(pdfout, xobj1, res, 1) + + # store spage resources + mupdf.pdf_dict_put(xobj1, PDF_NAME('Resources'), resources) + return xobj1 + + +def PySequence_Check(s): + return isinstance(s, (tuple, list)) + + +def PySequence_Size(s): + return len(s) + + +# constants: error messages. These are also in extra.i. +# +MSG_BAD_ANNOT_TYPE = "bad annot type" +MSG_BAD_APN = "bad or missing annot AP/N" +MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs" +MSG_BAD_ARG_POINTS = "bad seq of points" +MSG_BAD_BUFFER = "bad type: 'buffer'" +MSG_BAD_COLOR_SEQ = "bad color sequence" +MSG_BAD_DOCUMENT = "cannot open broken document" +MSG_BAD_FILETYPE = "bad filetype" +MSG_BAD_LOCATION = "bad location" +MSG_BAD_OC_CONFIG = "bad config number" +MSG_BAD_OC_LAYER = "bad layer number" +MSG_BAD_OC_REF = "bad 'oc' reference" +MSG_BAD_PAGEID = "bad page id" +MSG_BAD_PAGENO = "bad page number(s)" +MSG_BAD_PDFROOT = "PDF has no root" +MSG_BAD_RECT = "rect is infinite or empty" +MSG_BAD_TEXT = "bad type: 'text'" +MSG_BAD_XREF = "bad xref" +MSG_COLOR_COUNT_FAILED = "color count failed" +MSG_FILE_OR_BUFFER = "need font file or buffer" +MSG_FONT_FAILED = "cannot create font" +MSG_IS_NO_ANNOT = "is no annotation" +MSG_IS_NO_IMAGE = "is no image" +MSG_IS_NO_PDF = "is no PDF" +MSG_IS_NO_DICT = "object is no PDF dict" +MSG_PIX_NOALPHA = "source pixmap has no alpha" +MSG_PIXEL_OUTSIDE = "pixel(s) outside image" + + +JM_Exc_FileDataError = 'FileDataError' +PyExc_ValueError = 'ValueError' + +def RAISEPY( msg, exc): + #JM_Exc_CurrentException=exc + #fz_throw(context, FZ_ERROR_GENERIC, msg) + raise Exception( msg) + + +def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'): + # FIXED: handle raw unicode escape sequences + if not s: + return "" + if isinstance(s, str): + rc = s.encode("utf8", errors=errors) + elif isinstance(s, bytes): + rc = s[:] + ret = rc.decode('raw_unicode_escape', errors=errors) + return ret + + +def CheckColor(c: OptSeq): + if c: + if ( + type(c) not in (list, tuple) + or len(c) not in (1, 3, 4) + or min(c) < 0 + or max(c) > 1 + ): + raise ValueError("need 1, 3 or 4 color components in range 0 to 1") + + +def CheckFont(page: Page, fontname: str) -> tuple: + """Return an entry in the page's font list if reference name matches. + """ + for f in page.get_fonts(): + if f[4] == fontname: + return f + + +def CheckFontInfo(doc: Document, xref: int) -> list: + """Return a font info if present in the document. + """ + for f in doc.FontInfos: + if xref == f[0]: + return f + + +def CheckMarkerArg(quads: typing.Any) -> tuple: + if CheckRect(quads): + r = Rect(quads) + return (r.quad,) + if CheckQuad(quads): + return (quads,) + for q in quads: + if not (CheckRect(q) or CheckQuad(q)): + raise ValueError("bad quads entry") + return quads + + +def CheckMorph(o: typing.Any) -> bool: + if not bool(o): + return False + if not (type(o) in (list, tuple) and len(o) == 2): + raise ValueError("morph must be a sequence of length 2") + if not (len(o[0]) == 2 and len(o[1]) == 6): + raise ValueError("invalid morph param 0") + if not o[1][4] == o[1][5] == 0: + raise ValueError("invalid morph param 1") + return True + + +def CheckParent(o: typing.Any): + return + if not hasattr(o, "parent") or o.parent is None: + raise ValueError(f"orphaned object {type(o)=}: parent is None") + + +def CheckQuad(q: typing.Any) -> bool: + """Check whether an object is convex, not empty quad-like. + + It must be a sequence of 4 number pairs. + """ + try: + q0 = Quad(q) + except Exception: + if g_exceptions_verbose > 1: exception_info() + return False + return q0.is_convex + + +def CheckRect(r: typing.Any) -> bool: + """Check whether an object is non-degenerate rect-like. + + It must be a sequence of 4 numbers. + """ + try: + r = Rect(r) + except Exception: + if g_exceptions_verbose > 1: exception_info() + return False + return not (r.is_empty or r.is_infinite) + + +def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str: + if not c: + return "" + if hasattr(c, "__float__"): + c = (c,) + CheckColor(c) + if len(c) == 1: + s = _format_g(c[0]) + " " + return s + "G " if f == "c" else s + "g " + + if len(c) == 3: + s = _format_g(tuple(c)) + " " + return s + "RG " if f == "c" else s + "rg " + + s = _format_g(tuple(c)) + " " + return s + "K " if f == "c" else s + "k " + + +def Page__add_text_marker(self, quads, annot_type): + pdfpage = self._pdf_page() + rotation = JM_page_rotation(pdfpage) + def final(): + if rotation != 0: + mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation) + try: + if rotation != 0: + mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0) + annot = mupdf.pdf_create_annot(pdfpage, annot_type) + for item in quads: + q = JM_quad_from_py(item) + mupdf.pdf_add_annot_quad_point(annot, q) + mupdf.pdf_update_annot(annot) + JM_add_annot_id(annot, "A") + final() + except Exception: + if g_exceptions_verbose: exception_info() + final() + return + return Annot(annot) + + +def PDF_NAME(x): + assert isinstance(x, str) + ret = getattr(mupdf, f'PDF_ENUM_NAME_{x}') + # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In + # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as + # explicit, but this seems to be ignored by SWIG. If SWIG started to + # generate code that respected `explicit`, we would need to do `return + # mupdf.PdfObj(ret)`. + # + # [Compare with extra.i, where we define our own PDF_NAME2() macro that + # returns a mupdf::PdfObj.] + return ret + + +def UpdateFontInfo(doc: Document, info: typing.Sequence): + xref = info[0] + found = False + for i, fi in enumerate(doc.FontInfos): + if fi[0] == xref: + found = True + break + if found: + doc.FontInfos[i] = info + else: + doc.FontInfos.append(info) + + +def args_match(args, *types): + ''' + Returns true if matches . + + Each item in is a type or tuple of types. Any of these types will + match an item in . `None` will match anything in . `type(None)` + will match an arg whose value is `None`. + ''' + j = 0 + for i in range(len(types)): + type_ = types[i] + if j >= len(args): + if isinstance(type_, tuple) and None in type_: + # arg is missing but has default value. + continue + else: + return False + if type_ is not None and not isinstance(args[j], type_): + return False + j += 1 + if j != len(args): + return False + return True + + +def calc_image_matrix(width, height, tr, rotate, keep): + ''' + # compute image insertion matrix + ''' + trect = JM_rect_from_py(tr) + rot = mupdf.fz_rotate(rotate) + trw = trect.x1 - trect.x0 + trh = trect.y1 - trect.y0 + w = trw + h = trh + if keep: + large = max(width, height) + fw = width / large + fh = height / large + else: + fw = fh = 1 + small = min(fw, fh) + if rotate != 0 and rotate != 180: + f = fw + fw = fh + fh = f + if fw < 1: + if trw / fw > trh / fh: + w = trh * small + h = trh + else: + w = trw + h = trw / small + elif fw != fh: + if trw / fw > trh / fh: + w = trh / small + h = trh + else: + w = trw + h = trw * small + else: + w = trw + h = trh + tmp = mupdf.fz_make_point( + (trect.x0 + trect.x1) / 2, + (trect.y0 + trect.y1) / 2, + ) + mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5) + mat = mupdf.fz_concat(mat, rot) + mat = mupdf.fz_concat(mat, mupdf.fz_scale(w, h)) + mat = mupdf.fz_concat(mat, mupdf.fz_translate(tmp.x, tmp.y)) + return mat + + +def detect_super_script(line, ch): + if line.m_internal.wmode == 0 and line.m_internal.dir.x == 1 and line.m_internal.dir.y == 0: + return ch.m_internal.origin.y < line.m_internal.first_char.origin.y - ch.m_internal.size * 0.1 + return 0 + + +def dir_str(x): + ret = f'{x} {type(x)} ({len(dir(x))}):\n' + for i in dir(x): + ret += f' {i}\n' + return ret + + +def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str: + """ Return a PDF string enclosed in [] brackets, suitable for the PDF TJ + operator. + + Notes: + The input string is converted to either 2 or 4 hex digits per character. + Args: + simple: no glyphs: 2-chars, use char codes as the glyph + glyphs: 2-chars, use glyphs instead of char codes (Symbol, + ZapfDingbats) + not simple: ordering < 0: 4-chars, use glyphs not char codes + ordering >=0: a CJK font! 4 chars, use char codes as glyphs + """ + if text.startswith("[<") and text.endswith(">]"): # already done + return text + + if not bool(text): + return "[<>]" + + if simple: # each char or its glyph is coded as a 2-byte hex + if glyphs is None: # not Symbol, not ZapfDingbats: use char code + otxt = "".join(["%02x" % ord(c) if ord(c) < 256 else "b7" for c in text]) + else: # Symbol or ZapfDingbats: use glyphs + otxt = "".join( + ["%02x" % glyphs[ord(c)][0] if ord(c) < 256 else "b7" for c in text] + ) + return "[<" + otxt + ">]" + + # non-simple fonts: each char or its glyph is coded as 4-byte hex + if ordering < 0: # not a CJK font: use the glyphs + otxt = "".join(["%04x" % glyphs[ord(c)][0] for c in text]) + else: # CJK: use the char codes + otxt = "".join(["%04x" % ord(c) for c in text]) + + return "[<" + otxt + ">]" + + +def get_pdf_str(s: str) -> str: + """ Return a PDF string depending on its coding. + + Notes: + Returns a string bracketed with either "()" or "<>" for hex values. + If only ascii then "(original)" is returned, else if only 8 bit chars + then "(original)" with interspersed octal strings \nnn is returned, + else a string "" is returned, where [hexstring] is the + UTF-16BE encoding of the original. + """ + if not bool(s): + return "()" + + def make_utf16be(s): + r = bytearray([254, 255]) + bytearray(s, "UTF-16BE") + return "<" + r.hex() + ">" # brackets indicate hex + + # The following either returns the original string with mixed-in + # octal numbers \nnn for chars outside the ASCII range, or returns + # the UTF-16BE BOM version of the string. + r = "" + for c in s: + oc = ord(c) + if oc > 255: # shortcut if beyond 8-bit code range + return make_utf16be(s) + + if oc > 31 and oc < 127: # in ASCII range + if c in ("(", ")", "\\"): # these need to be escaped + r += "\\" + r += c + continue + + if oc > 127: # beyond ASCII + r += "\\%03o" % oc + continue + + # now the white spaces + if oc == 8: # backspace + r += "\\b" + elif oc == 9: # tab + r += "\\t" + elif oc == 10: # line feed + r += "\\n" + elif oc == 12: # form feed + r += "\\f" + elif oc == 13: # carriage return + r += "\\r" + else: + r += "\\267" # unsupported: replace by 0xB7 + + return "(" + r + ")" + + +def get_tessdata(tessdata=None): + """Detect Tesseract language support folder. + + This function is used to enable OCR via Tesseract even if the language + support folder is not specified directly or in environment variable + TESSDATA_PREFIX. + + * If is set we return it directly. + + * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set. + + * Otherwise we search for a Tesseract installation and return its language + support folder. + + * Otherwise we raise an exception. + """ + if tessdata: + return tessdata + tessdata = os.getenv("TESSDATA_PREFIX") + if tessdata: # use environment variable if set + return tessdata + + # Try to locate the tesseract-ocr installation. + + import subprocess + + cp = subprocess.run('tesseract --list-langs', shell=1, capture_output=1, check=0, text=True) + if cp.returncode == 0: + m = re.search('List of available languages in "(.+)"', cp.stdout) + if m: + tessdata = m.group(1) + return tessdata + + # Windows systems: + if sys.platform == "win32": + cp = subprocess.run("where tesseract", shell=1, capture_output=1, check=0, text=True) + response = cp.stdout.strip() + if cp.returncode or not response: + raise RuntimeError("No tessdata specified and Tesseract is not installed") + dirname = os.path.dirname(response) # path of tesseract.exe + tessdata = os.path.join(dirname, "tessdata") # language support + if os.path.exists(tessdata): # all ok? + return tessdata + else: # should not happen! + raise RuntimeError("No tessdata specified and Tesseract installation has no {tessdata} folder") + + # Unix-like systems: + attempts = list() + for path in 'tesseract-ocr', 'tesseract': + cp = subprocess.run(f'whereis {path}', shell=1, capture_output=1, check=0, text=True) + if cp.returncode == 0: + response = cp.stdout.strip().split() + if len(response) == 2: + # search tessdata in folder structure + dirname = response[1] # contains tesseract-ocr installation folder + pattern = f"{dirname}/*/tessdata" + attempts.append(pattern) + tessdatas = glob.glob(pattern) + tessdatas.sort() + if tessdatas: + return tessdatas[-1] + if attempts: + text = 'No tessdata specified and no match for:\n' + for attempt in attempts: + text += f' {attempt}' + raise RuntimeError(text) + else: + raise RuntimeError('No tessdata specified and Tesseract is not installed') + + +def css_for_pymupdf_font( + fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None +) -> str: + """Create @font-face items for the given fontcode of pymupdf-fonts. + + Adds @font-face support for fonts contained in package pymupdf-fonts. + + Creates a CSS font-family for all fonts starting with string 'fontcode'. + + Note: + The font naming convention in package pymupdf-fonts is "fontcode", + where the suffix "sf" is either empty or one of "it", "bo" or "bi". + These suffixes thus represent the regular, italic, bold or bold-italic + variants of a font. For example, font code "notos" refers to fonts + "notos" - "Noto Sans Regular" + "notosit" - "Noto Sans Italic" + "notosbo" - "Noto Sans Bold" + "notosbi" - "Noto Sans Bold Italic" + + This function creates four CSS @font-face definitions and collectively + assigns the font-family name "notos" to them (or the "name" value). + + All fitting font buffers of the pymupdf-fonts package are placed / added + to the archive provided as parameter. + To use the font in pymupdf.Story, execute 'set_font(fontcode)'. The correct + font weight (bold) or style (italic) will automatically be selected. + Expects and returns the CSS source, with the new CSS definitions appended. + + Args: + fontcode: (str) font code for naming the font variants to include. + E.g. "fig" adds notos, notosi, notosb, notosbi fonts. + A maximum of 4 font variants is accepted. + CSS: (str) CSS string to add @font-face definitions to. + archive: (Archive, mandatory) where to place the font buffers. + name: (str) use this as family-name instead of 'fontcode'. + Returns: + Modified CSS, with appended @font-face statements for each font variant + of fontcode. + Fontbuffers associated with "fontcode" will be added to 'archive'. + """ + # @font-face template string + CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n" + + if not type(archive) is Archive: + raise ValueError("'archive' must be an Archive") + if CSS is None: + CSS = "" + + # select font codes starting with the pass-in string + font_keys = [k for k in fitz_fontdescriptors.keys() if k.startswith(fontcode)] + if font_keys == []: + raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.") + if len(font_keys) > 4: + raise ValueError("fontcode too short") + if name is None: # use this name for font-family + name = fontcode + + for fkey in font_keys: + font = fitz_fontdescriptors[fkey] + bold = font["bold"] # determine font property + italic = font["italic"] # determine font property + fbuff = font["loader"]() # load the fontbuffer + archive.add(fbuff, fkey) # update the archive + bold_text = "font-weight: bold;" if bold else "" + italic_text = "font-style: italic;" if italic else "" + CSS += CSSFONT % (name, fkey, bold_text, italic_text) + return CSS + + +def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float: + """Calculate length of a string for a built-in font. + + Args: + fontname: name of the font. + fontsize: font size points. + encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic. + Returns: + (float) length of text. + """ + fontname = fontname.lower() + basename = Base14_fontdict.get(fontname, None) + + glyphs = None + if basename == "Symbol": + glyphs = symbol_glyphs + if basename == "ZapfDingbats": + glyphs = zapf_glyphs + if glyphs is not None: + w = sum([glyphs[ord(c)][1] if ord(c) < 256 else glyphs[183][1] for c in text]) + return w * fontsize + + if fontname in Base14_fontdict.keys(): + return util_measure_string( + text, Base14_fontdict[fontname], fontsize, encoding + ) + + if fontname in ( + "china-t", + "china-s", + "china-ts", + "china-ss", + "japan", + "japan-s", + "korea", + "korea-s", + ): + return len(text) * fontsize + + raise ValueError("Font '%s' is unsupported" % fontname) + + +def image_profile(img: ByteString) -> dict: + """ Return basic properties of an image. + + Args: + img: bytes, bytearray, io.BytesIO object or an opened image file. + Returns: + A dictionary with keys width, height, colorspace.n, bpc, type, ext and size, + where 'type' is the MuPDF image type (0 to 14) and 'ext' the suitable + file extension. + """ + if type(img) is io.BytesIO: + stream = img.getvalue() + elif hasattr(img, "read"): + stream = img.read() + elif type(img) in (bytes, bytearray): + stream = img + else: + raise ValueError("bad argument 'img'") + + return TOOLS.image_profile(stream) + + +def jm_append_merge(dev): + ''' + Append current path to list or merge into last path of the list. + (1) Append if first path, different item lists or not a 'stroke' version + of previous path + (2) If new path has the same items, merge its content into previous path + and change path["type"] to "fs". + (3) If "out" is callable, skip the previous and pass dictionary to it. + ''' + #log(f'{getattr(dev, "pathdict", None)=}') + assert isinstance(dev.out, list) + #log( f'{dev.out=}') + + if callable(dev.method) or dev.method: # function or method + # callback. + if dev.method is None: + # fixme, this surely cannot happen? + assert 0 + #resp = PyObject_CallFunctionObjArgs(out, dev.pathdict, NULL) + else: + #log(f'calling {dev.out=} {dev.method=} {dev.pathdict=}') + resp = getattr(dev.out, dev.method)(dev.pathdict) + if not resp: + message("calling cdrawings callback function/method failed!") + dev.pathdict = None + return + + def append(): + #log(f'jm_append_merge(): clearing dev.pathdict') + dev.out.append(dev.pathdict.copy()) + dev.pathdict.clear() + assert isinstance(dev.out, list) + len_ = len(dev.out) # len of output list so far + #log('{len_=}') + if len_ == 0: # always append first path + return append() + #log(f'{getattr(dev, "pathdict", None)=}') + thistype = dev.pathdict[ dictkey_type] + #log(f'{thistype=}') + if thistype != 's': # if not stroke, then append + return append() + prev = dev.out[ len_-1] # get prev path + #log( f'{prev=}') + prevtype = prev[ dictkey_type] + #log( f'{prevtype=}') + if prevtype != 'f': # if previous not fill, append + return append() + # last check: there must be the same list of items for "f" and "s". + previtems = prev[ dictkey_items] + thisitems = dev.pathdict[ dictkey_items] + if previtems != thisitems: + return append() + + #rc = PyDict_Merge(prev, dev.pathdict, 0); // merge with no override + try: + for k, v in dev.pathdict.items(): + if k not in prev: + prev[k] = v + rc = 0 + except Exception: + if g_exceptions_verbose: exception_info() + #raise + rc = -1 + if rc == 0: + prev[ dictkey_type] = 'fs' + dev.pathdict.clear() + else: + message("could not merge stroke and fill path") + append() + + +def jm_bbox_add_rect( dev, ctx, rect, code): + if not dev.layers: + dev.result.append( (code, JM_py_from_rect(rect))) + else: + dev.result.append( (code, JM_py_from_rect(rect), dev.layer_name)) + + +def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params): + r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) + r = mupdf.ll_fz_transform_rect( r.internal(), ctm) + jm_bbox_add_rect( dev, ctx, r, "fill-image") + + +def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params): + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm), "fill-imgmask") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params): + even_odd = True if even_odd else False + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path(path, None, ctm), "fill-path") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params): + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_shade( shade, ctm), "fill-shade") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args): + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, stroke, ctm), "stroke-text") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_bbox_fill_text( dev, ctx, text, ctm, *args): + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text( text, None, ctm), "fill-text") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_bbox_ignore_text( dev, ctx, text, ctm): + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_text(text, None, ctm), "ignore-text") + + +def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params): + try: + jm_bbox_add_rect( dev, ctx, mupdf.ll_fz_bound_path( path, stroke, ctm), "stroke-path") + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_checkquad(dev): + ''' + Check whether the last 4 lines represent a quad. + Because of how we count, the lines are a polyline already, i.e. last point + of a line equals 1st point of next line. + So we check for a polygon (last line's end point equals start point). + If not true we return 0. + ''' + #log(f'{getattr(dev, "pathdict", None)=}') + items = dev.pathdict[ dictkey_items] + len_ = len(items) + f = [0] * 8 # coordinates of the 4 corners + # fill the 8 floats in f, start from items[-4:] + for i in range( 4): # store line start points + line = items[ len_ - 4 + i] + temp = JM_point_from_py( line[1]) + f[i * 2] = temp.x + f[i * 2 + 1] = temp.y + lp = JM_point_from_py( line[ 2]) + if lp.x != f[0] or lp.y != f[1]: + # not a polygon! + #dev.linecount -= 1 + return 0 + + # we have detected a quad + dev.linecount = 0 # reset this + # a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items + # are pairs of floats representing a quad corner each. + + # relationship of float array to quad points: + # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr + q = mupdf.fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5]) + rect = ('qu', JM_py_from_quad(q)) + + items[ len_ - 4] = rect # replace item -4 by rect + del items[ len_ - 3 : len_] # delete remaining 3 items + return 1 + + +def jm_checkrect(dev): + ''' + Check whether the last 3 path items represent a rectangle. + Returns 1 if we have modified the path, otherwise 0. + ''' + #log(f'{getattr(dev, "pathdict", None)=}') + dev.linecount = 0 # reset line count + orientation = 0 # area orientation of rectangle + items = dev.pathdict[ dictkey_items] + len_ = len(items) + + line0 = items[ len_ - 3] + ll = JM_point_from_py( line0[ 1]) + lr = JM_point_from_py( line0[ 2]) + + # no need to extract "line1"! + line2 = items[ len_ - 1] + ur = JM_point_from_py( line2[ 1]) + ul = JM_point_from_py( line2[ 2]) + + # Assumption: + # When decomposing rects, MuPDF always starts with a horizontal line, + # followed by a vertical line, followed by a horizontal line. + # First line: (ll, lr), third line: (ul, ur). + # If 1st line is below 3rd line, we record anti-clockwise (+1), else + # clockwise (-1) orientation. + + if (0 + or ll.y != lr.y + or ll.x != ul.x + or ur.y != ul.y + or ur.x != lr.x + ): + return 0 # not a rectangle + + # we have a rect, replace last 3 "l" items by one "re" item. + if ul.y < lr.y: + r = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y) + orientation = 1 + else: + r = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y) + orientation = -1 + + rect = ( 're', JM_py_from_rect(r), orientation) + items[ len_ - 3] = rect # replace item -3 by rect + del items[ len_ - 2 : len_] # delete remaining 2 items + return 1 + + +def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno): + span = text.head + while 1: + if not span: + break + jm_trace_text_span( dev, span, type_, ctm, colorspace, color, alpha, seqno) + span = span.next + + +def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno): + ''' + jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno) + ''' + out_font = None + assert isinstance( span, mupdf.fz_text_span) + span = mupdf.FzTextSpan( span) + assert isinstance( ctm, mupdf.fz_matrix) + ctm = mupdf.FzMatrix( ctm) + fontname = JM_font_name( span.font()) + #float rgb[3]; + #PyObject *chars = PyTuple_New(span->len); + + mat = mupdf.fz_concat(span.trm(), ctm) # text transformation matrix + dir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat) # writing direction + fsize = math.sqrt(dir.x * dir.x + dir.y * dir.y) # font size + + dir = mupdf.fz_normalize_vector(dir) + + space_adv = 0 + asc = JM_font_ascender( span.font()) + dsc = JM_font_descender( span.font()) + if asc < 1e-3: # probably Tesseract font + dsc = -0.1 + asc = 0.9 + + # compute effective ascender / descender + ascsize = asc * fsize / (asc - dsc) + dscsize = dsc * fsize / (asc - dsc) + fflags = 0 # font flags + mono = mupdf.fz_font_is_monospaced( span.font()) + fflags += mono * TEXT_FONT_MONOSPACED + fflags += mupdf.fz_font_is_italic( span.font()) * TEXT_FONT_ITALIC + fflags += mupdf.fz_font_is_serif( span.font()) * TEXT_FONT_SERIFED + fflags += mupdf.fz_font_is_bold( span.font()) * TEXT_FONT_BOLD + + last_adv = 0 + + # walk through characters of span + span_bbox = mupdf.FzRect() + rot = mupdf.fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0) + if dir.x == -1: # left-right flip + rot.d = 1 + + chars = [] + for i in range( span.m_internal.len): + adv = 0 + if span.items(i).gid >= 0: + adv = mupdf.fz_advance_glyph( span.font(), span.items(i).gid, span.m_internal.wmode) + adv *= fsize + last_adv = adv + if span.items(i).ucs == 32: + space_adv = adv + char_orig = mupdf.fz_make_point(span.items(i).x, span.items(i).y) + char_orig = mupdf.fz_transform_point(char_orig, ctm) + m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y) + m1 = mupdf.fz_concat(m1, rot) + m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y)) + x0 = char_orig.x + x1 = x0 + adv + if ( + (mat.d > 0 and (dir.x == 1 or dir.x == -1)) + or + (mat.b != 0 and mat.b == -mat.c) + ): # up-down flip + y0 = char_orig.y + dscsize + y1 = char_orig.y + ascsize + else: + y0 = char_orig.y - ascsize + y1 = char_orig.y - dscsize + char_bbox = mupdf.fz_make_rect(x0, y0, x1, y1) + char_bbox = mupdf.fz_transform_rect(char_bbox, m1) + chars.append( + ( + span.items(i).ucs, + span.items(i).gid, + ( + char_orig.x, + char_orig.y, + ), + ( + char_bbox.x0, + char_bbox.y0, + char_bbox.x1, + char_bbox.y1, + ), + ) + ) + if i > 0: + span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox) + else: + span_bbox = char_bbox + chars = tuple(chars) + + if not space_adv: + if not (fflags & TEXT_FONT_MONOSPACED): + c, out_font = mupdf.fz_encode_character_with_fallback( span.font(), 32, 0, 0) + space_adv = mupdf.fz_advance_glyph( + span.font(), + c, + span.m_internal.wmode, + ) + space_adv *= fsize + if not space_adv: + space_adv = last_adv + else: + space_adv = last_adv # for mono, any char width suffices + + # make the span dictionary + span_dict = dict() + span_dict[ 'dir'] = JM_py_from_point(dir) + span_dict[ 'font'] = JM_EscapeStrFromStr(fontname) + span_dict[ 'wmode'] = span.m_internal.wmode + span_dict[ 'flags'] =fflags + span_dict[ "bidi_lvl"] =span.m_internal.bidi_level + span_dict[ "bidi_dir"] = span.m_internal.markup_dir + span_dict[ 'ascender'] = asc + span_dict[ 'descender'] = dsc + span_dict[ 'colorspace'] = 3 + + if colorspace: + rgb = mupdf.fz_convert_color( + mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)), + color, + mupdf.fz_device_rgb(), + mupdf.FzColorspace(), + mupdf.FzColorParams(), + ) + rgb = rgb[:3] # mupdf.fz_convert_color() always returns 4 items. + else: + rgb = (0, 0, 0) + + if dev.linewidth > 0: # width of character border + linewidth = dev.linewidth + else: + linewidth = fsize * 0.05 # default: 5% of font size + #log(f'{dev.linewidth=:.4f} {fsize=:.4f} {linewidth=:.4f}') + + span_dict[ 'color'] = rgb + span_dict[ 'size'] = fsize + span_dict[ "opacity"] = alpha + span_dict[ "linewidth"] = linewidth + span_dict[ "spacewidth"] = space_adv + span_dict[ 'type'] = type_ + span_dict[ 'bbox'] = JM_py_from_rect(span_bbox) + span_dict[ 'layer'] = dev.layer_name + span_dict[ "seqno"] = seqno + span_dict[ 'chars'] = chars + #log(f'{span_dict=}') + dev.out.append( span_dict) + + +def jm_lineart_color(colorspace, color): + #log(f' ') + if colorspace: + try: + # Need to be careful to use a named Python object to ensure + # that the `params` we pass to mupdf.ll_fz_convert_color() is + # valid. E.g. doing: + # + # rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal()) + # + # - seems to end up with a corrupted `params`. + # + cs = mupdf.FzColorspace( mupdf.FzColorspace.Fixed_RGB) + cp = mupdf.FzColorParams() + rgb = mupdf.ll_fz_convert_color( + colorspace, + color, + cs.m_internal, + None, + cp.internal(), + ) + except Exception: + if g_exceptions_verbose: exception_info() + raise + return rgb[:3] + return () + + +def jm_lineart_drop_device(dev, ctx): + if isinstance(dev.out, list): + dev.out = [] + dev.scissors = [] + + +def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params): + #log(f'{getattr(dev, "pathdict", None)=}') + #log(f'jm_lineart_fill_path(): {dev.seqno=}') + even_odd = True if even_odd else False + try: + assert isinstance( ctm, mupdf.fz_matrix) + dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm); + dev.path_type = trace_device_FILL_PATH + jm_lineart_path( dev, ctx, path) + if dev.pathdict is None: + return + #item_count = len(dev.pathdict[ dictkey_items]) + #if item_count == 0: + # return + dev.pathdict[ dictkey_type] ="f" + dev.pathdict[ "even_odd"] = even_odd + dev.pathdict[ "fill_opacity"] = alpha + #log(f'setting dev.pathdict[ "closePath"] to false') + #dev.pathdict[ "closePath"] = False + dev.pathdict[ "fill"] = jm_lineart_color( colorspace, color) + dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect) + dev.pathdict[ "seqno"] = dev.seqno + #jm_append_merge(dev) + dev.pathdict[ 'layer'] = dev.layer_name + if dev.clips: + dev.pathdict[ 'level'] = dev.depth + jm_append_merge(dev) + dev.seqno += 1 + #log(f'jm_lineart_fill_path() end: {getattr(dev, "pathdict", None)=}') + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +# There are 3 text trace types: +# 0 - fill text (PDF Tr 0) +# 1 - stroke text (PDF Tr 1) +# 3 - ignore text (PDF Tr 3) + +def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params): + if 0: + log(f'{type(ctx)=} {ctx=}') + log(f'{type(dev)=} {dev=}') + log(f'{type(text)=} {text=}') + log(f'{type(ctm)=} {ctm=}') + log(f'{type(colorspace)=} {colorspace=}') + log(f'{type(color)=} {color=}') + log(f'{type(alpha)=} {alpha=}') + log(f'{type(color_params)=} {color_params=}') + jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev.seqno) + dev.seqno += 1 + + +def jm_lineart_ignore_text(dev, text, ctm): + #log(f'{getattr(dev, "pathdict", None)=}') + jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno) + dev.seqno += 1 + + +class Walker(mupdf.FzPathWalker2): + + def __init__(self, dev): + super().__init__() + self.use_virtual_moveto() + self.use_virtual_lineto() + self.use_virtual_curveto() + self.use_virtual_closepath() + self.dev = dev + + def closepath(self, ctx): # trace_close(). + #log(f'Walker(): {self.dev.pathdict=}') + try: + if self.dev.linecount == 3: + if jm_checkrect(self.dev): + #log(f'end1: {self.dev.pathdict=}') + return + self.dev.linecount = 0 # reset # of consec. lines + + if self.dev.havemove: + if self.dev.lastpoint != self.dev.firstpoint: + item = ("l", JM_py_from_point(self.dev.lastpoint), + JM_py_from_point(self.dev.firstpoint)) + self.dev.pathdict[dictkey_items].append(item) + self.dev.lastpoint = self.dev.firstpoint + self.dev.pathdict["closePath"] = False + + else: + #log('setting self.dev.pathdict[ "closePath"] to true') + self.dev.pathdict[ "closePath"] = True + #log(f'end2: {self.dev.pathdict=}') + + self.dev.havemove = 0 + + except Exception: + if g_exceptions_verbose: exception_info() + raise + + def curveto(self, ctx, x1, y1, x2, y2, x3, y3): # trace_curveto(). + #log(f'Walker(): {self.dev.pathdict=}') + try: + self.dev.linecount = 0 # reset # of consec. lines + p1 = mupdf.fz_make_point(x1, y1) + p2 = mupdf.fz_make_point(x2, y2) + p3 = mupdf.fz_make_point(x3, y3) + p1 = mupdf.fz_transform_point(p1, self.dev.ctm) + p2 = mupdf.fz_transform_point(p2, self.dev.ctm) + p3 = mupdf.fz_transform_point(p3, self.dev.ctm) + self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p1) + self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p2) + self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p3) + + list_ = ( + "c", + JM_py_from_point(self.dev.lastpoint), + JM_py_from_point(p1), + JM_py_from_point(p2), + JM_py_from_point(p3), + ) + self.dev.lastpoint = p3 + self.dev.pathdict[ dictkey_items].append( list_) + except Exception: + if g_exceptions_verbose: exception_info() + raise + + def lineto(self, ctx, x, y): # trace_lineto(). + #log(f'Walker(): {self.dev.pathdict=}') + try: + p1 = mupdf.fz_transform_point( mupdf.fz_make_point(x, y), self.dev.ctm) + self.dev.pathrect = mupdf.fz_include_point_in_rect( self.dev.pathrect, p1) + list_ = ( + 'l', + JM_py_from_point( self.dev.lastpoint), + JM_py_from_point(p1), + ) + self.dev.lastpoint = p1 + items = self.dev.pathdict[ dictkey_items] + items.append( list_) + self.dev.linecount += 1 # counts consecutive lines + if self.dev.linecount == 4 and self.dev.path_type != trace_device_FILL_PATH: + # shrink to "re" or "qu" item + jm_checkquad(self.dev) + except Exception: + if g_exceptions_verbose: exception_info() + raise + + def moveto(self, ctx, x, y): # trace_moveto(). + if 0 and isinstance(self.dev.pathdict, dict): + log(f'self.dev.pathdict:') + for n, v in self.dev.pathdict.items(): + log( ' {type(n)=} {len(n)=} {n!r} {n}: {v!r}: {v}') + + #log(f'Walker(): {type(self.dev.pathdict)=} {self.dev.pathdict=}') + + try: + #log( '{=dev.ctm type(dev.ctm)}') + self.dev.lastpoint = mupdf.fz_transform_point( + mupdf.fz_make_point(x, y), + self.dev.ctm, + ) + if mupdf.fz_is_infinite_rect( self.dev.pathrect): + self.dev.pathrect = mupdf.fz_make_rect( + self.dev.lastpoint.x, + self.dev.lastpoint.y, + self.dev.lastpoint.x, + self.dev.lastpoint.y, + ) + self.dev.firstpoint = self.dev.lastpoint + self.dev.havemove = 1 + self.dev.linecount = 0 # reset # of consec. lines + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_lineart_path(dev, ctx, path): + ''' + Create the "items" list of the path dictionary + * either create or empty the path dictionary + * reset the end point of the path + * reset count of consecutive lines + * invoke fz_walk_path(), which create the single items + * if no items detected, empty path dict again + ''' + #log(f'{getattr(dev, "pathdict", None)=}') + try: + dev.pathrect = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE) + dev.linecount = 0 + dev.lastpoint = mupdf.FzPoint( 0, 0) + dev.pathdict = dict() + dev.pathdict[ dictkey_items] = [] + + # First time we create a Walker instance is slow, e.g. 0.3s, then later + # times run in around 0.01ms. If Walker is defined locally instead of + # globally, each time takes 0.3s. + # + walker = Walker(dev) + # Unlike fz_run_page(), fz_path_walker callbacks are not passed + # a pointer to the struct, instead they get an arbitrary + # void*. The underlying C++ Director callbacks use this void* to + # identify the fz_path_walker instance so in turn we need to pass + # arg=walker.m_internal. + mupdf.fz_walk_path( mupdf.FzPath(mupdf.ll_fz_keep_path(path)), walker, walker.m_internal) + # Check if any items were added ... + if not dev.pathdict[ dictkey_items]: + dev.pathdict = None + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params): + #log(f'{dev.pathdict=} {dev.clips=}') + try: + assert isinstance( ctm, mupdf.fz_matrix) + dev.pathfactor = 1 + if ctm.a != 0 and abs(ctm.a) == abs(ctm.d): + dev.pathfactor = abs(ctm.a) + elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c): + dev.pathfactor = abs(ctm.b) + dev.ctm = mupdf.FzMatrix( ctm) # fz_concat(ctm, dev_ptm); + dev.path_type = trace_device_STROKE_PATH + + jm_lineart_path( dev, ctx, path) + if dev.pathdict is None: + return + dev.pathdict[ dictkey_type] = 's' + dev.pathdict[ 'stroke_opacity'] = alpha + dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color) + dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth + dev.pathdict[ 'lineCap'] = ( + stroke.start_cap, + stroke.dash_cap, + stroke.end_cap, + ) + dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin + if 'closePath' not in dev.pathdict: + #log('setting dev.pathdict["closePath"] to false') + dev.pathdict['closePath'] = False + + # output the "dashes" string + if stroke.dash_len: + buff = mupdf.fz_new_buffer( 256) + mupdf.fz_append_string( buff, "[ ") # left bracket + for i in range( stroke.dash_len): + # We use mupdf python's SWIG-generated floats_getitem() fn to + # access float *stroke.dash_list[]. + value = mupdf.floats_getitem( stroke.dash_list, i) # stroke.dash_list[i]. + mupdf.fz_append_string( buff, f'{_format_g(dev.pathfactor * value)} ') + mupdf.fz_append_string( buff, f'] {_format_g(dev.pathfactor * stroke.dash_phase)}') + dev.pathdict[ 'dashes'] = buff + else: + dev.pathdict[ 'dashes'] = '[] 0' + dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect) + dev.pathdict['layer'] = dev.layer_name + dev.pathdict[ 'seqno'] = dev.seqno + if dev.clips: + dev.pathdict[ 'level'] = dev.depth + jm_append_merge(dev) + dev.seqno += 1 + + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor): + if not dev.clips: + return + dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm); + dev.path_type = trace_device_CLIP_PATH + jm_lineart_path(dev, ctx, path) + if dev.pathdict is None: + return + dev.pathdict[ dictkey_type] = 'clip' + dev.pathdict[ 'even_odd'] = bool(even_odd) + if 'closePath' not in dev.pathdict: + #log(f'setting dev.pathdict["closePath"] to False') + dev.pathdict['closePath'] = False + + dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev)) + dev.pathdict['level'] = dev.depth + dev.pathdict['layer'] = dev.layer_name + jm_append_merge(dev) + dev.depth += 1 + + +def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor): + if not dev.clips: + return + dev.ctm = mupdf.FzMatrix(ctm) # fz_concat(ctm, trace_device_ptm); + dev.path_type = trace_device_CLIP_STROKE_PATH + jm_lineart_path(dev, ctx, path) + if dev.pathdict is None: + return + dev.pathdict['dictkey_type'] = 'clip' + dev.pathdict['even_odd'] = None + if 'closePath' not in dev.pathdict: + #log(f'setting dev.pathdict["closePath"] to False') + dev.pathdict['closePath'] = False + dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev)) + dev.pathdict['level'] = dev.depth + dev.pathdict['layer'] = dev.layer_name + jm_append_merge(dev) + dev.depth += 1 + + +def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor): + if not dev.clips: + return + compute_scissor(dev) + dev.depth += 1 + + +def jm_lineart_clip_text(dev, ctx, text, ctm, scissor): + if not dev.clips: + return + compute_scissor(dev) + dev.depth += 1 + + +def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor): + if not dev.clips: + return + compute_scissor(dev) + dev.depth += 1 + + +def jm_lineart_pop_clip(dev, ctx): + if not dev.clips or not dev.scissors: + return + len_ = len(dev.scissors) + if len_ < 1: + return + del dev.scissors[-1] + dev.depth -= 1 + + +def jm_lineart_begin_layer(dev, ctx, name): + if name: + dev.layer_name = name + else: + dev.layer_name = "" + + +def jm_lineart_end_layer(dev, ctx): + dev.layer_name = "" + + +def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha): + #log(f'{dev.pathdict=} {dev.clips=}') + if not dev.clips: + return + dev.pathdict = { # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}", + "type": "group", + "rect": JM_py_from_rect(bbox), + "isolated": bool(isolated), + "knockout": bool(knockout), + "blendmode": mupdf.fz_blendmode_name(blendmode), + "opacity": alpha, + "level": dev.depth, + "layer": dev.layer_name + } + jm_append_merge(dev) + dev.depth += 1 + + +def jm_lineart_end_group(dev, ctx): + #log(f'{dev.pathdict=} {dev.clips=}') + if not dev.clips: + return + dev.depth -= 1 + + +def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params): + jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev.seqno) + dev.seqno += 1 + + +def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params): + dev.linewidth = stroke.linewidth + jm_increase_seqno( dev, ctx) + + +def jm_increase_seqno( dev, ctx, *vargs): + try: + dev.seqno += 1 + except Exception: + if g_exceptions_verbose: exception_info() + raise + + +def planish_line(p1: point_like, p2: point_like) -> Matrix: + """Compute matrix which maps line from p1 to p2 to the x-axis, such that it + maintains its length and p1 * matrix = Point(0, 0). + + Args: + p1, p2: point_like + Returns: + Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at + the same distance to Point(0,0). Will always combine a rotation and a + transformation. + """ + p1 = Point(p1) + p2 = Point(p2) + return Matrix(util_hor_matrix(p1, p2)) + + +class JM_image_reporter_Filter(mupdf.PdfFilterOptions2): + def __init__(self): + super().__init__() + self.use_virtual_image_filter() + + def image_filter( self, ctx, ctm, name, image): + assert isinstance(ctm, mupdf.fz_matrix) + JM_image_filter(self, mupdf.FzMatrix(ctm), name, image) + if mupdf_cppyy: + # cppyy doesn't appear to treat returned None as nullptr, + # resulting in obscure 'python exception' exception. + return 0 + + +class JM_new_bbox_device_Device(mupdf.FzDevice2): + def __init__(self, result, layers): + super().__init__() + self.result = result + self.layers = layers + self.layer_name = "" + self.use_virtual_fill_path() + self.use_virtual_stroke_path() + self.use_virtual_fill_text() + self.use_virtual_stroke_text() + self.use_virtual_ignore_text() + self.use_virtual_fill_shade() + self.use_virtual_fill_image() + self.use_virtual_fill_image_mask() + + self.use_virtual_begin_layer() + self.use_virtual_end_layer() + + begin_layer = jm_lineart_begin_layer + end_layer = jm_lineart_end_layer + + fill_path = jm_bbox_fill_path + stroke_path = jm_bbox_stroke_path + fill_text = jm_bbox_fill_text + stroke_text = jm_bbox_stroke_text + ignore_text = jm_bbox_ignore_text + fill_shade = jm_bbox_fill_shade + fill_image = jm_bbox_fill_image + fill_image_mask = jm_bbox_fill_image_mask + + +class JM_new_output_fileptr_Output(mupdf.FzOutput2): + def __init__(self, bio): + super().__init__() + self.bio = bio + self.use_virtual_write() + self.use_virtual_seek() + self.use_virtual_tell() + self.use_virtual_truncate() + + def seek( self, ctx, offset, whence): + return self.bio.seek( offset, whence) + + def tell( self, ctx): + ret = self.bio.tell() + return ret + + def truncate( self, ctx): + return self.bio.truncate() + + def write(self, ctx, data_raw, data_length): + data = mupdf.raw_to_python_bytes(data_raw, data_length) + return self.bio.write(data) + + +def compute_scissor(dev): + ''' + Every scissor of a clip is a sub rectangle of the preceding clip scissor + if the clip level is larger. + ''' + if dev.scissors is None: + dev.scissors = list() + num_scissors = len(dev.scissors) + if num_scissors > 0: + last_scissor = dev.scissors[num_scissors-1] + scissor = JM_rect_from_py(last_scissor) + scissor = mupdf.fz_intersect_rect(scissor, dev.pathrect) + else: + scissor = dev.pathrect + dev.scissors.append(JM_py_from_rect(scissor)) + return scissor + + +class JM_new_lineart_device_Device(mupdf.FzDevice2): + ''' + LINEART device for Python method Page.get_cdrawings() + ''' + #log(f'JM_new_lineart_device_Device()') + def __init__(self, out, clips, method): + #log(f'JM_new_lineart_device_Device.__init__()') + super().__init__() + # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().". + #self.use_virtual_drop_device() + self.use_virtual_fill_path() + self.use_virtual_stroke_path() + self.use_virtual_clip_path() + self.use_virtual_clip_image_mask() + self.use_virtual_clip_stroke_path() + self.use_virtual_clip_stroke_text() + self.use_virtual_clip_text() + + self.use_virtual_fill_text + self.use_virtual_stroke_text + self.use_virtual_ignore_text + + self.use_virtual_fill_shade() + self.use_virtual_fill_image() + self.use_virtual_fill_image_mask() + + self.use_virtual_pop_clip() + + self.use_virtual_begin_group() + self.use_virtual_end_group() + + self.use_virtual_begin_layer() + self.use_virtual_end_layer() + + self.out = out + self.seqno = 0 + self.depth = 0 + self.clips = clips + self.method = method + + self.scissors = None + self.layer_name = "" # optional content name + self.pathrect = None + + self.linewidth = 0 + self.ptm = mupdf.FzMatrix() + self.ctm = mupdf.FzMatrix() + self.rot = mupdf.FzMatrix() + self.lastpoint = mupdf.FzPoint() + self.firstpoint = mupdf.FzPoint() + self.havemove = 0 + self.pathrect = mupdf.FzRect() + self.pathfactor = 0 + self.linecount = 0 + self.path_type = 0 + + #drop_device = jm_lineart_drop_device + + fill_path = jm_lineart_fill_path + stroke_path = jm_lineart_stroke_path + clip_image_mask = jm_lineart_clip_image_mask + clip_path = jm_lineart_clip_path + clip_stroke_path = jm_lineart_clip_stroke_path + clip_text = jm_lineart_clip_text + clip_stroke_text = jm_lineart_clip_stroke_text + + fill_text = jm_increase_seqno + stroke_text = jm_increase_seqno + ignore_text = jm_increase_seqno + + fill_shade = jm_increase_seqno + fill_image = jm_increase_seqno + fill_image_mask = jm_increase_seqno + + pop_clip = jm_lineart_pop_clip + + begin_group = jm_lineart_begin_group + end_group = jm_lineart_end_group + + begin_layer = jm_lineart_begin_layer + end_layer = jm_lineart_end_layer + + +class JM_new_texttrace_device(mupdf.FzDevice2): + ''' + Trace TEXT device for Python method Page.get_texttrace() + ''' + + def __init__(self, out): + super().__init__() + self.use_virtual_fill_path() + self.use_virtual_stroke_path() + self.use_virtual_fill_text() + self.use_virtual_stroke_text() + self.use_virtual_ignore_text() + self.use_virtual_fill_shade() + self.use_virtual_fill_image() + self.use_virtual_fill_image_mask() + + self.use_virtual_begin_layer() + self.use_virtual_end_layer() + + self.out = out + + self.seqno = 0 + self.depth = 0 + self.clips = 0 + self.method = None + + self.seqno = 0 + + self.pathdict = dict() + self.scissors = list() + self.linewidth = 0 + self.ptm = mupdf.FzMatrix() + self.ctm = mupdf.FzMatrix() + self.rot = mupdf.FzMatrix() + self.lastpoint = mupdf.FzPoint() + self.pathrect = mupdf.FzRect() + self.pathfactor = 0 + self.linecount = 0 + self.path_type = 0 + self.layer_name = "" + + fill_path = jm_increase_seqno + stroke_path = jm_dev_linewidth + fill_text = jm_lineart_fill_text + stroke_text = jm_lineart_stroke_text + ignore_text = jm_lineart_ignore_text + fill_shade = jm_increase_seqno + fill_image = jm_increase_seqno + fill_image_mask = jm_increase_seqno + + begin_layer = jm_lineart_begin_layer + end_layer = jm_lineart_end_layer + + +def ConversionHeader(i: str, filename: OptStr ="unknown"): + t = i.lower() + import textwrap + html = textwrap.dedent(""" + + + + + + + """) + + xml = textwrap.dedent(""" + + + """ + % filename + ) + + xhtml = textwrap.dedent(""" + + + + + + + + """) + + text = "" + json = '{"document": "%s", "pages": [\n' % filename + if t == "html": + r = html + elif t == "json": + r = json + elif t == "xml": + r = xml + elif t == "xhtml": + r = xhtml + else: + r = text + + return r + + +def ConversionTrailer(i: str): + t = i.lower() + text = "" + json = "]\n}" + html = "\n\n" + xml = "\n" + xhtml = html + if t == "html": + r = html + elif t == "json": + r = json + elif t == "xml": + r = xml + elif t == "xhtml": + r = xhtml + else: + r = text + + return r + + +def annot_preprocess(page: "Page") -> int: + """Prepare for annotation insertion on the page. + + Returns: + Old page rotation value. Temporarily sets rotation to 0 when required. + """ + CheckParent(page) + if not page.parent.is_pdf: + raise ValueError("is no PDF") + old_rotation = page.rotation + if old_rotation != 0: + page.set_rotation(0) + return old_rotation + + +def annot_postprocess(page: "Page", annot: "Annot") -> None: + """Clean up after annotation insertion. + + Set ownership flag and store annotation in page annotation dictionary. + """ + #annot.parent = weakref.proxy(page) + assert isinstance( page, Page) + assert isinstance( annot, Annot) + annot.parent = page + page._annot_refs[id(annot)] = annot + annot.thisown = True + + +def canon(c): + assert isinstance(c, int) + # TODO: proper unicode case folding + # TODO: character equivalence (a matches ä, etc) + if c == 0xA0 or c == 0x2028 or c == 0x2029: + return ord(' ') + if c == ord('\r') or c == ord('\n') or c == ord('\t'): + return ord(' ') + if c >= ord('A') and c <= ord('Z'): + return c - ord('A') + ord('a') + return c + + +def chartocanon(s): + assert isinstance(s, str) + n, c = mupdf.fz_chartorune(s) + c = canon(c) + return n, c + + +def dest_is_valid(o, page_count, page_object_nums, names_list): + p = mupdf.pdf_dict_get( o, PDF_NAME('A')) + if ( + mupdf.pdf_name_eq( + mupdf.pdf_dict_get( p, PDF_NAME('S')), + PDF_NAME('GoTo') + ) + and not string_in_names_list( + mupdf.pdf_dict_get( p, PDF_NAME('D')), + names_list + ) + ): + return 0 + + p = mupdf.pdf_dict_get( o, PDF_NAME('Dest')) + if not p.m_internal: + pass + elif mupdf.pdf_is_string( p): + return string_in_names_list( p, names_list) + elif not dest_is_valid_page( + mupdf.pdf_array_get( p, 0), + page_object_nums, + page_count, + ): + return 0 + return 1 + + +def dest_is_valid_page(obj, page_object_nums, pagecount): + num = mupdf.pdf_to_num(obj) + + if num == 0: + return 0 + for i in range(pagecount): + if page_object_nums[i] == num: + return 1 + return 0 + + +def find_string(s, needle): + assert isinstance(s, str) + for i in range(len(s)): + end = match_string(s[i:], needle) + if end is not None: + end += i + return i, end + return None, None + + +def get_pdf_now() -> str: + ''' + "Now" timestamp in PDF Format + ''' + import time + tz = "%s'%s'" % ( + str(abs(time.altzone // 3600)).rjust(2, "0"), + str((abs(time.altzone // 60) % 60)).rjust(2, "0"), + ) + tstamp = time.strftime("D:%Y%m%d%H%M%S", time.localtime()) + if time.altzone > 0: + tstamp += "-" + tz + elif time.altzone < 0: + tstamp += "+" + tz + else: + pass + return tstamp + + +class ElementPosition(object): + """Convert a dictionary with element position information to an object.""" + + def __init__(self): + pass + + +def make_story_elpos(): + return ElementPosition() + + +def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list: + """Return rectangles of text lines between two points. + + Notes: + The default of 'start' is top-left of 'clip'. The default of 'stop' + is bottom-reight of 'clip'. + + Args: + start: start point_like + stop: end point_like, must be 'below' start + clip: consider this rect_like only, default is page rectangle + Returns: + List of line bbox intersections with the area established by the + parameters. + """ + # validate and normalize arguments + if clip is None: + clip = page.rect + clip = Rect(clip) + if start is None: + start = clip.tl + if stop is None: + stop = clip.br + clip.y0 = start.y + clip.y1 = stop.y + if clip.is_empty or clip.is_infinite: + return [] + + # extract text of page, clip only, no images, expand ligatures + blocks = page.get_text( + "dict", flags=0, clip=clip, + )["blocks"] + + lines = [] # will return this list of rectangles + for b in blocks: + bbox = Rect(b["bbox"]) + if bbox.is_infinite or bbox.is_empty: + continue + for line in b["lines"]: + bbox = Rect(line["bbox"]) + if bbox.is_infinite or bbox.is_empty: + continue + lines.append(bbox) + + if lines == []: # did not select anything + return lines + + lines.sort(key=lambda bbox: bbox.y1) # sort by vertical positions + + # cut off prefix from first line if start point is close to its top + bboxf = lines.pop(0) + if bboxf.y0 - start.y <= 0.1 * bboxf.height: # close enough? + r = Rect(start.x, bboxf.y0, bboxf.br) # intersection rectangle + if not (r.is_empty or r.is_infinite): + lines.insert(0, r) # insert again if not empty + else: + lines.insert(0, bboxf) # insert again + + if lines == []: # the list might have been emptied + return lines + + # cut off suffix from last line if stop point is close to its bottom + bboxl = lines.pop() + if stop.y - bboxl.y1 <= 0.1 * bboxl.height: # close enough? + r = Rect(bboxl.tl, stop.x, bboxl.y1) # intersection rectangle + if not (r.is_empty or r.is_infinite): + lines.append(r) # append if not empty + else: + lines.append(bboxl) # append again + + return lines + + +def glyph_name_to_unicode(name: str) -> int: + """Convenience function accessing unicodedata.""" + import unicodedata + try: + unc = ord(unicodedata.lookup(name)) + except Exception: + unc = 65533 + return unc + + +def hdist(dir, a, b): + dx = b.x - a.x + dy = b.y - a.y + return mupdf.fz_abs(dx * dir.x + dy * dir.y) + + +def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list: + """Return a list of (rows x cols) equal sized rectangles. + + Notes: + A utility to fill a given area with table cells of equal size. + Args: + rect: rect_like to use as the table area + rows: number of rows + cols: number of columns + Returns: + A list with items, where each item is a list of + PyMuPDF Rect objects of equal sizes. + """ + rect = Rect(rect) # ensure this is a Rect + if rect.is_empty or rect.is_infinite: + raise ValueError("rect must be finite and not empty") + tl = rect.tl + + height = rect.height / rows # height of one table cell + width = rect.width / cols # width of one table cell + delta_h = (width, 0, width, 0) # diff to next right rect + delta_v = (0, height, 0, height) # diff to next lower rect + + r = Rect(tl, tl.x + width, tl.y + height) # first rectangle + + # make the first row + row = [r] + for i in range(1, cols): + r += delta_h # build next rect to the right + row.append(r) + + # make result, starts with first row + rects = [row] + for i in range(1, rows): + row = rects[i - 1] # take previously appended row + nrow = [] # the new row to append + for r in row: # for each previous cell add its downward copy + nrow.append(r + delta_v) + rects.append(nrow) # append new row to result + + return rects + + +def util_ensure_widget_calc(annot): + ''' + Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO + ''' + annot_obj = mupdf.pdf_annot_obj(annot.this) + pdf = mupdf.pdf_get_bound_document(annot_obj) + PDFNAME_CO = mupdf.pdf_new_name("CO") # = PDF_NAME(CO) + acro = mupdf.pdf_dict_getl( # get AcroForm dict + mupdf.pdf_trailer(pdf), + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + ) + + CO = mupdf.pdf_dict_get(acro, PDFNAME_CO) # = AcroForm/CO + if not mupdf.pdf_is_array(CO): + CO = mupdf.pdf_dict_put_array(acro, PDFNAME_CO, 2) + n = mupdf.pdf_array_len(CO) + found = 0 + xref = mupdf.pdf_to_num(annot_obj) + for i in range(n): + nxref = mupdf.pdf_to_num(mupdf.pdf_array_get(CO, i)) + if xref == nxref: + found = 1 + break + if not found: + mupdf.pdf_array_push(CO, mupdf.pdf_new_indirect(pdf, xref, 0)) + + +def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None): + ''' + Helper for initialising rectangle classes. + + 2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which + uses `goto` in ways that don't easily translate to Python. + + Returns (x0, y0, x1, y1) derived from , then override with p0, p1, + x0, y0, x1, y1 if they are not None. + + Accepts following forms for : + () returns all zeros. + (top-left, bottom-right) + (top-left, x1, y1) + (x0, y0, bottom-right) + (x0, y0, x1, y1) + (rect) + + Where top-left and bottom-right are (x, y) or something with .x, .y + members; rect is something with .x0, .y0, .x1, and .y1 members. + + 2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None. + ''' + def get_xy( arg): + if isinstance( arg, (list, tuple)) and len( arg) == 2: + return arg[0], arg[1] + if isinstance( arg, (Point, mupdf.FzPoint, mupdf.fz_point)): + return arg.x, arg.y + return None, None + def make_tuple( a): + if isinstance( a, tuple): + return a + if isinstance( a, Point): + return a.x, a.y + elif isinstance( a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)): + return a.x0, a.y0, a.x1, a.y1 + if not isinstance( a, (list, tuple)): + a = a, + return a + def handle_args(): + if len(args) == 0: + return 0, 0, 0, 0 + elif len(args) == 1: + arg = args[0] + if isinstance( arg, (list, tuple)) and len( arg) == 2: + p1, p2 = arg + ret = *p1, *p2 + assert len(ret) == 4 + return ret + if isinstance( arg, (list, tuple)) and len( arg) == 3: + a, b, c = arg + a = make_tuple(a) + b = make_tuple(b) + c = make_tuple(c) + ret = *a, *b, *c + assert len(ret) == 4 + return ret + ret = make_tuple( arg) + assert len(ret) == 4, f'{arg=} {ret=}' + return ret + elif len(args) == 2: + ret = get_xy( args[0]) + get_xy( args[1]) + assert len(ret) == 4 + return ret + elif len(args) == 3: + x0, y0 = get_xy( args[0]) + if (x0, y0) != (None, None): + return x0, y0, args[1], args[2] + x1, y1 = get_xy( args[2]) + if (x1, y1) != (None, None): + return args[0], args[1], x1, y1 + elif len(args) == 4: + return args[0], args[1], args[2], args[3] + raise Exception( f'Unrecognised args: {args}') + ret_x0, ret_y0, ret_x1, ret_y1 = handle_args() + if p0 is not None: ret_x0, ret_y0 = get_xy(p0) + if p1 is not None: ret_x1, ret_y1 = get_xy(p1) + if x0 is not None: ret_x0 = x0 + if y0 is not None: ret_y0 = y0 + if x1 is not None: ret_x1 = x1 + if y1 is not None: ret_y1 = y1 + return ret_x0, ret_y0, ret_x1, ret_y1 + + +def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None): + a, b, c, d = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1) + def convert(x, ceil): + if ceil: + return int(math.ceil(x)) + else: + return int(math.floor(x)) + a = convert(a, False) + b = convert(b, False) + c = convert(c, True) + d = convert(d, True) + return a, b, c, d + + +def util_round_rect( rect): + return JM_py_from_irect(mupdf.fz_round_rect(JM_rect_from_py(rect))) + + +def util_transform_rect( rect, matrix): + if g_use_extra: + return extra.util_transform_rect( rect, matrix) + return JM_py_from_rect(mupdf.fz_transform_rect(JM_rect_from_py(rect), JM_matrix_from_py(matrix))) + + +def util_intersect_rect( r1, r2): + return JM_py_from_rect( + mupdf.fz_intersect_rect( + JM_rect_from_py(r1), + JM_rect_from_py(r2), + ) + ) + + +def util_is_point_in_rect( p, r): + return mupdf.fz_is_point_inside_rect( + JM_point_from_py(p), + JM_rect_from_py(r), + ) + +def util_include_point_in_rect( r, p): + return JM_py_from_rect( + mupdf.fz_include_point_in_rect( + JM_rect_from_py(r), + JM_point_from_py(p), + ) + ) + + +def util_point_in_quad( P, Q): + p = JM_point_from_py(P) + q = JM_quad_from_py(Q) + return mupdf.fz_is_point_inside_quad(p, q) + + +def util_transform_point( point, matrix): + return JM_py_from_point( + mupdf.fz_transform_point( + JM_point_from_py(point), + JM_matrix_from_py(matrix), + ) + ) + + +def util_union_rect( r1, r2): + return JM_py_from_rect( + mupdf.fz_union_rect( + JM_rect_from_py(r1), + JM_rect_from_py(r2), + ) + ) + + +def util_concat_matrix( m1, m2): + return JM_py_from_matrix( + mupdf.fz_concat( + JM_matrix_from_py(m1), + JM_matrix_from_py(m2), + ) + ) + + +def util_invert_matrix(matrix): + if 0: + # Use MuPDF's fz_invert_matrix(). + if isinstance( matrix, (tuple, list)): + matrix = mupdf.FzMatrix( *matrix) + elif isinstance( matrix, mupdf.fz_matrix): + matrix = mupdf.FzMatrix( matrix) + elif isinstance( matrix, Matrix): + matrix = mupdf.FzMatrix( matrix.a, matrix.b, matrix.c, matrix.d, matrix.e, matrix.f) + assert isinstance( matrix, mupdf.FzMatrix), f'{type(matrix)=}: {matrix}' + ret = mupdf.fz_invert_matrix( matrix) + if ret == matrix and (0 + or abs( matrix.a - 1) >= sys.float_info.epsilon + or abs( matrix.b - 0) >= sys.float_info.epsilon + or abs( matrix.c - 0) >= sys.float_info.epsilon + or abs( matrix.d - 1) >= sys.float_info.epsilon + ): + # Inversion not possible. + return 1, () + return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f) + # Do inversion in python. + src = JM_matrix_from_py(matrix) + a = src.a + det = a * src.d - src.b * src.c + if det < -sys.float_info.epsilon or det > sys.float_info.epsilon: + dst = mupdf.FzMatrix() + rdet = 1 / det + dst.a = src.d * rdet + dst.b = -src.b * rdet + dst.c = -src.c * rdet + dst.d = a * rdet + a = -src.e * dst.a - src.f * dst.c + dst.f = -src.e * dst.b - src.f * dst.d + dst.e = a + return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f) + + return 1, () + + +def util_measure_string( text, fontname, fontsize, encoding): + font = mupdf.fz_new_base14_font(fontname) + w = 0 + pos = 0 + while pos < len(text): + t, c = mupdf.fz_chartorune(text[pos:]) + pos += t + if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK: + c = mupdf.fz_iso8859_7_from_unicode(c) + elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC: + c = mupdf.fz_windows_1251_from_unicode(c) + else: + c = mupdf.fz_windows_1252_from_unicode(c) + if c < 0: + c = 0xB7 + g = mupdf.fz_encode_character(font, c) + dw = mupdf.fz_advance_glyph(font, g, 0) + w += dw + ret = w * fontsize + return ret + + +def util_sine_between(C, P, Q): + # for points C, P, Q compute the sine between lines CP and QP + c = JM_point_from_py(C) + p = JM_point_from_py(P) + q = JM_point_from_py(Q) + s = mupdf.fz_normalize_vector(mupdf.fz_make_point(q.x - p.x, q.y - p.y)) + m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y) + m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0) + m1 = mupdf.fz_concat(m1, m2) + c = mupdf.fz_transform_point(c, m1) + c = mupdf.fz_normalize_vector(c) + return c.y + + +def util_hor_matrix(C, P): + ''' + Return the matrix that maps two points C, P to the x-axis such that + C -> (0,0) and the image of P have the same distance. + ''' + c = JM_point_from_py(C) + p = JM_point_from_py(P) + + # compute (cosine, sine) of vector P-C with double precision: + s = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y)) + + m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y) + m2 = mupdf.fz_make_matrix(s.x, -s.y, s.y, s.x, 0, 0) + return JM_py_from_matrix(mupdf.fz_concat(m1, m2)) + + +def match_string(h0, n0): + h = 0 + n = 0 + e = h + delta_h, hc = chartocanon(h0[h:]) + h += delta_h + delta_n, nc = chartocanon(n0[n:]) + n += delta_n + while hc == nc: + e = h + if hc == ord(' '): + while 1: + delta_h, hc = chartocanon(h0[h:]) + h += delta_h + if hc != ord(' '): + break + else: + delta_h, hc = chartocanon(h0[h:]) + h += delta_h + if nc == ord(' '): + while 1: + delta_n, nc = chartocanon(n0[n:]) + n += delta_n + if nc != ord(' '): + break + else: + delta_n, nc = chartocanon(n0[n:]) + n += delta_n + return None if nc != 0 else e + + +def on_highlight_char(hits, line, ch): + assert hits + assert isinstance(line, mupdf.FzStextLine) + assert isinstance(ch, mupdf.FzStextChar) + vfuzz = ch.m_internal.size * hits.vfuzz + hfuzz = ch.m_internal.size * hits.hfuzz + ch_quad = JM_char_quad(line, ch) + if hits.len > 0: + # fixme: end = hits.quads[-1] + quad = hits.quads[hits.len - 1] + end = JM_quad_from_py(quad) + if ( 1 + and hdist(line.m_internal.dir, end.lr, ch_quad.ll) < hfuzz + and vdist(line.m_internal.dir, end.lr, ch_quad.ll) < vfuzz + and hdist(line.m_internal.dir, end.ur, ch_quad.ul) < hfuzz + and vdist(line.m_internal.dir, end.ur, ch_quad.ul) < vfuzz + ): + end.ur = ch_quad.ur + end.lr = ch_quad.lr + assert hits.quads[-1] == end + return + hits.quads.append(ch_quad) + hits.len += 1 + + +def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map): + ''' + Deep-copies a source page to the target. + Modified version of function of pdfmerge.c: we also copy annotations, but + we skip some subtypes. In addition we rotate output. + ''' + if g_use_extra: + #log( 'Calling C++ extra.page_merge()') + return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map) + + # list of object types (per page) we want to copy + known_page_objs = [ + PDF_NAME('Contents'), + PDF_NAME('Resources'), + PDF_NAME('MediaBox'), + PDF_NAME('CropBox'), + PDF_NAME('BleedBox'), + PDF_NAME('TrimBox'), + PDF_NAME('ArtBox'), + PDF_NAME('Rotate'), + PDF_NAME('UserUnit'), + ] + page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from) + + # make new page dict in dest doc + page_dict = mupdf.pdf_new_dict(doc_des, 4) + mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page')) + + # copy objects of source page into it + for i in range( len(known_page_objs)): + obj = mupdf.pdf_dict_get_inheritable( page_ref, known_page_objs[i]) + if obj.m_internal: + #log( '{=type(graft_map) type(graft_map.this)}') + mupdf.pdf_dict_put( page_dict, known_page_objs[i], mupdf.pdf_graft_mapped_object(graft_map.this, obj)) + + # Copy annotations, but skip Link, Popup, IRT, Widget types + # If selected, remove dict keys P (parent) and Popup + if copy_annots: + old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots')) + n = mupdf.pdf_array_len( old_annots) + if n > 0: + new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), n) + for i in range(n): + o = mupdf.pdf_array_get( old_annots, i) + if not o.m_internal or not mupdf.pdf_is_dict(o): + continue # skip non-dict items + if mupdf.pdf_dict_gets( o, "IRT").m_internal: + continue + subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype')) + if mupdf.pdf_name_eq( subtype, PDF_NAME('Link')): + continue + if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')): + continue + if mupdf.pdf_name_eq(subtype, PDF_NAME('Widget')): + continue + mupdf.pdf_dict_del( o, PDF_NAME('Popup')) + mupdf.pdf_dict_del( o, PDF_NAME('P')) + copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o) + annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0) + mupdf.pdf_array_push( new_annots, annot) + + # rotate the page + if rotate != -1: + mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate) + # Now add the page dictionary to dest PDF + ref = mupdf.pdf_add_object( doc_des, page_dict) + + # Insert new page at specified location + mupdf.pdf_insert_page( doc_des, page_to, ref) + + +def paper_rect(s: str) -> Rect: + """Return a Rect for the paper size indicated in string 's'. Must conform to the argument of method 'PaperSize', which will be invoked. + """ + width, height = paper_size(s) + return Rect(0.0, 0.0, width, height) + + +def paper_size(s: str) -> tuple: + """Return a tuple (width, height) for a given paper format string. + + Notes: + 'A4-L' will return (842, 595), the values for A4 landscape. + Suffix '-P' and no suffix return the portrait tuple. + """ + size = s.lower() + f = "p" + if size.endswith("-l"): + f = "l" + size = size[:-2] + if size.endswith("-p"): + size = size[:-2] + rc = paper_sizes().get(size, (-1, -1)) + if f == "p": + return rc + return (rc[1], rc[0]) + + +def paper_sizes(): + """Known paper formats @ 72 dpi as a dictionary. Key is the format string + like "a4" for ISO-A4. Value is the tuple (width, height). + + Information taken from the following web sites: + www.din-formate.de + www.din-formate.info/amerikanische-formate.html + www.directtools.de/wissen/normen/iso.htm + """ + return { + "a0": (2384, 3370), + "a1": (1684, 2384), + "a10": (74, 105), + "a2": (1191, 1684), + "a3": (842, 1191), + "a4": (595, 842), + "a5": (420, 595), + "a6": (298, 420), + "a7": (210, 298), + "a8": (147, 210), + "a9": (105, 147), + "b0": (2835, 4008), + "b1": (2004, 2835), + "b10": (88, 125), + "b2": (1417, 2004), + "b3": (1001, 1417), + "b4": (709, 1001), + "b5": (499, 709), + "b6": (354, 499), + "b7": (249, 354), + "b8": (176, 249), + "b9": (125, 176), + "c0": (2599, 3677), + "c1": (1837, 2599), + "c10": (79, 113), + "c2": (1298, 1837), + "c3": (918, 1298), + "c4": (649, 918), + "c5": (459, 649), + "c6": (323, 459), + "c7": (230, 323), + "c8": (162, 230), + "c9": (113, 162), + "card-4x6": (288, 432), + "card-5x7": (360, 504), + "commercial": (297, 684), + "executive": (522, 756), + "invoice": (396, 612), + "ledger": (792, 1224), + "legal": (612, 1008), + "legal-13": (612, 936), + "letter": (612, 792), + "monarch": (279, 540), + "tabloid-extra": (864, 1296), + } + +def pdf_lookup_page_loc(doc, needle): + return mupdf.pdf_lookup_page_loc(doc, needle) + + +def pdfobj_string(o, prefix=''): + ''' + Returns description of mupdf.PdfObj (wrapper for pdf_obj) . + ''' + assert 0, 'use mupdf.pdf_debug_obj() ?' + ret = '' + if mupdf.pdf_is_array(o): + l = mupdf.pdf_array_len(o) + ret += f'array {l}\n' + for i in range(l): + oo = mupdf.pdf_array_get(o, i) + ret += pdfobj_string(oo, prefix + ' ') + ret += '\n' + elif mupdf.pdf_is_bool(o): + ret += f'bool: {o.array_get_bool()}\n' + elif mupdf.pdf_is_dict(o): + l = mupdf.pdf_dict_len(o) + ret += f'dict {l}\n' + for i in range(l): + key = mupdf.pdf_dict_get_key(o, i) + value = mupdf.pdf_dict_get( o, key) + ret += f'{prefix} {key}: ' + ret += pdfobj_string( value, prefix + ' ') + ret += '\n' + elif mupdf.pdf_is_embedded_file(o): + ret += f'embedded_file: {o.embedded_file_name()}\n' + elif mupdf.pdf_is_indirect(o): + ret += f'indirect: ...\n' + elif mupdf.pdf_is_int(o): + ret += f'int: {mupdf.pdf_to_int(o)}\n' + elif mupdf.pdf_is_jpx_image(o): + ret += f'jpx_image:\n' + elif mupdf.pdf_is_name(o): + ret += f'name: {mupdf.pdf_to_name(o)}\n' + elif o.pdf_is_null: + ret += f'null\n' + #elif o.pdf_is_number: + # ret += f'number\n' + elif o.pdf_is_real: + ret += f'real: {o.pdf_to_real()}\n' + elif mupdf.pdf_is_stream(o): + ret += f'stream\n' + elif mupdf.pdf_is_string(o): + ret += f'string: {mupdf.pdf_to_string(o)}\n' + else: + ret += '<>\n' + + return ret + + +def repair_mono_font(page: "Page", font: "Font") -> None: + """Repair character spacing for mono fonts. + + Notes: + Some mono-spaced fonts are displayed with a too large character + distance, e.g. "a b c" instead of "abc". This utility adds an entry + "/W[0 65535 w]" to the descendent font(s) of font. The float w is + taken to be the width of 0x20 (space). + This should enforce viewers to use 'w' as the character width. + + Args: + page: pymupdf.Page object. + font: pymupdf.Font object. + """ + if not font.flags["mono"]: # font not flagged as monospaced + return None + doc = page.parent # the document + fontlist = page.get_fonts() # list of fonts on page + xrefs = [ # list of objects referring to font + f[0] + for f in fontlist + if (f[3] == font.name and f[4].startswith("F") and f[5].startswith("Identity")) + ] + if xrefs == []: # our font does not occur + return + xrefs = set(xrefs) # drop any double counts + width = int(round((font.glyph_advance(32) * 1000))) + for xref in xrefs: + if not TOOLS.set_font_width(doc, xref, width): + log("Cannot set width for '%s' in xref %i" % (font.name, xref)) + + +def sRGB_to_pdf(srgb: int) -> tuple: + """Convert sRGB color code to a PDF color triple. + + There is **no error checking** for performance reasons! + + Args: + srgb: (int) RRGGBB (red, green, blue), each color in range(255). + Returns: + Tuple (red, green, blue) each item in interval 0 <= item <= 1. + """ + t = sRGB_to_rgb(srgb) + return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0 + + +def sRGB_to_rgb(srgb: int) -> tuple: + """Convert sRGB color code to an RGB color triple. + + There is **no error checking** for performance reasons! + + Args: + srgb: (int) SSRRGGBB (red, green, blue), each color in range(255). + With MuPDF < 1.26, `s` is always 0. + Returns: + Tuple (red, green, blue) each item in interval 0 <= item <= 255. + """ + srgb &= 0xffffff + r = srgb >> 16 + g = (srgb - (r << 16)) >> 8 + b = srgb - (r << 16) - (g << 8) + return (r, g, b) + + +def string_in_names_list(p, names_list): + n = mupdf.pdf_array_len( names_list) if names_list else 0 + str_ = mupdf.pdf_to_text_string( p) + for i in range(0, n, 2): + if mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, i)) == str_: + return 1 + return 0 + + +def strip_outline(doc, outlines, page_count, page_object_nums, names_list): + ''' + Returns (count, first, prev). + ''' + first = None + count = 0 + current = outlines + prev = None + while current.m_internal: + # Strip any children to start with. This takes care of + # First / Last / Count for us. + nc = strip_outlines(doc, current, page_count, page_object_nums, names_list) + + if not dest_is_valid(current, page_count, page_object_nums, names_list): + if nc == 0: + # Outline with invalid dest and no children. Drop it by + # pulling the next one in here. + next = mupdf.pdf_dict_get(current, PDF_NAME('Next')) + if not next.m_internal: + # There is no next one to pull in + if prev.m_internal: + mupdf.pdf_dict_del(prev, PDF_NAME('Next')) + elif prev.m_internal: + mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next) + mupdf.pdf_dict_put(next, PDF_NAME('Prev'), prev) + else: + mupdf.pdf_dict_del(next, PDF_NAME('Prev')) + current = next + else: + # Outline with invalid dest, but children. Just drop the dest. + mupdf.pdf_dict_del(current, PDF_NAME('Dest')) + mupdf.pdf_dict_del(current, PDF_NAME('A')) + current = mupdf.pdf_dict_get(current, PDF_NAME('Next')) + else: + # Keep this one + if not first or not first.m_internal: + first = current + prev = current + current = mupdf.pdf_dict_get(current, PDF_NAME('Next')) + count += 1 + + return count, first, prev + + +def strip_outlines(doc, outlines, page_count, page_object_nums, names_list): + if not outlines.m_internal: + return 0 + + first = mupdf.pdf_dict_get(outlines, PDF_NAME('First')) + if not first.m_internal: + nc = 0 + else: + nc, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list) + + if nc == 0: + mupdf.pdf_dict_del(outlines, PDF_NAME('First')) + mupdf.pdf_dict_del(outlines, PDF_NAME('Last')) + mupdf.pdf_dict_del(outlines, PDF_NAME('Count')) + else: + old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count'))) + mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first) + mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last) + mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(nc if old_count > 0 else -nc)) + return nc + + +trace_device_FILL_PATH = 1 +trace_device_STROKE_PATH = 2 +trace_device_CLIP_PATH = 3 +trace_device_CLIP_STROKE_PATH = 4 + + +def unicode_to_glyph_name(ch: int) -> str: + """ + Convenience function accessing unicodedata. + """ + import unicodedata + try: + name = unicodedata.name(chr(ch)) + except ValueError: + name = ".notdef" + return name + + +def vdist(dir, a, b): + dx = b.x - a.x + dy = b.y - a.y + return mupdf.fz_abs(dx * dir.y + dy * dir.x) + + +def apply_pages( + path, + pagefn, + *, + pagefn_args=(), + pagefn_kwargs=dict(), + initfn=None, + initfn_args=(), + initfn_kwargs=dict(), + pages=None, + method='single', + concurrency=None, + _stats=False, + ): + ''' + Returns list of results from `pagefn()`, optionally using concurrency for + speed. + + Args: + path: + Path of document. + pagefn: + Function to call for each page; is passed (page, *pagefn_args, + **pagefn_kwargs). Return value is added to list that we return. If + `method` is not 'single', must be a top-level function - nested + functions don't work with concurrency. + pagefn_args + pagefn_kwargs: + Additional args to pass to `pagefn`. Must be picklable. + initfn: + If true, called once in each worker process; is passed + (*initfn_args, **initfn_kwargs). + initfn_args + initfn_kwargs: + Args to pass to initfn. Must be picklable. + pages: + List of page numbers to process, or None to include all pages. + method: + 'single' + Do not use concurrency. + 'mp' + Operate concurrently using Python's `multiprocessing` module. + 'fork' + Operate concurrently using custom implementation with + `os.fork()`. Does not work on Windows. + concurrency: + Number of worker processes to use when operating concurrently. If + None, we use the number of available CPUs. + _stats: + Internal, may change or be removed. If true, we output simple + timing diagnostics. + + Note: We require a file path rather than a Document, because Document + instances do not work properly after a fork - internal file descriptor + offsets are shared between the parent and child processes. + ''' + if _stats: + t0 = time.time() + + if method == 'single': + if initfn: + initfn(*initfn_args, **initfn_kwargs) + ret = list() + document = Document(path) + if pages is None: + pages = range(len(document)) + for pno in pages: + page = document[pno] + r = pagefn(page, *pagefn_args, **initfn_kwargs) + ret.append(r) + + else: + # Use concurrency. + # + from . import _apply_pages + + if pages is None: + if _stats: + t = time.time() + with Document(path) as document: + num_pages = len(document) + pages = list(range(num_pages)) + if _stats: + t = time.time() - t + log(f'{t:.2f}s: count pages.') + + if _stats: + t = time.time() + + if method == 'mp': + ret = _apply_pages._multiprocessing( + path, + pages, + pagefn, + pagefn_args, + pagefn_kwargs, + initfn, + initfn_args, + initfn_kwargs, + concurrency, + _stats, + ) + + elif method == 'fork': + ret = _apply_pages._fork( + path, + pages, + pagefn, + pagefn_args, + pagefn_kwargs, + initfn, + initfn_args, + initfn_kwargs, + concurrency, + _stats, + ) + + else: + assert 0, f'Unrecognised {method=}.' + + if _stats: + t = time.time() - t + log(f'{t:.2f}s: work.') + + if _stats: + t = time.time() - t0 + log(f'{t:.2f}s: total.') + return ret + + +def get_text( + path, + *, + pages=None, + method='single', + concurrency=None, + + option='text', + clip=None, + flags=None, + textpage=None, + sort=False, + delimiters=None, + + _stats=False, + ): + ''' + Returns list of results from `Page.get_text()`, optionally using + concurrency for speed. + + Args: + path: + Path of document. + pages: + List of page numbers to process, or None to include all pages. + method: + 'single' + Do not use concurrency. + 'mp' + Operate concurrently using Python's `multiprocessing` module. + 'fork' + Operate concurrently using custom implementation with + `os.fork`. Does not work on Windows. + concurrency: + Number of worker processes to use when operating concurrently. If + None, we use the number of available CPUs. + option + clip + flags + textpage + sort + delimiters: + Passed to internal calls to `Page.get_text()`. + ''' + args_dict = dict( + option=option, + clip=clip, + flags=flags, + textpage=textpage, + sort=sort, + delimiters=delimiters, + ) + + return apply_pages( + path, + Page.get_text, + pagefn_kwargs=args_dict, + pages=pages, + method=method, + concurrency=concurrency, + _stats=_stats, + ) + + +class TOOLS: + ''' + We use @staticmethod to avoid the need to create an instance of this class. + ''' + + def _derotate_matrix(page): + if isinstance(page, mupdf.PdfPage): + return JM_py_from_matrix(JM_derotate_page_matrix(page)) + else: + return JM_py_from_matrix(mupdf.FzMatrix()) + + @staticmethod + def _fill_widget(annot, widget): + val = JM_get_widget_properties(annot, widget) + + widget.rect = Rect(annot.rect) + widget.xref = annot.xref + widget.parent = annot.parent + widget._annot = annot # backpointer to annot object + if not widget.script: + widget.script = None + if not widget.script_stroke: + widget.script_stroke = None + if not widget.script_format: + widget.script_format = None + if not widget.script_change: + widget.script_change = None + if not widget.script_calc: + widget.script_calc = None + if not widget.script_blur: + widget.script_blur = None + if not widget.script_focus: + widget.script_focus = None + return val + + @staticmethod + def _get_all_contents(page): + page = _as_pdf_page(page.this) + res = JM_read_contents(page.obj()) + result = JM_BinFromBuffer( res) + return result + + @staticmethod + def _insert_contents(page, newcont, overlay=1): + """Add bytes as a new /Contents object for a page, and return its xref.""" + pdfpage = _as_pdf_page(page, required=1) + contbuf = JM_BufferFromBytes(newcont) + xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay) + #fixme: pdfpage->doc->dirty = 1; + return xref + + @staticmethod + def _le_annot_parms(annot, p1, p2, fill_color): + """Get common parameters for making annot line end symbols. + + Returns: + m: matrix that maps p1, p2 to points L, P on the x-axis + im: its inverse + L, P: transformed p1, p2 + w: line width + scol: stroke color string + fcol: fill color store_shrink + opacity: opacity string (gs command) + """ + w = annot.border["width"] # line width + sc = annot.colors["stroke"] # stroke color + if not sc: # black if missing + sc = (0,0,0) + scol = " ".join(map(str, sc)) + " RG\n" + if fill_color: + fc = fill_color + else: + fc = annot.colors["fill"] # fill color + if not fc: + fc = (1,1,1) # white if missing + fcol = " ".join(map(str, fc)) + " rg\n" + # nr = annot.rect + np1 = p1 # point coord relative to annot rect + np2 = p2 # point coord relative to annot rect + m = Matrix(util_hor_matrix(np1, np2)) # matrix makes the line horizontal + im = ~m # inverted matrix + L = np1 * m # converted start (left) point + R = np2 * m # converted end (right) point + if 0 <= annot.opacity < 1: + opacity = "/H gs\n" + else: + opacity = "" + return m, im, L, R, w, scol, fcol, opacity + + @staticmethod + def _le_butt(annot, p1, p2, lr, fill_color): + """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 3 + d = shift * max(1, w) + M = R if lr else L + top = (M + (0, -d/2.)) * im + bot = (M + (0, d/2.)) * im + ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) + ap += "%f %f l\n" % (bot.x, bot.y) + ap += _format_g(w) + " w\n" + ap += scol + "s\nQ\n" + return ap + + @staticmethod + def _le_circle(annot, p1, p2, lr, fill_color): + """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 # 2*shift*width = length of square edge + d = shift * max(1, w) + M = R - (d/2., 0) if lr else L + (d/2., 0) + r = Rect(M, M) + (-d, -d, d, d) # the square + ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "b\nQ\n" + return ap + + @staticmethod + def _le_closedarrow(annot, p1, p2, lr, fill_color): + """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 + d = shift * max(1, w) + p2 = R + (d/2., 0) if lr else L - (d/2., 0) + p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d) + p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d) + p1 *= im + p2 *= im + p3 *= im + ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) + ap += "%f %f l\n" % (p2.x, p2.y) + ap += "%f %f l\n" % (p3.x, p3.y) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "b\nQ\n" + return ap + + @staticmethod + def _le_diamond(annot, p1, p2, lr, fill_color): + """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 # 2*shift*width = length of square edge + d = shift * max(1, w) + M = R - (d/2., 0) if lr else L + (d/2., 0) + r = Rect(M, M) + (-d, -d, d, d) # the square + # the square makes line longer by (2*shift - 1)*width + p = (r.tl + (r.bl - r.tl) * 0.5) * im + ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) + p = (r.tl + (r.tr - r.tl) * 0.5) * im + ap += "%f %f l\n" % (p.x, p.y) + p = (r.tr + (r.br - r.tr) * 0.5) * im + ap += "%f %f l\n" % (p.x, p.y) + p = (r.br + (r.bl - r.br) * 0.5) * im + ap += "%f %f l\n" % (p.x, p.y) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "b\nQ\n" + return ap + + @staticmethod + def _le_openarrow(annot, p1, p2, lr, fill_color): + """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 + d = shift * max(1, w) + p2 = R + (d/2., 0) if lr else L - (d/2., 0) + p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d) + p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d) + p1 *= im + p2 *= im + p3 *= im + ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) + ap += "%f %f l\n" % (p2.x, p2.y) + ap += "%f %f l\n" % (p3.x, p3.y) + ap += _format_g(w) + " w\n" + ap += scol + "S\nQ\n" + return ap + + @staticmethod + def _le_rclosedarrow(annot, p1, p2, lr, fill_color): + """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 + d = shift * max(1, w) + p2 = R - (2*d, 0) if lr else L + (2*d, 0) + p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d) + p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d) + p1 *= im + p2 *= im + p3 *= im + ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) + ap += "%f %f l\n" % (p2.x, p2.y) + ap += "%f %f l\n" % (p3.x, p3.y) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "b\nQ\n" + return ap + + @staticmethod + def _le_ropenarrow(annot, p1, p2, lr, fill_color): + """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 + d = shift * max(1, w) + p2 = R - (d/3., 0) if lr else L + (d/3., 0) + p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d) + p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d) + p1 *= im + p2 *= im + p3 *= im + ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) + ap += "%f %f l\n" % (p2.x, p2.y) + ap += "%f %f l\n" % (p3.x, p3.y) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "S\nQ\n" + return ap + + @staticmethod + def _le_slash(annot, p1, p2, lr, fill_color): + """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + rw = 1.1547 * max(1, w) * 1.0 # makes rect diagonal a 30 deg inclination + M = R if lr else L + r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w) + top = r.tl * im + bot = r.br * im + ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) + ap += "%f %f l\n" % (bot.x, bot.y) + ap += _format_g(w) + " w\n" + ap += scol + "s\nQ\n" + return ap + + @staticmethod + def _le_square(annot, p1, p2, lr, fill_color): + """Make stream commands for square line end symbol. "lr" denotes left (False) or right point. + """ + m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color) + shift = 2.5 # 2*shift*width = length of square edge + d = shift * max(1, w) + M = R - (d/2., 0) if lr else L + (d/2., 0) + r = Rect(M, M) + (-d, -d, d, d) # the square + # the square makes line longer by (2*shift - 1)*width + p = r.tl * im + ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) + p = r.tr * im + ap += "%f %f l\n" % (p.x, p.y) + p = r.br * im + ap += "%f %f l\n" % (p.x, p.y) + p = r.bl * im + ap += "%f %f l\n" % (p.x, p.y) + ap += _format_g(w) + " w\n" + ap += scol + fcol + "b\nQ\n" + return ap + + @staticmethod + def _oval_string(p1, p2, p3, p4): + """Return /AP string defining an oval within a 4-polygon provided as points + """ + def bezier(p, q, r): + f = "%f %f %f %f %f %f c\n" + return f % (p.x, p.y, q.x, q.y, r.x, r.y) + + kappa = 0.55228474983 # magic number + ml = p1 + (p4 - p1) * 0.5 # middle points ... + mo = p1 + (p2 - p1) * 0.5 # for each ... + mr = p2 + (p3 - p2) * 0.5 # polygon ... + mu = p4 + (p3 - p4) * 0.5 # side + ol1 = ml + (p1 - ml) * kappa # the 8 bezier + ol2 = mo + (p1 - mo) * kappa # helper points + or1 = mo + (p2 - mo) * kappa + or2 = mr + (p2 - mr) * kappa + ur1 = mr + (p3 - mr) * kappa + ur2 = mu + (p3 - mu) * kappa + ul1 = mu + (p4 - mu) * kappa + ul2 = ml + (p4 - ml) * kappa + # now draw, starting from middle point of left side + ap = "%f %f m\n" % (ml.x, ml.y) + ap += bezier(ol1, ol2, mo) + ap += bezier(or1, or2, mr) + ap += bezier(ur1, ur2, mu) + ap += bezier(ul1, ul2, ml) + return ap + + @staticmethod + def _parse_da(annot): + + if g_use_extra: + val = extra.Tools_parse_da( annot.this) + else: + def Tools__parse_da(annot): + this_annot = annot.this + assert isinstance(this_annot, mupdf.PdfAnnot) + this_annot_obj = mupdf.pdf_annot_obj( this_annot) + pdf = mupdf.pdf_get_bound_document( this_annot_obj) + try: + da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA')) + if not da.m_internal: + trailer = mupdf.pdf_trailer(pdf) + da = mupdf.pdf_dict_getl(trailer, + PDF_NAME('Root'), + PDF_NAME('AcroForm'), + PDF_NAME('DA'), + ) + da_str = mupdf.pdf_to_text_string(da) + except Exception: + if g_exceptions_verbose: exception_info() + return + return da_str + val = Tools__parse_da(annot) + + if not val: + return ((0,), "", 0) + font = "Helv" + fsize = 12 + col = (0, 0, 0) + dat = val.split() # split on any whitespace + for i, item in enumerate(dat): + if item == "Tf": + font = dat[i - 2][1:] + fsize = float(dat[i - 1]) + dat[i] = dat[i-1] = dat[i-2] = "" + continue + if item == "g": # unicolor text + col = [(float(dat[i - 1]))] + dat[i] = dat[i-1] = "" + continue + if item == "rg": # RGB colored text + col = [float(f) for f in dat[i - 3:i]] + dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = "" + continue + if item == "k": # CMYK colored text + col = [float(f) for f in dat[i - 4:i]] + dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = "" + continue + + val = (col, font, fsize) + return val + + @staticmethod + def _reset_widget(annot): + this_annot = annot + this_annot_obj = mupdf.pdf_annot_obj(this_annot) + pdf = mupdf.pdf_get_bound_document(this_annot_obj) + mupdf.pdf_field_reset(pdf, this_annot_obj) + + @staticmethod + def _rotate_matrix(page): + pdfpage = page._pdf_page(required=False) + if not pdfpage.m_internal: + return JM_py_from_matrix(mupdf.FzMatrix()) + return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage)) + + @staticmethod + def _save_widget(annot, widget): + JM_set_widget_properties(annot, widget) + + def _update_da(annot, da_str): + if g_use_extra: + extra.Tools_update_da( annot.this, da_str) + else: + try: + this_annot = annot.this + assert isinstance(this_annot, mupdf.PdfAnnot) + mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str) + mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS')) # /* not supported */ + mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC')) # /* not supported */ + except Exception: + if g_exceptions_verbose: exception_info() + return + return + + @staticmethod + def gen_id(): + global TOOLS_JM_UNIQUE_ID + TOOLS_JM_UNIQUE_ID += 1 + return TOOLS_JM_UNIQUE_ID + + @staticmethod + def glyph_cache_empty(): + ''' + Empty the glyph cache. + ''' + mupdf.fz_purge_glyph_cache() + + @staticmethod + def image_profile(stream, keep_image=0): + ''' + Metadata of an image binary stream. + ''' + return JM_image_profile(stream, keep_image) + + @staticmethod + def mupdf_display_errors(on=None): + ''' + Set MuPDF error display to True or False. + ''' + global JM_mupdf_show_errors + if on is not None: + JM_mupdf_show_errors = bool(on) + return JM_mupdf_show_errors + + @staticmethod + def mupdf_display_warnings(on=None): + ''' + Set MuPDF warnings display to True or False. + ''' + global JM_mupdf_show_warnings + if on is not None: + JM_mupdf_show_warnings = bool(on) + return JM_mupdf_show_warnings + + @staticmethod + def mupdf_version(): + '''Get version of MuPDF binary build.''' + return mupdf.FZ_VERSION + + @staticmethod + def mupdf_warnings(reset=1): + ''' + Get the MuPDF warnings/errors with optional reset (default). + ''' + # Get any trailing `... repeated times...` message. + mupdf.fz_flush_warnings() + ret = '\n'.join( JM_mupdf_warnings_store) + if reset: + TOOLS.reset_mupdf_warnings() + return ret + + @staticmethod + def reset_mupdf_warnings(): + global JM_mupdf_warnings_store + JM_mupdf_warnings_store = list() + + @staticmethod + def set_aa_level(level): + ''' + Set anti-aliasing level. + ''' + mupdf.fz_set_aa_level(level) + + @staticmethod + def set_annot_stem( stem=None): + global JM_annot_id_stem + if stem is None: + return JM_annot_id_stem + len_ = len(stem) + 1 + if len_ > 50: + len_ = 50 + JM_annot_id_stem = stem[:50] + return JM_annot_id_stem + + @staticmethod + def set_font_width(doc, xref, width): + pdf = _as_pdf_document(doc, required=0) + if not pdf.m_internal: + return False + font = mupdf.pdf_load_object(pdf, xref) + dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts')) + if mupdf.pdf_is_array(dfonts): + n = mupdf.pdf_array_len(dfonts) + for i in range(n): + dfont = mupdf.pdf_array_get(dfonts, i) + warray = mupdf.pdf_new_array(pdf, 3) + mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0)) + mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535)) + mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width)) + mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray) + return True + + @staticmethod + def set_graphics_min_line_width(min_line_width): + ''' + Set the graphics minimum line width. + ''' + mupdf.fz_set_graphics_min_line_width(min_line_width) + + @staticmethod + def set_icc( on=0): + """Set ICC color handling on or off.""" + if on: + if mupdf.FZ_ENABLE_ICC: + mupdf.fz_enable_icc() + else: + RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError) + elif mupdf.FZ_ENABLE_ICC: + mupdf.fz_disable_icc() + + @staticmethod + def set_low_memory( on=None): + """Set / unset MuPDF device caching.""" + if on is not None: + _globals.no_device_caching = bool(on) + return _globals.no_device_caching + + @staticmethod + def set_small_glyph_heights(on=None): + """Set / unset small glyph heights.""" + if on is not None: + _globals.small_glyph_heights = bool(on) + if g_use_extra: + extra.set_small_glyph_heights(_globals.small_glyph_heights) + return _globals.small_glyph_heights + + @staticmethod + def set_subset_fontnames(on=None): + ''' + Set / unset returning fontnames with their subset prefix. + ''' + if on is not None: + _globals.subset_fontnames = bool(on) + if g_use_extra: + extra.set_subset_fontnames(_globals.subset_fontnames) + return _globals.subset_fontnames + + @staticmethod + def show_aa_level(): + ''' + Show anti-aliasing values. + ''' + return dict( + graphics = mupdf.fz_graphics_aa_level(), + text = mupdf.fz_text_aa_level(), + graphics_min_line_width = mupdf.fz_graphics_min_line_width(), + ) + + @staticmethod + def store_maxsize(): + ''' + MuPDF store size limit. + ''' + # fixme: return gctx->store->max. + return None + + @staticmethod + def store_shrink(percent): + ''' + Free 'percent' of current store size. + ''' + if percent >= 100: + mupdf.fz_empty_store() + return 0 + if percent > 0: + mupdf.fz_shrink_store( 100 - percent) + # fixme: return gctx->store->size. + + @staticmethod + def store_size(): + ''' + MuPDF current store size. + ''' + # fixme: return gctx->store->size. + return None + + @staticmethod + def unset_quad_corrections(on=None): + ''' + Set ascender / descender corrections on or off. + ''' + if on is not None: + _globals.skip_quad_corrections = bool(on) + if g_use_extra: + extra.set_skip_quad_corrections(_globals.skip_quad_corrections) + return _globals.skip_quad_corrections + + # fixme: also defined at top-level. + JM_annot_id_stem = 'fitz' + + fitz_config = JM_fitz_config() + + +# Callbacks not yet supported with cppyy. +if not mupdf_cppyy: + mupdf.fz_set_warning_callback(JM_mupdf_warning) + mupdf.fz_set_error_callback(JM_mupdf_error) + + +# If there are pending warnings when we exit, we end up in this sequence: +# +# atexit() +# -> mupdf::internal_thread_state::~internal_thread_state() +# -> fz_drop_context() +# -> fz_flush_warnings() +# -> SWIG Director code +# -> Python calling JM_mupdf_warning(). +# +# Unfortunately this causes a SEGV, seemingly because the SWIG Director code has +# already been torn down. +# +# So we use a Python atexit handler to explicitly call fz_flush_warnings(); +# this appears to happen early enough for the Director machinery to still +# work. So in the sequence above, fz_flush_warnings() will find that there are +# no pending warnings and will not attempt to call JM_mupdf_warning(). +# +def _atexit(): + #log( 'PyMuPDF/src/__init__.py:_atexit() called') + mupdf.fz_flush_warnings() + mupdf.fz_set_warning_callback(None) + mupdf.fz_set_error_callback(None) + #log( '_atexit() returning') +atexit.register( _atexit) + + +# List of (name, red, green, blue) where: +# name: upper-case name. +# red, green, blue: integer in range 0..255. +# +from . import _wxcolors +_wxcolors = _wxcolors._wxcolors + + +# Dict mapping from name to (red, green, blue). +# name: lower-case name. +# red, green, blue: float in range 0..1. +# +pdfcolor = dict() +for name, r, g, b in _wxcolors: + pdfcolor[name.lower()] = (r/255, g/255, b/255) + + +def colors_pdf_dict(): + ''' + Returns dict mapping from name to (red, green, blue). + name: lower-case name. + red, green, blue: float in range 0..1. + ''' + return pdfcolor + + +def colors_wx_list(): + ''' + Returns list of (name, red, green, blue) tuples: + name: upper-case name. + red, green, blue: integers in range 0..255. + ''' + return _wxcolors + + +def _mupdf_devel(make_links=True): + ''' + Allows PyMuPDF installation to be used to compile and link programmes that + use the MuPDF C/C++ API. + + Args: + make_links: + If true, then on non-windows we also create softlinks to any shared + libraries that are supplied with a version suffix; this allows them + to be used in a link command. + + For example we create links such as: + + site-packages/pymupdf/ + libmupdf.so -> libmupdf.so.26.7 + libmupdfcpp.so -> libmupdfcpp.so.26.7 + + Returns: (mupdf_include, mupdf_lib). + mupdf_include: + Path of MuPDF include directory within PyMuPDF install. + mupdf_lib + Path of MuPDF library directory within PyMuPDF install. + ''' + import platform + + log(f'{mupdf_version=}') + + p = os.path.normpath(f'{__file__}/..') + + mupdf_include = f'{p}/mupdf-devel/include' + + if platform.system() == 'Windows': + # Separate .lib files are used at build time. + mupdf_lib = f'{p}/mupdf-devel/lib' + else: + # .so files are used for both buildtime and runtime linking. + mupdf_lib = p + log(f'Within installed PyMuPDF:') + log(f' {mupdf_include=}') + log(f' {mupdf_lib=}') + + assert os.path.isdir(mupdf_include), f'Not a directory: {mupdf_include=}.' + assert os.path.isdir(mupdf_lib), f'Not a directory: {mupdf_lib=}.' + + if platform.system() != 'Windows' and make_links: + # Make symbolic links within the installed pymupdf module so + # that ld can find libmupdf.so etc. This is a bit of a hack, but + # necessary because wheels cannot contain symbolic links. + # + # For example we create `libmupdf.so -> libmupdf.so.24.8`. + # + # We are careful to only create symlinks for the expected MuPDF + # version, in case old .so files from a previous install are still + # in place. + # + log(f'Creating symlinks in {mupdf_lib=} for MuPDF-{mupdf_version} .so files.') + regex_suffix = mupdf_version.split('.')[1:3] + regex_suffix = '[.]'.join(regex_suffix) + mupdf_lib_regex = f'^(lib[^.]+[.]so)[.]{regex_suffix}$' + log(f'{mupdf_lib_regex=}.') + for leaf in os.listdir(mupdf_lib): + m = re.match(mupdf_lib_regex, leaf) + if m: + pfrom = f'{mupdf_lib}/{m.group(1)}' + # os.path.exists() can return false if softlink exists + # but points to non-existent file, so we also use + # `os.path.islink()`. + if os.path.islink(pfrom) or os.path.exists(pfrom): + log(f'Removing existing link {pfrom=}.') + os.remove(pfrom) + log(f'Creating symlink: {pfrom} -> {leaf}') + os.symlink(leaf, pfrom) + + return mupdf_include, mupdf_lib + + +# We cannot import utils earlier because it imports this .py file itself and +# uses some pymupdf.* types in function typing. +# +from . import utils + + +# Use utils.*() fns for some class methods. +# +recover_bbox_quad = utils.recover_bbox_quad +recover_char_quad = utils.recover_char_quad +recover_line_quad = utils.recover_line_quad +recover_quad = utils.recover_quad +recover_span_quad = utils.recover_span_quad + +from .table import find_tables +Page.find_tables = find_tables + + +class FitzDeprecation(DeprecationWarning): + pass + +def restore_aliases(): + warnings.filterwarnings( "once", category=FitzDeprecation) + + def showthis(msg, cat, filename, lineno, file=None, line=None): + text = warnings.formatwarning(msg, cat, filename, lineno, line=line) + s = text.find("FitzDeprecation") + if s < 0: + log(text) + return + text = text[s:].splitlines()[0][4:] + log(text) + + warnings.showwarning = showthis + + def _alias(class_, new_name, legacy_name=None): + ''' + Adds an alias for a class_ or module item clled .. + + class_: + Class/module to modify; use None for the current module. + new_name: + String name of existing item, e.g. name of method. + legacy_name: + Name of legacy object to create in . If None, we generate + from by removing underscores and capitalising the next + letter. + ''' + if class_ is None: + class_ = sys.modules[__name__] + if not legacy_name: + legacy_name = '' + capitalise_next = False + for c in new_name: + if c == '_': + capitalise_next = True + elif capitalise_next: + legacy_name += c.upper() + capitalise_next = False + else: + legacy_name += c + new_object = getattr( class_, new_name) + assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}' + if callable( new_object): + def deprecated_function( *args, **kwargs): + warnings.warn( + f'"{legacy_name=}" removed from {class_} after v1.19.0 - use "{new_name}".', + category=FitzDeprecation, + ) + return new_object( *args, **kwargs) + setattr( class_, legacy_name, deprecated_function) + deprecated_function.__doc__ = ( + f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n' + f'{new_object.__doc__}' + ) + else: + setattr( class_, legacy_name, new_object) + + _alias( Annot, 'get_file', 'fileGet') + _alias( Annot, 'get_pixmap') + _alias( Annot, 'get_sound', 'soundGet') + _alias( Annot, 'get_text') + _alias( Annot, 'get_textbox') + _alias( Annot, 'get_textpage', 'getTextPage') + _alias( Annot, 'line_ends') + _alias( Annot, 'set_blendmode', 'setBlendMode') + _alias( Annot, 'set_border') + _alias( Annot, 'set_colors') + _alias( Annot, 'set_flags') + _alias( Annot, 'set_info') + _alias( Annot, 'set_line_ends') + _alias( Annot, 'set_name') + _alias( Annot, 'set_oc', 'setOC') + _alias( Annot, 'set_opacity') + _alias( Annot, 'set_rect') + _alias( Annot, 'update_file', 'fileUpd') + _alias( DisplayList, 'get_pixmap') + _alias( DisplayList, 'get_textpage', 'getTextPage') + _alias( Document, 'chapter_count') + _alias( Document, 'chapter_page_count') + _alias( Document, 'convert_to_pdf', 'convertToPDF') + _alias( Document, 'copy_page') + _alias( Document, 'delete_page') + _alias( Document, 'delete_pages', 'deletePageRange') + _alias( Document, 'embfile_add', 'embeddedFileAdd') + _alias( Document, 'embfile_count', 'embeddedFileCount') + _alias( Document, 'embfile_del', 'embeddedFileDel') + _alias( Document, 'embfile_get', 'embeddedFileGet') + _alias( Document, 'embfile_info', 'embeddedFileInfo') + _alias( Document, 'embfile_names', 'embeddedFileNames') + _alias( Document, 'embfile_upd', 'embeddedFileUpd') + _alias( Document, 'extract_font') + _alias( Document, 'extract_image') + _alias( Document, 'find_bookmark') + _alias( Document, 'fullcopy_page') + _alias( Document, 'get_char_widths') + _alias( Document, 'get_ocgs', 'getOCGs') + _alias( Document, 'get_page_fonts', 'getPageFontList') + _alias( Document, 'get_page_images', 'getPageImageList') + _alias( Document, 'get_page_pixmap') + _alias( Document, 'get_page_text') + _alias( Document, 'get_page_xobjects', 'getPageXObjectList') + _alias( Document, 'get_sigflags', 'getSigFlags') + _alias( Document, 'get_toc', 'getToC') + _alias( Document, 'get_xml_metadata') + _alias( Document, 'insert_page') + _alias( Document, 'insert_pdf', 'insertPDF') + _alias( Document, 'is_dirty') + _alias( Document, 'is_form_pdf', 'isFormPDF') + _alias( Document, 'is_pdf', 'isPDF') + _alias( Document, 'is_reflowable') + _alias( Document, 'is_repaired') + _alias( Document, 'last_location') + _alias( Document, 'load_page') + _alias( Document, 'make_bookmark') + _alias( Document, 'move_page') + _alias( Document, 'needs_pass') + _alias( Document, 'new_page') + _alias( Document, 'next_location') + _alias( Document, 'page_count') + _alias( Document, 'page_cropbox', 'pageCropBox') + _alias( Document, 'page_xref') + _alias( Document, 'pdf_catalog', 'PDFCatalog') + _alias( Document, 'pdf_trailer', 'PDFTrailer') + _alias( Document, 'prev_location', 'previousLocation') + _alias( Document, 'resolve_link') + _alias( Document, 'search_page_for') + _alias( Document, 'set_language') + _alias( Document, 'set_metadata') + _alias( Document, 'set_toc', 'setToC') + _alias( Document, 'set_xml_metadata') + _alias( Document, 'update_object') + _alias( Document, 'update_stream') + _alias( Document, 'xref_is_stream', 'isStream') + _alias( Document, 'xref_length') + _alias( Document, 'xref_object') + _alias( Document, 'xref_stream') + _alias( Document, 'xref_stream_raw') + _alias( Document, 'xref_xml_metadata', 'metadataXML') + _alias( IRect, 'get_area') + _alias( IRect, 'get_area', 'getRectArea') + _alias( IRect, 'include_point') + _alias( IRect, 'include_rect') + _alias( IRect, 'is_empty') + _alias( IRect, 'is_infinite') + _alias( Link, 'is_external') + _alias( Link, 'set_border') + _alias( Link, 'set_colors') + _alias( Matrix, 'is_rectilinear') + _alias( Matrix, 'prerotate', 'preRotate') + _alias( Matrix, 'prescale', 'preScale') + _alias( Matrix, 'preshear', 'preShear') + _alias( Matrix, 'pretranslate', 'preTranslate') + _alias( None, 'get_pdf_now', 'getPDFnow') + _alias( None, 'get_pdf_str', 'getPDFstr') + _alias( None, 'get_text_length') + _alias( None, 'get_text_length', 'getTextlength') + _alias( None, 'image_profile', 'ImageProperties') + _alias( None, 'paper_rect', 'PaperRect') + _alias( None, 'paper_size', 'PaperSize') + _alias( None, 'paper_sizes') + _alias( None, 'planish_line') + _alias( Outline, 'is_external') + _alias( Outline, 'is_open') + _alias( Page, 'add_caret_annot') + _alias( Page, 'add_circle_annot') + _alias( Page, 'add_file_annot') + _alias( Page, 'add_freetext_annot') + _alias( Page, 'add_highlight_annot') + _alias( Page, 'add_ink_annot') + _alias( Page, 'add_line_annot') + _alias( Page, 'add_polygon_annot') + _alias( Page, 'add_polyline_annot') + _alias( Page, 'add_rect_annot') + _alias( Page, 'add_redact_annot') + _alias( Page, 'add_squiggly_annot') + _alias( Page, 'add_stamp_annot') + _alias( Page, 'add_strikeout_annot') + _alias( Page, 'add_text_annot') + _alias( Page, 'add_underline_annot') + _alias( Page, 'add_widget') + _alias( Page, 'clean_contents') + _alias( Page, 'cropbox', 'CropBox') + _alias( Page, 'cropbox_position', 'CropBoxPosition') + _alias( Page, 'delete_annot') + _alias( Page, 'delete_link') + _alias( Page, 'delete_widget') + _alias( Page, 'derotation_matrix') + _alias( Page, 'draw_bezier') + _alias( Page, 'draw_circle') + _alias( Page, 'draw_curve') + _alias( Page, 'draw_line') + _alias( Page, 'draw_oval') + _alias( Page, 'draw_polyline') + _alias( Page, 'draw_quad') + _alias( Page, 'draw_rect') + _alias( Page, 'draw_sector') + _alias( Page, 'draw_squiggle') + _alias( Page, 'draw_zigzag') + _alias( Page, 'first_annot') + _alias( Page, 'first_link') + _alias( Page, 'first_widget') + _alias( Page, 'get_contents') + _alias( Page, 'get_displaylist', 'getDisplayList') + _alias( Page, 'get_drawings') + _alias( Page, 'get_fonts', 'getFontList') + _alias( Page, 'get_image_bbox') + _alias( Page, 'get_images', 'getImageList') + _alias( Page, 'get_links') + _alias( Page, 'get_pixmap') + _alias( Page, 'get_svg_image', 'getSVGimage') + _alias( Page, 'get_text') + _alias( Page, 'get_text_blocks') + _alias( Page, 'get_text_words') + _alias( Page, 'get_textbox') + _alias( Page, 'get_textpage', 'getTextPage') + _alias( Page, 'insert_font') + _alias( Page, 'insert_image') + _alias( Page, 'insert_link') + _alias( Page, 'insert_text') + _alias( Page, 'insert_textbox') + _alias( Page, 'is_wrapped', '_isWrapped') + _alias( Page, 'load_annot') + _alias( Page, 'load_links') + _alias( Page, 'mediabox', 'MediaBox') + _alias( Page, 'mediabox_size', 'MediaBoxSize') + _alias( Page, 'new_shape') + _alias( Page, 'read_contents') + _alias( Page, 'rotation_matrix') + _alias( Page, 'search_for') + _alias( Page, 'set_cropbox', 'setCropBox') + _alias( Page, 'set_mediabox', 'setMediaBox') + _alias( Page, 'set_rotation') + _alias( Page, 'show_pdf_page', 'showPDFpage') + _alias( Page, 'transformation_matrix') + _alias( Page, 'update_link') + _alias( Page, 'wrap_contents') + _alias( Page, 'write_text') + _alias( Pixmap, 'clear_with') + _alias( Pixmap, 'copy', 'copyPixmap') + _alias( Pixmap, 'gamma_with') + _alias( Pixmap, 'invert_irect', 'invertIRect') + _alias( Pixmap, 'pil_save', 'pillowWrite') + _alias( Pixmap, 'pil_tobytes', 'pillowData') + _alias( Pixmap, 'save', 'writeImage') + _alias( Pixmap, 'save', 'writePNG') + _alias( Pixmap, 'set_alpha') + _alias( Pixmap, 'set_dpi', 'setResolution') + _alias( Pixmap, 'set_origin') + _alias( Pixmap, 'set_pixel') + _alias( Pixmap, 'set_rect') + _alias( Pixmap, 'tint_with') + _alias( Pixmap, 'tobytes', 'getImageData') + _alias( Pixmap, 'tobytes', 'getPNGData') + _alias( Pixmap, 'tobytes', 'getPNGdata') + _alias( Quad, 'is_convex') + _alias( Quad, 'is_empty') + _alias( Quad, 'is_rectangular') + _alias( Rect, 'get_area') + _alias( Rect, 'get_area', 'getRectArea') + _alias( Rect, 'include_point') + _alias( Rect, 'include_rect') + _alias( Rect, 'is_empty') + _alias( Rect, 'is_infinite') + _alias( TextWriter, 'fill_textbox') + _alias( TextWriter, 'write_text') + _alias( Shape, 'draw_bezier') + _alias( Shape, 'draw_circle') + _alias( Shape, 'draw_curve') + _alias( Shape, 'draw_line') + _alias( Shape, 'draw_oval') + _alias( Shape, 'draw_polyline') + _alias( Shape, 'draw_quad') + _alias( Shape, 'draw_rect') + _alias( Shape, 'draw_sector') + _alias( Shape, 'draw_squiggle') + _alias( Shape, 'draw_zigzag') + _alias( Shape, 'insert_text') + _alias( Shape, 'insert_textbox') + +if 0: + restore_aliases() + +__version__ = VersionBind +__doc__ = ( + f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n' + f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n' + ) diff --git a/src/__main__.py b/src/__main__.py new file mode 100644 index 000000000..35914d6c7 --- /dev/null +++ b/src/__main__.py @@ -0,0 +1,1140 @@ +# ----------------------------------------------------------------------------- +# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com +# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html +# Part of "PyMuPDF", Python bindings for "MuPDF" (http://mupdf.com), a +# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is +# maintained and developed by Artifex Software, Inc. https://artifex.com. +# ----------------------------------------------------------------------------- +import argparse +import bisect +import os +import sys +import statistics +from typing import Dict, List, Set + +from . import pymupdf + +def mycenter(x): + return (" %s " % x).center(75, "-") + + +def recoverpix(doc, item): + """Return image for a given XREF.""" + x = item[0] # xref of PDF image + s = item[1] # xref of its /SMask + if s == 0: # no smask: use direct image output + return doc.extract_image(x) + + def getimage(pix): + if pix.colorspace.n != 4: + return pix + tpix = pymupdf.Pixmap(pymupdf.csRGB, pix) + return tpix + + # we need to reconstruct the alpha channel with the smask + pix1 = pymupdf.Pixmap(doc, x) + pix2 = pymupdf.Pixmap(doc, s) # create pixmap of the /SMask entry + + """Sanity check: + - both pixmaps must have the same rectangle + - both pixmaps must have alpha=0 + - pix2 must consist of 1 byte per pixel + """ + if not (pix1.irect == pix2.irect and pix1.alpha == pix2.alpha == 0 and pix2.n == 1): + pymupdf.message("Warning: unsupported /SMask %i for %i:" % (s, x)) + pymupdf.message(pix2) + pix2 = None + return getimage(pix1) # return the pixmap as is + + pix = pymupdf.Pixmap(pix1) # copy of pix1, with an alpha channel added + pix.set_alpha(pix2.samples) # treat pix2.samples as the alpha values + pix1 = pix2 = None # free temp pixmaps + + # we may need to adjust something for CMYK pixmaps here: + return getimage(pix) + + +def open_file(filename, password, show=False, pdf=True): + """Open and authenticate a document.""" + doc = pymupdf.open(filename) + if not doc.is_pdf and pdf is True: + sys.exit("this command supports PDF files only") + rc = -1 + if not doc.needs_pass: + return doc + if password: + rc = doc.authenticate(password) + if not rc: + sys.exit("authentication unsuccessful") + if show is True: + pymupdf.message("authenticated as %s" % "owner" if rc > 2 else "user") + else: + sys.exit("'%s' requires a password" % doc.name) + return doc + + +def print_dict(item): + """Print a Python dictionary.""" + l = max([len(k) for k in item.keys()]) + 1 + for k, v in item.items(): + msg = "%s: %s" % (k.rjust(l), v) + pymupdf.message(msg) + + +def print_xref(doc, xref): + """Print an object given by XREF number. + + Simulate the PDF source in "pretty" format. + For a stream also print its size. + """ + pymupdf.message("%i 0 obj" % xref) + xref_str = doc.xref_object(xref) + pymupdf.message(xref_str) + if doc.xref_is_stream(xref): + temp = xref_str.split() + try: + idx = temp.index("/Length") + 1 + size = temp[idx] + if size.endswith("0 R"): + size = "unknown" + except Exception: + size = "unknown" + pymupdf.message("stream\n...%s bytes" % size) + pymupdf.message("endstream") + pymupdf.message("endobj") + + +def get_list(rlist, limit, what="page"): + """Transform a page / xref specification into a list of integers. + + Args + ---- + rlist: (str) the specification + limit: maximum number, i.e. number of pages, number of objects + what: a string to be used in error messages + Returns + ------- + A list of integers representing the specification. + """ + N = str(limit - 1) + rlist = rlist.replace("N", N).replace(" ", "") + rlist_arr = rlist.split(",") + out_list = [] + for seq, item in enumerate(rlist_arr): + n = seq + 1 + if item.isdecimal(): # a single integer + i = int(item) + if 1 <= i < limit: + out_list.append(int(item)) + else: + sys.exit("bad %s specification at item %i" % (what, n)) + continue + try: # this must be a range now, and all of the following must work: + i1, i2 = item.split("-") # will fail if not 2 items produced + i1 = int(i1) # will fail on non-integers + i2 = int(i2) + except Exception: + sys.exit("bad %s range specification at item %i" % (what, n)) + + if not (1 <= i1 < limit and 1 <= i2 < limit): + sys.exit("bad %s range specification at item %i" % (what, n)) + + if i1 == i2: # just in case: a range of equal numbers + out_list.append(i1) + continue + + if i1 < i2: # first less than second + out_list += list(range(i1, i2 + 1)) + else: # first larger than second + out_list += list(range(i1, i2 - 1, -1)) + + return out_list + + +def show(args): + doc = open_file(args.input, args.password, True) + size = os.path.getsize(args.input) / 1024 + flag = "KB" + if size > 1000: + size /= 1024 + flag = "MB" + size = round(size, 1) + meta = doc.metadata # pylint: disable=no-member + pymupdf.message( + "'%s', pages: %i, objects: %i, %g %s, %s, encryption: %s" + % ( + args.input, + doc.page_count, + doc.xref_length() - 1, + size, + flag, + meta["format"], + meta["encryption"], + ) + ) + n = doc.is_form_pdf + if n > 0: + s = doc.get_sigflags() + pymupdf.message( + "document contains %i root form fields and is %ssigned" + % (n, "not " if s != 3 else "") + ) + n = doc.embfile_count() + if n > 0: + pymupdf.message("document contains %i embedded files" % n) + pymupdf.message() + if args.catalog: + pymupdf.message(mycenter("PDF catalog")) + xref = doc.pdf_catalog() + print_xref(doc, xref) + pymupdf.message() + if args.metadata: + pymupdf.message(mycenter("PDF metadata")) + print_dict(doc.metadata) # pylint: disable=no-member + pymupdf.message() + if args.xrefs: + pymupdf.message(mycenter("object information")) + xrefl = get_list(args.xrefs, doc.xref_length(), what="xref") + for xref in xrefl: + print_xref(doc, xref) + pymupdf.message() + if args.pages: + pymupdf.message(mycenter("page information")) + pagel = get_list(args.pages, doc.page_count + 1) + for pno in pagel: + n = pno - 1 + xref = doc.page_xref(n) + pymupdf.message("Page %i:" % pno) + print_xref(doc, xref) + pymupdf.message() + if args.trailer: + pymupdf.message(mycenter("PDF trailer")) + pymupdf.message(doc.pdf_trailer()) + pymupdf.message() + doc.close() + + +def clean(args): + doc = open_file(args.input, args.password, pdf=True) + encryption = args.encryption + encrypt = ("keep", "none", "rc4-40", "rc4-128", "aes-128", "aes-256").index( + encryption + ) + + if not args.pages: # simple cleaning + doc.save( + args.output, + garbage=args.garbage, + deflate=args.compress, + pretty=args.pretty, + clean=args.sanitize, + ascii=args.ascii, + linear=args.linear, + encryption=encrypt, + owner_pw=args.owner, + user_pw=args.user, + permissions=args.permission, + ) + return + + # create sub document from page numbers + pages = get_list(args.pages, doc.page_count + 1) + outdoc = pymupdf.open() + for pno in pages: + n = pno - 1 + outdoc.insert_pdf(doc, from_page=n, to_page=n) + outdoc.save( + args.output, + garbage=args.garbage, + deflate=args.compress, + pretty=args.pretty, + clean=args.sanitize, + ascii=args.ascii, + linear=args.linear, + encryption=encrypt, + owner_pw=args.owner, + user_pw=args.user, + permissions=args.permission, + ) + doc.close() + outdoc.close() + return + + +def doc_join(args): + """Join pages from several PDF documents.""" + doc_list = args.input # a list of input PDFs + doc = pymupdf.open() # output PDF + for src_item in doc_list: # process one input PDF + src_list = src_item.split(",") + password = src_list[1] if len(src_list) > 1 else None + src = open_file(src_list[0], password, pdf=True) + pages = ",".join(src_list[2:]) # get 'pages' specifications + if pages: # if anything there, retrieve a list of desired pages + page_list = get_list(",".join(src_list[2:]), src.page_count + 1) + else: # take all pages + page_list = range(1, src.page_count + 1) + for i in page_list: + doc.insert_pdf(src, from_page=i - 1, to_page=i - 1) # copy each source page + src.close() + + doc.save(args.output, garbage=4, deflate=True) + doc.close() + + +def embedded_copy(args): + """Copy embedded files between PDFs.""" + doc = open_file(args.input, args.password, pdf=True) + if not doc.can_save_incrementally() and ( + not args.output or args.output == args.input + ): + sys.exit("cannot save PDF incrementally") + src = open_file(args.source, args.pwdsource) + names = set(args.name) if args.name else set() + src_names = set(src.embfile_names()) + if names: + if not names <= src_names: + sys.exit("not all names are contained in source") + else: + names = src_names + if not names: + sys.exit("nothing to copy") + intersect = names & set(doc.embfile_names()) # any equal name already in target? + if intersect: + sys.exit("following names already exist in receiving PDF: %s" % str(intersect)) + + for item in names: + info = src.embfile_info(item) + buff = src.embfile_get(item) + doc.embfile_add( + item, + buff, + filename=info["filename"], + ufilename=info["ufilename"], + desc=info["desc"], + ) + pymupdf.message("copied entry '%s' from '%s'" % (item, src.name)) + src.close() + if args.output and args.output != args.input: + doc.save(args.output, garbage=3) + else: + doc.saveIncr() + doc.close() + + +def embedded_del(args): + """Delete an embedded file entry.""" + doc = open_file(args.input, args.password, pdf=True) + if not doc.can_save_incrementally() and ( + not args.output or args.output == args.input + ): + sys.exit("cannot save PDF incrementally") + + try: + doc.embfile_del(args.name) + except (ValueError, pymupdf.mupdf.FzErrorBase) as e: + sys.exit(f'no such embedded file {args.name!r}: {e}') + if not args.output or args.output == args.input: + doc.saveIncr() + else: + doc.save(args.output, garbage=1) + doc.close() + + +def embedded_get(args): + """Retrieve contents of an embedded file.""" + doc = open_file(args.input, args.password, pdf=True) + try: + stream = doc.embfile_get(args.name) + d = doc.embfile_info(args.name) + except (ValueError, pymupdf.mupdf.FzErrorBase) as e: + sys.exit(f'no such embedded file {args.name!r}: {e}') + filename = args.output if args.output else d["filename"] + with open(filename, "wb") as output: + output.write(stream) + pymupdf.message("saved entry '%s' as '%s'" % (args.name, filename)) + doc.close() + + +def embedded_add(args): + """Insert a new embedded file.""" + doc = open_file(args.input, args.password, pdf=True) + if not doc.can_save_incrementally() and ( + args.output is None or args.output == args.input + ): + sys.exit("cannot save PDF incrementally") + + try: + doc.embfile_del(args.name) + sys.exit("entry '%s' already exists" % args.name) + except Exception: + pass + + if not os.path.exists(args.path) or not os.path.isfile(args.path): + sys.exit("no such file '%s'" % args.path) + with open(args.path, "rb") as f: + stream = f.read() + filename = args.path + ufilename = filename + if not args.desc: + desc = filename + else: + desc = args.desc + doc.embfile_add( + args.name, stream, filename=filename, ufilename=ufilename, desc=desc + ) + if not args.output or args.output == args.input: + doc.saveIncr() + else: + doc.save(args.output, garbage=3) + doc.close() + + +def embedded_upd(args): + """Update contents or metadata of an embedded file.""" + doc = open_file(args.input, args.password, pdf=True) + if not doc.can_save_incrementally() and ( + args.output is None or args.output == args.input + ): + sys.exit("cannot save PDF incrementally") + + try: + doc.embfile_info(args.name) + except Exception: + sys.exit("no such embedded file '%s'" % args.name) + + if ( + args.path is not None + and os.path.exists(args.path) + and os.path.isfile(args.path) + ): + with open(args.path, "rb") as f: + stream = f.read() + else: + stream = None + + if args.filename: + filename = args.filename + else: + filename = None + + if args.ufilename: + ufilename = args.ufilename + elif args.filename: + ufilename = args.filename + else: + ufilename = None + + if args.desc: + desc = args.desc + else: + desc = None + + doc.embfile_upd( + args.name, stream, filename=filename, ufilename=ufilename, desc=desc + ) + if args.output is None or args.output == args.input: + doc.saveIncr() + else: + doc.save(args.output, garbage=3) + doc.close() + + +def embedded_list(args): + """List embedded files.""" + doc = open_file(args.input, args.password, pdf=True) + names = doc.embfile_names() + if args.name is not None: + if args.name not in names: + sys.exit("no such embedded file '%s'" % args.name) + else: + pymupdf.message() + pymupdf.message( + "printing 1 of %i embedded file%s:" + % (len(names), "s" if len(names) > 1 else "") + ) + pymupdf.message() + print_dict(doc.embfile_info(args.name)) + pymupdf.message() + return + if not names: + pymupdf.message("'%s' contains no embedded files" % doc.name) + return + if len(names) > 1: + msg = "'%s' contains the following %i embedded files" % (doc.name, len(names)) + else: + msg = "'%s' contains the following embedded file" % doc.name + pymupdf.message(msg) + pymupdf.message() + for name in names: + if not args.detail: + pymupdf.message(name) + continue + _ = doc.embfile_info(name) + print_dict(doc.embfile_info(name)) + pymupdf.message() + doc.close() + + +def extract_objects(args): + """Extract images and / or fonts from a PDF.""" + if not args.fonts and not args.images: + sys.exit("neither fonts nor images requested") + doc = open_file(args.input, args.password, pdf=True) + + if args.pages: + pages = get_list(args.pages, doc.page_count + 1) + else: + pages = range(1, doc.page_count + 1) + + if not args.output: + out_dir = os.path.abspath(os.curdir) + else: + out_dir = args.output + if not (os.path.exists(out_dir) and os.path.isdir(out_dir)): + sys.exit("output directory %s does not exist" % out_dir) + + font_xrefs = set() # already saved fonts + image_xrefs = set() # already saved images + + for pno in pages: + if args.fonts: + itemlist = doc.get_page_fonts(pno - 1) + for item in itemlist: + xref = item[0] + if xref not in font_xrefs: + font_xrefs.add(xref) + fontname, ext, _, buffer = doc.extract_font(xref) + if ext == "n/a" or not buffer: + continue + outname = os.path.join( + out_dir, f"{fontname.replace(' ', '-')}-{xref}.{ext}" + ) + with open(outname, "wb") as outfile: + outfile.write(buffer) + buffer = None + if args.images: + itemlist = doc.get_page_images(pno - 1) + for item in itemlist: + xref = item[0] + if xref not in image_xrefs: + image_xrefs.add(xref) + pix = recoverpix(doc, item) + if type(pix) is dict: + ext = pix["ext"] + imgdata = pix["image"] + outname = os.path.join(out_dir, "img-%i.%s" % (xref, ext)) + with open(outname, "wb") as outfile: + outfile.write(imgdata) + else: + outname = os.path.join(out_dir, "img-%i.png" % xref) + pix2 = ( + pix + if pix.colorspace.n < 4 + else pymupdf.Pixmap(pymupdf.csRGB, pix) + ) + pix2.save(outname) + + if args.fonts: + pymupdf.message("saved %i fonts to '%s'" % (len(font_xrefs), out_dir)) + if args.images: + pymupdf.message("saved %i images to '%s'" % (len(image_xrefs), out_dir)) + doc.close() + + +def page_simple(page, textout, GRID, fontsize, noformfeed, skip_empty, flags): + eop = b"\n" if noformfeed else bytes([12]) + text = page.get_text("text", flags=flags) + if not text: + if not skip_empty: + textout.write(eop) # write formfeed + return + textout.write(text.encode("utf8", errors="surrogatepass")) + textout.write(eop) + return + + +def page_blocksort(page, textout, GRID, fontsize, noformfeed, skip_empty, flags): + eop = b"\n" if noformfeed else bytes([12]) + blocks = page.get_text("blocks", flags=flags) + if blocks == []: + if not skip_empty: + textout.write(eop) # write formfeed + return + blocks.sort(key=lambda b: (b[3], b[0])) + for b in blocks: + textout.write(b[4].encode("utf8", errors="surrogatepass")) + textout.write(eop) + return + + +def page_layout(page, textout, GRID, fontsize, noformfeed, skip_empty, flags): + eop = b"\n" if noformfeed else bytes([12]) + + # -------------------------------------------------------------------- + def find_line_index(values: List[int], value: int) -> int: + """Find the right row coordinate. + + Args: + values: (list) y-coordinates of rows. + value: (int) lookup for this value (y-origin of char). + Returns: + y-ccordinate of appropriate line for value. + """ + i = bisect.bisect_right(values, value) + if i: + return values[i - 1] + raise RuntimeError("Line for %g not found in %s" % (value, values)) + + # -------------------------------------------------------------------- + def curate_rows(rows: Set[int], GRID) -> List: + rows = list(rows) + rows.sort() # sort ascending + nrows = [rows[0]] + for h in rows[1:]: + if h >= nrows[-1] + GRID: # only keep significant differences + nrows.append(h) + return nrows # curated list of line bottom coordinates + + def process_blocks(blocks: List[Dict], page: pymupdf.Page): + rows = set() + page_width = page.rect.width + page_height = page.rect.height + rowheight = page_height + left = page_width + right = 0 + chars = [] + for block in blocks: + for line in block["lines"]: + if line["dir"] != (1, 0): # ignore non-horizontal text + continue + x0, y0, x1, y1 = line["bbox"] + if y1 < 0 or y0 > page.rect.height: # ignore if outside CropBox + continue + # upd row height + height = y1 - y0 + + if rowheight > height: + rowheight = height + for span in line["spans"]: + if span["size"] <= fontsize: + continue + for c in span["chars"]: + x0, _, x1, _ = c["bbox"] + cwidth = x1 - x0 + ox, oy = c["origin"] + oy = int(round(oy)) + rows.add(oy) + ch = c["c"] + if left > ox and ch != " ": + left = ox # update left coordinate + if right < x1: + right = x1 # update right coordinate + # handle ligatures: + if cwidth == 0 and chars != []: # potential ligature + old_ch, old_ox, old_oy, old_cwidth = chars[-1] + if old_oy == oy: # ligature + if old_ch != chr(0xFB00): # previous "ff" char lig? + lig = joinligature(old_ch + ch) # no + # convert to one of the 3-char ligatures: + elif ch == "i": + lig = chr(0xFB03) # "ffi" + elif ch == "l": + lig = chr(0xFB04) # "ffl" + else: # something wrong, leave old char in place + lig = old_ch + chars[-1] = (lig, old_ox, old_oy, old_cwidth) + continue + chars.append((ch, ox, oy, cwidth)) # all chars on page + return chars, rows, left, right, rowheight + + def joinligature(lig: str) -> str: + """Return ligature character for a given pair / triple of characters. + + Args: + lig: (str) 2/3 characters, e.g. "ff" + Returns: + Ligature, e.g. "ff" -> chr(0xFB00) + """ + + if lig == "ff": + return chr(0xFB00) + elif lig == "fi": + return chr(0xFB01) + elif lig == "fl": + return chr(0xFB02) + elif lig == "ffi": + return chr(0xFB03) + elif lig == "ffl": + return chr(0xFB04) + elif lig == "ft": + return chr(0xFB05) + elif lig == "st": + return chr(0xFB06) + return lig + + # -------------------------------------------------------------------- + def make_textline(left, slot, minslot, lchars): + """Produce the text of one output line. + + Args: + left: (float) left most coordinate used on page + slot: (float) avg width of one character in any font in use. + minslot: (float) min width for the characters in this line. + chars: (list[tuple]) characters of this line. + Returns: + text: (str) text string for this line + """ + text = "" # we output this + old_char = "" + old_x1 = 0 # end coordinate of last char + old_ox = 0 # x-origin of last char + if minslot <= pymupdf.EPSILON: + raise RuntimeError("program error: minslot too small = %g" % minslot) + + for c in lchars: # loop over characters + char, ox, _, cwidth = c + ox = ox - left # its (relative) start coordinate + x1 = ox + cwidth # ending coordinate + + # eliminate overprint effect + if old_char == char and ox - old_ox <= cwidth * 0.2: + continue + + # omit spaces overlapping previous char + if char == " " and (old_x1 - ox) / cwidth > 0.8: + continue + + old_char = char + # close enough to previous? + if ox < old_x1 + minslot: # assume char adjacent to previous + text += char # append to output + old_x1 = x1 # new end coord + old_ox = ox # new origin.x + continue + + # else next char starts after some gap: + # fill in right number of spaces, so char is positioned + # in the right slot of the line + if char == " ": # rest relevant for non-space only + continue + delta = int(ox / slot) - len(text) + if ox > old_x1 and delta > 1: + text += " " * delta + # now append char + text += char + old_x1 = x1 # new end coordinate + old_ox = ox # new origin + return text.rstrip() + + # extract page text by single characters ("rawdict") + blocks = page.get_text("rawdict", flags=flags)["blocks"] + chars, rows, left, right, rowheight = process_blocks(blocks, page) + + if chars == []: + if not skip_empty: + textout.write(eop) # write formfeed + return + # compute list of line coordinates - ignoring small (GRID) differences + rows = curate_rows(rows, GRID) + + # sort all chars by x-coordinates, so every line will receive char info, + # sorted from left to right. + chars.sort(key=lambda c: c[1]) + + # populate the lines with their char info + lines = {} # key: y1-ccordinate, value: char list + for c in chars: + _, _, oy, _ = c + y = find_line_index(rows, oy) # y-coord of the right line + lchars = lines.get(y, []) # read line chars so far + lchars.append(c) # append this char + lines[y] = lchars # write back to line + + # ensure line coordinates are ascending + keys = list(lines.keys()) + keys.sort() + + # ------------------------------------------------------------------------- + # Compute "char resolution" for the page: the char width corresponding to + # 1 text char position on output - call it 'slot'. + # For each line, compute median of its char widths. The minimum across all + # lines is 'slot'. + # The minimum char width of each line is used to determine if spaces must + # be inserted in between two characters. + # ------------------------------------------------------------------------- + slot = right - left + minslots = {} + for k in keys: + lchars = lines[k] + ccount = len(lchars) + if ccount < 2: + minslots[k] = 1 + continue + widths = [c[3] for c in lchars] + widths.sort() + this_slot = statistics.median(widths) # take median value + if this_slot < slot: + slot = this_slot + minslots[k] = widths[0] + + # compute line advance in text output + rowheight = rowheight * (rows[-1] - rows[0]) / (rowheight * len(rows)) * 1.2 + rowpos = rows[0] # first line positioned here + textout.write(b"\n") + for k in keys: # walk through the lines + while rowpos < k: # honor distance between lines + textout.write(b"\n") + rowpos += rowheight + text = make_textline(left, slot, minslots[k], lines[k]) + textout.write((text + "\n").encode("utf8", errors="surrogatepass")) + rowpos = k + rowheight + + textout.write(eop) # write formfeed + + +def gettext(args): + doc = open_file(args.input, args.password, pdf=False) + pagel = get_list(args.pages, doc.page_count + 1) + output = args.output + if output is None: + filename, _ = os.path.splitext(doc.name) + output = filename + ".txt" + with open(output, "wb") as textout: + flags = pymupdf.TEXT_PRESERVE_LIGATURES | pymupdf.TEXT_PRESERVE_WHITESPACE + if args.convert_white: + flags ^= pymupdf.TEXT_PRESERVE_WHITESPACE + if args.noligatures: + flags ^= pymupdf.TEXT_PRESERVE_LIGATURES + if args.extra_spaces: + flags ^= pymupdf.TEXT_INHIBIT_SPACES + func = { + "simple": page_simple, + "blocks": page_blocksort, + "layout": page_layout, + } + for pno in pagel: + page = doc[pno - 1] + func[args.mode]( + page, + textout, + args.grid, + args.fontsize, + args.noformfeed, + args.skip_empty, + flags=flags, + ) + + +def _internal(args): + pymupdf.message('This is from PyMuPDF message().') + pymupdf.log('This is from PyMuPDF log().') + +def main(): + """Define command configurations.""" + parser = argparse.ArgumentParser( + prog="pymupdf", + description=mycenter("Basic PyMuPDF Functions"), + ) + subps = parser.add_subparsers( + title="Subcommands", help="Enter 'command -h' for subcommand specific help" + ) + + # ------------------------------------------------------------------------- + # 'show' command + # ------------------------------------------------------------------------- + ps_show = subps.add_parser("show", description=mycenter("display PDF information")) + ps_show.add_argument("input", type=str, help="PDF filename") + ps_show.add_argument("-password", help="password") + ps_show.add_argument("-catalog", action="store_true", help="show PDF catalog") + ps_show.add_argument("-trailer", action="store_true", help="show PDF trailer") + ps_show.add_argument("-metadata", action="store_true", help="show PDF metadata") + ps_show.add_argument( + "-xrefs", type=str, help="show selected objects, format: 1,5-7,N" + ) + ps_show.add_argument( + "-pages", type=str, help="show selected pages, format: 1,5-7,50-N" + ) + ps_show.set_defaults(func=show) + + # ------------------------------------------------------------------------- + # 'clean' command + # ------------------------------------------------------------------------- + ps_clean = subps.add_parser( + "clean", description=mycenter("optimize PDF, or create sub-PDF if pages given") + ) + ps_clean.add_argument("input", type=str, help="PDF filename") + ps_clean.add_argument("output", type=str, help="output PDF filename") + ps_clean.add_argument("-password", help="password") + + ps_clean.add_argument( + "-encryption", + help="encryption method", + choices=("keep", "none", "rc4-40", "rc4-128", "aes-128", "aes-256"), + default="none", + ) + + ps_clean.add_argument("-owner", type=str, help="owner password") + ps_clean.add_argument("-user", type=str, help="user password") + + ps_clean.add_argument( + "-garbage", + type=int, + help="garbage collection level", + choices=range(5), + default=0, + ) + + ps_clean.add_argument( + "-compress", + action="store_true", + default=False, + help="compress (deflate) output", + ) + + ps_clean.add_argument( + "-ascii", action="store_true", default=False, help="ASCII encode binary data" + ) + + ps_clean.add_argument( + "-linear", + action="store_true", + default=False, + help="format for fast web display", + ) + + ps_clean.add_argument( + "-permission", type=int, default=-1, help="integer with permission levels" + ) + + ps_clean.add_argument( + "-sanitize", + action="store_true", + default=False, + help="sanitize / clean contents", + ) + ps_clean.add_argument( + "-pretty", action="store_true", default=False, help="prettify PDF structure" + ) + ps_clean.add_argument( + "-pages", help="output selected pages pages, format: 1,5-7,50-N" + ) + ps_clean.set_defaults(func=clean) + + # ------------------------------------------------------------------------- + # 'join' command + # ------------------------------------------------------------------------- + ps_join = subps.add_parser( + "join", + description=mycenter("join PDF documents"), + epilog="specify each input as 'filename[,password[,pages]]'", + ) + ps_join.add_argument("input", nargs="*", help="input filenames") + ps_join.add_argument("-output", required=True, help="output filename") + ps_join.set_defaults(func=doc_join) + + # ------------------------------------------------------------------------- + # 'extract' command + # ------------------------------------------------------------------------- + ps_extract = subps.add_parser( + "extract", description=mycenter("extract images and fonts to disk") + ) + ps_extract.add_argument("input", type=str, help="PDF filename") + ps_extract.add_argument("-images", action="store_true", help="extract images") + ps_extract.add_argument("-fonts", action="store_true", help="extract fonts") + ps_extract.add_argument( + "-output", help="folder to receive output, defaults to current" + ) + ps_extract.add_argument("-password", help="password") + ps_extract.add_argument( + "-pages", type=str, help="consider these pages only, format: 1,5-7,50-N" + ) + ps_extract.set_defaults(func=extract_objects) + + # ------------------------------------------------------------------------- + # 'embed-info' + # ------------------------------------------------------------------------- + ps_show = subps.add_parser( + "embed-info", description=mycenter("list embedded files") + ) + ps_show.add_argument("input", help="PDF filename") + ps_show.add_argument("-name", help="if given, report only this one") + ps_show.add_argument("-detail", action="store_true", help="detail information") + ps_show.add_argument("-password", help="password") + ps_show.set_defaults(func=embedded_list) + + # ------------------------------------------------------------------------- + # 'embed-add' command + # ------------------------------------------------------------------------- + ps_embed_add = subps.add_parser( + "embed-add", description=mycenter("add embedded file") + ) + ps_embed_add.add_argument("input", help="PDF filename") + ps_embed_add.add_argument("-password", help="password") + ps_embed_add.add_argument( + "-output", help="output PDF filename, incremental save if none" + ) + ps_embed_add.add_argument("-name", required=True, help="name of new entry") + ps_embed_add.add_argument("-path", required=True, help="path to data for new entry") + ps_embed_add.add_argument("-desc", help="description of new entry") + ps_embed_add.set_defaults(func=embedded_add) + + # ------------------------------------------------------------------------- + # 'embed-del' command + # ------------------------------------------------------------------------- + ps_embed_del = subps.add_parser( + "embed-del", description=mycenter("delete embedded file") + ) + ps_embed_del.add_argument("input", help="PDF filename") + ps_embed_del.add_argument("-password", help="password") + ps_embed_del.add_argument( + "-output", help="output PDF filename, incremental save if none" + ) + ps_embed_del.add_argument("-name", required=True, help="name of entry to delete") + ps_embed_del.set_defaults(func=embedded_del) + + # ------------------------------------------------------------------------- + # 'embed-upd' command + # ------------------------------------------------------------------------- + ps_embed_upd = subps.add_parser( + "embed-upd", + description=mycenter("update embedded file"), + epilog="except '-name' all parameters are optional", + ) + ps_embed_upd.add_argument("input", help="PDF filename") + ps_embed_upd.add_argument("-name", required=True, help="name of entry") + ps_embed_upd.add_argument("-password", help="password") + ps_embed_upd.add_argument( + "-output", help="Output PDF filename, incremental save if none" + ) + ps_embed_upd.add_argument("-path", help="path to new data for entry") + ps_embed_upd.add_argument("-filename", help="new filename to store in entry") + ps_embed_upd.add_argument( + "-ufilename", help="new unicode filename to store in entry" + ) + ps_embed_upd.add_argument("-desc", help="new description to store in entry") + ps_embed_upd.set_defaults(func=embedded_upd) + + # ------------------------------------------------------------------------- + # 'embed-extract' command + # ------------------------------------------------------------------------- + ps_embed_extract = subps.add_parser( + "embed-extract", description=mycenter("extract embedded file to disk") + ) + ps_embed_extract.add_argument("input", type=str, help="PDF filename") + ps_embed_extract.add_argument("-name", required=True, help="name of entry") + ps_embed_extract.add_argument("-password", help="password") + ps_embed_extract.add_argument( + "-output", help="output filename, default is stored name" + ) + ps_embed_extract.set_defaults(func=embedded_get) + + # ------------------------------------------------------------------------- + # 'embed-copy' command + # ------------------------------------------------------------------------- + ps_embed_copy = subps.add_parser( + "embed-copy", description=mycenter("copy embedded files between PDFs") + ) + ps_embed_copy.add_argument("input", type=str, help="PDF to receive embedded files") + ps_embed_copy.add_argument("-password", help="password of input") + ps_embed_copy.add_argument( + "-output", help="output PDF, incremental save to 'input' if omitted" + ) + ps_embed_copy.add_argument( + "-source", required=True, help="copy embedded files from here" + ) + ps_embed_copy.add_argument("-pwdsource", help="password of 'source' PDF") + ps_embed_copy.add_argument( + "-name", nargs="*", help="restrict copy to these entries" + ) + ps_embed_copy.set_defaults(func=embedded_copy) + + # ------------------------------------------------------------------------- + # 'textlayout' command + # ------------------------------------------------------------------------- + ps_gettext = subps.add_parser( + "gettext", description=mycenter("extract text in various formatting modes") + ) + ps_gettext.add_argument("input", type=str, help="input document filename") + ps_gettext.add_argument("-password", help="password for input document") + ps_gettext.add_argument( + "-mode", + type=str, + help="mode: simple, block sort, or layout (default)", + choices=("simple", "blocks", "layout"), + default="layout", + ) + ps_gettext.add_argument( + "-pages", + type=str, + help="select pages, format: 1,5-7,50-N", + default="1-N", + ) + ps_gettext.add_argument( + "-noligatures", + action="store_true", + help="expand ligature characters (default False)", + default=False, + ) + ps_gettext.add_argument( + "-convert-white", + action="store_true", + help="convert whitespace characters to white (default False)", + default=False, + ) + ps_gettext.add_argument( + "-extra-spaces", + action="store_true", + help="fill gaps with spaces (default False)", + default=False, + ) + ps_gettext.add_argument( + "-noformfeed", + action="store_true", + help="write linefeeds, no formfeeds (default False)", + default=False, + ) + ps_gettext.add_argument( + "-skip-empty", + action="store_true", + help="suppress pages with no text (default False)", + default=False, + ) + ps_gettext.add_argument( + "-output", + help="store text in this file (default inputfilename.txt)", + ) + ps_gettext.add_argument( + "-grid", + type=float, + help="merge lines if closer than this (default 2)", + default=2, + ) + ps_gettext.add_argument( + "-fontsize", + type=float, + help="only include text with a larger fontsize (default 3)", + default=3, + ) + ps_gettext.set_defaults(func=gettext) + + # ------------------------------------------------------------------------- + # '_internal' command + # ------------------------------------------------------------------------- + ps_internal = subps.add_parser( + "internal", description=mycenter("internal testing") + ) + ps_internal.set_defaults(func=_internal) + + # ------------------------------------------------------------------------- + # start program + # ------------------------------------------------------------------------- + args = parser.parse_args() # create parameter arguments class + if not hasattr(args, "func"): # no function selected + parser.print_help() # so print top level help + else: + args.func(args) # execute requested command + + +if __name__ == "__main__": + main() diff --git a/src/_apply_pages.py b/src/_apply_pages.py new file mode 100644 index 000000000..0aae54c78 --- /dev/null +++ b/src/_apply_pages.py @@ -0,0 +1,253 @@ +import multiprocessing +import os +import time + +import pymupdf + + +# Support for concurrent processing of document pages. +# + +class _worker_State: + pass +_worker_state = _worker_State() + + +def _worker_init( + path, + initfn, + initfn_args, + initfn_kwargs, + pagefn, + pagefn_args, + pagefn_kwargs, + stats, + ): + # pylint: disable=attribute-defined-outside-init + _worker_state.path = path + _worker_state.pagefn = pagefn + _worker_state.pagefn_args = pagefn_args + _worker_state.pagefn_kwargs = pagefn_kwargs + _worker_state.stats = stats + _worker_state.document = None + if initfn: + initfn(*initfn_args, **initfn_kwargs) + + +def _stats_write(t, label): + t = time.time() - t + if t >= 10: + pymupdf.log(f'{os.getpid()=}: {t:2f}s: {label}.') + + +def _worker_fn(page_number): + # Create Document from filename if we haven't already done so. + if not _worker_state.document: + if _worker_state.stats: + t = time.time() + _worker_state.document = pymupdf.Document(_worker_state.path) # pylint: disable=attribute-defined-outside-init + if _worker_state.stats: + _stats_write(t, 'pymupdf.Document()') + + if _worker_state.stats: + t = time.time() + page = _worker_state.document[page_number] + if _worker_state.stats: + _stats_write(t, '_worker_state.document[page_number]') + + if _worker_state.stats: + t = time.time() + ret = _worker_state.pagefn( + page, + *_worker_state.pagefn_args, + **_worker_state.pagefn_kwargs, + ) + if _worker_state.stats: + _stats_write(t, '_worker_state.pagefn()') + + return ret + + +def _multiprocessing( + path, + pages, + pagefn, + pagefn_args, + pagefn_kwargs, + initfn, + initfn_args, + initfn_kwargs, + concurrency, + stats, + ): + #print(f'_worker_mp(): {concurrency=}', flush=1) + with multiprocessing.Pool( + concurrency, + _worker_init, + ( + path, + initfn, initfn_args, initfn_kwargs, + pagefn, pagefn_args, pagefn_kwargs, + stats, + ), + ) as pool: + result = pool.map_async(_worker_fn, pages) + return result.get() + + +def _fork( + path, + pages, + pagefn, + pagefn_args, + pagefn_kwargs, + initfn, + initfn_args, + initfn_kwargs, + concurrency, + stats, + ): + verbose = 0 + if concurrency is None: + concurrency = multiprocessing.cpu_count() + # We write page numbers to `queue_down` and read `(page_num, text)` from + # `queue_up`. Workers each repeatedly read the next available page number + # from `queue_down`, extract the text and write it onto `queue_up`. + # + # This is better than pre-allocating a subset of pages to each worker + # because it ensures there will never be idle workers until we are near the + # end with fewer pages left than workers. + # + queue_down = multiprocessing.Queue() + queue_up = multiprocessing.Queue() + def childfn(): + document = None + if verbose: + pymupdf.log(f'{os.getpid()=}: {initfn=} {initfn_args=}') + _worker_init( + path, + initfn, + initfn_args, + initfn_kwargs, + pagefn, + pagefn_args, + pagefn_kwargs, + stats, + ) + while 1: + if verbose: + pymupdf.log(f'{os.getpid()=}: calling get().') + page_num = queue_down.get() + if verbose: + pymupdf.log(f'{os.getpid()=}: {page_num=}.') + if page_num is None: + break + try: + if not document: + if stats: + t = time.time() + document = pymupdf.Document(path) + if stats: + _stats_write(t, 'pymupdf.Document(path)') + + if stats: + t = time.time() + page = document[page_num] + if stats: + _stats_write(t, 'document[page_num]') + + if verbose: + pymupdf.log(f'{os.getpid()=}: {_worker_state=}') + + if stats: + t = time.time() + ret = pagefn( + page, + *_worker_state.pagefn_args, + **_worker_state.pagefn_kwargs, + ) + if stats: + _stats_write(t, f'{page_num=} pagefn()') + except Exception as e: + if verbose: pymupdf.log(f'{os.getpid()=}: exception {e=}') + ret = e + if verbose: + pymupdf.log(f'{os.getpid()=}: sending {page_num=} {ret=}') + + queue_up.put( (page_num, ret) ) + + error = None + + pids = list() + try: + # Start child processes. + if stats: + t = time.time() + for i in range(concurrency): + p = os.fork() # pylint: disable=no-member + if p == 0: + # Child process. + try: + try: + childfn() + except Exception as e: + pymupdf.log(f'{os.getpid()=}: childfn() => {e=}') + raise + finally: + if verbose: + pymupdf.log(f'{os.getpid()=}: calling os._exit(0)') + os._exit(0) + pids.append(p) + if stats: + _stats_write(t, 'create child processes') + + # Send page numbers. + if stats: + t = time.time() + if verbose: + pymupdf.log(f'Sending page numbers.') + for page_num in range(len(pages)): + queue_down.put(page_num) + if stats: + _stats_write(t, 'Send page numbers') + + # Collect results. We give up if any worker sends an exception instead + # of text, but this hasn't been tested. + ret = [None] * len(pages) + for i in range(len(pages)): + page_num, text = queue_up.get() + if verbose: + pymupdf.log(f'{page_num=} {len(text)=}') + assert ret[page_num] is None + if isinstance(text, Exception): + if not error: + error = text + break + ret[page_num] = text + + # Close queue. This should cause exception in workers and terminate + # them, but on macos-arm64 this does not seem to happen, so we also + # send None, which makes workers terminate. + for i in range(concurrency): + queue_down.put(None) + if verbose: pymupdf.log(f'Closing queues.') + queue_down.close() + + if error: + raise error + if verbose: + pymupdf.log(f'After concurrent, returning {len(ret)=}') + return ret + + finally: + # Join all child processes. + if stats: + t = time.time() + for pid in pids: + if verbose: + pymupdf.log(f'waiting for {pid=}.') + e = os.waitpid(pid, 0) + if verbose: + pymupdf.log(f'{pid=} => {e=}') + if stats: + _stats_write(t, 'Join all child proceses') diff --git a/src/_wxcolors.py b/src/_wxcolors.py new file mode 100644 index 000000000..730fcbc3b --- /dev/null +++ b/src/_wxcolors.py @@ -0,0 +1,562 @@ +_wxcolors = [ + ("ALICEBLUE", 240, 248, 255), + ("ANTIQUEWHITE", 250, 235, 215), + ("ANTIQUEWHITE1", 255, 239, 219), + ("ANTIQUEWHITE2", 238, 223, 204), + ("ANTIQUEWHITE3", 205, 192, 176), + ("ANTIQUEWHITE4", 139, 131, 120), + ("AQUA", 0, 255, 255), + ("AQUAMARINE", 127, 255, 212), + ("AQUAMARINE1", 127, 255, 212), + ("AQUAMARINE2", 118, 238, 198), + ("AQUAMARINE3", 102, 205, 170), + ("AQUAMARINE4", 69, 139, 116), + ("AZURE", 240, 255, 255), + ("AZURE1", 240, 255, 255), + ("AZURE2", 224, 238, 238), + ("AZURE3", 193, 205, 205), + ("AZURE4", 131, 139, 139), + ("BEIGE", 245, 245, 220), + ("BISQUE", 255, 228, 196), + ("BISQUE1", 255, 228, 196), + ("BISQUE2", 238, 213, 183), + ("BISQUE3", 205, 183, 158), + ("BISQUE4", 139, 125, 107), + ("BLACK", 0, 0, 0), + ("BLANCHEDALMOND", 255, 235, 205), + ("BLUE", 0, 0, 255), + ("BLUE1", 0, 0, 255), + ("BLUE2", 0, 0, 238), + ("BLUE3", 0, 0, 205), + ("BLUE4", 0, 0, 139), + ("BLUEVIOLET", 138, 43, 226), + ("BROWN", 165, 42, 42), + ("BROWN1", 255, 64, 64), + ("BROWN2", 238, 59, 59), + ("BROWN3", 205, 51, 51), + ("BROWN4", 139, 35, 35), + ("BURLYWOOD", 222, 184, 135), + ("BURLYWOOD1", 255, 211, 155), + ("BURLYWOOD2", 238, 197, 145), + ("BURLYWOOD3", 205, 170, 125), + ("BURLYWOOD4", 139, 115, 85), + ("CADETBLUE", 95, 158, 160), + ("CADETBLUE1", 152, 245, 255), + ("CADETBLUE2", 142, 229, 238), + ("CADETBLUE3", 122, 197, 205), + ("CADETBLUE4", 83, 134, 139), + ("CHARTREUSE", 127, 255, 0), + ("CHARTREUSE1", 127, 255, 0), + ("CHARTREUSE2", 118, 238, 0), + ("CHARTREUSE3", 102, 205, 0), + ("CHARTREUSE4", 69, 139, 0), + ("CHOCOLATE", 210, 105, 30), + ("CHOCOLATE1", 255, 127, 36), + ("CHOCOLATE2", 238, 118, 33), + ("CHOCOLATE3", 205, 102, 29), + ("CHOCOLATE4", 139, 69, 19), + ("COFFEE", 156, 79, 0), + ("CORAL", 255, 127, 80), + ("CORAL1", 255, 114, 86), + ("CORAL2", 238, 106, 80), + ("CORAL3", 205, 91, 69), + ("CORAL4", 139, 62, 47), + ("CORNFLOWERBLUE", 100, 149, 237), + ("CORNSILK", 255, 248, 220), + ("CORNSILK1", 255, 248, 220), + ("CORNSILK2", 238, 232, 205), + ("CORNSILK3", 205, 200, 177), + ("CORNSILK4", 139, 136, 120), + ("CRIMSON", 220, 20, 60), + ("CYAN", 0, 255, 255), + ("CYAN1", 0, 255, 255), + ("CYAN2", 0, 238, 238), + ("CYAN3", 0, 205, 205), + ("CYAN4", 0, 139, 139), + ("DARKBLUE", 0, 0, 139), + ("DARKCYAN", 0, 139, 139), + ("DARKGOLDENROD", 184, 134, 11), + ("DARKGOLDENROD1", 255, 185, 15), + ("DARKGOLDENROD2", 238, 173, 14), + ("DARKGOLDENROD3", 205, 149, 12), + ("DARKGOLDENROD4", 139, 101, 8), + ("DARKGRAY", 169, 169, 169), + ("DARKGREEN", 0, 100, 0), + ("DARKGREY", 169, 169, 169), + ("DARKKHAKI", 189, 183, 107), + ("DARKMAGENTA", 139, 0, 139), + ("DARKOLIVEGREEN", 85, 107, 47), + ("DARKOLIVEGREEN1", 202, 255, 112), + ("DARKOLIVEGREEN2", 188, 238, 104), + ("DARKOLIVEGREEN3", 162, 205, 90), + ("DARKOLIVEGREEN4", 110, 139, 61), + ("DARKORANGE", 255, 140, 0), + ("DARKORANGE1", 255, 127, 0), + ("DARKORANGE2", 238, 118, 0), + ("DARKORANGE3", 205, 102, 0), + ("DARKORANGE4", 139, 69, 0), + ("DARKORCHID", 153, 50, 204), + ("DARKORCHID1", 191, 62, 255), + ("DARKORCHID2", 178, 58, 238), + ("DARKORCHID3", 154, 50, 205), + ("DARKORCHID4", 104, 34, 139), + ("DARKRED", 139, 0, 0), + ("DARKSALMON", 233, 150, 122), + ("DARKSEAGREEN", 143, 188, 143), + ("DARKSEAGREEN1", 193, 255, 193), + ("DARKSEAGREEN2", 180, 238, 180), + ("DARKSEAGREEN3", 155, 205, 155), + ("DARKSEAGREEN4", 105, 139, 105), + ("DARKSLATEBLUE", 72, 61, 139), + ("DARKSLATEGRAY", 47, 79, 79), + ("DARKSLATEGREY", 47, 79, 79), + ("DARKTURQUOISE", 0, 206, 209), + ("DARKVIOLET", 148, 0, 211), + ("DEEPPINK", 255, 20, 147), + ("DEEPPINK1", 255, 20, 147), + ("DEEPPINK2", 238, 18, 137), + ("DEEPPINK3", 205, 16, 118), + ("DEEPPINK4", 139, 10, 80), + ("DEEPSKYBLUE", 0, 191, 255), + ("DEEPSKYBLUE1", 0, 191, 255), + ("DEEPSKYBLUE2", 0, 178, 238), + ("DEEPSKYBLUE3", 0, 154, 205), + ("DEEPSKYBLUE4", 0, 104, 139), + ("DIMGRAY", 105, 105, 105), + ("DIMGREY", 105, 105, 105), + ("DODGERBLUE", 30, 144, 255), + ("DODGERBLUE1", 30, 144, 255), + ("DODGERBLUE2", 28, 134, 238), + ("DODGERBLUE3", 24, 116, 205), + ("DODGERBLUE4", 16, 78, 139), + ("FIREBRICK", 178, 34, 34), + ("FIREBRICK1", 255, 48, 48), + ("FIREBRICK2", 238, 44, 44), + ("FIREBRICK3", 205, 38, 38), + ("FIREBRICK4", 139, 26, 26), + ("FLORALWHITE", 255, 250, 240), + ("FORESTGREEN", 34, 139, 34), + ("FUCHSIA", 255, 0, 255), + ("GAINSBORO", 220, 220, 220), + ("GHOSTWHITE", 248, 248, 255), + ("GOLD", 255, 215, 0), + ("GOLD1", 255, 215, 0), + ("GOLD2", 238, 201, 0), + ("GOLD3", 205, 173, 0), + ("GOLD4", 139, 117, 0), + ("GOLDENROD", 218, 165, 32), + ("GOLDENROD1", 255, 193, 37), + ("GOLDENROD2", 238, 180, 34), + ("GOLDENROD3", 205, 155, 29), + ("GOLDENROD4", 139, 105, 20), + ("GRAY", 190, 190, 190), + ("GRAY0", 0, 0, 0), + ("GRAY1", 3, 3, 3), + ("GRAY10", 26, 26, 26), + ("GRAY100", 255, 255, 255), + ("GRAY11", 28, 28, 28), + ("GRAY12", 31, 31, 31), + ("GRAY13", 33, 33, 33), + ("GRAY14", 36, 36, 36), + ("GRAY15", 38, 38, 38), + ("GRAY16", 41, 41, 41), + ("GRAY17", 43, 43, 43), + ("GRAY18", 46, 46, 46), + ("GRAY19", 48, 48, 48), + ("GRAY2", 5, 5, 5), + ("GRAY20", 51, 51, 51), + ("GRAY21", 54, 54, 54), + ("GRAY22", 56, 56, 56), + ("GRAY23", 59, 59, 59), + ("GRAY24", 61, 61, 61), + ("GRAY25", 64, 64, 64), + ("GRAY26", 66, 66, 66), + ("GRAY27", 69, 69, 69), + ("GRAY28", 71, 71, 71), + ("GRAY29", 74, 74, 74), + ("GRAY3", 8, 8, 8), + ("GRAY30", 77, 77, 77), + ("GRAY31", 79, 79, 79), + ("GRAY32", 82, 82, 82), + ("GRAY33", 84, 84, 84), + ("GRAY34", 87, 87, 87), + ("GRAY35", 89, 89, 89), + ("GRAY36", 92, 92, 92), + ("GRAY37", 94, 94, 94), + ("GRAY38", 97, 97, 97), + ("GRAY39", 99, 99, 99), + ("GRAY4", 10, 10, 10), + ("GRAY40", 102, 102, 102), + ("GRAY41", 105, 105, 105), + ("GRAY42", 107, 107, 107), + ("GRAY43", 110, 110, 110), + ("GRAY44", 112, 112, 112), + ("GRAY45", 115, 115, 115), + ("GRAY46", 117, 117, 117), + ("GRAY47", 120, 120, 120), + ("GRAY48", 122, 122, 122), + ("GRAY49", 125, 125, 125), + ("GRAY5", 13, 13, 13), + ("GRAY50", 127, 127, 127), + ("GRAY51", 130, 130, 130), + ("GRAY52", 133, 133, 133), + ("GRAY53", 135, 135, 135), + ("GRAY54", 138, 138, 138), + ("GRAY55", 140, 140, 140), + ("GRAY56", 143, 143, 143), + ("GRAY57", 145, 145, 145), + ("GRAY58", 148, 148, 148), + ("GRAY59", 150, 150, 150), + ("GRAY6", 15, 15, 15), + ("GRAY60", 153, 153, 153), + ("GRAY61", 156, 156, 156), + ("GRAY62", 158, 158, 158), + ("GRAY63", 161, 161, 161), + ("GRAY64", 163, 163, 163), + ("GRAY65", 166, 166, 166), + ("GRAY66", 168, 168, 168), + ("GRAY67", 171, 171, 171), + ("GRAY68", 173, 173, 173), + ("GRAY69", 176, 176, 176), + ("GRAY7", 18, 18, 18), + ("GRAY70", 179, 179, 179), + ("GRAY71", 181, 181, 181), + ("GRAY72", 184, 184, 184), + ("GRAY73", 186, 186, 186), + ("GRAY74", 189, 189, 189), + ("GRAY75", 191, 191, 191), + ("GRAY76", 194, 194, 194), + ("GRAY77", 196, 196, 196), + ("GRAY78", 199, 199, 199), + ("GRAY79", 201, 201, 201), + ("GRAY8", 20, 20, 20), + ("GRAY80", 204, 204, 204), + ("GRAY81", 207, 207, 207), + ("GRAY82", 209, 209, 209), + ("GRAY83", 212, 212, 212), + ("GRAY84", 214, 214, 214), + ("GRAY85", 217, 217, 217), + ("GRAY86", 219, 219, 219), + ("GRAY87", 222, 222, 222), + ("GRAY88", 224, 224, 224), + ("GRAY89", 227, 227, 227), + ("GRAY9", 23, 23, 23), + ("GRAY90", 229, 229, 229), + ("GRAY91", 232, 232, 232), + ("GRAY92", 235, 235, 235), + ("GRAY93", 237, 237, 237), + ("GRAY94", 240, 240, 240), + ("GRAY95", 242, 242, 242), + ("GRAY96", 245, 245, 245), + ("GRAY97", 247, 247, 247), + ("GRAY98", 250, 250, 250), + ("GRAY99", 252, 252, 252), + ("GREEN YELLOW", 173, 255, 47), + ("GREEN", 0, 255, 0), + ("GREEN1", 0, 255, 0), + ("GREEN2", 0, 238, 0), + ("GREEN3", 0, 205, 0), + ("GREEN4", 0, 139, 0), + ("GREENYELLOW", 173, 255, 47), + ("GREY", 128, 128, 128), + ("HONEYDEW", 240, 255, 240), + ("HONEYDEW1", 240, 255, 240), + ("HONEYDEW2", 224, 238, 224), + ("HONEYDEW3", 193, 205, 193), + ("HONEYDEW4", 131, 139, 131), + ("HOTPINK", 255, 105, 180), + ("HOTPINK1", 255, 110, 180), + ("HOTPINK2", 238, 106, 167), + ("HOTPINK3", 205, 96, 144), + ("HOTPINK4", 139, 58, 98), + ("INDIANRED", 205, 92, 92), + ("INDIANRED1", 255, 106, 106), + ("INDIANRED2", 238, 99, 99), + ("INDIANRED3", 205, 85, 85), + ("INDIANRED4", 139, 58, 58), + ("INDIGO", 75, 0, 130), + ("IVORY", 255, 255, 240), + ("IVORY1", 255, 255, 240), + ("IVORY2", 238, 238, 224), + ("IVORY3", 205, 205, 193), + ("IVORY4", 139, 139, 131), + ("KHAKI", 240, 230, 140), + ("KHAKI1", 255, 246, 143), + ("KHAKI2", 238, 230, 133), + ("KHAKI3", 205, 198, 115), + ("KHAKI4", 139, 134, 78), + ("LAVENDER", 230, 230, 250), + ("LAVENDERBLUSH", 255, 240, 245), + ("LAVENDERBLUSH1", 255, 240, 245), + ("LAVENDERBLUSH2", 238, 224, 229), + ("LAVENDERBLUSH3", 205, 193, 197), + ("LAVENDERBLUSH4", 139, 131, 134), + ("LAWNGREEN", 124, 252, 0), + ("LEMONCHIFFON", 255, 250, 205), + ("LEMONCHIFFON1", 255, 250, 205), + ("LEMONCHIFFON2", 238, 233, 191), + ("LEMONCHIFFON3", 205, 201, 165), + ("LEMONCHIFFON4", 139, 137, 112), + ("LIGHTBLUE", 173, 216, 230), + ("LIGHTBLUE1", 191, 239, 255), + ("LIGHTBLUE2", 178, 223, 238), + ("LIGHTBLUE3", 154, 192, 205), + ("LIGHTBLUE4", 104, 131, 139), + ("LIGHTCORAL", 240, 128, 128), + ("LIGHTCYAN", 224, 255, 255), + ("LIGHTCYAN1", 224, 255, 255), + ("LIGHTCYAN2", 209, 238, 238), + ("LIGHTCYAN3", 180, 205, 205), + ("LIGHTCYAN4", 122, 139, 139), + ("LIGHTGOLDENROD", 238, 221, 130), + ("LIGHTGOLDENROD1", 255, 236, 139), + ("LIGHTGOLDENROD2", 238, 220, 130), + ("LIGHTGOLDENROD3", 205, 190, 112), + ("LIGHTGOLDENROD4", 139, 129, 76), + ("LIGHTGOLDENRODYELLOW", 250, 250, 210), + ("LIGHTGRAY", 211, 211, 211), + ("LIGHTGREEN", 144, 238, 144), + ("LIGHTGREY", 211, 211, 211), + ("LIGHTPINK", 255, 182, 193), + ("LIGHTPINK1", 255, 174, 185), + ("LIGHTPINK2", 238, 162, 173), + ("LIGHTPINK3", 205, 140, 149), + ("LIGHTPINK4", 139, 95, 101), + ("LIGHTSALMON", 255, 160, 122), + ("LIGHTSALMON1", 255, 160, 122), + ("LIGHTSALMON2", 238, 149, 114), + ("LIGHTSALMON3", 205, 129, 98), + ("LIGHTSALMON4", 139, 87, 66), + ("LIGHTSEAGREEN", 32, 178, 170), + ("LIGHTSKYBLUE", 135, 206, 250), + ("LIGHTSKYBLUE1", 176, 226, 255), + ("LIGHTSKYBLUE2", 164, 211, 238), + ("LIGHTSKYBLUE3", 141, 182, 205), + ("LIGHTSKYBLUE4", 96, 123, 139), + ("LIGHTSLATEBLUE", 132, 112, 255), + ("LIGHTSLATEGRAY", 119, 136, 153), + ("LIGHTSLATEGREY", 119, 136, 153), + ("LIGHTSTEELBLUE", 176, 196, 222), + ("LIGHTSTEELBLUE1", 202, 225, 255), + ("LIGHTSTEELBLUE2", 188, 210, 238), + ("LIGHTSTEELBLUE3", 162, 181, 205), + ("LIGHTSTEELBLUE4", 110, 123, 139), + ("LIGHTYELLOW", 255, 255, 224), + ("LIGHTYELLOW1", 255, 255, 224), + ("LIGHTYELLOW2", 238, 238, 209), + ("LIGHTYELLOW3", 205, 205, 180), + ("LIGHTYELLOW4", 139, 139, 122), + ("LIME", 0, 255, 0), + ("LIMEGREEN", 50, 205, 50), + ("LINEN", 250, 240, 230), + ("MAGENTA", 255, 0, 255), + ("MAGENTA1", 255, 0, 255), + ("MAGENTA2", 238, 0, 238), + ("MAGENTA3", 205, 0, 205), + ("MAGENTA4", 139, 0, 139), + ("MAROON", 176, 48, 96), + ("MAROON1", 255, 52, 179), + ("MAROON2", 238, 48, 167), + ("MAROON3", 205, 41, 144), + ("MAROON4", 139, 28, 98), + ("MEDIUMAQUAMARINE", 102, 205, 170), + ("MEDIUMBLUE", 0, 0, 205), + ("MEDIUMORCHID", 186, 85, 211), + ("MEDIUMORCHID1", 224, 102, 255), + ("MEDIUMORCHID2", 209, 95, 238), + ("MEDIUMORCHID3", 180, 82, 205), + ("MEDIUMORCHID4", 122, 55, 139), + ("MEDIUMPURPLE", 147, 112, 219), + ("MEDIUMPURPLE1", 171, 130, 255), + ("MEDIUMPURPLE2", 159, 121, 238), + ("MEDIUMPURPLE3", 137, 104, 205), + ("MEDIUMPURPLE4", 93, 71, 139), + ("MEDIUMSEAGREEN", 60, 179, 113), + ("MEDIUMSLATEBLUE", 123, 104, 238), + ("MEDIUMSPRINGGREEN", 0, 250, 154), + ("MEDIUMTURQUOISE", 72, 209, 204), + ("MEDIUMVIOLETRED", 199, 21, 133), + ("MIDNIGHTBLUE", 25, 25, 112), + ("MINTCREAM", 245, 255, 250), + ("MISTYROSE", 255, 228, 225), + ("MISTYROSE1", 255, 228, 225), + ("MISTYROSE2", 238, 213, 210), + ("MISTYROSE3", 205, 183, 181), + ("MISTYROSE4", 139, 125, 123), + ("MOCCASIN", 255, 228, 181), + ("MUPDFBLUE", 37, 114, 172), + ("NAVAJOWHITE", 255, 222, 173), + ("NAVAJOWHITE1", 255, 222, 173), + ("NAVAJOWHITE2", 238, 207, 161), + ("NAVAJOWHITE3", 205, 179, 139), + ("NAVAJOWHITE4", 139, 121, 94), + ("NAVY", 0, 0, 128), + ("NAVYBLUE", 0, 0, 128), + ("OLDLACE", 253, 245, 230), + ("OLIVE", 128, 128, 0), + ("OLIVEDRAB", 107, 142, 35), + ("OLIVEDRAB1", 192, 255, 62), + ("OLIVEDRAB2", 179, 238, 58), + ("OLIVEDRAB3", 154, 205, 50), + ("OLIVEDRAB4", 105, 139, 34), + ("ORANGE", 255, 165, 0), + ("ORANGE1", 255, 165, 0), + ("ORANGE2", 238, 154, 0), + ("ORANGE3", 205, 133, 0), + ("ORANGE4", 139, 90, 0), + ("ORANGERED", 255, 69, 0), + ("ORANGERED1", 255, 69, 0), + ("ORANGERED2", 238, 64, 0), + ("ORANGERED3", 205, 55, 0), + ("ORANGERED4", 139, 37, 0), + ("ORCHID", 218, 112, 214), + ("ORCHID1", 255, 131, 250), + ("ORCHID2", 238, 122, 233), + ("ORCHID3", 205, 105, 201), + ("ORCHID4", 139, 71, 137), + ("PALEGOLDENROD", 238, 232, 170), + ("PALEGREEN", 152, 251, 152), + ("PALEGREEN1", 154, 255, 154), + ("PALEGREEN2", 144, 238, 144), + ("PALEGREEN3", 124, 205, 124), + ("PALEGREEN4", 84, 139, 84), + ("PALETURQUOISE", 175, 238, 238), + ("PALETURQUOISE1", 187, 255, 255), + ("PALETURQUOISE2", 174, 238, 238), + ("PALETURQUOISE3", 150, 205, 205), + ("PALETURQUOISE4", 102, 139, 139), + ("PALEVIOLETRED", 219, 112, 147), + ("PALEVIOLETRED1", 255, 130, 171), + ("PALEVIOLETRED2", 238, 121, 159), + ("PALEVIOLETRED3", 205, 104, 137), + ("PALEVIOLETRED4", 139, 71, 93), + ("PAPAYAWHIP", 255, 239, 213), + ("PEACHPUFF", 255, 218, 185), + ("PEACHPUFF1", 255, 218, 185), + ("PEACHPUFF2", 238, 203, 173), + ("PEACHPUFF3", 205, 175, 149), + ("PEACHPUFF4", 139, 119, 101), + ("PERU", 205, 133, 63), + ("PINK", 255, 192, 203), + ("PINK1", 255, 181, 197), + ("PINK2", 238, 169, 184), + ("PINK3", 205, 145, 158), + ("PINK4", 139, 99, 108), + ("PLUM", 221, 160, 221), + ("PLUM1", 255, 187, 255), + ("PLUM2", 238, 174, 238), + ("PLUM3", 205, 150, 205), + ("PLUM4", 139, 102, 139), + ("POWDERBLUE", 176, 224, 230), + ("PURPLE", 160, 32, 240), + ("PURPLE1", 155, 48, 255), + ("PURPLE2", 145, 44, 238), + ("PURPLE3", 125, 38, 205), + ("PURPLE4", 85, 26, 139), + ("PY_COLOR", 240, 255, 210), + ("RED", 255, 0, 0), + ("RED1", 255, 0, 0), + ("RED2", 238, 0, 0), + ("RED3", 205, 0, 0), + ("RED4", 139, 0, 0), + ("ROSYBROWN", 188, 143, 143), + ("ROSYBROWN1", 255, 193, 193), + ("ROSYBROWN2", 238, 180, 180), + ("ROSYBROWN3", 205, 155, 155), + ("ROSYBROWN4", 139, 105, 105), + ("ROYALBLUE", 65, 105, 225), + ("ROYALBLUE1", 72, 118, 255), + ("ROYALBLUE2", 67, 110, 238), + ("ROYALBLUE3", 58, 95, 205), + ("ROYALBLUE4", 39, 64, 139), + ("SADDLEBROWN", 139, 69, 19), + ("SALMON", 250, 128, 114), + ("SALMON1", 255, 140, 105), + ("SALMON2", 238, 130, 98), + ("SALMON3", 205, 112, 84), + ("SALMON4", 139, 76, 57), + ("SANDYBROWN", 244, 164, 96), + ("SEAGREEN", 46, 139, 87), + ("SEAGREEN1", 84, 255, 159), + ("SEAGREEN2", 78, 238, 148), + ("SEAGREEN3", 67, 205, 128), + ("SEAGREEN4", 46, 139, 87), + ("SEASHELL", 255, 245, 238), + ("SEASHELL1", 255, 245, 238), + ("SEASHELL2", 238, 229, 222), + ("SEASHELL3", 205, 197, 191), + ("SEASHELL4", 139, 134, 130), + ("SIENNA", 160, 82, 45), + ("SIENNA1", 255, 130, 71), + ("SIENNA2", 238, 121, 66), + ("SIENNA3", 205, 104, 57), + ("SIENNA4", 139, 71, 38), + ("SILVER", 192, 192, 192), + ("SKYBLUE", 135, 206, 235), + ("SKYBLUE1", 135, 206, 255), + ("SKYBLUE2", 126, 192, 238), + ("SKYBLUE3", 108, 166, 205), + ("SKYBLUE4", 74, 112, 139), + ("SLATEBLUE", 106, 90, 205), + ("SLATEBLUE1", 131, 111, 255), + ("SLATEBLUE2", 122, 103, 238), + ("SLATEBLUE3", 105, 89, 205), + ("SLATEBLUE4", 71, 60, 139), + ("SLATEGRAY", 112, 128, 144), + ("SLATEGREY", 112, 128, 144), + ("SNOW", 255, 250, 250), + ("SNOW1", 255, 250, 250), + ("SNOW2", 238, 233, 233), + ("SNOW3", 205, 201, 201), + ("SNOW4", 139, 137, 137), + ("SPRINGGREEN", 0, 255, 127), + ("SPRINGGREEN1", 0, 255, 127), + ("SPRINGGREEN2", 0, 238, 118), + ("SPRINGGREEN3", 0, 205, 102), + ("SPRINGGREEN4", 0, 139, 69), + ("STEELBLUE", 70, 130, 180), + ("STEELBLUE1", 99, 184, 255), + ("STEELBLUE2", 92, 172, 238), + ("STEELBLUE3", 79, 148, 205), + ("STEELBLUE4", 54, 100, 139), + ("TAN", 210, 180, 140), + ("TAN1", 255, 165, 79), + ("TAN2", 238, 154, 73), + ("TAN3", 205, 133, 63), + ("TAN4", 139, 90, 43), + ("TEAL", 0, 128, 128), + ("THISTLE", 216, 191, 216), + ("THISTLE1", 255, 225, 255), + ("THISTLE2", 238, 210, 238), + ("THISTLE3", 205, 181, 205), + ("THISTLE4", 139, 123, 139), + ("TOMATO", 255, 99, 71), + ("TOMATO1", 255, 99, 71), + ("TOMATO2", 238, 92, 66), + ("TOMATO3", 205, 79, 57), + ("TOMATO4", 139, 54, 38), + ("TURQUOISE", 64, 224, 208), + ("TURQUOISE1", 0, 245, 255), + ("TURQUOISE2", 0, 229, 238), + ("TURQUOISE3", 0, 197, 205), + ("TURQUOISE4", 0, 134, 139), + ("VIOLET", 238, 130, 238), + ("VIOLETRED", 208, 32, 144), + ("VIOLETRED1", 255, 62, 150), + ("VIOLETRED2", 238, 58, 140), + ("VIOLETRED3", 205, 50, 120), + ("VIOLETRED4", 139, 34, 82), + ("WHEAT", 245, 222, 179), + ("WHEAT1", 255, 231, 186), + ("WHEAT2", 238, 216, 174), + ("WHEAT3", 205, 186, 150), + ("WHEAT4", 139, 126, 102), + ("WHITE", 255, 255, 255), + ("WHITESMOKE", 245, 245, 245), + ("YELLOW", 255, 255, 0), + ("YELLOW1", 255, 255, 0), + ("YELLOW2", 238, 238, 0), + ("YELLOW3", 205, 205, 0), + ("YELLOW4", 139, 139, 0), + ("YELLOWGREEN", 154, 205, 50), + ] diff --git a/src/extra.i b/src/extra.i new file mode 100644 index 000000000..8e4e75efb --- /dev/null +++ b/src/extra.i @@ -0,0 +1,4522 @@ +%pythoncode %{ +# pylint: disable=all +%} + +%begin +%{ +#define SWIG_PYTHON_INTERPRETER_NO_DEBUG + +/* This seems to be necessary on some Windows machines with Py_LIMITED_API, +otherwise compilation can fail because free() and malloc() are not declared. */ +#include +%} + +%init +%{ + /* Initialise some globals that require Python functions. + + [Prior to 2023-08-18 we initialised these global variables inline, + but this causes a SEGV on Windows with Python-3.10 for `dictkey_c` + (actually any string of length 1 failed).] */ + + dictkey_align = PyUnicode_InternFromString("align"); + dictkey_ascender = PyUnicode_InternFromString("ascender"); + dictkey_bidi = PyUnicode_InternFromString("bidi"); + dictkey_bbox = PyUnicode_InternFromString("bbox"); + dictkey_blocks = PyUnicode_InternFromString("blocks"); + dictkey_bpc = PyUnicode_InternFromString("bpc"); + dictkey_c = PyUnicode_InternFromString("c"); + dictkey_chars = PyUnicode_InternFromString("chars"); + dictkey_color = PyUnicode_InternFromString("color"); + dictkey_colorspace = PyUnicode_InternFromString("colorspace"); + dictkey_content = PyUnicode_InternFromString("content"); + dictkey_creationDate = PyUnicode_InternFromString("creationDate"); + dictkey_cs_name = PyUnicode_InternFromString("cs-name"); + dictkey_da = PyUnicode_InternFromString("da"); + dictkey_dashes = PyUnicode_InternFromString("dashes"); + dictkey_desc = PyUnicode_InternFromString("descender"); + dictkey_descender = PyUnicode_InternFromString("descender"); + dictkey_dir = PyUnicode_InternFromString("dir"); + dictkey_effect = PyUnicode_InternFromString("effect"); + dictkey_ext = PyUnicode_InternFromString("ext"); + dictkey_filename = PyUnicode_InternFromString("filename"); + dictkey_fill = PyUnicode_InternFromString("fill"); + dictkey_flags = PyUnicode_InternFromString("flags"); + dictkey_char_flags = PyUnicode_InternFromString("char_flags"); /* Only used with mupdf >= 1.25.2. */ + dictkey_font = PyUnicode_InternFromString("font"); + dictkey_glyph = PyUnicode_InternFromString("glyph"); + dictkey_height = PyUnicode_InternFromString("height"); + dictkey_id = PyUnicode_InternFromString("id"); + dictkey_image = PyUnicode_InternFromString("image"); + dictkey_items = PyUnicode_InternFromString("items"); + dictkey_length = PyUnicode_InternFromString("length"); + dictkey_lines = PyUnicode_InternFromString("lines"); + dictkey_matrix = PyUnicode_InternFromString("transform"); + dictkey_modDate = PyUnicode_InternFromString("modDate"); + dictkey_name = PyUnicode_InternFromString("name"); + dictkey_number = PyUnicode_InternFromString("number"); + dictkey_origin = PyUnicode_InternFromString("origin"); + dictkey_rect = PyUnicode_InternFromString("rect"); + dictkey_size = PyUnicode_InternFromString("size"); + dictkey_smask = PyUnicode_InternFromString("smask"); + dictkey_spans = PyUnicode_InternFromString("spans"); + dictkey_stroke = PyUnicode_InternFromString("stroke"); + dictkey_style = PyUnicode_InternFromString("style"); + dictkey_subject = PyUnicode_InternFromString("subject"); + dictkey_text = PyUnicode_InternFromString("text"); + dictkey_title = PyUnicode_InternFromString("title"); + dictkey_type = PyUnicode_InternFromString("type"); + dictkey_ufilename = PyUnicode_InternFromString("ufilename"); + dictkey_width = PyUnicode_InternFromString("width"); + dictkey_wmode = PyUnicode_InternFromString("wmode"); + dictkey_xref = PyUnicode_InternFromString("xref"); + dictkey_xres = PyUnicode_InternFromString("xres"); + dictkey_yres = PyUnicode_InternFromString("yres"); +%} + +%include std_string.i + +%include exception.i +%exception { + try { + $action + } + +/* this might not be ok on windows. +catch (Swig::DirectorException &e) { + SWIG_fail; +}*/ +catch(std::exception& e) { + SWIG_exception(SWIG_RuntimeError, e.what()); +} +catch(...) { + SWIG_exception(SWIG_RuntimeError, "Unknown exception"); + } +} + +%{ +#include "mupdf/classes2.h" +#include "mupdf/exceptions.h" +#include "mupdf/internal.h" + +#include +#include + + +#define MAKE_MUPDF_VERSION_INT(major, minor, patch) ((major << 16) + (minor << 8) + (patch << 0)) + +#define MUPDF_VERSION_INT MAKE_MUPDF_VERSION_INT(FZ_VERSION_MAJOR, FZ_VERSION_MINOR, FZ_VERSION_PATCH) + +#define MUPDF_VERSION_GE(major, minor, patch) \ + MUPDF_VERSION_INT >= MAKE_MUPDF_VERSION_INT(major, minor, patch) + +/* Define a wrapper for PDF_NAME that returns a mupdf::PdfObj instead of a +pdf_obj*. This avoids implicit construction of a mupdf::PdfObj, which is +deliberately prohibited (with `explicit` on constructors) by recent MuPDF. */ +#define PDF_NAME2(X) mupdf::PdfObj(PDF_NAME(X)) + +/* Returns equivalent of `repr(x)`. */ +static std::string repr(PyObject* x) +{ + PyObject* repr = PyObject_Repr(x); + PyObject* repr_str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~"); + #ifdef Py_LIMITED_API + const char* repr_str_s = PyBytes_AsString(repr_str); + #else + const char* repr_str_s = PyBytes_AS_STRING(repr_str); + #endif + std::string ret = repr_str_s; + Py_DECREF(repr_str); + Py_DECREF(repr); + return ret; +} + +#ifdef Py_LIMITED_API + static PyObject* PySequence_ITEM(PyObject* o, Py_ssize_t i) + { + return PySequence_GetItem(o, i); + } + + static const char* PyUnicode_AsUTF8(PyObject* o) + { + static PyObject* string = nullptr; + Py_XDECREF(string); + string = PyUnicode_AsUTF8String(o); + return PyBytes_AsString(string); + } +#endif + + +/* These are also in pymupdf/__init__.py. */ +const char MSG_BAD_ANNOT_TYPE[] = "bad annot type"; +const char MSG_BAD_APN[] = "bad or missing annot AP/N"; +const char MSG_BAD_ARG_INK_ANNOT[] = "arg must be seq of seq of float pairs"; +const char MSG_BAD_ARG_POINTS[] = "bad seq of points"; +const char MSG_BAD_BUFFER[] = "bad type: 'buffer'"; +const char MSG_BAD_COLOR_SEQ[] = "bad color sequence"; +const char MSG_BAD_DOCUMENT[] = "cannot open broken document"; +const char MSG_BAD_FILETYPE[] = "bad filetype"; +const char MSG_BAD_LOCATION[] = "bad location"; +const char MSG_BAD_OC_CONFIG[] = "bad config number"; +const char MSG_BAD_OC_LAYER[] = "bad layer number"; +const char MSG_BAD_OC_REF[] = "bad 'oc' reference"; +const char MSG_BAD_PAGEID[] = "bad page id"; +const char MSG_BAD_PAGENO[] = "bad page number(s)"; +const char MSG_BAD_PDFROOT[] = "PDF has no root"; +const char MSG_BAD_RECT[] = "rect is infinite or empty"; +const char MSG_BAD_TEXT[] = "bad type: 'text'"; +const char MSG_BAD_XREF[] = "bad xref"; +const char MSG_COLOR_COUNT_FAILED[] = "color count failed"; +const char MSG_FILE_OR_BUFFER[] = "need font file or buffer"; +const char MSG_FONT_FAILED[] = "cannot create font"; +const char MSG_IS_NO_ANNOT[] = "is no annotation"; +const char MSG_IS_NO_IMAGE[] = "is no image"; +const char MSG_IS_NO_PDF[] = "is no PDF"; +const char MSG_IS_NO_DICT[] = "object is no PDF dict"; +const char MSG_PIX_NOALPHA[] = "source pixmap has no alpha"; +const char MSG_PIXEL_OUTSIDE[] = "pixel(s) outside image"; + +#define JM_BOOL(x) PyBool_FromLong((long) (x)) + +static PyObject *JM_UnicodeFromStr(const char *c); + + +#ifdef _WIN32 + +/* These functions are not provided on Windows. */ + +int vasprintf(char** str, const char* fmt, va_list ap) +{ + va_list ap2; + + va_copy(ap2, ap); + int len = vsnprintf(nullptr, 0, fmt, ap2); + va_end(ap2); + + char* buffer = (char*) malloc(len + 1); + if (!buffer) + { + *str = nullptr; + return -1; + } + va_copy(ap2, ap); + int len2 = vsnprintf(buffer, len + 1, fmt, ap2); + va_end(ap2); + assert(len2 == len); + *str = buffer; + return len; +} + +int asprintf(char** str, const char* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + int ret = vasprintf(str, fmt, ap); + va_end(ap); + + return ret; +} +#endif + + +static void messagev(const char* format, va_list va) +{ + static PyObject* pymupdf_module = PyImport_ImportModule("pymupdf"); + static PyObject* message_fn = PyObject_GetAttrString(pymupdf_module, "message"); + char* text; + vasprintf(&text, format, va); + PyObject* text_py = PyString_FromString(text); + PyObject* args = PyTuple_Pack(1, text_py); + PyObject* ret = PyObject_CallObject(message_fn, args); + Py_XDECREF(ret); + Py_XDECREF(args); + Py_XDECREF(text_py); + free(text); +} + +static void messagef(const char* format, ...) +{ + va_list args; + va_start(args, format); + messagev(format, args); + va_end(args); +} + +PyObject* JM_EscapeStrFromStr(const char* c) +{ + if (!c) return PyUnicode_FromString(""); + PyObject* val = PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace"); + if (!val) + { + val = PyUnicode_FromString(""); + PyErr_Clear(); + } + return val; +} + +PyObject* JM_EscapeStrFromBuffer(fz_buffer* buff) +{ + if (!buff) return PyUnicode_FromString(""); + unsigned char* s = nullptr; + size_t len = mupdf::ll_fz_buffer_storage(buff, &s); + PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace"); + if (!val) + { + val = PyUnicode_FromString(""); + PyErr_Clear(); + } + return val; +} + +//---------------------------------------------------------------------------- +// Deep-copies a source page to the target. +// Modified version of function of pdfmerge.c: we also copy annotations, but +// we skip some subtypes. In addition we rotate output. +//---------------------------------------------------------------------------- +static void page_merge( + mupdf::PdfDocument& doc_des, + mupdf::PdfDocument& doc_src, + int page_from, + int page_to, + int rotate, + int links, + int copy_annots, + mupdf::PdfGraftMap& graft_map + ) +{ + // list of object types (per page) we want to copy + + /* Fixme: on linux these get destructed /after/ + mupdf/platform/c++/implementation/internal.cpp:s_thread_state, which causes + problems - s_thread_state::m_ctx will have been freed. We have a hack + that sets s_thread_state::m_ctx when destructed, so it mostly works when + s_thread_state.get_context() is called after destruction, but this causes + memento leaks and is clearly incorrect. + + Perhaps we could use pdf_obj* known_page_objs[] = {...} and create PdfObj + wrappers as used - this would avoid any cleanup at exit. And it's a general + solution to problem of ordering of cleanup of globals. + */ + static pdf_obj* known_page_objs[] = { + PDF_NAME(Contents), + PDF_NAME(Resources), + PDF_NAME(MediaBox), + PDF_NAME(CropBox), + PDF_NAME(BleedBox), + PDF_NAME(TrimBox), + PDF_NAME(ArtBox), + PDF_NAME(Rotate), + PDF_NAME(UserUnit) + }; + int known_page_objs_num = sizeof(known_page_objs) / sizeof(known_page_objs[0]); + mupdf::PdfObj page_ref = mupdf::pdf_lookup_page_obj(doc_src, page_from); + + // make new page dict in dest doc + mupdf::PdfObj page_dict = mupdf::pdf_new_dict(doc_des, 4); + mupdf::pdf_dict_put(page_dict, PDF_NAME2(Type), PDF_NAME2(Page)); + + for (int i = 0; i < known_page_objs_num; ++i) + { + mupdf::PdfObj known_page_obj(known_page_objs[i]); + mupdf::PdfObj obj = mupdf::pdf_dict_get_inheritable(page_ref, known_page_obj); + if (obj.m_internal) + { + mupdf::pdf_dict_put( + page_dict, + known_page_obj, + mupdf::pdf_graft_mapped_object(graft_map, obj) + ); + } + } + + // Copy annotations, but skip Link, Popup, IRT, Widget types + // If selected, remove dict keys P (parent) and Popup + if (copy_annots) + { + mupdf::PdfObj old_annots = mupdf::pdf_dict_get(page_ref, PDF_NAME2(Annots)); + int n = mupdf::pdf_array_len(old_annots); + if (n > 0) + { + mupdf::PdfObj new_annots = mupdf::pdf_dict_put_array(page_dict, PDF_NAME2(Annots), n); + for (int i = 0; i < n; i++) + { + mupdf::PdfObj o = mupdf::pdf_array_get(old_annots, i); + if (!o.m_internal || !mupdf::pdf_is_dict(o)) // skip non-dict items + { + continue; // skip invalid/null/non-dict items + } + if (mupdf::pdf_dict_get(o, PDF_NAME2(IRT)).m_internal) continue; + mupdf::PdfObj subtype = mupdf::pdf_dict_get(o, PDF_NAME2(Subtype)); + if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Link))) continue; + if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Popup))) continue; + if (mupdf::pdf_name_eq(subtype, PDF_NAME2(Widget))) continue; + mupdf::pdf_dict_del(o, PDF_NAME2(Popup)); + mupdf::pdf_dict_del(o, PDF_NAME2(P)); + mupdf::PdfObj copy_o = mupdf::pdf_graft_mapped_object(graft_map, o); + mupdf::PdfObj annot = mupdf::pdf_new_indirect( + doc_des, + mupdf::pdf_to_num(copy_o), + 0 + ); + mupdf::pdf_array_push(new_annots, annot); + } + } + } + // rotate the page + if (rotate != -1) + { + mupdf::pdf_dict_put_int(page_dict, PDF_NAME2(Rotate), rotate); + } + // Now add the page dictionary to dest PDF + mupdf::PdfObj ref = mupdf::pdf_add_object(doc_des, page_dict); + + // Insert new page at specified location + mupdf::pdf_insert_page(doc_des, page_to, ref); +} + +//----------------------------------------------------------------------------- +// Copy a range of pages (spage, epage) from a source PDF to a specified +// location (apage) of the target PDF. +// If spage > epage, the sequence of source pages is reversed. +//----------------------------------------------------------------------------- +static void JM_merge_range( + mupdf::PdfDocument& doc_des, + mupdf::PdfDocument& doc_src, + int spage, + int epage, + int apage, + int rotate, + int links, + int annots, + int show_progress, + mupdf::PdfGraftMap& graft_map + ) +{ + int afterpage = apage; + int counter = 0; // copied pages counter + int total = mupdf::ll_fz_absi(epage - spage) + 1; // total pages to copy + + if (spage < epage) + { + for (int page = spage; page <= epage; page++, afterpage++) + { + page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); + counter++; + if (show_progress > 0 && counter % show_progress == 0) + { + messagef("Inserted %i of %i pages.", counter, total); + } + } + } + else + { + for (int page = spage; page >= epage; page--, afterpage++) + { + page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map); + counter++; + if (show_progress > 0 && counter % show_progress == 0) + { + messagef("Inserted %i of %i pages.", counter, total); + } + } + } +} + +static bool JM_have_operation(mupdf::PdfDocument& pdf) +{ + // Ensure valid journalling state + if (pdf.m_internal->journal and !mupdf::pdf_undoredo_step(pdf, 0)) + { + return 0; + } + return 1; +} + +static void JM_ensure_operation(mupdf::PdfDocument& pdf) +{ + if (!JM_have_operation(pdf)) + { + throw std::runtime_error("No journalling operation started"); + } +} + + +static void FzDocument_insert_pdf( + mupdf::FzDocument& doc, + mupdf::FzDocument& src, + int from_page, + int to_page, + int start_at, + int rotate, + int links, + int annots, + int show_progress, + int final, + mupdf::PdfGraftMap& graft_map + ) +{ + //std::cerr << __FILE__ << ":" << __LINE__ << ":" << __FUNCTION__ << "\n"; + mupdf::PdfDocument pdfout = mupdf::pdf_specifics(doc); + mupdf::PdfDocument pdfsrc = mupdf::pdf_specifics(src); + int outCount = mupdf::fz_count_pages(doc); + int srcCount = mupdf::fz_count_pages(src); + + // local copies of page numbers + int fp = from_page; + int tp = to_page; + int sa = start_at; + + // normalize page numbers + fp = std::max(fp, 0); // -1 = first page + fp = std::min(fp, srcCount - 1); // but do not exceed last page + + if (tp < 0) tp = srcCount - 1; // -1 = last page + tp = std::min(tp, srcCount - 1); // but do not exceed last page + + if (sa < 0) sa = outCount; // -1 = behind last page + sa = std::min(sa, outCount); // but that is also the limit + + if (!pdfout.m_internal || !pdfsrc.m_internal) + { + throw std::runtime_error("source or target not a PDF"); + } + JM_ensure_operation(pdfout); + JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, graft_map); +} + +static int page_xref(mupdf::FzDocument& this_doc, int pno) +{ + int page_count = mupdf::fz_count_pages(this_doc); + int n = pno; + while (n < 0) + { + n += page_count; + } + mupdf::PdfDocument pdf = mupdf::pdf_specifics(this_doc); + assert(pdf.m_internal); + int xref = 0; + if (n >= page_count) + { + throw std::runtime_error(MSG_BAD_PAGENO);//, PyExc_ValueError); + } + xref = mupdf::pdf_to_num(mupdf::pdf_lookup_page_obj(pdf, n)); + return xref; +} + +static void _newPage(mupdf::PdfDocument& pdf, int pno=-1, float width=595, float height=842) +{ + if (!pdf.m_internal) + { + throw std::runtime_error("is no PDF"); + } + mupdf::FzRect mediabox(0, 0, width, height); + if (pno < -1) + { + throw std::runtime_error("bad page number(s)"); // Should somehow be Python ValueError + } + JM_ensure_operation(pdf); + // create /Resources and /Contents objects + mupdf::PdfObj resources = mupdf::pdf_add_new_dict(pdf, 1); + mupdf::FzBuffer contents; + mupdf::PdfObj page_obj = mupdf::pdf_add_page(pdf, mediabox, 0, resources, contents); + mupdf::pdf_insert_page(pdf, pno, page_obj); +} + +static void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842) +{ + mupdf::PdfDocument pdf = mupdf::pdf_specifics(self); + _newPage(pdf, pno, width, height); +} + + +//------------------------------------------------------------------------ +// return the annotation names (list of /NM entries) +//------------------------------------------------------------------------ +static std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page) +{ + std::vector< std::string> names; + mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)); + if (!annots.m_internal) return names; + int n = mupdf::pdf_array_len(annots); + for (int i = 0; i < n; i++) + { + mupdf::PdfObj annot_obj = mupdf::pdf_array_get(annots, i); + mupdf::PdfObj name = mupdf::pdf_dict_gets(annot_obj, "NM"); + if (name.m_internal) + { + names.push_back(mupdf::pdf_to_text_string(name)); + } + } + return names; +} + + +//------------------------------------------------------------------------ +// Add a unique /NM key to an annotation or widget. +// Append a number to 'stem' such that the result is a unique name. +//------------------------------------------------------------------------ +static void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem) +{ + mupdf::PdfPage page = mupdf::pdf_annot_page(annot); + mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot); + std::vector< std::string> names = JM_get_annot_id_list(page); + char* stem_id = nullptr; + for (int i=0; ; ++i) + { + free(stem_id); + asprintf(&stem_id, "fitz-%s%d", stem, i); + if (std::find(names.begin(), names.end(), stem_id) == names.end()) + { + break; + } + } + mupdf::PdfObj name = mupdf::pdf_new_string(stem_id, strlen(stem_id)); + free(stem_id); + mupdf::pdf_dict_puts(annot_obj, "NM", name); + page.m_internal->doc->resynth_required = 0; +} + +//---------------------------------------------------------------- +// page add_caret_annot +//---------------------------------------------------------------- +static mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& page, mupdf::FzPoint& point) +{ + mupdf::PdfAnnot annot = mupdf::pdf_create_annot(page, ::PDF_ANNOT_CARET); + mupdf::FzPoint p = point; + mupdf::FzRect r = mupdf::pdf_annot_rect(annot); + r = mupdf::fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0); + mupdf::pdf_set_annot_rect(annot, r); + mupdf::pdf_update_annot(annot); + JM_add_annot_id(annot, "A"); + return annot; +} + +static mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& page, mupdf::FzPoint& point) +{ + mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); + return _add_caret_annot(pdf_page, point); +} + +static const char* Tools_parse_da(mupdf::PdfAnnot& this_annot) +{ + const char* da_str = nullptr; + mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot); + mupdf::PdfDocument pdf = mupdf::pdf_get_bound_document(this_annot_obj); + try + { + mupdf::PdfObj da = mupdf::pdf_dict_get_inheritable(this_annot_obj, PDF_NAME2(DA)); + if (!da.m_internal) + { + mupdf::PdfObj trailer = mupdf::pdf_trailer(pdf); + da = mupdf::pdf_dict_getl( + &trailer, + PDF_NAME(Root), + PDF_NAME(AcroForm), + PDF_NAME(DA), + nullptr + ); + } + da_str = mupdf::pdf_to_text_string(da); + } + catch (std::exception&) + { + return nullptr; + } + return da_str; +} + +//---------------------------------------------------------------------------- +// Turn fz_buffer into a Python bytes object +//---------------------------------------------------------------------------- +static PyObject* JM_BinFromBuffer(fz_buffer* buffer) +{ + if (!buffer) + { + return PyBytes_FromStringAndSize("", 0); + } + unsigned char* c = nullptr; + size_t len = mupdf::ll_fz_buffer_storage(buffer, &c); + return PyBytes_FromStringAndSize((const char*) c, len); +} +static PyObject* JM_BinFromBuffer(mupdf::FzBuffer& buffer) +{ + return JM_BinFromBuffer( buffer.m_internal); +} + +static PyObject* Annot_getAP(mupdf::PdfAnnot& annot) +{ + mupdf::PdfObj annot_obj = mupdf::pdf_annot_obj(annot); + mupdf::PdfObj ap = mupdf::pdf_dict_getl( + &annot_obj, + PDF_NAME(AP), + PDF_NAME(N), + nullptr + ); + if (mupdf::pdf_is_stream(ap)) + { + mupdf::FzBuffer res = mupdf::pdf_load_stream(ap); + return JM_BinFromBuffer(res); + } + return PyBytes_FromStringAndSize("", 0); +} + +void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str) +{ + mupdf::PdfObj this_annot_obj = mupdf::pdf_annot_obj(this_annot); + mupdf::pdf_dict_put_text_string(this_annot_obj, PDF_NAME2(DA), da_str); + mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(DS)); /* not supported */ + mupdf::pdf_dict_del(this_annot_obj, PDF_NAME2(RC)); /* not supported */ +} + +static int +jm_float_item(PyObject* obj, Py_ssize_t idx, double* result) +{ + PyObject* temp = PySequence_ITEM(obj, idx); + if (!temp) return 1; + *result = PyFloat_AsDouble(temp); + Py_DECREF(temp); + if (PyErr_Occurred()) + { + PyErr_Clear(); + return 1; + } + return 0; +} + + +static mupdf::FzPoint JM_point_from_py(PyObject* p) +{ + fz_point p0 = fz_make_point(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT); + if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2) + { + return p0; + } + double x; + double y; + if (jm_float_item(p, 0, &x) == 1) return p0; + if (jm_float_item(p, 1, &y) == 1) return p0; + if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT; + if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT; + if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT; + if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT; + + return fz_make_point(x, y); +} + +static int s_list_append_drop(PyObject* list, PyObject* item) +{ + if (!list || !PyList_Check(list) || !item) + { + return -2; + } + int rc = PyList_Append(list, item); + Py_DECREF(item); + return rc; +} + +static int LIST_APPEND_DROP(PyObject *list, PyObject *item) +{ + if (!list || !PyList_Check(list) || !item) return -2; + int rc = PyList_Append(list, item); + Py_DECREF(item); + return rc; +} + +static int LIST_APPEND(PyObject *list, PyObject *item) +{ + if (!list || !PyList_Check(list) || !item) return -2; + int rc = PyList_Append(list, item); + return rc; +} + +static int DICT_SETITEM_DROP(PyObject *dict, PyObject *key, PyObject *value) +{ + if (!dict || !PyDict_Check(dict) || !key || !value) return -2; + int rc = PyDict_SetItem(dict, key, value); + Py_DECREF(value); + return rc; +} + +static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value) +{ + if (!dict || !PyDict_Check(dict) || !key || !value) return -2; + int rc = PyDict_SetItemString(dict, key, value); + Py_DECREF(value); + return rc; +} + + +//----------------------------------------------------------------------------- +// Functions converting between PySequences and pymupdf geometry objects +//----------------------------------------------------------------------------- +static int +jm_init_item(PyObject* obj, Py_ssize_t idx, int* result) +{ + PyObject* temp = PySequence_ITEM(obj, idx); + if (!temp) + { + return 1; + } + if (PyLong_Check(temp)) + { + *result = (int) PyLong_AsLong(temp); + Py_DECREF(temp); + } + else if (PyFloat_Check(temp)) + { + *result = (int) PyFloat_AsDouble(temp); + Py_DECREF(temp); + } + else + { + Py_DECREF(temp); + return 1; + } + if (PyErr_Occurred()) + { + PyErr_Clear(); + return 1; + } + return 0; +} + +// TODO: ------------------------------------------------------------------ +// This is a temporary solution and should be replaced by a C++ extension: +// There is no way in Python specify an array of fz_point - as is required +// for function pdf_set_annot_callout_line(). +static void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count) +{ + fz_point points[3]; + mupdf::FzPoint p; + for (int i = 0; i < count; i++) + { + p = JM_point_from_py(PyTuple_GetItem(callout, (Py_ssize_t) i)); + points[i] = fz_make_point(p.x, p.y); + } + mupdf::pdf_set_annot_callout_line(annot, points, count); +} + + +//---------------------------------------------------------------------------- +// Return list of outline xref numbers. Recursive function. Arguments: +// 'obj' first OL item +// 'xrefs' empty Python list +//---------------------------------------------------------------------------- +static PyObject* JM_outline_xrefs(mupdf::PdfObj obj, PyObject* xrefs) +{ + if (!obj.m_internal) + { + return xrefs; + } + PyObject* newxref = nullptr; + mupdf::PdfObj thisobj = obj; + while (thisobj.m_internal) + { + int nxr = mupdf::pdf_to_num(thisobj); + newxref = PyLong_FromLong((long) nxr); + if (PySequence_Contains(xrefs, newxref) + or mupdf::pdf_dict_get(thisobj, PDF_NAME2(Type)).m_internal + ) + { + // circular ref or top of chain: terminate + Py_DECREF(newxref); + break; + } + s_list_append_drop(xrefs, newxref); + mupdf::PdfObj first = mupdf::pdf_dict_get(thisobj, PDF_NAME2(First)); // try go down + if (mupdf::pdf_is_dict(first)) + { + xrefs = JM_outline_xrefs(first, xrefs); + } + thisobj = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Next)); // try go next + mupdf::PdfObj parent = mupdf::pdf_dict_get(thisobj, PDF_NAME2(Parent)); // get parent + if (!mupdf::pdf_is_dict(thisobj)) + { + thisobj = parent; + } + } + return xrefs; +} + + +PyObject* dictkey_align = NULL; +PyObject* dictkey_ascender = NULL; +PyObject* dictkey_bidi = NULL; +PyObject* dictkey_bbox = NULL; +PyObject* dictkey_blocks = NULL; +PyObject* dictkey_bpc = NULL; +PyObject* dictkey_c = NULL; +PyObject* dictkey_chars = NULL; +PyObject* dictkey_color = NULL; +PyObject* dictkey_colorspace = NULL; +PyObject* dictkey_content = NULL; +PyObject* dictkey_creationDate = NULL; +PyObject* dictkey_cs_name = NULL; +PyObject* dictkey_da = NULL; +PyObject* dictkey_dashes = NULL; +PyObject* dictkey_desc = NULL; +PyObject* dictkey_descender = NULL; +PyObject* dictkey_dir = NULL; +PyObject* dictkey_effect = NULL; +PyObject* dictkey_ext = NULL; +PyObject* dictkey_filename = NULL; +PyObject* dictkey_fill = NULL; +PyObject* dictkey_flags = NULL; +PyObject* dictkey_char_bidi = NULL; +PyObject* dictkey_char_flags = NULL; +PyObject* dictkey_font = NULL; +PyObject* dictkey_glyph = NULL; +PyObject* dictkey_height = NULL; +PyObject* dictkey_id = NULL; +PyObject* dictkey_image = NULL; +PyObject* dictkey_items = NULL; +PyObject* dictkey_length = NULL; +PyObject* dictkey_lines = NULL; +PyObject* dictkey_matrix = NULL; +PyObject* dictkey_modDate = NULL; +PyObject* dictkey_name = NULL; +PyObject* dictkey_number = NULL; +PyObject* dictkey_origin = NULL; +PyObject* dictkey_rect = NULL; +PyObject* dictkey_size = NULL; +PyObject* dictkey_smask = NULL; +PyObject* dictkey_spans = NULL; +PyObject* dictkey_stroke = NULL; +PyObject* dictkey_style = NULL; +PyObject* dictkey_subject = NULL; +PyObject* dictkey_text = NULL; +PyObject* dictkey_title = NULL; +PyObject* dictkey_type = NULL; +PyObject* dictkey_ufilename = NULL; +PyObject* dictkey_width = NULL; +PyObject* dictkey_wmode = NULL; +PyObject* dictkey_xref = NULL; +PyObject* dictkey_xres = NULL; +PyObject* dictkey_yres = NULL; + +static int dict_setitem_drop(PyObject* dict, PyObject* key, PyObject* value) +{ + if (!dict || !PyDict_Check(dict) || !key || !value) + { + return -2; + } + int rc = PyDict_SetItem(dict, key, value); + Py_DECREF(value); + return rc; +} + +static int dict_setitemstr_drop(PyObject* dict, const char* key, PyObject* value) +{ + if (!dict || !PyDict_Check(dict) || !key || !value) + { + return -2; + } + int rc = PyDict_SetItemString(dict, key, value); + Py_DECREF(value); + return rc; +} + + +static void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items) +{ + PyObject* item=nullptr; + PyObject* itemdict=nullptr; + PyObject* xrefs=nullptr; + + PyObject* bold = PyUnicode_FromString("bold"); + PyObject* italic = PyUnicode_FromString("italic"); + PyObject* collapse = PyUnicode_FromString("collapse"); + PyObject* zoom = PyUnicode_FromString("zoom"); + + try + { + /* Need to define these things early because later code uses + `goto`; otherwise we get compiler warnings 'jump bypasses variable + initialization' */ + int xref = 0; + mupdf::PdfObj root; + mupdf::PdfObj olroot; + mupdf::PdfObj first; + Py_ssize_t n; + Py_ssize_t m; + + root = mupdf::pdf_dict_get(mupdf::pdf_trailer(pdf), PDF_NAME2(Root)); + if (!root.m_internal) goto end; + + olroot = mupdf::pdf_dict_get(root, PDF_NAME2(Outlines)); + if (!olroot.m_internal) goto end; + + first = mupdf::pdf_dict_get(olroot, PDF_NAME2(First)); + if (!first.m_internal) goto end; + + xrefs = PyList_New(0); // pre-allocate an empty list + xrefs = JM_outline_xrefs(first, xrefs); + n = PySequence_Size(xrefs); + m = PySequence_Size(items); + if (!n) goto end; + + if (n != m) + { + throw std::runtime_error("internal error finding outline xrefs"); + } + + // update all TOC item dictionaries + for (int i = 0; i < n; i++) + { + jm_init_item(xrefs, i, &xref); + item = PySequence_ITEM(items, i); + itemdict = PySequence_ITEM(item, 3); + if (!itemdict || !PyDict_Check(itemdict)) + { + throw std::runtime_error("need non-simple TOC format"); + } + PyDict_SetItem(itemdict, dictkey_xref, PySequence_ITEM(xrefs, i)); + mupdf::PdfObj bm = mupdf::pdf_load_object(pdf, xref); + int flags = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(F))); + if (flags == 1) + { + PyDict_SetItem(itemdict, italic, Py_True); + } + else if (flags == 2) + { + PyDict_SetItem(itemdict, bold, Py_True); + } + else if (flags == 3) + { + PyDict_SetItem(itemdict, italic, Py_True); + PyDict_SetItem(itemdict, bold, Py_True); + } + int count = mupdf::pdf_to_int(mupdf::pdf_dict_get(bm, PDF_NAME2(Count))); + if (count < 0) + { + PyDict_SetItem(itemdict, collapse, Py_True); + } + else if (count > 0) + { + PyDict_SetItem(itemdict, collapse, Py_False); + } + mupdf::PdfObj col = mupdf::pdf_dict_get(bm, PDF_NAME2(C)); + if (mupdf::pdf_is_array(col) && mupdf::pdf_array_len(col) == 3) + { + PyObject* color = PyTuple_New(3); + PyTuple_SET_ITEM(color, 0, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 0)))); + PyTuple_SET_ITEM(color, 1, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 1)))); + PyTuple_SET_ITEM(color, 2, Py_BuildValue("f", mupdf::pdf_to_real(mupdf::pdf_array_get(col, 2)))); + dict_setitem_drop(itemdict, dictkey_color, color); + } + float z=0; + mupdf::PdfObj obj = mupdf::pdf_dict_get(bm, PDF_NAME2(Dest)); + if (!obj.m_internal || !mupdf::pdf_is_array(obj)) + { + obj = mupdf::pdf_dict_getl(&bm, PDF_NAME(A), PDF_NAME(D), nullptr); + } + if (mupdf::pdf_is_array(obj) && mupdf::pdf_array_len(obj) == 5) + { + z = mupdf::pdf_to_real(mupdf::pdf_array_get(obj, 4)); + } + dict_setitem_drop(itemdict, zoom, Py_BuildValue("f", z)); + PyList_SetItem(item, 3, itemdict); + PyList_SetItem(items, i, item); + } + end:; + } + catch (std::exception&) + { + } + Py_CLEAR(xrefs); + Py_CLEAR(bold); + Py_CLEAR(italic); + Py_CLEAR(collapse); + Py_CLEAR(zoom); +} + +static void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items) +{ + mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document); + return Document_extend_toc_items(pdf, items); +} + +//----------------------------------------------------------------------------- +// PySequence from fz_rect +//----------------------------------------------------------------------------- +static PyObject* JM_py_from_rect(fz_rect r) +{ + return Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1); +} +static PyObject* JM_py_from_rect(mupdf::FzRect r) +{ + return JM_py_from_rect(*r.internal()); +} + +//----------------------------------------------------------------------------- +// PySequence from fz_point +//----------------------------------------------------------------------------- +static PyObject* JM_py_from_point(fz_point p) +{ + return Py_BuildValue("ff", p.x, p.y); +} + +//----------------------------------------------------------------------------- +// PySequence from fz_quad. +//----------------------------------------------------------------------------- +static PyObject * +JM_py_from_quad(fz_quad q) +{ + return Py_BuildValue("((f,f),(f,f),(f,f),(f,f))", + q.ul.x, q.ul.y, q.ur.x, q.ur.y, + q.ll.x, q.ll.y, q.lr.x, q.lr.y); +} + +//---------------------------------------------------------------- +// annotation rectangle +//---------------------------------------------------------------- +static mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot) +{ + mupdf::FzRect rect = mupdf::pdf_bound_annot(annot); + return rect; +} + +static PyObject* Annot_rect3(mupdf::PdfAnnot& annot) +{ + fz_rect rect = mupdf::ll_pdf_bound_annot(annot.m_internal); + return JM_py_from_rect(rect); +} + +//----------------------------------------------------------------------------- +// PySequence to fz_rect. Default: infinite rect +//----------------------------------------------------------------------------- +static fz_rect JM_rect_from_py(PyObject* r) +{ + if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) + { + return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal();// fz_infinite_rect; + } + double f[4]; + for (int i = 0; i < 4; i++) + { + if (jm_float_item(r, i, &f[i]) == 1) + { + return *mupdf::FzRect(mupdf::FzRect::Fixed_INFINITE).internal(); + } + if (f[i] < FZ_MIN_INF_RECT) f[i] = FZ_MIN_INF_RECT; + if (f[i] > FZ_MAX_INF_RECT) f[i] = FZ_MAX_INF_RECT; + } + return mupdf::ll_fz_make_rect( + (float) f[0], + (float) f[1], + (float) f[2], + (float) f[3] + ); +} + +//----------------------------------------------------------------------------- +// PySequence to fz_matrix. Default: fz_identity +//----------------------------------------------------------------------------- +static fz_matrix JM_matrix_from_py(PyObject* m) +{ + double a[6]; + + if (!m || !PySequence_Check(m) || PySequence_Size(m) != 6) + { + return fz_identity; + } + for (int i = 0; i < 6; i++) + { + if (jm_float_item(m, i, &a[i]) == 1) + { + return *mupdf::FzMatrix().internal(); + } + } + return mupdf::ll_fz_make_matrix( + (float) a[0], + (float) a[1], + (float) a[2], + (float) a[3], + (float) a[4], + (float) a[5] + ); +} + +PyObject* util_transform_rect(PyObject* rect, PyObject* matrix) +{ + return JM_py_from_rect( + mupdf::ll_fz_transform_rect( + JM_rect_from_py(rect), + JM_matrix_from_py(matrix) + ) + ); +} + +//---------------------------------------------------------------------------- +// return normalized /Rotate value:one of 0, 90, 180, 270 +//---------------------------------------------------------------------------- +static int JM_norm_rotation(int rotate) +{ + while (rotate < 0) rotate += 360; + while (rotate >= 360) rotate -= 360; + if (rotate % 90 != 0) return 0; + return rotate; +} + + +//---------------------------------------------------------------------------- +// return a PDF page's /Rotate value: one of (0, 90, 180, 270) +//---------------------------------------------------------------------------- +static int JM_page_rotation(mupdf::PdfPage& page) +{ + int rotate = 0; + rotate = mupdf::pdf_to_int( + mupdf::pdf_dict_get_inheritable(page.obj(), PDF_NAME2(Rotate)) + ); + rotate = JM_norm_rotation(rotate); + return rotate; +} + + +//---------------------------------------------------------------------------- +// return a PDF page's MediaBox +//---------------------------------------------------------------------------- +static mupdf::FzRect JM_mediabox(mupdf::PdfObj& page_obj) +{ + mupdf::FzRect mediabox = mupdf::pdf_to_rect( + mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(MediaBox)) + ); + if (mupdf::fz_is_empty_rect(mediabox) || mupdf::fz_is_infinite_rect(mediabox)) + { + mediabox.x0 = 0; + mediabox.y0 = 0; + mediabox.x1 = 612; + mediabox.y1 = 792; + } + mupdf::FzRect page_mediabox; + page_mediabox.x0 = mupdf::fz_min(mediabox.x0, mediabox.x1); + page_mediabox.y0 = mupdf::fz_min(mediabox.y0, mediabox.y1); + page_mediabox.x1 = mupdf::fz_max(mediabox.x0, mediabox.x1); + page_mediabox.y1 = mupdf::fz_max(mediabox.y0, mediabox.y1); + if (0 + || page_mediabox.x1 - page_mediabox.x0 < 1 + || page_mediabox.y1 - page_mediabox.y0 < 1 + ) + { + page_mediabox = *mupdf::FzRect(mupdf::FzRect::Fixed_UNIT).internal(); //fz_unit_rect; + } + return page_mediabox; +} + + +//---------------------------------------------------------------------------- +// return a PDF page's CropBox +//---------------------------------------------------------------------------- +mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj) +{ + mupdf::FzRect mediabox = JM_mediabox(page_obj); + mupdf::FzRect cropbox = mupdf::pdf_to_rect( + mupdf::pdf_dict_get_inheritable(page_obj, PDF_NAME2(CropBox)) + ); + if (mupdf::fz_is_infinite_rect(cropbox) || mupdf::fz_is_empty_rect(cropbox)) + { + cropbox = mediabox; + } + float y0 = mediabox.y1 - cropbox.y1; + float y1 = mediabox.y1 - cropbox.y0; + cropbox.y0 = y0; + cropbox.y1 = y1; + return cropbox; +} + + +//---------------------------------------------------------------------------- +// calculate width and height of the UNROTATED page +//---------------------------------------------------------------------------- +static mupdf::FzPoint JM_cropbox_size(mupdf::PdfObj& page_obj) +{ + mupdf::FzPoint size; + mupdf::FzRect rect = JM_cropbox(page_obj); + float w = (rect.x0 < rect.x1) ? rect.x1 - rect.x0 : rect.x0 - rect.x1; + float h = (rect.y0 < rect.y1) ? rect.y1 - rect.y0 : rect.y0 - rect.y1; + size = fz_make_point(w, h); + return size; +} + + +//---------------------------------------------------------------------------- +// calculate page rotation matrices +//---------------------------------------------------------------------------- +static mupdf::FzMatrix JM_rotate_page_matrix(mupdf::PdfPage& page) +{ + if (!page.m_internal) + { + return *mupdf::FzMatrix().internal(); // no valid pdf page given + } + int rotation = JM_page_rotation(page); + if (rotation == 0) + { + return *mupdf::FzMatrix().internal(); // no rotation + } + auto po = page.obj(); + mupdf::FzPoint cb_size = JM_cropbox_size(po); + float w = cb_size.x; + float h = cb_size.y; + mupdf::FzMatrix m; + if (rotation == 90) + { + m = mupdf::fz_make_matrix(0, 1, -1, 0, h, 0); + } + else if (rotation == 180) + { + m = mupdf::fz_make_matrix(-1, 0, 0, -1, w, h); + } + else + { + m = mupdf::fz_make_matrix(0, -1, 1, 0, 0, w); + } + return m; +} + + +static mupdf::FzMatrix JM_derotate_page_matrix(mupdf::PdfPage& page) +{ // just the inverse of rotation + return mupdf::fz_invert_matrix(JM_rotate_page_matrix(page)); +} + +//----------------------------------------------------------------------------- +// PySequence from fz_matrix +//----------------------------------------------------------------------------- +static PyObject* JM_py_from_matrix(mupdf::FzMatrix m) +{ + return Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f); +} + +static mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage) +{ + if (!pdfpage.m_internal) + { + return mupdf::FzMatrix(); + } + return JM_derotate_page_matrix(pdfpage); +} + +static mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& page) +{ + mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); + return Page_derotate_matrix(pdf_page); +} + + +static PyObject *lll_JM_get_annot_xref_list(pdf_obj *page_obj) +{ + fz_context* ctx = mupdf::internal_context_get(); + PyObject *names = PyList_New(0); + pdf_obj *id, *subtype, *annots, *annot_obj; + int xref, type, i, n; + fz_try(ctx) { + annots = pdf_dict_get(ctx, page_obj, PDF_NAME(Annots)); + n = pdf_array_len(ctx, annots); + for (i = 0; i < n; i++) { + annot_obj = pdf_array_get(ctx, annots, i); + xref = pdf_to_num(ctx, annot_obj); + subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype)); + if (!subtype) { + continue; // subtype is required + } + type = pdf_annot_type_from_string(ctx, pdf_to_name(ctx, subtype)); + if (type == PDF_ANNOT_UNKNOWN) { + continue; // only accept valid annot types + } + id = pdf_dict_gets(ctx, annot_obj, "NM"); + LIST_APPEND_DROP(names, Py_BuildValue("iis", xref, type, pdf_to_text_string(ctx, id))); + } + } + fz_catch(ctx) { + return names; + } + return names; +} +//------------------------------------------------------------------------ +// return the xrefs and /NM ids of a page's annots, links and fields +//------------------------------------------------------------------------ +static PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj) +{ + PyObject* names = PyList_New(0); + if (!page_obj.m_internal) + { + return names; + } + return lll_JM_get_annot_xref_list( page_obj.m_internal); +} + +static mupdf::FzBuffer JM_object_to_buffer(const mupdf::PdfObj& what, int compress, int ascii) +{ + mupdf::FzBuffer res = mupdf::fz_new_buffer(512); + mupdf::FzOutput out(res); + mupdf::pdf_print_obj(out, what, compress, ascii); + out.fz_close_output(); + mupdf::fz_terminate_buffer(res); + return res; +} + +static PyObject* JM_EscapeStrFromBuffer(mupdf::FzBuffer& buff) +{ + if (!buff.m_internal) + { + return PyUnicode_FromString(""); + } + unsigned char* s = nullptr; + size_t len = mupdf::fz_buffer_storage(buff, &s); + PyObject* val = PyUnicode_DecodeRawUnicodeEscape((const char*) s, (Py_ssize_t) len, "replace"); + if (!val) + { + val = PyUnicode_FromString(""); + PyErr_Clear(); + } + return val; +} + +static PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0) +{ + if (!pdf.m_internal) + { + throw std::runtime_error(MSG_IS_NO_PDF); + } + int xreflen = mupdf::pdf_xref_len(pdf); + if ((xref < 1 || xref >= xreflen) and xref != -1) + { + throw std::runtime_error(MSG_BAD_XREF); + } + mupdf::PdfObj obj = (xref > 0) ? mupdf::pdf_load_object(pdf, xref) : mupdf::pdf_trailer(pdf); + mupdf::FzBuffer res = JM_object_to_buffer(mupdf::pdf_resolve_indirect(obj), compressed, ascii); + PyObject* text = JM_EscapeStrFromBuffer(res); + return text; +} + +static PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0) +{ + mupdf::PdfDocument pdf = mupdf::pdf_document_from_fz_document(document); + return xref_object(pdf, xref, compressed, ascii); +} + + +//------------------------------------- +// fz_output for Python file objects +//------------------------------------- + +static PyObject* Link_is_external(mupdf::FzLink& this_link) +{ + const char* uri = this_link.m_internal->uri; + if (!uri) + { + return PyBool_FromLong(0); + } + bool ret = mupdf::fz_is_external_link(uri); + return PyBool_FromLong((long) ret); +} + +static mupdf::FzLink Link_next(mupdf::FzLink& this_link) +{ + return this_link.next(); +} + + +//----------------------------------------------------------------------------- +// create PDF object from given string +//----------------------------------------------------------------------------- +static pdf_obj *lll_JM_pdf_obj_from_str(fz_context *ctx, pdf_document *doc, const char *src) +{ + pdf_obj *result = NULL; + pdf_lexbuf lexbuf; + fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src)); + + pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL); + + fz_try(ctx) { + result = pdf_parse_stm_obj(ctx, doc, stream, &lexbuf); + } + + fz_always(ctx) { + pdf_lexbuf_fin(ctx, &lexbuf); + fz_drop_stream(ctx, stream); + } + + fz_catch(ctx) { + mupdf::internal_throw_exception(ctx); + } + + return result; + +} + +/*********************************************************************/ +// Page._addAnnot_FromString +// Add new links provided as an array of string object definitions. +/*********************************************************************/ +PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist) +{ + PyObject* txtpy = nullptr; + int lcount = (int) PySequence_Size(linklist); // link count + //printf("Page_addAnnot_FromString(): lcount=%i\n", lcount); + if (lcount < 1) + { + Py_RETURN_NONE; + } + try + { + // insert links from the provided sources + if (!page.m_internal) + { + throw std::runtime_error(MSG_IS_NO_PDF); + } + if (!mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)).m_internal) + { + mupdf::pdf_dict_put_array(page.obj(), PDF_NAME2(Annots), lcount); + } + mupdf::PdfObj annots = mupdf::pdf_dict_get(page.obj(), PDF_NAME2(Annots)); + mupdf::PdfDocument doc = page.doc(); + //printf("lcount=%i\n", lcount); + fz_context* ctx = mupdf::internal_context_get(); + for (int i = 0; i < lcount; i++) + { + const char* text = nullptr; + txtpy = PySequence_ITEM(linklist, (Py_ssize_t) i); + text = PyUnicode_AsUTF8(txtpy); + Py_CLEAR(txtpy); + if (!text) + { + messagef("skipping bad link / annot item %i.", i); + continue; + } + try + { + pdf_obj* obj = lll_JM_pdf_obj_from_str(ctx, doc.m_internal, text); + pdf_obj* annot = pdf_add_object_drop( + ctx, + doc.m_internal, + obj + ); + pdf_obj* ind_obj = pdf_new_indirect(ctx, doc.m_internal, pdf_to_num(ctx, annot), 0); + pdf_array_push_drop(ctx, annots.m_internal, ind_obj); + pdf_drop_obj(ctx, annot); + } + catch (std::exception&) + { + messagef("skipping bad link / annot item %i.", i); + } + } + } + catch (std::exception&) + { + PyErr_Clear(); + return nullptr; + } + Py_RETURN_NONE; +} + +PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist) +{ + mupdf::PdfPage pdf_page = mupdf::pdf_page_from_fz_page(page); + return Page_addAnnot_FromString(pdf_page, linklist); +} + +static int page_count_fz2(void* document) +{ + mupdf::FzDocument* document2 = (mupdf::FzDocument*) document; + return mupdf::fz_count_pages(*document2); +} + +static int page_count_fz(mupdf::FzDocument& document) +{ + return mupdf::fz_count_pages(document); +} + +static int page_count_pdf(mupdf::PdfDocument& pdf) +{ + mupdf::FzDocument document = pdf.super(); + return page_count_fz(document); +} + +static int page_count(mupdf::FzDocument& document) +{ + return mupdf::fz_count_pages(document); +} + +static int page_count(mupdf::PdfDocument& pdf) +{ + mupdf::FzDocument document = pdf.super(); + return page_count(document); +} + +static PyObject* page_annot_xrefs(mupdf::FzDocument& document, mupdf::PdfDocument& pdf, int pno) +{ + int page_count = mupdf::fz_count_pages(document); + int n = pno; + while (n < 0) + { + n += page_count; + } + PyObject* annots = nullptr; + if (n >= page_count) + { + throw std::runtime_error(MSG_BAD_PAGENO); + } + if (!pdf.m_internal) + { + throw std::runtime_error(MSG_IS_NO_PDF); + } + annots = JM_get_annot_xref_list(mupdf::pdf_lookup_page_obj(pdf, n)); + return annots; +} + +static PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno) +{ + mupdf::PdfDocument pdf = mupdf::pdf_specifics(document); + return page_annot_xrefs(document, pdf, pno); +} + +static PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno) +{ + mupdf::FzDocument document = pdf.super(); + return page_annot_xrefs(document, pdf, pno); +} + +static bool Outline_is_external(mupdf::FzOutline* outline) +{ + if (!outline->m_internal->uri) + { + return false; + } + return mupdf::ll_fz_is_external_link(outline->m_internal->uri); +} + +int ll_fz_absi(int i) +{ + return mupdf::ll_fz_absi(i); +} + +enum +{ + TEXT_FONT_SUPERSCRIPT = 1, + TEXT_FONT_ITALIC = 2, + TEXT_FONT_SERIFED = 4, + TEXT_FONT_MONOSPACED = 8, + TEXT_FONT_BOLD = 16, +}; + +int g_skip_quad_corrections = 0; +int g_subset_fontnames = 0; +int g_small_glyph_heights = 0; + +void set_skip_quad_corrections(int on) +{ + g_skip_quad_corrections = on; +} + +void set_subset_fontnames(int on) +{ + g_subset_fontnames = on; +} + +void set_small_glyph_heights(int on) +{ + g_small_glyph_heights = on; +} + +struct jm_lineart_device +{ + fz_device super; + + PyObject* out = {}; + PyObject* method = {}; + PyObject* pathdict = {}; + PyObject* scissors = {}; + float pathfactor = {}; + fz_matrix ctm = {}; + fz_matrix ptm = {}; + fz_matrix rot = {}; + fz_point lastpoint = {}; + fz_point firstpoint = {}; + int havemove = 0; + fz_rect pathrect = {}; + int clips = {}; + int linecount = {}; + float linewidth = {}; + int path_type = {}; + long depth = {}; + size_t seqno = {}; + char* layer_name; +}; + + +static void jm_lineart_drop_device(fz_context *ctx, fz_device *dev_) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (PyList_Check(dev->out)) { + Py_CLEAR(dev->out); + } + Py_CLEAR(dev->method); + Py_CLEAR(dev->scissors); + mupdf::ll_fz_free(dev->layer_name); + dev->layer_name = nullptr; +} + +typedef jm_lineart_device jm_tracedraw_device; + +// need own versions of ascender / descender +static float JM_font_ascender(fz_font* font) +{ + if (g_skip_quad_corrections) + { + return 0.8f; + } + return mupdf::ll_fz_font_ascender(font); +} + +static float JM_font_descender(fz_font* font) +{ + if (g_skip_quad_corrections) + { + return -0.2f; + } + return mupdf::ll_fz_font_descender(font); +} + + +//---------------------------------------------------------------- +// Return true if character is considered to be a word delimiter +//---------------------------------------------------------------- +static int +JM_is_word_delimiter(int c, PyObject *delimiters) +{ + if (c <= 32 || c == 160) return 1; // a standard delimiter + if (0x202a <= c && c <= 0x202e) + { + return 1; // change between writing directions + } + + // extra delimiters must be a non-empty sequence + if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) { + return 0; + } + + // convert to tuple for easier looping + PyObject *delims = PySequence_Tuple(delimiters); + if (!delims) { + PyErr_Clear(); + return 0; + } + + // Make 1-char PyObject from character given as integer + PyObject *cchar = Py_BuildValue("C", c); // single character PyObject + Py_ssize_t i, len = PyTuple_Size(delims); + for (i = 0; i < len; i++) { + int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i)); + if (rc == 0) { // equal to a delimiter character + Py_DECREF(cchar); + Py_DECREF(delims); + PyErr_Clear(); + return 1; + } + } + + Py_DECREF(delims); + PyErr_Clear(); + return 0; +} + +static int +JM_is_rtl_char(int c) +{ + if (c < 0x590 || c > 0x900) return 0; + return 1; +} + +static const char* JM_font_name(fz_font* font) +{ + const char* name = mupdf::ll_fz_font_name(font); + const char* s = strchr(name, '+'); + if (g_subset_fontnames || !s || s-name != 6) + { + return name; + } + return s + 1; +} + +static void jm_trace_text_span( + jm_tracedraw_device* dev, + fz_text_span* span, + int type, + fz_matrix ctm, + fz_colorspace* colorspace, + const float* color, + float alpha, + size_t seqno + ) +{ + //printf("extra.jm_trace_text_span(): seqno=%zi\n", seqno); + //fz_matrix join = mupdf::ll_fz_concat(span->trm, ctm); + //double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d)); + fz_matrix mat = mupdf::ll_fz_concat(span->trm, ctm); // text transformation matrix + fz_point dir = mupdf::ll_fz_transform_vector(mupdf::ll_fz_make_point(1, 0), mat); // writing direction + double fsize = sqrt(dir.x * dir.x + dir.y * dir.y); // font size + + dir = mupdf::ll_fz_normalize_vector(dir); + + // compute effective ascender / descender + double asc = (double) JM_font_ascender(span->font); + double dsc = (double) JM_font_descender(span->font); + if (asc < 1e-3) { // probably Tesseract font + dsc = -0.1; + asc = 0.9; + } + + double ascsize = asc * fsize / (asc - dsc); + double dscsize = dsc * fsize / (asc - dsc); + int fflags = 0; // font flags + int mono = mupdf::ll_fz_font_is_monospaced(span->font); + fflags += mono * TEXT_FONT_MONOSPACED; + fflags += mupdf::ll_fz_font_is_italic(span->font) * TEXT_FONT_ITALIC; + fflags += mupdf::ll_fz_font_is_serif(span->font) * TEXT_FONT_SERIFED; + fflags += mupdf::ll_fz_font_is_bold(span->font) * TEXT_FONT_BOLD; + + // walk through characters of span + fz_matrix rot = mupdf::ll_fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0); + if (dir.x == -1) + { + // left-right flip + rot.d = 1; + } + PyObject* chars = PyTuple_New(span->len); + double space_adv = 0; + double last_adv = 0; + fz_rect span_bbox; + + for (int i = 0; i < span->len; i++) + { + double adv = 0; + if (span->items[i].gid >= 0) + { + adv = (double) mupdf::ll_fz_advance_glyph(span->font, span->items[i].gid, span->wmode); + } + adv *= fsize; + last_adv = adv; + if (span->items[i].ucs == 32) + { + space_adv = adv; + } + fz_point char_orig; + char_orig = fz_make_point(span->items[i].x, span->items[i].y); + char_orig = fz_transform_point(char_orig, ctm); + fz_matrix m1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y); + m1 = mupdf::ll_fz_concat(m1, rot); + m1 = mupdf::ll_fz_concat(m1, mupdf::ll_fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y)); + float x0 = char_orig.x; + float x1 = x0 + adv; + float y0; + float y1; + if ( + (mat.d > 0 && (dir.x == 1 || dir.x == -1)) + || + (mat.b !=0 && mat.b == -mat.c) + ) // up-down flip + { + // up-down flip + y0 = char_orig.y + dscsize; + y1 = char_orig.y + ascsize; + } + else + { + y0 = char_orig.y - ascsize; + y1 = char_orig.y - dscsize; + } + fz_rect char_bbox = mupdf::ll_fz_make_rect(x0, y0, x1, y1); + char_bbox = mupdf::ll_fz_transform_rect(char_bbox, m1); + PyTuple_SET_ITEM( + chars, + (Py_ssize_t) i, + Py_BuildValue( + "ii(ff)(ffff)", + span->items[i].ucs, + span->items[i].gid, + char_orig.x, + char_orig.y, + char_bbox.x0, + char_bbox.y0, + char_bbox.x1, + char_bbox.y1 + ) + ); + if (i > 0) + { + span_bbox = mupdf::ll_fz_union_rect(span_bbox, char_bbox); + } + else + { + span_bbox = char_bbox; + } + } + if (!space_adv) + { + if (!(fflags & TEXT_FONT_MONOSPACED)) + { + fz_font* out_font = nullptr; + space_adv = mupdf::ll_fz_advance_glyph( + span->font, + mupdf::ll_fz_encode_character_with_fallback(span->font, 32, 0, 0, &out_font), + span->wmode + ); + space_adv *= fsize; + if (!space_adv) + { + space_adv = last_adv; + } + } + else + { + space_adv = last_adv; // for mono any char width suffices + } + } + // make the span dictionary + PyObject* span_dict = PyDict_New(); + dict_setitemstr_drop(span_dict, "dir", JM_py_from_point(dir)); + dict_setitem_drop(span_dict, dictkey_font, JM_EscapeStrFromStr(JM_font_name(span->font))); + dict_setitem_drop(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode)); + dict_setitem_drop(span_dict, dictkey_flags, PyLong_FromLong((long) fflags)); + dict_setitemstr_drop(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level)); + dict_setitemstr_drop(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir)); + dict_setitem_drop(span_dict, dictkey_ascender, PyFloat_FromDouble(asc)); + dict_setitem_drop(span_dict, dictkey_descender, PyFloat_FromDouble(dsc)); + dict_setitem_drop(span_dict, dictkey_colorspace, PyLong_FromLong(3)); + float rgb[3]; + if (colorspace) + { + mupdf::ll_fz_convert_color( + colorspace, + color, + mupdf::ll_fz_device_rgb(), + rgb, + nullptr, + fz_default_color_params + ); + } + else + { + rgb[0] = rgb[1] = rgb[2] = 0; + } + double linewidth; + if (dev->linewidth > 0) // width of character border + { + linewidth = (double) dev->linewidth; + } + else + { + linewidth = fsize * 0.05; // default: 5% of font size + } + if (0) std::cout + << " dev->linewidth=" << dev->linewidth + << " fsize=" << fsize + << " linewidth=" << linewidth + << "\n"; + dict_setitem_drop(span_dict, dictkey_color, Py_BuildValue("fff", rgb[0], rgb[1], rgb[2])); + dict_setitem_drop(span_dict, dictkey_size, PyFloat_FromDouble(fsize)); + dict_setitemstr_drop(span_dict, "opacity", PyFloat_FromDouble((double) alpha)); + dict_setitemstr_drop(span_dict, "linewidth", PyFloat_FromDouble((double) linewidth)); + dict_setitemstr_drop(span_dict, "spacewidth", PyFloat_FromDouble(space_adv)); + dict_setitem_drop(span_dict, dictkey_type, PyLong_FromLong((long) type)); + dict_setitem_drop(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox)); + dict_setitemstr_drop(span_dict, "layer", JM_UnicodeFromStr(dev->layer_name)); + dict_setitemstr_drop(span_dict, "seqno", PyLong_FromSize_t(seqno)); + dict_setitem_drop(span_dict, dictkey_chars, chars); + //std::cout << "span_dict=" << repr(span_dict) << "\n"; + s_list_append_drop(dev->out, span_dict); +} + +static inline void jm_increase_seqno(fz_context* ctx, fz_device* dev_) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + dev->seqno += 1; +} + +static void jm_fill_path( + fz_context* ctx, + fz_device* dev, + const fz_path*, + int even_odd, + fz_matrix, + fz_colorspace*, + const float* color, + float alpha, + fz_color_params + ) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_fill_shade( + fz_context* ctx, + fz_device* dev, + fz_shade* shd, + fz_matrix ctm, + float alpha, + fz_color_params color_params + ) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_fill_image( + fz_context* ctx, + fz_device* dev, + fz_image* img, + fz_matrix ctm, + float alpha, + fz_color_params color_params + ) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_fill_image_mask( + fz_context* ctx, + fz_device* dev, + fz_image* img, + fz_matrix ctm, + fz_colorspace* cs, + const float* color, + float alpha, + fz_color_params color_params + ) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_dev_linewidth( + fz_context* ctx, + fz_device* dev_, + const fz_path* path, + const fz_stroke_state* stroke, + fz_matrix ctm, + fz_colorspace* colorspace, + const float* color, + float alpha, + fz_color_params color_params + ) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + if (0) std::cout << "jm_dev_linewidth(): changing dev->linewidth from " << dev->linewidth + << " to stroke->linewidth=" << stroke->linewidth + << "\n"; + dev->linewidth = stroke->linewidth; + jm_increase_seqno(ctx, dev_); +} + +static void jm_trace_text( + jm_tracedraw_device* dev, + const fz_text* text, + int type, + fz_matrix ctm, + fz_colorspace* colorspace, + const float* color, + float alpha, + size_t seqno + ) +{ + fz_text_span* span; + for (span = text->head; span; span = span->next) + { + jm_trace_text_span(dev, span, type, ctm, colorspace, color, alpha, seqno); + } +} + +/*--------------------------------------------------------- +There are 3 text trace types: +0 - fill text (PDF Tr 0) +1 - stroke text (PDF Tr 1) +3 - ignore text (PDF Tr 3) +---------------------------------------------------------*/ +static void +jm_tracedraw_fill_text( + fz_context* ctx, + fz_device* dev_, + const fz_text* text, + fz_matrix ctm, + fz_colorspace* colorspace, + const float* color, + float alpha, + fz_color_params color_params + ) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + jm_trace_text(dev, text, 0, ctm, colorspace, color, alpha, dev->seqno); + dev->seqno += 1; +} + +static void +jm_tracedraw_stroke_text( + fz_context* ctx, + fz_device* dev_, + const fz_text* text, + const fz_stroke_state* stroke, + fz_matrix ctm, + fz_colorspace* colorspace, + const float* color, + float alpha, + fz_color_params color_params + ) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + jm_trace_text(dev, text, 1, ctm, colorspace, color, alpha, dev->seqno); + dev->seqno += 1; +} + + +static void +jm_tracedraw_ignore_text( + fz_context* ctx, + fz_device* dev_, + const fz_text* text, + fz_matrix ctm + ) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + jm_trace_text(dev, text, 3, ctm, nullptr, nullptr, 1, dev->seqno); + dev->seqno += 1; +} + +static void +jm_lineart_begin_layer(fz_context *ctx, fz_device *dev_, const char *name) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + mupdf::ll_fz_free(dev->layer_name); + dev->layer_name = mupdf::ll_fz_strdup(name); +} + +static void +jm_lineart_end_layer(fz_context *ctx, fz_device *dev_) +{ + jm_tracedraw_device* dev = (jm_tracedraw_device*) dev_; + mupdf::ll_fz_free(dev->layer_name); + dev->layer_name = nullptr; +} + + +mupdf::FzDevice JM_new_texttrace_device(PyObject* out) +{ + mupdf::FzDevice device(sizeof(jm_tracedraw_device)); + jm_tracedraw_device* dev = (jm_tracedraw_device*) device.m_internal; + + dev->super.close_device = nullptr; + dev->super.drop_device = jm_lineart_drop_device; + dev->super.fill_path = jm_fill_path; + dev->super.stroke_path = jm_dev_linewidth; + dev->super.clip_path = nullptr; + dev->super.clip_stroke_path = nullptr; + + dev->super.fill_text = jm_tracedraw_fill_text; + dev->super.stroke_text = jm_tracedraw_stroke_text; + dev->super.clip_text = nullptr; + dev->super.clip_stroke_text = nullptr; + dev->super.ignore_text = jm_tracedraw_ignore_text; + + dev->super.fill_shade = jm_fill_shade; + dev->super.fill_image = jm_fill_image; + dev->super.fill_image_mask = jm_fill_image_mask; + dev->super.clip_image_mask = nullptr; + + dev->super.pop_clip = nullptr; + + dev->super.begin_mask = nullptr; + dev->super.end_mask = nullptr; + dev->super.begin_group = nullptr; + dev->super.end_group = nullptr; + + dev->super.begin_tile = nullptr; + dev->super.end_tile = nullptr; + + dev->super.begin_layer = jm_lineart_begin_layer; + dev->super.end_layer = jm_lineart_end_layer; + + dev->super.begin_structure = nullptr; + dev->super.end_structure = nullptr; + + dev->super.begin_metatext = nullptr; + dev->super.end_metatext = nullptr; + + dev->super.render_flags = nullptr; + dev->super.set_default_colorspaces = nullptr; + + Py_XINCREF(out); + dev->out = out; + dev->seqno = 0; + return device; +} + + +static fz_quad +JM_char_quad(fz_stext_line *line, fz_stext_char *ch) +{ + if (g_skip_quad_corrections) { // no special handling + return ch->quad; + } + if (line->wmode) { // never touch vertical write mode + return ch->quad; + } + fz_font *font = ch->font; + float asc = JM_font_ascender(font); + float dsc = JM_font_descender(font); + float c, s, fsize = ch->size; + float asc_dsc = asc - dsc + FLT_EPSILON; + if (asc_dsc >= 1 && g_small_glyph_heights == 0) { // no problem + return ch->quad; + } + if (asc < 1e-3) { // probably Tesseract glyphless font + dsc = -0.1f; + asc = 0.9f; + asc_dsc = 1.0f; + } + + if (g_small_glyph_heights || asc_dsc < 1) { + dsc = dsc / asc_dsc; + asc = asc / asc_dsc; + } + asc_dsc = asc - dsc; + asc = asc * fsize / asc_dsc; + dsc = dsc * fsize / asc_dsc; + + /* ------------------------------ + Re-compute quad with the adjusted ascender / descender values: + Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, + re-rotate and move back to ch->origin location. + ------------------------------ */ + fz_matrix trm1, trm2, xlate1, xlate2; + fz_quad quad; + c = line->dir.x; // cosine + s = line->dir.y; // sine + trm1 = mupdf::ll_fz_make_matrix(c, -s, s, c, 0, 0); // derotate + trm2 = mupdf::ll_fz_make_matrix(c, s, -s, c, 0, 0); // rotate + if (c == -1) { // left-right flip + trm1.d = 1; + trm2.d = 1; + } + xlate1 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y); + xlate2 = mupdf::ll_fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y); + + quad = mupdf::ll_fz_transform_quad(ch->quad, xlate1); // move origin to (0,0) + quad = mupdf::ll_fz_transform_quad(quad, trm1); // de-rotate corners + + // adjust vertical coordinates + if (c == 1 && quad.ul.y > 0) { // up-down flip + quad.ul.y = asc; + quad.ur.y = asc; + quad.ll.y = dsc; + quad.lr.y = dsc; + } else { + quad.ul.y = -asc; + quad.ur.y = -asc; + quad.ll.y = -dsc; + quad.lr.y = -dsc; + } + + // adjust horizontal coordinates that are too crazy: + // (1) left x must be >= 0 + // (2) if bbox width is 0, lookup char advance in font. + if (quad.ll.x < 0) { + quad.ll.x = 0; + quad.ul.x = 0; + } + float cwidth = quad.lr.x - quad.ll.x; + if (cwidth < FLT_EPSILON) { + int glyph = mupdf::ll_fz_encode_character( font, ch->c); + if (glyph) { + float fwidth = mupdf::ll_fz_advance_glyph( font, glyph, line->wmode); + quad.lr.x = quad.ll.x + fwidth * fsize; + quad.ur.x = quad.lr.x; + } + } + + quad = mupdf::ll_fz_transform_quad(quad, trm2); // rotate back + quad = mupdf::ll_fz_transform_quad(quad, xlate2); // translate back + return quad; +} + + +static fz_rect JM_char_bbox(fz_stext_line* line, fz_stext_char* ch) +{ + fz_rect r = mupdf::ll_fz_rect_from_quad(JM_char_quad( line, ch)); + if (!line->wmode) { + return r; + } + if (r.y1 < r.y0 + ch->size) { + r.y0 = r.y1 - ch->size; + } + return r; +} + +fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch) +{ + return JM_char_bbox( line.m_internal, ch.m_internal); +} + +static int JM_rects_overlap(const fz_rect a, const fz_rect b) +{ + if (0 + || a.x0 >= b.x1 + || a.y0 >= b.y1 + || a.x1 <= b.x0 + || a.y1 <= b.y0 + ) + return 0; + return 1; +} + +// +void JM_append_rune(fz_buffer *buff, int ch); + +//----------------------------------------------------------------------------- +// Plain text output. An identical copy of fz_print_stext_page_as_text, +// but lines within a block are concatenated by space instead a new-line +// character (which else leads to 2 new-lines). +//----------------------------------------------------------------------------- +void _as_text(fz_stext_block *block, mupdf::FzBuffer& res, mupdf::FzStextPage& page) +{ + /* + Recursive function for output by blocks as identified by the + MuPDF SEGMENT logic. + The recursion happens when we encounter a structure block. + */ + fz_rect rect = page.m_internal->mediabox; + int last_char; + fz_stext_line *line; + fz_stext_char *ch; + while (block) + { + switch (block->type) + { + case FZ_STEXT_BLOCK_STRUCT: + if (block->u.s.down) + { + _as_text(block->u.s.down->first_block, res, page); + } + break; + + case FZ_STEXT_BLOCK_TEXT: + last_char = 0; + for (line = block->u.t.first_line; line; line = line->next) + { + for (ch = line->first_char; ch; ch = ch->next) + { + fz_rect chbbox = JM_char_bbox( line, ch); + if (mupdf::ll_fz_is_infinite_rect(rect) || JM_rects_overlap(rect, chbbox)) + { + last_char = ch->c; + JM_append_rune(res.m_internal, last_char); + } + } + if (last_char != 10 && last_char > 0) + { + mupdf::ll_fz_append_string(res.m_internal, "\n"); + last_char = 10; + } + } + if (last_char != 10 && last_char > 0) + { + mupdf::ll_fz_append_string(res.m_internal, "\n"); + last_char = 10; + } + break; + } + block = block->next; + } +} + +void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page) +{ + fz_stext_block *block = page.m_internal->first_block; + _as_text(block, res, page); +} + + + +// path_type is one of: +#define FILL_PATH 1 +#define STROKE_PATH 2 +#define CLIP_PATH 3 +#define CLIP_STROKE_PATH 4 + +// Every scissor of a clip is a sub rectangle of the preceding clip scissor if +// the clip level is larger. +static fz_rect compute_scissor(jm_lineart_device *dev) +{ + PyObject *last_scissor = NULL; + fz_rect scissor; + if (!dev->scissors) { + dev->scissors = PyList_New(0); + } + Py_ssize_t num_scissors = PyList_Size(dev->scissors); + if (num_scissors > 0) { + last_scissor = PyList_GET_ITEM(dev->scissors, num_scissors-1); + scissor = JM_rect_from_py(last_scissor); + scissor = fz_intersect_rect(scissor, dev->pathrect); + } else { + scissor = dev->pathrect; + } + LIST_APPEND_DROP(dev->scissors, JM_py_from_rect(scissor)); + return scissor; +} + + +/* +-------------------------------------------------------------------------- +Check whether the last 4 lines represent a quad. +Because of how we count, the lines are a polyline already, i.e. last point +of a line equals 1st point of next line. +So we check for a polygon (last line's end point equals start point). +If not true we return 0. +-------------------------------------------------------------------------- +*/ +static int +jm_checkquad(jm_lineart_device* dev) +{ + PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); + Py_ssize_t i, len = PyList_Size(items); + float f[8]; // coordinates of the 4 corners + mupdf::FzPoint temp, lp; // line = (temp, lp) + PyObject *rect; + PyObject *line; + // fill the 8 floats in f, start from items[-4:] + for (i = 0; i < 4; i++) { // store line start points + line = PyList_GET_ITEM(items, len - 4 + i); + temp = JM_point_from_py(PyTuple_GET_ITEM(line, 1)); + f[i * 2] = temp.x; + f[i * 2 + 1] = temp.y; + lp = JM_point_from_py(PyTuple_GET_ITEM(line, 2)); + } + if (lp.x != f[0] || lp.y != f[1]) { + // not a polygon! + //dev_linecount -= 1; + return 0; + } + + // we have detected a quad + dev->linecount = 0; // reset this + // a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items + // are pairs of floats representing a quad corner each. + rect = PyTuple_New(2); + PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("qu")); + /* ---------------------------------------------------- + * relationship of float array to quad points: + * (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr + ---------------------------------------------------- */ + fz_quad q = fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5]); + PyTuple_SET_ITEM(rect, 1, JM_py_from_quad(q)); + PyList_SetItem(items, len - 4, rect); // replace item -4 by rect + PyList_SetSlice(items, len - 3, len, NULL); // delete remaining 3 items + return 1; +} + + +/* +-------------------------------------------------------------------------- +Check whether the last 3 path items represent a rectangle. +Line 1 and 3 must be horizontal, line 2 must be vertical. +Returns 1 if we have modified the path, otherwise 0. +-------------------------------------------------------------------------- +*/ +static int +jm_checkrect(jm_lineart_device* dev) +{ + dev->linecount = 0; // reset line count + long orientation = 0; // area orientation of rectangle + mupdf::FzPoint ll, lr, ur, ul; + mupdf::FzRect r; + PyObject *rect; + PyObject *line0, *line2; + PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); + Py_ssize_t len = PyList_Size(items); + + line0 = PyList_GET_ITEM(items, len - 3); + ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1)); + lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2)); + // no need to extract "line1"! + line2 = PyList_GET_ITEM(items, len - 1); + ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1)); + ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2)); + + /* + --------------------------------------------------------------------- + Assumption: + When decomposing rects, MuPDF always starts with a horizontal line, + followed by a vertical line, followed by a horizontal line. + First line: (ll, lr), third line: (ul, ur). + If 1st line is below 3rd line, we record anti-clockwise (+1), else + clockwise (-1) orientation. + --------------------------------------------------------------------- + */ + if (ll.y != lr.y || + ll.x != ul.x || + ur.y != ul.y || + ur.x != lr.x) { + goto drop_out; // not a rectangle + } + + // we have a rect, replace last 3 "l" items by one "re" item. + if (ul.y < lr.y) { + r = fz_make_rect(ul.x, ul.y, lr.x, lr.y); + orientation = 1; + } else { + r = fz_make_rect(ll.x, ll.y, ur.x, ur.y); + orientation = -1; + } + rect = PyTuple_New(3); + PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re")); + PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r)); + PyTuple_SET_ITEM(rect, 2, PyLong_FromLong(orientation)); + PyList_SetItem(items, len - 3, rect); // replace item -3 by rect + PyList_SetSlice(items, len - 2, len, NULL); // delete remaining 2 items + return 1; + drop_out:; + return 0; +} + +static PyObject * +jm_lineart_color(fz_colorspace *colorspace, const float *color) +{ + float rgb[3]; + if (colorspace) { + mupdf::ll_fz_convert_color(colorspace, color, mupdf::ll_fz_device_rgb(), + rgb, NULL, fz_default_color_params); + return Py_BuildValue("fff", rgb[0], rgb[1], rgb[2]); + } + return PyTuple_New(0); +} + +static void +trace_moveto(fz_context *ctx, void *dev_, float x, float y) +{ + jm_lineart_device* dev = (jm_lineart_device*) dev_; + dev->lastpoint = mupdf::ll_fz_transform_point(fz_make_point(x, y), dev->ctm); + if (mupdf::ll_fz_is_infinite_rect(dev->pathrect)) + { + dev->pathrect = mupdf::ll_fz_make_rect( + dev->lastpoint.x, + dev->lastpoint.y, + dev->lastpoint.x, + dev->lastpoint.y + ); + } + dev->firstpoint = dev->lastpoint; + dev->havemove = 1; + dev->linecount = 0; // reset # of consec. lines +} + +static void +trace_lineto(fz_context *ctx, void *dev_, float x, float y) +{ + jm_lineart_device* dev = (jm_lineart_device*) dev_; + fz_point p1 = fz_transform_point(fz_make_point(x, y), dev->ctm); + dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1); + PyObject *list = PyTuple_New(3); + PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l")); + PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); + PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1)); + dev->lastpoint = p1; + PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); + LIST_APPEND_DROP(items, list); + dev->linecount += 1; // counts consecutive lines + if (dev->linecount == 4 && dev->path_type != FILL_PATH) { // shrink to "re" or "qu" item + jm_checkquad(dev); + } +} + +static void +trace_curveto(fz_context *ctx, void *dev_, float x1, float y1, float x2, float y2, float x3, float y3) +{ + jm_lineart_device* dev = (jm_lineart_device*) dev_; + dev->linecount = 0; // reset # of consec. lines + fz_point p1 = fz_make_point(x1, y1); + fz_point p2 = fz_make_point(x2, y2); + fz_point p3 = fz_make_point(x3, y3); + p1 = fz_transform_point(p1, dev->ctm); + p2 = fz_transform_point(p2, dev->ctm); + p3 = fz_transform_point(p3, dev->ctm); + dev->pathrect = fz_include_point_in_rect(dev->pathrect, p1); + dev->pathrect = fz_include_point_in_rect(dev->pathrect, p2); + dev->pathrect = fz_include_point_in_rect(dev->pathrect, p3); + + PyObject *list = PyTuple_New(5); + PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("c")); + PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); + PyTuple_SET_ITEM(list, 2, JM_py_from_point(p1)); + PyTuple_SET_ITEM(list, 3, JM_py_from_point(p2)); + PyTuple_SET_ITEM(list, 4, JM_py_from_point(p3)); + dev->lastpoint = p3; + PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); + LIST_APPEND_DROP(items, list); +} + +static void +trace_close(fz_context *ctx, void *dev_) +{ + jm_lineart_device* dev = (jm_lineart_device*) dev_; + if (dev->linecount == 3) { + if (jm_checkrect(dev)) { + return; + } + } + dev->linecount = 0; // reset # of consec. lines + if (dev->havemove) { + if (dev->firstpoint.x != dev->lastpoint.x || dev->firstpoint.y != dev->lastpoint.y) { + PyObject *list = PyTuple_New(3); + PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l")); + PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev->lastpoint)); + PyTuple_SET_ITEM(list, 2, JM_py_from_point(dev->firstpoint)); + dev->lastpoint = dev->firstpoint; + PyObject *items = PyDict_GetItem(dev->pathdict, dictkey_items); + LIST_APPEND_DROP(items, list); + } + dev->havemove = 0; + DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); + } else { + DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(1)); + } +} + +static const fz_path_walker trace_path_walker = + { + trace_moveto, + trace_lineto, + trace_curveto, + trace_close + }; + +/* +--------------------------------------------------------------------- +Create the "items" list of the path dictionary +* either create or empty the path dictionary +* reset the end point of the path +* reset count of consecutive lines +* invoke fz_walk_path(), which create the single items +* if no items detected, empty path dict again +--------------------------------------------------------------------- +*/ +static void +jm_lineart_path(jm_lineart_device *dev, const fz_path *path) +{ + dev->pathrect = fz_infinite_rect; + dev->linecount = 0; + dev->lastpoint = fz_make_point(0, 0); + dev->firstpoint = fz_make_point(0, 0); + if (dev->pathdict) { + Py_CLEAR(dev->pathdict); + } + dev->pathdict = PyDict_New(); + DICT_SETITEM_DROP(dev->pathdict, dictkey_items, PyList_New(0)); + mupdf::ll_fz_walk_path(path, &trace_path_walker, dev); + // Check if any items were added ... + if (!PyDict_GetItem(dev->pathdict, dictkey_items) || !PyList_Size(PyDict_GetItem(dev->pathdict, dictkey_items))) + { + Py_CLEAR(dev->pathdict); + } +} + +//--------------------------------------------------------------------------- +// Append current path to list or merge into last path of the list. +// (1) Append if first path, different item lists or not a 'stroke' version +// of previous path +// (2) If new path has the same items, merge its content into previous path +// and change path["type"] to "fs". +// (3) If "out" is callable, skip the previous and pass dictionary to it. +//--------------------------------------------------------------------------- +static void +// todo: remove `method` arg - it is dev->method. +jm_append_merge(jm_lineart_device *dev) +{ + Py_ssize_t len; + int rc; + PyObject *prev; + PyObject *previtems; + PyObject *thisitems; + const char *thistype; + const char *prevtype; + if (PyCallable_Check(dev->out) || dev->method != Py_None) { // function or method + goto callback; + } + len = PyList_Size(dev->out); // len of output list so far + if (len == 0) { // always append first path + goto append; + } + thistype = PyUnicode_AsUTF8(PyDict_GetItem(dev->pathdict, dictkey_type)); + if (strcmp(thistype, "s") != 0) { // if not stroke, then append + goto append; + } + prev = PyList_GET_ITEM(dev->out, len - 1); // get prev path + prevtype = PyUnicode_AsUTF8(PyDict_GetItem(prev, dictkey_type)); + if (strcmp(prevtype, "f") != 0) { // if previous not fill, append + goto append; + } + // last check: there must be the same list of items for "f" and "s". + previtems = PyDict_GetItem(prev, dictkey_items); + thisitems = PyDict_GetItem(dev->pathdict, dictkey_items); + if (PyObject_RichCompareBool(previtems, thisitems, Py_NE)) { + goto append; + } + rc = PyDict_Merge(prev, dev->pathdict, 0); // merge, do not override + if (rc == 0) { + DICT_SETITEM_DROP(prev, dictkey_type, PyUnicode_FromString("fs")); + goto postappend; + } else { + messagef("could not merge stroke and fill path"); + goto append; + } + append:; + //printf("Appending to dev->out. len(dev->out)=%zi\n", PyList_Size(dev->out)); + PyList_Append(dev->out, dev->pathdict); + postappend:; + Py_CLEAR(dev->pathdict); + return; + + callback:; // callback function or method + PyObject *resp = NULL; + if (dev->method == Py_None) { + resp = PyObject_CallFunctionObjArgs(dev->out, dev->pathdict, NULL); + } else { + resp = PyObject_CallMethodObjArgs(dev->out, dev->method, dev->pathdict, NULL); + } + if (resp) { + Py_DECREF(resp); + } else { + messagef("calling cdrawings callback function/method failed!"); + PyErr_Clear(); + } + Py_CLEAR(dev->pathdict); + return; +} + +static void +jm_lineart_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, + int even_odd, fz_matrix ctm, fz_colorspace *colorspace, + const float *color, float alpha, fz_color_params color_params) +{ + jm_lineart_device *dev = (jm_lineart_device *) dev_; + //printf("extra.jm_lineart_fill_path(): dev->seqno=%zi\n", dev->seqno); + dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); + dev->path_type = FILL_PATH; + jm_lineart_path(dev, path); + if (!dev->pathdict) { + return; + } + DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("f")); + DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd)); + DICT_SETITEMSTR_DROP(dev->pathdict, "fill_opacity", Py_BuildValue("f", alpha)); + DICT_SETITEMSTR_DROP(dev->pathdict, "fill", jm_lineart_color(colorspace, color)); + DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect)); + DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); + DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); + if (dev->clips) { + DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); + } + jm_append_merge(dev); + dev->seqno += 1; +} + +static void +jm_lineart_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, + const fz_stroke_state *stroke, fz_matrix ctm, + fz_colorspace *colorspace, const float *color, float alpha, + fz_color_params color_params) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + //printf("extra.jm_lineart_stroke_path(): dev->seqno=%zi\n", dev->seqno); + int i; + dev->pathfactor = 1; + if (ctm.a != 0 && fz_abs(ctm.a) == fz_abs(ctm.d)) { + dev->pathfactor = fz_abs(ctm.a); + } else { + if (ctm.b != 0 && fz_abs(ctm.b) == fz_abs(ctm.c)) { + dev->pathfactor = fz_abs(ctm.b); + } + } + dev->ctm = ctm; // fz_concat(ctm, trace_device_ptm); + dev->path_type = STROKE_PATH; + + jm_lineart_path(dev, path); + if (!dev->pathdict) { + return; + } + DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("s")); + DICT_SETITEMSTR_DROP(dev->pathdict, "stroke_opacity", Py_BuildValue("f", alpha)); + DICT_SETITEMSTR_DROP(dev->pathdict, "color", jm_lineart_color(colorspace, color)); + DICT_SETITEM_DROP(dev->pathdict, dictkey_width, Py_BuildValue("f", dev->pathfactor * stroke->linewidth)); + DICT_SETITEMSTR_DROP(dev->pathdict, "lineCap", Py_BuildValue("iii", stroke->start_cap, stroke->dash_cap, stroke->end_cap)); + DICT_SETITEMSTR_DROP(dev->pathdict, "lineJoin", Py_BuildValue("f", dev->pathfactor * stroke->linejoin)); + if (!PyDict_GetItemString(dev->pathdict, "closePath")) { + DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); + } + + // output the "dashes" string + if (stroke->dash_len) { + mupdf::FzBuffer buff(256); + mupdf::fz_append_string(buff, "[ "); // left bracket + for (i = 0; i < stroke->dash_len; i++) { + fz_append_printf(ctx, buff.m_internal, "%g ", dev->pathfactor * stroke->dash_list[i]); + } + fz_append_printf(ctx, buff.m_internal, "] %g", dev->pathfactor * stroke->dash_phase); + DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", JM_EscapeStrFromBuffer(buff)); + } else { + DICT_SETITEMSTR_DROP(dev->pathdict, "dashes", PyUnicode_FromString("[] 0")); + } + + DICT_SETITEM_DROP(dev->pathdict, dictkey_rect, JM_py_from_rect(dev->pathrect)); + DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); + DICT_SETITEMSTR_DROP(dev->pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); + if (dev->clips) { + DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); + } + // output the dict - potentially merging it with a previous fill_path twin + jm_append_merge(dev); + dev->seqno += 1; +} + +static void +jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd, fz_matrix ctm, fz_rect scissor) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); + dev->path_type = CLIP_PATH; + jm_lineart_path(dev, path); + if (!dev->pathdict) { + return; + } + DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip")); + DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", JM_BOOL(even_odd)); + if (!PyDict_GetItemString(dev->pathdict, "closePath")) { + DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); + } + DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev))); + DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); + DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); + jm_append_merge(dev); + dev->depth++; +} + +static void +jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + dev->ctm = ctm; //fz_concat(ctm, trace_device_ptm); + dev->path_type = CLIP_STROKE_PATH; + jm_lineart_path(dev, path); + if (!dev->pathdict) { + return; + } + DICT_SETITEM_DROP(dev->pathdict, dictkey_type, PyUnicode_FromString("clip")); + DICT_SETITEMSTR_DROP(dev->pathdict, "even_odd", Py_BuildValue("s", NULL)); + if (!PyDict_GetItemString(dev->pathdict, "closePath")) { + DICT_SETITEMSTR_DROP(dev->pathdict, "closePath", JM_BOOL(0)); + } + DICT_SETITEMSTR_DROP(dev->pathdict, "scissor", JM_py_from_rect(compute_scissor(dev))); + DICT_SETITEMSTR_DROP(dev->pathdict, "level", PyLong_FromLong(dev->depth)); + DICT_SETITEMSTR_DROP(dev->pathdict, "layer", JM_UnicodeFromStr(dev->layer_name)); + jm_append_merge(dev); + dev->depth++; +} + + +static void +jm_lineart_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + compute_scissor(dev); + dev->depth++; +} + +static void +jm_lineart_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + compute_scissor(dev); + dev->depth++; +} + +static void +jm_lineart_clip_image_mask(fz_context *ctx, fz_device *dev_, fz_image *image, fz_matrix ctm, fz_rect scissor) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + compute_scissor(dev); + dev->depth++; +} + +static void +jm_lineart_pop_clip(fz_context *ctx, fz_device *dev_) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + if (!dev->scissors) return; + Py_ssize_t len = PyList_Size(dev->scissors); + if (len < 1) return; + PyList_SetSlice(dev->scissors, len - 1, len, NULL); + dev->depth--; +} + + +static void +jm_lineart_begin_group(fz_context *ctx, fz_device *dev_, fz_rect bbox, fz_colorspace *cs, int isolated, int knockout, int blendmode, float alpha) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + dev->pathdict = Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}", + "type", "group", + "rect", JM_py_from_rect(bbox), + "isolated", JM_BOOL(isolated), + "knockout", JM_BOOL(knockout), + "blendmode", fz_blendmode_name(blendmode), + "opacity", alpha, + "level", dev->depth, + "layer", JM_UnicodeFromStr(dev->layer_name) + ); + jm_append_merge(dev); + dev->depth++; +} + +static void +jm_lineart_end_group(fz_context *ctx, fz_device *dev_) +{ + jm_lineart_device *dev = (jm_lineart_device *)dev_; + if (!dev->clips) return; + dev->depth--; +} + +static void jm_lineart_fill_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_lineart_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *, const fz_stroke_state *, fz_matrix, fz_colorspace *, const float *color, float alpha, fz_color_params) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_lineart_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shd, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_lineart_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_lineart_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, fz_colorspace *, const float *color, float alpha, fz_color_params color_params) +{ + jm_increase_seqno(ctx, dev); +} + +static void jm_lineart_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *, fz_matrix) +{ + jm_increase_seqno(ctx, dev); +} + + +//------------------------------------------------------------------- +// LINEART device for Python method Page.get_cdrawings() +//------------------------------------------------------------------- +mupdf::FzDevice JM_new_lineart_device(PyObject *out, int clips, PyObject *method) +{ + //printf("extra.JM_new_lineart_device()\n"); + jm_lineart_device* dev = (jm_lineart_device*) mupdf::ll_fz_new_device_of_size(sizeof(jm_lineart_device)); + + dev->super.close_device = NULL; + dev->super.drop_device = jm_lineart_drop_device; + dev->super.fill_path = jm_lineart_fill_path; + dev->super.stroke_path = jm_lineart_stroke_path; + dev->super.clip_path = jm_lineart_clip_path; + dev->super.clip_stroke_path = jm_lineart_clip_stroke_path; + + dev->super.fill_text = jm_lineart_fill_text; + dev->super.stroke_text = jm_lineart_stroke_text; + dev->super.clip_text = jm_lineart_clip_text; + dev->super.clip_stroke_text = jm_lineart_clip_stroke_text; + dev->super.ignore_text = jm_lineart_ignore_text; + + dev->super.fill_shade = jm_lineart_fill_shade; + dev->super.fill_image = jm_lineart_fill_image; + dev->super.fill_image_mask = jm_lineart_fill_image_mask; + dev->super.clip_image_mask = jm_lineart_clip_image_mask; + + dev->super.pop_clip = jm_lineart_pop_clip; + + dev->super.begin_mask = NULL; + dev->super.end_mask = NULL; + dev->super.begin_group = jm_lineart_begin_group; + dev->super.end_group = jm_lineart_end_group; + + dev->super.begin_tile = NULL; + dev->super.end_tile = NULL; + + dev->super.begin_layer = jm_lineart_begin_layer; + dev->super.end_layer = jm_lineart_end_layer; + + dev->super.begin_structure = NULL; + dev->super.end_structure = NULL; + + dev->super.begin_metatext = NULL; + dev->super.end_metatext = NULL; + + dev->super.render_flags = NULL; + dev->super.set_default_colorspaces = NULL; + + if (PyList_Check(out)) { + Py_INCREF(out); + } + Py_INCREF(method); + dev->out = out; + dev->seqno = 0; + dev->depth = 0; + dev->clips = clips; + dev->method = method; + dev->pathdict = nullptr; + + return mupdf::FzDevice(&dev->super); +} + +PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL) +{ + //fz_page *page = (fz_page *) $self; + //fz_device *dev = NULL; + PyObject *rc = NULL; + int clips = PyObject_IsTrue(extended); + + mupdf::FzDevice dev; + if (PyCallable_Check(callback) || method != Py_None) { + dev = JM_new_lineart_device(callback, clips, method); + } else { + rc = PyList_New(0); + dev = JM_new_lineart_device(rc, clips, method); + } + mupdf::FzRect prect = mupdf::fz_bound_page(page); + ((jm_lineart_device*) dev.m_internal)->ptm = mupdf::ll_fz_make_matrix(1, 0, 0, -1, 0, prect.y1); + + mupdf::FzCookie cookie; + mupdf::FzMatrix identity; + mupdf::fz_run_page( page, dev, *identity.internal(), cookie); + mupdf::fz_close_device( dev); + if (PyCallable_Check(callback) || method != Py_None) + { + Py_RETURN_NONE; + } + return rc; +} + + +static int detect_super_script(fz_stext_line *line, fz_stext_char *ch) +{ + if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0) + { + return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f; + } + return 0; +} + +static int JM_char_font_flags(fz_font *font, fz_stext_line *line, fz_stext_char *ch) +{ + int flags = detect_super_script(line, ch); + flags += mupdf::ll_fz_font_is_italic(font) * TEXT_FONT_ITALIC; + flags += mupdf::ll_fz_font_is_serif(font) * TEXT_FONT_SERIFED; + flags += mupdf::ll_fz_font_is_monospaced(font) * TEXT_FONT_MONOSPACED; + flags += mupdf::ll_fz_font_is_bold(font) * TEXT_FONT_BOLD; + return flags; +} + +//--------------------------------------------------------------------------- +// APPEND non-ascii runes in unicode escape format to fz_buffer +//--------------------------------------------------------------------------- +void JM_append_rune(fz_buffer *buff, int ch) +{ + char text[32]; + if (ch == 92) // prevent accidental "\u", "\U" sequences + { + mupdf::ll_fz_append_string(buff, "\\u005c"); + } + else if ((ch >= 32 && ch <= 127) || ch == 10) + { + mupdf::ll_fz_append_byte(buff, ch); + } + else if (ch >= 0xd800 && ch <= 0xdfff) // orphaned surrogate Unicodes + { + mupdf::ll_fz_append_string(buff, "\\ufffd"); + } + else if (ch <= 0xffff) + { + // 4 hex digits + snprintf(text, sizeof(text), "\\u%04x", ch); + mupdf::ll_fz_append_string(buff, text); + } + else + { + // 8 hex digits + snprintf(text, sizeof(text), "\\U%08x", ch); + mupdf::ll_fz_append_string(buff, text); + } +} + + +mupdf::FzRect JM_make_spanlist( + PyObject *line_dict, + mupdf::FzStextLine& line, + int raw, + mupdf::FzBuffer& buff, + mupdf::FzRect& tp_rect + ) +{ + PyObject *span = NULL, *char_list = NULL, *char_dict; + PyObject *span_list = PyList_New(0); + mupdf::fz_clear_buffer(buff); + fz_rect span_rect = fz_empty_rect; + fz_rect line_rect = fz_empty_rect; + fz_point span_origin = {0, 0}; + struct char_style + { + float size = -1; + unsigned flags = 0; + + #if MUPDF_VERSION_GE(1, 25, 2) + /* From mupdf:include/mupdf/fitz/structured-text.h:fz_stext_char::flags, which + uses anonymous enum values: + FZ_STEXT_STRIKEOUT = 1, + FZ_STEXT_UNDERLINE = 2, + FZ_STEXT_SYNTHETIC = 4, + FZ_STEXT_FILLED = 16, + FZ_STEXT_STROKED = 32, + FZ_STEXT_CLIPPED = 64 + */ + unsigned char_flags = 0; + #endif + + const char *font = ""; + unsigned argb = 0; + float asc = 0; + float desc = 0; + uint16_t bidi = 0; + }; + char_style old_style; + char_style style; + + for (mupdf::FzStextChar ch: line) + { + fz_rect r = JM_char_bbox(line, ch); + if (!JM_rects_overlap(*tp_rect.internal(), r) && !fz_is_infinite_rect(tp_rect)) + { + continue; + } + /* Info from: + detect_super_script() + fz_font_is_italic() + fz_font_is_serif() + fz_font_is_monospaced() + fz_font_is_bold() + */ + int flags = JM_char_font_flags( ch.m_internal->font, line.m_internal, ch.m_internal); + fz_point origin = ch.m_internal->origin; + style.size = ch.m_internal->size; + style.flags = flags; + #if MUPDF_VERSION_GE(1, 25, 2) + /* FZ_STEXT_SYNTHETIC is per-char, not per-span. */ + style.char_flags = ch.m_internal->flags & ~FZ_STEXT_SYNTHETIC; + #endif + style.font = JM_font_name(ch.m_internal->font); + #if MUPDF_VERSION_GE(1, 25, 0) + style.argb = ch.m_internal->argb; + #else + style.argb = ch.m_internal->color; + #endif + style.asc = JM_font_ascender(ch.m_internal->font); + style.desc = JM_font_descender(ch.m_internal->font); + + if (0 + || style.size != old_style.size + || style.flags != old_style.flags + #if MUPDF_VERSION_GE(1, 25, 2) + || style.char_flags != old_style.char_flags + #endif + || style.argb != old_style.argb + || strcmp(style.font, old_style.font) != 0 + || style.bidi != old_style.bidi + ) + { + if (old_style.size >= 0) + { + // not first one, output previous + if (raw) + { + // put character list in the span + DICT_SETITEM_DROP(span, dictkey_chars, char_list); + char_list = NULL; + } + else + { + // put text string in the span + DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff)); + mupdf::fz_clear_buffer(buff); + } + + DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin)); + DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect)); + line_rect = mupdf::ll_fz_union_rect(line_rect, span_rect); + LIST_APPEND_DROP(span_list, span); + span = NULL; + } + + span = PyDict_New(); + float asc = style.asc, desc = style.desc; + if (style.asc < 1e-3) + { + asc = 0.9f; + desc = -0.1f; + } + + DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size)); + DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("I", style.flags)); + DICT_SETITEM_DROP(span, dictkey_bidi, Py_BuildValue("I", style.bidi)); + #if MUPDF_VERSION_GE(1, 25, 2) + DICT_SETITEM_DROP(span, dictkey_char_flags, Py_BuildValue("I", style.char_flags)); + #endif + DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font)); + DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("I", style.argb & 0xffffff)); + #if MUPDF_VERSION_GE(1, 25, 0) + DICT_SETITEMSTR_DROP(span, "alpha", Py_BuildValue("I", style.argb >> 24)); + #endif + DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc)); + DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc)); + + old_style = style; + span_rect = r; + span_origin = origin; + + } + span_rect = mupdf::ll_fz_union_rect(span_rect, r); + + if (raw) + { + // make and append a char dict + char_dict = PyDict_New(); + DICT_SETITEM_DROP(char_dict, dictkey_origin, JM_py_from_point(ch.m_internal->origin)); + + DICT_SETITEM_DROP(char_dict, dictkey_bbox, JM_py_from_rect(r)); + + DICT_SETITEM_DROP(char_dict, dictkey_c, Py_BuildValue("C", ch.m_internal->c)); + DICT_SETITEMSTR_DROP(char_dict, "synthetic", Py_BuildValue("O", (ch.m_internal->flags & FZ_STEXT_SYNTHETIC) ? Py_True : Py_False)); + if (!char_list) + { + char_list = PyList_New(0); + } + LIST_APPEND_DROP(char_list, char_dict); + } + else + { + // add character byte to buffer + JM_append_rune(buff.m_internal, ch.m_internal->c); + } + } + // all characters processed, now flush remaining span + if (span) + { + if (raw) + { + DICT_SETITEM_DROP(span, dictkey_chars, char_list); + char_list = NULL; + } + else + { + DICT_SETITEM_DROP(span, dictkey_text, JM_EscapeStrFromBuffer(buff)); + mupdf::fz_clear_buffer(buff); + } + DICT_SETITEM_DROP(span, dictkey_origin, JM_py_from_point(span_origin)); + DICT_SETITEM_DROP(span, dictkey_bbox, JM_py_from_rect(span_rect)); + + if (!fz_is_empty_rect(span_rect)) + { + LIST_APPEND_DROP(span_list, span); + line_rect = fz_union_rect(line_rect, span_rect); + } + else + { + Py_DECREF(span); + } + span = NULL; + } + if (!mupdf::fz_is_empty_rect(line_rect)) + { + DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); + } + else + { + DICT_SETITEM_DROP(line_dict, dictkey_spans, span_list); + } + return line_rect; +} + +//----------------------------------------------------------------------------- +// Functions for wordlist output +//----------------------------------------------------------------------------- +int JM_append_word( + PyObject* lines, + fz_buffer* buff, + fz_rect* wbbox, + int block_n, + int line_n, + int word_n + ) +{ + PyObject* s = JM_EscapeStrFromBuffer(buff); + PyObject* litem = Py_BuildValue( + "ffffOiii", + wbbox->x0, + wbbox->y0, + wbbox->x1, + wbbox->y1, + s, + block_n, + line_n, + word_n + ); + LIST_APPEND_DROP(lines, litem); + Py_DECREF(s); + *wbbox = fz_empty_rect; + return word_n + 1; // word counter +} + +int _as_words(fz_stext_block *block, mupdf::FzBuffer& buff, fz_rect tp_rect, PyObject *lines, int block_n, PyObject *delimiters) +{ + /* 'buff' is intermediate storage for composing a word. Used as parameter only for + avoiding repeated allocation of an FzBuffer.*/ + int line_n; + fz_stext_line *line; + fz_stext_char *ch; + fz_rect wbbox, blockrect; + while (block) + { + switch (block->type) + { + case FZ_STEXT_BLOCK_STRUCT: + if (block->u.s.down) + { + block_n = _as_words(block->u.s.down->first_block, buff, tp_rect, lines, block_n, delimiters); + } + break; + + case FZ_STEXT_BLOCK_TEXT: + block_n++; + blockrect = block->bbox; + wbbox = fz_empty_rect; + line_n = -1; + for (line = block->u.t.first_line; line; line = line->next) + { + line_n++; + int word_n = 0; // word counter per line + mupdf::fz_clear_buffer(buff); // reset word buffer + int last_char_rtl = 0; // was last character RTL? + for (ch = line->first_char; ch; ch = ch->next) + { + mupdf::FzRect cbbox = JM_char_bbox(line, ch); + if (!JM_rects_overlap(tp_rect, *cbbox.internal()) && !fz_is_infinite_rect(tp_rect)) + { + continue; + } + // prevent Unicode ZWJ 0x200d to start a word + if (mupdf::fz_buffer_storage(buff, NULL) == 0 && ch->c == 0x200d) + { + continue; + } + int word_delimiter = JM_is_word_delimiter(ch->c, delimiters); + int this_char_rtl = JM_is_rtl_char(ch->c); + if (word_delimiter || this_char_rtl != last_char_rtl) + { + if (mupdf::fz_buffer_storage(buff, NULL) == 0 && word_delimiter) + { + continue; // skip delimiters at line start + } + if (!fz_is_empty_rect(wbbox)) + { + word_n = JM_append_word( + lines, + buff.m_internal, + &wbbox, + block_n, + line_n, + word_n + ); + } + mupdf::fz_clear_buffer(buff); + if (word_delimiter) continue; + } + // append one unicode character to the word + JM_append_rune(buff.m_internal, ch->c); + last_char_rtl = this_char_rtl; + // enlarge word bbox + wbbox = fz_union_rect(wbbox, JM_char_bbox(line, ch)); + } + if (mupdf::fz_buffer_storage(buff, NULL) && !fz_is_empty_rect(wbbox)) + { + word_n = JM_append_word( + lines, + buff.m_internal, + &wbbox, + block_n, + line_n, + word_n + ); + } + mupdf::fz_clear_buffer(buff); + } + break; + } + block = block->next; + } + return block_n; +} + + +PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters) +{ + int block_n = -1; + fz_rect tp_rect = this_tpage.m_internal->mediabox; + PyObject *lines = NULL; + mupdf::FzBuffer buff = mupdf::fz_new_buffer(64); + lines = PyList_New(0); + mupdf::FzStextBlock block = this_tpage.m_internal->first_block; + block_n = _as_words(block.m_internal, buff, tp_rect, lines, block_n, delimiters); + return lines; +} + + +struct ScopedPyObject +/* PyObject* wrapper, destructor calls Py_CLEAR() unless `release()` has been +called. */ +{ + ScopedPyObject(PyObject* rhs=nullptr) + : + m_pyobject(rhs) + {} + + PyObject*& get() + { + return m_pyobject; + } + + ScopedPyObject& operator= (PyObject* rhs) + { + Py_CLEAR(m_pyobject); + m_pyobject = rhs; + return *this; + } + + PyObject* release() + { + PyObject* ret = m_pyobject; + m_pyobject = nullptr; + return ret; + } + ~ScopedPyObject() + { + Py_CLEAR(m_pyobject); + } + + PyObject* m_pyobject = nullptr; +}; + +int _as_blocks(fz_stext_block *block, fz_rect tp_rect, PyObject *lines, int block_n) +{ + /* + Recursive function for output by blocks as identified by the + MuPDF SEGMENT logic. + Recursion happens on encountering a structure block. + In addition to the previous support of text and image, we now also support + vector blocks. + */ + PyObject *text = NULL; + fz_rect blockrect; + mupdf::FzBuffer res; + while (block) + { + switch (block->type) + { + case FZ_STEXT_BLOCK_STRUCT: + if (block->u.s.down) + { + block_n = _as_blocks(block->u.s.down->first_block, tp_rect, lines, block_n); + } + break; + + case FZ_STEXT_BLOCK_TEXT: + blockrect = fz_empty_rect; + res = mupdf::fz_new_buffer(1024); + int last_char; + for (fz_stext_line* line = block->u.t.first_line; line; line = line->next) + { + fz_rect linerect = fz_empty_rect; + for (fz_stext_char* ch = line->first_char; ch; ch = ch->next) + { + fz_rect cbbox = JM_char_bbox(line, ch); + if (!JM_rects_overlap(tp_rect, cbbox) && !fz_is_infinite_rect(tp_rect)) + { + continue; + } + JM_append_rune(res.m_internal, ch->c); + last_char = ch->c; + linerect = fz_union_rect(linerect, cbbox); + } + if (last_char != 10 && !fz_is_empty_rect(linerect)) + { + JM_append_rune(res.m_internal, 10); + } + blockrect = fz_union_rect(blockrect, linerect); + } + text = JM_EscapeStrFromBuffer(res); + break; + + case FZ_STEXT_BLOCK_IMAGE: + if (fz_contains_rect(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + blockrect = block->bbox; + fz_image *img = block->u.i.image; + fz_colorspace *cs = img->colorspace; + text = PyUnicode_FromFormat( + "\n", + mupdf::ll_fz_colorspace_name(cs), + img->w, + img->h, + img->bpc + ); + } + break; + + case FZ_STEXT_BLOCK_VECTOR: + if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + blockrect = block->bbox; + int alpha = (int) (block->u.v.argb >> 24); + int color = (int) (block->u.v.argb & 0xffffff); + text = PyUnicode_FromFormat( + "\n\n", + (block->u.v.flags & FZ_STEXT_VECTOR_IS_STROKED) ? "stroked" : "filled", + color, + alpha, + (block->u.v.flags & FZ_STEXT_VECTOR_IS_RECTANGLE) ? "true":"false", + (block->u.v.flags & FZ_STEXT_VECTOR_CONTINUES) ? "true":"false"); + } + break; + } + + if (text) + { + block_n += 1; + PyObject *litem = PyTuple_New(7); + PyTuple_SET_ITEM(litem, 0, Py_BuildValue("f", blockrect.x0)); + PyTuple_SET_ITEM(litem, 1, Py_BuildValue("f", blockrect.y0)); + PyTuple_SET_ITEM(litem, 2, Py_BuildValue("f", blockrect.x1)); + PyTuple_SET_ITEM(litem, 3, Py_BuildValue("f", blockrect.y1)); + PyTuple_SET_ITEM(litem, 4, Py_BuildValue("O", text)); + PyTuple_SET_ITEM(litem, 5, Py_BuildValue("i", block_n)); + PyTuple_SET_ITEM(litem, 6, Py_BuildValue("i", block->type)); + LIST_APPEND(lines, litem); + } + text = NULL; + block = block->next; + } + return block_n; + } + +PyObject* extractBLOCKS(mupdf::FzStextPage& self) +{ + fz_stext_page *this_tpage = self.m_internal; + fz_rect tp_rect = this_tpage->mediabox; + ScopedPyObject lines(PyList_New(0)); + int block_n = -1; + fz_stext_block *block = this_tpage->first_block; + block_n = _as_blocks(block, tp_rect, lines.get(), block_n); + return lines.release(); +} + +#define EMPTY_STRING PyUnicode_FromString("") + +static PyObject *JM_UnicodeFromStr(const char *c) +{ + if (!c) return EMPTY_STRING; + PyObject *val = Py_BuildValue("s", c); + if (!val) { + val = EMPTY_STRING; + PyErr_Clear(); + } + return val; +} + +PyObject* link_uri(mupdf::FzLink& link) +{ + return JM_UnicodeFromStr( link.m_internal->uri); +} + +fz_stext_page* page_get_textpage( + mupdf::FzPage& self, + PyObject* clip, + int flags, + PyObject* matrix + ) +{ + fz_context* ctx = mupdf::internal_context_get(); + fz_stext_page *tpage=NULL; + fz_page *page = self.m_internal; + fz_device *dev = NULL; + fz_stext_options options; + memset(&options, 0, sizeof options); + options.flags = flags; + fz_try(ctx) { + // Default to page's rect if `clip` not specified, for #2048. + fz_rect rect = (clip==Py_None) ? fz_bound_page(ctx, page) : JM_rect_from_py(clip); + fz_matrix ctm = JM_matrix_from_py(matrix); + tpage = fz_new_stext_page(ctx, rect); + dev = fz_new_stext_device(ctx, tpage, &options); + fz_run_page(ctx, page, dev, ctm, NULL); + fz_close_device(ctx, dev); + } + fz_always(ctx) { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) { + mupdf::internal_throw_exception(ctx); + } + return tpage; +} + +// return extension for pymupdf image type +const char *JM_image_extension(int type) +{ + switch (type) { + case(FZ_IMAGE_RAW): return "raw"; + case(FZ_IMAGE_FLATE): return "flate"; + case(FZ_IMAGE_LZW): return "lzw"; + case(FZ_IMAGE_RLD): return "rld"; + case(FZ_IMAGE_BMP): return "bmp"; + case(FZ_IMAGE_GIF): return "gif"; + case(FZ_IMAGE_JBIG2): return "jb2"; + case(FZ_IMAGE_JPEG): return "jpeg"; + case(FZ_IMAGE_JPX): return "jpx"; + case(FZ_IMAGE_JXR): return "jxr"; + case(FZ_IMAGE_PNG): return "png"; + case(FZ_IMAGE_PNM): return "pnm"; + case(FZ_IMAGE_TIFF): return "tiff"; + default: return "n/a"; + } +} + +void JM_make_image_block(fz_stext_block *block, PyObject *block_dict) +{ + fz_context* ctx = mupdf::internal_context_get(); + fz_image *image = block->u.i.image; + fz_buffer *buf = NULL, *freebuf = NULL, *mask_buf = NULL; + fz_compressed_buffer *buffer = fz_compressed_image_buffer(ctx, image); + fz_var(buf); + fz_var(freebuf); + fz_var(mask_buf); + int n = fz_colorspace_n(ctx, image->colorspace); + int w = image->w; + int h = image->h; + const char *ext = ""; + int type = FZ_IMAGE_UNKNOWN; + if (buffer) { + type = buffer->params.type; + ext = JM_image_extension(type); + } + if (type < FZ_IMAGE_BMP || type == FZ_IMAGE_JBIG2) + type = FZ_IMAGE_UNKNOWN; + PyObject *bytes = NULL; + fz_var(bytes); + PyObject *mask_bytes = NULL; + fz_var(mask_bytes); + fz_try(ctx) { + if (!buffer || type == FZ_IMAGE_UNKNOWN) + { + buf = freebuf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params); + ext = "png"; + } + else if (n == 4 && strcmp(ext, "jpeg") == 0) // JPEG CMYK needs another step + { + buf = freebuf = fz_new_buffer_from_image_as_jpeg(ctx, image, fz_default_color_params, 95, 1); + } + else + { + buf = buffer->buffer; + } + bytes = JM_BinFromBuffer(buf); + if (image->mask) { + mask_buf = fz_new_buffer_from_image_as_png(ctx, image->mask, fz_default_color_params); + mask_bytes = JM_BinFromBuffer(mask_buf); + } else { + mask_bytes = Py_BuildValue("s", NULL); + } + } + fz_always(ctx) { + if (!bytes) + bytes = PyBytes_FromString(""); + DICT_SETITEM_DROP(block_dict, dictkey_width, + Py_BuildValue("i", w)); + DICT_SETITEM_DROP(block_dict, dictkey_height, + Py_BuildValue("i", h)); + DICT_SETITEM_DROP(block_dict, dictkey_ext, + Py_BuildValue("s", ext)); + DICT_SETITEM_DROP(block_dict, dictkey_colorspace, + Py_BuildValue("i", n)); + DICT_SETITEM_DROP(block_dict, dictkey_xres, + Py_BuildValue("i", image->xres)); + DICT_SETITEM_DROP(block_dict, dictkey_yres, + Py_BuildValue("i", image->xres)); + DICT_SETITEM_DROP(block_dict, dictkey_bpc, + Py_BuildValue("i", (int) image->bpc)); + DICT_SETITEM_DROP(block_dict, dictkey_matrix, + JM_py_from_matrix(block->u.i.transform)); + DICT_SETITEM_DROP(block_dict, dictkey_size, + Py_BuildValue("n", PyBytes_Size(bytes))); + DICT_SETITEM_DROP(block_dict, dictkey_image, bytes); + DICT_SETITEMSTR_DROP(block_dict, "mask", mask_bytes); + fz_drop_buffer(ctx, mask_buf); + fz_drop_buffer(ctx, freebuf); + } + fz_catch(ctx) + { + fz_ignore_error(ctx); + } + return; +} + + +void JM_make_vector_block(fz_stext_block *block, PyObject *block_dict) +{ + DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); + DICT_SETITEMSTR_DROP(block_dict, "stroked", JM_BOOL(block->u.v.flags & FZ_STEXT_VECTOR_IS_STROKED)); + DICT_SETITEMSTR_DROP(block_dict, "isrect", JM_BOOL(block->u.v.flags & FZ_STEXT_VECTOR_IS_RECTANGLE)); + DICT_SETITEMSTR_DROP(block_dict, "continues", JM_BOOL(block->u.v.flags & FZ_STEXT_VECTOR_CONTINUES)); + int color = (int) block->u.v.argb & 0xffffff; // extract color components + int alpha = block->u.v.argb >> 24; // extract alpha value + DICT_SETITEM_DROP(block_dict, dictkey_color, Py_BuildValue("i", color)); + DICT_SETITEMSTR_DROP(block_dict, "alpha", Py_BuildValue("i", alpha)); + return; +} + +void JM_make_grid_block(fz_stext_block *block, PyObject *block_dict) +{ + Py_ssize_t i; + PyObject *pos; + + DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); + + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + + DICT_SETITEMSTR_DROP(block_dict, "max_uncertain", Py_BuildValue("ii", + block->u.b.xs->max_uncertainty, + block->u.b.ys->max_uncertainty)); + + // x coordinates with uncertainties + pos = PyList_New((size_t) block->u.b.xs->len); + for (i = 0; i < block->u.b.xs->len; i++) + { + PyList_SetItem(pos, i, Py_BuildValue("fi", + block->u.b.xs->list[i].pos, + block->u.b.xs->list[i].uncertainty)); + } + DICT_SETITEMSTR_DROP(block_dict, "xpos", pos); + + // y coordinates with uncertainties + pos = PyList_New((size_t) block->u.b.ys->len); + for (i = 0; i < block->u.b.ys->len; i++) + { + PyList_SetItem(pos, i, Py_BuildValue("fi", + block->u.b.ys->list[i].pos, + block->u.b.ys->list[i].uncertainty)); + } + DICT_SETITEMSTR_DROP(block_dict, "ypos", pos); + + return; +} + + +void make_table_dict(fz_stext_page *tp, PyObject *table_dict, PyObject *bbox) +{ + fz_rect bounds = JM_rect_from_py(bbox); + fz_stext_block *block; + + try + { + block = mupdf::ll_fz_find_table_within_bounds(tp, bounds); + } + catch (std::exception&) + { + /* Ignore failure to find a table structure. */ + return; + } + + // Check if a table structure was found + if (block && block->type == FZ_STEXT_BLOCK_GRID) + { + JM_make_grid_block(block, table_dict); + } + +} + + +static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect) +{ + fz_stext_line *line; + PyObject *line_list = PyList_New(0), *line_dict; + fz_rect block_rect = fz_empty_rect; + for (line = block->u.t.first_line; line; line = line->next) { + if (fz_is_empty_rect(fz_intersect_rect(tp_rect, line->bbox)) && + !fz_is_infinite_rect(tp_rect)) { + continue; + } + line_dict = PyDict_New(); + mupdf::FzStextLine line2(line); + mupdf::FzBuffer buff2( mupdf::ll_fz_keep_buffer( buff)); + mupdf::FzRect tp_rect2( tp_rect); + mupdf::FzRect line_rect2 = JM_make_spanlist( + line_dict, + line2, + raw, + buff2, + tp_rect2 + ); + fz_rect& line_rect = *line_rect2.internal(); + block_rect = fz_union_rect(block_rect, line_rect); + DICT_SETITEM_DROP(line_dict, dictkey_wmode, + Py_BuildValue("i", line->wmode)); + DICT_SETITEM_DROP(line_dict, dictkey_dir, JM_py_from_point(line->dir)); + DICT_SETITEM_DROP(line_dict, dictkey_bbox, + JM_py_from_rect(line_rect)); + LIST_APPEND_DROP(line_list, line_dict); + } + DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block_rect)); + DICT_SETITEM_DROP(block_dict, dictkey_lines, line_list); + return; +} + + +void JM_make_struct_block(fz_stext_block *block, PyObject *block_dict) +{ + DICT_SETITEMSTR_DROP(block_dict, "index", Py_BuildValue("i",block->u.s.index)); + if (block->u.s.down) + { + DICT_SETITEMSTR_DROP(block_dict, "raw", Py_BuildValue("s",block->u.s.down->raw)); + DICT_SETITEMSTR_DROP(block_dict, "std", Py_BuildValue("s",fz_structure_to_string(block->u.s.down->standard))); + } + +} + + +int _as_dict(PyObject *block_list, fz_stext_block *block, fz_buffer *text_buffer, int raw, fz_rect tp_rect, int block_n) +{ + /* + Recursive function for output by blocks as identified by the + MuPDF SEGMENT logic. + */ + PyObject *block_dict; + while (block) + { + switch (block->type) + { + case FZ_STEXT_BLOCK_STRUCT: + if (block->u.s.down && block->u.s.down->first_block) + { + block_n++; + block_dict = PyDict_New(); + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); + DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); + JM_make_struct_block(block, block_dict); + PyObject *subblocks = PyList_New(0); + block_n = _as_dict(subblocks, block->u.s.down->first_block, text_buffer, raw, tp_rect, block_n); + DICT_SETITEM_DROP(block_dict, dictkey_blocks, subblocks); + LIST_APPEND_DROP(block_list, block_dict); + } + break; + + case FZ_STEXT_BLOCK_TEXT: + if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + block_dict = PyDict_New(); + block_n++; + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); + DICT_SETITEMSTR_DROP(block_dict, "flags", Py_BuildValue("i", block->u.t.flags)); + JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect); + LIST_APPEND_DROP(block_list, block_dict); + } + break; + + case FZ_STEXT_BLOCK_IMAGE: + if (fz_contains_rect(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + block_dict = PyDict_New(); + block_n++; + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); + DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox)); + JM_make_image_block(block, block_dict); + LIST_APPEND_DROP(block_list, block_dict); + } + break; + + case FZ_STEXT_BLOCK_VECTOR: + if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + block_dict = PyDict_New(); + block_n++; + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); + JM_make_vector_block(block, block_dict); + LIST_APPEND_DROP(block_list, block_dict); + } + break; + + case FZ_STEXT_BLOCK_GRID: + if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) + { + block_dict = PyDict_New(); + block_n++; + DICT_SETITEM_DROP(block_dict, dictkey_type, Py_BuildValue("i", block->type)); + DICT_SETITEM_DROP(block_dict, dictkey_number, Py_BuildValue("i", block_n)); + JM_make_grid_block(block, block_dict); + LIST_APPEND_DROP(block_list, block_dict); + } + break; + } + block = block->next; + } + return block_n; +} + +void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw) +{ + fz_context* ctx = mupdf::internal_context_get(); + fz_stext_block *block; + fz_buffer *text_buffer = fz_new_buffer(ctx, 128); + PyObject *block_list = PyList_New(0); + fz_rect tp_rect = tp->mediabox; + block = tp->first_block; + int block_n = -1; + block_n = _as_dict(block_list, block, text_buffer, raw, tp_rect, block_n); + DICT_SETITEM_DROP(page_dict, dictkey_blocks, block_list); + fz_drop_buffer(ctx, text_buffer); +} + +//----------------------------------------------------------------- +// get one pixel as a list +//----------------------------------------------------------------- +PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y) +{ + fz_context* ctx = mupdf::internal_context_get(); + PyObject *p = NULL; + if (0 + || x < 0 + || x >= pm->w + || y < 0 + || y >= pm->h + ) + { + throw std::range_error( MSG_PIXEL_OUTSIDE); + } + int n = pm->n; + int stride = fz_pixmap_stride(ctx, pm); + int i = stride * y + n * x; + p = PyTuple_New(n); + for (int j = 0; j < n; j++) + { + PyTuple_SET_ITEM(p, j, Py_BuildValue("i", pm->samples[i + j])); + } + return p; +} + +int pixmap_n(mupdf::FzPixmap& pixmap) +{ + return mupdf::fz_pixmap_components( pixmap); +} + +static int +JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result) +{ + PyObject *temp = PySequence_ITEM(obj, idx); + if (!temp) return 1; + if (PyLong_Check(temp)) { + *result = (int) PyLong_AsLong(temp); + Py_DECREF(temp); + } else if (PyFloat_Check(temp)) { + *result = (int) PyFloat_AsDouble(temp); + Py_DECREF(temp); + } else { + Py_DECREF(temp); + return 1; + } + if (PyErr_Occurred()) { + PyErr_Clear(); + return 1; + } + return 0; +} + +PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color) +{ + fz_context* ctx = mupdf::internal_context_get(); + if (0 + || x < 0 + || x >= pm->w + || y < 0 + || y >= pm->h + ) + { + throw std::range_error( MSG_PIXEL_OUTSIDE); + } + int n = pm->n; + if (!PySequence_Check(color) || PySequence_Size(color) != n) { + throw std::range_error(MSG_BAD_COLOR_SEQ); + } + int i, j; + unsigned char c[5]; + for (j = 0; j < n; j++) { + if (JM_INT_ITEM(color, j, &i) == 1) { + throw std::range_error(MSG_BAD_COLOR_SEQ); + } + if (i < 0 or i >= 256) { + throw std::range_error(MSG_BAD_COLOR_SEQ); + } + c[j] = (unsigned char) i; + } + int stride = fz_pixmap_stride(ctx, pm); + i = stride * y + n * x; + for (j = 0; j < n; j++) { + pm->samples[i + j] = c[j]; + } + Py_RETURN_NONE; +} +//------------------------------------------- +// make a buffer from an stext_page's text +//------------------------------------------- +fz_buffer * +JM_new_buffer_from_stext_page(fz_stext_page *page) +{ + fz_context* ctx = mupdf::internal_context_get(); + fz_stext_block *block; + fz_stext_line *line; + fz_stext_char *ch; + fz_rect rect = page->mediabox; + fz_buffer *buf = NULL; + + fz_try(ctx) + { + buf = fz_new_buffer(ctx, 256); + for (block = page->first_block; block; block = block->next) { + if (block->type == FZ_STEXT_BLOCK_TEXT) { + for (line = block->u.t.first_line; line; line = line->next) { + for (ch = line->first_char; ch; ch = ch->next) { + if (!JM_rects_overlap(rect, JM_char_bbox(line, ch)) && + !fz_is_infinite_rect(rect)) { + continue; + } + fz_append_rune(ctx, buf, ch->c); + } + fz_append_byte(ctx, buf, '\n'); + } + fz_append_byte(ctx, buf, '\n'); + } + } + } + fz_catch(ctx) { + fz_drop_buffer(ctx, buf); + mupdf::internal_throw_exception(ctx); + } + return buf; +} + +static inline int canon(int c) +{ + /* TODO: proper unicode case folding */ + /* TODO: character equivalence (a matches ä, etc) */ + if (c == 0xA0 || c == 0x2028 || c == 0x2029) + return ' '; + if (c == '\r' || c == '\n' || c == '\t') + return ' '; + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static inline int chartocanon(int *c, const char *s) +{ + int n = fz_chartorune(c, s); + *c = canon(*c); + return n; +} + +static const char *match_string(const char *h, const char *n) +{ + int hc, nc; + const char *e = h; + h += chartocanon(&hc, h); + n += chartocanon(&nc, n); + while (hc == nc) + { + e = h; + if (hc == ' ') + do + h += chartocanon(&hc, h); + while (hc == ' '); + else + h += chartocanon(&hc, h); + if (nc == ' ') + do + n += chartocanon(&nc, n); + while (nc == ' '); + else + n += chartocanon(&nc, n); + } + return nc == 0 ? e : NULL; +} + + +static const char *find_string(const char *s, const char *needle, const char **endp) +{ + const char *end; + while (*s) + { + end = match_string(s, needle); + if (end) + { + *endp = end; + return s; + } + ++s; + } + *endp = NULL; + return NULL; +} + +struct highlight +{ + Py_ssize_t len; + PyObject *quads; + float hfuzz, vfuzz; +}; + + +static int +JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, double *result) +{ + PyObject *temp = PySequence_ITEM(obj, idx); + if (!temp) return 1; + *result = PyFloat_AsDouble(temp); + Py_DECREF(temp); + if (PyErr_Occurred()) { + PyErr_Clear(); + return 1; + } + return 0; +} + + +//----------------------------------------------------------------------------- +// fz_quad from PySequence. Four floats are treated as rect. +// Else must be four pairs of floats. +//----------------------------------------------------------------------------- +static fz_quad +JM_quad_from_py(PyObject *r) +{ + fz_quad q = fz_make_quad(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, + FZ_MAX_INF_RECT, FZ_MIN_INF_RECT, + FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, + FZ_MAX_INF_RECT, FZ_MAX_INF_RECT); + fz_point p[4]; + double test, x, y; + Py_ssize_t i; + PyObject *obj = NULL; + + if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) + return q; + + if (JM_FLOAT_ITEM(r, 0, &test) == 0) + return fz_quad_from_rect(JM_rect_from_py(r)); + + for (i = 0; i < 4; i++) { + obj = PySequence_ITEM(r, i); // next point item + if (!obj || !PySequence_Check(obj) || PySequence_Size(obj) != 2) + goto exit_result; // invalid: cancel the rest + + if (JM_FLOAT_ITEM(obj, 0, &x) == 1) goto exit_result; + if (JM_FLOAT_ITEM(obj, 1, &y) == 1) goto exit_result; + if (x < FZ_MIN_INF_RECT) x = FZ_MIN_INF_RECT; + if (y < FZ_MIN_INF_RECT) y = FZ_MIN_INF_RECT; + if (x > FZ_MAX_INF_RECT) x = FZ_MAX_INF_RECT; + if (y > FZ_MAX_INF_RECT) y = FZ_MAX_INF_RECT; + p[i] = fz_make_point((float) x, (float) y); + + Py_CLEAR(obj); + } + q.ul = p[0]; + q.ur = p[1]; + q.ll = p[2]; + q.lr = p[3]; + return q; + + exit_result:; + Py_CLEAR(obj); + return q; +} + +static float hdist(fz_point *dir, fz_point *a, fz_point *b) +{ + float dx = b->x - a->x; + float dy = b->y - a->y; + return fz_abs(dx * dir->x + dy * dir->y); +} + +static float vdist(fz_point *dir, fz_point *a, fz_point *b) +{ + float dx = b->x - a->x; + float dy = b->y - a->y; + return fz_abs(dx * dir->y + dy * dir->x); +} + +static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) +{ + struct highlight* hits = (struct highlight*) arg; + float vfuzz = ch->size * hits->vfuzz; + float hfuzz = ch->size * hits->hfuzz; + fz_quad ch_quad = JM_char_quad(line, ch); + if (hits->len > 0) { + PyObject *quad = PySequence_ITEM(hits->quads, hits->len - 1); + fz_quad end = JM_quad_from_py(quad); + Py_DECREF(quad); + if (hdist(&line->dir, &end.lr, &ch_quad.ll) < hfuzz + && vdist(&line->dir, &end.lr, &ch_quad.ll) < vfuzz + && hdist(&line->dir, &end.ur, &ch_quad.ul) < hfuzz + && vdist(&line->dir, &end.ur, &ch_quad.ul) < vfuzz) + { + end.ur = ch_quad.ur; + end.lr = ch_quad.lr; + quad = JM_py_from_quad(end); + PyList_SetItem(hits->quads, hits->len - 1, quad); + return; + } + } + LIST_APPEND_DROP(hits->quads, JM_py_from_quad(ch_quad)); + hits->len++; +} + + +PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle) +{ + fz_context* ctx = mupdf::internal_context_get(); + struct highlight hits; + fz_stext_block *block; + fz_stext_line *line; + fz_stext_char *ch; + fz_buffer *buffer = NULL; + const char *haystack, *begin, *end; + fz_rect rect = page->mediabox; + int c, inside; + + if (strlen(needle) == 0) Py_RETURN_NONE; + PyObject *quads = PyList_New(0); + hits.len = 0; + hits.quads = quads; + hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ + hits.vfuzz = 0.1f; + + fz_try(ctx) { + buffer = JM_new_buffer_from_stext_page( page); + haystack = fz_string_from_buffer(ctx, buffer); + begin = find_string(haystack, needle, &end); + if (!begin) goto no_more_matches; + + inside = 0; + for (block = page->first_block; block; block = block->next) { + if (block->type != FZ_STEXT_BLOCK_TEXT) { + continue; + } + for (line = block->u.t.first_line; line; line = line->next) { + for (ch = line->first_char; ch; ch = ch->next) { + if (!fz_is_infinite_rect(rect) && + !JM_rects_overlap(rect, JM_char_bbox(line, ch))) { + goto next_char; + } +try_new_match: + if (!inside) { + if (haystack >= begin) inside = 1; + } + if (inside) { + if (haystack < end) { + on_highlight_char(ctx, &hits, line, ch); + } else { + inside = 0; + begin = find_string(haystack, needle, &end); + if (!begin) goto no_more_matches; + else goto try_new_match; + } + } + haystack += fz_chartorune(&c, haystack); +next_char:; + } + assert(*haystack == '\n'); + ++haystack; + } + assert(*haystack == '\n'); + ++haystack; + } +no_more_matches:; + } + fz_always(ctx) + fz_drop_buffer(ctx, buffer); + fz_catch(ctx) + mupdf::internal_throw_exception(ctx); + + return quads; +} + +void pixmap_copy( fz_pixmap* pm, const fz_pixmap* src, int n) +{ + assert(pm->w == src->w); + assert(pm->h == src->h); + assert(n <= pm->n); + assert(n <= src->n); + + if (pm->n == src->n) + { + // identical samples + assert(pm->stride == src->stride); + memcpy(pm->samples, src->samples, pm->w * pm->h * pm->n); + } + else + { + int nn; + int do_alpha; + if (pm->n > src->n) + { + assert(pm->n == src->n + 1); + nn = src->n; + assert(!src->alpha); + assert(pm->alpha); + do_alpha = 1; + } + else + { + assert(src->n == pm->n + 1); + nn = pm->n; + assert(src->alpha); + assert(!pm->alpha); + do_alpha = 0; + } + for (int y=0; yh; ++y) + { + for (int x=0; xw; ++x) + { + memcpy( + pm->samples + pm->stride * y + pm->n * x, + src->samples + src->stride * y + src->n * x, + nn + ); + if (do_alpha) + { + pm->samples[pm->stride * y + pm->n * x + pm->n-1] = 255; + } + } + } + } +} + + +PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip) +{ + fz_context* ctx = mupdf::internal_context_get(); + PyObject* rc = PyDict_New(); + fz_irect irect = fz_pixmap_bbox(ctx, pm); + irect = fz_intersect_irect(irect, fz_round_rect(JM_rect_from_py(clip))); + if (fz_is_empty_irect(irect)) + { + return rc; + } + size_t stride = pm->stride; + size_t width = irect.x1 - irect.x0; + size_t height = irect.y1 - irect.y0; + size_t n = (size_t) pm->n; + size_t substride = width * n; + unsigned char* s = pm->samples + stride * (irect.y0 - pm->y) + n * (irect.x0 - pm->x); + // Cache previous pixel. + char oldpix[10]; + assert(n <= sizeof(oldpix)); + memcpy(oldpix, s, n); + long cnt = 0; + for (size_t i = 0; i < height; i++) + { + for (size_t j = 0; j < substride; j += n) + { + const char* newpix = (const char*) s + j; + if (memcmp(oldpix, newpix, n)) + { + /* Pixel differs from previous pixel, so update results with + last run of pixels. We get a PyObject representation of pixel + so we can look up in Python dict . */ + PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n); + PyObject* c = PyDict_GetItem(rc, pixel); + if (c) cnt += PyLong_AsLong(c); + DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt)); + Py_DECREF(pixel); + /* Start next run of identical pixels. */ + cnt = 1; + memcpy(oldpix, newpix, n); + } + else + { + cnt += 1; + } + } + s += stride; + } + /* Update results with last pixel. */ + PyObject* pixel = PyBytes_FromStringAndSize(&oldpix[0], n); + PyObject* c = PyDict_GetItem(rc, pixel); + if (c) cnt += PyLong_AsLong(c); + DICT_SETITEM_DROP(rc, pixel, PyLong_FromLong(cnt)); + Py_DECREF(pixel); + PyErr_Clear(); + return rc; +} + +%} + +/* Declarations for functions defined above. */ + +void page_merge( + mupdf::PdfDocument& doc_des, + mupdf::PdfDocument& doc_src, + int page_from, + int page_to, + int rotate, + int links, + int copy_annots, + mupdf::PdfGraftMap& graft_map + ); + +void JM_merge_range( + mupdf::PdfDocument& doc_des, + mupdf::PdfDocument& doc_src, + int spage, + int epage, + int apage, + int rotate, + int links, + int annots, + int show_progress, + mupdf::PdfGraftMap& graft_map + ); + +void FzDocument_insert_pdf( + mupdf::FzDocument& doc, + mupdf::FzDocument& src, + int from_page, + int to_page, + int start_at, + int rotate, + int links, + int annots, + int show_progress, + int final, + mupdf::PdfGraftMap& graft_map + ); + +int page_xref(mupdf::FzDocument& this_doc, int pno); +void _newPage(mupdf::FzDocument& self, int pno=-1, float width=595, float height=842); +void _newPage(mupdf::PdfDocument& self, int pno=-1, float width=595, float height=842); +void JM_add_annot_id(mupdf::PdfAnnot& annot, const char* stem); +void JM_set_annot_callout_line(mupdf::PdfAnnot& annot, PyObject *callout, int count); +std::vector< std::string> JM_get_annot_id_list(mupdf::PdfPage& page); +mupdf::PdfAnnot _add_caret_annot(mupdf::PdfPage& self, mupdf::FzPoint& point); +mupdf::PdfAnnot _add_caret_annot(mupdf::FzPage& self, mupdf::FzPoint& point); +const char* Tools_parse_da(mupdf::PdfAnnot& this_annot); +PyObject* Annot_getAP(mupdf::PdfAnnot& annot); +void Tools_update_da(mupdf::PdfAnnot& this_annot, const char* da_str); +mupdf::FzPoint JM_point_from_py(PyObject* p); +mupdf::FzRect Annot_rect(mupdf::PdfAnnot& annot); +PyObject* util_transform_rect(PyObject* rect, PyObject* matrix); +PyObject* Annot_rect3(mupdf::PdfAnnot& annot); +mupdf::FzMatrix Page_derotate_matrix(mupdf::PdfPage& pdfpage); +mupdf::FzMatrix Page_derotate_matrix(mupdf::FzPage& pdfpage); +PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj); +PyObject* xref_object(mupdf::PdfDocument& pdf, int xref, int compressed=0, int ascii=0); +PyObject* xref_object(mupdf::FzDocument& document, int xref, int compressed=0, int ascii=0); + +PyObject* Link_is_external(mupdf::FzLink& this_link); +PyObject* Page_addAnnot_FromString(mupdf::PdfPage& page, PyObject* linklist); +PyObject* Page_addAnnot_FromString(mupdf::FzPage& page, PyObject* linklist); +mupdf::FzLink Link_next(mupdf::FzLink& this_link); + +static int page_count_fz2(void* document); +int page_count_fz(mupdf::FzDocument& document); +int page_count_pdf(mupdf::PdfDocument& pdf); +int page_count(mupdf::FzDocument& document); +int page_count(mupdf::PdfDocument& pdf); + +PyObject* page_annot_xrefs(mupdf::PdfDocument& pdf, int pno); +PyObject* page_annot_xrefs(mupdf::FzDocument& document, int pno); +bool Outline_is_external(mupdf::FzOutline* outline); +void Document_extend_toc_items(mupdf::PdfDocument& pdf, PyObject* items); +void Document_extend_toc_items(mupdf::FzDocument& document, PyObject* items); + +int ll_fz_absi(int i); + +mupdf::FzDevice JM_new_texttrace_device(PyObject* out); + +fz_rect JM_char_bbox(const mupdf::FzStextLine& line, const mupdf::FzStextChar& ch); + +static fz_quad JM_char_quad( fz_stext_line *line, fz_stext_char *ch); +void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page); + +void set_skip_quad_corrections(int on); +void set_subset_fontnames(int on); +void set_small_glyph_heights(int on); + +mupdf::FzRect JM_cropbox(mupdf::PdfObj& page_obj); +PyObject* get_cdrawings(mupdf::FzPage& page, PyObject *extended=NULL, PyObject *callback=NULL, PyObject *method=NULL); + +mupdf::FzRect JM_make_spanlist( + PyObject *line_dict, + mupdf::FzStextLine& line, + int raw, + mupdf::FzBuffer& buff, + mupdf::FzRect& tp_rect + ); + +PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters); +PyObject* extractBLOCKS(mupdf::FzStextPage& self); + +PyObject* link_uri(mupdf::FzLink& link); + +fz_stext_page* page_get_textpage( + mupdf::FzPage& self, + PyObject* clip, + int flags, + PyObject* matrix + ); + +void make_table_dict(fz_stext_page *tp, PyObject *table_dict, PyObject *bbox); +void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw); +PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y); +int pixmap_n(mupdf::FzPixmap& pixmap); + +PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle); + +PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color); + +/* Copies from to , which must have same width and height. pm->n - +src->n must be -1, 0 or +1. If -1, must have alpha and must not have +alpha, and we copy the non-alpha bytes. If +1 must not have alpha and + must have alpha and we set 's alpha bytes all to 255.*/ +void pixmap_copy(fz_pixmap* pm, const fz_pixmap* src, int n); + +PyObject* ll_JM_color_count(fz_pixmap *pm, PyObject *clip); diff --git a/src/fitz___init__.py b/src/fitz___init__.py new file mode 100644 index 000000000..c4c7bd88c --- /dev/null +++ b/src/fitz___init__.py @@ -0,0 +1,13 @@ +# pylint: disable=wildcard-import,unused-import,unused-wildcard-import +from pymupdf import * +from pymupdf import _as_fz_document +from pymupdf import _as_fz_page +from pymupdf import _as_pdf_document +from pymupdf import _as_pdf_page +from pymupdf import _log_items +from pymupdf import _log_items_active +from pymupdf import _log_items_clear +from pymupdf import __version__ +from pymupdf import __doc__ +from pymupdf import _globals +from pymupdf import _g_out_message diff --git a/src/fitz_table.py b/src/fitz_table.py new file mode 100644 index 000000000..d932fbe57 --- /dev/null +++ b/src/fitz_table.py @@ -0,0 +1,2 @@ +# pylint: disable=wildcard-import,unused-wildcard-import +from pymupdf.table import * diff --git a/src/fitz_utils.py b/src/fitz_utils.py new file mode 100644 index 000000000..4e0f7f514 --- /dev/null +++ b/src/fitz_utils.py @@ -0,0 +1,2 @@ +# pylint: disable=wildcard-import,unused-wildcard-import +from pymupdf.utils import * diff --git a/src/pymupdf.py b/src/pymupdf.py new file mode 100644 index 000000000..82205555f --- /dev/null +++ b/src/pymupdf.py @@ -0,0 +1,2 @@ +# pylint: disable=wildcard-import,unused-import +from . import * diff --git a/src/table.py b/src/table.py new file mode 100644 index 000000000..9b73782d3 --- /dev/null +++ b/src/table.py @@ -0,0 +1,2697 @@ +""" +Copyright (C) 2023 Artifex Software, Inc. + +This file is part of PyMuPDF. + +PyMuPDF is free software: you can redistribute it and/or modify it under the +terms of the GNU Affero General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. + +PyMuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +details. + +You should have received a copy of the GNU Affero General Public License +along with MuPDF. If not, see + +Alternative licensing terms are available from the licensor. +For commercial licensing, see or contact +Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +CA 94129, USA, for further information. + +--------------------------------------------------------------------- +Portions of this code have been ported from pdfplumber, see +https://pypi.org/project/pdfplumber/. + +The ported code is under the following MIT license: + +--------------------------------------------------------------------- +The MIT License (MIT) + +Copyright (c) 2015, Jeremy Singer-Vine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +--------------------------------------------------------------------- +Also see here: https://github.com/jsvine/pdfplumber/blob/stable/LICENSE.txt +--------------------------------------------------------------------- + +The porting mainly pertains to files "table.py" and relevant parts of +"utils/text.py" within pdfplumber's repository on Github. +With respect to "text.py", we have removed functions or features that are not +used by table processing. Examples are: + +* the text search function +* simple text extraction +* text extraction by lines + +Original pdfplumber code does neither detect, nor identify table headers. +This PyMuPDF port adds respective code to the 'Table' class as method '_get_header'. +This is implemented as new class TableHeader with the properties: +* bbox: A tuple for the header's bbox +* cells: A tuple for each bbox of a column header +* names: A list of strings with column header text +* external: A bool indicating whether the header is outside the table cells. + +""" + +import inspect +import itertools +import string +import html +from collections.abc import Sequence +from dataclasses import dataclass +from operator import itemgetter +import weakref +import pymupdf +from pymupdf import mupdf + +# ------------------------------------------------------------------- +# Start of PyMuPDF interface code +# ------------------------------------------------------------------- + +EDGES = [] # vector graphics from PyMuPDF +CHARS = [] # text characters from PyMuPDF +TEXTPAGE = None +TEXT_BOLD = mupdf.FZ_STEXT_BOLD +TEXT_STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT +FLAGS = ( + 0 + | pymupdf.TEXTFLAGS_TEXT + | pymupdf.TEXT_COLLECT_STYLES + | pymupdf.TEXT_ACCURATE_BBOXES + | pymupdf.TEXT_MEDIABOX_CLIP +) +# needed by mupdf function fz_find_table_within_bounds(). +TABLE_DETECTOR_FLAGS = ( + 0 + | pymupdf.TEXT_ACCURATE_BBOXES + | pymupdf.TEXT_SEGMENT + | pymupdf.TEXT_COLLECT_VECTORS + | pymupdf.TEXT_MEDIABOX_CLIP +) +white_spaces = set(string.whitespace) # for checking white space only cells + + +def _iou(r1, r2): + """Compute intersection over union of two rectangles.""" + ix = max(0, min(r1[2], r2[2]) - max(r1[0], r2[0])) + iy = max(0, min(r1[3], r2[3]) - max(r1[1], r2[1])) + intersection = ix * iy # intersection area + if not intersection: + return 0 + area1 = (r1[2] - r1[0]) * (r1[3] - r1[1]) + area2 = (r2[2] - r2[0]) * (r2[3] - r2[1]) + return intersection / (area1 + area2 - intersection) + + +def intersects_words_h(bbox, y, word_rects) -> bool: + """Check whether any of the words in bbox are cut through by + horizontal line y. + """ + return any(r.y0 < y < r.y1 for r in word_rects if r in bbox) + + +def get_table_dict_from_rect(textpage, rect): + """Extract MuPDF table structure information from a given rectangle.""" + table_dict = {} + pymupdf.extra.make_table_dict(textpage.this.m_internal, table_dict, rect) + return table_dict + + +def make_table_from_bbox(textpage, word_rects, rect): + """Detect table structure within a given rectangle.""" + cells = [] # table cells as (x0,y0,x1,y1) tuples + + # calls fz_find_table_within_bounds + block = get_table_dict_from_rect(textpage, rect) + # No table structure found if not a grid block + if block.get("type") != mupdf.FZ_STEXT_BLOCK_GRID: + return cells + bbox = pymupdf.Rect(block["bbox"]) # resulting table bbox + + # lists of (pos,uncertainty) tuples + xpos = sorted(block["xpos"], key=lambda x: x[0]) + ypos = sorted(block["ypos"], key=lambda y: y[0]) + + # maximum uncertainties in x and y directions + xmaxu, ymaxu = block["max_uncertain"] + + # Modify ypos to remove uncertain positions, and y positions + # that cut through words. + nypos = [] + for y, yunc in ypos: + if yunc > 0: # allow no uncertain y values + continue + if intersects_words_h(bbox, y, word_rects): + continue # allow no y that cuts through words + if nypos and (y - nypos[-1] < 3): + nypos[-1] = y # snap close positions + else: + nypos.append(y) + + # New max y uncertainty: 35% of remaining y positions. + # Omit x positions that intersect too many words, otherwise + # only remove x for the affected cells. + ymaxu = max(0, round((len(nypos) - 2) * 0.35)) + + # Exclude x positions with too high uncertainty + # (we allow more uncertainty in x direction) + nxpos = [x[0] for x in xpos if x[1] <= ymaxu] + if bbox.x1 > nxpos[-1] + 3: + nxpos.append(bbox.x1) # ensure right table border + + # Compose cells from the remaining x and y positions. + for i in range(len(nypos) - 1): + row_box = pymupdf.Rect(bbox.x0, nypos[i], bbox.x1, nypos[i + 1]) + # Sub-select words in this row and sort them by left coordinate + row_words = sorted([r for r in word_rects if r in row_box], key=lambda r: r.x0) + # Sub-select x values that do not cut through words + this_xpos = [x for x in nxpos if not any(r.x0 < x < r.x1 for r in row_words)] + for j in range(len(this_xpos) - 1): + cell = pymupdf.Rect(this_xpos[j], nypos[i], this_xpos[j + 1], nypos[i + 1]) + if not cell.is_empty: # valid cell + cells.append(tuple(cell)) + # Add new table to TableFinder tables + return cells + + +def extract_cells(textpage, cell, markdown=False): + """Extract text from a rect-like 'cell' as plain or MD styled text. + + This function should ultimately be used to extract text from a table cell. + Markdown output will only work correctly if extraction flag bit + TEXT_COLLECT_STYLES is set. + + Args: + textpage: A PyMuPDF TextPage object. Must have been created with + TEXTFLAGS_TEXT | TEXT_COLLECT_STYLES. + cell: A tuple (x0, y0, x1, y1) defining the cell's bbox. + markdown: If True, return text formatted for Markdown. + + Returns: + A string with the text extracted from the cell. + """ + text = "" + for block in textpage.extractRAWDICT()["blocks"]: + if block["type"] != 0: + continue + block_bbox = block["bbox"] + if ( + 0 + or block_bbox[0] > cell[2] + or block_bbox[2] < cell[0] + or block_bbox[1] > cell[3] + or block_bbox[3] < cell[1] + ): + continue # skip block outside cell + for line in block["lines"]: + lbbox = line["bbox"] + if ( + 0 + or lbbox[0] > cell[2] + or lbbox[2] < cell[0] + or lbbox[1] > cell[3] + or lbbox[3] < cell[1] + ): + continue # skip line outside cell + + if text: # must be a new line in the cell + text += "
" if markdown else "\n" + + # strikeout detection only works with horizontal text + horizontal = line["dir"] == (0, 1) or line["dir"] == (1, 0) + + for span in line["spans"]: + sbbox = span["bbox"] + if ( + 0 + or sbbox[0] > cell[2] + or sbbox[2] < cell[0] + or sbbox[1] > cell[3] + or sbbox[3] < cell[1] + ): + continue # skip spans outside cell + + # only include chars with more than 50% bbox overlap + span_text = "" + for char in span["chars"]: + this_char = char["c"] + bbox = pymupdf.Rect(char["bbox"]) + if abs(bbox & cell) > 0.5 * abs(bbox): + span_text += this_char + elif this_char in white_spaces: + span_text += " " + + if not span_text: + continue # skip empty span + + if not markdown: # no MD styling + text += span_text + continue + + prefix = "" + suffix = "" + if horizontal and span["char_flags"] & TEXT_STRIKEOUT: + prefix += "~~" + suffix = "~~" + suffix + if span["char_flags"] & TEXT_BOLD: + prefix += "**" + suffix = "**" + suffix + if span["flags"] & pymupdf.TEXT_FONT_ITALIC: + prefix += "_" + suffix = "_" + suffix + if span["flags"] & pymupdf.TEXT_FONT_MONOSPACED: + prefix += "`" + suffix = "`" + suffix + + if len(span["chars"]) > 2: + span_text = span_text.rstrip() + + # if span continues previous styling: extend cell text + if (ls := len(suffix)) and text.endswith(suffix): + text = text[:-ls] + span_text + suffix + else: # append the span with new styling + if not span_text.strip(): + text += " " + else: + text += prefix + span_text + suffix + + return text.strip() + + +# ------------------------------------------------------------------- +# End of PyMuPDF interface code +# ------------------------------------------------------------------- + + +class UnsetFloat(float): + pass + + +NON_NEGATIVE_SETTINGS = [ + "snap_tolerance", + "snap_x_tolerance", + "snap_y_tolerance", + "join_tolerance", + "join_x_tolerance", + "join_y_tolerance", + "edge_min_length", + "min_words_vertical", + "min_words_horizontal", + "intersection_tolerance", + "intersection_x_tolerance", + "intersection_y_tolerance", +] + + +TABLE_STRATEGIES = ["lines", "lines_strict", "text", "explicit"] +UNSET = UnsetFloat(0) +DEFAULT_SNAP_TOLERANCE = 3 +DEFAULT_JOIN_TOLERANCE = 3 +DEFAULT_MIN_WORDS_VERTICAL = 3 +DEFAULT_MIN_WORDS_HORIZONTAL = 1 +DEFAULT_X_TOLERANCE = 3 +DEFAULT_Y_TOLERANCE = 3 +DEFAULT_X_DENSITY = 7.25 +DEFAULT_Y_DENSITY = 13 +bbox_getter = itemgetter("x0", "top", "x1", "bottom") + + +LIGATURES = { + "ff": "ff", + "ffi": "ffi", + "ffl": "ffl", + "fi": "fi", + "fl": "fl", + "st": "st", + "ſt": "st", +} + + +def to_list(collection) -> list: + if isinstance(collection, list): + return collection + elif isinstance(collection, Sequence): + return list(collection) + elif hasattr(collection, "to_dict"): + res = collection.to_dict("records") # pragma: nocover + return res + else: + return list(collection) + + +class TextMap: + """ + A TextMap maps each unicode character in the text to an individual `char` + object (or, in the case of layout-implied whitespace, `None`). + """ + + def __init__(self, tuples=None) -> None: + self.tuples = tuples + self.as_string = "".join(map(itemgetter(0), tuples)) + + def match_to_dict( + self, + m, + main_group: int = 0, + return_groups: bool = True, + return_chars: bool = True, + ) -> dict: + subset = self.tuples[m.start(main_group) : m.end(main_group)] + chars = [c for (text, c) in subset if c is not None] + x0, top, x1, bottom = objects_to_bbox(chars) + + result = { + "text": m.group(main_group), + "x0": x0, + "top": top, + "x1": x1, + "bottom": bottom, + } + + if return_groups: + result["groups"] = m.groups() + + if return_chars: + result["chars"] = chars + + return result + + +class WordMap: + """ + A WordMap maps words->chars. + """ + + def __init__(self, tuples) -> None: + self.tuples = tuples + + def to_textmap( + self, + layout: bool = False, + layout_width=0, + layout_height=0, + layout_width_chars: int = 0, + layout_height_chars: int = 0, + x_density=DEFAULT_X_DENSITY, + y_density=DEFAULT_Y_DENSITY, + x_shift=0, + y_shift=0, + y_tolerance=DEFAULT_Y_TOLERANCE, + use_text_flow: bool = False, + presorted: bool = False, + expand_ligatures: bool = True, + ) -> TextMap: + """ + Given a list of (word, chars) tuples (i.e., a WordMap), return a list of + (char-text, char) tuples (i.e., a TextMap) that can be used to mimic the + structural layout of the text on the page(s), using the following approach: + + - Sort the words by (doctop, x0) if not already sorted. + + - Calculate the initial doctop for the starting page. + + - Cluster the words by doctop (taking `y_tolerance` into account), and + iterate through them. + + - For each cluster, calculate the distance between that doctop and the + initial doctop, in points, minus `y_shift`. Divide that distance by + `y_density` to calculate the minimum number of newlines that should come + before this cluster. Append that number of newlines *minus* the number of + newlines already appended, with a minimum of one. + + - Then for each cluster, iterate through each word in it. Divide each + word's x0, minus `x_shift`, by `x_density` to calculate the minimum + number of characters that should come before this cluster. Append that + number of spaces *minus* the number of characters and spaces already + appended, with a minimum of one. Then append the word's text. + + - At the termination of each line, add more spaces if necessary to + mimic `layout_width`. + + - Finally, add newlines to the end if necessary to mimic to + `layout_height`. + + Note: This approach currently works best for horizontal, left-to-right + text, but will display all words regardless of orientation. There is room + for improvement in better supporting right-to-left text, as well as + vertical text. + """ + _textmap = [] + + if not len(self.tuples): + return TextMap(_textmap) + + expansions = LIGATURES if expand_ligatures else {} + + if layout: + if layout_width_chars: + if layout_width: + raise ValueError( + "`layout_width` and `layout_width_chars` cannot both be set." + ) + else: + layout_width_chars = int(round(layout_width / x_density)) + + if layout_height_chars: + if layout_height: + raise ValueError( + "`layout_height` and `layout_height_chars` cannot both be set." + ) + else: + layout_height_chars = int(round(layout_height / y_density)) + + blank_line = [(" ", None)] * layout_width_chars + else: + blank_line = [] + + num_newlines = 0 + + words_sorted_doctop = ( + self.tuples + if presorted or use_text_flow + else sorted(self.tuples, key=lambda x: float(x[0]["doctop"])) + ) + + first_word = words_sorted_doctop[0][0] + doctop_start = first_word["doctop"] - first_word["top"] + + for i, ws in enumerate( + cluster_objects( + words_sorted_doctop, lambda x: float(x[0]["doctop"]), y_tolerance + ) + ): + y_dist = ( + (ws[0][0]["doctop"] - (doctop_start + y_shift)) / y_density + if layout + else 0 + ) + num_newlines_prepend = max( + # At least one newline, unless this iis the first line + int(i > 0), + # ... or as many as needed to get the imputed "distance" from the top + round(y_dist) - num_newlines, + ) + + for i in range(num_newlines_prepend): + if not len(_textmap) or _textmap[-1][0] == "\n": + _textmap += blank_line + _textmap.append(("\n", None)) + + num_newlines += num_newlines_prepend + + line_len = 0 + + line_words_sorted_x0 = ( + ws + if presorted or use_text_flow + else sorted(ws, key=lambda x: float(x[0]["x0"])) + ) + + for word, chars in line_words_sorted_x0: + x_dist = (word["x0"] - x_shift) / x_density if layout else 0 + num_spaces_prepend = max(min(1, line_len), round(x_dist) - line_len) + _textmap += [(" ", None)] * num_spaces_prepend + line_len += num_spaces_prepend + + for c in chars: + letters = expansions.get(c["text"], c["text"]) + for letter in letters: + _textmap.append((letter, c)) + line_len += 1 + + # Append spaces at end of line + if layout: + _textmap += [(" ", None)] * (layout_width_chars - line_len) + + # Append blank lines at end of text + if layout: + num_newlines_append = layout_height_chars - (num_newlines + 1) + for i in range(num_newlines_append): + if i > 0: + _textmap += blank_line + _textmap.append(("\n", None)) + + # Remove terminal newline + if _textmap[-1] == ("\n", None): + _textmap = _textmap[:-1] + + return TextMap(_textmap) + + +class WordExtractor: + def __init__( + self, + x_tolerance=DEFAULT_X_TOLERANCE, + y_tolerance=DEFAULT_Y_TOLERANCE, + keep_blank_chars: bool = False, + use_text_flow=False, + horizontal_ltr=True, # Should words be read left-to-right? + vertical_ttb=False, # Should vertical words be read top-to-bottom? + extra_attrs=None, + split_at_punctuation=False, + expand_ligatures=True, + ): + self.x_tolerance = x_tolerance + self.y_tolerance = y_tolerance + self.keep_blank_chars = keep_blank_chars + self.use_text_flow = use_text_flow + self.horizontal_ltr = horizontal_ltr + self.vertical_ttb = vertical_ttb + self.extra_attrs = [] if extra_attrs is None else extra_attrs + + # Note: string.punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' + self.split_at_punctuation = ( + string.punctuation + if split_at_punctuation is True + else (split_at_punctuation or "") + ) + + self.expansions = LIGATURES if expand_ligatures else {} + + def merge_chars(self, ordered_chars: list): + x0, top, x1, bottom = objects_to_bbox(ordered_chars) + doctop_adj = ordered_chars[0]["doctop"] - ordered_chars[0]["top"] + upright = ordered_chars[0]["upright"] + direction = 1 if (self.horizontal_ltr if upright else self.vertical_ttb) else -1 + + matrix = ordered_chars[0]["matrix"] + + rotation = 0 + if not upright and matrix[1] < 0: + ordered_chars = reversed(ordered_chars) + rotation = 270 + + if matrix[0] < 0 and matrix[3] < 0: + rotation = 180 + elif matrix[1] > 0: + rotation = 90 + + word = { + "text": "".join( + self.expansions.get(c["text"], c["text"]) for c in ordered_chars + ), + "x0": x0, + "x1": x1, + "top": top, + "doctop": top + doctop_adj, + "bottom": bottom, + "upright": upright, + "direction": direction, + "rotation": rotation, + } + + for key in self.extra_attrs: + word[key] = ordered_chars[0][key] + + return word + + def char_begins_new_word( + self, + prev_char, + curr_char, + ) -> bool: + """This method takes several factors into account to determine if + `curr_char` represents the beginning of a new word: + + - Whether the text is "upright" (i.e., non-rotated) + - Whether the user has specified that horizontal text runs + left-to-right (default) or right-to-left, as represented by + self.horizontal_ltr + - Whether the user has specified that vertical text the text runs + top-to-bottom (default) or bottom-to-top, as represented by + self.vertical_ttb + - The x0, top, x1, and bottom attributes of prev_char and + curr_char + - The self.x_tolerance and self.y_tolerance settings. Note: In + this case, x/y refer to those directions for non-rotated text. + For vertical text, they are flipped. A more accurate terminology + might be "*intra*line character distance tolerance" and + "*inter*line character distance tolerance" + + An important note: The *intra*line distance is measured from the + *end* of the previous character to the *beginning* of the current + character, while the *inter*line distance is measured from the + *top* of the previous character to the *top* of the next + character. The reasons for this are partly repository-historical, + and partly logical, as successive text lines' bounding boxes often + overlap slightly (and we don't want that overlap to be interpreted + as the two lines being the same line). + + The upright-ness of the character determines the attributes to + compare, while horizontal_ltr/vertical_ttb determine the direction + of the comparison. + """ + + # Note: Due to the grouping step earlier in the process, + # curr_char["upright"] will always equal prev_char["upright"]. + if curr_char["upright"]: + x = self.x_tolerance + y = self.y_tolerance + ay = prev_char["top"] + cy = curr_char["top"] + if self.horizontal_ltr: + ax = prev_char["x0"] + bx = prev_char["x1"] + cx = curr_char["x0"] + else: + ax = -prev_char["x1"] + bx = -prev_char["x0"] + cx = -curr_char["x1"] + + else: + x = self.y_tolerance + y = self.x_tolerance + ay = prev_char["x0"] + cy = curr_char["x0"] + if self.vertical_ttb: + ax = prev_char["top"] + bx = prev_char["bottom"] + cx = curr_char["top"] + else: + ax = -prev_char["bottom"] + bx = -prev_char["top"] + cx = -curr_char["bottom"] + + return bool( + # Intraline test + (cx < ax) + or (cx > bx + x) + # Interline test + or (cy > ay + y) + ) + + def iter_chars_to_words(self, ordered_chars): + current_word: list = [] + + def start_next_word(new_char=None): + nonlocal current_word + + if current_word: + yield current_word + + current_word = [] if new_char is None else [new_char] + + for char in ordered_chars: + text = char["text"] + + if not self.keep_blank_chars and text.isspace(): + yield from start_next_word(None) + + elif text in self.split_at_punctuation: + yield from start_next_word(char) + yield from start_next_word(None) + + elif current_word and self.char_begins_new_word(current_word[-1], char): + yield from start_next_word(char) + + else: + current_word.append(char) + + # Finally, after all chars processed + if current_word: + yield current_word + + def iter_sort_chars(self, chars): + def upright_key(x) -> int: + return -int(x["upright"]) + + for upright_cluster in cluster_objects(list(chars), upright_key, 0): + upright = upright_cluster[0]["upright"] + cluster_key = "doctop" if upright else "x0" + + # Cluster by line + subclusters = cluster_objects( + upright_cluster, itemgetter(cluster_key), self.y_tolerance + ) + + for sc in subclusters: + # Sort within line + sort_key = "x0" if upright else "doctop" + to_yield = sorted(sc, key=itemgetter(sort_key)) + + # Reverse order if necessary + if not (self.horizontal_ltr if upright else self.vertical_ttb): + yield from reversed(to_yield) + else: + yield from to_yield + + def iter_extract_tuples(self, chars): + ordered_chars = chars if self.use_text_flow else self.iter_sort_chars(chars) + + grouping_key = itemgetter("upright", *self.extra_attrs) + grouped_chars = itertools.groupby(ordered_chars, grouping_key) + + for keyvals, char_group in grouped_chars: + for word_chars in self.iter_chars_to_words(char_group): + yield (self.merge_chars(word_chars), word_chars) + + def extract_wordmap(self, chars) -> WordMap: + return WordMap(list(self.iter_extract_tuples(chars))) + + def extract_words(self, chars: list) -> list: + words = list(word for word, word_chars in self.iter_extract_tuples(chars)) + return words + + +def extract_words(chars: list, **kwargs) -> list: + return WordExtractor(**kwargs).extract_words(chars) + + +TEXTMAP_KWARGS = inspect.signature(WordMap.to_textmap).parameters.keys() +WORD_EXTRACTOR_KWARGS = inspect.signature(WordExtractor).parameters.keys() + + +def chars_to_textmap(chars: list, **kwargs) -> TextMap: + kwargs.update({"presorted": True}) + + extractor = WordExtractor( + **{k: kwargs[k] for k in WORD_EXTRACTOR_KWARGS if k in kwargs} + ) + wordmap = extractor.extract_wordmap(chars) + textmap = wordmap.to_textmap( + **{k: kwargs[k] for k in TEXTMAP_KWARGS if k in kwargs} + ) + + return textmap + + +def extract_text(chars: list, **kwargs) -> str: + chars = to_list(chars) + if len(chars) == 0: + return "" + + if kwargs.get("layout"): + return chars_to_textmap(chars, **kwargs).as_string + else: + y_tolerance = kwargs.get("y_tolerance", DEFAULT_Y_TOLERANCE) + extractor = WordExtractor( + **{k: kwargs[k] for k in WORD_EXTRACTOR_KWARGS if k in kwargs} + ) + words = extractor.extract_words(chars) + if words: + rotation = words[0]["rotation"] # rotation cannot change within a cell + else: + rotation = 0 + + if rotation == 90: + words.sort(key=lambda w: (w["x1"], -w["top"])) + lines = " ".join([w["text"] for w in words]) + elif rotation == 270: + words.sort(key=lambda w: (-w["x1"], w["top"])) + lines = " ".join([w["text"] for w in words]) + else: + lines = cluster_objects(words, itemgetter("doctop"), y_tolerance) + lines = "\n".join(" ".join(word["text"] for word in line) for line in lines) + if rotation == 180: # needs extra treatment + lines = "".join([(c if c != "\n" else " ") for c in reversed(lines)]) + + return lines + + +def collate_line( + line_chars: list, + tolerance=DEFAULT_X_TOLERANCE, +) -> str: + coll = "" + last_x1 = None + for char in sorted(line_chars, key=itemgetter("x0")): + if (last_x1 is not None) and (char["x0"] > (last_x1 + tolerance)): + coll += " " + last_x1 = char["x1"] + coll += char["text"] + return coll + + +def dedupe_chars(chars: list, tolerance=1) -> list: + """ + Removes duplicate chars — those sharing the same text, fontname, size, + and positioning (within `tolerance`) as other characters in the set. + """ + key = itemgetter("fontname", "size", "upright", "text") + pos_key = itemgetter("doctop", "x0") + + def yield_unique_chars(chars: list): + sorted_chars = sorted(chars, key=key) + for grp, grp_chars in itertools.groupby(sorted_chars, key=key): + for y_cluster in cluster_objects( + list(grp_chars), itemgetter("doctop"), tolerance + ): + for x_cluster in cluster_objects( + y_cluster, itemgetter("x0"), tolerance + ): + yield sorted(x_cluster, key=pos_key)[0] + + deduped = yield_unique_chars(chars) + return sorted(deduped, key=chars.index) + + +def line_to_edge(line): + edge = dict(line) + edge["orientation"] = "h" if (line["top"] == line["bottom"]) else "v" + return edge + + +def rect_to_edges(rect) -> list: + top, bottom, left, right = [dict(rect) for x in range(4)] + top.update( + { + "object_type": "rect_edge", + "height": 0, + "y0": rect["y1"], + "bottom": rect["top"], + "orientation": "h", + } + ) + bottom.update( + { + "object_type": "rect_edge", + "height": 0, + "y1": rect["y0"], + "top": rect["top"] + rect["height"], + "doctop": rect["doctop"] + rect["height"], + "orientation": "h", + } + ) + left.update( + { + "object_type": "rect_edge", + "width": 0, + "x1": rect["x0"], + "orientation": "v", + } + ) + right.update( + { + "object_type": "rect_edge", + "width": 0, + "x0": rect["x1"], + "orientation": "v", + } + ) + return [top, bottom, left, right] + + +def curve_to_edges(curve) -> list: + point_pairs = zip(curve["pts"], curve["pts"][1:]) + return [ + { + "object_type": "curve_edge", + "x0": min(p0[0], p1[0]), + "x1": max(p0[0], p1[0]), + "top": min(p0[1], p1[1]), + "doctop": min(p0[1], p1[1]) + (curve["doctop"] - curve["top"]), + "bottom": max(p0[1], p1[1]), + "width": abs(p0[0] - p1[0]), + "height": abs(p0[1] - p1[1]), + "orientation": "v" if p0[0] == p1[0] else ("h" if p0[1] == p1[1] else None), + } + for p0, p1 in point_pairs + ] + + +def obj_to_edges(obj) -> list: + t = obj["object_type"] + if "_edge" in t: + return [obj] + elif t == "line": + return [line_to_edge(obj)] + else: + return {"rect": rect_to_edges, "curve": curve_to_edges}[t](obj) + + +def filter_edges( + edges, + orientation=None, + edge_type=None, + min_length=1, +) -> list: + if orientation not in ("v", "h", None): + raise ValueError("Orientation must be 'v' or 'h'") + + def test(e) -> bool: + dim = "height" if e["orientation"] == "v" else "width" + et_correct = e["object_type"] == edge_type if edge_type is not None else True + orient_correct = orientation is None or e["orientation"] == orientation + return bool(et_correct and orient_correct and (e[dim] >= min_length)) + + return list(filter(test, edges)) + + +def cluster_list(xs, tolerance=0) -> list: + if tolerance == 0: + return [[x] for x in sorted(xs)] + if len(xs) < 2: + return [[x] for x in sorted(xs)] + groups = [] + xs = list(sorted(xs)) + current_group = [xs[0]] + last = xs[0] + for x in xs[1:]: + if x <= (last + tolerance): + current_group.append(x) + else: + groups.append(current_group) + current_group = [x] + last = x + groups.append(current_group) + return groups + + +def make_cluster_dict(values, tolerance) -> dict: + clusters = cluster_list(list(set(values)), tolerance) + + nested_tuples = [ + [(val, i) for val in value_cluster] for i, value_cluster in enumerate(clusters) + ] + + return dict(itertools.chain(*nested_tuples)) + + +def cluster_objects(xs, key_fn, tolerance) -> list: + if not callable(key_fn): + key_fn = itemgetter(key_fn) + + values = map(key_fn, xs) + cluster_dict = make_cluster_dict(values, tolerance) + + get_0, get_1 = itemgetter(0), itemgetter(1) + + cluster_tuples = sorted(((x, cluster_dict.get(key_fn(x))) for x in xs), key=get_1) + + grouped = itertools.groupby(cluster_tuples, key=get_1) + + return [list(map(get_0, v)) for k, v in grouped] + + +def move_object(obj, axis: str, value): + assert axis in ("h", "v") + if axis == "h": + new_items = [ + ("x0", obj["x0"] + value), + ("x1", obj["x1"] + value), + ] + if axis == "v": + new_items = [ + ("top", obj["top"] + value), + ("bottom", obj["bottom"] + value), + ] + if "doctop" in obj: + new_items += [("doctop", obj["doctop"] + value)] + if "y0" in obj: + new_items += [ + ("y0", obj["y0"] - value), + ("y1", obj["y1"] - value), + ] + return obj.__class__(tuple(obj.items()) + tuple(new_items)) + + +def snap_objects(objs, attr: str, tolerance) -> list: + axis = {"x0": "h", "x1": "h", "top": "v", "bottom": "v"}[attr] + list_objs = list(objs) + clusters = cluster_objects(list_objs, itemgetter(attr), tolerance) + avgs = [sum(map(itemgetter(attr), cluster)) / len(cluster) for cluster in clusters] + snapped_clusters = [ + [move_object(obj, axis, avg - obj[attr]) for obj in cluster] + for cluster, avg in zip(clusters, avgs) + ] + return list(itertools.chain(*snapped_clusters)) + + +def snap_edges( + edges, + x_tolerance=DEFAULT_SNAP_TOLERANCE, + y_tolerance=DEFAULT_SNAP_TOLERANCE, +): + """ + Given a list of edges, snap any within `tolerance` pixels of one another + to their positional average. + """ + by_orientation = {"v": [], "h": []} + for e in edges: + by_orientation[e["orientation"]].append(e) + + snapped_v = snap_objects(by_orientation["v"], "x0", x_tolerance) + snapped_h = snap_objects(by_orientation["h"], "top", y_tolerance) + return snapped_v + snapped_h + + +def resize_object(obj, key: str, value): + assert key in ("x0", "x1", "top", "bottom") + old_value = obj[key] + diff = value - old_value + new_items = [ + (key, value), + ] + if key == "x0": + assert value <= obj["x1"] + new_items.append(("width", obj["x1"] - value)) + elif key == "x1": + assert value >= obj["x0"] + new_items.append(("width", value - obj["x0"])) + elif key == "top": + assert value <= obj["bottom"] + new_items.append(("doctop", obj["doctop"] + diff)) + new_items.append(("height", obj["height"] - diff)) + if "y1" in obj: + new_items.append(("y1", obj["y1"] - diff)) + elif key == "bottom": + assert value >= obj["top"] + new_items.append(("height", obj["height"] + diff)) + if "y0" in obj: + new_items.append(("y0", obj["y0"] - diff)) + return obj.__class__(tuple(obj.items()) + tuple(new_items)) + + +def join_edge_group(edges, orientation: str, tolerance=DEFAULT_JOIN_TOLERANCE): + """ + Given a list of edges along the same infinite line, join those that + are within `tolerance` pixels of one another. + """ + if orientation == "h": + min_prop, max_prop = "x0", "x1" + elif orientation == "v": + min_prop, max_prop = "top", "bottom" + else: + raise ValueError("Orientation must be 'v' or 'h'") + + sorted_edges = list(sorted(edges, key=itemgetter(min_prop))) + joined = [sorted_edges[0]] + for e in sorted_edges[1:]: + last = joined[-1] + if e[min_prop] <= (last[max_prop] + tolerance): + if e[max_prop] > last[max_prop]: + # Extend current edge to new extremity + joined[-1] = resize_object(last, max_prop, e[max_prop]) + else: + # Edge is separate from previous edges + joined.append(e) + + return joined + + +def merge_edges( + edges, + snap_x_tolerance, + snap_y_tolerance, + join_x_tolerance, + join_y_tolerance, +): + """ + Using the `snap_edges` and `join_edge_group` methods above, + merge a list of edges into a more "seamless" list. + """ + + def get_group(edge): + if edge["orientation"] == "h": + return ("h", edge["top"]) + else: + return ("v", edge["x0"]) + + if snap_x_tolerance > 0 or snap_y_tolerance > 0: + edges = snap_edges(edges, snap_x_tolerance, snap_y_tolerance) + + _sorted = sorted(edges, key=get_group) + edge_groups = itertools.groupby(_sorted, key=get_group) + edge_gen = ( + join_edge_group( + items, k[0], (join_x_tolerance if k[0] == "h" else join_y_tolerance) + ) + for k, items in edge_groups + ) + edges = list(itertools.chain(*edge_gen)) + return edges + + +def bbox_to_rect(bbox) -> dict: + """ + Return the rectangle (i.e a dict with keys "x0", "top", "x1", + "bottom") for an object. + """ + return {"x0": bbox[0], "top": bbox[1], "x1": bbox[2], "bottom": bbox[3]} + + +def objects_to_rect(objects) -> dict: + """ + Given an iterable of objects, return the smallest rectangle (i.e. a + dict with "x0", "top", "x1", and "bottom" keys) that contains them + all. + """ + return bbox_to_rect(objects_to_bbox(objects)) + + +def merge_bboxes(bboxes): + """ + Given an iterable of bounding boxes, return the smallest bounding box + that contains them all. + """ + x0, top, x1, bottom = zip(*bboxes) + return (min(x0), min(top), max(x1), max(bottom)) + + +def objects_to_bbox(objects): + """ + Given an iterable of objects, return the smallest bounding box that + contains them all. + """ + return merge_bboxes(map(bbox_getter, objects)) + + +def words_to_edges_h(words, word_threshold: int = DEFAULT_MIN_WORDS_HORIZONTAL): + """ + Find (imaginary) horizontal lines that connect the tops + of at least `word_threshold` words. + """ + by_top = cluster_objects(words, itemgetter("top"), 1) + large_clusters = filter(lambda x: len(x) >= word_threshold, by_top) + rects = list(map(objects_to_rect, large_clusters)) + if len(rects) == 0: + return [] + min_x0 = min(map(itemgetter("x0"), rects)) + max_x1 = max(map(itemgetter("x1"), rects)) + + edges = [] + for r in rects: + edges += [ + # Top of text + { + "x0": min_x0, + "x1": max_x1, + "top": r["top"], + "bottom": r["top"], + "width": max_x1 - min_x0, + "orientation": "h", + }, + # For each detected row, we also add the 'bottom' line. This will + # generate extra edges, (some will be redundant with the next row + # 'top' line), but this catches the last row of every table. + { + "x0": min_x0, + "x1": max_x1, + "top": r["bottom"], + "bottom": r["bottom"], + "width": max_x1 - min_x0, + "orientation": "h", + }, + ] + + return edges + + +def get_bbox_overlap(a, b): + a_left, a_top, a_right, a_bottom = a + b_left, b_top, b_right, b_bottom = b + o_left = max(a_left, b_left) + o_right = min(a_right, b_right) + o_bottom = min(a_bottom, b_bottom) + o_top = max(a_top, b_top) + o_width = o_right - o_left + o_height = o_bottom - o_top + if o_height >= 0 and o_width >= 0 and o_height + o_width > 0: + return (o_left, o_top, o_right, o_bottom) + else: + return None + + +def words_to_edges_v(words, word_threshold: int = DEFAULT_MIN_WORDS_VERTICAL): + """ + Find (imaginary) vertical lines that connect the left, right, or + center of at least `word_threshold` words. + """ + # Find words that share the same left, right, or centerpoints + by_x0 = cluster_objects(words, itemgetter("x0"), 1) + by_x1 = cluster_objects(words, itemgetter("x1"), 1) + + def get_center(word): + return float(word["x0"] + word["x1"]) / 2 + + by_center = cluster_objects(words, get_center, 1) + clusters = by_x0 + by_x1 + by_center + + # Find the points that align with the most words + sorted_clusters = sorted(clusters, key=lambda x: -len(x)) + large_clusters = filter(lambda x: len(x) >= word_threshold, sorted_clusters) + + # For each of those points, find the bboxes fitting all matching words + bboxes = list(map(objects_to_bbox, large_clusters)) + + # Iterate through those bboxes, condensing overlapping bboxes + condensed_bboxes = [] + for bbox in bboxes: + overlap = any(get_bbox_overlap(bbox, c) for c in condensed_bboxes) + if not overlap: + condensed_bboxes.append(bbox) + + if not condensed_bboxes: + return [] + + condensed_rects = map(bbox_to_rect, condensed_bboxes) + sorted_rects = list(sorted(condensed_rects, key=itemgetter("x0"))) + + max_x1 = max(map(itemgetter("x1"), sorted_rects)) + min_top = min(map(itemgetter("top"), sorted_rects)) + max_bottom = max(map(itemgetter("bottom"), sorted_rects)) + + return [ + { + "x0": b["x0"], + "x1": b["x0"], + "top": min_top, + "bottom": max_bottom, + "height": max_bottom - min_top, + "orientation": "v", + } + for b in sorted_rects + ] + [ + { + "x0": max_x1, + "x1": max_x1, + "top": min_top, + "bottom": max_bottom, + "height": max_bottom - min_top, + "orientation": "v", + } + ] + + +def edges_to_intersections(edges, x_tolerance=1, y_tolerance=1) -> dict: + """ + Given a list of edges, return the points at which they intersect + within `tolerance` pixels. + """ + intersections = {} + v_edges, h_edges = [ + list(filter(lambda x: x["orientation"] == o, edges)) for o in ("v", "h") + ] + for v in sorted(v_edges, key=itemgetter("x0", "top")): + for h in sorted(h_edges, key=itemgetter("top", "x0")): + if ( + (v["top"] <= (h["top"] + y_tolerance)) + and (v["bottom"] >= (h["top"] - y_tolerance)) + and (v["x0"] >= (h["x0"] - x_tolerance)) + and (v["x0"] <= (h["x1"] + x_tolerance)) + ): + vertex = (v["x0"], h["top"]) + if vertex not in intersections: + intersections[vertex] = {"v": [], "h": []} + intersections[vertex]["v"].append(v) + intersections[vertex]["h"].append(h) + return intersections + + +def obj_to_bbox(obj): + """ + Return the bounding box for an object. + """ + return bbox_getter(obj) + + +def intersections_to_cells(intersections): + """ + Given a list of points (`intersections`), return all rectangular "cells" + that those points describe. + + `intersections` should be a dictionary with (x0, top) tuples as keys, + and a list of edge objects as values. The edge objects should correspond + to the edges that touch the intersection. + """ + + def edge_connects(p1, p2) -> bool: + def edges_to_set(edges): + return set(map(obj_to_bbox, edges)) + + if p1[0] == p2[0]: + common = edges_to_set(intersections[p1]["v"]).intersection( + edges_to_set(intersections[p2]["v"]) + ) + if len(common): + return True + + if p1[1] == p2[1]: + common = edges_to_set(intersections[p1]["h"]).intersection( + edges_to_set(intersections[p2]["h"]) + ) + if len(common): + return True + return False + + points = list(sorted(intersections.keys())) + n_points = len(points) + + def find_smallest_cell(points, i: int): + if i == n_points - 1: + return None + pt = points[i] + rest = points[i + 1 :] + # Get all the points directly below and directly right + below = [x for x in rest if x[0] == pt[0]] + right = [x for x in rest if x[1] == pt[1]] + for below_pt in below: + if not edge_connects(pt, below_pt): + continue + + for right_pt in right: + if not edge_connects(pt, right_pt): + continue + + bottom_right = (right_pt[0], below_pt[1]) + + if ( + (bottom_right in intersections) + and edge_connects(bottom_right, right_pt) + and edge_connects(bottom_right, below_pt) + ): + return (pt[0], pt[1], bottom_right[0], bottom_right[1]) + return None + + cell_gen = (find_smallest_cell(points, i) for i in range(len(points))) + return list(filter(None, cell_gen)) + + +def cells_to_tables(page, cells) -> list: + """ + Given a list of bounding boxes (`cells`), return a list of tables that + hold those cells most simply (and contiguously). + """ + + def bbox_to_corners(bbox) -> tuple: + x0, top, x1, bottom = bbox + return ((x0, top), (x0, bottom), (x1, top), (x1, bottom)) + + remaining_cells = list(cells) + + # Iterate through the cells found above, and assign them + # to contiguous tables + + current_corners = set() + current_cells = [] + + tables = [] + while len(remaining_cells): + initial_cell_count = len(current_cells) + for cell in list(remaining_cells): + cell_corners = bbox_to_corners(cell) + # If we're just starting a table ... + if len(current_cells) == 0: + # ... immediately assign it to the empty group + current_corners |= set(cell_corners) + current_cells.append(cell) + remaining_cells.remove(cell) + else: + # How many corners does this table share with the current group? + corner_count = sum(c in current_corners for c in cell_corners) + + # If touching on at least one corner... + if corner_count > 0: + # ... assign it to the current group + current_corners |= set(cell_corners) + current_cells.append(cell) + remaining_cells.remove(cell) + + # If this iteration did not find any more cells to append... + if len(current_cells) == initial_cell_count: + # ... start a new cell group + tables.append(list(current_cells)) + current_corners.clear() + current_cells.clear() + + # Once we have exhausting the list of cells ... + + # ... and we have a cell group that has not been stored + if len(current_cells): + # ... store it. + tables.append(list(current_cells)) + + # PyMuPDF modification: + # Remove tables without text or having only 1 column + for i in range(len(tables) - 1, -1, -1): + r = pymupdf.EMPTY_RECT() + x1_vals = set() + x0_vals = set() + for c in tables[i]: + r |= c + x1_vals.add(c[2]) + x0_vals.add(c[0]) + if ( + len(x1_vals) < 2 + or len(x0_vals) < 2 + or white_spaces.issuperset( + page.get_textbox( + r, + textpage=TEXTPAGE, + ) + ) + ): + del tables[i] + + # Sort the tables top-to-bottom-left-to-right based on the value of the + # topmost-and-then-leftmost coordinate of a table. + _sorted = sorted(tables, key=lambda t: min((c[1], c[0]) for c in t)) + return _sorted + + +class CellGroup: + def __init__(self, cells): + self.cells = cells + self.bbox = ( + min(map(itemgetter(0), filter(None, cells))), + min(map(itemgetter(1), filter(None, cells))), + max(map(itemgetter(2), filter(None, cells))), + max(map(itemgetter(3), filter(None, cells))), + ) + + +class TableRow(CellGroup): + pass + + +class TableHeader: + """PyMuPDF extension containing the identified table header.""" + + def __init__(self, bbox, cells, names, above): + self.bbox = bbox + self.cells = cells + self.names = names + self.external = above + + +class Table: + def __init__(self, page, cells): + self.page = page + self.cells = cells + self.header = self._get_header() # PyMuPDF extension + + @property + def bbox(self): + c = self.cells + return ( + min(map(itemgetter(0), c)), + min(map(itemgetter(1), c)), + max(map(itemgetter(2), c)), + max(map(itemgetter(3), c)), + ) + + @property + def rows(self) -> list: + _sorted = sorted(self.cells, key=itemgetter(1, 0)) + xs = list(sorted(set(map(itemgetter(0), self.cells)))) + rows = [] + for y, row_cells in itertools.groupby(_sorted, itemgetter(1)): + xdict = {cell[0]: cell for cell in row_cells} + row = TableRow([xdict.get(x) for x in xs]) + rows.append(row) + return rows + + @property + def row_count(self) -> int: # PyMuPDF extension + return len(self.rows) + + @property + def col_count(self) -> int: # PyMuPDF extension + return max([len(r.cells) for r in self.rows]) + + def extract(self, **kwargs) -> list: + chars = CHARS + table_arr = [] + + def char_in_bbox(char, bbox) -> bool: + v_mid = (char["top"] + char["bottom"]) / 2 + h_mid = (char["x0"] + char["x1"]) / 2 + x0, top, x1, bottom = bbox + return bool( + (h_mid >= x0) and (h_mid < x1) and (v_mid >= top) and (v_mid < bottom) + ) + + for row in self.rows: + arr = [] + row_chars = [char for char in chars if char_in_bbox(char, row.bbox)] + + for cell in row.cells: + if cell is None: + cell_text = None + else: + cell_chars = [ + char for char in row_chars if char_in_bbox(char, cell) + ] + + if len(cell_chars): + kwargs["x_shift"] = cell[0] + kwargs["y_shift"] = cell[1] + if "layout" in kwargs: + kwargs["layout_width"] = cell[2] - cell[0] + kwargs["layout_height"] = cell[3] - cell[1] + cell_text = extract_text(cell_chars, **kwargs) + else: + cell_text = "" + arr.append(cell_text) + table_arr.append(arr) + + return table_arr + + def to_markdown(self, clean=False, fill_empty=True): + """Output table content as a string in Github-markdown format. + + If "clean" then markdown syntax is removed from cell content. + If "fill_empty" then cell content None is replaced by the values + above (columns) or left (rows) in an effort to approximate row and + columns spans. + + """ + output = "|" + rows = self.row_count + cols = self.col_count + + # cell coordinates + cell_boxes = [[c for c in r.cells] for r in self.rows] + + # cell text strings + cells = [[None for i in range(cols)] for j in range(rows)] + for i, row in enumerate(cell_boxes): + for j, cell in enumerate(row): + if cell is not None: + cells[i][j] = extract_cells( + TEXTPAGE, cell_boxes[i][j], markdown=True + ) + + if fill_empty: # fill "None" cells where possible + + # for rows, copy content from left to right + for j in range(rows): + for i in range(cols - 1): + if cells[j][i + 1] is None: + cells[j][i + 1] = cells[j][i] + + # for columns, copy top to bottom + for i in range(cols): + for j in range(rows - 1): + if cells[j + 1][i] is None: + cells[j + 1][i] = cells[j][i] + + # generate header string and MD separator + for i, name in enumerate(self.header.names): + if not name: # generate a name if empty + name = f"Col{i+1}" + name = name.replace("\n", "
") # use HTML line breaks + if clean: # remove sensitive syntax + name = html.escape(name.replace("-", "-")) + output += name + "|" + + output += "\n" + # insert GitHub header line separator + output += "|" + "|".join("---" for i in range(self.col_count)) + "|\n" + + # skip first row in details if header is part of the table + j = 0 if self.header.external else 1 + + # iterate over detail rows + for row in cells[j:]: + line = "|" + for i, cell in enumerate(row): + # replace None cells with empty string + # use HTML line break tag + if cell is None: + cell = "" + if clean: # remove sensitive syntax + cell = html.escape(cell.replace("-", "-")) + line += cell + "|" + line += "\n" + output += line + return output + "\n" + + def to_pandas(self, **kwargs): + """Return a pandas DataFrame version of the table.""" + try: + import pandas as pd + except ModuleNotFoundError: + pymupdf.message("Package 'pandas' is not installed") + raise + + pd_dict = {} + extract = self.extract() + hdr = self.header + names = self.header.names + hdr_len = len(names) + # ensure uniqueness of column names + for i in range(hdr_len): + name = names[i] + if not name: + names[i] = f"Col{i}" + if hdr_len != len(set(names)): + for i in range(hdr_len): + name = names[i] + if name != f"Col{i}": + names[i] = f"{i}-{name}" + + if not hdr.external: # header is part of 'extract' + extract = extract[1:] + + for i in range(hdr_len): + key = names[i] + value = [] + for j in range(len(extract)): + value.append(extract[j][i]) + pd_dict[key] = value + + return pd.DataFrame(pd_dict) + + def _get_header(self, y_tolerance=3): + """Identify the table header. + + *** PyMuPDF extension. *** + + Starting from the first line above the table upwards, check if it + qualifies to be part of the table header. + + Criteria include: + * A one-line table never has an extra header. + * Column borders must not intersect any word. If this happens, all + text of this line and above of it is ignored. + * No excess inter-line distance: If a line further up has a distance + of more than 1.5 times of its font size, it will be ignored and + all lines above of it. + * Must have same text properties. + * Starting with the top table line, a bold text property cannot change + back to non-bold. + + If not all criteria are met (or there is no text above the table), + the first table row is assumed to be the header. + """ + page = self.page + y_delta = y_tolerance + + def top_row_bg_color(self): + """ + Compare top row background color with color of same-sized bbox + above. If different, return True indicating that the original + table top row is already the header. + """ + bbox0 = pymupdf.Rect(self.rows[0].bbox) + bboxt = bbox0 + (0, -bbox0.height, 0, -bbox0.height) # area above + top_color0 = page.get_pixmap(clip=bbox0).color_topusage()[1] + top_colort = page.get_pixmap(clip=bboxt).color_topusage()[1] + if top_color0 != top_colort: + return True # top row is header + return False + + def row_has_bold(bbox): + """Check if a row contains some bold text. + + If e.g. true for the top row, then it will be used as (internal) + column header row if any of the following is true: + * the previous (above) text line has no bold span + * the second table row text has no bold span + + Returns True if any spans are bold else False. + """ + blocks = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT, clip=bbox)[ + "blocks" + ] + spans = [s for b in blocks for l in b["lines"] for s in l["spans"]] + + return any(s["flags"] & pymupdf.TEXT_FONT_BOLD for s in spans) + + try: + row = self.rows[0] + cells = row.cells + bbox = pymupdf.Rect(row.bbox) + except IndexError: # this table has no rows + return None + + # return this if we determine that the top row is the header + header_top_row = TableHeader(bbox, cells, self.extract()[0], False) + + # 1-line tables have no extra header + if len(self.rows) < 2: + return header_top_row + + # 1-column tables have no extra header + if len(cells) < 2: + return header_top_row + + # assume top row is the header if second row is empty + row2 = self.rows[1] # second row + if all(c is None for c in row2.cells): # no valid cell bboxes in row2 + return header_top_row + + # Special check: is top row bold? + top_row_bold = row_has_bold(bbox) + + # assume top row is header if it is bold and any cell + # of 2nd row is non-bold + if top_row_bold and not row_has_bold(row2.bbox): + return header_top_row + + if top_row_bg_color(self): + # if area above top row has a different background color, + # then top row is already the header + return header_top_row + + # column coordinates (x1 values) in top row + col_x = [c[2] if c is not None else None for c in cells[:-1]] + + # clip = page area above the table + # We will inspect this area for text qualifying as column header. + clip = +bbox # take row 0 bbox + clip.y0 = 0 # start at top of page + clip.y1 = bbox.y0 # end at top of table + + blocks = page.get_text("dict", clip=clip, flags=pymupdf.TEXTFLAGS_TEXT)[ + "blocks" + ] + # non-empty, non-superscript spans above table, sorted descending by y1 + spans = sorted( + [ + s + for b in blocks + for l in b["lines"] + for s in l["spans"] + if not ( + white_spaces.issuperset(s["text"]) + or s["flags"] & pymupdf.TEXT_FONT_SUPERSCRIPT + ) + ], + key=lambda s: s["bbox"][3], + reverse=True, + ) + + select = [] # y1 coordinates above, sorted descending + line_heights = [] # line heights above, sorted descending + line_bolds = [] # bold indicator per line above, same sorting + + # walk through the spans and fill above 3 lists + for i in range(len(spans)): + s = spans[i] + y1 = s["bbox"][3] # span bottom + h = y1 - s["bbox"][1] # span bbox height + bold = s["flags"] & pymupdf.TEXT_FONT_BOLD + + # use first item to start the lists + if i == 0: + select.append(y1) + line_heights.append(h) + line_bolds.append(bold) + continue + + # get previous items from the 3 lists + y0 = select[-1] + h0 = line_heights[-1] + bold0 = line_bolds[-1] + + if bold0 and not bold: + break # stop if switching from bold to non-bold + + # if fitting in height of previous span, modify bbox + if y0 - y1 <= y_delta or abs((y0 - h0) - s["bbox"][1]) <= y_delta: + s["bbox"] = (s["bbox"][0], y0 - h0, s["bbox"][2], y0) + spans[i] = s + if bold: + line_bolds[-1] = bold + continue + elif y0 - y1 > 1.5 * h0: + break # stop if distance to previous line too large + select.append(y1) + line_heights.append(h) + line_bolds.append(bold) + + if select == []: # nothing above the table? + return header_top_row + + select = select[:5] # accept up to 5 lines for an external header + + # assume top row as header if text above is too far away + if bbox.y0 - select[0] >= line_heights[0]: + return header_top_row + + # accept top row as header if bold, but line above is not + if top_row_bold and not line_bolds[0]: + return header_top_row + + if spans == []: # nothing left above the table, return top row + return header_top_row + + # re-compute clip above table + nclip = pymupdf.EMPTY_RECT() + for s in [s for s in spans if s["bbox"][3] >= select[-1]]: + nclip |= s["bbox"] + if not nclip.is_empty: + clip = nclip + + clip.y1 = bbox.y0 # make sure we still include every word above + + # Confirm that no word in clip is intersecting a column separator + word_rects = [pymupdf.Rect(w[:4]) for w in page.get_text("words", clip=clip)] + word_tops = sorted(list(set([r[1] for r in word_rects])), reverse=True) + + select = [] + + # exclude lines with words that intersect a column border + for top in word_tops: + intersecting = [ + (x, r) + for x in col_x + if x is not None + for r in word_rects + if r[1] == top and r[0] < x and r[2] > x + ] + if intersecting == []: + select.append(top) + else: # detected a word crossing a column border + break + + if select == []: # nothing left over: return first row + return header_top_row + + hdr_bbox = +clip # compute the header cells + hdr_bbox.y0 = select[-1] # hdr_bbox top is smallest top coord of words + hdr_cells = [ + (c[0], hdr_bbox.y0, c[2], hdr_bbox.y1) if c is not None else None + for c in cells + ] + + # adjust left/right of header bbox + hdr_bbox.x0 = self.bbox[0] + hdr_bbox.x1 = self.bbox[2] + + # column names: no line breaks, no excess spaces + hdr_names = [ + ( + page.get_textbox(c).replace("\n", " ").replace(" ", " ").strip() + if c is not None + else "" + ) + for c in hdr_cells + ] + return TableHeader(tuple(hdr_bbox), hdr_cells, hdr_names, True) + + +@dataclass +class TableSettings: + vertical_strategy: str = "lines" + horizontal_strategy: str = "lines" + explicit_vertical_lines: list = None + explicit_horizontal_lines: list = None + snap_tolerance: float = DEFAULT_SNAP_TOLERANCE + snap_x_tolerance: float = UNSET + snap_y_tolerance: float = UNSET + join_tolerance: float = DEFAULT_JOIN_TOLERANCE + join_x_tolerance: float = UNSET + join_y_tolerance: float = UNSET + edge_min_length: float = 3 + min_words_vertical: float = DEFAULT_MIN_WORDS_VERTICAL + min_words_horizontal: float = DEFAULT_MIN_WORDS_HORIZONTAL + intersection_tolerance: float = 3 + intersection_x_tolerance: float = UNSET + intersection_y_tolerance: float = UNSET + text_settings: dict = None + + def __post_init__(self) -> "TableSettings": + """Clean up user-provided table settings. + + Validates that the table settings provided consists of acceptable values and + returns a cleaned up version. The cleaned up version fills out the missing + values with the default values in the provided settings. + + TODO: Can be further used to validate that the values are of the correct + type. For example, raising a value error when a non-boolean input is + provided for the key ``keep_blank_chars``. + + :param table_settings: User-provided table settings. + :returns: A cleaned up version of the user-provided table settings. + :raises ValueError: When an unrecognised key is provided. + """ + + for setting in NON_NEGATIVE_SETTINGS: + if (getattr(self, setting) or 0) < 0: + raise ValueError(f"Table setting '{setting}' cannot be negative") + + for orientation in ["horizontal", "vertical"]: + strategy = getattr(self, orientation + "_strategy") + if strategy not in TABLE_STRATEGIES: + raise ValueError( + f"{orientation}_strategy must be one of" + f'{{{",".join(TABLE_STRATEGIES)}}}' + ) + + if self.text_settings is None: + self.text_settings = {} + + # This next section is for backwards compatibility + for attr in ["x_tolerance", "y_tolerance"]: + if attr not in self.text_settings: + self.text_settings[attr] = self.text_settings.get("tolerance", 3) + + if "tolerance" in self.text_settings: + del self.text_settings["tolerance"] + # End of that section + + for attr, fallback in [ + ("snap_x_tolerance", "snap_tolerance"), + ("snap_y_tolerance", "snap_tolerance"), + ("join_x_tolerance", "join_tolerance"), + ("join_y_tolerance", "join_tolerance"), + ("intersection_x_tolerance", "intersection_tolerance"), + ("intersection_y_tolerance", "intersection_tolerance"), + ]: + if getattr(self, attr) is UNSET: + setattr(self, attr, getattr(self, fallback)) + + return self + + @classmethod + def resolve(cls, settings=None): + if settings is None: + return cls() + elif isinstance(settings, cls): + return settings + elif isinstance(settings, dict): + core_settings = {} + text_settings = {} + for k, v in settings.items(): + if k[:5] == "text_": + text_settings[k[5:]] = v + else: + core_settings[k] = v + core_settings["text_settings"] = text_settings + return cls(**core_settings) + else: + raise ValueError(f"Cannot resolve settings: {settings}") + + +class TableFinder: + """ + Given a PDF page, find plausible table structures. + + Largely borrowed from Anssi Nurminen's master's thesis: + http://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 + + ... and inspired by Tabula: + https://github.com/tabulapdf/tabula-extractor/issues/16 + """ + + def __init__(self, page, settings=None): + self.page = weakref.proxy(page) + self.settings = TableSettings.resolve(settings) + self.edges = self.get_edges() + self.intersections = edges_to_intersections( + self.edges, + self.settings.intersection_x_tolerance, + self.settings.intersection_y_tolerance, + ) + self.cells = intersections_to_cells(self.intersections) + self.tables = [ + Table(self.page, cell_group) + for cell_group in cells_to_tables(self.page, self.cells) + ] + + def get_edges(self) -> list: + settings = self.settings + + for orientation in ["vertical", "horizontal"]: + strategy = getattr(settings, orientation + "_strategy") + if strategy == "explicit": + lines = getattr(settings, "explicit_" + orientation + "_lines") + if len(lines) < 2: + raise ValueError( + f"If {orientation}_strategy == 'explicit', " + f"explicit_{orientation}_lines " + f"must be specified as a list/tuple of two or more " + f"floats/ints." + ) + + v_strat = settings.vertical_strategy + h_strat = settings.horizontal_strategy + + if v_strat == "text" or h_strat == "text": + words = extract_words(CHARS, **(settings.text_settings or {})) + else: + words = [] + + v_explicit = [] + for desc in settings.explicit_vertical_lines or []: + if isinstance(desc, dict): + for e in obj_to_edges(desc): + if e["orientation"] == "v": + v_explicit.append(e) + else: + v_explicit.append( + { + "x0": desc, + "x1": desc, + "top": self.page.rect[1], + "bottom": self.page.rect[3], + "height": self.page.rect[3] - self.page.rect[1], + "orientation": "v", + } + ) + + if v_strat == "lines": + v_base = filter_edges(EDGES, "v") + elif v_strat == "lines_strict": + v_base = filter_edges(EDGES, "v", edge_type="line") + elif v_strat == "text": + v_base = words_to_edges_v(words, word_threshold=settings.min_words_vertical) + elif v_strat == "explicit": + v_base = [] + else: + v_base = [] + + v = v_base + v_explicit + + h_explicit = [] + for desc in settings.explicit_horizontal_lines or []: + if isinstance(desc, dict): + for e in obj_to_edges(desc): + if e["orientation"] == "h": + h_explicit.append(e) + else: + h_explicit.append( + { + "x0": self.page.rect[0], + "x1": self.page.rect[2], + "width": self.page.rect[2] - self.page.rect[0], + "top": desc, + "bottom": desc, + "orientation": "h", + } + ) + + if h_strat == "lines": + h_base = filter_edges(EDGES, "h") + elif h_strat == "lines_strict": + h_base = filter_edges(EDGES, "h", edge_type="line") + elif h_strat == "text": + h_base = words_to_edges_h( + words, word_threshold=settings.min_words_horizontal + ) + elif h_strat == "explicit": + h_base = [] + else: + h_base = [] + + h = h_base + h_explicit + + edges = list(v) + list(h) + + edges = merge_edges( + edges, + snap_x_tolerance=settings.snap_x_tolerance, + snap_y_tolerance=settings.snap_y_tolerance, + join_x_tolerance=settings.join_x_tolerance, + join_y_tolerance=settings.join_y_tolerance, + ) + + return filter_edges(edges, min_length=settings.edge_min_length) + + def __getitem__(self, i): + tcount = len(self.tables) + if i >= tcount: + raise IndexError("table not on page") + while i < 0: + i += tcount + return self.tables[i] + + +""" +Start of PyMuPDF interface code. +The following functions are executed when "page.find_tables()" is called. + +* make_chars: Fills the CHARS list with text character information extracted + via "rawdict" text extraction. Items in CHARS are formatted + as expected by the table code. +* make_edges: Fills the EDGES list with vector graphic information extracted + via "get_drawings". Items in EDGES are formatted as expected + by the table code. + +The lists CHARS and EDGES are used to replace respective document access +of pdfplumber or, respectively pdfminer. +The table code has been modified to use these lists instead of accessing +page information themselves. +""" + + +# ----------------------------------------------------------------------------- +# Extract all page characters to fill the CHARS list +# ----------------------------------------------------------------------------- +def make_chars(page, clip=None): + """Extract text as "rawdict" to fill CHARS.""" + global TEXTPAGE + page_number = page.number + 1 + page_height = page.rect.height + ctm = page.transformation_matrix + TEXTPAGE = page.get_textpage(clip=clip, flags=FLAGS) + blocks = page.get_text("rawdict", textpage=TEXTPAGE)["blocks"] + doctop_base = page_height * page.number + for block in blocks: + for line in block["lines"]: + ldir = line["dir"] # = (cosine, sine) of angle + ldir = (round(ldir[0], 4), round(ldir[1], 4)) + matrix = pymupdf.Matrix(ldir[0], -ldir[1], ldir[1], ldir[0], 0, 0) + if ldir[1] == 0: + upright = True + else: + upright = False + for span in sorted(line["spans"], key=lambda s: s["bbox"][0]): + fontname = span["font"] + fontsize = span["size"] + color = pymupdf.sRGB_to_pdf(span["color"]) + for char in sorted(span["chars"], key=lambda c: c["bbox"][0]): + bbox = pymupdf.Rect(char["bbox"]) + bbox_ctm = bbox * ctm + origin = pymupdf.Point(char["origin"]) * ctm + matrix.e = origin.x + matrix.f = origin.y + text = char["c"] + char_dict = { + "adv": bbox.x1 - bbox.x0 if upright else bbox.y1 - bbox.y0, + "bottom": bbox.y1, + "doctop": bbox.y0 + doctop_base, + "fontname": fontname, + "height": bbox.y1 - bbox.y0, + "matrix": tuple(matrix), + "ncs": "DeviceRGB", + "non_stroking_color": color, + "non_stroking_pattern": None, + "object_type": "char", + "page_number": page_number, + "size": fontsize if upright else bbox.y1 - bbox.y0, + "stroking_color": color, + "stroking_pattern": None, + "text": text, + "top": bbox.y0, + "upright": upright, + "width": bbox.x1 - bbox.x0, + "x0": bbox.x0, + "x1": bbox.x1, + "y0": bbox_ctm.y0, + "y1": bbox_ctm.y1, + } + CHARS.append(char_dict) + + +# ------------------------------------------------------------------------ +# Extract all page vector graphics to fill the EDGES list. +# We are ignoring Bézier curves completely and are converting everything +# else to lines. +# ------------------------------------------------------------------------ +def make_edges(page, clip=None, tset=None, paths=None, add_lines=None, add_boxes=None): + snap_x = tset.snap_x_tolerance + snap_y = tset.snap_y_tolerance + min_length = tset.edge_min_length + lines_strict = ( + tset.vertical_strategy == "lines_strict" + or tset.horizontal_strategy == "lines_strict" + ) + page_height = page.rect.height + doctop_basis = page.number * page_height + page_number = page.number + 1 + prect = page.rect + if page.rotation in (90, 270): + w, h = prect.br + prect = pymupdf.Rect(0, 0, h, w) + if clip is not None: + clip = pymupdf.Rect(clip) + else: + clip = prect + + def are_neighbors(r1, r2): + """Detect whether r1, r2 are neighbors. + + Defined as: + The minimum distance between points of r1 and points of r2 is not + larger than some delta. + + This check supports empty rect-likes and thus also lines. + + Note: + This type of check is MUCH faster than native Rect containment checks. + """ + if ( # check if x-coordinates of r1 are within those of r2 + r2.x0 - snap_x <= r1.x0 <= r2.x1 + snap_x + or r2.x0 - snap_x <= r1.x1 <= r2.x1 + snap_x + ) and ( # ... same for y-coordinates + r2.y0 - snap_y <= r1.y0 <= r2.y1 + snap_y + or r2.y0 - snap_y <= r1.y1 <= r2.y1 + snap_y + ): + return True + + # same check with r1 / r2 exchanging their roles (this is necessary!) + if ( + r1.x0 - snap_x <= r2.x0 <= r1.x1 + snap_x + or r1.x0 - snap_x <= r2.x1 <= r1.x1 + snap_x + ) and ( + r1.y0 - snap_y <= r2.y0 <= r1.y1 + snap_y + or r1.y0 - snap_y <= r2.y1 <= r1.y1 + snap_y + ): + return True + return False + + def clean_graphics(npaths=None): + """Detect and join rectangles of "connected" vector graphics.""" + if npaths is None: + allpaths = page.get_drawings() + else: # accept passed-in vector graphics + allpaths = npaths[:] # paths relevant for table detection + paths = [] + for p in allpaths: + # If only looking at lines, we ignore fill-only paths, + # except simulated lines (i.e. small width or height). + if ( + lines_strict + and p["type"] == "f" + and p["rect"].width > snap_x + and p["rect"].height > snap_y + ): + continue + paths.append(p) + + # start with all vector graphics rectangles + prects = sorted(set([p["rect"] for p in paths]), key=lambda r: (r.y1, r.x0)) + new_rects = [] # the final list of joined rectangles + # ---------------------------------------------------------------- + # Strategy: Join rectangles that "almost touch" each other. + # Extend first rectangle with any other that is a "neighbor". + # Then move it to the final list and continue with the rest. + # ---------------------------------------------------------------- + while prects: # the algorithm will empty this list + prect0 = prects[0] # copy of first rectangle (performance reasons!) + repeat = True + while repeat: # this loop extends first rect in list + repeat = False # set to true again if some other rect touches + for i in range(len(prects) - 1, 0, -1): # run backwards + if are_neighbors(prect0, prects[i]): # close enough to rect 0? + prect0 |= prects[i].tl # extend rect 0 + prect0 |= prects[i].br # extend rect 0 + del prects[i] # delete this rect + repeat = True # keep checking the rest + + # move rect 0 over to result list if there is some text in it + if not white_spaces.issuperset(page.get_textbox(prect0, textpage=TEXTPAGE)): + # contains text, so accept it as a table bbox candidate + new_rects.append(prect0) + del prects[0] # remove from rect list + + return new_rects, paths + + bboxes, paths = clean_graphics(npaths=paths) + + def is_parallel(p1, p2): + """Check if line is roughly axis-parallel.""" + if abs(p1.x - p2.x) <= snap_x or abs(p1.y - p2.y) <= snap_y: + return True + return False + + def make_line(p, p1, p2, clip): + """Given 2 points, make a line dictionary for table detection.""" + if not is_parallel(p1, p2): # only accepting axis-parallel lines + return {} + # compute the extremal values + x0 = min(p1.x, p2.x) + x1 = max(p1.x, p2.x) + y0 = min(p1.y, p2.y) + y1 = max(p1.y, p2.y) + + # check for outside clip + if x0 > clip.x1 or x1 < clip.x0 or y0 > clip.y1 or y1 < clip.y0: + return {} + + if x0 < clip.x0: + x0 = clip.x0 # adjust to clip boundary + + if x1 > clip.x1: + x1 = clip.x1 # adjust to clip boundary + + if y0 < clip.y0: + y0 = clip.y0 # adjust to clip boundary + + if y1 > clip.y1: + y1 = clip.y1 # adjust to clip boundary + + width = x1 - x0 # from adjusted values + height = y1 - y0 # from adjusted values + if width == height == 0: + return {} # nothing left to deal with + line_dict = { + "x0": x0, + "y0": page_height - y0, + "x1": x1, + "y1": page_height - y1, + "width": width, + "height": height, + "pts": [(x0, y0), (x1, y1)], + "linewidth": p["width"], + "stroke": True, + "fill": False, + "evenodd": False, + "stroking_color": p["color"] if p["color"] else p["fill"], + "non_stroking_color": None, + "object_type": "line", + "page_number": page_number, + "stroking_pattern": None, + "non_stroking_pattern": None, + "top": y0, + "bottom": y1, + "doctop": y0 + doctop_basis, + } + return line_dict + + for p in paths: + items = p["items"] # items in this path + + # if 'closePath', add a line from last to first point + if p["closePath"] and items[0][0] == "l" and items[-1][0] == "l": + items.append(("l", items[-1][2], items[0][1])) + + for i in items: + if i[0] not in ("l", "re", "qu"): + continue # ignore anything else + + if i[0] == "l": # a line + p1, p2 = i[1:] + line_dict = make_line(p, p1, p2, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + elif i[0] == "re": + # A rectangle: decompose into 4 lines, but filter out + # the ones that simulate a line + rect = i[1].normalize() # normalize the rectangle + + if ( + rect.width <= min_length and rect.width < rect.height + ): # simulates a vertical line + x = abs(rect.x1 + rect.x0) / 2 # take middle value for x + p1 = pymupdf.Point(x, rect.y0) + p2 = pymupdf.Point(x, rect.y1) + line_dict = make_line(p, p1, p2, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + continue + + if ( + rect.height <= min_length and rect.height < rect.width + ): # simulates a horizontal line + y = abs(rect.y1 + rect.y0) / 2 # take middle value for y + p1 = pymupdf.Point(rect.x0, y) + p2 = pymupdf.Point(rect.x1, y) + line_dict = make_line(p, p1, p2, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + continue + + line_dict = make_line(p, rect.tl, rect.bl, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, rect.bl, rect.br, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, rect.br, rect.tr, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, rect.tr, rect.tl, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + else: # must be a quad + # we convert it into (up to) 4 lines + ul, ur, ll, lr = i[1] + + line_dict = make_line(p, ul, ll, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, ll, lr, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, lr, ur, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(p, ur, ul, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + path = {"color": (0, 0, 0), "fill": None, "width": 1} + for bbox in bboxes: # add the border lines for all enveloping bboxes + line_dict = make_line(path, bbox.tl, bbox.tr, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(path, bbox.bl, bbox.br, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(path, bbox.tl, bbox.bl, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + line_dict = make_line(path, bbox.tr, bbox.br, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + if add_lines is not None: # add user-specified lines + assert isinstance(add_lines, (tuple, list)) + else: + add_lines = [] + for p1, p2 in add_lines: + p1 = pymupdf.Point(p1) + p2 = pymupdf.Point(p2) + line_dict = make_line(path, p1, p2, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + if add_boxes is not None: # add user-specified rectangles + assert isinstance(add_boxes, (tuple, list)) + else: + add_boxes = [] + for box in add_boxes: + r = pymupdf.Rect(box) + line_dict = make_line(path, r.tl, r.bl, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + line_dict = make_line(path, r.bl, r.br, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + line_dict = make_line(path, r.br, r.tr, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + line_dict = make_line(path, r.tr, r.tl, clip) + if line_dict: + EDGES.append(line_to_edge(line_dict)) + + +def page_rotation_set0(page): + """Nullify page rotation. + + To correctly detect tables, page rotation must be zero. + This function performs the necessary adjustments and returns information + for reverting this changes. + """ + mediabox = page.mediabox + rot = page.rotation # contains normalized rotation value + # need to derotate the page's content + mb = page.mediabox # current mediabox + + if rot == 90: + # before derotation, shift content horizontally + mat0 = pymupdf.Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0) + elif rot == 270: + # before derotation, shift content vertically + mat0 = pymupdf.Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0) + else: + mat0 = pymupdf.Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0) + + # prefix with derotation matrix + mat = mat0 * page.derotation_matrix + cmd = b"%g %g %g %g %g %g cm " % tuple(mat) + xref = pymupdf.TOOLS._insert_contents(page, cmd, 0) + + # swap x- and y-coordinates + if rot in (90, 270): + x0, y0, x1, y1 = mb + mb.x0 = y0 + mb.y0 = x0 + mb.x1 = y1 + mb.y1 = x1 + page.set_mediabox(mb) + + page.set_rotation(0) + + # refresh the page to apply these changes + doc = page.parent + pno = page.number + page = doc[pno] + return page, xref, rot, mediabox + + +def page_rotation_reset(page, xref, rot, mediabox): + """Reset page rotation to original values. + + To be used before we return tables.""" + doc = page.parent # document of the page + doc.update_stream(xref, b" ") # remove de-rotation matrix + page.set_mediabox(mediabox) # set mediabox to old value + page.set_rotation(rot) # set rotation to old value + pno = page.number + page = doc[pno] # update page info + return page + + +def find_tables( + page, + clip=None, + vertical_strategy: str = "lines", + horizontal_strategy: str = "lines", + vertical_lines: list = None, + horizontal_lines: list = None, + snap_tolerance: float = DEFAULT_SNAP_TOLERANCE, + snap_x_tolerance: float = None, + snap_y_tolerance: float = None, + join_tolerance: float = DEFAULT_JOIN_TOLERANCE, + join_x_tolerance: float = None, + join_y_tolerance: float = None, + edge_min_length: float = 3, + min_words_vertical: float = DEFAULT_MIN_WORDS_VERTICAL, + min_words_horizontal: float = DEFAULT_MIN_WORDS_HORIZONTAL, + intersection_tolerance: float = 3, + intersection_x_tolerance: float = None, + intersection_y_tolerance: float = None, + text_tolerance=3, + text_x_tolerance=3, + text_y_tolerance=3, + strategy=None, # offer abbreviation + add_lines=None, # user-specified lines + add_boxes=None, # user-specified rectangles + paths=None, # accept vector graphics as parameter +): + pymupdf._warn_layout_once() + global CHARS, EDGES + CHARS = [] + EDGES = [] + old_small = bool(pymupdf.TOOLS.set_small_glyph_heights()) # save old value + pymupdf.TOOLS.set_small_glyph_heights(True) # we need minimum bboxes + if page.rotation != 0: + page, old_xref, old_rot, old_mediabox = page_rotation_set0(page) + else: + old_xref, old_rot, old_mediabox = None, None, None + + if snap_x_tolerance is None: + snap_x_tolerance = UNSET + if snap_y_tolerance is None: + snap_y_tolerance = UNSET + if join_x_tolerance is None: + join_x_tolerance = UNSET + if join_y_tolerance is None: + join_y_tolerance = UNSET + if intersection_x_tolerance is None: + intersection_x_tolerance = UNSET + if intersection_y_tolerance is None: + intersection_y_tolerance = UNSET + if strategy is not None: + vertical_strategy = strategy + horizontal_strategy = strategy + + settings = { + "vertical_strategy": vertical_strategy, + "horizontal_strategy": horizontal_strategy, + "explicit_vertical_lines": vertical_lines, + "explicit_horizontal_lines": horizontal_lines, + "snap_tolerance": snap_tolerance, + "snap_x_tolerance": snap_x_tolerance, + "snap_y_tolerance": snap_y_tolerance, + "join_tolerance": join_tolerance, + "join_x_tolerance": join_x_tolerance, + "join_y_tolerance": join_y_tolerance, + "edge_min_length": edge_min_length, + "min_words_vertical": min_words_vertical, + "min_words_horizontal": min_words_horizontal, + "intersection_tolerance": intersection_tolerance, + "intersection_x_tolerance": intersection_x_tolerance, + "intersection_y_tolerance": intersection_y_tolerance, + "text_tolerance": text_tolerance, + "text_x_tolerance": text_x_tolerance, + "text_y_tolerance": text_y_tolerance, + } + + old_quad_corrections = pymupdf.TOOLS.unset_quad_corrections() + try: + page.get_layout() + if page.layout_information: + pymupdf.TOOLS.unset_quad_corrections(True) + boxes = [ + pymupdf.Rect(b[:4]) for b in page.layout_information if b[-1] == "table" + ] + else: + boxes = [] + + if boxes: # layout did find some tables + pass + elif page.layout_information is not None: + # layout was executed but found no tables + # make sure we exit quickly with an empty TableFinder + tbf = TableFinder(page) + return tbf + + tset = TableSettings.resolve(settings=settings) + page.table_settings = tset + + make_chars(page, clip=clip) # create character list of page + make_edges( + page, + clip=clip, + tset=tset, + paths=paths, + add_lines=add_lines, + add_boxes=add_boxes, + ) # create lines and curves + + tbf = TableFinder(page, settings=tset) + + if boxes: + # only keep Finder tables that match a layout box + tbf.tables = [ + tab + for tab in tbf.tables + if any(_iou(tab.bbox, r) >= 0.6 for r in boxes) + ] + # build the complementary list of layout table boxes + my_boxes = [ + r for r in boxes if all(_iou(r, tab.bbox) < 0.6 for tab in tbf.tables) + ] + if my_boxes: + word_rects = [pymupdf.Rect(w[:4]) for w in TEXTPAGE.extractWORDS()] + tp2 = page.get_textpage(flags=TABLE_DETECTOR_FLAGS) + for rect in my_boxes: + cells = make_table_from_bbox(tp2, word_rects, rect) # pylint: disable=E0606 + tbf.tables.append(Table(page, cells)) + except Exception as e: + pymupdf.message("find_tables: exception occurred: %s" % str(e)) + return None + finally: + pymupdf.TOOLS.set_small_glyph_heights(old_small) + if old_xref is not None: + page = page_rotation_reset(page, old_xref, old_rot, old_mediabox) + pymupdf.TOOLS.unset_quad_corrections(old_quad_corrections) + + return tbf diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 000000000..4ea5668f3 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,1169 @@ +# ------------------------------------------------------------------------ +# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com +# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html +# +# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a +# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is +# maintained and developed by Artifex Software, Inc. https://artifex.com. +# ------------------------------------------------------------------------ +import math +import typing +import weakref + +try: + from . import pymupdf +except Exception: + import pymupdf +try: + from . import mupdf +except Exception: + import mupdf + +_format_g = pymupdf.format_g + +g_exceptions_verbose = pymupdf.g_exceptions_verbose + +point_like = "point_like" +rect_like = "rect_like" +matrix_like = "matrix_like" +quad_like = "quad_like" + +# ByteString is gone from typing in 3.14. +# collections.abc.Buffer available from 3.12 only +try: + ByteString = typing.ByteString +except AttributeError: + # pylint: disable=unsupported-binary-operation + ByteString = bytes | bytearray | memoryview + +AnyType = typing.Any +OptInt = typing.Union[int, None] +OptFloat = typing.Optional[float] +OptStr = typing.Optional[str] +OptDict = typing.Optional[dict] +OptBytes = typing.Optional[ByteString] +OptSeq = typing.Optional[typing.Sequence] + +""" +This is a collection of functions to extend PyMupdf. +""" + + +def get_text_blocks( + page: pymupdf.Page, + clip: rect_like = None, + flags: OptInt = None, + textpage: pymupdf.TextPage = None, + sort: bool = False, +) -> list: + """Return the text blocks on a page. + + Notes: + Lines in a block are concatenated with line breaks. + Args: + flags: (int) control the amount of data parsed into the textpage. + Returns: + A list of the blocks. Each item contains the containing rectangle + coordinates, text lines, running block number and block type. + """ + pymupdf.CheckParent(page) + if flags is None: + flags = pymupdf.TEXTFLAGS_BLOCKS + tp = textpage + if tp is None: + tp = page.get_textpage(clip=clip, flags=flags) + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + + blocks = tp.extractBLOCKS() + if textpage is None: + del tp + if sort: + blocks.sort(key=lambda b: (b[3], b[0])) + return blocks + + +def get_text_words( + page: pymupdf.Page, + clip: rect_like = None, + flags: OptInt = None, + textpage: pymupdf.TextPage = None, + sort: bool = False, + delimiters=None, + tolerance=3, +) -> list: + """Return the text words as a list with the bbox for each word. + + Args: + page: pymupdf.Page + clip: (rect-like) area on page to consider + flags: (int) control the amount of data parsed into the textpage. + textpage: (pymupdf.TextPage) either passed-in or None. + sort: (bool) sort the words in reading sequence. + delimiters: (str,list) characters to use as word delimiters. + tolerance: (float) consider words to be part of the same line if + top or bottom coordinate are not larger than this. Relevant + only if sort=True. + + Returns: + Word tuples (x0, y0, x1, y1, "word", bno, lno, wno). + """ + + def sort_words(words): + """Sort words line-wise, forgiving small deviations.""" + words.sort(key=lambda w: (w[3], w[0])) + nwords = [] # final word list + line = [words[0]] # collects words roughly in same line + lrect = pymupdf.Rect(words[0][:4]) # start the line rectangle + for w in words[1:]: + wrect = pymupdf.Rect(w[:4]) + if ( + abs(wrect.y0 - lrect.y0) <= tolerance + or abs(wrect.y1 - lrect.y1) <= tolerance + ): + line.append(w) + lrect |= wrect + else: + line.sort(key=lambda w: w[0]) # sort words in line l-t-r + nwords.extend(line) # append to final words list + line = [w] # start next line + lrect = wrect # start next line rect + + line.sort(key=lambda w: w[0]) # sort words in line l-t-r + nwords.extend(line) # append to final words list + + return nwords + + pymupdf.CheckParent(page) + if flags is None: + flags = pymupdf.TEXTFLAGS_WORDS + tp = textpage + if tp is None: + tp = page.get_textpage(clip=clip, flags=flags) + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + + words = tp.extractWORDS(delimiters) + + # if textpage was given, we subselect the words in clip + if textpage is not None and clip is not None: + # sub-select words contained in clip + clip = pymupdf.Rect(clip) + words = [ + w for w in words if abs(clip & w[:4]) >= 0.5 * abs(pymupdf.Rect(w[:4])) + ] + + if textpage is None: + del tp + if words and sort: + # advanced sort if any words found + words = sort_words(words) + + return words + + +def get_sorted_text( + page: pymupdf.Page, + clip: rect_like = None, + flags: OptInt = None, + textpage: pymupdf.TextPage = None, + tolerance=3, +) -> str: + """Extract plain text avoiding unacceptable line breaks. + + Text contained in clip will be sorted in reading sequence. Some effort + is also spent to simulate layout vertically and horizontally. + + Args: + page: pymupdf.Page + clip: (rect-like) only consider text inside + flags: (int) text extraction flags + textpage: pymupdf.TextPage + tolerance: (float) consider words to be on the same line if their top + or bottom coordinates do not differ more than this. + + Notes: + If a TextPage is provided, all text is checked for being inside clip + with at least 50% of its bbox. + This allows to use some "global" TextPage in conjunction with sub- + selecting words in parts of the defined TextPage rectangle. + + Returns: + A text string in reading sequence. Left indentation of each line, + inter-line and inter-word distances strive to reflect the layout. + """ + + def line_text(clip, line): + """Create the string of one text line. + + We are trying to simulate some horizontal layout here, too. + + Args: + clip: (pymupdf.Rect) the area from which all text is being read. + line: (list) word tuples (rect, text) contained in the line + Returns: + Text in this line. Generated from words in 'line'. Distance from + predecessor is translated to multiple spaces, thus simulating + text indentations and large horizontal distances. + """ + line.sort(key=lambda w: w[0].x0) + ltext = "" # text in the line + x1 = clip.x0 # end coordinate of ltext + lrect = pymupdf.EMPTY_RECT() # bbox of this line + for r, t in line: + lrect |= r # update line bbox + # convert distance to previous word to multiple spaces + dist = max( + int(round((r.x0 - x1) / r.width * len(t))), + 0 if (x1 == clip.x0 or r.x0 <= x1) else 1, + ) # number of space characters + + ltext += " " * dist + t # append word string + x1 = r.x1 # update new end position + return ltext + + # Extract words in correct sequence first. + words = [ + (pymupdf.Rect(w[:4]), w[4]) + for w in get_text_words( + page, + clip=clip, + flags=flags, + textpage=textpage, + sort=True, + tolerance=tolerance, + ) + ] + + if not words: # no text present + return "" + totalbox = pymupdf.EMPTY_RECT() # area covering all text + for wr, text in words: + totalbox |= wr + + lines = [] # list of reconstituted lines + line = [words[0]] # current line + lrect = words[0][0] # the line's rectangle + + # walk through the words + for wr, text in words[1:]: # start with second word + w0r, _ = line[-1] # read previous word in current line + + # if this word matches top or bottom of the line, append it + if abs(lrect.y0 - wr.y0) <= tolerance or abs(lrect.y1 - wr.y1) <= tolerance: + line.append((wr, text)) + lrect |= wr + else: + # output current line and re-initialize + ltext = line_text(totalbox, line) + lines.append((lrect, ltext)) + line = [(wr, text)] + lrect = wr + + # also append unfinished last line + ltext = line_text(totalbox, line) + lines.append((lrect, ltext)) + + # sort all lines vertically + lines.sort(key=lambda l: (l[0].y1)) + + text = lines[0][1] # text of first line + y1 = lines[0][0].y1 # its bottom coordinate + for lrect, ltext in lines[1:]: + distance = min(int(round((lrect.y0 - y1) / lrect.height)), 5) + breaks = "\n" * (distance + 1) + text += breaks + ltext + y1 = lrect.y1 + + # return text in clip + return text + + +def get_textbox( + page: pymupdf.Page, + rect: rect_like, + textpage: pymupdf.TextPage = None, +) -> str: + tp = textpage + if tp is None: + tp = page.get_textpage() + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + rc = tp.extractTextbox(rect) + if textpage is None: + del tp + return rc + + +def get_text_selection( + page: pymupdf.Page, + p1: point_like, + p2: point_like, + clip: rect_like = None, + textpage: pymupdf.TextPage = None, +): + pymupdf.CheckParent(page) + tp = textpage + if tp is None: + tp = page.get_textpage(clip=clip, flags=pymupdf.TEXT_DEHYPHENATE) + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + rc = tp.extractSelection(p1, p2) + if textpage is None: + del tp + return rc + + +def get_textpage_ocr( + page: pymupdf.Page, + flags: int = 0, + language: str = "eng", + dpi: int = 72, + full: bool = False, + tessdata: str = None, +) -> pymupdf.TextPage: + """Create a Textpage from combined results of normal and OCR text parsing. + + Args: + flags: (int) control content becoming part of the result. + language: (str) specify expected language(s). Default is "eng" (English). + dpi: (int) resolution in dpi, default 72. + full: (bool) whether to OCR the full page image, or only its images (default) + """ + pymupdf.CheckParent(page) + tessdata = pymupdf.get_tessdata(tessdata) + + def full_ocr(page, dpi, language, flags): + zoom = dpi / 72 + mat = pymupdf.Matrix(zoom, zoom) + pix = page.get_pixmap(matrix=mat) + ocr_pdf = pymupdf.Document( + "pdf", + pix.pdfocr_tobytes( + compress=False, + language=language, + tessdata=tessdata, + ), + ) + ocr_page = ocr_pdf.load_page(0) + unzoom = page.rect.width / ocr_page.rect.width + ctm = pymupdf.Matrix(unzoom, unzoom) * page.derotation_matrix + tpage = ocr_page.get_textpage(flags=flags, matrix=ctm) + ocr_pdf.close() + pix = None + tpage.parent = weakref.proxy(page) + return tpage + + # if OCR for the full page, OCR its pixmap @ desired dpi + if full: + return full_ocr(page, dpi, language, flags) + + # For partial OCR, make a normal textpage, then extend it with text that + # is OCRed from each image. + # Because of this, we need the images flag bit set ON. + tpage = page.get_textpage(flags=flags) + for block in page.get_text("dict", flags=pymupdf.TEXT_PRESERVE_IMAGES)["blocks"]: + if block["type"] != 1: # only look at images + continue + bbox = pymupdf.Rect(block["bbox"]) + if bbox.width <= 3 or bbox.height <= 3: # ignore tiny stuff + continue + try: + pix = pymupdf.Pixmap(block["image"]) # get image pixmap + if pix.n - pix.alpha != 3: # we need to convert this to RGB! + pix = pymupdf.Pixmap(pymupdf.csRGB, pix) + if pix.alpha: # must remove alpha channel + pix = pymupdf.Pixmap(pix, 0) + imgdoc = pymupdf.Document( + "pdf", + pix.pdfocr_tobytes(language=language, tessdata=tessdata), + ) # pdf with OCRed page + imgpage = imgdoc.load_page(0) # read image as a page + pix = None + # compute matrix to transform coordinates back to that of 'page' + imgrect = imgpage.rect # page size of image PDF + shrink = pymupdf.Matrix(1 / imgrect.width, 1 / imgrect.height) + mat = shrink * block["transform"] + imgpage.extend_textpage(tpage, flags=0, matrix=mat) + imgdoc.close() + except (RuntimeError, mupdf.FzErrorBase): + if 0 and g_exceptions_verbose: + # Don't show exception info here because it can happen in + # normal operation (see test_3842b). + pymupdf.exception_info() + tpage = None + pymupdf.message("Falling back to full page OCR") + return full_ocr(page, dpi, language, flags) + + return tpage + + +def get_text( + page: pymupdf.Page, + option: str = "text", + *, + clip: rect_like = None, + flags: OptInt = None, + textpage: pymupdf.TextPage = None, + sort: bool = False, + delimiters=None, + tolerance=3, +): + """Extract text from a page or an annotation. + + This is a unifying wrapper for various methods of the pymupdf.TextPage class. + + Args: + option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml. + clip: (rect-like) restrict output to this area. + flags: bit switches to e.g. exclude images or decompose ligatures. + textpage: reuse this pymupdf.TextPage and make no new one. If specified, + 'flags' and 'clip' are ignored. + + Returns: + the output of methods get_text_words / get_text_blocks or pymupdf.TextPage + methods extractText, extractHTML, extractDICT, extractJSON, extractRAWDICT, + extractXHTML or etractXML respectively. + Default and misspelling choice is "text". + """ + formats = { + "text": pymupdf.TEXTFLAGS_TEXT, + "html": pymupdf.TEXTFLAGS_HTML, + "json": pymupdf.TEXTFLAGS_DICT, + "rawjson": pymupdf.TEXTFLAGS_RAWDICT, + "xml": pymupdf.TEXTFLAGS_XML, + "xhtml": pymupdf.TEXTFLAGS_XHTML, + "dict": pymupdf.TEXTFLAGS_DICT, + "rawdict": pymupdf.TEXTFLAGS_RAWDICT, + "words": pymupdf.TEXTFLAGS_WORDS, + "blocks": pymupdf.TEXTFLAGS_BLOCKS, + } + option = option.lower() + assert option in formats + if option not in formats: + option = "text" + if flags is None: + flags = formats[option] + + if option == "words": + return get_text_words( + page, + clip=clip, + flags=flags, + textpage=textpage, + sort=sort, + delimiters=delimiters, + ) + if option == "blocks": + return get_text_blocks( + page, clip=clip, flags=flags, textpage=textpage, sort=sort + ) + + if option == "text" and sort: + return get_sorted_text( + page, + clip=clip, + flags=flags, + textpage=textpage, + tolerance=tolerance, + ) + + pymupdf.CheckParent(page) + cb = None + if option in ("html", "xml", "xhtml"): # no clipping for MuPDF functions + clip = page.cropbox + if clip is not None: + clip = pymupdf.Rect(clip) + cb = None + elif type(page) is pymupdf.Page: + cb = page.cropbox + # pymupdf.TextPage with or without images + tp = textpage + #pymupdf.exception_info() + if tp is None: + tp = page.get_textpage(clip=clip, flags=flags) + elif getattr(tp, "parent") != page: + raise ValueError("not a textpage of this page") + #pymupdf.log( '{option=}') + if option == "json": + t = tp.extractJSON(cb=cb, sort=sort) + elif option == "rawjson": + t = tp.extractRAWJSON(cb=cb, sort=sort) + elif option == "dict": + t = tp.extractDICT(cb=cb, sort=sort) + elif option == "rawdict": + t = tp.extractRAWDICT(cb=cb, sort=sort) + elif option == "html": + t = tp.extractHTML() + elif option == "xml": + t = tp.extractXML() + elif option == "xhtml": + t = tp.extractXHTML() + else: + t = tp.extractText(sort=sort) + + if textpage is None: + del tp + return t + + +def getLinkDict(ln, document=None) -> dict: + if isinstance(ln, pymupdf.Outline): + dest = ln.destination(document) + elif isinstance(ln, pymupdf.Link): + dest = ln.dest + else: + assert 0, f'Unexpected {type(ln)=}.' + nl = {"kind": dest.kind, "xref": 0} + try: + if hasattr(ln, 'rect'): + nl["from"] = ln.rect + except Exception: + # This seems to happen quite often in PyMuPDF/tests. + if g_exceptions_verbose >= 2: pymupdf.exception_info() + pass + pnt = pymupdf.Point(0, 0) + if dest.flags & pymupdf.LINK_FLAG_L_VALID: + pnt.x = dest.lt.x + if dest.flags & pymupdf.LINK_FLAG_T_VALID: + pnt.y = dest.lt.y + + if dest.kind == pymupdf.LINK_URI: + nl["uri"] = dest.uri + + elif dest.kind == pymupdf.LINK_GOTO: + nl["page"] = dest.page + nl["to"] = pnt + if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM: + nl["zoom"] = dest.rb.x + else: + nl["zoom"] = 0.0 + + elif dest.kind == pymupdf.LINK_GOTOR: + nl["file"] = dest.file_spec.replace("\\", "/") + nl["page"] = dest.page + if dest.page < 0: + nl["to"] = dest.dest + else: + nl["to"] = pnt + if dest.flags & pymupdf.LINK_FLAG_R_IS_ZOOM: + nl["zoom"] = dest.rb.x + else: + nl["zoom"] = 0.0 + + elif dest.kind == pymupdf.LINK_LAUNCH: + nl["file"] = dest.file_spec.replace("\\", "/") + + elif dest.kind == pymupdf.LINK_NAMED: + # The dicts should not have same key(s). + assert not (dest.named.keys() & nl.keys()) + nl.update(dest.named) + if 'to' in nl: + nl['to'] = pymupdf.Point(nl['to']) + + else: + nl["page"] = dest.page + return nl + + +def getDestStr(xref: int, ddict: dict) -> str: + """Calculate the PDF action string. + + Notes: + Supports Link annotations and outline items (bookmarks). + """ + if not ddict: + return "" + str_goto = lambda a, b, c, d: f"/A<>" + str_gotor1 = lambda a, b, c, d, e, f: f"/A<>>>" + str_gotor2 = lambda a, b, c: f"/A<>>>" + str_launch = lambda a, b: f"/A<>>>" + str_uri = lambda a: f"/A<>" + + if type(ddict) in (int, float): + dest = str_goto(xref, 0, ddict, 0) + return dest + d_kind = ddict.get("kind", pymupdf.LINK_NONE) + + if d_kind == pymupdf.LINK_NONE: + return "" + + if ddict["kind"] == pymupdf.LINK_GOTO: + d_zoom = ddict.get("zoom", 0) + to = ddict.get("to", pymupdf.Point(0, 0)) + d_left, d_top = to + dest = str_goto(xref, d_left, d_top, d_zoom) + return dest + + if ddict["kind"] == pymupdf.LINK_URI: + dest = str_uri(pymupdf.get_pdf_str(ddict["uri"]),) + return dest + + if ddict["kind"] == pymupdf.LINK_LAUNCH: + fspec = pymupdf.get_pdf_str(ddict["file"]) + dest = str_launch(fspec, fspec) + return dest + + if ddict["kind"] == pymupdf.LINK_GOTOR and ddict["page"] < 0: + fspec = pymupdf.get_pdf_str(ddict["file"]) + dest = str_gotor2(pymupdf.get_pdf_str(ddict["to"]), fspec, fspec) + return dest + + if ddict["kind"] == pymupdf.LINK_GOTOR and ddict["page"] >= 0: + fspec = pymupdf.get_pdf_str(ddict["file"]) + dest = str_gotor1( + ddict["page"], + ddict["to"].x, + ddict["to"].y, + ddict["zoom"], + fspec, + fspec, + ) + return dest + + return "" + + +def getLinkText(page: pymupdf.Page, lnk: dict) -> str: + # -------------------------------------------------------------------------- + # define skeletons for /Annots object texts + # -------------------------------------------------------------------------- + ctm = page.transformation_matrix + ictm = ~ctm + r = lnk["from"] + rect = _format_g(tuple(r * ictm)) + + annot = "" + if lnk["kind"] == pymupdf.LINK_GOTO: + if lnk["page"] >= 0: + txt = pymupdf.annot_skel["goto1"] # annot_goto + pno = lnk["page"] + xref = page.parent.page_xref(pno) + pnt = lnk.get("to", pymupdf.Point(0, 0)) # destination point + dest_page = page.parent[pno] + dest_ctm = dest_page.transformation_matrix + dest_ictm = ~dest_ctm + ipnt = pnt * dest_ictm + annot = txt(xref, ipnt.x, ipnt.y, lnk.get("zoom", 0), rect) + else: + txt = pymupdf.annot_skel["goto2"] # annot_goto_n + annot = txt(pymupdf.get_pdf_str(lnk["to"]), rect) + + elif lnk["kind"] == pymupdf.LINK_GOTOR: + if lnk["page"] >= 0: + txt = pymupdf.annot_skel["gotor1"] # annot_gotor + pnt = lnk.get("to", pymupdf.Point(0, 0)) # destination point + if type(pnt) is not pymupdf.Point: + pnt = pymupdf.Point(0, 0) + annot = txt( + lnk["page"], + pnt.x, + pnt.y, + lnk.get("zoom", 0), + lnk["file"], + lnk["file"], + rect, + ) + else: + txt = pymupdf.annot_skel["gotor2"] # annot_gotor_n + annot = txt(pymupdf.get_pdf_str(lnk["to"]), lnk["file"], rect) + + elif lnk["kind"] == pymupdf.LINK_LAUNCH: + txt = pymupdf.annot_skel["launch"] # annot_launch + annot = txt(lnk["file"], lnk["file"], rect) + + elif lnk["kind"] == pymupdf.LINK_URI: + txt = pymupdf.annot_skel["uri"] # txt = annot_uri + annot = txt(lnk["uri"], rect) + + elif lnk["kind"] == pymupdf.LINK_NAMED: + txt = pymupdf.annot_skel["named"] # annot_named + lname = lnk.get("name") # check presence of key + if lname is None: # if missing, fall back to alternative + lname = lnk["nameddest"] + annot = txt(lname, rect) + if not annot: + return annot + + # add a /NM PDF key to the object definition + link_names = dict( # existing ids and their xref + [(x[0], x[2]) for x in page.annot_xrefs() if x[1] == pymupdf.PDF_ANNOT_LINK] # pylint: disable=no-member + ) + + old_name = lnk.get("id", "") # id value in the argument + + if old_name and (lnk["xref"], old_name) in link_names.items(): + name = old_name # no new name if this is an update only + else: + i = 0 + stem = pymupdf.TOOLS.set_annot_stem() + "-L%i" + while True: + name = stem % i + if name not in link_names.values(): + break + i += 1 + # add /NM key to object definition + annot = annot.replace("/Link", "/Link/NM(%s)" % name) + return annot + + +# ---------------------------------------------------------------------- +# Name: wx.lib.colourdb.py +# Purpose: Adds a bunch of colour names and RGB values to the +# colour database so they can be found by name +# +# Author: Robin Dunn +# +# Created: 13-March-2001 +# Copyright: (c) 2001-2017 by Total Control Software +# Licence: wxWindows license +# Tags: phoenix-port, unittest, documented +# ---------------------------------------------------------------------- + + +def getColorList() -> list: + """ + Returns a list of upper-case colour names. + :rtype: list of strings + """ + return [name for name, r, g, b in pymupdf.colors_wx_list()] + + +def getColorInfoList() -> list: + """ + Returns list of (name, red, gree, blue) tuples, where: + name: upper-case color name. + read, green, blue: integers in range 0..255. + :rtype: list of tuples + """ + return pymupdf.colors_wx_list() + + +def getColor(name: str) -> tuple: + """Retrieve RGB color in PDF format by name. + + Returns: + a triple of floats in range 0 to 1. In case of name-not-found, "white" is returned. + """ + return pymupdf.colors_pdf_dict().get(name.lower(), (1, 1, 1)) + + +def getColorHSV(name: str) -> tuple: + """Retrieve the hue, saturation, value triple of a color name. + + Returns: + a triple (degree, percent, percent). If not found (-1, -1, -1) is returned. + """ + try: + x = getColorInfoList()[getColorList().index(name.upper())] + except Exception: + if g_exceptions_verbose: pymupdf.exception_info() + return (-1, -1, -1) + + r = x[1] / 255.0 + g = x[2] / 255.0 + b = x[3] / 255.0 + cmax = max(r, g, b) + V = round(cmax * 100, 1) + cmin = min(r, g, b) + delta = cmax - cmin + if delta == 0: + hue = 0 + elif cmax == r: + hue = 60.0 * (((g - b) / delta) % 6) + elif cmax == g: + hue = 60.0 * (((b - r) / delta) + 2) + else: + hue = 60.0 * (((r - g) / delta) + 4) + + H = int(round(hue)) + + if cmax == 0: + sat = 0 + else: + sat = delta / cmax + S = int(round(sat * 100)) + + return (H, S, V) + + +def _get_font_properties(doc: pymupdf.Document, xref: int) -> tuple: + fontname, ext, stype, buffer = doc.extract_font(xref) + asc = 0.8 + dsc = -0.2 + if ext == "": + return fontname, ext, stype, asc, dsc + + if buffer: + try: + font = pymupdf.Font(fontbuffer=buffer) + asc = font.ascender + dsc = font.descender + bbox = font.bbox + if asc - dsc < 1: + if bbox.y0 < dsc: + dsc = bbox.y0 + asc = 1 - dsc + except Exception: + pymupdf.exception_info() + asc *= 1.2 + dsc *= 1.2 + return fontname, ext, stype, asc, dsc + if ext != "n/a": + try: + font = pymupdf.Font(fontname) + asc = font.ascender + dsc = font.descender + except Exception: + pymupdf.exception_info() + asc *= 1.2 + dsc *= 1.2 + else: + asc *= 1.2 + dsc *= 1.2 + return fontname, ext, stype, asc, dsc + + +def _show_fz_text( text): + #if mupdf_cppyy: + # assert isinstance( text, cppyy.gbl.mupdf.Text) + #else: + # assert isinstance( text, mupdf.Text) + num_spans = 0 + num_chars = 0 + span = text.m_internal.head + while 1: + if not span: + break + num_spans += 1 + num_chars += span.len + span = span.next + return f'num_spans={num_spans} num_chars={num_chars}' + + +""" +Handle page labels for PDF documents. + +Reading +------- +* compute the label of a page +* find page number(s) having the given label. + +Writing +------- +Supports setting (defining) page labels for PDF documents. + +A big Thank You goes to WILLIAM CHAPMAN who contributed the idea and +significant parts of the following code during late December 2020 +through early January 2021. +""" + + +def rule_dict(item): + """Make a Python dict from a PDF page label rule. + + Args: + item -- a tuple (pno, rule) with the start page number and the rule + string like <>. + Returns: + A dict like + {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}. + """ + # Jorj McKie, 2021-01-06 + + pno, rule = item + rule = rule[2:-2].split("/")[1:] # strip "<<" and ">>" + d = {"startpage": pno, "prefix": "", "firstpagenum": 1} + skip = False + for i, item in enumerate(rule): # pylint: disable=redefined-argument-from-local + if skip: # this item has already been processed + skip = False # deactivate skipping again + continue + if item == "S": # style specification + d["style"] = rule[i + 1] # next item has the style + skip = True # do not process next item again + continue + if item.startswith("P"): # prefix specification: extract the string + x = item[1:].replace("(", "").replace(")", "") + d["prefix"] = x + continue + if item.startswith("St"): # start page number specification + x = int(item[2:]) + d["firstpagenum"] = x + return d + + +def get_label_pno(pgNo, labels): + """Return the label for this page number. + + Args: + pgNo: page number, 0-based. + labels: result of doc._get_page_labels(). + Returns: + The label (str) of the page number. Errors return an empty string. + """ + # Jorj McKie, 2021-01-06 + + item = [x for x in labels if x[0] <= pgNo][-1] + rule = rule_dict(item) + prefix = rule.get("prefix", "") + style = rule.get("style", "") + # make sure we start at 0 when enumerating the alphabet + delta = -1 if style in ("a", "A") else 0 + pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"] + delta + return construct_label(style, prefix, pagenumber) + + +def construct_label(style, prefix, pno) -> str: + """Construct a label based on style, prefix and page number.""" + # William Chapman, 2021-01-06 + + n_str = "" + if style == "D": + n_str = str(pno) + elif style == "r": + n_str = integerToRoman(pno).lower() + elif style == "R": + n_str = integerToRoman(pno).upper() + elif style == "a": + n_str = integerToLetter(pno).lower() + elif style == "A": + n_str = integerToLetter(pno).upper() + result = prefix + n_str + return result + + +def integerToLetter(i) -> str: + """Returns letter sequence string for integer i.""" + # William Chapman, Jorj McKie, 2021-01-06 + import string + ls = string.ascii_uppercase + n, a = 1, i + while pow(26, n) <= a: + a -= int(math.pow(26, n)) + n += 1 + + str_t = "" + for j in reversed(range(n)): + f, g = divmod(a, int(math.pow(26, j))) + str_t += ls[f] + a = g + return str_t + + +def integerToRoman(num: int) -> str: + """Return roman numeral for an integer.""" + # William Chapman, Jorj McKie, 2021-01-06 + + roman = ( + (1000, "M"), + (900, "CM"), + (500, "D"), + (400, "CD"), + (100, "C"), + (90, "XC"), + (50, "L"), + (40, "XL"), + (10, "X"), + (9, "IX"), + (5, "V"), + (4, "IV"), + (1, "I"), + ) + + def roman_num(num): + for r, ltr in roman: + x, _ = divmod(num, r) + yield ltr * x + num -= r * x + if num <= 0: + break + + return "".join([a for a in roman_num(num)]) + + +# ------------------------------------------------------------------- +# Functions to recover the quad contained in a text extraction bbox +# ------------------------------------------------------------------- +def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad: + """Compute the quad located inside the bbox. + + The bbox may be any of the resp. tuples occurring inside the given span. + + Args: + line_dir: (tuple) 'line["dir"]' of the owning line or None. + span: (dict) the span. May be from get_texttrace() method. + bbox: (tuple) the bbox of the span or any of its characters. + Returns: + The quad which is wrapped by the bbox. + """ + if line_dir is None: + line_dir = span["dir"] + cos, sin = line_dir + bbox = pymupdf.Rect(bbox) # make it a rect + if pymupdf.TOOLS.set_small_glyph_heights(): # ==> just fontsize as height + d = 1 + else: + d = span["ascender"] - span["descender"] + + height = d * span["size"] # the quad's rectangle height + # The following are distances from the bbox corners, at which we find the + # respective quad points. The computation depends on in which quadrant the + # text writing angle is located. + hs = height * sin + hc = height * cos + if hc >= 0 and hs <= 0: # quadrant 1 + ul = bbox.bl - (0, hc) + ur = bbox.tr + (hs, 0) + ll = bbox.bl - (hs, 0) + lr = bbox.tr + (0, hc) + elif hc <= 0 and hs <= 0: # quadrant 2 + ul = bbox.br + (hs, 0) + ur = bbox.tl - (0, hc) + ll = bbox.br + (0, hc) + lr = bbox.tl - (hs, 0) + elif hc <= 0 and hs >= 0: # quadrant 3 + ul = bbox.tr - (0, hc) + ur = bbox.bl + (hs, 0) + ll = bbox.tr - (hs, 0) + lr = bbox.bl + (0, hc) + else: # quadrant 4 + ul = bbox.tl + (hs, 0) + ur = bbox.br - (0, hc) + ll = bbox.tl + (0, hc) + lr = bbox.br - (hs, 0) + return pymupdf.Quad(ul, ur, ll, lr) + + +def recover_quad(line_dir: tuple, span: dict) -> pymupdf.Quad: + """Recover the quadrilateral of a text span. + + Args: + line_dir: (tuple) 'line["dir"]' of the owning line. + span: the span. + Returns: + The quadrilateral enveloping the span's text. + """ + if type(line_dir) is not tuple or len(line_dir) != 2: + raise ValueError("bad line dir argument") + if type(span) is not dict: + raise ValueError("bad span argument") + return recover_bbox_quad(line_dir, span, span["bbox"]) + + +def recover_line_quad(line: dict, spans: list = None) -> pymupdf.Quad: + """Calculate the line quad for 'dict' / 'rawdict' text extractions. + + The lower quad points are those of the first, resp. last span quad. + The upper points are determined by the maximum span quad height. + From this, compute a rect with bottom-left in (0, 0), convert this to a + quad and rotate and shift back to cover the text of the spans. + + Args: + spans: (list, optional) sub-list of spans to consider. + Returns: + pymupdf.Quad covering selected spans. + """ + if spans is None: # no sub-selection + spans = line["spans"] # all spans + if len(spans) == 0: + raise ValueError("bad span list") + line_dir = line["dir"] # text direction + cos, sin = line_dir + q0 = recover_quad(line_dir, spans[0]) # quad of first span + if len(spans) > 1: # get quad of last span + q1 = recover_quad(line_dir, spans[-1]) + else: + q1 = q0 # last = first + + line_ll = q0.ll # lower-left of line quad + line_lr = q1.lr # lower-right of line quad + + mat0 = pymupdf.planish_line(line_ll, line_lr) + + # map base line to x-axis such that line_ll goes to (0, 0) + x_lr = line_lr * mat0 + + small = pymupdf.TOOLS.set_small_glyph_heights() # small glyph heights? + + h = max( + [s["size"] * (1 if small else (s["ascender"] - s["descender"])) for s in spans] + ) + + line_rect = pymupdf.Rect(0, -h, x_lr.x, 0) # line rectangle + line_quad = line_rect.quad # make it a quad and: + line_quad *= ~mat0 + return line_quad + + +def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> pymupdf.Quad: + """Calculate the span quad for 'dict' / 'rawdict' text extractions. + + Notes: + There are two execution paths: + 1. For the full span quad, the result of 'recover_quad' is returned. + 2. For the quad of a sub-list of characters, the char quads are + computed and joined. This is only supported for the "rawdict" + extraction option. + + Args: + line_dir: (tuple) 'line["dir"]' of the owning line. + span: (dict) the span. + chars: (list, optional) sub-list of characters to consider. + Returns: + pymupdf.Quad covering selected characters. + """ + if line_dir is None: # must be a span from get_texttrace() + line_dir = span["dir"] + if chars is None: # no sub-selection + return recover_quad(line_dir, span) + if "chars" not in span.keys(): + raise ValueError("need 'rawdict' option to sub-select chars") + + q0 = recover_char_quad(line_dir, span, chars[0]) # quad of first char + if len(chars) > 1: # get quad of last char + q1 = recover_char_quad(line_dir, span, chars[-1]) + else: + q1 = q0 # last = first + + span_ll = q0.ll # lower-left of span quad + span_lr = q1.lr # lower-right of span quad + mat0 = pymupdf.planish_line(span_ll, span_lr) + # map base line to x-axis such that span_ll goes to (0, 0) + x_lr = span_lr * mat0 + + small = pymupdf.TOOLS.set_small_glyph_heights() # small glyph heights? + h = span["size"] * (1 if small else (span["ascender"] - span["descender"])) + + span_rect = pymupdf.Rect(0, -h, x_lr.x, 0) # line rectangle + span_quad = span_rect.quad # make it a quad and: + span_quad *= ~mat0 # rotate back and shift back + return span_quad + + +def recover_char_quad(line_dir: tuple, span: dict, char: dict) -> pymupdf.Quad: + """Recover the quadrilateral of a text character. + + This requires the "rawdict" option of text extraction. + + Args: + line_dir: (tuple) 'line["dir"]' of the span's line. + span: (dict) the span dict. + char: (dict) the character dict. + Returns: + The quadrilateral enveloping the character. + """ + if line_dir is None: + line_dir = span["dir"] + if type(line_dir) is not tuple or len(line_dir) != 2: + raise ValueError("bad line dir argument") + if type(span) is not dict: + raise ValueError("bad span argument") + if type(char) is dict: + bbox = pymupdf.Rect(char["bbox"]) + elif type(char) is tuple: + bbox = pymupdf.Rect(char[3]) + else: + raise ValueError("bad span argument") + + return recover_bbox_quad(line_dir, span, bbox) diff --git a/src_classic/__init__.py b/src_classic/__init__.py new file mode 100644 index 000000000..488a248c2 --- /dev/null +++ b/src_classic/__init__.py @@ -0,0 +1,506 @@ +# ------------------------------------------------------------------------ +# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com +# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html +# +# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a +# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is +# maintained and developed by Artifex Software, Inc. https://artifex.com. +# ------------------------------------------------------------------------ +import sys + +import glob +import os +if os.path.exists( 'fitz/__init__.py'): + if not glob.glob( 'fitz/_fitz*'): + print( '#' * 40) + print( '# Warning: current directory appears to contain an incomplete') + print( '# fitz/ installation directory so "import fitz" may fail.') + print( '# This can happen if current directory is a PyMuPDF source tree.') + print( '# Suggest changing to a different current directory.') + print( '#' * 40) + +def message(text=''): + print(text) + +from fitz_old.fitz_old import * + +# Allow this to work: +# import fitz_old as fitz +# fitz.fitz.TEXT_ALIGN_CENTER +# +fitz = fitz_old + +# define the supported colorspaces for convenience +fitz_old.csRGB = fitz_old.Colorspace(fitz_old.CS_RGB) +fitz_old.csGRAY = fitz_old.Colorspace(fitz_old.CS_GRAY) +fitz_old.csCMYK = fitz_old.Colorspace(fitz_old.CS_CMYK) +csRGB = fitz_old.csRGB +csGRAY = fitz_old.csGRAY +csCMYK = fitz_old.csCMYK + +# create the TOOLS object. +# +# Unfortunately it seems that this is never be destructed even if we use an +# atexit() handler, which makes MuPDF's Memento list it as a leak. In fitz_old.i +# we use Memento_startLeaking()/Memento_stopLeaking() when allocating +# the Tools instance so at least the leak is marked as known. +# +TOOLS = fitz_old.Tools() +TOOLS.thisown = True +fitz_old.TOOLS = TOOLS + +# This atexit handler runs, but doesn't cause ~Tools() to be run. +# +import atexit + + +def cleanup_tools(TOOLS): + # print(f'cleanup_tools: TOOLS={TOOLS} id(TOOLS)={id(TOOLS)}') + # print(f'TOOLS.thisown={TOOLS.thisown}') + del TOOLS + del fitz_old.TOOLS + + +atexit.register(cleanup_tools, TOOLS) + + +# Require that MuPDF matches fitz_old.TOOLS.mupdf_version(); also allow use with +# next minor version (e.g. 1.21.2 => 1.22), so we can test with mupdf master. +# +def v_str_to_tuple(s): + return tuple(map(int, s.split('.'))) + +def v_tuple_to_string(t): + return '.'.join(map(str, t)) + +mupdf_version_tuple = v_str_to_tuple(fitz_old.TOOLS.mupdf_version()) +mupdf_version_tuple_required = v_str_to_tuple(fitz_old.VersionFitz) +mupdf_version_tuple_required_prev = (mupdf_version_tuple_required[0], mupdf_version_tuple_required[1]-1) +mupdf_version_tuple_required_next = (mupdf_version_tuple_required[0], mupdf_version_tuple_required[1]+1) + +# copy functions in 'utils' to their respective fitz classes +import fitz_old.utils +from .table import find_tables + +# ------------------------------------------------------------------------------ +# General +# ------------------------------------------------------------------------------ +fitz_old.recover_quad = fitz_old.utils.recover_quad +fitz_old.recover_bbox_quad = fitz_old.utils.recover_bbox_quad +fitz_old.recover_line_quad = fitz_old.utils.recover_line_quad +fitz_old.recover_span_quad = fitz_old.utils.recover_span_quad +fitz_old.recover_char_quad = fitz_old.utils.recover_char_quad + +# ------------------------------------------------------------------------------ +# Document +# ------------------------------------------------------------------------------ +fitz_old.open = fitz_old.Document +fitz_old.Document._do_links = fitz_old.utils.do_links +fitz_old.Document.del_toc_item = fitz_old.utils.del_toc_item +fitz_old.Document.get_char_widths = fitz_old.utils.get_char_widths +fitz_old.Document.get_ocmd = fitz_old.utils.get_ocmd +fitz_old.Document.get_page_labels = fitz_old.utils.get_page_labels +fitz_old.Document.get_page_numbers = fitz_old.utils.get_page_numbers +fitz_old.Document.get_page_pixmap = fitz_old.utils.get_page_pixmap +fitz_old.Document.get_page_text = fitz_old.utils.get_page_text +fitz_old.Document.get_toc = fitz_old.utils.get_toc +fitz_old.Document.has_annots = fitz_old.utils.has_annots +fitz_old.Document.has_links = fitz_old.utils.has_links +fitz_old.Document.insert_page = fitz_old.utils.insert_page +fitz_old.Document.new_page = fitz_old.utils.new_page +fitz_old.Document.scrub = fitz_old.utils.scrub +fitz_old.Document.search_page_for = fitz_old.utils.search_page_for +fitz_old.Document.set_metadata = fitz_old.utils.set_metadata +fitz_old.Document.set_ocmd = fitz_old.utils.set_ocmd +fitz_old.Document.set_page_labels = fitz_old.utils.set_page_labels +fitz_old.Document.set_toc = fitz_old.utils.set_toc +fitz_old.Document.set_toc_item = fitz_old.utils.set_toc_item +fitz_old.Document.tobytes = fitz_old.Document.write +fitz_old.Document.subset_fonts = fitz_old.utils.subset_fonts +fitz_old.Document.get_oc = fitz_old.utils.get_oc +fitz_old.Document.set_oc = fitz_old.utils.set_oc +fitz_old.Document.xref_copy = fitz_old.utils.xref_copy + + +# ------------------------------------------------------------------------------ +# Page +# ------------------------------------------------------------------------------ +fitz_old.Page.apply_redactions = fitz_old.utils.apply_redactions +fitz_old.Page.delete_widget = fitz_old.utils.delete_widget +fitz_old.Page.draw_bezier = fitz_old.utils.draw_bezier +fitz_old.Page.draw_circle = fitz_old.utils.draw_circle +fitz_old.Page.draw_curve = fitz_old.utils.draw_curve +fitz_old.Page.draw_line = fitz_old.utils.draw_line +fitz_old.Page.draw_oval = fitz_old.utils.draw_oval +fitz_old.Page.draw_polyline = fitz_old.utils.draw_polyline +fitz_old.Page.draw_quad = fitz_old.utils.draw_quad +fitz_old.Page.draw_rect = fitz_old.utils.draw_rect +fitz_old.Page.draw_sector = fitz_old.utils.draw_sector +fitz_old.Page.draw_squiggle = fitz_old.utils.draw_squiggle +fitz_old.Page.draw_zigzag = fitz_old.utils.draw_zigzag +fitz_old.Page.get_links = fitz_old.utils.get_links +fitz_old.Page.get_pixmap = fitz_old.utils.get_pixmap +fitz_old.Page.get_text = fitz_old.utils.get_text +fitz_old.Page.get_image_info = fitz_old.utils.get_image_info +fitz_old.Page.get_text_blocks = fitz_old.utils.get_text_blocks +fitz_old.Page.get_text_selection = fitz_old.utils.get_text_selection +fitz_old.Page.get_text_words = fitz_old.utils.get_text_words +fitz_old.Page.get_textbox = fitz_old.utils.get_textbox +fitz_old.Page.insert_image = fitz_old.utils.insert_image +fitz_old.Page.insert_link = fitz_old.utils.insert_link +fitz_old.Page.insert_text = fitz_old.utils.insert_text +fitz_old.Page.insert_textbox = fitz_old.utils.insert_textbox +fitz_old.Page.new_shape = lambda x: fitz_old.utils.Shape(x) +fitz_old.Page.search_for = fitz_old.utils.search_for +fitz_old.Page.show_pdf_page = fitz_old.utils.show_pdf_page +fitz_old.Page.update_link = fitz_old.utils.update_link +fitz_old.Page.write_text = fitz_old.utils.write_text +fitz_old.Page.get_label = fitz_old.utils.get_label +fitz_old.Page.get_image_rects = fitz_old.utils.get_image_rects +fitz_old.Page.get_textpage_ocr = fitz_old.utils.get_textpage_ocr +fitz_old.Page.delete_image = fitz_old.utils.delete_image +fitz_old.Page.replace_image = fitz_old.utils.replace_image +fitz_old.Page.find_tables = find_tables +# ------------------------------------------------------------------------ +# Annot +# ------------------------------------------------------------------------ +fitz_old.Annot.get_text = fitz_old.utils.get_text +fitz_old.Annot.get_textbox = fitz_old.utils.get_textbox + +# ------------------------------------------------------------------------ +# Rect and IRect +# ------------------------------------------------------------------------ +fitz_old.Rect.get_area = fitz_old.utils.get_area +fitz_old.IRect.get_area = fitz_old.utils.get_area + +# ------------------------------------------------------------------------ +# TextWriter +# ------------------------------------------------------------------------ +fitz_old.TextWriter.fill_textbox = fitz_old.utils.fill_textbox + + +class FitzDeprecation(DeprecationWarning): + pass + + +def restore_aliases(): + import warnings + + warnings.filterwarnings( + "once", + category=FitzDeprecation, + ) + + def showthis(msg, cat, filename, lineno, file=None, line=None): + text = warnings.formatwarning(msg, cat, filename, lineno, line=line) + s = text.find("FitzDeprecation") + if s < 0: + print(text, file=sys.stderr) + return + text = text[s:].splitlines()[0][4:] + print(text, file=sys.stderr) + + warnings.showwarning = showthis + + def _alias(fitz_class, old, new): + fname = getattr(fitz_class, new) + r = str(fitz_class)[1:-1] + objname = " ".join(r.split()[:2]) + objname = objname.replace("fitz_old.fitz_old.", "") + objname = objname.replace("fitz_old.utils.", "") + if callable(fname): + + def deprecated_function(*args, **kw): + msg = "'%s' removed from %s after v1.19 - use '%s'." % ( + old, + objname, + new, + ) + if not VersionBind.startswith("1.18"): + warnings.warn(msg, category=FitzDeprecation) + return fname(*args, **kw) + + setattr(fitz_class, old, deprecated_function) + else: + if type(fname) is property: + setattr(fitz_class, old, property(fname.fget)) + else: + setattr(fitz_class, old, fname) + + eigen = getattr(fitz_class, old) + x = fname.__doc__ + if not x: + x = "" + try: + if callable(fname) or type(fname) is property: + eigen.__doc__ = ( + "*** Deprecated and removed after v1.19 - use '%s'. ***\n" % new + x + ) + except: + pass + + # deprecated Document aliases + _alias(fitz_old.Document, "chapterCount", "chapter_count") + _alias(fitz_old.Document, "chapterPageCount", "chapter_page_count") + _alias(fitz_old.Document, "convertToPDF", "convert_to_pdf") + _alias(fitz_old.Document, "copyPage", "copy_page") + _alias(fitz_old.Document, "deletePage", "delete_page") + _alias(fitz_old.Document, "deletePageRange", "delete_pages") + _alias(fitz_old.Document, "embeddedFileAdd", "embfile_add") + _alias(fitz_old.Document, "embeddedFileCount", "embfile_count") + _alias(fitz_old.Document, "embeddedFileDel", "embfile_del") + _alias(fitz_old.Document, "embeddedFileGet", "embfile_get") + _alias(fitz_old.Document, "embeddedFileInfo", "embfile_info") + _alias(fitz_old.Document, "embeddedFileNames", "embfile_names") + _alias(fitz_old.Document, "embeddedFileUpd", "embfile_upd") + _alias(fitz_old.Document, "extractFont", "extract_font") + _alias(fitz_old.Document, "extractImage", "extract_image") + _alias(fitz_old.Document, "findBookmark", "find_bookmark") + _alias(fitz_old.Document, "fullcopyPage", "fullcopy_page") + _alias(fitz_old.Document, "getCharWidths", "get_char_widths") + _alias(fitz_old.Document, "getOCGs", "get_ocgs") + _alias(fitz_old.Document, "getPageFontList", "get_page_fonts") + _alias(fitz_old.Document, "getPageImageList", "get_page_images") + _alias(fitz_old.Document, "getPagePixmap", "get_page_pixmap") + _alias(fitz_old.Document, "getPageText", "get_page_text") + _alias(fitz_old.Document, "getPageXObjectList", "get_page_xobjects") + _alias(fitz_old.Document, "getSigFlags", "get_sigflags") + _alias(fitz_old.Document, "getToC", "get_toc") + _alias(fitz_old.Document, "getXmlMetadata", "get_xml_metadata") + _alias(fitz_old.Document, "insertPage", "insert_page") + _alias(fitz_old.Document, "insertPDF", "insert_pdf") + _alias(fitz_old.Document, "isDirty", "is_dirty") + _alias(fitz_old.Document, "isFormPDF", "is_form_pdf") + _alias(fitz_old.Document, "isPDF", "is_pdf") + _alias(fitz_old.Document, "isReflowable", "is_reflowable") + _alias(fitz_old.Document, "isRepaired", "is_repaired") + _alias(fitz_old.Document, "isStream", "xref_is_stream") + _alias(fitz_old.Document, "is_stream", "xref_is_stream") + _alias(fitz_old.Document, "lastLocation", "last_location") + _alias(fitz_old.Document, "loadPage", "load_page") + _alias(fitz_old.Document, "makeBookmark", "make_bookmark") + _alias(fitz_old.Document, "metadataXML", "xref_xml_metadata") + _alias(fitz_old.Document, "movePage", "move_page") + _alias(fitz_old.Document, "needsPass", "needs_pass") + _alias(fitz_old.Document, "newPage", "new_page") + _alias(fitz_old.Document, "nextLocation", "next_location") + _alias(fitz_old.Document, "pageCount", "page_count") + _alias(fitz_old.Document, "pageCropBox", "page_cropbox") + _alias(fitz_old.Document, "pageXref", "page_xref") + _alias(fitz_old.Document, "PDFCatalog", "pdf_catalog") + _alias(fitz_old.Document, "PDFTrailer", "pdf_trailer") + _alias(fitz_old.Document, "previousLocation", "prev_location") + _alias(fitz_old.Document, "resolveLink", "resolve_link") + _alias(fitz_old.Document, "searchPageFor", "search_page_for") + _alias(fitz_old.Document, "setLanguage", "set_language") + _alias(fitz_old.Document, "setMetadata", "set_metadata") + _alias(fitz_old.Document, "setToC", "set_toc") + _alias(fitz_old.Document, "setXmlMetadata", "set_xml_metadata") + _alias(fitz_old.Document, "updateObject", "update_object") + _alias(fitz_old.Document, "updateStream", "update_stream") + _alias(fitz_old.Document, "xrefLength", "xref_length") + _alias(fitz_old.Document, "xrefObject", "xref_object") + _alias(fitz_old.Document, "xrefStream", "xref_stream") + _alias(fitz_old.Document, "xrefStreamRaw", "xref_stream_raw") + + # deprecated Page aliases + _alias(fitz_old.Page, "_isWrapped", "is_wrapped") + _alias(fitz_old.Page, "addCaretAnnot", "add_caret_annot") + _alias(fitz_old.Page, "addCircleAnnot", "add_circle_annot") + _alias(fitz_old.Page, "addFileAnnot", "add_file_annot") + _alias(fitz_old.Page, "addFreetextAnnot", "add_freetext_annot") + _alias(fitz_old.Page, "addHighlightAnnot", "add_highlight_annot") + _alias(fitz_old.Page, "addInkAnnot", "add_ink_annot") + _alias(fitz_old.Page, "addLineAnnot", "add_line_annot") + _alias(fitz_old.Page, "addPolygonAnnot", "add_polygon_annot") + _alias(fitz_old.Page, "addPolylineAnnot", "add_polyline_annot") + _alias(fitz_old.Page, "addRectAnnot", "add_rect_annot") + _alias(fitz_old.Page, "addRedactAnnot", "add_redact_annot") + _alias(fitz_old.Page, "addSquigglyAnnot", "add_squiggly_annot") + _alias(fitz_old.Page, "addStampAnnot", "add_stamp_annot") + _alias(fitz_old.Page, "addStrikeoutAnnot", "add_strikeout_annot") + _alias(fitz_old.Page, "addTextAnnot", "add_text_annot") + _alias(fitz_old.Page, "addUnderlineAnnot", "add_underline_annot") + _alias(fitz_old.Page, "addWidget", "add_widget") + _alias(fitz_old.Page, "cleanContents", "clean_contents") + _alias(fitz_old.Page, "CropBox", "cropbox") + _alias(fitz_old.Page, "CropBoxPosition", "cropbox_position") + _alias(fitz_old.Page, "deleteAnnot", "delete_annot") + _alias(fitz_old.Page, "deleteLink", "delete_link") + _alias(fitz_old.Page, "deleteWidget", "delete_widget") + _alias(fitz_old.Page, "derotationMatrix", "derotation_matrix") + _alias(fitz_old.Page, "drawBezier", "draw_bezier") + _alias(fitz_old.Page, "drawCircle", "draw_circle") + _alias(fitz_old.Page, "drawCurve", "draw_curve") + _alias(fitz_old.Page, "drawLine", "draw_line") + _alias(fitz_old.Page, "drawOval", "draw_oval") + _alias(fitz_old.Page, "drawPolyline", "draw_polyline") + _alias(fitz_old.Page, "drawQuad", "draw_quad") + _alias(fitz_old.Page, "drawRect", "draw_rect") + _alias(fitz_old.Page, "drawSector", "draw_sector") + _alias(fitz_old.Page, "drawSquiggle", "draw_squiggle") + _alias(fitz_old.Page, "drawZigzag", "draw_zigzag") + _alias(fitz_old.Page, "firstAnnot", "first_annot") + _alias(fitz_old.Page, "firstLink", "first_link") + _alias(fitz_old.Page, "firstWidget", "first_widget") + _alias(fitz_old.Page, "getContents", "get_contents") + _alias(fitz_old.Page, "getDisplayList", "get_displaylist") + _alias(fitz_old.Page, "getDrawings", "get_drawings") + _alias(fitz_old.Page, "getFontList", "get_fonts") + _alias(fitz_old.Page, "getImageBbox", "get_image_bbox") + _alias(fitz_old.Page, "getImageList", "get_images") + _alias(fitz_old.Page, "getLinks", "get_links") + _alias(fitz_old.Page, "getPixmap", "get_pixmap") + _alias(fitz_old.Page, "getSVGimage", "get_svg_image") + _alias(fitz_old.Page, "getText", "get_text") + _alias(fitz_old.Page, "getTextBlocks", "get_text_blocks") + _alias(fitz_old.Page, "getTextbox", "get_textbox") + _alias(fitz_old.Page, "getTextPage", "get_textpage") + _alias(fitz_old.Page, "getTextWords", "get_text_words") + _alias(fitz_old.Page, "insertFont", "insert_font") + _alias(fitz_old.Page, "insertImage", "insert_image") + _alias(fitz_old.Page, "insertLink", "insert_link") + _alias(fitz_old.Page, "insertText", "insert_text") + _alias(fitz_old.Page, "insertTextbox", "insert_textbox") + _alias(fitz_old.Page, "loadAnnot", "load_annot") + _alias(fitz_old.Page, "loadLinks", "load_links") + _alias(fitz_old.Page, "MediaBox", "mediabox") + _alias(fitz_old.Page, "MediaBoxSize", "mediabox_size") + _alias(fitz_old.Page, "newShape", "new_shape") + _alias(fitz_old.Page, "readContents", "read_contents") + _alias(fitz_old.Page, "rotationMatrix", "rotation_matrix") + _alias(fitz_old.Page, "searchFor", "search_for") + _alias(fitz_old.Page, "setCropBox", "set_cropbox") + _alias(fitz_old.Page, "setMediaBox", "set_mediabox") + _alias(fitz_old.Page, "setRotation", "set_rotation") + _alias(fitz_old.Page, "showPDFpage", "show_pdf_page") + _alias(fitz_old.Page, "transformationMatrix", "transformation_matrix") + _alias(fitz_old.Page, "updateLink", "update_link") + _alias(fitz_old.Page, "wrapContents", "wrap_contents") + _alias(fitz_old.Page, "writeText", "write_text") + + # deprecated Shape aliases + _alias(fitz_old.utils.Shape, "drawBezier", "draw_bezier") + _alias(fitz_old.utils.Shape, "drawCircle", "draw_circle") + _alias(fitz_old.utils.Shape, "drawCurve", "draw_curve") + _alias(fitz_old.utils.Shape, "drawLine", "draw_line") + _alias(fitz_old.utils.Shape, "drawOval", "draw_oval") + _alias(fitz_old.utils.Shape, "drawPolyline", "draw_polyline") + _alias(fitz_old.utils.Shape, "drawQuad", "draw_quad") + _alias(fitz_old.utils.Shape, "drawRect", "draw_rect") + _alias(fitz_old.utils.Shape, "drawSector", "draw_sector") + _alias(fitz_old.utils.Shape, "drawSquiggle", "draw_squiggle") + _alias(fitz_old.utils.Shape, "drawZigzag", "draw_zigzag") + _alias(fitz_old.utils.Shape, "insertText", "insert_text") + _alias(fitz_old.utils.Shape, "insertTextbox", "insert_textbox") + + # deprecated Annot aliases + _alias(fitz_old.Annot, "getText", "get_text") + _alias(fitz_old.Annot, "getTextbox", "get_textbox") + _alias(fitz_old.Annot, "fileGet", "get_file") + _alias(fitz_old.Annot, "fileUpd", "update_file") + _alias(fitz_old.Annot, "getPixmap", "get_pixmap") + _alias(fitz_old.Annot, "getTextPage", "get_textpage") + _alias(fitz_old.Annot, "lineEnds", "line_ends") + _alias(fitz_old.Annot, "setBlendMode", "set_blendmode") + _alias(fitz_old.Annot, "setBorder", "set_border") + _alias(fitz_old.Annot, "setColors", "set_colors") + _alias(fitz_old.Annot, "setFlags", "set_flags") + _alias(fitz_old.Annot, "setInfo", "set_info") + _alias(fitz_old.Annot, "setLineEnds", "set_line_ends") + _alias(fitz_old.Annot, "setName", "set_name") + _alias(fitz_old.Annot, "setOpacity", "set_opacity") + _alias(fitz_old.Annot, "setRect", "set_rect") + _alias(fitz_old.Annot, "setOC", "set_oc") + _alias(fitz_old.Annot, "soundGet", "get_sound") + + # deprecated TextWriter aliases + _alias(fitz_old.TextWriter, "writeText", "write_text") + _alias(fitz_old.TextWriter, "fillTextbox", "fill_textbox") + + # deprecated DisplayList aliases + _alias(fitz_old.DisplayList, "getPixmap", "get_pixmap") + _alias(fitz_old.DisplayList, "getTextPage", "get_textpage") + + # deprecated Pixmap aliases + _alias(fitz_old.Pixmap, "setAlpha", "set_alpha") + _alias(fitz_old.Pixmap, "gammaWith", "gamma_with") + _alias(fitz_old.Pixmap, "tintWith", "tint_with") + _alias(fitz_old.Pixmap, "clearWith", "clear_with") + _alias(fitz_old.Pixmap, "copyPixmap", "copy") + _alias(fitz_old.Pixmap, "getImageData", "tobytes") + _alias(fitz_old.Pixmap, "getPNGData", "tobytes") + _alias(fitz_old.Pixmap, "getPNGdata", "tobytes") + _alias(fitz_old.Pixmap, "writeImage", "save") + _alias(fitz_old.Pixmap, "writePNG", "save") + _alias(fitz_old.Pixmap, "pillowWrite", "pil_save") + _alias(fitz_old.Pixmap, "pillowData", "pil_tobytes") + _alias(fitz_old.Pixmap, "invertIRect", "invert_irect") + _alias(fitz_old.Pixmap, "setPixel", "set_pixel") + _alias(fitz_old.Pixmap, "setOrigin", "set_origin") + _alias(fitz_old.Pixmap, "setRect", "set_rect") + _alias(fitz_old.Pixmap, "setResolution", "set_dpi") + + # deprecated geometry aliases + _alias(fitz_old.Rect, "getArea", "get_area") + _alias(fitz_old.IRect, "getArea", "get_area") + _alias(fitz_old.Rect, "getRectArea", "get_area") + _alias(fitz_old.IRect, "getRectArea", "get_area") + _alias(fitz_old.Rect, "includePoint", "include_point") + _alias(fitz_old.IRect, "includePoint", "include_point") + _alias(fitz_old.Rect, "includeRect", "include_rect") + _alias(fitz_old.IRect, "includeRect", "include_rect") + _alias(fitz_old.Rect, "isInfinite", "is_infinite") + _alias(fitz_old.IRect, "isInfinite", "is_infinite") + _alias(fitz_old.Rect, "isEmpty", "is_empty") + _alias(fitz_old.IRect, "isEmpty", "is_empty") + _alias(fitz_old.Quad, "isEmpty", "is_empty") + _alias(fitz_old.Quad, "isRectangular", "is_rectangular") + _alias(fitz_old.Quad, "isConvex", "is_convex") + _alias(fitz_old.Matrix, "isRectilinear", "is_rectilinear") + _alias(fitz_old.Matrix, "preRotate", "prerotate") + _alias(fitz_old.Matrix, "preScale", "prescale") + _alias(fitz_old.Matrix, "preShear", "preshear") + _alias(fitz_old.Matrix, "preTranslate", "pretranslate") + + # deprecated other aliases + _alias(fitz_old.Outline, "isExternal", "is_external") + _alias(fitz_old.Outline, "isOpen", "is_open") + _alias(fitz_old.Link, "isExternal", "is_external") + _alias(fitz_old.Link, "setBorder", "set_border") + _alias(fitz_old.Link, "setColors", "set_colors") + _alias(fitz, "getPDFstr", "get_pdf_str") + _alias(fitz, "getPDFnow", "get_pdf_now") + _alias(fitz, "PaperSize", "paper_size") + _alias(fitz, "PaperRect", "paper_rect") + _alias(fitz, "paperSizes", "paper_sizes") + _alias(fitz, "ImageProperties", "image_profile") + _alias(fitz, "planishLine", "planish_line") + _alias(fitz, "getTextLength", "get_text_length") + _alias(fitz, "getTextlength", "get_text_length") + + +fitz_old.__doc__ = """ +PyMuPDF %s: Python bindings for the MuPDF %s library. +Version date: %s. +Built for Python %i.%i on %s (%i-bit). +""" % ( + fitz_old.VersionBind, + fitz_old.VersionFitz, + fitz_old.VersionDate, + sys.version_info[0], + sys.version_info[1], + sys.platform, + 64 if sys.maxsize > 2**32 else 32, +) + +if VersionBind.startswith("1.19"): # don't generate aliases after v1.19.* + restore_aliases() + +pdfcolor = dict( + [ + (k, (r / 255, g / 255, b / 255)) + for k, (r, g, b) in fitz_old.utils.getColorInfoDict().items() + ] +) +__version__ = fitz_old.VersionBind diff --git a/fitz/__main__.py b/src_classic/__main__.py similarity index 100% rename from fitz/__main__.py rename to src_classic/__main__.py diff --git a/fitz/_config.h b/src_classic/_config.h similarity index 97% rename from fitz/_config.h rename to src_classic/_config.h index 089c30694..2589b6cb0 100644 --- a/fitz/_config.h +++ b/src_classic/_config.h @@ -17,8 +17,8 @@ // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact -// Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, -// CA 94945, U.S.A., +1(415)492-9861, for further information. +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. #ifndef FZ_CONFIG_H @@ -100,7 +100,7 @@ /* To skip CJK Extension A, enable: (this implicitly enables * TOFU_CJK_LANG) */ -#define TOFU_CJK_EXT +#define TOFU_CJK_EXT 1 /* To skip CJK language specific fonts, enable: */ /* #define TOFU_CJK_LANG */ diff --git a/fitz/fitz.i b/src_classic/fitz_old.i similarity index 99% rename from fitz/fitz.i rename to src_classic/fitz_old.i index c2e9d0c89..8abc89558 100644 --- a/fitz/fitz.i +++ b/src_classic/fitz_old.i @@ -113,8 +113,8 @@ EnsureOwnership(self)%} #define JM_BinFromChar(x) PyBytes_FromString(x) #define JM_BinFromCharSize(x, y) PyBytes_FromStringAndSize(x, (Py_ssize_t) y) -#include -#include +#include +#include #include // freetype includes >> -------------------------------------------------- #include @@ -153,14 +153,16 @@ static void show(const char* prefix, PyObject* obj); // additional headers ---------------------------------------------- +#if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR == 23 && FZ_VERSION_PATCH < 8 pdf_obj *pdf_lookup_page_loc(fz_context *ctx, pdf_document *doc, int needle, pdf_obj **parentp, int *indexp); fz_pixmap *fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip); int fz_pixmap_size(fz_context *ctx, fz_pixmap *src); void fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor); void fz_copy_pixmap_rect(fz_context *ctx, fz_pixmap *dest, fz_pixmap *src, fz_irect b, const fz_default_colorspaces *default_cs); +void fz_write_pixmap_as_jpeg(fz_context *ctx, fz_output *out, fz_pixmap *pix, int jpg_quality); +#endif static const float JM_font_ascender(fz_context *ctx, fz_font *font); static const float JM_font_descender(fz_context *ctx, fz_font *font); -void fz_write_pixmap_as_jpeg(fz_context *ctx, fz_output *out, fz_pixmap *pix, int jpg_quality); // end of additional headers -------------------------------------------- static PyObject *JM_mupdf_warnings_store; @@ -177,8 +179,13 @@ static PyObject *JM_Exc_CurrentException; #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 /* Stop Memento backtraces if we reach the Python interpreter. `cfunction_call()` isn't the only way that Python calls C though, so we - might need extra calls to Memento_addBacktraceLimitFnname(). */ - Memento_addBacktraceLimitFnname("cfunction_call"); + might need extra calls to Memento_addBacktraceLimitFnname(). + + We put this inside `#ifdef MEMENTO` because memento.h's disabling macro + causes "warning: statement with no effect" from cc. */ + #ifdef MEMENTO + Memento_addBacktraceLimitFnname("cfunction_call"); + #endif #endif /* @@ -304,18 +311,30 @@ import re import tarfile import zipfile import pathlib +import string + +# PDF names must not contain these characters: +INVALID_NAME_CHARS = set(string.whitespace + "()<>[]{}/%" + chr(0)) TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX") point_like = "point_like" rect_like = "rect_like" matrix_like = "matrix_like" quad_like = "quad_like" + +# ByteString is gone from typing in 3.14. +# collections.abc.Buffer available from 3.12 only +try: + ByteString = typing.ByteString +except AttributeError: + ByteString = bytes | bytearray | memoryview + AnyType = typing.Any OptInt = typing.Union[int, None] OptFloat = typing.Optional[float] OptStr = typing.Optional[str] OptDict = typing.Optional[dict] -OptBytes = typing.Optional[typing.ByteString] +OptBytes = typing.Optional[ByteString] OptSeq = typing.Optional[typing.Sequence] try: @@ -506,12 +525,31 @@ struct Document if (!handler) { RAISEPY(gctx, MSG_BAD_FILETYPE, PyExc_ValueError); } + #if FZ_VERSION_MINOR >= 24 + if (handler->open) + { + fz_stream* filename_stream = fz_open_file(gctx, filename); + fz_try(gctx) + { + doc = handler->open(gctx, filename_stream, NULL, NULL); + } + fz_always(gctx) + { + fz_drop_stream(gctx, filename_stream); + } + fz_catch(gctx) + { + fz_rethrow(gctx); + } + } + #else if (handler->open) { doc = handler->open(gctx, filename); } else if (handler->open_with_stream) { data = fz_open_file(gctx, filename); doc = handler->open_with_stream(gctx, data); } + #endif } } else { pdf_document *pdf = pdf_create_document(gctx); @@ -806,7 +844,15 @@ struct Document FITZEXCEPTION(xref_set_key, !result) - CLOSECHECK0(xref_set_key, """Set the value of a PDF dictionary key.""") + %pythonprepend xref_set_key %{ + """Set the value of a PDF dictionary key.""" + if self.is_closed or self.is_encrypted: + raise ValueError("document closed or encrypted") + if not key or not isinstance(key, str) or INVALID_NAME_CHARS.intersection(key) not in (set(), {"/"}): + raise ValueError("bad 'key'") + if not isinstance(value, str) or not value or value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]) != set(): + raise ValueError("bad 'value'") + %} PyObject * xref_set_key(int xref, const char *key, char *value) { @@ -1207,6 +1253,7 @@ struct Document return Py_BuildValue("i", xref); } + %pythoncode %{ def embfile_names(self) -> list: """Get list of names of EmbeddedFiles.""" @@ -1303,7 +1350,7 @@ struct Document self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) return xref - def embfile_add(self, name: str, buffer: typing.ByteString, + def embfile_add(self, name: str, buffer: ByteString, filename: OptStr =None, ufilename: OptStr =None, desc: OptStr =None,) -> None: @@ -1880,7 +1927,7 @@ struct Document #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR <= 21 /* The underlying struct members that these methods give access to, are - not in mupdf-1.22. */ + not available. */ CLOSECHECK0(has_xref_streams, """Check if xref table is a stream.""") %pythoncode%{@property%} PyObject *has_xref_streams() @@ -2258,7 +2305,8 @@ if not self.is_pdf: if not hasattr(pyliste, "__getitem__"): raise ValueError("sequence required") if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not in range(len(self)): - raise ValueError("bad page number(s)")%} + raise ValueError("bad page number(s)") +pyliste = tuple(pyliste)%} %pythonappend select %{self._reset_page_refs()%} PyObject *select(PyObject *pyliste) { @@ -2267,17 +2315,23 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not // (2) transform Python list into integer array pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); + int *pages = NULL; fz_try(gctx) { // call retainpages (code copy of fz_clean_file.c) - globals glo = {0}; - glo.ctx = gctx; - glo.doc = pdf; - retainpages(gctx, &glo, pyliste); + int i, len = (int) PyTuple_Size(pyliste); + pages = fz_realloc_array(gctx, pages, len, int); + for (i = 0; i < len; i++) { + pages[i] = (int) PyLong_AsLong(PyTuple_GET_ITEM(pyliste, (Py_ssize_t) i)); + } + pdf_rearrange_pages(gctx, pdf, len, pages); if (pdf->rev_page_map) { pdf_drop_page_tree(gctx, pdf); } } + fz_always(gctx) { + fz_free(gctx, pages); + } fz_catch(gctx) { return NULL; } @@ -4025,7 +4079,7 @@ if rbgroups: if not type(x) in (list, tuple): raise ValueError("bad RBGroup '%s'" % x) s = set(x).difference(ocgs) - if f != set(): + if s != set(): raise ValueError("bad OCGs in RBGroup: %s" % s) if basestate: @@ -4115,7 +4169,7 @@ if basestate: } PyObject *item = Py_BuildValue("{s:i,s:N,s:i,s:s,s:N,s:N}", "number", i, - "text", JM_EscapeStrFromStr(info.text), + "text", JM_UnicodeFromStr(info.text), "depth", info.depth, "type", type, "on", JM_BOOL(info.selected), @@ -4336,6 +4390,8 @@ if basestate: } return Py_BuildValue("i", xref); } + + struct Annot; void internal_keep_annot(struct Annot* annot) { @@ -4618,6 +4674,7 @@ if basestate: old_annots[k] = v page._erase() # remove the page page = None + TOOLS.store_shrink(100) page = self.load_page(pno) # reload the page # copy annot refs over to the new dictionary @@ -4780,6 +4837,7 @@ if basestate: def __getitem__(self, i: int =0)->"Page": + assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)) if i not in self: raise IndexError("page not in document") return self.load_page(i) @@ -5914,7 +5972,7 @@ struct Page { def add_file_annot(self, point: point_like, - buffer: typing.ByteString, + buffer: ByteString, filename: str, ufilename: OptStr =None, desc: OptStr =None, @@ -6302,7 +6360,7 @@ def get_oc_items(self) -> list: cmd = item[0] rest = item[1:] if cmd == "re": - item = ("re", Rect(rest[0]), rest[1]) + item = ("re", Rect(rest[0]).normalize(), rest[1]) elif cmd == "qu": item = ("qu", Quad(rest[0])) else: @@ -6437,7 +6495,7 @@ def get_oc_items(self) -> list: cmd = item[0] rest = item[1:] if cmd == "re": - item = ("re", Rect(rest[0]), rest[1]) + item = ("re", Rect(rest[0]).normalize(), rest[1]) elif cmd == "qu": item = ("qu", Quad(rest[0])) else: @@ -6584,7 +6642,7 @@ def get_oc_items(self) -> list: { pdf_page *page = pdf_page_from_fz_page(gctx, (fz_page *) $self); int success = 0; - pdf_redact_options opts; + pdf_redact_options opts = {0}; opts.black_boxes = 0; // no black boxes opts.image_method = images; // how to treat images fz_try(gctx) { @@ -6909,20 +6967,24 @@ def get_oc_items(self) -> list: doc = self.parent if doc == None: raise ValueError("orphaned object: parent is None") + if not doc.is_pdf: raise ValueError("is no PDF") + valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox") + if boxtype not in valid_boxes: raise ValueError("bad boxtype") + + rect = Rect(rect) mb = self.mediabox rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) - rect = Rect(JM_TUPLE3(rect)) - if rect.is_infinite or rect.is_empty: - raise ValueError("rect is infinite or empty") - if rect not in mb: - raise ValueError("rect not in mediabox") + if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1): + raise ValueError(f"{boxtype} not in MediaBox") + doc.xref_set_key(self.xref, boxtype, "[%g %g %g %g]" % tuple(rect)) + def set_cropbox(self, rect): """Set the CropBox. Will also change Page.rect.""" return self._set_pagebox("CropBox", rect) @@ -7364,6 +7426,9 @@ def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None, if fontname.startswith("/"): fontname = fontname[1:] + inv_chars = INVALID_NAME_CHARS.intersection(fontname) + if inv_chars != set(): + raise ValueError(f"bad fontname chars {inv_chars}") font = CheckFont(self, fontname) if font is not None: # font already in font list of page @@ -8129,7 +8194,7 @@ Args: alphavalues: (bytes) with length (width * height) or 'None'. premultiply: (bool, True) premultiply colors with alpha values. opaque: (tuple, length colorspace.n) this color receives opacity 0. - matte: (tuple, length colorspace.n)) preblending background color. + matte: (tuple, length colorspace.n) preblending background color. """) PyObject *set_alpha(PyObject *alphavalues=NULL, int premultiply=1, PyObject *opaque=NULL, PyObject *matte=NULL) { @@ -8252,7 +8317,11 @@ Args: break; #if FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 22 case(7): // JPEG format + #if FZ_VERSION_MINOR < 24 fz_write_pixmap_as_jpeg(gctx, out, pm, jpg_quality); + #else + fz_write_pixmap_as_jpeg(gctx, out, pm, jpg_quality, 0 /*invert_cmyk*/); + #endif break; #endif default: @@ -8440,17 +8509,20 @@ def save(self, filename, output=None, jpg_quality=95): self.set_dpi(self.xres, self.yres) return self._writeIMG(filename, idx, jpg_quality) -def pil_save(self, *args, **kwargs): +def pil_save(self, *args, unmultiply=False, **kwargs): """Write to image file using Pillow. Args are passed to Pillow's Image.save method, see their documentation. Use instead of save when other output formats are desired. + + :arg bool unmultiply: generates Pillow mode "RGBa" instead of "RGBA". + Relevant for colorspace RGB with alpha only. """ EnsureOwnership(self) try: from PIL import Image except ImportError: - print("PIL/Pillow not installed") + print("Pillow not installed") raise cspace = self.colorspace @@ -8460,6 +8532,8 @@ def pil_save(self, *args, **kwargs): mode = "L" if self.alpha == 0 else "LA" elif cspace.n == 3: mode = "RGB" if self.alpha == 0 else "RGBA" + if mode == "RGBA" and unmultiply: + mode = "RGBa" else: mode = "CMYK" @@ -8470,7 +8544,7 @@ def pil_save(self, *args, **kwargs): img.save(*args, **kwargs) -def pil_tobytes(self, *args, **kwargs): +def pil_tobytes(self, *args, unmultiply=False, **kwargs): """Convert to binary image stream using pillow. Args are passed to Pillow's Image.save method, see their documentation. @@ -8479,7 +8553,7 @@ def pil_tobytes(self, *args, **kwargs): EnsureOwnership(self) from io import BytesIO bytes_out = BytesIO() - self.pil_save(bytes_out, *args, **kwargs) + self.pil_save(bytes_out, *args, unmultiply=unmultiply, **kwargs) return bytes_out.getvalue() %} @@ -11539,8 +11613,16 @@ struct TextPage { } unsigned char digest[16]; fz_image *img = block->u.i.image; + Py_ssize_t img_size = 0; + fz_compressed_buffer *cbuff = fz_compressed_image_buffer(gctx, img); + if (cbuff) { + img_size = (Py_ssize_t) cbuff->buffer->len; + } if (hashes) { pix = fz_get_pixmap_from_image(gctx, img, NULL, NULL, NULL, NULL); + if (img_size == 0) { + img_size = (Py_ssize_t) pix->w * pix->h * pix->n; + } fz_md5_pixmap(gctx, pix, digest); fz_drop_pixmap(gctx, pix); pix = NULL; @@ -11569,7 +11651,7 @@ struct TextPage { DICT_SETITEM_DROP(block_dict, dictkey_bpc, Py_BuildValue("i", (int) img->bpc)); DICT_SETITEM_DROP(block_dict, dictkey_size, - Py_BuildValue("n", (Py_ssize_t) fz_image_size(gctx, img))); + Py_BuildValue("n", img_size)); if (hashes) { DICT_SETITEMSTR_DROP(block_dict, "digest", PyBytes_FromStringAndSize(digest, 16)); @@ -11676,7 +11758,7 @@ struct TextPage { %pythonprepend extractWORDS %{"""Return a list with text word information."""%} PyObject * - extractWORDS() + extractWORDS(PyObject *delimiters=NULL) { fz_stext_block *block; fz_stext_line *line; @@ -11688,7 +11770,7 @@ struct TextPage { fz_rect wbbox = fz_empty_rect; // word bbox fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect tp_rect = this_tpage->mediabox; - + int word_delimiter = 0; PyObject *lines = NULL; fz_try(gctx) { buff = fz_new_buffer(gctx, 64); @@ -11710,10 +11792,10 @@ struct TextPage { !fz_is_infinite_rect(tp_rect)) { continue; } - if (ch->c == 32 && buflen == 0) - continue; // skip spaces at line start - if (ch->c == 32) { - if (!fz_is_empty_rect(wbbox)) { + word_delimiter = JM_is_word_delimiter(ch->c, delimiters); + if (word_delimiter) { + if (buflen == 0) continue; // skip spaces at line start + if (!fz_is_empty_rect(wbbox)) { // output word word_n = JM_append_word(gctx, lines, buff, &wbbox, block_n, line_n, word_n); } @@ -11799,10 +11881,10 @@ struct TextPage { fz_print_stext_page_as_xhtml(gctx, out, this_tpage, 0); break; default: - JM_print_stext_page_as_text(gctx, out, this_tpage); + JM_print_stext_page_as_text(gctx, res, this_tpage); break; } - text = JM_UnicodeFromBuffer(gctx, res); + text = JM_EscapeStrFromBuffer(gctx, res); } fz_always(gctx) { @@ -11817,28 +11899,20 @@ struct TextPage { //---------------------------------------------------------------- - // method extractRect() + // method extractTextbox() //---------------------------------------------------------------- + FITZEXCEPTION(extractTextbox, !result) PyObject *extractTextbox(PyObject *rect) { fz_stext_page *this_tpage = (fz_stext_page *) $self; fz_rect area = JM_rect_from_py(rect); PyObject *rc = NULL; - char *found = NULL; fz_try(gctx) { - char *found = JM_copy_rectangle(gctx, this_tpage, area); - if (found) { - rc = JM_UnicodeFromStr(found); - JM_Free(found); - } else { - rc = EMPTY_STRING; - } + rc = JM_copy_rectangle(gctx, this_tpage, area); } fz_catch(gctx) { - if (found) JM_Free(found); - return EMPTY_STRING; + return NULL; } - return rc; } diff --git a/fitz/helper-annot.i b/src_classic/helper-annot.i similarity index 100% rename from fitz/helper-annot.i rename to src_classic/helper-annot.i diff --git a/fitz/helper-convert.i b/src_classic/helper-convert.i similarity index 100% rename from fitz/helper-convert.i rename to src_classic/helper-convert.i diff --git a/fitz/helper-defines.i b/src_classic/helper-defines.i similarity index 99% rename from fitz/helper-defines.i rename to src_classic/helper-defines.i index 6bf14051d..9fe199ad4 100644 --- a/fitz/helper-defines.i +++ b/src_classic/helper-defines.i @@ -429,6 +429,7 @@ PyObject *util_hor_matrix(PyObject *C, PyObject *P) struct Annot; // Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO +struct Annot; PyObject *util_ensure_widget_calc(struct Annot *annot) { pdf_obj *PDFNAME_CO=NULL; diff --git a/fitz/helper-devices.i b/src_classic/helper-devices.i similarity index 95% rename from fitz/helper-devices.i rename to src_classic/helper-devices.i index 1a1b8613b..fb194f23e 100644 --- a/fitz/helper-devices.i +++ b/src_classic/helper-devices.i @@ -26,10 +26,12 @@ static fz_matrix trace_device_ptm; // page transformation matrix static fz_matrix trace_device_ctm; // trace device matrix static fz_matrix trace_device_rot; static fz_point dev_lastpoint = {0, 0}; +static fz_point dev_firstpoint = {0, 0}; +static int dev_havemove = 0; static fz_rect dev_pathrect; static float dev_pathfactor = 0; static int dev_linecount = 0; -static const char *layer_name=NULL; // optional content name +static char *layer_name=NULL; // optional content name static int path_type = 0; // one of the following values: #define FILL_PATH 1 #define STROKE_PATH 2 @@ -47,6 +49,8 @@ static void trace_device_reset() trace_device_rot = fz_identity; dev_lastpoint.x = 0; dev_lastpoint.y = 0; + dev_firstpoint.x = 0; + dev_firstpoint.y = 0; dev_pathrect.x0 = 0; dev_pathrect.y0 = 0; dev_pathrect.x1 = 0; @@ -218,6 +222,8 @@ trace_moveto(fz_context *ctx, void *dev_, float x, float y) dev_pathrect = fz_make_rect(dev_lastpoint.x, dev_lastpoint.y, dev_lastpoint.x, dev_lastpoint.y); } + dev_firstpoint = dev_lastpoint; + dev_havemove = 1; dev_linecount = 0; // reset # of consec. lines } @@ -272,8 +278,22 @@ trace_close(fz_context *ctx, void *dev_) return; } } - DICT_SETITEMSTR_DROP(dev_pathdict, "closePath", JM_BOOL(1)); dev_linecount = 0; // reset # of consec. lines + if (dev_havemove) { + if (dev_firstpoint.x != dev_lastpoint.x || dev_firstpoint.y != dev_lastpoint.y) { + PyObject *list = PyTuple_New(3); + PyTuple_SET_ITEM(list, 0, PyUnicode_FromString("l")); + PyTuple_SET_ITEM(list, 1, JM_py_from_point(dev_lastpoint)); + PyTuple_SET_ITEM(list, 2, JM_py_from_point(dev_firstpoint)); + dev_lastpoint = dev_firstpoint; + PyObject *items = PyDict_GetItem(dev_pathdict, dictkey_items); + LIST_APPEND_DROP(items, list); + } + dev_havemove = 0; + DICT_SETITEMSTR_DROP(dev_pathdict, "closePath", JM_BOOL(0)); + } else { + DICT_SETITEMSTR_DROP(dev_pathdict, "closePath", JM_BOOL(1)); + } } static const fz_path_walker trace_path_walker = @@ -307,7 +327,7 @@ jm_lineart_path(fz_context *ctx, jm_lineart_device *dev, const fz_path *path) DICT_SETITEM_DROP(dev_pathdict, dictkey_items, PyList_New(0)); fz_walk_path(ctx, path, &trace_path_walker, dev); // Check if any items were added ... - if (!PyList_Size(PyDict_GetItem(dev_pathdict, dictkey_items))) { + if (!PyDict_GetItem(dev_pathdict, dictkey_items) || !PyList_Size(PyDict_GetItem(dev_pathdict, dictkey_items))) { Py_CLEAR(dev_pathdict); } } @@ -396,7 +416,7 @@ jm_lineart_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, DICT_SETITEMSTR_DROP(dev_pathdict, "fill", jm_lineart_color(ctx, colorspace, color)); DICT_SETITEM_DROP(dev_pathdict, dictkey_rect, JM_py_from_rect(dev_pathrect)); DICT_SETITEMSTR_DROP(dev_pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); - DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_EscapeStrFromStr(layer_name)); + DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_UnicodeFromStr(layer_name)); if (dev->clips) { DICT_SETITEMSTR_DROP(dev_pathdict, "level", PyLong_FromLong(dev->depth)); } @@ -449,7 +469,7 @@ jm_lineart_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, } DICT_SETITEM_DROP(dev_pathdict, dictkey_rect, JM_py_from_rect(dev_pathrect)); - DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_EscapeStrFromStr(layer_name)); + DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_UnicodeFromStr(layer_name)); DICT_SETITEMSTR_DROP(dev_pathdict, "seqno", PyLong_FromSize_t(dev->seqno)); if (dev->clips) { DICT_SETITEMSTR_DROP(dev_pathdict, "level", PyLong_FromLong(dev->depth)); @@ -468,6 +488,9 @@ jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int trace_device_ctm = ctm; //fz_concat(ctm, trace_device_ptm); path_type = CLIP_PATH; jm_lineart_path(ctx, dev, path); + if (!dev_pathdict) { + return; + } DICT_SETITEM_DROP(dev_pathdict, dictkey_type, PyUnicode_FromString("clip")); DICT_SETITEMSTR_DROP(dev_pathdict, "even_odd", JM_BOOL(even_odd)); if (!PyDict_GetItemString(dev_pathdict, "closePath")) { @@ -475,7 +498,7 @@ jm_lineart_clip_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int } DICT_SETITEMSTR_DROP(dev_pathdict, "scissor", JM_py_from_rect(compute_scissor())); DICT_SETITEMSTR_DROP(dev_pathdict, "level", PyLong_FromLong(dev->depth)); - DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_EscapeStrFromStr(layer_name)); + DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_UnicodeFromStr(layer_name)); jm_append_merge(out, dev->method); dev->depth++; } @@ -489,6 +512,9 @@ jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *pat trace_device_ctm = ctm; //fz_concat(ctm, trace_device_ptm); path_type = CLIP_STROKE_PATH; jm_lineart_path(ctx, dev, path); + if (!dev_pathdict) { + return; + } DICT_SETITEM_DROP(dev_pathdict, dictkey_type, PyUnicode_FromString("clip")); DICT_SETITEMSTR_DROP(dev_pathdict, "even_odd", Py_BuildValue("s", NULL)); if (!PyDict_GetItemString(dev_pathdict, "closePath")) { @@ -496,7 +522,7 @@ jm_lineart_clip_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *pat } DICT_SETITEMSTR_DROP(dev_pathdict, "scissor", JM_py_from_rect(compute_scissor())); DICT_SETITEMSTR_DROP(dev_pathdict, "level", PyLong_FromLong(dev->depth)); - DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_EscapeStrFromStr(layer_name)); + DICT_SETITEMSTR_DROP(dev_pathdict, "layer", JM_UnicodeFromStr(layer_name)); jm_append_merge(out, dev->method); dev->depth++; } @@ -547,12 +573,13 @@ jm_lineart_pop_clip(fz_context *ctx, fz_device *dev_) static void jm_lineart_begin_layer(fz_context *ctx, fz_device *dev_, const char *name) { - layer_name = name; + layer_name = fz_strdup(ctx, name); } static void jm_lineart_end_layer(fz_context *ctx, fz_device *dev_) { + fz_free(ctx, layer_name); layer_name = NULL; } @@ -570,7 +597,7 @@ jm_lineart_begin_group(fz_context *ctx, fz_device *dev_, fz_rect bbox, fz_colors "blendmode", fz_blendmode_name(blendmode), "opacity", alpha, "level", dev->depth, - "layer", JM_EscapeStrFromStr(layer_name) + "layer", JM_UnicodeFromStr(layer_name) ); jm_append_merge(out, dev->method); dev->depth++; @@ -603,9 +630,9 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, PyObject *chars = PyTuple_New(span->len); fz_matrix mat = fz_concat(span->trm, ctm); // text transformation matrix fz_point dir = fz_transform_vector(fz_make_point(1, 0), mat); // writing direction - dir = fz_normalize_vector(dir); + double fsize = sqrt(dir.x * dir.x + dir.y * dir.y); - double fsize = sqrt(fabs((double) span->trm.a * (double) span->trm.d)); // font size + dir = fz_normalize_vector(dir); double linewidth, adv, asc, dsc; double space_adv = 0; float x0, y0, x1, y1; @@ -719,7 +746,7 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, DICT_SETITEMSTR_DROP(span_dict, "spacewidth", PyFloat_FromDouble(space_adv)); DICT_SETITEM_DROP(span_dict, dictkey_type, PyLong_FromLong((long) type)); DICT_SETITEM_DROP(span_dict, dictkey_bbox, JM_py_from_rect(span_bbox)); - DICT_SETITEMSTR_DROP(span_dict, "layer", JM_EscapeStrFromStr(layer_name)); + DICT_SETITEMSTR_DROP(span_dict, "layer", JM_UnicodeFromStr(layer_name)); DICT_SETITEMSTR_DROP(span_dict, "seqno", PyLong_FromSize_t(seqno)); DICT_SETITEM_DROP(span_dict, dictkey_chars, chars); LIST_APPEND_DROP(out, span_dict); @@ -914,7 +941,7 @@ jm_bbox_add_rect(fz_context *ctx, fz_device *dev, fz_rect rect, char *code) if (!bdev->layers) { LIST_APPEND_DROP(bdev->result, Py_BuildValue("sN", code, JM_py_from_rect(rect))); } else { - LIST_APPEND_DROP(bdev->result, Py_BuildValue("sNN", code, JM_py_from_rect(rect), JM_EscapeStrFromStr(layer_name))); + LIST_APPEND_DROP(bdev->result, Py_BuildValue("sNN", code, JM_py_from_rect(rect), JM_UnicodeFromStr(layer_name))); } } diff --git a/fitz/helper-fields.i b/src_classic/helper-fields.i similarity index 98% rename from fitz/helper-fields.i rename to src_classic/helper-fields.i index 3a6805a62..721dc34fe 100644 --- a/fitz/helper-fields.i +++ b/src_classic/helper-fields.i @@ -1097,6 +1097,14 @@ class Widget(object): for x in apnt: nstates.append(x.split()[0]) states["normal"] = nstates + if APN[0] == "xref": + nstates = [] + nxref = int(APN[1].split(" ")[0]) + APN = doc.xref_object(nxref) + apnt = APN.split("/")[1:] + for x in apnt: + nstates.append(x.split()[0]) + states["normal"] = nstates APD = doc.xref_get_key(xref, "AP/D") if APD[0] == "dict": dstates = [] @@ -1105,6 +1113,14 @@ class Widget(object): for x in apdt: dstates.append(x.split()[0]) states["down"] = dstates + if APD[0] == "xref": + dstates = [] + dxref = int(APD[1].split(" ")[0]) + APD = doc.xref_object(dxref) + apdt = APD.split("/")[1:] + for x in apdt: + dstates.append(x.split()[0]) + states["down"] = dstates return states def on_state(self): diff --git a/fitz/helper-fileobj.i b/src_classic/helper-fileobj.i similarity index 100% rename from fitz/helper-fileobj.i rename to src_classic/helper-fileobj.i diff --git a/fitz/helper-geo-c.i b/src_classic/helper-geo-c.i similarity index 100% rename from fitz/helper-geo-c.i rename to src_classic/helper-geo-c.i diff --git a/fitz/helper-geo-py.i b/src_classic/helper-geo-py.i similarity index 100% rename from fitz/helper-geo-py.i rename to src_classic/helper-geo-py.i diff --git a/fitz/helper-globals.i b/src_classic/helper-globals.i similarity index 100% rename from fitz/helper-globals.i rename to src_classic/helper-globals.i diff --git a/fitz/helper-other.i b/src_classic/helper-other.i similarity index 100% rename from fitz/helper-other.i rename to src_classic/helper-other.i diff --git a/fitz/helper-pdfinfo.i b/src_classic/helper-pdfinfo.i similarity index 100% rename from fitz/helper-pdfinfo.i rename to src_classic/helper-pdfinfo.i diff --git a/fitz/helper-pixmap.i b/src_classic/helper-pixmap.i similarity index 100% rename from fitz/helper-pixmap.i rename to src_classic/helper-pixmap.i diff --git a/fitz/helper-portfolio.i b/src_classic/helper-portfolio.i similarity index 100% rename from fitz/helper-portfolio.i rename to src_classic/helper-portfolio.i diff --git a/fitz/helper-python.i b/src_classic/helper-python.i similarity index 97% rename from fitz/helper-python.i rename to src_classic/helper-python.i index c48a1eecb..9a14a5df6 100644 --- a/fitz/helper-python.i +++ b/src_classic/helper-python.i @@ -53,52 +53,69 @@ TEXT_INHIBIT_SPACES = 8 TEXT_DEHYPHENATE = 16 TEXT_PRESERVE_SPANS = 32 TEXT_MEDIABOX_CLIP = 64 +TEXT_CID_FOR_UNKNOWN_UNICODE = 128 + +TEXTFLAGS_WORDS = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_BLOCKS = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_DICT = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) -TEXTFLAGS_WORDS = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP -) -TEXTFLAGS_BLOCKS = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP -) -TEXTFLAGS_DICT = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP - | TEXT_PRESERVE_IMAGES -) TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT -TEXTFLAGS_SEARCH = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP - | TEXT_DEHYPHENATE -) -TEXTFLAGS_HTML = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP - | TEXT_PRESERVE_IMAGES -) -TEXTFLAGS_XHTML = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP - | TEXT_PRESERVE_IMAGES -) -TEXTFLAGS_XML = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP -) -TEXTFLAGS_TEXT = ( - TEXT_PRESERVE_LIGATURES - | TEXT_PRESERVE_WHITESPACE - | TEXT_MEDIABOX_CLIP -) + +TEXTFLAGS_SEARCH = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_DEHYPHENATE + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_HTML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_XHTML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_PRESERVE_IMAGES + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_XML = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) + +TEXTFLAGS_TEXT = (0 + | TEXT_PRESERVE_LIGATURES + | TEXT_PRESERVE_WHITESPACE + | TEXT_MEDIABOX_CLIP + | TEXT_CID_FOR_UNKNOWN_UNICODE + ) # ------------------------------------------------------------------------------ # Simple text encoding options @@ -855,7 +872,7 @@ class linkDest(object): if self.uri.startswith("#"): self.named = "" self.kind = LINK_GOTO - m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),([0-9.]+),([0-9.]+)$', self.uri) + m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri) if m: self.page = int(m.group(1)) - 1 self.lt = Point(float((m.group(3))), float(m.group(4))) @@ -1228,7 +1245,7 @@ def planish_line(p1: point_like, p2: point_like) -> Matrix: return Matrix(util_hor_matrix(p1, p2)) -def image_profile(img: typing.ByteString) -> dict: +def image_profile(img: ByteString) -> dict: """ Return basic properties of an image. Args: diff --git a/src_classic/helper-select.i b/src_classic/helper-select.i new file mode 100644 index 000000000..2a547649e --- /dev/null +++ b/src_classic/helper-select.i @@ -0,0 +1,71 @@ +%{ +/* +# ------------------------------------------------------------------------ +# Copyright 2020-2022, Harald Lieder, mailto:harald.lieder@outlook.com +# License: GNU AFFERO GPL 3.0, https://www.gnu.org/licenses/agpl-3.0.html +# +# Part of "PyMuPDF", a Python binding for "MuPDF" (http://mupdf.com), a +# lightweight PDF, XPS, and E-book viewer, renderer and toolkit which is +# maintained and developed by Artifex Software, Inc. https://artifex.com. +# ------------------------------------------------------------------------ +*/ +void remove_dest_range(fz_context *ctx, pdf_document *pdf, PyObject *numbers) +{ + fz_try(ctx) { + int i, j, pno, len, pagecount = pdf_count_pages(ctx, pdf); + PyObject *n1 = NULL; + pdf_obj *target, *annots, *pageref, *o, *action, *dest; + for (i = 0; i < pagecount; i++) { + n1 = PyLong_FromLong((long) i); + if (PySet_Contains(numbers, n1)) { + Py_DECREF(n1); + continue; + } + Py_DECREF(n1); + + pageref = pdf_lookup_page_obj(ctx, pdf, i); + annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots)); + if (!annots) continue; + len = pdf_array_len(ctx, annots); + for (j = len - 1; j >= 0; j -= 1) { + o = pdf_array_get(ctx, annots, j); + if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link))) { + continue; + } + action = pdf_dict_get(ctx, o, PDF_NAME(A)); + dest = pdf_dict_get(ctx, o, PDF_NAME(Dest)); + if (action) { + if (!pdf_name_eq(ctx, pdf_dict_get(ctx, action, + PDF_NAME(S)), PDF_NAME(GoTo))) + continue; + dest = pdf_dict_get(ctx, action, PDF_NAME(D)); + } + pno = -1; + if (pdf_is_array(ctx, dest)) { + target = pdf_array_get(ctx, dest, 0); + pno = pdf_lookup_page_number(ctx, pdf, target); + } + else if (pdf_is_string(ctx, dest)) { + fz_location location = fz_resolve_link(ctx, &pdf->super, + pdf_to_text_string(ctx, dest), + NULL, NULL); + pno = location.page; + } + if (pno < 0) { // page number lookup did not work + continue; + } + n1 = PyLong_FromLong((long) pno); + if (PySet_Contains(numbers, n1)) { + pdf_array_delete(ctx, annots, j); + } + Py_DECREF(n1); + } + } + } + + fz_catch(ctx) { + fz_rethrow(ctx); + } + return; +} +%} diff --git a/fitz/helper-stext.i b/src_classic/helper-stext.i similarity index 94% rename from fitz/helper-stext.i rename to src_classic/helper-stext.i index d96bb761c..98cb3dfad 100644 --- a/fitz/helper-stext.i +++ b/src_classic/helper-stext.i @@ -29,6 +29,44 @@ JM_font_descender(fz_context *ctx, fz_font *font) } +//---------------------------------------------------------------- +// Return true if character is considered to be a word delimiter +//---------------------------------------------------------------- +static const int +JM_is_word_delimiter(int c, PyObject *delimiters) +{ + if (c <= 32 || c == 160) return 1; // a standard delimiter + + // extra delimiters must be a non-empty sequence + if (!delimiters || PyObject_Not(delimiters) || !PySequence_Check(delimiters)) { + return 0; + } + + // convert to tuple for easier looping + PyObject *delims = PySequence_Tuple(delimiters); + if (!delims) { + PyErr_Clear(); + return 0; + } + + // Make 1-char PyObject from character given as integer + PyObject *cchar = Py_BuildValue("C", c); // single character PyObject + Py_ssize_t i, len = PyTuple_Size(delims); + for (i = 0; i < len; i++) { + int rc = PyUnicode_Compare(cchar, PyTuple_GET_ITEM(delims, i)); + if (rc == 0) { // equal to a delimiter character + Py_DECREF(cchar); + Py_DECREF(delims); + PyErr_Clear(); + return 1; + } + } + + Py_DECREF(delims); + PyErr_Clear(); + return 0; +} + /* inactive //----------------------------------------------------------------------------- // Make OCR text page directly from an fz_page @@ -76,8 +114,12 @@ JM_new_stext_page_ocr_from_page(fz_context *ctx, fz_page *page, fz_rect rect, in //--------------------------------------------------------------------------- void JM_append_rune(fz_context *ctx, fz_buffer *buff, int ch) { - if ((ch >= 32 && ch <= 255) || ch == 10) { + if (ch == 92) { // prevent accidental "\u" etc. + fz_append_string(ctx, buff, "\\u005c"); + } else if ((ch >= 32 && ch <= 255) || ch == 10) { fz_append_byte(ctx, buff, ch); + } else if (ch >= 0xd800 && ch <= 0xdfff) { // surrogate Unicode range + fz_append_string(ctx, buff, "\\ufffd"); } else if (ch <= 0xffff) { // 4 hex digits fz_append_printf(ctx, buff, "\\u%04x", ch); } else { // 8 hex digits @@ -418,7 +460,7 @@ no_more_matches:; // character (which else leads to 2 new-lines). //----------------------------------------------------------------------------- void -JM_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page) +JM_print_stext_page_as_text(fz_context *ctx, fz_buffer *buff, fz_stext_page *page) { fz_stext_block *block; fz_stext_line *line; @@ -438,14 +480,11 @@ JM_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page if (fz_is_infinite_rect(rect) || JM_rects_overlap(rect, chbbox)) { last_char = ch->c; - n = fz_runetochar(utf, ch->c); - for (i = 0; i < n; i++) { - fz_write_byte(ctx, out, utf[i]); - } + JM_append_rune(ctx, buff, ch->c); } } if (last_char != 10 && last_char > 0) { - fz_write_string(ctx, out, "\n"); + fz_append_string(ctx, buff, "\n"); } } } @@ -663,6 +702,7 @@ static void JM_make_image_block(fz_context *ctx, fz_stext_block *block, PyObject fz_always(ctx) { if (!bytes) bytes = JM_BinFromChar(""); + DICT_SETITEM_DROP(block_dict, dictkey_width, Py_BuildValue("i", w)); DICT_SETITEM_DROP(block_dict, dictkey_height, @@ -680,7 +720,7 @@ static void JM_make_image_block(fz_context *ctx, fz_stext_block *block, PyObject DICT_SETITEM_DROP(block_dict, dictkey_matrix, JM_py_from_matrix(block->u.i.transform)); DICT_SETITEM_DROP(block_dict, dictkey_size, - Py_BuildValue("n", (Py_ssize_t) fz_image_size(ctx, image))); + Py_BuildValue("n", PyBytes_Size(bytes))); DICT_SETITEM_DROP(block_dict, dictkey_image, bytes); fz_drop_buffer(ctx, freebuf); @@ -752,18 +792,17 @@ void JM_make_textpage_dict(fz_context *ctx, fz_stext_page *tp, PyObject *page_di //--------------------------------------------------------------------- -char * +PyObject * JM_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area) { fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; fz_buffer *buffer; - unsigned char *s; int need_new_line = 0; - - buffer = fz_new_buffer(ctx, 1024); + PyObject *rc = NULL; fz_try(ctx) { + buffer = fz_new_buffer(ctx, 1024); for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; @@ -777,7 +816,7 @@ JM_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area) fz_append_string(ctx, buffer, "\n"); need_new_line = 0; } - fz_append_rune(ctx, buffer, ch->c < 32 ? FZ_REPLACEMENT_CHARACTER : ch->c); + JM_append_rune(ctx, buffer, ch->c); } } if (line_had_text) @@ -785,16 +824,19 @@ JM_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area) } } fz_terminate_buffer(ctx, buffer); + rc = JM_EscapeStrFromBuffer(ctx, buffer); + if (!rc) { + rc = EMPTY_STRING; + PyErr_Clear(); + } } + fz_always(ctx) { + fz_drop_buffer(ctx, buffer); + } fz_catch(ctx) { - fz_drop_buffer(ctx, buffer); fz_rethrow(ctx); } - - - fz_buffer_extract(ctx, buffer, &s); /* take over the data */ - fz_drop_buffer(ctx, buffer); - return (char*)s; + return rc; } //--------------------------------------------------------------------- diff --git a/fitz/helper-xobject.i b/src_classic/helper-xobject.i similarity index 100% rename from fitz/helper-xobject.i rename to src_classic/helper-xobject.i diff --git a/fitz/utils.py b/src_classic/utils.py similarity index 98% rename from fitz/utils.py rename to src_classic/utils.py index f91712d18..c1369afbc 100644 --- a/fitz/utils.py +++ b/src_classic/utils.py @@ -16,19 +16,27 @@ import typing import warnings -from fitz import * +from fitz_old import * TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX") point_like = "point_like" rect_like = "rect_like" matrix_like = "matrix_like" quad_like = "quad_like" + +# ByteString is gone from typing in 3.14. +# collections.abc.Buffer available from 3.12 only +try: + ByteString = typing.ByteString +except AttributeError: + ByteString = bytes | bytearray | memoryview + AnyType = typing.Any OptInt = typing.Union[int, None] OptFloat = typing.Optional[float] OptStr = typing.Optional[str] OptDict = typing.Optional[dict] -OptBytes = typing.Optional[typing.ByteString] +OptBytes = typing.Optional[ByteString] OptSeq = typing.Optional[typing.Sequence] """ @@ -275,7 +283,7 @@ def delete_image(page: Page, xref: int): xref: xref of the image to delete. """ # make a small 100% transparent pixmap (of just any dimension) - pix = fitz.Pixmap(fitz.csGRAY, (0, 0, 1, 1), 1) + pix = fitz_old.Pixmap(fitz_old.csGRAY, (0, 0, 1, 1), 1) pix.clear_with() # clear all samples bytes to 0x00 page.replace_image(xref, pixmap=pix) @@ -392,7 +400,6 @@ def insert_image(page, rect, **kwargs): _imgname = n + str(i) # try new name digests = doc.InsertedImages - xref, digests = page._insert_image( filename=filename, pixmap=pixmap, @@ -410,7 +417,6 @@ def insert_image(page, rect, **kwargs): _imgname=_imgname, digests=digests, ) - if digests != None: doc.InsertedImages = digests @@ -536,25 +542,33 @@ def get_text_words( flags: OptInt = None, textpage: TextPage = None, sort: bool = False, + delimiters=None, ) -> list: """Return the text words as a list with the bbox for each word. Args: flags: (int) control the amount of data parsed into the textpage. + delimiters: (str,list) characters to use as word delimiters + + Returns: + Word tuples (x0, y0, x1, y1, "word", bno, lno, wno). """ CheckParent(page) if flags is None: flags = TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES | TEXT_MEDIABOX_CLIP + tp = textpage if tp is None: tp = page.get_textpage(clip=clip, flags=flags) elif getattr(tp, "parent") != page: raise ValueError("not a textpage of this page") - words = tp.extractWORDS() + + words = tp.extractWORDS(delimiters) if textpage is None: del tp if sort is True: words.sort(key=lambda w: (w[3], w[0])) + return words @@ -751,6 +765,7 @@ def get_text( flags: OptInt = None, textpage: TextPage = None, sort: bool = False, + delimiters=None, ): """Extract text from a page or an annotation. @@ -770,28 +785,31 @@ def get_text( Default and misspelling choice is "text". """ formats = { - "text": 0, - "html": 1, - "json": 1, - "rawjson": 1, - "xml": 0, - "xhtml": 1, - "dict": 1, - "rawdict": 1, - "words": 0, - "blocks": 1, + "text": fitz.TEXTFLAGS_TEXT, + "html": fitz.TEXTFLAGS_HTML, + "json": fitz.TEXTFLAGS_DICT, + "rawjson": fitz.TEXTFLAGS_RAWDICT, + "xml": fitz.TEXTFLAGS_XML, + "xhtml": fitz.TEXTFLAGS_XHTML, + "dict": fitz.TEXTFLAGS_DICT, + "rawdict": fitz.TEXTFLAGS_RAWDICT, + "words": fitz.TEXTFLAGS_WORDS, + "blocks": fitz.TEXTFLAGS_BLOCKS, } option = option.lower() if option not in formats: option = "text" if flags is None: - flags = TEXT_PRESERVE_WHITESPACE | TEXT_PRESERVE_LIGATURES | TEXT_MEDIABOX_CLIP - if formats[option] == 1: - flags |= TEXT_PRESERVE_IMAGES + flags = formats[option] if option == "words": return get_text_words( - page, clip=clip, flags=flags, textpage=textpage, sort=sort + page, + clip=clip, + flags=flags, + textpage=textpage, + sort=sort, + delimiters=delimiters, ) if option == "blocks": return get_text_blocks( @@ -806,6 +824,7 @@ def get_text( cb = None elif type(page) is Page: cb = page.cropbox + # TextPage with or without images tp = textpage if tp is None: @@ -1049,7 +1068,6 @@ def recurse(olItem, liste, lvl): raise ValueError("document closed") doc.init_doc() olItem = doc.outline - if not olItem: return [] lvl = 1 @@ -1584,9 +1602,7 @@ def cre_annot(lnk, xref_dst, pno_src, ctm): if l["kind"] == LINK_GOTO and (l["page"] not in pno_src): continue # GOTO link target not in copied pages annot_text = cre_annot(l, xref_dst, pno_src, ctm) - if not annot_text: - print("cannot create /Annot for kind: " + str(l["kind"])) - else: + if annot_text: link_tab.append(annot_text) if link_tab != []: page_dst._addAnnot_FromString(tuple(link_tab)) @@ -1668,7 +1684,6 @@ def getLinkText(page: Page, lnk: dict) -> str: i += 1 # add /NM key to object definition annot = annot.replace("/Link", "/Link/NM(%s)" % name) - return annot @@ -1725,7 +1740,7 @@ def insert_textbox( align: int = 0, rotate: int = 0, render_mode: int = 0, - border_width: float = 1, + border_width: float = 0.05, morph: OptSeq = None, overlay: bool = True, stroke_opacity: float = 1, @@ -1791,7 +1806,7 @@ def insert_text( encoding: int = 0, color: OptSeq = None, fill: OptSeq = None, - border_width: float = 1, + border_width: float = 0.05, render_mode: int = 0, rotate: int = 0, morph: OptSeq = None, @@ -3430,7 +3445,7 @@ def insert_text( color: OptSeq = None, fill: OptSeq = None, render_mode: int = 0, - border_width: float = 1, + border_width: float = 0.05, rotate: int = 0, morph: OptSeq = None, stroke_opacity: float = 1, @@ -3561,10 +3576,11 @@ def insert_text( else: alpha = "/%s gs\n" % alpha nres = templ1 % (bdc, alpha, cm, left, top, fname, fontsize) + if render_mode > 0: nres += "%i Tr " % render_mode - if border_width != 1: - nres += "%g w " % border_width + nres += "%g w " % (border_width * fontsize) + if color is not None: nres += color_str if fill is not None: @@ -3590,16 +3606,16 @@ def insert_text( nres += "\nET\n%sQ\n" % emc - # ========================================================================= + # ===================================================================== # end of text insertion - # ========================================================================= + # ===================================================================== # update the /Contents object self.text_cont += nres return nlines - # ============================================================================== + # ========================================================================= # Shape.insert_textbox - # ============================================================================== + # ========================================================================= def insert_textbox( self, rect: rect_like, @@ -3613,7 +3629,7 @@ def insert_textbox( color: OptSeq = None, fill: OptSeq = None, expandtabs: int = 1, - border_width: float = 1, + border_width: float = 0.05, align: int = 0, render_mode: int = 0, rotate: int = 0, @@ -3634,7 +3650,7 @@ def insert_textbox( color -- RGB stroke color triple fill -- RGB fill color triple render_mode -- text rendering control - border_width -- thickness of glyph borders + border_width -- thickness of glyph borders as percentage of fontsize expandtabs -- handles tabulators with string function align -- left, center, right, justified rotate -- 0, 90, 180, or 270 degrees @@ -3737,7 +3753,7 @@ def pixlen(x): else: return len(x) * fontsize - # ---------------------------------------------------------------------- + # --------------------------------------------------------------------- if ordering < 0: blen = glyphs[32][1] * fontsize # pixel size of space character @@ -3755,99 +3771,107 @@ def pixlen(x): else: cm = "" - # --------------------------------------------------------------------------- + # --------------------------------------------------------------------- # adjust for text orientation / rotation - # --------------------------------------------------------------------------- + # --------------------------------------------------------------------- progr = 1 # direction of line progress c_pnt = Point(0, fontsize * ascender) # used for line progress if rot == 0: # normal orientation point = rect.tl + c_pnt # line 1 is 'lheight' below top - pos = point.y + self.y # y of first line maxwidth = rect.width # pixels available in one line - maxpos = rect.y1 + self.y # lines must not be below this + maxheight = rect.height # available text height elif rot == 90: # rotate counter clockwise c_pnt = Point(fontsize * ascender, 0) # progress in x-direction point = rect.bl + c_pnt # line 1 'lheight' away from left - pos = point.x + self.x # position of first line maxwidth = rect.height # pixels available in one line - maxpos = rect.x1 + self.x # lines must not be right of this + maxheight = rect.width # available text height cm += cmp90 elif rot == 180: # text upside down # progress upwards in y direction c_pnt = -Point(0, fontsize * ascender) point = rect.br + c_pnt # line 1 'lheight' above bottom - pos = point.y + self.y # position of first line maxwidth = rect.width # pixels available in one line progr = -1 # subtract lheight for next line - maxpos = rect.y0 + self.y # lines must not be above this + maxheight = rect.height # available text height cm += cm180 else: # rotate clockwise (270 or -90) # progress from right to left c_pnt = -Point(fontsize * ascender, 0) point = rect.tr + c_pnt # line 1 'lheight' left of right - pos = point.x + self.x # position of first line maxwidth = rect.height # pixels available in one line progr = -1 # subtract lheight for next line - maxpos = rect.x0 + self.x # lines must not left of this + maxheight = rect.width # available text height cm += cmm90 - # ======================================================================= + # ===================================================================== # line loop - # ======================================================================= + # ===================================================================== just_tab = [] # 'justify' indicators per line for i, line in enumerate(t0): line_t = line.expandtabs(expandtabs).split(" ") # split into words + num_words = len(line_t) lbuff = "" # init line buffer rest = maxwidth # available line pixels - # =================================================================== + # ================================================================= # word loop - # =================================================================== - for word in line_t: + # ================================================================= + for j in range(num_words): + word = line_t[j] pl_w = pixlen(word) # pixel len of word - if rest >= pl_w: # will it fit on the line? - lbuff += word + " " # yes, and append word + if rest >= pl_w: # does it fit on the line? + lbuff += word + " " # yes, append word rest -= pl_w + blen # update available line space - continue - # word won't fit - output line (if not empty) - if len(lbuff) > 0: + continue # next word + + # word doesn't fit - output line (if not empty) + if lbuff: lbuff = lbuff.rstrip() + "\n" # line full, append line break text += lbuff # append to total text - pos += lheight * progr # increase line position - just_tab.append(True) # line is justify candidate - lbuff = "" # re-init line buffer + just_tab.append(True) # can align-justify + + lbuff = "" # re-init line buffer rest = maxwidth # re-init avail. space + if pl_w <= maxwidth: # word shorter than 1 line? lbuff = word + " " # start the line with it rest = maxwidth - pl_w - blen # update free space continue + # long word: split across multiple lines - char by char ... if len(just_tab) > 0: - just_tab[-1] = False # reset justify indicator + just_tab[-1] = False # cannot align-justify for c in word: if pixlen(lbuff) <= maxwidth - pixlen(c): lbuff += c else: # line full lbuff += "\n" # close line text += lbuff # append to text - pos += lheight * progr # increase line position - just_tab.append(False) # do not justify line + just_tab.append(False) # cannot align-justify lbuff = c # start new line with this char + lbuff += " " # finish long word rest = maxwidth - pixlen(lbuff) # long word stored - if lbuff != "": # unprocessed line content? + if lbuff: # unprocessed line content? text += lbuff.rstrip() # append to text - just_tab.append(False) # do not justify line + just_tab.append(False) # cannot align-justify + if i < len(t0) - 1: # not the last line? text += "\n" # insert line break - pos += lheight * progr # increase line position - more = (pos - maxpos) * progr # difference to rect size limit + # compute used part of the textbox + if text.endswith("\n"): + text = text[:-1] + lb_count = text.count("\n") + 1 # number of lines written + + # text height = line count * line height plus one descender value + text_height = lheight * lb_count - descender * fontsize + more = text_height - maxheight # difference to height limit if more > EPSILON: # landed too much outside rect return (-1) * more # return deficit, don't output @@ -3891,8 +3915,11 @@ def pixlen(x): top = -height + pnt.y + self.y nres += templ % (left, top, fname, fontsize) + if render_mode > 0: nres += "%i Tr " % render_mode + nres += "%g w " % (border_width * fontsize) + if align == 3: nres += "%g Tw " % spacing @@ -3900,8 +3927,6 @@ def pixlen(x): nres += color_str if fill is not None: nres += fill_str - if border_width != 1: - nres += "%g w " % border_width nres += "%sTJ\n" % getTJstr(t, tj_glyphs, simple, ordering) nres += "ET\n%sQ\n" % emc @@ -4021,21 +4046,6 @@ def commit(self, overlay: bool = True) -> None: self.totalcont = "" # re-use return - # define deprecated aliases ------------------------------------------ - drawBezier = draw_bezier - drawCircle = draw_circle - drawCurve = draw_curve - drawLine = draw_line - drawOval = draw_oval - drawPolyline = draw_polyline - drawQuad = draw_quad - drawRect = draw_rect - drawSector = draw_sector - drawSquiggle = draw_squiggle - drawZigzag = draw_zigzag - insertText = insert_text - insertTextbox = insert_textbox - def apply_redactions(page: Page, images: int = 2) -> bool: """Apply the redaction annotations of the page. @@ -5051,7 +5061,6 @@ def recover_line_quad(line: dict, spans: list = None) -> Quad: line_dir = line["dir"] # text direction cos, sin = line_dir q0 = recover_quad(line_dir, spans[0]) # quad of first span - if len(spans) > 1: # get quad of last span q1 = recover_quad(line_dir, spans[-1]) else: diff --git a/src_classic/version.i b/src_classic/version.i new file mode 100644 index 000000000..c27cb5e5a --- /dev/null +++ b/src_classic/version.i @@ -0,0 +1,7 @@ +%pythoncode %{ +VersionFitz = "1.24.1" # MuPDF version. +VersionBind = "1.24.1" # PyMuPDF version. +VersionDate = "2024-04-02 00:00:01" +version = (VersionBind, VersionFitz, "20240402000001") +pymupdf_version_tuple = tuple( [int(i) for i in VersionFitz.split('.')]) +%} diff --git a/tests/README.md b/tests/README.md index 4842ff48f..b793d2489 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,83 +1,65 @@ -# Testing your PyMuPDF Installation -This folder contains a number of basic tests to confirm that PyMuPDF is correctly installed. +# PyMuPDF tests -The following areas are currently covered: -* encryption and decryption -* extraction of drawings -* "geometry": correct working of points, rectangles, matrices and operator algebra -* image bbox computation -* handling of embedded files -* image insertion -* PDF document joining -* computation of quadrilaterals for non-horizontal text -* extraction of non-unicode fontnames -* handling of PDF standard metadata -* handling of non-PDF document types -* programmatic editing of PDF object definition sources -* mass deletion of PDF pages -* handling of PDF page labels -* pixmap handling -* show PDF pages inside other PDF pages -* text extraction -* text searching -* handling of PDF Tables of Contents -* annotation handling -* field / widget handling -* image extraction +To run these tests: -This is **_not a coverage test_**, although a significant part of the relevant Python part **_does_** get executed (ca. 80%). Achieving a much higher code coverage remains an ongoing task. +* Create and enter a venv. +* Install PyMuPDF. +* Install the Python packages listed in + `PyMuPDF/scripts/gh_release.py:test_packages`. +* Run pytest on the PyMuPDF directory. -To use these scripts, you must have installed `pytest`: - -`python -m pip install pytest` - -Then simply execute `python -m pytest` in a terminal of this folder. `pytest` will automatically locate all scripts and execute them. All tests should run successfully and you will see an output like this: +For example, as of 2023-12-11: ``` -pytest --cov=fitz -============================ test session starts ============================= -platform linux -- Python 3.8.5, pytest-6.2.4, py-1.10.0, pluggy-0.13.1 -rootdir: /mnt/d/harald/desktop/fitzPython119/pymupdf -plugins: cov-2.12.0 -collected 79 items - -test_annots.py ............... [ 18%] -test_badfonts.py . [ 20%] -test_crypting.py . [ 21%] -test_drawings.py .. [ 24%] -test_embeddedfiles.py . [ 25%] -test_font.py .. [ 27%] -test_general.py ............ [ 43%] -test_geometry.py ....... [ 51%] -test_imagebbox.py . [ 53%] -test_insertimage.py . [ 54%] -test_insertpdf.py . [ 55%] -test_linequad.py . [ 56%] -test_metadata.py .. [ 59%] -test_nonpdf.py ... [ 63%] -test_object_manipulation.py ... [ 67%] -test_optional_content.py .. [ 69%] -test_pagedelete.py . [ 70%] -test_pagelabels.py . [ 72%] -test_pixmap.py ...... [ 79%] -test_showpdfpage.py . [ 81%] -test_textbox.py .... [ 86%] -test_textextract.py . [ 87%] -test_textsearch.py . [ 88%] -test_toc.py .... [ 93%] -test_widgets.py ..... [100%] - ------------ coverage: platform linux, python 3.8.5-final-0 ----------- -Name Stmts Miss Cover ------------------------------------------------------------------------------ -/usr/local/lib/python3.8/dist-packages/fitz/__init__.py 335 13 96% -/usr/local/lib/python3.8/dist-packages/fitz/fitz.py 4183 740 82% -/usr/local/lib/python3.8/dist-packages/fitz/utils.py 2196 669 70% ------------------------------------------------------------------------------ -TOTAL 6714 1422 79% +> python -m pip install pytest fontTools psutil pymupdf-fonts pillow +> pytest PyMuPDF +============================= test session starts ============================== +platform linux -- Python 3.11.2, pytest-7.4.3, pluggy-1.3.0 +rootdir: /home/jules/artifex-remote/PyMuPDF +configfile: pytest.ini +collected 171 items +PyMuPDF/tests/test_2548.py . [ 0%] +PyMuPDF/tests/test_2634.py . [ 1%] +PyMuPDF/tests/test_2736.py . [ 1%] +PyMuPDF/tests/test_2791.py . [ 2%] +PyMuPDF/tests/test_2861.py . [ 2%] +PyMuPDF/tests/test_annots.py .................. [ 13%] +PyMuPDF/tests/test_badfonts.py . [ 14%] +PyMuPDF/tests/test_crypting.py . [ 14%] +PyMuPDF/tests/test_docs_samples.py ............. [ 22%] +PyMuPDF/tests/test_drawings.py ...... [ 25%] +PyMuPDF/tests/test_embeddedfiles.py . [ 26%] +PyMuPDF/tests/test_extractimage.py .. [ 27%] +PyMuPDF/tests/test_flake8.py . [ 28%] +PyMuPDF/tests/test_font.py ..... [ 30%] +PyMuPDF/tests/test_general.py .......................................... [ 55%] +... [ 57%] +PyMuPDF/tests/test_geometry.py ........ [ 61%] +PyMuPDF/tests/test_imagebbox.py .. [ 63%] +PyMuPDF/tests/test_insertimage.py .. [ 64%] +PyMuPDF/tests/test_insertpdf.py .. [ 65%] +PyMuPDF/tests/test_linequad.py . [ 66%] +PyMuPDF/tests/test_metadata.py .. [ 67%] +PyMuPDF/tests/test_nonpdf.py ... [ 69%] +PyMuPDF/tests/test_object_manipulation.py .... [ 71%] +PyMuPDF/tests/test_optional_content.py .. [ 72%] +PyMuPDF/tests/test_pagedelete.py . [ 73%] +PyMuPDF/tests/test_pagelabels.py . [ 73%] +PyMuPDF/tests/test_pixmap.py .......... [ 79%] +PyMuPDF/tests/test_showpdfpage.py . [ 80%] +PyMuPDF/tests/test_story.py ... [ 81%] +PyMuPDF/tests/test_tables.py ... [ 83%] +PyMuPDF/tests/test_tesseract.py . [ 84%] +PyMuPDF/tests/test_textbox.py ...... [ 87%] +PyMuPDF/tests/test_textextract.py .. [ 88%] +PyMuPDF/tests/test_textsearch.py .. [ 90%] +PyMuPDF/tests/test_toc.py ........ [ 94%] +PyMuPDF/tests/test_widgets.py ........ [ 99%] +PyMuPDF/tests/test_word_delimiters.py . [100%] -============================ 79 passed in 5.76s ============================== +======================== 171 passed in 78.65s (0:01:18) ======================== +> ``` ## Known test failure with non-default build of MuPDF @@ -89,3 +71,11 @@ having been built with PyMuPDF's customized configuration, ``fitz/_config.h``. One can skip this particular test by adding ``-k 'not test_textbox3'`` to the pytest command line. + + +## Resuming at a particular test. + +To skip tests before a particular test, set PYMUPDF_PYTEST_RESUME to the name +of the function. + +For example PYMUPDF_PYTEST_RESUME=test_haslinks. diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..06af5b7aa --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,157 @@ +import copy +import os +import platform +import sys + +import pymupdf + +import pytest + +PYMUPDF_PYTEST_RESUME = os.environ.get('PYMUPDF_PYTEST_RESUME') + +@pytest.fixture(autouse=True) +def wrap(request): + ''' + Check that tests return with empty MuPDF warnings buffer. For example this + detects failure to call fz_close_output() before fz_drop_output(), which + (as of 2024-4-12) generates a warning from MuPDF. + + As of 2024-09-12 we also detect whether tests leave fds open; but for now + do not fail tests, because many tests need fixing. + ''' + global PYMUPDF_PYTEST_RESUME + if PYMUPDF_PYTEST_RESUME: + # Skip all tests until we reach a matching name. + if PYMUPDF_PYTEST_RESUME == request.function.__name__: + print(f'### {PYMUPDF_PYTEST_RESUME=}: resuming at {request.function.__name__=}.') + PYMUPDF_PYTEST_RESUME = None + else: + print(f'### {PYMUPDF_PYTEST_RESUME=}: Skipping {request.function.__name__=}.') + return + + wt = pymupdf.TOOLS.mupdf_warnings() + assert not wt, f'{wt=}' + if platform.python_implementation() == 'GraalVM': + pymupdf.TOOLS.set_small_glyph_heights() + else: + assert not pymupdf.TOOLS.set_small_glyph_heights() + next_fd_before = os.open(__file__, os.O_RDONLY) + os.close(next_fd_before) + + if platform.system() == 'Linux' and platform.python_implementation() != 'GraalVM': + test_fds = True + else: + test_fds = False + + if test_fds: + # Gather detailed information about leaked fds. + def get_fds(): + import subprocess + path = 'PyMuPDF-linx-fds' + path_l = 'PyMuPDF-linx-fds-l' + command = f'ls /proc/{os.getpid()}/fd > {path}' + command_l = f'ls -l /proc/{os.getpid()}/fd > {path_l}' + subprocess.run(command, shell=1) + subprocess.run(command_l, shell=1) + with open(path) as f: + ret = f.read() + ret = ret.replace('\n', ' ') + with open(path_l) as f: + ret_l = f.read() + return ret, ret_l + open_fds_before, open_fds_before_l = get_fds() + + pymupdf._log_items_clear() + pymupdf._log_items_active(True) + + JM_annot_id_stem = pymupdf.JM_annot_id_stem + + def get_members(a): + ret = dict() + for n in dir(a): + if not n.startswith('_'): + v = getattr(a, n) + ret[n] = v + return ret + + # Allow post-test checking that pymupdf._globals has not changed. + _globals_pre = get_members(pymupdf._globals) + + testsfailed_before = request.session.testsfailed + + # Run the test. + rep = yield + + sys.stdout.flush() + + # This seems the only way for us to tell that a test has failed. In + # particular, is always None. We're implicitly relying on tests not + # being run in parallel. + # + failed = request.session.testsfailed - testsfailed_before + assert failed in (0, 1) + + if failed: + # Do not check post-test conditions if the test as failed. This avoids + # additional confusing `ERROR` status for failed tests. + return + + # Test has run; check it did not create any MuPDF warnings etc. + wt = pymupdf.TOOLS.mupdf_warnings() + if not hasattr(pymupdf, 'mupdf'): + print(f'Not checking mupdf_warnings on classic.') + else: + assert not wt, f'Warnings text not empty: {wt=}' + + assert not pymupdf.TOOLS.set_small_glyph_heights() + + _globals_post = get_members(pymupdf._globals) + if _globals_post != _globals_pre: + print(f'Test has changed pymupdf._globals from {_globals_pre=} to {_globals_post=}') + assert 0 + + log_items = pymupdf._log_items() + assert not log_items, f'log() was called; {len(log_items)=}.' + + assert pymupdf.JM_annot_id_stem == JM_annot_id_stem, \ + f'pymupdf.JM_annot_id_stem has changed from {JM_annot_id_stem!r} to {pymupdf.JM_annot_id_stem!r}' + + if test_fds: + # Show detailed information about leaked fds. + open_fds_after, open_fds_after_l = get_fds() + if open_fds_after != open_fds_before: + import textwrap + print(f'Test has changed process fds:') + print(f' {open_fds_before=}') + print(f' {open_fds_after=}') + print(f'open_fds_before_l:') + print(textwrap.indent(open_fds_before_l, ' ')) + print(f'open_fds_after_l:') + print(textwrap.indent(open_fds_after_l, ' ')) + #assert 0 + + next_fd_after = os.open(__file__, os.O_RDONLY) + os.close(next_fd_after) + + if test_fds and next_fd_after != next_fd_before: + print(f'Test has leaked fds, {next_fd_before=} {next_fd_after=}.') + #assert 0, f'Test has leaked fds, {next_fd_before=} {next_fd_after=}. {args=} {kwargs=}.' + + if 0: + # This code can be useful to track down test failures caused by other + # tests modifying global state. + # + # We run a particular test menually after each test returns. + sys.path.insert(0, os.path.dirname(__file__)) + try: + import test_tables + finally: + del sys.path[0] + print(f'### Calling test_tables.test_md_styles().') + try: + test_tables.test_md_styles() + except Exception as e: + print(f'### test_tables.test_md_styles() failed: {e}') + raise + else: + print(f'### test_tables.test_md_styles() passed.') diff --git a/tests/gentle_compare.py b/tests/gentle_compare.py new file mode 100644 index 000000000..aa155bb7c --- /dev/null +++ b/tests/gentle_compare.py @@ -0,0 +1,96 @@ +import math + +import pymupdf + + +def gentle_compare(w0, w1): + """Check lists of "words" extractions for approximate equality. + + * both lists must have same length + * word items must contain same word strings + * word rectangles must be approximately equal + """ + tolerance = 1e-3 # maximum (Euclidean) norm of difference rectangle + word_count = len(w0) # number of words + if word_count != len(w1): + print(f"different number of words: {word_count}/{len(w1)}") + return False + for i in range(word_count): + if w0[i][4] != w1[i][4]: # word strings must be the same + print(f"word {i} mismatch") + return False + r0 = pymupdf.Rect(w0[i][:4]) # rect of first word + r1 = pymupdf.Rect(w1[i][:4]) # rect of second word + delta = (r1 - r0).norm() # norm of difference rectangle + if delta > tolerance: + print(f"word {i}: rectangle mismatch {delta}") + return False + return True + + +def rms(a, b, verbose=None, out_prefix=''): + ''' + Returns RMS diff of raw bytes of two sequences. + ''' + assert len(a) == len(b) + e = 0 + for i, (aa, bb) in enumerate(zip(a, b)): + if verbose and (i % verbose == 0): + print(f'{out_prefix}rms(): {i=} {e=} {aa=} {aa=}.') + e += (aa - bb) ** 2 + rms = math.sqrt(e / len(a)) + return rms + + +def pixmaps_rms(a, b, out_prefix=''): + ''' + Returns RMS diff of raw bytes of two pixmaps. + + We assert that the pixmaps/sequences are the same size. + + and can each be a pymupdf.Pixmap or path of a bitmap file. + ''' + if isinstance(a, str): + print(f'{out_prefix}pixmaps_rms(): reading pixmap from {a=}.') + a = pymupdf.Pixmap(a) + if isinstance(b, str): + print(f'{out_prefix}pixmaps_rms(): reading pixmap from {b=}.') + b = pymupdf.Pixmap(b) + assert a.irect == b.irect, f'Differing rects: {a.irect=} {b.irect=}.' + a_mv = a.samples_mv + b_mv = b.samples_mv + assert len(a_mv) == len(b_mv) + ret = rms(a_mv, b_mv, out_prefix=out_prefix) + print(f'{out_prefix}pixmaps_rms(): {ret=}.') + return ret + + +def pixmaps_diff(a, b, out_prefix=''): + ''' + Returns a pymupdf.Pixmap that represents the difference between pixmaps + and . + + Each byte in the returned pixmap is `128 + (b_byte - a_byte) // 2`. + ''' + if isinstance(a, str): + print(f'{out_prefix}pixmaps_rms(): reading pixmap from {a=}.') + a = pymupdf.Pixmap(a) + if isinstance(b, str): + print(f'{out_prefix}pixmaps_rms(): reading pixmap from {b=}.') + b = pymupdf.Pixmap(b) + assert a.irect == b.irect, f'Differing rects: {a.irect=} {b.irect=}.' + a_mv = a.samples_mv + b_mv = b.samples_mv + c = pymupdf.Pixmap(a.tobytes()) + c_mv = c.samples_mv + assert len(a_mv) == len(b_mv) == len(c_mv) + if 1: + print(f'{len(a_mv)=}') + for i, (a_byte, b_byte, c_byte) in enumerate(zip(a_mv, b_mv, c_mv)): + assert 0 <= a_byte < 256 + assert 0 <= b_byte < 256 + assert 0 <= c_byte < 256 + # Set byte to 128 plus half the diff so we represent the full + # -255..+255 range. + c_mv[i] = 128 + (b_byte - a_byte) // 2 + return c diff --git a/tests/resources/2201.00069.pdf b/tests/resources/2201.00069.pdf new file mode 100644 index 000000000..5d0cdbe83 Binary files /dev/null and b/tests/resources/2201.00069.pdf differ diff --git a/tests/resources/battery-file-22.pdf b/tests/resources/battery-file-22.pdf new file mode 100644 index 000000000..73dd3850f Binary files /dev/null and b/tests/resources/battery-file-22.pdf differ diff --git a/tests/resources/chinese-tables.pdf b/tests/resources/chinese-tables.pdf new file mode 100644 index 000000000..cf88301f0 Binary files /dev/null and b/tests/resources/chinese-tables.pdf differ diff --git a/tests/resources/chinese-tables.pickle b/tests/resources/chinese-tables.pickle new file mode 100644 index 000000000..3489c5ae4 Binary files /dev/null and b/tests/resources/chinese-tables.pickle differ diff --git a/tests/resources/cms-etc-filled.pdf b/tests/resources/cms-etc-filled.pdf new file mode 100644 index 000000000..73d77fa2d Binary files /dev/null and b/tests/resources/cms-etc-filled.pdf differ diff --git a/tests/resources/cython.pdf b/tests/resources/cython.pdf new file mode 100644 index 000000000..13a473d48 Binary files /dev/null and b/tests/resources/cython.pdf differ diff --git a/tests/resources/cython.pickle b/tests/resources/cython.pickle new file mode 100644 index 000000000..8e3e5d395 Binary files /dev/null and b/tests/resources/cython.pickle differ diff --git a/tests/resources/dotted-gridlines.pdf b/tests/resources/dotted-gridlines.pdf new file mode 100644 index 000000000..e6b0d3ef0 Binary files /dev/null and b/tests/resources/dotted-gridlines.pdf differ diff --git a/tests/resources/full_toc.txt b/tests/resources/full_toc.txt index 17208748b..a2d6d5045 100644 --- a/tests/resources/full_toc.txt +++ b/tests/resources/full_toc.txt @@ -1,5 +1,5 @@ -[1, 'HAUPTÜBERSICHT', -1, {'kind': 3, 'xref': 2, 'file': '../SDW2006.PDF', 'zoom': 0.0}] -[1, 'Januar 01/2006', -1, {'kind': 3, 'xref': 3, 'file': '01004INH.pdf', 'collapse': False, 'zoom': 0.0}] +[1, 'HAUPTÜBERSICHT', -1, {'kind': 5, 'xref': 2, 'file': '../SDW2006.PDF', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[1, 'Januar 01/2006', -1, {'kind': 5, 'xref': 3, 'file': '01004INH.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0, 'collapse': False}] [2, 'SPEKTROGRAMM', -1, {'kind': 0, 'xref': 4, 'page': -1, 'collapse': False, 'zoom': 0.0}] [3, 'Urzeit-Godzilla', -1, {'kind': 5, 'xref': 87, 'file': '01008SP.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] [3, 'Frühchristliche Mosaike im Knast', -1, {'kind': 5, 'xref': 102, 'file': '01008SP.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] @@ -16,15 +16,15 @@ [3, 'Vampire gegen Schlaganfall', -1, {'kind': 5, 'xref': 27, 'file': '01012FA.pdf', 'page': 4, 'to': Point(0.0, 0.0), 'zoom': 0.0}] [3, 'Der Flug des Kolibris', -1, {'kind': 5, 'xref': 25, 'file': '01012FA.pdf', 'page': 7, 'to': Point(0.0, 0.0), 'zoom': 0.0}] [2, 'THEMEN', -1, {'kind': 0, 'xref': 20, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Entwicklung von Spiralgalaxien', -1, {'kind': 3, 'xref': 21, 'file': '01022HA.pdf', 'zoom': 0.0}] -[3, 'Geschichtsträchtige Genspuren', -1, {'kind': 3, 'xref': 46, 'file': '01030HA.pdf', 'zoom': 0.0}] -[3, 'Was Sedimente verraten', -1, {'kind': 3, 'xref': 44, 'file': '01042HA.pdf', 'zoom': 0.0}] -[3, 'Von Baumringen und Regenmengen', -1, {'kind': 3, 'xref': 42, 'file': '01050HA.pdf', 'zoom': 0.0}] -[3, 'Software-Agenten in Not', -1, {'kind': 3, 'xref': 40, 'file': '01056HA.pdf', 'zoom': 0.0}] -[3, 'Künstlicher kalter Antiwasserstoff', -1, {'kind': 3, 'xref': 38, 'file': '01062HA.pdf', 'zoom': 0.0}] -[3, 'Rüsten gegen eine Pandemie', -1, {'kind': 3, 'xref': 36, 'file': '01072HA.pdf', 'zoom': 0.0}] -[3, 'Satelliten zeigen Lawinengefahr', -1, {'kind': 3, 'xref': 34, 'file': '01084HA.pdf', 'zoom': 0.0}] -[3, 'Provokante Verheißung: Update für den Menschen', -1, {'kind': 3, 'xref': 22, 'file': '01100HA.pdf', 'zoom': 0.0}] +[3, 'Entwicklung von Spiralgalaxien', -1, {'kind': 5, 'xref': 21, 'file': '01022HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Geschichtsträchtige Genspuren', -1, {'kind': 5, 'xref': 46, 'file': '01030HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Was Sedimente verraten', -1, {'kind': 5, 'xref': 44, 'file': '01042HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Von Baumringen und Regenmengen', -1, {'kind': 5, 'xref': 42, 'file': '01050HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Software-Agenten in Not', -1, {'kind': 5, 'xref': 40, 'file': '01056HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Künstlicher kalter Antiwasserstoff', -1, {'kind': 5, 'xref': 38, 'file': '01062HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Rüsten gegen eine Pandemie', -1, {'kind': 5, 'xref': 36, 'file': '01072HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Satelliten zeigen Lawinengefahr', -1, {'kind': 5, 'xref': 34, 'file': '01084HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] +[3, 'Provokante Verheißung: Update für den Menschen', -1, {'kind': 5, 'xref': 22, 'file': '01100HA.pdf', 'page': 0, 'to': Point(0.0, 0.0), 'zoom': 0.0}] [2, 'KOMMENTAR', -1, {'kind': 0, 'xref': 18, 'page': -1, 'collapse': False, 'zoom': 0.0}] [3, 'Springers Einwüfe: Holland, die Hydrometropole', -1, {'kind': 5, 'xref': 19, 'file': '01012FA.pdf', 'page': 8, 'to': Point(0.0, 0.0), 'zoom': 0.0}] [2, 'WISSENSCHAFT IM ...', -1, {'kind': 0, 'xref': 15, 'page': -1, 'collapse': False, 'zoom': 0.0}] diff --git a/tests/resources/full_toc2.txt b/tests/resources/full_toc2.txt deleted file mode 100644 index 749ee9e06..000000000 --- a/tests/resources/full_toc2.txt +++ /dev/null @@ -1,48 +0,0 @@ -[1, 'HAUPTÜBERSICHT', -1, {'kind': 3, 'xref': 2, 'file': '', 'zoom': 0.0}] -[1, 'Januar 01/2006', -1, {'kind': 3, 'xref': 3, 'file': '', 'collapse': False, 'zoom': 0.0}] -[2, 'SPEKTROGRAMM', -1, {'kind': 0, 'xref': 4, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Urzeit-Godzilla', -1, {'kind': 3, 'xref': 87, 'file': '', 'zoom': 0.0}] -[3, 'Frühchristliche Mosaike im Knast', -1, {'kind': 3, 'xref': 102, 'file': '', 'zoom': 0.0}] -[3, 'Evolution auf Eis', -1, {'kind': 3, 'xref': 100, 'file': '', 'zoom': 0.0}] -[3, 'Entwarnung bei Kondensstreifen', -1, {'kind': 3, 'xref': 98, 'file': '', 'zoom': 0.0}] -[3, 'Spermatausch beim Schnecken-Sex', -1, {'kind': 3, 'xref': 96, 'file': '', 'zoom': 0.0}] -[3, 'Mehr Monde für Pluto', -1, {'kind': 3, 'xref': 94, 'file': '', 'zoom': 0.0}] -[3, 'Endlich ein Malaria-Impfstoff', -1, {'kind': 3, 'xref': 92, 'file': '', 'zoom': 0.0}] -[3, 'Spuren der ersten Sterne', -1, {'kind': 3, 'xref': 90, 'file': '', 'zoom': 0.0}] -[3, 'Bild des Monats', -1, {'kind': 3, 'xref': 88, 'file': '', 'zoom': 0.0}] -[2, 'FORSCHUNG AKTUELL', -1, {'kind': 0, 'xref': 23, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Der Super-Teilchenfänger in der Pampa', -1, {'kind': 3, 'xref': 24, 'file': '', 'zoom': 0.0}] -[3, 'Auf der Fährte der Lepra', -1, {'kind': 3, 'xref': 29, 'file': '', 'zoom': 0.0}] -[3, 'Vampire gegen Schlaganfall', -1, {'kind': 3, 'xref': 27, 'file': '', 'zoom': 0.0}] -[3, 'Der Flug des Kolibris', -1, {'kind': 3, 'xref': 25, 'file': '', 'zoom': 0.0}] -[2, 'THEMEN', -1, {'kind': 0, 'xref': 20, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Entwicklung von Spiralgalaxien', -1, {'kind': 3, 'xref': 21, 'file': '', 'zoom': 0.0}] -[3, 'Geschichtsträchtige Genspuren', -1, {'kind': 3, 'xref': 46, 'file': '', 'zoom': 0.0}] -[3, 'Was Sedimente verraten', -1, {'kind': 3, 'xref': 44, 'file': '', 'zoom': 0.0}] -[3, 'Von Baumringen und Regenmengen', -1, {'kind': 3, 'xref': 42, 'file': '', 'zoom': 0.0}] -[3, 'Software-Agenten in Not', -1, {'kind': 3, 'xref': 40, 'file': '', 'zoom': 0.0}] -[3, 'Künstlicher kalter Antiwasserstoff', -1, {'kind': 3, 'xref': 38, 'file': '', 'zoom': 0.0}] -[3, 'Rüsten gegen eine Pandemie', -1, {'kind': 3, 'xref': 36, 'file': '', 'zoom': 0.0}] -[3, 'Satelliten zeigen Lawinengefahr', -1, {'kind': 3, 'xref': 34, 'file': '', 'zoom': 0.0}] -[3, 'Provokante Verheißung: Update für den Menschen', -1, {'kind': 3, 'xref': 22, 'file': '', 'zoom': 0.0}] -[2, 'KOMMENTAR', -1, {'kind': 0, 'xref': 18, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Springers Einwüfe: Holland, die Hydrometropole', -1, {'kind': 3, 'xref': 19, 'file': '', 'zoom': 0.0}] -[2, 'WISSENSCHAFT IM ...', -1, {'kind': 0, 'xref': 15, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Alltag: Eine Decke für die Straße', -1, {'kind': 3, 'xref': 16, 'file': '', 'zoom': 0.0}] -[3, 'Rückblick: Mozarts Ohr • Per Auto zum Südpol u.a.', -1, {'kind': 3, 'xref': 17, 'file': '', 'zoom': 0.0}] -[2, 'JUNGE WISSENSCHAFT', -1, {'kind': 0, 'xref': 13, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Ein Putzroboter für die Mama', -1, {'kind': 3, 'xref': 14, 'file': '', 'zoom': 0.0}] -[2, 'REZENSIONEN', -1, {'kind': 0, 'xref': 10, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Vulkanismus verstehen und erleben', -1, {'kind': 3, 'xref': 11, 'file': '', 'zoom': 0.0}] -[3, 'Warum der Mensch glaubt', -1, {'kind': 3, 'xref': 72, 'file': '', 'zoom': 0.0}] -[3, 'Biomedizin und Ethik', -1, {'kind': 3, 'xref': 70, 'file': '', 'zoom': 0.0}] -[3, 'Mythos Meer', -1, {'kind': 3, 'xref': 68, 'file': '', 'zoom': 0.0}] -[3, 'Warum Frauen nicht schwach ... sind', -1, {'kind': 3, 'xref': 66, 'file': '', 'zoom': 0.0}] -[3, 'PISA, Bach, Pythagoras', -1, {'kind': 3, 'xref': 12, 'file': '', 'zoom': 0.0}] -[2, 'MATHEMATISCHE UNTERHALTUNGEN', -1, {'kind': 0, 'xref': 8, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Himmliches Ballett', -1, {'kind': 3, 'xref': 9, 'file': '', 'zoom': 0.0}] -[2, 'WEITERE RUBRIKEN', -1, {'kind': 0, 'xref': 5, 'page': -1, 'collapse': False, 'zoom': 0.0}] -[3, 'Editorial', -1, {'kind': 3, 'xref': 6, 'file': '', 'zoom': 0.0}] -[3, 'Leserbriefe/Impressum', -1, {'kind': 3, 'xref': 81, 'file': '', 'zoom': 0.0}] -[3, 'Preisrätsel', -1, {'kind': 3, 'xref': 79, 'file': '', 'zoom': 0.0}] -[3, 'Vorschau', -1, {'kind': 3, 'xref': 7, 'file': '', 'zoom': 0.0}] diff --git a/tests/resources/img-regular.pdf b/tests/resources/img-regular.pdf new file mode 100644 index 000000000..158f557ff Binary files /dev/null and b/tests/resources/img-regular.pdf differ diff --git a/tests/resources/img-transparent.pdf b/tests/resources/img-transparent.pdf new file mode 100644 index 000000000..5ab875627 Binary files /dev/null and b/tests/resources/img-transparent.pdf differ diff --git a/tests/resources/interfield-calculation.pdf b/tests/resources/interfield-calculation.pdf new file mode 100644 index 000000000..abb3275d7 Binary files /dev/null and b/tests/resources/interfield-calculation.pdf differ diff --git a/tests/resources/merge-form1.pdf b/tests/resources/merge-form1.pdf new file mode 100644 index 000000000..faaba705f Binary files /dev/null and b/tests/resources/merge-form1.pdf differ diff --git a/tests/resources/merge-form2.pdf b/tests/resources/merge-form2.pdf new file mode 100644 index 000000000..f4dc3c95f Binary files /dev/null and b/tests/resources/merge-form2.pdf differ diff --git a/tests/resources/mupdf_explored.pdf b/tests/resources/mupdf_explored.pdf new file mode 100644 index 000000000..6eff969bd Binary files /dev/null and b/tests/resources/mupdf_explored.pdf differ diff --git a/tests/resources/small-table.pdf b/tests/resources/small-table.pdf new file mode 100644 index 000000000..46ed27041 Binary files /dev/null and b/tests/resources/small-table.pdf differ diff --git a/tests/resources/strict-yes-no.pdf b/tests/resources/strict-yes-no.pdf new file mode 100644 index 000000000..e00bf6962 Binary files /dev/null and b/tests/resources/strict-yes-no.pdf differ diff --git a/tests/resources/symbols.txt b/tests/resources/symbols.txt index 90e07c04b..75d225d90 100644 --- a/tests/resources/symbols.txt +++ b/tests/resources/symbols.txt @@ -1,11 +1,12 @@ -[{'closePath': True, +[{'closePath': False, 'color': (1.0, 1.0, 1.0), 'dashes': '[] 0', 'even_odd': False, 'fill': (1.0, 0.0, 0.0), 'fill_opacity': 1.0, 'items': [('l', (50.0, 50.0), (50.0, 100.0)), - ('l', (50.0, 100.0), (100.0, 75.0))], + ('l', (50.0, 100.0), (100.0, 75.0)), + ('l', (100.0, 75.0), (50.0, 50.0))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -14,7 +15,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 1.0}, - {'closePath': True, + {'closePath': False, 'color': (1.0, 1.0, 1.0), 'dashes': '[] 0', 'even_odd': False, @@ -48,7 +49,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 1.0}, - {'closePath': True, + {'closePath': False, 'color': (0.0, 1.0, 0.0), 'dashes': '[] 0', 'even_odd': False, @@ -66,7 +67,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 0.30000001192092896}, - {'closePath': True, + {'closePath': False, 'color': (1.0, 1.0, 1.0), 'dashes': '[] 0', 'even_odd': False, @@ -74,7 +75,8 @@ 'fill_opacity': 1.0, 'items': [('l', (75.0, 230.0), (100.0, 255.0)), ('l', (100.0, 255.0), (75.0, 280.0)), - ('l', (75.0, 280.0), (50.0, 255.0))], + ('l', (75.0, 280.0), (50.0, 255.0)), + ('l', (50.0, 255.0), (75.0, 230.0))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -83,7 +85,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 1.0}, - {'closePath': True, + {'closePath': False, 'color': (1.0, 1.0, 1.0), 'dashes': '[] 0', 'even_odd': False, @@ -117,7 +119,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 2.0}, - {'closePath': True, + {'closePath': False, 'color': (0.0, 0.0, 0.0), 'dashes': '[] 0', 'items': [('c', @@ -163,7 +165,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 3.0}, - {'closePath': True, + {'closePath': False, 'even_odd': False, 'fill': (1.0, 1.0, 0.0), 'fill_opacity': 1.0, @@ -191,7 +193,7 @@ 'rect': (50.0, 350.0, 100.0, 400.0), 'seqno': 13, 'type': 'f'}, - {'closePath': True, + {'closePath': False, 'even_odd': False, 'fill': (0.0, 0.0, 0.0), 'fill_opacity': 1.0, @@ -317,14 +319,15 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 1.0}, - {'closePath': True, + {'closePath': False, 'color': (1.0, 0.0, 0.0), 'dashes': '[] 0', 'even_odd': False, 'fill': (1.0, 0.0, 0.0), 'fill_opacity': 1.0, 'items': [('c', (75.0, 485.0), (62.5, 470.0), (50.0, 490.0), (75.0, 510.0)), - ('c', (75.0, 485.0), (87.5, 470.0), (100.0, 490.0), (75.0, 510.0))], + ('c', (75.0, 485.0), (87.5, 470.0), (100.0, 490.0), (75.0, 510.0)), + ('l', (75.0, 510.0), (75.0, 485.0))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -451,7 +454,7 @@ 'rect': (85.5072021484375, 547.7540283203125, 100.0, 562.2459716796875), 'seqno': 27, 'type': 'f'}, - {'closePath': True, + {'closePath': False, 'color': (0.7215690016746521, 0.5254899859428406, 0.04313730075955391), 'dashes': '[] 0', 'even_odd': False, @@ -461,7 +464,10 @@ (85.5072021484375, 547.7540283203125), (86.30770111083984, 548.553955078125), (85.00990295410156, 549.8519897460938), - (82.60870361328125, 550.6519775390625))], + (82.60870361328125, 550.6519775390625)), + ('l', + (82.60870361328125, 550.6519775390625), + (85.5072021484375, 547.7540283203125))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -473,7 +479,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 0.07246380299329758}, - {'closePath': True, + {'closePath': False, 'color': (0.7215690016746521, 0.5254899859428406, 0.04313730075955391), 'dashes': '[] 0', 'even_odd': False, @@ -483,7 +489,10 @@ (82.60870361328125, 550.6519775390625), (87.2510986328125, 553.052978515625), (87.2510986328125, 556.947021484375), - (82.60870361328125, 559.3480224609375))], + (82.60870361328125, 559.3480224609375)), + ('l', + (82.60870361328125, 559.3480224609375), + (82.60870361328125, 550.6519775390625))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -495,7 +504,7 @@ 'stroke_opacity': 1.0, 'type': 'fs', 'width': 0.07246380299329758}, - {'closePath': True, + {'closePath': False, 'color': (0.7215690016746521, 0.5254899859428406, 0.04313730075955391), 'dashes': '[] 0', 'even_odd': False, @@ -505,7 +514,10 @@ (82.60870361328125, 559.3480224609375), (85.00990295410156, 560.1480102539062), (86.30770111083984, 561.446044921875), - (85.5072021484375, 562.2459716796875))], + (85.5072021484375, 562.2459716796875)), + ('l', + (85.5072021484375, 562.2459716796875), + (82.60870361328125, 559.3480224609375))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -599,7 +611,7 @@ 557.1740112304688), 'seqno': 37, 'type': 'f'}, - {'closePath': True, + {'closePath': False, 'color': (1.0, 1.0, 1.0), 'dashes': '[] 0', 'items': [('l', @@ -607,7 +619,10 @@ (58.937198638916016, 561.52197265625)), ('l', (61.352699279785156, 548.47802734375), - (61.352699279785156, 561.52197265625))], + (61.352699279785156, 561.52197265625)), + ('l', + (61.352699279785156, 561.52197265625), + (61.352699279785156, 548.47802734375))], 'layer': '', 'lineCap': (0, 0, 0), 'lineJoin': 0.0, @@ -619,7 +634,7 @@ 'stroke_opacity': 1.0, 'type': 's', 'width': 1.1594200134277344}, - {'closePath': True, + {'closePath': False, 'even_odd': False, 'fill': (1.0, 1.0, 0.0), 'fill_opacity': 1.0, @@ -647,7 +662,7 @@ 'rect': (50.0, 590.0, 100.0, 640.0), 'seqno': 39, 'type': 'f'}, - {'closePath': True, + {'closePath': False, 'even_odd': False, 'fill': (0.0, 0.0, 0.0), 'fill_opacity': 1.0, diff --git a/tests/resources/test-2812.pdf b/tests/resources/test-2812.pdf new file mode 100644 index 000000000..793aba5fb Binary files /dev/null and b/tests/resources/test-2812.pdf differ diff --git a/tests/resources/test-3143.pdf b/tests/resources/test-3143.pdf new file mode 100644 index 000000000..867f1650f Binary files /dev/null and b/tests/resources/test-3143.pdf differ diff --git a/tests/resources/test-3150.pdf b/tests/resources/test-3150.pdf new file mode 100644 index 000000000..24102aaf3 Binary files /dev/null and b/tests/resources/test-3150.pdf differ diff --git a/tests/resources/test-3207.pdf b/tests/resources/test-3207.pdf new file mode 100644 index 000000000..c80a2b4b5 Binary files /dev/null and b/tests/resources/test-3207.pdf differ diff --git a/tests/resources/test-3591.pdf b/tests/resources/test-3591.pdf new file mode 100644 index 000000000..005d9ad9b Binary files /dev/null and b/tests/resources/test-3591.pdf differ diff --git a/tests/resources/test-3820.pdf b/tests/resources/test-3820.pdf new file mode 100644 index 000000000..5af6590f4 Binary files /dev/null and b/tests/resources/test-3820.pdf differ diff --git a/tests/resources/test-4055.pdf b/tests/resources/test-4055.pdf new file mode 100644 index 000000000..693d9f2eb Binary files /dev/null and b/tests/resources/test-4055.pdf differ diff --git a/tests/resources/test-4503.pdf b/tests/resources/test-4503.pdf new file mode 100644 index 000000000..307762ed7 Binary files /dev/null and b/tests/resources/test-4503.pdf differ diff --git a/tests/resources/test-707448.pdf b/tests/resources/test-707448.pdf new file mode 100644 index 000000000..75300d10b Binary files /dev/null and b/tests/resources/test-707448.pdf differ diff --git a/tests/resources/test-707673.pdf b/tests/resources/test-707673.pdf new file mode 100644 index 000000000..85f385aae Binary files /dev/null and b/tests/resources/test-707673.pdf differ diff --git a/tests/resources/test-E+A.pdf b/tests/resources/test-E+A.pdf new file mode 100644 index 000000000..84508fa19 Binary files /dev/null and b/tests/resources/test-E+A.pdf differ diff --git a/tests/resources/test-linebreaks.pdf b/tests/resources/test-linebreaks.pdf new file mode 100644 index 000000000..8696f8853 Binary files /dev/null and b/tests/resources/test-linebreaks.pdf differ diff --git a/tests/resources/test-rewrite-images.pdf b/tests/resources/test-rewrite-images.pdf new file mode 100644 index 000000000..d1d742390 Binary files /dev/null and b/tests/resources/test-rewrite-images.pdf differ diff --git a/tests/resources/test-styled-table.pdf b/tests/resources/test-styled-table.pdf new file mode 100644 index 000000000..67f7e8a49 Binary files /dev/null and b/tests/resources/test-styled-table.pdf differ diff --git a/tests/resources/test_1645_expected_1.22.pdf b/tests/resources/test_1645_expected-after-1.27.0.pdf similarity index 74% rename from tests/resources/test_1645_expected_1.22.pdf rename to tests/resources/test_1645_expected-after-1.27.0.pdf index b149b109d..27d61072d 100644 Binary files a/tests/resources/test_1645_expected_1.22.pdf and b/tests/resources/test_1645_expected-after-1.27.0.pdf differ diff --git a/tests/resources/test_1645_expected.pdf b/tests/resources/test_1645_expected.pdf index 8b21fe8dc..ec9ecf2c5 100644 Binary files a/tests/resources/test_1645_expected.pdf and b/tests/resources/test_1645_expected.pdf differ diff --git a/tests/resources/test_2548.pdf b/tests/resources/test_2548.pdf new file mode 100644 index 000000000..4b5431585 Binary files /dev/null and b/tests/resources/test_2548.pdf differ diff --git a/tests/resources/test_2553-2.pdf b/tests/resources/test_2553-2.pdf new file mode 100644 index 000000000..bd271e5d1 Binary files /dev/null and b/tests/resources/test_2553-2.pdf differ diff --git a/tests/resources/test_2553.pdf b/tests/resources/test_2553.pdf new file mode 100644 index 000000000..27dab5053 Binary files /dev/null and b/tests/resources/test_2553.pdf differ diff --git a/tests/resources/test_2596.pdf b/tests/resources/test_2596.pdf new file mode 100644 index 000000000..96498a0fc Binary files /dev/null and b/tests/resources/test_2596.pdf differ diff --git a/tests/resources/test_2608_expected b/tests/resources/test_2608_expected new file mode 100644 index 000000000..10cfa6127 --- /dev/null +++ b/tests/resources/test_2608_expected @@ -0,0 +1,14 @@ +No significant gamma-ray excess above the expected background +is detected from the direction of FRB 20171019A, with 52 gamma +candidate events from the source region and 524 background event. +A second analysis using an independent event calibration and recon- +struction (Parsons & Hinton 2014) confirms this result. A search for +variable emission on timescales ranging from milliseconds to sev- +eral minutes with tools provided in (Brun et al. 2020) does not reveal +any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confi- +dence level (C. L.) upper limits on the photon flux are derived using +the method described by Rolke et al. (2005). The energy threshold +of the data is highly dependent on the zenith angle of the observa- +tions. For these observations, the zenith angles range from 15 to 25 +deg, which leads to an energy threshold for the stacked data set of +𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE) diff --git a/tests/resources/test_2608_expected_1.26 b/tests/resources/test_2608_expected_1.26 new file mode 100644 index 000000000..d550f1237 --- /dev/null +++ b/tests/resources/test_2608_expected_1.26 @@ -0,0 +1,10 @@ +No significant gamma-ray excess above the expected background +is detected from the direction of FRB 20171019A, with 52 gamma +candidate events from the source region and 524 background event. +A second analysis using an independent event calibration and reconstruction (Parsons & Hinton 2014) confirms this result. A search for +variable emission on timescales ranging from milliseconds to several minutes with tools provided in (Brun et al. 2020) does not reveal +any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confidence level (C. L.) upper limits on the photon flux are derived using +the method described by Rolke et al. (2005). The energy threshold +of the data is highly dependent on the zenith angle of the observations. For these observations, the zenith angles range from 15 to 25 +deg, which leads to an energy threshold for the stacked data set of +𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE) diff --git a/tests/resources/test_2634.pdf b/tests/resources/test_2634.pdf new file mode 100644 index 000000000..6f4cb4703 Binary files /dev/null and b/tests/resources/test_2634.pdf differ diff --git a/tests/resources/test_2635.pdf b/tests/resources/test_2635.pdf new file mode 100644 index 000000000..640c0d19c Binary files /dev/null and b/tests/resources/test_2635.pdf differ diff --git a/tests/resources/test_2645_1.pdf b/tests/resources/test_2645_1.pdf new file mode 100644 index 000000000..5c177a094 Binary files /dev/null and b/tests/resources/test_2645_1.pdf differ diff --git a/tests/resources/test_2645_2.pdf b/tests/resources/test_2645_2.pdf new file mode 100644 index 000000000..0a9317a52 Binary files /dev/null and b/tests/resources/test_2645_2.pdf differ diff --git a/tests/resources/test_2645_3.pdf b/tests/resources/test_2645_3.pdf new file mode 100644 index 000000000..199089988 Binary files /dev/null and b/tests/resources/test_2645_3.pdf differ diff --git a/tests/resources/test_2710.pdf b/tests/resources/test_2710.pdf new file mode 100644 index 000000000..8dc2893f5 Binary files /dev/null and b/tests/resources/test_2710.pdf differ diff --git a/tests/resources/test_2730.pdf b/tests/resources/test_2730.pdf new file mode 100644 index 000000000..0b46418ed Binary files /dev/null and b/tests/resources/test_2730.pdf differ diff --git a/tests/resources/test_2742.pdf b/tests/resources/test_2742.pdf new file mode 100644 index 000000000..7d185f2fc Binary files /dev/null and b/tests/resources/test_2742.pdf differ diff --git a/tests/resources/test_2788.pdf b/tests/resources/test_2788.pdf new file mode 100644 index 000000000..855d60543 Binary files /dev/null and b/tests/resources/test_2788.pdf differ diff --git a/tests/resources/test_2791_content.pdf b/tests/resources/test_2791_content.pdf new file mode 100644 index 000000000..955db9945 Binary files /dev/null and b/tests/resources/test_2791_content.pdf differ diff --git a/tests/resources/test_2791_coverpage.pdf b/tests/resources/test_2791_coverpage.pdf new file mode 100644 index 000000000..9a90e526b Binary files /dev/null and b/tests/resources/test_2791_coverpage.pdf differ diff --git a/tests/resources/test_2861.pdf b/tests/resources/test_2861.pdf new file mode 100644 index 000000000..cab77fb53 Binary files /dev/null and b/tests/resources/test_2861.pdf differ diff --git a/tests/resources/test_2871.pdf b/tests/resources/test_2871.pdf new file mode 100644 index 000000000..9a3c77065 Binary files /dev/null and b/tests/resources/test_2871.pdf differ diff --git a/tests/resources/test_2885.pdf b/tests/resources/test_2885.pdf new file mode 100644 index 000000000..b5a0f3e1f Binary files /dev/null and b/tests/resources/test_2885.pdf differ diff --git a/tests/resources/test_2904.pdf b/tests/resources/test_2904.pdf new file mode 100644 index 000000000..43b3f6f94 Binary files /dev/null and b/tests/resources/test_2904.pdf differ diff --git a/tests/resources/test_2907.pdf b/tests/resources/test_2907.pdf new file mode 100644 index 000000000..fa58153ba Binary files /dev/null and b/tests/resources/test_2907.pdf differ diff --git a/tests/resources/test_2954.pdf b/tests/resources/test_2954.pdf new file mode 100644 index 000000000..c8df83689 Binary files /dev/null and b/tests/resources/test_2954.pdf differ diff --git a/tests/resources/test_2957_1.pdf b/tests/resources/test_2957_1.pdf new file mode 100644 index 000000000..30fb8f83d Binary files /dev/null and b/tests/resources/test_2957_1.pdf differ diff --git a/tests/resources/test_2957_2.pdf b/tests/resources/test_2957_2.pdf new file mode 100644 index 000000000..7db4bdd49 Binary files /dev/null and b/tests/resources/test_2957_2.pdf differ diff --git a/tests/resources/test_2969.pdf b/tests/resources/test_2969.pdf new file mode 100644 index 000000000..4778ea36e Binary files /dev/null and b/tests/resources/test_2969.pdf differ diff --git a/tests/resources/test_2979.pdf b/tests/resources/test_2979.pdf new file mode 100644 index 000000000..740c23903 Binary files /dev/null and b/tests/resources/test_2979.pdf differ diff --git a/tests/resources/test_3050_expected.png b/tests/resources/test_3050_expected.png new file mode 100644 index 000000000..4b7edcdee Binary files /dev/null and b/tests/resources/test_3050_expected.png differ diff --git a/tests/resources/test_3058.pdf b/tests/resources/test_3058.pdf new file mode 100644 index 000000000..fb48230ac Binary files /dev/null and b/tests/resources/test_3058.pdf differ diff --git a/tests/resources/test_3062.pdf b/tests/resources/test_3062.pdf new file mode 100644 index 000000000..9f605baf5 Binary files /dev/null and b/tests/resources/test_3062.pdf differ diff --git a/tests/resources/test_3070.pdf b/tests/resources/test_3070.pdf new file mode 100644 index 000000000..97794087c Binary files /dev/null and b/tests/resources/test_3070.pdf differ diff --git a/tests/resources/test_3072.pdf b/tests/resources/test_3072.pdf new file mode 100644 index 000000000..626e8c6a9 Binary files /dev/null and b/tests/resources/test_3072.pdf differ diff --git a/tests/resources/test_3087.pdf b/tests/resources/test_3087.pdf new file mode 100644 index 000000000..8cd53d4b2 Binary files /dev/null and b/tests/resources/test_3087.pdf differ diff --git a/tests/resources/test_3179.pdf b/tests/resources/test_3179.pdf new file mode 100644 index 000000000..e2e352259 Binary files /dev/null and b/tests/resources/test_3179.pdf differ diff --git a/tests/resources/test_3186.pdf b/tests/resources/test_3186.pdf new file mode 100644 index 000000000..1490ca0ba Binary files /dev/null and b/tests/resources/test_3186.pdf differ diff --git a/tests/resources/test_3197.pdf b/tests/resources/test_3197.pdf new file mode 100644 index 000000000..e21133cf3 Binary files /dev/null and b/tests/resources/test_3197.pdf differ diff --git a/tests/resources/test_3357.pdf b/tests/resources/test_3357.pdf new file mode 100644 index 000000000..a5b99e254 Binary files /dev/null and b/tests/resources/test_3357.pdf differ diff --git a/tests/resources/test_3362.pdf b/tests/resources/test_3362.pdf new file mode 100644 index 000000000..f894b4b47 Binary files /dev/null and b/tests/resources/test_3362.pdf differ diff --git a/tests/resources/test_3376.pdf b/tests/resources/test_3376.pdf new file mode 100644 index 000000000..d5240d2e5 Binary files /dev/null and b/tests/resources/test_3376.pdf differ diff --git a/tests/resources/test_3448.pdf b/tests/resources/test_3448.pdf new file mode 100644 index 000000000..dae9e9c1a Binary files /dev/null and b/tests/resources/test_3448.pdf differ diff --git a/tests/resources/test_3448.pdf-expected.png b/tests/resources/test_3448.pdf-expected.png new file mode 100644 index 000000000..fbb82b7c4 Binary files /dev/null and b/tests/resources/test_3448.pdf-expected.png differ diff --git a/tests/resources/test_3450.pdf b/tests/resources/test_3450.pdf new file mode 100644 index 000000000..e993d00ff Binary files /dev/null and b/tests/resources/test_3450.pdf differ diff --git a/tests/resources/test_3493.epub b/tests/resources/test_3493.epub new file mode 100644 index 000000000..34e23344f Binary files /dev/null and b/tests/resources/test_3493.epub differ diff --git a/tests/resources/test_3569.pdf b/tests/resources/test_3569.pdf new file mode 100644 index 000000000..d14588054 Binary files /dev/null and b/tests/resources/test_3569.pdf differ diff --git a/tests/resources/test_3594.pdf b/tests/resources/test_3594.pdf new file mode 100644 index 000000000..ea3c44424 Binary files /dev/null and b/tests/resources/test_3594.pdf differ diff --git a/tests/resources/test_3615.epub b/tests/resources/test_3615.epub new file mode 100644 index 000000000..94ab28dbc Binary files /dev/null and b/tests/resources/test_3615.epub differ diff --git a/tests/resources/test_3624.pdf b/tests/resources/test_3624.pdf new file mode 100644 index 000000000..0da2b1153 Binary files /dev/null and b/tests/resources/test_3624.pdf differ diff --git a/tests/resources/test_3624_expected.png b/tests/resources/test_3624_expected.png new file mode 100644 index 000000000..bbfa9bc75 Binary files /dev/null and b/tests/resources/test_3624_expected.png differ diff --git a/tests/resources/test_3650.pdf b/tests/resources/test_3650.pdf new file mode 100644 index 000000000..50afea040 Binary files /dev/null and b/tests/resources/test_3650.pdf differ diff --git a/tests/resources/test_3654.docx b/tests/resources/test_3654.docx new file mode 100644 index 000000000..3794ec39b Binary files /dev/null and b/tests/resources/test_3654.docx differ diff --git a/tests/resources/test_3677.pdf b/tests/resources/test_3677.pdf new file mode 100644 index 000000000..428578591 Binary files /dev/null and b/tests/resources/test_3677.pdf differ diff --git a/tests/resources/test_3687-3.epub b/tests/resources/test_3687-3.epub new file mode 100644 index 000000000..76581a229 Binary files /dev/null and b/tests/resources/test_3687-3.epub differ diff --git a/tests/resources/test_3687.epub b/tests/resources/test_3687.epub new file mode 100644 index 000000000..b510dd58d Binary files /dev/null and b/tests/resources/test_3687.epub differ diff --git a/tests/resources/test_3705.pdf b/tests/resources/test_3705.pdf new file mode 100644 index 000000000..53259949c Binary files /dev/null and b/tests/resources/test_3705.pdf differ diff --git a/tests/resources/test_3725.pdf b/tests/resources/test_3725.pdf new file mode 100644 index 000000000..8f115fca7 Binary files /dev/null and b/tests/resources/test_3725.pdf differ diff --git a/tests/resources/test_3727.pdf b/tests/resources/test_3727.pdf new file mode 100644 index 000000000..b12bf9b47 Binary files /dev/null and b/tests/resources/test_3727.pdf differ diff --git a/tests/resources/test_3780.pdf b/tests/resources/test_3780.pdf new file mode 100644 index 000000000..2beca3edb Binary files /dev/null and b/tests/resources/test_3780.pdf differ diff --git a/tests/resources/test_3789.pdf b/tests/resources/test_3789.pdf new file mode 100644 index 000000000..102101e5d Binary files /dev/null and b/tests/resources/test_3789.pdf differ diff --git a/tests/resources/test_3806-expected.png b/tests/resources/test_3806-expected.png new file mode 100644 index 000000000..8c0721522 Binary files /dev/null and b/tests/resources/test_3806-expected.png differ diff --git a/tests/resources/test_3806.pdf b/tests/resources/test_3806.pdf new file mode 100644 index 000000000..9d8ca9dda Binary files /dev/null and b/tests/resources/test_3806.pdf differ diff --git a/tests/resources/test_3842.pdf b/tests/resources/test_3842.pdf new file mode 100644 index 000000000..400db3a7d Binary files /dev/null and b/tests/resources/test_3842.pdf differ diff --git a/tests/resources/test_3848.pdf b/tests/resources/test_3848.pdf new file mode 100644 index 000000000..de121e0bb Binary files /dev/null and b/tests/resources/test_3848.pdf differ diff --git a/tests/resources/test_3854.pdf b/tests/resources/test_3854.pdf new file mode 100644 index 000000000..157402d96 Binary files /dev/null and b/tests/resources/test_3854.pdf differ diff --git a/tests/resources/test_3854_expected.png b/tests/resources/test_3854_expected.png new file mode 100644 index 000000000..aca7a1a7b Binary files /dev/null and b/tests/resources/test_3854_expected.png differ diff --git a/tests/resources/test_3863.pdf b/tests/resources/test_3863.pdf new file mode 100644 index 000000000..7b65184de Binary files /dev/null and b/tests/resources/test_3863.pdf differ diff --git a/tests/resources/test_3863.pdf.pdf.0.png b/tests/resources/test_3863.pdf.pdf.0.png new file mode 100644 index 000000000..257697a5c Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.0.png differ diff --git a/tests/resources/test_3863.pdf.pdf.1.png b/tests/resources/test_3863.pdf.pdf.1.png new file mode 100644 index 000000000..df29c48b5 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.1.png differ diff --git a/tests/resources/test_3863.pdf.pdf.2.png b/tests/resources/test_3863.pdf.pdf.2.png new file mode 100644 index 000000000..85f20da40 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.2.png differ diff --git a/tests/resources/test_3863.pdf.pdf.3.png b/tests/resources/test_3863.pdf.pdf.3.png new file mode 100644 index 000000000..108812277 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.3.png differ diff --git a/tests/resources/test_3863.pdf.pdf.4.png b/tests/resources/test_3863.pdf.pdf.4.png new file mode 100644 index 000000000..8c20f345d Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.4.png differ diff --git a/tests/resources/test_3863.pdf.pdf.5.png b/tests/resources/test_3863.pdf.pdf.5.png new file mode 100644 index 000000000..20e2d1462 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.5.png differ diff --git a/tests/resources/test_3863.pdf.pdf.6.png b/tests/resources/test_3863.pdf.pdf.6.png new file mode 100644 index 000000000..09c0f4e43 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.6.png differ diff --git a/tests/resources/test_3863.pdf.pdf.7.png b/tests/resources/test_3863.pdf.pdf.7.png new file mode 100644 index 000000000..93e8fbc05 Binary files /dev/null and b/tests/resources/test_3863.pdf.pdf.7.png differ diff --git a/tests/resources/test_3886.pdf b/tests/resources/test_3886.pdf new file mode 100644 index 000000000..9f5743b79 Binary files /dev/null and b/tests/resources/test_3886.pdf differ diff --git a/tests/resources/test_3887.pdf b/tests/resources/test_3887.pdf new file mode 100644 index 000000000..38907d424 Binary files /dev/null and b/tests/resources/test_3887.pdf differ diff --git a/tests/resources/test_3933.pdf b/tests/resources/test_3933.pdf new file mode 100644 index 000000000..6745f0d7f Binary files /dev/null and b/tests/resources/test_3933.pdf differ diff --git a/tests/resources/test_3950.pdf b/tests/resources/test_3950.pdf new file mode 100644 index 000000000..69d1b9dcc Binary files /dev/null and b/tests/resources/test_3950.pdf differ diff --git a/tests/resources/test_3994.pdf b/tests/resources/test_3994.pdf new file mode 100644 index 000000000..0af3cb6ca Binary files /dev/null and b/tests/resources/test_3994.pdf differ diff --git a/tests/resources/test_4004.pdf b/tests/resources/test_4004.pdf new file mode 100644 index 000000000..772f5124d Binary files /dev/null and b/tests/resources/test_4004.pdf differ diff --git a/tests/resources/test_4017.pdf b/tests/resources/test_4017.pdf new file mode 100644 index 000000000..a68193278 Binary files /dev/null and b/tests/resources/test_4017.pdf differ diff --git a/tests/resources/test_4026.pdf b/tests/resources/test_4026.pdf new file mode 100644 index 000000000..45a7e6fb1 Binary files /dev/null and b/tests/resources/test_4026.pdf differ diff --git a/tests/resources/test_4034.pdf b/tests/resources/test_4034.pdf new file mode 100644 index 000000000..65e621679 Binary files /dev/null and b/tests/resources/test_4034.pdf differ diff --git a/tests/resources/test_4043.pdf b/tests/resources/test_4043.pdf new file mode 100644 index 000000000..608794b8e Binary files /dev/null and b/tests/resources/test_4043.pdf differ diff --git a/tests/resources/test_4047.pdf b/tests/resources/test_4047.pdf new file mode 100644 index 000000000..b36b753c9 Binary files /dev/null and b/tests/resources/test_4047.pdf differ diff --git a/tests/resources/test_4079.pdf b/tests/resources/test_4079.pdf new file mode 100644 index 000000000..5e79cab3e Binary files /dev/null and b/tests/resources/test_4079.pdf differ diff --git a/tests/resources/test_4079_after.pdf b/tests/resources/test_4079_after.pdf new file mode 100644 index 000000000..7aa560b08 Binary files /dev/null and b/tests/resources/test_4079_after.pdf differ diff --git a/tests/resources/test_4079_after_1.25.pdf b/tests/resources/test_4079_after_1.25.pdf new file mode 100644 index 000000000..13ce571a1 Binary files /dev/null and b/tests/resources/test_4079_after_1.25.pdf differ diff --git a/tests/resources/test_4090.pdf b/tests/resources/test_4090.pdf new file mode 100644 index 000000000..291d5e3a8 Binary files /dev/null and b/tests/resources/test_4090.pdf differ diff --git a/tests/resources/test_4125.pdf b/tests/resources/test_4125.pdf new file mode 100644 index 000000000..8acb2be64 Binary files /dev/null and b/tests/resources/test_4125.pdf differ diff --git a/tests/resources/test_4139.pdf b/tests/resources/test_4139.pdf new file mode 100644 index 000000000..27ed404e0 Binary files /dev/null and b/tests/resources/test_4139.pdf differ diff --git a/tests/resources/test_4141.pdf b/tests/resources/test_4141.pdf new file mode 100644 index 000000000..a8fc73e23 Binary files /dev/null and b/tests/resources/test_4141.pdf differ diff --git a/tests/resources/test_4147.pdf b/tests/resources/test_4147.pdf new file mode 100644 index 000000000..4d2c38606 Binary files /dev/null and b/tests/resources/test_4147.pdf differ diff --git a/tests/resources/test_4179.pdf b/tests/resources/test_4179.pdf new file mode 100644 index 000000000..9bdccb998 Binary files /dev/null and b/tests/resources/test_4179.pdf differ diff --git a/tests/resources/test_4179_expected.png b/tests/resources/test_4179_expected.png new file mode 100644 index 000000000..df053b707 Binary files /dev/null and b/tests/resources/test_4179_expected.png differ diff --git a/tests/resources/test_4180.pdf b/tests/resources/test_4180.pdf new file mode 100644 index 000000000..fcb1d3503 Binary files /dev/null and b/tests/resources/test_4180.pdf differ diff --git a/tests/resources/test_4180_expected.png b/tests/resources/test_4180_expected.png new file mode 100644 index 000000000..3de67e613 Binary files /dev/null and b/tests/resources/test_4180_expected.png differ diff --git a/tests/resources/test_4182.pdf b/tests/resources/test_4182.pdf new file mode 100644 index 000000000..7c239646e Binary files /dev/null and b/tests/resources/test_4182.pdf differ diff --git a/tests/resources/test_4182_expected.png b/tests/resources/test_4182_expected.png new file mode 100644 index 000000000..8d38ecd2c Binary files /dev/null and b/tests/resources/test_4182_expected.png differ diff --git a/tests/resources/test_4224.pdf b/tests/resources/test_4224.pdf new file mode 100644 index 000000000..88c49a3f1 Binary files /dev/null and b/tests/resources/test_4224.pdf differ diff --git a/tests/resources/test_4245.pdf b/tests/resources/test_4245.pdf new file mode 100644 index 000000000..554057475 Binary files /dev/null and b/tests/resources/test_4245.pdf differ diff --git a/tests/resources/test_4245_expected.png b/tests/resources/test_4245_expected.png new file mode 100644 index 000000000..2b7bcb330 Binary files /dev/null and b/tests/resources/test_4245_expected.png differ diff --git a/tests/resources/test_4263.pdf b/tests/resources/test_4263.pdf new file mode 100644 index 000000000..c56700709 Binary files /dev/null and b/tests/resources/test_4263.pdf differ diff --git a/tests/resources/test_4363.pdf b/tests/resources/test_4363.pdf new file mode 100644 index 000000000..d15940e3e Binary files /dev/null and b/tests/resources/test_4363.pdf differ diff --git a/tests/resources/test_4388_BOZ1.pdf b/tests/resources/test_4388_BOZ1.pdf new file mode 100644 index 000000000..6d2b23b68 Binary files /dev/null and b/tests/resources/test_4388_BOZ1.pdf differ diff --git a/tests/resources/test_4388_BUL1.pdf b/tests/resources/test_4388_BUL1.pdf new file mode 100644 index 000000000..b681354e1 Binary files /dev/null and b/tests/resources/test_4388_BUL1.pdf differ diff --git a/tests/resources/test_4412.pdf b/tests/resources/test_4412.pdf new file mode 100644 index 000000000..920df52b1 Binary files /dev/null and b/tests/resources/test_4412.pdf differ diff --git a/tests/resources/test_4415.pdf b/tests/resources/test_4415.pdf new file mode 100644 index 000000000..a8f9585e4 Binary files /dev/null and b/tests/resources/test_4415.pdf differ diff --git a/tests/resources/test_4415_out_expected.png b/tests/resources/test_4415_out_expected.png new file mode 100644 index 000000000..067016d38 Binary files /dev/null and b/tests/resources/test_4415_out_expected.png differ diff --git a/tests/resources/test_4423.pdf b/tests/resources/test_4423.pdf new file mode 100644 index 000000000..55510b8cf Binary files /dev/null and b/tests/resources/test_4423.pdf differ diff --git a/tests/resources/test_4435.pdf b/tests/resources/test_4435.pdf new file mode 100644 index 000000000..c22a87c9a Binary files /dev/null and b/tests/resources/test_4435.pdf differ diff --git a/tests/resources/test_4479.pdf b/tests/resources/test_4479.pdf new file mode 100644 index 000000000..28efe43fe Binary files /dev/null and b/tests/resources/test_4479.pdf differ diff --git a/tests/resources/test_4496.hwpx b/tests/resources/test_4496.hwpx new file mode 100755 index 000000000..f1e0fb1a7 Binary files /dev/null and b/tests/resources/test_4496.hwpx differ diff --git a/tests/resources/test_4503.pdf b/tests/resources/test_4503.pdf new file mode 100644 index 000000000..307762ed7 Binary files /dev/null and b/tests/resources/test_4503.pdf differ diff --git a/tests/resources/test_4505.pdf b/tests/resources/test_4505.pdf new file mode 100644 index 000000000..038b34d8e Binary files /dev/null and b/tests/resources/test_4505.pdf differ diff --git a/tests/resources/test_4546.pdf b/tests/resources/test_4546.pdf new file mode 100644 index 000000000..e5f2ece53 Binary files /dev/null and b/tests/resources/test_4546.pdf differ diff --git a/tests/resources/test_4564.pdf b/tests/resources/test_4564.pdf new file mode 100644 index 000000000..486a100f9 Binary files /dev/null and b/tests/resources/test_4564.pdf differ diff --git a/tests/resources/test_4571.pdf b/tests/resources/test_4571.pdf new file mode 100644 index 000000000..991a0512b Binary files /dev/null and b/tests/resources/test_4571.pdf differ diff --git a/tests/resources/test_4613.png b/tests/resources/test_4613.png new file mode 100644 index 000000000..e02b7328d Binary files /dev/null and b/tests/resources/test_4613.png differ diff --git a/tests/resources/test_4614.pdf b/tests/resources/test_4614.pdf new file mode 100644 index 000000000..a9f71a819 Binary files /dev/null and b/tests/resources/test_4614.pdf differ diff --git a/tests/resources/test_4639.pdf b/tests/resources/test_4639.pdf new file mode 100644 index 000000000..11d0d1ee3 Binary files /dev/null and b/tests/resources/test_4639.pdf differ diff --git a/tests/resources/test_4699.pdf b/tests/resources/test_4699.pdf new file mode 100644 index 000000000..79ad74ced Binary files /dev/null and b/tests/resources/test_4699.pdf differ diff --git a/tests/resources/test_4699.png b/tests/resources/test_4699.png new file mode 100644 index 000000000..e67380d3a Binary files /dev/null and b/tests/resources/test_4699.png differ diff --git a/tests/resources/test_4712_a.pdf b/tests/resources/test_4712_a.pdf new file mode 100644 index 000000000..7a7fd49fc Binary files /dev/null and b/tests/resources/test_4712_a.pdf differ diff --git a/tests/resources/test_4712_b.pdf b/tests/resources/test_4712_b.pdf new file mode 100644 index 000000000..039aa59b7 Binary files /dev/null and b/tests/resources/test_4712_b.pdf differ diff --git a/tests/resources/test_4716.pdf b/tests/resources/test_4716.pdf new file mode 100644 index 000000000..25a9a57f5 Binary files /dev/null and b/tests/resources/test_4716.pdf differ diff --git a/tests/resources/test_annot_file_info.pdf b/tests/resources/test_annot_file_info.pdf new file mode 100644 index 000000000..9e66bd421 Binary files /dev/null and b/tests/resources/test_annot_file_info.pdf differ diff --git a/tests/resources/test_delete_image.pdf b/tests/resources/test_delete_image.pdf new file mode 100644 index 000000000..36b2d1939 Binary files /dev/null and b/tests/resources/test_delete_image.pdf differ diff --git a/tests/resources/test_open2.cbz b/tests/resources/test_open2.cbz new file mode 100644 index 000000000..322cfde24 Binary files /dev/null and b/tests/resources/test_open2.cbz differ diff --git a/tests/resources/test_open2.doc b/tests/resources/test_open2.doc new file mode 100644 index 000000000..2835739ad Binary files /dev/null and b/tests/resources/test_open2.doc differ diff --git a/tests/resources/test_open2.docx b/tests/resources/test_open2.docx new file mode 100644 index 000000000..fac653d10 Binary files /dev/null and b/tests/resources/test_open2.docx differ diff --git a/tests/resources/test_open2.epub b/tests/resources/test_open2.epub new file mode 100644 index 000000000..0a9ec16f4 Binary files /dev/null and b/tests/resources/test_open2.epub differ diff --git a/tests/resources/test_open2.fb2 b/tests/resources/test_open2.fb2 new file mode 100644 index 000000000..ad5f56d70 --- /dev/null +++ b/tests/resources/test_open2.fb2 @@ -0,0 +1,64 @@ + + + + + computers + + Chris + Clark + + Sample FB2 book + +

Short sample of a FictionBook2 book with simple metadata. Based on test_book.md from https://github.com/clach04/sample_reading_media

+
+ ebook,sample,markdown,fb2,FictionBook2 +
+ + + clach04 + https://github.com/clach04/sample_reading_media + + + vim and scite + https://github.com/clach04/sample_reading_media + 1.0 + +

Initial version, written by hand.

+
+
+
+ + + <p>This is a title</p> + + +
+ + <p>Test Header h1</p> + + +

A test paragraph.

+

Another test paragraph.

+
+ +
+ + <p>Another Test Header h1</p> + + +
+ + <p>A Test Header h2</p> + + +
+ + <p>A Test Header h3</p> + + +

Yet more copy

+
+
+
+ +
diff --git a/tests/resources/test_open2.html b/tests/resources/test_open2.html new file mode 100644 index 000000000..66411721c Binary files /dev/null and b/tests/resources/test_open2.html differ diff --git a/tests/resources/test_open2.jpg b/tests/resources/test_open2.jpg new file mode 100644 index 000000000..a3c18592c Binary files /dev/null and b/tests/resources/test_open2.jpg differ diff --git a/tests/resources/test_open2.mobi b/tests/resources/test_open2.mobi new file mode 100644 index 000000000..fe3d4689a Binary files /dev/null and b/tests/resources/test_open2.mobi differ diff --git a/tests/resources/test_open2.pdf b/tests/resources/test_open2.pdf new file mode 100644 index 000000000..c5da93780 Binary files /dev/null and b/tests/resources/test_open2.pdf differ diff --git a/tests/resources/test_open2.svg b/tests/resources/test_open2.svg new file mode 100644 index 000000000..287f7739a --- /dev/null +++ b/tests/resources/test_open2.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/tests/resources/test_open2.xhtml b/tests/resources/test_open2.xhtml new file mode 100644 index 000000000..5271725ab --- /dev/null +++ b/tests/resources/test_open2.xhtml @@ -0,0 +1,17 @@ + + + + + + + + +
+

Some text

+
+ + diff --git a/tests/resources/test_open2.xml b/tests/resources/test_open2.xml new file mode 100644 index 000000000..1c9953127 --- /dev/null +++ b/tests/resources/test_open2.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/resources/test_open2.xps b/tests/resources/test_open2.xps new file mode 100644 index 000000000..05a2b4a75 Binary files /dev/null and b/tests/resources/test_open2.xps differ diff --git a/tests/resources/test_open2_expected.json b/tests/resources/test_open2_expected.json new file mode 100644 index 000000000..9d9a173a3 --- /dev/null +++ b/tests/resources/test_open2_expected.json @@ -0,0 +1,808 @@ +{ + "tests/resources/test_open2.cbz": { + "": { + "file": "zip", + "stream": "zip" + }, + ".cbz": { + "file": "zip", + "stream": "zip" + }, + ".doc": { + "file": "zip", + "stream": "zip" + }, + ".docx": { + "file": "zip", + "stream": "zip" + }, + ".epub": { + "file": "zip", + "stream": "zip" + }, + ".fb2": { + "file": "zip", + "stream": "zip" + }, + ".html": { + "file": "zip", + "stream": "zip" + }, + ".jpg": { + "file": "zip", + "stream": "zip" + }, + ".mobi": { + "file": "zip", + "stream": "zip" + }, + ".pdf": { + "file": "zip", + "stream": "zip" + }, + ".svg": { + "file": "zip", + "stream": "zip" + }, + ".txt": { + "file": "zip", + "stream": "zip" + }, + ".xhtml": { + "file": "zip", + "stream": "zip" + }, + ".xml": { + "file": "zip", + "stream": "zip" + }, + ".xps": { + "file": "zip", + "stream": "zip" + } + }, + "tests/resources/test_open2.doc": { + "": { + "file": "[error]", + "stream": "[error]" + }, + ".cbz": { + "file": "cfb", + "stream": "cfb" + }, + ".doc": { + "file": "[error]", + "stream": "[error]" + }, + ".docx": { + "file": "[error]", + "stream": "[error]" + }, + ".epub": { + "file": "[error]", + "stream": "[error]" + }, + ".fb2": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".html": { + "file": "HTML5", + "stream": "HTML5" + }, + ".jpg": { + "file": "Image", + "stream": "Image" + }, + ".mobi": { + "file": "[error]", + "stream": "[error]" + }, + ".pdf": { + "file": "[error]", + "stream": "[error]" + }, + ".svg": { + "file": "SVG", + "stream": "SVG" + }, + ".txt": { + "file": "Text", + "stream": "Text" + }, + ".xhtml": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xml": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".xps": { + "file": "[error]", + "stream": "[error]" + } + }, + "tests/resources/test_open2.docx": { + "": { + "file": "Office document", + "stream": "Office document" + }, + ".cbz": { + "file": "Office document", + "stream": "Office document" + }, + ".doc": { + "file": "Office document", + "stream": "Office document" + }, + ".docx": { + "file": "Office document", + "stream": "Office document" + }, + ".epub": { + "file": "Office document", + "stream": "Office document" + }, + ".fb2": { + "file": "Office document", + "stream": "Office document" + }, + ".html": { + "file": "Office document", + "stream": "Office document" + }, + ".jpg": { + "file": "Office document", + "stream": "Office document" + }, + ".mobi": { + "file": "Office document", + "stream": "Office document" + }, + ".pdf": { + "file": "Office document", + "stream": "Office document" + }, + ".svg": { + "file": "Office document", + "stream": "Office document" + }, + ".txt": { + "file": "Office document", + "stream": "Office document" + }, + ".xhtml": { + "file": "Office document", + "stream": "Office document" + }, + ".xml": { + "file": "Office document", + "stream": "Office document" + }, + ".xps": { + "file": "Office document", + "stream": "Office document" + } + }, + "tests/resources/test_open2.epub": { + "": { + "file": "EPUB", + "stream": "EPUB" + }, + ".cbz": { + "file": "zip", + "stream": "zip" + }, + ".doc": { + "file": "EPUB", + "stream": "EPUB" + }, + ".docx": { + "file": "EPUB", + "stream": "EPUB" + }, + ".epub": { + "file": "EPUB", + "stream": "EPUB" + }, + ".fb2": { + "file": "EPUB", + "stream": "EPUB" + }, + ".html": { + "file": "EPUB", + "stream": "EPUB" + }, + ".jpg": { + "file": "EPUB", + "stream": "EPUB" + }, + ".mobi": { + "file": "EPUB", + "stream": "EPUB" + }, + ".pdf": { + "file": "EPUB", + "stream": "EPUB" + }, + ".svg": { + "file": "EPUB", + "stream": "EPUB" + }, + ".txt": { + "file": "EPUB", + "stream": "EPUB" + }, + ".xhtml": { + "file": "EPUB", + "stream": "EPUB" + }, + ".xml": { + "file": "EPUB", + "stream": "EPUB" + }, + ".xps": { + "file": "EPUB", + "stream": "EPUB" + } + }, + "tests/resources/test_open2.fb2": { + "": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".cbz": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".doc": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".docx": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".epub": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".fb2": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".html": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".jpg": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".mobi": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".pdf": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".svg": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".txt": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".xhtml": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".xml": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".xps": { + "file": "FictionBook2", + "stream": "FictionBook2" + } + }, + "tests/resources/test_open2.html": { + "": { + "file": "HTML5", + "stream": "HTML5" + }, + ".cbz": { + "file": "HTML5", + "stream": "HTML5" + }, + ".doc": { + "file": "HTML5", + "stream": "HTML5" + }, + ".docx": { + "file": "HTML5", + "stream": "HTML5" + }, + ".epub": { + "file": "HTML5", + "stream": "HTML5" + }, + ".fb2": { + "file": "HTML5", + "stream": "HTML5" + }, + ".html": { + "file": "HTML5", + "stream": "HTML5" + }, + ".jpg": { + "file": "HTML5", + "stream": "HTML5" + }, + ".mobi": { + "file": "HTML5", + "stream": "HTML5" + }, + ".pdf": { + "file": "HTML5", + "stream": "HTML5" + }, + ".svg": { + "file": "HTML5", + "stream": "HTML5" + }, + ".txt": { + "file": "HTML5", + "stream": "HTML5" + }, + ".xhtml": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xml": { + "file": "HTML5", + "stream": "HTML5" + }, + ".xps": { + "file": "HTML5", + "stream": "HTML5" + } + }, + "tests/resources/test_open2.jpg": { + "": { + "file": "Image", + "stream": "Image" + }, + ".cbz": { + "file": "Image", + "stream": "Image" + }, + ".doc": { + "file": "Image", + "stream": "Image" + }, + ".docx": { + "file": "Image", + "stream": "Image" + }, + ".epub": { + "file": "Image", + "stream": "Image" + }, + ".fb2": { + "file": "Image", + "stream": "Image" + }, + ".html": { + "file": "Image", + "stream": "Image" + }, + ".jpg": { + "file": "Image", + "stream": "Image" + }, + ".mobi": { + "file": "Image", + "stream": "Image" + }, + ".pdf": { + "file": "Image", + "stream": "Image" + }, + ".svg": { + "file": "Image", + "stream": "Image" + }, + ".txt": { + "file": "Image", + "stream": "Image" + }, + ".xhtml": { + "file": "Image", + "stream": "Image" + }, + ".xml": { + "file": "Image", + "stream": "Image" + }, + ".xps": { + "file": "Image", + "stream": "Image" + } + }, + "tests/resources/test_open2.mobi": { + "": { + "file": "MOBI", + "stream": "MOBI" + }, + ".cbz": { + "file": "MOBI", + "stream": "MOBI" + }, + ".doc": { + "file": "MOBI", + "stream": "MOBI" + }, + ".docx": { + "file": "MOBI", + "stream": "MOBI" + }, + ".epub": { + "file": "MOBI", + "stream": "MOBI" + }, + ".fb2": { + "file": "MOBI", + "stream": "MOBI" + }, + ".html": { + "file": "MOBI", + "stream": "MOBI" + }, + ".jpg": { + "file": "MOBI", + "stream": "MOBI" + }, + ".mobi": { + "file": "MOBI", + "stream": "MOBI" + }, + ".pdf": { + "file": "MOBI", + "stream": "MOBI" + }, + ".svg": { + "file": "MOBI", + "stream": "MOBI" + }, + ".txt": { + "file": "MOBI", + "stream": "MOBI" + }, + ".xhtml": { + "file": "MOBI", + "stream": "MOBI" + }, + ".xml": { + "file": "MOBI", + "stream": "MOBI" + }, + ".xps": { + "file": "MOBI", + "stream": "MOBI" + } + }, + "tests/resources/test_open2.pdf": { + "": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".cbz": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".doc": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".docx": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".epub": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".fb2": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".html": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".jpg": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".mobi": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".pdf": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".svg": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".txt": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".xhtml": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".xml": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + }, + ".xps": { + "file": "PDF 1.5", + "stream": "PDF 1.5" + } + }, + "tests/resources/test_open2.svg": { + "": { + "file": "SVG", + "stream": "SVG" + }, + ".cbz": { + "file": "SVG", + "stream": "SVG" + }, + ".doc": { + "file": "SVG", + "stream": "SVG" + }, + ".docx": { + "file": "SVG", + "stream": "SVG" + }, + ".epub": { + "file": "SVG", + "stream": "SVG" + }, + ".fb2": { + "file": "SVG", + "stream": "SVG" + }, + ".html": { + "file": "SVG", + "stream": "SVG" + }, + ".jpg": { + "file": "SVG", + "stream": "SVG" + }, + ".mobi": { + "file": "SVG", + "stream": "SVG" + }, + ".pdf": { + "file": "SVG", + "stream": "SVG" + }, + ".svg": { + "file": "SVG", + "stream": "SVG" + }, + ".txt": { + "file": "SVG", + "stream": "SVG" + }, + ".xhtml": { + "file": "SVG", + "stream": "SVG" + }, + ".xml": { + "file": "SVG", + "stream": "SVG" + }, + ".xps": { + "file": "SVG", + "stream": "SVG" + } + }, + "tests/resources/test_open2.xhtml": { + "": { + "file": "XHTML", + "stream": "XHTML" + }, + ".cbz": { + "file": "XHTML", + "stream": "XHTML" + }, + ".doc": { + "file": "XHTML", + "stream": "XHTML" + }, + ".docx": { + "file": "XHTML", + "stream": "XHTML" + }, + ".epub": { + "file": "XHTML", + "stream": "XHTML" + }, + ".fb2": { + "file": "XHTML", + "stream": "XHTML" + }, + ".html": { + "file": "HTML5", + "stream": "HTML5" + }, + ".jpg": { + "file": "XHTML", + "stream": "XHTML" + }, + ".mobi": { + "file": "XHTML", + "stream": "XHTML" + }, + ".pdf": { + "file": "XHTML", + "stream": "XHTML" + }, + ".svg": { + "file": "XHTML", + "stream": "XHTML" + }, + ".txt": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xhtml": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xml": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xps": { + "file": "XHTML", + "stream": "XHTML" + } + }, + "tests/resources/test_open2.xml": { + "": { + "file": "[error]", + "stream": "[error]" + }, + ".cbz": { + "file": "[error]", + "stream": "[error]" + }, + ".doc": { + "file": "[error]", + "stream": "[error]" + }, + ".docx": { + "file": "[error]", + "stream": "[error]" + }, + ".epub": { + "file": "[error]", + "stream": "[error]" + }, + ".fb2": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".html": { + "file": "HTML5", + "stream": "HTML5" + }, + ".jpg": { + "file": "Image", + "stream": "Image" + }, + ".mobi": { + "file": "[error]", + "stream": "[error]" + }, + ".pdf": { + "file": "[error]", + "stream": "[error]" + }, + ".svg": { + "file": "SVG", + "stream": "SVG" + }, + ".txt": { + "file": "Text", + "stream": "Text" + }, + ".xhtml": { + "file": "XHTML", + "stream": "XHTML" + }, + ".xml": { + "file": "FictionBook2", + "stream": "FictionBook2" + }, + ".xps": { + "file": "[error]", + "stream": "[error]" + } + }, + "tests/resources/test_open2.xps": { + "": { + "file": "XPS", + "stream": "XPS" + }, + ".cbz": { + "file": "zip", + "stream": "zip" + }, + ".doc": { + "file": "XPS", + "stream": "XPS" + }, + ".docx": { + "file": "XPS", + "stream": "XPS" + }, + ".epub": { + "file": "XPS", + "stream": "XPS" + }, + ".fb2": { + "file": "XPS", + "stream": "XPS" + }, + ".html": { + "file": "XPS", + "stream": "XPS" + }, + ".jpg": { + "file": "XPS", + "stream": "XPS" + }, + ".mobi": { + "file": "XPS", + "stream": "XPS" + }, + ".pdf": { + "file": "XPS", + "stream": "XPS" + }, + ".svg": { + "file": "XPS", + "stream": "XPS" + }, + ".txt": { + "file": "XPS", + "stream": "XPS" + }, + ".xhtml": { + "file": "XPS", + "stream": "XPS" + }, + ".xml": { + "file": "XPS", + "stream": "XPS" + }, + ".xps": { + "file": "XPS", + "stream": "XPS" + } + } +} \ No newline at end of file diff --git a/tests/resources/test_toc_count.pdf b/tests/resources/test_toc_count.pdf new file mode 100644 index 000000000..fda13889d Binary files /dev/null and b/tests/resources/test_toc_count.pdf differ diff --git a/tests/resources/text-find-ligatures.pdf b/tests/resources/text-find-ligatures.pdf new file mode 100644 index 000000000..40c8e688f Binary files /dev/null and b/tests/resources/text-find-ligatures.pdf differ diff --git a/tests/run_compound.py b/tests/run_compound.py new file mode 100755 index 000000000..927b721e3 --- /dev/null +++ b/tests/run_compound.py @@ -0,0 +1,139 @@ +#! /usr/bin/env python3 + +''' +Runs a command using different implementations of PyMuPDF: + +1. Run with rebased implementation of PyMuPDF. + +2. As 1 but also set PYMUPDF_USE_EXTRA=0 to disable use of C++ optimisations. + +Example usage: + + ./PyMuPDF/tests/run_compound.py python -m pytest -s PyMuPDF + +Use `-i ` to select which implementations to use. In +``, `r` means rebased, `R` means rebased without +optimisations. + +For example use the rebased and unoptimised rebased implementations with: + + ./PyMuPDF/tests/run_compound.py python -m pytest -s PyMuPDF +''' + +import shlex +import os +import platform +import subprocess +import sys +import textwrap +import time + + +def log(text): + print(textwrap.indent(text, 'PyMuPDF:tests/run_compound.py: ')) + sys.stdout.flush() + + +def log_star(text): + log('#' * 40) + log(text) + log('#' * 40) + + +def main(): + + implementations = 'rR' + timeout = None + i = 1 + while i < len(sys.argv): + arg = sys.argv[i] + if arg == '-i': + i += 1 + implementations = sys.argv[i] + elif arg == '-t': + i += 1 + timeout = float(sys.argv[i]) + elif arg.startswith('-'): + raise Exception(f'Unrecognised {arg=}.') + else: + break + i += 1 + args = sys.argv[i:] + + e_rebased = None + e_rebased_unoptimised = None + + endtime = None + if timeout: + endtime = time.time() + timeout + + # Check `implementations`. + implementations_seen = set() + for i in implementations: + assert i not in implementations_seen, f'Duplicate implementation {i!r} in {implementations!r}.' + if i == 'r': + name = 'rebased' + elif i == 'R': + name = 'rebased (unoptimised)' + else: + assert 0, f'Unrecognised implementation {i!r} in {implementations!r}.' + log(f' {i!r}: will run with PyMuPDF {name}.') + implementations_seen.add(i) + + for i in implementations: + log(f'run_compound.py: {i=}') + + cpu_bits = int.bit_length(sys.maxsize+1) + log(f'{os.getcwd()=}') + log(f'{platform.machine()=}') + log(f'{platform.platform()=}') + log(f'{platform.python_version()=}') + log(f'{platform.system()=}') + if sys.implementation.name != 'graalpy': + log(f'{platform.uname()=}') + log(f'{sys.executable=}') + log(f'{sys.version=}') + log(f'{sys.version_info=}') + log(f'{list(sys.version_info)=}') + log(f'{cpu_bits=}') + + timeout = None + if endtime: + timeout = max(0, endtime - time.time()) + if i == 'r': + + # Run with default `pymupdf` (rebased). + # + log_star( f'Running using pymupdf (rebased): {shlex.join(args)}') + e_rebased = subprocess.run( args, shell=0, check=0, timeout=timeout).returncode + + elif i == 'R': + + # Run with `pymupdf` (rebased) again, this time with PYMUPDF_USE_EXTRA=0. + # + env = os.environ.copy() + env[ 'PYMUPDF_USE_EXTRA'] = '0' + log_star(f'Running using pymupdf (rebased) with PYMUPDF_USE_EXTRA=0: {shlex.join(args)}') + e_rebased_unoptimised = subprocess.run( args, shell=0, check=0, env=env, timeout=timeout).returncode + + else: + raise Exception(f'Unrecognised implementation {i!r}.') + + if e_rebased is not None: + log(f'{e_rebased=}') + if e_rebased_unoptimised is not None: + log(f'{e_rebased_unoptimised=}') + + if e_rebased or e_rebased_unoptimised: + log('Test(s) failed.') + return 1 + + +if __name__ == '__main__': + try: + sys.exit(main()) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + # Terminate relatively quietly, failed commands will usually have + # generated diagnostics. + log(str(e)) + sys.exit(1) diff --git a/tests/test_2548.py b/tests/test_2548.py new file mode 100644 index 000000000..f3a2b2db5 --- /dev/null +++ b/tests/test_2548.py @@ -0,0 +1,42 @@ +import os + +import pymupdf + +root = os.path.abspath(f'{__file__}/../..') + +def test_2548(): + """Text extraction should fail because of PDF structure cycle. + + Old MuPDF version did not detect the loop. + """ + print(f'test_2548(): {pymupdf.mupdf_version_tuple=}') + pymupdf.TOOLS.mupdf_warnings(reset=True) + doc = pymupdf.open(f'{root}/tests/resources/test_2548.pdf') + e = False + for page in doc: + try: + _ = page.get_text() + except Exception as ee: + print(f'test_2548: {ee=}') + if hasattr(pymupdf, 'mupdf'): + # Rebased. + expected = "RuntimeError('code=2: cycle in structure tree')" + else: + # Classic. + expected = "RuntimeError('cycle in structure tree')" + assert repr(ee) == expected, f'Expected {expected=} but got {repr(ee)=}.' + e = True + wt = pymupdf.TOOLS.mupdf_warnings() + print(f'test_2548(): {wt=}') + + # This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier + # versions with updated MuPDF also fix the bug. + rebased = hasattr(pymupdf, 'mupdf') + if pymupdf.mupdf_version_tuple >= (1, 27): + expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing' + expected = '\n'.join([expected] * 5) + else: + expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing' + if rebased: + assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n' + assert not e diff --git a/tests/test_2634.py b/tests/test_2634.py new file mode 100644 index 000000000..7d96cb21c --- /dev/null +++ b/tests/test_2634.py @@ -0,0 +1,65 @@ +import pymupdf + +import difflib +import json +import os +import pprint + + +def test_2634(): + if not hasattr(pymupdf, 'mupdf'): + print('test_2634(): Not running on classic.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2634.pdf') + with pymupdf.open(path) as pdf, pymupdf.open() as new: + new.insert_pdf(pdf) + new.set_toc(pdf.get_toc(simple=False)) + toc_pdf = pdf.get_toc(simple=False) + toc_new = new.get_toc(simple=False) + + def clear_xref(toc): + ''' + Clear toc items that naturally differ. + ''' + for item in toc: + d = item[3] + if 'collapse' in d: + d['collapse'] = 'dummy' + if 'xref' in d: + d['xref'] = 'dummy' + + clear_xref(toc_pdf) + clear_xref(toc_new) + + print('toc_pdf') + for item in toc_pdf: print(item) + print() + print('toc_new') + for item in toc_new: print(item) + + toc_text_pdf = pprint.pformat(toc_pdf, indent=4).split('\n') + toc_text_new = pprint.pformat(toc_new, indent=4).split('\n') + + diff = difflib.unified_diff( + toc_text_pdf, + toc_text_new, + lineterm='', + ) + print('\n'.join(diff)) + + # Check 'to' points are identical apart from rounding errors. + # + assert len(toc_new) == len(toc_pdf) + for a, b in zip(toc_pdf, toc_new): + a_dict = a[3] + b_dict = b[3] + if 'to' in a_dict: + assert 'to' in b_dict + a_to = a_dict['to'] + b_to = b_dict['to'] + assert isinstance(a_to, pymupdf.Point) + assert isinstance(b_to, pymupdf.Point) + if a_to != b_to: + print(f'Points not identical: {a_to=} {b_to=}.') + assert abs(a_to.x - b_to.x) < 0.01 + assert abs(a_to.y - b_to.y) < 0.01 diff --git a/tests/test_2904.py b/tests/test_2904.py new file mode 100644 index 000000000..21b55fb18 --- /dev/null +++ b/tests/test_2904.py @@ -0,0 +1,40 @@ +import pymupdf + +import os +import sys + +def test_2904(): + print(f'test_2904(): {pymupdf.mupdf_version_tuple=}.') + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2904.pdf') + pdf_docs = pymupdf.open(path) + for page_id, page in enumerate(pdf_docs): + page_imgs = page.get_images() + for i, img in enumerate(page_imgs): + if page_id == 5: + #print(f'{page_id=} {i=} {type(img)=} {img=}') + sys.stdout.flush() + e = None + try: + recs = page.get_image_rects(img, transform=True) + except Exception as ee: + print(f'Exception: {page_id=} {i=} {img=}: {ee}') + if 0 and hasattr(pymupdf, 'mupdf'): + print(f'pymupdf.exception_info:') + pymupdf.exception_info() + sys.stdout.flush() + e = ee + if page_id == 5: + print(f'{pymupdf.mupdf_version_tuple=}: {page_id=} {i=} {e=} {img=}:') + if page_id == 5 and i==3: + assert e + if hasattr(pymupdf, 'mupdf'): + # rebased. + assert str(e) == 'code=8: Failed to read JPX header' + else: + # classic + assert str(e) == 'Failed to read JPX header' + else: + assert not e + + # Clear warnings, as we will have generated many. + pymupdf.TOOLS.mupdf_warnings() diff --git a/tests/test_2907.py b/tests/test_2907.py new file mode 100644 index 000000000..58346be6b --- /dev/null +++ b/tests/test_2907.py @@ -0,0 +1,18 @@ +import pymupdf + +import os.path +import pathlib + +def test_2907(): + # This test is for a bug in classic 'segfault trying to call clean_contents + # on certain pdfs with python 3.12', which we are not going to fix. + if not hasattr(pymupdf, 'mupdf'): + print('test_2907(): not running on classic because known to fail.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2907.pdf') + pdf_file = pathlib.Path(path).read_bytes() + fitz_document = pymupdf.open(stream=pdf_file, filetype="application/pdf") + + pdf_pages = list(fitz_document.pages()) + (page,) = pdf_pages + page.clean_contents() diff --git a/tests/test_4141.py b/tests/test_4141.py new file mode 100644 index 000000000..bd13988f6 --- /dev/null +++ b/tests/test_4141.py @@ -0,0 +1,19 @@ +import pymupdf + +import os.path + + +def test_4141(): + """survive missing /Resources object in a number of cases.""" + path = os.path.abspath(f"{__file__}/../../tests/resources/test_4141.pdf") + doc = pymupdf.open(path) + page = doc[0] + # make sure the right test file + assert doc.xref_get_key(page.xref, "Resources") == ("null", "null") + page.insert_htmlbox((100, 100, 200, 200), "Hallo") # will fail without the fix + doc.close() + doc = pymupdf.open(doc.name) + page = doc[0] + tw = pymupdf.TextWriter(page.rect) + tw.append((100, 100), "Hallo") + tw.write_text(page) # will fail without the fix diff --git a/tests/test_4466.pdf b/tests/test_4466.pdf new file mode 100644 index 000000000..782c1bead Binary files /dev/null and b/tests/test_4466.pdf differ diff --git a/tests/test_4503.py b/tests/test_4503.py new file mode 100644 index 000000000..fb1b449d0 --- /dev/null +++ b/tests/test_4503.py @@ -0,0 +1,38 @@ +""" +Test for issue #4503 in pymupdf: +Correct recognition of strikeout and underline styles in text spans. +""" + +import os +import pymupdf +from pymupdf import mupdf + +STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT +UNDERLINE = mupdf.FZ_STEXT_UNDERLINE + + +def test_4503(): + """ + Check that the text span with the specified text has the correct styling: + strikeout, but no underline. + Previously, the text was broken in multiple spans with span breaks at + every space. and some parts were not detected as strikeout at all. + """ + scriptdir = os.path.dirname(os.path.abspath(__file__)) + text = "the right to request the state to review and, if appropriate," + filename = os.path.join(scriptdir, "resources", "test-4503.pdf") + doc = pymupdf.open(filename) + page = doc[0] + flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES + spans = [ + s + for b in page.get_text("dict", flags=flags)["blocks"] + for l in b["lines"] + for s in l["spans"] + if s["text"] == text + ] + assert spans, "No spans found with the specified text" + span = spans[0] + + assert span["char_flags"] & STRIKEOUT + assert not span["char_flags"] & UNDERLINE diff --git a/tests/test_4505.py b/tests/test_4505.py new file mode 100644 index 000000000..9c1fdc9f2 --- /dev/null +++ b/tests/test_4505.py @@ -0,0 +1,27 @@ +import pymupdf +import os.path + + +def test_4505(): + """Copy field flags to Parent widget and all of its kids.""" + path = os.path.abspath(f"{__file__}/../../tests/resources/test_4505.pdf") + doc = pymupdf.open(path) + page = doc[0] + text1_flags_before = {} + text1_flags_after = {} + # extract all widgets having the same field name + for w in page.widgets(): + if w.field_name != "text_1": + continue + text1_flags_before[w.xref] = w.field_flags + # expected exiting field flags + assert text1_flags_before == {8: 1, 10: 0, 33: 0} + w = page.load_widget(8) # first of these widgets + # give all connected widgets that field flags value + w.update(sync_flags=True) + # confirm that all connected widgets have the same field flags + for w in page.widgets(): + if w.field_name != "text_1": + continue + text1_flags_after[w.xref] = w.field_flags + assert text1_flags_after == {8: 1, 10: 1, 33: 1} diff --git a/tests/test_4520.py b/tests/test_4520.py new file mode 100644 index 000000000..81e7d2559 --- /dev/null +++ b/tests/test_4520.py @@ -0,0 +1,11 @@ +import pymupdf + + +def test_4520(): + """Accept source pages without /Contents object in show_pdf_page.""" + tar = pymupdf.open() + src = pymupdf.open() + src.new_page() + page = tar.new_page() + xref = page.show_pdf_page(page.rect, src, 0) + assert xref diff --git a/tests/test_4614.py b/tests/test_4614.py new file mode 100644 index 000000000..d3b466cb6 --- /dev/null +++ b/tests/test_4614.py @@ -0,0 +1,10 @@ +import pymupdf +import os + + +def test_4614(): + script_dir = os.path.dirname(__file__) + filename = os.path.join(script_dir, "resources", "test_4614.pdf") + src = pymupdf.open(filename) + doc = pymupdf.open() + doc.insert_pdf(src) diff --git a/tests/test_4716.py b/tests/test_4716.py new file mode 100644 index 000000000..09692277a --- /dev/null +++ b/tests/test_4716.py @@ -0,0 +1,15 @@ +import pymupdf +import os + +def test_4716(): + """Confirm that ZERO WIDTH JOINER will never start a word.""" + script_dir = os.path.dirname(__file__) + filename = os.path.join(script_dir, "resources", "test_4716.pdf") + doc = pymupdf.open(filename) + expected = set(["+25.00", "Любимый", "-10.00"]) + word_text = set() + for page in doc: + words = page.get_text("words") + for w in words: + word_text.add(w[4]) + assert word_text == expected diff --git a/tests/test_annots.py b/tests/test_annots.py index 320d735ef..834d85cdc 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -2,24 +2,28 @@ """ Test PDF annotation insertions. """ -import fitz + import os +import platform + +import pymupdf +import gentle_compare -fitz.TOOLS.set_annot_stem("jorj") red = (1, 0, 0) blue = (0, 0, 1) gold = (1, 1, 0) green = (0, 1, 0) +scriptdir = os.path.dirname(__file__) -displ = fitz.Rect(0, 50, 0, 50) -r = fitz.Rect(72, 72, 220, 100) -t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" -rect = fitz.Rect(100, 100, 200, 200) +displ = pymupdf.Rect(0, 50, 0, 50) +r = pymupdf.Rect(72, 72, 220, 100) +t1 = "têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" +rect = pymupdf.Rect(100, 100, 200, 200) def test_caret(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_caret_annot(rect.tl) assert annot.type == (14, "Caret") @@ -29,7 +33,7 @@ def test_caret(): def test_freetext(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_freetext_annot( rect, @@ -38,7 +42,7 @@ def test_freetext(): rotate=90, text_color=blue, fill_color=gold, - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, ) annot.set_border(width=0.3, dashes=[2]) annot.update(text_color=blue, fill_color=gold) @@ -46,35 +50,35 @@ def test_freetext(): def test_text(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_text_annot(r.tl, t1) assert annot.type == (0, "Text") def test_highlight(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_highlight_annot(rect) assert annot.type == (8, "Highlight") def test_underline(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_underline_annot(rect) assert annot.type == (9, "Underline") def test_squiggly(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_squiggly_annot(rect) assert annot.type == (10, "Squiggly") def test_strikeout(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_strikeout_annot(rect) assert annot.type == (11, "StrikeOut") @@ -82,10 +86,10 @@ def test_strikeout(): def test_polyline(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() rect = page.rect + (100, 36, -100, -36) - cell = fitz.make_table(rect, rows=10) + cell = pymupdf.make_table(rect, rows=10) for i in range(10): annot = page.add_polyline_annot((cell[i][0].bl, cell[i][0].br)) annot.set_line_ends(i, i) @@ -96,17 +100,17 @@ def test_polyline(): def test_polygon(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_polygon_annot([rect.bl, rect.tr, rect.br, rect.tl]) assert annot.type == (6, "Polygon") def test_line(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() rect = page.rect + (100, 36, -100, -36) - cell = fitz.make_table(rect, rows=10) + cell = pymupdf.make_table(rect, rows=10) for i in range(10): annot = page.add_line_annot(cell[i][0].bl, cell[i][0].br) annot.set_line_ends(i, i) @@ -117,40 +121,49 @@ def test_line(): def test_square(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_rect_annot(rect) assert annot.type == (4, "Square") def test_circle(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_circle_annot(rect) assert annot.type == (5, "Circle") def test_fileattachment(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_file_annot(rect.tl, b"just anything for testing", "testdata.txt") assert annot.type == (17, "FileAttachment") def test_stamp(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - annot = page.add_stamp_annot(r, stamp=10) + annot = page.add_stamp_annot(r, stamp=0) assert annot.type == (13, "Stamp") + assert annot.info["content"] == "Approved" annot_id = annot.info["id"] annot_xref = annot.xref - a1 = page.load_annot(annot_id) - a2 = page.load_annot(annot_xref) + page.load_annot(annot_id) + page.load_annot(annot_xref) page = doc.reload_page(page) -def test_redact(): - doc = fitz.open() +def test_image_stamp(): + doc = pymupdf.open() + page = doc.new_page() + filename = os.path.join(scriptdir, "resources", "nur-ruhig.jpg") + annot = page.add_stamp_annot(r, stamp=filename) + assert annot.info["content"] == "Image Stamp" + + +def test_redact1(): + doc = pymupdf.open() page = doc.new_page() annot = page.add_redact_annot(r, text="Hello") annot.update( @@ -158,8 +171,7 @@ def test_redact(): rotate=-1, ) assert annot.type == (12, "Redact") - x = annot._get_redact_values() - pix = annot.get_pixmap() + annot.get_pixmap() info = annot.info annot.set_info(info) assert not annot.has_popup @@ -168,29 +180,93 @@ def test_redact(): assert s == r page.apply_redactions() + +def test_redact2(): + """Test for keeping text and removing graphics.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_redact2' in classic") + return + filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") + doc = pymupdf.open(filename) + page = doc[0] + all_text0 = page.get_text("words") + page.add_redact_annot(page.rect) + page.apply_redactions(text=1) + t = page.get_text("words") + assert t == all_text0 + assert not page.get_drawings() + + +def test_redact3(): + """Test for removing text and graphics.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_redact3' in classic") + return + filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") + doc = pymupdf.open(filename) + page = doc[0] + page.add_redact_annot(page.rect) + page.apply_redactions() + assert not page.get_text("words") + assert not page.get_drawings() + + +def test_redact4(): + """Test for removing text and keeping graphics.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_redact4' in classic") + return + filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") + doc = pymupdf.open(filename) + page = doc[0] + line_art = page.get_drawings() + page.add_redact_annot(page.rect) + page.apply_redactions(graphics=0) + assert not page.get_text("words") + assert line_art == page.get_drawings() + + def test_1645(): ''' Test fix for #1645. ''' - path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf') - if fitz.mupdf_version_tuple[:2] >= (1, 22): - path_expected = os.path.abspath( f'{__file__}/../resources/test_1645_expected_1.22.pdf') - else: - path_expected = os.path.abspath( f'{__file__}/../resources/test_1645_expected.pdf') - path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf') - doc = fitz.open(path_in) - page = doc[0] - page_bounds = page.bound() - annot_loc = fitz.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15) - page.add_freetext_annot(annot_loc * page.derotation_matrix, "TEST", fontsize=18, - fill_color=fitz.utils.getColor("FIREBRICK1"), rotate=page.rotation) - doc.save(path_out, garbage=1, deflate=True, no_new_id=True) - print(f'Have created {path_out}. comparing with {path_expected}.') - with open( path_out, 'rb') as f: - out = f.read() - with open( path_expected, 'rb') as f: - expected = f.read() - assert out == expected, f'Files differ: {path_out} {path_expected}' + # The expected output files assume annot_stem is 'jorj'. We need to always + # restore this before returning (this is checked by conftest.py). + annot_stem = pymupdf.JM_annot_id_stem + pymupdf.TOOLS.set_annot_stem('jorj') + try: + path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf') + if pymupdf.mupdf_version_tuple >= (1, 27): + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected-after-1.27.0.pdf') + else: + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected.pdf') + path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf') + doc = pymupdf.open(path_in) + page = doc[0] + page_bounds = page.bound() + annot_loc = pymupdf.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15) + # Check type of page.derotation_matrix - this is #2911. + assert isinstance(page.derotation_matrix, pymupdf.Matrix), \ + f'Bad type for page.derotation_matrix: {type(page.derotation_matrix)=} {page.derotation_matrix=}.' + page.add_freetext_annot( + annot_loc * page.derotation_matrix, + "TEST", + fontsize=18, + fill_color=pymupdf.utils.getColor("FIREBRICK1"), + rotate=page.rotation, + ) + doc.save(path_out, garbage=1, deflate=True, no_new_id=True) + print(f'Have created {path_out}. comparing with {path_expected}.') + with pymupdf.open(path_expected) as doc_expected, pymupdf.open(path_out) as doc_out: + rms = gentle_compare.pixmaps_rms( + doc_expected[0].get_pixmap(), + doc_out[0].get_pixmap(), + ) + print(f'test_1645: {rms=}') + assert rms < 0.1, f'Pixmaps differ: {path_expected=} {path_out=}' + finally: + # Restore annot_stem. + pymupdf.TOOLS.set_annot_stem(annot_stem) def test_1824(): ''' @@ -198,7 +274,7 @@ def test_1824(): transparent image. ''' path = os.path.abspath( f'{__file__}/../resources/test_1824.pdf') - doc=fitz.open(path) + doc=pymupdf.open(path) page=doc[0] page.apply_redactions() @@ -206,11 +282,12 @@ def test_2270(): ''' https://github.com/pymupdf/PyMuPDF/issues/2270 ''' - path = os.path.abspath( f'{__file__}/../resources/test_2270.pdf') - with fitz.open(path) as document: + path = os.path.abspath( f'{__file__}/../../tests/resources/test_2270.pdf') + with pymupdf.open(path) as document: for page_number, page in enumerate(document): - for textBox in page.annots(types=(fitz.PDF_ANNOT_FREE_TEXT,fitz.PDF_ANNOT_TEXT)): + for textBox in page.annots(types=(pymupdf.PDF_ANNOT_FREE_TEXT,pymupdf.PDF_ANNOT_TEXT)): print("textBox.type :", textBox.type) + print(f"{textBox.rect=}") print("textBox.get_text('words') : ", textBox.get_text('words')) print("textBox.get_text('text') : ", textBox.get_text('text')) print("textBox.get_textbox(textBox.rect) : ", textBox.get_textbox(textBox.rect)) @@ -221,3 +298,386 @@ def test_2270(): assert textBox.get_textbox(textBox.rect) == 'abc123' assert textBox.info['content'] == 'abc123' + # Additional check that Annot.get_textpage() returns a + # TextPage that works with page.get_text() - prior to + # 2024-01-30 the TextPage had no `.parent` member. + textpage = textBox.get_textpage() + text = page.get_text() + print(f'{text=}') + text = page.get_text(textpage=textpage) + print(f'{text=}') + print(f'{getattr(textpage, "parent")=}') + + if pymupdf.mupdf_version_tuple >= (1, 26): + # Check Annotation.get_textpage()'s arg. + clip = textBox.rect + clip.x1 = clip.x0 + (clip.x1 - clip.x0) / 3 + textpage2 = textBox.get_textpage(clip=clip) + text = textpage2.extractText() + print(f'With {clip=}: {text=}') + assert text == 'ab\n' + else: + assert not hasattr(pymupdf.mupdf, 'FZ_STEXT_CLIP_RECT') + + +def test_2934_add_redact_annot(): + ''' + Test fix for bug mentioned in #2934. + ''' + path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf') + with open(path, 'rb') as f: + data = f.read() + doc = pymupdf.Document(stream=data) + print(f'Is PDF: {doc.is_pdf}') + print(f'Number of pages: {doc.page_count}') + + import json + page=doc[0] + page_json_str =doc[0].get_text("json") + page_json_data = json.loads(page_json_str) + span=page_json_data.get("blocks")[0].get("lines")[0].get("spans")[0] + page.add_redact_annot(span["bbox"], text="") + page.apply_redactions() + +def test_2969(): + ''' + https://github.com/pymupdf/PyMuPDF/issues/2969 + ''' + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2969.pdf') + doc = pymupdf.open(path) + page = doc[0] + first_annot = list(page.annots())[0] + first_annot.next + +def test_file_info(): + path = os.path.abspath(f'{__file__}/../../tests/resources/test_annot_file_info.pdf') + document = pymupdf.open(path) + results = list() + for i, page in enumerate(document): + print(f'{i=}') + annotations = page.annots() + for j, annotation in enumerate(annotations): + print(f'{j=} {annotation=}') + t = annotation.type + print(f'{t=}') + if t[0] == pymupdf.PDF_ANNOT_FILE_ATTACHMENT: + file_info = annotation.file_info + print(f'{file_info=}') + results.append(file_info) + assert results == [ + {'filename': 'example.pdf', 'description': '', 'length': 8416, 'size': 8992}, + {'filename': 'photo1.jpeg', 'description': '', 'length': 10154, 'size': 8012}, + ] + +def test_3131(): + doc = pymupdf.open() + page = doc.new_page() + + page.add_line_annot((0, 0), (1, 1)) + page.add_line_annot((1, 0), (0, 1)) + + first_annot, _ = page.annots() + first_annot.next.type + +def test_3209(): + pdf = pymupdf.Document(filetype="pdf") + page = pdf.new_page() + page.add_ink_annot([[(300,300), (400, 380), (350, 350)]]) + n = 0 + for annot in page.annots(): + n += 1 + assert annot.vertices == [[(300.0, 300.0), (400.0, 380.0), (350.0, 350.0)]] + assert n == 1 + path = os.path.abspath(f'{__file__}/../../tests/test_3209_out.pdf') + pdf.save(path) # Check the output PDF that the annotation is correctly drawn + +def test_3863(): + path_in = os.path.normpath(f'{__file__}/../../tests/resources/test_3863.pdf') + path_out = os.path.normpath(f'{__file__}/../../tests/test_3863.pdf.pdf') + + # Create redacted PDF. + print(f'Loading {path_in=}.') + with pymupdf.open(path_in) as document: + + for num, page in enumerate(document): + print(f"Page {num + 1} - {page.rect}:") + + for image in page.get_images(full=True): + print(f" - Image: {image}") + + redact_rect = page.rect + + if page.rotation in (90, 270): + redact_rect = pymupdf.Rect(0, 0, page.rect.height, page.rect.width) + + page.add_redact_annot(redact_rect) + page.apply_redactions(images=pymupdf.PDF_REDACT_IMAGE_NONE) + + print(f'Writing to {path_out=}.') + document.save(path_out) + + with pymupdf.open(path_out) as document: + assert len(document) == 8 + + # Create PNG for each page of redacted PDF. + for num, page in enumerate(document): + path_png = f'{path_out}.{num}.png' + pixmap = page.get_pixmap() + print(f'Writing to {path_png=}.') + pixmap.save(path_png) + # Compare with expected png. + + print(f'Comparing page PNGs with expected PNGs.') + for num, _ in enumerate(document): + path_png = f'{path_out}.{num}.png' + path_png_expected = f'{path_in}.pdf.{num}.png' + print(f'{path_png=}.') + print(f'{path_png_expected=}.') + rms = gentle_compare.pixmaps_rms(path_png, path_png_expected, ' ') + # We get small differences in sysinstall tests, where some + # thirdparty libraries can differ. + assert rms < 1 + +def test_3758(): + # This test requires input file that is not public, so is usually not + # available. + path = os.path.normpath(f'{__file__}/../../../test_3758.pdf') + if not os.path.exists(path): + print(f'test_3758(): not running because does not exist: {path=}.') + return + import json + with pymupdf.open(path) as document: + for page in document: + info = json.loads(page.get_text('json', flags=pymupdf.TEXTFLAGS_TEXT)) + for block_ind, block in enumerate(info['blocks']): + for line_ind, line in enumerate(block['lines']): + for span_ind, span in enumerate(line['spans']): + # print(span) + page.add_redact_annot(pymupdf.Rect(*span['bbox'])) + page.apply_redactions() + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt + + +def test_parent(): + """Test invalidating parent on page re-assignment.""" + doc = pymupdf.open() + page = doc.new_page() + a = page.add_highlight_annot(page.rect) # insert annotation on page 0 + page = doc.new_page() # make a new page, should orphanate annotation + try: + print(a) # should raise + except Exception as e: + if platform.system() == 'OpenBSD': + assert isinstance(e, pymupdf.mupdf.FzErrorBase), f'Incorrect {type(e)=}.' + else: + assert isinstance(e, pymupdf.mupdf.FzErrorArgument), f'Incorrect {type(e)=}.' + assert str(e) == 'code=4: annotation not bound to any page', f'Incorrect error text {str(e)=}.' + else: + assert 0, f'Failed to get expected exception.' + +def test_4047(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4047.pdf') + with pymupdf.open(path) as document: + page = document[0] + fontname = page.get_fonts()[0][3] + if fontname not in pymupdf.Base14_fontnames: + fontname = "Courier" + hits = page.search_for("|") + for rect in hits: + page.add_redact_annot( + rect, " ", fontname=fontname, align=pymupdf.TEXT_ALIGN_CENTER, fontsize=10 + ) # Segmentation Fault... + page.apply_redactions() + +def test_4079(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4079.pdf') + if pymupdf.mupdf_version_tuple >= (1, 25, 5): + path_after = os.path.normpath(f'{__file__}/../../tests/resources/test_4079_after.pdf') + else: + # 2024-11-27 Expect incorrect behaviour. + path_after = os.path.normpath(f'{__file__}/../../tests/resources/test_4079_after_1.25.pdf') + + path_out = os.path.normpath(f'{__file__}/../../tests/test_4079_out') + with pymupdf.open(path_after) as document_after: + page = document_after[0] + pixmap_after_expected = page.get_pixmap() + with pymupdf.open(path) as document: + page = document[0] + rects = [ + [164,213,282,227], + [282,213,397,233], + [434,209,525,243], + [169,228,231,243], + [377,592,440,607], + [373,611,444,626], + ] + for rect in rects: + page.add_redact_annot(rect, fill=(1,0,0)) + page.draw_rect(rect, color=(0, 1, 0)) + document.save(f'{path_out}_before.pdf') + page.apply_redactions(images=0) + pixmap_after = page.get_pixmap() + document.save(f'{path_out}_after.pdf') + rms = gentle_compare.pixmaps_rms(pixmap_after_expected, pixmap_after) + diff = gentle_compare.pixmaps_diff(pixmap_after_expected, pixmap_after) + path = os.path.normpath(f'{__file__}/../../tests/test_4079_diff.png') + diff.save(path) + print(f'{rms=}') + assert rms == 0 + +def test_4254(): + """Ensure that both annotations are fully created + + We do this by asserting equal top-used colors in respective pixmaps. + """ + doc = pymupdf.open() + page = doc.new_page() + + rect = pymupdf.Rect(100, 100, 200, 150) + annot = page.add_freetext_annot(rect, "Test Annotation from minimal example") + annot.set_border(width=1, dashes=(3, 3)) + annot.set_opacity(0.5) + try: + annot.set_colors(stroke=(1, 0, 0)) + except ValueError as e: + assert 'cannot be used for FreeText annotations' in str(e), f'{e}' + else: + assert 0 + annot.update() + + rect = pymupdf.Rect(200, 200, 400, 400) + annot2 = page.add_freetext_annot(rect, "Test Annotation from minimal example pt 2") + annot2.set_border(width=1, dashes=(3, 3)) + annot2.set_opacity(0.5) + try: + annot2.set_colors(stroke=(1, 0, 0)) + except ValueError as e: + assert 'cannot be used for FreeText annotations' in str(e), f'{e}' + else: + assert 0 + annot.update() + annot2.update() + + # stores top color for each pixmap + top_colors = set() + for annot in page.annots(): + pix = annot.get_pixmap() + top_colors.add(pix.color_topusage()[1]) + + # only one color must exist + assert len(top_colors) == 1 + +def test_richtext(): + """Test creation of rich text FreeText annotations. + + We create the same annotation on different pages in different ways, + with and without using Annotation.update(), and then assert equality + of the respective images. + """ + ds = """font-size: 11pt; font-family: sans-serif;""" + bullet = chr(0x2610) + chr(0x2611) + chr(0x2612) + text = f"""

+ PyMuPDF འདི་ ཡིག་ཆ་བཀྲམ་སྤེལ་གྱི་དོན་ལུ་ པའི་ཐོན་ཐུམ་སྒྲིལ་དྲག་ཤོས་དང་མགྱོགས་ཤོས་ཅིག་ཨིན། + Here is some bold and italic text, followed by bold-italic. Text-based check boxes: {bullet}. +

""" + gold = (1, 1, 0) + doc = pymupdf.open() + + # First page. + page = doc.new_page() + rect = pymupdf.Rect(100, 100, 350, 200) + p2 = rect.tr + (50, 30) + p3 = p2 + (0, 30) + annot = page.add_freetext_annot( + rect, + text, + fill_color=gold, + opacity=0.5, + rotate=90, + border_width=1, + dashes=None, + richtext=True, + callout=(p3, p2, rect.tr), + ) + + pix1 = page.get_pixmap() + + # Second page. + # the annotation is created with minimal parameters, which are supplied + # in a separate call to the .update() method. + page = doc.new_page() + annot = page.add_freetext_annot( + rect, + text, + border_width=1, + dashes=None, + richtext=True, + callout=(p3, p2, rect.tr), + ) + annot.update(fill_color=gold, opacity=0.5, rotate=90) + pix2 = page.get_pixmap() + assert pix1.samples == pix2.samples + + +def test_4447(): + document = pymupdf.open() + + page = document.new_page() + + text_color = (1, 0, 0) + fill_color = (0, 1, 0) + border_color = (0, 0, 1) + + annot_rect = pymupdf.Rect(90.1, 486.73, 139.26, 499.46) + + try: + annot = page.add_freetext_annot( + annot_rect, + "AETERM", + fontname="Arial", + fontsize=10, + text_color=text_color, + fill_color=fill_color, + border_color=border_color, + border_width=1, + ) + except ValueError as e: + assert 'cannot set border_color if rich_text is False' in str(e), str(e) + else: + assert 0 + + try: + annot = page.add_freetext_annot( + (30, 400, 100, 450), + "Two", + fontname="Arial", + fontsize=10, + text_color=text_color, + fill_color=fill_color, + border_color=border_color, + border_width=1, + ) + except ValueError as e: + assert 'cannot set border_color if rich_text is False' in str(e), str(e) + else: + assert 0 + + annot = page.add_freetext_annot( + (30, 500, 100, 550), + "Three", + fontname="Arial", + fontsize=10, + text_color=text_color, + border_width=1, + ) + annot.update(text_color=text_color, fill_color=fill_color) + try: + annot.update(border_color=border_color) + except ValueError as e: + assert 'cannot set border_color if rich_text is False' in str(e), str(e) + else: + assert 0 + + path_out = os.path.normpath(f'{__file__}/../../tests/test_4447.pdf') + document.save(path_out) diff --git a/tests/test_badfonts.py b/tests/test_badfonts.py index dc55a83d4..6af55b938 100644 --- a/tests/test_badfonts.py +++ b/tests/test_badfonts.py @@ -3,13 +3,13 @@ """ import os -import fitz +import pymupdf def test_survive_names(): scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "has-bad-fonts.pdf") - doc = fitz.open(filename) + doc = pymupdf.open(filename) print("File '%s' uses the following fonts on page 0:" % doc.name) for f in doc.get_page_fonts(0): print(f) diff --git a/tests/test_balance_count.py b/tests/test_balance_count.py new file mode 100644 index 000000000..cfa56b82e --- /dev/null +++ b/tests/test_balance_count.py @@ -0,0 +1,52 @@ +import pymupdf + + +def test_q_count(): + """Testing graphics state balances and wrap_contents(). + + Take page's contents and generate various imbalanced graphics state + situations. Each time compare q-count with expected results. + Finally confirm we are out of balance using "is_wrapped", wrap the + contents object(s) via "wrap_contents()" and confirm success. + PDF commands "q" / "Q" stand for "push", respectively "pop". + """ + doc = pymupdf.open() + page = doc.new_page() + # the page has no /Contents objects at all yet. Create one causing + # an initial imbalance (so prepended "q" is needed) + pymupdf.TOOLS._insert_contents(page, b"Q", True) # append + assert page._count_q_balance() == (1, 0) + assert page.is_wrapped is False + + # Prepend more data that yield a different type of imbalanced contents: + # Although counts of q and Q are equal now, the unshielded 'cm' before + # the first 'q' makes the contents unusable for insertions. + pymupdf.TOOLS._insert_contents(page, b"1 0 0 -1 0 0 cm q ", False) # prepend + assert page.is_wrapped is False + if page._count_q_balance() == (0, 0): + print("imbalance undetected by q balance count") + + text = "Hello, World!" + page.insert_text((100, 100), text) # establishes balance! + + # this should have produced a balanced graphics state + assert page._count_q_balance() == (0, 0) + assert page.is_wrapped + + # an appended "pop" must be balanced by a prepended "push" + pymupdf.TOOLS._insert_contents(page, b"Q", True) # append + assert page._count_q_balance() == (1, 0) + + # a prepended "pop" yet needs another push + pymupdf.TOOLS._insert_contents(page, b"Q", False) # prepend + assert page._count_q_balance() == (2, 0) + + # an appended "push" needs an additional "pop" + pymupdf.TOOLS._insert_contents(page, b"q", True) # append + assert page._count_q_balance() == (2, 1) + + # wrapping the contents should yield a balanced state again + assert page.is_wrapped is False + page.wrap_contents() + assert page.is_wrapped is True + assert page._count_q_balance() == (0, 0) diff --git a/tests/test_barcode.py b/tests/test_barcode.py new file mode 100644 index 000000000..99682514a --- /dev/null +++ b/tests/test_barcode.py @@ -0,0 +1,63 @@ +import os + +import pymupdf + + +def test_barcode(): + if pymupdf.mupdf_version_tuple < (1, 26): + print(f'Not testing barcode because {pymupdf.mupdf_version=} < 1.26') + return + path = os.path.normpath(f'{__file__}/../../tests/test_barcode_out.pdf') + + url = 'http://artifex.com' + text_in = '012345678901' + text_out = '123456789012' + # Create empty document and add a qrcode image. + with pymupdf.Document() as document: + page = document.new_page() + + pixmap = pymupdf.mupdf.fz_new_barcode_pixmap( + pymupdf.mupdf.FZ_BARCODE_QRCODE, + url, + 512, + 4, # ec_level + 0, # quiet + 1, # hrt + ) + pixmap = pymupdf.Pixmap('raw', pixmap) + page.insert_image( + (0, 0, 100, 100), + pixmap=pixmap, + ) + pixmap = pymupdf.mupdf.fz_new_barcode_pixmap( + pymupdf.mupdf.FZ_BARCODE_EAN13, + text_in, + 512, + 4, # ec_level + 0, # quiet + 1, # hrt + ) + pixmap = pymupdf.Pixmap('raw', pixmap) + page.insert_image( + (0, 200, 100, 300), + pixmap=pixmap, + ) + + document.save(path) + + with pymupdf.open(path) as document: + page = document[0] + for i, ii in enumerate(page.get_images()): + xref = ii[0] + pixmap = pymupdf.Pixmap(document, xref) + hrt, barcode_type = pymupdf.mupdf.fz_decode_barcode_from_pixmap2( + pixmap.this, + 0, # rotate. + ) + print(f'{hrt=}') + if i == 0: + assert hrt == url + elif i == 1: + assert hrt == text_out + else: + assert 0 diff --git a/tests/test_clip_page.py b/tests/test_clip_page.py new file mode 100644 index 000000000..48e4ac597 --- /dev/null +++ b/tests/test_clip_page.py @@ -0,0 +1,37 @@ +""" +Test Page method clip_to_rect. +""" + +import os +import pymupdf + + +def test_clip(): + """ + Clip a Page to a rectangle and confirm that no text has survived + that is completely outside the rectangle.. + """ + scriptdir = os.path.dirname(os.path.abspath(__file__)) + rect = pymupdf.Rect(200, 200, 400, 500) + filename = os.path.join(scriptdir, "resources", "v110-changes.pdf") + doc = pymupdf.open(filename) + page = doc[0] + page.clip_to_rect(rect) # clip the page to the rectangle + # capture font warning message of MuPDF + assert pymupdf.TOOLS.mupdf_warnings() == "bogus font ascent/descent values (0 / 0)" + # extract all text characters and assert that each one + # has a non-empty intersection with the rectangle. + chars = [ + c + for b in page.get_text("rawdict")["blocks"] + for l in b["lines"] + for s in l["spans"] + for c in s["chars"] + ] + for char in chars: + bbox = pymupdf.Rect(char["bbox"]) + if bbox.is_empty: + continue + assert bbox.intersects( + rect + ), f"Character '{char['c']}' at {bbox} is outside of {rect}." diff --git a/tests/test_cluster_drawings.py b/tests/test_cluster_drawings.py new file mode 100644 index 000000000..08fe23214 --- /dev/null +++ b/tests/test_cluster_drawings.py @@ -0,0 +1,47 @@ +import os +import pymupdf + +scriptdir = os.path.dirname(__file__) + + +def test_cluster1(): + """Confirm correct identification of known examples.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_cluster1' in classic") + return + filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") + doc = pymupdf.open(filename) + page = doc[0] + assert len(page.cluster_drawings()) == 10 + filename = os.path.join(scriptdir, "resources", "chinese-tables.pdf") + doc = pymupdf.open(filename) + page = doc[0] + assert len(page.cluster_drawings()) == 2 + + +def test_cluster2(): + """Join disjoint but neighbored drawings.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_cluster2' in classic") + return + doc = pymupdf.open() + page = doc.new_page() + r1 = pymupdf.Rect(100, 100, 200, 200) + r2 = pymupdf.Rect(203, 203, 400, 400) + page.draw_rect(r1) + page.draw_rect(r2) + assert page.cluster_drawings() == [r1 | r2] + + +def test_cluster3(): + """Confirm as separate if neighborhood threshold exceeded.""" + if not hasattr(pymupdf, "mupdf"): + print("Not executing 'test_cluster3' in classic") + return + doc = pymupdf.open() + page = doc.new_page() + r1 = pymupdf.Rect(100, 100, 200, 200) + r2 = pymupdf.Rect(204, 200, 400, 400) + page.draw_rect(r1) + page.draw_rect(r2) + assert page.cluster_drawings() == [r1, r2] diff --git a/tests/test_codespell.py b/tests/test_codespell.py new file mode 100644 index 000000000..5edcf61e1 --- /dev/null +++ b/tests/test_codespell.py @@ -0,0 +1,72 @@ +import pymupdf + +import os +import platform +import shlex +import subprocess +import sys +import textwrap + + +def test_codespell(): + ''' + Check rebased Python code with codespell. + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_codespell(): not running on Pyodide - cannot run child processes.') + return + + if not hasattr(pymupdf, 'mupdf'): + print('Not running codespell with classic implementation.') + return + + if platform.system() == 'Windows': + # Git commands seem to fail on Github Windows runners. + print(f'test_codespell(): Not running on Widows') + return + + root = os.path.abspath(f'{__file__}/../..') + + # For now we ignore files that we would ideally still look at, because it + # is difficult to exclude some text sections. + skips = textwrap.dedent(''' + *.pdf + docs/_static/prism/prism.js + docs/_static/prism/prism.js + docs/locales/ja/LC_MESSAGES/changes.po + docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.po + docs/locales/ + src_classic/* + ''') + skips = skips.strip().replace('\n', ',') + + command = textwrap.dedent(f''' + cd {root} && codespell + --skip {shlex.quote(skips)} + --ignore-words-list re-use,flate,thirdparty,re-using + --ignore-regex 'https?://[a-z0-9/_.]+' + --ignore-multiline-regex 'codespell:ignore-begin.*codespell:ignore-end' + ''') + + sys.path.append(root) + try: + import pipcl + finally: + del sys.path[0] + git_files = pipcl.git_items(root) + + for p in git_files: + _, ext = os.path.splitext(p) + if ext in ('.png', '.pdf', '.jpg', '.svg'): + pass + else: + command += f' {p}\n' + + if platform.system() != 'Windows': + command = command.replace('\n', ' \\\n') + # Don't print entire command because very long, and will be displayed + # anyway if there is an error. + #print(f'test_codespell(): Running: {command}') + print(f'Running codespell.') + subprocess.run(command, shell=1, check=1) + print('test_codespell(): codespell succeeded.') diff --git a/tests/test_crypting.py b/tests/test_crypting.py index 81b3a932d..dcb8ef2c4 100644 --- a/tests/test_crypting.py +++ b/tests/test_crypting.py @@ -3,21 +3,21 @@ * make a PDF with owber and user passwords * open and decrypt as owner or user """ -import fitz +import pymupdf def test_encryption(): text = "some secret information" # keep this data secret perm = int( - fitz.PDF_PERM_ACCESSIBILITY # always use this - | fitz.PDF_PERM_PRINT # permit printing - | fitz.PDF_PERM_COPY # permit copying - | fitz.PDF_PERM_ANNOTATE # permit annotations + pymupdf.PDF_PERM_ACCESSIBILITY # always use this + | pymupdf.PDF_PERM_PRINT # permit printing + | pymupdf.PDF_PERM_COPY # permit copying + | pymupdf.PDF_PERM_ANNOTATE # permit annotations ) owner_pass = "owner" # owner password user_pass = "user" # user password - encrypt_meth = fitz.PDF_ENCRYPT_AES_256 # strongest algorithm - doc = fitz.open() # empty pdf + encrypt_meth = pymupdf.PDF_ENCRYPT_AES_256 # strongest algorithm + doc = pymupdf.open() # empty pdf page = doc.new_page() # empty page page.insert_text((50, 72), text) # insert the data tobytes = doc.tobytes( @@ -27,13 +27,13 @@ def test_encryption(): permissions=perm, # set permissions ) doc.close() - doc = fitz.open("pdf", tobytes) + doc = pymupdf.open("pdf", tobytes) assert doc.needs_pass assert doc.is_encrypted rc = doc.authenticate("owner") assert rc == 4 assert not doc.is_encrypted doc.close() - doc = fitz.open("pdf", tobytes) + doc = pymupdf.open("pdf", tobytes) rc = doc.authenticate("user") assert rc == 2 diff --git a/tests/test_docs_samples.py b/tests/test_docs_samples.py index 1f48977ad..da3a3e8ea 100644 --- a/tests/test_docs_samples.py +++ b/tests/test_docs_samples.py @@ -21,6 +21,7 @@ ): print(f'Not testing: {p}') else: + p = os.path.relpath(p, root) samples.append(p) def _test_all(): @@ -29,10 +30,20 @@ def _test_all(): import sys e = 0 for sample in samples: - print( f'Running: {sample}') - sys.stdout.flush() + print( f'Running: {sample}', flush=1) try: - subprocess.check_call( f'{sys.executable} {sample}', shell=1, text=1) + if 0: + # Curiously this fails in an odd way when testing compound + # package with $PYTHONPATH set. + print( f'os.environ is:') + for n, v in os.environ.items(): + print( f' {n}: {v!r}') + command = f'{sys.executable} {sample}' + print( f'command is: {command!r}') + sys.stdout.flush() + subprocess.check_call( command, shell=1, text=1) + else: + runpy.run_path(sample) except Exception: print( f'Failed: {sample}') e += 1 @@ -44,4 +55,5 @@ def _test_all(): # @pytest.mark.parametrize('sample', samples) def test_docs_samples(sample): + sample = f'{root}/{sample}' runpy.run_path(sample) diff --git a/tests/test_drawings.py b/tests/test_drawings.py index e03ee36c9..1c2681b44 100644 --- a/tests/test_drawings.py +++ b/tests/test_drawings.py @@ -1,12 +1,13 @@ """ Extract drawings of a PDF page and compare with stored expected result. """ + import io import os import sys import pprint -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") @@ -15,7 +16,7 @@ def test_drawings1(): symbols_text = open(symbols).read() # expected result - doc = fitz.open(filename) + doc = pymupdf.open(filename) page = doc[0] paths = page.get_cdrawings() out = io.StringIO() # pprint output goes here @@ -25,10 +26,10 @@ def test_drawings1(): def test_drawings2(): delta = (0, 20, 0, 20) - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - r = fitz.Rect(100, 100, 200, 200) + r = pymupdf.Rect(100, 100, 200, 200) page.draw_circle(r.br, 2, color=0) r += delta @@ -59,109 +60,93 @@ def test_drawings2(): rects = [p["rect"] for p in page.get_cdrawings()] bboxes = [b[1] for b in page.get_bboxlog()] for i, r in enumerate(rects): - assert fitz.Rect(r) in fitz.Rect(bboxes[i]) + assert pymupdf.Rect(r) in pymupdf.Rect(bboxes[i]) def _dict_difference(a, b): - ''' - Returns `(keys_a, keys_b, key_values)`, information about differences - between dicts `a` and `b`. - - `keys_a` is the set of keys that are in `a` but not in `b`. - - `keys_b` is the set of keys that are in `b` but not in `a`. - - `key_values` is a dict with keys that are in both `a` and `b` but where the - values differ; the values in this dict are `(value_a, value_b)`. - ''' - keys_a = set() - keys_b = set() - key_values = dict() - for key in a: - if key not in b: - keys_a.add( key) - for key in b: - if key not in a: - keys_b.add( key) - for key, va in a.items(): - if key in b: - vb = b[key] - if va != vb: - key_values[key] = (va, vb) - return keys_a, keys_b, key_values + """ + Verifies that dictionaries "a", "b" + * have the same keys and values, except for key "items": + * the items list of "a" must be one shorter but otherwise equal the "b" items + Returns last item of b["items"]. + """ + assert a.keys() == b.keys() + for k in a.keys(): + v1 = a[k] + v2 = b[k] + if k != "items": + assert v1 == v2 + else: + assert v1 == v2[:-1] + rc = v2[-1] + return rc -def test_drawings3(): - doc = fitz.open() +def test_drawings3(): + doc = pymupdf.open() page1 = doc.new_page() shape1 = page1.new_shape() shape1.draw_line((10, 10), (10, 50)) shape1.draw_line((10, 50), (100, 100)) - shape1.finish(closePath=False, color=(0,0,0), width=5) + shape1.finish(closePath=False) shape1.commit() - drawings1 = list(page1.get_drawings()) + drawings1 = page1.get_drawings()[0] page2 = doc.new_page() shape2 = page2.new_shape() shape2.draw_line((10, 10), (10, 50)) shape2.draw_line((10, 50), (100, 100)) - shape2.finish(closePath=True, color=(0,0,0), width=5) + shape2.finish(closePath=True) shape2.commit() - drawings2 = list(page2.get_drawings()) + drawings2 = page2.get_drawings()[0] + + assert _dict_difference(drawings1, drawings2) == ("l", (100, 100), (10, 10)) page3 = doc.new_page() shape3 = page3.new_shape() shape3.draw_line((10, 10), (10, 50)) shape3.draw_line((10, 50), (100, 100)) shape3.draw_line((100, 100), (50, 70)) - shape3.finish(closePath=False, color=(0,0,0), width=5) + shape3.finish(closePath=False) shape3.commit() - drawings3 = list(page3.get_drawings()) + drawings3 = page3.get_drawings()[0] page4 = doc.new_page() shape4 = page4.new_shape() shape4.draw_line((10, 10), (10, 50)) shape4.draw_line((10, 50), (100, 100)) shape4.draw_line((100, 100), (50, 70)) - shape4.finish(closePath=True, color=(0,0,0), width=5) + shape4.finish(closePath=True) shape4.commit() - drawings4 = list(page4.get_drawings()) - - assert len(drawings1) == len(drawings2) == 1 - drawings1 = drawings1[0] - drawings2 = drawings2[0] - diff = _dict_difference( drawings1, drawings2) - assert diff == (set(), set(), {'closePath': (False, True)}) - - assert len(drawings3) == len(drawings4) == 1 - drawings3 = drawings3[0] - drawings4 = drawings4[0] - diff = _dict_difference( drawings3, drawings4) - assert diff == (set(), set(), {'closePath': (False, True)}) - + drawings4 = page4.get_drawings()[0] + + assert _dict_difference(drawings3, drawings4) == ("l", (50, 70), (10, 10)) + + def test_2365(): """Draw a filled rectangle on a new page. Then extract the page's vector graphics and confirm that only one path was generated which has all the right properties.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - rect = fitz.Rect(100, 100, 200, 200) + rect = pymupdf.Rect(100, 100, 200, 200) page.draw_rect( - rect, color=fitz.pdfcolor["black"], fill=fitz.pdfcolor["yellow"], width=3 + rect, color=pymupdf.pdfcolor["black"], fill=pymupdf.pdfcolor["yellow"], width=3 ) paths = page.get_drawings() assert len(paths) == 1 path = paths[0] assert path["type"] == "fs" - assert path["fill"] == fitz.pdfcolor["yellow"] + assert path["fill"] == pymupdf.pdfcolor["yellow"] assert path["fill_opacity"] == 1 - assert path["color"] == fitz.pdfcolor["black"] + assert path["color"] == pymupdf.pdfcolor["black"] assert path["stroke_opacity"] == 1 assert path["width"] == 3 assert path["rect"] == rect + def test_2462(): """ Assertion happens, if this code does NOT bring down the interpreter. @@ -172,6 +157,74 @@ def test_2462(): In order to correctly compute the "scissor" rectangle, we now keep track of the clipped object type. """ - doc = fitz.open(f"{scriptdir}/resources/test-2462.pdf") + doc = pymupdf.open(f"{scriptdir}/resources/test-2462.pdf") page = doc[0] vg = page.get_drawings(extended=True) + + +def test_2556(): + """Ensure that incomplete clip paths will be properly ignored.""" + doc = pymupdf.open() # new empty PDF + page = doc.new_page() # new page + # following contains an incomplete clip + c = b"q 50 697.6 400 100.0 re W n q 0 0 m W n Q " + xref = doc.get_new_xref() # prepare /Contents object for page + doc.update_object(xref, "<<>>") # new xref now is a dictionary + doc.update_stream(xref, c) # store drawing commands + page.set_contents(xref) # give the page this xref as /Contents + # following will bring down interpreter if fix not installed + assert page.get_drawings(extended=True) + + +def test_3207(): + """Example graphics with multiple "close path" commands within same path. + + The fix translates a close-path commands into an additional line + which connects the current point with a preceding "move" target. + The example page has 2 paths which each contain 2 close-path + commands after 2 normal "line" commands, i.e. 2 command sequences + "move-to, line-to, line-to, close-path". + This is converted into 3 connected lines, where the last end point + is connect to the start point of the first line. + So, in the sequence of lines / points + + (p0, p1), (p2, p3), (p4, p5), (p6, p7), (p8, p9), (p10, p11) + + point p5 must equal p0, and p11 must equal p6 (for each of the + two paths in the example). + """ + filename = os.path.join(scriptdir, "resources", "test-3207.pdf") + doc = pymupdf.open(filename) + page = doc[0] + paths = page.get_drawings() + assert len(paths) == 2 + + path0 = paths[0] + items = path0["items"] + assert len(items) == 6 + p0 = items[0][1] + p5 = items[2][2] + p6 = items[3][1] + p11 = items[5][2] + assert p0 == p5 + assert p6 == p11 + + path1 = paths[1] + items = path1["items"] + assert len(items) == 6 + p0 = items[0][1] + p5 = items[2][2] + p6 = items[3][1] + p11 = items[5][2] + assert p0 == p5 + assert p6 == p11 + + +def test_3591(): + """Confirm correct scaling factor for rotation matrices.""" + filename = os.path.join(scriptdir, "resources", "test-3591.pdf") + doc = pymupdf.open(filename) + page = doc[0] + paths = page.get_drawings() + for p in paths: + assert p["width"] == 15 diff --git a/tests/test_embeddedfiles.py b/tests/test_embeddedfiles.py index 613c9865d..f44a5acc9 100644 --- a/tests/test_embeddedfiles.py +++ b/tests/test_embeddedfiles.py @@ -1,11 +1,11 @@ """ Tests for PDF EmbeddedFiles functions. """ -import fitz +import pymupdf def test_embedded1(): - doc = fitz.open() + doc = pymupdf.open() buffer = b"123456678790qwexcvnmhofbnmfsdg4589754uiofjkb-" doc.embfile_add( "file1", @@ -21,4 +21,23 @@ def test_embedded1(): assert doc.embfile_info(0)["filename"] == "new-filename.txt" assert doc.embfile_get(0) == buffer doc.embfile_del(0) - assert doc.embfile_count() == 0 \ No newline at end of file + assert doc.embfile_count() == 0 + +def test_4050(): + with pymupdf.open() as document: + document.embfile_add('test', b'foobar', desc='some text') + d = document.embfile_info('test') + print(f'{d=}') + # Date is non-trivial to test for. + del d['creationDate'] + del d['modDate'] + assert d == { + 'name': 'test', + 'collection': 0, + 'filename': 'test', + 'ufilename': 'test', + 'description': 'some text', + 'size': 6, + 'length': 6, + } + diff --git a/tests/test_extractimage.py b/tests/test_extractimage.py index 4be817fee..f2942e8f0 100644 --- a/tests/test_extractimage.py +++ b/tests/test_extractimage.py @@ -2,7 +2,7 @@ Extract images from a PDF file, confirm number of images found. """ import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "joined.pdf") @@ -10,7 +10,7 @@ def test_extract_image(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) image_count = 1 for xref in range(1, doc.xref_length() - 1): @@ -25,16 +25,16 @@ def test_extract_image(): def test_2348(): pdf_path = f'{scriptdir}/test_2348.pdf' - document = fitz.open() + document = pymupdf.open() page = document.new_page(width=500, height=842) - rect = fitz.Rect(20, 20, 480, 820) + rect = pymupdf.Rect(20, 20, 480, 820) page.insert_image(rect, filename=f'{scriptdir}/resources/nur-ruhig.jpg') page = document.new_page(width=500, height=842) page.insert_image(rect, filename=f'{scriptdir}/resources/img-transparent.png') document.ez_save(pdf_path) document.close() - document = fitz.open(pdf_path) + document = pymupdf.open(pdf_path) page = document[0] imlist = page.get_images() image = document.extract_image(imlist[0][0]) @@ -48,3 +48,10 @@ def test_2348(): print(f'jpeg_extension={jpeg_extension!r} png_extension={png_extension!r}') assert jpeg_extension == 'jpeg' assert png_extension == 'png' + +def test_delete_image(): + + doc = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_delete_image.pdf')) + page = doc[0] + xref = page.get_images()[0][0] + page.delete_image(xref) diff --git a/tests/test_flake8.py b/tests/test_flake8.py new file mode 100644 index 000000000..8001cc77a --- /dev/null +++ b/tests/test_flake8.py @@ -0,0 +1,58 @@ +import pymupdf + +import os +import subprocess +import sys + + +def test_flake8(): + ''' + Check rebased Python code with flake8. + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_flake8(): not running on Pyodide - cannot run child processes.') + return + + if not hasattr(pymupdf, 'mupdf'): + print(f'Not running flake8 with classic implementation.') + return + ignores = ( + 'E123', # closing bracket does not match indentation of opening bracket's line + 'E124', # closing bracket does not match visual indentation + 'E126', # continuation line over-indented for hanging indent + 'E127', # continuation line over-indented for visual indent + 'E128', # continuation line under-indented for visual indent + 'E131', # continuation line unaligned for hanging indent + 'E201', # whitespace after '(' + 'E203', # whitespace before ':' + 'E221', # E221 multiple spaces before operator + 'E225', # missing whitespace around operator + 'E226', # missing whitespace around arithmetic operator + 'E231', # missing whitespace after ',' + 'E241', # multiple spaces after ':' + 'E251', # unexpected spaces around keyword / parameter equals + 'E252', # missing whitespace around parameter equals + 'E261', # at least two spaces before inline comment + 'E265', # block comment should start with '# ' + 'E271', # multiple spaces after keyword + 'E272', # multiple spaces before keyword + 'E302', # expected 2 blank lines, found 1 + 'E305', # expected 2 blank lines after class or function definition, found 1 + 'E306', # expected 1 blank line before a nested definition, found 0 + 'E402', # module level import not at top of file + 'E501', # line too long (80 > 79 characters) + 'E701', # multiple statements on one line (colon) + 'E741', # ambiguous variable name 'l' + 'F541', # f-string is missing placeholders + 'W293', # blank line contains whitespace + 'W503', # line break before binary operator + 'W504', # line break after binary operator + 'E731', # do not assign a lambda expression, use a def + ) + ignores = ','.join(ignores) + root = os.path.abspath(f'{__file__}/../..') + def run(command): + print(f'test_flake8(): Running: {command}') + subprocess.run(command, shell=1, check=1) + run(f'flake8 --ignore={ignores} --statistics {root}/src/__init__.py {root}/src/utils.py {root}/src/table.py') + print(f'test_flake8(): flake8 succeeded.') diff --git a/tests/test_font.py b/tests/test_font.py index 81d7fd3cf..0aa31a8c4 100644 --- a/tests/test_font.py +++ b/tests/test_font.py @@ -1,25 +1,340 @@ """ Tests for the Font class. """ -import fitz +import os +import platform +import pymupdf +import subprocess +import textwrap + +import util def test_font1(): text = "PyMuPDF" - font = fitz.Font("helv") + font = pymupdf.Font("helv") assert font.name == "Helvetica" tl = font.text_length(text, fontsize=20) cl = font.char_lengths(text, fontsize=20) assert len(text) == len(cl) - assert abs(sum(cl) - tl) < fitz.EPSILON + assert abs(sum(cl) - tl) < pymupdf.EPSILON for i in range(len(cl)): assert cl[i] == font.glyph_advance(ord(text[i])) * 20 - font2 = fitz.Font(fontbuffer=font.buffer) + font2 = pymupdf.Font(fontbuffer=font.buffer) + codepoints1 = font.valid_codepoints() + codepoints2 = font2.valid_codepoints() + print('') + print(f'{len(codepoints1)=}') + print(f'{len(codepoints2)=}') + if 0: + for i, (ucs1, ucs2) in enumerate(zip(codepoints1, codepoints2)): + print(f' {i}: {ucs1=} {ucs2=} {"" if ucs2==ucs2 else "*"}') assert font2.valid_codepoints() == font.valid_codepoints() + + # Also check we can get font's bbox. + bbox1 = font.bbox + print(f'{bbox1=}') + if hasattr(pymupdf, 'mupdf'): + bbox2 = font.this.fz_font_bbox() + assert bbox2 == bbox1 def test_font2(): """Old and new length computation must be the same.""" - font = fitz.Font("helv") + font = pymupdf.Font("helv") text = "PyMuPDF" - assert font.text_length(text) == fitz.get_text_length(text) \ No newline at end of file + assert font.text_length(text) == pymupdf.get_text_length(text) + + +def test_fontname(): + """Assert a valid PDF fontname.""" + doc = pymupdf.open() + page = doc.new_page() + assert page.insert_font() # assert: a valid fontname works! + detected = False # preset indicator + try: # fontname check will fail first - don't need a font at all here + page.insert_font(fontname="illegal/char", fontfile="unimportant") + except ValueError as e: + if str(e).startswith("bad fontname chars"): + detected = True # illegal fontname detected + assert detected + +def test_2608(): + flags = (pymupdf.TEXT_DEHYPHENATE | pymupdf.TEXT_MEDIABOX_CLIP) + with pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/2201.00069.pdf')) as doc: + page = doc[0] + blocks = page.get_text_blocks(flags=flags) + text = blocks[10][4] + with open(os.path.abspath(f'{__file__}/../../tests/test_2608_out'), 'wb') as f: + f.write(text.encode('utf8')) + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected') + path_expected_1_26 = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected_1.26') + if pymupdf.mupdf_version_tuple >= (1, 27): + path_expected2 = path_expected + else: + path_expected2 = path_expected_1_26 + with open(path_expected2, 'rb') as f: + expected = f.read().decode('utf8') + # Github windows x32 seems to insert \r characters; maybe something to + # do with the Python installation's line endings settings. + expected = expected.replace('\r', '') + print(f'test_2608(): {text.encode("utf8")=}') + print(f'test_2608(): {expected.encode("utf8")=}') + assert text == expected + +def test_fontarchive(): + if os.environ.get('PYODIDE_ROOT'): + print('test_fontarchive(): not running on Pyodide - we get ValueError: No font code \'notos\' found in pymupdf-fonts..') + return + + import subprocess + arch = pymupdf.Archive() + css = pymupdf.css_for_pymupdf_font("notos", archive=arch, name="sans-serif") + print(css) + print(arch.entry_list) + assert arch.entry_list == \ + [ + { + 'fmt': 'tree', + 'entries': + [ + 'notosbo', 'notosbi', 'notosit', 'notos' + ], + 'path': None + } + ] + +def test_load_system_font(): + if not hasattr(pymupdf, 'mupdf'): + print(f'test_load_system_font(): Not running on classic.') + return + trace = list() + def font_f(name, bold, italic, needs_exact_metrics): + trace.append((name, bold, italic, needs_exact_metrics)) + #print(f'test_load_system_font():font_f(): Looking for font: {name=} {bold=} {italic=} {needs_exact_metrics=}.') + return None + def f_cjk(name, ordering, serif): + trace.append((name, ordering, serif)) + #print(f'test_load_system_font():f_cjk(): Looking for font: {name=} {ordering=} {serif=}.') + return None + def f_fallback(script, language, serif, bold, italic): + trace.append((script, language, serif, bold, italic)) + #print(f'test_load_system_font():f_fallback(): looking for font: {script=} {language=} {serif=} {bold=} {italic=}.') + return None + pymupdf.mupdf.fz_install_load_system_font_funcs(font_f, f_cjk, f_fallback) + f = pymupdf.mupdf.fz_load_system_font("some-font-name", 0, 0, 0) + assert trace == [ + ('some-font-name', 0, 0, 0), + ], f'Incorrect {trace=}.' + print(f'test_load_system_font(): {f.m_internal=}') + + +def test_mupdf_subset_fonts2(): + if not hasattr(pymupdf, 'mupdf'): + print('Not running on rebased.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf') + with pymupdf.open(path) as doc: + n = len(doc) + pages = [i*2 for i in range(n//2)] + print(f'{pages=}.') + pymupdf.mupdf.pdf_subset_fonts2(pymupdf._as_pdf_document(doc), pages) + + +def test_3677(): + pymupdf.TOOLS.set_subset_fontnames(True) + try: + path = os.path.abspath(f'{__file__}/../../tests/resources/test_3677.pdf') + font_names_expected = [ + 'BCDEEE+Aptos', + 'BCDFEE+Aptos', + 'BCDGEE+Calibri-Light', + 'BCDHEE+Calibri-Light', + ] + font_names = list() + with pymupdf.open(path) as document: + for page in document: + for block in page.get_text('dict')['blocks']: + if block['type'] == 0: + if 'lines' in block.keys(): + for line in block['lines']: + for span in line['spans']: + font_name=span['font'] + print(font_name) + font_names.append(font_name) + assert font_names == font_names_expected, f'{font_names=}' + finally: + pymupdf.TOOLS.set_subset_fontnames(False) + + +def test_3933(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3933.pdf') + with pymupdf.open(path) as document: + page = document[0] + print(f'{len(page.get_fonts())=}') + + expected = { + 'BCDEEE+Calibri': 39, + 'BCDFEE+SwissReSan-Regu': 53, + 'BCDGEE+SwissReSan-Ital': 20, + 'BCDHEE+SwissReSan-Bold': 20, + 'BCDIEE+SwissReSan-Regu': 53, + 'BCDJEE+Calibri': 39, + } + + for xref, _, _, name, _, _ in page.get_fonts(): + _, _, _, content = document.extract_font(xref) + + if content: + font = pymupdf.Font(fontname=name, fontbuffer=content) + supported_symbols = font.valid_codepoints() + print(f'Font {name}: {len(supported_symbols)=}.', flush=1) + assert len(supported_symbols) == expected.get(name) + + +def test_3780(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3780.pdf') + with pymupdf.open(path) as document: + for page_i, page in enumerate(document): + for itm in page.get_fonts(): + buff=document.extract_font(itm[0])[-1] + font=pymupdf.Font(fontbuffer=buff) + print(f'{page_i=}: xref {itm[0]} {font.name=} {font.ascender=} {font.descender=}.') + if page_i == 0: + d = page.get_text('dict') + #for n, v in d.items(): + # print(f' {n}: {v!r}') + for i, block in enumerate(d['blocks']): + print(f'block {i}:') + for j, line in enumerate(block['lines']): + print(f' line {j}:') + for k, span in enumerate(line['spans']): + print(f' span {k}:') + for n, v in span.items(): + print(f' {n}: {v!r}') + + +def test_3887(): + print(f'{pymupdf.version=}') + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3887.pdf') + + path2 = os.path.normpath(f'{__file__}/../../tests/resources/test_3887.pdf.ez.pdf') + with pymupdf.open(path) as document: + document.subset_fonts(fallback=False) + document.ez_save(path2) + + with pymupdf.open(path2) as document: + text = f"\u0391\u3001\u0392\u3001\u0393\u3001\u0394\u3001\u0395\u3001\u0396\u3001\u0397\u3001\u0398\u3001\u0399\u3001\u039a\u3001\u039b\u3001\u039c\u3001\u039d\u3001\u039e\u3001\u039f\u3001\u03a0\u3001\u03a1\u3001\u03a3\u3001\u03a4\u3001\u03a5\u3001\u03a6\u3001\u03a7\u3001\u03a8\u3001\u03a9\u3002\u03b1\u3001\u03b2\u3001\u03b3\u3001\u03b4\u3001\u03b5\u3001\u03b6\u3001\u03b7\u3001\u03b8\u3001\u03b9\u3001\u03ba\u3001\u03bb\u3001\u03bc\u3001\u03bd\u3001\u03be\u3001\u03bf\u3001\u03c0\u3001\u03c1\u3001\u03c2\u3001\u03c4\u3001\u03c5\u3001\u03c6\u3001\u03c7\u3001\u03c8\u3001\u03c9\u3002" + page = document[0] + chars = [c for b in page.get_text("rawdict",flags=0)["blocks"] for l in b["lines"] for s in l["spans"] for c in s["chars"]] + output = [c["c"] for c in chars] + print(f'text:\n {text}') + print(f'output:\n {output}') + pixmap = page.get_pixmap() + path_pixmap = f'{path}.0.png' + pixmap.save(path_pixmap) + print(f'Have saved to: {path_pixmap=}') + assert set(output)==set(text) + + +def test_4457(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4457(): not running on Pyodide - cannot run child processes.') + return + + print() + files = ( + ('https://github.com/user-attachments/files/20862923/test_4457_a.pdf', 'test_4457_a.pdf', None, 4), + ('https://github.com/user-attachments/files/20862922/test_4457_b.pdf', 'test_4457_b.pdf', None, 9), + ) + for url, name, size, rms_old_after_max in files: + path = util.download(url, name, size) + + with pymupdf.open(path) as document: + page = document[0] + + pixmap = document[0].get_pixmap() + path_pixmap = f'{path}.png' + pixmap.save(path_pixmap) + print(f'Have created: {path_pixmap=}') + + text = page.get_text() + path_before = f'{path}.before.pdf' + path_after = f'{path}.after.pdf' + document.ez_save(path_before, garbage=4) + print(f'Have created {path_before=}') + + document.subset_fonts() + document.ez_save(path_after, garbage=4) + print(f'Have created {path_after=}') + + with pymupdf.open(path_before) as document: + text_before = document[0].get_text() + pixmap_before = document[0].get_pixmap() + path_pixmap_before = f'{path_before}.png' + pixmap_before.save(path_pixmap_before) + print(f'Have created: {path_pixmap_before=}') + + with pymupdf.open(path_after) as document: + text_after = document[0].get_text() + pixmap_after = document[0].get_pixmap() + path_pixmap_after = f'{path_after}.png' + pixmap_after.save(path_pixmap_after) + print(f'Have created: {path_pixmap_after=}') + + import gentle_compare + rms_before = gentle_compare.pixmaps_rms(pixmap, pixmap_before) + rms_after = gentle_compare.pixmaps_rms(pixmap, pixmap_after) + print(f'{rms_before=}') + print(f'{rms_after=}') + + # Create .png file showing differences between and . + path_pixmap_after_diff = f'{path_after}.diff.png' + pixmap_after_diff = gentle_compare.pixmaps_diff(pixmap, pixmap_after) + pixmap_after_diff.save(path_pixmap_after_diff) + print(f'Have created: {path_pixmap_after_diff}') + + # Extract text from , and and write to + # files so we can show differences with `diff`. + path_text = os.path.normpath(f'{__file__}/../../tests/test_4457.txt') + path_text_before = f'{path_text}.before.txt' + path_text_after = f'{path_text}.after.txt' + with open(path_text, 'w', encoding='utf8') as f: + f.write(text) + with open(path_text_before, 'w', encoding='utf8') as f: + f.write(text_before) + with open(path_text_after, 'w', encoding='utf8') as f: + f.write(text_after) + + # Can't write text to stdout on Windows because of encoding errors. + if platform.system() != 'Windows': + print(f'text:\n{textwrap.indent(text, " ")}') + print(f'text_before:\n{textwrap.indent(text_before, " ")}') + print(f'text_after:\n{textwrap.indent(text_after, " ")}') + print(f'{path_text=}') + print(f'{path_text_before=}') + print(f'{path_text_after=}') + + command = f'diff -u {path_text} {path_text_before}' + print(f'Running: {command}', flush=1) + subprocess.run(command, shell=1) + + command = f'diff -u {path_text} {path_text_after}' + print(f'Running: {command}', flush=1) + subprocess.run(command, shell=1) + + assert text_before == text + assert rms_before == 0 + + if pymupdf.mupdf_version_tuple >= (1, 26, 6): + assert rms_after == 0 + else: + # As of 2025-05-20 there are some differences in some characters, + # e.g. the non-ascii characters in `Philipp Krahenbuhl`. See + # and . + assert abs(rms_after - rms_old_after_max) < 2 + + # Avoid test failure caused by mupdf warnings. + wt = pymupdf.TOOLS.mupdf_warnings() + print(f'{wt=}') + assert wt == 'bogus font ascent/descent values (0 / 0)\n... repeated 5 times...' diff --git a/tests/test_general.py b/tests/test_general.py index 4b037320b..6db125d98 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -7,51 +7,70 @@ import io import os -import fitz +import fnmatch +import json +import pymupdf +import pathlib +import pickle +import platform +import re +import shutil +import subprocess +import sys +import textwrap +import time +import util + +import gentle_compare scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "001003ED.pdf") def test_haslinks(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.has_links() == False def test_hasannots(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.has_annots() == False def test_haswidgets(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.is_form_pdf == False def test_isrepaired(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.is_repaired == False - fitz.TOOLS.mupdf_warnings() + pymupdf.TOOLS.mupdf_warnings() def test_isdirty(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.is_dirty == False def test_cansaveincrementally(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) assert doc.can_save_incrementally() == True def test_iswrapped(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) page = doc[0] assert page.is_wrapped + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 0): + assert wt == 'bogus font ascent/descent values (0 / 0)' + else: + assert not wt def test_wrapcontents(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) page = doc[0] page.wrap_contents() xref = page.get_contents()[0] @@ -60,11 +79,18 @@ def test_wrapcontents(): page.set_contents(xref) assert len(page.get_contents()) == 1 page.clean_contents() + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 0): + assert wt == 'bogus font ascent/descent values (0 / 0)\nPDF stream Length incorrect' + else: + assert wt == 'PDF stream Length incorrect' def test_page_clean_contents(): """Assert that page contents cleaning actually is invoked.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() # draw two rectangles - will lead to two /Contents objects @@ -81,12 +107,12 @@ def test_page_clean_contents(): def test_annot_clean_contents(): """Assert that annot contents cleaning actually is invoked.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot = page.add_highlight_annot((10, 10, 20, 20)) # the annotation appearance will not start with command b"q" - assert annot._getAP().startswith(b"q") == False + # invoke appearance stream cleaning and reformatting annot.clean_contents() @@ -96,71 +122,74 @@ def test_annot_clean_contents(): def test_config(): - assert fitz.TOOLS.fitz_config["py-memory"] in (True, False) + assert pymupdf.TOOLS.fitz_config["py-memory"] in (True, False) def test_glyphnames(): - name = "infinity" - infinity = fitz.glyph_name_to_unicode(name) - assert fitz.unicode_to_glyph_name(infinity) == name + name = "INFINITY" + infinity = pymupdf.glyph_name_to_unicode(name) + assert pymupdf.unicode_to_glyph_name(infinity) == name def test_rgbcodes(): sRGB = 0xFFFFFF - assert fitz.sRGB_to_pdf(sRGB) == (1, 1, 1) - assert fitz.sRGB_to_rgb(sRGB) == (255, 255, 255) + assert pymupdf.sRGB_to_pdf(sRGB) == (1, 1, 1) + assert pymupdf.sRGB_to_rgb(sRGB) == (255, 255, 255) def test_pdfstring(): - fitz.get_pdf_now() - fitz.get_pdf_str("Beijing, chinesisch 北京") - fitz.get_text_length("Beijing, chinesisch 北京", fontname="china-s") - fitz.get_pdf_str("Latin characters êßöäü") + pymupdf.get_pdf_now() + pymupdf.get_pdf_str("Beijing, chinesisch 北京") + pymupdf.get_text_length("Beijing, chinesisch 北京", fontname="china-s") + pymupdf.get_pdf_str("Latin characters êßöäü") def test_open_exceptions(): - try: - doc = fitz.open(filename, filetype="xps") - except RuntimeError as e: - assert repr(e).startswith("FileDataError") + path = os.path.normpath(f'{__file__}/../../tests/resources/001003ED.pdf') + doc = pymupdf.open(path, filetype="xps") + assert 'PDF' in doc.metadata["format"] - try: - doc = fitz.open(filename, filetype="xxx") - except Exception as e: - assert repr(e).startswith("ValueError") + doc = pymupdf.open(path, filetype="xxx") + assert 'PDF' in doc.metadata["format"] try: - doc = fitz.open("x.y") + pymupdf.open("x.y") except Exception as e: assert repr(e).startswith("FileNotFoundError") + else: + assert 0 try: - doc = fitz.open("pdf", b"") + pymupdf.open(stream=b"", filetype="pdf") except RuntimeError as e: - assert repr(e).startswith("EmptyFileError") + assert repr(e).startswith("EmptyFileError"), f'{repr(e)=}' + else: + print(f'{doc.metadata["format"]=}') + assert 0 def test_bug1945(): - pdf = fitz.open(f'{scriptdir}/resources/bug1945.pdf') + pdf = pymupdf.open(f'{scriptdir}/resources/bug1945.pdf') buffer_ = io.BytesIO() pdf.save(buffer_, clean=True) def test_bug1971(): for _ in range(2): - doc = fitz.Document(f'{scriptdir}/resources/bug1971.pdf') + doc = pymupdf.Document(f'{scriptdir}/resources/bug1971.pdf') page = next(doc.pages()) page.get_drawings() doc.close() + assert doc.is_closed def test_default_font(): - f = fitz.Font() + f = pymupdf.Font() assert str(f) == "Font('Noto Serif Regular')" assert repr(f) == "Font('Noto Serif Regular')" def test_add_ink_annot(): import math - document = fitz.Document() + document = pymupdf.Document() page = document.new_page() line1 = [] line2 = [] @@ -179,22 +208,22 @@ def test_add_ink_annot(): print( f'Have saved to: path={path!r}') def test_techwriter_append(): - print(fitz.__doc__) - doc = fitz.open() + print(pymupdf.__doc__) + doc = pymupdf.open() page = doc.new_page() - tw = fitz.TextWriter(page.rect) + tw = pymupdf.TextWriter(page.rect) text = "Red rectangle = TextWriter.text_rect, blue circle = .last_point" r = tw.append((100, 100), text) print(f'r={r!r}') tw.write_text(page) - page.draw_rect(tw.text_rect, color=fitz.pdfcolor["red"]) - page.draw_circle(tw.last_point, 2, color=fitz.pdfcolor["blue"]) + page.draw_rect(tw.text_rect, color=pymupdf.pdfcolor["red"]) + page.draw_circle(tw.last_point, 2, color=pymupdf.pdfcolor["blue"]) path = f"{scriptdir}/resources/test_techwriter_append.pdf" doc.ez_save(path) print( f'Have saved to: {path}') def test_opacity(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() annot1 = page.add_circle_annot((50, 50, 100, 100)) @@ -212,26 +241,31 @@ def test_opacity(): def test_get_text_dict(): import json - doc=fitz.open(f'{scriptdir}/resources/v110-changes.pdf') + doc=pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf') page=doc[0] blocks=page.get_text("dict")["blocks"] # Check no opaque types in `blocks`. json.dumps( blocks, indent=4) + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 0): + assert wt == 'bogus font ascent/descent values (0 / 0)' + else: + assert not wt def test_font(): - font = fitz.Font() + font = pymupdf.Font() print(repr(font)) bbox = font.glyph_bbox( 65) print( f'bbox={bbox!r}') def test_insert_font(): - doc=fitz.open(f'{scriptdir}/resources/v110-changes.pdf') + doc=pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf') page = doc[0] i = page.insert_font() print( f'page.insert_font() => {i}') def test_2173(): - from fitz import IRect, Pixmap, CS_RGB, Colorspace + from pymupdf import IRect, Pixmap, CS_RGB, Colorspace for i in range( 100): #print( f'i={i!r}') image = Pixmap(Colorspace(CS_RGB), IRect(0, 0, 13, 37)) @@ -239,7 +273,7 @@ def test_2173(): def test_texttrace(): import time - document = fitz.Document( f'{scriptdir}/resources/joined.pdf') + document = pymupdf.Document( f'{scriptdir}/resources/joined.pdf') t = time.time() for page in document: tt = page.get_texttrace() @@ -249,7 +283,7 @@ def test_texttrace(): # Repeat, this time writing data to file. import json path = f'{scriptdir}/resources/test_texttrace.txt' - print( f'Writing to: {path}') + print( f'test_texttrace(): Writing to: {path}') with open( path, 'w') as f: for i, page in enumerate(document): tt = page.get_texttrace() @@ -262,23 +296,46 @@ def test_2533(): Search for a unique char on page and confirm that page.get_texttrace() returns the same bbox as the search method. """ - doc = fitz.open(os.path.join(scriptdir, "resources", "test_2533.pdf")) - page = doc[0] - NEEDLE = "民" - ord_NEEDLE = ord(NEEDLE) - for span in page.get_texttrace(): - for char in span["chars"]: - if char[0] == ord_NEEDLE: - bbox = fitz.Rect(char[3]) - break - assert page.search_for(NEEDLE)[0] == bbox + if hasattr(pymupdf, 'mupdf') and not pymupdf.g_use_extra: + print('Not running test_2533() because rebased with use_extra=0 known to fail') + return + pymupdf.TOOLS.set_small_glyph_heights(True) + try: + doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2533.pdf")) + page = doc[0] + NEEDLE = "民" + ord_NEEDLE = ord(NEEDLE) + for span in page.get_texttrace(): + for char in span["chars"]: + if char[0] == ord_NEEDLE: + bbox = pymupdf.Rect(char[3]) + break + bbox2 = page.search_for(NEEDLE)[0] + assert bbox2 == bbox, f'{bbox=} {bbox2=} {bbox2-bbox=}.' + finally: + pymupdf.TOOLS.set_small_glyph_heights(False) + + +def test_2645(): + """Assert same font size calculation in corner cases. + """ + folder = os.path.join(scriptdir, "resources") + files = ("test_2645_1.pdf", "test_2645_2.pdf", "test_2645_3.pdf") + for f in files: + doc = pymupdf.open(os.path.join(folder, f)) + page = doc[0] + fontsize0 = page.get_texttrace()[0]["size"] + fontsize1 = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT)["blocks"][0]["lines"][ + 0 + ]["spans"][0]["size"] + assert abs(fontsize0 - fontsize1) < 1e-5 def test_2506(): """Ensure expected font size across text writing angles.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - point = fitz.Point(100, 300) # insertion point + point = pymupdf.Point(100, 300) # insertion point fontsize = 11 # fontsize text = "Hello" # text angles = (0, 30, 60, 90, 120) # some angles @@ -286,7 +343,7 @@ def test_2506(): # write text with different angles for angle in angles: page.insert_text( - point, text, fontsize=fontsize, morph=(point, fitz.Matrix(angle)) + point, text, fontsize=fontsize, morph=(point, pymupdf.Matrix(angle)) ) # ensure correct fontsize for get_texttrace() - forgiving rounding problems @@ -304,7 +361,7 @@ def test_2506(): def test_2108(): - doc = fitz.open(f'{scriptdir}/resources/test_2108.pdf') + doc = pymupdf.open(f'{scriptdir}/resources/test_2108.pdf') page = doc[0] areas = page.search_for("{sig}") rect = areas[0] @@ -339,8 +396,8 @@ def test_2108(): print(f'') - print(f'fitz.mupdf_version_tuple={fitz.mupdf_version_tuple}') - if fitz.mupdf_version_tuple >= (1, 21, 2): + print(f'{pymupdf.mupdf_version_tuple=}') + if pymupdf.mupdf_version_tuple >= (1, 21, 2): print('Asserting text==text_expected') assert text == text_expected else: @@ -350,10 +407,21 @@ def test_2108(): def test_2238(): filepath = f'{scriptdir}/resources/test2238.pdf' - doc = fitz.open(filepath) - - first_page = doc.load_page(0).get_text('text', fitz.INFINITE_RECT()) - last_page = doc.load_page(-1).get_text('text', fitz.INFINITE_RECT()) + doc = pymupdf.open(filepath) + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + wt_expected = '' + if pymupdf.mupdf_version_tuple >= (1, 26): + wt_expected += 'garbage bytes before version marker\n' + wt_expected += 'syntax error: expected \'obj\' keyword (6 0 ?)\n' + else: + wt_expected += 'format error: cannot recognize version marker\n' + wt_expected += 'trying to repair broken xref\n' + wt_expected += 'repairing PDF document' + assert wt == wt_expected, f'{wt=}' + first_page = doc.load_page(0).get_text('text', clip=pymupdf.INFINITE_RECT()) + last_page = doc.load_page(-1).get_text('text', clip=pymupdf.INFINITE_RECT()) print(f'first_page={first_page!r}') print(f'last_page={last_page!r}') @@ -370,7 +438,11 @@ def test_2238(): def test_2093(): - doc = fitz.open(f'{scriptdir}/resources/test2093.pdf') + if platform.python_implementation() == 'GraalVM': + print(f'test_2093(): Not running because slow on GraalVM.') + return + + doc = pymupdf.open(f'{scriptdir}/resources/test2093.pdf') def average_color(page): pixmap = page.get_pixmap() @@ -397,16 +469,16 @@ def average_color(page): x1 = rx + rw y1 = ry + rh - rect = fitz.Rect(x0, y0, x1, y1) + rect = pymupdf.Rect(x0, y0, x1, y1) - font = fitz.Font("Helvetica") + font = pymupdf.Font("Helvetica") fill_color=(0,0,0) page.add_redact_annot( quad=rect, #text="null", fontname=font.name, fontsize=12, - align=fitz.TEXT_ALIGN_CENTER, + align=pymupdf.TEXT_ALIGN_CENTER, fill=fill_color, text_color=(1,1,1), ) @@ -417,17 +489,16 @@ def average_color(page): print(f'pixel_average_before={pixel_average_before!r}') print(f'pixel_average_after={pixel_average_after!r}') - # Before this bug was fixed: + # Before this bug was fixed (MuPDF-1.22): # pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174] # pixel_average_after=[138.68844553555772, 123.05687162237561, 100.74275056194105] # After fix: # pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174] # pixel_average_after=[130.8889209934799, 115.25722751837269, 92.94327384463327] # - if fitz.mupdf_version_tuple[:2] >= (1, 22): - for i in range(len(pixel_average_before)): - diff = pixel_average_before[i] - pixel_average_after[i] - assert abs(diff) < 0.1 + for i in range(len(pixel_average_before)): + diff = pixel_average_before[i] - pixel_average_after[i] + assert abs(diff) < 0.1 out = f'{scriptdir}/resources/test2093-out.pdf' doc.save(out) @@ -436,7 +507,7 @@ def average_color(page): def test_2182(): print(f'test_2182() started') - doc = fitz.open(f'{scriptdir}/resources/test2182.pdf') + doc = pymupdf.open(f'{scriptdir}/resources/test2182.pdf') page = doc[0] for annot in page.annots(): print(annot) @@ -471,13 +542,13 @@ def bbox_count(rot): """ # bboxes of spans on page: same text positions are represented by ONE bbox bboxes = set() - doc = fitz.open() + doc = pymupdf.open() # prepare a page with desired MediaBox / CropBox peculiarities - mediabox = fitz.paper_rect("letter") + mediabox = pymupdf.paper_rect("letter") page = doc.new_page(width=mediabox.width, height=mediabox.height) xref = page.xref newmbox = list(map(float, doc.xref_get_key(xref, "MediaBox")[1][1:-1].split())) - newmbox = fitz.Rect(newmbox) + newmbox = pymupdf.Rect(newmbox) mbox = newmbox + (10, 20, 10, 20) cbox = mbox + (10, 10, -10, -10) doc.xref_set_key(xref, "MediaBox", "[%g %g %g %g]" % tuple(mbox)) @@ -485,7 +556,7 @@ def bbox_count(rot): # set page to desired rotation page.set_rotation(rot) page.insert_text((50, 50), "Text inserted at (50,50)") - tw = fitz.TextWriter(page.rect) + tw = pymupdf.TextWriter(page.rect) tw.append((50, 50), "Text inserted at (50,50)") tw.write_text(page) blocks = page.get_text("dict")["blocks"] @@ -493,7 +564,7 @@ def bbox_count(rot): for l in b["lines"]: for s in l["spans"]: # store bbox rounded to 3 decimal places - bboxes.add(fitz.Rect(fitz.JM_TUPLE3(s["bbox"]))) + bboxes.add(pymupdf.Rect(pymupdf.JM_TUPLE3(s["bbox"]))) return len(bboxes) # should be 1! # the following tests must all pass @@ -505,6 +576,1612 @@ def bbox_count(rot): def test_2430(): """Confirm that multiple font property checks will not destroy Py_None.""" - font = fitz.Font("helv") + font = pymupdf.Font("helv") for i in range(1000): _ = font.flags + +def test_2692(): + document = pymupdf.Document(f'{scriptdir}/resources/2.pdf') + for page in document: + pix = page.get_pixmap(clip=pymupdf.Rect(0,0,10,10)) + dl = page.get_displaylist(annots=True) + pix = dl.get_pixmap( + matrix=pymupdf.Identity, + colorspace=pymupdf.csRGB, + alpha=False, + clip=pymupdf.Rect(0,0,10,10), + ) + pix = dl.get_pixmap( + matrix=pymupdf.Identity, + #colorspace=pymupdf.csRGB, + alpha=False, + clip=pymupdf.Rect(0,0,10,10), + ) + + +def test_2596(): + """Confirm correctly abandoning cache when reloading a page.""" + if platform.python_implementation() == 'GraalVM': + print(f'test_2596(): not running on Graal.') + return + doc = pymupdf.Document(f"{scriptdir}/resources/test_2596.pdf") + page = doc[0] + pix0 = page.get_pixmap() # render the page + _ = doc.tobytes(garbage=3) # save with garbage collection + + # Note this will invalidate cache content for this page. + # Reloading the page now empties the cache, so rendering + # will deliver the same pixmap + page = doc.reload_page(page) + pix1 = page.get_pixmap() + assert pix1.samples == pix0.samples + rebased = hasattr(pymupdf, 'mupdf') + if pymupdf.mupdf_version_tuple < (1, 26, 6): + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'too many indirections (possible indirection cycle involving 24 0 R)' + + +def test_2730(): + """Ensure identical output across text extractions.""" + doc = pymupdf.open(f"{scriptdir}/resources/test_2730.pdf") + page = doc[0] + s1 = set(page.get_text()) # plain text extraction + s2 = set(page.get_text(sort=True)) # uses "blocks" extraction + s3 = set(page.get_textbox(page.rect)) + assert s1 == s2 + assert s1 == s3 + + +def test_2553(): + """Ensure identical output across text extractions.""" + verbose = 0 + doc = pymupdf.open(f"{scriptdir}/resources/test_2553.pdf") + page = doc[0] + + # extract plain text, build set of all characters + list1 = page.get_text() + set1 = set(list1) + + # extract text blocks, build set of all characters + list2 = page.get_text(sort=True) # internally uses "blocks" + set2 = set(list2) + + # extract textbox content, build set of all characters + list3 = page.get_textbox(page.rect) + set3 = set(list3) + + def show(l): + ret = f'len={len(l)}\n' + for c in l: + cc = ord(c) + if (cc >= 32 and cc < 127) or c == '\n': + ret += c + else: + ret += f' [0x{hex(cc)}]' + return ret + + if verbose: + print(f'list1:\n{show(list1)}') + print(f'list2:\n{show(list2)}') + print(f'list3:\n{show(list3)}') + + # all sets must be equal + assert set1 == set2 + assert set1 == set3 + + # With mupdf later than 1.23.4, this special page contains no invalid + # Unicodes. + # + print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.') + assert chr(0xFFFD) not in set1 + +def test_2553_2(): + doc = pymupdf.open(f"{scriptdir}/resources/test_2553-2.pdf") + page = doc[0] + + # extract plain text, ensure that there are no 0xFFFD characters + text = page.get_text() + assert chr(0xfffd) not in text + +def test_2635(): + """Rendering a page before and after cleaning it should yield the same pixmap.""" + doc = pymupdf.open(f"{scriptdir}/resources/test_2635.pdf") + page = doc[0] + pix1 = page.get_pixmap() # pixmap before cleaning + + page.clean_contents() # clean page + pix2 = page.get_pixmap() # pixmap after cleaning + assert pix1.samples == pix2.samples # assert equality + + +def test_resolve_names(): + """Test PDF name resolution.""" + # guard against wrong PyMuPDF architecture version + if not hasattr(pymupdf.Document, "resolve_names"): + print("PyMuPDF version does not support resolving PDF names") + return + pickle_in = open(f"{scriptdir}/resources/cython.pickle", "rb") + old_names = pickle.load(pickle_in) + doc = pymupdf.open(f"{scriptdir}/resources/cython.pdf") + new_names = doc.resolve_names() + assert new_names == old_names + +def test_2777(): + document = pymupdf.Document() + page = document.new_page() + print(page.mediabox.width) + +def test_2710(): + doc = pymupdf.open(f'{scriptdir}/resources/test_2710.pdf') + page = doc.load_page(0) + + print(f'test_2710(): {page.cropbox=}') + print(f'test_2710(): {page.mediabox=}') + print(f'test_2710(): {page.rect=}') + + def numbers_approx_eq(a, b): + return abs(a-b) < 0.001 + def points_approx_eq(a, b): + return numbers_approx_eq(a.x, b.x) and numbers_approx_eq(a.y, b.y) + def rects_approx_eq(a, b): + return points_approx_eq(a.bottom_left, b.bottom_left) and points_approx_eq(a.top_right, b.top_right) + def assert_rects_approx_eq(a, b): + assert rects_approx_eq(a, b), f'Not nearly identical: {a=} {b=}' + + blocks = page.get_text('blocks') + print(f'test_2710(): {blocks=}') + assert len(blocks) == 2 + block = blocks[1] + rect = pymupdf.Rect(block[:4]) + text = block[4] + print(f'test_2710(): {rect=}') + print(f'test_2710(): {text=}') + assert text == 'Text at left page border\n' + + assert_rects_approx_eq(page.cropbox, pymupdf.Rect(30.0, 30.0, 565.3200073242188, 811.9199829101562)) + assert_rects_approx_eq(page.mediabox, pymupdf.Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562)) + print(f'test_2710(): {pymupdf.mupdf_version_tuple=}') + # 2023-11-05: Currently broken in mupdf master. + print(f'test_2710(): Not Checking page.rect and rect.') + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == ( + "syntax error: cannot find ExtGState resource 'GS7'\n" + "syntax error: cannot find ExtGState resource 'GS8'\n" + "encountered syntax errors; page may not be correct" + ) + + +def test_2736(): + """Check handling of CropBox changes vis-a-vis a MediaBox with + negative coordinates.""" + doc = pymupdf.open() + page = doc.new_page() + + # fake a MediaBox for demo purposes + doc.xref_set_key(page.xref, "MediaBox", "[-30 -20 595 842]") + + assert page.cropbox == pymupdf.Rect(-30, 0, 595, 862) + assert page.rect == pymupdf.Rect(0, 0, 625, 862) + + # change the CropBox: shift by (10, 10) in both dimensions. Please note: + # To achieve this, 10 must be subtracted from 862! yo must never be negative! + page.set_cropbox(pymupdf.Rect(-20, 0, 595, 852)) + + # get CropBox from the page definition + assert doc.xref_get_key(page.xref, "CropBox")[1] == "[-20 -10 595 842]" + assert page.rect == pymupdf.Rect(0, 0, 615, 852) + + error = False + text = "" + try: # check error detection + page.set_cropbox((-35, -10, 595, 842)) + except Exception as e: + text = str(e) + error = True + assert error == True + assert text == "CropBox not in MediaBox" + + +def test_subset_fonts(): + if os.environ.get('PYODIDE_ROOT'): + print('test_subset_fonts(): not running on Pyodide - ValueError: No font code \'ubuntu\' found in pymupdf-fonts.') + return + """Confirm subset_fonts is working.""" + if not hasattr(pymupdf, "mupdf"): + print("Not testing 'test_subset_fonts' in classic.") + return + text = "Just some arbitrary text." + arch = pymupdf.Archive() + css = pymupdf.css_for_pymupdf_font("ubuntu", archive=arch) + css += "* {font-family: ubuntu;}" + doc = pymupdf.open() + page = doc.new_page() + page.insert_htmlbox(page.rect, text, css=css, archive=arch) + doc.subset_fonts(verbose=True) + found = False + for xref in range(1, doc.xref_length()): + if "+Ubuntu#20Regular" in doc.xref_object(xref): + found = True + break + assert found is True + + +def test_2957_1(): + """Text following a redaction must not change coordinates.""" + # test file with redactions + doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_1.pdf")) + page = doc[0] + # search for string that must not move by redactions + rects0 = page.search_for("6e9f73dfb4384a2b8af6ebba") + # sort rectangles vertically + rects0 = sorted(rects0, key=lambda r: r.y1) + assert len(rects0) == 2 # must be 2 redactions + page.apply_redactions() + + # reload page to finalize updates + page = doc.reload_page(page) + + # the two string must retain their positions (except rounding errors) + rects1 = page.search_for("6e9f73dfb4384a2b8af6ebba") + rects1 = sorted(rects1, key=lambda r: r.y1) + + assert page.first_annot is None # make sure annotations have disappeared + for i in range(2): + r0 = rects0[i].irect # take rounded rects + r1 = rects1[i].irect + assert r0 == r1 + + +def test_2957_2(): + """Redacted text must not change positions of remaining text.""" + doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_2.pdf")) + page = doc[0] + words0 = page.get_text("words") # all words before redacting + page.apply_redactions() # remove/redact the word "longer" + words1 = page.get_text("words") # extract words again + assert len(words1) == len(words0) - 1 # must be one word less + assert words0[3][4] == "longer" # just confirm test file is correct one + del words0[3] # remove the redacted word from first list + for i in range(len(words1)): # compare words + w1 = words1[i] # word after redaction + bbox1 = pymupdf.Rect(w1[:4]).irect # its IRect coordinates + w0 = words0[i] # word before redaction + bbox0 = pymupdf.Rect(w0[:4]).irect # its IRect coordinates + assert bbox0 == bbox1 # must be same coordinates + + +def test_707560(): + """https://bugs.ghostscript.com/show_bug.cgi?id=707560 + Ensure that redactions also remove characters with an empty width bbox. + """ + # Make text that will contain characters with an empty bbox. + + greetings = ( + "Hello, World!", # english + "Hallo, Welt!", # german + "سلام دنیا!", # persian + "வணக்கம், உலகம்!", # tamil + "สวัสดีชาวโลก!", # thai + "Привіт Світ!", # ucranian + "שלום עולם!", # hebrew + "ওহে বিশ্ব!", # bengali + "你好世界!", # chinese + "こんにちは世界!", # japanese + "안녕하세요, 월드!", # korean + "नमस्कार, विश्व !", # sanskrit + "हैलो वर्ल्ड!", # hindi + ) + text = " ... ".join([g for g in greetings]) + where = (50, 50, 400, 500) + story = pymupdf.Story(text) + bio = io.BytesIO() + writer = pymupdf.DocumentWriter(bio) + more = True + while more: + dev = writer.begin_page(pymupdf.paper_rect("a4")) + more, _ = story.place(where) + story.draw(dev) + writer.end_page() + writer.close() + doc = pymupdf.open("pdf", bio) + page = doc[0] + text = page.get_text() + assert text, "Unexpected: test page has no text." + page.add_redact_annot(page.rect) + page.apply_redactions() + assert not page.get_text(), "Unexpected: text not fully redacted." + + +def test_3070(): + with pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_3070.pdf')) as pdf: + links = pdf[0].get_links() + links[0]['uri'] = "https://www.ddg.gg" + pdf[0].update_link(links[0]) + pdf.save(os.path.abspath(f'{__file__}/../../tests/test_3070_out.pdf')) + +def test_bboxlog_2885(): + doc = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_2885.pdf')) + page=doc[0] + + bbl = page.get_bboxlog() + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'invalid marked content and clip nesting' + + bbl = page.get_bboxlog(layers=True) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'invalid marked content and clip nesting' + +def test_3081(): + ''' + Check Document.close() closes file handles, even if a Page instance exists. + ''' + path1 = os.path.abspath(f'{__file__}/../../tests/resources/1.pdf') + path2 = os.path.abspath(f'{__file__}/../../tests/test_3081-2.pdf') + + rebased = hasattr(pymupdf, 'mupdf') + + import shutil + import sys + import traceback + shutil.copy2(path1, path2) + + # Find next two available fds. + next_fd_1 = os.open(path2, os.O_RDONLY) + next_fd_2 = os.open(path2, os.O_RDONLY) + os.close(next_fd_1) + os.close(next_fd_2) + + def next_fd(): + fd = os.open(path2, os.O_RDONLY) + os.close(fd) + return fd + + fd1 = next_fd() + document = pymupdf.open(path2) + page = document[0] + fd2 = next_fd() + document.close() + if rebased: + assert document.this is None + assert page.this is None + try: + document.page_count() + except Exception as e: + print(f'Received expected exception: {e}') + #traceback.print_exc(file=sys.stdout) + assert str(e) == 'document closed' + else: + assert 0, 'Did not receive expected exception.' + fd3 = next_fd() + try: + page.bound() + except Exception as e: + print(f'Received expected exception: {e}') + #traceback.print_exc(file=sys.stdout) + if rebased: + assert str(e) == 'page is None' + else: + assert str(e) == 'orphaned object: parent is None' + else: + assert 0, 'Did not receive expected exception.' + page = None + fd4 = next_fd() + print(f'{next_fd_1=} {next_fd_2=}') + print(f'{fd1=} {fd2=} {fd3=} {fd4=}') + print(f'{document=}') + assert fd1 == next_fd_1 + assert fd2 == next_fd_2 # Checks document only uses one fd. + assert fd3 == next_fd_1 # Checks no leaked fds after document close. + assert fd4 == next_fd_1 # Checks no leaked fds after failed page access. + +def test_xml(): + path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf') + with pymupdf.open(path) as document: + document.get_xml_metadata() + +def test_3112_set_xml_metadata(): + document = pymupdf.Document() + document.set_xml_metadata('hello world') + +def test_archive_3126(): + if not hasattr(pymupdf, 'mupdf'): + print(f'Not running because known to fail with classic.') + return + p = os.path.abspath(f'{__file__}/../../tests/resources') + p = pathlib.Path(p) + archive = pymupdf.Archive(p) + +def test_3140(): + if not hasattr(pymupdf, 'mupdf'): + print(f'Not running test_3140 on classic, because Page.insert_htmlbox() not available.') + return + css2 = '' + path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf') + oldfile = os.path.abspath(f'{__file__}/../../tests/test_3140_old.pdf') + newfile = os.path.abspath(f'{__file__}/../../tests/test_3140_new.pdf') + import shutil + shutil.copy2(path, oldfile) + def next_fd(): + fd = os.open(path, os.O_RDONLY) + os.close(fd) + return fd + fd1 = next_fd() + with pymupdf.open(oldfile) as doc: # open document + page = doc[0] + rect = pymupdf.Rect(130, 400, 430, 600) + CELLS = pymupdf.make_table(rect, cols=3, rows=5) + shape = page.new_shape() # create Shape + for i in range(5): + for j in range(3): + qtext = "" + "Ques #" + str(i*3+j+1) + ": " + "" # codespell:ignore + atext = "" + "Ans:" + "" # codespell:ignore + qtext = qtext + '
' + atext + shape.draw_rect(CELLS[i][j]) # draw rectangle + page.insert_htmlbox(CELLS[i][j], qtext, css=css2, scale_low=0) + shape.finish(width=2.5, color=pymupdf.pdfcolor["blue"], ) + shape.commit() # write all stuff to the page + doc.subset_fonts() + doc.ez_save(newfile) + fd2 = next_fd() + assert fd2 == fd1, f'{fd1=} {fd2=}' + os.remove(oldfile) + +def test_cli(): + if os.environ.get('PYODIDE_ROOT'): + print('test_cli(): not running on Pyodide - cannot run child processes.') + return + + if not hasattr(pymupdf, 'mupdf'): + print('test_cli(): Not running on classic because of fitz_old.') + return + import subprocess + subprocess.run(f'pymupdf -h', shell=1, check=1) + + +def check_lines(expected_regexes, actual): + ''' + Checks lines in match regexes in . + ''' + print(f'check_lines():', flush=1) + print(f'{expected_regexes=}', flush=1) + print(f'{actual=}', flush=1) + def str_to_list(s): + if isinstance(s, str): + return s.split('\n') if s else list() + return s + expected_regexes = str_to_list(expected_regexes) + actual = str_to_list(actual) + if expected_regexes and expected_regexes[-1]: + expected_regexes.append('') # Always expect a trailing empty line. + # Remove `None` regexes and make all regexes match entire lines. + expected_regexes = [f'^{i}$' for i in expected_regexes if i is not None] + print(f'{expected_regexes=}', flush=1) + for expected_regex_line, actual_line in zip(expected_regexes, actual): + print(f' {expected_regex_line=}', flush=1) + print(f' {actual_line=}', flush=1) + assert re.match(expected_regex_line, actual_line) + assert len(expected_regexes) == len(actual), \ + f'expected/actual lines mismatch: {len(expected_regexes)=} {len(actual)=}.' + +def test_cli_out(): + ''' + Check redirection of messages and log diagnostics with environment + variables PYMUPDF_LOG and PYMUPDF_MESSAGE. + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_cli_out(): not running on Pyodide - cannot run child processes.') + return + + if not hasattr(pymupdf, 'mupdf'): + print('test_cli(): Not running on classic because of fitz_old.') + return + import platform + import re + import subprocess + log_prefix = None + if os.environ.get('PYMUPDF_USE_EXTRA') == '0': + log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\'' + + def check( + expect_out, + expect_err, + message=None, + log=None, + verbose=0, + ): + ''' + Sets PYMUPDF_MESSAGE to `message` and PYMUPDF_LOG to `log`, runs + `pymupdf internal`, and checks lines stdout and stderr match regexes in + `expect_out` and `expect_err`. Note that we enclose regexes in `^...$`. + ''' + env = dict() + if log: + env['PYMUPDF_LOG'] = log + if message: + env['PYMUPDF_MESSAGE'] = message + env = os.environ | env + print(f'Running with {env=}: pymupdf internal', flush=1) + cp = subprocess.run(f'pymupdf internal', shell=1, check=1, capture_output=1, env=env, text=True) + + if verbose: + #print(f'{cp.stdout=}.', flush=1) + #print(f'{cp.stderr=}.', flush=1) + sys.stdout.write(f'stdout:\n{textwrap.indent(cp.stdout, " ")}') + sys.stdout.write(f'stderr:\n{textwrap.indent(cp.stderr, " ")}') + check_lines(expect_out, cp.stdout) + check_lines(expect_err, cp.stderr) + + # + print(f'Checking default, all output to stdout.') + check( + [ + log_prefix, + 'This is from PyMuPDF message[(][)][.]', + '.+This is from PyMuPDF log[(][)].', + ], + '', + ) + + # + if platform.system() != 'Windows': + print(f'Checking redirection of everything to /dev/null.') + check('', '', 'path:/dev/null', 'path:/dev/null') + + # + print(f'Checking redirection to files.') + path_out = os.path.abspath(f'{__file__}/../../tests/test_cli_out.out') + path_err = os.path.abspath(f'{__file__}/../../tests/test_cli_out.err') + check('', '', f'path:{path_out}', f'path:{path_err}') + def read(path): + with open(path) as f: + return f.read() + out = read(path_out) + err = read(path_err) + check_lines(['This is from PyMuPDF message[(][)][.]'], out) + check_lines([log_prefix, '.+This is from PyMuPDF log[(][)][.]'], err) + + # + print(f'Checking redirection to fds.') + check( + [ + 'This is from PyMuPDF message[(][)][.]', + ], + [ + log_prefix, + '.+This is from PyMuPDF log[(][)].', + ], + 'fd:1', + 'fd:2', + ) + + +def test_use_python_logging(): + ''' + Checks pymupdf.use_python_logging(). + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_cli(): not running on Pyodide - cannot run child processes.') + return + + log_prefix = None + if os.environ.get('PYMUPDF_USE_EXTRA') == '0': + log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\'' + + if os.path.basename(__file__).startswith(f'test_fitz_'): + # Do nothing, because command `pymupdf` outputs diagnostics containing + # `pymupdf` which are not renamed to `fitz`, which breaks our checking. + print(f'Not testing with fitz alias.') + return + + def check( + code, + regexes_stdout, + regexes_stderr, + env = None, + ): + code = textwrap.dedent(code) + path = os.path.abspath(f'{__file__}/../../tests/resources_test_logging.py') + with open(path, 'w') as f: + f.write(code) + command = f'{sys.executable} {path}' + if env: + print(f'{env=}.') + env = os.environ | env + print(f'Running: {command}', flush=1) + try: + cp = subprocess.run(command, shell=1, check=1, capture_output=1, text=True, env=env) + except Exception as e: + print(f'Command failed: {command}.', flush=1) + print(f'Stdout\n{textwrap.indent(e.stdout, " ")}', flush=1) + print(f'Stderr\n{textwrap.indent(e.stderr, " ")}', flush=1) + raise + check_lines(regexes_stdout, cp.stdout) + check_lines(regexes_stderr, cp.stderr) + + print(f'## Basic use of `logging` sends output to stderr instead of default stdout.') + check( + ''' + import pymupdf + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + pymupdf.set_messages(pylogging=1) + pymupdf.set_log(pylogging=1) + pymupdf.message('this is pymupdf.message() 2') + pymupdf.log('this is pymupdf.log() 2') + ''', + [ + log_prefix, + 'this is pymupdf.message[(][)]', + '.+this is pymupdf.log[(][)]', + ], + [ + 'this is pymupdf.message[(][)] 2', + '.+this is pymupdf.log[(][)] 2', + ], + ) + + print(f'## Calling logging.basicConfig() makes logging output contain : prefixes.') + check( + ''' + import pymupdf + + import logging + logging.basicConfig() + pymupdf.set_messages(pylogging=1) + pymupdf.set_log(pylogging=1) + + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + ''', + [ + log_prefix, + ], + [ + 'WARNING:pymupdf:this is pymupdf.message[(][)]', + 'WARNING:pymupdf:.+this is pymupdf.log[(][)]', + ], + ) + + print(f'## Setting PYMUPDF_USE_PYTHON_LOGGING=1 makes PyMuPDF use logging on startup.') + check( + ''' + import pymupdf + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + ''', + '', + [ + log_prefix, + 'this is pymupdf.message[(][)]', + '.+this is pymupdf.log[(][)]', + ], + env = dict( + PYMUPDF_MESSAGE='logging:', + PYMUPDF_LOG='logging:', + ), + ) + + print(f'## Pass explicit logger to pymupdf.use_python_logging() with logging.basicConfig().') + check( + ''' + import pymupdf + + import logging + logging.basicConfig() + + logger = logging.getLogger('foo') + pymupdf.set_messages(pylogging_logger=logger, pylogging_level=logging.WARNING) + pymupdf.set_log(pylogging_logger=logger, pylogging_level=logging.ERROR) + + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + ''', + [ + log_prefix, + ], + [ + 'WARNING:foo:this is pymupdf.message[(][)]', + 'ERROR:foo:.+this is pymupdf.log[(][)]', + ], + ) + + print(f'## Check pymupdf.set_messages() pylogging_level args.') + check( + ''' + import pymupdf + + import logging + logging.basicConfig(level=logging.DEBUG) + logger = logging.getLogger('pymupdf') + + pymupdf.set_messages(pylogging_level=logging.CRITICAL) + pymupdf.set_log(pylogging_level=logging.INFO) + + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + ''', + [ + log_prefix, + ], + [ + 'CRITICAL:pymupdf:this is pymupdf.message[(][)]', + 'INFO:pymupdf:.+this is pymupdf.log[(][)]', + ], + ) + + print(f'## Check messages() with sys.stdout=None.') + check( + ''' + import sys + sys.stdout = None + import pymupdf + + pymupdf.message('this is pymupdf.message()') + pymupdf.log('this is pymupdf.log()') + ''', + [], + [], + ) + + +def relpath(path, start=None): + ''' + A 'safe' alternative to os.path.relpath(). Avoids an exception on Windows + if the drive needs to change - in this case we use os.path.abspath(). + ''' + try: + return os.path.relpath(path, start) + except ValueError: + # os.path.relpath() fails if trying to change drives. + assert platform.system() == 'Windows' + return os.path.abspath(path) + + +def test_open(): + + if not hasattr(pymupdf, 'mupdf'): + print('test_open(): not running on classic.') + return + + import re + import textwrap + import traceback + + resources = relpath(os.path.abspath(f'{__file__}/../../tests/resources')) + + # We convert all strings to use `/` instead of os.sep, which avoids + # problems with regex's on windows. + resources = resources.replace(os.sep, '/') + + def check(filename=None, stream=None, filetype=None, exception=None): + ''' + Checks we receive expected exception if specified. + ''' + if isinstance(filename, str): + filename = filename.replace(os.sep, '/') + if exception: + etype, eregex = exception + if isinstance(eregex, (tuple, list)): + # Treat as sequence of regexes to look for. + eregex = '.*'.join(eregex) + try: + pymupdf.open(filename=filename, stream=stream, filetype=filetype) + except etype as e: + text = traceback.format_exc(limit=0) + text = text.replace(os.sep, '/') + text = textwrap.indent(text, ' ', lambda line: 1) + assert re.search(eregex, text, re.DOTALL), \ + f'Incorrect exception text, expected {eregex=}, received:\n{text}' + print(f'Received expected exception for {filename=} {stream=} {filetype=}:\n{text}') + except Exception as e: + assert 0, \ + f'Incorrect exception, expected {etype}, received {type(e)=}.' + else: + assert 0, f'Did not received exception, expected {etype=}. {filename=} {stream=} {filetype=} {exception=}' + else: + document = pymupdf.open(filename=filename, stream=stream, filetype=filetype) + return document + + check(f'{resources}/1.pdf') + + check(f'{resources}/Bezier.epub') + + path = 1234 + etype = TypeError + eregex = re.escape(f'bad filename: type(filename)= filename={path}.') + check(path, exception=(etype, eregex)) + + path = 'test_open-this-file-will-not-exist' + etype = pymupdf.FileNotFoundError + eregex = f'no such file: \'{path}\'' + check(path, exception=(etype, eregex)) + + path = resources + etype = pymupdf.FileDataError + eregex = re.escape(f'\'{path}\' is no file') + check(path, exception=(etype, eregex)) + + path = relpath(os.path.abspath(f'{resources}/../test_open_empty')) + path = path.replace(os.sep, '/') + with open(path, 'w') as f: + pass + etype = pymupdf.EmptyFileError + eregex = re.escape(f'Cannot open empty file: filename={path!r}.') + check(path, exception=(etype, eregex)) + + path = f'{resources}/1.pdf' + filetype = 'xps' + etype = pymupdf.FileDataError + # 2023-12-12: On OpenBSD, for some reason the SWIG catch code only catches + # the exception as FzErrorBase. + etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorFormat' + eregex = ( + # With a sysinstall with separate MuPDF install, we get + # `mupdf.FzErrorFormat` instead of `pymupdf.mupdf.FzErrorFormat`. So + # we just search for the former. + re.escape(f'mupdf.{etype2}: code=7: cannot recognize zip archive'), + re.escape(f'pymupdf.FileDataError: Failed to open file {path!r} as type {filetype!r}.'), + ) + check(path, filetype=filetype, exception=None) + + path = f'{resources}/chinese-tables.pickle' + etype = pymupdf.FileDataError + etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorUnsupported' + etext = ( + re.escape(f'mupdf.{etype2}: code=6: cannot find document handler for file: {path}'), + re.escape(f'pymupdf.FileDataError: Failed to open file {path!r}.'), + ) + check(path, exception=(etype, etext)) + + stream = 123 + etype = TypeError + etext = re.escape('bad stream: type(stream)=.') + check(stream=stream, exception=(etype, etext)) + + check(stream=b'', exception=(pymupdf.EmptyFileError, re.escape('Cannot open empty stream.'))) + + +def test_open2(): + ''' + Checks behaviour of fz_open_document() and fz_open_document_with_stream() + with different filenames/magic values. + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_open2(): not running on Pyodide - cannot run child processes.') + return + + if platform.system() == 'Windows': + print(f'test_open2(): not running on Windows because `git ls-files` known fail on Github Windows runners.') + return + + root = os.path.normpath(f'{__file__}/../..') + root = relpath(root) + + # Find tests/resources/test_open2.* input files/streams. We calculate + # paths relative to the PyMuPDF checkout directory , to allow use + # of tests/resources/test_open2_expected.json regardless of the actual + # checkout directory. + print() + sys.path.append(root) + try: + import pipcl + finally: + del sys.path[0] + paths = pipcl.git_items(f'{root}/tests/resources') + paths = fnmatch.filter(paths, f'test_open2.*') + paths = [f'tests/resources/{i}' for i in paths] + + # Get list of extensions of input files. + extensions = set() + extensions.add('.txt') + extensions.add('') + for path in paths: + _, ext = os.path.splitext(path) + extensions.add(ext) + extensions = sorted(list(extensions)) + + def get_result(e, document): + ''' + Return fz_lookup_metadata(document, 'format') or [ERROR]. + ''' + if e: + return f'[error]' + else: + try: + return pymupdf.mupdf.fz_lookup_metadata2(document, 'format') + except Exception: + return '' + + def dict_set_path(dict_, *items): + for item in items[:-2]: + dict_ = dict_.setdefault(item, dict()) + dict_[items[-2]] = items[-1] + + results = dict() + + # Prevent warnings while we are running. + _g_out_message = pymupdf._g_out_message + pymupdf._g_out_message = None + try: + results = dict() + + for path in paths: + print(path) + for ext in extensions: + path2 = f'{root}/foo{ext}' + path3 = shutil.copy2(f'{root}/{path}', path2) + assert(path3 == path2) + + # Test fz_open_document(). + e = None + document = None + try: + document = pymupdf.mupdf.fz_open_document(path2) + except Exception as ee: + e = ee + wt = pymupdf.TOOLS.mupdf_warnings() + text = get_result(e, document) + print(f' fz_open_document({path2}) => {text}') + dict_set_path(results, path, ext, 'file', text) + + # Test fz_open_document_with_stream(). + e = None + document = None + with open(f'{root}/{path}', 'rb') as f: + data = f.read() + stream = pymupdf.mupdf.fz_open_memory(pymupdf.mupdf.python_buffer_data(data), len(data)) + try: + document = pymupdf.mupdf.fz_open_document_with_stream(ext, stream) + except Exception as ee: + e = ee + wt = pymupdf.TOOLS.mupdf_warnings() + text = get_result(e, document) + print(f' fz_open_document_with_stream(magic={ext!r}) => {text}') + dict_set_path(results, path, ext, 'stream', text) + + finally: + pymupdf._g_out_message = _g_out_message + + # Create html table. + path_html = os.path.normpath(f'{__file__}/../../tests/test_open2.html') + with open(path_html, 'w') as f: + f.write(f'\n') + f.write(f'\n') + f.write(f'

{time.strftime("%F-%T")}\n') + f.write(f'\n') + f.write(f'') + for ext in extensions: + f.write(f'') + f.write('\n') + for path in sorted(results.keys()): + _, ext = os.path.splitext(path) + f.write(f'') + for ext2 in sorted(results[path].keys()): + text_file = results[path][ext2]['file'] + text_stream = results[path][ext2]['stream'] + b1, b2 = ('', '') if ext2==ext else ('', '') + if text_file == text_stream: + if text_file == '[error]': + f.write(f'') + else: + f.write(f'') + else: + f.write(f'') + f.write('\n') + f.write(f'
Extension/magic') + f.write(f'
Data file{ext}
{os.path.basename(path)}
{b1}{text_file}{b2}
{b1}{text_file}{b2}file: {b1}{text_file}{b2}
') + f.write(f'stream: {b1}{text_stream}{b2}
\n') + f.write(f'/\n') + f.write(f'\n') + print(f'Have created: {path_html}') + + path_out = os.path.normpath(f'{__file__}/../../tests/test_open2.json') + with open(path_out, 'w') as f: + json.dump(results, f, indent=4, sort_keys=1) + + if pymupdf.mupdf_version_tuple >= (1, 26): + with open(os.path.normpath(f'{__file__}/../../tests/resources/test_open2_expected.json')) as f: + results_expected = json.load(f) + if results != results_expected: + print(f'results != results_expected:') + def show(r, name): + text = json.dumps(r, indent=4, sort_keys=1) + print(f'{name}:') + print(textwrap.indent(text, ' ')) + show(results_expected, 'results_expected') + show(results, 'results') + assert 0 + + +def test_533(): + if not hasattr(pymupdf, 'mupdf'): + print('test_533(): Not running on classic.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf') + doc = pymupdf.open(path) + print() + for p in doc: + print(f'test_533(): for p in doc: {p=}.') + for p in list(doc)[:]: + print(f'test_533(): for p in list(doc)[:]: {p=}.') + for p in doc[:]: + print(f'test_533(): for p in doc[:]: {p=}.') + +def test_3354(): + document = pymupdf.open(filename) + v = dict(foo='bar') + document.metadata = v + assert document.metadata == v + +def test_scientific_numbers(): + ''' + This is #3381. + ''' + doc = pymupdf.open() + page = doc.new_page(width=595, height=842) + point = pymupdf.Point(1e-11, -1e-10) + page.insert_text(point, "Test") + contents = page.read_contents() + print(f'{contents=}') + assert b" 1e-" not in contents + +def test_3615(): + print('') + print(f'{pymupdf.pymupdf_version=}', flush=1) + print(f'{pymupdf.VersionBind=}', flush=1) + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3615.epub') + doc = pymupdf.open(path) + print(doc.pagemode) + print(doc.pagelayout) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt + +def test_3654(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3654.docx') + content = "" + with pymupdf.open(path) as document: + for page in document: + content += page.get_text() + '\n\n' + content = content.strip() + +def test_3727(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3727.pdf') + doc = pymupdf.open(path) + for page in doc: + page.get_pixmap(matrix = pymupdf.Matrix(2,2)) + +def test_3569(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3569.pdf') + document = pymupdf.open(path) + page = document[0] + svg = page.get_svg_image(text_as_path=False) + print(f'{svg=}') + if pymupdf.mupdf_version_tuple >= (1, 27): + assert svg == ( + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '**L1-13\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + else: + assert svg == ( + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '**L1-13\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + ) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'unknown cid collection: PDFAUTOCAD-Indentity0\nnon-embedded font using identity encoding: ArialMT (mapping via )\ninvalid marked content and clip nesting' + +def test_3450(): + # This issue is a slow-down, so we just show time taken - it's not safe + # to fail if test takes too long because that can give spurious failures + # depending on hardware etc. + # + # On a mac-mini, PyMuPDF-1.24.8 takes 60s, PyMuPDF-1.24.9 takes 4s. + # + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + print(f'test_3450(): not running on valgrind because very slow.', flush=1) + return + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3450.pdf') + pdf = pymupdf.open(path) + page = pdf[0] + t = time.time() + pix = page.get_pixmap(alpha=False, dpi=150) + t = time.time() - t + print(f'test_3450(): {t=}') + +def test_3859(): + print(f'{pymupdf.mupdf.PDF_NULL=}.') + print(f'{pymupdf.mupdf.PDF_TRUE=}.') + print(f'{pymupdf.mupdf.PDF_FALSE=}.') + for name in ('NULL', 'TRUE', 'FALSE'): + name2 = f'PDF_{name}' + v = getattr(pymupdf.mupdf, name2) + print(f'{name=} {name2=} {v=} {type(v)=}') + assert type(v)==pymupdf.mupdf.PdfObj, f'`v` is not a pymupdf.mupdf.PdfObj.' + +def test_3905(): + data = b'A,B,C,D\r\n1,2,1,2\r\n2,2,1,2\r\n' + try: + document = pymupdf.open(stream=data, filetype='pdf') + except pymupdf.FileDataError as e: + print(f'test_3905(): e: {e}') + else: + assert 0 + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26): + assert wt == 'format error: cannot find version marker\ntrying to repair broken xref\nrepairing PDF document' + else: + assert wt == 'format error: cannot recognize version marker\ntrying to repair broken xref\nrepairing PDF document' + +def test_3624(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3624.pdf') + path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3624_expected.png') + path_png = os.path.normpath(f'{__file__}/../../tests/test_3624.png') + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap(matrix=pymupdf.Matrix(2, 2)) + print(f'Saving to {path_png=}.') + pixmap.save(path_png) + rms = gentle_compare.pixmaps_rms(path_png_expected, path_png) + print(f'{rms=}') + # We get small differences in sysinstall tests, where some thirdparty + # libraries can differ. + if rms > 1: + pixmap_diff = gentle_compare.pixmaps_diff(path_png_expected, path_png) + path_png_diff = os.path.normpath(f'{__file__}/../../tests/test_3624_diff.png') + pixmap_diff.save(path_png_diff) + assert 0, f'{rms=}' + + +def test_4043(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4043.pdf') + doc = pymupdf.open(path) + doc.fullcopy_page(1) + + +def test_4018(): + document = pymupdf.open() + for page in document.pages(-1, -1): + pass + +def test_4034(): + # tests/resources/test_4034.pdf is first two pages of input file in + # https://github.com/pymupdf/PyMuPDF/issues/4034. + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4034.pdf') + path_clean = os.path.normpath(f'{__file__}/../../tests/test_4034_out.pdf') + with pymupdf.open(path) as document: + pixmap1 = document[0].get_pixmap() + document.save(path_clean, clean=1) + with pymupdf.open(path_clean) as document: + page = document[0] + pixmap2 = document[0].get_pixmap() + rms = gentle_compare.pixmaps_rms(pixmap1, pixmap2) + print(f'test_4034(): Comparison of original/cleaned page 0 pixmaps: {rms=}.') + if pymupdf.mupdf_version_tuple < (1, 25, 2): + assert 30 < rms < 50 + else: + assert rms == 0 + +def test_4309(): + document = pymupdf.open() + page = document.new_page() + document.delete_page() + +def test_4263(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4263(): not running on Pyodide - cannot run child processes.') + return + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4263.pdf') + path_out = f'{path}.linerarized.pdf' + command = f'pymupdf clean -linear {path} {path_out}' + print(f'Running: {command}') + cp = subprocess.run(command, shell=1, check=0) + if pymupdf.mupdf_version_tuple < (1, 26): + assert cp.returncode == 0 + else: + # Support for linerarisation dropped in MuPDF-1.26. + assert cp.returncode + +def test_4224(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4224.pdf') + with pymupdf.open(path) as document: + for page in document.pages(): + pixmap = page.get_pixmap(dpi=150) + path_pixmap = f'{path}.{page.number}.png' + pixmap.save(path_pixmap) + print(f'Have created: {path_pixmap}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'format error: negative code in 1d faxd\npadding truncated image' + +def test_4319(): + # Have not seen this test reproduce issue #4319, but keeping it anyway. + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4319.pdf') + doc = pymupdf.open() + page = doc.new_page() + page.insert_text((10, 100), "some text") + doc.save(path) + doc.close() + doc = pymupdf.open(path) + page = doc[0] + pc = doc.page_count + doc.close() + os.remove(path) + print(f"removed {doc.name=}") + +def test_3886(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3886.pdf') + path_clean0 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean0.pdf') + path_clean1 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean1.pdf') + + with pymupdf.open(path) as document: + pixmap = document[0].get_pixmap() + document.save(path_clean0, clean=0) + + with pymupdf.open(path) as document: + document.save(path_clean1, clean=1) + + with pymupdf.open(path_clean0) as document: + pixmap_clean0 = document[0].get_pixmap() + + with pymupdf.open(path_clean1) as document: + pixmap_clean1 = document[0].get_pixmap() + + rms_0 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean0) + rms_1 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean1) + print(f'test_3886(): {rms_0=} {rms_1=}') + +def test_4415(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4415.pdf') + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out.png') + path_out_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out_expected.png') + with pymupdf.open(path) as document: + page = document[0] + rot = page.rotation + orig = pymupdf.Point(100, 100) # apparent insertion point + text = 'Text at Top-Left' + mrot = page.derotation_matrix # matrix annihilating page rotation + page.insert_text(orig * mrot, text, fontsize=60, rotate=rot) + pixmap = page.get_pixmap() + pixmap.save(path_out) + rms = gentle_compare.pixmaps_rms(path_out_expected, path_out) + assert rms == 0, f'{rms=}' + +def test_4466(): + path = os.path.normpath(f'{__file__}/../../tests/test_4466.pdf') + with pymupdf.Document(path) as document: + for page in document: + print(f'{page=}', flush=1) + pixmap = page.get_pixmap(clip=(0, 0, 10, 10)) + print(f'{pixmap.n=} {pixmap.size=} {pixmap.stride=} {pixmap.width=} {pixmap.height=} {pixmap.x=} {pixmap.y=}', flush=1) + pixmap.is_unicolor # Used to crash. + + +def test_4479(): + # This passes with pymupdf-1.24.14, fails with pymupdf==1.25.*, passes with + # pymupdf-1.26.0. + print() + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4479.pdf') + with pymupdf.open(path) as document: + + def show(items): + for item in items: + print(f' {repr(item)}') + + items = document.layer_ui_configs() + show(items) + assert items == [ + {'depth': 0, 'locked': 0, 'number': 0, 'on': 1, 'text': 'layer_0', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 1, 'on': 1, 'text': 'layer_1', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 2, 'on': 0, 'text': 'layer_2', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 3, 'on': 1, 'text': 'layer_3', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 4, 'on': 1, 'text': 'layer_4', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 5, 'on': 1, 'text': 'layer_5', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 6, 'on': 1, 'text': 'layer_6', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 7, 'on': 1, 'text': 'layer_7', 'type': 'checkbox'}, + ] + + document.set_layer_ui_config(0, pymupdf.PDF_OC_OFF) + items = document.layer_ui_configs() + show(items) + assert items == [ + {'depth': 0, 'locked': 0, 'number': 0, 'on': 0, 'text': 'layer_0', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 1, 'on': 1, 'text': 'layer_1', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 2, 'on': 0, 'text': 'layer_2', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 3, 'on': 1, 'text': 'layer_3', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 4, 'on': 1, 'text': 'layer_4', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 5, 'on': 1, 'text': 'layer_5', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 6, 'on': 1, 'text': 'layer_6', 'type': 'checkbox'}, + {'depth': 0, 'locked': 0, 'number': 7, 'on': 1, 'text': 'layer_7', 'type': 'checkbox'}, + ] + + +def test_4533(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4533(): not running on Pyodide - cannot run child processes.') + return + + print() + path = util.download( + 'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf', + 'test_4533.pdf', + size=16864501, + ) + # This bug is a segv so we run the test in a child process. + command = f'{sys.executable} -c "import pymupdf; document = pymupdf.open({path!r}); print(len(document))"' + print(f'Running: {command}') + cp = subprocess.run(command, shell=1, check=0) + e = cp.returncode + print(f'{e=}') + if pymupdf.mupdf_version_tuple >= (1, 26, 6): + assert e == 0 + else: + assert e != 0 + + +def test_4564(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4564.pdf') + print() + with pymupdf.open(path) as document: + for key in sorted(document.metadata.keys()): + value = document.metadata[key] + print(f'{key}: {value!r}') + if pymupdf.mupdf_version_tuple >= (1, 27): + assert document.metadata['producer'] == 'Adobe PSL 1.3e for Canon\x00' + else: + assert document.metadata['producer'] == 'Adobe PSL 1.3e for Canon\udcc0\udc80' + + +def test_4496(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4496.hwpx') + with pymupdf.open(path) as document: + print(document.page_count) + + +def test_gitinfo(): + # This doesn't really test very much, but can be useful to see the current + # values. + print('') + print(f'test_4496():') + print(f'{pymupdf.mupdf_location=}') + print(f'{pymupdf.mupdf_version=}') + print(f'{pymupdf.pymupdf_git_branch=}') + print(f'{pymupdf.pymupdf_git_sha=}') + print(f'{pymupdf.pymupdf_version=}') + print(f'{pymupdf.pymupdf_git_diff=}') + if pymupdf.pymupdf_git_diff: + print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}') + + +def test_4392(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4392(): not running on Pyodide - cannot run child processes.') + return + + print() + path = os.path.normpath(f'{__file__}/../../tests/test_4392.py') + with open(path, 'w') as f: + f.write('import pymupdf\n') + + command = f'pytest {path}' + print(f'Running: {command}', flush=1) + e1 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e1=}') + + command = f'pytest -Werror {path}' + print(f'Running: {command}', flush=1) + e2 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e2=}') + + command = f'{sys.executable} -Werror -c "import pymupdf"' + print(f'Running: {command}', flush=1) + e3 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e3=}') + + print(f'{e1=} {e2=} {e3=}') + + print(f'{pymupdf.swig_version=}') + print(f'{pymupdf.swig_version_tuple=}') + + assert e1 == 5 + if pymupdf.swig_version_tuple >= (4, 4): + assert e2 == 5 + assert e3 == 0 + else: + # We get SEGV's etc with older swig. + if platform.system() == 'Windows': + assert (e2, e3) == (0xc0000005, 0xc0000005) + else: + # On plain linux we get (139, 139). On manylinux we get (-11, + # -11). On MacOS we get (-11, -11). + assert (e2, e3) == (139, 139) or (e2, e3) == (-11, -11) + + +def test_4639(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4639.pdf') + with pymupdf.open(path) as document: + page = document[-1] + page.get_bboxlog(layers=True) + + +def test_4590(): + + # Create test PDF. + path = os.path.normpath(f'{__file__}/../../tests/test_4590.pdf') + with pymupdf.open() as document: + page = document.new_page() + + # Add some text + text = 'This PDF contains a file attachment annotation.' + page.insert_text((72, 72), text, fontsize=12) + + # Create a sample file. + path_sample = os.path.normpath(f'{__file__}/../../tests/test_4590_annotation_sample.txt') + with open(path_sample, 'w') as f: + f.write('This is a sample attachment file.') + + # Read file as bytes + with open(path_sample, 'rb') as f: + sample = f.read() + + # Define annotation position (rect or point) + annot_pos = pymupdf.Rect(72, 100, 92, 120) # PushPin icon rectangle + + # Add the file attachment annotation + page.add_file_annot( + point = annot_pos, + buffer_ = sample, + filename = 'sample.txt', + ufilename = 'sample.txt', + desc = 'A test attachment file.', + icon = 'PushPin', + ) + + # Save the PDF + document.save(path) + + # Check pymupdf.Document.scrub() works. + with pymupdf.open(path) as document: + document.scrub() + + +def test_4702(): + if os.environ.get('PYODIDE_ROOT'): + # util.download() uses subprocess. + print('test_4702(): not running on Pyodide - cannot run child processes.') + return + + path = util.download( + 'https://github.com/user-attachments/files/22403483/01995b6ca7837b52abaa24e38e8c076d.pdf', + 'test_4702.pdf', + ) + with pymupdf.open(path) as document: + for xref in range(1, document.xref_length()): + print(f'{xref=}') + try: + _ = document.xref_object(xref) + except Exception as e1: + print(f'{e1=}') + try: + document.update_object(xref, "<<>>") + except Exception as e2: + print(f'{e2=}') + raise + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'repairing PDF document' + + with pymupdf.open(path) as document: + for xref in range(1, document.xref_length()): + print(f'{xref=}') + _ = document.xref_object(xref) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'repairing PDF document' + + +def test_4712(): + ''' + Crash with "corrupted double-linked list + ''' + if pymupdf.mupdf_version_tuple < (1, 26, 11): + print(f'test_4712m(): Not running because known to fail on mupdf < 1.26.11: {pymupdf.mupdf_version=}.') + return + path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf') + path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf') + doc1 = pymupdf.open(path_a) + for i in range(6): + doc1.load_page(i).get_pixmap() + doc2 = pymupdf.open(path_b) + for i in range(6): + doc2.load_page(i).get_pixmap() + + +def test_4712m(): + if pymupdf.mupdf_version_tuple < (1, 26, 11): + print(f'test_4712m(): Not running because known to fail on mupdf < 1.26.11: {pymupdf.mupdf_version=}.') + return + + path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf') + path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf') + + mupdf = pymupdf.mupdf + def get_pixmap(page): + displaylist = mupdf.fz_new_display_list_from_page(page) + rect = mupdf.fz_bound_display_list(displaylist) + irect = mupdf.fz_round_rect(rect) + pixmap = mupdf.fz_new_pixmap_with_bbox( + mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB), + irect, + mupdf.FzSeparations(), + 0, # alpha + ) + mupdf.fz_clear_pixmap_with_value(pixmap, 0xFF) + matrix = mupdf.FzMatrix() + device = mupdf.fz_new_draw_device(matrix, pixmap) + mupdf.fz_run_display_list( + displaylist, + device, + mupdf.FzMatrix(), + mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE), + mupdf.FzCookie(), + ) + mupdf.fz_close_device(device) + + def process_document(document): + for i in range(6): + print(f' {i=}', flush=1) + page = mupdf.fz_load_page(document, i) + get_pixmap(page) + + print(f'Processing {path_a=}', flush=1) + document_a = mupdf.fz_open_document(path_a) + process_document(document_a) + + print(f'Processing {path_b=}', flush=1) + document_b = mupdf.fz_open_document(path_b) + process_document(document_b) + + +def test_4746(): + archive = pymupdf.Archive('.') + archive.add(__file__, 'foo') diff --git a/tests/test_geometry.py b/tests/test_geometry.py index 642c3f3d0..645b200dd 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -3,60 +3,65 @@ * Check matrix inversions in variations * Check algebra constructs """ -import fitz +import os + +import pymupdf def test_rect(): - assert tuple(fitz.Rect()) == (0, 0, 0, 0) - p1 = fitz.Point(10, 20) - p2 = fitz.Point(100, 200) - p3 = fitz.Point(150, 250) - r = fitz.Rect(10, 20, 100, 200) + assert tuple(pymupdf.Rect()) == (0, 0, 0, 0) + if hasattr(pymupdf, 'mupdf'): + assert tuple(pymupdf.Rect(y0=12)) == (0, 12, 0, 0) + assert tuple(pymupdf.Rect(10, 20, 100, 200, x1=12)) == (10, 20, 12, 200) + p1 = pymupdf.Point(10, 20) + p2 = pymupdf.Point(100, 200) + p3 = pymupdf.Point(150, 250) + r = pymupdf.Rect(10, 20, 100, 200) r_tuple = tuple(r) - assert tuple(fitz.Rect(p1, p2)) == r_tuple - assert tuple(fitz.Rect(p1, 100, 200)) == r_tuple - assert tuple(fitz.Rect(10, 20, p2)) == r_tuple + assert tuple(pymupdf.Rect(p1, p2)) == r_tuple + assert tuple(pymupdf.Rect(p1, 100, 200)) == r_tuple + assert tuple(pymupdf.Rect(10, 20, p2)) == r_tuple assert tuple(r.include_point(p3)) == (10, 20, 150, 250) - r = fitz.Rect(10, 20, 100, 200) + r = pymupdf.Rect(10, 20, 100, 200) assert tuple(r.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220) - r = fitz.Rect(10, 20, 100, 200) + r = pymupdf.Rect(10, 20, 100, 200) # include empty rect makes no change assert tuple(r.include_rect((0, 0, 0, 0))) == r_tuple # include invalid rect makes no change assert tuple(r.include_rect((1, 1, -1, -1))) == r_tuple - r = fitz.Rect() + r = pymupdf.Rect() for i in range(4): r[i] = i + 1 - assert r == fitz.Rect(1, 2, 3, 4) - assert fitz.Rect() / 5 == fitz.Rect() - assert fitz.Rect(1, 1, 2, 2) / fitz.Identity == fitz.Rect(1, 1, 2, 2) + assert r == pymupdf.Rect(1, 2, 3, 4) + assert pymupdf.Rect() / 5 == pymupdf.Rect() + assert pymupdf.Rect(1, 1, 2, 2) / pymupdf.Identity == pymupdf.Rect(1, 1, 2, 2) failed = False try: - r = fitz.Rect(1) + r = pymupdf.Rect(1) except: failed = True assert failed failed = False try: - r = fitz.Rect(1, 2, 3, 4, 5) + r = pymupdf.Rect(1, 2, 3, 4, 5) except: failed = True assert failed failed = False try: - r = fitz.Rect((1, 2, 3, 4, 5)) + r = pymupdf.Rect((1, 2, 3, 4, 5)) except: failed = True assert failed failed = False try: - r = fitz.Rect(1, 2, 3, "x") + r = pymupdf.Rect(1, 2, 3, "x") except: failed = True assert failed failed = False try: - r = fitz.Rect() + r = pymupdf.Rect() r[5] = 1 except: failed = True @@ -64,52 +69,52 @@ def test_rect(): def test_irect(): - p1 = fitz.Point(10, 20) - p2 = fitz.Point(100, 200) - p3 = fitz.Point(150, 250) - r = fitz.IRect(10, 20, 100, 200) + p1 = pymupdf.Point(10, 20) + p2 = pymupdf.Point(100, 200) + p3 = pymupdf.Point(150, 250) + r = pymupdf.IRect(10, 20, 100, 200) r_tuple = tuple(r) - assert tuple(fitz.IRect(p1, p2)) == r_tuple - assert tuple(fitz.IRect(p1, 100, 200)) == r_tuple - assert tuple(fitz.IRect(10, 20, p2)) == r_tuple + assert tuple(pymupdf.IRect(p1, p2)) == r_tuple + assert tuple(pymupdf.IRect(p1, 100, 200)) == r_tuple + assert tuple(pymupdf.IRect(10, 20, p2)) == r_tuple assert tuple(r.include_point(p3)) == (10, 20, 150, 250) - r = fitz.IRect(10, 20, 100, 200) + r = pymupdf.IRect(10, 20, 100, 200) assert tuple(r.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220) - r = fitz.IRect(10, 20, 100, 200) + r = pymupdf.IRect(10, 20, 100, 200) # include empty rect makes no change assert tuple(r.include_rect((0, 0, 0, 0))) == r_tuple - r = fitz.IRect() + r = pymupdf.IRect() for i in range(4): r[i] = i + 1 - assert r == fitz.IRect(1, 2, 3, 4) + assert r == pymupdf.IRect(1, 2, 3, 4) failed = False try: - r = fitz.IRect(1) + r = pymupdf.IRect(1) except: failed = True assert failed failed = False try: - r = fitz.IRect(1, 2, 3, 4, 5) + r = pymupdf.IRect(1, 2, 3, 4, 5) except: failed = True assert failed failed = False try: - r = fitz.IRect((1, 2, 3, 4, 5)) + r = pymupdf.IRect((1, 2, 3, 4, 5)) except: failed = True assert failed failed = False try: - r = fitz.IRect(1, 2, 3, "x") + r = pymupdf.IRect(1, 2, 3, "x") except: failed = True assert failed failed = False try: - r = fitz.IRect() + r = pymupdf.IRect() r[5] = 1 except: failed = True @@ -118,117 +123,121 @@ def test_irect(): def test_inversion(): alpha = 255 - m1 = fitz.Matrix(alpha) - m2 = fitz.Matrix(-alpha) + m1 = pymupdf.Matrix(alpha) + m2 = pymupdf.Matrix(-alpha) m3 = m1 * m2 # should equal identity matrix - assert abs(m3 - fitz.Identity) < fitz.EPSILON - m = fitz.Matrix(1, 0, 1, 0, 1, 0) # not invertible! + assert abs(m3 - pymupdf.Identity) < pymupdf.EPSILON + m = pymupdf.Matrix(1, 0, 1, 0, 1, 0) # not invertible! # inverted matrix must be zero - assert ~m == fitz.Matrix() + assert ~m == pymupdf.Matrix() def test_matrix(): - assert tuple(fitz.Matrix()) == (0, 0, 0, 0, 0, 0) - m45p = fitz.Matrix(45) - m45m = fitz.Matrix(-45) - m90 = fitz.Matrix(90) - assert abs(m90 - m45p * m45p) < fitz.EPSILON - assert abs(fitz.Identity - m45p * m45m) < fitz.EPSILON - assert abs(m45p - ~m45m) < fitz.EPSILON - assert fitz.Matrix(2, 3, 1) == fitz.Matrix(1, 3, 2, 1, 0, 0) - m = fitz.Matrix(2, 3, 1) + assert tuple(pymupdf.Matrix()) == (0, 0, 0, 0, 0, 0) + assert tuple(pymupdf.Matrix(90)) == (0, 1, -1, 0, 0, 0) + if hasattr(pymupdf, 'mupdf'): + assert tuple(pymupdf.Matrix(c=1)) == (0, 0, 1, 0, 0, 0) + assert tuple(pymupdf.Matrix(90, e=5)) == (0, 1, -1, 0, 5, 0) + m45p = pymupdf.Matrix(45) + m45m = pymupdf.Matrix(-45) + m90 = pymupdf.Matrix(90) + assert abs(m90 - m45p * m45p) < pymupdf.EPSILON + assert abs(pymupdf.Identity - m45p * m45m) < pymupdf.EPSILON + assert abs(m45p - ~m45m) < pymupdf.EPSILON + assert pymupdf.Matrix(2, 3, 1) == pymupdf.Matrix(1, 3, 2, 1, 0, 0) + m = pymupdf.Matrix(2, 3, 1) m.invert() - assert abs(m * fitz.Matrix(2, 3, 1) - fitz.Identity) < fitz.EPSILON - assert fitz.Matrix(1, 1).pretranslate(2, 3) == fitz.Matrix(1, 0, 0, 1, 2, 3) - assert fitz.Matrix(1, 1).prescale(2, 3) == fitz.Matrix(2, 0, 0, 3, 0, 0) - assert fitz.Matrix(1, 1).preshear(2, 3) == fitz.Matrix(1, 3, 2, 1, 0, 0) - assert abs(fitz.Matrix(1, 1).prerotate(30) - fitz.Matrix(30)) < fitz.EPSILON + assert abs(m * pymupdf.Matrix(2, 3, 1) - pymupdf.Identity) < pymupdf.EPSILON + assert pymupdf.Matrix(1, 1).pretranslate(2, 3) == pymupdf.Matrix(1, 0, 0, 1, 2, 3) + assert pymupdf.Matrix(1, 1).prescale(2, 3) == pymupdf.Matrix(2, 0, 0, 3, 0, 0) + assert pymupdf.Matrix(1, 1).preshear(2, 3) == pymupdf.Matrix(1, 3, 2, 1, 0, 0) + assert abs(pymupdf.Matrix(1, 1).prerotate(30) - pymupdf.Matrix(30)) < pymupdf.EPSILON small = 1e-6 - assert fitz.Matrix(1, 1).prerotate(90 + small) == fitz.Matrix(90) - assert fitz.Matrix(1, 1).prerotate(180 + small) == fitz.Matrix(180) - assert fitz.Matrix(1, 1).prerotate(270 + small) == fitz.Matrix(270) - assert fitz.Matrix(1, 1).prerotate(small) == fitz.Matrix(0) - assert fitz.Matrix(1, 1).concat( - fitz.Matrix(1, 2), fitz.Matrix(3, 4) - ) == fitz.Matrix(3, 0, 0, 8, 0, 0) - assert fitz.Matrix(1, 2, 3, 4, 5, 6) / 1 == fitz.Matrix(1, 2, 3, 4, 5, 6) + assert pymupdf.Matrix(1, 1).prerotate(90 + small) == pymupdf.Matrix(90) + assert pymupdf.Matrix(1, 1).prerotate(180 + small) == pymupdf.Matrix(180) + assert pymupdf.Matrix(1, 1).prerotate(270 + small) == pymupdf.Matrix(270) + assert pymupdf.Matrix(1, 1).prerotate(small) == pymupdf.Matrix(0) + assert pymupdf.Matrix(1, 1).concat( + pymupdf.Matrix(1, 2), pymupdf.Matrix(3, 4) + ) == pymupdf.Matrix(3, 0, 0, 8, 0, 0) + assert pymupdf.Matrix(1, 2, 3, 4, 5, 6) / 1 == pymupdf.Matrix(1, 2, 3, 4, 5, 6) assert m[0] == m.a assert m[1] == m.b assert m[2] == m.c assert m[3] == m.d assert m[4] == m.e assert m[5] == m.f - m = fitz.Matrix() + m = pymupdf.Matrix() for i in range(6): m[i] = i + 1 - assert m == fitz.Matrix(1, 2, 3, 4, 5, 6) + assert m == pymupdf.Matrix(1, 2, 3, 4, 5, 6) failed = False try: - m = fitz.Matrix(1, 2, 3) + m = pymupdf.Matrix(1, 2, 3) except: failed = True assert failed failed = False try: - m = fitz.Matrix(1, 2, 3, 4, 5, 6, 7) + m = pymupdf.Matrix(1, 2, 3, 4, 5, 6, 7) except: failed = True assert failed failed = False try: - m = fitz.Matrix((1, 2, 3, 4, 5, 6, 7)) + m = pymupdf.Matrix((1, 2, 3, 4, 5, 6, 7)) except: failed = True assert failed failed = False try: - m = fitz.Matrix(1, 2, 3, 4, 5, "x") + m = pymupdf.Matrix(1, 2, 3, 4, 5, "x") except: failed = True assert failed failed = False try: - m = fitz.Matrix(1, 0, 1, 0, 1, 0) - n = fitz.Matrix(1, 1) / m + m = pymupdf.Matrix(1, 0, 1, 0, 1, 0) + n = pymupdf.Matrix(1, 1) / m except: failed = True assert failed def test_point(): - assert tuple(fitz.Point()) == (0, 0) - assert fitz.Point(1, -1).unit == fitz.Point(5, -5).unit - assert fitz.Point(-1, -1).abs_unit == fitz.Point(1, 1).unit - assert fitz.Point(1, 1).distance_to(fitz.Point(1, 1)) == 0 - assert fitz.Point(1, 1).distance_to(fitz.Rect(1, 1, 2, 2)) == 0 - assert fitz.Point().distance_to((1, 1, 2, 2)) > 0 + assert tuple(pymupdf.Point()) == (0, 0) + assert pymupdf.Point(1, -1).unit == pymupdf.Point(5, -5).unit + assert pymupdf.Point(-1, -1).abs_unit == pymupdf.Point(1, 1).unit + assert pymupdf.Point(1, 1).distance_to(pymupdf.Point(1, 1)) == 0 + assert pymupdf.Point(1, 1).distance_to(pymupdf.Rect(1, 1, 2, 2)) == 0 + assert pymupdf.Point().distance_to((1, 1, 2, 2)) > 0 failed = False try: - p = fitz.Point(1, 2, 3) + p = pymupdf.Point(1, 2, 3) except: failed = True assert failed failed = False try: - p = fitz.Point((1, 2, 3)) + p = pymupdf.Point((1, 2, 3)) except: failed = True assert failed failed = False try: - p = fitz.Point(1, "x") + p = pymupdf.Point(1, "x") except: failed = True assert failed failed = False try: - p = fitz.Point() + p = pymupdf.Point() p[3] = 1 except: failed = True @@ -236,25 +245,25 @@ def test_point(): def test_algebra(): - p = fitz.Point(1, 2) - m = fitz.Matrix(1, 2, 3, 4, 5, 6) - r = fitz.Rect(1, 1, 2, 2) + p = pymupdf.Point(1, 2) + m = pymupdf.Matrix(1, 2, 3, 4, 5, 6) + r = pymupdf.Rect(1, 1, 2, 2) assert p + p == p * 2 - assert p - p == fitz.Point() + assert p - p == pymupdf.Point() assert m + m == m * 2 - assert m - m == fitz.Matrix() + assert m - m == pymupdf.Matrix() assert r + r == r * 2 - assert r - r == fitz.Rect() - assert p + 5 == fitz.Point(6, 7) - assert m + 5 == fitz.Matrix(6, 7, 8, 9, 10, 11) + assert r - r == pymupdf.Rect() + assert p + 5 == pymupdf.Point(6, 7) + assert m + 5 == pymupdf.Matrix(6, 7, 8, 9, 10, 11) assert r.tl in r assert r.tr not in r assert r.br not in r assert r.bl not in r - assert p * m == fitz.Point(12, 16) - assert r * m == fitz.Rect(9, 12, 13, 18) - assert (fitz.Rect(1, 1, 2, 2) & fitz.Rect(2, 2, 3, 3)).is_empty - assert not fitz.Rect(1, 1, 2, 2).intersects((2, 2, 4, 4)) + assert p * m == pymupdf.Point(12, 16) + assert r * m == pymupdf.Rect(9, 12, 13, 18) + assert (pymupdf.Rect(1, 1, 2, 2) & pymupdf.Rect(2, 2, 3, 3)).is_empty + assert not pymupdf.Rect(1, 1, 2, 2).intersects((2, 2, 4, 4)) failed = False try: x = m + p @@ -283,12 +292,12 @@ def test_algebra(): def test_quad(): - r = fitz.Rect(10, 10, 20, 20) + r = pymupdf.Rect(10, 10, 20, 20) q = r.quad assert q.is_rectangular assert not q.is_empty assert q.is_convex - q *= fitz.Matrix(1, 1).preshear(2, 3) + q *= pymupdf.Matrix(1, 1).preshear(2, 3) assert not q.is_rectangular assert not q.is_empty assert q.is_convex @@ -297,7 +306,7 @@ def test_quad(): assert r.quad not in q failed = False try: - q[5] = fitz.Point() + q[5] = pymupdf.Point() except: failed = True assert failed @@ -312,7 +321,7 @@ def test_quad(): def test_pageboxes(): """Tests concerning ArtBox, TrimBox, BleedBox.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() assert page.cropbox == page.artbox == page.bleedbox == page.trimbox rect_methods = ( @@ -322,9 +331,18 @@ def test_pageboxes(): page.set_trimbox, ) keys = ("CropBox", "ArtBox", "BleedBox", "TrimBox") - rect = fitz.Rect(100, 200, 400, 700) + rect = pymupdf.Rect(100, 200, 400, 700) for f in rect_methods: f(rect) for key in keys: assert doc.xref_get_key(page.xref, key) == ("array", "[100 142 400 642]") assert page.cropbox == page.artbox == page.bleedbox == page.trimbox + +def test_3163(): + b = {'number': 0, 'type': 0, 'bbox': (403.3577880859375, 330.8871765136719, 541.2731323242188, 349.5766296386719), 'lines': [{'spans': [{'size': 14.0, 'flags': 4, 'font': 'SFHello-Medium', 'color': 1907995, 'ascender': 1.07373046875, 'descender': -0.26123046875, 'text': 'Inclusion and diversity', 'origin': (403.3577880859375, 345.9194030761719), 'bbox': (403.3577880859375, 330.8871765136719, 541.2731323242188, 349.5766296386719)}], 'wmode': 0, 'dir': (1.0, 0.0), 'bbox': (403.3577880859375, 330.8871765136719, 541.2731323242188, 349.5766296386719)}]} + bbox = pymupdf.IRect(b["bbox"]) + +def test_3182(): + pix = pymupdf.Pixmap(os.path.abspath(f'{__file__}/../../tests/resources/img-transparent.png')) + rect = pymupdf.Rect(0, 0, 100, 100) + pix.invert_irect(rect) diff --git a/tests/test_imagebbox.py b/tests/test_imagebbox.py index 07653a849..e62b79f5c 100644 --- a/tests/test_imagebbox.py +++ b/tests/test_imagebbox.py @@ -7,12 +7,12 @@ """ import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "image-file1.pdf") image = os.path.join(scriptdir, "resources", "img-transparent.png") -doc = fitz.open(filename) +doc = pymupdf.open(filename) def test_image_bbox(): @@ -22,6 +22,7 @@ def test_image_bbox(): for item in imglist: bbox_list.append(page.get_image_bbox(item, transform=False)) infos = page.get_image_info(xrefs=True) + match = False for im in infos: bbox1 = im["bbox"] match = False @@ -34,7 +35,7 @@ def test_image_bbox(): def test_bboxlog(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() xref = page.insert_image(page.rect, filename=image) img_info = page.get_image_info(xrefs=True) diff --git a/tests/test_imagemasks.py b/tests/test_imagemasks.py new file mode 100644 index 000000000..f4881ab3e --- /dev/null +++ b/tests/test_imagemasks.py @@ -0,0 +1,31 @@ +""" +Confirm image mask detection in TextPage extractions. +""" + +import os + +import pymupdf + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +filename1 = os.path.join(scriptdir, "resources", "img-regular.pdf") +filename2 = os.path.join(scriptdir, "resources", "img-transparent.pdf") + + +def test_imagemask1(): + doc = pymupdf.open(filename1) + page = doc[0] + blocks = page.get_text("dict")["blocks"] + img = blocks[0] + assert img["mask"] is None + img = page.get_image_info()[0] + assert img["has-mask"] is False + + +def test_imagemask2(): + doc = pymupdf.open(filename2) + page = doc[0] + blocks = page.get_text("dict")["blocks"] + img = blocks[0] + assert type(img["mask"]) is bytes + img = page.get_image_info()[0] + assert img["has-mask"] is True diff --git a/tests/test_import.py b/tests/test_import.py new file mode 100644 index 000000000..abc60865a --- /dev/null +++ b/tests/test_import.py @@ -0,0 +1,22 @@ +import os +import subprocess +import sys +import textwrap + + +def test_import(): + if os.environ.get('PYODIDE_ROOT'): + print('test_import(): not running on Pyodide - cannot run child processes.') + return + + root = os.path.abspath(f'{__file__}/../../') + p = f'{root}/tests/resources_test_import.py' + with open(p, 'w') as f: + f.write(textwrap.dedent( + ''' + from pymupdf.utils import * + from pymupdf.table import * + from pymupdf import * + ''' + )) + subprocess.run(f'{sys.executable} {p}', shell=1, check=1) diff --git a/tests/test_insertimage.py b/tests/test_insertimage.py index 45f522547..32d77a282 100644 --- a/tests/test_insertimage.py +++ b/tests/test_insertimage.py @@ -6,22 +6,61 @@ import json import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) imgfile = os.path.join(scriptdir, "resources", "nur-ruhig.jpg") def test_insert(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - r1 = fitz.Rect(50, 50, 100, 100) - r2 = fitz.Rect(50, 150, 200, 400) + r1 = pymupdf.Rect(50, 50, 100, 100) + r2 = pymupdf.Rect(50, 150, 200, 400) page.insert_image(r1, filename=imgfile) page.insert_image(r2, filename=imgfile, rotate=270) info_list = page.get_image_info() assert len(info_list) == 2 - bbox1 = fitz.Rect(info_list[0]["bbox"]) - bbox2 = fitz.Rect(info_list[1]["bbox"]) + bbox1 = pymupdf.Rect(info_list[0]["bbox"]) + bbox2 = pymupdf.Rect(info_list[1]["bbox"]) assert bbox1 in r1 assert bbox2 in r2 + +def test_compress(): + document = pymupdf.open(f'{scriptdir}/resources/2.pdf') + document_new = pymupdf.open() + for page in document: + pixmap = page.get_pixmap( + colorspace=pymupdf.csRGB, + dpi=72, + annots=False, + ) + page_new = document_new.new_page(-1) + page_new.insert_image(rect=page_new.bound(), pixmap=pixmap) + document_new.save( + f'{scriptdir}/resources/2.pdf.compress.pdf', + garbage=3, + deflate=True, + deflate_images=True, + deflate_fonts=True, + pretty=True, + ) + +def test_3087(): + path = os.path.abspath(f'{__file__}/../../tests/resources/test_3087.pdf') + + doc = pymupdf.open(path) + page = doc[0] + print(page.get_images()) + base = doc.extract_image(5)["image"] + mask = doc.extract_image(5)["image"] + page = doc.new_page() + page.insert_image(page.rect, stream=base, mask=mask) + + doc = pymupdf.open(path) + page = doc[0] + print(page.get_images()) + base = doc.extract_image(5)["image"] + mask = doc.extract_image(6)["image"] + page = doc.new_page() + page.insert_image(page.rect, stream=base, mask=mask) diff --git a/tests/test_insertpdf.py b/tests/test_insertpdf.py index 75131e17c..9689beb8c 100644 --- a/tests/test_insertpdf.py +++ b/tests/test_insertpdf.py @@ -5,9 +5,12 @@ - must have different trailers * Try inserting files in a loop. """ + +import io import os import re -import fitz +import pymupdf +from pymupdf import mupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) resources = os.path.join(scriptdir, "resources") @@ -52,41 +55,41 @@ def approx_compare( a, b, max_delta): if ret: print( f'Differ:\n a={a!r}\n b={b!r}') return ret - + def test_insert(): all_text_original = [] # text on input pages all_text_combined = [] # text on resulting output pages # prepare input PDFs - doc1 = fitz.open() + doc1 = pymupdf.open() for i in range(5): # just arbitrary number of pages text = f"doc 1, page {i}" # the 'globally' unique text page = doc1.new_page() page.insert_text((100, 72), text) all_text_original.append(text) - doc2 = fitz.open() + doc2 = pymupdf.open() for i in range(4): text = f"doc 2, page {i}" page = doc2.new_page() page.insert_text((100, 72), text) all_text_original.append(text) - doc3 = fitz.open() + doc3 = pymupdf.open() for i in range(3): text = f"doc 3, page {i}" page = doc3.new_page() page.insert_text((100, 72), text) all_text_original.append(text) - doc4 = fitz.open() + doc4 = pymupdf.open() for i in range(6): text = f"doc 4, page {i}" page = doc4.new_page() page.insert_text((100, 72), text) all_text_original.append(text) - new_doc = fitz.open() # make combined PDF of input files + new_doc = pymupdf.open() # make combined PDF of input files new_doc.insert_pdf(doc1) new_doc.insert_pdf(doc2) new_doc.insert_pdf(doc3) @@ -101,15 +104,232 @@ def test_insert(): def test_issue1417_insertpdf_in_loop(): """Using a context manager instead of explicitly closing files""" f = os.path.join(resources, "1.pdf") - big_doc = fitz.open() + big_doc = pymupdf.open() fd1 = os.open( f, os.O_RDONLY) os.close( fd1) for n in range(0, 1025): - with fitz.open(f) as pdf: + with pymupdf.open(f) as pdf: big_doc.insert_pdf(pdf) - # Create a raw file descriptor. If the above fitz.open() context leaks + # Create a raw file descriptor. If the above pymupdf.open() context leaks # a file descriptor, fd will be seen to increment. fd2 = os.open( f, os.O_RDONLY) assert fd2 == fd1 os.close( fd2) big_doc.close() + + +def _test_insert_adobe(): + path = os.path.abspath( f'{__file__}/../../../PyMuPDF-performance/adobe.pdf') + if not os.path.exists(path): + print(f'Not running test_insert_adobe() because does not exist: {os.path.relpath(path)}') + return + a = pymupdf.Document() + b = pymupdf.Document(path) + a.insert_pdf(b) + + +def _2861_2871_merge_pdf(content: bytes, coverpage: bytes): + with pymupdf.Document(stream=coverpage, filetype="pdf") as coverpage_pdf: + with pymupdf.Document(stream=content, filetype="pdf") as content_pdf: + coverpage_pdf.insert_pdf(content_pdf) + doc = coverpage_pdf.write() + return doc + +def test_2861(): + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2861.pdf') + with open(path, "rb") as content_pdf: + with open(path, "rb") as coverpage_pdf: + content = content_pdf.read() + coverpage = coverpage_pdf.read() + _2861_2871_merge_pdf(content, coverpage) + +def test_2871(): + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2871.pdf') + with open(path, "rb") as content_pdf: + with open(path, "rb") as coverpage_pdf: + content = content_pdf.read() + coverpage = coverpage_pdf.read() + _2861_2871_merge_pdf(content, coverpage) + + +def test_3789(): + + file_path = os.path.abspath(f'{__file__}/../../tests/resources/test_3789.pdf') + result_path = os.path.abspath(f'{__file__}/../../tests/test_3789_out') + pages_per_split = 5 + + # Clean pdf + doc = pymupdf.open(file_path) + tmp = io.BytesIO() + tmp.write(doc.write(garbage=4, deflate=True)) + + source_doc = pymupdf.Document('pdf', tmp.getvalue()) + tmp.close() + + # Calculate the number of pages per split file and the number of split files + page_range = pages_per_split - 1 + split_range = range(0, source_doc.page_count, pages_per_split) + num_splits = len(split_range) + + # Loop through each split range and create a new PDF file + for i, start in enumerate(split_range): + output_doc = pymupdf.open() + + # Determine the ending page for this split file + to_page = start + page_range if i < num_splits - 1 else -1 + output_doc.insert_pdf(source_doc, from_page=start, to_page=to_page) + + # Save the output document to a file and add the path to the list of split files + path = f'{result_path}_{i}.pdf' + output_doc.save(path, garbage=2) + print(f'Have saved to {path=}.') + + # If this is the last split file, exit the loop + if to_page == -1: + break + + +def test_widget_insert(): + """Confirm copy of form fields / widgets.""" + tar = pymupdf.open(os.path.join(resources, "merge-form1.pdf")) + pc0 = tar.page_count # for later assertion + src = pymupdf.open(os.path.join(resources, "interfield-calculation.pdf")) + pc1 = src.page_count # for later assertion + + tarpdf = pymupdf._as_pdf_document(tar) + tar_field_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(tarpdf), "Root/AcroForm/Fields") + ) + tar_co_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(tarpdf), "Root/AcroForm/CO") + ) + srcpdf = pymupdf._as_pdf_document(src) + src_field_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(srcpdf), "Root/AcroForm/Fields") + ) + src_co_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(srcpdf), "Root/AcroForm/CO") + ) + + tar.insert_pdf(src) + new_field_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(tarpdf), "Root/AcroForm/Fields") + ) + new_co_count = mupdf.pdf_array_len( + mupdf.pdf_dict_getp(mupdf.pdf_trailer(tarpdf), "Root/AcroForm/CO") + ) + assert tar.page_count == pc0 + pc1 + assert new_field_count == tar_field_count + src_field_count + assert new_co_count == tar_co_count + src_co_count + + +def names_and_kids(doc): + """Return a list of dictionaries with keys "name" and "kids". + + "name" is the name of a root field in "Root/AcroForm/Fields", and + "kids" is the count of its immediate children. + """ + rc = [] + pdf = pymupdf._as_pdf_document(doc) + fields = mupdf.pdf_dict_getl( + mupdf.pdf_trailer(pdf), + pymupdf.PDF_NAME("Root"), + pymupdf.PDF_NAME("AcroForm"), + pymupdf.PDF_NAME("Fields"), + ) + if not fields.pdf_is_array(): + return rc + root_count = fields.pdf_array_len() + if not root_count: + return rc + for i in range(root_count): + field = fields.pdf_array_get(i) + kids = field.pdf_dict_get(pymupdf.PDF_NAME("Kids")) + kid_count = kids.pdf_array_len() + T = field.pdf_dict_get_text_string(pymupdf.PDF_NAME("T")) + field_dict = {"name": T, "kids": kid_count} + rc.append(field_dict) + return rc + + +def test_merge_checks1(): + """Merge Form PDFs making any duplicate names unique.""" + merge_file1 = os.path.join(resources, "merge-form1.pdf") + merge_file2 = os.path.join(resources, "merge-form2.pdf") + tar = pymupdf.open(merge_file1) + rc0 = names_and_kids(tar) + src = pymupdf.open(merge_file2) + rc1 = names_and_kids(src) + tar.insert_pdf(src, join_duplicates=False) + rc2 = names_and_kids(tar) + assert len(rc2) == len(rc0) + len(rc1) + + +def test_merge_checks2(): + # Join / merge Form PDFs joining any duplicate names in the src PDF. + merge_file1 = os.path.join(resources, "merge-form1.pdf") + merge_file2 = os.path.join(resources, "merge-form2.pdf") + tar = pymupdf.open(merge_file1) + rc0 = names_and_kids(tar) # list of root names and kid counts + names0 = [itm["name"] for itm in rc0] # root names in target + kids0 = sum([itm["kids"] for itm in rc0]) # number of kids in target + + src = pymupdf.open(merge_file2) + rc1 = names_and_kids(src) # list of root namesand kids in source PDF + dup_count = 0 # counts duplicate names in source PDF + dup_kids = 0 # counts the expected kids after merge + + for itm in rc1: # walk root fields of source pdf + if itm["name"] not in names0: # not a duplicate name + continue + # if target field has kids, add their count, else add 1 + dup_kids0 = sum([i["kids"] for i in rc0 if i["name"] == itm["name"]]) + dup_kids += dup_kids0 if dup_kids0 else 1 + # if source field has kids add their count, else add 1 + dup_kids += itm["kids"] if itm["kids"] else 1 + + names1 = [itm["name"] for itm in rc1] # names in source + + tar.insert_pdf(src, join_duplicates=True) # join merging any duplicate names + + rc2 = names_and_kids(tar) # get names and kid counts in resulting PDF + names2 = [itm["name"] for itm in rc2] # resulting names in target + kids2 = sum([itm["kids"] for itm in rc2]) # total resulting kid count + + assert len(set(names0 + names1)) == len(names2) + assert kids2 == dup_kids + + +test_4412_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4412.pdf') + +def test_4412(): + # This tests whether a page from a PDF containing widgets found in the wild + # can be inserted into a new document with default options (widget=True) + # and widget=False. + print() + for widget in True, False: + print(f'{widget=}', flush=1) + with pymupdf.open(test_4412_path) as doc, pymupdf.open() as new_doc: + buf = io.BytesIO() + new_doc.insert_pdf(doc, from_page=1, to_page=1) + new_doc.save(buf) + assert len(new_doc)==1 + + +def test_4571(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4571.pdf') + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4571_out.pdf') + with pymupdf.open() as newdocument: + with pymupdf.open(path) as document: + newdocument.insert_pdf(document) + newdocument.save(path_out, garbage=4, clean=False) + print(f'Have saved to: {path_out=}') + with open(path_out, 'rb') as f: + content = f.read() + if pymupdf.mupdf_version_tuple >= (1, 26, 6): + # Correct. + assert b'<>' in content + else: + # Incorrect. + assert b'<>' in content + diff --git a/tests/test_linebreaks.py b/tests/test_linebreaks.py new file mode 100644 index 000000000..3085faa25 --- /dev/null +++ b/tests/test_linebreaks.py @@ -0,0 +1,16 @@ +import pymupdf + +import os.path + + +def test_linebreaks(): + """Test avoidance of linebreaks.""" + path = os.path.abspath(f"{__file__}/../../tests/resources/test-linebreaks.pdf") + doc = pymupdf.open(path) + page = doc[0] + tp = page.get_textpage(flags=pymupdf.TEXTFLAGS_WORDS) + word_count = len(page.get_text("words", textpage=tp)) + line_count1 = len(page.get_text(textpage=tp).splitlines()) + line_count2 = len(page.get_text(sort=True, textpage=tp).splitlines()) + assert word_count == line_count1 + assert line_count2 < line_count1 / 2 diff --git a/tests/test_linequad.py b/tests/test_linequad.py index 9f15116d1..603f9d92b 100644 --- a/tests/test_linequad.py +++ b/tests/test_linequad.py @@ -4,7 +4,7 @@ """ import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "quad-calc-0.pdf") @@ -12,14 +12,14 @@ def test_quadcalc(): text = " angle 327" # search for this text - doc = fitz.open(filename) + doc = pymupdf.open(filename) page = doc[0] # This special page has one block with one line, and # its last span contains the searched text. block = page.get_text("dict", flags=0)["blocks"][0] line = block["lines"][0] # compute quad of last span in line - lineq = fitz.recover_line_quad(line, spans=line["spans"][-1:]) + lineq = pymupdf.recover_line_quad(line, spans=line["spans"][-1:]) # let text search find the text returning quad coordinates rl = page.search_for(text, quads=True) diff --git a/tests/test_memory.py b/tests/test_memory.py new file mode 100644 index 000000000..cfaccbd92 --- /dev/null +++ b/tests/test_memory.py @@ -0,0 +1,240 @@ +import pymupdf + +import gc +import os +import platform +import sys + + +def merge_pdf(content: bytes, coverpage: bytes): + with pymupdf.Document(stream=coverpage, filetype='pdf') as coverpage_pdf: + with pymupdf.Document(stream=content, filetype='pdf') as content_pdf: + coverpage_pdf.insert_pdf(content_pdf) + doc = coverpage_pdf.write() + return doc + +def test_2791(): + ''' + Check for memory leaks. + ''' + if os.environ.get('PYODIDE_ROOT'): + print('test_2791(): not running on Pyodide - No module named \'psutil\'.') + return + + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.') + return + if platform.system().startswith('MSYS_NT-'): + print(f'test_2791(): not running on msys2 - psutil not available.') + return + #stat_type = 'tracemalloc' + stat_type = 'psutil' + if stat_type == 'tracemalloc': + import tracemalloc + tracemalloc.start(10) + def get_stat(): + current, peak = tracemalloc.get_traced_memory() + return current + elif stat_type == 'psutil': + # We use RSS, as used by mprof. + import psutil + process = psutil.Process() + def get_stat(): + return process.memory_info().rss + else: + def get_stat(): + return 0 + n = 1000 + verbose = False + if platform.python_implementation() == 'GraalVM': + n = 10 + verbose = True + stats = [1] * n + for i in range(n): + if verbose: + print(f'{i+1}/{n}.', flush=1) + root = os.path.abspath(f'{__file__}/../../tests/resources') + with open(f'{root}/test_2791_content.pdf', 'rb') as content_pdf: + with open(f'{root}/test_2791_coverpage.pdf', 'rb') as coverpage_pdf: + content = content_pdf.read() + coverpage = coverpage_pdf.read() + merge_pdf(content, coverpage) + sys.stdout.flush() + + gc.collect() + stats[i] = get_stat() + + print(f'Memory usage {stat_type=}.') + for i, stat in enumerate(stats): + sys.stdout.write(f' {stat}') + #print(f' {i}: {stat}') + sys.stdout.write('\n') + first = stats[2] + last = stats[-1] + ratio = last / first + print(f'{first=} {last=} {ratio=}') + + if platform.system() != 'Linux': + # Values from psutil indicate larger memory leaks on non-Linux. Don't + # yet know whether this is because rss is measured differently or a + # genuine leak is being exposed. + print(f'test_2791(): not asserting ratio because not running on Linux.') + elif not hasattr(pymupdf, 'mupdf'): + # Classic implementation has unfixed leaks. + print(f'test_2791(): not asserting ratio because using classic implementation.') + elif [int(x) for x in platform.python_version_tuple()[:2]] < [3, 11]: + print(f'test_2791(): not asserting ratio because python version less than 3.11: {platform.python_version()=}.') + elif stat_type == 'tracemalloc': + # With tracemalloc Before fix to src/extra.i's calls to + # PyObject_CallMethodObjArgs, ratio was 4.26; after it was 1.40. + assert ratio > 1 and ratio < 1.6 + elif stat_type == 'psutil': + # Prior to fix, ratio was 1.043. After the fix, improved to 1.005, but + # varies and sometimes as high as 1.010. + # 2024-06-03: have seen 0.99919 on musl linux, and sebras reports .025. + assert ratio >= 0.990 and ratio < 1.027, f'{ratio=}' + else: + pass + + +def test_4090(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4090(): not running on Pyodide - No module named \'psutil\'.') + return + + print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.') + import psutil + process = psutil.Process() + rsss = list() + def rss(): + ret = process.memory_info().rss + rsss.append(ret) + return ret + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4090.pdf') + for i in range(100): + d = dict() + d[i] = dict() + with pymupdf.open(path) as document: + for j, page in enumerate(document): + d[i][j] = page.get_text('rawdict') + print(f'test_4090(): {i}: {rss()=}') + print(f'test_4090(): {rss()=}') + gc.collect() + print(f'test_4090(): {rss()=}') + r1 = rsss[2] + r2 = rsss[-1] + r = r2 / r1 + if platform.system() == 'Windows': + assert 0.93 <= r < 1.05, f'{r1=} {r2=} {r=}.' + else: + assert 0.95 <= r < 1.05, f'{r1=} {r2=} {r=}.' + + +def show_tracemalloc_diff(snapshot1, snapshot2): + top_stats = snapshot2.compare_to(snapshot1, 'lineno') + n = 0 + mem = 0 + for i in top_stats: + n += i.count + mem += i.size + print(f'{n=}') + print(f'{mem=}') + print("Top 10:") + for stat in top_stats[:10]: + print(f' {stat}') + snapshot_diff = snapshot2.compare_to(snapshot1, key_type='lineno') + print(f'snapshot_diff:') + count_diff = 0 + size_diff = 0 + for i, s in enumerate(snapshot_diff): + print(f' {i}: {s.count=} {s.count_diff=} {s.size=} {s.size_diff=} {s.traceback=}') + count_diff += s.count_diff + size_diff += s.size_diff + print(f'{count_diff=} {size_diff=}') + + + +def test_4125(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4125(): not running on Pyodide - No module named \'psutil\'.') + return + + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.') + return + if platform.system().startswith('MSYS_NT-'): + print(f'test_4125(): not running on msys2 - psutil not available.') + return + + print('') + print(f'test_4125(): {platform.python_version()=}.') + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4125.pdf') + import gc + import psutil + + root = os.path.normpath(f'{__file__}/../..') + sys.path.insert(0, root) + try: + import pipcl + finally: + del sys.path[0] + + process = psutil.Process() + + class State: pass + state = State() + state.rsss = list() + state.prev = None + + def get_stat(): + rss = process.memory_info().rss + if not state.rsss: + state.prev = rss + state.rsss.append(rss) + drss = rss - state.prev + state.prev = rss + print(f'test_4125():' + f' {rss=:,}' + f' rss-rss0={rss-state.rsss[0]:,}' + f' drss={drss:,}' + f'.' + ) + + for i in range(10): + with pymupdf.open(path) as document: + for page in document: + for image_info in page.get_images(full=True): + xref, smask, width, height, bpc, colorspace, alt_colorspace, name, filter_, referencer = image_info + pixmap = pymupdf.Pixmap(document, xref) + if pixmap.colorspace != pymupdf.csRGB: + pixmap2 = pymupdf.Pixmap(pymupdf.csRGB, pixmap) + del pixmap2 + del pixmap + pymupdf.TOOLS.store_shrink(100) + pymupdf.TOOLS.glyph_cache_empty() + gc.collect() + get_stat() + + if platform.system() == 'Linux': + rss_delta = state.rsss[-1] - state.rsss[3] + print(f'{rss_delta=}') + pv = platform.python_version_tuple() + pv = (int(pv[0]), int(pv[1])) + if pv < (3, 11): + # Python < 3.11 has less reliable memory usage so we exclude. + print(f'test_4125(): Not checking on {platform.python_version()=} because < 3.11.') + elif pymupdf.mupdf_version_tuple < (1, 25, 2): + rss_delta_expected = 4915200 * (len(state.rsss) - 3) + assert abs(1 - rss_delta / rss_delta_expected) < 0.15, f'{rss_delta_expected=}' + else: + # Before the fix, each iteration would leak 4.9MB. + rss_delta_max = 100*1000 * (len(state.rsss) - 3) + assert rss_delta < rss_delta_max + else: + # Unfortunately on non-Linux Github test machines the RSS values seem + # to vary a lot, which causes spurious test failures. So for at least + # we don't actually check. + # + print(f'Not checking results because non-Linux behaviour is too variable.') diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 9538ef994..9a5ef8ca4 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -4,13 +4,14 @@ """ import json import os +import sys -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "001003ED.pdf") metafile = os.path.join(scriptdir, "resources", "metadata.txt") -doc = fitz.open(filename) +doc = pymupdf.open(filename) def test_metadata(): @@ -24,3 +25,19 @@ def test_erase_meta(): statement1 = doc.xref_get_key(-1, "Info")[1] == "null" statement2 = "Info" not in doc.xref_get_keys(-1) assert statement2 or statement1 + + +def test_3237(): + filename = os.path.abspath(f'{__file__}/../../tests/resources/001003ED.pdf') + with pymupdf.open(filename) as doc: + # We need to explicitly encode in utf8 on windows. + metadata1 = doc.metadata + metadata1 = repr(metadata1).encode('utf8') + doc.set_metadata({}) + + metadata2 = doc.metadata + metadata2 = repr(metadata2).encode('utf8') + print(f'{metadata1=}') + print(f'{metadata2=}') + assert metadata1 == b'{\'format\': \'PDF 1.6\', \'title\': \'RUBRIK_Editorial_01-06.indd\', \'author\': \'Natalie Schaefer\', \'subject\': \'\', \'keywords\': \'\', \'creator\': \'\', \'producer\': \'Acrobat Distiller 7.0.5 (Windows)\', \'creationDate\': "D:20070113191400+01\'00\'", \'modDate\': "D:20070120104154+01\'00\'", \'trapped\': \'\', \'encryption\': None}' + assert metadata2 == b"{'format': 'PDF 1.6', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': '', 'producer': '', 'creationDate': '', 'modDate': '', 'trapped': '', 'encryption': None}" diff --git a/tests/test_mupdf_regressions.py b/tests/test_mupdf_regressions.py new file mode 100644 index 000000000..8816260f7 --- /dev/null +++ b/tests/test_mupdf_regressions.py @@ -0,0 +1,98 @@ +import pymupdf +import os +import gentle_compare + +scriptdir = os.path.abspath(os.path.dirname(__file__)) + + +def test_707448(): + """Confirm page content cleaning does not destroy page appearance.""" + filename = os.path.join(scriptdir, "resources", "test-707448.pdf") + doc = pymupdf.open(filename) + page = doc[0] + words0 = page.get_text("words") + page.clean_contents(sanitize=True) + words1 = page.get_text("words") + assert gentle_compare.gentle_compare(words0, words1) + + +def test_707673(): + """Confirm page content cleaning does not destroy page appearance. + + Fails starting with MuPDF v1.23.9. + + Fixed in: + commit 779b8234529cb82aa1e92826854c7bb98b19e44b (golden/master) + """ + filename = os.path.join(scriptdir, "resources", "test-707673.pdf") + doc = pymupdf.open(filename) + page = doc[0] + words0 = page.get_text("words") + page.clean_contents(sanitize=True) + words1 = page.get_text("words") + ok = gentle_compare.gentle_compare(words0, words1) + assert ok + + +def test_707727(): + """Confirm page content cleaning does not destroy page appearance. + + MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707727 + """ + filename = os.path.join(scriptdir, "resources", "test_3362.pdf") + doc = pymupdf.open(filename) + page = doc[0] + pix0 = page.get_pixmap() + page.clean_contents(sanitize=True) + page = doc.reload_page(page) # required to prevent re-use + pix1 = page.get_pixmap() + rms = gentle_compare.pixmaps_rms(pix0, pix1) + print(f'{rms=}', flush=1) + pix0.save(os.path.normpath(f'{__file__}/../../tests/test_707727_pix0.png')) + pix1.save(os.path.normpath(f'{__file__}/../../tests/test_707727_pix1.png')) + if pymupdf.mupdf_version_tuple >= (1, 25, 2): + # New sanitising gives small fp rounding errors. + assert rms < 0.05 + else: + assert rms == 0 + + +def test_707721(): + """Confirm text extraction works for nested MCID with Type 3 fonts. + PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3357 + MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707721 + """ + filename = os.path.join(scriptdir, "resources", "test_3357.pdf") + doc = pymupdf.open(filename) + page = doc[0] + ok = page.get_text() + assert ok + + +def test_3376(): + """Check fix of MuPDF bug 707733. + + https://bugs.ghostscript.com/show_bug.cgi?id=707733 + PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3376 + + Test file contains a redaction for the first 3 words: "Table of Contents". + Test strategy: + - extract all words (sorted) + - apply redactions + - extract words again + - confirm: we now have 3 words less and remaining words are equal. + """ + filename = os.path.join(scriptdir, "resources", "test_3376.pdf") + doc = pymupdf.open(filename) + page = doc[0] + words0 = page.get_text("words", sort=True) + words0_s = words0[:3] # first 3 words + words0_e = words0[3:] # remaining words + assert " ".join([w[4] for w in words0_s]) == "Table of Contents" + + page.apply_redactions() + + words1 = page.get_text("words", sort=True) + + ok = gentle_compare.gentle_compare(words0_e, words1) + assert ok diff --git a/tests/test_named_links.py b/tests/test_named_links.py new file mode 100644 index 000000000..0ff070da4 --- /dev/null +++ b/tests/test_named_links.py @@ -0,0 +1,105 @@ +import pymupdf + +import os + + +def test_2886(): + """Confirm correct insertion of a 'named' link.""" + if not hasattr(pymupdf, "mupdf"): + print(f"test_2886(): not running on classic.") + return + + path = os.path.abspath(f"{__file__}/../../tests/resources/cython.pdf") + doc = pymupdf.open(path) + # name "Doc-Start" is a valid named destination in that file + link = { + "kind": pymupdf.LINK_NAMED, + "from": pymupdf.Rect(0, 0, 50, 50), + "name": "Doc-Start", + } + # insert this link in an arbitrary page & rect + page = doc[-1] + page.insert_link(link) + # need this to update the internal MuPDF annotations array + page = doc.reload_page(page) + + # our new link must be the last in the following list + links = page.get_links() + l_dict = links[-1] + assert l_dict["kind"] == pymupdf.LINK_NAMED + assert l_dict["nameddest"] == link["name"] + assert l_dict["from"] == link["from"] + + +def test_2922(): + """Confirm correct recycling of a 'named' link. + + Re-insertion of a named link item in 'Page.get_links()' does not have + the required "name" key. We test the fallback here that uses key + "nameddest" instead. + """ + if not hasattr(pymupdf, "mupdf"): + print(f"test_2922(): not running on classic.") + return + + path = os.path.abspath(f"{__file__}/../../tests/resources/cython.pdf") + doc = pymupdf.open(path) + page = doc[2] # page has a few links, all are named + links = page.get_links() # list of all links + link0 = links[0] # take arbitrary link (1st one is ok) + page.insert_link(link0) # insert it again + page = doc.reload_page(page) # ensure page updates + links = page.get_links() # access all links again + link1 = links[-1] # re-inserted link + + # confirm equality of relevant key-values + assert link0["nameddest"] == link1["nameddest"] + assert link0["page"] == link1["page"] + assert link0["to"] == link1["to"] + assert link0["from"] == link1["from"] + + +def test_3301(): + """Test correct differentiation between URI and LAUNCH links. + + Links encoded as /URI in PDF are converted to either LINK_URI or + LINK_LAUNCH in PyMuPDF. + This function ensures that the 'Link.uri' containing a ':' colon + is converted to a URI if not explicitly starting with "file://". + """ + if not hasattr(pymupdf, "mupdf"): + print(f"test_3301(): not running on classic.") + return + + # list of links and their expected link "kind" upon extraction + text = { + "https://www.google.de": pymupdf.LINK_URI, + "http://www.google.de": pymupdf.LINK_URI, + "mailto:jorj.x.mckie@outlook.de": pymupdf.LINK_URI, + "www.wikipedia.de": pymupdf.LINK_LAUNCH, + "awkward:resource": pymupdf.LINK_URI, + "ftp://www.google.de": pymupdf.LINK_URI, + "some.program": pymupdf.LINK_LAUNCH, + "file://some.program": pymupdf.LINK_LAUNCH, + "another.exe": pymupdf.LINK_LAUNCH, + } + + # make enough "from" rectangles + r = pymupdf.Rect(0, 0, 50, 20) + rects = [r + (0, r.height * i, 0, r.height * i) for i in range(len(text.keys()))] + + # make test page and insert above links as kind=LINK_URI + doc = pymupdf.open() + page = doc.new_page() + for i, k in enumerate(text.keys()): + link = {"kind": pymupdf.LINK_URI, "uri": k, "from": rects[i]} + page.insert_link(link) + + # re-cycle the PDF preparing for link extraction + pdfdata = doc.write() + doc = pymupdf.open("pdf", pdfdata) + page = doc[0] + for link in page.get_links(): + # Extract the link text. Must be 'file' or 'uri'. + t = link["uri"] if (_ := link.get("file")) is None else _ + assert text[t] == link["kind"] diff --git a/tests/test_nonpdf.py b/tests/test_nonpdf.py index b56761edd..043ce360f 100644 --- a/tests/test_nonpdf.py +++ b/tests/test_nonpdf.py @@ -5,11 +5,11 @@ """ import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "Bezier.epub") -doc = fitz.open(filename) +doc = pymupdf.open(filename) def test_isnopdf(): @@ -31,5 +31,5 @@ def test_pageids(): def test_layout(): """Memorize a page location, re-layout with ISO-A4, assert pre-determined location.""" loc = doc.make_bookmark((5, 11)) - doc.layout(fitz.Rect(fitz.paper_rect("a4"))) + doc.layout(pymupdf.Rect(pymupdf.paper_rect("a4"))) assert doc.find_bookmark(loc) == (5, 6) diff --git a/tests/test_object_manipulation.py b/tests/test_object_manipulation.py index f3edf1b86..45e84f6bc 100644 --- a/tests/test_object_manipulation.py +++ b/tests/test_object_manipulation.py @@ -5,7 +5,7 @@ proper page property. 3. Read the PDF trailer and verify it has the keys "/Root", "/ID", etc. """ -import fitz +import pymupdf import os scriptdir = os.path.abspath(os.path.dirname(__file__)) @@ -14,14 +14,14 @@ def test_rotation1(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() page.set_rotation(270) assert doc.xref_get_key(page.xref, "Rotate") == ("int", "270") def test_rotation2(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() doc.xref_set_key(page.xref, "Rotate", "270") assert page.rotation == 270 @@ -29,10 +29,46 @@ def test_rotation2(): def test_trailer(): """Access PDF trailer information.""" - doc = fitz.open(filename) + doc = pymupdf.open(filename) xreflen = doc.xref_length() _, xreflen_str = doc.xref_get_key(-1, "Size") assert xreflen == int(xreflen_str) trailer_keys = doc.xref_get_keys(-1) assert "ID" in trailer_keys assert "Root" in trailer_keys + + +def test_valid_name(): + """Verify correct PDF names in method xref_set_key.""" + doc = pymupdf.open() + page = doc.new_page() + + # testing name in "key": confirm correct spec is accepted + doc.xref_set_key(page.xref, "Rotate", "90") + assert page.rotation == 90 + + # check wrong spec is detected + error_generated = False + try: + # illegal char in name (white space) + doc.xref_set_key(page.xref, "my rotate", "90") + except ValueError as e: + assert str(e) == "bad 'key'" + error_generated = True + assert error_generated + + # test name in "value": confirm correct spec is accepted + doc.xref_set_key(page.xref, "my_rotate/something", "90") + assert doc.xref_get_key(page.xref, "my_rotate/something") == ("int", "90") + doc.xref_set_key(page.xref, "my_rotate", "/90") + assert doc.xref_get_key(page.xref, "my_rotate") == ("name", "/90") + + # check wrong spec is detected + error_generated = False + try: + # no slash inside name allowed + doc.xref_set_key(page.xref, "my_rotate", "/9/0") + except ValueError as e: + assert str(e) == "bad 'value'" + error_generated = True + assert error_generated diff --git a/tests/test_objectstreams.py b/tests/test_objectstreams.py new file mode 100644 index 000000000..257318681 --- /dev/null +++ b/tests/test_objectstreams.py @@ -0,0 +1,83 @@ +import pymupdf + + +def test_objectstream1(): + """Test save option "use_objstms". + This option compresses PDF object definitions into a special object type + "ObjStm". We test its presence by searching for that /Type. + """ + if not hasattr(pymupdf, "mupdf"): + # only implemented for rebased + return + + # make some arbitrary page with content + text = "Hello, World! Hallo, Welt!" + doc = pymupdf.open() + page = doc.new_page() + rect = (50, 50, 200, 500) + + page.insert_htmlbox(rect, text) # place into the rectangle + _ = doc.write(use_objstms=True) + found = False + for xref in range(1, doc.xref_length()): + objstring = doc.xref_object(xref, compressed=True) + if "/Type/ObjStm" in objstring: + found = True + break + assert found, "No object stream found" + + +def test_objectstream2(): + """Test save option "use_objstms". + This option compresses PDF object definitions into a special object type + "ObjStm". We test its presence by searching for that /Type. + """ + if not hasattr(pymupdf, "mupdf"): + # only implemented for rebased + return + + # make some arbitrary page with content + text = "Hello, World! Hallo, Welt!" + doc = pymupdf.open() + page = doc.new_page() + rect = (50, 50, 200, 500) + + page.insert_htmlbox(rect, text) # place into the rectangle + _ = doc.write(use_objstms=False) + + found = False + for xref in range(1, doc.xref_length()): + objstring = doc.xref_object(xref, compressed=True) + if "/Type/ObjStm" in objstring: + found = True + break + assert not found, "Unexpected: Object stream found!" + + +def test_objectstream3(): + """Test ez_save(). + Should automatically use object streams + """ + if not hasattr(pymupdf, "mupdf"): + # only implemented for rebased + return + import io + + fp = io.BytesIO() + + # make some arbitrary page with content + text = "Hello, World! Hallo, Welt!" + doc = pymupdf.open() + page = doc.new_page() + rect = (50, 50, 200, 500) + + page.insert_htmlbox(rect, text) # place into the rectangle + + doc.ez_save(fp) # save PDF to memory + found = False + for xref in range(1, doc.xref_length()): + objstring = doc.xref_object(xref, compressed=True) + if "/Type/ObjStm" in objstring: + found = True + break + assert found, "No object stream found!" diff --git a/tests/test_optional_content.py b/tests/test_optional_content.py index 454854658..210402791 100644 --- a/tests/test_optional_content.py +++ b/tests/test_optional_content.py @@ -1,9 +1,10 @@ """ Test of Optional Content code. """ + import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "joined.pdf") @@ -11,7 +12,7 @@ def test_oc1(): """Arbitrary calls to OC code to get coverage.""" - doc = fitz.open() + doc = pymupdf.open() ocg1 = doc.add_ocg("ocg1") ocg2 = doc.add_ocg("ocg2") ocg3 = doc.add_ocg("ocg3") @@ -27,10 +28,10 @@ def test_oc1(): def test_oc2(): # source file with at least 4 pages - src = fitz.open(filename) + src = pymupdf.open(filename) # new PDF with one page - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() # define the 4 rectangle quadrants to receive the source pages @@ -61,3 +62,78 @@ def test_oc2(): assert set((ocg0, ocg1, ocg2, ocg3)) == set(tuple(doc.get_ocgs().keys())) doc.get_ocmd(ocmd0) page.get_oc_items() + + +def test_3143(): + """Support for non-ascii layer names.""" + doc = pymupdf.open(os.path.join(scriptdir, "resources", "test-3143.pdf")) + page = doc[0] + set0 = set([l["text"] for l in doc.layer_ui_configs()]) + set1 = set([p["layer"] for p in page.get_drawings()]) + set2 = set([b[2] for b in page.get_bboxlog(layers=True)]) + assert set0 == set1 == set2 + + +def test_3180(): + doc = pymupdf.open() + page = doc.new_page() + + # Define the items for the combo box + combo_items = ['first', 'second', 'third'] + + # Create a combo box field + combo_box = pymupdf.Widget() # create a new widget + combo_box.field_type = pymupdf.PDF_WIDGET_TYPE_COMBOBOX + combo_box.field_name = "myComboBox" + combo_box.field_value = combo_items[0] + combo_box.choice_values = combo_items + combo_box.rect = pymupdf.Rect(50, 50, 200, 75) # position of the combo box + combo_box.script_change = """ + var value = event.value; + app.alert('You selected: ' + value); + + //var group_id = optional_content_group_ids[value]; + + """ + + # Insert the combo box into the page + # https://pymupdf.readthedocs.io/en/latest/page.html#Page.add_widget + page.add_widget(combo_box) + + # Create optional content groups + # https://github.com/pymupdf/PyMuPDF-Utilities/blob/master/jupyter-notebooks/optional-content.ipynb + + + # Load images and create OCGs for each + optional_content_group_ids = {} + for i, item in enumerate(combo_items): + optional_content_group_id = doc.add_ocg(item, on=False) + optional_content_group_ids[item] = optional_content_group_id + rect = pymupdf.Rect(50, 100, 250, 300) + image_file_name = f'{item}.png' + # xref = page.insert_image( + # rect, + # filename=image_file_name, + # oc=optional_content_group_id, + # ) + + + first_id = optional_content_group_ids['first'] + second_id = optional_content_group_ids['second'] + third_id = optional_content_group_ids['third'] + + # https://pymupdf.readthedocs.io/en/latest/document.html#Document.set_layer + + + doc.set_layer(-1, basestate="OFF") + layers = doc.get_layer() + doc.set_layer(config=-1, on=[first_id]) + + # https://pymupdf.readthedocs.io/en/latest/document.html#Document.set_layer_ui_config + # configs = doc.layer_ui_configs() + # doc.set_layer_ui_config(0, pymupdf.PDF_OC_ON) + # doc.set_layer_ui_config('third', action=2) + + # Save the PDF + doc.save(os.path.abspath(f'{__file__}/../../tests/test_3180.pdf')) + doc.close() diff --git a/tests/test_page_links.py b/tests/test_page_links.py new file mode 100644 index 000000000..fb36bf2d4 --- /dev/null +++ b/tests/test_page_links.py @@ -0,0 +1,17 @@ +import pymupdf + +import os + + +def test_page_links_generator(): + # open some arbitrary PDF + path = os.path.abspath(f"{__file__}/../../tests/resources/2.pdf") + doc = pymupdf.open(path) + + # select an arbitrary page + page = doc[-1] + + # iterate over pages.links + link_generator = page.links() + links = list(link_generator) + assert len(links) == 7 diff --git a/tests/test_pagedelete.py b/tests/test_pagedelete.py index 42be88fe8..65f42e4b6 100644 --- a/tests/test_pagedelete.py +++ b/tests/test_pagedelete.py @@ -13,22 +13,26 @@ - the remaining TOC items still point to the correct page - the document has no more links at all """ -import fitz +import os + +import pymupdf + +scriptdir = os.path.dirname(__file__) page_count = 100 # initial document length r = range(5, 35, 5) # contains page numbers we will delete # insert this link on pages after first deleted one link = { - "from": fitz.Rect(100, 100, 120, 120), - "kind": fitz.LINK_GOTO, + "from": pymupdf.Rect(100, 100, 120, 120), + "kind": pymupdf.LINK_GOTO, "page": r[0], - "to": fitz.Point(100, 100), + "to": pymupdf.Point(100, 100), } def test_deletion(): # First prepare the document. - doc = fitz.open() + doc = pymupdf.open() toc = [] for i in range(page_count): page = doc.new_page() # make a page @@ -67,3 +71,45 @@ def test_deletion(): doc.copy_page(0) doc.move_page(0) doc.fullcopy_page(0) + + +def test_3094(): + path = os.path.abspath(f"{__file__}/../../tests/resources/test_2871.pdf") + document = pymupdf.open(path) + pnos = [i for i in range(0, document.page_count, 2)] + document.delete_pages(pnos) + + +def test_3150(): + """Assert correct functioning for problem file. + + Implicitly also check use of new MuPDF function + pdf_rearrange_pages() since version 1.23.9. + """ + filename = os.path.join(scriptdir, "resources", "test-3150.pdf") + pages = [3, 3, 3, 2, 3, 1, 0, 0] + doc = pymupdf.open(filename) + doc.select(pages) + assert doc.page_count == len(pages) + + +def test_4462(): + path0 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_0.pdf') + path1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_1.pdf') + path2 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_2.pdf') + with pymupdf.open() as document: + document.new_page() + document.new_page() + document.new_page() + document.new_page() + document.save(path0) + with pymupdf.open(path0) as document: + assert len(document) == 4 + document.delete_page(-1) + document.save(path1) + with pymupdf.open(path1) as document: + assert len(document) == 3 + document.delete_pages(-1) + document.save(path2) + with pymupdf.open(path2) as document: + assert len(document) == 2 diff --git a/tests/test_pagelabels.py b/tests/test_pagelabels.py index 539df4631..d5334f49f 100644 --- a/tests/test_pagelabels.py +++ b/tests/test_pagelabels.py @@ -2,12 +2,13 @@ Define some page labels in a PDF. Check success in various aspects. """ -import fitz + +import pymupdf def make_doc(): """Makes a PDF with 10 pages.""" - doc = fitz.open() + doc = pymupdf.open() for i in range(10): page = doc.new_page() return doc @@ -35,6 +36,23 @@ def test_setlabels(): doc.set_page_labels(make_labels()) page_labels = [p.get_label() for p in doc] answer = ["A-1", "A-2", "A-3", "A-4", "I", "II", "III", "IV", "V", "VI"] - assert page_labels == answer, f'page_labels={page_labels}' + assert page_labels == answer, f"page_labels={page_labels}" assert doc.get_page_numbers("V") == [8] assert doc.get_page_labels() == make_labels() + + +def test_labels_styleA(): + """Test correct indexing for styles "a", "A".""" + doc = make_doc() + labels = [ + {"startpage": 0, "prefix": "", "style": "a", "firstpagenum": 1}, + {"startpage": 5, "prefix": "", "style": "A", "firstpagenum": 1}, + ] + doc.set_page_labels(labels) + pdfdata = doc.tobytes() + doc.close() + doc = pymupdf.open("pdf", pdfdata) + answer = ["a", "b", "c", "d", "e", "A", "B", "C", "D", "E"] + page_labels = [page.get_label() for page in doc] + assert page_labels == answer + assert doc.get_page_labels() == labels diff --git a/tests/test_pixmap.py b/tests/test_pixmap.py index 889858b09..602658ac2 100644 --- a/tests/test_pixmap.py +++ b/tests/test_pixmap.py @@ -4,10 +4,19 @@ * make pixmap from a PDF xref and compare with extracted image * pixmap from file and from binary image and compare """ + +import pymupdf +import gentle_compare + import os +import platform +import subprocess +import sys import tempfile +import pytest +import textwrap +import time -import fitz scriptdir = os.path.abspath(os.path.dirname(__file__)) epub = os.path.join(scriptdir, "resources", "Bezier.epub") @@ -17,7 +26,7 @@ def test_pagepixmap(): # pixmap from an EPUB page - doc = fitz.open(epub) + doc = pymupdf.open(epub) page = doc[0] pix = page.get_pixmap() assert pix.irect == page.rect.irect @@ -28,11 +37,11 @@ def test_pagepixmap(): def test_pdfpixmap(): # pixmap from xref in a PDF - doc = fitz.open(pdf) + doc = pymupdf.open(pdf) # take first image item of first page img = doc.get_page_images(0)[0] # make pixmap of it - pix = fitz.Pixmap(doc, img[0]) + pix = pymupdf.Pixmap(doc, img[0]) # assert pixmap properties assert pix.width == img[2] assert pix.height == img[3] @@ -45,9 +54,9 @@ def test_pdfpixmap(): def test_filepixmap(): # pixmaps from file and from stream # should lead to same result - pix1 = fitz.Pixmap(imgfile) + pix1 = pymupdf.Pixmap(imgfile) stream = open(imgfile, "rb").read() - pix2 = fitz.Pixmap(stream) + pix2 = pymupdf.Pixmap(stream) assert repr(pix1) == repr(pix2) assert pix1.digest == pix2.digest @@ -55,34 +64,34 @@ def test_filepixmap(): def test_pilsave(): # pixmaps from file then save to pillow image # make pixmap from this and confirm equality - pix1 = fitz.Pixmap(imgfile) try: + pix1 = pymupdf.Pixmap(imgfile) stream = pix1.pil_tobytes("JPEG") - pix2 = fitz.Pixmap(stream) + pix2 = pymupdf.Pixmap(stream) assert repr(pix1) == repr(pix2) - except: - pass + except ModuleNotFoundError: + assert platform.system() in ('Windows', 'Emscripten') and sys.maxsize == 2**31 - 1 def test_save(tmpdir): # pixmaps from file then save to image # make pixmap from this and confirm equality - pix1 = fitz.Pixmap(imgfile) + pix1 = pymupdf.Pixmap(imgfile) outfile = os.path.join(tmpdir, "foo.png") pix1.save(outfile, output="png") # read it back - pix2 = fitz.Pixmap(outfile) + pix2 = pymupdf.Pixmap(outfile) assert repr(pix1) == repr(pix2) def test_setalpha(): # pixmap from JPEG file, then add an alpha channel # with 30% transparency - pix1 = fitz.Pixmap(imgfile) + pix1 = pymupdf.Pixmap(imgfile) opa = int(255 * 0.3) # corresponding to 30% transparency alphas = [opa] * (pix1.width * pix1.height) alphas = bytearray(alphas) - pix2 = fitz.Pixmap(pix1, 1) # add alpha channel + pix2 = pymupdf.Pixmap(pix1, 1) # add alpha channel pix2.set_alpha(alphas) # make image 30% transparent samples = pix2.samples # copy of samples # confirm correct the alpha bytes @@ -90,11 +99,15 @@ def test_setalpha(): assert t == alphas def test_color_count(): - pm = fitz.Pixmap(imgfile) + ''' + This is known to fail if MuPDF is built without PyMuPDF's custom config.h, + e.g. in Linux system installs. + ''' + pm = pymupdf.Pixmap(imgfile) assert pm.color_count() == 40624 def test_memoryview(): - pm = fitz.Pixmap(imgfile) + pm = pymupdf.Pixmap(imgfile) samples = pm.samples_mv assert isinstance( samples, memoryview) print( f'samples={samples} samples.itemsize={samples.itemsize} samples.nbytes={samples.nbytes} samples.ndim={samples.ndim} samples.shape={samples.shape} samples.strides={samples.strides}') @@ -109,7 +122,7 @@ def test_memoryview(): assert color == (83, 66, 40) def test_samples_ptr(): - pm = fitz.Pixmap(imgfile) + pm = pymupdf.Pixmap(imgfile) samples = pm.samples_ptr print( f'samples={samples}') assert isinstance( samples, int) @@ -117,15 +130,523 @@ def test_samples_ptr(): def test_2369(): width, height = 13, 37 - image = fitz.Pixmap(fitz.csGRAY, width, height, b"\x00" * (width * height), False) + image = pymupdf.Pixmap(pymupdf.csGRAY, width, height, b"\x00" * (width * height), False) - with fitz.Document(stream=image.tobytes(output="pam"), filetype="pam") as doc: + with pymupdf.Document(stream=image.tobytes(output="pam"), filetype="pam") as doc: test_pdf_bytes = doc.convert_to_pdf() - with fitz.Document(stream=test_pdf_bytes) as doc: + with pymupdf.Document(stream=test_pdf_bytes) as doc: page = doc[0] img_xref = page.get_images()[0][0] img = doc.extract_image(img_xref) img_bytes = img["image"] - fitz.Pixmap(img_bytes) + pymupdf.Pixmap(img_bytes) + +def test_page_idx_int(): + doc = pymupdf.open(pdf) + with pytest.raises(AssertionError): + doc["0"] + assert doc[0] + assert doc[(0,0)] + +def test_fz_write_pixmap_as_jpeg(): + width, height = 13, 37 + image = pymupdf.Pixmap(pymupdf.csGRAY, width, height, b"\x00" * (width * height), False) + + with pymupdf.Document(stream=image.tobytes(output="jpeg"), filetype="jpeg") as doc: + test_pdf_bytes = doc.convert_to_pdf() + +def test_3020(): + pm = pymupdf.Pixmap(imgfile) + pm2 = pymupdf.Pixmap(pm, 20, 30, None) + pm3 = pymupdf.Pixmap(pymupdf.csGRAY, pm) + pm4 = pymupdf.Pixmap(pm, pm3) + +def test_3050(): + ''' + This is known to fail if MuPDF is built without it's default third-party + libraries, e.g. in Linux system installs. + ''' + path = os.path.normpath(f'{__file__}/../../tests/resources/001003ED.pdf') + with pymupdf.open(path) as pdf_file: + page_no = 0 + page = pdf_file[page_no] + zoom_x = 4.0 + zoom_y = 4.0 + matrix = pymupdf.Matrix(zoom_x, zoom_y) + pix = page.get_pixmap(matrix=matrix) + path_out = os.path.normpath(f'{__file__}/../../tests/test_3050_out.png') + pix.save(path_out) + print(f'{pix.width=} {pix.height=}') + def product(x, y): + for yy in y: + for xx in x: + yield (xx, yy) + n = 0 + # We use a small subset of the image because non-optimised rebase gets + # very slow. + for pos in product(range(100), range(100)): + if sum(pix.pixel(pos[0], pos[1])) >= 600: + n += 1 + pix.set_pixel(pos[0], pos[1], (255, 255, 255)) + path_out2 = os.path.normpath(f'{__file__}/../../tests/test_3050_out2.png') + pix.save(path_out2) + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3050_expected.png') + rms = gentle_compare.pixmaps_rms(path_expected, path_out2) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 26): + # Slight differences in rendering from fix for mupdf bug 708274. + assert rms < 0.2 + else: + assert rms == 0 + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 0): + assert wt == 'bogus font ascent/descent values (0 / 0)\nPDF stream Length incorrect' + else: + assert wt == 'PDF stream Length incorrect' + +def test_3058(): + doc = pymupdf.Document(os.path.abspath(f'{__file__}/../../tests/resources/test_3058.pdf')) + images = doc[0].get_images(full=True) + pix = pymupdf.Pixmap(doc, 17) + + # First bug was that `pix.colorspace` was DeviceRGB. + assert str(pix.colorspace) == 'Colorspace(CS_CMYK) - DeviceCMYK' + + pix = pymupdf.Pixmap(pymupdf.csRGB, pix) + assert str(pix.colorspace) == 'Colorspace(CS_RGB) - DeviceRGB' + + # Second bug was that the image was converted to RGB via greyscale proofing + # color space, so image contained only shades of grey. This compressed + # easily to a .png file, so we crudely check the bug is fixed by looking at + # size of .png file. + path = os.path.abspath(f'{__file__}/../../tests/test_3058_out.png') + pix.save(path) + s = os.path.getsize(path) + assert 1800000 < s < 2600000, f'Unexpected size of {path}: {s}' + +def test_3072(): + path = os.path.abspath(f'{__file__}/../../tests/resources/test_3072.pdf') + out = os.path.abspath(f'{__file__}/../../tests') + + doc = pymupdf.open(path) + page_48 = doc[0] + bbox = [147, 300, 447, 699] + rect = pymupdf.Rect(*bbox) + zoom = pymupdf.Matrix(3, 3) + pix = page_48.get_pixmap(clip=rect, matrix=zoom) + image_save_path = f'{out}/1.jpg' + pix.save(image_save_path, jpg_quality=95) + + doc = pymupdf.open(path) + page_49 = doc[1] + bbox = [147, 543, 447, 768] + rect = pymupdf.Rect(*bbox) + zoom = pymupdf.Matrix(3, 3) + pix = page_49.get_pixmap(clip=rect, matrix=zoom) + image_save_path = f'{out}/2.jpg' + pix.save(image_save_path, jpg_quality=95) + rebase = hasattr(pymupdf, 'mupdf') + if rebase: + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == ( + "syntax error: cannot find ExtGState resource 'BlendMode0'\n" + "encountered syntax errors; page may not be correct\n" + "syntax error: cannot find ExtGState resource 'BlendMode0'\n" + "encountered syntax errors; page may not be correct" + ) + +def test_3134(): + doc = pymupdf.Document() + page = doc.new_page() + page.get_pixmap(clip=pymupdf.Rect(0, 0, 100, 100)).save("test_3134_rect.jpg") + page.get_pixmap(clip=pymupdf.IRect(0, 0, 100, 100)).save("test_3134_irect.jpg") + stat_rect = os.stat('test_3134_rect.jpg') + stat_irect = os.stat('test_3134_irect.jpg') + print(f' {stat_rect=}') + print(f'{stat_irect=}') + assert stat_rect.st_size == stat_irect.st_size + +def test_3177(): + path = os.path.abspath(f'{__file__}/../../tests/resources/img-transparent.png') + pixmap = pymupdf.Pixmap(path) + pixmap2 = pymupdf.Pixmap(None, pixmap) + + +def test_3493(): + ''' + If python3-gi is installed, we check fix for #3493, where importing gi + would load an older version of libjpeg than is used in MuPDF, and break + MuPDF. + + This test is excluded by default in sysinstall tests, because running + commands in a new venv does not seem to pick up pymupdf as expected. + ''' + if platform.system() != 'Linux': + print(f'Not running because not Linux: {platform.system()=}') + return + + import subprocess + + root = os.path.abspath(f'{__file__}/../..') + in_path = f'{root}/tests/resources/test_3493.epub' + + def run(command, check=1, stdout=None): + print(f'Running with {check=}: {command}') + return subprocess.run(command, shell=1, check=check, stdout=stdout, text=1) + + def run_code(code, code_path, *, check=True, venv=None, venv_args='', pythonpath=None, stdout=None): + code = textwrap.dedent(code) + with open(code_path, 'w') as f: + f.write(code) + prefix = f'PYTHONPATH={pythonpath} ' if pythonpath else '' + if venv: + # Have seen this fail on Github in a curious way: + # + # Running: /tmp/tmp.fBeKNLJQKk/venv/bin/python -m venv --system-site-packages /project/tests/resources/test_3493_venv + # Error: [Errno 2] No such file or directory: '/project/tests/resources/test_3493_venv/bin/python' + # + r = run(f'{sys.executable} -m venv {venv_args} {venv}', check=check) + if r.returncode: + return r + r = run(f'. {venv}/bin/activate && {prefix}python {code_path}', check=check, stdout=stdout) + else: + r = run(f'{prefix}{sys.executable} {code_path}', check=check, stdout=stdout) + return r + + # Find location of system install of `gi`. + r = run_code( + ''' + from gi.repository import GdkPixbuf + import gi + print(gi.__file__) + ''' + , + f'{root}/tests/resources/test_3493_gi.py', + check=0, + venv=f'{root}/tests/resources/test_3493_venv', + venv_args='--system-site-packages', + stdout=subprocess.PIPE, + ) + if r.returncode: + print(f'test_3493(): Not running test because --system-site-packages venv cannot import gi.') + return + gi = r.stdout.strip() + gi_pythonpath = os.path.abspath(f'{gi}/../..') + + def do(gi): + # Run code that will import gi and pymupdf in different orders, and + # return contents of generated .png file as a bytes. + out = f'{root}/tests/resources/test_3493_{gi}.png' + run_code( + f''' + if {gi}==0: + import pymupdf + elif {gi}==1: + from gi.repository import GdkPixbuf + import pymupdf + elif {gi}==2: + import pymupdf + from gi.repository import GdkPixbuf + else: + assert 0 + document = pymupdf.Document('{in_path}') + page = document[0] + print(f'{gi=}: saving to: {out}') + page.get_pixmap().save('{out}') + ''' + , + os.path.abspath(f'{root}/tests/resources/test_3493_{gi}.py'), + pythonpath=gi_pythonpath, + ) + with open(out, 'rb') as f: + return f.read() + + out0 = do(0) + out1 = do(1) + out2 = do(2) + print(f'{len(out0)=} {len(out1)=} {len(out2)=}.') + assert out1 == out0 + assert out2 == out0 + + +def test_3848(): + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + # Takes 40m on Github. + print(f'test_3848(): not running on valgrind because very slow.', flush=1) + return + if platform.python_implementation() == 'GraalVM': + print(f'test_3848(): Not running because slow on GraalVM.') + return + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3848.pdf') + with pymupdf.open(path) as document: + for i in range(len(document)): + page = document.load_page(i) + print(f'{page=}.') + for annot in page.get_drawings(): + if page.get_textbox(annot['rect']): + rect = annot['rect'] + pixmap = page.get_pixmap(clip=rect) + color_bytes = pixmap.color_topusage() + + +def test_3994(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3994.pdf') + with pymupdf.open(path) as document: + page = document[0] + txt_blocks = [blk for blk in page.get_text('dict')['blocks'] if blk['type']==0] + for blk in txt_blocks: + pix = page.get_pixmap(clip=pymupdf.Rect([int(v) for v in blk['bbox']]), colorspace=pymupdf.csRGB, alpha=False) + percent, color = pix.color_topusage() + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'premature end of data in flate filter\n... repeated 2 times...' + + +def test_3448(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3448.pdf') + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap(alpha=False, dpi=150) + path_out = f'{path}.png' + pixmap.save(path_out) + print(f'Have written to: {path_out}') + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3448.pdf-expected.png') + pixmap_expected = pymupdf.Pixmap(path_expected) + rms = gentle_compare.pixmaps_rms(pixmap, pixmap_expected) + diff = gentle_compare.pixmaps_diff(pixmap_expected, pixmap) + path_diff = os.path.normpath(f'{__file__}/../../tests/test_3448-diff.png') + diff.save(path_diff) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + # Prior to fix for mupdf bug 708274. + assert 1 < rms < 2 + else: + assert rms == 0 + + +def test_3854(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3854.pdf') + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap() + pixmap.save(os.path.normpath(f'{__file__}/../../tests/test_3854_out.png')) + + # 2024-11-29: this is the incorrect expected output. + path_expected_png = os.path.normpath(f'{__file__}/../../tests/resources/test_3854_expected.png') + pixmap_expected = pymupdf.Pixmap(path_expected_png) + pixmap_diff = gentle_compare.pixmaps_diff(pixmap_expected, pixmap) + path_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_3854_diff.png') + pixmap_diff.save(path_diff) + rms = gentle_compare.pixmaps_rms(pixmap, pixmap_expected) + print(f'{rms=}.') + if os.environ.get('PYMUPDF_SYSINSTALL_TEST') == '1': + # MuPDF using external third-party libs gives slightly different + # behaviour. + assert rms < 2 + elif pymupdf.mupdf_version_tuple < (1, 25, 5): + # # Prior to fix for mupdf bug 708274. + assert 0.5 < rms < 2 + else: + assert rms == 0 + + +def test_4155(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3854.pdf') + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap() + mv = pixmap.samples_mv + mvb1 = mv.tobytes() + del page + del pixmap + try: + mvb2 = mv.tobytes() + except ValueError as e: + print(f'Received exception: {e}') + assert 'operation forbidden on released memoryview object' in str(e) + else: + assert 0, f'Did not receive expected exception when using defunct memoryview.' + + +def test_4336(): + if 0: + # Compare with last classic release. + import pickle + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4336_cc') + code = textwrap.dedent(f''' + import fitz + import os + import time + import pickle + + path = os.path.normpath(f'{__file__}/../../tests/resources/nur-ruhig.jpg') + pixmap = fitz.Pixmap(path) + t = time.time() + for i in range(10): + cc = pixmap.color_count() + t = time.time() - t + print(f'test_4336(): {{t=}}') + with open({path_out!r}, 'wb') as f: + pickle.dump(cc, f) + ''') + path_code = os.path.normpath(f'{__file__}/../../tests/resources/test_4336.py') + with open(path_code, 'w') as f: + f.write(code) + venv = os.path.normpath(f'{__file__}/../../tests/resources/test_4336_venv') + command = f'{sys.executable} -m venv {venv}' + command += f' && . {venv}/bin/activate' + command += f' && pip install --force-reinstall pymupdf==1.23.8' + command += f' && python {path_code}' + print(f'Running: {command}', flush=1) + subprocess.run(command, shell=1, check=1) + with open(path_out, 'rb') as f: + cc_old = pickle.load(f) + else: + cc_old = None + path = os.path.normpath(f'{__file__}/../../tests/resources/nur-ruhig.jpg') + pixmap = pymupdf.Pixmap(path) + t = time.time() + for i in range(10): + cc = pixmap.color_count() + t = time.time() - t + print(f'test_4336(): {t=}') + + if cc_old: + assert cc == cc_old + + +def test_4435(): + print(f'{pymupdf.version=}') + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4435.pdf') + with pymupdf.open(path) as document: + page = document[2] + print(f'Calling page.get_pixmap().', flush=1) + pixmap = page.get_pixmap(alpha=False, dpi=120) + print(f'Called page.get_pixmap().', flush=1) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'bogus font ascent/descent values (0 / 0)\n... repeated 9 times...' + + +def test_4423(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4423.pdf') + with pymupdf.open(path) as document: + path2 = f'{path}.pdf' + ee = None + try: + document.save( + path2, + garbage=4, + expand=1, + deflate=True, + pretty=True, + no_new_id=True, + ) + except Exception as e: + print(f'Exception: {e}') + ee = e + + if (1, 25, 5) <= pymupdf.mupdf_version_tuple < (1, 26): + assert ee, f'Did not receive the expected exception.' + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'dropping unclosed output' + else: + assert not ee, f'Received unexpected exception: {e}' + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'format error: cannot find object in xref (56 0 R)\nformat error: cannot find object in xref (68 0 R)' + + +def test_4445(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4445(): not running on Pyodide - cannot run child processes.') + return + print() + # Test case is large so we download it instead of having it in PyMuPDF + # git. We put it in `cache/` directory do it is not removed by `git clean` + # (unless `-d` is specified). + import util + path = util.download( + 'https://github.com/user-attachments/files/19738242/ss.pdf', + 'test_4445.pdf', + size=2671185, + ) + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap() + print(f'{pixmap.width=}') + print(f'{pixmap.height=}') + if pymupdf.mupdf_version_tuple >= (1, 26): + assert (pixmap.width, pixmap.height) == (792, 612) + else: + assert (pixmap.width, pixmap.height) == (612, 792) + if 0: + path_pixmap = f'{path}.png' + pixmap.save(path_pixmap) + print(f'Have created {path_pixmap=}') + wt = pymupdf.TOOLS.mupdf_warnings() + print(f'{wt=}') + assert wt == 'broken xref subsection, proceeding anyway.\nTrailer Size is off-by-one. Ignoring.' + + +def test_3806(): + print() + print(f'{pymupdf.mupdf_version=}') + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3806.pdf') + path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3806-expected.png') + path_png = os.path.normpath(f'{__file__}/../../tests/test_3806.png') + + with pymupdf.open(path) as document: + pixmap = document[0].get_pixmap() + pixmap.save(path_png) + rms = gentle_compare.pixmaps_rms(path_png_expected, pixmap) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple >= (1, 26, 6): + assert rms < 0.1 + else: + assert rms > 50 + + +def test_4388(): + print() + path_BOZ1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BOZ1.pdf') + path_BUL1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf') + path_correct = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf.correct.png') + path_test = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf.test.png') + + with pymupdf.open(path_BUL1) as bul: + pixmap_correct = bul.load_page(0).get_pixmap() + pixmap_correct.save(path_correct) + + pymupdf.TOOLS.store_shrink(100) + + with pymupdf.open(path_BOZ1) as boz: + boz.load_page(0).get_pixmap() + + with pymupdf.open(path_BUL1) as bul: + pixmap_test = bul.load_page(0).get_pixmap() + pixmap_test.save(path_test) + + rms = gentle_compare.pixmaps_rms(pixmap_correct, pixmap_test) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple >= (1, 26, 6): + assert rms == 0 + else: + assert rms >= 10 + +def test_4699(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4699.pdf') + path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4699.png') + path_png_actual = os.path.normpath(f'{__file__}/../../tests/test_4699.png') + with pymupdf.open(path) as document: + page = document[0] + pixmap = page.get_pixmap() + pixmap.save(path_png_actual) + print(f'Have saved to {path_png_actual=}.') + rms = gentle_compare.pixmaps_rms(path_png_expected, pixmap) + print(f'test_4699(): {rms=}') + if pymupdf.mupdf_version_tuple >= (1, 26, 11): + assert rms == 0 + else: + wt = pymupdf.TOOLS.mupdf_warnings() + assert 'syntax error: cannot find ExtGState resource' in wt + assert rms > 20 diff --git a/tests/test_pylint.py b/tests/test_pylint.py new file mode 100644 index 000000000..38c6d017f --- /dev/null +++ b/tests/test_pylint.py @@ -0,0 +1,145 @@ +import pymupdf +import os +import re +import subprocess +import sys +import textwrap + +def test_pylint(): + + if os.environ.get('PYODIDE_ROOT'): + print('test_pylint(): not running on Pyodide - cannot run child processes.') + return + + if not hasattr(pymupdf, 'mupdf'): + print(f'test_pylint(): Not running with classic implementation.') + return + + ignores = '' + + ignores += textwrap.dedent( + ''' + C0103: Constant name "g_exceptions_verbose" doesn't conform to UPPER_CASE naming style (invalid-name) + C0115: Missing class docstring (missing-class-docstring) + C0116: Missing function or method docstring (missing-function-docstring) + C0301: Line too long (142/100) (line-too-long) + C0302: Too many lines in module (23586/1000) (too-many-lines) + C0303: Trailing whitespace (trailing-whitespace) + C0325: Unnecessary parens after 'not' keyword (superfluous-parens) + C0415: Import outside toplevel (traceback) (import-outside-toplevel) + R0902: Too many instance attributes (9/7) (too-many-instance-attributes) + R0903: Too few public methods (1/2) (too-few-public-methods) + R0911: Too many return statements (9/6) (too-many-return-statements) + R0913: Too many arguments (6/5) (too-many-arguments) + R1705: Unnecessary "elif" after "return", remove the leading "el" from "elif" (no-else-return) + R1720: Unnecessary "elif" after "raise", remove the leading "el" from "elif" (no-else-raise) + R1724: Unnecessary "elif" after "continue", remove the leading "el" from "elif" (no-else-continue) + R1735: Consider using '{}' instead of a call to 'dict'. (use-dict-literal) + W0511: Fixme: we don't support JM_MEMORY=1. (fixme) + W0622: Redefining built-in 'FileNotFoundError' (redefined-builtin) + W0622: Redefining built-in 'open' (redefined-builtin) + W1309: Using an f-string that does not have any interpolated variables (f-string-without-interpolation) + R1734: Consider using [] instead of list() (use-list-literal) + R1727: Boolean condition '0 and g_exceptions_verbose' will always evaluate to '0' (condition-evals-to-constant) + R1726: (simplifiable-condition) + ''' + ) + + # Items that we might want to fix. + ignores += textwrap.dedent( + ''' + C0114: Missing module docstring (missing-module-docstring) + C0117: Consider changing "not rotate % 90 == 0" to "rotate % 90 != 0" (unnecessary-negation) + C0123: Use isinstance() rather than type() for a typecheck. (unidiomatic-typecheck) + C0200: Consider using enumerate instead of iterating with range and len (consider-using-enumerate) + C0201: Consider iterating the dictionary directly instead of calling .keys() (consider-iterating-dictionary) + C0209: Formatting a regular string which could be an f-string (consider-using-f-string) + C0305: Trailing newlines (trailing-newlines) + C0321: More than one statement on a single line (multiple-statements) + C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) + C1803: "select == []" can be simplified to "not select", if it is strictly a sequence, as an empty list is falsey (use-implicit-booleaness-not-comparison) + R0912: Too many branches (18/12) (too-many-branches) + R0914: Too many local variables (20/15) (too-many-locals) + R0915: Too many statements (58/50) (too-many-statements) + R1702: Too many nested blocks (7/5) (too-many-nested-blocks) + R1703: The if statement can be replaced with 'var = bool(test)' (simplifiable-if-statement) + R1710: Either all return statements in a function should return an expression, or none of them should. (inconsistent-return-statements) + R1714: Consider merging these comparisons with 'in' by using 'width not in (1, 0)'. Use a set instead if elements are hashable. (consider-using-in) + R1716: Simplify chained comparison between the operands (chained-comparison) + R1717: Consider using a dictionary comprehension (consider-using-dict-comprehension) + R1718: Consider using a set comprehension (consider-using-set-comprehension) + R1719: The if expression can be replaced with 'bool(test)' (simplifiable-if-expression) + R1721: Unnecessary use of a comprehension, use list(roman_num(num)) instead. (unnecessary-comprehension) + R1728: Consider using a generator instead 'max(len(k) for k in item.keys())' (consider-using-generator) + R1728: Consider using a generator instead 'max(len(r.cells) for r in self.rows)' (consider-using-generator) + R1730: Consider using 'rowheight = min(rowheight, height)' instead of unnecessary if block (consider-using-min-builtin) + R1731: Consider using 'right = max(right, x1)' instead of unnecessary if block (consider-using-max-builtin) + W0105: String statement has no effect (pointless-string-statement) + W0107: Unnecessary pass statement (unnecessary-pass) + W0212: Access to a protected member _graft_id of a client class (protected-access) + W0602: Using global for 'CHARS' but no assignment is done (global-variable-not-assigned) + W0602: Using global for 'EDGES' but no assignment is done (global-variable-not-assigned) + W0603: Using the global statement (global-statement) + W0612: Unused variable 'keyvals' (unused-variable) + W0613: Unused argument 'kwargs' (unused-argument) + W0621: Redefining name 'show' from outer scope (line 159) (redefined-outer-name) + W0640: Cell variable o defined in loop (cell-var-from-loop) + W0718: Catching too general exception Exception (broad-exception-caught) + W0719: Raising too general exception: Exception (broad-exception-raised) + C3001: Lambda expression assigned to a variable. Define a function using the "def" keyword instead. (unnecessary-lambda-assignment) + R0801: Similar lines in 2 files + R0917: Too many positional arguments (7/5) (too-many-positional-arguments) + ''' + ) + ignores_list = list() + for line in ignores.split('\n'): + if not line or line.startswith('#'): + continue + m = re.match('^(.....): ', line) + assert m, f'Failed to parse {line=}' + ignores_list.append(m.group(1)) + ignores = ','.join(ignores_list) + + root = os.path.abspath(f'{__file__}/../..') + + sys.path.insert(0, root) + import pipcl + del sys.path[0] + + # We want to run pylist on all of our src/*.py files so we find them with + # `pipcl.git_items()`. However this seems to fail on github windows with + # `fatal: not a git repository (or any of the parent directories): .git` so + # we also hard-code the list and verify it matches `git ls-files` on other + # platforms. This ensures that we will always pick up new .py files in the + # future. + # + command = f'pylint -d {ignores}' + directory = f'{root}/src' + directory = directory.replace('/', os.sep) + leafs = [ + '__init__.py', + '__main__.py', + '_apply_pages.py', + '_wxcolors.py', + 'fitz___init__.py', + 'fitz_table.py', + 'fitz_utils.py', + 'pymupdf.py', + 'table.py', + 'utils.py', + ] + leafs.sort() + try: + leafs_git = pipcl.git_items(directory) + except Exception as e: + import platform + assert platform.system() == 'Windows' + else: + leafs_git = [i for i in leafs_git if i.endswith('.py')] + leafs_git.sort() + assert leafs_git == leafs, f'leafs:\n {leafs!r}\nleafs_git:\n {leafs_git!r}' + for leaf in leafs: + command += f' {directory}/{leaf}' + print(f'Running: {command}') + subprocess.run(command, shell=1, check=1) + diff --git a/tests/test_release.py b/tests/test_release.py new file mode 100644 index 000000000..9639ffa98 --- /dev/null +++ b/tests/test_release.py @@ -0,0 +1,85 @@ +import pymupdf + +import os +import re +import sys + + +g_root_abs = os.path.normpath(f'{__file__}/../../') + +sys.path.insert(0, g_root_abs) +try: + import pipcl + import setup +finally: + del sys.path[0] + +g_root = pipcl.relpath(g_root_abs) + + +def _file_line(path, text, re_match, offset=+2): + ''' + Returns : for location of regex match. + + path: + filename. + text: + Contents of . + re_match: + A re.Match. + offset: + Added to line number of start of . Default offset=2 is + because callers usually grep for leading newline, and line numbers are + generally 1-based. + ''' + text_before = text[:re_match.start()] + line = text_before.count('\n') + offset + return f'{path}:{line}' + + +def test_release_versions(): + ''' + PyMuPDF and default MuPDF must have same major.minor version. + ''' + version_p_tuple = [int(i) for i in setup.version_p.split('.')] + version_mupdf_tuple = [int(i) for i in setup.version_mupdf.split('.')] + assert version_p_tuple[:2] == version_mupdf_tuple[:2], \ + f'PyMuPDF and MuPDF major.minor versions do not match. {setup.version_p=} {setup.version_mupdf=}.' + + +def test_release_bug_template(): + ''' + Bug report template must list current PyMuPDF version. + ''' + p = f'{g_root}/.github/ISSUE_TEMPLATE/bug_report.yml' + expected = f'\n - {setup.version_p}\n' + with open(p) as f: + text = f.read() + assert expected in text, f'{p}:1: Failed to find line for {setup.version_p=}, {expected!r}.' + + +def test_release_changelog_version(): + ''' + In changes.txt, first item must match setup.version_p. + ''' + p = f'{g_root}/changes.txt' + with open(p) as f: + text = f.read() + # We match `**Changes in version a.b.c**' optionally followed by ` (YYYY-MM-DD)`. + m = re.search(f'\n[*][*]Changes in version ([0-9.]+)[*][*]( [([0-9-]+[)])?\n', text) + assert m, f'Cannot parse {p}.' + assert m[1] == setup.version_p, \ + f'{_file_line(p, text, m)}: Cannot find {setup.version_p=} in first changelog item: {m[0].strip()!r}.' + + +def test_release_changelog_mupdf_version(): + ''' + In changes.txt, first mentioned of MuPDF must match setup.version_mupdf. + ''' + p = f'{g_root}/changes.txt' + with open(p) as f: + text = f.read() + m = re.search(f'\n[*] Use MuPDF-([0-9.]+)[.]\n', text) + assert m, f'Cannot parse {p}.' + assert m[1] == setup.version_mupdf, \ + f'{_file_line(p, text, m)}: First mentioned MuPDF version does not match {setup.version_mupdf=}: {m[0].strip()!r}.' diff --git a/tests/test_remove-rotation.py b/tests/test_remove-rotation.py new file mode 100644 index 000000000..423c88113 --- /dev/null +++ b/tests/test_remove-rotation.py @@ -0,0 +1,30 @@ +import os +import pymupdf +from gentle_compare import gentle_compare + +scriptdir = os.path.dirname(__file__) + + +def test_remove_rotation(): + """Remove rotation verifying identical appearance and text.""" + filename = os.path.join(scriptdir, "resources", "test-2812.pdf") + doc = pymupdf.open(filename) + + # We always create fresh pages to avoid false positives from cache content. + # Text on these pages consists of pairwise different strings, sorting by + # these strings must therefore yield identical bounding boxes. + for i in range(1, doc.page_count): + assert doc[i].rotation # must be a rotated page + pix0 = doc[i].get_pixmap() # make image + words0 = [] + for w in doc[i].get_text("words"): + words0.append(list(pymupdf.Rect(w[:4]) * doc[i].rotation_matrix) + [w[4]]) + words0.sort(key=lambda w: w[4]) # sort by word strings + # derotate page and confirm nothing else has changed + doc[i].remove_rotation() + assert doc[i].rotation == 0 + pix1 = doc[i].get_pixmap() + words1 = doc[i].get_text("words") + words1.sort(key=lambda w: w[4]) # sort by word strings + assert pix1.digest == pix0.digest, f"{pix1.digest}/{pix0.digest}" + assert gentle_compare(words0, words1) diff --git a/tests/test_rewrite_images.py b/tests/test_rewrite_images.py new file mode 100644 index 000000000..b3cb290cd --- /dev/null +++ b/tests/test_rewrite_images.py @@ -0,0 +1,15 @@ +import pymupdf +import os + +scriptdir = os.path.dirname(__file__) + + +def test_rewrite_images(): + """Example for decreasing file size by more than 30%.""" + filename = os.path.join(scriptdir, "resources", "test-rewrite-images.pdf") + doc = pymupdf.open(filename) + size0 = os.path.getsize(doc.name) + doc.rewrite_images(dpi_threshold=100, dpi_target=72, quality=33) + data = doc.tobytes(garbage=3, deflate=True) + size1 = len(data) + assert (1 - (size1 / size0)) > 0.3 diff --git a/tests/test_rtl.py b/tests/test_rtl.py new file mode 100644 index 000000000..cd80185e6 --- /dev/null +++ b/tests/test_rtl.py @@ -0,0 +1,18 @@ +import pymupdf + +import os + + +def test_rtl(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test-E+A.pdf') + doc = pymupdf.open(path) + page = doc[0] + # set of all RTL characters + rtl_chars = set([chr(i) for i in range(0x590, 0x901)]) + + for w in page.get_text("words"): + # every word string must either ONLY contain RTL chars + cond1 = rtl_chars.issuperset(w[4]) + # ... or NONE. + cond2 = rtl_chars.intersection(w[4]) == set() + assert cond1 or cond2 diff --git a/tests/test_showpdfpage.py b/tests/test_showpdfpage.py index ace2112e4..2e6b27a18 100644 --- a/tests/test_showpdfpage.py +++ b/tests/test_showpdfpage.py @@ -3,23 +3,23 @@ * Convert some image to a PDF * Insert it rotated in some rectangle of a PDF page * Assert PDF Form XObject has been created - * Assert that image contained in inserted PDF is inside given retangle + * Assert that image contained in inserted PDF is inside given rectangle """ import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) imgfile = os.path.join(scriptdir, "resources", "nur-ruhig.jpg") def test_insert(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - rect = fitz.Rect(50, 50, 100, 100) # insert in here - img = fitz.open(imgfile) # open image + rect = pymupdf.Rect(50, 50, 100, 100) # insert in here + img = pymupdf.open(imgfile) # open image tobytes = img.convert_to_pdf() # get its PDF version (bytes object) - src = fitz.open("pdf", tobytes) # open as PDF + src = pymupdf.open("pdf", tobytes) # open as PDF xref = page.show_pdf_page(rect, src, 0, rotate=-23) # insert in rectangle # extract just inserted image info img = page.get_images(True)[0] @@ -29,3 +29,27 @@ def test_insert(): # Multiple computations may have lead to rounding deviations, so we need # some generosity here: enlarge rect by 1 point in each direction. assert img["bbox"] in rect + (-1, -1, 1, 1) + +def test_2742(): + dest = pymupdf.open() + destpage = dest.new_page(width=842, height=595) + + a5 = pymupdf.Rect(0, 0, destpage.rect.width / 3, destpage.rect.height) + shiftright = pymupdf.Rect(destpage.rect.width/3, 0, destpage.rect.width/3, 0) + + src = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_2742.pdf')) + + destpage.show_pdf_page(a5, src, 0) + destpage.show_pdf_page(a5 + shiftright, src, 0) + destpage.show_pdf_page(a5 + shiftright + shiftright, src, 0) + + dest.save(os.path.abspath(f'{__file__}/../../tests/test_2742-out.pdf')) + print("The end!") + + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == ( + 'Circular dependencies! Consider page cleaning.\n' + '... repeated 3 times...' + ), f'{wt=}' diff --git a/tests/test_spikes.py b/tests/test_spikes.py new file mode 100644 index 000000000..54dd91807 --- /dev/null +++ b/tests/test_spikes.py @@ -0,0 +1,42 @@ +import pymupdf +import pathlib +import os + + +def test_spikes(): + """Check suppression of text spikes caused by long miters.""" + root = os.path.abspath(f"{__file__}/../..") + spikes_yes = pathlib.Path(f"{root}/docs/images/spikes-yes.png") + spikes_no = pathlib.Path(f"{root}/docs/images/spikes-no.png") + doc = pymupdf.open() + text = "NATO MEMBERS" # some text provoking spikes ("N", "M") + point = (10, 35) # insert point + + # make text provoking spikes + page = doc.new_page(width=200, height=50) # small page + page.insert_text( + point, + text, + fontsize=20, + render_mode=1, # stroke text only + border_width=0.3, # causes thick border lines + miter_limit=None, # do not care about miter spikes + ) + # write same text in white over the previous for better demo purpose + page.insert_text(point, text, fontsize=20, color=(1, 1, 1)) + pix1 = page.get_pixmap() + assert pix1.tobytes() == spikes_yes.read_bytes() + + # make text suppressing spikes + page = doc.new_page(width=200, height=50) + page.insert_text( + point, + text, + fontsize=20, + render_mode=1, + border_width=0.3, + miter_limit=1, # suppress each and every miter spike + ) + page.insert_text(point, text, fontsize=20, color=(1, 1, 1)) + pix2 = page.get_pixmap() + assert pix2.tobytes() == spikes_no.read_bytes() diff --git a/tests/test_story.py b/tests/test_story.py index fa4535f3c..d895e9b58 100644 --- a/tests/test_story.py +++ b/tests/test_story.py @@ -1,9 +1,12 @@ -import fitz +import pymupdf import os +import textwrap def test_story(): otf = os.path.abspath(f'{__file__}/../resources/PragmaticaC.otf') + # 2023-12-06: latest mupdf throws exception if path uses back-slashes. + otf = otf.replace('\\', '/') CSS = f""" @font-face {{font-family: test; src: url({otf});}} """ @@ -12,14 +15,14 @@ def test_story():

We shall meet again at a place where there is no darkness.

""" - MEDIABOX = fitz.paper_rect("letter") + MEDIABOX = pymupdf.paper_rect("letter") WHERE = MEDIABOX + (36, 36, -36, -36) # the font files are located in /home/chinese - arch = fitz.Archive(".") - # if not specfied user_css, the output pdf has content - story = fitz.Story(HTML, user_css=CSS, archive=arch) + arch = pymupdf.Archive(".") + # if not specified user_css, the output pdf has content + story = pymupdf.Story(HTML, user_css=CSS, archive=arch) - writer = fitz.DocumentWriter("output.pdf") + writer = pymupdf.DocumentWriter("output.pdf") more = 1 @@ -30,3 +33,263 @@ def test_story(): writer.end_page() writer.close() + + +def test_2753(): + + def rectfn(rect_num, filled): + return pymupdf.Rect(0, 0, 200, 200), pymupdf.Rect(50, 50, 100, 150), None + + def make_pdf(html, path_out): + story = pymupdf.Story(html=html) + document = story.write_with_links(rectfn) + print(f'test_2753(): Writing to: {path_out=}.') + document.save(path_out) + return document + + doc_before = make_pdf( + textwrap.dedent(''' +

Before

+

+

After

+ '''), + os.path.abspath(f'{__file__}/../../tests/test_2753-out-before.pdf'), + ) + + doc_after = make_pdf( + textwrap.dedent(''' +

Before

+

+

After

+ '''), + os.path.abspath(f'{__file__}/../../tests/test_2753-out-after.pdf'), + ) + + path = os.path.normpath(f'{__file__}/../../tests/test_2753_out') + doc_before.save(f'{path}_before.pdf') + doc_after.save(f'{path}_after.pdf') + assert len(doc_before) == 2 + assert len(doc_after) == 2 + +# codespell:ignore-begin +springer_html = ''' +
+''' +#codespell:ignore-end + +def test_fit_springer(): + + if not hasattr(pymupdf, 'mupdf'): + print(f'test_fit_springer(): not running on classic.') + return + + verbose = 0 + story = pymupdf.Story(springer_html) + + def check(call, expected): + ''' + Checks that eval(call) returned parameter=expected. Also creates PDF + using path that contains `call` in its leafname, + ''' + fit_result = eval(call) + + print(f'test_fit_springer(): {call=} => {fit_result=}.') + if expected is None: + assert not fit_result.big_enough + else: + document = story.write_with_links(lambda rectnum, filled: (fit_result.rect, fit_result.rect, None)) + path = os.path.abspath(f'{__file__}/../../tests/test_fit_springer_{call}_{fit_result.parameter=}_{fit_result.rect=}.pdf') + document.save(path) + print(f'Have saved document to {path}.') + assert abs(fit_result.parameter-expected) < 0.001, f'{expected=} {fit_result.parameter=}' + + check(f'story.fit_scale(pymupdf.Rect(0, 0, 200, 200), scale_min=1, verbose={verbose})', 3.685728073120117) + check(f'story.fit_scale(pymupdf.Rect(0, 0, 595, 842), scale_min=1, verbose={verbose})', 1.0174560546875) + check(f'story.fit_scale(pymupdf.Rect(0, 0, 300, 421), scale_min=1, verbose={verbose})', 2.02752685546875) + check(f'story.fit_scale(pymupdf.Rect(0, 0, 600, 900), scale_min=1, scale_max=1, verbose={verbose})', 1) + + check(f'story.fit_height(20, verbose={verbose})', 10782.3291015625) + check(f'story.fit_height(200, verbose={verbose})', 2437.4990234375) + check(f'story.fit_height(2000, verbose={verbose})', 450.2998046875) + check(f'story.fit_height(5000, verbose={verbose})', 378.2998046875) + check(f'story.fit_height(5500, verbose={verbose})', 378.2998046875) + + check(f'story.fit_width(3000, verbose={verbose})', 167.30859375) + check(f'story.fit_width(2000, verbose={verbose})', 239.595703125) + check(f'story.fit_width(1000, verbose={verbose})', 510.85546875) + check(f'story.fit_width(500, verbose={verbose})', 1622.1272945404053) + check(f'story.fit_width(400, verbose={verbose})', 2837.507724761963) + check(f'story.fit_width(300, width_max=200000, verbose={verbose})', None) + check(f'story.fit_width(200, width_max=200000, verbose={verbose})', None) + + # Run without verbose to check no calls to log() - checked by assert. + check('story.fit_scale(pymupdf.Rect(0, 0, 600, 900), scale_min=1, scale_max=1, verbose=0)', 1) + check('story.fit_scale(pymupdf.Rect(0, 0, 300, 421), scale_min=1, verbose=0)', 2.02752685546875) + + +def test_write_stabilized_with_links(): + + def rectfn(rect_num, filled): + ''' + We return one rect per page. + ''' + rect = pymupdf.Rect(10, 20, 290, 380) + mediabox = pymupdf.Rect(0, 0, 300, 400) + #print(f'rectfn(): rect_num={rect_num} filled={filled}') + return mediabox, rect, None + + def contentfn(positions): + ret = '' + ret += textwrap.dedent(''' + + +

Contents

+
    + ''') + for position in positions: + if position.heading and (position.open_close & 1): + text = position.text if position.text else '' + if position.id: + ret += f'
  • {text}' + else: + ret += f'
  • {text}' + ret += f' page={position.page_num}\n' + ret += '
\n' + ret += textwrap.dedent(f''' +

First section

+

Contents of first section. +

+ +

Second section

+

Contents of second section. +

Second section first subsection

+ +

Contents of second section first subsection. +

IDTEST + +

Third section

+

Contents of third section. +

NAMETEST. + + + ''') + return ret.strip() + + document = pymupdf.Story.write_stabilized_with_links(contentfn, rectfn) + + # Check links. + links = list() + for page in document: + links += page.get_links() + print(f'{len(links)=}.') + external_links = dict() + for i, link in enumerate(links): + print(f' {i}: {link=}') + if link.get('kind') == pymupdf.LINK_URI: + uri = link['uri'] + external_links.setdefault(uri, 0) + external_links[uri] += 1 + + # Check there is one external link. + print(f'{external_links=}') + if hasattr(pymupdf, 'mupdf'): + assert len(external_links) == 1 + assert 'https://artifex.com/' in external_links + + out_path = __file__.replace('.py', '.pdf') + document.save(out_path) + +def test_archive_creation(): + s = pymupdf.Story(archive=pymupdf.Archive('.')) + s = pymupdf.Story(archive='.') + + +def test_3813(): + import pymupdf + + HTML = """ +

Count is fine:

+
    +
  1. Lorem +
      +
    1. Sub Lorem
    2. +
    3. Sub Lorem
    4. +
    +
  2. +
  3. Lorem
  4. +
  5. Lorem
  6. +
+ +

Broken count:

+
    +
  1. Lorem +
      +
    • Sub Lorem
    • +
    • Sub Lorem
    • +
    +
  2. +
  3. Lorem
  4. +
  5. Lorem
  6. +
+ """ + MEDIABOX = pymupdf.paper_rect("A4") + WHERE = MEDIABOX + (36, 36, -36, -36) + + story = pymupdf.Story(html=HTML) + path = os.path.normpath(f'{__file__}/../../tests/test_3813_out.pdf') + writer = pymupdf.DocumentWriter(path) + + more = 1 + + while more: + device = writer.begin_page(MEDIABOX) + more, _ = story.place(WHERE) + story.draw(device) + writer.end_page() + + writer.close() + + with pymupdf.open(path) as document: + page = document[0] + text = page.get_text() + text_utf8 = text.encode() + + text_expected_utf8 = b'Count is \xef\xac\x81ne:\n1. Lorem\n1. Sub Lorem\n2. Sub Lorem\n2. Lorem\n3. Lorem\nBroken count:\n1. Lorem\n\xe2\x80\xa2 Sub Lorem\n\xe2\x80\xa2 Sub Lorem\n2. Lorem\n3. Lorem\n' + text_expected = text_expected_utf8.decode() + + print(f'text_utf8:\n {text_utf8!r}') + print(f'text_expected_utf8:\n {text_expected_utf8!r}') + print(f'text:\n {textwrap.indent(text, " ")}') + print(f'text_expected:\n {textwrap.indent(text_expected, " ")}') + + assert text == text_expected diff --git a/tests/test_tables.py b/tests/test_tables.py new file mode 100644 index 000000000..d2f4c0967 --- /dev/null +++ b/tests/test_tables.py @@ -0,0 +1,465 @@ +import os +import io +from pprint import pprint +import textwrap +import pickle +import platform + +import pymupdf + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +filename = os.path.join(scriptdir, "resources", "chinese-tables.pdf") +pickle_file = os.path.join(scriptdir, "resources", "chinese-tables.pickle") + + +def test_table1(): + """Compare pickled tables with those of the current run.""" + pickle_in = open(pickle_file, "rb") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables() + cells = tabs[0].cells + tabs[1].cells # all table cell tuples on page + extracts = [tabs[0].extract(), tabs[1].extract()] # all table cell content + old_data = pickle.load(pickle_in) # previously saved data + + # Compare cell contents + assert old_data["extracts"] == extracts # same cell contents + + # Compare cell coordinates. + # Cell rectangles may get somewhat larger due to more cautious border + # computations, but any differences must be small. + old_cells = old_data["cells"][0] + old_data["cells"][1] + assert len(cells) == len(old_cells) + for i in range(len(cells)): + c1 = pymupdf.Rect(cells[i]) # new cell coordinates + c0 = pymupdf.Rect(old_cells[i]) # old cell coordinates + assert c0 in c1 # always: old contained in new + assert abs(c1 - c0) < 0.2 # difference must be small + + +def test_table2(): + """Confirm header properties.""" + doc = pymupdf.open(filename) + page = doc[0] + tab1, tab2 = page.find_tables().tables + # both tables contain their header data + assert tab1.header.external == False + assert tab1.header.cells == tab1.rows[0].cells + assert tab2.header.external == False + assert tab2.header.cells == tab2.rows[0].cells + + +def test_2812(): + """Ensure table detection and extraction independent from page rotation. + + Make 4 pages with rotations 0, 90, 180 and 270 degrees respectively. + Each page shows the same 8x5 table. + We will check that each table is detected and delivers the same content. + """ + doc = pymupdf.open() + # Page 0: rotation 0 + page = doc.new_page(width=842, height=595) + rect = page.rect + (72, 72, -72, -72) + cols = 5 + rows = 8 + # define the cells, draw the grid and insert unique text in each cell. + cells = pymupdf.make_table(rect, rows=rows, cols=cols) + for i in range(rows): + for j in range(cols): + page.draw_rect(cells[i][j]) + for i in range(rows): + for j in range(cols): + page.insert_textbox( + cells[i][j], + f"cell[{i}][{j}]", + align=pymupdf.TEXT_ALIGN_CENTER, + ) + page.clean_contents() + + # Page 1: rotation 90 degrees + page = doc.new_page() + rect = page.rect + (72, 72, -72, -72) + cols = 8 + rows = 5 + cells = pymupdf.make_table(rect, rows=rows, cols=cols) + for i in range(rows): + for j in range(cols): + page.draw_rect(cells[i][j]) + for i in range(rows): + for j in range(cols): + page.insert_textbox( + cells[i][j], + f"cell[{j}][{rows-i-1}]", + rotate=90, + align=pymupdf.TEXT_ALIGN_CENTER, + ) + page.set_rotation(90) + page.clean_contents() + + # Page 2: rotation 180 degrees + page = doc.new_page(width=842, height=595) + rect = page.rect + (72, 72, -72, -72) + cols = 5 + rows = 8 + cells = pymupdf.make_table(rect, rows=rows, cols=cols) + for i in range(rows): + for j in range(cols): + page.draw_rect(cells[i][j]) + for i in range(rows): + for j in range(cols): + page.insert_textbox( + cells[i][j], + f"cell[{rows-i-1}][{cols-j-1}]", + rotate=180, + align=pymupdf.TEXT_ALIGN_CENTER, + ) + page.set_rotation(180) + page.clean_contents() + + # Page 3: rotation 270 degrees + page = doc.new_page() + rect = page.rect + (72, 72, -72, -72) + cols = 8 + rows = 5 + cells = pymupdf.make_table(rect, rows=rows, cols=cols) + for i in range(rows): + for j in range(cols): + page.draw_rect(cells[i][j]) + for i in range(rows): + for j in range(cols): + page.insert_textbox( + cells[i][j], + f"cell[{cols-j-1}][{i}]", + rotate=270, + align=pymupdf.TEXT_ALIGN_CENTER, + ) + page.set_rotation(270) + page.clean_contents() + + pdfdata = doc.tobytes() + # doc.ez_save("test-2812.pdf") + doc.close() + + # ------------------------------------------------------------------------- + # Test PDF prepared. Extract table on each page and + # ensure identical extracted table data. + # ------------------------------------------------------------------------- + doc = pymupdf.open("pdf", pdfdata) + extracts = [] + for page in doc: + tabs = page.find_tables() + assert len(tabs.tables) == 1 + tab = tabs[0] + fp = io.StringIO() + pprint(tab.extract(), stream=fp) + extracts.append(fp.getvalue()) + fp = None + assert tab.row_count == 8 + assert tab.col_count == 5 + e0 = extracts[0] + for e in extracts[1:]: + assert e == e0 + + +def test_2979(): + """This tests fix #2979 and #3001. + + 2979: identical cell count for each row + 3001: no change of global glyph heights + """ + filename = os.path.join(scriptdir, "resources", "test_2979.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tab = page.find_tables()[0] # extract the table + lengths = set() # stores all row cell counts + for e in tab.extract(): + lengths.add(len(e)) # store number of cells for row + + # test 2979 + assert len(lengths) == 1 + + # test 3001 + assert ( + pymupdf.TOOLS.set_small_glyph_heights() is False + ), f"{pymupdf.TOOLS.set_small_glyph_heights()=}" + + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 8): + assert ( + wt + == "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times...\nActualtext with no position. Text may be lost or mispositioned.\n... repeated 96 times..." + ) + elif pymupdf.mupdf_version_tuple >= (1, 26, 0): + assert ( + wt + == "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times..." + ) + else: + assert not wt + + +def test_3062(): + """Tests the fix for #3062. + After table extraction, a rotated page should behave and look + like as before.""" + if platform.python_implementation() == 'GraalVM': + print(f'test_3062(): Not running because slow on GraalVM.') + return + + filename = os.path.join(scriptdir, "resources", "test_3062.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tab0 = page.find_tables()[0] + cells0 = tab0.cells + + page = None + page = doc[0] + tab1 = page.find_tables()[0] + cells1 = tab1.cells + assert cells1 == cells0 + + +def test_strict_lines(): + """Confirm that ignoring borderless rectangles improves table detection.""" + filename = os.path.join(scriptdir, "resources", "strict-yes-no.pdf") + doc = pymupdf.open(filename) + page = doc[0] + + tab1 = page.find_tables()[0] + tab2 = page.find_tables(strategy="lines_strict")[0] + assert tab2.row_count < tab1.row_count + assert tab2.col_count < tab1.col_count + + +def test_add_lines(): + """Test new parameter add_lines for table recognition.""" + if platform.python_implementation() == 'GraalVM': + print(f'test_add_lines(): Not running because breaks later tests on GraalVM.') + return + + filename = os.path.join(scriptdir, "resources", "small-table.pdf") + doc = pymupdf.open(filename) + page = doc[0] + assert page.find_tables().tables == [] + + more_lines = [ + ((238.9949951171875, 200.0), (238.9949951171875, 300.0)), + ((334.5559997558594, 200.0), (334.5559997558594, 300.0)), + ((433.1809997558594, 200.0), (433.1809997558594, 300.0)), + ] + + # these 3 additional vertical lines should additional 3 columns + tab2 = page.find_tables(add_lines=more_lines)[0] + assert tab2.col_count == 4 + assert tab2.row_count == 5 + + +def test_3148(): + """Ensure correct extraction text of rotated text.""" + doc = pymupdf.open() + page = doc.new_page() + rect = pymupdf.Rect(100, 100, 300, 300) + text = ( + "rotation 0 degrees", + "rotation 90 degrees", + "rotation 180 degrees", + "rotation 270 degrees", + ) + degrees = (0, 90, 180, 270) + delta = (2, 2, -2, -2) + cells = pymupdf.make_table(rect, cols=3, rows=4) + for i in range(3): + for j in range(4): + page.draw_rect(cells[j][i]) + k = (i + j) % 4 + page.insert_textbox(cells[j][i] + delta, text[k], rotate=degrees[k]) + # doc.save("multi-degree.pdf") + tabs = page.find_tables() + tab = tabs[0] + for extract in tab.extract(): + for item in extract: + item = item.replace("\n", " ") + assert item in text + + +def test_3179(): + """Test correct separation of multiple tables on page.""" + filename = os.path.join(scriptdir, "resources", "test_3179.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables() + assert len(tabs.tables) == 3 + + +def test_battery_file(): + """Tests correctly ignoring non-table suspects. + + Earlier versions erroneously tried to identify table headers + where there existed no table at all. + """ + filename = os.path.join(scriptdir, "resources", "battery-file-22.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables() + assert len(tabs.tables) == 0 + + +def test_markdown(): + """Confirm correct markdown output.""" + filename = os.path.join(scriptdir, "resources", "strict-yes-no.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tab = page.find_tables(strategy="lines_strict")[0] + if pymupdf.mupdf_version_tuple < (1, 26, 3): + md_expected = textwrap.dedent(''' + |Header1|Header2|Header3| + |---|---|---| + |Col11
Col12|~~Col21~~
~~Col22~~|Col31
Col32
Col33| + |Col13|~~Col23~~|Col34
Col35| + |Col14|~~Col24~~|Col36| + |Col15|~~Col25~~
~~Col26~~|| + + ''').lstrip() + else: + md_expected = ( + "|Header1|Header2|Header3|\n" + "|---|---|---|\n" + "|Col11
Col12|Col21
Col22|Col31
Col32
Col33|\n" + "|Col13|Col23|Col34
Col35|\n" + "|Col14|Col24|Col36|\n" + "|Col15|Col25
Col26||\n\n" + ) + + + md = tab.to_markdown() + assert md == md_expected, f'Incorrect md:\n{textwrap.indent(md, " ")}' + + +def test_paths_param(): + """Confirm acceptance of supplied vector graphics list.""" + filename = os.path.join(scriptdir, "resources", "strict-yes-no.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables(paths=[]) # will cause all tables are missed + assert tabs.tables == [] + + +def test_boxes_param(): + """Confirm acceptance of supplied boxes list.""" + filename = os.path.join(scriptdir, "resources", "small-table.pdf") + doc = pymupdf.open(filename) + page = doc[0] + paths = page.get_drawings() + box0 = page.cluster_drawings(drawings=paths)[0] + boxes = [box0] + words = page.get_text("words") + x_vals = [w[0] - 5 for w in words if w[4] in ("min", "max", "avg")] + for x in x_vals: + r = +box0 + r.x1 = x + boxes.append(r) + + y_vals = sorted(set([round(w[3]) for w in words])) + for y in y_vals[:-1]: # skip last one to avoid empty row + r = +box0 + r.y1 = y + boxes.append(r) + + tabs = page.find_tables(paths=[], add_boxes=boxes) + tab = tabs.tables[0] + assert tab.extract() == [ + ["Boiling Points °C", "min", "max", "avg"], + ["Noble gases", "-269", "-62", "-170.5"], + ["Nonmetals", "-253", "4827", "414.1"], + ["Metalloids", "335", "3900", "741.5"], + ["Metals", "357", ">5000", "2755.9"], + ] + + +def test_dotted_grid(): + """Confirm dotted lines are detected as gridlines.""" + filename = os.path.join(scriptdir, "resources", "dotted-gridlines.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables() + assert len(tabs.tables) == 3 # must be 3 tables + t0, t1, t2 = tabs # extract them + # check that they have expected dimensions + assert t0.row_count, t0.col_count == (11, 12) + assert t1.row_count, t1.col_count == (25, 11) + assert t2.row_count, t2.col_count == (1, 10) + + +def test_4017(): + path = os.path.normpath(f"{__file__}/../../tests/resources/test_4017.pdf") + with pymupdf.open(path) as document: + page = document[0] + + tables = page.find_tables(add_lines=None) + print(f"{len(tables.tables)=}.") + tables_text = list() + for i, table in enumerate(tables): + print(f"## {i=}.") + t = table.extract() + for tt in t: + print(f" {tt}") + + # 2024-11-29: expect current incorrect output for last two tables. + + expected_a = [ + ["Class A/B Overcollateralization", "131.44%", ">=", "122.60%", "", "PASS"], + [None, None, None, None, None, "PASS"], + ["Class D Overcollateralization", "112.24%", ">=", "106.40%", "", "PASS"], + [None, None, None, None, None, "PASS"], + ["Event of Default", "156.08%", ">=", "102.50%", "", "PASS"], + [None, None, None, None, None, "PASS"], + ["Class A/B Interest Coverage", "N/A", ">=", "120.00%", "", "N/A"], + [None, None, None, None, None, "N/A"], + ["Class D Interest Coverage", "N/A", ">=", "105.00%", "", "N/A"], + ] + assert tables[-2].extract() == expected_a + + expected_b = [ + [ + "Moody's Maximum Rating Factor Test", + "2,577", + "<=", + "3,250", + "", + "PASS", + "2,581", + ], + [None, None, None, None, None, "PASS", None], + [ + "Minimum Floating Spread", + "3.5006%", + ">=", + "2.0000%", + "", + "PASS", + "3.4871%", + ], + [None, None, None, None, None, "PASS", None], + [ + "Minimum Weighted Average S&P Recovery\nRate Test", + "40.50%", + ">=", + "40.00%", + "", + "PASS", + "40.40%", + ], + [None, None, None, None, None, "PASS", None], + ["Weighted Average Life", "4.83", "<=", "9.00", "", "PASS", "4.92"], + ] + assert tables[-1].extract() == expected_b + + +def test_md_styles(): + """Test output of table with MD-styled cells.""" + filename = os.path.join(scriptdir, "resources", "test-styled-table.pdf") + doc = pymupdf.open(filename) + page = doc[0] + tabs = page.find_tables()[0] + text = """|Column 1|Column 2|Column 3|\n|---|---|---|\n|Zelle (0,0)|**Bold (0,1)**|Zelle (0,2)|\n|~~Strikeout (1,0), Zeile 1~~
~~Hier kommt Zeile 2.~~|Zelle (1,1)|~~Strikeout (1,2)~~|\n|**`Bold-monospaced`**
**`(2,0)`**|_Italic (2,1)_|**_Bold-italic_**
**_(2,2)_**|\n|Zelle (3,0)|~~**Bold-strikeout**~~
~~**(3,1)**~~|Zelle (3,2)|\n\n""" + assert tabs.to_markdown() == text diff --git a/tests/test_tesseract.py b/tests/test_tesseract.py new file mode 100644 index 000000000..7650e8381 --- /dev/null +++ b/tests/test_tesseract.py @@ -0,0 +1,164 @@ +import os +import platform +import textwrap + +import pymupdf + +def test_tesseract(): + ''' + This checks that MuPDF has been built with tesseract support. + + By default we don't supply a valid `tessdata` directory, and just assert + that attempting to use Tesseract raises the expected error (which checks + that MuPDF is built with Tesseract support). + + But if TESSDATA_PREFIX is set in the environment, we assert that + FzPage.get_textpage_ocr() succeeds. + ''' + path = os.path.abspath( f'{__file__}/../resources/2.pdf') + doc = pymupdf.open( path) + page = doc[5] + if hasattr(pymupdf, 'mupdf'): + # rebased. + if pymupdf.mupdf_version_tuple < (1, 25, 4): + tail = 'OCR initialisation failed' + else: + tail = 'Tesseract language initialisation failed' + if os.environ.get('PYODIDE_ROOT'): + e_expected = 'code=6: No OCR support in this build' + e_expected_type = pymupdf.mupdf.FzErrorUnsupported + else: + e_expected = f'code=3: {tail}' + if platform.system() == 'OpenBSD': + # 2023-12-12: For some reason the SWIG catch code only catches + # the exception as FzErrorBase. + e_expected_type = pymupdf.mupdf.FzErrorBase + print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.') + else: + e_expected_type = pymupdf.mupdf.FzErrorLibrary + else: + # classic. + e_expected = 'OCR initialisation failed' + e_expected_type = None + tessdata_prefix = os.environ.get('TESSDATA_PREFIX') + if tessdata_prefix: + tp = page.get_textpage_ocr(full=True) + print(f'test_tesseract(): page.get_textpage_ocr() succeeded') + else: + try: + tp = page.get_textpage_ocr(full=True, tessdata='/foo/bar') + except Exception as e: + e_text = str(e) + print(f'Received exception as expected.') + print(f'{type(e)=}') + print(f'{e_text=}') + assert e_text == e_expected, f'Unexpected exception: {e_text!r}' + if e_expected_type: + print(f'{e_expected_type=}') + assert type(e) == e_expected_type, f'{type(e)=} != {e_expected_type=}.' + else: + assert 0, f'Expected exception {e_expected!r}' + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple < (1, 25, 4): + assert wt == ( + 'UNHANDLED EXCEPTION!\n' + 'library error: Tesseract initialisation failed' + ) + else: + assert not wt + + +def test_3842b(): + # Check Tesseract failure when given a bogus languages. + # + # Note that Tesseract seems to output its own diagnostics. + # + if os.environ.get('PYODIDE_ROOT'): + print('test_3842b(): not running on Pyodide - cannot run child processes.') + return + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') + with pymupdf.open(path) as document: + page = document[6] + try: + partial_tp = page.get_textpage_ocr(flags=0, full=False, language='qwerty') + except Exception as e: + print(f'test_3842b(): received exception: {e}') + if 'No tessdata specified and Tesseract is not installed' in str(e): + pass + else: + if pymupdf.mupdf_version_tuple < (1, 25, 4): + assert 'OCR initialisation failed' in str(e) + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'UNHANDLED EXCEPTION!\nlibrary error: Tesseract initialisation failed\nUNHANDLED EXCEPTION!\nlibrary error: Tesseract initialisation failed', \ + f'Unexpected {wt=}' + else: + assert 'Tesseract language initialisation failed' in str(e) + + +def test_3842(): + if os.environ.get('PYODIDE_ROOT'): + print('test_3842(): not running on Pyodide - cannot run child processes.') + return + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') + with pymupdf.open(path) as document: + page = document[6] + try: + partial_tp = page.get_textpage_ocr(flags=0, full=False) + except Exception as e: + print(f'test_3842(): received exception: {e}', flush=1) + if 'No tessdata specified and Tesseract is not installed' in str(e): + pass + elif 'Tesseract language initialisation failed' in str(e): + pass + else: + assert 0, f'Unexpected exception text: {str(e)=}' + else: + text = page.get_text(textpage=partial_tp) + print() + print(text) + print(f'text:\n{text!r}') + + # 2024-11-29: This is the current incorrect output. We use + # underscores for lines containing entirely whitespace (which + # textwrap.dedent() unfortunately replaces with empty lines). + text_expected = textwrap.dedent(''' + NIST SP 800-223 + _ + High-Performance Computing Security + February 2024 + _ + __ + iii + Table of Contents + 1. Introduction ...................................................................................................................................1 + 2. HPC System Reference Architecture and Main Components ............................................................2 + 2.1.1. Components of the High-Performance Computing Zone ............................................................. 3 + 2.1.2. Components of the Data Storage Zone ........................................................................................ 4 + 2.1.3. Parallel File System ....................................................................................................................... 4 + 2.1.4. Archival and Campaign Storage .................................................................................................... 5 + 2.1.5. Burst Buffer .................................................................................................................................. 5 + 2.1.6. Components of the Access Zone .................................................................................................. 6 + 2.1.7. Components of the Management Zone ....................................................................................... 6 + 2.1.8. General Architecture and Characteristics .................................................................................... 6 + 2.1.9. Basic Services ................................................................................................................................ 7 + 2.1.10. Configuration Management ....................................................................................................... 7 + 2.1.11. HPC Scheduler and Workflow Management .............................................................................. 7 + 2.1.12. HPC Software .............................................................................................................................. 8 + 2.1.13. User Software ............................................................................................................................. 8 + 2.1.14. Site-Provided Software and Vendor Software ........................................................................... 8 + 2.1.15. Containerized Software in HPC .................................................................................................. 9 + 3. HPC Threat Analysis...................................................................................................................... 10 + 3.2.1. Access Zone Threats ................................................................................................................... 11 + 3.2.2. Management Zone Threats ........................................................................................................ 11 + 3.2.3. High-Performance Computing Zone Threats .............................................................................. 12 + 3.2.4. Data Storage Zone Threats ......................................................................................................... 12 + 4. HPC Security Posture, Challenges, and Recommendations ............................................................. 14 + 5. Conclusions .................................................................................................................................. 19 + ''', + )[1:].replace('_', ' ') + print(f'text_expected:\n{text_expected!r}') + assert text == text_expected diff --git a/tests/test_textbox.py b/tests/test_textbox.py index b08615efe..21b253cf0 100644 --- a/tests/test_textbox.py +++ b/tests/test_textbox.py @@ -5,8 +5,14 @@ Check text is indeed contained in given rectangle. """ -import fitz +import pymupdf +import gentle_compare + +import os +import textwrap + +# codespell:ignore-begin text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca. Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner. @@ -14,22 +20,22 @@ Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten. Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen.""" - +# codespell:ignore-end def test_textbox1(): """Use TextWriter for text insertion.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - rect = fitz.Rect(50, 50, 400, 400) + rect = pymupdf.Rect(50, 50, 400, 400) blue = (0, 0, 1) - tw = fitz.TextWriter(page.rect, color=blue) + tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, - align=fitz.TEXT_ALIGN_LEFT, + align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, ) - tw.write_text(page, morph=(rect.tl, fitz.Matrix(1, 1))) + tw.write_text(page, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) page.write_text(writers=tw) @@ -37,16 +43,16 @@ def test_textbox1(): def test_textbox2(): """Use basic text insertion.""" - doc = fitz.open() + doc = pymupdf.open() ocg = doc.add_ocg("ocg1") page = doc.new_page() - rect = fitz.Rect(50, 50, 400, 400) - blue = fitz.utils.getColor("lightblue") - red = fitz.utils.getColorHSV("red") + rect = pymupdf.Rect(50, 50, 400, 400) + blue = pymupdf.utils.getColor("lightblue") + red = pymupdf.utils.getColorHSV("red") page.insert_textbox( rect, text, - align=fitz.TEXT_ALIGN_LEFT, + align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, color=blue, oc=ocg, @@ -57,21 +63,21 @@ def test_textbox2(): def test_textbox3(): """Use TextWriter for text insertion.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - font = fitz.Font("cjk") - rect = fitz.Rect(50, 50, 400, 400) + font = pymupdf.Font("cjk") + rect = pymupdf.Rect(50, 50, 400, 400) blue = (0, 0, 1) - tw = fitz.TextWriter(page.rect, color=blue) + tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, - align=fitz.TEXT_ALIGN_LEFT, + align=pymupdf.TEXT_ALIGN_LEFT, font=font, fontsize=12, right_to_left=True, ) - tw.write_text(page, morph=(rect.tl, fitz.Matrix(1, 1))) + tw.write_text(page, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) doc.scrub() @@ -80,44 +86,284 @@ def test_textbox3(): def test_textbox4(): """Use TextWriter for text insertion.""" - doc = fitz.open() + doc = pymupdf.open() ocg = doc.add_ocg("ocg1") page = doc.new_page() - rect = fitz.Rect(50, 50, 400, 600) + rect = pymupdf.Rect(50, 50, 400, 600) blue = (0, 0, 1) - tw = fitz.TextWriter(page.rect, color=blue) + tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, - align=fitz.TEXT_ALIGN_LEFT, + align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, - font=fitz.Font("cour"), + font=pymupdf.Font("cour"), right_to_left=True, ) - tw.write_text(page, oc=ocg, morph=(rect.tl, fitz.Matrix(1, 1))) + tw.write_text(page, oc=ocg, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) def test_textbox5(): """Using basic text insertion.""" - fitz.TOOLS.set_small_glyph_heights(True) - doc = fitz.open() + small_glyph_heights0 = pymupdf.TOOLS.set_small_glyph_heights() + pymupdf.TOOLS.set_small_glyph_heights(True) + try: + doc = pymupdf.open() + page = doc.new_page() + r = pymupdf.Rect(100, 100, 150, 150) + text = "words and words and words and more words..." + rc = -1 + fontsize = 12 + page.draw_rect(r) + while rc < 0: + rc = page.insert_textbox( + r, + text, + fontsize=fontsize, + align=pymupdf.TEXT_ALIGN_JUSTIFY, + ) + fontsize -= 0.5 + + blocks = page.get_text("blocks") + bbox = pymupdf.Rect(blocks[0][:4]) + assert bbox in r + finally: + # Must restore small_glyph_heights, otherwise other tests can fail. + pymupdf.TOOLS.set_small_glyph_heights(small_glyph_heights0) + + +def test_2637(): + """Ensure correct calculation of fitting text.""" + doc = pymupdf.open() page = doc.new_page() - r = fitz.Rect(100, 100, 150, 150) - text = "words and words and words and more words..." + text = ( + "The morning sun painted the sky with hues of orange and pink. " + "Birds chirped harmoniously, greeting the new day. " + "Nature awakened, filling the air with life and promise." + ) + rect = pymupdf.Rect(50, 50, 500, 280) + fontsize = 50 rc = -1 - fontsize = 12 - page.draw_rect(r) - while rc < 0: - rc = page.insert_textbox( - r, - text, - fontsize=fontsize, - align=fitz.TEXT_ALIGN_JUSTIFY, - ) - fontsize -= 0.5 + while rc < 0: # look for largest font size that makes the text fit + rc = page.insert_textbox(rect, text, fontname="hebo", fontsize=fontsize) + fontsize -= 1 + # confirm text won't lap outside rect blocks = page.get_text("blocks") - bbox = fitz.Rect(blocks[0][:4]) - assert bbox in r + bbox = pymupdf.Rect(blocks[0][:4]) + assert bbox in rect + + +def test_htmlbox1(): + """Write HTML-styled text into a rect with different rotations. + + The text is styled and contains a link. + Then extract the text again, and + - assert that text was written in the 4 different angles, + - assert that text properties are correct (bold, italic, color), + - assert that the link has been correctly inserted. + + We try to insert into a rectangle that is too small, setting + scale=False and confirming we have a negative return code. + """ + if not hasattr(pymupdf, "mupdf"): + print("'test_htmlbox1' not executed in classic.") + return + + rect = pymupdf.Rect(100, 100, 200, 200) # this only works with scale=True + + base_text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.""" + + text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.""" + + doc = pymupdf.Document() + + for rot in (0, 90, 180, 270): + wdirs = ((1, 0), (0, -1), (-1, 0), (0, 1)) # all writing directions + page = doc.new_page() + spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=1) + assert spare_height < 0 + assert scale == 1 + spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0) + page.draw_rect(rect, (1, 0, 0)) + doc.save(os.path.normpath(f'{__file__}/../../tests/test_htmlbox1.pdf')) + assert abs(spare_height - 3.8507) < 0.001 + assert 0 < scale < 1 + page = doc.reload_page(page) + link = page.get_links()[0] # extracts the links on the page + + assert link["uri"] == "https://www.artifex.com" + + # Assert plain text is complete. + # We must remove line breaks and any ligatures for this. + assert base_text == page.get_text(flags=0)[:-1].replace("\n", " ") + + encounters = 0 # counts the words with selected properties + for b in page.get_text("dict")["blocks"]: + for l in b["lines"]: + wdir = l["dir"] # writing direction + assert wdir == wdirs[page.number] + for s in l["spans"]: + stext = s["text"] + color = pymupdf.sRGB_to_pdf(s["color"]) + bold = bool(s["flags"] & 16) + italic = bool(s["flags"] & 2) + if stext in ("ullamco", "laboris", "voluptate"): + encounters += 1 + if stext == "ullamco": + assert bold is True + assert italic is False + assert color == pymupdf.pdfcolor["black"] + elif stext == "laboris": + assert bold is False + assert italic is True + assert color == pymupdf.pdfcolor["black"] + elif stext == "voluptate": + assert bold is True + assert italic is False + assert color == pymupdf.pdfcolor["green"] + else: + assert bold is False + assert italic is False + # all 3 special special words were encountered + assert encounters == 3 + + +def test_htmlbox2(): + """Test insertion without scaling""" + if not hasattr(pymupdf, "mupdf"): + print("'test_htmlbox2' not executed in classic.") + return + + doc = pymupdf.open() + rect = pymupdf.Rect(100, 100, 200, 200) # large enough to hold text + page = doc.new_page() + bottoms = set() + for rot in (0, 90, 180, 270): + spare_height, scale = page.insert_htmlbox( + rect, "Hello, World!", scale_low=1, rotate=rot + ) + assert scale == 1 + assert 0 < spare_height < rect.height + bottoms.add(spare_height) + assert len(bottoms) == 1 # same result for all rotations + + +def test_htmlbox3(): + """Test insertion with opacity""" + if not hasattr(pymupdf, "mupdf"): + print("'test_htmlbox3' not executed in classic.") + return + + rect = pymupdf.Rect(100, 250, 300, 350) + text = """Just some text.""" + doc = pymupdf.open() + page = doc.new_page() + + # insert some text with opacity + page.insert_htmlbox(rect, text, opacity=0.5) + + # lowlevel-extract inserted text to access opacity + span = page.get_texttrace()[0] + assert span["opacity"] == 0.5 + + +def test_3559(): + doc = pymupdf.Document() + page = doc.new_page() + text_insert="""

""" + rect = pymupdf.Rect(100, 100, 200, 200) + page.insert_htmlbox(rect, text_insert) + + +def test_3916(): + doc = pymupdf.open() + rect = pymupdf.Rect(100, 100, 101, 101) # Too small for the text. + page = doc.new_page() + spare_height, scale = page.insert_htmlbox(rect, "Hello, World!", scale_low=0.5) + assert spare_height == -1 + + +def test_4400(): + with pymupdf.open() as document: + page = document.new_page() + writer = pymupdf.TextWriter(page.rect) + text = '111111111' + print(f'Calling writer.fill_textbox().', flush=1) + writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8) + + +def test_4613(): + print() + text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n' + story = pymupdf.Story(text) + rect = pymupdf.Rect(10, 10, 100, 100) + + # Test default operation where we get additional scaling down because of + # the long words in our text. + print(f'test_4613(): ### Testing default operation.') + with pymupdf.open() as doc: + page = doc.new_page() + spare_height, scale = page.insert_htmlbox(rect, story) + print(f'test_4613(): {spare_height=} {scale=}') + # The additional down-scaling from the long word widths results in + # spare vertical space. + page.draw_rect(rect, (1, 0, 0)) + path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf') + doc.save(path) + + path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png') + path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png') + pixmap = page.get_pixmap(dpi=300) + pixmap.save(path_pixmap) + + pixmap_diff = gentle_compare.pixmaps_diff(path_pixmap_expected, pixmap) + pixmap_diff.save(os.path.normpath(f'{__file__}/../../tests/test_4613-diff.png')) + + rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected) + print(f'{rms=}') + assert rms == 0, f'{rms=}' + + assert abs(spare_height - 45.7536) < 0.1 + assert abs(scale - 0.4009) < 0.01 + + new_text = page.get_text('text', clip=rect) + print(f'test_4613(): new_text:') + print(textwrap.indent(new_text, ' ')) + assert new_text == text + + # Check with _scale_word_width=False - ignore too-wide words. + print(f'test_4613(): ### Testing with _scale_word_width=False.') + with pymupdf.open() as doc: + page = doc.new_page() + spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False) + print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}') + # With _scale_word_width=False we allow long words to extend beyond the + # rect, so we should have spare_height == 0 and only a small amount of + # down-scaling. + assert spare_height == 0 + assert abs(scale - 0.914) < 0.01 + new_text = page.get_text('text', clip=rect) + print(f'test_4613(): new_text:') + print(textwrap.indent(new_text, ' ')) + assert new_text == textwrap.dedent(''' + abcdefghijklmno + ABCDEFGHIJKLM + abcdefghijklmno + ABCDEFGHIJKLM + abcdefghijklmno + ABCDEFGHIJKLM + ''')[1:] + + + # Check that we get no fit if scale_low is not low enough. + print(f'test_4613(): ### Testing with scale_low too high to allow a fit.') + with pymupdf.open() as doc: + page = doc.new_page() + scale_low=0.6 + spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low) + print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}') + assert spare_height == -1 + assert scale == scale_low diff --git a/tests/test_textextract.py b/tests/test_textextract.py index 3f5d6ec46..ddb118681 100644 --- a/tests/test_textextract.py +++ b/tests/test_textextract.py @@ -1,17 +1,24 @@ """ -Exract page text in various formats. +Extract page text in various formats. No checks performed - just contribute to code coverage. """ import os +import platform +import sys +import textwrap -import fitz +import pymupdf -scriptdir = os.path.abspath(os.path.dirname(__file__)) +import gentle_compare + + +pymupdfdir = os.path.abspath(f'{__file__}/../..') +scriptdir = f'{pymupdfdir}/tests' filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") def test_extract1(): - doc = fitz.open(filename) + doc = pymupdf.open(filename) page = doc[0] text = page.get_text("text") blocks = page.get_text("blocks") @@ -23,6 +30,942 @@ def test_extract1(): text = page.get_text("html") text = page.get_text("xhtml") text = page.get_text("xml") - rects = fitz.get_highlight_selection(page, start=page.rect.tl, stop=page.rect.br) - text = fitz.ConversionHeader("xml") - text = fitz.ConversionTrailer("xml") + rects = pymupdf.get_highlight_selection(page, start=page.rect.tl, stop=page.rect.br) + text = pymupdf.ConversionHeader("xml") + text = pymupdf.ConversionTrailer("xml") + +def _test_extract2(): + import sys + import time + path = f'{scriptdir}/../../PyMuPDF-performance/adobe.pdf' + if not os.path.exists(path): + print(f'test_extract2(): not running because does not exist: {path}') + return + doc = pymupdf.open( path) + for opt in ( + 'dict', + 'dict2', + 'text', + 'blocks', + 'words', + 'html', + 'xhtml', + 'xml', + 'json', + 'rawdict', + 'rawjson', + ): + for flags in None, pymupdf.TEXTFLAGS_TEXT: + t0 = time.time() + for page in doc: + page.get_text(opt, flags=flags) + t = time.time() - t0 + print(f't={t:.02f}: opt={opt} flags={flags}') + sys.stdout.flush() + +def _test_extract3(): + import sys + import time + path = f'{scriptdir}/../../PyMuPDF-performance/adobe.pdf' + if not os.path.exists(path): + print(f'test_extract3(): not running because does not exist: {path}') + return + doc = pymupdf.open( path) + t0 = time.time() + for page in doc: + page.get_text('json') + t = time.time() - t0 + print(f't={t}') + sys.stdout.flush() + +def test_extract4(): + ''' + Rebased-specific. + ''' + if not hasattr(pymupdf, 'mupdf'): + return + path = f'{pymupdfdir}/tests/resources/2.pdf' + document = pymupdf.open(path) + page = document[4] + + out = 'test_stext.html' + text = page.get_text('html') + with open(out, 'w') as f: + f.write(text) + print(f'Have written to: {out}') + + out = 'test_extract.html' + writer = pymupdf.mupdf.FzDocumentWriter( + out, + 'html', + pymupdf.mupdf.FzDocumentWriter.OutputType_DOCX, + ) + device = pymupdf.mupdf.fz_begin_page(writer, pymupdf.mupdf.fz_bound_page(page)) + pymupdf.mupdf.fz_run_page(page, device, pymupdf.mupdf.FzMatrix(), pymupdf.mupdf.FzCookie()) + pymupdf.mupdf.fz_end_page(writer) + pymupdf.mupdf.fz_close_document_writer(writer) + print(f'Have written to: {out}') + + def get_text(page, space_guess): + buffer_ = pymupdf.mupdf.FzBuffer( 10) + out = pymupdf.mupdf.FzOutput( buffer_) + writer = pymupdf.mupdf.FzDocumentWriter( + out, + 'text,space-guess={space_guess}', + pymupdf.mupdf.FzDocumentWriter.OutputType_DOCX, + ) + device = pymupdf.mupdf.fz_begin_page(writer, pymupdf.mupdf.fz_bound_page(page)) + pymupdf.mupdf.fz_run_page(page, device, pymupdf.mupdf.FzMatrix(), pymupdf.mupdf.FzCookie()) + pymupdf.mupdf.fz_end_page(writer) + pymupdf.mupdf.fz_close_document_writer(writer) + text = buffer_.fz_buffer_extract() + text = text.decode('utf8') + n = text.count(' ') + print(f'{space_guess=}: {n=}') + return text, n + page = document[4] + text0, n0 = get_text(page, 0) + text1, n1 = get_text(page, 0.5) + text2, n2 = get_text(page, 0.001) + text2, n2 = get_text(page, 0.1) + text2, n2 = get_text(page, 0.3) + text2, n2 = get_text(page, 0.9) + text2, n2 = get_text(page, 5.9) + assert text1 == text0 + +def test_2954(): + ''' + Check handling of unknown unicode characters, issue #2954, fixed in + mupdf-1.23.9 with addition of FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE. + ''' + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2954.pdf') + flags0 = (0 + | pymupdf.TEXT_PRESERVE_WHITESPACE + | pymupdf.TEXT_PRESERVE_LIGATURES + | pymupdf.TEXT_MEDIABOX_CLIP + ) + + document = pymupdf.Document(path) + + expected_good = ( + "IT-204-IP (2021) Page 3 of 5\nNYPA2514 12/06/21\nPartner's share of \n" + " modifications (see instructions)\n20\n State additions\nNumber\n" + "A ' Total amount\nB '\n State allocated amount\n" + "EA '\n20a\nEA '\n20b\nEA '\n20c\nEA '\n20d\nEA '\n20e\nEA '\n20f\n" + "Total addition modifications (total of column A, lines 20a through 20f)\n" + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . \n" + "21\n21\n22\n State subtractions\n" + "Number\nA ' Total amount\nB '\n State allocated amount\n" + "ES '\n22a\nES '\n22b\nES '\n22c\nES '\n22d\nES '\n22e\nES '\n22f\n23\n23\n" + "Total subtraction modifications (total of column A, lines 22a through 22f). . . . . . . . . . . . . . . . . . . . . . . . . . . . \n" + "Additions to itemized deductions\n24\nAmount\n" + "Letter\n" + "24a\n24b\n24c\n24d\n24e\n24f\n" + "Total additions to itemized deductions (add lines 24a through 24f)\n" + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . \n" + "25\n25\n" + "Subtractions from itemized deductions\n" + "26\nLetter\nAmount\n26a\n26b\n26c\n26d\n26e\n26f\n" + "Total subtractions from itemized deductions (add lines 26a through 26f) . . . . . . . . . . . . . . . . . . . . . . . . . . . . \n" + "27\n27\n" + "This line intentionally left blank. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . \n" + "28\n28\n118003213032\n" + ) + + def check_good(text): + ''' + Returns true if `text` is approximately the same as `expected_good`. + + 2024-01-09: MuPDF master and 1.23.x give slightly different 'good' + output, differing in a missing newline. So we compare without newlines. + ''' + return text.replace('\n', '') == expected_good.replace('\n', '') + + n_fffd_good = 0 + n_fffd_bad = 749 + + def get(flags=None): + text = [page.get_text(flags=flags) for page in document] + assert len(text) == 1 + text = text[0] + n_fffd = text.count(chr(0xfffd)) + if 0: + # This print() fails on Windows with UnicodeEncodeError. + print(f'{flags=} {n_fffd=} {text=}') + return text, n_fffd + + text_none, n_fffd_none = get() + text_0, n_fffd_0 = get(flags0) + + text_1, n_fffd_1 = get(flags0 | pymupdf.TEXT_USE_CID_FOR_UNKNOWN_UNICODE) + + assert n_fffd_none == n_fffd_good + assert n_fffd_0 == n_fffd_bad + assert n_fffd_1 == n_fffd_good + + assert check_good(text_none) + assert not check_good(text_0) + assert check_good(text_1) + + +def test_3027(): + path = path = f'{pymupdfdir}/tests/resources/2.pdf' + doc = pymupdf.open(path) + page = doc[0] + textpage = page.get_textpage() + pymupdf.utils.get_text(page=page, option="dict", textpage=textpage)["blocks"] + + +def test_3186(): + + # codespell:ignore-begin + texts_expected = [ + "Assicurazione sulla vita di tipo Unit Linked\nDocumento informativo precontrattuale aggiuntivo\nper i prodotti d\x00investimento assicurativi\n(DIP aggiuntivo IBIP)\nImpresa: AXA MPS Financial DAC \nProdotto: Progetto Protetto New - Global Dividends\nContratto Unit linked (Ramo III)\nData di realizzazione: Aprile 2023\nIl presente documento contiene informazioni aggiuntive e complementari rispetto a quelle presenti nel documento \ncontenente le informazioni chiave per i prodotti di investimento assicurativi (KID) per aiutare il potenziale \ncontraente a capire più nel dettaglio le caratteristiche del prodotto, gli obblighi contrattuali e la situazione \npatrimoniale dell\x00impresa.\nIl Contraente deve prendere visione delle condizioni d\x00assicurazione prima della sottoscrizione del Contratto.\nAXA MPS Financial DAC, Wolfe Tone House, Wolfe Tone Street, Dublin, DO1 HP90, Irlanda; Tel: 00353-1-6439100; \nsito internet: www.axa-mpsfinancial.ie; e-mail: supporto@axa-mpsfinancial.ie;\nAXA MPS Financial DAC, società del Gruppo Assicurativo AXA Italia, iscritta nell\x00Albo delle Imprese di assicurazione \ncon il numero II.00234. \nLa Compagnia mette a disposizione dei clienti i seguenti recapiti per richiedere eventuali informazioni sia in merito alla \nCompagnia sia in relazione al contratto proposto: Tel: 00353-1-6439100; sito internet: www.axa-mpsfinancial.ie; \ne-mail: supporto@axa-mpsfinancial.ie;\nAXA MPS Financial DAC è un\x00impresa di assicurazione di diritto Irlandese, Sede legale 33 Sir John Rogerson's Quay, \nDublino D02 XK09 Irlanda. L\x00Impresa di Assicurazione è stata autorizzata all\x00esercizio dell\x00attività assicurativa con \nprovvedimento n. C33602 emesso dalla Central Bank of Ireland (l\x00Autorità di vigilanza irlandese) in data 14/05/1999 \ned è iscritta in Irlanda presso il Companies Registration Office (registered nr. 293822). \nLa Compagnia opera in Italia esclusivamente in regime di libera prestazione di servizi ai sensi dell\x00art. 24 del D. Lgs. \n07/09/2005, n. 209 e può investire in attivi non consentiti dalla normativa italiana in materia di assicurazione sulla \nvita, ma in conformità con la normativa irlandese di riferimento in quanto soggetta al controllo della Central Bank of \nIreland.\nCon riferimento all\x00ultimo bilancio d\x00esercizio (esercizio 2021) redatto ai sensi dei principi contabili vigenti, il patrimonio \nnetto di AXA MPS Financial DAC ammonta a 139,6 milioni di euro di cui 635 mila euro di capitale sociale interamente \nversato e 138,9 milioni di euro di riserve patrimoniali compreso il risultato di esercizio.\nAl 31 dicembre 2021 il Requisito patrimoniale di solvibilità è pari a 90 milioni di euro (Solvency Capital Requirement, \nSCR). Sulla base delle valutazioni effettuate della Compagnia coerentemente con gli esistenti dettami regolamentari, il \nRequisito patrimoniale minimo al 31 dicembre 2021 ammonta a 40 milioni di euro (Minimum Capital Requirement, \nMCR).\nL'indice di solvibilità di AXA MPS Financial DAC, ovvero l'indice che rappresenta il rapporto tra l'ammontare del margine \ndi solvibilità disponibile e l'ammontare del margine di solvibilità richiesto dalla normativa vigente, e relativo all'ultimo \nbilancio approvato, è pari al 304% (solvency ratio). L'importo dei fondi propri ammissibili a copertura dei requisiti \npatrimoniali è pari a 276 milioni di euro (Eligible Own Funds, EOF).\nPer informazioni patrimoniali sulla società è possibile consultare il sito: www.axa-mpsfinancial.ie/chi-siamo\nSi rinvia alla relazione sulla solvibilità e sulla condizione finanziaria dell\x00impresa (SFCR) disponibile sul sito internet \ndella Compagnia al seguente link www.axa-mpsfinancial.ie/comunicazioni \nAl contratto si applica la legge italiana\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 1 di 9\n", + "Quali sono le prestazioni?\nIl contratto prevede le seguenti prestazioni:\na)Prestazioni in caso di vita dell'assicurato\nPrestazione in caso di Riscatto Totale e parziale\nA condizione che siano trascorsi almeno 30 giorni dalla Data di Decorrenza (conclusione del Contratto) e fino all\x00ultimo \nGiorno Lavorativo della terzultima settimana precedente la data di scadenza, il Contraente può riscuotere, interamente \no parzialmente, il Valore di Riscatto. In caso di Riscatto totale, la liquidazione del Valore di Riscatto pone fine al \nContratto con effetto dalla data di ricezione della richiesta.\nIl Contraente ha inoltre la facoltà di esercitare parzialmente il diritto di Riscatto, nella misura minima di 500,00 euro, \nda esercitarsi con le stesse modalità previste per il Riscatto totale. In questo caso, il Contratto rimane in vigore per \nl\x00ammontare residuo, a condizione che il Controvalore delle Quote residue del Contratto non sia inferiore a 1.000,00 \neuro.\nb) Prestazione a Scadenza\nAlla data di scadenza, sempre che l\x00Assicurato sia in vita, l\x00Impresa di Assicurazione corrisponderà agli aventi diritto un \nammontare risultante dal Controvalore delle Quote collegate al Contratto alla scadenza, calcolato come prodotto tra il \nValore Unitario della Quota (rilevato in corrispondenza della data di scadenza) e il numero delle Quote attribuite al \nContratto alla medesima data.\nc) Prestazione in corso di Contratto\nPurché l\x00assicurato sia in vita, nel corso della durata del Contratto, il Fondo Interno mira alla corresponsione di due \nPrestazioni Periodiche. Le prestazioni saranno pari all\x00ammontare risultante dalla moltiplicazione tra il numero di Quote \nassegnate al Contratto il primo giorno Lavorativo della settimana successiva alla Data di Riferimento e 2,50% del \nValore Unitario della Quota registrato alla Data di istituzione del Fondo Interno.\nLe prestazioni verranno liquidate entro trenta giorni dalle Date di Riferimento.\nData di Riferimento\n 1° Prestazione Periodica\n24/04/2024\n 2° Prestazione Periodica\n23/04/2025\nLa corresponsione delle Prestazioni Periodiche non è collegata alla performance positiva o ai ricavi incassati dal Fondo \nInterno, pertanto, la corresponsione potrebbe comportare una riduzione del Controvalore delle Quote senza comportare \nalcuna riduzione del numero di Quote assegnate al Contratto.\nd) Prestazione assicurativa principale in caso di decesso dell'Assicurato\nIn caso di decesso dell\x00Assicurato nel corso della durata contrattuale, è previsto il pagamento ai Beneficiari di un \nimporto pari al Controvalore delle Quote attribuite al Contratto, calcolato come prodotto tra il Valore Unitario della \nQuota rilevato alla Data di Valorizzazione della settimana successiva alla data in cui la notifica di decesso \ndell\x00Assicurato perviene all\x00Impresa di Assicurazione e il numero delle Quote attribuite al Contratto alla medesima data, \nmaggiorato di una percentuale pari allo 0,1%.\nQualora il capitale così determinato fosse inferiore al Premio pagato, sarà liquidato un ulteriore importo pari alla \ndifferenza tra il Premio pagato, al netto della parte di Premio riferita a eventuali Riscatti parziali e l\x00importo caso morte \ncome sopra determinato. Tale importo non potrà essere in ogni caso superiore al 5% del Premio pagato.\nOpzioni contrattuali\nIl Contratto non prevede opzioni contrattuali.\nFondi Assicurativi\nLe prestazioni di cui sopra sono collegate, in base all\x00allocazione del premio come descritto alla sezione \x01Quando e \ncome devo pagare?\x02, al valore delle quote del Fondo Interno denominato PP27 Global Dividends.\nil Fondo interno mira al raggiungimento di un Obiettivo di Protezione del Valore Unitario di Quota, tramite il \nconseguimento di un Valore Unitario di Quota a scadenza almeno pari al 100% del valore di quota registrato alla Data \ndi istituzione dal Fondo Interno.\nIl regolamento di gestione del Fondo Interno è disponibile sul sito dell\x00Impresa di Assicurazione \nwww.axa-mpsfinancial.ie dove puo essere acquisito su supporto duraturo.\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 2 di 9\n", + 'Che cosa NON è assicurato\nRischi esclusi\nIl rischio di decesso dell\x00Assicurato è coperto qualunque sia la causa, senza limiti territoriali e senza \ntenere conto dei cambiamenti di professione dell\x00Assicurato, ad eccezione dei seguenti casi:\n\x03 il decesso, entro i primi sette anni dalla data di decorrenza del Contratto, dovuto alla sindrome da \nimmunodeficienza acquisita (AIDS) o ad altra patologia ad essa associata;\n\x03 dolo del Contraente o del Beneficiario;\n\x03 partecipazione attiva dell\x00Assicurato a delitti dolosi;\n\x03 partecipazione dell\x00Assicurato a fatti di guerra, salvo che non derivi da obblighi verso lo Stato \nItaliano: in questo caso la garanzia può essere prestata su richiesta del Contraente, alle condizioni \nstabilite dal competente Ministero;\n\x03 incidente di volo, se l\x00Assicurato viaggia a bordo di un aeromobile non autorizzato al volo o con \npilota non titolare di brevetto idoneo e, in ogni caso, se viaggia in qualità di membro \ndell\x00equipaggio;\n\x03 suicidio, se avviene nei primi due anni dalla Data di Decorrenza del Contratto\nCi sono limiti di copertura?\nNon vi sono ulteriori informazioni rispetto al contenuto del KID.\nChe obblighi ho? Quali obblighi ha l\x00Impresa?\nCosa fare in caso \ndi evento?\nDenuncia\nCon riferimento alla liquidazione delle prestazioni dedotte in Contratto, il Contraente o, se del caso, \nil Beneficiario e il Referente Terzo, sono tenuti a recarsi presso la sede dell\x00intermediario presso il \nquale il Contratto è stato sottoscritto ovvero a inviare preventivamente, a mezzo di lettera \nraccomandata con avviso di ricevimento al seguente recapito:\n\x03 AXA MPS Financial DAC\n\x03 Wolfe Tone House, Wolfe Tone Street,\n\x03 Dublin, DO1 HP90 - Ireland\n\x03 Numero Verde: 800.231.187\n\x03 email: supporto@axa-mpsfinancial.ie\ni documenti di seguito elencati per ciascuna prestazione, al fine di consentire all\x00Impresa di \nAssicurazione di verificare l\x00effettiva esistenza dell\x00obbligo di pagamento.\nin caso di Riscatto totale, il Contraente deve inviare all\x00Impresa di Assicurazione:\n\x04 la richiesta di Riscatto totale firmata dal Contraente, indicando il conto corrente su cui il \npagamento deve essere effettuato. Nel caso il conto corrente sia intestato a persona diversa dal \nContraente o dai beneficiari o sia cointestato, il Contraente deve fornire anche I documenti del \ncointestatario e specificare la relazione con il terzo il cui conto viene indicato.\n\x04 copia di un valido documento di identità del Contraente o di un documento attestante i poteri di \nlegale rappresentante, nel caso in cui il Contraente sia una persona giuridica;\nin caso di Riscatto parziale, il Contraente deve inviare all\x00Impresa di Assicurazione:\n\x04 la richiesta di Riscatto parziale firmata dal Contraente, contenente l\x00indicazione dei Fondi \nInterni/OICR che intende riscattare e il relativo ammontare non ché l\x00indicazione del conto corrente \nbancario sul quale effettuare il pagamento;\n\x04 copia di un valido documento di identità del Contraente, o di un documento attestante i poteri di \nlegale rappresentante, nel caso in cui il Contraente sia una persona giuridica.\nIn caso di richiesta di Riscatto totale o parziale non corredata dalla sopra elencata documentazione, \nl\x00Impresa di Assicurazione effettuerà il disinvestimento delle Quote collegate al Contratto alla data \ndi ricezione della relativa richiesta. L\x00Impresa di Assicurazione provvederà tuttavia alla liquidazione \ndelle somme unicamente al momento di ricezione della documentazione mancante, prive degli \neventuali interessi che dovessero maturare;\nIn caso di decesso dell\x00Assicurato, il Beneficiario/i o il Referente Terzo deve inviare all\x00Impresa di \nAssicurazione:\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 3 di 9\n', + '\x04 la richiesta di pagamento sottoscritta da tutti i Beneficiari, con l\x00indicazione del conto corrente \nbancario sul quale effettuare il pagamento; Nel caso il conto corrente sia intestato a persona \ndiversa dal Contraente o dai beneficiari o sia cointestato, il Contraente deve fornire anche I \ndocumenti del cointestatario e specificare la relazione con il terzo il cui conto viene indicato.\n\x04 copia di un valido documento d\x00identità dei Beneficiari o di un documento attestante i poteri di \nlegale rappresentante, nel caso in cui il Beneficiario sia una persona giuridica;\n\x04 il certificato di morte dell\x00Assicurato;\n\x04 la relazione medica sulle cause del decesso;\n\x04 copia autenticata del testamento accompagnato da dichiarazione sostitutiva di atto di notorietà \ncon l\x00indicazione (i) della circostanza che il testamento è l\x00ultimo da considerarsi valido e non è \nstato impugnato e (ii) degli eredi testamentari, le relative età e capacità\ndi agire;\n\x04 in assenza di testamento, atto notorio (o dichiarazione sostitutiva di atto di notorietà) attestante \nche il decesso è avvenuto senza lasciare testamento e che non vi sono altri soggetti cui la legge \nriconosce diritti o quote di eredità;\n\x04 decreto del Giudice Tutelare nel caso di Beneficiari di minore età, con l\x00indicazione della persona \ndesignata alla riscossione;\n\x04 copia del Questionario KYC.\nPrescrizione: Alla data di redazione del presente documento, i diritti dei beneficiari dei contratti di \nassicurazione sulla vita si prescrivono nel termine di dieci anni dal giorno in cui si è verificato il fatto \nsu cui il diritto si fonda. Decorso tale termine e senza che la Compagnia abbia ricevuto alcuna \ncomunicazione e/o disposizione, gli importi derivanti dal contratto saranno devoluti al Fondo \ncostitutivo presso il Ministero dell\x00Economia e delle Finanze \x01depositi dormienti\x02.\nErogazione della prestazione\nL\x00Impresa di Assicurazione esegue il pagamento entro trenta giorni dal ricevimento della \ndocumentazione completa all\x00indirizzo sopra indicato.\n \nLe dichiarazioni del Contraente, e dell\x00Assicurato se diverso dal Contraente, devono essere esatte e \nveritiere. In caso di dichiarazioni inesatte o reticenti relative a circostanze tali che l\x00Impresa di \nAssicurazione non avrebbe dato il suo consenso, non lo avrebbe dato alle medesime condizioni se \navesse conosciuto il vero stato delle cose, l\x00Impresa di Assicurazione ha diritto a:\na) in caso di dolo o colpa grave:\n\x04 impugnare il Contratto dichiarando al Contraente di voler esercitare tale diritto entro tre mesi dal \ngiorno in cui ha conosciuto l\x00inesattezza della dichiarazione o le reticenze;\n\x04 trattenere il Premio relativo al periodo di assicurazione in corso al momento dell\x00impugnazione e, \nin ogni caso, il Premio corrispondente al primo anno;\n\x04 restituire, in caso di decesso dell\x00Assicurato, solo il Controvalore delle Quote acquisite al \nmomento del decesso, se l\x00evento si verifica prima che sia decorso il termine dianzi indicato per \nl\x00impugnazione;\nb) ove non sussista dolo o colpa grave:\n\x04 recedere dal Contratto, mediante dichiarazione da farsi al Contraente entro tre mesi dal giorno in \ncui ha conosciuto l\x00inesattezza della dichiarazione o le reticenze;\n\x04 se il decesso si verifica prima che l\x00inesattezza della dichiarazione o la reticenza sia conosciuta \ndall\x00Impresa di Assicurazione, o prima che l\x00Impresa abbia dichiarato di recedere dal Contratto, di \nridurre la somma dovuta in proporzione alla differenza tra il Premio convenuto e quello che sarebbe \nstato applicato se si fosse conosciuto il vero stato delle cose.\nIl Contraente è tenuto a inoltrare per iscritto alla Compagnia (posta ordinaria e mail) eventuali \ncomunicazioni inerenti:\n-modifiche dell\x00indirizzo presso il quale intende ricevere le comunicazioni relative al contratto;\n-variazione della residenza Europea nel corso della durata del contratto, presso altro Paese \nmembro della Unione Europea;\n-variazione degli estremi di conto corrente bancario.\nIn tal caso è necessario inoltrare la richiesta attraverso l\x00invio del modulo del mandato, compilato e \nsottoscritto dal contraente, reperibile nella sezione \x01comunicazioni\x02 sul sito internet della \ncompagnia all\x00indirizzo www.axa-mpsfinancial.ie\nFATCA (Foreign Account Tax Compliance Act) e CRS (Common Standard Reporting)\nLa normativa denominata rispettivamente FATCA (Foreign Account Tax Compliance Act - \nIntergovernmental Agreement sottoscritto tra Italia e Stati Uniti in data 10 gennaio 2014 e Legge n. \n95 del 18 giugno 2015) e CRS (Common Reporting Standard - Decreto Ministeriale del 28 \ndicembre 2015) impone agli operatori commerciali, al fine di contrastare la frode fiscale e \nl\x00evasione fiscale transfrontaliera, di eseguire la puntuale identificazione della propria clientela al \nfine di determinarne l\x00effettivo status di contribuente estero.\nDichiarazioni \ninesatte o \nreticenti\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 4 di 9\n', + "I dati anagrafici e patrimoniali dei Contraenti identificati come fiscalmente residenti negli USA e/o \nin uno o più Paesi aderenti al CRS, dovranno essere trasmessi all\x00autorità fiscale locale, tramite \nl\x00Agenzia delle Entrate.\nL\x00identificazione avviene in fase di stipula del contratto e deve essere ripetuta in caso di \ncambiamento delle condizioni originarie durante tutta la sua durata, mediante l\x00acquisizione di \nautocertificazione rilasciata dai Contraenti. Ogni contraente è tenuto a comunicare \ntempestivamente eventuali variazioni rispetto a quanto dichiarato o rilevato in fase di sottoscrizione \ndel contratto di assicurazione. La Società si riserva inoltre di verificare i dati raccolti e di richiedere \nulteriori informazioni. In caso di autocertificazione che risulti compilata parzialmente o in maniera \nerrata, nonché in caso di mancata/non corretta comunicazione dei propri dati anagrafici, la società \nqualora abbia rilevato indizi di americanità e/o residenze fiscali estere nelle informazioni in suo \npossesso, assocerà al cliente la condizione di contribuente estero, provvedendo alla comunicazione \ndovuta.\nAntiriciclaggio\nIl Contraente è tenuto a fornire alla Compagnia tutte le informazioni necessarie al fine \ndell\x00assolvimento dell\x00adeguata verifica ai fini antiriciclaggio. Qualora la Compagnia, in ragione \ndella mancata collaborazione del Contraente, non sia in grado di portare a compimento l\x00adeguata \nverifica, la stessa non potrà concludere il Contratto o dovrà porre fine allo stesso. In tali ipotesi le \nsomme dovute al Contraente dovranno essere allo stesso versate mediante bonifico a valere un \nconto corrente intestato al Contraente stesso. In tali ipotesi le disponibilità finanziarie \neventualmente già acquisite dalla Compagnia dovranno essere restituite al Contraente liquidando il \nrelativo importo tramite bonifico bancario su un conto corrente bancario indicato dal Contraente e \nallo stesso intestato.\nIn nessun caso l'Impresa di Assicurazione sarà tenuta a fornire alcuna copertura assicurativa, \nsoddisfare richieste di risarcimento o garantire alcuna indennità in virtù del presente contratto, \nqualora tale copertura, pagamento o indennità possa esporla a divieti, sanzioni economiche o \nrestrizioni ai sensi di Risoluzioni delle Nazioni Unite o sanzioni economiche o commerciali, leggi o \nnorme dell\x00Unione Europea, del Regno Unito o degli Stati Uniti d\x00America, ove applicabili in Italia.\nQuando e come devo pagare?\nPremio\nIl Contratto prevede il pagamento di un Premio Unico il cui ammontare minimo è pari a 2.500,00 \neuro, incrementabile di importo pari o in multiplo di 50,00 euro, da corrispondersi in un\x00unica \nsoluzione prima della conclusione del Contratto.\nNon è prevista la possibilità di effettuare versamenti aggiuntivi successivi.\nIl versamento del Premio Unico può essere effettuato mediante addebito su conto corrente \nbancario, indicato nel Modulo di Proposta, previa autorizzazione del titolare del conto corrente.\nIl pagamento dei Premio Unico può essere eseguito mediante addebito su conto corrente bancario, \nprevia autorizzazione, intestato al Contraente oppure tramite bonifico bancario sul conto corrente \ndell\x00Impresa di Assicurazione.\nRimborso\nIl rimborso del Premio Versato è previsto nel caso in cui il Contraente decida di revocare la proposta \nfinché il contratto non è concluso.\nSconti\nAl verificarsi di condizioni particolari ed eccezionali che potrebbero riguardare \x03 a titolo \nesemplificativo ma non esaustivo \x03 il Contraente e la relativa situazione assicurativo/finanziaria, \nl\x00ammontare del Premio pagato e gli investimenti selezionati dal Contraente, l\x00Impresa di \nAssicurazione si riserva la facoltà di applicare sconti sugli oneri previsti dal contratto, concordando \ntale agevolazione con il Contraente.\nQuando comincia la copertura e quando finisce?\nDurata\nIl Contratto ha una durata massima pari a 5 anni 11 mesi e 27 giorni, sino alla data di scadenza \n(11/04/2029, la \x01data di scadenza\x02).\nSospensione\nNon sono possibili delle sospensioni della copertura assicurativa\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 5 di 9\n", + 'Come posso revocare la proposta, recedere dal contratto o risolvere il contratto? \nRevoca\nLa Proposta di assicurazione può essere revocata fino alle ore 24:00 del giorno in cui il Contratto è \nconcluso. In tal caso, l\x00Impresa di Assicurazione restituirà al Contraente il Premio pagato entro \ntrenta giorni dal ricevimento della comunicazione di Revoca.\nRecesso\nIl Contraente può recedere dal Contratto entro trenta giorni dalla sua conclusione. Il Recesso dovrà \nessere comunicato all\x00Impresa di Assicurazione mediante lettera raccomandata con avviso di \nricevimento.\nL\x00Impresa di Assicurazione, entro trenta giorni dal ricevimento della comunicazione relativa al \nRecesso, rimborserà al Contraente il Controvalore delle Quote attribuite al Contratto alla data di \nricevimento della richiesta di recesso incrementato dai caricamenti, ove previsti, e dedotte \neventuali agevolazioni.\nRisoluzione\nLa risoluzione del contratto è prevista tramite la richiesta di riscatto totale esercitabile in qualsiasi \nmomento della durata contrattuale\nSono previsti riscatti o riduzioni? Si\n no\nValori di\nriscatto e\nriduzione\nA condizione che siano trascorsi almeno 30 giorni dalla Data di Decorrenza (conclusione del \nContratto) e fino all\x00ultimo Giorno Lavorativo della terzultima settimana precedente la data di \nscadenza, il Contraente può riscuotere, interamente o parzialmente, il Valore di Riscatto. In caso di \nRiscatto totale, la liquidazione del Valore di Riscatto pone fine al Contratto con effetto dalla data di \nricezione della richiesta.\nL\x00importo che sarà corrisposto al Contraente in caso di Riscatto sarà pari al Controvalore delle \nQuote del Fondo Interno attribuite al Contratto alla data di Riscatto, al netto dei costi di Riscatto.\nIn caso di Riscatto, ai fini del calcolo del Valore Unitario della Quota, si farà riferimento alla Data di \nValorizzazione della settimana successiva alla data in cui la comunicazione di Riscatto del \nContraente perviene all\x00Impresa di Assicurazione, corredata di tutta la documentazione, al netto dei \ncosti di Riscatto, salvo il verificarsi di Eventi di Turbativa.\nIl Contraente assume il rischio connesso all\x00andamento negativo del valore delle Quote e, pertanto, \nesiste la possibilità di ricevere un ammontare inferiore all\x00investimento finanziario.\nIn caso di Riscatto del Contratto (totale o parziale), l\x00Impresa di Assicurazione non offre alcuna \ngaranzia finanziaria di rendimento minimo e pertanto il Contraente sopporta il rischio di ottenere un \nValore Unitario di Quota inferiore al 100% del Valore Unitario di Quota del Fondo Interno registrato \nalla Data di Istituzione in considerazione dei rischi connessi alla fluttuazione del valore di mercato \ndegli attivi in cui investe, direttamente o indirettamente, il Fondo Interno.\nRichiesta di\ninformazioni\nPer eventuali richieste di informazioni sul valore di riscatto, il Contraente può rivolgersi alla \nCompagnia AXA MPS Financial DAC \x03 Wolfe Tone House, Wolfe Tone Street, Dublin, DO1 HP90 \x03 \nIreland, Numero Verde 800.231.187, e-mail: supporto@axa-mpsfinancial.ie\nA chi è rivolto questo prodotto?\nL\x00investitore al dettaglio a cui è destinato il prodotto varia in funzione dell\x00opzione di investimento sottostante e \nillustrata nel relativo KID.\nIl prodotto è indirizzato a Contraenti persone fisiche e persone giuridiche a condizione che il Contraente (persona fisica) \ne l\x00Assicurato, al momento della sottoscrizione stessa, abbiano un\x00età compresa tra i 18 anni e i 85 anni.\nQuali costi devo sostenere?\nPer l\x00informativa dettagliata sui costi fare riferimento alle indicazioni del KID.\nIn aggiunta rispetto alle informazioni del KID , indicare i seguenti costi a carico del contraente.\nSpese di emissione:\nIl Contratto prevede una spesa fissa di emissione pari a 25 Euro.\nLa deduzione di tale importo avverrà contestualmente alla deduzione del premio.\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 6 di 9\n', + "L\x00obiettivo di protezione è da considerarsi al netto delle spese di emissione.\nCosti per riscatto\nIl Riscatto (totale e parziale) prevede un costo che varia in funzione della data di richiesta e secondo le percentuali di \nseguito indicate:\n1°Anno 5,00%; 2°Anno 3,50%; 3°Anno 2,00%; dal quarto anno in poi 0%;\nCosti di intermediazione\nla quota parte massima percepita dall\x00intermediario con riferimento all\x00intero flusso commissionale relativo al prodotto \nè pari al 35,17%.\nQuali sono i rischi e qual è il potenziale rendimento?\nSia con riferimento alla prestazione in caso di vita dell\x00assicurato, sia con riferimento al capitale caso morte riferito ai \nFondi Assicurativi Interni, la Compagnia non presta alcuna garanzia di rendimento minimo o di conservazione del \ncapitale. Pertanto il controvalore della prestazione della Compagnia potrebbe essere inferiore all\x00importo dei premi \nversati, in considerazione dei rischi connessi alla fluttuazione del valore di mercato degli attivi in cui investe, \ndirettamente o indirettamente il Fondo Interno.\nCOME POSSO PRESENTARE I RECLAMI E RISOLVERE LE CONTROVERSIE?\nAll\x00IVASS\nNel caso in cui il reclamo presentato all\x00impesa assicuratrice abbia esito insoddisfacente o risposta \ntardiva, è possibile rivolgersi all\x00IVASS, Via del Quirinale, 21 - 00187 Roma, fax 06.42133206, Info \nsu: www.ivass.it.\nEventuali reclami potranno inoltre essere indirizzati all\x00Autorità Irlandese competente al seguente \nindirizzo:\nFinancial Services Ombudsman 3rd Floor, Lincoln House, Lincoln Place, Dublin 2, D02 VH29 \x03 \nIreland\nPRIMA DI RICORRERE ALL\x00AUTORITÀ GIUDIZIARIA è possibile, in alcuni casi necessario, \navvalersi di sistemi alternativi di risoluzione delle controversie, quali:\nMediazione\nInterpellando un Organismo di Mediazione tra quelli presenti nell'elenco del Ministero della \nGiustizia, consultabile sul sito www.giustizia.it (Legge 9/8/2013, n.98)\nNegoziazione \nassistita\nTramite richiesta del proprio avvocato all\x00impresa\nAltri Sistemi \nalternative di \nrisoluzione delle \ncontroversie\nEventuali reclami relativi ad un contratto o servizio assicurativo nei confronti dell'Impresa di \nassicurazione o dell'Intermediario assicurativo con cui si entra in contatto, nonché qualsiasi \nrichiesta di informazioni, devono essere preliminarmente presentati per iscritto (posta, email) ad \nAXA MPS Financial DAC - Ufficio Reclami secondo seguenti modalità:\nEmail: reclami@axa-mpsfinancial.ie\nPosta: AXA MPS Financial DAC - Ufficio Reclami\nWolfe Tone House, Wolfe Tone Street,\nDublin DO1 HP90 - Ireland\nNumero Verde 800.231.187\navendo cura di indicare:\n-nome, cognome, indirizzo completo e recapito telefonico del reclamante;\n-numero della polizza e nominativo del contraente;\n-breve ed esaustiva descrizione del motivo di lamentela;\n-ogni altra indicazione e documento utile per descrivere le circostanze.\nSarà cura della Compagnia fornire risposta entro 45 giorni dalla data di ricevimento del reclamo, \ncome previsto dalla normativa vigente.\nNel caso di mancato o parziale accoglimento del reclamo, nella risposta verrà fornita una chiara \nspiegazione della posizione assunta dalla Compagnia in relazione al reclamo stesso ovvero della \nsua mancata risposta.\nQualora il reclamante non abbia ricevuto risposta oppure ritenga la stessa non soddisfacente, \nprima di rivolgersi all'Autorità Giudiziaria, può scrivere all'IVASS (Via del Quirinale, 21 - 00187 \nRoma; fax 06.42.133.745 o 06.42.133.353, ivass@pec.ivass.it) fornendo copia del reclamo già \nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 7 di 9\n", + "inoltrato all'impresa ed il relativo riscontro anche utilizzando il modello presente nel sito dell'IVASS \nalla sezione per il Consumatore - come presentare un reclamo.\nEventuali reclami potranno inoltre essere indirizzati all'Autorità Irlandese competente al seguente \nindirizzo:\nFinancial Services Ombudsman\n3rd Floor, Lincoln House,\nLincoln Place, Dublin 2, D02 VH29 Ireland\nIl reclamante può ricorrere ai sistemi alternativi per la risoluzione delle controversie previsti a livello \nnormativo o convenzionale, quali:\n\x04 Mediazione: (Decreto Legislativo n.28/2010 e ss.mm.) puo' essere avviata presentando istanza \nad un Organismo di Mediazione tra quelle presenti nell'elenco del Ministero della Giustizia, \nconsultabile sul sito www.giustizia.it. La legge ne prevede l'obbligatorieta' nel caso in cui si intenda \nesercitare in giudizio i propri diritti in materia di contratti assicurativi o finanziari e di risarcimento \nda responsabilita' medica e sanitaria, costituendo condizione di procedibilita' della domanda.\n\x04 Negoziazione Assistita: (Legge n.162/2014) tramite richiesta del proprio Avvocato all'Impresa. E' \nun accordo mediante il quale le parti convengono di cooperare in buona fede e con lealta' per \nrisolvere in via amichevole la controversia tramite l'assistenza di avvocati. Fine del procedimento e' \nla composizione bonaria della lite, con la sottoscrizione delle parti - assistite dai rispettivi difensori - \ndi un accordo detto convenzione di negoziazione. Viene prevista la sua obbligatorieta' nel caso in \ncui si intenda esercitare in giudizio i propri diritti per ogni controversia in materia di risarcimento del \ndanno da circolazione di veicoli e natanti, ovverosia e' condizione di procedibilita' per l'eventuale \ngiudizio civile. Invece e' facoltativa per ogni altra controversia in materia di risarcimenti o di contratti \nassicurativi o finanziari.\nIn caso di controversia relativa alla determinazione dei danni si puo' ricorrere alla perizia \ncontrattuale prevista dalle Condizioni di Assicurazione per la risoluzione di tale tipologia di \ncontroversie. L'istanza di attivazione della perizia contrattuale dovra' essere indirizzata alla \nCompagnia all' indirizzo\nAXA MPS Financial DAC \nWolfe Tone House, Wolfe Tone Street\nDublin DO1 HP90 - Ireland\nPer maggiori informazioni si rimanda a quanto presente nell'area Reclami del sito \nwww.axa-mpsfinancial.ie. \nPer la risoluzione delle liti transfrontaliere è possibile presentare reclamo all'IVASS o direttamente \nal sistema estero http://ec.europa.eu/internal_market/fin-net/members_en.htm competente \nchiedendo l'attivazione della procedura FIN-NET.\nEventuali reclami relativi la mancata osservanza da parte della Compagnia, degli intermediari e dei \nperiti assicurativi, delle disposizioni del Codice delle assicurazioni, delle relative norme di \nattuazione nonché delle norme sulla commercializzazione a distanza dei prodotti assicurativi \npossono essere presentati direttamente all'IVASS, secondo le modalità sopra indicate.\nSi ricorda che resta salva la facoltà di adire l'autorità giudiziaria.\nREGIME FISCALE\nTrattamento \nfiscale applicabile \nal contratto\nLe seguenti informazioni sintetizzano alcuni aspetti del regime fiscale applicabile al Contratto, ai \nsensi della legislazione tributaria italiana e della prassi vigente alla data di pubblicazione del \npresente documento, fermo restando che le stesse rimangono soggette a possibili cambiamenti che \npotrebbero avere altresì effetti retroattivi. Quanto segue non intende rappresentare un\x00analisi \nesauriente di tutte le conseguenze fiscali del Contratto. I Contraenti sono tenuti a consultare i loro \nconsulenti in merito al regime fiscale proprio del Contratto.\nTasse e imposte\nLe imposte e tasse presenti e future applicabili per legge al Contratto sono a carico del Contraente \no dei Beneficiari e aventi diritto e non è prevista la corresponsione al Contraente di alcuna somma \naggiuntiva volta a compensare eventuali riduzioni dei pagamenti relativi al Contratto.\nTassazione delle somme corrisposte a soggetti non esercenti attività d\x00impresa\n1. In caso di decesso dell\x00Assicurato\nLe somme corrisposte dall\x00Impresa di Assicurazione in caso di decesso dell\x00Assicurato non sono \nsoggette a tassazione IRPEF in capo al percettore e sono esenti dall\x00imposta sulle successioni. Si \nricorda tuttavia che, per effetto della legge 23 dicembre 2014 n. 190 (c.d.\x02Legge di Stabilità\x02), i \nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 8 di 9\n", + 'capitali percepiti in caso di morte, a decorrere dal 1 gennaio 2015, in dipendenza di contratti di \nassicurazione sulla vita, a copertura del rischio demografico, sono esenti dall\x00imposta sul reddito \ndelle persone fisiche.\n2. In caso di Riscatto totale o di Riscatto parziale.\nLe somme corrisposte dall\x05Impresa di Assicurazione in caso di Riscatto totale sono soggette ad \nun\x00imposta sostitutiva dell\x00imposta sui redditi nella misura prevista di volta in volta dalla legge. Tale \nimposta, al momento della redazione del presente documento, è pari al 26% sulla differenza \n(plusvalenza) tra il capitale maturato e l\x00ammontare dei premi versati (al netto di eventuali riscatti \nparziali), con l\x00eccezione dei proventi riferibili ai titoli di stato italiani ed equiparati (Paesi facenti \nparte della white list), per i quali l\x00imposta è pari al 12,5%.\nIn caso di Riscatto parziale, ai fini del computo del reddito di capitale da assoggettare alla predetta \nimposta sostitutiva, l\x00ammontare dei premi va rettificato in funzione del rapporto tra il capitale \nerogato ed il valore economico della polizza alla data del Riscatto parziale.\n3. In caso di Recesso\nLe somme corrisposte in caso di Recesso sono soggette all\x00imposta sostitutiva delle imposte sui \nredditi nella misura e con gli stessi criteri indicati per il Riscatto totale del Contratto.\nTassazione delle somme corrisposte a soggetti esercenti attività d\x00impresa\nLe somme corrisposte a soggetti che esercitano l\x00attività d\x00impresa non costituiscono redditi di \ncapitale, bensì redditi d\x00impresa. Su tali somme l\x00Impresa non applica l\x00imposta sostitutiva di cui \nall\x00art. 26-ter del D.P.R. 29 settembre 1973, n. 600.\nSe le somme sono corrisposte a persone fisiche o enti non commerciali in relazione a contratti \nstipulati nell\x00ambito dell\x00attività commerciale, l\x00Impresa non applica l\x00imposta sostitutiva, qualora gli \ninteressati presentino una dichiarazione in merito alla sussistenza di tale requisito.\nL\x00IMPRESA HA L\x00OBBLIGO DI TRASMETTERTI, ENTRO IL 31 MAGGIO DI OGNI ANNO, IL DOCUMENTO \nUNICO DI RENDICONTAZIONE ANNUALE DELLA TUA POSIZIONE ASSICURATIVA\nPER QUESTO CONTRATTO L\x00IMPRESA NON DISPONE DI UN\x00AREA INTERNET DISPOSITIVA RISERVATA \nAL CONTRAENTE (c.d. HOME INSURANCE), PERTANTO DOPO LA SOTTOSCRIZIONE NON POTRAI \nGESTIRE TELEMATICAMENTE IL CONTRATTO MEDESIMO.\nDIP aggiuntivo IBIP - Progetto Protetto New - Global Dividends - Pag. 9 di 9\n', + ] + # codespell:ignore-end + + path = os.path.abspath(f'{__file__}/../../tests/resources/test_3186.pdf') + fitz_doc = pymupdf.open(path) + texts = list() + for page in fitz_doc: + t = page.get_text() + texts.append(t) + assert texts == texts_expected, f'Unexpected output: {texts=}' + + +def test_3197(): + ''' + MuPDF's ActualText support fixes handling of test_3197.pdf. + ''' + path = os.path.abspath(f'{__file__}/../../tests/resources/test_3197.pdf') + + text_utf8_expected = [ + b'NYSE - Nasdaq Real Time Price \xe2\x80\xa2 USD\nFord Motor Company (F)\n12.14 -0.11 (-0.90%)\nAt close: 4:00 PM EST\nAfter hours: 7:43 PM EST\nAll numbers in thousands\nAnnual\nQuarterly\nDownload\nSummary\nNews\nChart\nConversations\nStatistics\nHistorical Data\nProfile\nFinancials\nAnalysis\nOptions\nHolders\nSustainability\nInsights\nFollow\n12.15 +0.01 (+0.08%)\nIncome Statement\nBalance Sheet\nCash Flow\nSearch for news, symbols or companies\nNews\nFinance\nSports\nSign in\nMy Portfolio\nNews\nMarkets\nSectors\nScreeners\nPersonal Finance\nVideos\nFinance Plus\nBack to classic\nMore\n', + b'Related Tickers\nTTM\n12/31/2023\n12/31/2022\n12/31/2021\n12/31/2020\n14,918,000\n14,918,000\n6,853,000\n15,787,000\n24,269,000\n-17,628,000\n-17,628,000\n-4,347,000\n2,745,000\n-18,615,000\n2,584,000\n2,584,000\n2,511,000\n-23,498,000\n2,315,000\n25,110,000\n25,110,000\n25,340,000\n20,737,000\n25,935,000\n-8,236,000\n-8,236,000\n-6,866,000\n-6,227,000\n-5,742,000\n51,659,000\n51,659,000\n45,470,000\n27,901,000\n65,900,000\n-41,965,000\n-41,965,000\n-45,655,000\n-54,164,000\n-60,514,000\n-335,000\n-335,000\n-484,000\n--\n--\n6,682,000\n6,682,000\n-13,000\n9,560,000\n18,527,000\n \nYahoo Finance Plus Essential\naccess required.\nUnlock Access\nBreakdown\nOperating Cash\nFlow\nInvesting Cash\nFlow\nFinancing Cash\nFlow\nEnd Cash Position\nCapital Expenditure\nIssuance of Debt\nRepayment of Debt\nRepurchase of\nCapital Stock\nFree Cash Flow\n12/31/2020 - 6/1/1972\nGM\nGeneral Motors Compa\xe2\x80\xa6\n39.49 +1.23%\n\xc2\xa0\nRIVN\nRivian Automotive, Inc.\n15.39 -3.15%\n\xc2\xa0\nNIO\nNIO Inc.\n5.97 +0.17%\n\xc2\xa0\nSTLA\nStellantis N.V.\n25.63 +0.91%\n\xc2\xa0\nLCID\nLucid Group, Inc.\n3.7000 +0.54%\n\xc2\xa0\nTSLA\nTesla, Inc.\n194.77 +0.52%\n\xc2\xa0\nTM\nToyota Motor Corporati\xe2\x80\xa6\n227.09 +0.14%\n\xc2\xa0\nXPEV\nXPeng Inc.\n9.08 +0.89%\n\xc2\xa0\nFSR\nFisker Inc.\n0.5579 -11.46%\n\xc2\xa0\nCopyright \xc2\xa9 2024 Yahoo.\nAll rights reserved.\nPOPULAR QUOTES\nTesla\nDAX Index\nKOSPI\nDow Jones\nS&P BSE SENSEX\nSPDR S&P 500 ETF Trust\nEXPLORE MORE\nCredit Score Management\nHousing Market\nActive vs. Passive Investing\nShort Selling\nToday\xe2\x80\x99s Mortgage Rates\nHow Much Mortgage Can You Afford\nABOUT\nData Disclaimer\nHelp\nSuggestions\nSitemap\n', + ] + + with pymupdf.open(path) as document: + for i, page in enumerate(document): + text = page.get_text() + #print(f'{i=}:') + text_utf8 = text.encode('utf8') + #print(f' {text_utf8=}') + #print(f' {text_utf8_expected[i]=}') + assert text_utf8 == text_utf8_expected[i] + + +def test_document_text(): + if os.environ.get('PYODIDE_ROOT'): + print('test_document_text(): not running on Pyodide - multiprocessing not available.') + return + + import platform + import time + + path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf') + concurrency = None + + def llen(texts): + l = 0 + for text in texts: + l += len(text) if isinstance(text, str) else text + return l + + results = dict() + _stats = 1 + + print('') + method = 'single' + t = time.time() + document = pymupdf.Document(path) + texts0 = pymupdf.get_text(path, _stats=_stats) + t0 = time.time() - t + print(f'{method}: {t0=} {llen(texts0)=}', flush=1) + + # Dummy run seems to avoid misleading stats with slow first run. + method = 'mp' + texts = pymupdf.get_text(path, concurrency=concurrency, method=method, _stats=_stats) + + method = 'mp' + t = time.time() + texts = pymupdf.get_text(path, concurrency=concurrency, method=method, _stats=_stats) + t = time.time() - t + print(f'{method}: {concurrency=} {t=} ({t0/t:.2f}x) {llen(texts)=}', flush=1) + assert texts == texts0 + + if platform.system() != 'Windows': + method = 'fork' + t = time.time() + texts = pymupdf.get_text(path, concurrency=concurrency, method='fork', _stats=_stats) + t = time.time() - t + print(f'{method}: {concurrency=} {t=} ({t0/t:.2f}x) {llen(texts)=}', flush=1) + assert texts == texts0 + + if _stats: + pymupdf._log_items_clear() + + +def test_4524(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4524(): not running on Pyodide - multiprocessing not available.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf') + print('') + document = pymupdf.Document(path) + texts_single = pymupdf.get_text(path, method='single', pages=[1, 3, 5]) + texts_mp = pymupdf.get_text(path, method='mp', pages=[1, 3, 5]) + print(f'{len(texts_single)=}') + print(f'{len(texts_mp)=}') + assert texts_mp == texts_single + + +def test_3594(): + verbose = 0 + print() + d = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_3594.pdf')) + for i, p in enumerate(d.pages()): + text = p.get_text() + print(f'Page {i}:') + if verbose: + for line in text.split('\n'): + print(f' {line!r}') + print('='*40) + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple < (1, 26, 8): + assert not wt + else: + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...' + + +def test_3687(): + path1 = pymupdf.open(os.path.normpath(f'{__file__}/../../tests/resources/test_3687.epub')) + path2 = pymupdf.open(os.path.normpath(f'{__file__}/../../tests/resources/test_3687-3.epub')) + for path in path1, path2: + print(f'Looking at {path=}.') + with pymupdf.open(path) as document: + page = document[0] + text = page.get_text("text") + print(f'{text=!s}') + wt = pymupdf.TOOLS.mupdf_warnings() + print(f'{wt=}') + assert wt == 'unknown epub version: 3.0' + +def test_3705(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3705.pdf') + def get_all_page_from_pdf(document, last_page=None): + if last_page: + document.select(list(range(0, last_page))) + if document.page_count > 30: + document.select(list(range(0, 30))) + return iter(page for page in document) + + filename = os.path.basename(path) + + doc = pymupdf.open(path) + texts0 = list() + for i, page in enumerate(get_all_page_from_pdf(doc)): + text = page.get_text() + print(i, text) + texts0.append(text) + + texts1 = list() + doc = pymupdf.open(path) + for page in doc: + if page.number >= 30: # leave the iterator immediately + break + text = page.get_text() + texts1.append(text) + + assert texts1 == texts0 + + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 27): + expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing' + expected = '\n'.join([expected] * 56) + assert wt == expected + elif pymupdf.mupdf_version_tuple >= (1, 26, 8): + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 7684 times...' + else: + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...' + +def test_3650(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3650.pdf') + doc = pymupdf.Document(path) + blocks = doc[0].get_text("blocks") + t = [block[4] for block in blocks] + print(f'{t=}') + assert t == [ + 'RECUEIL DES ACTES ADMINISTRATIFS\n', + 'n° 78 du 28 avril 2023\n', + ] + +def test_4026(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4026.pdf') + with pymupdf.open(path) as document: + page = document[4] + blocks = page.get_text('blocks') + for i, block in enumerate(blocks): + print(f'block {i}: {block}') + assert len(blocks) == 5 + +def test_3725(): + # This currently just shows the extracted text. We don't check it is as expected. + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3725.pdf') + with pymupdf.open(path) as document: + page = document[0] + text = page.get_text() + if 0: + print(textwrap.indent(text, ' ')) + +def test_4147(): + print() + items = list() + for expect_visible, path in ( + (False, os.path.normpath(f'{__file__}/../../tests/resources/test_4147.pdf')), + (True, os.path.normpath(f'{__file__}/../../tests/resources/symbol-list.pdf')), + ): + print(f'{expect_visible=} {path=}') + with pymupdf.open(path) as document: + page = document[0] + text = page.get_text('rawdict') + for block in text['blocks']: + if block['type'] == 0: + #print(f' block') + for line in block['lines']: + #print(f' line') + for span in line['spans']: + #print(f' span') + if pymupdf.mupdf_version_tuple >= (1, 25, 2): + #print(f' span: {span["flags"]=:#x} {span["char_flags"]=:#x}') + if expect_visible: + assert span['char_flags'] & pymupdf.mupdf.FZ_STEXT_FILLED + else: + assert not (span['char_flags'] & pymupdf.mupdf.FZ_STEXT_FILLED) + assert not (span['char_flags'] & pymupdf.mupdf.FZ_STEXT_STROKED) + else: + #print(f' span: {span["flags"]=:#x}') + assert 'char_flags' not in span + # Check commit `add 'bidi' to span dict, add 'synthetic' to char dict.` + assert span['bidi'] == 0 + for ch in span['chars']: + assert isinstance(ch['synthetic'], bool) + + +def test_4139(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4139.pdf') + flags = (0 + | pymupdf.TEXT_PRESERVE_IMAGES + | pymupdf.TEXT_PRESERVE_WHITESPACE + | pymupdf.TEXT_USE_CID_FOR_UNKNOWN_UNICODE + ) + with pymupdf.open(path) as document: + page = document[0] + dicts = page.get_text('dict', flags=flags, sort=True) + seen = set() + for b_ctr, b in enumerate(dicts['blocks']): + for l_ctr, l in enumerate(b.get('lines', [])): + for s_ctr, s in enumerate(l['spans']): + color = s.get('color') + if color is not None and color not in seen: + seen.add(color) + print(f"B{b_ctr}.L{l_ctr}.S{s_ctr}: {color=} {hex(color)=} {s=}") + assert color == 0, f'{s=}' + assert s['alpha'] == 255 + + +def test_4245(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4245.pdf') + with pymupdf.open(path) as document: + page = document[0] + regions = page.search_for('Bart Simpson') + print(f'{regions=}') + page.add_highlight_annot(regions) + with pymupdf.open(path) as document: + page = document[0] + regions = page.search_for('Bart Simpson') + for region in regions: + highlight = page.add_highlight_annot(region) + highlight.update() + pixmap = page.get_pixmap() + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4245_out.png') + pixmap.save(path_out) + + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4245_expected.png') + rms = gentle_compare.pixmaps_rms(path_expected, pixmap) + pixmap_diff = gentle_compare.pixmaps_diff(path_expected, pixmap) + path_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_4245_diff.png') + pixmap_diff.save(path_diff) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + # Prior to fix for mupdf bug 708274. + assert 0.1 < rms < 0.2 + else: + assert rms < 0.01 + + +def test_4180(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4180.pdf') + with pymupdf.open(path) as document: + page = document[0] + regions = page.search_for('Reference is made') + for region in regions: + page.add_redact_annot(region, fill=(0, 0, 0)) + page.apply_redactions() + pixmap = page.get_pixmap() + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4180_out.png') + pixmap.save(path_out) + + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4180_expected.png') + rms = gentle_compare.pixmaps_rms(path_expected, pixmap) + pixmap_diff = gentle_compare.pixmaps_diff(path_expected, pixmap) + path_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_4180_diff.png') + pixmap_diff.save(path_diff) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + # Prior to fix for mupdf bug 708274. + assert 0.2 < rms < 0.3 + else: + assert rms < 0.01 + + +def test_4182(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4182.pdf') + with pymupdf.open(path) as document: + page = document[0] + dict_ = page.get_text('dict') + linelist = [] + for block in dict_['blocks']: + if block['type'] == 0: + paranum = block['number'] + if 'lines' in block: + for line in block.get('lines', ()): + for span in line['spans']: + if span['text'].strip(): + page.draw_rect(span['bbox'], color=(1, 0, 0)) + linelist.append([paranum, span['bbox'], repr(span['text'])]) + pixmap = page.get_pixmap() + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4182_out.png') + pixmap.save(path_out) + if platform.system() != 'Windows': # Output on Windows can fail due to non-utf8 stdout. + for l in linelist: + print(l) + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4182_expected.png') + pixmap_diff = gentle_compare.pixmaps_diff(path_expected, pixmap) + path_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_4182_diff.png') + pixmap_diff.save(path_diff) + rms = gentle_compare.pixmaps_rms(path_expected, pixmap) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + # Prior to fix for mupdf bug 708274. + assert 3 < rms < 3.5 + else: + assert rms < 0.01 + + +def test_4179(): + if os.environ.get('PYMUPDF_USE_EXTRA') == '0': + # Looks like Python code doesn't behave same as C++, probably because + # of the code not being correct for Python's native unicode strings. + # + print(f'test_4179(): Not running with PYMUPDF_USE_EXTRA=0 because known to fail.') + return + # We check that using TEXT_ACCURATE_BBOXES gives the correct boxes. But + # this also requires that we disable PyMuPDF quad corrections. + # + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4179.pdf') + + # Disable anti-aliasing to avoid our drawing of multiple identical bboxes + # (from normal/accurate bboxes) giving slightly different results. + aa = pymupdf.mupdf.fz_aa_level() + uqc = pymupdf._globals.skip_quad_corrections + pymupdf.TOOLS.set_aa_level(0) + pymupdf.TOOLS.unset_quad_corrections(True) + assert pymupdf._globals.skip_quad_corrections + try: + with pymupdf.open(path) as document: + page = document[0] + + char_sqrt = b'\xe2\x88\x9a'.decode() + + # Search with defaults. + bboxes_search = page.search_for(char_sqrt) + assert len(bboxes_search) == 1 + print(f'bboxes_search[0]:\n {bboxes_search[0]!r}') + page.draw_rect(bboxes_search[0], color=(1, 0, 0)) + rms = gentle_compare.rms(bboxes_search[0], (250.0489959716797, 91.93604278564453, 258.34783935546875, 101.34073638916016)) + assert rms < 0.01 + + # Search with TEXT_ACCURATE_BBOXES. + bboxes_search_accurate = page.search_for( + char_sqrt, + flags = (0 + | pymupdf.TEXT_DEHYPHENATE + | pymupdf.TEXT_PRESERVE_WHITESPACE + | pymupdf.TEXT_PRESERVE_LIGATURES + | pymupdf.TEXT_MEDIABOX_CLIP + | pymupdf.TEXT_ACCURATE_BBOXES + ), + ) + assert len(bboxes_search_accurate) == 1 + print(f'bboxes_search_accurate[0]\n {bboxes_search_accurate[0]!r}') + page.draw_rect(bboxes_search_accurate[0], color=(0, 1, 0)) + rms = gentle_compare.rms(bboxes_search_accurate[0], (250.0489959716797, 99.00948333740234, 258.34783935546875, 108.97208404541016)) + assert rms < 0.01 + + # Iterate with TEXT_ACCURATE_BBOXES. + bboxes_iterate_accurate = list() + dict_ = page.get_text( + 'rawdict', + flags = pymupdf.TEXT_ACCURATE_BBOXES, + ) + linelist = [] + for block in dict_['blocks']: + if block['type'] == 0: + if 'lines' in block: + for line in block.get('lines', ()): + for span in line['spans']: + for ch in span['chars']: + if ch['c'] == char_sqrt: + bbox_iterate_accurate = ch['bbox'] + bboxes_iterate_accurate.append(bbox_iterate_accurate) + print(f'bbox_iterate_accurate:\n {bbox_iterate_accurate!r}') + page.draw_rect(bbox_iterate_accurate, color=(0, 0, 1)) + + assert bboxes_search_accurate != bboxes_search + assert bboxes_iterate_accurate == bboxes_search_accurate + pixmap = page.get_pixmap() + + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_out.png') + pixmap.save(path_out) + path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_expected.png') + rms = gentle_compare.pixmaps_rms(path_expected, pixmap) + pixmap_diff = gentle_compare.pixmaps_diff(path_expected, pixmap) + path_out_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_diff.png') + pixmap_diff.save(path_out_diff) + print(f'Have saved to: {path_out_diff=}') + print(f'{rms=}') + if pymupdf.mupdf_version_tuple < (1, 25, 5): + # Prior to fix for mupdf bug 708274, our rects are rendered slightly incorrectly. + assert 3.5 < rms < 4.5 + else: + assert rms < 0.01 + + finally: + pymupdf.TOOLS.set_aa_level(aa) + pymupdf.TOOLS.unset_quad_corrections(uqc) + + +def test_extendable_textpage(): + + # 2025-01-28: + # + # We can create a pdf with two pages whose text is adjacent when stitched + # together vertically: + # + # Page 1: + # + # aaaa + # + # bbbb + # cccc + # + # dddd + # + # Page 2: + # + # eeee + # + # ffff + # gggg + # + # hhhh + # + # + # Create a textpage for both of these pages. Then when extracting text, + # we need to get (specifically the `dddd` and `eeee` sequences need to be + # treated as the same block): + # + # aaaa + # + # bbbb + # cccc + # + # dddd + # eeee + # + # ffff + # gggg + # + # hhhh + # + print() + + path = os.path.normpath(f'{__file__}/../../tests/test_extendable_textpage.pdf') + with pymupdf.open(filetype='pdf') as document: + document.new_page() + document.new_page() + page0 = document[0] + page1 = document[1] + y = 100 + line_height = 9.6 + for i in range(4): + page0.insert_text((100, y+line_height), 'abcd'[i] * 16) + page1.insert_text((100, y+line_height), 'efgh'[i] * 16) + y += line_height + if i%2 == 0: + y += line_height + rect = pymupdf.mupdf.FzRect(100, 100, 200, y) + document[0].draw_rect(rect, (1, 0, 0)) + document[1].draw_rect(rect, (1, 0, 0)) + document.save(path) + + # Create a stext page for the text regions in both pages of our document, + # using direct calls to MuPDF. + # + + with pymupdf.Document(path) as document: + + # Notes: + # + # We need to reuse the stext device for second page. Otherwise if we + # create a new device, the first text in second page will always be in + # a new block, because pen position for new device is (0, 0) and this + # will usually be treated as a paragraph gap to the first text. + # + # At the moment we use infinite mediabox when creating the + # fz_stext_page. I don't know what a non-infinite mediabox would be + # useful for. + # + # FZ_STEXT_CLIP_RECT isn't useful at the moment, because we would need + # to modify it to be in stext pagae coordinates (i.e. adding ctm.f + # to y0 and y1) when we append the second page. But it's internal + # data and there's no api to modify it. So for now we don't specify + # FZ_STEXT_CLIP_RECT when creating the stext device, so we always + # include each page's entire contents. + # + + # We use our knowledge of the text rect in each page to manipulate ctm + # so that the stext contains text starting at (0, 0) and extending + # downwards. + # + y = 0 + cookie = pymupdf.mupdf.FzCookie() + + stext_page = pymupdf.mupdf.FzStextPage( + pymupdf.mupdf.FzRect(pymupdf.mupdf.FzRect.Fixed_INFINITE), # mediabox + ) + stext_options = pymupdf.mupdf.FzStextOptions() + #stext_options.flags |= pymupdf.mupdf.FZ_STEXT_CLIP_RECT + #stext_options.clip = rect.internal() + device = pymupdf.mupdf.fz_new_stext_device(stext_page, stext_options) + + # Add first page to stext_page at (0, y), and update for the next + # page. + page = document[0] + ctm = pymupdf.mupdf.FzMatrix(1, 0, 0, 1, -rect.x0, -rect.y0 + y) + pymupdf.mupdf.fz_run_page(page.this, device, ctm, cookie) + y += rect.y1 - rect.y0 + + # Add second page to stext_page at (0, y), and update for the next + # page. + page = document[1] + ctm = pymupdf.mupdf.FzMatrix(1, 0, 0, 1, -rect.x0, -rect.y0 + y) + pymupdf.mupdf.fz_run_page(page.this, device, ctm, cookie) + y += rect.y1 - rect.y0 + + # We've finished adding text to stext_page. + pymupdf.mupdf.fz_close_device(device) + + # Create a pymupdf.TextPage() for so we can use + # text_page.extractDICT() etc. + text_page = pymupdf.TextPage(stext_page) + + # Read text from stext_page using text_page.extractDICT(). + print(f'Using text_page.extractDICT().') + print(f'{text_page.this.m_internal.mediabox=}') + d = text_page.extractDICT(sort=True) + y0_prev = None + pno = 0 + ydelta = 0 + for block in d['blocks']: + print(f'block {block["bbox"]=}') + for line in block['lines']: + print(f' line {line["bbox"]=}') + for span in line['spans']: + print(f' span {span["bbox"]=}') + bbox = span['bbox'] + x0, y0, x1, y1 = bbox + dy = y0 - y0_prev if y0_prev else 0 + y0_prev = y0 + print(f' {dy=: 5.2f} height={y1-y0:.02f} {x0:.02f} {y0:.02f} {x1:.02f} {y1:.02f} {span["text"]=}') + if 'eee' in span['text']: + pno = 1 + ydelta = rect.y1 - rect.y0 + y0 -= ydelta + y1 -= ydelta + # Debugging - add green lines on original document + # translating final blocks info into original coors. + document[pno].draw_rect((x0, y0, x1, y1), (0, 1, 0)) + + print('\n\n') + + print(f'Using text_page.extractText()') + text = text_page.extractText(True) + print(f'{text}') + + print('\n\n') + print(f'Using extractBLOCKS') + text = list() + for x0, y0, x1, y1, line, no, type_ in text_page.extractBLOCKS(): + print(f'block:') + print(f' bbox={x0, y0, x1, y1} {no=}') + print(f' {line=}') + text.append(line) + + print("\n\n") + print(f'extractBLOCKS joined by newlines:') + print('\n'.join(text)) + + # This checks that lines before/after pages break are treated as a + # single paragraph. + assert text == [ + 'aaaaaaaaaaaaaaaa\n', + 'bbbbbbbbbbbbbbbb\ncccccccccccccccc\n', + 'dddddddddddddddd\neeeeeeeeeeeeeeee\n', + 'ffffffffffffffff\ngggggggggggggggg\n', + 'hhhhhhhhhhhhhhhh\n', + ] + + path3 = os.path.normpath(f'{__file__}/../../tests/test_extendable_textpage3.pdf') + document.save(path3) + + +def test_4363(): + print() + print(f'{pymupdf.version=}') + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4363.pdf') + n = 0 + texts = list() + with pymupdf.open(path) as document: + assert len(document) == 1 + page = document[0] + t = page.search_for('tour') + print(f'{t=}') + n += len(t) + text = page.get_text() + texts.append(text) + print(f'{n=}') + print(f'{len(texts)=}') + text = texts[0] + print('text:') + print(f'{text=}') + text_expected = ( + 'Deal Roadshow SiteTour\n' + 'We know your process. We know your standard.\n' + 'Professional Site Tour Video Productions for the Capital Markets.\n' + '1\n' + ) + if text != text_expected: + print(f'Expected:\n {text_expected!r}') + print(f'Found:\n {text!r}') + assert 0 + + +def test_4546(): + # This issue will not be fixed (in mupdf) because the test input is faulty. + # + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4546.pdf') + with pymupdf.open(path) as document: + page = document[0] + text = page.get_text()[:200] + + # We can't actually test with 1.23.5 because it uses `fitz.` not `pymupdf.`. + expected_1_23_5 = b'JOB No.: \nShipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\n\xe5\x9d\x80) \nSINORICH TRANSPORT LIMITED\nADD:7C,WEST BLDG.,ZHONGQU\nMANSION,211 ZHONGSHAN\nRD. SHANTOU,515041 CN\nTEL:0754-88570001 FAX:0754-88572709\nS/O No. '.decode() + + # This output is different from expected_1_23_5. + expected_mupdf_1_26_1 = b'JOB No.: Shipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\xe5\x9d\x80) Tel: Fax: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88'.decode() + + # This output is different from either of the two expected strings. + expected_mupdf_1_27_0 = b'JOB No.: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88572702 \n \xe6\x93\x8d\xe4\xbd\x9c\xe9\x83\xa8: \n88570008 \n88570004 \n \xe6\x96\x87\xe4\xbb\xb6\xe9\x83\xa8: \n88570003\n \nNotify Party(complete name and address, '.decode() + + print(f'expected_1_23_5\n{textwrap.indent(expected_1_23_5, " ")}') + print(f'expected_mupdf_1_26_1\n{textwrap.indent(expected_mupdf_1_26_1, " ")}') + + print(f'{pymupdf.version=}') + print(f'text is:\n{textwrap.indent(text, " ")}') + print(f'{text=}') + print(f'{text.encode()=}') + + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 8): + assert text == expected_mupdf_1_27_0 + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...' + elif pymupdf.mupdf_version_tuple >= (1, 26, 1): + assert text == expected_mupdf_1_26_1 + assert not wt + else: + print(f'No expected output for {pymupdf.mupdf_version_tuple=}') + assert not wt + + +def test_4503(): + # Check detection of strikeout text. Behaviour is improved with + # mupdf>=1.26.2, and fixed with mupdf>=1.26.3. + # + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4503.pdf') + span_0 = None + text_0 = None + print() + print(f'{pymupdf.mupdf_version_tuple=}') + with pymupdf.open(path) as document: + page = document[0] + # Specify TEXT_COLLECT_STYLES so we collect char_flags, which contains + # FZ_STEXT_STRIKEOUT etc. + # + text = page.get_text('rawdict', flags=pymupdf.TEXTFLAGS_RAWDICT | pymupdf.TEXT_COLLECT_STYLES) + for i, block in enumerate(text['blocks']): + print(f'block {i}:') + for j, line in enumerate(block['lines']): + print(f' line {j}:') + for k, span in enumerate(line['spans']): + text = '' + for char in span['chars']: + text += char['c'] + print(f' span {k}: {span["flags"]=:#x} {span["char_flags"]=:#x}: {text!r}') + if 'the right to request the state to review' in text: + span_0 = span + text_0 = text + assert span_0 + #print(f'{span_0=}') + print(f'{span_0["flags"]=:#x}') + print(f'{span_0["char_flags"]=:#x}') + print(f'{text_0=}') + strikeout = span_0['char_flags'] & pymupdf.mupdf.FZ_STEXT_STRIKEOUT + print(f'{strikeout=}') + + if pymupdf.mupdf_version_tuple >= (1, 26, 3): + assert strikeout, f'Expected bit 0 (FZ_STEXT_STRIKEOUT) to be set in {span_0["char_flags"]=:#x}.' + assert text_0 == 'the right to request the state to review and, if appropriate,' + elif pymupdf.mupdf_version_tuple >= (1, 26, 2): + # 2025-06-09: This is still incorrect - the span should include the + # following text 'and, if appropriate,'. It looks like following spans + # are: + # strikeout=0: 'and, ' + # strikeout=1: 'if ' + # strikeout=0: 'appropri' + # strikeout=1: 'ate,' + # + assert strikeout, f'Expected bit 0 (FZ_STEXT_STRIKEOUT) to be set in {span_0["char_flags"]=:#x}.' + assert text_0 == 'the right to request the state to review ' + else: + # Expecting the bug. + assert not strikeout, f'Expected bit 0 (FZ_STEXT_STRIKEOUT) to be unset in {span_0["char_flags"]=:#x}.' + assert text_0 == 'notice the right to request the state to review and, if appropriate,' diff --git a/tests/test_textsearch.py b/tests/test_textsearch.py index f0082fda4..16bbc2047 100644 --- a/tests/test_textsearch.py +++ b/tests/test_textsearch.py @@ -7,17 +7,19 @@ Text search with 'clip' parameter - clip rectangle contains two occurrences of searched text. Confirm search locations are inside clip. """ + import os -import fitz +import pymupdf scriptdir = os.path.abspath(os.path.dirname(__file__)) filename1 = os.path.join(scriptdir, "resources", "2.pdf") filename2 = os.path.join(scriptdir, "resources", "github_sample.pdf") +filename3 = os.path.join(scriptdir, "resources", "text-find-ligatures.pdf") def test_search1(): - doc = fitz.open(filename1) + doc = pymupdf.open(filename1) page = doc[0] needle = "mupdf" rlist = page.search_for(needle) @@ -27,11 +29,24 @@ def test_search1(): def test_search2(): - doc = fitz.open(filename2) + doc = pymupdf.open(filename2) page = doc[0] needle = "the" - clip = fitz.Rect(40.5, 228.31436157226562, 346.5226135253906, 239.5338592529297) + clip = pymupdf.Rect(40.5, 228.31436157226562, 346.5226135253906, 239.5338592529297) rl = page.search_for(needle, clip=clip) assert len(rl) == 2 for r in rl: assert r in clip + + +def test_search3(): + """Ensure we find text whether or not it contains ligatures.""" + doc = pymupdf.open(filename3) + page = doc[0] + needle = "flag" + hits = page.search_for(needle, flags=pymupdf.TEXTFLAGS_SEARCH) + assert len(hits) == 2 # all occurrences found + hits = page.search_for( + needle, flags=pymupdf.TEXTFLAGS_SEARCH | pymupdf.TEXT_PRESERVE_LIGATURES + ) + assert len(hits) == 1 # only found text without ligatures diff --git a/tests/test_toc.py b/tests/test_toc.py index 6f5d1f453..92ab81894 100644 --- a/tests/test_toc.py +++ b/tests/test_toc.py @@ -4,9 +4,11 @@ * Verify manipulation of single TOC item works * Verify stability against circular TOC items """ + import os import sys -import fitz +import pymupdf +import pathlib scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "001003ED.pdf") @@ -14,7 +16,8 @@ circular = os.path.join(scriptdir, "resources", "circular-toc.pdf") full_toc = os.path.join(scriptdir, "resources", "full_toc.txt") simple_toc = os.path.join(scriptdir, "resources", "simple_toc.txt") -doc = fitz.open(filename) +file_3820 = os.path.join(scriptdir, "resources", "test-3820.pdf") +doc = pymupdf.open(filename) def test_simple_toc(): @@ -24,15 +27,18 @@ def test_simple_toc(): def test_full_toc(): - if fitz.mupdf_version_tuple >= (1, 23, 0): - # MuPDF changed in 7d41466feaa. - expected_path = f'{scriptdir}/resources/full_toc2.txt' - else: - expected_path = f'{scriptdir}/resources/full_toc.txt' - with open(expected_path, encoding='utf8') as f: - expected = f.read() - toc = '\n'.join([str(t) for t in doc.get_toc(False)]) - toc += '\n' + if not hasattr(pymupdf, "mupdf"): + # Classic implementation does not have fix for this test. + print(f"Not running test_full_toc on classic implementation.") + return + expected_path = f"{scriptdir}/resources/full_toc.txt" + expected = pathlib.Path(expected_path).read_bytes() + # Github windows x32 seems to insert \r characters; maybe something to + # do with the Python installation's line endings settings. + expected = expected.decode("utf8") + expected = expected.replace('\r', '') + toc = "\n".join([str(t) for t in doc.get_toc(False)]) + toc += "\n" assert toc == expected @@ -47,7 +53,7 @@ def test_replace_toc(): def test_setcolors(): - doc = fitz.open(filename2) + doc = pymupdf.open(filename2) toc = doc.get_toc(False) for i in range(len(toc)): d = toc[i][3] @@ -68,13 +74,18 @@ def test_setcolors(): def test_circular(): """The test file contains circular bookmarks.""" - doc = fitz.open(circular) + doc = pymupdf.open(circular) toc = doc.get_toc(False) # this must not loop + if pymupdf.mupdf_version_tuple < (1, 27): + # Expect warning. + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'Bad or missing prev pointer in outline tree, repairing', \ + f'{wt=}' def test_2355(): # Create a test PDF with toc. - doc = fitz.Document() + doc = pymupdf.Document() for _ in range(10): doc.new_page(doc.page_count) doc.set_toc([[1, 'test', 1], [1, 'test2', 5]]) @@ -84,10 +95,194 @@ def test_2355(): # Open many times for i in range(10): - with fitz.open(path) as new_doc: + with pymupdf.open(path) as new_doc: new_doc.get_toc() # Open once and read many times - with fitz.open(path) as new_doc: + with pymupdf.open(path) as new_doc: for i in range(10): new_doc.get_toc() + +def test_2788(): + ''' + Check handling of Document.get_toc() when toc item has kind=4. + ''' + if not hasattr(pymupdf, 'mupdf'): + # Classic implementation does not have fix for this test. + print(f'Not running test_2788 on classic implementation.') + return + path = os.path.abspath(f'{__file__}/../../tests/resources/test_2788.pdf') + document = pymupdf.open(path) + toc0 = [[1, 'page2', 2, {'kind': 4, 'xref': 14, 'page': 1, 'to': pymupdf.Point(100.0, 760.0), 'zoom': 0.0, 'nameddest': 'page.2'}]] + toc1 = document.get_toc(simple=False) + print(f'{toc0=}') + print(f'{toc1=}') + assert toc1 == toc0 + + doc.set_toc(toc0) + toc2 = document.get_toc(simple=False) + print(f'{toc0=}') + print(f'{toc2=}') + assert toc2 == toc0 + + # Also test Page.get_links() bugfix from #2817. + for page in document: + page.get_links() + rebased = hasattr(pymupdf, 'mupdf') + if rebased: + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == ( + "syntax error: expected 'obj' keyword (0 3 ?)\n" + "trying to repair broken xref\n" + "repairing PDF document" + ), f'{wt=}' + + +def test_toc_count(): + file_in = os.path.abspath(f'{__file__}/../../tests/resources/test_toc_count.pdf') + file_out = os.path.abspath(f'{__file__}/../../tests/test_toc_count_out.pdf') + + def get(doc): + outlines = doc.xref_get_key(doc.pdf_catalog(), "Outlines") + ret = doc.xref_object(int(outlines[1].split()[0])) + return ret + print() + with pymupdf.open(file_in) as doc: + print(f'1: {get(doc)}') + toc = doc.get_toc(simple=False) + doc.set_toc([]) + #print(f'2: {get(doc)}') + doc.set_toc(toc) + print(f'3: {get(doc)}') + doc.save(file_out, garbage=4) + with pymupdf.open(file_out) as doc: + print(f'4: {get(doc)}') + pymupdf._log_items_clear() + + +def test_3347(): + ''' + Check fix for #3347 - link destination rectangles when source/destination + pages have different sizes. + ''' + doc = pymupdf.open() + doc.new_page(width=500, height=800) + doc.new_page(width=800, height=500) + rects = [ + (0, pymupdf.Rect(10, 20, 50, 40), pymupdf.utils.getColor('red')), + (0, pymupdf.Rect(300, 350, 400, 450), pymupdf.utils.getColor('green')), + (1, pymupdf.Rect(20, 30, 40, 50), pymupdf.utils.getColor('blue')), + (1, pymupdf.Rect(350, 300, 450, 400), pymupdf.utils.getColor('black')) + ] + + for page, rect, color in rects: + doc[page].draw_rect(rect, color=color) + + for (from_page, from_rect, _), (to_page, to_rect, _) in zip(rects, rects[1:] + rects[:1]): + doc[from_page].insert_link({ + 'kind': 1, + 'from': from_rect, + 'page': to_page, + 'to': to_rect.top_left, + }) + + links_expected = [ + (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}), + (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}), + (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}), + (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}), + ] + + path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf') + doc.save(path) + print(f'Have saved to {path=}.') + + links_actual = list() + for page_i, page in enumerate(doc): + links = page.get_links() + for link_i, link in enumerate(links): + print(f'{page_i=} {link_i=}: {link!r}') + links_actual.append( (page_i, link) ) + + assert links_actual == links_expected + + +def test_3400(): + ''' + Check fix for #3400 - link destination rectangles when source/destination + pages have different rotations. + ''' + width = 750 + height = 1110 + circle_middle_point = pymupdf.Point(height / 4, width / 4) + print(f'{circle_middle_point=}') + with pymupdf.open() as doc: + + page = doc.new_page(width=width, height=height) + page.set_rotation(270) + # draw a circle at the middle point to facilitate debugging + page.draw_circle(circle_middle_point, color=(0, 0, 1), radius=5, width=2) + + for i in range(10): + for j in range(10): + x = i/10 * width + y = j/10 * height + page.draw_circle(pymupdf.Point(x, y), color=(0,0,0), radius=0.2, width=0.1) + page.insert_htmlbox(pymupdf.Rect(x, y, x+width/10, y+height/20), f'({x=:.1f},{y=:.1f})', ) + + # rotate the middle point by the page rotation for the new toc entry + toc_link_coords = circle_middle_point + print(f'{toc_link_coords=}') + + toc = [ + ( + 1, + "Link to circle", + 1, + { + "kind": pymupdf.LINK_GOTO, + "page": 1, + "to": toc_link_coords, + "from": pymupdf.Rect(0, 0, height / 4, width / 4), + }, + ) + ] + doc.set_toc(toc, 0) # set the toc + + page = doc.new_page(width=200, height=300) + from_rect = pymupdf.Rect(10, 10, 100, 50) + page.insert_htmlbox(from_rect, 'link') + link = dict() + link['from'] = from_rect + link['kind'] = pymupdf.LINK_GOTO + link['to'] = toc_link_coords + link['page'] = 0 + page.insert_link(link) + + path = os.path.normpath(f'{__file__}/../../tests/test_3400.pdf') + doc.save(path) + print(f'Saved to {path=}.') + + links_expected = [ + (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'}) + ] + + links_actual = list() + for page_i, page in enumerate(doc): + links = page.get_links() + for link_i, link in enumerate(links): + print(f'({page_i}, {link!r})') + links_actual.append( (page_i, link) ) + + assert links_actual == links_expected + + + +def test_3820(): + """Ensure all extended TOC items point to pages.""" + doc = pymupdf.open(file_3820) + toc = doc.get_toc(simple=False) + for _, _, epage, dest in toc: + assert epage == dest["page"] + 1 + + diff --git a/tests/test_widgets.py b/tests/test_widgets.py index fc0245ec3..9eafd0246 100644 --- a/tests/test_widgets.py +++ b/tests/test_widgets.py @@ -2,35 +2,36 @@ """ Test PDF field (widget) insertion. """ -import fitz +import pymupdf import os scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "widgettest.pdf") file_2333 = os.path.join(scriptdir, "resources", "test-2333.pdf") +file_4055 = os.path.join(scriptdir, "resources", "test-4055.pdf") -doc = fitz.open() +doc = pymupdf.open() page = doc.new_page() gold = (1, 1, 0) # define some colors blue = (0, 0, 1) gray = (0.9, 0.9, 0.9) fontsize = 11.0 # define a fontsize lineheight = fontsize + 4.0 -rect = fitz.Rect(50, 72, 400, 200) +rect = pymupdf.Rect(50, 72, 400, 200) def test_text(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - widget = fitz.Widget() # create a widget object + widget = pymupdf.Widget() # create a widget object widget.border_color = blue # border color widget.border_width = 0.3 # border width widget.border_style = "d" widget.border_dashes = (2, 3) widget.field_name = "Textfield-1" # field name widget.field_label = "arbitrary text - e.g. to help filling the field" - widget.field_type = fitz.PDF_WIDGET_TYPE_TEXT # field type + widget.field_type = pymupdf.PDF_WIDGET_TYPE_TEXT # field type widget.fill_color = gold # field background widget.rect = rect # set field rectangle widget.text_color = blue # rext color @@ -44,13 +45,13 @@ def test_text(): def test_checkbox(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - widget = fitz.Widget() + widget = pymupdf.Widget() widget.border_style = "b" widget.field_name = "Button-1" widget.field_label = "a simple check box button" - widget.field_type = fitz.PDF_WIDGET_TYPE_CHECKBOX + widget.field_type = pymupdf.PDF_WIDGET_TYPE_CHECKBOX widget.fill_color = gold widget.rect = rect widget.text_color = blue @@ -62,26 +63,26 @@ def test_checkbox(): # Check #2350 - setting checkbox to readonly. # - widget.field_flags |= fitz.PDF_FIELD_IS_READ_ONLY + widget.field_flags |= pymupdf.PDF_FIELD_IS_READ_ONLY widget.update() - path = f'{scriptdir}/test_checkbox.pdf' + path = f"{scriptdir}/test_checkbox.pdf" doc.save(path) - - doc = fitz.open(path) + + doc = pymupdf.open(path) page = doc[0] widget = page.first_widget assert widget - assert widget.field_flags == fitz.PDF_FIELD_IS_READ_ONLY + assert widget.field_flags == pymupdf.PDF_FIELD_IS_READ_ONLY def test_listbox(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - widget = fitz.Widget() + widget = pymupdf.Widget() widget.field_name = "ListBox-1" widget.field_label = "is not a drop down: scroll with cursor in field" - widget.field_type = fitz.PDF_WIDGET_TYPE_LISTBOX - widget.field_flags = fitz.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE + widget.field_type = pymupdf.PDF_WIDGET_TYPE_LISTBOX + widget.field_flags = pymupdf.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE widget.fill_color = gold widget.choice_values = ( "Frankfurt", @@ -104,15 +105,14 @@ def test_listbox(): def test_combobox(): - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - widget = fitz.Widget() + widget = pymupdf.Widget() widget.field_name = "ComboBox-1" widget.field_label = "an editable combo box ..." - widget.field_type = fitz.PDF_WIDGET_TYPE_COMBOBOX + widget.field_type = pymupdf.PDF_WIDGET_TYPE_COMBOBOX widget.field_flags = ( - fitz.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE - | fitz.PDF_CH_FIELD_IS_EDIT + pymupdf.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE | pymupdf.PDF_CH_FIELD_IS_EDIT ) widget.fill_color = gold widget.choice_values = ( @@ -139,14 +139,14 @@ def test_combobox(): def test_text2(): - doc = fitz.open() + doc = pymupdf.open() doc.new_page() page = [p for p in doc.pages()][0] - widget = fitz.Widget() + widget = pymupdf.Widget() widget.field_name = "textfield-2" widget.field_label = "multi-line text with tabs is also possible!" - widget.field_flags = fitz.PDF_TX_FIELD_IS_MULTILINE - widget.field_type = fitz.PDF_WIDGET_TYPE_TEXT + widget.field_flags = pymupdf.PDF_TX_FIELD_IS_MULTILINE + widget.field_type = pymupdf.PDF_WIDGET_TYPE_TEXT widget.fill_color = gray widget.rect = rect widget.text_color = blue @@ -160,7 +160,7 @@ def test_text2(): def test_2333(): - doc = fitz.open(file_2333) + doc = pymupdf.open(file_2333) page = doc[0] def values(): @@ -179,27 +179,27 @@ def values(): w.field_value = True w.update() assert values() == set(("/Off", f"{i}", f"/{i}")) - w.field_value=False + w.field_value = False w.update() assert values() == set(("Off", "/Off")) - + def test_2411(): """Add combobox values in different formats.""" - doc = fitz.open() + doc = pymupdf.open() page = doc.new_page() - rect = fitz.Rect(100, 100, 300, 200) + rect = pymupdf.Rect(100, 100, 300, 200) - widget = fitz.Widget() + widget = pymupdf.Widget() widget.field_flags = ( - fitz.PDF_CH_FIELD_IS_COMBO - | fitz.PDF_CH_FIELD_IS_EDIT - | fitz.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE + pymupdf.PDF_CH_FIELD_IS_COMBO + | pymupdf.PDF_CH_FIELD_IS_EDIT + | pymupdf.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE ) widget.field_name = "ComboBox-1" widget.field_label = "an editable combo box ..." - widget.field_type = fitz.PDF_WIDGET_TYPE_COMBOBOX - widget.fill_color = fitz.pdfcolor["gold"] + widget.field_type = pymupdf.PDF_WIDGET_TYPE_COMBOBOX + widget.fill_color = pymupdf.pdfcolor["gold"] widget.rect = rect widget.choice_values = [ ["Spain", "ES"], # double value as list @@ -207,28 +207,226 @@ def test_2411(): "Portugal", # single value ] page.add_widget(widget) - + + def test_2391(): """Confirm that multiple times setting a checkbox to ON/True/Yes will work.""" - doc = fitz.open(f'{scriptdir}/resources/widgettest.pdf') + doc = pymupdf.open(f"{scriptdir}/resources/widgettest.pdf") page = doc[0] # its work when we update first-time - for field in page.widgets(types=[fitz.PDF_WIDGET_TYPE_CHECKBOX]): + for field in page.widgets(types=[pymupdf.PDF_WIDGET_TYPE_CHECKBOX]): field.field_value = True field.update() for i in range(5): pdfdata = doc.tobytes() doc.close() - doc = fitz.open("pdf", pdfdata) + doc = pymupdf.open("pdf", pdfdata) page = doc[0] - for field in page.widgets(types=[fitz.PDF_WIDGET_TYPE_CHECKBOX]): + for field in page.widgets(types=[pymupdf.PDF_WIDGET_TYPE_CHECKBOX]): assert field.field_value == field.on_state() field_field_value = field.on_state() field.update() -# def test_deletewidget(): -# pdf = fitz.open(filename) -# page = pdf[0] -# field = page.first_widget -# page.delete_widget(field) + +def test_3216(): + document = pymupdf.open(filename) + for page in document: + while 1: + w = page.first_widget + print(f"{w=}") + if not w: + break + page.delete_widget(w) + + +def test_add_widget(): + doc = pymupdf.open() + page = doc.new_page() + w = pymupdf.Widget() + w.field_type = pymupdf.PDF_WIDGET_TYPE_BUTTON + w.rect = pymupdf.Rect(5, 5, 20, 20) + w.field_flags = pymupdf.PDF_BTN_FIELD_IS_PUSHBUTTON + w.field_name = "button" + w.fill_color = (0, 0, 1) + w.script = "app.alert('Hello, PDF!');" + page.add_widget(w) + + +def test_interfield_calculation(): + """Confirm correct working of interfield calculations. + + We are going to create three pages with a computed result field each. + + Tests the fix for https://github.com/pymupdf/PyMuPDF/issues/3402. + """ + # Field bboxes (same on each page) + r1 = pymupdf.Rect(100, 100, 300, 120) + r2 = pymupdf.Rect(100, 130, 300, 150) + r3 = pymupdf.Rect(100, 180, 300, 200) + + doc = pymupdf.open() + pdf = pymupdf._as_pdf_document(doc) # we need underlying PDF document + + # Make PDF name object for "CO" because it is not defined in MuPDF. + CO_name = pymupdf.mupdf.pdf_new_name("CO") # = PDF_NAME(CO) + for i in range(3): + page = doc.new_page() + w = pymupdf.Widget() + w.field_name = f"NUM1{page.number}" + w.rect = r1 + w.field_type = pymupdf.PDF_WIDGET_TYPE_TEXT + w.field_value = f"{i*100+1}" + w.field_flags = 2 + page.add_widget(w) + + w = pymupdf.Widget() + w.field_name = f"NUM2{page.number}" + w.rect = r2 + w.field_type = pymupdf.PDF_WIDGET_TYPE_TEXT + w.field_value = "200" + w.field_flags = 2 + page.add_widget(w) + + w = pymupdf.Widget() + w.field_name = f"RESULT{page.number}" + w.rect = r3 + w.field_type = pymupdf.PDF_WIDGET_TYPE_TEXT + w.field_value = "Result?" + # Script that adds previous two fields. + w.script_calc = f"""AFSimple_Calculate("SUM", + new Array("NUM1{page.number}", "NUM2{page.number}"));""" + page.add_widget(w) + + # Access the inter-field calculation array. It contains a reference to + # all fields which have a JavaScript stored in their "script_calc" + # property, i.e. an "AA/C" entry. + # Every iteration adds another such field, so this array's length must + # always equal the loop index. + if i == 0: # only need to execute this on first time through + CO = pymupdf.mupdf.pdf_dict_getl( + pymupdf.mupdf.pdf_trailer(pdf), + pymupdf.PDF_NAME("Root"), + pymupdf.PDF_NAME("AcroForm"), + CO_name, + ) + # we confirm CO is an array of foreseeable length + assert pymupdf.mupdf.pdf_array_len(CO) == i + 1 + + # the xref of the i-th item must equal that of the last widget + assert ( + pymupdf.mupdf.pdf_to_num(pymupdf.mupdf.pdf_array_get(CO, i)) + == list(page.widgets())[-1].xref + ) + + +def test_3950(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3950.pdf') + items = list() + with pymupdf.open(path) as document: + for page in document: + for widget in page.widgets(): + items.append(widget.field_label) + print(f'test_3950(): {widget.field_label=}.') + assert items == [ + '{{ named_insured }}', + '{{ policy_period_start_date }}', + '{{ policy_period_end_date }}', + '{{ insurance_line }}', + ] + + +def test_4004(): + import collections + + def get_widgets_by_name(doc): + """ + Extracts and returns a dictionary of widgets indexed by their names. + """ + widgets_by_name = collections.defaultdict(list) + for page_num in range(len(doc)): + page = doc.load_page(page_num) + for field in page.widgets(): + widgets_by_name[field.field_name].append({ + "page_num": page_num, + "widget": field + }) + return widgets_by_name + + # Open document and get widgets + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4004.pdf') + doc = pymupdf.open(path) + widgets_by_name = get_widgets_by_name(doc) + + # Print widget information + for name, widgets in widgets_by_name.items(): + print(f"Widget Name: {name}") + for entry in widgets: + widget = entry["widget"] + page_num = entry["page_num"] + print(f" Page: {page_num + 1}, Type: {widget.field_type}, Value: {widget.field_value}, Rect: {widget.rect}") + + # Attempt to update field value + w = widgets_by_name["Text1"][0] + field = w['widget'] + field.value = "1234567890" + try: + field.update() + except Exception as e: + assert str(e) == 'Annot is not bound to a page' + + doc.close() + + +def test_4055(): + """Check correct setting of CheckBox "Yes" values. + + Test scope: + * setting on with any of 'True' / 'Yes' / built-in values works + * setting off with any of 'False' or 'Off' works + """ + + # this PDF has digits as "Yes" values. + doc = pymupdf.open(file_4055) + page = doc[0] + + # Round 1: confirm all check boxes are off + for w in page.widgets(types=[2]): + # check that this file doesn't use the "Yes" standard + assert w.on_state() != "Yes" + assert w.field_value == "Off" # all check boxes are off + w.field_value = w.on_state() + w.update() + + page = doc.reload_page(page) # reload page to make sure we start fresh + + # Round 2: confirm that fields contain the PDF's own on values + for w in page.widgets(types=[2]): + # confirm each value coincides with the "Yes" value + assert w.field_value == w.on_state() + w.field_value = False # switch to "Off" using False + w.update() + + page = doc.reload_page(page) + + # Round 3: confirm that 'False' achieved "Off" values + for w in page.widgets(types=[2]): + assert w.field_value == "Off" + w.field_value = True # use True for the next round + w.update() + + page = doc.reload_page(page) + + # Round 4: confirm that setting to True also worked + for w in page.widgets(types=[2]): + assert w.field_value == w.on_state() + w.field_value = "Off" # set off again + w.update() + w.field_value = "Yes" + w.update() + + page = doc.reload_page(page) + + # Round 5: final check: setting to "Yes" also does work + for w in page.widgets(types=[2]): + assert w.field_value == w.on_state() diff --git a/tests/test_word_delimiters.py b/tests/test_word_delimiters.py new file mode 100644 index 000000000..a304a4272 --- /dev/null +++ b/tests/test_word_delimiters.py @@ -0,0 +1,23 @@ +import pymupdf +import string + + +def test_delimiters(): + """Test changing word delimiting characters.""" + doc = pymupdf.open() + page = doc.new_page() + text = "word1,word2 - word3. word4?word5." + page.insert_text((50, 50), text) + + # Standard words extraction: + # only spaces and line breaks start a new word + words0 = [w[4] for w in page.get_text("words")] + assert words0 == ["word1,word2", "-", "word3.", "word4?word5."] + + # extract words again + words1 = [w[4] for w in page.get_text("words", delimiters=string.punctuation)] + assert words0 != words1 + assert " ".join(words1) == "word1 word2 word3 word4 word5" + + # confirm we will be getting old extraction + assert [w[4] for w in page.get_text("words")] == words0 diff --git a/tests/util.py b/tests/util.py new file mode 100644 index 000000000..dbb246581 --- /dev/null +++ b/tests/util.py @@ -0,0 +1,28 @@ +import os +import subprocess + + +def download(url, name, size=None): + ''' + Downloads from to a local file and returns its path. + + If file already exists and matches we do not re-download it. + + We put local files within a `cache/` directory so that it is not deleted by + `git clean` (unless `-d` is specified). + ''' + path = os.path.normpath(f'{__file__}/../../tests/cache/{name}') + if os.path.isfile(path) and (not size or os.stat(path).st_size == size): + print(f'Using existing file {path=}.') + else: + print(f'Downloading from {url=}.') + subprocess.run(f'pip install -U requests', check=1, shell=1) + import requests + r = requests.get(url, path, timeout=10) + r.raise_for_status() + if size is not None: + assert len(r.content) == size + os.makedirs(os.path.dirname(path), exist_ok=1) + with open(path, 'wb') as f: + f.write(r.content) + return path diff --git a/valgrind.supp b/valgrind.supp new file mode 100644 index 000000000..58ad57931 --- /dev/null +++ b/valgrind.supp @@ -0,0 +1,17 @@ +# Valgrind suppression for false-positives from use of shared-libraries. +# +{ + sharedlibrary-read + Memcheck:Addr8 + fun:strncmp + fun:is_dst + ... + fun:fillin_rpath.isra.0 + fun:decompose_rpath + ... + fun:openaux + fun:_dl_catch_exception + fun:_dl_map_object_deps + fun:dl_open_worker_begin + fun:_dl_catch_exception +} diff --git a/wdev.py b/wdev.py new file mode 100644 index 000000000..33c348459 --- /dev/null +++ b/wdev.py @@ -0,0 +1,424 @@ +''' +Finds locations of Windows command-line development tools. +''' + +import os +import platform +import glob +import re +import subprocess +import sys +import sysconfig +import textwrap + +import pipcl + + +class WindowsVS: + r''' + Windows only. Finds locations of Visual Studio command-line tools. Assumes + VS2019-style paths. + + Members and example values:: + + .year: 2019 + .grade: Community + .version: 14.28.29910 + .directory: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community + .vcvars: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat + .cl: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29910\bin\Hostx64\x64\cl.exe + .link: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29910\bin\Hostx64\x64\link.exe + .csc: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Current\Bin\Roslyn\csc.exe + .msbuild: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Current\Bin\MSBuild.exe + .devenv: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\devenv.com + + `.csc` is C# compiler; will be None if not found. + ''' + def __init__( + self, + *, + year=None, + grade=None, + version=None, + cpu=None, + directory=None, + verbose=False, + ): + ''' + Args: + year: + None or, for example, `2019`. If None we use environment + variable WDEV_VS_YEAR if set. + grade: + None or, for example, one of: + + * `Community` + * `Professional` + * `Enterprise` + + If None we use environment variable WDEV_VS_GRADE if set. + version: + None or, for example: `14.28.29910`. If None we use environment + variable WDEV_VS_VERSION if set. + cpu: + None or a `WindowsCpu` instance. + directory: + Ignore year, grade, version and cpu and use this directory + directly. + verbose: + . + + ''' + if year is not None: + year = str(year) # Allow specification as a number. + def default(value, name): + if value is None: + name2 = f'WDEV_VS_{name.upper()}' + value = os.environ.get(name2) + if value is not None: + _log(f'Setting {name} from environment variable {name2}: {value!r}') + return value + try: + year = default(year, 'year') + grade = default(grade, 'grade') + version = default(version, 'version') + + if not cpu: + cpu = WindowsCpu() + + if not directory: + # Find `directory`. + # + pattern = _vs_pattern(year, grade) + directories = glob.glob( pattern) + if verbose: + _log( f'Matches for: {pattern=}') + _log( f'{directories=}') + assert directories, f'No match found for {pattern=}.' + directories.sort() + directory = directories[-1] + + # Find `devenv`. + # + devenv = f'{directory}\\Common7\\IDE\\devenv.com' + assert os.path.isfile( devenv), f'Does not exist: {devenv}' + + # Extract `year` and `grade` from `directory`. + # + # We use r'...' for regex strings because an extra level of escaping is + # required for backslashes. + # + regex = rf'^C:\\Program Files.*\\Microsoft Visual Studio\\([^\\]+)\\([^\\]+)' + m = re.match( regex, directory) + assert m, f'No match: {regex=} {directory=}' + year2 = m.group(1) + grade2 = m.group(2) + if year: + assert year2 == year + else: + year = year2 + if grade: + assert grade2 == grade + else: + grade = grade2 + + # Find vcvars.bat. + # + vcvars = f'{directory}\\VC\\Auxiliary\\Build\\vcvars{cpu.bits}.bat' + assert os.path.isfile( vcvars), f'No match for: {vcvars}' + + # Find cl.exe. + # + cl_pattern = f'{directory}\\VC\\Tools\\MSVC\\{version if version else "*"}\\bin\\Host{cpu.windows_name}\\{cpu.windows_name}\\cl.exe' + cl_s = glob.glob( cl_pattern) + assert cl_s, f'No match for: {cl_pattern}' + cl_s.sort() + cl = cl_s[ -1] + + # Extract `version` from cl.exe's path. + # + m = re.search( rf'\\VC\\Tools\\MSVC\\([^\\]+)\\bin\\Host{cpu.windows_name}\\{cpu.windows_name}\\cl.exe$', cl) + assert m + version2 = m.group(1) + if version: + assert version2 == version + else: + version = version2 + assert version + + # Find link.exe. + # + link_pattern = f'{directory}\\VC\\Tools\\MSVC\\{version}\\bin\\Host{cpu.windows_name}\\{cpu.windows_name}\\link.exe' + link_s = glob.glob( link_pattern) + assert link_s, f'No match for: {link_pattern}' + link_s.sort() + link = link_s[ -1] + + # Find csc.exe. + # + csc = None + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + if filename == 'csc.exe': + csc = os.path.join(dirpath, filename) + #_log(f'{csc=}') + #break + + # Find MSBuild.exe. + # + msbuild = None + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + if filename == 'MSBuild.exe': + msbuild = os.path.join(dirpath, filename) + #_log(f'{csc=}') + #break + + self.cl = cl + self.devenv = devenv + self.directory = directory + self.grade = grade + self.link = link + self.csc = csc + self.msbuild = msbuild + self.vcvars = vcvars + self.version = version + self.year = year + self.cpu = cpu + except Exception as e: + raise Exception( f'Unable to find Visual Studio {year=} {grade=} {version=} {cpu=} {directory=}') from e + + def description_ml( self, indent=''): + ''' + Return multiline description of `self`. + ''' + ret = textwrap.dedent(f''' + year: {self.year} + grade: {self.grade} + version: {self.version} + directory: {self.directory} + vcvars: {self.vcvars} + cl: {self.cl} + link: {self.link} + csc: {self.csc} + msbuild: {self.msbuild} + devenv: {self.devenv} + cpu: {self.cpu} + ''') + return textwrap.indent( ret, indent) + + def __repr__( self): + items = list() + for name in ( + 'year', + 'grade', + 'version', + 'directory', + 'vcvars', + 'cl', + 'link', + 'csc', + 'msbuild', + 'devenv', + 'cpu', + ): + items.append(f'{name}={getattr(self, name)!r}') + return ' '.join(items) + + +def _vs_pattern(year=None, grade=None): + return f'C:\\Program Files*\\Microsoft Visual Studio\\{year if year else "2*"}\\{grade if grade else "*"}' + + +def windows_vs_multiple(year=None, grade=None, verbose=0): + ''' + Returns list of WindowsVS instances. + ''' + ret = list() + directories = glob.glob(_vs_pattern(year, grade)) + for directory in directories: + vs = WindowsVS(directory=directory) + if verbose: + _log(vs.description_ml()) + ret.append(vs) + return ret + + +class WindowsCpu: + ''' + For Windows only. Paths and names that depend on cpu. + + Members: + .bits + 32 or 64. + .windows_subdir + Empty string or `x64/`. + .windows_name + `x86` or `x64`. + .windows_config + `x64` or `Win32`, e.g. for use in `/Build Release|x64`. + .windows_suffix + `64` or empty string. + ''' + def __init__(self, name=None): + if not name: + name = _cpu_name() + self.name = name + if name == 'x32': + self.bits = 32 + self.windows_subdir = '' + self.windows_name = 'x86' + self.windows_config = 'Win32' + self.windows_suffix = '' + elif name == 'x64': + self.bits = 64 + self.windows_subdir = 'x64/' + self.windows_name = 'x64' + self.windows_config = 'x64' + self.windows_suffix = '64' + else: + assert 0, f'Unrecognised cpu name: {name}' + + def __repr__(self): + return self.name + + +class WindowsPython: + ''' + Windows only. Information about installed Python with specific word size + and version. Defaults to the currently-running Python. + + Members: + + .path: + Path of python binary. + .version: + `{major}.{minor}`, e.g. `3.9` or `3.11`. Same as `version` passed + to `__init__()` if not None, otherwise the inferred version. + .include: + Python include path. + .cpu: + A `WindowsCpu` instance, same as `cpu` passed to `__init__()` if + not None, otherwise the inferred cpu. + + We parse the output from `py -0p` to find all available python + installations. + ''' + + def __init__( self, cpu=None, version=None, verbose=True): + ''' + Args: + + cpu: + A WindowsCpu instance. If None, we use whatever we are running + on. + version: + Two-digit Python version as a string such as `3.8`. If None we + use current Python's version. + verbose: + If true we show diagnostics. + ''' + if cpu is None: + cpu = WindowsCpu(_cpu_name()) + if version is None: + version = '.'.join(platform.python_version().split('.')[:2]) + _log(f'Looking for Python {version=} {cpu.bits=}.') + + if '.'.join(platform.python_version().split('.')[:2]) == version: + # Current python matches, so use it directly. This avoids problems + # on Github where experimental python-3.13 was not available via + # `py`, and is kept here in case a similar problems happens with + # future Python versions. + _log(f'{cpu=} {version=}: using {sys.executable=}.') + self.path = sys.executable + self.version = version + self.cpu = cpu + self.include = sysconfig.get_path('include') + + else: + command = 'py -0p' + if verbose: + _log(f'{cpu=} {version=}: Running: {command}') + text = subprocess.check_output( command, shell=True, text=True) + for line in text.split('\n'): + #_log( f' {line}') + if m := re.match( '^ *-V:([0-9.]+)(-32)? ([*])? +(.+)$', line): + version2 = m.group(1) + bits = 32 if m.group(2) else 64 + current = m.group(3) + path = m.group(4).strip() + elif m := re.match( '^ *-([0-9.]+)-((32)|(64)) +(.+)$', line): + version2 = m.group(1) + bits = int(m.group(2)) + path = m.group(5).strip() + else: + if verbose: + _log( f'No match for {line=}') + continue + if verbose: + _log( f'{version2=} {bits=} {path=} from {line=}.') + if bits != cpu.bits or version2 != version: + continue + root = os.path.dirname(path) + if not os.path.exists(path): + # Sometimes it seems that the specified .../python.exe does not exist, + # and we have to change it to .../python.exe. + # + assert path.endswith('.exe'), f'path={path!r}' + path2 = f'{path[:-4]}{version}.exe' + _log( f'Python {path!r} does not exist; changed to: {path2!r}') + assert os.path.exists( path2) + path = path2 + + self.path = path + self.version = version + self.cpu = cpu + command = f'{self.path} -c "import sysconfig; print(sysconfig.get_path(\'include\'))"' + _log(f'Finding Python include path by running {command=}.') + self.include = subprocess.check_output(command, shell=True, text=True).strip() + _log(f'Python include path is {self.include=}.') + #_log( f'pipcl.py:WindowsPython():\n{self.description_ml(" ")}') + break + else: + _log(f'Failed to find python matching cpu={cpu}.') + _log(f'Output from {command!r} was:\n{text}') + raise Exception( f'Failed to find python matching cpu={cpu} {version=}.') + + # Oddly there doesn't seem to be a + # `sysconfig.get_path('libs')`, but it seems to be next + # to `includes`: + self.libs = os.path.abspath(f'{self.include}/../libs') + + _log( f'WindowsPython:\n{self.description_ml(" ")}') + + def description_ml(self, indent=''): + ret = textwrap.dedent(f''' + path: {self.path} + version: {self.version} + cpu: {self.cpu} + include: {self.include} + libs: {self.libs} + ''') + return textwrap.indent( ret, indent) + + def __repr__(self): + return f'path={self.path!r} version={self.version!r} cpu={self.cpu!r} include={self.include!r} libs={self.libs!r}' + + +# Internal helpers. +# + +def _cpu_name(): + ''' + Returns `x32` or `x64` depending on Python build. + ''' + #log(f'sys.maxsize={hex(sys.maxsize)}') + return f'x{32 if sys.maxsize == 2**31 - 1 else 64}' + + + +def _log(text='', caller=1): + ''' + Logs lines with prefix. + ''' + pipcl.log1(text, caller+1)
+ +

SPRINGERS EINWÜRFE: INTIME VERBINDUNGEN

+ +

Wieso kann unsereins so vieles, was eine Maus nicht kann? Unser Gehirn ist nicht bloß größer, sondern vor allem überraschend vertrackt verdrahtet.

+ +

Der Heilige Gral der Neu­ro­wis­sen­schaft ist die komplette Kartierung des menschlichen Gehirns – die ge­treue Ab­bildung des Ge­strüpps der Nervenzellen mit den baum­för­mi­gen Ver­ästel­ungen der aus ihnen sprie­ßen­den Den­dri­ten und den viel län­ge­ren Axo­nen, wel­che oft der Sig­nal­über­tragung von einem Sin­nes­or­gan oder zu einer Mus­kel­fa­ser die­nen. Zum Gesamtbild gehören die winzigen Knötchen auf den Dendriten; dort sitzen die Synapsen. Das sind Kontakt- und Schalt­stel­len, leb­haf­te Ver­bin­dungen zu anderen Neuronen.

+ +

Dieses Dickicht bis zur Ebene einzelner Zel­len zu durchforsten und es räumlich dar­zu­stel­len, ist eine gigantische Aufgabe, die bis vor Kurzem utopisch anmuten musste. Neu­er­dings vermag der junge For­schungs­zweig der Konnektomik (von Englisch: con­nect für ver­bin­den) das Zusammenspiel der Neurone immer besser zu verstehen. Das gelingt mit dem Einsatz dreidimensionaler Elek­tro­nen­mik­ros­ko­pie. Aus Dünn­schicht­auf­nah­men von zerebralen Ge­we­be­pro­ben lassen sich plastische Bil­der ganzer Zellverbände zu­sam­men­setzen.

+ +

Da frisches menschliches Hirn­ge­we­be nicht ohne Wei­te­res zu­gäng­lich ist – in der Regel nur nach chirurgischen Eingriffen an Epi­lep­sie­pa­tien­ten –, hält die Maus als Mo­dell­or­ga­nis­mus her. Die evolutionäre Ver­wandt­schaft von Mensch und Nager macht die Wahl plau­sibel. Vor allem das Team um Moritz Helmstaedter am Max-Planck-Institut (MPI) für Hirnforschung in Frankfurt hat in den ver­gangenen Jahren Expertise bei der kon­nek­tomischen Analyse entwickelt.

+ +

Aber steckt in unserem Kopf bloß ein auf die tausendfache Neu­ro­nen­an­zahl auf­ge­bläh­tes Mäu­se­hirn? Oder ist menschliches Ner­ven­ge­we­be viel­leicht doch anders gestrickt? Zur Beantwortung dieser Frage unternahm die MPI-Gruppe einen detaillierten Vergleich von Maus, Makake und Mensch (Science 377, abo0924, 2022).

+ +

Menschliches Gewebe stammte diesmal nicht von Epileptikern, son­dern von zwei wegen Hirntumoren operierten Patienten. Die For­scher wollten damit vermeiden, dass die oft jahrelange Behandlung mit An­ti­epi­lep­ti­ka das Bild der synaptischen Verknüpfungen trübte. Sie verglichen die Proben mit denen eines Makaken und von fünf Mäusen.

+ +

Einerseits ergaben sich – einmal ab­ge­se­hen von den ganz of­fen­sicht­li­chen quan­titativen Unterschieden wie Hirngröße und Neu­ro­nen­anzahl – recht gute Über­ein­stim­mun­gen, die somit den Gebrauch von Tier­modellen recht­fer­ti­gen. Doch in einem Punkt erlebte das MPI-Team eine echte Über­raschung.

+ +

Gewisse Nervenzellen, die so genannten In­ter­neurone, zeichnen sich dadurch aus, dass sie aus­schließ­lich mit anderen Ner­ven­zel­len in­ter­agieren. Solche »Zwi­schen­neu­rone« mit meist kurzen Axonen sind nicht primär für das Verarbeiten externer Reize oder das Aus­lösen körperlicher Reaktionen zuständig; sie be­schäf­ti­gen sich bloß mit der Ver­stär­kung oder Dämpfung interner Signale.

+ +

Just dieser Neuronentyp ist nun bei Makaken und Menschen nicht nur mehr als doppelt so häufig wie bei Mäusen, sondern obendrein be­son­ders intensiv untereinander ver­flochten. Die meisten Interneurone kop­peln sich fast ausschließlich an ihresgleichen. Dadurch wirkt sich ihr konnektomisches Ge­wicht ver­gleichs­weise zehnmal so stark aus.

+ +

Vermutlich ist eine derart mit sich selbst be­schäf­tigte Sig­nal­ver­ar­beitung die Vor­be­ding­ung für ge­stei­gerte Hirn­leis­tungen. Um einen Ver­gleich mit verhältnismäßig pri­mi­ti­ver Tech­nik zu wagen: Bei küns­tli­chen neu­ro­na­len Netzen – Algorithmen nach dem Vor­bild verknüpfter Nervenzellen – ge­nü­gen schon ein, zwei so genannte ver­bor­ge­ne Schich­ten von selbst­be­züg­li­chen Schaltstellen zwischen Input und Output-Ebene, um die ver­blüf­fen­den Erfolge der künstlichen Intel­ligenz her­vor­zu­bringen.

+