diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 5a2fbae3b0..852d5ecb3d 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -138,16 +138,6 @@ jobs: working-directory: /tmp run: python -c "import pypdf;print(pypdf.__version__)" - # - name: Release to pypi if tagged. - # if: startsWith(github.ref, 'refs/tags') - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # user: __token__ - # password: ${{ secrets.PYPI_API_TOKEN }} - - name: Create GitHub release if tagged. - if: startsWith(github.ref, 'refs/tags/') - uses: softprops/action-gh-release@v1 - coverage: name: Combine & check coverage. runs-on: ubuntu-latest diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000000..a006a01b65 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,59 @@ +# This action assumes that there is a REL-commit which already has a +# Markdown-formatted git tag. Hence the CHANGELOG is already adjusted +# and it's decided what should be in the release. +# This action only ensures the release is done with the proper contents +# and that it's announced with a Github release. 
+name: Publish Python Package to PyPI +on: + push: + tags: + - '*.*.*' + +jobs: + build_and_publish: + # this doesn't make sense if you don't have the PyPI secret + if: github.repository == 'py-pdf/pypdf' + name: Publish a new version of pypdf + runs-on: ubuntu-latest + + steps: + # Ensure it's on PyPI + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.x + + - name: Install Flit + run: | + python -m pip install --upgrade pip + pip install flit + + - name: Publish Package to PyPI🚀 + env: + FLIT_USERNAME: '__token__' + FLIT_PASSWORD: ${{ secrets.FLIT_PASSWORD }} + run: | + flit publish + + # Create the GitHub release + - name: Prepare variables + id: prepare_variables + run: | + git fetch --tags --force + latest_tag=$(git describe --tags --abbrev=0) + echo "latest_tag=${latest_tag}" >> "$GITHUB_ENV" + echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV" + { echo 'tag_body<<TAG_BODY_EOF'; git --no-pager tag -l "${latest_tag}" --format='%(contents:body)'; echo 'TAG_BODY_EOF'; } >> "$GITHUB_ENV" # multiline value: export via the name<<DELIM form so env.tag_body is set for the next step + - name: Create GitHub Release 🚀 + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + release_name: Version ${{ env.latest_tag }}, ${{ env.date }} + draft: false + prerelease: false + body: Body is ${{ env.tag_body }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9f6537f7d..022dcf68f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,23 +22,23 @@ repos: # hooks: # - id: mypy - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black args: [--target-version, py36] - repo: https://github.com/asottile/blacken-docs - rev: 1.14.0 + rev: 1.15.0 hooks: - id: blacken-docs additional_dependencies: [black==22.1.0] exclude: "docs/user/robustness.md" - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.275' + rev: 'v0.0.278' hooks: - id: ruff args: ['--fix'] - repo: 
https://github.com/asottile/pyupgrade - rev: v3.7.0 + rev: v3.9.0 hooks: - id: pyupgrade args: [--py36-plus] diff --git a/CHANGELOG.md b/CHANGELOG.md index 7242818824..352989905e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # CHANGELOG +## Version 3.12.2, 2023-07-16 + +### Bug Fixes (BUG) +- Accept calRGB and calGray color_spaces (#1968) +- Process 2bits and 4bits images (#1967) +- Check for AcroForm and ensure it is not None (#1965) + +### Developer Experience (DEV) +- Automate the release process (#1970) + +[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.12.1...3.12.2) + ## Version 3.12.1, 2023-07-09 ### Bug Fixes (BUG) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ac049ab668..7c8fc73fee 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -12,6 +12,7 @@ history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/pypdf/gra ## Contributors to the pypdf (formerly pyPdf / PyPDF2) project * [abyesilyurt](https://github.com/abyesilyurt) +* [ArkieCoder](https://github.com/ArkieCoder) * [DL6ER](https://github.com/DL6ER) * [ediamondscience](https://github.com/ediamondscience) * [Ermeson, Felipe](https://github.com/FelipeErmeson) diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py index dd4acbc422..b7a65a9ee6 100644 --- a/pypdf/_encryption.py +++ b/pypdf/_encryption.py @@ -194,10 +194,7 @@ def encrypt_object(self, obj: PdfObject) -> PdfObject: obj2[key] = self.encrypt_object(value) obj = obj2 elif isinstance(obj, ArrayObject): - obj2 = ArrayObject() # type: ignore - for x in obj: - obj2.append(self.encrypt_object(x)) # type: ignore - obj = obj2 + obj = ArrayObject(self.encrypt_object(x) for x in obj) # type: ignore return obj def decrypt_object(self, obj: PdfObject) -> PdfObject: diff --git a/pypdf/_page.py b/pypdf/_page.py index 4b87213319..081eb8815f 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -978,7 +978,7 @@ def replace_contents( self[NameObject(PG.CONTENTS)] = content def merge_page( - self, page2: 
"PageObject", expand: bool = False, over: bool = True + self, page2: "PageObject", expand: bool = False, over: bool = True ) -> None: """ Merge the content streams of two pages into one. @@ -1046,7 +1046,7 @@ def _merge_page( annots = page[PG.ANNOTS] if isinstance(annots, ArrayObject): for ref in annots: - new_annots.append(ref) + new_annots.append(ref) # noqa: PERF402 for res in ( RES.EXT_G_STATE, diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 32105b2716..eb570096e7 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -2217,11 +2217,7 @@ def _list_attachments(self) -> List[str]: ) except KeyError: return [] - attachments_names = [] - # Loop through attachments - for f in filenames: - if isinstance(f, str): - attachments_names.append(f) + attachments_names = [f for f in filenames if isinstance(f, str)] return attachments_names def _get_attachment_list(self, name: str) -> List[bytes]: diff --git a/pypdf/_version.py b/pypdf/_version.py index 555cdb25f5..e2257b3203 100644 --- a/pypdf/_version.py +++ b/pypdf/_version.py @@ -1 +1 @@ -__version__ = "3.12.1" +__version__ = "3.12.2" diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 193ce7812c..1abb615668 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -956,9 +956,7 @@ def update_page_form_field_values( or self._get_qualified_field_name(writer_annot) == field ): if isinstance(value, list): - lst = ArrayObject() - for v in value: - lst.append(TextStringObject(v)) + lst = ArrayObject(TextStringObject(v) for v in value) writer_annot[NameObject(FA.V)] = lst else: writer_annot[NameObject(FA.V)] = TextStringObject(value) @@ -2921,7 +2919,7 @@ def merge( pag[NameObject("/Annots")] = lst self.clean_page(pag) - if "/AcroForm" in cast(DictionaryObject, reader.trailer["/Root"]): + if "/AcroForm" in _ro and _ro["/AcroForm"] is not None: if "/AcroForm" not in self._root_object: self._root_object[NameObject("/AcroForm")] = self._add_object( cast( @@ -3395,10 +3393,7 @@ def _pdf_objectify(obj: Union[Dict[str, Any], 
str, int, List[Any]]) -> PdfObject to_add[name_key] = casted_value return to_add elif isinstance(obj, list): - arr = ArrayObject() - for el in obj: - arr.append(_pdf_objectify(el)) - return arr + return ArrayObject(_pdf_objectify(el) for el in obj) elif isinstance(obj, str): if obj.startswith("/"): return NameObject(obj) diff --git a/pypdf/filters.py b/pypdf/filters.py index efa22d907b..e4aa6ea4c6 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -641,7 +641,9 @@ def decodeStreamData(stream: Any) -> Union[str, bytes]: # deprecated return decode_stream_data(stream) -mode_str_type: TypeAlias = Literal["", "1", "RGB", "P", "L", "RGBA", "CMYK"] +mode_str_type: TypeAlias = Literal[ + "", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK" +] def _get_imagemode( @@ -654,6 +656,8 @@ def _get_imagemode( raise PdfReadError( "can not interprete colorspace", color_space ) # pragma: no cover + elif color_space[0].startswith("/Cal"): # /CalRGB and /CalGray + color_space = "/Device" + color_space[0][4:] elif color_space[0] == "/ICCBased": icc_profile = color_space[1].get_object() color_components = cast(int, icc_profile["/N"]) @@ -673,6 +677,8 @@ def _get_imagemode( mode_map = { "1bit": "1", # 0 will be used for 1 bit + "2bit": "2bits", # 2 bits images + "4bit": "4bits", # 4 bits "/DeviceGray": "L", "palette": "P", # reserved for color_components alignment "/DeviceRGB": "RGB", @@ -718,6 +724,24 @@ def _handle_flate( Process image encoded in flateEncode Returns img, image_format, extension """ + + def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes: + mask = (1 << bits) - 1 # keep exactly the low `bits` bits of each sample + nbuff = bytearray(size[0] * size[1]) + by = 0 + bit = 8 - bits + for y in range(size[1]): + if bit != 8 - bits: # previous row ended mid-byte: skip its padding bits + by += 1 + bit = 8 - bits + for x in range(size[0]): + nbuff[y * size[0] + x] = (data[by] >> bit) & mask + bit -= bits + if bit < 0: + by += 1 + bit = 8 - bits + return bytes(nbuff) + extension = ".png" # mime_type = "image/png" lookup: Any base: Any @@ 
-726,6 +750,12 @@ def _handle_flate( color_space, base, hival, lookup = ( value.get_object() for value in color_space ) + if mode == "2bits": + mode = "P" + data = bits2byte(data, size, 2) + elif mode == "4bits": + mode = "P" + data = bits2byte(data, size, 4) img = Image.frombytes(mode, size, data) if color_space == "/Indexed": from .generic import ByteStringObject @@ -820,8 +850,8 @@ def _handle_jpx( ): # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes mode: mode_str_type = "RGB" - if x_object_obj.get("/BitsPerComponent", 8) == 1: - mode = _get_imagemode("1bit", 0, "") + if x_object_obj.get("/BitsPerComponent", 8) < 8: + mode = _get_imagemode(f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, "") else: mode = _get_imagemode( color_space, @@ -842,7 +872,11 @@ def _handle_jpx( lfilters = filters[-1] if isinstance(filters, list) else filters if lfilters == FT.FLATE_DECODE: img, image_format, extension = _handle_flate( - size, data, mode, color_space, colors + size, + data, + mode, + color_space, + colors, ) elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE, FT.CCITT_FAX_DECODE): # I'm not sure if the following logic is correct. 
@@ -898,14 +932,6 @@ def _handle_jpx( # TODO : implement mask if alpha.mode != "L": alpha = alpha.convert("L") - scale = x_object_obj[IA.S_MASK].get("/Decode", [0.0, 1.0]) - if (scale[1] - scale[0]) != 1.0: - alpha = alpha.point( - [ - round(255.0 * (v / 255.0 * (scale[1] - scale[0]) + scale[0])) - for v in range(256) - ] - ) if img.mode == "P": img = img.convert("RGB") img.putalpha(alpha) diff --git a/pyproject.toml b/pyproject.toml index 5da4d9b7f9..4066dd0617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,6 +183,8 @@ ignore = [ "S101", # Use of `assert` detected "SLF001", # Private member accessed "PD011", # Use `.to_numpy()` instead of `.values` + "FA102", # Missing `from __future__ import annotations`, but uses PEP 604 union + "PERF203", # `try`-`except` within a loop incurs performance overhead ] [tool.ruff.per-file-ignores] diff --git a/requirements/ci-3.11.txt b/requirements/ci-3.11.txt index 7236e1d054..169f930497 100644 --- a/requirements/ci-3.11.txt +++ b/requirements/ci-3.11.txt @@ -61,7 +61,7 @@ pytest-socket==0.6.0 # via -r requirements/ci.in pytest-timeout==2.1.0 # via -r requirements/ci.in -ruff==0.0.275 +ruff==0.0.278 # via -r requirements/ci.in typeguard==3.0.2 # via -r requirements/ci.in diff --git a/tests/test_filters.py b/tests/test_filters.py index 9e0acef5ca..2eb8b58c0e 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -438,3 +438,30 @@ def test_cascaded_filters_images(): for p in reader.pages: for i in p.images: _ = i.name, i.image + + +@pytest.mark.enable_socket() +def test_calrgb(): + url = "https://github.com/py-pdf/pypdf/files/12061061/tt.pdf" + name = "calRGB.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + reader.pages[0].images[0] + + +@pytest.mark.enable_socket() +def test_2bits_image(): + """From #1954, test with 2bits image. 
TODO: 4bits also""" + url = "https://github.com/py-pdf/pypdf/files/12050253/tt.pdf" + name = "paid.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + url_png = "https://user-images.githubusercontent.com/4083478/253568117-ca95cc85-9dea-4145-a5e0-032f1c1aa322.png" + name_png = "Paid.png" + refimg = Image.open( + BytesIO(get_pdf_from_url(url_png, name=name_png)) + ) # not a pdf but it works + data = reader.pages[0].images[0] + diff = ImageChops.difference(data.image, refimg) + d = sqrt( + sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()]) + ) / (diff.size[0] * diff.size[1]) + assert d < 0.01 diff --git a/tests/test_reader.py b/tests/test_reader.py index 69994f5ca9..dc8b44a2f9 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -712,7 +712,7 @@ def get_dest_pages(x) -> int: # oi can be destination or a list:preferred to just print them for oi in outline: - out.append(get_dest_pages(oi)) + out.append(get_dest_pages(oi)) # noqa: PERF401 def test_decode_permissions(): diff --git a/tests/test_xmp.py b/tests/test_xmp.py index b2709579c6..777a441415 100644 --- a/tests/test_xmp.py +++ b/tests/test_xmp.py @@ -71,9 +71,7 @@ def get_all_tiff(xmp: pypdf.xmp.XmpInformation): about_uri="", namespace="http://ns.adobe.com/tiff/1.0/" ) for tag in tiff_ns: - contents = [] - for content in tag.childNodes: - contents.append(content.data) + contents = [content.data for content in tag.childNodes] data[tag.tagName] = contents return data