diff --git a/CHANGELOG.md b/CHANGELOG.md
index f658278131..014decc31e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,32 @@
 # CHANGELOG
 
+## Version 3.3.0, 2023-01-22
+
+### New Features (ENH)
+-  Add page label support to PdfWriter (#1558)
+-  Accept inline images with space before EI (#1552)
+-  Add circle annotation support (#1556)
+-  Add polygon annotation support (#1557)
+-  Make merging pages produce a deterministic PDF (#1542, #1543)
+
+### Bug Fixes (BUG)
+-  Fix error in cmap extraction (#1544)
+-  Remove erroneous assertion check (#1564)
+-  Fix dictionary access of optional page label keys (#1562)
+
+### Robustness (ROB)
+-  Set ignore_eof=True for read_until_regex (#1521)
+
+### Documentation (DOC)
+-  Paper size (#1550)
+
+### Developer Experience (DEV)
+-  Fix broken combination of dependencies of docs.txt
+-  Annotate tests appropriately (#1551)
+
+[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.2.1...3.3.0)
+
+
 ## Version 3.2.1, 2023-01-08
 
 ### Bug Fixes (BUG)
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index d17c9a1f4d..0af20cd2d8 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -20,6 +20,8 @@ history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/pypdf/gra
 * [Karvonen, Harry](https://github.com/Hatell/)
 * [KourFrost](https://github.com/KourFrost)
 * [Lightup1](https://github.com/Lightup1)
+* [Majumder, Jonah](https://github.com/jonahmajumder)
+* [Manini, Lorenzo](https://github.com/lorenzomanini)
 * [maxbeer99](https://github.com/maxbeer99)
 * [Mérino, Antoine](https://github.com/Merinorus)
 * [Perrensen, Olsen](https://github.com/olsonperrensen)
diff --git a/docs/modules/PaperSize.rst b/docs/modules/PaperSize.rst
index 0487678522..0cbc36f402 100644
--- a/docs/modules/PaperSize.rst
+++ b/docs/modules/PaperSize.rst
@@ -1,7 +1,34 @@
 The PaperSize Class
-------------------------
+-------------------
 
 .. autoclass:: pypdf.PaperSize
     :members:
     :undoc-members:
     :show-inheritance:
+
+Add blank page with PaperSize
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: python
+    :linenos:
+
+    from pypdf import PaperSize, PdfReader, PdfWriter
+    pdf_reader = PdfReader("sample.pdf")
+    pdf_writer = PdfWriter()
+    pdf_writer.append_pages_from_reader(pdf_reader)
+    pdf_writer.add_blank_page(PaperSize.A8.width, PaperSize.A8.height)
+    with open("output.pdf", "wb") as output_stream:
+        pdf_writer.write(output_stream)
+
+Insert blank page with PaperSize
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: python
+    :linenos:
+    
+    from pypdf import PaperSize, PdfReader, PdfWriter
+    pdf_reader = PdfReader("sample.pdf")
+    pdf_writer = PdfWriter()
+    pdf_writer.append_pages_from_reader(pdf_reader)
+    pdf_writer.insert_blank_page(PaperSize.A8.width, PaperSize.A8.height, 1)
+    with open("output.pdf", "wb") as output_stream:
+        pdf_writer.write(output_stream)
+    
\ No newline at end of file
diff --git a/docs/user/adding-pdf-annotations.md b/docs/user/adding-pdf-annotations.md
index 54c451547a..620ea8704a 100644
--- a/docs/user/adding-pdf-annotations.md
+++ b/docs/user/adding-pdf-annotations.md
@@ -104,7 +104,7 @@ page = reader.pages[0]
 writer = PdfWriter()
 writer.add_page(page)
 
-# Add the line
+# Add the rectangle
 annotation = AnnotationBuilder.rectangle(
     rect=(50, 550, 200, 650),
 )
@@ -119,6 +119,56 @@ If you want the rectangle to be filled, use the `interiour_color="ff0000"` param
 
 This method uses the "square" annotation type of the PDF format.
 
+
+## Ellipse
+
+To add a circle (or any other ellipse) like the one below, use `AnnotationBuilder.ellipse`:
+
+![](annotation-circle.png)
+
+```python
+pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
+reader = PdfReader(pdf_path)
+page = reader.pages[0]
+writer = PdfWriter()
+writer.add_page(page)
+
+# Add the ellipse
+annotation = AnnotationBuilder.ellipse(
+    rect=(50, 550, 200, 650))
+writer.add_annotation(page_number=0, annotation=annotation)
+
+# Write the annotated file to disk
+with open("annotated-pdf.pdf", "wb") as fp:
+    writer.write(fp)
+```
+
+## Polygon
+
+If you want to add a polygon like this:
+
+![](annotation-polygon.png)
+
+you can use the {py:class}`AnnotationBuilder <pypdf.generic.AnnotationBuilder>`:
+
+```python
+pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
+reader = PdfReader(pdf_path)
+page = reader.pages[0]
+writer = PdfWriter()
+writer.add_page(page)
+
+# Add the polygon
+annotation = AnnotationBuilder.polygon(
+    vertices=[(50, 550), (200, 650), (70, 750), (50, 700)],
+)
+writer.add_annotation(page_number=0, annotation=annotation)
+
+# Write the annotated file to disk
+with open("annotated-pdf.pdf", "wb") as fp:
+    writer.write(fp)
+```
+
 ## Link
 
 If you want to add a link, you can use
diff --git a/docs/user/annotation-circle.png b/docs/user/annotation-circle.png
new file mode 100644
index 0000000000..8bf8bdf9dd
Binary files /dev/null and b/docs/user/annotation-circle.png differ
diff --git a/docs/user/annotation-polygon.png b/docs/user/annotation-polygon.png
new file mode 100644
index 0000000000..5b8e74f744
Binary files /dev/null and b/docs/user/annotation-polygon.png differ
diff --git a/docs/user/extract-text.md b/docs/user/extract-text.md
index dca9895694..0036de5500 100644
--- a/docs/user/extract-text.md
+++ b/docs/user/extract-text.md
@@ -151,6 +151,30 @@ the way PDF stores information just makes it hard to achieve that:
 And finally there are issues that pypdf will deal with. If you find such a
 text extraction bug, please share the PDF with us so we can work on it!
 
+### Whitespaces
+
+The PDF format is meant for printing. It is not designed to be read by machines.
+The text within a PDF document is absolutely positioned, meaning that every single
+character can be placed at its own position on the page.
+
+The text
+
+> This is a test document by Ethan Nelson.
+
+can be represented as
+
+> [(This is a )9(te)-3(st)9( do)-4(cu)13(m)-4(en)12(t )-3(b)3(y)-3( )9(Et)-2(h)3(an)4( Nels)13(o)-5(n)3(.)] TJ
+
+Where the numbers are adjustments of horizontal space. This representation used
+within the PDF file makes it very hard to guarantee correct whitespaces.
+
+
+More information:
+
+* [issue #1507](https://github.com/py-pdf/pypdf/issues/1507)
+* [Negative numbers in PDF content stream text object](https://stackoverflow.com/a/28203655/562769)
+* Mark Stephens: [Understanding PDF text objects](https://blog.idrsolutions.com/understanding-pdf-text-objects/), 2010.
+
 ## OCR vs Text Extraction
 
 Optical Character Recognition (OCR) is the process of extracting text from
diff --git a/make_changelog.py b/make_changelog.py
index d5f0b93799..badd315d05 100644
--- a/make_changelog.py
+++ b/make_changelog.py
@@ -27,7 +27,8 @@ def main(changelog_path: str):
     today = datetime.now()
     header = f"Version {new_version}, {today:%Y-%m-%d}\n"
     header = header + "-" * (len(header) - 1) + "\n"
-    trailer = f"\n[Full Changelog](https://github.com/py-pdf/pypdf/compare/{git_tag}...{new_version})\n\n"
+    url = f"https://github.com/py-pdf/pypdf/compare/{git_tag}...{new_version}"
+    trailer = f"\n[Full Changelog]({url})\n\n"
     new_entry = header + changes + trailer
     print(new_entry)
 
diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index 8c472f87dd..9890526ba0 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -27,8 +27,10 @@ def build_char_map(
     encoding, space_code = parse_encoding(ft, space_code)
     map_dict, space_code, int_entry = parse_to_unicode(ft, space_code)
 
-    # encoding can be either a string for decode (on 1,2 or a variable number of bytes) of a char table (for 1 byte only for me)
-    # if empty string, it means it is than encoding field is not present and we have to select the good encoding from cmap input data
+    # encoding can be either a string for decode
+    # (on 1,2 or a variable number of bytes) or a char table (for 1 byte only for me)
+    # if empty string, it means that the encoding field is not present and
+    # we have to select the good encoding from cmap input data
     if encoding == "":
         if -1 not in map_dict or map_dict[-1] == 1:
             # I have not been able to find any rule for no /Encoding nor /ToUnicode
@@ -36,7 +38,9 @@ def build_char_map(
             encoding = "charmap"
         else:
             encoding = "utf-16-be"
-    # apply rule from PDF ref 1.7 §5.9.1, 1st bullet : if cmap not empty encoding should be discarded (here transformed into identity for those characters)
+    # apply rule from PDF ref 1.7 §5.9.1, 1st bullet :
+    #   if cmap not empty encoding should be discarded
+    #   (here transformed into identity for those characters)
     # if encoding is an str it is expected to be a identity translation
     elif isinstance(encoding, dict):
         for x in int_entry:
@@ -131,7 +135,9 @@ def parse_encoding(
     enc: Union(str, DictionaryObject) = ft["/Encoding"].get_object()  # type: ignore
     if isinstance(enc, str):
         try:
-            # allready done : enc = NameObject.unnumber(enc.encode()).decode()  # for #xx decoding
+            # already done :
+            #       enc = NameObject.unnumber(enc.encode()).decode()
+            # for #xx decoding
             if enc in charset_encoding:
                 encoding = charset_encoding[enc].copy()
             elif enc in _predefined_cmap:
@@ -214,10 +220,12 @@ def prepare_cm(ft: DictionaryObject) -> bytes:
     if isinstance(tu, StreamObject):
         cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
     elif isinstance(tu, str) and tu.startswith("/Identity"):
-        cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"  # the full range 0000-FFFF will be processed
+        # the full range 0000-FFFF will be processed
+        cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
     if isinstance(cm, str):
         cm = cm.encode()
-    # we need to prepare cm before due to missing return line in pdf printed to pdf from word
+    # we need to prepare cm before due to missing return line in pdf printed
+    # to pdf from word
     cm = (
         cm.strip()
         .replace(b"beginbfchar", b"\nbeginbfchar\n")
@@ -280,13 +288,11 @@ def parse_bfrange(
 ) -> Union[None, Tuple[int, int]]:
     lst = [x for x in line.split(b" ") if x]
     closure_found = False
-    nbi = max(len(lst[0]), len(lst[1]))
-    map_dict[-1] = ceil(nbi / 2)
-    fmt = b"%%0%dX" % (map_dict[-1] * 2)
     if multiline_rg is not None:
+        fmt = b"%%0%dX" % (map_dict[-1] * 2)
         a = multiline_rg[0]  # a, b not in the current line
         b = multiline_rg[1]
-        for sq in lst[1:]:
+        for sq in lst[0:]:
             if sq == b"]":
                 closure_found = True
                 break
@@ -301,6 +307,9 @@ def parse_bfrange(
     else:
         a = int(lst[0], 16)
         b = int(lst[1], 16)
+        nbi = max(len(lst[0]), len(lst[1]))
+        map_dict[-1] = ceil(nbi / 2)
+        fmt = b"%%0%dX" % (map_dict[-1] * 2)
         if lst[2] == b"[":
             for sq in lst[3:]:
                 if sq == b"]":
diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py
index 038067a4ba..3d331fdab0 100644
--- a/pypdf/_encryption.py
+++ b/pypdf/_encryption.py
@@ -257,8 +257,8 @@ def compute_key(
                 2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A >
            That is, if the password string is n bytes long, append
            the first 32 - n bytes of the padding string to the end
-           of the password string. If the password string is empty (zero-length),
-           meaning there is no user password,
+           of the password string. If the password string is empty
+           (zero-length), meaning there is no user password,
            substitute the entire padding string in its place.
 
         b) Initialize the MD5 hash function and pass the result of step (a)
@@ -295,10 +295,10 @@ def compute_key(
           key_size: The size of the key in bytes
           o_entry: The owner entry
           P: A set of flags specifying which operations shall be permitted
-            when the document is opened with user access. If bit 2 is set to 1, all other
-            bits are ignored and all operations are permitted. If bit 2 is set to 0,
-            permission for operations are based on the values of the remaining flags
-            defined in Table 24.
+            when the document is opened with user access. If bit 2 is set to 1,
+            all other bits are ignored and all operations are permitted.
+            If bit 2 is set to 0, permission for operations are based on the
+            values of the remaining flags defined in Table 24.
           id1_entry:
           metadata_encrypted: A boolean indicating if the metadata is encrypted.
 
@@ -465,14 +465,20 @@ def verify_user_password(
         """
         Algorithm 6: Authenticating the user password.
 
-        a) Perform all but the last step of "Algorithm 4: Computing the encryption dictionary’s U (user password)
-           value (Security handlers of revision 2)" or "Algorithm 5: Computing the encryption dictionary’s U (user
-           password) value (Security handlers of revision 3 or greater)" using the supplied password string.
-        b) If the result of step (a) is equal to the value of the encryption dictionary’s U entry (comparing on the first 16
-           bytes in the case of security handlers of revision 3 or greater), the password supplied is the correct user
-           password. The key obtained in step (a) (that is, in the first step of "Algorithm 4: Computing the encryption
-           dictionary’s U (user password) value (Security handlers of revision 2)" or "Algorithm 5: Computing the
-           encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater)") shall be used
+        a) Perform all but the last step of "Algorithm 4: Computing the
+           encryption dictionary’s U (user password) value (Security handlers of
+           revision 2)" or "Algorithm 5: Computing the encryption dictionary’s U
+           (user password) value (Security handlers of revision 3 or greater)"
+           using the supplied password string.
+        b) If the result of step (a) is equal to the value of the encryption
+           dictionary’s U entry (comparing on the first 16 bytes in the case of
+           security handlers of revision 3 or greater), the password supplied is
+           the correct user password. The key obtained in step (a) (that is, in
+           the first step of "Algorithm 4: Computing the encryption
+           dictionary’s U (user password) value
+           (Security handlers of revision 2)" or
+           "Algorithm 5: Computing the encryption dictionary’s U (user password)
+           value (Security handlers of revision 3 or greater)") shall be used
            to decrypt the document.
 
         Args:
@@ -482,10 +488,10 @@ def verify_user_password(
           o_entry: The owner entry
           u_entry: The user entry
           P: A set of flags specifying which operations shall be permitted
-            when the document is opened with user access. If bit 2 is set to 1, all other
-            bits are ignored and all operations are permitted. If bit 2 is set to 0,
-            permission for operations are based on the values of the remaining flags
-            defined in Table 24.
+            when the document is opened with user access. If bit 2 is set to 1,
+            all other bits are ignored and all operations are permitted.
+            If bit 2 is set to 0, permission for operations are based on the
+            values of the remaining flags defined in Table 24.
           id1_entry:
           metadata_encrypted: A boolean indicating if the metadata is encrypted.
 
@@ -517,17 +523,25 @@ def verify_owner_password(
         """
         Algorithm 7: Authenticating the owner password.
 
-        a) Compute an encryption key from the supplied password string, as described in steps (a) to (d) of
-           "Algorithm 3: Computing the encryption dictionary’s O (owner password) value".
-        b) (Security handlers of revision 2 only) Decrypt the value of the encryption dictionary’s O entry, using an RC4
+        a) Compute an encryption key from the supplied password string, as
+           described in steps (a) to (d) of
+           "Algorithm 3: Computing the encryption dictionary’s O (owner password)
+           value".
+        b) (Security handlers of revision 2 only) Decrypt the value of the
+           encryption dictionary’s O entry, using an RC4
            encryption function with the encryption key computed in step (a).
-           (Security handlers of revision 3 or greater) Do the following 20 times: Decrypt the value of the encryption
-           dictionary’s O entry (first iteration) or the output from the previous iteration (all subsequent iterations),
-           using an RC4 encryption function with a different encryption key at each iteration. The key shall be
-           generated by taking the original key (obtained in step (a)) and performing an XOR (exclusive or) operation
-           between each byte of the key and the single-byte value of the iteration counter (from 19 to 0).
-        c) The result of step (b) purports to be the user password. Authenticate this user password using "Algorithm 6:
-           Authenticating the user password". If it is correct, the password supplied is the correct owner password.
+           (Security handlers of revision 3 or greater) Do the following 20 times:
+           Decrypt the value of the encryption dictionary’s O entry (first iteration)
+           or the output from the previous iteration (all subsequent iterations),
+           using an RC4 encryption function with a different encryption key at
+           each iteration. The key shall be generated by taking the original key
+           (obtained in step (a)) and performing an XOR (exclusive or) operation
+           between each byte of the key and the single-byte value of the
+           iteration counter (from 19 to 0).
+        c) The result of step (b) purports to be the user password.
+           Authenticate this user password using
+           "Algorithm 6: Authenticating the user password".
+           If it is correct, the password supplied is the correct owner password.
 
         Args:
           owner_password:
@@ -536,10 +550,10 @@ def verify_owner_password(
           o_entry: The owner entry
           u_entry: The user entry
           P: A set of flags specifying which operations shall be permitted
-            when the document is opened with user access. If bit 2 is set to 1, all other
-            bits are ignored and all operations are permitted. If bit 2 is set to 0,
-            permission for operations are based on the values of the remaining flags
-            defined in Table 24.
+            when the document is opened with user access. If bit 2 is set to 1,
+            all other bits are ignored and all operations are permitted.
+            If bit 2 is set to 0, permission for operations are based on the
+            values of the remaining flags defined in Table 24.
           id1_entry:
           metadata_encrypted: A boolean indicating if the metadata is encrypted.
 
@@ -575,31 +589,44 @@ def verify_owner_password(
         """
         Algorithm 3.2a Computing an encryption key.
 
-        To understand the algorithm below, it is necessary to treat the O and U strings in the Encrypt dictionary
-        as made up of three sections. The first 32 bytes are a hash value (explained below). The next 8 bytes are
-        called the Validation Salt. The final 8 bytes are called the Key Salt.
-
-        1. The password string is generated from Unicode input by processing the input string with the SASLprep
-           (IETF RFC 4013) profile of stringprep (IETF RFC 3454), and then converting to a UTF-8 representation.
-        2. Truncate the UTF-8 representation to 127 bytes if it is longer than 127 bytes.
-        3. Test the password against the owner key by computing the SHA-256 hash of the UTF-8 password
-           concatenated with the 8 bytes of owner Validation Salt, concatenated with the 48-byte U string. If the
-           32-byte result matches the first 32 bytes of the O string, this is the owner password.
-           Compute an intermediate owner key by computing the SHA-256 hash of the UTF-8 password
-           concatenated with the 8 bytes of owner Key Salt, concatenated with the 48-byte U string. The 32-byte
-           result is the key used to decrypt the 32-byte OE string using AES-256 in CBC mode with no padding and
-           an initialization vector of zero. The 32-byte result is the file encryption key.
-        4. Test the password against the user key by computing the SHA-256 hash of the UTF-8 password
-           concatenated with the 8 bytes of user Validation Salt. If the 32 byte result matches the first 32 bytes of
+        To understand the algorithm below, it is necessary to treat the O and U
+        strings in the Encrypt dictionary as made up of three sections.
+        The first 32 bytes are a hash value (explained below). The next 8 bytes
+        are called the Validation Salt. The final 8 bytes are called the Key Salt.
+
+        1. The password string is generated from Unicode input by processing the
+           input string with the SASLprep (IETF RFC 4013) profile of
+           stringprep (IETF RFC 3454), and then converting to a UTF-8
+           representation.
+        2. Truncate the UTF-8 representation to 127 bytes if it is longer than
+           127 bytes.
+        3. Test the password against the owner key by computing the SHA-256 hash
+           of the UTF-8 password concatenated with the 8 bytes of owner
+           Validation Salt, concatenated with the 48-byte U string. If the
+           32-byte result matches the first 32 bytes of the O string, this is
+           the owner password.
+           Compute an intermediate owner key by computing the SHA-256 hash of
+           the UTF-8 password concatenated with the 8 bytes of owner Key Salt,
+           concatenated with the 48-byte U string. The 32-byte result is the
+           key used to decrypt the 32-byte OE string using AES-256 in CBC mode
+           with no padding and an initialization vector of zero.
+           The 32-byte result is the file encryption key.
+        4. Test the password against the user key by computing the SHA-256 hash
+           of the UTF-8 password concatenated with the 8 bytes of user
+           Validation Salt. If the 32 byte result matches the first 32 bytes of
            the U string, this is the user password.
-           Compute an intermediate user key by computing the SHA-256 hash of the UTF-8 password
-           concatenated with the 8 bytes of user Key Salt. The 32-byte result is the key used to decrypt the 32-byte
-           UE string using AES-256 in CBC mode with no padding and an initialization vector of zero. The 32-byte
-           result is the file encryption key.
-        5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with an initialization vector of zero and
-           the file encryption key as the key. Verify that bytes 9-11 of the result are the characters ‘a’, ‘d’, ‘b’. Bytes
-           0-3 of the decrypted Perms entry, treated as a little-endian integer, are the user permissions. They
-           should match the value in the P key.
+           Compute an intermediate user key by computing the SHA-256 hash of the
+           UTF-8 password concatenated with the 8 bytes of user Key Salt.
+           The 32-byte result is the key used to decrypt the 32-byte
+           UE string using AES-256 in CBC mode with no padding and an
+           initialization vector of zero. The 32-byte result is the file
+           encryption key.
+        5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with an
+           initialization vector of zero and the file encryption key as the key.
+           Verify that bytes 9-11 of the result are the characters ‘a’, ‘d’, ‘b’.
+           Bytes 0-3 of the decrypted Perms entry, treated as a little-endian
+           integer, are the user permissions.
+           They should match the value in the P key.
 
         Args:
           R:  A number specifying which revision of the standard security
@@ -721,15 +748,20 @@ def generate_values(
     @staticmethod
     def compute_U_value(password: bytes, key: bytes) -> Tuple[bytes, bytes]:
         """
-        Algorithm 3.8 Computing the encryption dictionary’s U (user password) and UE (user encryption key) values
-
-        1. Generate 16 random bytes of data using a strong random number generator. The first 8 bytes are the
-           User Validation Salt. The second 8 bytes are the User Key Salt. Compute the 32-byte SHA-256 hash of
-           the password concatenated with the User Validation Salt. The 48-byte string consisting of the 32-byte
-           hash followed by the User Validation Salt followed by the User Key Salt is stored as the U key.
-        2. Compute the 32-byte SHA-256 hash of the password concatenated with the User Key Salt. Using this
-           hash as the key, encrypt the file encryption key using AES-256 in CBC mode with no padding and an
-           initialization vector of zero. The resulting 32-byte string is stored as the UE key.
+        Algorithm 3.8 Computing the encryption dictionary’s U (user password)
+        and UE (user encryption key) values
+
+        1. Generate 16 random bytes of data using a strong random number generator.
+           The first 8 bytes are the User Validation Salt. The second 8 bytes
+           are the User Key Salt. Compute the 32-byte SHA-256 hash of the
+           password concatenated with the User Validation Salt. The 48-byte
+           string consisting of the 32-byte hash followed by the User
+           Validation Salt followed by the User Key Salt is stored as the U key.
+        2. Compute the 32-byte SHA-256 hash of the password concatenated with
+           the User Key Salt. Using this hash as the key, encrypt the file
+           encryption key using AES-256 in CBC mode with no padding and an
+           initialization vector of zero. The resulting 32-byte string is stored
+           as the UE key.
 
         Args:
           password:
@@ -753,24 +785,30 @@ def compute_O_value(
         password: bytes, key: bytes, u_value: bytes
     ) -> Tuple[bytes, bytes]:
         """
-        Algorithm 3.9 Computing the encryption dictionary’s O (owner password) and OE (owner encryption key) values.
-
-        1. Generate 16 random bytes of data using a strong random number generator. The first 8 bytes are the
-           Owner Validation Salt. The second 8 bytes are the Owner Key Salt. Compute the 32-byte SHA-256 hash
-           of the password concatenated with the Owner Validation Salt and then concatenated with the 48-byte
-           U string as generated in Algorithm 3.8. The 48-byte string consisting of the 32-byte hash followed by
-           the Owner Validation Salt followed by the Owner Key Salt is stored as the O key.
-        2. Compute the 32-byte SHA-256 hash of the password concatenated with the Owner Key Salt and then
-           concatenated with the 48-byte U string as generated in Algorithm 3.8. Using this hash as the key,
-           encrypt the file encryption key using AES-256 in CBC mode with no padding and an initialization vector
-           of zero. The resulting 32-byte string is stored as the OE key.
+        Algorithm 3.9 Computing the encryption dictionary’s O (owner password)
+        and OE (owner encryption key) values.
+
+        1. Generate 16 random bytes of data using a strong random number
+           generator. The first 8 bytes are the Owner Validation Salt. The
+           second 8 bytes are the Owner Key Salt. Compute the 32-byte SHA-256
+           hash of the password concatenated with the Owner Validation Salt and
+           then concatenated with the 48-byte U string as generated in
+           Algorithm 3.8. The 48-byte string consisting of the 32-byte hash
+           followed by the Owner Validation Salt followed by the Owner Key Salt
+           is stored as the O key.
+        2. Compute the 32-byte SHA-256 hash of the password concatenated with
+           the Owner Key Salt and then concatenated with the 48-byte U string as
+           generated in Algorithm 3.8. Using this hash as the key,
+           encrypt the file encryption key using AES-256 in CBC mode with
+           no padding and an initialization vector of zero.
+           The resulting 32-byte string is stored as the OE key.
 
         Args:
           password:
           key:
           u_value: A 32-byte string, based on the user password, that shall be
-            used in determining whether to prompt the user for a password and, if so,
-            whether a valid user or owner password was entered.
+            used in determining whether to prompt the user for a password and,
+            if so, whether a valid user or owner password was entered.
 
         Returns:
           A tuple (O value, OE value)
@@ -792,23 +830,27 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
         """
         Algorithm 3.10 Computing the encryption dictionary’s Perms (permissions) value
 
-        1. Extend the permissions (contents of the P integer) to 64 bits by setting the upper 32 bits to all 1’s. (This
-           allows for future extension without changing the format.)
-        2. Record the 8 bytes of permission in the bytes 0-7 of the block, low order byte first.
-        3. Set byte 8 to the ASCII value ' T ' or ' F ' according to the EncryptMetadata Boolean.
+        1. Extend the permissions (contents of the P integer) to 64 bits by
+           setting the upper 32 bits to all 1’s.
+           (This allows for future extension without changing the format.)
+        2. Record the 8 bytes of permission in the bytes 0-7 of the block,
+           low order byte first.
+        3. Set byte 8 to the ASCII value ' T ' or ' F ' according to the
+           EncryptMetadata Boolean.
         4. Set bytes 9-11 to the ASCII characters ' a ', ' d ', ' b '.
         5. Set bytes 12-15 to 4 bytes of random data, which will be ignored.
-        6. Encrypt the 16-byte block using AES-256 in ECB mode with an initialization vector of zero, using the file
-           encryption key as the key. The result (16 bytes) is stored as the Perms string, and checked for validity
-           when the file is opened.
+        6. Encrypt the 16-byte block using AES-256 in ECB mode with an
+           initialization vector of zero, using the file encryption key as the
+           key. The result (16 bytes) is stored as the Perms string, and checked
+           for validity when the file is opened.
 
         Args:
           key:
           p: A set of flags specifying which operations shall be permitted
-            when the document is opened with user access. If bit 2 is set to 1, all other
-            bits are ignored and all operations are permitted. If bit 2 is set to 0,
-            permission for operations are based on the values of the remaining flags
-            defined in Table 24.
+            when the document is opened with user access. If bit 2 is set to 1,
+            all other bits are ignored and all operations are permitted.
+            If bit 2 is set to 0, permission for operations are based on the
+            values of the remaining flags defined in Table 24.
           metadata_encrypted: A boolean indicating if the metadata is encrypted.
 
         Returns:
@@ -860,30 +902,40 @@ def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObje
         """
         Algorithm 1: Encryption of data using the RC4 or AES algorithms.
 
-        a) Obtain the object number and generation number from the object identifier of the string or stream to be
-           encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect
-           object containing it.
-        b) For all strings and streams without crypt filter specifier; treating the object number and generation number
-           as binary integers, extend the original n-byte encryption key to n + 5 bytes by appending the low-order 3
-           bytes of the object number and the low-order 2 bytes of the generation number in that order, low-order byte
-           first. (n is 5 unless the value of V in the encryption dictionary is greater than 1, in which case n is the value
-           of Length divided by 8.)
-           If using the AES algorithm, extend the encryption key an additional 4 bytes by adding the value “sAlT”,
-           which corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54. (This addition is done for backward
-           compatibility and is not intended to provide additional security.)
-        c) Initialize the MD5 hash function and pass the result of step (b) as input to this function.
-        d) Use the first (n + 5) bytes, up to a maximum of 16, of the output from the MD5 hash as the key for the RC4
-           or AES symmetric key algorithms, along with the string or stream data to be encrypted.
-           If using the AES algorithm, the Cipher Block Chaining (CBC) mode, which requires an initialization vector,
-           is used. The block size parameter is set to 16 bytes, and the initialization vector is a 16-byte random
-           number that is stored as the first 16 bytes of the encrypted stream or string.
+        a) Obtain the object number and generation number from the object
+           identifier of the string or stream to be encrypted
+           (see 7.3.10, "Indirect Objects"). If the string is a direct object,
+           use the identifier of the indirect object containing it.
+        b) For all strings and streams without crypt filter specifier; treating
+           the object number and generation number as binary integers, extend
+           the original n-byte encryption key to n + 5 bytes by appending the
+           low-order 3 bytes of the object number and the low-order 2 bytes of
+           the generation number in that order, low-order byte first.
+           (n is 5 unless the value of V in the encryption dictionary is greater
+           than 1, in which case n is the value of Length divided by 8.)
+           If using the AES algorithm, extend the encryption key an additional
+           4 bytes by adding the value “sAlT”, which corresponds to the
+           hexadecimal values 0x73, 0x41, 0x6C, 0x54. (This addition is done for
+           backward compatibility and is not intended to provide additional
+           security.)
+        c) Initialize the MD5 hash function and pass the result of step (b) as
+           input to this function.
+        d) Use the first (n + 5) bytes, up to a maximum of 16, of the output
+           from the MD5 hash as the key for the RC4 or AES symmetric key
+           algorithms, along with the string or stream data to be encrypted.
+           If using the AES algorithm, the Cipher Block Chaining (CBC) mode,
+           which requires an initialization vector, is used. The block size
+           parameter is set to 16 bytes, and the initialization vector is a
+           16-byte random number that is stored as the first 16 bytes of the
+           encrypted stream or string.
 
         Algorithm 3.1a Encryption of data using the AES algorithm
-        1. Use the 32-byte file encryption key for the AES-256 symmetric key algorithm, along with the string or
-           stream data to be encrypted.
-           Use the AES algorithm in Cipher Block Chaining (CBC) mode, which requires an initialization vector. The
-           block size parameter is set to 16 bytes, and the initialization vector is a 16-byte random number that is
-           stored as the first 16 bytes of the encrypted stream or string.
+        1. Use the 32-byte file encryption key for the AES-256 symmetric key
+           algorithm, along with the string or stream data to be encrypted.
+           Use the AES algorithm in Cipher Block Chaining (CBC) mode, which
+           requires an initialization vector. The block size parameter is set to
+           16 bytes, and the initialization vector is a 16-byte random number
+           that is stored as the first 16 bytes of the encrypted stream or string.
            The output is the encrypted data to be stored in the PDF file.
 
         Args:
diff --git a/pypdf/_merger.py b/pypdf/_merger.py
index 7642e79031..cbfeb4a38e 100644
--- a/pypdf/_merger.py
+++ b/pypdf/_merger.py
@@ -178,7 +178,8 @@ def merge(
                 )
             else:
                 raise ValueError(
-                    "The argument position of merge is deprecated. Use page_number only."
+                    "The argument position of merge is deprecated. "
+                    "Use page_number only."
                 )
 
         if page_number is None:  # deprecated
@@ -335,7 +336,8 @@ def write(self, fileobj: Union[Path, StrByteType]) -> None:
             page.out_pagedata = self.output.get_reference(
                 pages_obj[PA.KIDS][-1].get_object()
             )
-            # idnum = self.output._objects.index(self.output._pages.get_object()[PA.KIDS][-1].get_object()) + 1
+            # key_temp = self.output._pages.get_object()[PA.KIDS][-1].get_object()
+            # idnum = self.output._objects.index(key_temp) + 1
             # page.out_pagedata = IndirectObject(idnum, 0, self.output)
 
         # Once all pages are added, create outline items to point at those pages
@@ -703,7 +705,8 @@ def add_outline_item(
         """
         if page_number is not None and pagenum is not None:
             raise ValueError(
-                "The argument pagenum of add_outline_item is deprecated. Use page_number only."
+                "The argument pagenum of add_outline_item is deprecated. "
+                "Use page_number only."
             )
         if pagenum is not None:
             old_term = "pagenum"
@@ -809,7 +812,8 @@ def add_named_destination(
         """
         if page_number is not None and pagenum is not None:
             raise ValueError(
-                "The argument pagenum of add_named_destination is deprecated. Use page_number only."
+                "The argument pagenum of add_named_destination is deprecated. "
+                "Use page_number only."
             )
         if pagenum is not None:
             old_term = "pagenum"
diff --git a/pypdf/_page.py b/pypdf/_page.py
index 2a9656f223..0eb268da26 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -28,7 +28,6 @@
 # POSSIBILITY OF SUCH DAMAGE.
 
 import math
-import uuid
 import warnings
 from decimal import Decimal
 from typing import (
@@ -95,17 +94,20 @@ def set_custom_rtl(
             If set to `None`, the value will not be changed.
             If set to an integer or string, it will be converted to its ASCII code.
             The default value is -1, which sets no additional range to be converted.
-        _max: The new maximum value for the range of custom characters that will be written right to left.
+        _max: The new maximum value for the range of custom characters that will
+            be written right to left.
             If set to `None`, the value will not be changed.
             If set to an integer or string, it will be converted to its ASCII code.
             The default value is -1, which sets no additional range to be converted.
-        specials: The new list of special characters to be inserted in the current insertion order.
+        specials: The new list of special characters to be inserted in the
+            current insertion order.
             If set to `None`, the current value will not be changed.
             If set to a string, it will be converted to a list of ASCII codes.
             The default value is an empty list.
 
     Returns:
-        A tuple containing the new values for `CUSTOM_RTL_MIN`, `CUSTOM_RTL_MAX`, and `CUSTOM_RTL_SPECIAL_CHARS`.
+        A tuple containing the new values for `CUSTOM_RTL_MIN`,
+        `CUSTOM_RTL_MAX`, and `CUSTOM_RTL_SPECIAL_CHARS`.
     """
     global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
     if isinstance(_min, int):
@@ -576,17 +578,41 @@ def _merge_resources(
     ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         new_res = DictionaryObject()
         new_res.update(res1.get(resource, DictionaryObject()).get_object())
+
+        def compute_unique_key(base_key: str) -> Tuple[str, bool]:
+            """Find a key that either doesn't already exist or has the same
+            value (indicated by the bool)"""
+            value = page2res.raw_get(base_key)
+            # try the current key first (e.g. "foo"), but otherwise iterate
+            # through "foo-0", "foo-1", etc. new_res can contain only finitely
+            # many keys, thus this'll eventually end, even if it's been crafted
+            # to be maximally annoying.
+            computed_key = base_key
+            idx = 0
+            while computed_key in new_res:
+                if new_res.raw_get(computed_key) == value:
+                    # there's already a resource of this name, with the exact
+                    # same value
+                    return computed_key, True
+                computed_key = f"{base_key}-{idx}"
+                idx += 1
+            return computed_key, False
+
         page2res = cast(
             DictionaryObject, res2.get(resource, DictionaryObject()).get_object()
         )
         rename_res = {}
-        for key in list(page2res.keys()):
-            if key in new_res and new_res.raw_get(key) != page2res.raw_get(key):
-                newname = NameObject(key + str(uuid.uuid4()))
+        for key in sorted(page2res.keys()):
+            unique_key, same_value = compute_unique_key(key)
+            newname = NameObject(unique_key)
+            if key != unique_key:
+                # we have to use a different name for this
                 rename_res[key] = newname
+
+            if not same_value:
+                # the value wasn't already recorded
                 new_res[newname] = page2res[key]
-            elif key not in new_res:
-                new_res[key] = page2res.raw_get(key)
+
         return new_res, rename_res
 
     @staticmethod
@@ -740,12 +766,14 @@ def _merge_page(
                 new_resources[NameObject(res)] = new
                 rename.update(newrename)
 
-        # Combine /ProcSet sets.
+        # Combine /ProcSet sets, making sure there's a consistent order
         new_resources[NameObject(RES.PROC_SET)] = ArrayObject(
-            frozenset(
-                original_resources.get(RES.PROC_SET, ArrayObject()).get_object()
-            ).union(
-                frozenset(page2resources.get(RES.PROC_SET, ArrayObject()).get_object())
+            sorted(
+                set(
+                    original_resources.get(RES.PROC_SET, ArrayObject()).get_object()
+                ).union(
+                    set(page2resources.get(RES.PROC_SET, ArrayObject()).get_object())
+                )
             )
         )
 
@@ -894,7 +922,8 @@ def mergeScaledPage(
         """
         deprecation_with_replacement(
             "page.mergeScaledPage(page2, scale, expand)",
-            "page2.add_transformation(Transformation().scale(scale)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().scale(scale)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().scale(scale, scale)
@@ -919,7 +948,8 @@ def mergeRotatedPage(
         """
         deprecation_with_replacement(
             "page.mergeRotatedPage(page2, rotation, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().rotate(rotation)
@@ -945,7 +975,8 @@ def mergeTranslatedPage(
         """
         deprecation_with_replacement(
             "page.mergeTranslatedPage(page2, tx, ty, expand)",
-            "page2.add_transformation(Transformation().translate(tx, ty)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().translate(tx, ty)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().translate(tx, ty)
@@ -977,7 +1008,8 @@ def mergeRotatedTranslatedPage(
         """
         deprecation_with_replacement(
             "page.mergeRotatedTranslatedPage(page2, rotation, tx, ty, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().translate(-tx, -ty).rotate(rotation).translate(tx, ty)
@@ -1003,7 +1035,8 @@ def mergeRotatedScaledPage(
         """
         deprecation_with_replacement(
             "page.mergeRotatedScaledPage(page2, rotation, scale, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().rotate(rotation).scale(scale, scale)
@@ -1035,7 +1068,8 @@ def mergeScaledTranslatedPage(
         """
         deprecation_with_replacement(
             "page.mergeScaledTranslatedPage(page2, scale, tx, ty, expand)",
-            "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().scale(scale, scale).translate(tx, ty)
@@ -1070,7 +1104,8 @@ def mergeRotatedScaledTranslatedPage(
         """
         deprecation_with_replacement(
             "page.mergeRotatedScaledTranslatedPage(page2, rotation, tx, ty, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); "
+            "page.merge_page(page2, expand)",
             "3.0.0",
         )
         op = Transformation().rotate(rotation).scale(scale, scale).translate(tx, ty)
@@ -1334,10 +1369,13 @@ def _extract_text(
             while NameObject(PG.RESOURCES) not in objr:
                 # /Resources can be inherited sometimes so we look to parents
                 objr = objr["/Parent"].get_object()
-                # if no parents we will have no /Resources will be available => an exception wil be raised
+                # if there is no parent, no /Resources will be available
+                # => an exception will be raised
             resources_dict = cast(DictionaryObject, objr[PG.RESOURCES])
         except Exception:
-            return ""  # no resources means no text is possible (no font) we consider the file as not damaged, no need to check for TJ or Tj
+            # no resources means no text is possible (no font); we consider the
+            # file as not damaged, so there is no need to check for TJ or Tj
+            return ""
         if "/Font" in resources_dict:
             for f in cast(DictionaryObject, resources_dict["/Font"]):
                 cmaps[f] = build_char_map(f, space_width, obj)
@@ -1403,7 +1441,9 @@ def current_spacewidth() -> float:
             return _space_width / 1000.0
 
         def process_operation(operator: bytes, operands: List) -> None:
-            nonlocal cm_matrix, cm_stack, tm_matrix, tm_prev, output, text, char_scale, space_scale, _space_width, TL, font_size, cmap, orientations, rtl_dir, visitor_text
+            nonlocal cm_matrix, cm_stack, tm_matrix, tm_prev, output, text
+            nonlocal char_scale, space_scale, _space_width, TL, font_size, cmap
+            nonlocal orientations, rtl_dir, visitor_text
             global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
 
             check_crlf_space: bool = False
@@ -1484,10 +1524,12 @@ def process_operation(operator: bytes, operands: List) -> None:
                 text = ""
                 # rtl_dir = False
                 try:
-                    # charMapTuple: font_type, float(sp_width / 2), encoding, map_dict, font-dictionary
+                    # charMapTuple: font_type, float(sp_width / 2), encoding,
+                    #               map_dict, font-dictionary
                     charMapTuple = cmaps[operands[0]]
                     _space_width = charMapTuple[1]
-                    # current cmap: encoding, map_dict, font resource name (internal name, not the real font-name),
+                    # current cmap: encoding, map_dict, font resource name
+                    #               (internal name, not the real font-name),
                     # font-dictionary. The font-dictionary describes the font.
                     cmap = (
                         charMapTuple[2],
@@ -1550,7 +1592,10 @@ def process_operation(operator: bytes, operands: List) -> None:
                                 t = tt.decode(
                                     cmap[0], "surrogatepass"
                                 )  # apply str encoding
-                            except Exception:  # the data does not match the expectation, we use the alternative ; text extraction may not be good
+                            except Exception:
+                                # the data does not match the expectation,
+                                # we use the alternative ;
+                                # text extraction may not be good
                                 t = tt.decode(
                                     "utf-16-be" if cmap[0] == "charmap" else "charmap",
                                     "surrogatepass",
@@ -1568,7 +1613,9 @@ def process_operation(operator: bytes, operands: List) -> None:
                         ):
                             xx = ord(x)
                             # fmt: off
-                            if (  # cases where the current inserting order is kept (punctuation,...)
+                            if (
+                                # cases where the current inserting order is
+                                # kept (punctuation,...)
                                 (xx <= 0x2F)                        # punctuations but...
                                 or (0x3A <= xx and xx <= 0x40)      # numbers (x30-39)
                                 or (0x2000 <= xx and xx <= 0x206F)  # upper punctuations..
@@ -1784,9 +1831,11 @@ def extract_text(
         will change if this function is made more sophisticated.
 
         Arabic, Hebrew,... are extracted in the good order.
-        If required an custom RTL range of characters can be defined; see function set_custom_rtl
+        If required, a custom RTL range of characters can be defined;
+        see function set_custom_rtl
 
-        Additionally you can provide visitor-methods to get informed on all operands and all text-objects.
+        Additionally you can provide visitor-methods to get informed on all
+        operands and all text-objects.
         For example in some PDF files this can be useful to parse tables.
 
         Args:
@@ -1913,9 +1962,9 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
 
     mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
     """
-    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
-    defining the boundaries of the physical medium on which the page is
-    intended to be displayed or printed.
+    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
+    default user space units, defining the boundaries of the physical medium on
+    which the page is intended to be displayed or printed.
     """
 
     @property
@@ -1940,10 +1989,10 @@ def mediaBox(self, value: RectangleObject) -> None:  # deprecated
 
     cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
     """
-    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
-    defining the visible region of default user space.  When the page is
-    displayed or printed, its contents are to be clipped (cropped) to this
-    rectangle and then imposed on the output medium in some
+    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
+    default user space units, defining the visible region of default user space.
+    When the page is displayed or printed, its contents are to be clipped
+    (cropped) to this rectangle and then imposed on the output medium in some
     implementation-defined manner.  Default value: same as :attr:`mediabox<mediabox>`.
     """
 
@@ -1964,9 +2013,9 @@ def cropBox(self, value: RectangleObject) -> None:  # deprecated
 
     bleedbox = _create_rectangle_accessor("/BleedBox", ("/CropBox", PG.MEDIABOX))
     """
-    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
-    defining the region to which the contents of the page should be clipped
-    when output in a production environment.
+    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
+    default user space units, defining the region to which the contents of the
+    page should be clipped when output in a production environment.
     """
 
     @property
@@ -1986,8 +2035,9 @@ def bleedBox(self, value: RectangleObject) -> None:  # deprecated
 
     trimbox = _create_rectangle_accessor("/TrimBox", ("/CropBox", PG.MEDIABOX))
     """
-    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
-    defining the intended dimensions of the finished page after trimming.
+    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
+    default user space units, defining the intended dimensions of the finished
+    page after trimming.
     """
 
     @property
@@ -2007,9 +2057,9 @@ def trimBox(self, value: RectangleObject) -> None:  # deprecated
 
     artbox = _create_rectangle_accessor("/ArtBox", ("/CropBox", PG.MEDIABOX))
     """
-    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
-    defining the extent of the page's meaningful content as intended by the
-    page's creator.
+    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
+    default user space units, defining the extent of the page's meaningful
+    content as intended by the page's creator.
     """
 
     @property
diff --git a/pypdf/_page_labels.py b/pypdf/_page_labels.py
index 7a83c7d11b..e2baa8aeda 100644
--- a/pypdf/_page_labels.py
+++ b/pypdf/_page_labels.py
@@ -57,11 +57,21 @@
                            aa to zz for the next 26, and so on)
 """
 
-from typing import Iterator
+from typing import (
+    Iterator,
+    Optional,
+    Tuple,
+)
 
 from ._protocols import PdfReaderProtocol
 from ._utils import logger_warning
 
+from .generic import (
+    ArrayObject,
+    DictionaryObject,
+    NumberObject,
+)
+
 
 def number2uppercase_roman_numeral(num: int) -> str:
     roman = [
@@ -151,6 +161,7 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
                 break
             i += 2
         m = {
+            None: lambda n: "",
             "/D": lambda n: str(n),
             "/R": number2uppercase_roman_numeral,
             "/r": number2lowercase_roman_numeral,
@@ -161,7 +172,9 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
             value = reader.get_object(value)
         if not isinstance(value, dict):
             return str(index + 1)  # Fallback
-        return m[value["/S"]](index - start_index + 1)
+        start = value.get("/St", 1)
+        prefix = value.get("/P", "")
+        return prefix + m[value.get("/S")](index - start_index + start)
     if "/Kids" in number_tree or "/Limits" in number_tree:
         logger_warning(
             (
@@ -173,3 +186,81 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
         )
     # TODO: Implement /Kids and /Limits for number tree
     return str(index + 1)  # Fallback
+
+
+def nums_insert(
+    key: NumberObject,
+    value: DictionaryObject,
+    nums: ArrayObject,
+) -> None:
+    """
+    Insert a key, value pair in a Nums array.
+
+    See 7.9.7 "Number Trees".
+
+    Args:
+        key: number key of the entry
+        value: value of the entry
+        nums: Nums array to modify
+    """
+    if len(nums) % 2 != 0:
+        raise ValueError("a nums like array must have an even number of elements")
+
+    i = len(nums)
+    while i != 0 and key <= nums[i - 2]:
+        i = i - 2
+
+    if i < len(nums) and key == nums[i]:
+        nums[i + 1] = value
+    else:
+        nums.insert(i, key)
+        nums.insert(i + 1, value)
+
+
+def nums_clear_range(
+    key: NumberObject,
+    page_index_to: int,
+    nums: ArrayObject,
+) -> None:
+    """
+    Remove all entries in a number tree in a range after an entry.
+
+    See 7.9.7 "Number Trees".
+
+    Args:
+        key: number key of the entry before the range
+        page_index_to: The page index of the upper limit of the range
+        nums: Nums array to modify
+    """
+    if len(nums) % 2 != 0:
+        raise ValueError("a nums like array must have an even number of elements")
+    if page_index_to < key:
+        raise ValueError("page_index_to must be greater or equal than key")
+
+    i = nums.index(key) + 2
+    while i < len(nums) and nums[i] <= page_index_to:
+        nums.pop(i)
+        nums.pop(i)
+
+
+def nums_next(
+    key: NumberObject,
+    nums: ArrayObject,
+) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]:
+    """
+    Return the (key, value) pair of the entry after the given one.
+
+    See 7.9.7 "Number Trees".
+
+    Args:
+        key: number key of the entry
+        nums: Nums array
+    """
+    if len(nums) % 2 != 0:
+        raise ValueError("a nums like array must have an even number of elements")
+
+    i = nums.index(key) + 2
+    if i < len(nums):
+        return (nums[i], nums[i + 1])
+    else:
+        return (None, None)
diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 21520ed8f3..ddca4b1a40 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -118,7 +118,8 @@ def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:  # depreca
 class DocumentInformation(DictionaryObject):
     """
     A class representing the basic document metadata provided in a PDF File.
-    This class is accessible through :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
+    This class is accessible through
+    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
 
     All text properties of the document metadata have
     *two* properties, eg. author and author_raw. The non-raw property will
@@ -1901,11 +1902,11 @@ def _read_xref_subsections(
         get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
         used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
     ) -> None:
-        last_end = 0
+        # last_end = 0
         for start, size in self._pairs(idx_pairs):
             # The subsections must increase
-            assert start >= last_end
-            last_end = start + size
+            # assert start >= last_end
+            # last_end = start + size
             for num in range(start, start + size):
                 # The first entry is the type
                 xref_type = get_entry(0)
diff --git a/pypdf/_utils.py b/pypdf/_utils.py
index 4da2663fcd..f68e0c359e 100644
--- a/pypdf/_utils.py
+++ b/pypdf/_utils.py
@@ -163,31 +163,22 @@ def skip_over_comment(stream: StreamType) -> None:
             tok = stream.read(1)
 
 
-def read_until_regex(
-    stream: StreamType, regex: Pattern[bytes], ignore_eof: bool = False
-) -> bytes:
+def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
     """
     Read until the regular expression pattern matched (ignore the match).
+    Treats EOF on the underlying stream as the end of the token to be matched.
 
     Args:
-      ignore_eof: If true, ignore end-of-line and return immediately
       regex: re.Pattern
-      ignore_eof:  (Default value = False)
 
     Returns:
       The read bytes.
-
-    Raises:
-      PdfStreamError: on premature end-of-file
-
     """
     name = b""
     while True:
         tok = stream.read(16)
         if not tok:
-            if ignore_eof:
-                return name
-            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
+            return name
         m = regex.search(tok)
         if m is not None:
             name += tok[: m.start()]
@@ -479,8 +470,9 @@ def rename_kwargs(  # type: ignore
                 )
             if new_term in kwargs:
                 raise TypeError(
-                    f"{func_name} received both {old_term} and {new_term} as an argument. "
-                    f"{old_term} is deprecated. Use {new_term} instead."
+                    f"{func_name} received both {old_term} and {new_term} as "
+                    f"an argument. {old_term} is deprecated. "
+                    f"Use {new_term} instead."
                 )
             kwargs[new_term] = kwargs.pop(old_term)
             warnings.warn(
diff --git a/pypdf/_version.py b/pypdf/_version.py
index 1da6a55523..88c513ea36 100644
--- a/pypdf/_version.py
+++ b/pypdf/_version.py
@@ -1 +1 @@
-__version__ = "3.2.1"
+__version__ = "3.3.0"
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index df318e5169..571df35894 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -59,6 +59,7 @@
 
 from ._encryption import Encryption
 from ._page import PageObject, _VirtualList
+from ._page_labels import nums_clear_range, nums_insert, nums_next
 from ._reader import PdfReader
 from ._security import _alg33, _alg34, _alg35
 from ._utils import (
@@ -84,6 +85,7 @@
     InteractiveFormDictEntries,
 )
 from .constants import PageAttributes as PG
+from .constants import PageLabelStyle
 from .constants import PagesAttributes as PA
 from .constants import StreamAttributes as SA
 from .constants import TrailerKeys as TK
@@ -222,12 +224,14 @@ def get_object(
         if ido is not None:  # deprecated
             if indirect_reference is not None:
                 raise ValueError(
-                    "Please only set 'indirect_reference'. The 'ido' argument is deprecated."
+                    "Please only set 'indirect_reference'. The 'ido' argument "
+                    "is deprecated."
                 )
             else:
                 indirect_reference = ido
                 warnings.warn(
-                    "The parameter 'ido' is depreciated and will be removed in pypdf 4.0.0.",
+                    "The parameter 'ido' is depreciated and will be removed in "
+                    "pypdf 4.0.0.",
                     DeprecationWarning,
                 )
         assert (
@@ -258,9 +262,10 @@ def _add_page(
         page_org = page
         excluded_keys = list(excluded_keys)
         excluded_keys += [PA.PARENT, "/StructParents"]
-        # acrobat does not accept to have two indirect ref pointing on the same page;
-        # therefore in order to add easily multiple copies of the same page, we need to create a new
-        # dictionary for the page, however the objects below (including content) is not duplicated
+        # acrobat does not accept to have two indirect ref pointing on the same
+        # page; therefore in order to add easily multiple copies of the same
+        # page, we need to create a new dictionary for the page, however the
+        # objects below (including content) is not duplicated
         try:  # delete an already existing page
             del self._id_translated[id(page_org.indirect_reference.pdf)][  # type: ignore
                 page_org.indirect_reference.idnum  # type: ignore
@@ -581,7 +586,8 @@ def add_js(self, javascript: str) -> None:
                 NameObject("/JS"): TextStringObject(f"{javascript}"),
             }
         )
-        # We need a name for parameterized javascript in the pdf file, but it can be anything.
+        # We need a name for parameterized javascript in the pdf file,
+        # but it can be anything.
         js_list.append(create_string_object(str(uuid.uuid4())))
         js_list.append(self._add_object(js))
 
@@ -650,7 +656,8 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
             }
         )
 
-        # Then create the entry for the root, as it needs a reference to the Filespec
+        # Then create the entry for the root, as it needs
+        # a reference to the Filespec
         # Sample:
         # 1 0 obj
         # <<
@@ -692,8 +699,8 @@ def append_pages_from_reader(
         after_page_append: Optional[Callable[[PageObject], None]] = None,
     ) -> None:
         """
-        Copy pages from reader to writer. Includes an optional callback parameter
-        which is invoked after pages are appended to the writer.
+        Copy pages from reader to writer. Includes an optional callback
+        parameter which is invoked after pages are appended to the writer.
 
         `append` should be prefered.
 
@@ -979,7 +986,8 @@ def encrypt(
         if owner_pwd is not None:  # deprecated
             if owner_password is not None:
                 raise ValueError(
-                    "The argument owner_pwd of encrypt is deprecated. Use owner_password only."
+                    "The argument owner_pwd of encrypt is deprecated. "
+                    "Use owner_password only."
                 )
             else:
                 old_term = "owner_pwd"
@@ -1204,7 +1212,8 @@ def _sweep_indirect_references(
                         discovered.append(str(data))
                         stack.append((data.get_object(), None, None, []))
 
-            # Check if data has a parent and if it is a dict or an array update the value
+            # Check if data has a parent and if it is a dict or
+            # an array update the value
             if isinstance(parent, (DictionaryObject, ArrayObject)):
                 if isinstance(data, StreamObject):
                     # a dictionary value is a stream.  streams must be indirect
@@ -1313,7 +1322,8 @@ def get_threads_root(self) -> ArrayObject:
         See §8.3.2 from PDF 1.7 spec.
 
         Returns:
-            An array (possibly empty) of Dictionaries with ``/F`` and ``/I`` properties.
+            An array (possibly empty) of Dictionaries with ``/F`` and
+            ``/I`` properties.
         """
         if CO.THREADS in self._root_object:
             # TABLE 3.25 Entries in the catalog dictionary
@@ -1394,7 +1404,8 @@ def add_outline_item_destination(
     ) -> IndirectObject:
         if page_destination is not None and dest is not None:  # deprecated
             raise ValueError(
-                "The argument dest of add_outline_item_destination is deprecated. Use page_destination only."
+                "The argument dest of add_outline_item_destination is "
+                "deprecated. Use page_destination only."
             )
         if dest is not None:  # deprecated
             old_term = "dest"
@@ -1513,7 +1524,7 @@ def add_outline_item(
         pagenum: Optional[int] = None,  # deprecated
     ) -> IndirectObject:
         """
-        Add an outline item (commonly referred to as a "Bookmark") to this PDF file.
+        Add an outline item (commonly referred to as a "Bookmark") to the PDF file.
 
         Args:
           title: Title to use for this outline item.
@@ -1540,7 +1551,8 @@ def add_outline_item(
             )
         if page_number is not None and pagenum is not None:
             raise ValueError(
-                "The argument pagenum of add_outline_item is deprecated. Use page_number only."
+                "The argument pagenum of add_outline_item is deprecated. "
+                "Use page_number only."
             )
         if page_number is None:
             action_ref = None
@@ -1662,7 +1674,8 @@ def add_named_destination_object(
     ) -> IndirectObject:
         if page_destination is not None and dest is not None:
             raise ValueError(
-                "The argument dest of add_named_destination_object is deprecated. Use page_destination only."
+                "The argument dest of add_named_destination_object is "
+                "deprecated. Use page_destination only."
             )
         if dest is not None:  # deprecated
             old_term = "dest"
@@ -1706,7 +1719,8 @@ def add_named_destination(
     ) -> IndirectObject:
         if page_number is not None and pagenum is not None:
             raise ValueError(
-                "The argument pagenum of add_outline_item is deprecated. Use page_number only."
+                "The argument pagenum of add_outline_item is deprecated. "
+                "Use page_number only."
             )
         if pagenum is not None:
             old_term = "pagenum"
@@ -1862,7 +1876,7 @@ def remove_text(self, ignore_byte_string_object: bool = False) -> None:
         Remove text from this output.
 
         Args:
-          ignore_byte_string_object: optional parameter to ignore ByteString Objects.
+          ignore_byte_string_object: optional parameter to ignore ByteString Objects.
         """
         pg_dict = cast(DictionaryObject, self.get_object(self._pages))
         pages = cast(List[IndirectObject], pg_dict[PA.KIDS])
@@ -1925,8 +1939,8 @@ def add_uri(
         Args:
           page_number: index of the page on which to place the URI action.
           uri: URI of resource to link to.
-          rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or array of four
-            integers specifying the clickable rectangular area
+          rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or
+            array of four integers specifying the clickable rectangular area
             ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
           border: if provided, an array describing border-drawing
             properties. See the PDF spec for details. No border will be
@@ -2323,7 +2337,8 @@ def add_annotation(self, page_number: int, annotation: Dict[str, Any]) -> None:
     def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
         """
         Perform some clean up in the page.
-        Currently: convert NameObject nameddestination to TextStringObject (required for names/dests list)
+        Currently: convert NameObject nameddestination to TextStringObject
+        (required for names/dests list)
 
         Args:
           page:
@@ -2874,6 +2889,118 @@ def reset_translation(
         else:
             raise Exception("invalid parameter {reader}")
 
+    def set_page_label(
+        self,
+        page_index_from: int,
+        page_index_to: int,
+        style: Optional[PageLabelStyle] = None,
+        prefix: Optional[str] = None,
+        start: Optional[int] = 0,
+    ) -> None:
+        """
+        Validate the arguments, then set a page label to a range of pages.
+
+        Page indexes must be given starting from 0.
+        Labels must have a style, a prefix or both.
+        A range that is not assigned any page label gets a decimal label starting from 1.
+
+        Args:
+            page_index_from: index of the first page of the range (0-based)
+            page_index_to: index of the last page of the range (0-based, inclusive)
+            style:  The numbering style to be used for the numeric portion of each page label:
+                        '/D' Decimal arabic numerals
+                        '/R' Uppercase roman numerals
+                        '/r' Lowercase roman numerals
+                        '/A' Uppercase letters (A to Z for the first 26 pages,
+                             AA to ZZ for the next 26, and so on)
+                        '/a' Lowercase letters (a to z for the first 26 pages,
+                             aa to zz for the next 26, and so on)
+            prefix: The label prefix for page labels in this range.
+            start:  The value of the numeric portion for the first page label
+                    in the range.
+                    Subsequent pages are numbered sequentially from this value,
+                    which must be greater than or equal to 1 (else ValueError).
+                    Default: 0, i.e. no start value is stored (PDF readers then start at 1).
+        """
+        if style is None and prefix is None:
+            raise ValueError("at least one between style and prefix must be given")
+        if page_index_from < 0:
+            raise ValueError("page_index_from must be equal or greater then 0")
+        if page_index_to < page_index_from:
+            raise ValueError(
+                "page_index_to must be equal or greater then page_index_from"
+            )
+        if page_index_to >= len(self.pages):
+            raise ValueError("page_index_to exceeds number of pages")
+        if start is not None and start != 0 and start < 1:
+            raise ValueError("if given, start must be equal or greater than one")
+
+        self._set_page_label(page_index_from, page_index_to, style, prefix, start)
+
+    def _set_page_label(
+        self,
+        page_index_from: int,
+        page_index_to: int,
+        style: Optional[PageLabelStyle] = None,
+        prefix: Optional[str] = None,
+        start: Optional[int] = 0,
+    ) -> None:
+        """
+        Set a page label to a range of pages, without validating the arguments.
+        Page indexes must be given starting from 0.
+        Labels must have a style, a prefix or both.
+        A range that is not assigned any page label gets a decimal label
+        starting from 1.
+
+        Args:
+            page_index_from: index of the first page of the range (0-based)
+            page_index_to: index of the last page of the range (0-based, inclusive)
+            style:  The numbering style to be used for the numeric portion of each page label:
+                        /D Decimal arabic numerals
+                        /R Uppercase roman numerals
+                        /r Lowercase roman numerals
+                        /A Uppercase letters (A to Z for the first 26 pages,
+                           AA to ZZ for the next 26, and so on)
+                        /a Lowercase letters (a to z for the first 26 pages,
+                           aa to zz for the next 26, and so on)
+            prefix: The label prefix for page labels in this range.
+            start:  The value of the numeric portion for the first page label
+                    in the range; subsequent pages are numbered sequentially
+                    from this value, which must be greater than or equal to 1.
+                    With None or 0 (the default) no /St entry is written.
+        """
+        # plain decimal numbering, used for pages outside any explicitly labelled range
+        default_page_label = DictionaryObject({NameObject("/S"): NameObject("/D")})
+
+        new_page_label = DictionaryObject()
+        if style is not None:
+            new_page_label[NameObject("/S")] = NameObject(style)
+        if prefix is not None:
+            new_page_label[NameObject("/P")] = TextStringObject(prefix)
+        if start is not None and start != 0:  # None or 0: do not store an explicit /St
+            new_page_label[NameObject("/St")] = NumberObject(start)
+
+        if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
+            nums = ArrayObject()
+            nums_insert(NumberObject(0), default_page_label, nums)
+            page_labels = TreeObject()
+            page_labels[NameObject("/Nums")] = nums
+            self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels
+
+        page_labels = cast(
+            TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
+        )
+        nums = cast(ArrayObject, page_labels[NameObject("/Nums")])
+
+        nums_insert(NumberObject(page_index_from), new_page_label, nums)
+        nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
+        next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
+        if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
+            nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)
+
+        page_labels[NameObject("/Nums")] = nums
+        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels
+
 
 def _pdf_objectify(obj: Union[Dict[str, Any], str, int, List[Any]]) -> PdfObject:
     if isinstance(obj, PdfObject):
diff --git a/pypdf/constants.py b/pypdf/constants.py
index 10b5886fb5..cd8cc1ecf4 100644
--- a/pypdf/constants.py
+++ b/pypdf/constants.py
@@ -430,6 +430,16 @@ class OutlineFontFlag(IntFlag):
     bold = 2
 
 
+class PageLabelStyle:
+    """Numbering styles for page labels; see Table 8.10 in the PDF 1.7 reference."""
+
+    DECIMAL = "/D"  # Decimal arabic numerals
+    LOWERCASE_ROMAN = "/r"  # Lowercase roman numerals
+    UPPERCASE_ROMAN = "/R"  # Uppercase roman numerals
+    LOWERCASE_LETTER = "/a"  # Lowercase letters (a to z, then aa to zz, and so on)
+    UPPERCASE_LETTER = "/A"  # Uppercase letters (A to Z, then AA to ZZ, and so on)
+
+
 PDF_KEYS = (
     AnnotationDictionaryAttributes,
     CatalogAttributes,
diff --git a/pypdf/errors.py b/pypdf/errors.py
index ad45946284..25fc1f7de4 100644
--- a/pypdf/errors.py
+++ b/pypdf/errors.py
@@ -8,67 +8,55 @@
 class DeprecationError(Exception):
     """Raised when a deprecated feature is used."""
 
-    pass
-
 
 class DependencyError(Exception):
-    """Raised when a required dependency (a library or module that PyPDF depends on) is not available or cannot be imported."""
-
-    pass
+    """
+    Raised when a required dependency (a library or module that PyPDF depends on)
+    is not available or cannot be imported.
+    """
 
 
 class PyPdfError(Exception):
     """Base class for all exceptions raised by PyPDF."""
 
-    pass
-
 
 class PdfReadError(PyPdfError):
     """Raised when there is an issue reading a PDF file."""
 
-    pass
-
 
 class PageSizeNotDefinedError(PyPdfError):
     """Raised when the page size of a PDF document is not defined."""
 
-    pass
-
 
 class PdfReadWarning(UserWarning):
     """Issued when there is a potential issue reading a PDF file, but it can still be read."""
 
-    pass
-
 
 class PdfStreamError(PdfReadError):
     """Raised when there is an issue reading the stream of data in a PDF file."""
 
-    pass
-
 
 class ParseError(Exception):
-    """Raised when there is an issue parsing (analyzing and understanding the structure and meaning of) a PDF file."""
-
-    pass
+    """
+    Raised when there is an issue parsing (analyzing and understanding the
+    structure and meaning of) a PDF file.
+    """
 
 
 class FileNotDecryptedError(PdfReadError):
-    """Raised when a PDF file that has been encrypted (meaning it requires a password to be accessed) has not been successfully decrypted."""
-
-    pass
+    """
+    Raised when a PDF file that has been encrypted
+    (meaning it requires a password to be accessed) has not been successfully
+    decrypted.
+    """
 
 
 class WrongPasswordError(FileNotDecryptedError):
     """Raised when the wrong password is used to try to decrypt an encrypted PDF file."""
 
-    pass
-
 
 class EmptyFileError(PdfReadError):
     """Raised when a PDF file is empty or has no content."""
 
-    pass
-
 
 STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
diff --git a/pypdf/generic/_annotations.py b/pypdf/generic/_annotations.py
index c2f43af90e..26dfd63daa 100644
--- a/pypdf/generic/_annotations.py
+++ b/pypdf/generic/_annotations.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 from ._base import (
     BooleanObject,
@@ -37,9 +37,9 @@ def text(
         Add text annotation.
 
         Args:
-          rect:
-            or array of four integers specifying the clickable rectangular area
-            ``[xLL, yLL, xUR, yUR]``
+          rect: array of four integers ``[xLL, yLL, xUR, yUR]``
+            specifying the clickable rectangular area
+          text: The text that is added to the document
           open:
           flags:
 
@@ -76,15 +76,15 @@ def free_text(
 
         Args:
           text: Text to be added
-          rect: or array of four integers
-            specifying the clickable rectangular area ``[xLL, yLL, xUR, yUR]``
+          rect: array of four integers ``[xLL, yLL, xUR, yUR]``
+            specifying the clickable rectangular area
           font: Name of the Font, e.g. 'Helvetica'
           bold: Print the text in bold
           italic: Print the text in italic
           font_size: How big the text will be, e.g. '14pt'
-          font_color: Hex-string for the color
-          border_color: Hex-string for the border color
-          background_color: Hex-string for the background of the annotation
+          font_color: Hex-string for the color, e.g. cdcdcd
+          border_color: Hex-string for the border color, e.g. cdcdcd
+          background_color: Hex-string for the background of the annotation, e.g. cdcdcd
 
         Returns:
           A dictionary object representing the annotation.
@@ -135,9 +135,8 @@ def line(
         Args:
           p1: First point
           p2: Second point
-          rect: or array of four
-            integers specifying the clickable rectangular area
-            ``[xLL, yLL, xUR, yUR]``
+          rect: array of four integers ``[xLL, yLL, xUR, yUR]``
+            specifying the clickable rectangular area
           text: Text to be displayed as the line annotation
           title_bar: Text to be displayed in the title bar of the
             annotation; by convention this is the name of the author
@@ -185,12 +184,13 @@ def rectangle(
         """
         Draw a rectangle on the PDF.
 
+        This method uses the /Square annotation type of the PDF format.
+
         Args:
-          rect: or array of four
-            integers specifying the clickable rectangular area
-            ``[xLL, yLL, xUR, yUR]``
-          rect:
-          interiour_color:
+          rect: array of four integers ``[xLL, yLL, xUR, yUR]``
+            specifying the clickable rectangular area
+          interiour_color: None or hex-string for the color, e.g. cdcdcd
+            If None is used, the interiour is transparent.
 
         Returns:
           A dictionary object representing the annotation.
@@ -210,6 +210,67 @@ def rectangle(
 
         return square_obj
 
+    @staticmethod
+    def ellipse(
+        rect: Union[RectangleObject, Tuple[float, float, float, float]],
+        interiour_color: Optional[str] = None,
+    ) -> DictionaryObject:
+        """
+        Draw an ellipse on the PDF.
+
+        This method uses the /Circle annotation type of the PDF format.
+
+        Args:
+          rect: array of four numbers ``[xLL, yLL, xUR, yUR]`` specifying
+            the bounding box of the ellipse
+          interiour_color: None or hex-string for the color, e.g. cdcdcd
+            If None is used, the interiour is transparent.
+
+        Returns:
+          A dictionary object representing the annotation.
+        """
+        ellipse_obj = DictionaryObject(
+            {
+                NameObject("/Type"): NameObject("/Annot"),
+                NameObject("/Subtype"): NameObject("/Circle"),
+                NameObject("/Rect"): RectangleObject(rect),
+            }
+        )
+
+        if interiour_color:  # None or an empty string: no /IC entry is added
+            ellipse_obj[NameObject("/IC")] = ArrayObject(
+                [FloatObject(n) for n in hex_to_rgb(interiour_color)]
+            )
+
+        return ellipse_obj
+
+    @staticmethod
+    def polygon(vertices: List[Tuple[float, float]]) -> DictionaryObject:
+        """
+        Draw a polygon on the PDF (uses the /Polygon annotation type).
+
+        Args:
+          vertices: list of ``(x, y)`` points; a ValueError is raised if it is empty
+
+        Returns:
+          A dictionary object representing the annotation.
+        """
+        if len(vertices) == 0:
+            raise ValueError("A polygon needs at least 1 vertex with two coordinates")
+        xs, ys = zip(*vertices)
+        # /Rect is the bounding box: its upper-right corner needs max(), not min()
+        rect = RectangleObject((min(xs), min(ys), max(xs), max(ys)))
+        coord_list = [NumberObject(c) for x, y in vertices for c in (x, y)]
+        return DictionaryObject(
+            {
+                NameObject("/Type"): NameObject("/Annot"),
+                NameObject("/Subtype"): NameObject("/Polygon"),
+                NameObject("/Vertices"): ArrayObject(coord_list),
+                NameObject("/IT"): NameObject("/PolygonCloud"),  # names need the leading "/"
+                NameObject("/Rect"): rect,
+            }
+        )
+
     @staticmethod
     def link(
         rect: Union[RectangleObject, Tuple[float, float, float, float]],
@@ -227,9 +288,8 @@ def link(
         An internal link requires the target_page_index, fit, and fit args.
 
         Args:
-          rect: or array of four
-            integers specifying the clickable rectangular area
-            ``[xLL, yLL, xUR, yUR]``
+          rect: array of four integers ``[xLL, yLL, xUR, yUR]``
+            specifying the clickable rectangular area
           border: if provided, an array describing border-drawing
             properties. See the PDF spec for details. No border will be
             drawn if this argument is omitted.
@@ -255,7 +315,8 @@ def link(
             )
         if is_external and is_internal:
             raise ValueError(
-                f"Either 'url' or 'target_page_index' have to be provided. url={url}, target_page_index={target_page_index}"
+                "Either 'url' or 'target_page_index' have to be provided. "
+                f"url={url}, target_page_index={target_page_index}"
             )
 
         border_arr: BorderArrayType
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
index b1adcc557e..a84ce1841e 100644
--- a/pypdf/generic/_base.py
+++ b/pypdf/generic/_base.py
@@ -77,8 +77,10 @@ def clone(
         """
         clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
         force_duplicate: in standard if the object has been already cloned and reference,
-                         the copy is returned; when force_duplicate == True, a new copy is always performed
-        ignore_fields : list/tuple of Fields names (for dictionaries that will be ignored during cloning (apply also to childs duplication)
+            the copy is returned; when force_duplicate == True,
+            a new copy is always performed
+        ignore_fields : list/tuple of Fields names (for dictionaries that will
+            be ignored during cloning (apply also to childs duplication)
         in standard, clone function call _reference_clone (see _reference)
 
         Args:
@@ -620,7 +622,7 @@ def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
         name = stream.read(1)
         if name != NameObject.surfix:
             raise PdfReadError("name read error")
-        name += read_until_regex(stream, NameObject.delimiter_pattern, ignore_eof=True)
+        name += read_until_regex(stream, NameObject.delimiter_pattern)
         try:
             # Name objects should represent irregular characters
             # with a '#' followed by the symbol's hex number
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 2e472f51c1..3df0ae14bd 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -346,10 +346,10 @@ def read_from_stream(
         def get_next_obj_pos(
             p: int, p1: int, rem_gens: List[int], pdf: Any
         ) -> int:  # PdfReader
-            l = pdf.xref[rem_gens[0]]
-            for o in l:
-                if p1 > l[o] and p < l[o]:
-                    p1 = l[o]
+            loc = pdf.xref[rem_gens[0]]
+            for o in loc:
+                if p1 > loc[o] and p < loc[o]:
+                    p1 = loc[o]
             if len(rem_gens) == 1:
                 return p1
             else:
@@ -969,7 +969,7 @@ def __parse_content_stream(self, stream: StreamType) -> None:
                 break
             stream.seek(-1, 1)
             if peek.isalpha() or peek in (b"'", b'"'):
-                operator = read_until_regex(stream, NameObject.delimiter_pattern, True)
+                operator = read_until_regex(stream, NameObject.delimiter_pattern)
                 if operator == b"BI":
                     # begin inline image - a completely different parsing
                     # mechanism is required, of course... thanks buddy...
@@ -1016,7 +1016,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
             # We have reached the end of the stream, but haven't found the EI operator.
             if not buf:
                 raise PdfReadError("Unexpected end of stream")
-            loc = buf.find(b"E")
+            loc = buf.find(
+                b"E"
+            )  # we can not look straight for "EI" because it may not have been loaded in the buffer
 
             if loc == -1:
                 data.write(buf)
@@ -1026,28 +1028,44 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
 
                 # Seek back in the stream to read the E next.
                 stream.seek(loc - len(buf), 1)
-                tok = stream.read(1)
+                tok = stream.read(1)  # E of "EI"
                 # Check for End Image
-                tok2 = stream.read(1)
-                if tok2 == b"I" and buf[loc - 1 : loc] in WHITESPACES:
-                    # Data can contain [\s]EI,  so check for the separator \s; 4 chars suffisent Q operator not required.
-                    tok3 = stream.read(1)
-                    info = tok + tok2
-                    # We need to find at least one whitespace after.
-                    has_q_whitespace = False
+                tok2 = stream.read(1)  # I of "EI"
+                if tok2 != b"I":
+                    stream.seek(-1, 1)
+                    data.write(tok)
+                    continue
+                # for further debug : print("!!!!",buf[loc-1:loc+10])
+                info = tok + tok2
+                tok3 = stream.read(
+                    1
+                )  # possible space after "EI" may not been loaded  in buf
+                if tok3 not in WHITESPACES:
+                    stream.seek(-2, 1)  # to step back on I
+                    data.write(tok)
+                elif buf[loc - 1 : loc] in WHITESPACES:  # and tok3 in WHITESPACES:
+                    # Data can contain [\s]EI[\s]: 4 chars sufficient, checking Q operator not required.
+                    while tok3 in WHITESPACES:
+                        # needed ???? : info += tok3
+                        tok3 = stream.read(1)
+                    stream.seek(-1, 1)
+                    # we do not insert EI
+                    break
+                else:  # buf[loc - 1 : loc] not in WHITESPACES and tok3 in WHITESPACES:
+                    # Data can contain [!\s]EI[\s],  so check for Q or EMC operator is required to have 4 chars.
                     while tok3 in WHITESPACES:
-                        has_q_whitespace = True
                         info += tok3
                         tok3 = stream.read(1)
-                    if has_q_whitespace:
-                        stream.seek(-1, 1)
+                    stream.seek(-1, 1)
+                    if tok3 == b"Q":
                         break
+                    elif tok3 == b"E":
+                        ope = stream.read(3)
+                        stream.seek(-3, 1)
+                        if ope == b"EMC":
+                            break
                     else:
-                        stream.seek(-1, 1)
                         data.write(info)
-                else:
-                    stream.seek(-1, 1)
-                    data.write(tok)
         return {"settings": settings, "data": data.getvalue()}
 
     @property
diff --git a/pypdf/generic/_utils.py b/pypdf/generic/_utils.py
index ed1fd1f5d5..1643422ab8 100644
--- a/pypdf/generic/_utils.py
+++ b/pypdf/generic/_utils.py
@@ -150,10 +150,11 @@ def create_string_object(
                     retval.autodetect_utf16 = True
                     return retval
                 else:
-                    # This is probably a big performance hit here, but we need to
-                    # convert string objects into the text/unicode-aware version if
-                    # possible... and the only way to check if that's possible is
-                    # to try.  Some strings are strings, some are just byte arrays.
+                    # This is probably a big performance hit here, but we need
+                    # to convert string objects into the text/unicode-aware
+                    # version if possible... and the only way to check if that's
+                    # possible is to try.
+                    # Some strings are strings, some are just byte arrays.
                     retval = TextStringObject(decode_pdfdocencoding(string))
                     retval.autodetect_pdfdocencoding = True
                     return retval
diff --git a/pyproject.toml b/pyproject.toml
index b380e481ee..8a12f7e453 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,3 +97,6 @@ exclude_lines = [
     "if 0:",
     "if __name__ == .__main__.:",
 ]
+
+[tool.ruff]
+line-length = 120
diff --git a/requirements/docs.in b/requirements/docs.in
index 641b64b65c..6fe145949f 100644
--- a/requirements/docs.in
+++ b/requirements/docs.in
@@ -2,3 +2,4 @@ sphinx
 sphinx_rtd_theme
 myst_parser==0.16.1
 -e .
+attrs  # required for myst, but not automatically installed by myst
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 4a44968633..cb44bcd2bb 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -6,13 +6,15 @@
 #
 -e .
     # via -r requirements/docs.in
-alabaster==0.7.12
+alabaster==0.7.13
     # via sphinx
+attrs==22.2.0
+    # via -r requirements/docs.in
 babel==2.11.0
     # via sphinx
 certifi==2022.12.7
     # via requests
-charset-normalizer==2.1.1
+charset-normalizer==3.0.1
     # via requests
 docutils==0.17.1
     # via
@@ -33,7 +35,7 @@ markdown-it-py==2.1.0
     # via
     #   mdit-py-plugins
     #   myst-parser
-markupsafe==2.1.1
+markupsafe==2.1.2
     # via jinja2
 mdit-py-plugins==0.3.3
     # via myst-parser
@@ -41,19 +43,19 @@ mdurl==0.1.2
     # via markdown-it-py
 myst-parser==0.16.1
     # via -r requirements/docs.in
-packaging==22.0
+packaging==23.0
     # via sphinx
 pygments==2.14.0
     # via sphinx
-pytz==2022.7
+pytz==2022.7.1
     # via babel
 pyyaml==6.0
     # via myst-parser
-requests==2.28.1
+requests==2.28.2
     # via sphinx
 snowballstemmer==2.2.0
     # via sphinx
-sphinx==5.3.0
+sphinx==4.5.0
     # via
     #   -r requirements/docs.in
     #   myst-parser
@@ -76,9 +78,8 @@ typing-extensions==4.4.0
     # via
     #   importlib-metadata
     #   markdown-it-py
-    #   myst-parser
     #   pypdf
-urllib3==1.26.13
+urllib3==1.26.14
     # via requests
 zipp==3.11.0
     # via importlib-metadata
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index 17740e167a..38f745413e 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -3,6 +3,7 @@
 import pytest
 
 from pypdf import PdfReader
+from pypdf._cmap import build_char_map
 from pypdf.errors import PdfReadWarning
 
 from . import get_pdf_from_url
@@ -57,12 +58,18 @@ def test_get_font_width_from_default():  # L40
 @pytest.mark.external
 def test_multiline_bfrange():
     # non regression test for iss_1285
-    url = "https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/The%20lean%20times%20in%20the%20Peruvian%20economy.pdf"
+    url = (
+        "https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/"
+        "The%20lean%20times%20in%20the%20Peruvian%20economy.pdf"
+    )
     name = "tika-908104.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     for page in reader.pages:
         page.extract_text()
-    url = "https://github.com/yxj-HGNwmb5kdp8ewr/yxj-HGNwmb5kdp8ewr.github.io/raw/master/files/Giacalone%20Llobell%20Jaeger%20(2022)%20Food%20Qual%20Prefer.pdf"
+    url = (
+        "https://github.com/yxj-HGNwmb5kdp8ewr/yxj-HGNwmb5kdp8ewr.github.io/raw/master/files/"
+        "Giacalone%20Llobell%20Jaeger%20(2022)%20Food%20Qual%20Prefer.pdf"
+    )
     name = "Giacalone.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     for page in reader.pages:
@@ -72,7 +79,10 @@ def test_multiline_bfrange():
 @pytest.mark.external
 def test_bfchar_on_2_chars():
     # iss #1293
-    url = "https://github.com/xyegithub/myBlog/raw/main/posts/c94b2364/paper_pdfs/ImageClassification/2007%2CASurveyofImageClassificationBasedTechniques.pdf"
+    url = (
+        "https://github.com/xyegithub/myBlog/raw/main/posts/c94b2364/paper_pdfs/ImageClassification/"
+        "2007%2CASurveyofImageClassificationBasedTechniques.pdf"
+    )
     name = "ASurveyofImageClassificationBasedTechniques.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     for page in reader.pages:
@@ -102,3 +112,12 @@ def test_iss1379():
     name = "02voc.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.pages[2].extract_text()
+
+
+@pytest.mark.external
+def test_iss1533():
+    url = "https://github.com/py-pdf/pypdf/files/10376149/iss1533.pdf"
+    name = "iss1533.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    reader.pages[0].extract_text()  # no error
+    assert build_char_map("/F", 200, reader.pages[0])[3]["\x01"] == "Ü"
diff --git a/tests/test_encryption.py b/tests/test_encryption.py
index dfe14c760b..6400a7adc5 100644
--- a/tests/test_encryption.py
+++ b/tests/test_encryption.py
@@ -24,33 +24,47 @@
     [
         # unencrypted pdf
         ("unencrypted.pdf", False),
-        # created by `qpdf --encrypt "" "" 40 -- unencrypted.pdf r2-empty-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "" 40 -- unencrypted.pdf r2-empty-password.pdf
         ("r2-empty-password.pdf", False),
-        # created by `qpdf --encrypt "" "" 128 -- unencrypted.pdf r3-empty-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "" 128 -- unencrypted.pdf r3-empty-password.pdf
         ("r3-empty-password.pdf", False),
-        # created by `qpdf --encrypt "asdfzxcv" "" 40 -- unencrypted.pdf r2-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 40 -- unencrypted.pdf r2-user-password.pdf
         ("r2-user-password.pdf", False),
-        # created by `qpdf --encrypt "" "asdfzxcv" 40 -- unencrypted.pdf r2-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "asdfzxcv" 40 -- unencrypted.pdf r2-owner-password.pdf
         ("r2-owner-password.pdf", False),
-        # created by `qpdf --encrypt "asdfzxcv" "" 128 -- unencrypted.pdf r3-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 128 -- unencrypted.pdf r3-user-password.pdf
         ("r3-user-password.pdf", False),
-        # created by `qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- unencrypted.pdf r4-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- unencrypted.pdf r4-user-password.pdf
         ("r4-user-password.pdf", False),
-        # created by `qpdf --encrypt "" "asdfzxcv" 128 --force-V4 -- unencrypted.pdf r4-owner-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "asdfzxcv" 128 --force-V4 -- unencrypted.pdf r4-owner-password.pdf
         ("r4-owner-password.pdf", False),
-        # created by `qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- unencrypted.pdf r4-aes-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- unencrypted.pdf r4-aes-user-password.pdf
         ("r4-aes-user-password.pdf", True),
-        # # created by `qpdf --encrypt "" "" 256 --force-R5 -- unencrypted.pdf r5-empty-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "" 256 --force-R5 -- unencrypted.pdf r5-empty-password.pdf
         ("r5-empty-password.pdf", True),
-        # # created by `qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- unencrypted.pdf r5-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- unencrypted.pdf r5-user-password.pdf
         ("r5-user-password.pdf", True),
-        # # created by `qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- unencrypted.pdf r5-owner-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- unencrypted.pdf r5-owner-password.pdf
         ("r5-owner-password.pdf", True),
-        # created by `qpdf --encrypt "" "" 256 -- unencrypted.pdf r6-empty-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "" 256 -- unencrypted.pdf r6-empty-password.pdf
         ("r6-empty-password.pdf", True),
-        # created by `qpdf --encrypt "asdfzxcv" "" 256 -- unencrypted.pdf r6-user-password.pdf`:
+        # created by:
+        # qpdf --encrypt "asdfzxcv" "" 256 -- unencrypted.pdf r6-user-password.pdf
         ("r6-user-password.pdf", True),
-        # created by `qpdf --encrypt "" "asdfzxcv" 256 -- unencrypted.pdf r6-owner-password.pdf`:
+        # created by:
+        # qpdf --encrypt "" "asdfzxcv" 256 -- unencrypted.pdf r6-owner-password.pdf
         ("r6-owner-password.pdf", True),
     ],
 )
@@ -87,7 +101,8 @@ def test_encryption(name, requres_pycryptodome):
 @pytest.mark.parametrize(
     ("name", "user_passwd", "owner_passwd"),
     [
-        # created by `qpdf --encrypt "foo" "bar" 256 -- unencrypted.pdf r6-both-passwords.pdf`
+        # created by:
+        # qpdf --encrypt "foo" "bar" 256 -- unencrypted.pdf r6-both-passwords.pdf
         ("r6-both-passwords.pdf", "foo", "bar"),
     ],
 )
diff --git a/tests/test_filters.py b/tests/test_filters.py
index a514695521..c43b2aa4c3 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -244,7 +244,7 @@ def test_image_without_imagemagic():
         for page in reader.pages:
             with pytest.raises(ImportError) as exc:
                 page.images
-            assert (
-                exc.value.args[0]
-                == "pillow is required to do image extraction. It can be installed via 'pip install pypdf[image]'"
+            assert exc.value.args[0] == (
+                "pillow is required to do image extraction. "
+                "It can be installed via 'pip install pypdf[image]'"
             )
diff --git a/tests/test_generic.py b/tests/test_generic.py
index a59cd3d3e3..acdb3408e8 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -675,7 +675,10 @@ def test_bool_repr(tmp_path):
 @pytest.mark.external
 @patch("pypdf._reader.logger_warning")
 def test_issue_997(mock_logger_warning):
-    url = "https://github.com/py-pdf/pypdf/files/8908874/Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"
+    url = (
+        "https://github.com/py-pdf/pypdf/files/8908874/"
+        "Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"
+    )
     name = "gh-issue-997.pdf"
 
     merger = PdfMerger()
@@ -746,6 +749,34 @@ def test_annotation_builder_free_text():
     os.remove(target)  # comment this out for manual inspection
 
 
+def test_annotation_builder_polygon():
+    # Arrange
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    page = reader.pages[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+
+    # Act
+    with pytest.raises(ValueError) as exc:
+        AnnotationBuilder.polygon(
+            vertices=[],
+        )
+    assert exc.value.args[0] == "A polygon needs at least 1 vertex with two coordinates"
+
+    annotation = AnnotationBuilder.polygon(
+        vertices=[(50, 550), (200, 650), (70, 750), (50, 700)],
+    )
+    writer.add_annotation(0, annotation)
+
+    # Assert: You need to inspect the file manually
+    target = "annotated-pdf.pdf"
+    with open(target, "wb") as fp:
+        writer.write(fp)
+
+    os.remove(target)  # comment this out for manual inspection
+
+
 def test_annotation_builder_line():
     # Arrange
     pdf_path = RESOURCE_ROOT / "crazyones.pdf"
@@ -798,6 +829,34 @@ def test_annotation_builder_square():
     os.remove(target)  # comment this out for manual inspection
 
 
+def test_annotation_builder_circle():
+    # Arrange
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    page = reader.pages[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+
+    # Act
+    circle_annotation = AnnotationBuilder.ellipse(
+        rect=(50, 550, 200, 650), interiour_color="ff0000"
+    )
+    writer.add_annotation(0, circle_annotation)
+
+    diameter = 100
+    circle_annotation = AnnotationBuilder.ellipse(
+        rect=(110, 500, 110 + diameter, 500 + diameter),
+    )
+    writer.add_annotation(0, circle_annotation)
+
+    # Assert: You need to inspect the file manually
+    target = "annotated-pdf-circle.pdf"
+    with open(target, "wb") as fp:
+        writer.write(fp)
+
+    os.remove(target)  # comment this out for manual inspection
+
+
 def test_annotation_builder_link():
     # Arrange
     pdf_path = RESOURCE_ROOT / "outline-without-title.pdf"
@@ -814,9 +873,9 @@ def test_annotation_builder_link():
             url="https://martin-thoma.com/",
             target_page_index=3,
         )
-    assert (
-        exc.value.args[0]
-        == "Either 'url' or 'target_page_index' have to be provided. url=https://martin-thoma.com/, target_page_index=3"
+    assert exc.value.args[0] == (
+        "Either 'url' or 'target_page_index' have to be provided. "
+        "url=https://martin-thoma.com/, target_page_index=3"
     )
 
     # Part 2: Too few args
diff --git a/tests/test_merger.py b/tests/test_merger.py
index 2dd43a214c..ee0453c56e 100644
--- a/tests/test_merger.py
+++ b/tests/test_merger.py
@@ -56,9 +56,9 @@ def merger_operate(merger):
     with open(pdf_path, "rb") as fh:
         merger.append(fh)
 
-    merger.write(
-        BytesIO()
-    )  # to force to build outlines and ensur the add_outline_item is at end of the list
+    # write once to force the outlines to be built and ensure the
+    # add_outline_item is at the end of the list
+    merger.write(BytesIO())
     outline_item = merger.add_outline_item("An outline item", 0)
     oi2 = merger.add_outline_item(
         "deeper", 0, parent=outline_item, italic=True, bold=True
@@ -670,7 +670,10 @@ def test_deprecation_bookmark_decorator_deprecationexcp():
     merger = PdfMerger()
     with pytest.raises(
         DeprecationError,
-        match="import_bookmarks is deprecated as an argument. Use import_outline instead",
+        match=(
+            "import_bookmarks is deprecated as an argument. "
+            "Use import_outline instead"
+        ),
     ):
         merger.merge(0, reader, import_bookmarks=True)
 
@@ -680,7 +683,10 @@ def test_deprecation_bookmark_decorator_deprecationexcp_with_writer():
     merger = PdfWriter()
     with pytest.raises(
         DeprecationError,
-        match="import_bookmarks is deprecated as an argument. Use import_outline instead",
+        match=(
+            "import_bookmarks is deprecated as an argument. "
+            "Use import_outline instead"
+        ),
     ):
         merger.merge(0, reader, import_bookmarks=True)
 
diff --git a/tests/test_page.py b/tests/test_page.py
index b58d42ceb5..1914636a2e 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -1,5 +1,6 @@
 import json
 import os
+import random
 from copy import deepcopy
 from io import BytesIO
 from pathlib import Path
@@ -13,6 +14,7 @@
 from pypdf.errors import DeprecationError, PdfReadWarning
 from pypdf.generic import (
     ArrayObject,
+    ContentStream,
     DictionaryObject,
     FloatObject,
     IndirectObject,
@@ -570,7 +572,8 @@ def ignore_large_rectangles(r):
         page_lrs_model, rect_filter=ignore_large_rectangles
     )
 
-    # We see ten rectangles (5 tabs, 5 boxes) but there are 64 rectangles (including some invisible ones).
+    # We see ten rectangles (5 tabs, 5 boxes) but there are 64 rectangles
+    # (including some invisible ones).
     assert len(rectangles) == 60
     rectangle2texts = {}
     for t in texts:
@@ -871,3 +874,170 @@ def test_no_resources():
     page_one = reader.pages[0]
     page_two = reader.pages[0]
     page_one.merge_page(page_two)
+
+
+def test_merge_page_reproducible_with_proc_set():
+    page1 = PageObject.create_blank_page(width=100, height=100)
+    page2 = PageObject.create_blank_page(width=100, height=100)
+
+    ordered = sorted(NameObject(f"/{x}") for x in range(20))
+
+    shuffled = list(ordered)
+    random.shuffle(shuffled)
+
+    # each page has some overlap in their /ProcSet, and they're in a weird order
+    page1[NameObject("/Resources")][NameObject("/ProcSet")] = ArrayObject(shuffled[:15])
+    page2[NameObject("/Resources")][NameObject("/ProcSet")] = ArrayObject(shuffled[5:])
+    page1.merge_page(page2)
+
+    assert page1[NameObject("/Resources")][NameObject("/ProcSet")] == ordered
+
+
+@pytest.mark.parametrize(
+    ("page1", "page2", "expected_result", "expected_renames"),
+    [
+        # simple cases:
+        pytest.param({}, {}, {}, {}, id="no resources"),
+        pytest.param(
+            {"/1": "/v1"},
+            {"/2": "/v2"},
+            {"/1": "/v1", "/2": "/v2"},
+            {},
+            id="no overlap",
+        ),
+        pytest.param(
+            {"/x": "/v"}, {"/x": "/v"}, {"/x": "/v"}, {}, id="overlap, matching values"
+        ),
+        pytest.param(
+            {"/x": "/v1"},
+            {"/x": "/v2"},
+            {"/x": "/v1", "/x-0": "/v2"},
+            {"/x": "/x-0"},
+            id="overlap, different values",
+        ),
+        # carefully crafted names that match the renaming pattern:
+        pytest.param(
+            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v1"},
+            {"/x": "/v2"},
+            {
+                "/x": "/v1",
+                "/x-0": "/v1",
+                "/x-1": "/v1",
+                "/x-2": "/v2",
+            },
+            {"/x": "/x-2"},
+            id="crafted, different values",
+        ),
+        pytest.param(
+            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v"},
+            {"/x": "/v"},
+            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v"},
+            {"/x": "/x-1"},
+            id="crafted, matching value in chain",
+        ),
+        pytest.param(
+            {"/x": "/v1"},
+            {"/x": "/v2.1", "/x-0": "/v2.2"},
+            {"/x": "/v1", "/x-0": "/v2.1", "/x-0-0": "/v2.2"},
+            {"/x": "/x-0", "/x-0": "/x-0-0"},
+            id="crafted, overlaps with previous rename, different value",
+        ),
+        pytest.param(
+            {"/x": "/v1"},
+            {"/x": "/v2", "/x-0": "/v2"},
+            {"/x": "/v1", "/x-0": "/v2"},
+            {"/x": "/x-0"},
+            id="crafted, overlaps with previous rename, matching value",
+        ),
+    ],
+)
+def test_merge_resources(page1, page2, expected_result, expected_renames):
+    # Arrange
+    page1 = DictionaryObject(
+        {
+            PG.RESOURCES: DictionaryObject(
+                {NameObject(k): NameObject(v) for k, v in page1.items()}
+            )
+        }
+    )
+    page2 = DictionaryObject(
+        {
+            PG.RESOURCES: DictionaryObject(
+                {NameObject(k): NameObject(v) for k, v in page2.items()}
+            )
+        }
+    )
+
+    # Act
+    result, renames = PageObject._merge_resources(page1, page2, PG.RESOURCES)
+
+    # Assert
+    assert result == expected_result
+    assert renames == expected_renames
+
+
+def test_merge_page_resources_smoke_test():
+    # Arrange
+    page1 = PageObject.create_blank_page(width=100, height=100)
+    page2 = PageObject.create_blank_page(width=100, height=100)
+
+    NO = NameObject
+
+    # set up some dummy resources that overlap (or not) between the two pages
+    # (note, all the edge cases are tested in test_merge_resources)
+    props1 = page1[NO("/Resources")][NO("/Properties")] = DictionaryObject(
+        {
+            NO("/just1"): NO("/just1-value"),
+            NO("/overlap-matching"): NO("/overlap-matching-value"),
+            NO("/overlap-different"): NO("/overlap-different-value1"),
+        }
+    )
+    props2 = page2[NO("/Resources")][NO("/Properties")] = DictionaryObject(
+        {
+            NO("/just2"): NO("/just2-value"),
+            NO("/overlap-matching"): NO("/overlap-matching-value"),
+            NO("/overlap-different"): NO("/overlap-different-value2"),
+        }
+    )
+    # use these keys for some "operations", to validate renaming
+    # (the operand name doesn't matter)
+    contents1 = page1[NO("/Contents")] = ContentStream(None, None)
+    contents1.operations = [(ArrayObject(props1.keys()), "page1-contents")]
+    contents2 = page2[NO("/Contents")] = ContentStream(None, None)
+    contents2.operations = [(ArrayObject(props2.keys()), "page2-contents")]
+
+    expected_properties = {
+        "/just1": "/just1-value",
+        "/just2": "/just2-value",
+        "/overlap-matching": "/overlap-matching-value",
+        "/overlap-different": "/overlap-different-value1",
+        "/overlap-different-0": "/overlap-different-value2",
+    }
+    expected_operations = [
+        # no renaming
+        (ArrayObject(props1.keys()), b"page1-contents"),
+        # some renaming
+        (
+            ArrayObject(
+                [
+                    NO("/just2"),
+                    NO("/overlap-matching"),
+                    NO("/overlap-different-0"),
+                ]
+            ),
+            b"page2-contents",
+        ),
+    ]
+
+    # Act
+    page1.merge_page(page2)
+
+    # Assert
+    assert page1[NO("/Resources")][NO("/Properties")] == expected_properties
+
+    relevant_operations = [
+        (op, name)
+        for op, name in page1.get_contents().operations
+        if name in (b"page1-contents", b"page2-contents")
+    ]
+    assert relevant_operations == expected_operations
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 710e6c59ca..b8a115877a 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -119,7 +119,10 @@ def test_broken_meta_data(pdf_path):
         reader = PdfReader(f)
         with pytest.raises(
             PdfReadError,
-            match=r"trailer not found or does not point to document information directory",
+            match=(
+                "trailer not found or does not point to document "
+                "information directory"
+            ),
         ):
             reader.metadata
 
@@ -176,6 +179,7 @@ def test_get_outline(src, outline_elements):
     assert len(outline) == outline_elements
 
 
+@pytest.mark.samples
 @pytest.mark.parametrize(
     ("src", "expected_images"),
     [
@@ -615,23 +619,25 @@ def test_get_destination_page_number():
 
 
 def test_do_not_get_stuck_on_large_files_without_start_xref():
-    """Tests for the absence of a DoS bug, where a large file without an startxref mark
-    would cause the library to hang for minutes to hours"""
+    """
+    Tests for the absence of a DoS bug, where a large file without a startxref
+    mark would cause the library to hang for minutes to hours
+    """
     start_time = time.time()
     broken_stream = BytesIO(b"\0" * 5 * 1000 * 1000)
     with pytest.raises(PdfReadError):
         PdfReader(broken_stream)
     parse_duration = time.time() - start_time
-    # parsing is expected take less than a second on a modern cpu, but include a large
-    # tolerance to account for busy or slow systems
+    # parsing is expected to take less than a second on a modern cpu, but include
+    # a large tolerance to account for busy or slow systems
     assert parse_duration < 60
 
 
 @pytest.mark.external
 def test_decrypt_when_no_id():
     """
-    Decrypt an encrypted file that's missing the 'ID' value in its
-    trailer.
+    Decrypt an encrypted file that's missing the 'ID' value in its trailer.
+
     https://github.com/py-pdf/pypdf/issues/608
     """
 
@@ -797,7 +803,10 @@ def test_read_path():
 
 def test_read_not_binary_mode(caplog):
     with open(RESOURCE_ROOT / "crazyones.pdf") as f:
-        msg = "PdfReader stream/file object is not in binary mode. It may not be read correctly."
+        msg = (
+            "PdfReader stream/file object is not in binary mode. "
+            "It may not be read correctly."
+        )
         with pytest.raises(io.UnsupportedOperation):
             PdfReader(f)
     assert normalize_warnings(caplog.text) == [msg]
@@ -866,6 +875,7 @@ def test_get_fields():
     assert dict(fields["c1-1"]) == ({"/FT": "/Btn", "/T": "c1-1"})
 
 
+@pytest.mark.external
 def test_get_full_qualified_fields():
     url = "https://github.com/py-pdf/PyPDF2/files/10142389/fields_with_dots.pdf"
     name = "fields_with_dots.pdf"
@@ -1115,7 +1125,10 @@ def test_named_destination():
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     assert len(reader.named_destinations) > 0
     # 2nd case : Dest below names and with Kids...
-    url = "https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf"
+    url = (
+        "https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/"
+        "pdfstandards/pdf/PDF32000_2008.pdf"
+    )
     name = "PDF32000_2008.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     assert len(reader.named_destinations) > 0
@@ -1144,7 +1157,8 @@ def test_outline_with_empty_action():
 
 def test_outline_with_invalid_destinations():
     reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
-    # contains 9 outline items, 6 with invalid destinations caused by different malformations
+    # contains 9 outline items, 6 with invalid destinations
+    # caused by different malformations
     assert len(reader.outline) == 9
 
 
@@ -1208,14 +1222,21 @@ def test_reader(caplog):
 @pytest.mark.external
 def test_zeroing_xref():
     # iss #328
-    url = "https://github.com/py-pdf/pypdf/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
+    url = (
+        "https://github.com/py-pdf/pypdf/files/9066120/"
+        "UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
+    )
     name = "UTA_OSHA.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     len(reader.pages)
 
 
+@pytest.mark.external
 def test_thread():
-    url = "https://github.com/py-pdf/pypdf/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
+    url = (
+        "https://github.com/py-pdf/pypdf/files/9066120/"
+        "UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
+    )
     name = "UTA_OSHA.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     assert reader.threads is None
@@ -1226,6 +1247,7 @@ def test_thread():
     assert len(reader.threads) >= 1
 
 
+@pytest.mark.external
 def test_build_outline_item(caplog):
     url = "https://github.com/py-pdf/pypdf/files/9464742/shiv_resume.pdf"
     name = "shiv_resume.pdf"
@@ -1253,6 +1275,7 @@ def test_build_outline_item(caplog):
     assert "Unexpected destination 2" in exc.value.args[0]
 
 
+@pytest.mark.samples
 @pytest.mark.parametrize(
     ("src", "page_labels"),
     [
@@ -1274,3 +1297,13 @@ def test_build_outline_item(caplog):
 def test_page_labels(src, page_labels):
     max_indices = 6
     assert PdfReader(src).page_labels[:max_indices] == page_labels[:max_indices]
+
+
+@pytest.mark.external
+def test_iss1559():
+    # Downloads the sample PDF and runs extract_text() on every page; passes if nothing raises.
+    url = "https://github.com/py-pdf/pypdf/files/10441992/default.pdf"
+    name = "iss1559.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    for p in reader.pages:
+        p.extract_text()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 841c8d7122..e447d5d674 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -62,20 +62,11 @@ def test_skip_over_comment(stream, remainder):
     assert stream.read() == remainder
 
 
-def test_read_until_regex_premature_ending_raise():
-    import re
-
-    stream = io.BytesIO(b"")
-    with pytest.raises(PdfStreamError) as exc:
-        read_until_regex(stream, re.compile(b"."))
-    assert exc.value.args[0] == "Stream has ended unexpectedly"
-
-
 def test_read_until_regex_premature_ending_name():
     import re
 
     stream = io.BytesIO(b"")
-    assert read_until_regex(stream, re.compile(b"."), ignore_eof=True) == b""
+    assert read_until_regex(stream, re.compile(b".")) == b""
 
 
 @pytest.mark.parametrize(
@@ -250,7 +241,10 @@ def foo(old_param=1, baz=2):
 @pytest.mark.external
 def test_escapedcode_followed_by_int():
     # iss #1294
-    url = "https://github.com/timedegree/playground_files/raw/main/%E8%AE%BA%E6%96%87/AN%20EXACT%20ANALYTICAL%20SOLUTION%20OF%20KEPLER'S%20EQUATION.pdf"
+    url = (
+        "https://github.com/timedegree/playground_files/raw/main/"
+        "%E8%AE%BA%E6%96%87/AN%20EXACT%20ANALYTICAL%20SOLUTION%20OF%20KEPLER'S%20EQUATION.pdf"
+    )
     name = "keppler.pdf"
 
     reader = PdfReader(io.BytesIO(get_pdf_from_url(url, name=name)))
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index fb8187e70e..0e7e8c381b 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -15,7 +15,8 @@
 
 from pypdf import PdfMerger, PdfReader, PdfWriter
 from pypdf.constants import PageAttributes as PG
-from pypdf.errors import PdfReadWarning
+from pypdf.errors import PdfReadError, PdfReadWarning
+from pypdf.generic import ContentStream, read_object
 
 from . import get_pdf_from_url, normalize_warnings
 
@@ -74,7 +75,7 @@ def test_dropdown_items():
     inputfile = RESOURCE_ROOT / "libreoffice-form.pdf"
     reader = PdfReader(inputfile)
     fields = reader.get_fields()
-    assert "/Opt" in fields["Nationality"].keys()
+    assert "/Opt" in fields["Nationality"]
 
 
 def test_PdfReaderFileLoad():
@@ -99,15 +100,17 @@ def test_PdfReaderFileLoad():
             assert expected_line == actual_line
 
         assert text == pdftext, (
-            "PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
-            % (pdftext, text)
+            "PDF extracted text differs from expected value.\n\n"
+            "Expected:\n\n%r\n\nExtracted:\n\n%r\n\n" % (pdftext, text)
         )
 
 
 def test_PdfReaderJpegImage():
     """
-    Test loading and parsing of a file. Extract the image of the file and compare to expected
-    textual output. Expected outcome: file loads, image matches expected.
+    Test loading and parsing of a file. Extract the image of the file and
+    compare to expected textual output.
+
+    Expected outcome: file loads, image matches expected.
     """
 
     with open(RESOURCE_ROOT / "jpeg.pdf", "rb") as inputfile:
@@ -124,7 +127,8 @@ def test_PdfReaderJpegImage():
 
         # Compare the text of the PDF to a known source
         assert binascii.hexlify(data).decode() == imagetext, (
-            "PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
+            "PDF extracted image differs from expected value.\n\n"
+            "Expected:\n\n%r\n\nExtracted:\n\n%r\n\n"
             % (imagetext, binascii.hexlify(data).decode())
         )
 
@@ -880,3 +884,36 @@ def test_tounicode_is_identity():
     data = BytesIO(get_pdf_from_url(url, name=name))
     reader = PdfReader(data, strict=False)
     reader.pages[0].extract_text()
+
+
+@pytest.mark.external
+def test_extra_test_iss1541():
+    url = "https://github.com/py-pdf/pypdf/files/10418158/tst_iss1541.pdf"
+    name = "tst_iss1541.pdf"
+    data = BytesIO(get_pdf_from_url(url, name=name))
+    reader = PdfReader(data, strict=False)
+    reader.pages[0].extract_text()
+
+    cs = ContentStream(reader.pages[0]["/Contents"], None, None)
+    cs.operations.insert(-1, ([], b"EMC"))
+    bu = BytesIO()
+    cs.write_to_stream(bu, None)
+    bu.seek(0)
+    ContentStream(read_object(bu, None, None), None, None).operations
+
+    cs = ContentStream(reader.pages[0]["/Contents"], None, None)
+    cs.operations.insert(-1, ([], b"E!C"))
+    bu = BytesIO()
+    cs.write_to_stream(bu, None)
+    bu.seek(0)
+    with pytest.raises(PdfReadError) as exc:
+        ContentStream(read_object(bu, None, None), None, None).operations
+    assert exc.value.args[0] == "Unexpected end of stream"
+
+    buf2 = BytesIO(data.getbuffer())
+    reader = PdfReader(
+        BytesIO(bytes(buf2.getbuffer()).replace(b"EI \n", b"E! \n")), strict=False
+    )
+    with pytest.raises(PdfReadError) as exc:
+        reader.pages[0].extract_text()
+    assert exc.value.args[0] == "Unexpected end of stream"
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 60b4a17de2..df94af6e19 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -33,9 +33,8 @@ def test_writer_exception_non_binary(tmp_path, caplog):
     writer = PdfWriter()
     writer.add_page(reader.pages[0])
 
-    with open(tmp_path / "out.txt", "w") as fp:
-        with pytest.raises(TypeError):
-            writer.write_stream(fp)
+    with open(tmp_path / "out.txt", "w") as fp, pytest.raises(TypeError):
+        writer.write_stream(fp)
     ending = "to write to is not in binary mode. It may not be written to correctly.\n"
     assert caplog.text.endswith(ending)
 
@@ -380,7 +379,8 @@ def test_remove_text_all_operators(ignore_byte_string_object):
         pdf_data.find(b"4 0 obj") + startx_correction,
         pdf_data.find(b"5 0 obj") + startx_correction,
         pdf_data.find(b"6 0 obj") + startx_correction,
-        # startx_correction should be -1 due to double % at the beginning inducing an error on startxref computation
+        # startx_correction should be -1 due to double % at the beginning
+        # inducing an error on startxref computation
         pdf_data.find(b"xref"),
     )
     print(pdf_data.decode())
@@ -930,6 +930,7 @@ def test_startup_dest():
     pdf_file_writer.open_destination = None
 
 
+@pytest.mark.external
 def test_iss471():
     url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf"
     name = "book_471.pdf"
@@ -942,6 +943,7 @@ def test_iss471():
     )
 
 
+@pytest.mark.external
 def test_reset_translation():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
     name = "tika-924666.pdf"
@@ -977,6 +979,7 @@ def test_threads_empty():
     assert thr == thr2
 
 
+@pytest.mark.external
 def test_append_without_annots_and_articles():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
     name = "tika-924666.pdf"
@@ -993,6 +996,7 @@ def test_append_without_annots_and_articles():
     assert len(writer.threads) >= 1
 
 
+@pytest.mark.external
 def test_append_multiple():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
     name = "tika-924666.pdf"
@@ -1005,3 +1009,144 @@ def test_append_multiple():
     pages = writer._root_object["/Pages"]["/Kids"]
     assert pages[0] not in pages[1:]  # page not repeated
     assert pages[-1] not in pages[0:-1]  # page not repeated
+
+
+@pytest.mark.samples
+def test_set_page_label():
+    src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"  # File without labels
+    target = "pypdf-output.pdf"
+    reader = PdfReader(src)
+
+    expected = [
+        "i",
+        "ii",
+        "1",
+        "2",
+        "A",
+        "B",
+        "1",
+        "2",
+        "3",
+        "4",
+        "A",
+        "i",
+        "I",
+        "II",
+        "1",
+        "2",
+        "3",
+        "I",
+        "II",
+    ]
+
+    # Tests full length with labels assigned at first and last elements
+    # Tests different labels assigned to consecutive ranges
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(11, 11, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(17, 18, "/R")
+    writer.write(target)
+    assert PdfReader(target).page_labels == expected
+
+    writer = PdfWriter()  # Same labels, different set order
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(17, 18, "/R")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(11, 11, "/r")
+    writer.write(target)
+    assert PdfReader(target).page_labels == expected
+
+    # Tests labels assigned only in the middle
+    # Tests label assigned to a range already containing labeled ranges
+    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.set_page_label(2, 6, "/r")
+    writer.write(target)
+    assert PdfReader(target).page_labels[: len(expected)] == expected
+
+    # Tests labels assigned inside a previously existing range
+    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
+    # Ones repeat because user didn't cover the entire original range
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 6, "/r")
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.write(target)
+    assert PdfReader(target).page_labels[: len(expected)] == expected
+
+    # Tests invalid user input
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    with pytest.raises(
+        ValueError, match="at least one between style and prefix must be given"
+    ):
+        writer.set_page_label(0, 5, start=2)
+    with pytest.raises(
+        ValueError, match="page_index_from must be equal or greater then 0"
+    ):
+        writer.set_page_label(-1, 5, "/r")
+    with pytest.raises(
+        ValueError, match="page_index_to must be equal or greater then page_index_from"
+    ):
+        writer.set_page_label(5, 0, "/r")
+    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
+        writer.set_page_label(0, 19, "/r")
+    with pytest.raises(
+        ValueError, match="if given, start must be equal or greater than one"
+    ):
+        writer.set_page_label(0, 5, "/r", start=-1)
+
+    os.remove(target)
+
+    src = (
+        SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
+    )  # File with pre-existing labels
+    target = "pypdf-output.pdf"
+    reader = PdfReader(src)
+
+    # Tests adding labels to existing ones
+    expected = ["i", "ii", "A", "B", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 3, "/A")
+    writer.write(target)
+    assert PdfReader(target).page_labels[: len(expected)] == expected
+
+    # Tests replacing existing labels
+    expected = ["A", "B", "1", "1", "2"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/A")
+    writer.write(target)
+    assert PdfReader(target).page_labels[: len(expected)] == expected
+
+    os.remove(target)
+
+    # Tests prefix and start.
+    src = RESOURCE_ROOT / "issue-604.pdf"  # File without page labels
+    target = "page_labels_test.pdf"
+    reader = PdfReader(src)
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    writer.set_page_label(0, 0, prefix="FRONT")
+    writer.set_page_label(1, 2, "/D", start=2)
+    writer.set_page_label(3, 6, prefix="UPDATES")
+    writer.set_page_label(7, 10, "/D", prefix="THYR-")
+    writer.set_page_label(11, 21, "/D", prefix="PAP-")
+    writer.set_page_label(22, 30, "/D", prefix="FOLL-")
+    writer.set_page_label(31, 39, "/D", prefix="HURT-")
+    writer.write(target)
+
+    os.remove(target)  # comment out this line to keep the output file for inspection