notofonts · rsheeter · Aug 14, 2025 · Aug 14, 2025 · Aug 14, 2025
diff --git a/nototools/unicode_data.py b/nototools/unicode_data.py
@@ -43,7 +43,7 @@
 from nototools import tool_utils  # parse_int_ranges
 
 # Update this when we update the base version data we use
-UNICODE_VERSION = 14.0
+UNICODE_VERSION = 17.0
 
 _data_is_loaded = False
 _property_value_aliases_data = {}
@@ -935,53 +935,6 @@ def _read_emoji_test_data(data_string):
 fe82b ; fully-qualified # ? unknown flag PUA codepoint
 """
 
-# These are skin tone sequences that Unicode decided not to define.  Android
-# shipped with them, so we're stuck with them forever regardless of what
-# Unicode says.
-#
-# This data is in the format of emoji-sequences.txt and emoji-zwj-sequences.txt
-_LEGACY_ANDROID_SEQUENCES = """
-1F93C 1F3FB                ; Emoji_Modifier_Sequence ; people wrestling: light skin tone # 9.0
-1F93C 1F3FC                ; Emoji_Modifier_Sequence ; people wrestling: medium-light skin tone # 9.0
-1F93C 1F3FD                ; Emoji_Modifier_Sequence ; people wrestling: medium skin tone # 9.0
-1F93C 1F3FE                ; Emoji_Modifier_Sequence ; people wrestling: medium-dark skin tone # 9.0
-1F93C 1F3FF                ; Emoji_Modifier_Sequence ; people wrestling: dark skin tone # 9.0
-1F93C 1F3FB 200D 2642 FE0F ; Emoji_ZWJ_Sequence ; men wrestling: light skin tone # 9.0
-1F93C 1F3FC 200D 2642 FE0F ; Emoji_ZWJ_Sequence ; men wrestling: medium-light skin tone # 9.0
-1F93C 1F3FD 200D 2642 FE0F ; Emoji_ZWJ_Sequence ; men wrestling: medium skin tone # 9.0
-1F93C 1F3FE 200D 2642 FE0F ; Emoji_ZWJ_Sequence ; men wrestling: medium-dark skin tone # 9.0
-1F93C 1F3FF 200D 2642 FE0F ; Emoji_ZWJ_Sequence ; men wrestling: dark skin tone # 9.0
-1F93C 1F3FB 200D 2640 FE0F ; Emoji_ZWJ_Sequence ; women wrestling: light skin tone # 9.0
-1F93C 1F3FC 200D 2640 FE0F ; Emoji_ZWJ_Sequence ; women wrestling: medium-light skin tone # 9.0
-1F93C 1F3FD 200D 2640 FE0F ; Emoji_ZWJ_Sequence ; women wrestling: medium skin tone # 9.0
-1F93C 1F3FE 200D 2640 FE0F ; Emoji_ZWJ_Sequence ; women wrestling: medium-dark skin tone # 9.0
-1F93C 1F3FF 200D 2640 FE0F ; Emoji_ZWJ_Sequence ; women wrestling: dark skin tone # 9.0
-"""
-
-# Defines how to insert the new sequences into the standard order data.  Would
-# have been nice to merge it into the above legacy data but that would have
-# required a format change.
-_LEGACY_ANDROID_ORDER = """
--1F93C  # people wrestling
-1F93C 1F3FB
-1F93C 1F3FC
-1F93C 1F3FD
-1F93C 1F3FE
-1F93C 1F3FF
--1F93C 200D 2642 FE0F  # men wrestling
-1F93C 1F3FB 200D 2642 FE0F
-1F93C 1F3FC 200D 2642 FE0F
-1F93C 1F3FD 200D 2642 FE0F
-1F93C 1F3FE 200D 2642 FE0F
-1F93C 1F3FF 200D 2642 FE0F
--1F93C 200D 2640 FE0F  # women wrestling
-1F93C 1F3FB 200D 2640 FE0F
-1F93C 1F3FC 200D 2640 FE0F
-1F93C 1F3FD 200D 2640 FE0F
-1F93C 1F3FE 200D 2640 FE0F
-1F93C 1F3FF 200D 2640 FE0F
-"""
-
 
 def _get_order_patch(order_text, seq_to_name):
     """Create a mapping from a key sequence to a list of sequence, name tuples.
@@ -1018,47 +971,6 @@ def get_sequence(seqtext):
     return patch_map
 
 
-def _get_android_order_patch():
-    """Get an order patch using the legacy android data."""
-
-    # maps from sequence to (name, age, type), we only need the name
-    seq_data = _read_emoji_data(_LEGACY_ANDROID_SEQUENCES.splitlines())
-    seq_to_name = {k: v[0] for k, v in seq_data.items()}
-    return _get_order_patch(_LEGACY_ANDROID_ORDER, seq_to_name)
-
-
-def _apply_order_patch(patch, group_list):
-    """patch is a map from a key sequence to list of sequence, name pairs, and
-    group_list is an ordered list of sequence, group, subgroup, name tuples.
-    Iterate through the group list appending each item to a new list, and
-    after appending an item matching a key sequence, also append all of its
-    associated sequences in order using the same group and subgroup.
-    Return the new list.  If there are any unused patches, raise an exception."""
-
-    result = []
-    patched = set()
-    for t in group_list:
-        result.append(t)
-        if t[0] in patch:
-            patched.add(t[0])
-            _, group, subgroup, _ = t
-            for seq, name in patch[t[0]]:
-                result.append((seq, group, subgroup, name))
-
-    unused = set(patch.keys()) - patched
-    if unused:
-        raise Exception(
-            "%d unused patch%s\n  %s: "
-            % (
-                len(unused),
-                "" if len(unused) == 1 else "es",
-                "\n  ".join(seq_to_string(seq) for seq in sorted(unused)),
-            )
-        )
-
-    return result
-
-
 def _load_emoji_group_data():
     global _emoji_group_data
     if _emoji_group_data:
@@ -1070,10 +982,6 @@ def _load_emoji_group_data():
         text = f.read()
     group_list = _read_emoji_test_data(text)
 
-    # patch with android items
-    patch = _get_android_order_patch()
-    group_list = _apply_order_patch(patch, group_list)
-
     group_list.extend(_read_emoji_test_data(_SUPPLEMENTAL_EMOJI_GROUP_DATA))
     for i, (seq, group, subgroup, name) in enumerate(group_list):
         if seq in _emoji_group_data:
@@ -1084,7 +992,7 @@ def _load_emoji_group_data():
             print("    new value would be %s" % str((i, group, subgroup, name)))
         _emoji_group_data[seq] = (i, group, subgroup, name)
 
-    assert len(group_list) == len(_emoji_group_data)
+    assert len(group_list) == len(_emoji_group_data), f"{len(group_list)} != {len(_emoji_group_data)}"
 
 
 def get_emoji_group_data(seq):
@@ -1168,7 +1076,6 @@ def add_data(data):
 
     for datafile in ["emoji-zwj-sequences.txt", "emoji-sequences.txt"]:
         add_data(_read_emoji_data_file(datafile))
-    add_data(_read_emoji_data(_LEGACY_ANDROID_SEQUENCES.splitlines()))
 
     _load_unicode_data_txt()  # ensure character_names_data is populated
     _load_emoji_data()  # ensure presentation_default_text is populated

diff --git a/tests/unicode_data_test.py b/tests/unicode_data_test.py
@@ -59,7 +59,8 @@ def test_defined(self):
         """Tests the is_defined() method."""
         self.assertTrue(unicode_data.is_defined(0x20BD))
         self.assertFalse(unicode_data.is_defined(0xFDD0))
-        self.assertFalse(unicode_data.is_defined(0x088F))
+        # Must be an unassigned codepoint; update when a new Unicode version assigns it (0x088F was assigned in 17.0)
+        self.assertFalse(unicode_data.is_defined(0x0892))
         # CJK ranges
         self.assertTrue(unicode_data.is_defined(0x3400))
         self.assertTrue(unicode_data.is_defined(0x4DB5))

diff --git a/third_party/ucd/BidiMirroring.txt b/third_party/ucd/BidiMirroring.txt
@@ -1,6 +1,6 @@
-# BidiMirroring-16.0.0.txt
-# Date: 2024-01-30
-# © 2024 Unicode®, Inc.
+# BidiMirroring-17.0.0.txt
+# Date: 2025-08-01
+# © 2025 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
@@ -16,7 +16,7 @@
 # value, for which there is another Unicode character that typically has a glyph
 # that is the mirror image of the original character's glyph.
 #
-# The repertoire covered by the file is Unicode 16.0.0.
+# The repertoire covered by the file is Unicode 17.0.0.
 #
 # The file contains a list of lines with mappings from one code point
 # to another one for character-based mirroring.

diff --git a/third_party/ucd/Blocks.txt b/third_party/ucd/Blocks.txt
@@ -1,6 +1,6 @@
-# Blocks-16.0.0.txt
-# Date: 2024-02-02
-# © 2024 Unicode®, Inc.
+# Blocks-17.0.0.txt
+# Date: 2025-08-01
+# © 2025 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
@@ -228,6 +228,7 @@ FFF0..FFFF; Specials
 108E0..108FF; Hatran
 10900..1091F; Phoenician
 10920..1093F; Lydian
+10940..1095F; Sidetic
 10980..1099F; Meroitic Hieroglyphs
 109A0..109FF; Meroitic Cursive
 10A00..10A5F; Kharoshthi
@@ -279,11 +280,13 @@ FFF0..FFFF; Specials
 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
 11AC0..11AFF; Pau Cin Hau
 11B00..11B5F; Devanagari Extended-A
+11B60..11B7F; Sharada Supplement
 11BC0..11BFF; Sunuwar
 11C00..11C6F; Bhaiksuki
 11C70..11CBF; Marchen
 11D00..11D5F; Masaram Gondi
 11D60..11DAF; Gunjala Gondi
+11DB0..11DEF; Tolong Siki
 11EE0..11EFF; Makasar
 11F00..11F5F; Kawi
 11FB0..11FBF; Lisu Supplement
@@ -304,12 +307,14 @@ FFF0..FFFF; Specials
 16B00..16B8F; Pahawh Hmong
 16D40..16D7F; Kirat Rai
 16E40..16E9F; Medefaidrin
+16EA0..16EDF; Beria Erfe
 16F00..16F9F; Miao
 16FE0..16FFF; Ideographic Symbols and Punctuation
 17000..187FF; Tangut
 18800..18AFF; Tangut Components
 18B00..18CFF; Khitan Small Script
 18D00..18D7F; Tangut Supplement
+18D80..18DFF; Tangut Components Supplement
 1AFF0..1AFFF; Kana Extended-B
 1B000..1B0FF; Kana Supplement
 1B100..1B12F; Kana Extended-A
@@ -318,6 +323,7 @@ FFF0..FFFF; Specials
 1BC00..1BC9F; Duployan
 1BCA0..1BCAF; Shorthand Format Controls
 1CC00..1CEBF; Symbols for Legacy Computing Supplement
+1CEC0..1CEFF; Miscellaneous Symbols Supplement
 1CF00..1CFCF; Znamenny Musical Notation
 1D000..1D0FF; Byzantine Musical Symbols
 1D100..1D1FF; Musical Symbols
@@ -336,6 +342,7 @@ FFF0..FFFF; Specials
 1E2C0..1E2FF; Wancho
 1E4D0..1E4FF; Nag Mundari
 1E5D0..1E5FF; Ol Onal
+1E6C0..1E6FF; Tai Yo
 1E7E0..1E7FF; Ethiopic Extended-B
 1E800..1E8DF; Mende Kikakui
 1E900..1E95F; Adlam
@@ -367,6 +374,7 @@ FFF0..FFFF; Specials
 2F800..2FA1F; CJK Compatibility Ideographs Supplement
 30000..3134F; CJK Unified Ideographs Extension G
 31350..323AF; CJK Unified Ideographs Extension H
+323B0..3347F; CJK Unified Ideographs Extension J
 E0000..E007F; Tags
 E0100..E01EF; Variation Selectors Supplement
 F0000..FFFFF; Supplementary Private Use Area-A

diff --git a/third_party/ucd/DerivedAge.txt b/third_party/ucd/DerivedAge.txt
@@ -1,6 +1,6 @@
-# DerivedAge-16.0.0.txt
-# Date: 2024-04-30, 21:48:12 GMT
-# © 2024 Unicode®, Inc.
+# DerivedAge-17.0.0.txt
+# Date: 2025-07-30, 23:54:38 GMT
+# © 2025 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
@@ -15,7 +15,8 @@
 # - The term 'assigned' means that a previously reserved code point was assigned
 #   to be a character (graphic, format, control, or private-use);
 #   a noncharacter code point; or a surrogate code point.
-#   For more information, see The Unicode Standard Section 2.4
+#   For more information, see the
+#   "General Structure" / "Code Points and Characters" section of the core specification.
 #
 # - Versions are only tracked from 1.1 onwards, since version 1.0
 #   predated changes required by the ISO 10646 merger.
@@ -2059,4 +2060,60 @@ A7DA..A7DC    ; 16.0 #   [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L
 
 # Total code points: 5185
 
+# ================================================
+
+# Age=V17_0
+
+# Newly assigned in Unicode 17.0.0 (September, 2025)
+
+088F          ; 17.0 #       ARABIC LETTER NOON WITH RING ABOVE
+0C5C          ; 17.0 #       TELUGU ARCHAIC SHRII
+0CDC          ; 17.0 #       KANNADA ARCHAIC SHRII
+1ACF..1ADD    ; 17.0 #  [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW
+1AE0..1AEB    ; 17.0 #  [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
+20C1          ; 17.0 #       SAUDI RIYAL SIGN
+2B96          ; 17.0 #       EQUALS SIGN WITH INFINITY ABOVE
+A7CE..A7CF    ; 17.0 #   [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE
+A7D2          ; 17.0 #       LATIN CAPITAL LETTER DOUBLE THORN
+A7D4          ; 17.0 #       LATIN CAPITAL LETTER DOUBLE WYNN
+A7F1          ; 17.0 #       MODIFIER LETTER CAPITAL S
+FBC3..FBD2    ; 17.0 #  [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH
+FD90..FD91    ; 17.0 #   [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA
+FDC8..FDCE    ; 17.0 #   [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH
+10940..10959  ; 17.0 #  [26] SIDETIC LETTER N01..SIDETIC LETTER N26
+10EC5..10EC7  ; 17.0 #   [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW
+10ED0..10ED8  ; 17.0 #   [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH
+10EFA..10EFB  ; 17.0 #   [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON
+11B60..11B67  ; 17.0 #   [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O
+11DB0..11DDB  ; 17.0 #  [44] TOLONG SIKI LETTER I..TOLONG SIKI UNGGA
+11DE0..11DE9  ; 17.0 #  [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE
+16EA0..16EB8  ; 17.0 #  [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY
+16EBB..16ED3  ; 17.0 #  [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY
+16FF2..16FF6  ; 17.0 #   [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS
+187F8..187FF  ; 17.0 #   [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF
+18D09..18D1E  ; 17.0 #  [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E
+18D80..18DF2  ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883
+1CCFA..1CCFC  ; 17.0 #   [3] SNAKE SYMBOL..NOSE SYMBOL
+1CEBA..1CED0  ; 17.0 #  [23] FRAGILE SYMBOL..LEUKOTHEA
+1CEE0..1CEF0  ; 17.0 #  [17] GEOMANTIC FIGURE POPULUS..MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR
+1E6C0..1E6DE  ; 17.0 #  [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO
+1E6E0..1E6F5  ; 17.0 #  [22] TAI YO LETTER AA..TAI YO SIGN OM
+1E6FE..1E6FF  ; 17.0 #   [2] TAI YO SYMBOL MUEANG..TAI YO XAM LAI
+1F6D8         ; 17.0 #       LANDSLIDE
+1F777..1F77A  ; 17.0 #   [4] VESTA FORM TWO..PARTHENOPE FORM TWO
+1F8D0..1F8D8  ; 17.0 #   [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE
+1FA54..1FA57  ; 17.0 #   [4] WHITE CHESS FERZ..BLACK CHESS ALFIL
+1FA8A         ; 17.0 #       TROMBONE
+1FA8E         ; 17.0 #       TREASURE CHEST
+1FAC8         ; 17.0 #       HAIRY CREATURE
+1FACD         ; 17.0 #       ORCA
+1FAEA         ; 17.0 #       DISTORTED FACE
+1FAEF         ; 17.0 #       FIGHT CLOUD
+1FBFA         ; 17.0 #       ALARM BELL SYMBOL
+2B73A..2B73F  ; 17.0 #   [6] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73F
+2CEA2..2CEAD  ; 17.0 #  [12] CJK UNIFIED IDEOGRAPH-2CEA2..CJK UNIFIED IDEOGRAPH-2CEAD
+323B0..33479  ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
+
+# Total code points: 4803
+
 # EOF