forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathunicodectype.c
More file actions
10361 lines (10315 loc) · 469 KB
/
unicodectype.c
File metadata and controls
10361 lines (10315 loc) · 469 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Unicode character type helpers.
The data contained in the functions' switch tables was extracted
from the Unicode 3.0 data file.
Written by Marc-Andre Lemburg (mal@lemburg.com).
Copyright (c) Corporation for National Research Initiatives.
*/
#include "Python.h"
#include "unicodeobject.h"
#if defined(macintosh) || defined(MS_WIN64)
/* XXX Including MS_WIN64 here was required to avoid a compiler error with
 * an early Win64 cross-compiler that was used for the port to Win64.  When
 * the platform is released the MS_WIN64 test should no longer be necessary.
 */
/* This probably needs to be defined for some other compilers too.  It breaks
 * the 5000-label switch statement up into switches with around 1000 cases
 * each.
 *
 * The expansion returns 1 for the case labels stacked up before it, closes
 * the current switch, and opens a fresh switch on `ch`.  It is therefore
 * only usable between case labels of a switch over a variable named `ch`
 * whose listed cases all return 1.
 */
#define BREAK_SWITCH_UP return 1; } switch (ch) {
#else
/* On every other platform the macro expands to nothing, leaving the
 * switch statement in one piece.
 */
#define BREAK_SWITCH_UP /* nothing */
#endif
/* Line break test.

   Returns 1 when ch is one of the code points treated as a line
   boundary (Unicode category 'Zl' or bidirectional type 'B'), and 0
   for every other value. */
int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
{
    /* LINE FEED, CARRIAGE RETURN */
    if (ch == 0x000A || ch == 0x000D)
        return 1;

    /* FILE SEPARATOR, GROUP SEPARATOR, RECORD SEPARATOR */
    if (ch >= 0x001C && ch <= 0x001E)
        return 1;

    /* NEXT LINE, LINE SEPARATOR, PARAGRAPH SEPARATOR */
    if (ch == 0x0085 || ch == 0x2028 || ch == 0x2029)
        return 1;

    return 0;
}
/* Returns the titlecase form of ch.

   Lookup order:
   1. The Latin digraphs (DZ WITH CARON, LJ, NJ, DZ) map to their
      dedicated titlecase code point, whether ch is the capital, the
      small or already the titlecase member of the triple.
   2. Every other character falls back to Py_UNICODE_TOUPPER(ch), i.e.
      the uppercase mapping doubles as the titlecase mapping.

   The titlecase members themselves (0x01C5, 0x01C8, 0x01CB, 0x01F2) are
   listed explicitly so that the mapping is idempotent and agrees with
   _PyUnicode_IsTitlecase(): left to the uppercase fallback they would
   presumably be turned into the all-capital digraph instead of being
   returned unchanged. */
Py_UNICODE _PyUnicode_ToTitlecase(register const Py_UNICODE ch)
{
    switch (ch) {
    case 0x01C4: /* LATIN CAPITAL LETTER DZ WITH CARON */
    case 0x01C5: /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON */
    case 0x01C6: /* LATIN SMALL LETTER DZ WITH CARON */
        return (Py_UNICODE)0x01C5;
    case 0x01C7: /* LATIN CAPITAL LETTER LJ */
    case 0x01C8: /* LATIN CAPITAL LETTER L WITH SMALL LETTER J */
    case 0x01C9: /* LATIN SMALL LETTER LJ */
        return (Py_UNICODE)0x01C8;
    case 0x01CA: /* LATIN CAPITAL LETTER NJ */
    case 0x01CB: /* LATIN CAPITAL LETTER N WITH SMALL LETTER J */
    case 0x01CC: /* LATIN SMALL LETTER NJ */
        return (Py_UNICODE)0x01CB;
    case 0x01F1: /* LATIN CAPITAL LETTER DZ */
    case 0x01F2: /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z */
    case 0x01F3: /* LATIN SMALL LETTER DZ */
        return (Py_UNICODE)0x01F2;
    default:
        /* No dedicated titlecase form: use the uppercase mapping. */
        return Py_UNICODE_TOUPPER(ch);
    }
}
/* Titlecase test.

   Returns 1 when ch is one of the code points listed below (Unicode
   category 'Lt'), 0 otherwise. */
int _PyUnicode_IsTitlecase(register const Py_UNICODE ch)
{
    /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON,
       LATIN CAPITAL LETTER L WITH SMALL LETTER J,
       LATIN CAPITAL LETTER N WITH SMALL LETTER J,
       LATIN CAPITAL LETTER D WITH SMALL LETTER Z */
    if (ch == 0x01C5 || ch == 0x01C8 || ch == 0x01CB || ch == 0x01F2)
        return 1;

    /* GREEK CAPITAL LETTER ALPHA / ETA / OMEGA WITH PSILI or DASIA
       (optionally AND VARIA / OXIA / PERISPOMENI) AND PROSGEGRAMMENI:
       three runs of eight consecutive code points each. */
    if ((ch >= 0x1F88 && ch <= 0x1F8F) ||
        (ch >= 0x1F98 && ch <= 0x1F9F) ||
        (ch >= 0x1FA8 && ch <= 0x1FAF))
        return 1;

    /* GREEK CAPITAL LETTER ALPHA / ETA / OMEGA WITH PROSGEGRAMMENI */
    if (ch == 0x1FBC || ch == 0x1FCC || ch == 0x1FFC)
        return 1;

    return 0;
}
/* Decimal digit value lookup.

   Returns the value 0-9 for every code point listed as a decimal
   digit, and -1 for everything else. */
int _PyUnicode_ToDecimalDigit(register const Py_UNICODE ch)
{
    /* Code point of the zero of every run in which the ten digits
       0..9 occupy ten consecutive code points. */
    static const long zero_of_run[] = {
        0x0030, 0x0660, 0x06F0, 0x0966, 0x09E6, 0x0A66, 0x0AE6,
        0x0B66, 0x0C66, 0x0CE6, 0x0D66, 0x0E50, 0x0ED0, 0x0F20,
        0x1040, 0x17E0, 0x1810, 0x2080, 0xFF10
    };
    const int run_count =
        (int)(sizeof(zero_of_run) / sizeof(zero_of_run[0]));
    int i;

    for (i = 0; i < run_count; i++) {
        const long zero = zero_of_run[i];

        if (ch >= zero && ch <= zero + 9)
            return (int)(ch - zero);
    }

    /* Runs that only list 1..9; the slot where the zero would sit
       (0x0BE6, 0x1368) is deliberately not a digit here. */
    if (ch >= 0x0BE7 && ch <= 0x0BEF)
        return (int)(ch - 0x0BE6);
    if (ch >= 0x1369 && ch <= 0x1371)
        return (int)(ch - 0x1368);

    /* 0x2070 is 0 and 0x2074..0x2079 are 4..9; 0x2071..0x2073 are
       not listed. */
    if (ch == 0x2070)
        return 0;
    if (ch >= 0x2074 && ch <= 0x2079)
        return (int)(ch - 0x2070);

    /* The matching 1, 2 and 3 are isolated code points. */
    if (ch == 0x00B9)
        return 1;
    if (ch == 0x00B2 || ch == 0x00B3)
        return (int)(ch - 0x00B0);

    return -1;
}
/* Returns 1 when _PyUnicode_ToDecimalDigit() yields a non-negative
   value for ch, 0 otherwise. */
int _PyUnicode_IsDecimalDigit(register const Py_UNICODE ch)
{
    return _PyUnicode_ToDecimalDigit(ch) >= 0;
}
/* Digit value lookup.

   Returns the value 0-9 for the code points handled here; any other
   code point is passed on to _PyUnicode_ToDecimalDigit(), so the
   result is -1 when neither table knows ch. */
int _PyUnicode_ToDigit(register const Py_UNICODE ch)
{
    /* Code point of the "one" of every run whose nine consecutive
       code points carry the values 1..9. */
    static const long one_of_run[] = {
        0x2460, 0x2474, 0x2488, 0x2776, 0x2780, 0x278A
    };
    const int run_count =
        (int)(sizeof(one_of_run) / sizeof(one_of_run[0]));
    int i;

    /* The only zero in this table. */
    if (ch == 0x24EA)
        return 0;

    for (i = 0; i < run_count; i++) {
        const long one = one_of_run[i];

        if (ch >= one && ch <= one + 8)
            return (int)(ch - one) + 1;
    }

    return _PyUnicode_ToDecimalDigit(ch);
}
/* Returns 1 when _PyUnicode_ToDigit() yields a non-negative value for
   ch, 0 otherwise. */
int _PyUnicode_IsDigit(register const Py_UNICODE ch)
{
    return _PyUnicode_ToDigit(ch) >= 0;
}
/* Numeric value lookup.

   Returns the numeric value of ch as a double for the code points
   handled here (integers and simple fractions).  Any other code point
   is passed on to _PyUnicode_ToDigit(), whose int result is converted
   to double, so unknown characters yield -1.0. */
double _PyUnicode_ToNumeric(register const Py_UNICODE ch)
{
    /* Runs of consecutive code points whose values form an arithmetic
       progression are resolved by range; the remaining singletons are
       handled by the switch further down. */

    /* 0x09F4..0x09F7 -> 1..4 */
    if (ch >= 0x09F4 && ch <= 0x09F7)
        return (double) (ch - 0x09F3);

    /* 0x1372..0x137A -> 10, 20, ..., 90 */
    if (ch >= 0x1372 && ch <= 0x137A)
        return (double) ((ch - 0x1371) * 10);

    /* 0x16EE..0x16F0 -> 17..19 */
    if (ch >= 0x16EE && ch <= 0x16F0)
        return (double) (ch - 0x16DD);

    /* 0x2160..0x216B and 0x2170..0x217B -> 1..12 */
    if (ch >= 0x2160 && ch <= 0x216B)
        return (double) (ch - 0x215F);
    if (ch >= 0x2170 && ch <= 0x217B)
        return (double) (ch - 0x216F);

    /* 0x2469..0x2473, 0x247D..0x2487 and 0x2491..0x249B -> 10..20 */
    if (ch >= 0x2469 && ch <= 0x2473)
        return (double) (ch - 0x245F);
    if (ch >= 0x247D && ch <= 0x2487)
        return (double) (ch - 0x2473);
    if (ch >= 0x2491 && ch <= 0x249B)
        return (double) (ch - 0x2487);

    /* 0x3021..0x3029 -> 1..9 */
    if (ch >= 0x3021 && ch <= 0x3029)
        return (double) (ch - 0x3020);

    /* 0x3280..0x3289 -> 1..10 */
    if (ch >= 0x3280 && ch <= 0x3289)
        return (double) (ch - 0x327F);

    switch (ch) {
    /* fractions */
    case 0x215B:
        return (double) 1 / 8;
    case 0x2159:
        return (double) 1 / 6;
    case 0x2155:
        return (double) 1 / 5;
    case 0x00BC:
        return (double) 1 / 4;
    case 0x2153:
        return (double) 1 / 3;
    case 0x215C:
        return (double) 3 / 8;
    case 0x2156:
        return (double) 2 / 5;
    case 0x00BD:
        return (double) 1 / 2;
    case 0x2157:
        return (double) 3 / 5;
    case 0x215D:
        return (double) 5 / 8;
    case 0x2154:
        return (double) 2 / 3;
    case 0x00BE:
        return (double) 3 / 4;
    case 0x2158:
        return (double) 4 / 5;
    case 0x215A:
        return (double) 5 / 6;
    case 0x215E:
        return (double) 7 / 8;

    /* whole numbers, ascending */
    case 0x3007:
        return (double) 0;
    case 0x215F:
        return (double) 1;
    case 0x0BF0:
    case 0x277F:
    case 0x2789:
    case 0x2793:
    case 0x3038:
        return (double) 10;
    case 0x09F9:
        return (double) 16;
    case 0x3039:
        return (double) 20;
    case 0x303A:
        return (double) 30;
    case 0x216C:
    case 0x217C:
        return (double) 50;
    case 0x0BF1:
    case 0x137B:
    case 0x216D:
    case 0x217D:
        return (double) 100;
    case 0x216E:
    case 0x217E:
        return (double) 500;
    case 0x0BF2:
    case 0x216F:
    case 0x217F:
    case 0x2180:
        return (double) 1000;
    case 0x2181:
        return (double) 5000;
    case 0x137C:
    case 0x2182:
        return (double) 10000;

    default:
        /* Not in this table: defer to the digit lookup. */
        return (double) _PyUnicode_ToDigit(ch);
    }
}
/* Returns 0 when _PyUnicode_ToNumeric() yields a value below 0.0 for
   ch, 1 otherwise. */
int _PyUnicode_IsNumeric(register const Py_UNICODE ch)
{
    return (_PyUnicode_ToNumeric(ch) < 0.0) ? 0 : 1;
}
#ifndef WANT_WCTYPE_FUNCTIONS
/* Whitespace test.

   Returns 1 when ch is one of the code points listed below (Unicode
   bidirectional type 'WS', 'B' or 'S', or category 'Zs'), and 0 for
   every other value. */
int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
{
    /* HORIZONTAL TABULATION, LINE FEED, VERTICAL TABULATION,
       FORM FEED, CARRIAGE RETURN */
    if (ch >= 0x0009 && ch <= 0x000D)
        return 1;

    /* FILE / GROUP / RECORD / UNIT SEPARATOR, SPACE */
    if (ch >= 0x001C && ch <= 0x0020)
        return 1;

    /* EN QUAD through ZERO WIDTH SPACE */
    if (ch >= 0x2000 && ch <= 0x200B)
        return 1;

    return ch == 0x0085     /* NEXT LINE */
        || ch == 0x00A0     /* NO-BREAK SPACE */
        || ch == 0x1680     /* OGHAM SPACE MARK */
        || ch == 0x2028     /* LINE SEPARATOR */
        || ch == 0x2029     /* PARAGRAPH SEPARATOR */
        || ch == 0x202F     /* NARROW NO-BREAK SPACE */
        || ch == 0x3000;    /* IDEOGRAPHIC SPACE */
}
/* Returns 1 for Unicode characters having the category 'Ll', 0
otherwise. */
int _PyUnicode_IsLowercase(register const Py_UNICODE ch)
{
switch (ch) {
case 0x0061: /* LATIN SMALL LETTER A */
case 0x0062: /* LATIN SMALL LETTER B */
case 0x0063: /* LATIN SMALL LETTER C */
case 0x0064: /* LATIN SMALL LETTER D */
case 0x0065: /* LATIN SMALL LETTER E */
case 0x0066: /* LATIN SMALL LETTER F */
case 0x0067: /* LATIN SMALL LETTER G */
case 0x0068: /* LATIN SMALL LETTER H */
case 0x0069: /* LATIN SMALL LETTER I */
case 0x006A: /* LATIN SMALL LETTER J */
case 0x006B: /* LATIN SMALL LETTER K */
case 0x006C: /* LATIN SMALL LETTER L */
case 0x006D: /* LATIN SMALL LETTER M */
case 0x006E: /* LATIN SMALL LETTER N */
case 0x006F: /* LATIN SMALL LETTER O */
case 0x0070: /* LATIN SMALL LETTER P */
case 0x0071: /* LATIN SMALL LETTER Q */
case 0x0072: /* LATIN SMALL LETTER R */
case 0x0073: /* LATIN SMALL LETTER S */
case 0x0074: /* LATIN SMALL LETTER T */
case 0x0075: /* LATIN SMALL LETTER U */
case 0x0076: /* LATIN SMALL LETTER V */
case 0x0077: /* LATIN SMALL LETTER W */
case 0x0078: /* LATIN SMALL LETTER X */
case 0x0079: /* LATIN SMALL LETTER Y */
case 0x007A: /* LATIN SMALL LETTER Z */
case 0x00AA: /* FEMININE ORDINAL INDICATOR */
case 0x00B5: /* MICRO SIGN */
case 0x00BA: /* MASCULINE ORDINAL INDICATOR */
case 0x00DF: /* LATIN SMALL LETTER SHARP S */
case 0x00E0: /* LATIN SMALL LETTER A WITH GRAVE */
case 0x00E1: /* LATIN SMALL LETTER A WITH ACUTE */
case 0x00E2: /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
case 0x00E3: /* LATIN SMALL LETTER A WITH TILDE */
case 0x00E4: /* LATIN SMALL LETTER A WITH DIAERESIS */
case 0x00E5: /* LATIN SMALL LETTER A WITH RING ABOVE */
case 0x00E6: /* LATIN SMALL LETTER AE */
case 0x00E7: /* LATIN SMALL LETTER C WITH CEDILLA */
case 0x00E8: /* LATIN SMALL LETTER E WITH GRAVE */
case 0x00E9: /* LATIN SMALL LETTER E WITH ACUTE */
case 0x00EA: /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
case 0x00EB: /* LATIN SMALL LETTER E WITH DIAERESIS */
case 0x00EC: /* LATIN SMALL LETTER I WITH GRAVE */
case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */
case 0x00EE: /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
case 0x00EF: /* LATIN SMALL LETTER I WITH DIAERESIS */
case 0x00F0: /* LATIN SMALL LETTER ETH */
case 0x00F1: /* LATIN SMALL LETTER N WITH TILDE */
case 0x00F2: /* LATIN SMALL LETTER O WITH GRAVE */
case 0x00F3: /* LATIN SMALL LETTER O WITH ACUTE */
case 0x00F4: /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
case 0x00F5: /* LATIN SMALL LETTER O WITH TILDE */
case 0x00F6: /* LATIN SMALL LETTER O WITH DIAERESIS */
case 0x00F8: /* LATIN SMALL LETTER O WITH STROKE */
case 0x00F9: /* LATIN SMALL LETTER U WITH GRAVE */
case 0x00FA: /* LATIN SMALL LETTER U WITH ACUTE */
case 0x00FB: /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
case 0x00FC: /* LATIN SMALL LETTER U WITH DIAERESIS */
case 0x00FD: /* LATIN SMALL LETTER Y WITH ACUTE */
case 0x00FE: /* LATIN SMALL LETTER THORN */
case 0x00FF: /* LATIN SMALL LETTER Y WITH DIAERESIS */
case 0x0101: /* LATIN SMALL LETTER A WITH MACRON */
case 0x0103: /* LATIN SMALL LETTER A WITH BREVE */
case 0x0105: /* LATIN SMALL LETTER A WITH OGONEK */
case 0x0107: /* LATIN SMALL LETTER C WITH ACUTE */
case 0x0109: /* LATIN SMALL LETTER C WITH CIRCUMFLEX */
case 0x010B: /* LATIN SMALL LETTER C WITH DOT ABOVE */
case 0x010D: /* LATIN SMALL LETTER C WITH CARON */
case 0x010F: /* LATIN SMALL LETTER D WITH CARON */
case 0x0111: /* LATIN SMALL LETTER D WITH STROKE */
case 0x0113: /* LATIN SMALL LETTER E WITH MACRON */
case 0x0115: /* LATIN SMALL LETTER E WITH BREVE */
case 0x0117: /* LATIN SMALL LETTER E WITH DOT ABOVE */
case 0x0119: /* LATIN SMALL LETTER E WITH OGONEK */
case 0x011B: /* LATIN SMALL LETTER E WITH CARON */
case 0x011D: /* LATIN SMALL LETTER G WITH CIRCUMFLEX */
case 0x011F: /* LATIN SMALL LETTER G WITH BREVE */
case 0x0121: /* LATIN SMALL LETTER G WITH DOT ABOVE */
case 0x0123: /* LATIN SMALL LETTER G WITH CEDILLA */
case 0x0125: /* LATIN SMALL LETTER H WITH CIRCUMFLEX */
case 0x0127: /* LATIN SMALL LETTER H WITH STROKE */
case 0x0129: /* LATIN SMALL LETTER I WITH TILDE */
case 0x012B: /* LATIN SMALL LETTER I WITH MACRON */
case 0x012D: /* LATIN SMALL LETTER I WITH BREVE */
case 0x012F: /* LATIN SMALL LETTER I WITH OGONEK */
case 0x0131: /* LATIN SMALL LETTER DOTLESS I */
case 0x0133: /* LATIN SMALL LIGATURE IJ */
case 0x0135: /* LATIN SMALL LETTER J WITH CIRCUMFLEX */
case 0x0137: /* LATIN SMALL LETTER K WITH CEDILLA */
case 0x0138: /* LATIN SMALL LETTER KRA */
case 0x013A: /* LATIN SMALL LETTER L WITH ACUTE */
case 0x013C: /* LATIN SMALL LETTER L WITH CEDILLA */
case 0x013E: /* LATIN SMALL LETTER L WITH CARON */
case 0x0140: /* LATIN SMALL LETTER L WITH MIDDLE DOT */
case 0x0142: /* LATIN SMALL LETTER L WITH STROKE */
case 0x0144: /* LATIN SMALL LETTER N WITH ACUTE */
case 0x0146: /* LATIN SMALL LETTER N WITH CEDILLA */
case 0x0148: /* LATIN SMALL LETTER N WITH CARON */
case 0x0149: /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
case 0x014B: /* LATIN SMALL LETTER ENG */
case 0x014D: /* LATIN SMALL LETTER O WITH MACRON */
case 0x014F: /* LATIN SMALL LETTER O WITH BREVE */
case 0x0151: /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */
case 0x0153: /* LATIN SMALL LIGATURE OE */
case 0x0155: /* LATIN SMALL LETTER R WITH ACUTE */
case 0x0157: /* LATIN SMALL LETTER R WITH CEDILLA */
case 0x0159: /* LATIN SMALL LETTER R WITH CARON */
case 0x015B: /* LATIN SMALL LETTER S WITH ACUTE */
case 0x015D: /* LATIN SMALL LETTER S WITH CIRCUMFLEX */
case 0x015F: /* LATIN SMALL LETTER S WITH CEDILLA */
case 0x0161: /* LATIN SMALL LETTER S WITH CARON */
case 0x0163: /* LATIN SMALL LETTER T WITH CEDILLA */
case 0x0165: /* LATIN SMALL LETTER T WITH CARON */
case 0x0167: /* LATIN SMALL LETTER T WITH STROKE */
case 0x0169: /* LATIN SMALL LETTER U WITH TILDE */
case 0x016B: /* LATIN SMALL LETTER U WITH MACRON */
case 0x016D: /* LATIN SMALL LETTER U WITH BREVE */
case 0x016F: /* LATIN SMALL LETTER U WITH RING ABOVE */
case 0x0171: /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */
case 0x0173: /* LATIN SMALL LETTER U WITH OGONEK */
case 0x0175: /* LATIN SMALL LETTER W WITH CIRCUMFLEX */
case 0x0177: /* LATIN SMALL LETTER Y WITH CIRCUMFLEX */
case 0x017A: /* LATIN SMALL LETTER Z WITH ACUTE */
case 0x017C: /* LATIN SMALL LETTER Z WITH DOT ABOVE */
case 0x017E: /* LATIN SMALL LETTER Z WITH CARON */
case 0x017F: /* LATIN SMALL LETTER LONG S */
case 0x0180: /* LATIN SMALL LETTER B WITH STROKE */
case 0x0183: /* LATIN SMALL LETTER B WITH TOPBAR */
case 0x0185: /* LATIN SMALL LETTER TONE SIX */
case 0x0188: /* LATIN SMALL LETTER C WITH HOOK */
case 0x018C: /* LATIN SMALL LETTER D WITH TOPBAR */
case 0x018D: /* LATIN SMALL LETTER TURNED DELTA */
case 0x0192: /* LATIN SMALL LETTER F WITH HOOK */
case 0x0195: /* LATIN SMALL LETTER HV */
case 0x0199: /* LATIN SMALL LETTER K WITH HOOK */
case 0x019A: /* LATIN SMALL LETTER L WITH BAR */
case 0x019B: /* LATIN SMALL LETTER LAMBDA WITH STROKE */
case 0x019E: /* LATIN SMALL LETTER N WITH LONG RIGHT LEG */
case 0x01A1: /* LATIN SMALL LETTER O WITH HORN */
case 0x01A3: /* LATIN SMALL LETTER OI */
case 0x01A5: /* LATIN SMALL LETTER P WITH HOOK */
case 0x01A8: /* LATIN SMALL LETTER TONE TWO */
case 0x01AA: /* LATIN LETTER REVERSED ESH LOOP */
case 0x01AB: /* LATIN SMALL LETTER T WITH PALATAL HOOK */
case 0x01AD: /* LATIN SMALL LETTER T WITH HOOK */
case 0x01B0: /* LATIN SMALL LETTER U WITH HORN */
case 0x01B4: /* LATIN SMALL LETTER Y WITH HOOK */
case 0x01B6: /* LATIN SMALL LETTER Z WITH STROKE */
case 0x01B9: /* LATIN SMALL LETTER EZH REVERSED */
case 0x01BA: /* LATIN SMALL LETTER EZH WITH TAIL */
case 0x01BD: /* LATIN SMALL LETTER TONE FIVE */
case 0x01BE: /* LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE */
case 0x01BF: /* LATIN LETTER WYNN */
case 0x01C6: /* LATIN SMALL LETTER DZ WITH CARON */
case 0x01C9: /* LATIN SMALL LETTER LJ */
case 0x01CC: /* LATIN SMALL LETTER NJ */
case 0x01CE: /* LATIN SMALL LETTER A WITH CARON */
case 0x01D0: /* LATIN SMALL LETTER I WITH CARON */
case 0x01D2: /* LATIN SMALL LETTER O WITH CARON */
case 0x01D4: /* LATIN SMALL LETTER U WITH CARON */
case 0x01D6: /* LATIN SMALL LETTER U WITH DIAERESIS AND MACRON */
case 0x01D8: /* LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE */
case 0x01DA: /* LATIN SMALL LETTER U WITH DIAERESIS AND CARON */
case 0x01DC: /* LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE */
case 0x01DD: /* LATIN SMALL LETTER TURNED E */
case 0x01DF: /* LATIN SMALL LETTER A WITH DIAERESIS AND MACRON */
case 0x01E1: /* LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON */
case 0x01E3: /* LATIN SMALL LETTER AE WITH MACRON */
case 0x01E5: /* LATIN SMALL LETTER G WITH STROKE */
case 0x01E7: /* LATIN SMALL LETTER G WITH CARON */
case 0x01E9: /* LATIN SMALL LETTER K WITH CARON */
case 0x01EB: /* LATIN SMALL LETTER O WITH OGONEK */
case 0x01ED: /* LATIN SMALL LETTER O WITH OGONEK AND MACRON */
case 0x01EF: /* LATIN SMALL LETTER EZH WITH CARON */
case 0x01F0: /* LATIN SMALL LETTER J WITH CARON */
case 0x01F3: /* LATIN SMALL LETTER DZ */
case 0x01F5: /* LATIN SMALL LETTER G WITH ACUTE */
case 0x01F9: /* LATIN SMALL LETTER N WITH GRAVE */
case 0x01FB: /* LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE */
case 0x01FD: /* LATIN SMALL LETTER AE WITH ACUTE */
case 0x01FF: /* LATIN SMALL LETTER O WITH STROKE AND ACUTE */
case 0x0201: /* LATIN SMALL LETTER A WITH DOUBLE GRAVE */
case 0x0203: /* LATIN SMALL LETTER A WITH INVERTED BREVE */
case 0x0205: /* LATIN SMALL LETTER E WITH DOUBLE GRAVE */
case 0x0207: /* LATIN SMALL LETTER E WITH INVERTED BREVE */
case 0x0209: /* LATIN SMALL LETTER I WITH DOUBLE GRAVE */
case 0x020B: /* LATIN SMALL LETTER I WITH INVERTED BREVE */
case 0x020D: /* LATIN SMALL LETTER O WITH DOUBLE GRAVE */
case 0x020F: /* LATIN SMALL LETTER O WITH INVERTED BREVE */
case 0x0211: /* LATIN SMALL LETTER R WITH DOUBLE GRAVE */
case 0x0213: /* LATIN SMALL LETTER R WITH INVERTED BREVE */
case 0x0215: /* LATIN SMALL LETTER U WITH DOUBLE GRAVE */
case 0x0217: /* LATIN SMALL LETTER U WITH INVERTED BREVE */
case 0x0219: /* LATIN SMALL LETTER S WITH COMMA BELOW */
case 0x021B: /* LATIN SMALL LETTER T WITH COMMA BELOW */
case 0x021D: /* LATIN SMALL LETTER YOGH */
case 0x021F: /* LATIN SMALL LETTER H WITH CARON */
case 0x0223: /* LATIN SMALL LETTER OU */
case 0x0225: /* LATIN SMALL LETTER Z WITH HOOK */
case 0x0227: /* LATIN SMALL LETTER A WITH DOT ABOVE */
case 0x0229: /* LATIN SMALL LETTER E WITH CEDILLA */
case 0x022B: /* LATIN SMALL LETTER O WITH DIAERESIS AND MACRON */
case 0x022D: /* LATIN SMALL LETTER O WITH TILDE AND MACRON */
case 0x022F: /* LATIN SMALL LETTER O WITH DOT ABOVE */
case 0x0231: /* LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON */
case 0x0233: /* LATIN SMALL LETTER Y WITH MACRON */
case 0x0250: /* LATIN SMALL LETTER TURNED A */
case 0x0251: /* LATIN SMALL LETTER ALPHA */
case 0x0252: /* LATIN SMALL LETTER TURNED ALPHA */
case 0x0253: /* LATIN SMALL LETTER B WITH HOOK */
case 0x0254: /* LATIN SMALL LETTER OPEN O */
case 0x0255: /* LATIN SMALL LETTER C WITH CURL */
case 0x0256: /* LATIN SMALL LETTER D WITH TAIL */
case 0x0257: /* LATIN SMALL LETTER D WITH HOOK */
case 0x0258: /* LATIN SMALL LETTER REVERSED E */
case 0x0259: /* LATIN SMALL LETTER SCHWA */
case 0x025A: /* LATIN SMALL LETTER SCHWA WITH HOOK */
case 0x025B: /* LATIN SMALL LETTER OPEN E */
case 0x025C: /* LATIN SMALL LETTER REVERSED OPEN E */
case 0x025D: /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */
case 0x025E: /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */
case 0x025F: /* LATIN SMALL LETTER DOTLESS J WITH STROKE */
case 0x0260: /* LATIN SMALL LETTER G WITH HOOK */
case 0x0261: /* LATIN SMALL LETTER SCRIPT G */
case 0x0262: /* LATIN LETTER SMALL CAPITAL G */
case 0x0263: /* LATIN SMALL LETTER GAMMA */
case 0x0264: /* LATIN SMALL LETTER RAMS HORN */
case 0x0265: /* LATIN SMALL LETTER TURNED H */
case 0x0266: /* LATIN SMALL LETTER H WITH HOOK */
case 0x0267: /* LATIN SMALL LETTER HENG WITH HOOK */
case 0x0268: /* LATIN SMALL LETTER I WITH STROKE */
case 0x0269: /* LATIN SMALL LETTER IOTA */
case 0x026A: /* LATIN LETTER SMALL CAPITAL I */
case 0x026B: /* LATIN SMALL LETTER L WITH MIDDLE TILDE */
case 0x026C: /* LATIN SMALL LETTER L WITH BELT */
case 0x026D: /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */
case 0x026E: /* LATIN SMALL LETTER LEZH */
case 0x026F: /* LATIN SMALL LETTER TURNED M */
case 0x0270: /* LATIN SMALL LETTER TURNED M WITH LONG LEG */
case 0x0271: /* LATIN SMALL LETTER M WITH HOOK */
case 0x0272: /* LATIN SMALL LETTER N WITH LEFT HOOK */
case 0x0273: /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */
case 0x0274: /* LATIN LETTER SMALL CAPITAL N */
case 0x0275: /* LATIN SMALL LETTER BARRED O */
case 0x0276: /* LATIN LETTER SMALL CAPITAL OE */
case 0x0277: /* LATIN SMALL LETTER CLOSED OMEGA */
case 0x0278: /* LATIN SMALL LETTER PHI */
case 0x0279: /* LATIN SMALL LETTER TURNED R */
case 0x027A: /* LATIN SMALL LETTER TURNED R WITH LONG LEG */
case 0x027B: /* LATIN SMALL LETTER TURNED R WITH HOOK */
case 0x027C: /* LATIN SMALL LETTER R WITH LONG LEG */
case 0x027D: /* LATIN SMALL LETTER R WITH TAIL */
case 0x027E: /* LATIN SMALL LETTER R WITH FISHHOOK */
case 0x027F: /* LATIN SMALL LETTER REVERSED R WITH FISHHOOK */
case 0x0280: /* LATIN LETTER SMALL CAPITAL R */
case 0x0281: /* LATIN LETTER SMALL CAPITAL INVERTED R */
case 0x0282: /* LATIN SMALL LETTER S WITH HOOK */
case 0x0283: /* LATIN SMALL LETTER ESH */
case 0x0284: /* LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK */
case 0x0285: /* LATIN SMALL LETTER SQUAT REVERSED ESH */
case 0x0286: /* LATIN SMALL LETTER ESH WITH CURL */
case 0x0287: /* LATIN SMALL LETTER TURNED T */
case 0x0288: /* LATIN SMALL LETTER T WITH RETROFLEX HOOK */
case 0x0289: /* LATIN SMALL LETTER U BAR */
case 0x028A: /* LATIN SMALL LETTER UPSILON */
case 0x028B: /* LATIN SMALL LETTER V WITH HOOK */
case 0x028C: /* LATIN SMALL LETTER TURNED V */
case 0x028D: /* LATIN SMALL LETTER TURNED W */
case 0x028E: /* LATIN SMALL LETTER TURNED Y */
case 0x028F: /* LATIN LETTER SMALL CAPITAL Y */
case 0x0290: /* LATIN SMALL LETTER Z WITH RETROFLEX HOOK */
case 0x0291: /* LATIN SMALL LETTER Z WITH CURL */
case 0x0292: /* LATIN SMALL LETTER EZH */
case 0x0293: /* LATIN SMALL LETTER EZH WITH CURL */
case 0x0294: /* LATIN LETTER GLOTTAL STOP */
case 0x0295: /* LATIN LETTER PHARYNGEAL VOICED FRICATIVE */
case 0x0296: /* LATIN LETTER INVERTED GLOTTAL STOP */
case 0x0297: /* LATIN LETTER STRETCHED C */
case 0x0298: /* LATIN LETTER BILABIAL CLICK */
case 0x0299: /* LATIN LETTER SMALL CAPITAL B */
case 0x029A: /* LATIN SMALL LETTER CLOSED OPEN E */
case 0x029B: /* LATIN LETTER SMALL CAPITAL G WITH HOOK */
case 0x029C: /* LATIN LETTER SMALL CAPITAL H */
case 0x029D: /* LATIN SMALL LETTER J WITH CROSSED-TAIL */
case 0x029E: /* LATIN SMALL LETTER TURNED K */
case 0x029F: /* LATIN LETTER SMALL CAPITAL L */
case 0x02A0: /* LATIN SMALL LETTER Q WITH HOOK */
case 0x02A1: /* LATIN LETTER GLOTTAL STOP WITH STROKE */
case 0x02A2: /* LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE */
case 0x02A3: /* LATIN SMALL LETTER DZ DIGRAPH */
case 0x02A4: /* LATIN SMALL LETTER DEZH DIGRAPH */
case 0x02A5: /* LATIN SMALL LETTER DZ DIGRAPH WITH CURL */
case 0x02A6: /* LATIN SMALL LETTER TS DIGRAPH */
case 0x02A7: /* LATIN SMALL LETTER TESH DIGRAPH */
case 0x02A8: /* LATIN SMALL LETTER TC DIGRAPH WITH CURL */
case 0x02A9: /* LATIN SMALL LETTER FENG DIGRAPH */
case 0x02AA: /* LATIN SMALL LETTER LS DIGRAPH */
case 0x02AB: /* LATIN SMALL LETTER LZ DIGRAPH */
case 0x02AC: /* LATIN LETTER BILABIAL PERCUSSIVE */
case 0x02AD: /* LATIN LETTER BIDENTAL PERCUSSIVE */
case 0x0390: /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
case 0x03AC: /* GREEK SMALL LETTER ALPHA WITH TONOS */
case 0x03AD: /* GREEK SMALL LETTER EPSILON WITH TONOS */
case 0x03AE: /* GREEK SMALL LETTER ETA WITH TONOS */