|
| 1 | +// based on https://github.com/microsoft/TypeScript/tree/master/scripts/regenerate-unicode-identifier-parts.js |
| 2 | + |
/**
 * Formats a single UTF-16 code unit as exactly four lowercase hex digits,
 * left-padded with zeros (e.g. 0x41 -> "0041").
 *
 * @param {number} i Integer in the range 0..0xffff.
 * @returns {string} Four-character hexadecimal string.
 * @throws {Error} If `i` is not an integer that fits in four hex digits.
 */
function toHex4Digits(i) {
  // Reject negative and fractional values up front: `toString(16)` would
  // otherwise yield text such as "-1" or "1.8", which pads to a garbage escape.
  if (!Number.isInteger(i) || i < 0 || i > 0xffff) {
    throw new Error('Invalid Hex4Digits value');
  }
  return i.toString(16).padStart(4, '0');
}
| 12 | + |
/**
 * An inclusive run of consecutive code points, rendered as a `\uXXXX` escape
 * or a `\uXXXX-\uYYYY` escape range.
 */
class NonSurrogateRange {
  /** @param {number} codePoint Initial code point; the range starts as a single point. */
  constructor(codePoint) {
    this.firstCodePoint = codePoint;
    this.lastCodePoint = codePoint;
  }

  /** @returns {string} Escape text for one code point, or `first-last` when the run is longer. */
  toString() {
    const start = '\\u' + toHex4Digits(this.firstCodePoint);
    if (this.lastCodePoint === this.firstCodePoint) {
      return start;
    }
    return start + '-\\u' + toHex4Digits(this.lastCodePoint);
  }
}
| 27 | + |
/**
 * All trail-surrogate ranges that share one lead surrogate, rendered as the
 * lead escape followed by a bracketed list of the trail ranges.
 */
class LeadSurrogateRange {
  /** @param {number} leadSurrogate Lead surrogate code unit for this group. */
  constructor(leadSurrogate) {
    this.leadSurrogate = leadSurrogate;
    /** @type {TrailSurrogateRange[]} */
    this.ranges = [];
  }

  /** @returns {string} `\uLEAD[...]` with every trail range concatenated inside the brackets. */
  toString() {
    const lead = toHex4Digits(this.leadSurrogate);
    const trails = this.ranges.join('');
    return `\\u${lead}[${trails}]`;
  }
}
| 46 | + |
/**
 * An inclusive run of consecutive trail surrogates, rendered as a `\uXXXX`
 * escape or a `\uXXXX-\uYYYY` escape range.
 */
class TrailSurrogateRange {
  /** @param {number} trailSurrogate Initial trail surrogate; the range starts as a single unit. */
  constructor(trailSurrogate) {
    this.firstTrailSurrogate = trailSurrogate;
    this.lastTrailSurrogate = trailSurrogate;
  }

  /** @returns {string} Escape text for one code unit, or `first-last` when the run is longer. */
  toString() {
    const start = '\\u' + toHex4Digits(this.firstTrailSurrogate);
    if (this.lastTrailSurrogate === this.firstTrailSurrogate) {
      return start;
    }
    return start + '-\\u' + toHex4Digits(this.lastTrailSurrogate);
  }
}
| 61 | + |
/**
 * Accumulates strictly increasing code points and renders them as a
 * regex-style pattern: code points up to MAX_UNICODE_NON_SURROGATE go into one
 * bracketed class of `\uXXXX` escapes, and higher code points are written as
 * UTF-16 surrogate pairs grouped by lead surrogate.
 */
class Writer {
  constructor() {
    /** @type {number} Last code point pushed; -1 until the first push. */
    this.lastCodePoint = -1;
    /** @type {NonSurrogateRange[]} */
    this.nonSurrogateRanges = [];
    /** @type {LeadSurrogateRange[]} */
    this.surrogateRanges = [];
    /** @type {NonSurrogateRange | undefined} Range currently being extended. */
    this.nonSurrogateRange = undefined;
    /** @type {LeadSurrogateRange | undefined} Lead group currently being filled. */
    this.leadSurrogateRange = undefined;
    /** @type {TrailSurrogateRange | undefined} Trail range currently being extended. */
    this.trailSurrogateRange = undefined;
  }

  /**
   * Appends a code point, extending the current range when it is adjacent.
   *
   * @param {number} codePoint Must be greater than every code point pushed so far.
   * @throws {Error} If `codePoint` is not strictly greater than the previous one.
   */
  push(codePoint) {
    if (codePoint <= this.lastCodePoint) {
      throw new Error('Code points must be added in order.');
    }
    this.lastCodePoint = codePoint;

    // Inclusive comparison: the maximum itself (0xffff) is still a single
    // code unit. Sending it down the surrogate path would compute a negative
    // offset and emit a bogus pair.
    if (codePoint <= MAX_UNICODE_NON_SURROGATE) {
      if (
        this.nonSurrogateRange &&
        this.nonSurrogateRange.lastCodePoint === codePoint - 1
      ) {
        this.nonSurrogateRange.lastCodePoint = codePoint;
        return;
      }
      this.nonSurrogateRange = new NonSurrogateRange(codePoint);
      this.nonSurrogateRanges.push(this.nonSurrogateRange);
      return;
    }

    // Split the supplementary code point into its UTF-16 surrogate pair.
    const offset = codePoint - 0x10000;
    const leadSurrogate = Math.floor(offset / 0x400) + 0xd800;
    const trailSurrogate = (offset % 0x400) + 0xdc00;

    if (
      !this.leadSurrogateRange ||
      this.leadSurrogateRange.leadSurrogate !== leadSurrogate
    ) {
      // A new lead surrogate starts a fresh group, so no trail range carries over.
      this.trailSurrogateRange = undefined;
      this.leadSurrogateRange = new LeadSurrogateRange(leadSurrogate);
      this.surrogateRanges.push(this.leadSurrogateRange);
    }

    if (
      this.trailSurrogateRange &&
      this.trailSurrogateRange.lastTrailSurrogate === trailSurrogate - 1
    ) {
      this.trailSurrogateRange.lastTrailSurrogate = trailSurrogate;
      return;
    }

    this.trailSurrogateRange = new TrailSurrogateRange(trailSurrogate);
    this.leadSurrogateRange.ranges.push(this.trailSurrogateRange);
  }

  /**
   * @returns {string} `([bmp]|pairs)` when both parts exist, `[bmp]` or
   *   `(pairs)` when only one does, and the empty string when nothing was pushed.
   */
  toString() {
    const first = this.nonSurrogateRanges.join('');
    const second = this.surrogateRanges.join('|');
    if (first && second) {
      return `([${first}]|${second})`;
    }
    if (first) {
      return `[${first}]`;
    }
    if (second) {
      return `(${second})`;
    }
    return '';
  }
}
| 131 | + |
// Highest code point that fits in a single UTF-16 code unit.
const MAX_UNICODE_NON_SURROGATE = 0xffff;
// Highest code point visited by the scan below.
const MAX_UNICODE_CODEPOINT = 0x10ffff;

// Compile each pattern once; the predicates below run for every code point.
const ID_START_PATTERN = /\p{ID_Start}/u;
const ID_CONTINUE_PATTERN = /\p{ID_Continue}/u;

/** @param {string} c */
const isStart = (c) => ID_START_PATTERN.test(c);
/** @param {string} c */
const isContinue = (c) => ID_CONTINUE_PATTERN.test(c);

const idStartWriter = new Writer();
const idContinueWriter = new Writer();

// Inclusive upper bound so the final code point is examined as well.
for (let cp = 0; cp <= MAX_UNICODE_CODEPOINT; cp++) {
  const ch = String.fromCodePoint(cp);
  if (isStart(ch)) {
    idStartWriter.push(cp);
  }
  if (isContinue(ch)) {
    idContinueWriter.push(cp);
  }
}

// Emit a header recording the generating runtime, then the two patterns.
console.log(`/**
* Generated by scripts/generate-unicode-id-parts.js on node ${
  process.version
} with unicode ${process.versions.unicode}
* based on http://www.unicode.org/reports/tr31/ and https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
* U_ID_START corresponds to the ID_Start property, and U_ID_CONTINUE corresponds to ID_Continue property.
*/`);
console.log('U_ID_START ' + idStartWriter.toString());
console.log('U_ID_CONTINUE ' + idContinueWriter.toString());
0 commit comments