-- Register this file in the global 'modules' table; the table is created
-- here when no earlier module has done so.
modules = modules or { }

modules['char-ini'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14local utfchar , utfbyte , utfvalues , ustring , utotable = utf . char , utf . byte , utf . values , utf . ustring , utf . totable
15local concat , unpack , tohash , insert = table . concat , table . unpack , table . tohash , table . insert
16local next , tonumber , type , rawget , rawset = next , tonumber , type , rawget , rawset
17local format , lower , gsub , find = string . format , string . lower , string . gsub , string . find
18local P , R , S , C , Cs , Ct , Cc , V = lpeg . P , lpeg . R , lpeg . S , lpeg . C , lpeg . Cs , lpeg . Ct , lpeg . Cc , lpeg . V
19local formatters = string . formatters
20
21if not characters then require ( " char-def " ) end
22
23local lpegpatterns = lpeg . patterns
24local lpegmatch = lpeg . match
25local utf8byte = lpegpatterns . utf8byte
26local utf8character = lpegpatterns . utf8character
27
28local utfchartabletopattern = lpeg . utfchartabletopattern
29
30local allocate = utilities . storage . allocate
31local mark = utilities . storage . mark
32
33local setmetatableindex = table . setmetatableindex
34
-- Tracing switch for character definitions; it is toggled through the
-- "characters.defining" tracker.
local trace_defining = false

trackers.register("characters.defining", function(v)
    -- Update the local switch declared above. Assigning to the undeclared
    -- name 'characters_defining' only created a stray global and left
    -- 'trace_defining' permanently false.
    trace_defining = v
end)

-- Reporter obtained from logs.reporter("characters"); used for the messages
-- issued by this file.
local report_defining = logs.reporter("characters")
38
39
47
48
49
50
51
52
53
-- Make sure the global 'characters' namespace exists and keep local
-- shortcuts to it and to its 'data' table.
characters = characters or { }
local characters = characters
local data       = characters.data

-- Without the data table nothing below can work: report and quit.
-- Otherwise hand the table to 'mark' (utilities.storage.mark).
if not data then
    report_defining("fatal error: 'char-def.lua' is not loaded")
    os.exit()
else
    mark(data)
end
64
65
68
-- When running under 'context' and no private table is present yet, load
-- "char-prv" and copy every entry of characters.private into the main data
-- table (entries with the same key are overwritten).
if context and not characters.private then

    require("char-prv")

    local privates = characters.private

    for slot, entry in next, privates do
        data[slot] = entry
    end

end
78
79
82
-- Pattern for hexadecimal notation: a "0x" or "U+" prefix followed by one
-- or more characters from 0-9 / A-F that must run up to the end of the
-- string. The matched digits are converted with tonumber(s,16), so a
-- successful match yields a number.
local pattern  do

    local prefix    = P("0x") + P("U+")
    local hexdigits = R("09","AF")^1 * P(-1)

    local function hextonumber(s)
        return tonumber(s,16)
    end

    pattern = prefix * (hexdigits / hextonumber)

end

lpegpatterns.chartonumber = pattern
86
--- Convert a character specification into a number.
--
-- * A string in "0x..." / "U+..." notation (as matched by 'pattern') gives
--   the number encoded by its hexadecimal digits.
-- * Any other string is passed to utfbyte; when that yields nothing the
--   result is 0.
-- * A non-string value is returned as-is, with nil/false mapped to 0.
--
-- @param k string, number or nil
-- @return the resulting number (or the original non-string value)
local function chartonumber(k)
    if type(k) ~= "string" then
        return k or 0
    end
    local u = lpegmatch(pattern,k)
    if u then
        -- The pattern already applies tonumber(s,16), so 'u' is the number
        -- we want. Feeding it to utfbyte (which expects a string) gave the
        -- byte value of its first decimal digit instead.
        return u
    end
    return utfbyte(k) or 0
end
99
--- Convert a number or a hexadecimal notation into a character string.
--
-- * A number is passed to utfchar; an empty string is returned when that
--   yields nothing.
-- * Any other value is matched against 'pattern' ("0x..." / "U+...");
--   on a match the resulting number is passed to utfchar, otherwise the
--   value is returned unchanged.
--
-- @param k number or string
-- @return string (or the unchanged input when it is not recognized)
local function charfromnumber(k)
    if type(k) ~= "number" then
        local code = lpegmatch(pattern,k)
        if not code then
            return k
        end
        return utfchar(code)
    end
    return utfchar(k) or ""
end
112
113
114
-- Publish both converters in the 'characters' namespace.
characters.fromnumber = charfromnumber
characters.tonumber   = chartonumber

-- Shared entry used by the data lookup for slots it cannot resolve.
local private = { description = "PRIVATE SLOT" }

-- List of range descriptors; also reachable as characters.ranges.
local ranges = allocate()

characters.ranges = ranges
124
-- Lookup handler for keys that are missing in the data table.
--
-- String keys are first turned into a number (hexadecimal notation via
-- 'pattern', otherwise utfbyte) and looked up again without triggering this
-- handler. Numbers below 0xF0000 are then checked against the registered
-- ranges: the first range that contains the number and has an extender
-- produces the entry, which is stored in the table before it is returned.
-- Everything else resolves to the shared 'private' entry.
setmetatableindex(data, function(t,k)
    local kind = type(k)
    if kind == "string" then
        k = lpegmatch(pattern,k) or utfbyte(k)
        if not k then
            return private
        end
        local known = rawget(t,k)
        if known then
            return known
        end
        -- continue with the numeric lookup below
        kind = "number"
    end
    if kind == "number" and k < 0xF0000 then
        for i=1,#ranges do
            local range = ranges[i]
            if range.first <= k and k <= range.last then
                local extend = range.extender
                if extend then
                    local entry = extend(k)
                    -- cache the result so the handler is not called again
                    t[k] = entry
                    return entry
                end
            end
        end
    end
    return private
end)
155
-- Metatable shared by all variant selector entries. The common properties
-- live under __index so that they are found on every entry that gets this
-- metatable; as plain fields of the metatable itself they would never be
-- visible through the entry.
local variant_selector_metatable = {
    __index = {
        category  = "mn",
        cjkwd     = "a",
        direction = "nsm",
        linebreak = "cm",
    },
}
162
163
164
-- Formatter for the description of a variant selector entry.
local f_variant = formatters["VARIATION SELECTOR-0x%04X"]

-- Range 0xFE00-0xFE0F: entries are created on demand by the extender; the
-- number in the description starts at 0x0001 for the first slot.
do

    local first, last = 0xFE00, 0xFE0F

    local function extender(k)
        return setmetatable({
            unicodeslot = k,
            description = f_variant(k - first + 0x0001),
        }, variant_selector_metatable)
    end

    insert(characters.ranges, {
        name     = "variant selector",
        first    = first,
        last     = last,
        extender = extender,
    })

end
180
-- Range 0xE0100-0xE01EF: entries are created on demand by the extender; the
-- number in the description starts at 0x0011 for the first slot.
do

    local first, last = 0xE0100, 0xE01EF

    local function extender(k)
        return setmetatable({
            unicodeslot = k,
            description = f_variant(k - first + 0x0011),
        }, variant_selector_metatable)
    end

    insert(characters.ranges, {
        name     = "variant selector extension",
        first    = first,
        last     = last,
        extender = extender,
    })

end
194
195local blocks = allocate {
196 [ " adlam " ] = { first = 0x1E900 , last = 0x1E95F , description = " Adlam " } ,
197 [ " aegeannumbers " ] = { first = 0x10100 , last = 0x1013F , description = " Aegean Numbers " } ,
198 [ " ahom " ] = { first = 0x11700 , last = 0x1173F , description = " Ahom " } ,
199 [ " alchemicalsymbols " ] = { first = 0x1F700 , last = 0x1F77F , description = " Alchemical Symbols " } ,
200 [ " alphabeticpresentationforms " ] = { first = 0x0FB00 , last = 0x0FB4F , otf = " latn " , description = " Alphabetic Presentation Forms " } ,
201 [ " anatolianhieroglyphs " ] = { first = 0x14400 , last = 0x1467F , description = " Anatolian Hieroglyphs " } ,
202 [ " ancientgreekmusicalnotation " ] = { first = 0x1D200 , last = 0x1D24F , otf = " grek " , description = " Ancient Greek Musical Notation " } ,
203 [ " ancientgreeknumbers " ] = { first = 0x10140 , last = 0x1018F , otf = " grek " , description = " Ancient Greek Numbers " } ,
204 [ " ancientsymbols " ] = { first = 0x10190 , last = 0x101CF , otf = " grek " , description = " Ancient Symbols " } ,
205 [ " arabic " ] = { first = 0x00600 , last = 0x006FF , otf = " arab " , description = " Arabic " } ,
206 [ " arabicextendeda " ] = { first = 0x008A0 , last = 0x008FF , description = " Arabic Extended-A " } ,
207 [ " arabicmathematicalalphabeticsymbols " ] = { first = 0x1EE00 , last = 0x1EEFF , description = " Arabic Mathematical Alphabetic Symbols " } ,
208 [ " arabicpresentationformsa " ] = { first = 0x0FB50 , last = 0x0FDFF , otf = " arab " , description = " Arabic Presentation Forms-A " } ,
209 [ " arabicpresentationformsb " ] = { first = 0x0FE70 , last = 0x0FEFF , otf = " arab " , description = " Arabic Presentation Forms-B " } ,
210 [ " arabicsupplement " ] = { first = 0x00750 , last = 0x0077F , otf = " arab " , description = " Arabic Supplement " } ,
211 [ " armenian " ] = { first = 0x00530 , last = 0x0058F , otf = " armn " , description = " Armenian " } ,
212 [ " arrows " ] = { first = 0x02190 , last = 0x021FF , description = " Arrows " } ,
213 [ " avestan " ] = { first = 0x10B00 , last = 0x10B3F , description = " Avestan " } ,
214 [ " balinese " ] = { first = 0x01B00 , last = 0x01B7F , otf = " bali " , description = " Balinese " } ,
215 [ " bamum " ] = { first = 0x0A6A0 , last = 0x0A6FF , description = " Bamum " } ,
216 [ " bamumsupplement " ] = { first = 0x16800 , last = 0x16A3F , description = " Bamum Supplement " } ,
217 [ " basiclatin " ] = { first = 0x00000 , last = 0x0007F , otf = " latn " , description = " Basic Latin " } ,
218 [ " bassavah " ] = { first = 0x16AD0 , last = 0x16AFF , description = " Bassa Vah " } ,
219 [ " batak " ] = { first = 0x01BC0 , last = 0x01BFF , description = " Batak " } ,
220 [ " bengali " ] = { first = 0x00980 , last = 0x009FF , otf = " beng " , description = " Bengali " } ,
221 [ " bhaiksuki " ] = { first = 0x11C00 , last = 0x11C6F , description = " Bhaiksuki " } ,
222 [ " blockelements " ] = { first = 0x02580 , last = 0x0259F , otf = " bopo " , description = " Block Elements " } ,
223 [ " bopomofo " ] = { first = 0x03100 , last = 0x0312F , otf = " bopo " , description = " Bopomofo " } ,
224 [ " bopomofoextended " ] = { first = 0x031A0 , last = 0x031BF , otf = " bopo " , description = " Bopomofo Extended " } ,
225 [ " boxdrawing " ] = { first = 0x02500 , last = 0x0257F , description = " Box Drawing " } ,
226 [ " brahmi " ] = { first = 0x11000 , last = 0x1107F , description = " Brahmi " } ,
227 [ " braillepatterns " ] = { first = 0x02800 , last = 0x028FF , otf = " brai " , description = " Braille Patterns " } ,
228 [ " buginese " ] = { first = 0x01A00 , last = 0x01A1F , otf = " bugi " , description = " Buginese " } ,
229 [ " buhid " ] = { first = 0x01740 , last = 0x0175F , otf = " buhd " , description = " Buhid " } ,
230 [ " byzantinemusicalsymbols " ] = { first = 0x1D000 , last = 0x1D0FF , otf = " byzm " , description = " Byzantine Musical Symbols " } ,
231 [ " carian " ] = { first = 0x102A0 , last = 0x102DF , description = " Carian " } ,
232 [ " caucasianalbanian " ] = { first = 0x10530 , last = 0x1056F , description = " Caucasian Albanian " } ,
233 [ " chakma " ] = { first = 0x11100 , last = 0x1114F , description = " Chakma " } ,
234 [ " cham " ] = { first = 0x0AA00 , last = 0x0AA5F , description = " Cham " } ,
235 [ " cherokee " ] = { first = 0x013A0 , last = 0x013FF , otf = " cher " , description = " Cherokee " } ,
236 [ " cherokeesupplement " ] = { first = 0x0AB70 , last = 0x0ABBF , description = " Cherokee Supplement " } ,
237 [ " chesssymbols " ] = { first = 0x1FA00 , last = 0x1FA6F , description = " Chess Symbols " } ,
238 [ " chorasmian " ] = { first = 0x10FB0 , last = 0x10FDF , description = " Chorasmian " } ,
239 [ " cjkcompatibility " ] = { first = 0x03300 , last = 0x033FF , otf = " hang " , description = " CJK Compatibility " } ,
240 [ " cjkcompatibilityforms " ] = { first = 0x0FE30 , last = 0x0FE4F , otf = " hang " , description = " CJK Compatibility Forms " } ,
241 [ " cjkcompatibilityideographs " ] = { first = 0x0F900 , last = 0x0FAFF , otf = " hang " , description = " CJK Compatibility Ideographs " } ,
242 [ " cjkcompatibilityideographssupplement " ] = { first = 0x2F800 , last = 0x2FA1F , otf = " hang " , description = " CJK Compatibility Ideographs Supplement " } ,
243 [ " cjkradicalssupplement " ] = { first = 0x02E80 , last = 0x02EFF , otf = " hang " , description = " CJK Radicals Supplement " } ,
244 [ " cjkstrokes " ] = { first = 0x031C0 , last = 0x031EF , otf = " hang " , description = " CJK Strokes " } ,
245 [ " cjksymbolsandpunctuation " ] = { first = 0x03000 , last = 0x0303F , otf = " hang " , description = " CJK Symbols and Punctuation " } ,
246 [ " cjkunifiedideographs " ] = { first = 0x04E00 , last = 0x09FFF , otf = " hang " , description = " CJK Unified Ideographs " , catcode = " letter " } ,
247 [ " cjkunifiedideographsextensiona " ] = { first = 0x03400 , last = 0x04DBF , otf = " hang " , description = " CJK Unified Ideographs Extension A " } ,
248 [ " cjkunifiedideographsextensionb " ] = { first = 0x20000 , last = 0x2A6DF , otf = " hang " , description = " CJK Unified Ideographs Extension B " } ,
249 [ " cjkunifiedideographsextensionc " ] = { first = 0x2A700 , last = 0x2B73F , description = " CJK Unified Ideographs Extension C " } ,
250 [ " cjkunifiedideographsextensiond " ] = { first = 0x2B740 , last = 0x2B81F , description = " CJK Unified Ideographs Extension D " } ,
251 [ " cjkunifiedideographsextensione " ] = { first = 0x2B820 , last = 0x2CEAF , description = " CJK Unified Ideographs Extension E " } ,
252 [ " cjkunifiedideographsextensionf " ] = { first = 0x2CEB0 , last = 0x2EBEF , description = " CJK Unified Ideographs Extension F " } ,
253 [ " cjkunifiedideographsextensiong " ] = { first = 0x30000 , last = 0x3134F , description = " CJK Unified Ideographs Extension G " } ,
254 [ " combiningdiacriticalmarks " ] = { first = 0x00300 , last = 0x0036F , description = " Combining Diacritical Marks " } ,
255 [ " combiningdiacriticalmarksextended " ] = { first = 0x01AB0 , last = 0x01AFF , description = " Combining Diacritical Marks Extended " } ,
256 [ " combiningdiacriticalmarksforsymbols " ] = { first = 0x020D0 , last = 0x020FF , description = " Combining Diacritical Marks for Symbols " } ,
257 [ " combiningdiacriticalmarkssupplement " ] = { first = 0x01DC0 , last = 0x01DFF , description = " Combining Diacritical Marks Supplement " } ,
258 [ " combininghalfmarks " ] = { first = 0x0FE20 , last = 0x0FE2F , description = " Combining Half Marks " } ,
259 [ " commonindicnumberforms " ] = { first = 0x0A830 , last = 0x0A83F , description = " Common Indic Number Forms " } ,
260 [ " controlpictures " ] = { first = 0x02400 , last = 0x0243F , description = " Control Pictures " } ,
261 [ " coptic " ] = { first = 0x02C80 , last = 0x02CFF , otf = " copt " , description = " Coptic " } ,
262 [ " copticepactnumbers " ] = { first = 0x102E0 , last = 0x102FF , description = " Coptic Epact Numbers " } ,
263 [ " countingrodnumerals " ] = { first = 0x1D360 , last = 0x1D37F , description = " Counting Rod Numerals " } ,
264 [ " cuneiform " ] = { first = 0x12000 , last = 0x123FF , otf = " xsux " , description = " Cuneiform " } ,
265 [ " cuneiformnumbersandpunctuation " ] = { first = 0x12400 , last = 0x1247F , otf = " xsux " , description = " Cuneiform Numbers and Punctuation " } ,
266 [ " currencysymbols " ] = { first = 0x020A0 , last = 0x020CF , description = " Currency Symbols " } ,
267 [ " cypriotsyllabary " ] = { first = 0x10800 , last = 0x1083F , otf = " cprt " , description = " Cypriot Syllabary " } ,
268 [ " cyrillic " ] = { first = 0x00400 , last = 0x004FF , otf = " cyrl " , description = " Cyrillic " } ,
269 [ " cyrillicextendeda " ] = { first = 0x02DE0 , last = 0x02DFF , otf = " cyrl " , description = " Cyrillic Extended-A " } ,
270 [ " cyrillicextendedb " ] = { first = 0x0A640 , last = 0x0A69F , otf = " cyrl " , description = " Cyrillic Extended-B " } ,
271 [ " cyrillicextendedc " ] = { first = 0x01C80 , last = 0x01C8F , description = " Cyrillic Extended-C " } ,
272 [ " cyrillicsupplement " ] = { first = 0x00500 , last = 0x0052F , otf = " cyrl " , description = " Cyrillic Supplement " } ,
273 [ " deseret " ] = { first = 0x10400 , last = 0x1044F , otf = " dsrt " , description = " Deseret " } ,
274 [ " devanagari " ] = { first = 0x00900 , last = 0x0097F , otf = " deva " , description = " Devanagari " } ,
275 [ " devanagariextended " ] = { first = 0x0A8E0 , last = 0x0A8FF , description = " Devanagari Extended " } ,
276 [ " digitsarabicindic " ] = { first = 0x00660 , last = 0x00669 , math = true } ,
277
278 [ " digitsbold " ] = { first = 0x1D7CE , last = 0x1D7D8 , math = true } ,
279
280 [ " digitsdoublestruck " ] = { first = 0x1D7D8 , last = 0x1D7E2 , math = true } ,
281
282 [ " digitsextendedarabicindic " ] = { first = 0x006F0 , last = 0x006F9 , math = true } ,
283
284
285
286
287
288 [ " digitslatin " ] = { first = 0x00030 , last = 0x00039 , math = true } ,
289
290
291 [ " digitsmonospace " ] = { first = 0x1D7F6 , last = 0x1D80F , math = true } ,
292
293 [ " digitsnormal " ] = { first = 0x00030 , last = 0x00039 , math = true } ,
294
295 [ " digitssansserifbold " ] = { first = 0x1D7EC , last = 0x1D805 , math = true } ,
296 [ " digitssansserifnormal " ] = { first = 0x1D7E2 , last = 0x1D7EC , math = true } ,
297
298
299
300
301 [ " dingbats " ] = { first = 0x02700 , last = 0x027BF , description = " Dingbats " } ,
302 [ " divesakuru " ] = { first = 0x11900 , last = 0x1195F , description = " Dives Akuru " } ,
303 [ " dogra " ] = { first = 0x11800 , last = 0x1184F , description = " Dogra " } ,
304 [ " dominotiles " ] = { first = 0x1F030 , last = 0x1F09F , description = " Domino Tiles " } ,
305 [ " duployan " ] = { first = 0x1BC00 , last = 0x1BC9F , description = " Duployan " } ,
306 [ " earlydynasticcuneiform " ] = { first = 0x12480 , last = 0x1254F , description = " Early Dynastic Cuneiform " } ,
307 [ " egyptianhieroglyphformatcontrols " ] = { first = 0x13430 , last = 0x1343F , description = " Egyptian Hieroglyph Format Controls " } ,
308 [ " egyptianhieroglyphs " ] = { first = 0x13000 , last = 0x1342F , description = " Egyptian Hieroglyphs " } ,
309 [ " elbasan " ] = { first = 0x10500 , last = 0x1052F , description = " Elbasan " } ,
310 [ " elymaic " ] = { first = 0x10FE0 , last = 0x10FFF , description = " Elymaic " } ,
311 [ " emoticons " ] = { first = 0x1F600 , last = 0x1F64F , description = " Emoticons " } ,
312 [ " enclosedalphanumerics " ] = { first = 0x02460 , last = 0x024FF , description = " Enclosed Alphanumerics " } ,
313 [ " enclosedalphanumericsupplement " ] = { first = 0x1F100 , last = 0x1F1FF , description = " Enclosed Alphanumeric Supplement " } ,
314 [ " enclosedcjklettersandmonths " ] = { first = 0x03200 , last = 0x032FF , description = " Enclosed CJK Letters and Months " } ,
315 [ " enclosedideographicsupplement " ] = { first = 0x1F200 , last = 0x1F2FF , description = " Enclosed Ideographic Supplement " } ,
316 [ " ethiopic " ] = { first = 0x01200 , last = 0x0137F , otf = " ethi " , description = " Ethiopic " } ,
317 [ " ethiopicextended " ] = { first = 0x02D80 , last = 0x02DDF , otf = " ethi " , description = " Ethiopic Extended " } ,
318 [ " ethiopicextendeda " ] = { first = 0x0AB00 , last = 0x0AB2F , description = " Ethiopic Extended-A " } ,
319 [ " ethiopicsupplement " ] = { first = 0x01380 , last = 0x0139F , otf = " ethi " , description = " Ethiopic Supplement " } ,
320 [ " generalpunctuation " ] = { first = 0x02000 , last = 0x0206F , description = " General Punctuation " } ,
321 [ " geometricshapes " ] = { first = 0x025A0 , last = 0x025FF , math = true , description = " Geometric Shapes " } ,
322 [ " geometricshapesextended " ] = { first = 0x1F780 , last = 0x1F7FF , description = " Geometric Shapes Extended " } ,
323 [ " georgian " ] = { first = 0x010A0 , last = 0x010FF , otf = " geor " , description = " Georgian " } ,
324 [ " georgianextended " ] = { first = 0x01C90 , last = 0x01CBF , description = " Georgian Extended " } ,
325 [ " georgiansupplement " ] = { first = 0x02D00 , last = 0x02D2F , otf = " geor " , description = " Georgian Supplement " } ,
326 [ " glagolitic " ] = { first = 0x02C00 , last = 0x02C5F , otf = " glag " , description = " Glagolitic " } ,
327 [ " glagoliticsupplement " ] = { first = 0x1E000 , last = 0x1E02F , description = " Glagolitic Supplement " } ,
328 [ " gothic " ] = { first = 0x10330 , last = 0x1034F , otf = " goth " , description = " Gothic " } ,
329 [ " grantha " ] = { first = 0x11300 , last = 0x1137F , description = " Grantha " } ,
330 [ " greekandcoptic " ] = { first = 0x00370 , last = 0x003FF , otf = " grek " , description = " Greek and Coptic " } ,
331 [ " greekextended " ] = { first = 0x01F00 , last = 0x01FFF , otf = " grek " , description = " Greek Extended " } ,
332 [ " gujarati " ] = { first = 0x00A80 , last = 0x00AFF , otf = " gujr " , description = " Gujarati " } ,
333 [ " gunjalagondi " ] = { first = 0x11D60 , last = 0x11DAF , description = " Gunjala Gondi " } ,
334 [ " gurmukhi " ] = { first = 0x00A00 , last = 0x00A7F , otf = " guru " , description = " Gurmukhi " } ,
335 [ " halfwidthandfullwidthforms " ] = { first = 0x0FF00 , last = 0x0FFEF , description = " Halfwidth and Fullwidth Forms " } ,
336 [ " hangulcompatibilityjamo " ] = { first = 0x03130 , last = 0x0318F , otf = " jamo " , description = " Hangul Compatibility Jamo " } ,
337 [ " hanguljamo " ] = { first = 0x01100 , last = 0x011FF , otf = " jamo " , description = " Hangul Jamo " } ,
338 [ " hanguljamoextendeda " ] = { first = 0x0A960 , last = 0x0A97F , description = " Hangul Jamo Extended-A " } ,
339 [ " hanguljamoextendedb " ] = { first = 0x0D7B0 , last = 0x0D7FF , description = " Hangul Jamo Extended-B " } ,
340 [ " hangulsyllables " ] = { first = 0x0AC00 , last = 0x0D7AF , otf = " hang " , description = " Hangul Syllables " } ,
341 [ " hanifirohingya " ] = { first = 0x10D00 , last = 0x10D3F , description = " Hanifi Rohingya " } ,
342 [ " hanunoo " ] = { first = 0x01720 , last = 0x0173F , otf = " hano " , description = " Hanunoo " } ,
343 [ " hatran " ] = { first = 0x108E0 , last = 0x108FF , description = " Hatran " } ,
344 [ " hebrew " ] = { first = 0x00590 , last = 0x005FF , otf = " hebr " , description = " Hebrew " } ,
345 [ " highprivateusesurrogates " ] = { first = 0x0DB80 , last = 0x0DBFF , description = " High Private Use Surrogates " } ,
346 [ " highsurrogates " ] = { first = 0x0D800 , last = 0x0DB7F , description = " High Surrogates " } ,
347 [ " hiragana " ] = { first = 0x03040 , last = 0x0309F , otf = " kana " , description = " Hiragana " } ,
348 [ " ideographicdescriptioncharacters " ] = { first = 0x02FF0 , last = 0x02FFF , description = " Ideographic Description Characters " } ,
349 [ " ideographicsymbolsandpunctuation " ] = { first = 0x16FE0 , last = 0x16FFF , description = " Ideographic Symbols and Punctuation " } ,
350 [ " imperialaramaic " ] = { first = 0x10840 , last = 0x1085F , description = " Imperial Aramaic " } ,
351 [ " indicsiyaqnumbers " ] = { first = 0x1EC70 , last = 0x1ECBF , description = " Indic Siyaq Numbers " } ,
352 [ " inscriptionalpahlavi " ] = { first = 0x10B60 , last = 0x10B7F , description = " Inscriptional Pahlavi " } ,
353 [ " inscriptionalparthian " ] = { first = 0x10B40 , last = 0x10B5F , description = " Inscriptional Parthian " } ,
354 [ " ipaextensions " ] = { first = 0x00250 , last = 0x002AF , description = " IPA Extensions " } ,
355 [ " javanese " ] = { first = 0x0A980 , last = 0x0A9DF , description = " Javanese " } ,
356 [ " kaithi " ] = { first = 0x11080 , last = 0x110CF , description = " Kaithi " } ,
357 [ " kanaextendeda " ] = { first = 0x1B100 , last = 0x1B12F , description = " Kana Extended-A " } ,
358 [ " kanasupplement " ] = { first = 0x1B000 , last = 0x1B0FF , description = " Kana Supplement " } ,
359 [ " kanbun " ] = { first = 0x03190 , last = 0x0319F , description = " Kanbun " } ,
360 [ " kangxiradicals " ] = { first = 0x02F00 , last = 0x02FDF , description = " Kangxi Radicals " } ,
361 [ " kannada " ] = { first = 0x00C80 , last = 0x00CFF , otf = " knda " , description = " Kannada " } ,
362 [ " katakana " ] = { first = 0x030A0 , last = 0x030FF , otf = " kana " , description = " Katakana " } ,
363 [ " katakanaphoneticextensions " ] = { first = 0x031F0 , last = 0x031FF , otf = " kana " , description = " Katakana Phonetic Extensions " } ,
364 [ " kayahli " ] = { first = 0x0A900 , last = 0x0A92F , description = " Kayah Li " } ,
365 [ " kharoshthi " ] = { first = 0x10A00 , last = 0x10A5F , otf = " khar " , description = " Kharoshthi " } ,
366 [ " khitansmallscript " ] = { first = 0x18B00 , last = 0x18CFF , description = " Khitan Small Script " } ,
367 [ " khmer " ] = { first = 0x01780 , last = 0x017FF , otf = " khmr " , description = " Khmer " } ,
368 [ " khmersymbols " ] = { first = 0x019E0 , last = 0x019FF , otf = " khmr " , description = " Khmer Symbols " } ,
369 [ " khojki " ] = { first = 0x11200 , last = 0x1124F , description = " Khojki " } ,
370 [ " khudawadi " ] = { first = 0x112B0 , last = 0x112FF , description = " Khudawadi " } ,
371 [ " lao " ] = { first = 0x00E80 , last = 0x00EFF , otf = " lao " , description = " Lao " } ,
372 [ " latinextendeda " ] = { first = 0x00100 , last = 0x0017F , otf = " latn " , description = " Latin Extended-A " } ,
373 [ " latinextendedadditional " ] = { first = 0x01E00 , last = 0x01EFF , otf = " latn " , description = " Latin Extended Additional " } ,
374 [ " latinextendedb " ] = { first = 0x00180 , last = 0x0024F , otf = " latn " , description = " Latin Extended-B " } ,
375 [ " latinextendedc " ] = { first = 0x02C60 , last = 0x02C7F , otf = " latn " , description = " Latin Extended-C " } ,
376 [ " latinextendedd " ] = { first = 0x0A720 , last = 0x0A7FF , otf = " latn " , description = " Latin Extended-D " } ,
377 [ " latinextendede " ] = { first = 0x0AB30 , last = 0x0AB6F , description = " Latin Extended-E " } ,
378 [ " latinsupplement " ] = { first = 0x00080 , last = 0x000FF , otf = " latn " , description = " Latin-1 Supplement " } ,
379 [ " lepcha " ] = { first = 0x01C00 , last = 0x01C4F , description = " Lepcha " } ,
380 [ " letterlikesymbols " ] = { first = 0x02100 , last = 0x0214F , math = true , description = " Letterlike Symbols " } ,
381 [ " limbu " ] = { first = 0x01900 , last = 0x0194F , otf = " limb " , description = " Limbu " } ,
382 [ " lineara " ] = { first = 0x10600 , last = 0x1077F , description = " Linear A " } ,
383 [ " linearbideograms " ] = { first = 0x10080 , last = 0x100FF , otf = " linb " , description = " Linear B Ideograms " } ,
384 [ " linearbsyllabary " ] = { first = 0x10000 , last = 0x1007F , otf = " linb " , description = " Linear B Syllabary " } ,
385 [ " lisu " ] = { first = 0x0A4D0 , last = 0x0A4FF , description = " Lisu " } ,
386 [ " lisusupplement " ] = { first = 0x11FB0 , last = 0x11FBF , description = " Lisu Supplement " } ,
387 [ " lowercasebold " ] = { first = 0x1D41A , last = 0x1D433 , math = true } ,
388 [ " lowercaseboldfraktur " ] = { first = 0x1D586 , last = 0x1D59F , math = true } ,
389 [ " lowercasebolditalic " ] = { first = 0x1D482 , last = 0x1D49B , math = true } ,
390 [ " lowercaseboldscript " ] = { first = 0x1D4EA , last = 0x1D503 , math = true } ,
391 [ " lowercasedoublestruck " ] = { first = 0x1D552 , last = 0x1D56B , math = true } ,
392 [ " lowercasefraktur " ] = { first = 0x1D51E , last = 0x1D537 , math = true } ,
393 [ " lowercasegreekbold " ] = { first = 0x1D6C2 , last = 0x1D6DB , math = true } ,
394 [ " lowercasegreekbolditalic " ] = { first = 0x1D736 , last = 0x1D74F , math = true } ,
395 [ " lowercasegreekitalic " ] = { first = 0x1D6FC , last = 0x1D715 , math = true } ,
396 [ " lowercasegreeknormal " ] = { first = 0x003B1 , last = 0x003CA , math = true } ,
397 [ " lowercasegreeksansserifbold " ] = { first = 0x1D770 , last = 0x1D789 , math = true } ,
398 [ " lowercasegreeksansserifbolditalic " ] = { first = 0x1D7AA , last = 0x1D7C3 , math = true } ,
399 [ " lowercaseitalic " ] = { first = 0x1D44E , last = 0x1D467 , math = true } ,
400 [ " lowercasemonospace " ] = { first = 0x1D68A , last = 0x1D6A3 , math = true } ,
401 [ " lowercasenormal " ] = { first = 0x00061 , last = 0x0007A , math = true } ,
402 [ " lowercasesansserifbold " ] = { first = 0x1D5EE , last = 0x1D607 , math = true } ,
403 [ " lowercasesansserifbolditalic " ] = { first = 0x1D656 , last = 0x1D66F , math = true } ,
404 [ " lowercasesansserifitalic " ] = { first = 0x1D622 , last = 0x1D63B , math = true } ,
405 [ " lowercasesansserifnormal " ] = { first = 0x1D5BA , last = 0x1D5D3 , math = true } ,
406 [ " lowercasescript " ] = { first = 0x1D4B6 , last = 0x1D4CF , math = true } ,
407 [ " lowsurrogates " ] = { first = 0x0DC00 , last = 0x0DFFF , description = " Low Surrogates " } ,
408 [ " lycian " ] = { first = 0x10280 , last = 0x1029F , description = " Lycian " } ,
409 [ " lydian " ] = { first = 0x10920 , last = 0x1093F , description = " Lydian " } ,
410 [ " mahajani " ] = { first = 0x11150 , last = 0x1117F , description = " Mahajani " } ,
411 [ " mahjongtiles " ] = { first = 0x1F000 , last = 0x1F02F , description = " Mahjong Tiles " } ,
412 [ " makasar " ] = { first = 0x11EE0 , last = 0x11EFF , description = " Makasar " } ,
413 [ " malayalam " ] = { first = 0x00D00 , last = 0x00D7F , otf = " mlym " , description = " Malayalam " } ,
414 [ " mandaic " ] = { first = 0x00840 , last = 0x0085F , otf = " mand " , description = " Mandaic " } ,
415 [ " manichaean " ] = { first = 0x10AC0 , last = 0x10AFF , description = " Manichaean " } ,
416 [ " marchen " ] = { first = 0x11C70 , last = 0x11CBF , description = " Marchen " } ,
417 [ " masaramgondi " ] = { first = 0x11D00 , last = 0x11D5F , description = " Masaram Gondi " } ,
418 [ " mathematicalalphanumericsymbols " ] = { first = 0x1D400 , last = 0x1D7FF , math = true , description = " Mathematical Alphanumeric Symbols " } ,
419 [ " mathematicaloperators " ] = { first = 0x02200 , last = 0x022FF , math = true , description = " Mathematical Operators " } ,
420 [ " mayannumerals " ] = { first = 0x1D2E0 , last = 0x1D2FF , description = " Mayan Numerals " } ,
421 [ " medefaidrin " ] = { first = 0x16E40 , last = 0x16E9F , description = " Medefaidrin " } ,
422 [ " meeteimayek " ] = { first = 0x0ABC0 , last = 0x0ABFF , description = " Meetei Mayek " } ,
423 [ " meeteimayekextensions " ] = { first = 0x0AAE0 , last = 0x0AAFF , description = " Meetei Mayek Extensions " } ,
424 [ " mendekikakui " ] = { first = 0x1E800 , last = 0x1E8DF , description = " Mende Kikakui " } ,
425 [ " meroiticcursive " ] = { first = 0x109A0 , last = 0x109FF , description = " Meroitic Cursive " } ,
426 [ " meroitichieroglyphs " ] = { first = 0x10980 , last = 0x1099F , description = " Meroitic Hieroglyphs " } ,
427 [ " miao " ] = { first = 0x16F00 , last = 0x16F9F , description = " Miao " } ,
428 [ " miscellaneousmathematicalsymbolsa " ] = { first = 0x027C0 , last = 0x027EF , math = true , description = " Miscellaneous Mathematical Symbols-A " } ,
429 [ " miscellaneousmathematicalsymbolsb " ] = { first = 0x02980 , last = 0x029FF , math = true , description = " Miscellaneous Mathematical Symbols-B " } ,
430 [ " miscellaneoussymbols " ] = { first = 0x02600 , last = 0x026FF , math = true , description = " Miscellaneous Symbols " } ,
431 [ " miscellaneoussymbolsandarrows " ] = { first = 0x02B00 , last = 0x02BFF , math = true , description = " Miscellaneous Symbols and Arrows " } ,
432 [ " miscellaneoussymbolsandpictographs " ] = { first = 0x1F300 , last = 0x1F5FF , description = " Miscellaneous Symbols and Pictographs " } ,
433 [ " miscellaneoustechnical " ] = { first = 0x02300 , last = 0x023FF , math = true , description = " Miscellaneous Technical " } ,
434 [ " modi " ] = { first = 0x11600 , last = 0x1165F , description = " Modi " } ,
435 [ " modifiertoneletters " ] = { first = 0x0A700 , last = 0x0A71F , description = " Modifier Tone Letters " } ,
436 [ " mongolian " ] = { first = 0x01800 , last = 0x018AF , otf = " mong " , description = " Mongolian " } ,
437 [ " mongoliansupplement " ] = { first = 0x11660 , last = 0x1167F , description = " Mongolian Supplement " } ,
438 [ " mro " ] = { first = 0x16A40 , last = 0x16A6F , description = " Mro " } ,
439 [ " multani " ] = { first = 0x11280 , last = 0x112AF , description = " Multani " } ,
440 [ " musicalsymbols " ] = { first = 0x1D100 , last = 0x1D1FF , otf = " musc " , description = " Musical Symbols " } ,
441 [ " myanmar " ] = { first = 0x01000 , last = 0x0109F , otf = " mymr " , description = " Myanmar " } ,
442 [ " myanmarextendeda " ] = { first = 0x0AA60 , last = 0x0AA7F , description = " Myanmar Extended-A " } ,
443 [ " myanmarextendedb " ] = { first = 0x0A9E0 , last = 0x0A9FF , description = " Myanmar Extended-B " } ,
444 [ " nabataean " ] = { first = 0x10880 , last = 0x108AF , description = " Nabataean " } ,
445 [ " nandinagari " ] = { first = 0x119A0 , last = 0x119FF , description = " Nandinagari " } ,
446 [ " newa " ] = { first = 0x11400 , last = 0x1147F , description = " Newa " } ,
447 [ " newtailue " ] = { first = 0x01980 , last = 0x019DF , description = " New Tai Lue " } ,
448 [ " nko " ] = { first = 0x007C0 , last = 0x007FF , otf = " nko " , description = " NKo " } ,
449 [ " numberforms " ] = { first = 0x02150 , last = 0x0218F , description = " Number Forms " } ,
450 [ " nushu " ] = { first = 0x1B170 , last = 0x1B2FF , description = " Nushu " } ,
451 [ " nyiakengpuachuehmong " ] = { first = 0x1E100 , last = 0x1E14F , description = " Nyiakeng Puachue Hmong " } ,
452 [ " ogham " ] = { first = 0x01680 , last = 0x0169F , otf = " ogam " , description = " Ogham " } ,
453 [ " olchiki " ] = { first = 0x01C50 , last = 0x01C7F , description = " Ol Chiki " } ,
454 [ " oldhungarian " ] = { first = 0x10C80 , last = 0x10CFF , description = " Old Hungarian " } ,
455 [ " olditalic " ] = { first = 0x10300 , last = 0x1032F , otf = " ital " , description = " Old Italic " } ,
456 [ " oldnortharabian " ] = { first = 0x10A80 , last = 0x10A9F , description = " Old North Arabian " } ,
457 [ " oldpermic " ] = { first = 0x10350 , last = 0x1037F , description = " Old Permic " } ,
458 [ " oldpersian " ] = { first = 0x103A0 , last = 0x103DF , otf = " xpeo " , description = " Old Persian " } ,
459 [ " oldsogdian " ] = { first = 0x10F00 , last = 0x10F2F , description = " Old Sogdian " } ,
460 [ " oldsoutharabian " ] = { first = 0x10A60 , last = 0x10A7F , description = " Old South Arabian " } ,
461 [ " oldturkic " ] = { first = 0x10C00 , last = 0x10C4F , description = " Old Turkic " } ,
462 [ " opticalcharacterrecognition " ] = { first = 0x02440 , last = 0x0245F , description = " Optical Character Recognition " } ,
463 [ " oriya " ] = { first = 0x00B00 , last = 0x00B7F , otf = " orya " , description = " Oriya " } ,
464 [ " ornamentaldingbats " ] = { first = 0x1F650 , last = 0x1F67F , description = " Ornamental Dingbats " } ,
465 [ " osage " ] = { first = 0x104B0 , last = 0x104FF , description = " Osage " } ,
466 [ " osmanya " ] = { first = 0x10480 , last = 0x104AF , otf = " osma " , description = " Osmanya " } ,
467 [ " ottomansiyaqnumbers " ] = { first = 0x1ED00 , last = 0x1ED4F , description = " Ottoman Siyaq Numbers " } ,
468 [ " pahawhhmong " ] = { first = 0x16B00 , last = 0x16B8F , description = " Pahawh Hmong " } ,
469 [ " palmyrene " ] = { first = 0x10860 , last = 0x1087F , description = " Palmyrene " } ,
470 [ " paucinhau " ] = { first = 0x11AC0 , last = 0x11AFF , description = " Pau Cin Hau " } ,
471 [ " phagspa " ] = { first = 0x0A840 , last = 0x0A87F , otf = " phag " , description = " Phags-pa " } ,
472 [ " phaistosdisc " ] = { first = 0x101D0 , last = 0x101FF , description = " Phaistos Disc " } ,
473 [ " phoenician " ] = { first = 0x10900 , last = 0x1091F , otf = " phnx " , description = " Phoenician " } ,
474 [ " phoneticextensions " ] = { first = 0x01D00 , last = 0x01D7F , description = " Phonetic Extensions " } ,
475 [ " phoneticextensionssupplement " ] = { first = 0x01D80 , last = 0x01DBF , description = " Phonetic Extensions Supplement " } ,
476 [ " playingcards " ] = { first = 0x1F0A0 , last = 0x1F0FF , description = " Playing Cards " } ,
477 [ " privateusearea " ] = { first = 0x0E000 , last = 0x0F8FF , description = " Private Use Area " } ,
478 [ " psalterpahlavi " ] = { first = 0x10B80 , last = 0x10BAF , description = " Psalter Pahlavi " } ,
479 [ " rejang " ] = { first = 0x0A930 , last = 0x0A95F , description = " Rejang " } ,
480 [ " ruminumeralsymbols " ] = { first = 0x10E60 , last = 0x10E7F , description = " Rumi Numeral Symbols " } ,
481 [ " runic " ] = { first = 0x016A0 , last = 0x016FF , otf = " runr " , description = " Runic " } ,
482 [ " samaritan " ] = { first = 0x00800 , last = 0x0083F , description = " Samaritan " } ,
483 [ " saurashtra " ] = { first = 0x0A880 , last = 0x0A8DF , description = " Saurashtra " } ,
484 [ " sharada " ] = { first = 0x11180 , last = 0x111DF , description = " Sharada " } ,
485 [ " shavian " ] = { first = 0x10450 , last = 0x1047F , otf = " shaw " , description = " Shavian " } ,
486 [ " shorthandformatcontrols " ] = { first = 0x1BCA0 , last = 0x1BCAF , description = " Shorthand Format Controls " } ,
487 [ " siddham " ] = { first = 0x11580 , last = 0x115FF , description = " Siddham " } ,
488 [ " sinhala " ] = { first = 0x00D80 , last = 0x00DFF , otf = " sinh " , description = " Sinhala " } ,
489 [ " sinhalaarchaicnumbers " ] = { first = 0x111E0 , last = 0x111FF , description = " Sinhala Archaic Numbers " } ,
490 [ " smallformvariants " ] = { first = 0x0FE50 , last = 0x0FE6F , description = " Small Form Variants " } ,
491 [ " smallkanaextension " ] = { first = 0x1B130 , last = 0x1B16F , description = " Small Kana Extension " } ,
492 [ " sogdian " ] = { first = 0x10F30 , last = 0x10F6F , description = " Sogdian " } ,
493 [ " sorasompeng " ] = { first = 0x110D0 , last = 0x110FF , description = " Sora Sompeng " } ,
494 [ " soyombo " ] = { first = 0x11A50 , last = 0x11AAF , description = " Soyombo " } ,
495 [ " spacingmodifierletters " ] = { first = 0x002B0 , last = 0x002FF , description = " Spacing Modifier Letters " } ,
496 [ " specials " ] = { first = 0x0FFF0 , last = 0x0FFFF , description = " Specials " } ,
497 [ " sundanese " ] = { first = 0x01B80 , last = 0x01BBF , description = " Sundanese " } ,
498 [ " sundanesesupplement " ] = { first = 0x01CC0 , last = 0x01CCF , description = " Sundanese Supplement " } ,
499 [ " superscriptsandsubscripts " ] = { first = 0x02070 , last = 0x0209F , description = " Superscripts and Subscripts " } ,
500 [ " supplementalarrowsa " ] = { first = 0x027F0 , last = 0x027FF , math = true , description = " Supplemental Arrows-A " } ,
501 [ " supplementalarrowsb " ] = { first = 0x02900 , last = 0x0297F , math = true , description = " Supplemental Arrows-B " } ,
502 [ " supplementalarrowsc " ] = { first = 0x1F800 , last = 0x1F8FF , math = true , description = " Supplemental Arrows-C " } ,
503 [ " supplementalmathematicaloperators " ] = { first = 0x02A00 , last = 0x02AFF , math = true , description = " Supplemental Mathematical Operators " } ,
504 [ " supplementalpunctuation " ] = { first = 0x02E00 , last = 0x02E7F , description = " Supplemental Punctuation " } ,
505 [ " supplementalsymbolsandpictographs " ] = { first = 0x1F900 , last = 0x1F9FF , description = " Supplemental Symbols and Pictographs " } ,
506 [ " supplementaryprivateuseareaa " ] = { first = 0xF0000 , last = 0xFFFFF , description = " Supplementary Private Use Area-A " } ,
507 [ " supplementaryprivateuseareab " ] = { first = 0x100000 , last = 0x10FFFF , description = " Supplementary Private Use Area-B " } ,
508 [ " suttonsignwriting " ] = { first = 0x1D800 , last = 0x1DAAF , description = " Sutton SignWriting " } ,
509 [ " sylotinagri " ] = { first = 0x0A800 , last = 0x0A82F , otf = " sylo " , description = " Syloti Nagri " } ,
510 [ " symbolsandpictographsextendeda " ] = { first = 0x1FA70 , last = 0x1FAFF , description = " Symbols and Pictographs Extended-A " } ,
511 [ " symbolsforlegacycomputing " ] = { first = 0x1FB00 , last = 0x1FBFF , description = " Symbols for Legacy Computing " } ,
512 [ " syriac " ] = { first = 0x00700 , last = 0x0074F , otf = " syrc " , description = " Syriac " } ,
513 [ " syriacsupplement " ] = { first = 0x00860 , last = 0x0086F , description = " Syriac Supplement " } ,
514 [ " tagalog " ] = { first = 0x01700 , last = 0x0171F , otf = " tglg " , description = " Tagalog " } ,
515 [ " tagbanwa " ] = { first = 0x01760 , last = 0x0177F , otf = " tagb " , description = " Tagbanwa " } ,
516 [ " tags " ] = { first = 0xE0000 , last = 0xE007F , description = " Tags " } ,
517 [ " taile " ] = { first = 0x01950 , last = 0x0197F , otf = " tale " , description = " Tai Le " } ,
518 [ " taitham " ] = { first = 0x01A20 , last = 0x01AAF , description = " Tai Tham " } ,
519 [ " taiviet " ] = { first = 0x0AA80 , last = 0x0AADF , description = " Tai Viet " } ,
520 [ " taixuanjingsymbols " ] = { first = 0x1D300 , last = 0x1D35F , description = " Tai Xuan Jing Symbols " } ,
521 [ " takri " ] = { first = 0x11680 , last = 0x116CF , description = " Takri " } ,
522 [ " tamil " ] = { first = 0x00B80 , last = 0x00BFF , otf = " taml " , description = " Tamil " } ,
523 [ " tamilsupplement " ] = { first = 0x11FC0 , last = 0x11FFF , description = " Tamil Supplement " } ,
524 [ " tangut " ] = { first = 0x17000 , last = 0x187FF , description = " Tangut " } ,
525 [ " tangutsupplement " ] = { first = 0x18D00 , last = 0x18D8F , description = " Tangut Supplement " } ,
526 [ " tangutcomponents " ] = { first = 0x18800 , last = 0x18AFF , description = " Tangut Components " } ,
527 [ " telugu " ] = { first = 0x00C00 , last = 0x00C7F , otf = " telu " , description = " Telugu " } ,
528 [ " thaana " ] = { first = 0x00780 , last = 0x007BF , otf = " thaa " , description = " Thaana " } ,
529 [ " thai " ] = { first = 0x00E00 , last = 0x00E7F , otf = " thai " , description = " Thai " } ,
530 [ " tibetan " ] = { first = 0x00F00 , last = 0x00FFF , otf = " tibt " , description = " Tibetan " } ,
531 [ " tifinagh " ] = { first = 0x02D30 , last = 0x02D7F , otf = " tfng " , description = " Tifinagh " } ,
532 [ " tirhuta " ] = { first = 0x11480 , last = 0x114DF , description = " Tirhuta " } ,
533 [ " transportandmapsymbols " ] = { first = 0x1F680 , last = 0x1F6FF , description = " Transport and Map Symbols " } ,
534 [ " ugaritic " ] = { first = 0x10380 , last = 0x1039F , otf = " ugar " , description = " Ugaritic " } ,
535 [ " unifiedcanadianaboriginalsyllabics " ] = { first = 0x01400 , last = 0x0167F , otf = " cans " , description = " Unified Canadian Aboriginal Syllabics " } ,
536 [ " unifiedcanadianaboriginalsyllabicsextended " ] = { first = 0x018B0 , last = 0x018FF , description = " Unified Canadian Aboriginal Syllabics Extended " } ,
537 [ " uppercasebold " ] = { first = 0x1D400 , last = 0x1D419 , math = true } ,
538 [ " uppercaseboldfraktur " ] = { first = 0x1D56C , last = 0x1D585 , math = true } ,
539 [ " uppercasebolditalic " ] = { first = 0x1D468 , last = 0x1D481 , math = true } ,
540 [ " uppercaseboldscript " ] = { first = 0x1D4D0 , last = 0x1D4E9 , math = true } ,
541 [ " uppercasedoublestruck " ] = { first = 0x1D538 , last = 0x1D551 , math = true } ,
542 [ " uppercasefraktur " ] = { first = 0x1D504 , last = 0x1D51D , math = true } ,
543 [ " uppercasegreekbold " ] = { first = 0x1D6A8 , last = 0x1D6C1 , math = true } ,
544 [ " uppercasegreekbolditalic " ] = { first = 0x1D71C , last = 0x1D735 , math = true } ,
545 [ " uppercasegreekitalic " ] = { first = 0x1D6E2 , last = 0x1D6FB , math = true } ,
546 [ " uppercasegreeknormal " ] = { first = 0x00391 , last = 0x003AA , math = true } ,
547 [ " uppercasegreeksansserifbold " ] = { first = 0x1D756 , last = 0x1D76F , math = true } ,
548 [ " uppercasegreeksansserifbolditalic " ] = { first = 0x1D790 , last = 0x1D7A9 , math = true } ,
549 [ " uppercaseitalic " ] = { first = 0x1D434 , last = 0x1D44D , math = true } ,
550 [ " uppercasemonospace " ] = { first = 0x1D670 , last = 0x1D689 , math = true } ,
551 [ " uppercasenormal " ] = { first = 0x00041 , last = 0x0005A , math = true } ,
552 [ " uppercasesansserifbold " ] = { first = 0x1D5D4 , last = 0x1D5ED , math = true } ,
553 [ " uppercasesansserifbolditalic " ] = { first = 0x1D63C , last = 0x1D655 , math = true } ,
554 [ " uppercasesansserifitalic " ] = { first = 0x1D608 , last = 0x1D621 , math = true } ,
555 [ " uppercasesansserifnormal " ] = { first = 0x1D5A0 , last = 0x1D5B9 , math = true } ,
556 [ " uppercasescript " ] = { first = 0x1D49C , last = 0x1D4B5 , math = true } ,
557 [ " vai " ] = { first = 0x0A500 , last = 0x0A63F , description = " Vai " } ,
558 [ " variationselectors " ] = { first = 0x0FE00 , last = 0x0FE0F , description = " Variation Selectors " } ,
559 [ " variationselectorssupplement " ] = { first = 0xE0100 , last = 0xE01EF , description = " Variation Selectors Supplement " } ,
560 [ " vedicextensions " ] = { first = 0x01CD0 , last = 0x01CFF , description = " Vedic Extensions " } ,
561 [ " verticalforms " ] = { first = 0x0FE10 , last = 0x0FE1F , description = " Vertical Forms " } ,
562 [ " wancho " ] = { first = 0x1E2C0 , last = 0x1E2FF , description = " Wancho " } ,
563 [ " warangciti " ] = { first = 0x118A0 , last = 0x118FF , description = " Warang Citi " } ,
564 [ " yezidi " ] = { first = 0x10E80 , last = 0x10EBF , description = " Yezidi " } ,
565 [ " yijinghexagramsymbols " ] = { first = 0x04DC0 , last = 0x04DFF , otf = " yi " , description = " Yijing Hexagram Symbols " } ,
566 [ " yiradicals " ] = { first = 0x0A490 , last = 0x0A4CF , otf = " yi " , description = " Yi Radicals " } ,
567 [ " yisyllables " ] = { first = 0x0A000 , last = 0x0A48F , otf = " yi " , description = " Yi Syllables " } ,
568 [ " zanabazarsquare " ] = { first = 0x11A00 , last = 0x11A4F , description = " Zanabazar Square " } ,
569}
570
571
572
573
574
575
576
577
-- Some alphabet ranges have holes: each `gaps` table maps a slot of the
-- range to the code point that is used in its place.
-- NOTE(review): the targets look like Letterlike Symbols slots -- confirm.
for name, gaps in next, {
    lowercaseitalic = {
        [0x1D455] = 0x210E,
    },
    uppercasescript = {
        [0x1D49D] = 0x212C, [0x1D4A0] = 0x2130, [0x1D4A1] = 0x2131,
        [0x1D4A3] = 0x210B, [0x1D4A4] = 0x2110, [0x1D4A7] = 0x2112,
        [0x1D4A8] = 0x2133, [0x1D4AD] = 0x211B,
    },
    lowercasescript = {
        [0x1D4BA] = 0x212F, [0x1D4BC] = 0x210A, [0x1D4C4] = 0x2134,
    },
    uppercasefraktur = {
        [0x1D506] = 0x212D, [0x1D50B] = 0x210C, [0x1D50C] = 0x2111,
        [0x1D515] = 0x211C, [0x1D51D] = 0x2128,
    },
    uppercasedoublestruck = {
        [0x1D53A] = 0x2102, [0x1D53F] = 0x210D, [0x1D545] = 0x2115,
        [0x1D547] = 0x2119, [0x1D548] = 0x211A, [0x1D549] = 0x211D,
        [0x1D551] = 0x2124,
    },
} do
    blocks[name].gaps = gaps
end

characters.blocks = blocks
618
-- Returns the first and last code point of the block found under `name`,
-- or 0, 0 when the lookup yields nothing.
function characters.blockrange(name)
    local block = blocks[name]
    if not block then
        return 0, 0
    end
    return block.first, block.last
end
627
-- Fallback lookup for block names that are not stored verbatim: the name is
-- lowercased and stripped of everything but letters and digits, matching
-- the normalisation used by characters.getrange. Digits used to be stripped
-- too, which made names containing a digit unreachable; the digit-free key
-- is still tried afterwards so names that resolved before keep resolving.
-- A falsy key is returned as-is.
setmetatableindex(blocks, function(t, k)
    if not k then
        return k
    end
    local key   = lower((gsub(k, "[^a-zA-Z0-9]", "")))
    local block = rawget(t, key)
    if block == nil then
        block = rawget(t, (gsub(key, "%d", "")))
    end
    return block
end)
631
local otfscripts = utilities.storage.allocate()
characters.otfscripts = otfscripts

-- Lazily resolves a code point to the `otf` tag of the first block (in
-- table traversal order) whose range contains it, "dflt" when that block
-- has no tag. The whole range of the matching block is cached in one go.
setmetatableindex(otfscripts, function(cache, unicode)
    for _, block in next, blocks do
        local lo, hi = block.first, block.last
        if unicode >= lo and unicode <= hi then
            local tag = block.otf or "dflt"
            for slot = lo, hi do
                cache[slot] = tag
            end
            return tag
        end
    end
    -- no block matched: every block has been scanned, so remember the miss
    cache[unicode] = "dflt"
    return "dflt"
end)
651
local splitter1 = lpeg.splitat(S(":-"))
local splitter2 = lpeg.splitat(S("+-"), true)

-- Raw (metatable free) block lookup on the normalised name.
local function rawblock(name)
    return rawget(blocks, lower((gsub(name, "[^a-zA-Z0-9]", ""))))
end

-- Resolves `name` to a range and returns first, last, description, gaps.
-- Tried in order:
--   1. a block name                  -> the block's fields
--   2. with `expression` set: a plain number, or "<block><+|-><offset>"
--      where the offset is evaluated as Lua code
--   3. "<start>:<stop>" or "<start>-<stop>" (hex first, then decimal)
--   4. a single slot (hex first, then decimal)
-- A double quote in `name` is rewritten to "0x" before steps 2-4.
-- NOTE(review): step 2 feeds part of `name` to loadstring; confirm callers
-- never pass untrusted input.
function characters.getrange(name, expression)
    local block = rawblock(name)
    if block then
        return block.first, block.last, block.description, block.gaps
    end
    name = gsub(name, '"', "0x")
    if expression then
        local number = tonumber(name)
        if number then
            return number, number, nil
        end
        local base, offset = lpegmatch(splitter2, name)
        local shifted = rawblock(base)
        if shifted then
            local chunk = loadstring("return 0 " .. offset)
            if type(chunk) == "function" then
                local delta = chunk()
                if type(delta) == "number" then
                    return shifted.first + delta, shifted.last + delta, nil
                end
            end
        end
    end
    local from, upto = lpegmatch(splitter1, name)
    if from and upto then
        from = tonumber(from, 16) or tonumber(from)
        upto = tonumber(upto, 16) or tonumber(upto)
        if from and upto then
            return from, upto, nil
        end
    end
    local slot = tonumber(name, 16) or tonumber(name)
    return slot, slot, nil
end
691
692
693
694
-- Two letter category tag -> readable name.
local categorytags = allocate {
    -- letters
    ["lu"] = "Letter Uppercase",
    ["ll"] = "Letter Lowercase",
    ["lt"] = "Letter Titlecase",
    ["lm"] = "Letter Modifier",
    ["lo"] = "Letter Other",
    -- marks
    ["mn"] = "Mark Nonspacing",
    ["mc"] = "Mark Spacing Combining",
    ["me"] = "Mark Enclosing",
    -- numbers
    ["nd"] = "Number Decimal Digit",
    ["nl"] = "Number Letter",
    ["no"] = "Number Other",
    -- punctuation
    ["pc"] = "Punctuation Connector",
    ["pd"] = "Punctuation Dash",
    ["ps"] = "Punctuation Open",
    ["pe"] = "Punctuation Close",
    ["pi"] = "Punctuation Initial Quote",
    ["pf"] = "Punctuation Final Quote",
    ["po"] = "Punctuation Other",
    -- symbols
    ["sm"] = "Symbol Math",
    ["sc"] = "Symbol Currency",
    ["sk"] = "Symbol Modifier",
    ["so"] = "Symbol Other",
    -- separators
    ["zs"] = "Separator Space",
    ["zl"] = "Separator Line",
    ["zp"] = "Separator Paragraph",
    -- others
    ["cc"] = "Other Control",
    ["cf"] = "Other Format",
    ["cs"] = "Other Surrogate",
    ["co"] = "Other Private Use",
    ["cn"] = "Other Not Assigned",
}

-- Detail tag -> readable name.
local detailtags = allocate {
    ["sl"] = "small letter",
    ["bl"] = "big letter",
    ["im"] = "iteration mark",
    ["pm"] = "prolonged sound mark",
}

characters.categorytags = categorytags
characters.detailtags   = detailtags
737
738
739
740
741
742
-- Category sets: each table maps the category tags it contains to true.

local is_character = allocate ( tohash {
    "lu", "ll", "lt", "lm", "lo",
    "nd", "nl", "no", -- was listed twice, which tohash collapses anyway
    "mn",
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
    "sm", "sc", "sk", "so"
} )

local is_letter = allocate ( tohash {
    "ll", "lm", "lo", "lt", "lu"
} )

local is_command = allocate ( tohash {
    "cf", "zs"
} )

local is_spacing = allocate ( tohash {
    "zs", "zl", "zp",
} )

-- This used to list "ms", which is not one of the tags in categorytags; the
-- tag for spacing combining marks is "mc", so that set could never match.
local is_mark = allocate ( tohash {
    "mn", "mc",
} )

local is_punctuation = allocate ( tohash {
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
} )

local is_symbol = allocate ( tohash {
    "sm", "sc", "sk", "so",
} )

characters.is_character   = is_character
characters.is_letter      = is_letter
characters.is_command     = is_command
characters.is_spacing     = is_spacing
characters.is_mark        = is_mark
characters.is_punctuation = is_punctuation
characters.is_symbol      = is_symbol
785
-- Shared __index handler for the category sets: a numeric key is treated as
-- a code point and answered with the set's entry for that character's
-- category. A code point without data (or without category) yields a falsy
-- result instead of raising an error. Other key types are not resolved (so
-- no automatic conversion takes place) and yield nil.
local mti = function(t, k)
    if type(k) == "number" then
        local d = data[k]
        local c = d and d.category
        return c and rawget(t, c)
    end
end

setmetatableindex(characters.is_character,   mti)
setmetatableindex(characters.is_letter,      mti)
setmetatableindex(characters.is_command,     mti)
setmetatableindex(characters.is_spacing,     mti)
setmetatableindex(characters.is_punctuation, mti)
-- these two sets are exported like the others but were left without the
-- code point lookup
setmetatableindex(characters.is_mark,        mti)
setmetatableindex(characters.is_symbol,      mti)
800
801
802
803
804
805
806
807
808
809
-- Line break class tag -> readable name.
characters.linebreaks = allocate {

    bk  = "mandatory break",
    cr  = "carriage return",
    lf  = "line feed",
    cm  = "combining mark",
    nl  = "next line",
    sg  = "surrogate",
    wj  = "word joiner",
    zw  = "zero width space",
    gl  = "non-breaking (glue)",
    sp  = "space",
    zwj = "zero width joiner",

    b2  = "break opportunity before and after",
    ba  = "break after",
    bb  = "break before",
    hy  = "hyphen",
    cb  = "contingent break opportunity",

    cl  = "close punctuation",
    cp  = "close parenthesis",
    ex  = "exclamation/interrogation",
    ["in"] = "inseparable", -- `in` is a keyword, hence the bracket form
    ns  = "nonstarter",
    op  = "open punctuation",
    qu  = "quotation",

    is  = "infix numeric separator",
    nu  = "numeric",
    po  = "postfix numeric",
    pr  = "prefix numeric",
    sy  = "symbols allowing break after",

    ai  = "ambiguous (alphabetic or ideographic)",
    al  = "alphabetic",
    cj  = "conditional japanese starter",
    eb  = "emoji base",
    em  = "emoji modifier",
    h2  = "hangul lv syllable",
    h3  = "hangul lvt syllable",
    hl  = "hebrew letter",
    id  = "ideographic",
    jl  = "hangul l jamo",
    jv  = "hangul v jamo",
    jt  = "hangul t jamo",
    ri  = "regional indicator",
    sa  = "complex context dependent (south east asian)",
    xx  = "unknown",

}
871
872
873
874
875
-- Direction tag -> readable name.
characters.bidi = allocate {
    ["l"]   = "Left-to-Right",
    ["lre"] = "Left-to-Right Embedding",
    ["lro"] = "Left-to-Right Override",
    ["r"]   = "Right-to-Left",
    ["al"]  = "Right-to-Left Arabic",
    ["rle"] = "Right-to-Left Embedding",
    ["rlo"] = "Right-to-Left Override",
    ["pdf"] = "Pop Directional Format",
    ["en"]  = "European Number",
    ["es"]  = "European Number Separator",
    ["et"]  = "European Number Terminator",
    ["an"]  = "Arabic Number",
    ["cs"]  = "Common Number Separator",
    ["nsm"] = "Non-Spacing Mark",
    ["bn"]  = "Boundary Neutral",
    ["b"]   = "Paragraph Separator",
    ["s"]   = "Segment Separator",
    ["ws"]  = "Whitespace",
    ["on"]  = "Other Neutrals",
}
897
898
902
-- Two-way fallback map: every pair is stored in both directions, so a
-- lookup works from either member of the pair.
-- NOTE(review): pairs look like combining accent <-> spacing accent.
if not characters.fallbacks then

    characters.fallbacks = allocate {
        [0x0308] = 0x00A8, [0x00A8] = 0x0308,
        [0x0304] = 0x00AF, [0x00AF] = 0x0304,
        [0x0301] = 0x00B4, [0x00B4] = 0x0301,
        [0x0327] = 0x00B8, [0x00B8] = 0x0327,
        [0x0302] = 0x02C6, [0x02C6] = 0x0302,
        [0x030C] = 0x02C7, [0x02C7] = 0x030C,
        [0x0306] = 0x02D8, [0x02D8] = 0x0306,
        [0x0307] = 0x02D9, [0x02D9] = 0x0307,
        [0x030A] = 0x02DA, [0x02DA] = 0x030A,
        [0x0328] = 0x02DB, [0x02DB] = 0x0328,
        [0x0303] = 0x02DC, [0x02DC] = 0x0303,
        [0x030B] = 0x02DD, [0x02DD] = 0x030B,
        [0x0305] = 0x203E, [0x203E] = 0x0305,
        -- the reverse entry pointed at 0x0333, breaking the symmetry that
        -- all other pairs have
        [0x0300] = 0x0060, [0x0060] = 0x0300,
    }

end

if storage then
    storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks")
end
932
-- Three lazily filled lookup tables keyed by code point. Each one caches
-- one field of the character data; a missing entry or field is cached as
-- false.
do

    local function fieldcache(field)
        return function(cache, k)
            local entry = data[k]
            local value = entry and entry[field] or false
            cache[k] = value
            return value
        end
    end

    characters.directions = { }
    setmetatableindex(characters.directions, fieldcache("direction"))

    characters.mirrors = { }
    setmetatableindex(characters.mirrors, fieldcache("mirror"))

    characters.textclasses = { }
    setmetatableindex(characters.textclasses, fieldcache("textclass"))

end
977
978
982
983
984
-- Field accessors: each returns the requested field of the data entry for
-- `n`, or an empty string when the entry or the field is absent.

function characters.contextname(n)
    local entry = data[n]
    return entry and entry.contextname or ""
end

function characters.adobename(n)
    local entry = data[n]
    return entry and entry.adobename or ""
end

function characters.description(n)
    local entry = data[n]
    return entry and entry.description or ""
end
988
989
-- Returns the category tag of `n`, or its readable name from categorytags
-- when `verbose` is set. An empty string is returned when there is no
-- category; that now includes code points without a data entry, which
-- raised an error before (the sibling accessors already guard the entry).
function characters.category(n, verbose)
    local d = data[n]
    local c = d and d.category
    if not c then
        return ""
    elseif verbose then
        return categorytags[c]
    else
        return c
    end
end
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
-- Converts a code point, or a table of code points, to a string via
-- utfchar.
local function toutfstring(s)
    if type(s) ~= "table" then
        return utfchar(s)
    end
    return utfchar(unpack(s))
end

utf.tostring = toutfstring
1020
local categories = allocate()
characters.categories = categories

-- Cached key -> category lookup; the key itself is stored and returned when
-- there is no entry or no category. A falsy key yields nothing.
setmetatableindex(categories, function(cache, u)
    if not u then
        return
    end
    local entry    = data[u]
    local category = entry and entry.category or u
    cache[u] = category
    return category
end)
1024
1025
1026
1027
local lccodes = allocate()
local uccodes = allocate()
local shcodes = allocate()
local fscodes = allocate()

characters.lccodes = lccodes
characters.uccodes = uccodes
characters.shcodes = shcodes
characters.fscodes = fscodes

-- Cached code lookups. The result is the requested field of the data entry;
-- without one a string key falls back to utfbyte(key) and any other key to
-- itself. A falsy key yields nothing.
do

    local function codeindex(field)
        return function(cache, u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field]
                    or (type(u) == "string" and utfbyte(u))
                    or u
                cache[u] = code
                return code
            end
        end
    end

    setmetatableindex(lccodes, codeindex("lccode"))
    setmetatableindex(uccodes, codeindex("uccode"))
    setmetatableindex(shcodes, codeindex("shcode"))
    setmetatableindex(fscodes, codeindex("fscode"))

end
1037
local lcchars = allocate()
local ucchars = allocate()
local shchars = allocate()
local fschars = allocate()

characters.lcchars = lcchars
characters.ucchars = ucchars
characters.shchars = shchars
characters.fschars = fschars

-- Cached string lookups. The requested code field is converted with
-- toutfstring; without one a numeric key falls back to utfchar(key) and any
-- other key to itself. A falsy key yields nothing.
do

    local function charindex(field)
        return function(cache, u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field]
                local char  = code and toutfstring(code)
                    or (type(u) == "number" and utfchar(u))
                    or u
                cache[u] = char
                return char
            end
        end
    end

    setmetatableindex(lcchars, charindex("lccode"))
    setmetatableindex(ucchars, charindex("uccode"))
    setmetatableindex(shchars, charindex("shcode"))
    setmetatableindex(fschars, charindex("fscode"))

end
1047
local decomposed = allocate()
local specials   = allocate()

characters.decomposed = decomposed
characters.specials   = specials

-- Cached access to the `decomposed` field; false when absent. A falsy key
-- yields nothing.
setmetatableindex(decomposed, function(cache, u)
    if not u then
        return
    end
    local entry = data[u]
    local value = entry and entry.decomposed or false
    cache[u] = value
    return value
end)

-- Cached access to the `specials` field; false when absent. A falsy key
-- yields nothing.
setmetatableindex(specials, function(cache, u)
    if not u then
        return
    end
    local entry = data[u]
    local value = entry and entry.specials or false
    cache[u] = value
    return value
end)
1068
local specialchars = allocate() characters.specialchars = specialchars
local descriptions = allocate() characters.descriptions = descriptions
local synonyms     = allocate() characters.synonyms     = synonyms

-- Cached lookup that reduces a character to the letters in its `specials`
-- list: entries 2..n of the list that are letters (per is_letter) are
-- converted with utfchar and concatenated. Without specials the key itself
-- is the result, a number being converted with utfchar first. A falsy key
-- yields nothing.
setmetatableindex(specialchars, function(t, u)
    if u then
        local entry = data[u]
        local s = entry and entry.specials
        if s then
            local tt  = { }
            local ttn = 0
            for i = 2, #s do
                local si = s[i]
                local d  = data[si]
                -- a component without data used to raise an error here
                if d and is_letter[d.category] then
                    ttn = ttn + 1
                    tt[ttn] = utfchar(si)
                end
            end
            local c = concat(tt)
            t[u] = c
            return c
        else
            local c = u
            if type(u) == "number" then
                c = utfchar(u)
            end
            -- cache under the key that was asked for (a numeric key was only
            -- cached under its string form, so it was resolved again on each
            -- lookup) and, as before, under the string itself
            t[u] = c
            t[c] = c
            return c
        end
    end
end)
1100
-- Miss handler for the description -> code point table. The first miss
-- fills the table from all data entries, using the lowercased description
-- with spaces removed as key. A key that is still unknown afterwards is
-- stored mapping to itself, while this call returns nil for it.
setmetatableindex(descriptions, function(t, k)
    for unicode, entry in next, data do
        local description = entry.description
        if description then
            if find(description, " ", 1, true) then
                description = gsub(description, " ", "")
            end
            t[lower(description)] = unicode
        end
    end
    local found = rawget(t, k)
    if not found then
        t[k] = k
    end
    return found
end)
1119
-- Miss handler for the synonym -> code point table. The first miss fills
-- the table from all data entries, using the `synonyms` field with spaces
-- removed as key (no lowercasing is applied). A key that is still unknown
-- afterwards is stored mapping to itself, as the descriptions table does,
-- while this call returns nil for it.
-- NOTE(review): this assumes the `synonyms` field is a string -- confirm.
setmetatableindex(synonyms, function(t, k)
    for u, c in next, data do
        local s = c.synonyms
        if s then
            if find(s, " ", 1, true) then
                s = gsub(s, " ", "")
            end
            t[s] = u
        end
    end
    local s = rawget(t, k)
    -- this used to be `t[s] = s`, which indexes the table with nil and
    -- raises an error on every miss
    if not s and k ~= nil then
        t[k] = k
    end
    return s
end)
1137
-- Resolves `asked` to a number when tonumber accepts it; a string is
-- otherwise looked up in the descriptions table, first verbatim and then
-- with spaces removed. Any other input yields nothing.
function characters.unicodechar(asked)
    local number = tonumber(asked)
    if number then
        return number
    end
    if type(asked) == "string" then
        return descriptions[asked] or descriptions[gsub(asked, " ", "")]
    end
end
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
-- Substitution patterns that replace every utf8 character of a string by
-- its entry in the given mapping table.
local function mapped(mapping)
    return Cs((utf8character / mapping)^0)
end

local tolower = mapped(lcchars)
local toupper = mapped(ucchars)
local toshape = mapped(shchars)

lpegpatterns.tolower = tolower
lpegpatterns.toupper = toupper
lpegpatterns.toshape = toshape
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
-- Builds characters.splits once: for every character whose `specials` list
-- has more than two entries and starts with "char" or "compat", the rest of
-- the list is stored in the table of that kind.
if not characters.splits then

    local char   = allocate()
    local compat = allocate()
    local splits = { char = char, compat = compat }

    characters.splits = splits

    for unicode, entry in next, characters.data do
        local specials = entry.specials
        if specials and #specials > 2 then
            -- other kinds have no table and are skipped
            local target = splits[specials[1]]
            if target then
                target[unicode] = { unpack(specials, 2) }
            end
        end
    end

    if storage then
        storage.register("characters/splits", splits, "characters.splits")
    end

end
1232
-- Builds the string -> string case and shape hashes once. A character with
-- an lccode goes into lhash; only one without an lccode is considered for
-- uhash. The shcode is handled independently of both.
if not characters.lhash then

    local lhash = allocate()
    local uhash = allocate()
    local shash = allocate()

    characters.lhash = lhash
    characters.uhash = uhash
    characters.shash = shash

    -- A numeric code or a table of exactly two codes becomes a string;
    -- tables of any other length are ignored (nil).
    local function tochars(code)
        if type(code) == "number" then
            return utfchar(code)
        elseif #code == 2 then
            return utfchar(code[1]) .. utfchar(code[2])
        end
    end

    for unicode, entry in next, characters.data do

        local lccode = entry.lccode
        if lccode then
            local lowered = tochars(lccode)
            if lowered then
                lhash[utfchar(unicode)] = lowered
            end
        else
            local uccode = entry.uccode
            if uccode then
                local uppered = tochars(uccode)
                if uppered then
                    uhash[utfchar(unicode)] = uppered
                end
            end
        end

        local shcode = entry.shcode
        if shcode then
            local shaped = tochars(shcode)
            if shaped then
                shash[utfchar(unicode)] = shaped
            end
        end

    end

    if storage then
        storage.register("characters/lhash", lhash, "characters.lhash")
        storage.register("characters/uhash", uhash, "characters.uhash")
        storage.register("characters/shash", shash, "characters.shash")
    end

end
1284
local lhash = characters.lhash
local uhash = characters.uhash
local shash = characters.shash

mark(lhash)
mark(uhash)
mark(shash)

-- For a hash this returns the pattern that replaces one hashed character by
-- its hash entry, and the whole-string pattern built on it (characters that
-- are not in the hash pass through unchanged).
local function caser(hash)
    local single = utfchartabletopattern(hash) / hash
    return single, Cs((single + utf8character)^0)
end

local utf8lowercharacter, utf8lower = caser(lhash)
local utf8uppercharacter, utf8upper = caser(uhash)
local utf8shapecharacter, utf8shape = caser(shash)

lpegpatterns.utf8lowercharacter = utf8lowercharacter
lpegpatterns.utf8uppercharacter = utf8uppercharacter
lpegpatterns.utf8shapecharacter = utf8shapecharacter

lpegpatterns.utf8lower = utf8lower
lpegpatterns.utf8upper = utf8upper
lpegpatterns.utf8shape = utf8shape

-- String converters; a falsy argument gives an empty string.

function characters.lower(str)
    return str and lpegmatch(utf8lower, str) or ""
end

function characters.upper(str)
    return str and lpegmatch(utf8upper, str) or ""
end

function characters.shaped(str)
    return str and lpegmatch(utf8shape, str) or ""
end

lpeg.setutfcasers(characters.lower, characters.upper)
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
-- Returns `str` reduced to its letters (per is_letter). With `spacing` set,
-- a run of spacing characters between letters is collapsed to one space.
-- Characters without a data entry are skipped; they raised an error before.
function characters.lettered(str, spacing)
    local new, n = { }, 0
    if spacing then
        local done = false -- spacing seen since the last emitted space
        for u in utfvalues(str) do
            local d = data[u]
            local c = d and d.category
            if is_letter[c] then
                -- NOTE(review): `n > 1` means no space is inserted after a
                -- leading single letter ("a b" gives "ab"); `n > 0` may have
                -- been intended. Kept as is because callers may depend on it.
                if done and n > 1 then
                    n = n + 1
                    new[n] = " "
                    done = false
                end
                n = n + 1
                new[n] = utfchar(u)
            elseif is_spacing[c] then
                done = true
            end
        end
    else
        for u in utfvalues(str) do
            local d = data[u]
            if d and is_letter[d.category] then
                n = n + 1
                new[n] = utfchar(u)
            end
        end
    end
    return concat(new)
end
1373
1374
1377
-- Function style access to the cached uccodes and lccodes tables.

function characters.uccode(n)
    return uccodes[n]
end

function characters.lccode(n)
    return lccodes[n]
end
1380
-- Returns the shape code of `n` plus a second value: for a table valued
-- shape code its first and last entry, otherwise the code itself (or `n`
-- when the lookup gives nothing) and nil.
function characters.shape(n)
    local shcode = shcodes[n]
    if type(shcode) == "table" then
        return shcode[1], shcode[#shcode]
    end
    return shcode or n, nil
end
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
-- Build the superscript, subscript and fraction lookup tables from the
-- 'specials' field of the character data. This only runs when
-- characters.superscripts has not been set yet.
-- NOTE(review): the string literals are reproduced exactly as found in this
-- copy of the file, blanks inside the quotes included.
if not characters.superscripts then

    local superscripts = allocate()
    characters.superscripts = superscripts

    local subscripts = allocate()
    characters.subscripts = subscripts

    local fractions = allocate()
    characters.fractions = fractions

    -- shared trace message for entries that do not have the expected size
    local function ignored(label, k, v)
        if trace_defining then
            report_defining(" ignoring %s %a, char %c, description %a ", label, ustring(k), k, v.description)
        end
    end

    for k, v in next, data do
        local specials = v.specials
        if specials then
            local what = specials[1]
            if what == " super " then
                -- exactly one replacement after the tag
                if #specials == 2 then
                    superscripts[k] = specials[2]
                else
                    ignored(" superscript ", k, v)
                end
            elseif what == " sub " then
                if #specials == 2 then
                    subscripts[k] = specials[2]
                else
                    ignored(" subscript ", k, v)
                end
            elseif what == " fraction " then
                -- keep everything that follows the tag
                if #specials > 1 then
                    fractions[k] = { unpack(specials, 2) }
                else
                    ignored(" fraction ", k, v)
                end
            end
        end
    end

    -- register the tables when the storage module is present
    if storage then
        storage.register(" characters/superscripts ", superscripts, " characters.superscripts ")
        storage.register(" characters/subscripts ", subscripts, " characters.subscripts ")
        storage.register(" characters/fractions ", fractions, " characters.fractions ")
    end

end
1511
-- Reports every element of utotable(str) on its own line, together with its
-- position in that list. Returns nothing.
function characters.showstring(str)
    local pieces = utotable(str)
    local count  = #pieces
    for index=1,count do
        report_defining(" split % 3i : %C ", index, pieces[index])
    end
end
1518
1519do
1520
1521
1522
-- Pieces of the pattern that normalizes an emoji description into a key.
-- NOTE(review): the string literals are reproduced exactly as found in this
-- copy of the file, blanks inside the quotes included.

local any       = P(1)
local special   = S([[ '".,:;-+() ]]) + P(' “ ') + P(' ” ')
local apostrofe = P(" ’ ") + P(" ' ")

-- two word tone names get a hyphen when a skin tone follows
local hyphenated = (P(" medium light ") / " medium-light " + P(" medium dark ") / " medium-dark ") * P(" skin tone ")
-- an apostrofe followed by the s literal is replaced
local possessive = (apostrofe * P(" s ")) / " "
-- so is every special character
local dropped    = special / " "

-- alternatives are tried in this order, anything else is copied
local pattern = Cs((hyphenated + possessive + dropped + any)^1)
1534
-- Locates the emoji definition file via resolvers.findfile and runs it with
-- dofile. Returns two tables: the definitions as loaded (empty when nothing
-- usable was found) and a hash that maps each normalized description to the
-- utf string built from that entry's list of numbers.
local function load()
    local filename = resolvers.findfile(" char-emj.lua ")
    local definitions = nil
    if filename and filename ~= " " then
        definitions = dofile(filename)
    end
    if not definitions then
        definitions = { }
    end
    local lookup = { }
    for description, codes in next, definitions do
        -- fall back to the raw description when the pattern does not match
        local key = lpegmatch(pattern, description) or description
        local pieces = { }
        for i=1,#codes do
            pieces[i] = utfchar(codes[i])
        end
        lookup[key] = concat(pieces)
    end
    return definitions, lookup
end
1550
-- Both stay nil until emoji.resolve or emoji.known calls load() for the
-- first time.
local data, hash
1552
-- Applies the normalization pattern to 'name' and returns the result. When
-- the pattern does not match, a one element table holding 'name' is returned.
-- NOTE(review): a successful match comes from a Cs capture (a string) while
-- the fallback is a table; confirm which of the two callers rely on.
function characters.emojized(name)
    return lpegmatch(pattern, name) or { name }
end
1561
-- Helper patterns used by emoji.resolve and emoji.compact.
-- NOTE(review): the string literals are reproduced exactly as found in this
-- copy of the file, blanks inside the quotes included.

local start  = P(" ")
local finish = P(-1) + P(" ")

local skintone = P(" medium ")^0 * (P(" light ") + P(" dark "))^0 * P(" skin tone ")
local gender   = P(" woman ") + P(" man ")

-- abbreviated tone prefix -> written out form
local short_tone =
    P(" m-l- ") / " medium-light "
  + P(" m-d- ") / " medium-dark "
  + P(" l- ")   / " light "
  + P(" m- ")   / " medium "
  + P(" d- ")   / " dark "

local expanded = short_tone * (P(" s-t ") / " skin tone ")

-- written out tone -> abbreviated form, the two word variant is tried first
local long_pair   = P(" medium- ") / " m- " * (P(" light ") / " l " + P(" dark ") / " d ")
local long_single = P(" medium ") / " m " + P(" light ") / " l " + P(" dark ") / " d "

local compacted = (long_pair + long_single) * (P(" skin tone ") / " -s-t ")

-- substitution passes over a whole name; unmatched input is copied
local pattern_0 = Cs((expanded + any)^1)
local pattern_1 = Cs(((start * skintone + skintone * finish) / " " + any)^1)
local pattern_2 = Cs(((start * gender + gender * finish) / " " + any)^1)
local pattern_4 = Cs((compacted + any)^1)
1584
1585
1586
1587
1588
1589
1590
-- Name fragments mapped onto the characters they stand for.
-- NOTE(review): the string literals are reproduced exactly as found in this
-- copy of the file, blanks inside the quotes included.

local skin =
    P(" light skin tone ")        / utfchar(0x1F3FB)
  + P(" medium-light skin tone ") / utfchar(0x1F3FC)
  + P(" medium skin tone ")       / utfchar(0x1F3FD)
  + P(" medium-dark skin tone ")  / utfchar(0x1F3FE)
  + P(" dark skin tone ")         / utfchar(0x1F3FF)

local parent =
    P(" man ")   / utfchar(0x1F468)
  + P(" woman ") / utfchar(0x1F469)

local child =
    P(" baby ") / utfchar(0x1F476)
  + P(" boy ")  / utfchar(0x1F466)
  + P(" girl ") / utfchar(0x1F467)

-- joiner plus the two connector sequences that are built with it
local zwj   = utfchar(0x200D)
local red   = utfchar(0x2764) .. utfchar(0xFE0F)
local heart = red .. zwj
local kiss  = red .. zwj .. utfchar(0x1F48B) .. zwj
1610
1611
1612
-- Grammar for the composed names that emoji.resolve tries via p_special.
-- NOTE(review): the string literals are reproduced exactly as found in this
-- copy of the file, blanks inside the quotes included.

local space = P(" ")
local final = P(-1)

-- after a person: either spacing (replaced by the joiner) or end of input
local p_done   = (space^1 / zwj) + final
-- optionally a skin tone, separated by one space that gets replaced
local p_rest   = space / " " * (skin * p_done) + p_done
local p_parent = parent * p_rest
local p_child  = child  * p_rest

-- each variant replaces its leading keyword (plus spacing) and then expects
-- persons; couple and kiss insert their connector between two parents
local family_tag = (P(" family ") * space^1) / " "
local couple_tag = (P(" couple with heart ") * space^1) / " "
local kiss_tag   = (P(" kiss ") * space^1) / " "

local p_family = Cs(family_tag * p_parent^-2 * p_child^-2)
local p_couple = Cs(couple_tag * p_parent * Cc(heart) * p_parent)
local p_kiss   = Cs(kiss_tag * p_parent * Cc(kiss) * p_parent)

local p_special = p_family + p_couple + p_kiss
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
-- Namespace for the emoji helpers, exported as characters.emoji.
local emoji = { }
characters.emoji = emoji

-- Results remembered by emoji.resolve: a resolved string, or false for a
-- name that could not be resolved. The metatable asks for weak keys.
-- NOTE(review): the keys stored here are strings, which Lua does not remove
-- from weak tables, so entries presumably stay around; confirm the intent.
local cache = setmetatable({ }, { __mode = " k " })
1644
-- Resolves an emoji name to the string stored or composed for it.
--
-- name : the name to look up; abbreviations handled by pattern_0 are
--        expanded before the composed lookups
--
-- Returns the resolved string, or nothing when the name cannot be resolved.
--
-- Lookup order: the loaded hash, the cache, the composed forms (p_special),
-- then the hash again with the name as rewritten by pattern_1 and by
-- pattern_2 (both applied to the expanded name).
--
-- Fix: results are now cached under the name the caller passed in. Before,
-- the cache was consulted with the original name but filled with the
-- expanded one, so a name that needed expansion never hit the cache and was
-- parsed again on every call.
function emoji.resolve(name)
    if not hash then
        data, hash = load() -- lazy: the definitions are loaded on first use
    end
    local direct = hash[name]
    if direct then
        return direct
    end
    local cached = cache[name]
    if cached then
        return cached
    elseif cached == false then
        return -- known failure
    end
    -- expand abbreviations, keep the name as-is when nothing matches
    local expanded = lpegmatch(pattern_0, name) or name
    -- composed names
    local resolved = lpegmatch(p_special, expanded)
    if not resolved then
        local stripped = lpegmatch(pattern_1, expanded)
        resolved = stripped and hash[stripped]
    end
    if not resolved then
        local stripped = lpegmatch(pattern_2, expanded)
        resolved = stripped and hash[stripped]
    end
    if resolved then
        cache[name] = resolved
        return resolved
    end
    cache[name] = false
end
1683
-- Returns the lookup hash followed by the loaded definitions; the
-- definitions are loaded first when that has not happened yet.
function emoji.known()
    if hash then
        return hash, data
    end
    data, hash = load()
    return hash, data
end
1690
-- Rewrites 'name' with pattern_4 (the compacting substitutions) and returns
-- the result; 'name' itself is returned when the pattern does not match.
function emoji.compact(name)
    local shortened = lpegmatch(pattern_4, name)
    if shortened then
        return shortened
    end
    return name
end
1694
1695end
1696
1697
1698
1699return characters
1700 |