-- Registration record for this module: creates the global 'modules' table
-- when it does not exist yet and stores the file's metadata under 'char-ini'.
if not modules then
    modules = { }
end

modules['char-ini'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable
15local concat, unpack, tohash, insert = table.concat, table.unpack, table.tohash, table.insert
16local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
17local format, lower, gsub, find = string.format, string.lower, string.gsub, string.find
18local P, R, S, C, Cs, Ct, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.V
19local formatters = string.formatters
20
21if not characters then require("char-def") end
22
23local lpegpatterns = lpeg.patterns
24local lpegmatch = lpeg.match
25local utf8byte = lpegpatterns.utf8byte
26local utf8character = lpegpatterns.utf8character
27
28local utfchartabletopattern = lpeg.utfchartabletopattern
29
30local allocate = utilities.storage.allocate
31local mark = utilities.storage.mark
32
33local setmetatableindex = table.setmetatableindex
34
-- Tracing flag for character definitions; it is switched on and off through
-- the "characters.defining" tracker. The callback has to assign to the local
-- declared here: assigning to any other name would create a stray global and
-- leave this flag permanently false.
local trace_defining = false

trackers.register("characters.defining", function(v)
    trace_defining = v
end)

-- Reporter used for messages of this module (category "characters").
local report_defining = logs.reporter("characters")
38
39
40
41
42
43
44
45
46
47
48
49
-- Make sure the global namespace exists and keep local handles to it and to
-- the character data table.
characters = characters or { }
local characters = characters
local data = characters.data

-- Without the character data nothing in this file can work, so report and quit;
-- otherwise hand the table to the storage 'mark' helper.
if not data then
    report_defining("fatal error: 'char-def.lua' is not loaded")
    os.exit()
else
    mark(data)
end
60
61
62
-- Only when running inside context with CONTEXTLMTXMODE equal to 0: make sure
-- the private character definitions are present and merge them into the main
-- data table.
if context and CONTEXTLMTXMODE == 0 then

    if not characters.private then
        -- load the definitions on demand and, when a storage subsystem is
        -- available, register the resulting table with it
        require("char-prv")
        if storage then
            storage.register("characters/private", characters.private, "characters.private")
        end
    end

    -- every private entry overrides or extends the slot with the same key
    for slot, entry in next, characters.private do
        data[slot] = entry
    end

end
80
81
82
-- Matches a complete string of the form "0x<HEX>" or "U+<HEX>", where <HEX> is
-- one or more digits from 0-9 and A-F up to the end of the string, and yields
-- the value of the hexadecimal part as a number.
local pattern do
    local hexprefix = P("0x") + P("U+")
    local hexnumber = R("09","AF")^1 * P(-1)
    pattern = hexprefix * (hexnumber / function(s) return tonumber(s,16) end)
end

-- also make it available as a shared lpeg pattern
lpegpatterns.chartonumber = pattern
86
--- Convert a character reference into a code point.
--
-- @param k a string or any other value:
--          * a string of the form "0x<HEX>" or "U+<HEX>" gives the value of the
--            hexadecimal part;
--          * any other string gives the result of utfbyte on that string;
--          * a non-string value is passed through.
-- @return the code point; 0 when k is nil or false, or when utfbyte yields
--         nothing for the given string
local function chartonumber(k)
    if type(k) ~= "string" then
        return k or 0
    end
    local u = lpegmatch(pattern,k)
    if u then
        -- The pattern has already converted the hexadecimal part with
        -- tonumber, so u is the code point itself. It must not be passed
        -- through utfbyte, which works on strings and would return the code
        -- of the first digit of the number's string form.
        return u
    end
    return utfbyte(k) or 0
end
99
--- Convert a code point reference into a character string.
--
-- @param k a number, or a string that may have the form "0x<HEX>" or "U+<HEX>"
-- @return for a number the result of utfchar (or "" when that gives nothing);
--         for a matching hexadecimal string the character for that value;
--         otherwise k itself
local function charfromnumber(k)
    if type(k) ~= "number" then
        local code = lpegmatch(pattern,k)
        if code then
            return utfchar(code)
        end
        return k
    end
    return utfchar(k) or ""
end
112
113
114
-- Export the two converters in the characters namespace.
characters.tonumber   = chartonumber
characters.fromnumber = charfromnumber

-- Shared fallback entry; the index handler installed on the data table returns
-- it for lookups it cannot resolve.
local private = { description = "PRIVATE SLOT" }

-- List of range descriptions; entries are appended further on and consulted by
-- the index handler of the data table.
local ranges = allocate()
characters.ranges = ranges
124
-- Fallback lookup for the data table.
--
-- String keys are first turned into a slot number, either through the
-- "0x"/"U+" pattern or with utfbyte; when that gives nothing the shared
-- private entry is returned, and when the slot is already stored its entry is
-- returned directly. Numeric slots below 0xF0000 are then checked against the
-- registered ranges: the first range that contains the slot and has an
-- extender produces the entry, which is stored in the table before it is
-- returned. Everything else resolves to the shared private entry.
setmetatableindex(data, function(t,k)
    local kind = type(k)
    if kind == "string" then
        k = lpegmatch(pattern,k) or utfbyte(k)
        if not k then
            return private
        end
        local known = rawget(t,k)
        if known then
            return known
        end
        -- from here on k is handled as a slot number
        kind = "number"
    end
    if kind == "number" and k < 0xF0000 then
        for i=1,#ranges do
            local range = ranges[i]
            if k >= range.first and k <= range.last then
                local extender = range.extender
                if extender then
                    local entry = extender(k)
                    t[k] = entry
                    return entry
                end
                -- a matching range without extender does not end the search
            end
        end
    end
    return private
end)
155
-- Properties shared by all variation selector entries. The table is installed
-- as metatable on the entries created by the range extenders, so it needs an
-- __index field: a metatable without one contributes no fields to the tables
-- it is attached to. Pointing __index at the table itself makes the shared
-- values visible on each entry while they stay readable on this table too.
local variant_selector_metatable = {
    category  = "mn",
    cjkwd     = "a",
    direction = "nsm",
    linebreak = "cm",
}

variant_selector_metatable.__index = variant_selector_metatable
162
163
164
-- Formatter for the descriptions of the variation selector entries.
local f_variant = string.formatters["VARIATION SELECTOR-0x%04X"]

do

    -- Returns an extender for one selector range: 'first' is the first slot of
    -- the range and 'number' the selector number that belongs to that slot.
    -- The extender builds the entry for slot k and attaches the shared
    -- variant selector metatable to it.
    local function selectorextender(first,number)
        return function(k)
            return setmetatable({
                unicodeslot = k,
                description = f_variant(k - first + number),
            }, variant_selector_metatable)
        end
    end

    insert(characters.ranges, {
        name     = "variant selector",
        first    = 0xFE00,
        last     = 0xFE0F,
        extender = selectorextender(0xFE00,0x0001),
    })

    insert(characters.ranges, {
        name     = "variant selector extension",
        first    = 0xE0100,
        last     = 0xE01EF,
        extender = selectorextender(0xE0100,0x0011),
    })

end
194
195local blocks = allocate {
196 ["adlam"] = { first = 0x1E900, last = 0x1E95F, description = "Adlam" },
197 ["aegeannumbers"] = { first = 0x10100, last = 0x1013F, description = "Aegean Numbers" },
198 ["ahom"] = { first = 0x11700, last = 0x1174F, description = "Ahom" },
199 ["alchemicalsymbols"] = { first = 0x1F700, last = 0x1F77F, description = "Alchemical Symbols" },
200 ["alphabeticpresentationforms"] = { first = 0x0FB00, last = 0x0FB4F, otf="latn", description = "Alphabetic Presentation Forms" },
201 ["anatolianhieroglyphs"] = { first = 0x14400, last = 0x1467F, description = "Anatolian Hieroglyphs" },
202 ["ancientgreekmusicalnotation"] = { first = 0x1D200, last = 0x1D24F, otf="grek", description = "Ancient Greek Musical Notation" },
203 ["ancientgreeknumbers"] = { first = 0x10140, last = 0x1018F, otf="grek", description = "Ancient Greek Numbers" },
204 ["ancientsymbols"] = { first = 0x10190, last = 0x101CF, otf="grek", description = "Ancient Symbols" },
205 ["arabic"] = { first = 0x00600, last = 0x006FF, otf="arab", description = "Arabic" },
206 ["arabicextendeda"] = { first = 0x008A0, last = 0x008FF, description = "Arabic Extended-A" },
207 ["arabicextendedb"] = { first = 0x00870, last = 0x0089F, description = "Arabic Extended-B" },
208 ["arabicextendedc"] = { first = 0x10EC0, last = 0x10EFF, description = "Arabic Extended-C" },
209 ["arabicmathematicalalphabeticsymbols"] = { first = 0x1EE00, last = 0x1EEFF, description = "Arabic Mathematical Alphabetic Symbols" },
210 ["arabicpresentationformsa"] = { first = 0x0FB50, last = 0x0FDFF, otf="arab", description = "Arabic Presentation Forms-A" },
211 ["arabicpresentationformsb"] = { first = 0x0FE70, last = 0x0FEFF, otf="arab", description = "Arabic Presentation Forms-B" },
212 ["arabicsupplement"] = { first = 0x00750, last = 0x0077F, otf="arab", description = "Arabic Supplement" },
213 ["armenian"] = { first = 0x00530, last = 0x0058F, otf="armn", description = "Armenian" },
214 ["arrows"] = { first = 0x02190, last = 0x021FF, description = "Arrows" },
215 ["avestan"] = { first = 0x10B00, last = 0x10B3F, description = "Avestan" },
216 ["balinese"] = { first = 0x01B00, last = 0x01B7F, otf="bali", description = "Balinese" },
217 ["bamum"] = { first = 0x0A6A0, last = 0x0A6FF, description = "Bamum" },
218 ["bamumsupplement"] = { first = 0x16800, last = 0x16A3F, description = "Bamum Supplement" },
219 ["basiclatin"] = { first = 0x00000, last = 0x0007F, otf="latn", description = "Basic Latin" },
220 ["bassavah"] = { first = 0x16AD0, last = 0x16AFF, description = "Bassa Vah" },
221 ["batak"] = { first = 0x01BC0, last = 0x01BFF, description = "Batak" },
222 ["bengali"] = { first = 0x00980, last = 0x009FF, otf="beng", description = "Bengali" },
223 ["bhaiksuki"] = { first = 0x11C00, last = 0x11C6F, description = "Bhaiksuki" },
224 ["blockelements"] = { first = 0x02580, last = 0x0259F, otf="bopo", description = "Block Elements" },
225 ["bopomofo"] = { first = 0x03100, last = 0x0312F, otf="bopo", description = "Bopomofo" },
226 ["bopomofoextended"] = { first = 0x031A0, last = 0x031BF, otf="bopo", description = "Bopomofo Extended" },
227 ["boxdrawing"] = { first = 0x02500, last = 0x0257F, description = "Box Drawing" },
228 ["brahmi"] = { first = 0x11000, last = 0x1107F, description = "Brahmi" },
229 ["braillepatterns"] = { first = 0x02800, last = 0x028FF, otf="brai", description = "Braille Patterns" },
230 ["buginese"] = { first = 0x01A00, last = 0x01A1F, otf="bugi", description = "Buginese" },
231 ["buhid"] = { first = 0x01740, last = 0x0175F, otf="buhd", description = "Buhid" },
232 ["byzantinemusicalsymbols"] = { first = 0x1D000, last = 0x1D0FF, otf="byzm", description = "Byzantine Musical Symbols" },
233 ["carian"] = { first = 0x102A0, last = 0x102DF, description = "Carian" },
234 ["caucasianalbanian"] = { first = 0x10530, last = 0x1056F, description = "Caucasian Albanian" },
235 ["chakma"] = { first = 0x11100, last = 0x1114F, description = "Chakma" },
236 ["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" },
237 ["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" },
238 ["cherokeesupplement"] = { first = 0x0AB70, last = 0x0ABBF, description = "Cherokee Supplement" },
239 ["chesssymbols"] = { first = 0x1FA00, last = 0x1FA6F, description = "Chess Symbols" },
240 ["chorasmian"] = { first = 0x10FB0, last = 0x10FDF, description = "Chorasmian" },
241 ["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" },
242 ["cjkcompatibilityforms"] = { first = 0x0FE30, last = 0x0FE4F, otf="hang", description = "CJK Compatibility Forms" },
243 ["cjkcompatibilityideographs"] = { first = 0x0F900, last = 0x0FAFF, otf="hang", description = "CJK Compatibility Ideographs" },
244 ["cjkcompatibilityideographssupplement"] = { first = 0x2F800, last = 0x2FA1F, otf="hang", description = "CJK Compatibility Ideographs Supplement" },
245 ["cjkradicalssupplement"] = { first = 0x02E80, last = 0x02EFF, otf="hang", description = "CJK Radicals Supplement" },
246 ["cjkstrokes"] = { first = 0x031C0, last = 0x031EF, otf="hang", description = "CJK Strokes" },
247 ["cjksymbolsandpunctuation"] = { first = 0x03000, last = 0x0303F, otf="hang", description = "CJK Symbols and Punctuation" },
248 ["cjkunifiedideographs"] = { first = 0x04E00, last = 0x09FFF, otf="hang", description = "CJK Unified Ideographs", catcode = "letter" },
249 ["cjkunifiedideographsextensiona"] = { first = 0x03400, last = 0x04DBF, otf="hang", description = "CJK Unified Ideographs Extension A" },
250 ["cjkunifiedideographsextensionb"] = { first = 0x20000, last = 0x2A6DF, otf="hang", description = "CJK Unified Ideographs Extension B" },
251 ["cjkunifiedideographsextensionc"] = { first = 0x2A700, last = 0x2B73F, description = "CJK Unified Ideographs Extension C" },
252 ["cjkunifiedideographsextensiond"] = { first = 0x2B740, last = 0x2B81F, description = "CJK Unified Ideographs Extension D" },
253 ["cjkunifiedideographsextensione"] = { first = 0x2B820, last = 0x2CEAF, description = "CJK Unified Ideographs Extension E" },
254 ["cjkunifiedideographsextensionf"] = { first = 0x2CEB0, last = 0x2EBEF, description = "CJK Unified Ideographs Extension F" },
255 ["cjkunifiedideographsextensiong"] = { first = 0x30000, last = 0x3134F, description = "CJK Unified Ideographs Extension G" },
256 ["cjkunifiedideographsextensionh"] = { first = 0x31350, last = 0x323AF, description = "CJK Unified Ideographs Extension H" },
257 ["cjkunifiedideographsextensioni"] = { first = 0x2EBF0, last = 0x2EE5F, description = "CJK Unified Ideographs Extension I" },
258 ["combiningdiacriticalmarks"] = { first = 0x00300, last = 0x0036F, description = "Combining Diacritical Marks" },
259 ["combiningdiacriticalmarksextended"] = { first = 0x01AB0, last = 0x01AFF, description = "Combining Diacritical Marks Extended" },
260 ["combiningdiacriticalmarksforsymbols"] = { first = 0x020D0, last = 0x020FF, description = "Combining Diacritical Marks for Symbols" },
261 ["combiningdiacriticalmarkssupplement"] = { first = 0x01DC0, last = 0x01DFF, description = "Combining Diacritical Marks Supplement" },
262 ["combininghalfmarks"] = { first = 0x0FE20, last = 0x0FE2F, description = "Combining Half Marks" },
263 ["commonindicnumberforms"] = { first = 0x0A830, last = 0x0A83F, description = "Common Indic Number Forms" },
264 ["controlpictures"] = { first = 0x02400, last = 0x0243F, description = "Control Pictures" },
265 ["coptic"] = { first = 0x02C80, last = 0x02CFF, otf="copt", description = "Coptic" },
266 ["copticepactnumbers"] = { first = 0x102E0, last = 0x102FF, description = "Coptic Epact Numbers" },
267 ["countingrodnumerals"] = { first = 0x1D360, last = 0x1D37F, description = "Counting Rod Numerals" },
268 ["cuneiform"] = { first = 0x12000, last = 0x123FF, otf="xsux", description = "Cuneiform" },
269 ["cuneiformnumbersandpunctuation"] = { first = 0x12400, last = 0x1247F, otf="xsux", description = "Cuneiform Numbers and Punctuation" },
270 ["currencysymbols"] = { first = 0x020A0, last = 0x020CF, description = "Currency Symbols" },
271 ["cypriotsyllabary"] = { first = 0x10800, last = 0x1083F, otf="cprt", description = "Cypriot Syllabary" },
272 ["cyprominoan"] = { first = 0x12F90, last = 0x12FFF, description = "Cypro-Minoan" },
273 ["cyrillic"] = { first = 0x00400, last = 0x004FF, otf="cyrl", description = "Cyrillic" },
274 ["cyrillicextendeda"] = { first = 0x02DE0, last = 0x02DFF, otf="cyrl", description = "Cyrillic Extended-A" },
275 ["cyrillicextendedb"] = { first = 0x0A640, last = 0x0A69F, otf="cyrl", description = "Cyrillic Extended-B" },
276 ["cyrillicextendedc"] = { first = 0x01C80, last = 0x01C8F, description = "Cyrillic Extended-C" },
277 ["cyrillicextendedd"] = { first = 0x1E030, last = 0x1E08F, description = "Cyrillic Extended-D" },
278 ["cyrillicsupplement"] = { first = 0x00500, last = 0x0052F, otf="cyrl", description = "Cyrillic Supplement" },
279 ["deseret"] = { first = 0x10400, last = 0x1044F, otf="dsrt", description = "Deseret" },
280 ["devanagari"] = { first = 0x00900, last = 0x0097F, otf="deva", description = "Devanagari" },
281 ["devanagariextended"] = { first = 0x0A8E0, last = 0x0A8FF, description = "Devanagari Extended" },
282 ["devanagariextendeda"] = { first = 0x11B00, last = 0x11B5F, description = "Devanagari Extended-A" },
283 ["digitsarabicindic"] = { first = 0x00660, last = 0x00669, math = true },
284
285 ["digitsbold"] = { first = 0x1D7CE, last = 0x1D7D7, math = true },
286
287 ["digitsdoublestruck"] = { first = 0x1D7D8, last = 0x1D7E1, math = true },
288
289 ["digitsextendedarabicindic"] = { first = 0x006F0, last = 0x006F9, math = true },
290
291
292
293
294
295 ["digitslatin"] = { first = 0x00030, last = 0x00039, math = true },
296
297
298 ["digitsmonospace"] = { first = 0x1D7F6, last = 0x1D7FF, math = true },
299
300 ["digitsnormal"] = { first = 0x00030, last = 0x00039, math = true },
301
302 ["digitssansserifbold"] = { first = 0x1D7EC, last = 0x1D7F5, math = true },
303 ["digitssansserifnormal"] = { first = 0x1D7E2, last = 0x1D7EB, math = true },
304
305
306
307
308 ["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" },
309 ["divesakuru"] = { first = 0x11900, last = 0x1195F, description = "Dives Akuru" },
310 ["dogra"] = { first = 0x11800, last = 0x1184F, description = "Dogra" },
311 ["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" },
312 ["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" },
313 ["earlydynasticcuneiform"] = { first = 0x12480, last = 0x1254F, description = "Early Dynastic Cuneiform" },
314 ["egyptianhieroglyphformatcontrols"] = { first = 0x13430, last = 0x1345F, description = "Egyptian Hieroglyph Format Controls" },
315 ["egyptianhieroglyphs"] = { first = 0x13000, last = 0x1342F, description = "Egyptian Hieroglyphs" },
316 ["elbasan"] = { first = 0x10500, last = 0x1052F, description = "Elbasan" },
317 ["elymaic"] = { first = 0x10FE0, last = 0x10FFF, description = "Elymaic" },
318 ["emoticons"] = { first = 0x1F600, last = 0x1F64F, description = "Emoticons" },
319 ["enclosedalphanumerics"] = { first = 0x02460, last = 0x024FF, description = "Enclosed Alphanumerics" },
320 ["enclosedalphanumericsupplement"] = { first = 0x1F100, last = 0x1F1FF, description = "Enclosed Alphanumeric Supplement" },
321 ["enclosedcjklettersandmonths"] = { first = 0x03200, last = 0x032FF, description = "Enclosed CJK Letters and Months" },
322 ["enclosedideographicsupplement"] = { first = 0x1F200, last = 0x1F2FF, description = "Enclosed Ideographic Supplement" },
323 ["ethiopic"] = { first = 0x01200, last = 0x0137F, otf="ethi", description = "Ethiopic" },
324 ["ethiopicextended"] = { first = 0x02D80, last = 0x02DDF, otf="ethi", description = "Ethiopic Extended" },
325 ["ethiopicextendeda"] = { first = 0x0AB00, last = 0x0AB2F, description = "Ethiopic Extended-A" },
326 ["ethiopicextendedb"] = { first = 0x1E7E0, last = 0x1E7FF, description = "Ethiopic Extended-B" },
327 ["ethiopicsupplement"] = { first = 0x01380, last = 0x0139F, otf="ethi", description = "Ethiopic Supplement" },
328 ["generalpunctuation"] = { first = 0x02000, last = 0x0206F, description = "General Punctuation" },
329 ["geometricshapes"] = { first = 0x025A0, last = 0x025FF, math = true, description = "Geometric Shapes" },
330 ["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" },
331 ["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" },
332 ["georgianextended"] = { first = 0x01C90, last = 0x01CBF, description = "Georgian Extended" },
333 ["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" },
334 ["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" },
335 ["glagoliticsupplement"] = { first = 0x1E000, last = 0x1E02F, description = "Glagolitic Supplement" },
336 ["gothic"] = { first = 0x10330, last = 0x1034F, otf="goth", description = "Gothic" },
337 ["grantha"] = { first = 0x11300, last = 0x1137F, description = "Grantha" },
338 ["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" },
339 ["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" },
340 ["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" },
341 ["gunjalagondi"] = { first = 0x11D60, last = 0x11DAF, description = "Gunjala Gondi" },
342 ["gurmukhi"] = { first = 0x00A00, last = 0x00A7F, otf="guru", description = "Gurmukhi" },
343 ["halfwidthandfullwidthforms"] = { first = 0x0FF00, last = 0x0FFEF, description = "Halfwidth and Fullwidth Forms" },
344 ["hangulcompatibilityjamo"] = { first = 0x03130, last = 0x0318F, otf="jamo", description = "Hangul Compatibility Jamo" },
345 ["hanguljamo"] = { first = 0x01100, last = 0x011FF, otf="jamo", description = "Hangul Jamo" },
346 ["hanguljamoextendeda"] = { first = 0x0A960, last = 0x0A97F, description = "Hangul Jamo Extended-A" },
347 ["hanguljamoextendedb"] = { first = 0x0D7B0, last = 0x0D7FF, description = "Hangul Jamo Extended-B" },
348 ["hangulsyllables"] = { first = 0x0AC00, last = 0x0D7AF, otf="hang", description = "Hangul Syllables" },
349 ["hanifirohingya"] = { first = 0x10D00, last = 0x10D3F, description = "Hanifi Rohingya" },
350 ["hanunoo"] = { first = 0x01720, last = 0x0173F, otf="hano", description = "Hanunoo" },
351 ["hatran"] = { first = 0x108E0, last = 0x108FF, description = "Hatran" },
352 ["hebrew"] = { first = 0x00590, last = 0x005FF, otf="hebr", description = "Hebrew" },
353 ["highprivateusesurrogates"] = { first = 0x0DB80, last = 0x0DBFF, description = "High Private Use Surrogates" },
354 ["highsurrogates"] = { first = 0x0D800, last = 0x0DB7F, description = "High Surrogates" },
355 ["hiragana"] = { first = 0x03040, last = 0x0309F, otf="kana", description = "Hiragana" },
356 ["ideographicdescriptioncharacters"] = { first = 0x02FF0, last = 0x02FFF, description = "Ideographic Description Characters" },
357 ["ideographicsymbolsandpunctuation"] = { first = 0x16FE0, last = 0x16FFF, description = "Ideographic Symbols and Punctuation" },
358 ["imperialaramaic"] = { first = 0x10840, last = 0x1085F, description = "Imperial Aramaic" },
359 ["indicsiyaqnumbers"] = { first = 0x1EC70, last = 0x1ECBF, description = "Indic Siyaq Numbers" },
360 ["inscriptionalpahlavi"] = { first = 0x10B60, last = 0x10B7F, description = "Inscriptional Pahlavi" },
361 ["inscriptionalparthian"] = { first = 0x10B40, last = 0x10B5F, description = "Inscriptional Parthian" },
362 ["ipaextensions"] = { first = 0x00250, last = 0x002AF, description = "IPA Extensions" },
363 ["javanese"] = { first = 0x0A980, last = 0x0A9DF, description = "Javanese" },
364 ["kaithi"] = { first = 0x11080, last = 0x110CF, description = "Kaithi" },
365 ["kaktoviknumerals"] = { first = 0x1D2C0, last = 0x1D2DF, description = "Kaktovik Numerals" },
366 ["kanaextendeda"] = { first = 0x1B100, last = 0x1B12F, description = "Kana Extended-A" },
367 ["kanaextendedb"] = { first = 0x1AFF0, last = 0x1AFFF, description = "Kana Extended-B" },
368 ["kanasupplement"] = { first = 0x1B000, last = 0x1B0FF, description = "Kana Supplement" },
369 ["kanbun"] = { first = 0x03190, last = 0x0319F, description = "Kanbun" },
370 ["kangxiradicals"] = { first = 0x02F00, last = 0x02FDF, description = "Kangxi Radicals" },
371 ["kannada"] = { first = 0x00C80, last = 0x00CFF, otf="knda", description = "Kannada" },
372 ["katakana"] = { first = 0x030A0, last = 0x030FF, otf="kana", description = "Katakana" },
373 ["katakanaphoneticextensions"] = { first = 0x031F0, last = 0x031FF, otf="kana", description = "Katakana Phonetic Extensions" },
374 ["kayahli"] = { first = 0x0A900, last = 0x0A92F, description = "Kayah Li" },
375 ["kawi"] = { first = 0x11F00, last = 0x11F5F, description = "Kawi" },
376 ["kharoshthi"] = { first = 0x10A00, last = 0x10A5F, otf="khar", description = "Kharoshthi" },
377 ["khitansmallscript"] = { first = 0x18B00, last = 0x18CFF, description = "Khitan Small Script" },
378 ["khmer"] = { first = 0x01780, last = 0x017FF, otf="khmr", description = "Khmer" },
379 ["khmersymbols"] = { first = 0x019E0, last = 0x019FF, otf="khmr", description = "Khmer Symbols" },
380 ["khojki"] = { first = 0x11200, last = 0x1124F, description = "Khojki" },
381 ["khudawadi"] = { first = 0x112B0, last = 0x112FF, description = "Khudawadi" },
382 ["lao"] = { first = 0x00E80, last = 0x00EFF, otf="lao", description = "Lao" },
383 ["latinextendeda"] = { first = 0x00100, last = 0x0017F, otf="latn", description = "Latin Extended-A" },
384 ["latinextendedadditional"] = { first = 0x01E00, last = 0x01EFF, otf="latn", description = "Latin Extended Additional" },
385 ["latinextendedb"] = { first = 0x00180, last = 0x0024F, otf="latn", description = "Latin Extended-B" },
386 ["latinextendedc"] = { first = 0x02C60, last = 0x02C7F, otf="latn", description = "Latin Extended-C" },
387 ["latinextendedd"] = { first = 0x0A720, last = 0x0A7FF, otf="latn", description = "Latin Extended-D" },
388 ["latinextendede"] = { first = 0x0AB30, last = 0x0AB6F, description = "Latin Extended-E" },
389 ["latinextendedf"] = { first = 0x10780, last = 0x107BF, description = "Latin Extended-F" },
390 ["latinextendedg"] = { first = 0x1DF00, last = 0x1DFFF, description = "Latin Extended-G" },
391 ["latinsupplement"] = { first = 0x00080, last = 0x000FF, otf="latn", description = "Latin-1 Supplement" },
392 ["lepcha"] = { first = 0x01C00, last = 0x01C4F, description = "Lepcha" },
393 ["letterlikesymbols"] = { first = 0x02100, last = 0x0214F, math = true, description = "Letterlike Symbols" },
394 ["limbu"] = { first = 0x01900, last = 0x0194F, otf="limb", description = "Limbu" },
395 ["lineara"] = { first = 0x10600, last = 0x1077F, description = "Linear A" },
396 ["linearbideograms"] = { first = 0x10080, last = 0x100FF, otf="linb", description = "Linear B Ideograms" },
397 ["linearbsyllabary"] = { first = 0x10000, last = 0x1007F, otf="linb", description = "Linear B Syllabary" },
398 ["lisu"] = { first = 0x0A4D0, last = 0x0A4FF, description = "Lisu" },
399 ["lisusupplement"] = { first = 0x11FB0, last = 0x11FBF, description = "Lisu Supplement" },
400 ["lowercasebold"] = { first = 0x1D41A, last = 0x1D433, math = true },
401 ["lowercaseboldfraktur"] = { first = 0x1D586, last = 0x1D59F, math = true },
402 ["lowercasebolditalic"] = { first = 0x1D482, last = 0x1D49B, math = true, italic = true },
403 ["lowercaseboldscript"] = { first = 0x1D4EA, last = 0x1D503, math = true, italic = true },
404 ["lowercasedoublestruck"] = { first = 0x1D552, last = 0x1D56B, math = true },
405 ["lowercasefraktur"] = { first = 0x1D51E, last = 0x1D537, math = true },
406 ["lowercasegreekbold"] = { first = 0x1D6C2, last = 0x1D6DB, math = true },
407 ["lowercasegreekbolditalic"] = { first = 0x1D736, last = 0x1D74F, math = true, italic = true },
408 ["lowercasegreekitalic"] = { first = 0x1D6FC, last = 0x1D715, math = true, italic = true },
409 ["lowercasegreeknormal"] = { first = 0x003B1, last = 0x003C9, math = true },
410 ["lowercasegreeksansserifbold"] = { first = 0x1D770, last = 0x1D789, math = true },
411 ["lowercasegreeksansserifbolditalic"] = { first = 0x1D7AA, last = 0x1D7C3, math = true, italic = true },
412 ["lowercaseitalic"] = { first = 0x1D44E, last = 0x1D467, math = true, italic = true },
413 ["lowercasemonospace"] = { first = 0x1D68A, last = 0x1D6A3, math = true },
414 ["lowercasenormal"] = { first = 0x00061, last = 0x0007A, math = true },
415 ["lowercasesansserifbold"] = { first = 0x1D5EE, last = 0x1D607, math = true },
416 ["lowercasesansserifbolditalic"] = { first = 0x1D656, last = 0x1D66F, math = true, italic = true },
417 ["lowercasesansserifitalic"] = { first = 0x1D622, last = 0x1D63B, math = true, italic = true },
418 ["lowercasesansserifnormal"] = { first = 0x1D5BA, last = 0x1D5D3, math = true },
419 ["lowercasescript"] = { first = 0x1D4B6, last = 0x1D4CF, math = true, italic = true },
420 ["lowsurrogates"] = { first = 0x0DC00, last = 0x0DFFF, description = "Low Surrogates" },
421 ["lycian"] = { first = 0x10280, last = 0x1029F, description = "Lycian" },
422 ["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" },
423 ["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" },
424 ["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" },
425 ["makasar"] = { first = 0x11EE0, last = 0x11EFF, description = "Makasar" },
426 ["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" },
427 ["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
428 ["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" },
429 ["marchen"] = { first = 0x11C70, last = 0x11CBF, description = "Marchen" },
430 ["masaramgondi"] = { first = 0x11D00, last = 0x11D5F, description = "Masaram Gondi" },
431 ["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, math = true, description = "Mathematical Alphanumeric Symbols" },
432 ["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, math = true, description = "Mathematical Operators" },
433 ["mayannumerals"] = { first = 0x1D2E0, last = 0x1D2FF, description = "Mayan Numerals" },
434 ["medefaidrin"] = { first = 0x16E40, last = 0x16E9F, description = "Medefaidrin" },
435 ["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" },
436 ["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" },
437 ["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" },
438 ["meroiticcursive"] = { first = 0x109A0, last = 0x109FF, description = "Meroitic Cursive" },
439 ["meroitichieroglyphs"] = { first = 0x10980, last = 0x1099F, description = "Meroitic Hieroglyphs" },
440 ["miao"] = { first = 0x16F00, last = 0x16F9F, description = "Miao" },
441 ["miscellaneousmathematicalsymbolsa"] = { first = 0x027C0, last = 0x027EF, math = true, description = "Miscellaneous Mathematical Symbols-A" },
442 ["miscellaneousmathematicalsymbolsb"] = { first = 0x02980, last = 0x029FF, math = true, description = "Miscellaneous Mathematical Symbols-B" },
443 ["miscellaneoussymbols"] = { first = 0x02600, last = 0x026FF, math = true, description = "Miscellaneous Symbols" },
444 ["miscellaneoussymbolsandarrows"] = { first = 0x02B00, last = 0x02BFF, math = true, description = "Miscellaneous Symbols and Arrows" },
445 ["miscellaneoussymbolsandpictographs"] = { first = 0x1F300, last = 0x1F5FF, description = "Miscellaneous Symbols and Pictographs" },
446 ["miscellaneoustechnical"] = { first = 0x02300, last = 0x023FF, math = true, description = "Miscellaneous Technical" },
447 ["modi"] = { first = 0x11600, last = 0x1165F, description = "Modi" },
448 ["modifiertoneletters"] = { first = 0x0A700, last = 0x0A71F, description = "Modifier Tone Letters" },
449 ["mongolian"] = { first = 0x01800, last = 0x018AF, otf="mong", description = "Mongolian" },
450 ["mongoliansupplement"] = { first = 0x11660, last = 0x1167F, description = "Mongolian Supplement" },
451 ["mro"] = { first = 0x16A40, last = 0x16A6F, description = "Mro" },
452 ["multani"] = { first = 0x11280, last = 0x112AF, description = "Multani" },
453 ["musicalsymbols"] = { first = 0x1D100, last = 0x1D1FF, otf="musc", description = "Musical Symbols" },
454 ["myanmar"] = { first = 0x01000, last = 0x0109F, otf="mymr", description = "Myanmar" },
455 ["myanmarextendeda"] = { first = 0x0AA60, last = 0x0AA7F, description = "Myanmar Extended-A" },
456 ["myanmarextendedb"] = { first = 0x0A9E0, last = 0x0A9FF, description = "Myanmar Extended-B" },
457 ["nabataean"] = { first = 0x10880, last = 0x108AF, description = "Nabataean" },
458 ["nagmundari"] = { first = 0x1E4D0, last = 0x1E4FF, description = "Nag Mundari" },
459 ["nandinagari"] = { first = 0x119A0, last = 0x119FF, description = "Nandinagari" },
460 ["newa"] = { first = 0x11400, last = 0x1147F, description = "Newa" },
461 ["newtailue"] = { first = 0x01980, last = 0x019DF, description = "New Tai Lue" },
462 ["nko"] = { first = 0x007C0, last = 0x007FF, otf="nko", description = "NKo" },
463 ["numberforms"] = { first = 0x02150, last = 0x0218F, description = "Number Forms" },
464 ["nushu"] = { first = 0x1B170, last = 0x1B2FF, description = "Nushu" },
465 ["nyiakengpuachuehmong"] = { first = 0x1E100, last = 0x1E14F, description = "Nyiakeng Puachue Hmong" },
466 ["ogham"] = { first = 0x01680, last = 0x0169F, otf="ogam", description = "Ogham" },
467 ["olchiki"] = { first = 0x01C50, last = 0x01C7F, description = "Ol Chiki" },
468 ["oldhungarian"] = { first = 0x10C80, last = 0x10CFF, description = "Old Hungarian" },
469 ["olditalic"] = { first = 0x10300, last = 0x1032F, otf="ital", description = "Old Italic" },
470 ["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" },
471 ["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" },
472 ["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" },
473 ["oldsogdian"] = { first = 0x10F00, last = 0x10F2F, description = "Old Sogdian" },
474 ["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" },
475 ["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
476 ["olduyghur"] = { first = 0x10F70, last = 0x10FAF, description = "Old Uyghur" },
477 ["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" },
478 ["oriya"] = { first = 0x00B00, last = 0x00B7F, otf="orya", description = "Oriya" },
479 ["ornamentaldingbats"] = { first = 0x1F650, last = 0x1F67F, description = "Ornamental Dingbats" },
480 ["osage"] = { first = 0x104B0, last = 0x104FF, description = "Osage" },
481 ["osmanya"] = { first = 0x10480, last = 0x104AF, otf="osma", description = "Osmanya" },
482 ["ottomansiyaqnumbers"] = { first = 0x1ED00, last = 0x1ED4F, description = "Ottoman Siyaq Numbers" },
483 ["pahawhhmong"] = { first = 0x16B00, last = 0x16B8F, description = "Pahawh Hmong" },
484 ["palmyrene"] = { first = 0x10860, last = 0x1087F, description = "Palmyrene" },
485 ["paucinhau"] = { first = 0x11AC0, last = 0x11AFF, description = "Pau Cin Hau" },
486 ["phagspa"] = { first = 0x0A840, last = 0x0A87F, otf="phag", description = "Phags-pa" },
487 ["phaistosdisc"] = { first = 0x101D0, last = 0x101FF, description = "Phaistos Disc" },
488 ["phoenician"] = { first = 0x10900, last = 0x1091F, otf="phnx", description = "Phoenician" },
489 ["phoneticextensions"] = { first = 0x01D00, last = 0x01D7F, description = "Phonetic Extensions" },
490 ["phoneticextensionssupplement"] = { first = 0x01D80, last = 0x01DBF, description = "Phonetic Extensions Supplement" },
491 ["playingcards"] = { first = 0x1F0A0, last = 0x1F0FF, description = "Playing Cards" },
492 ["privateusearea"] = { first = 0x0E000, last = 0x0F8FF, description = "Private Use Area" },
493 ["psalterpahlavi"] = { first = 0x10B80, last = 0x10BAF, description = "Psalter Pahlavi" },
494 ["rejang"] = { first = 0x0A930, last = 0x0A95F, description = "Rejang" },
495 ["ruminumeralsymbols"] = { first = 0x10E60, last = 0x10E7F, description = "Rumi Numeral Symbols" },
496 ["runic"] = { first = 0x016A0, last = 0x016FF, otf="runr", description = "Runic" },
497 ["samaritan"] = { first = 0x00800, last = 0x0083F, description = "Samaritan" },
498 ["saurashtra"] = { first = 0x0A880, last = 0x0A8DF, description = "Saurashtra" },
499 ["sharada"] = { first = 0x11180, last = 0x111DF, description = "Sharada" },
500 ["shavian"] = { first = 0x10450, last = 0x1047F, otf="shaw", description = "Shavian" },
501 ["shorthandformatcontrols"] = { first = 0x1BCA0, last = 0x1BCAF, description = "Shorthand Format Controls" },
502 ["siddham"] = { first = 0x11580, last = 0x115FF, description = "Siddham" },
503 ["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" },
504 ["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" },
505 ["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" },
506 ["smallkanaextension"] = { first = 0x1B130, last = 0x1B16F, description = "Small Kana Extension" },
507 ["sogdian"] = { first = 0x10F30, last = 0x10F6F, description = "Sogdian" },
508 ["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" },
509 ["soyombo"] = { first = 0x11A50, last = 0x11AAF, description = "Soyombo" },
510 ["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" },
511 ["specials"] = { first = 0x0FFF0, last = 0x0FFFF, description = "Specials" },
512 ["sundanese"] = { first = 0x01B80, last = 0x01BBF, description = "Sundanese" },
513 ["sundanesesupplement"] = { first = 0x01CC0, last = 0x01CCF, description = "Sundanese Supplement" },
514 ["superscriptsandsubscripts"] = { first = 0x02070, last = 0x0209F, description = "Superscripts and Subscripts" },
515 ["supplementalarrowsa"] = { first = 0x027F0, last = 0x027FF, math = true, description = "Supplemental Arrows-A" },
516 ["supplementalarrowsb"] = { first = 0x02900, last = 0x0297F, math = true, description = "Supplemental Arrows-B" },
517 ["supplementalarrowsc"] = { first = 0x1F800, last = 0x1F8FF, math = true, description = "Supplemental Arrows-C" },
518 ["supplementalmathematicaloperators"] = { first = 0x02A00, last = 0x02AFF, math = true, description = "Supplemental Mathematical Operators" },
519 ["supplementalpunctuation"] = { first = 0x02E00, last = 0x02E7F, description = "Supplemental Punctuation" },
520 ["supplementalsymbolsandpictographs"] = { first = 0x1F900, last = 0x1F9FF, description = "Supplemental Symbols and Pictographs" },
521 ["supplementaryprivateuseareaa"] = { first = 0xF0000, last = 0xFFFFF, description = "Supplementary Private Use Area-A" },
522 ["supplementaryprivateuseareab"] = { first = 0x100000,last = 0x10FFFF, description = "Supplementary Private Use Area-B" },
523 ["suttonsignwriting"] = { first = 0x1D800, last = 0x1DAAF, description = "Sutton SignWriting" },
524 ["sylotinagri"] = { first = 0x0A800, last = 0x0A82F, otf="sylo", description = "Syloti Nagri" },
525 ["symbolsandpictographsextendeda"] = { first = 0x1FA70, last = 0x1FAFF, description = "Symbols and Pictographs Extended-A" },
526 ["symbolsforlegacycomputing"] = { first = 0x1FB00, last = 0x1FBFF, description = "Symbols for Legacy Computing" },
527 ["syriac"] = { first = 0x00700, last = 0x0074F, otf="syrc", description = "Syriac" },
528 ["syriacsupplement"] = { first = 0x00860, last = 0x0086F, description = "Syriac Supplement" },
529 ["tagalog"] = { first = 0x01700, last = 0x0171F, otf="tglg", description = "Tagalog" },
530 ["tagbanwa"] = { first = 0x01760, last = 0x0177F, otf="tagb", description = "Tagbanwa" },
531 ["tags"] = { first = 0xE0000, last = 0xE007F, description = "Tags" },
532 ["taile"] = { first = 0x01950, last = 0x0197F, otf="tale", description = "Tai Le" },
533 ["taitham"] = { first = 0x01A20, last = 0x01AAF, description = "Tai Tham" },
534 ["taiviet"] = { first = 0x0AA80, last = 0x0AADF, description = "Tai Viet" },
535 ["taixuanjingsymbols"] = { first = 0x1D300, last = 0x1D35F, description = "Tai Xuan Jing Symbols" },
536 ["takri"] = { first = 0x11680, last = 0x116CF, description = "Takri" },
537 ["tamil"] = { first = 0x00B80, last = 0x00BFF, otf="taml", description = "Tamil" },
538 ["tamilsupplement"] = { first = 0x11FC0, last = 0x11FFF, description = "Tamil Supplement" },
539 ["tangut"] = { first = 0x17000, last = 0x187FF, description = "Tangut" },
540 ["tangutsupplement"] = { first = 0x18D00, last = 0x18D7F, description = "Tangut Supplement" },
541 ["tangutcomponents"] = { first = 0x18800, last = 0x18AFF, description = "Tangut Components" },
542 ["tangsa"] = { first = 0x16A70, last = 0x16ACF, description = "Tangsa" },
543 ["telugu"] = { first = 0x00C00, last = 0x00C7F, otf="telu", description = "Telugu" },
544 ["thaana"] = { first = 0x00780, last = 0x007BF, otf="thaa", description = "Thaana" },
545 ["thai"] = { first = 0x00E00, last = 0x00E7F, otf="thai", description = "Thai" },
546 ["tibetan"] = { first = 0x00F00, last = 0x00FFF, otf="tibt", description = "Tibetan" },
547 ["tifinagh"] = { first = 0x02D30, last = 0x02D7F, otf="tfng", description = "Tifinagh" },
548 ["tirhuta"] = { first = 0x11480, last = 0x114DF, description = "Tirhuta" },
549 ["toto"] = { first = 0x1E290, last = 0x1E2BF, description = "Toto" },
550 ["transportandmapsymbols"] = { first = 0x1F680, last = 0x1F6FF, description = "Transport and Map Symbols" },
551 ["ugaritic"] = { first = 0x10380, last = 0x1039F, otf="ugar", description = "Ugaritic" },
552 ["unifiedcanadianaboriginalsyllabics"] = { first = 0x01400, last = 0x0167F, otf="cans", description = "Unified Canadian Aboriginal Syllabics" },
553 ["unifiedcanadianaboriginalsyllabicsextended"] = { first = 0x018B0, last = 0x018FF, description = "Unified Canadian Aboriginal Syllabics Extended" },
554 ["unifiedcanadianaboriginalsyllabicsextendeda"] = { first = 0x11AB0, last = 0x11ABF, description = "Unified Canadian Aboriginal Syllabics Extended-A" },
555 ["uppercasebold"] = { first = 0x1D400, last = 0x1D419, math = true },
556 ["uppercaseboldfraktur"] = { first = 0x1D56C, last = 0x1D585, math = true },
557 ["uppercasebolditalic"] = { first = 0x1D468, last = 0x1D481, math = true, italic = true },
558 ["uppercaseboldscript"] = { first = 0x1D4D0, last = 0x1D4E9, math = true, italic = true },
559 ["uppercasedoublestruck"] = { first = 0x1D538, last = 0x1D551, math = true },
560 ["uppercasefraktur"] = { first = 0x1D504, last = 0x1D51D, math = true },
561 ["uppercasegreekbold"] = { first = 0x1D6A8, last = 0x1D6C1, math = true },
562 ["uppercasegreekbolditalic"] = { first = 0x1D71C, last = 0x1D735, math = true, italic = true },
563 ["uppercasegreekitalic"] = { first = 0x1D6E2, last = 0x1D6FB, math = true, italic = true },
564 ["uppercasegreeknormal"] = { first = 0x00391, last = 0x003AA, math = true },
565 ["uppercasegreeksansserifbold"] = { first = 0x1D756, last = 0x1D76F, math = true },
566 ["uppercasegreeksansserifbolditalic"] = { first = 0x1D790, last = 0x1D7A9, math = true, italic = true },
567 ["uppercaseitalic"] = { first = 0x1D434, last = 0x1D44D, math = true, italic = true },
568 ["uppercasemonospace"] = { first = 0x1D670, last = 0x1D689, math = true },
569 ["uppercasenormal"] = { first = 0x00041, last = 0x0005A, math = true },
570 ["uppercasesansserifbold"] = { first = 0x1D5D4, last = 0x1D5ED, math = true },
571 ["uppercasesansserifbolditalic"] = { first = 0x1D63C, last = 0x1D655, math = true, italic = true },
572 ["uppercasesansserifitalic"] = { first = 0x1D608, last = 0x1D621, math = true, italic = true },
573 ["uppercasesansserifnormal"] = { first = 0x1D5A0, last = 0x1D5B9, math = true },
574 ["uppercasescript"] = { first = 0x1D49C, last = 0x1D4B5, math = true, italic = true },
575 ["vai"] = { first = 0x0A500, last = 0x0A63F, description = "Vai" },
576 ["variationselectors"] = { first = 0x0FE00, last = 0x0FE0F, description = "Variation Selectors" },
577 ["variationselectorssupplement"] = { first = 0xE0100, last = 0xE01EF, description = "Variation Selectors Supplement" },
578 ["vedicextensions"] = { first = 0x01CD0, last = 0x01CFF, description = "Vedic Extensions" },
579 ["verticalforms"] = { first = 0x0FE10, last = 0x0FE1F, description = "Vertical Forms" },
580 ["vithkuqi"] = { first = 0x10570, last = 0x105BF, description = "Vithkuqi" },
581 ["wancho"] = { first = 0x1E2C0, last = 0x1E2FF, description = "Wancho" },
582 ["warangciti"] = { first = 0x118A0, last = 0x118FF, description = "Warang Citi" },
583 ["yezidi"] = { first = 0x10E80, last = 0x10EBF, description = "Yezidi" },
584 ["yijinghexagramsymbols"] = { first = 0x04DC0, last = 0x04DFF, otf="yi", description = "Yijing Hexagram Symbols" },
585 ["yiradicals"] = { first = 0x0A490, last = 0x0A4CF, otf="yi", description = "Yi Radicals" },
586 ["yisyllables"] = { first = 0x0A000, last = 0x0A48F, otf="yi", description = "Yi Syllables" },
587 ["zanabazarsquare"] = { first = 0x11A00, last = 0x11A4F, description = "Zanabazar Square" },
588 ["znamennymusicalnotation"] = { first = 0x1CF00, last = 0x1CFCF, description = "Znamenny Musical Notation" },
589
590
591
592
593
594 ["lowercasecalligraphic"] = { first = 0x100000, last = 0x100019, math = true },
595 ["uppercasecalligraphic"] = { first = 0x100020, last = 0x100039, math = true },
596 ["lowercaseboldcalligraphic"] = { first = 0x100040, last = 0x100059, math = true },
597 ["uppercaseboldcalligraphic"] = { first = 0x100060, last = 0x100079, math = true },
598
599
600
601
602 ["lowercasesansgreek"] = { first = 0x100080, last = 0x100099, math = true },
603 ["uppercasesansgreek"] = { first = 0x1000A0, last = 0x1000B9, math = true },
604 ["lowercaseitalicsansgreek"] = { first = 0x1000C0, last = 0x1000D9, math = true },
605 ["uppercaseitalicsansgreek"] = { first = 0x1000E0, last = 0x1000F9, math = true },
606
607
608
609
610
611 ["lowercaseblackboarditalic"] = { first = 0x100100, last = 0x100119, math = true },
612 ["uppercaseblackboarditalic"] = { first = 0x100120, last = 0x100139, math = true },
613
614
615
616
617
618
619
620
621
622
623}
624
625
626
627
628
629
630
631
-- Some of the styled math alphabets have holes. For each affected block the
-- gaps table maps a slot inside the block's range onto the code point that is
-- to be used instead (all targets below sit in the 0x2100-0x214F range). The
-- table is handed out as fourth return value of characters.getrange.

for name, gaps in next, {
    lowercaseitalic = {
        [0x1D455] = 0x0210E, -- h
    },
    uppercasescript = {
        [0x1D49D] = 0x0212C, -- B
        [0x1D4A0] = 0x02130, -- E
        [0x1D4A1] = 0x02131, -- F
        [0x1D4A3] = 0x0210B, -- H
        [0x1D4A4] = 0x02110, -- I
        [0x1D4A7] = 0x02112, -- L
        [0x1D4A8] = 0x02133, -- M
        [0x1D4AD] = 0x0211B, -- R
    },
    lowercasescript = {
        [0x1D4BA] = 0x0212F, -- e
        [0x1D4BC] = 0x0210A, -- g
        [0x1D4C4] = 0x02134, -- o
    },
    uppercasefraktur = {
        [0x1D506] = 0x0212D, -- C
        [0x1D50B] = 0x0210C, -- H
        [0x1D50C] = 0x02111, -- I
        [0x1D515] = 0x0211C, -- R
        [0x1D51D] = 0x02128, -- Z
    },
    uppercasedoublestruck = {
        [0x1D53A] = 0x02102, -- C
        [0x1D53F] = 0x0210D, -- H
        [0x1D545] = 0x02115, -- N
        [0x1D547] = 0x02119, -- P
        [0x1D548] = 0x0211A, -- Q
        [0x1D549] = 0x0211D, -- R
        [0x1D551] = 0x02124, -- Z
    },
} do
    blocks[name].gaps = gaps
end
670
characters.blocks = blocks

-- Returns the first and last code point of the named block, or 0, 0 when
-- there is no such block.

function characters.blockrange(name)
    local block = blocks[name]
    if not block then
        return 0, 0
    end
    return block.first, block.last
end
681
-- Fallback lookup for block names that are not stored verbatim: everything
-- that is not an ascii letter is removed (so spaces, dashes and digits go)
-- and the remainder is lowercased before we look again.

setmetatableindex(blocks, function(t,k)
    if not k then
        return k
    end
    local normalized = lower((gsub(k,"[^a-zA-Z]","")))
    return rawget(t,normalized)
end)
685
local otfscripts = allocate()

characters.otfscripts = otfscripts

-- Resolves a code point to the otf script tag of a block that contains it.
-- On a hit the tag (or "dflt" when the block has no otf field) is stored for
-- every slot in that block so the scan over blocks happens once per block.
-- A code point that is in no block at all is stored as "dflt" on its own.

setmetatableindex(otfscripts,function(t,unicode)
    for _, block in next, blocks do
        local first, last = block.first, block.last
        if first <= unicode and unicode <= last then
            local script = block.otf or "dflt"
            for slot=first,last do
                t[slot] = script
            end
            return script
        end
    end
    local script = "dflt"
    t[unicode] = script
    return script
end)
705
local splitter1 = lpeg.splitat(S(":-"))
local splitter2 = lpeg.splitat(S(" +-"),true)

-- Converts a range specification into code points.
--
-- name       : a block name (anything but letters and digits is ignored, case
--              is ignored too), a single number, or two numbers separated by
--              ":" or "-"; a double quote is accepted as hex prefix and gets
--              replaced by "0x"
-- expression : when set, a plain number is taken as is (so decimal unless it
--              has an 0x prefix) and "<blockname> <offset expression>" shifts
--              the range of the block by the evaluated offset
--
-- Returns first, last, description, gaps for a block and first, last, nil in
-- all other cases. When nothing could be made of the specification first and
-- last are nil.

function characters.getrange(name,expression)
    local range = rawget(blocks,lower(gsub(name,"[^a-zA-Z0-9]","")))
    if range then
        return range.first, range.last, range.description, range.gaps
    end
    name = gsub(name,'"',"0x")
    if expression then
        local n = tonumber(name)
        if n then
            return n, n, nil
        end
        local first, rest = lpegmatch(splitter2,name)
        -- without a remainder there is nothing to evaluate (and concatenating
        -- a nil would raise) so then we just fall through
        local range = first and rest and rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]","")))
        if range then
            -- NOTE(review): the remainder of the specification is compiled and
            -- run as Lua code, so specifications must come from a trusted source
            local s = loadstring("return 0 " .. rest)
            if type(s) == "function" then
                local d = s()
                if type(d) == "number" then
                    return range.first + d, range.last + d, nil
                end
            end
        end
    end
    local start, stop = lpegmatch(splitter1,name)
    if start and stop then
        -- hexadecimal is tried first, so "10-20" means 0x10 upto 0x20
        start = tonumber(start,16) or tonumber(start)
        stop  = tonumber(stop, 16) or tonumber(stop)
        if start and stop then
            return start, stop, nil
        end
    end
    local slot = tonumber(name,16) or tonumber(name)
    return slot, slot, nil
end
745
746
747
748
-- Verbose names for the two letter category tags and for the detail tags.

characters.categorytags = allocate {
    -- letters
    ["lu"] = "Letter Uppercase",
    ["ll"] = "Letter Lowercase",
    ["lt"] = "Letter Titlecase",
    ["lm"] = "Letter Modifier",
    ["lo"] = "Letter Other",
    -- marks
    ["mn"] = "Mark Nonspacing",
    ["mc"] = "Mark Spacing Combining",
    ["me"] = "Mark Enclosing",
    -- numbers
    ["nd"] = "Number Decimal Digit",
    ["nl"] = "Number Letter",
    ["no"] = "Number Other",
    -- punctuation
    ["pc"] = "Punctuation Connector",
    ["pd"] = "Punctuation Dash",
    ["ps"] = "Punctuation Open",
    ["pe"] = "Punctuation Close",
    ["pi"] = "Punctuation Initial Quote",
    ["pf"] = "Punctuation Final Quote",
    ["po"] = "Punctuation Other",
    -- symbols
    ["sm"] = "Symbol Math",
    ["sc"] = "Symbol Currency",
    ["sk"] = "Symbol Modifier",
    ["so"] = "Symbol Other",
    -- separators
    ["zs"] = "Separator Space",
    ["zl"] = "Separator Line",
    ["zp"] = "Separator Paragraph",
    -- others
    ["cc"] = "Other Control",
    ["cf"] = "Other Format",
    ["cs"] = "Other Surrogate",
    ["co"] = "Other Private Use",
    ["cn"] = "Other Not Assigned",
}

characters.detailtags = allocate {
    ["sl"] = "small letter",
    ["bl"] = "big letter",
    ["im"] = "iteration mark",
    ["pm"] = "prolonged sound mark",
}

local categorytags = characters.categorytags
local detailtags   = characters.detailtags
791
792
793
794
795
796
-- Sets of category tags: a tag that is a member maps onto true.

local is_character = allocate(tohash {
    "lu", "ll", "lt", "lm", "lo", -- letters
    "mn",                         -- nonspacing marks
    "nd", "nl", "no",             -- numbers
    "pc", "pd", "ps", "pe", "pi", "pf", "po", -- punctuation
    "sm", "sc", "sk", "so",       -- symbols
})

local is_letter = allocate(tohash {
    "lu", "ll", "lt", "lm", "lo",
})

local is_command = allocate(tohash {
    "cf", "zs",
})

local is_spacing = allocate(tohash {
    "zs", "zl", "zp",
})
817
-- Set of category tags that identify marks. The tag "ms" is not among the
-- categories that this file knows about (those are "mn", "mc" and "me") and
-- looks like a slip for "mc", the spacing combining marks. We add "mc" and
-- keep "ms" so that nothing that might test for it changes.
-- NOTE(review): "me" (enclosing marks) is left out as before; confirm that
-- this is intentional.

local is_mark = allocate ( tohash {
    "mn", "mc", "ms",
} )
821
-- More sets of category tags, followed by the public names of all sets.

local is_punctuation = allocate(tohash {
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
})

local is_hyphenator = allocate(tohash {
    "pd",
})

local is_symbol = allocate(tohash {
    "sm", "sc", "sk", "so",
})

local can_have_space = allocate(tohash {
    "lu", "ll", "lt", "lm", "lo", -- letters
    "nd", "nl", "no",             -- numbers
    "ps", "pi",                   -- opening punctuation and initial quotes
    "sm", "sc", "sk", "so",       -- symbols
})

characters.is_character   = is_character
characters.is_letter      = is_letter
characters.is_command     = is_command
characters.is_spacing     = is_spacing
characters.is_mark        = is_mark
characters.is_punctuation = is_punctuation
characters.is_hyphenator  = is_hyphenator
characters.is_symbol      = is_symbol
characters.can_have_space = can_have_space
858
-- Shared index fallback for the category sets: a numeric key is taken as a
-- code point and answered with the membership of its category. The result is
-- not stored in the set. A code point without data (or without category) now
-- yields nil instead of raising an error, which is in line with accessors
-- like characters.contextname.

local mti = function(t,k)
    if type(k) == "number" then
        local d = data[k]
        local c = d and d.category
        return c and rawget(t,c)
    else
        -- any other key that is not in the set is just not a member
    end
end

-- NOTE(review): characters.is_mark does not get this fallback, so indexing
-- it with a code point gives nil; confirm that this is intended.

setmetatableindex(characters.is_character,   mti)
setmetatableindex(characters.is_letter,      mti)
setmetatableindex(characters.is_command,     mti)
setmetatableindex(characters.is_spacing,     mti)
setmetatableindex(characters.is_punctuation, mti)
setmetatableindex(characters.is_hyphenator,  mti)
setmetatableindex(characters.is_symbol,      mti)
setmetatableindex(characters.can_have_space, mti)
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
-- Descriptions of the (lowercased) line break classes. The descriptions of
-- "ap" and "as" had typos ("pre-pase" and "ksara") that are fixed here.

characters.linebreaks = allocate {

    -- non-tailorable line breaking classes

    ["bk"]  = "mandatory break",
    ["cr"]  = "carriage return",
    ["lf"]  = "line feed",
    ["cm"]  = "combining mark",
    ["nl"]  = "next line",
    ["sg"]  = "surrogate",
    ["wj"]  = "word joiner",
    ["zw"]  = "zero width space",
    ["gl"]  = "non-breaking (glue)",
    ["sp"]  = "space",
    ["zwj"] = "zero width joiner",

    -- break opportunities

    ["b2"]  = "break opportunity before and after",
    ["ba"]  = "break after",
    ["bb"]  = "break before",
    ["hy"]  = "hyphen",
    ["cb"]  = "contingent break opportunity",

    -- characters prohibiting certain breaks

    ["cl"]  = "close punctuation",
    ["cp"]  = "close parenthesis",
    ["ex"]  = "exclamation/interrogation",
    ["in"]  = "inseparable",
    ["ns"]  = "nonstarter",
    ["op"]  = "open punctuation",
    ["qu"]  = "quotation",

    -- numeric context

    ["is"]  = "infix numeric separator",
    ["nu"]  = "numeric",
    ["po"]  = "postfix numeric",
    ["pr"]  = "prefix numeric",
    ["sy"]  = "symbols allowing break after",

    -- other characters

    ["ai"]  = "ambiguous (alphabetic or ideographic)",
    ["ak"]  = "aksara",
    ["al"]  = "alphabetic",
    ["ap"]  = "aksara pre-base",
    ["as"]  = "aksara start",
    ["cj"]  = "conditional japanese starter",
    ["eb"]  = "emoji base",
    ["em"]  = "emoji modifier",
    ["h2"]  = "hangul lv syllable",
    ["h3"]  = "hangul lvt syllable",
    ["hl"]  = "hebrew letter",
    ["id"]  = "ideographic",
    ["jl"]  = "hangul l jamo",
    ["jt"]  = "hangul t jamo",
    ["jv"]  = "hangul v jamo",
    ["ri"]  = "regional indicator",
    ["sa"]  = "complex context dependent (south east asian)",
    ["vf"]  = "virama final",
    ["vi"]  = "virama",
    ["xx"]  = "unknown",

}
959
960
961
962
963
-- Verbose names of the (lowercased) bidirectional classes.

characters.bidi = allocate {
    -- strong
    ["l"]   = "Left-to-Right",
    ["lre"] = "Left-to-Right Embedding",
    ["lro"] = "Left-to-Right Override",
    ["r"]   = "Right-to-Left",
    ["al"]  = "Right-to-Left Arabic",
    ["rle"] = "Right-to-Left Embedding",
    ["rlo"] = "Right-to-Left Override",
    -- weak
    ["pdf"] = "Pop Directional Format",
    ["en"]  = "European Number",
    ["es"]  = "European Number Separator",
    ["et"]  = "European Number Terminator",
    ["an"]  = "Arabic Number",
    ["cs"]  = "Common Number Separator",
    ["nsm"] = "Non-Spacing Mark",
    ["bn"]  = "Boundary Neutral",
    -- neutral
    ["b"]   = "Paragraph Separator",
    ["s"]   = "Segment Separator",
    ["ws"]  = "Whitespace",
    ["on"]  = "Other Neutrals",
}
985
986
987
988
-- A two way mapping between combining accents and their spacing variants.
-- Every pair is entered in both directions. The entry for the grave accent
-- used to point at 0x0333 and thereby broke that symmetry; it now points at
-- the combining grave 0x0300 that itself maps onto 0x0060.

if not characters.fallbacks then

    characters.fallbacks = allocate {
        [0x0308] = 0x00A8, [0x00A8] = 0x0308, -- dieresis
        [0x0304] = 0x00AF, [0x00AF] = 0x0304, -- macron
        [0x0301] = 0x00B4, [0x00B4] = 0x0301, -- acute
        [0x0327] = 0x00B8, [0x00B8] = 0x0327, -- cedilla
        [0x0302] = 0x02C6, [0x02C6] = 0x0302, -- circumflex
        [0x030C] = 0x02C7, [0x02C7] = 0x030C, -- caron
        [0x0306] = 0x02D8, [0x02D8] = 0x0306, -- breve
        [0x0307] = 0x02D9, [0x02D9] = 0x0307, -- dot above
        [0x030A] = 0x02DA, [0x02DA] = 0x030A, -- ring above
        [0x0328] = 0x02DB, [0x02DB] = 0x0328, -- ogonek
        [0x0303] = 0x02DC, [0x02DC] = 0x0303, -- tilde
        [0x030B] = 0x02DD, [0x02DD] = 0x030B, -- double acute
        [0x0305] = 0x203E, [0x203E] = 0x0305, -- overline
        [0x0300] = 0x0060, [0x0060] = 0x0300, -- grave
    }

end

if storage then
    storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks")
end
1018
-- Lazy table: code point -> direction field of its data entry, or false
-- when there is no entry or no such field. The answer is stored.

characters.directions = { }

setmetatableindex(characters.directions,function(t,k)
    local entry     = data[k]
    local direction = entry and entry.direction or false
    t[k] = direction
    return direction
end)
1033
-- Lazy table: code point -> mirror field of its data entry, or false when
-- there is no entry or no such field. The answer is stored.

characters.mirrors = { }

setmetatableindex(characters.mirrors,function(t,k)
    local entry  = data[k]
    local mirror = entry and entry.mirror or false
    t[k] = mirror
    return mirror
end)
1048
-- Lazy table: code point -> textclass field of its data entry, or false
-- when there is no entry or no such field. The answer is stored.

characters.textclasses = { }

setmetatableindex(characters.textclasses,function(t,k)
    local entry     = data[k]
    local textclass = entry and entry.textclass or false
    t[k] = textclass
    return textclass
end)
1063
1064
1065
1066
1067
1068
-- Simple accessors: each returns the requested field of a code point or an
-- empty string when the code point or the field is unknown.

function characters.contextname(n)
    local entry = data[n]
    return entry and entry.contextname or ""
end

function characters.adobename(n)
    local entry = data[n]
    return entry and entry.adobename or ""
end

function characters.description(n)
    local entry = data[n]
    return entry and entry.description or ""
end
1072
1073
-- Returns the category tag of code point n, or its verbose name (looked up
-- in categorytags) when verbose is set. An empty string is returned when
-- there is no category; that now also covers a code point without data,
-- which used to raise an error, unlike the other accessors in this file.

function characters.category(n,verbose)
    local d = data[n]
    local c = d and d.category
    if not c then
        return ""
    elseif verbose then
        return categorytags[c]
    else
        return c
    end
end
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
-- Turns a code point, or a table with code points, into an utf string.

local function toutfstring(s)
    if type(s) ~= "table" then
        return utfchar(s)
    end
    return utfchar(unpack(s))
end

utf.tostring = toutfstring
1104
-- Lazy table: key -> category of its data entry. When there is no entry or
-- no category the key itself is the answer. The answer is stored.

local categories = allocate()

characters.categories = categories

setmetatableindex(categories, function(t,u)
    if not u then
        return
    end
    local entry    = data[u]
    local category = entry and entry.category or u
    t[u] = category
    return category
end)
1108
1109
1110
1111
-- Lazy tables that map a key onto the lowercase, uppercase, shape or
-- "fs" code of its data entry. Without such a code a string key is answered
-- with utfbyte of that string and any other key with itself. Answers are
-- stored.

local lccodes = allocate() characters.lccodes = lccodes
local uccodes = allocate() characters.uccodes = uccodes
local shcodes = allocate() characters.shcodes = shcodes
local fscodes = allocate() characters.fscodes = fscodes

do

    local function resolver(field)
        return function(t,u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field] or (type(u) == "string" and utfbyte(u)) or u
                t[u] = code
                return code
            end
        end
    end

    setmetatableindex(lccodes, resolver("lccode"))
    setmetatableindex(uccodes, resolver("uccode"))
    setmetatableindex(shcodes, resolver("shcode"))
    setmetatableindex(fscodes, resolver("fscode"))

end
1121
-- The string variants of the code tables: the code (or table of codes) is
-- converted with toutfstring. Without a code a numeric key is answered with
-- its utf character and any other key with itself. Answers are stored.

local lcchars = allocate() characters.lcchars = lcchars
local ucchars = allocate() characters.ucchars = ucchars
local shchars = allocate() characters.shchars = shchars
local fschars = allocate() characters.fschars = fschars

do

    local function resolver(field)
        return function(t,u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field]
                local str   = code and toutfstring(code) or (type(u) == "number" and utfchar(u)) or u
                t[u] = str
                return str
            end
        end
    end

    setmetatableindex(lcchars, resolver("lccode"))
    setmetatableindex(ucchars, resolver("uccode"))
    setmetatableindex(shchars, resolver("shcode"))
    setmetatableindex(fschars, resolver("fscode"))

end
1131
-- Lazy tables: key -> decomposed (or specials) field of its data entry, or
-- false when there is none. Answers are stored; a nil or false key gives
-- nothing.

local decomposed = allocate() characters.decomposed = decomposed
local specials   = allocate() characters.specials   = specials

setmetatableindex(decomposed, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local found = entry and entry.decomposed or false
    t[u] = found
    return found
end)

setmetatableindex(specials, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local found = entry and entry.specials or false
    t[u] = found
    return found
end)
1152
local specialchars = allocate() characters.specialchars = specialchars
local descriptions = allocate() characters.descriptions = descriptions
local synonyms     = allocate() characters.synonyms     = synonyms

-- Lazy table: for a key whose data entry has specials, the answer is the
-- string made from those components (the first field of specials is skipped)
-- that are letters; it is stored under the key. Otherwise the answer is the
-- key itself, as utf character when the key is a number, and then it is
-- stored under that answer and not under the key that was asked for.

setmetatableindex(specialchars, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local parts = entry and entry.specials
    if not parts then
        local str = type(u) == "number" and utfchar(u) or u
        t[str] = str
        return str
    end
    local letters, n = { }, 0
    for i=2,#parts do
        local code = parts[i]
        if is_letter[data[code].category] then
            n = n + 1
            letters[n] = utfchar(code)
        end
    end
    local str = concat(letters)
    t[u] = str
    return str
end)
1184
-- On a miss the table is filled in one go from the data: each description,
-- with spaces removed and lowercased, maps onto its code point. A key that
-- is still unknown after that is stored as itself, but this first lookup
-- returns nil for it.

setmetatableindex(descriptions, function(t,k)
    for unicode, entry in next, data do
        local name = entry.description
        if name then
            local compact = find(name," ",1,true) and gsub(name," ","") or name
            t[lower(compact)] = unicode
        end
    end
    local found = rawget(t,k)
    if not found then
        t[k] = k
    end
    return found
end)
1203
-- On a miss the table is filled in one go from the data: each synonyms
-- field, with spaces removed, maps onto its code point. A key that is still
-- unknown after that is stored as itself (the same approach as in the
-- descriptions table) while this first lookup returns nil for it. This used
-- to index the table with the nil result of the lookup, which raised an
-- error on every miss.
-- NOTE(review): the code treats the synonyms field as a string; when the
-- data stores a list of strings there the find call fails, so verify this
-- against char-def.

setmetatableindex(synonyms, function(t,k)
    for u, c in next, data do
        local s = c.synonyms
        if s then
            if find(s," ",1,true) then
                s = gsub(s," ","")
            end
            t[s] = u
        end
    end
    local s = rawget(t,k)
    if not s and k ~= nil then
        t[k] = k
    end
    return s
end)
1221
-- Resolves what was asked for into a code point: something that converts to
-- a number is returned as number, a string is looked up in the descriptions,
-- first as given and then with spaces removed. Anything else gives nothing.

function characters.unicodechar(asked)
    local n = tonumber(asked)
    if n then
        return n
    end
    if type(asked) ~= "string" then
        return
    end
    local found = descriptions[asked]
    if found then
        return found
    end
    local compact = gsub(asked," ","")
    return descriptions[compact]
end
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
-- Substitution patterns that run every utf character of a string through
-- the lcchars, ucchars or shchars table.

local tolower, toupper, toshape

do

    local function remapper(mapping)
        return Cs((utf8character/mapping)^0)
    end

    tolower = remapper(lcchars)
    toupper = remapper(ucchars)
    toshape = remapper(shchars)

end

lpegpatterns.tolower = tolower
lpegpatterns.toupper = toupper
lpegpatterns.toshape = toshape
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
-- Collects the components of characters whose specials have more than two
-- fields: the "compat" ones end up in splits.compat and the "char" and "with"
-- ones in splits.char. The first field (the kind) is not part of the stored
-- list.

if not characters.splits then

    local char, compat = allocate(), allocate()

    local splits = { char = char, compat = compat }

    characters.splits = splits

    local targets = {
        compat = compat,
        char   = char,
        with   = char,
    }

    for unicode, entry in next, characters.data do
        local specials = entry.specials
        if specials and #specials > 2 then
            local target = targets[specials[1]]
            if target then
                target[unicode] = { unpack(specials,2) }
            end
        end
    end

    if storage then
        storage.register("characters/splits", splits, "characters.splits")
    end

end
1316
-- Builds three string to string hashes from the data. A character with an
-- lccode goes into lhash; one without lccode but with an uccode goes into
-- uhash; one with a shcode goes (also) into shash. A code is either a number
-- or a table; of the tables only those with exactly two entries are used.

if not characters.lhash then

    local lhash = allocate() characters.lhash = lhash
    local uhash = allocate() characters.uhash = uhash
    local shash = allocate() characters.shash = shash

    -- gives nil for a table that does not have two entries

    local function tostr(code)
        if type(code) == "number" then
            return utfchar(code)
        elseif #code == 2 then
            return utfchar(code[1]) .. utfchar(code[2])
        end
    end

    local function register(hash,unicode,code)
        local str = tostr(code)
        if str then
            hash[utfchar(unicode)] = str
        end
    end

    for unicode, entry in next, characters.data do
        local lccode = entry.lccode
        if lccode then
            register(lhash,unicode,lccode)
        else
            local uccode = entry.uccode
            if uccode then
                register(uhash,unicode,uccode)
            end
        end
        local shcode = entry.shcode
        if shcode then
            register(shash,unicode,shcode)
        end
    end

    if storage then
        storage.register("characters/lhash", lhash, "characters.lhash")
        storage.register("characters/uhash", uhash, "characters.uhash")
        storage.register("characters/shash", shash, "characters.shash")
    end

end
1368
-- The hashes are turned into patterns: the single character ones replace a
-- character that is a key in the hash by its value, the string ones do that
-- for a whole string and pass every other utf character as is.

local lhash = characters.lhash
local uhash = characters.uhash
local shash = characters.shash

mark(lhash)
mark(uhash)
mark(shash)

local utf8lowercharacter = utfchartabletopattern(lhash) / lhash
local utf8uppercharacter = utfchartabletopattern(uhash) / uhash
local utf8shapecharacter = utfchartabletopattern(shash) / shash

lpegpatterns.utf8lowercharacter = utf8lowercharacter
lpegpatterns.utf8uppercharacter = utf8uppercharacter
lpegpatterns.utf8shapecharacter = utf8shapecharacter

local utf8lower = Cs((utf8lowercharacter + utf8character)^0)
local utf8upper = Cs((utf8uppercharacter + utf8character)^0)
local utf8shape = Cs((utf8shapecharacter + utf8character)^0)

lpegpatterns.utf8lower = utf8lower
lpegpatterns.utf8upper = utf8upper
lpegpatterns.utf8shape = utf8shape
1388
-- String converters; each gives an empty string when there is no string or
-- when the pattern does not match.

function characters.lower(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8lower,str) or ""
end

function characters.upper(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8upper,str) or ""
end

function characters.shaped(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8shape,str) or ""
end

lpeg.setutfcasers(characters.lower,characters.upper)
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
-- Returns str with everything but letters removed. When spacing is set a run
-- of spacing characters (and whatever else sits between two letters along
-- with it) is replaced by one space, but never at the start of the result.
--
-- The test used to be "n > 1" and the pending space was only forgotten once a
-- space had been added, so a space after a one letter start (or a leading
-- space) showed up one or more letters too late: "a bc" became "ab c".

function characters.lettered(str,spacing)
    local new, n = { }, 0
    if spacing then
        local done = false -- a spacing character was seen since the last letter
        for u in utfvalues(str) do
            local c = data[u].category
            if is_letter[c] then
                if done then
                    if n > 0 then
                        n = n + 1
                        new[n] = " "
                    end
                    done = false
                end
                n = n + 1
                new[n] = utfchar(u)
            elseif is_spacing[c] then
                done = true
            end
        end
    else
        for u in utfvalues(str) do
            if is_letter[data[u].category] then
                n = n + 1
                new[n] = utfchar(u)
            end
        end
    end
    return concat(new)
end
1457
1458
1459
-- Function wrappers around the uccodes and lccodes tables.

function characters.uccode(n)
    return uccodes[n]
end

function characters.lccode(n)
    return lccodes[n]
end
1462
-- Returns the shape code(s) of n as looked up in shcodes: the first and last
-- entry when that is a table, otherwise the code itself (or n when there is
-- none) followed by nil.

function characters.shape(n)
    local shcode = shcodes[n]
    if type(shcode) == "table" then
        return shcode[1], shcode[#shcode]
    end
    return shcode or n, nil
end
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
-- Collects super- and subscripts (only those with exactly one component) and
-- fractions (one or more components) from the specials in the data. What
-- does not qualify is reported when trace_defining is set.

if not characters.superscripts then

    local superscripts = allocate() characters.superscripts = superscripts
    local subscripts   = allocate() characters.subscripts   = subscripts
    local fractions    = allocate() characters.fractions    = fractions

    local function ignored(what,k,v)
        if trace_defining then
            report_defining("ignoring %s %a, char %c, description %a",what,ustring(k),k,v.description)
        end
    end

    for k, v in next, data do
        local specials = v.specials
        local what     = specials and specials[1]
        if what == "super" then
            if #specials == 2 then
                superscripts[k] = specials[2]
            else
                ignored("superscript",k,v)
            end
        elseif what == "sub" then
            if #specials == 2 then
                subscripts[k] = specials[2]
            else
                ignored("subscript",k,v)
            end
        elseif what == "fraction" then
            if #specials > 1 then
                fractions[k] = { unpack(specials,2) }
            else
                ignored("fraction",k,v)
            end
        end
    end

    if storage then
        storage.register("characters/superscripts", superscripts, "characters.superscripts")
        storage.register("characters/subscripts",   subscripts,   "characters.subscripts")
        storage.register("characters/fractions",    fractions,    "characters.fractions")
    end

end
1593
-- Reports each entry of utotable(str) on a line of its own, preceded by its
-- index.

function characters.showstring(str)
    local entries = utotable(str)
    local count   = #entries
    for index=1,count do
        report_defining("split % 3i : %C",index,entries[index])
    end
end
1600
1601do
1602
1603
1604
-- Normalization of emoji descriptions. The substitution pattern below
--
--   * rewrites "medium light skin tone" and "medium dark skin tone" to their
--     hyphenated forms "medium-light skin tone" and "medium-dark skin tone"
--   * drops a possessive: an apostrophe (curly or straight) followed by "s"
--   * drops the listed punctuation characters and curly double quotes
--   * copies every other character unchanged
--
-- The order of the alternatives matters: the possessive has to be tried
-- before the punctuation because the straight apostrophe is in both sets.
-- As at least one character is required, matching an empty string fails.

local any         = P(1)
local punctuation = S([['".,:;-+()]]) + P('“') + P('”')
local quote       = P("’") + P("'")

local joinedtone  = (P("medium light") / "medium-light" + P("medium dark") / "medium-dark") * P(" skin tone")
local possessive  = (quote * P("s")) / ""
local stripped    = punctuation / ""

local pattern     = Cs((joinedtone + possessive + stripped + any)^1)
1616
-- Load the emoji definitions from "char-emj.lua".
--
-- The file is located with resolvers.findfile and run with dofile; when it
-- cannot be found (or yields nothing) an empty table is used instead. Each
-- entry maps a description onto a list of code points.
--
-- Returns two tables:
--
--   definitions : the table as returned by the file
--   lookup      : normalized description -> utf string made from the code
--                 points (the description itself is the key when the
--                 normalization pattern does not match)

local function load()
    local filename    = resolvers.findfile("char-emj.lua")
    local definitions = nil
    if filename and filename ~= "" then
        definitions = dofile(filename)
    end
    if not definitions then
        definitions = { }
    end
    local lookup = { }
    for description, codepoints in next, definitions do
        local key    = lpegmatch(pattern,description) or description
        local pieces = { }
        for index=1,#codepoints do
            pieces[index] = utfchar(codepoints[index])
        end
        lookup[key] = concat(pieces)
    end
    return definitions, lookup
end

-- Both tables are filled on demand by the first function that needs them.

local data = nil
local hash = nil
1634
-- Normalize an emoji description in the same way as the keys of the emoji
-- lookup table are normalized: "medium light/dark skin tone" gets hyphenated,
-- possessives and punctuation are dropped.
--
-- name : the description to normalize (a string)
--
-- Returns the normalized string. The pattern needs at least one character, so
-- for an empty string there is no match and the name is returned as it is.
-- This fallback used to be a table ({ name }) while a match gave a string;
-- now a string is returned in both cases, which is also what the loader and
-- emoji.compact do with their 'lpegmatch(...) or name' fallback.

function characters.emojized(name)
    return lpegmatch(pattern,name) or name
end
1643
-- Helper patterns for resolving emoji names:
--
--   pattern_0 : expands shortcuts,  "m-l-s-t" -> "medium-light skin tone"
--               (also "m-d-", "l-", "m-" and "d-" followed by "s-t")
--   pattern_1 : removes a skin tone qualifier that is preceded by a space or
--               followed by a space or the end of the string
--   pattern_2 : removes "woman" or "man" under the same conditions
--   pattern_4 : the reverse of pattern_0, "medium-light skin tone" -> "m-l-s-t"
--
-- All four need at least one character, so they fail on an empty string.
--
-- NOTE(review): the skin tone qualifier used by pattern_1 only knows the
-- spaced form ("medium light skin tone"); for the hyphenated form only the
-- trailing "light skin tone" or "dark skin tone" seems to be removed, which
-- leaves "medium-" behind. Confirm that this is intended.

local leadingspace = P(" ")
local boundary     = P(-1) + P(" ")

local skintone     = P("medium ")^0 * (P("light ") + P("dark "))^0 * P("skin tone")
local gender       = P("woman") + P("man")

-- A word is dropped together with the space in front of it or, when that
-- does not apply, together with the space (if any) that follows it.

local function removable(word)
    return (leadingspace * word + word * boundary) / ""
end

local shorttone    = P("m-l-") / "medium-light"
                   + P("m-d-") / "medium-dark"
                   + P("l-")   / "light"
                   + P("m-")   / "medium"
                   + P("d-")   / "dark"

local expanded     = shorttone * (P("s-t") / " skin tone")

local lightordark  = P("light") / "l" + P("dark") / "d"

local longtone     = (P("medium-") / "m-") * lightordark
                   + P("medium") / "m"
                   + lightordark

local compacted    = longtone * (P(" skin tone") / "-s-t")

local pattern_0    = Cs((expanded + any)^1)
local pattern_1    = Cs((removable(skintone) + any)^1)
local pattern_2    = Cs((removable(gender) + any)^1)
local pattern_4    = Cs((compacted + any)^1)
1666
1667
1668
1669
1670
1671
1672
-- Composition of emoji sequences that are not looked up but built from their
-- description:
--
--   family <parent> <parent> <child> <child>   (at most two of each)
--   couple with heart <parent> <parent>
--   kiss <parent> <parent>
--
-- A parent is "man" or "woman", a child is "baby", "boy" or "girl" and each
-- of them can be followed by a skin tone ("light skin tone", "medium-light
-- skin tone", ...). The words are replaced by their characters, the leading
-- keyword is dropped and the spaces between members become zero width
-- joiners. The match is not anchored at the end of the string.

local function replaced(word,unicode)
    return P(word) / utfchar(unicode)
end

local tone   = replaced("light skin tone",       0x1F3FB)
             + replaced("medium-light skin tone",0x1F3FC)
             + replaced("medium skin tone",      0x1F3FD)
             + replaced("medium-dark skin tone", 0x1F3FE)
             + replaced("dark skin tone",        0x1F3FF)

local adult  = replaced("man",  0x1F468)
             + replaced("woman",0x1F469)

local young  = replaced("baby",0x1F476)
             + replaced("boy", 0x1F466)
             + replaced("girl",0x1F467)

local zwj    = utfchar(0x200D)
local heart  = utfchar(0x2764) .. utfchar(0xFE0F) .. zwj
local kiss   = heart .. utfchar(0x1F48B) .. zwj

local space  = P(" ")
local final  = P(-1)

-- A member ends at the end of the string or at a run of spaces, which then
-- turns into a joiner. An optional skin tone goes in between; the space in
-- front of it is dropped.

local separator = space^1 / zwj + final
local tail      = (space / "") * tone * separator + separator

local p_parent  = adult * tail
local p_child   = young * tail

-- The keyword that starts a sequence, dropped together with the spaces
-- that follow it.

local function keyword(word)
    return (P(word) * space^1) / ""
end

local p_family  = Cs(keyword("family") * p_parent^-2 * p_child^-2)
local p_couple  = Cs(keyword("couple with heart") * p_parent * Cc(heart) * p_parent)
local p_kiss    = Cs(keyword("kiss") * p_parent * Cc(kiss) * p_parent)

local p_special = p_family + p_couple + p_kiss
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
-- The public emoji namespace.

local emoji = { }
characters.emoji = emoji

-- Results of earlier resolve calls that were not direct lookups: a string
-- for a resolved name, false for a name that could not be resolved. The keys
-- are strings and strings are never removed from a weak table, so the weak
-- mode has no effect here; it is kept as it was.

local cache = setmetatable({ }, { __mode = "k" } )

-- Resolve an emoji description into its utf string.
--
-- name : a description, e.g. as normalized by characters.emojized; skin tone
--        shortcuts like "m-l-s-t" are accepted
--
-- The following steps are tried in this order:
--
--   1. a direct lookup of the name
--   2. the cache of earlier results
--   3. a direct lookup of the name with expanded shortcuts
--   4. composition of a family, couple with heart or kiss sequence
--   5. a lookup with the skin tone removed
--   6. a lookup with "woman" or "man" removed
--
-- Returns the utf string, or nothing when the name can't be resolved.
--
-- Two things differ from the previous implementation. The result is cached
-- under the name that was passed in, which is also the key that is used for
-- looking up the cache (it used to be stored under the expanded name, so a
-- name with a shortcut was never found in the cache). And step 3 is new:
-- without it a name with a shortcut was never matched exactly, only after
-- removing its skin tone or gender.

function emoji.resolve(name)
    if not hash then
        data, hash = load()
    end
    local found = hash[name]
    if found then
        return found
    end
    found = cache[name]
    if found then
        return found
    elseif found == false then
        -- known to be unresolvable
        return
    end
    -- expand shortcuts
    local expanded = lpegmatch(pattern_0,name) or name
    if expanded ~= name then
        found = hash[expanded]
    end
    -- compose family, couple and kiss sequences
    if not found then
        found = lpegmatch(p_special,expanded)
    end
    -- simplify: no skin tone (a failed match gives a nil key, which is
    -- harmless when reading)
    if not found then
        found = hash[lpegmatch(pattern_1,expanded)]
    end
    -- simplify: no gender
    if not found then
        found = hash[lpegmatch(pattern_2,expanded)]
    end
    cache[name] = found or false
    if found then
        return found
    end
end
1765
-- Give access to the emoji tables; they are loaded first when that has not
-- happened yet.
--
-- Returns the lookup table (normalized description -> utf string) followed
-- by the table as loaded from the definition file.

function emoji.known()
    if hash then
        return hash, data
    end
    data, hash = load()
    return hash, data
end
1772
-- Abbreviate the skin tone in a description, for instance "medium-light skin
-- tone" becomes "m-l-s-t" and "dark skin tone" becomes "d-s-t".
--
-- name : the description to compact
--
-- Returns the compacted string, or the name itself when the pattern does not
-- match (it needs at least one character).

function emoji.compact(name)
    local compact = lpegmatch(pattern_4,name)
    if compact then
        return compact
    end
    return name
end
1776
1777end
1778
1779
1780
1781return characters
1782 |