-- Identification record for this file, stored in the global 'modules'
-- registry (created here when it does not exist yet).
modules = modules or { }

modules['char-ini'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9
10
11
12
13
14local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable
15local concat, unpack, tohash, insert = table.concat, table.unpack, table.tohash, table.insert
16local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
17local format, lower, gsub, find = string.format, string.lower, string.gsub, string.find
18local P, R, S, C, Cs, Ct, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.V
19local formatters = string.formatters
20
21if not characters then require("char-def") end
22
23local lpegpatterns = lpeg.patterns
24local lpegmatch = lpeg.match
25local utf8byte = lpegpatterns.utf8byte
26local utf8character = lpegpatterns.utf8character
27
28local utfchartabletopattern = lpeg.utfchartabletopattern
29
30local allocate = utilities.storage.allocate
31local mark = utilities.storage.mark
32
33local setmetatableindex = table.setmetatableindex
34
-- Tracing switch for character definitions; it is toggled at runtime via
-- the "characters.defining" tracker.
local trace_defining = false

trackers.register("characters.defining", function(v)
    -- This has to assign the local declared above. Writing to any other
    -- name would create a stray global and leave the switch untouched.
    trace_defining = v
end)

-- Reporter for messages in the "characters" category.
local report_defining = logs.reporter("characters")
38
39
47
48
49
50
51
52
53
-- Make sure the shared 'characters' namespace exists and keep local handles
-- to it and to its 'data' table.
characters = characters or { }
local characters = characters
local data = characters.data

-- Without the data table nothing below can work: report the problem and quit.
if not data then
    report_defining("fatal error: 'char-def.lua' is not loaded")
    os.exit()
else
    mark(data)
end
64
65
68
-- When running inside ConTeXt (the 'context' global is set): make sure the
-- private slot definitions are loaded and copy them into the main data table.
if context then

    local already_loaded = characters.private

    if not already_loaded then
        -- char-prv is expected to fill characters.private
        require("char-prv")
        if storage then
            storage.register("characters/private", characters.private, "characters.private")
        end
    end

    -- Entries from characters.private overwrite same-numbered entries in data.
    for slot, entry in next, characters.private do
        data[slot] = entry
    end

end
86
87
90
-- Pattern that recognizes "0x...." or "U+...." followed by one or more
-- uppercase hexadecimal digits up to the end of the string, and yields the
-- numeric value of those digits.
local pattern do
    local prefix    = P("0x") + P("U+")
    local hexdigits = R("09","AF")^1 * P(-1) -- P(-1): nothing may follow
    local function tovalue(digits)
        return tonumber(digits,16)
    end
    pattern = prefix * (hexdigits / tovalue)
end

lpegpatterns.chartonumber = pattern
94
-- Convert a character specification into a number.
--
-- k : string or number (or nil)
--
-- * A string of the form "0x<HEX>" or "U+<HEX>" (uppercase hex digits only,
--   nothing after them) yields the value of the hex digits.
-- * Any other string yields utfbyte(k), or 0 when that returns nothing.
-- * A non-string value is returned unchanged; nil/false become 0.
local function chartonumber(k)
    if type(k) == "string" then
        -- The pattern already delivers a number, so it is returned as-is.
        -- Feeding it through utfbyte would coerce the number to its decimal
        -- string and return the code of the first digit instead.
        return lpegmatch(pattern,k) or utfbyte(k) or 0
    else
        return k or 0
    end
end
107
-- Convert a character specification into a string.
--
-- k : number or string
--
-- * A number is passed to utfchar; an empty string is returned when that
--   yields nothing.
-- * A string of the form "0x<HEX>" or "U+<HEX>" is converted via utfchar of
--   the parsed value.
-- * Any other value is returned unchanged.
local function charfromnumber(k)
    if type(k) == "number" then
        return utfchar(k) or ""
    end
    local code = lpegmatch(pattern,k)
    if not code then
        return k
    end
    return utfchar(code)
end

-- Public entry points for both converters.
characters.tonumber   = chartonumber
characters.fromnumber = charfromnumber
125
-- Shared fallback entry, returned for lookups that cannot be resolved.
local private = { description = "PRIVATE SLOT" }

-- List of range records (first, last, optional extender) that is consulted
-- when a slot is missing from the data table; also exported.
local ranges = allocate()

characters.ranges = ranges
132
-- Index handler for the data table, installed through setmetatableindex.
--
-- * String keys are first turned into a number ("0x..."/"U+..." notation or
--   utfbyte of the string). If that fails the shared 'private' entry is
--   returned. If the number is present in the table that entry is returned.
-- * Numbers below 0xF0000 are matched against the registered ranges. The
--   first matching range that has an extender produces the entry, which is
--   stored in the table before it is returned.
-- * Everything else resolves to the shared 'private' entry.
setmetatableindex(data, function(t,k)
    local kind = type(k)
    if kind == "string" then
        local slot = lpegmatch(pattern,k) or utfbyte(k)
        if not slot then
            return private
        end
        local known = rawget(t,slot)
        if known then
            return known
        end
        -- continue with the numeric slot and try the ranges
        k = slot
    elseif kind ~= "number" then
        return private
    end
    if k < 0xF0000 then
        for r=1,#ranges do
            local range = ranges[r]
            if k >= range.first and k <= range.last then
                local extender = range.extender
                if extender then
                    local entry = extender(k)
                    t[k] = entry -- remember it so the next lookup is direct
                    return entry
                end
            end
        end
    end
    return private
end)
163
-- Properties shared by all variation selector entries. The range extenders
-- attach this table as metatable to each entry they create, so the shared
-- fields live here once instead of being copied into every entry.
local variant_selector_metatable = {
    category  = "mn",
    cjkwd     = "a",
    direction = "nsm",
    linebreak = "cm",
}

-- A metatable only provides fallback fields through __index. Without this
-- line the four fields above cannot be reached from the entries that use
-- this table as their metatable.
variant_selector_metatable.__index = variant_selector_metatable
170
171
172
-- Formatter for the description of a variation selector entry.
local f_variant = string.formatters["VARIATION SELECTOR-0x%04X"]

-- Register the two variation selector ranges. Entries are created on demand
-- by the extender and only carry a description and their slot; they get
-- variant_selector_metatable as metatable.
do

    -- Build a range record; 'offset' is the selector number of the first
    -- slot in the range.
    local function selector_range(first,last,name,offset)
        return {
            first    = first,
            last     = last,
            name     = name,
            extender = function(k)
                return setmetatable({
                    description = f_variant(k - first + offset),
                    unicodeslot = k,
                },variant_selector_metatable)
            end,
        }
    end

    insert(characters.ranges,selector_range(0xFE00, 0xFE0F, "variant selector",          0x0001))
    insert(characters.ranges,selector_range(0xE0100,0xE01EF,"variant selector extension",0x0011))

end
202
203local blocks = allocate {
204 ["adlam"] = { first = 0x1E900, last = 0x1E95F, description = "Adlam" },
205 ["aegeannumbers"] = { first = 0x10100, last = 0x1013F, description = "Aegean Numbers" },
206 ["ahom"] = { first = 0x11700, last = 0x1174F, description = "Ahom" },
207 ["alchemicalsymbols"] = { first = 0x1F700, last = 0x1F77F, description = "Alchemical Symbols" },
208 ["alphabeticpresentationforms"] = { first = 0x0FB00, last = 0x0FB4F, otf="latn", description = "Alphabetic Presentation Forms" },
209 ["anatolianhieroglyphs"] = { first = 0x14400, last = 0x1467F, description = "Anatolian Hieroglyphs" },
210 ["ancientgreekmusicalnotation"] = { first = 0x1D200, last = 0x1D24F, otf="grek", description = "Ancient Greek Musical Notation" },
211 ["ancientgreeknumbers"] = { first = 0x10140, last = 0x1018F, otf="grek", description = "Ancient Greek Numbers" },
212 ["ancientsymbols"] = { first = 0x10190, last = 0x101CF, otf="grek", description = "Ancient Symbols" },
213 ["arabic"] = { first = 0x00600, last = 0x006FF, otf="arab", description = "Arabic" },
214 ["arabicextendeda"] = { first = 0x008A0, last = 0x008FF, description = "Arabic Extended-A" },
215 ["arabicextendedb"] = { first = 0x00870, last = 0x0089F, description = "Arabic Extended-B" },
216 ["arabicmathematicalalphabeticsymbols"] = { first = 0x1EE00, last = 0x1EEFF, description = "Arabic Mathematical Alphabetic Symbols" },
217 ["arabicpresentationformsa"] = { first = 0x0FB50, last = 0x0FDFF, otf="arab", description = "Arabic Presentation Forms-A" },
218 ["arabicpresentationformsb"] = { first = 0x0FE70, last = 0x0FEFF, otf="arab", description = "Arabic Presentation Forms-B" },
219 ["arabicsupplement"] = { first = 0x00750, last = 0x0077F, otf="arab", description = "Arabic Supplement" },
220 ["armenian"] = { first = 0x00530, last = 0x0058F, otf="armn", description = "Armenian" },
221 ["arrows"] = { first = 0x02190, last = 0x021FF, description = "Arrows" },
222 ["avestan"] = { first = 0x10B00, last = 0x10B3F, description = "Avestan" },
223 ["balinese"] = { first = 0x01B00, last = 0x01B7F, otf="bali", description = "Balinese" },
224 ["bamum"] = { first = 0x0A6A0, last = 0x0A6FF, description = "Bamum" },
225 ["bamumsupplement"] = { first = 0x16800, last = 0x16A3F, description = "Bamum Supplement" },
226 ["basiclatin"] = { first = 0x00000, last = 0x0007F, otf="latn", description = "Basic Latin" },
227 ["bassavah"] = { first = 0x16AD0, last = 0x16AFF, description = "Bassa Vah" },
228 ["batak"] = { first = 0x01BC0, last = 0x01BFF, description = "Batak" },
229 ["bengali"] = { first = 0x00980, last = 0x009FF, otf="beng", description = "Bengali" },
230 ["bhaiksuki"] = { first = 0x11C00, last = 0x11C6F, description = "Bhaiksuki" },
231 ["blockelements"] = { first = 0x02580, last = 0x0259F, otf="bopo", description = "Block Elements" },
232 ["bopomofo"] = { first = 0x03100, last = 0x0312F, otf="bopo", description = "Bopomofo" },
233 ["bopomofoextended"] = { first = 0x031A0, last = 0x031BF, otf="bopo", description = "Bopomofo Extended" },
234 ["boxdrawing"] = { first = 0x02500, last = 0x0257F, description = "Box Drawing" },
235 ["brahmi"] = { first = 0x11000, last = 0x1107F, description = "Brahmi" },
236 ["braillepatterns"] = { first = 0x02800, last = 0x028FF, otf="brai", description = "Braille Patterns" },
237 ["buginese"] = { first = 0x01A00, last = 0x01A1F, otf="bugi", description = "Buginese" },
238 ["buhid"] = { first = 0x01740, last = 0x0175F, otf="buhd", description = "Buhid" },
239 ["byzantinemusicalsymbols"] = { first = 0x1D000, last = 0x1D0FF, otf="byzm", description = "Byzantine Musical Symbols" },
240 ["carian"] = { first = 0x102A0, last = 0x102DF, description = "Carian" },
241 ["caucasianalbanian"] = { first = 0x10530, last = 0x1056F, description = "Caucasian Albanian" },
242 ["chakma"] = { first = 0x11100, last = 0x1114F, description = "Chakma" },
243 ["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" },
244 ["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" },
245 ["cherokeesupplement"] = { first = 0x0AB70, last = 0x0ABBF, description = "Cherokee Supplement" },
246 ["chesssymbols"] = { first = 0x1FA00, last = 0x1FA6F, description = "Chess Symbols" },
247 ["chorasmian"] = { first = 0x10FB0, last = 0x10FDF, description = "Chorasmian" },
248 ["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" },
249 ["cjkcompatibilityforms"] = { first = 0x0FE30, last = 0x0FE4F, otf="hang", description = "CJK Compatibility Forms" },
250 ["cjkcompatibilityideographs"] = { first = 0x0F900, last = 0x0FAFF, otf="hang", description = "CJK Compatibility Ideographs" },
251 ["cjkcompatibilityideographssupplement"] = { first = 0x2F800, last = 0x2FA1F, otf="hang", description = "CJK Compatibility Ideographs Supplement" },
252 ["cjkradicalssupplement"] = { first = 0x02E80, last = 0x02EFF, otf="hang", description = "CJK Radicals Supplement" },
253 ["cjkstrokes"] = { first = 0x031C0, last = 0x031EF, otf="hang", description = "CJK Strokes" },
254 ["cjksymbolsandpunctuation"] = { first = 0x03000, last = 0x0303F, otf="hang", description = "CJK Symbols and Punctuation" },
255 ["cjkunifiedideographs"] = { first = 0x04E00, last = 0x09FFF, otf="hang", description = "CJK Unified Ideographs", catcode = "letter" },
256 ["cjkunifiedideographsextensiona"] = { first = 0x03400, last = 0x04DBF, otf="hang", description = "CJK Unified Ideographs Extension A" },
257 ["cjkunifiedideographsextensionb"] = { first = 0x20000, last = 0x2A6DF, otf="hang", description = "CJK Unified Ideographs Extension B" },
258 ["cjkunifiedideographsextensionc"] = { first = 0x2A700, last = 0x2B73F, description = "CJK Unified Ideographs Extension C" },
259 ["cjkunifiedideographsextensiond"] = { first = 0x2B740, last = 0x2B81F, description = "CJK Unified Ideographs Extension D" },
260 ["cjkunifiedideographsextensione"] = { first = 0x2B820, last = 0x2CEAF, description = "CJK Unified Ideographs Extension E" },
261 ["cjkunifiedideographsextensionf"] = { first = 0x2CEB0, last = 0x2EBEF, description = "CJK Unified Ideographs Extension F" },
262 ["cjkunifiedideographsextensiong"] = { first = 0x30000, last = 0x3134F, description = "CJK Unified Ideographs Extension G" },
263 ["combiningdiacriticalmarks"] = { first = 0x00300, last = 0x0036F, description = "Combining Diacritical Marks" },
264 ["combiningdiacriticalmarksextended"] = { first = 0x01AB0, last = 0x01AFF, description = "Combining Diacritical Marks Extended" },
265 ["combiningdiacriticalmarksforsymbols"] = { first = 0x020D0, last = 0x020FF, description = "Combining Diacritical Marks for Symbols" },
266 ["combiningdiacriticalmarkssupplement"] = { first = 0x01DC0, last = 0x01DFF, description = "Combining Diacritical Marks Supplement" },
267 ["combininghalfmarks"] = { first = 0x0FE20, last = 0x0FE2F, description = "Combining Half Marks" },
268 ["commonindicnumberforms"] = { first = 0x0A830, last = 0x0A83F, description = "Common Indic Number Forms" },
269 ["controlpictures"] = { first = 0x02400, last = 0x0243F, description = "Control Pictures" },
270 ["coptic"] = { first = 0x02C80, last = 0x02CFF, otf="copt", description = "Coptic" },
271 ["copticepactnumbers"] = { first = 0x102E0, last = 0x102FF, description = "Coptic Epact Numbers" },
272 ["countingrodnumerals"] = { first = 0x1D360, last = 0x1D37F, description = "Counting Rod Numerals" },
273 ["cuneiform"] = { first = 0x12000, last = 0x123FF, otf="xsux", description = "Cuneiform" },
274 ["cuneiformnumbersandpunctuation"] = { first = 0x12400, last = 0x1247F, otf="xsux", description = "Cuneiform Numbers and Punctuation" },
275 ["currencysymbols"] = { first = 0x020A0, last = 0x020CF, description = "Currency Symbols" },
276 ["cypriotsyllabary"] = { first = 0x10800, last = 0x1083F, otf="cprt", description = "Cypriot Syllabary" },
277 ["cyprominoan"] = { first = 0x12F90, last = 0x12FFF, description = "Cypro-Minoan" },
278 ["cyrillic"] = { first = 0x00400, last = 0x004FF, otf="cyrl", description = "Cyrillic" },
279 ["cyrillicextendeda"] = { first = 0x02DE0, last = 0x02DFF, otf="cyrl", description = "Cyrillic Extended-A" },
280 ["cyrillicextendedb"] = { first = 0x0A640, last = 0x0A69F, otf="cyrl", description = "Cyrillic Extended-B" },
281 ["cyrillicextendedc"] = { first = 0x01C80, last = 0x01C8F, description = "Cyrillic Extended-C" },
282 ["cyrillicsupplement"] = { first = 0x00500, last = 0x0052F, otf="cyrl", description = "Cyrillic Supplement" },
283 ["deseret"] = { first = 0x10400, last = 0x1044F, otf="dsrt", description = "Deseret" },
284 ["devanagari"] = { first = 0x00900, last = 0x0097F, otf="deva", description = "Devanagari" },
285 ["devanagariextended"] = { first = 0x0A8E0, last = 0x0A8FF, description = "Devanagari Extended" },
286 ["digitsarabicindic"] = { first = 0x00660, last = 0x00669, math = true },
287
288 ["digitsbold"] = { first = 0x1D7CE, last = 0x1D7D8, math = true },
289
290 ["digitsdoublestruck"] = { first = 0x1D7D8, last = 0x1D7E2, math = true },
291
292 ["digitsextendedarabicindic"] = { first = 0x006F0, last = 0x006F9, math = true },
293
294
295
296
297
298 ["digitslatin"] = { first = 0x00030, last = 0x00039, math = true },
299
300
301 ["digitsmonospace"] = { first = 0x1D7F6, last = 0x1D80F, math = true },
302
303 ["digitsnormal"] = { first = 0x00030, last = 0x00039, math = true },
304
305 ["digitssansserifbold"] = { first = 0x1D7EC, last = 0x1D805, math = true },
306 ["digitssansserifnormal"] = { first = 0x1D7E2, last = 0x1D7EC, math = true },
307
308
309
310
311 ["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" },
312 ["divesakuru"] = { first = 0x11900, last = 0x1195F, description = "Dives Akuru" },
313 ["dogra"] = { first = 0x11800, last = 0x1184F, description = "Dogra" },
314 ["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" },
315 ["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" },
316 ["earlydynasticcuneiform"] = { first = 0x12480, last = 0x1254F, description = "Early Dynastic Cuneiform" },
317 ["egyptianhieroglyphformatcontrols"] = { first = 0x13430, last = 0x1343F, description = "Egyptian Hieroglyph Format Controls" },
318 ["egyptianhieroglyphs"] = { first = 0x13000, last = 0x1342F, description = "Egyptian Hieroglyphs" },
319 ["elbasan"] = { first = 0x10500, last = 0x1052F, description = "Elbasan" },
320 ["elymaic"] = { first = 0x10FE0, last = 0x10FFF, description = "Elymaic" },
321 ["emoticons"] = { first = 0x1F600, last = 0x1F64F, description = "Emoticons" },
322 ["enclosedalphanumerics"] = { first = 0x02460, last = 0x024FF, description = "Enclosed Alphanumerics" },
323 ["enclosedalphanumericsupplement"] = { first = 0x1F100, last = 0x1F1FF, description = "Enclosed Alphanumeric Supplement" },
324 ["enclosedcjklettersandmonths"] = { first = 0x03200, last = 0x032FF, description = "Enclosed CJK Letters and Months" },
325 ["enclosedideographicsupplement"] = { first = 0x1F200, last = 0x1F2FF, description = "Enclosed Ideographic Supplement" },
326 ["ethiopic"] = { first = 0x01200, last = 0x0137F, otf="ethi", description = "Ethiopic" },
327 ["ethiopicextended"] = { first = 0x02D80, last = 0x02DDF, otf="ethi", description = "Ethiopic Extended" },
328 ["ethiopicextendeda"] = { first = 0x0AB00, last = 0x0AB2F, description = "Ethiopic Extended-A" },
329 ["ethiopicextendedb"] = { first = 0x1E7E0, last = 0x1E7FF, description = "Ethiopic Extended-B" },
330 ["ethiopicsupplement"] = { first = 0x01380, last = 0x0139F, otf="ethi", description = "Ethiopic Supplement" },
331 ["generalpunctuation"] = { first = 0x02000, last = 0x0206F, description = "General Punctuation" },
332 ["geometricshapes"] = { first = 0x025A0, last = 0x025FF, math = true, description = "Geometric Shapes" },
333 ["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" },
334 ["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" },
335 ["georgianextended"] = { first = 0x01C90, last = 0x01CBF, description = "Georgian Extended" },
336 ["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" },
337 ["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" },
338 ["glagoliticsupplement"] = { first = 0x1E000, last = 0x1E02F, description = "Glagolitic Supplement" },
339 ["gothic"] = { first = 0x10330, last = 0x1034F, otf="goth", description = "Gothic" },
340 ["grantha"] = { first = 0x11300, last = 0x1137F, description = "Grantha" },
341 ["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" },
342 ["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" },
343 ["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" },
344 ["gunjalagondi"] = { first = 0x11D60, last = 0x11DAF, description = "Gunjala Gondi" },
345 ["gurmukhi"] = { first = 0x00A00, last = 0x00A7F, otf="guru", description = "Gurmukhi" },
346 ["halfwidthandfullwidthforms"] = { first = 0x0FF00, last = 0x0FFEF, description = "Halfwidth and Fullwidth Forms" },
347 ["hangulcompatibilityjamo"] = { first = 0x03130, last = 0x0318F, otf="jamo", description = "Hangul Compatibility Jamo" },
348 ["hanguljamo"] = { first = 0x01100, last = 0x011FF, otf="jamo", description = "Hangul Jamo" },
349 ["hanguljamoextendeda"] = { first = 0x0A960, last = 0x0A97F, description = "Hangul Jamo Extended-A" },
350 ["hanguljamoextendedb"] = { first = 0x0D7B0, last = 0x0D7FF, description = "Hangul Jamo Extended-B" },
351 ["hangulsyllables"] = { first = 0x0AC00, last = 0x0D7AF, otf="hang", description = "Hangul Syllables" },
352 ["hanifirohingya"] = { first = 0x10D00, last = 0x10D3F, description = "Hanifi Rohingya" },
353 ["hanunoo"] = { first = 0x01720, last = 0x0173F, otf="hano", description = "Hanunoo" },
354 ["hatran"] = { first = 0x108E0, last = 0x108FF, description = "Hatran" },
355 ["hebrew"] = { first = 0x00590, last = 0x005FF, otf="hebr", description = "Hebrew" },
356 ["highprivateusesurrogates"] = { first = 0x0DB80, last = 0x0DBFF, description = "High Private Use Surrogates" },
357 ["highsurrogates"] = { first = 0x0D800, last = 0x0DB7F, description = "High Surrogates" },
358 ["hiragana"] = { first = 0x03040, last = 0x0309F, otf="kana", description = "Hiragana" },
359 ["ideographicdescriptioncharacters"] = { first = 0x02FF0, last = 0x02FFF, description = "Ideographic Description Characters" },
360 ["ideographicsymbolsandpunctuation"] = { first = 0x16FE0, last = 0x16FFF, description = "Ideographic Symbols and Punctuation" },
361 ["imperialaramaic"] = { first = 0x10840, last = 0x1085F, description = "Imperial Aramaic" },
362 ["indicsiyaqnumbers"] = { first = 0x1EC70, last = 0x1ECBF, description = "Indic Siyaq Numbers" },
363 ["inscriptionalpahlavi"] = { first = 0x10B60, last = 0x10B7F, description = "Inscriptional Pahlavi" },
364 ["inscriptionalparthian"] = { first = 0x10B40, last = 0x10B5F, description = "Inscriptional Parthian" },
365 ["ipaextensions"] = { first = 0x00250, last = 0x002AF, description = "IPA Extensions" },
366 ["javanese"] = { first = 0x0A980, last = 0x0A9DF, description = "Javanese" },
367 ["kaithi"] = { first = 0x11080, last = 0x110CF, description = "Kaithi" },
368 ["kanaextendeda"] = { first = 0x1B100, last = 0x1B12F, description = "Kana Extended-A" },
369 ["kanaextendedb"] = { first = 0x1AFF0, last = 0x1AFFF, description = "Kana Extended-B" },
370 ["kanasupplement"] = { first = 0x1B000, last = 0x1B0FF, description = "Kana Supplement" },
371 ["kanbun"] = { first = 0x03190, last = 0x0319F, description = "Kanbun" },
372 ["kangxiradicals"] = { first = 0x02F00, last = 0x02FDF, description = "Kangxi Radicals" },
373 ["kannada"] = { first = 0x00C80, last = 0x00CFF, otf="knda", description = "Kannada" },
374 ["katakana"] = { first = 0x030A0, last = 0x030FF, otf="kana", description = "Katakana" },
375 ["katakanaphoneticextensions"] = { first = 0x031F0, last = 0x031FF, otf="kana", description = "Katakana Phonetic Extensions" },
376 ["kayahli"] = { first = 0x0A900, last = 0x0A92F, description = "Kayah Li" },
377 ["kharoshthi"] = { first = 0x10A00, last = 0x10A5F, otf="khar", description = "Kharoshthi" },
378 ["khitansmallscript"] = { first = 0x18B00, last = 0x18CFF, description = "Khitan Small Script" },
379 ["khmer"] = { first = 0x01780, last = 0x017FF, otf="khmr", description = "Khmer" },
380 ["khmersymbols"] = { first = 0x019E0, last = 0x019FF, otf="khmr", description = "Khmer Symbols" },
381 ["khojki"] = { first = 0x11200, last = 0x1124F, description = "Khojki" },
382 ["khudawadi"] = { first = 0x112B0, last = 0x112FF, description = "Khudawadi" },
383 ["lao"] = { first = 0x00E80, last = 0x00EFF, otf="lao", description = "Lao" },
384 ["latinextendeda"] = { first = 0x00100, last = 0x0017F, otf="latn", description = "Latin Extended-A" },
385 ["latinextendedadditional"] = { first = 0x01E00, last = 0x01EFF, otf="latn", description = "Latin Extended Additional" },
386 ["latinextendedb"] = { first = 0x00180, last = 0x0024F, otf="latn", description = "Latin Extended-B" },
387 ["latinextendedc"] = { first = 0x02C60, last = 0x02C7F, otf="latn", description = "Latin Extended-C" },
388 ["latinextendedd"] = { first = 0x0A720, last = 0x0A7FF, otf="latn", description = "Latin Extended-D" },
389 ["latinextendede"] = { first = 0x0AB30, last = 0x0AB6F, description = "Latin Extended-E" },
390 ["latinextendedf"] = { first = 0x10780, last = 0x107BF, description = "Latin Extended-F" },
391 ["latinextendedg"] = { first = 0x1DF00, last = 0x1DFFF, description = "Latin Extended-G" },
392 ["latinsupplement"] = { first = 0x00080, last = 0x000FF, otf="latn", description = "Latin-1 Supplement" },
393 ["lepcha"] = { first = 0x01C00, last = 0x01C4F, description = "Lepcha" },
394 ["letterlikesymbols"] = { first = 0x02100, last = 0x0214F, math = true, description = "Letterlike Symbols" },
395 ["limbu"] = { first = 0x01900, last = 0x0194F, otf="limb", description = "Limbu" },
396 ["lineara"] = { first = 0x10600, last = 0x1077F, description = "Linear A" },
397 ["linearbideograms"] = { first = 0x10080, last = 0x100FF, otf="linb", description = "Linear B Ideograms" },
398 ["linearbsyllabary"] = { first = 0x10000, last = 0x1007F, otf="linb", description = "Linear B Syllabary" },
399 ["lisu"] = { first = 0x0A4D0, last = 0x0A4FF, description = "Lisu" },
400 ["lisusupplement"] = { first = 0x11FB0, last = 0x11FBF, description = "Lisu Supplement" },
401 ["lowercasebold"] = { first = 0x1D41A, last = 0x1D433, math = true },
402 ["lowercaseboldfraktur"] = { first = 0x1D586, last = 0x1D59F, math = true },
403 ["lowercasebolditalic"] = { first = 0x1D482, last = 0x1D49B, math = true },
404 ["lowercaseboldscript"] = { first = 0x1D4EA, last = 0x1D503, math = true },
405 ["lowercasedoublestruck"] = { first = 0x1D552, last = 0x1D56B, math = true },
406 ["lowercasefraktur"] = { first = 0x1D51E, last = 0x1D537, math = true },
407 ["lowercasegreekbold"] = { first = 0x1D6C2, last = 0x1D6DB, math = true },
408 ["lowercasegreekbolditalic"] = { first = 0x1D736, last = 0x1D74F, math = true },
409 ["lowercasegreekitalic"] = { first = 0x1D6FC, last = 0x1D715, math = true },
410 ["lowercasegreeknormal"] = { first = 0x003B1, last = 0x003CA, math = true },
411 ["lowercasegreeksansserifbold"] = { first = 0x1D770, last = 0x1D789, math = true },
412 ["lowercasegreeksansserifbolditalic"] = { first = 0x1D7AA, last = 0x1D7C3, math = true },
413 ["lowercaseitalic"] = { first = 0x1D44E, last = 0x1D467, math = true },
414 ["lowercasemonospace"] = { first = 0x1D68A, last = 0x1D6A3, math = true },
415 ["lowercasenormal"] = { first = 0x00061, last = 0x0007A, math = true },
416 ["lowercasesansserifbold"] = { first = 0x1D5EE, last = 0x1D607, math = true },
417 ["lowercasesansserifbolditalic"] = { first = 0x1D656, last = 0x1D66F, math = true },
418 ["lowercasesansserifitalic"] = { first = 0x1D622, last = 0x1D63B, math = true },
419 ["lowercasesansserifnormal"] = { first = 0x1D5BA, last = 0x1D5D3, math = true },
420 ["lowercasescript"] = { first = 0x1D4B6, last = 0x1D4CF, math = true },
421 ["lowsurrogates"] = { first = 0x0DC00, last = 0x0DFFF, description = "Low Surrogates" },
422 ["lycian"] = { first = 0x10280, last = 0x1029F, description = "Lycian" },
423 ["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" },
424 ["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" },
425 ["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" },
426 ["makasar"] = { first = 0x11EE0, last = 0x11EFF, description = "Makasar" },
427 ["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" },
428 ["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
429 ["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" },
430 ["marchen"] = { first = 0x11C70, last = 0x11CBF, description = "Marchen" },
431 ["masaramgondi"] = { first = 0x11D00, last = 0x11D5F, description = "Masaram Gondi" },
432 ["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, math = true, description = "Mathematical Alphanumeric Symbols" },
433 ["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, math = true, description = "Mathematical Operators" },
434 ["mayannumerals"] = { first = 0x1D2E0, last = 0x1D2FF, description = "Mayan Numerals" },
435 ["medefaidrin"] = { first = 0x16E40, last = 0x16E9F, description = "Medefaidrin" },
436 ["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" },
437 ["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" },
438 ["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" },
439 ["meroiticcursive"] = { first = 0x109A0, last = 0x109FF, description = "Meroitic Cursive" },
440 ["meroitichieroglyphs"] = { first = 0x10980, last = 0x1099F, description = "Meroitic Hieroglyphs" },
441 ["miao"] = { first = 0x16F00, last = 0x16F9F, description = "Miao" },
442 ["miscellaneousmathematicalsymbolsa"] = { first = 0x027C0, last = 0x027EF, math = true, description = "Miscellaneous Mathematical Symbols-A" },
443 ["miscellaneousmathematicalsymbolsb"] = { first = 0x02980, last = 0x029FF, math = true, description = "Miscellaneous Mathematical Symbols-B" },
444 ["miscellaneoussymbols"] = { first = 0x02600, last = 0x026FF, math = true, description = "Miscellaneous Symbols" },
445 ["miscellaneoussymbolsandarrows"] = { first = 0x02B00, last = 0x02BFF, math = true, description = "Miscellaneous Symbols and Arrows" },
446 ["miscellaneoussymbolsandpictographs"] = { first = 0x1F300, last = 0x1F5FF, description = "Miscellaneous Symbols and Pictographs" },
447 ["miscellaneoustechnical"] = { first = 0x02300, last = 0x023FF, math = true, description = "Miscellaneous Technical" },
448 ["modi"] = { first = 0x11600, last = 0x1165F, description = "Modi" },
449 ["modifiertoneletters"] = { first = 0x0A700, last = 0x0A71F, description = "Modifier Tone Letters" },
450 ["mongolian"] = { first = 0x01800, last = 0x018AF, otf="mong", description = "Mongolian" },
451 ["mongoliansupplement"] = { first = 0x11660, last = 0x1167F, description = "Mongolian Supplement" },
452 ["mro"] = { first = 0x16A40, last = 0x16A6F, description = "Mro" },
453 ["multani"] = { first = 0x11280, last = 0x112AF, description = "Multani" },
454 ["musicalsymbols"] = { first = 0x1D100, last = 0x1D1FF, otf="musc", description = "Musical Symbols" },
455 ["myanmar"] = { first = 0x01000, last = 0x0109F, otf="mymr", description = "Myanmar" },
456 ["myanmarextendeda"] = { first = 0x0AA60, last = 0x0AA7F, description = "Myanmar Extended-A" },
457 ["myanmarextendedb"] = { first = 0x0A9E0, last = 0x0A9FF, description = "Myanmar Extended-B" },
458 ["nabataean"] = { first = 0x10880, last = 0x108AF, description = "Nabataean" },
459 ["nandinagari"] = { first = 0x119A0, last = 0x119FF, description = "Nandinagari" },
460 ["newa"] = { first = 0x11400, last = 0x1147F, description = "Newa" },
461 ["newtailue"] = { first = 0x01980, last = 0x019DF, description = "New Tai Lue" },
462 ["nko"] = { first = 0x007C0, last = 0x007FF, otf="nko", description = "NKo" },
463 ["numberforms"] = { first = 0x02150, last = 0x0218F, description = "Number Forms" },
464 ["nushu"] = { first = 0x1B170, last = 0x1B2FF, description = "Nushu" },
465 ["nyiakengpuachuehmong"] = { first = 0x1E100, last = 0x1E14F, description = "Nyiakeng Puachue Hmong" },
466 ["ogham"] = { first = 0x01680, last = 0x0169F, otf="ogam", description = "Ogham" },
467 ["olchiki"] = { first = 0x01C50, last = 0x01C7F, description = "Ol Chiki" },
468 ["oldhungarian"] = { first = 0x10C80, last = 0x10CFF, description = "Old Hungarian" },
469 ["olditalic"] = { first = 0x10300, last = 0x1032F, otf="ital", description = "Old Italic" },
470 ["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" },
471 ["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" },
472 ["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" },
473 ["oldsogdian"] = { first = 0x10F00, last = 0x10F2F, description = "Old Sogdian" },
474 ["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" },
475 ["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
476 ["olduyghur"] = { first = 0x10F70, last = 0x10FAF, description = "Old Uyghur" },
477 ["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" },
478 ["oriya"] = { first = 0x00B00, last = 0x00B7F, otf="orya", description = "Oriya" },
479 ["ornamentaldingbats"] = { first = 0x1F650, last = 0x1F67F, description = "Ornamental Dingbats" },
480 ["osage"] = { first = 0x104B0, last = 0x104FF, description = "Osage" },
481 ["osmanya"] = { first = 0x10480, last = 0x104AF, otf="osma", description = "Osmanya" },
482 ["ottomansiyaqnumbers"] = { first = 0x1ED00, last = 0x1ED4F, description = "Ottoman Siyaq Numbers" },
483 ["pahawhhmong"] = { first = 0x16B00, last = 0x16B8F, description = "Pahawh Hmong" },
484 ["palmyrene"] = { first = 0x10860, last = 0x1087F, description = "Palmyrene" },
485 ["paucinhau"] = { first = 0x11AC0, last = 0x11AFF, description = "Pau Cin Hau" },
486 ["phagspa"] = { first = 0x0A840, last = 0x0A87F, otf="phag", description = "Phags-pa" },
487 ["phaistosdisc"] = { first = 0x101D0, last = 0x101FF, description = "Phaistos Disc" },
488 ["phoenician"] = { first = 0x10900, last = 0x1091F, otf="phnx", description = "Phoenician" },
489 ["phoneticextensions"] = { first = 0x01D00, last = 0x01D7F, description = "Phonetic Extensions" },
490 ["phoneticextensionssupplement"] = { first = 0x01D80, last = 0x01DBF, description = "Phonetic Extensions Supplement" },
491 ["playingcards"] = { first = 0x1F0A0, last = 0x1F0FF, description = "Playing Cards" },
492 ["privateusearea"] = { first = 0x0E000, last = 0x0F8FF, description = "Private Use Area" },
493 ["psalterpahlavi"] = { first = 0x10B80, last = 0x10BAF, description = "Psalter Pahlavi" },
494 ["rejang"] = { first = 0x0A930, last = 0x0A95F, description = "Rejang" },
495 ["ruminumeralsymbols"] = { first = 0x10E60, last = 0x10E7F, description = "Rumi Numeral Symbols" },
496 ["runic"] = { first = 0x016A0, last = 0x016FF, otf="runr", description = "Runic" },
497 ["samaritan"] = { first = 0x00800, last = 0x0083F, description = "Samaritan" },
498 ["saurashtra"] = { first = 0x0A880, last = 0x0A8DF, description = "Saurashtra" },
499 ["sharada"] = { first = 0x11180, last = 0x111DF, description = "Sharada" },
500 ["shavian"] = { first = 0x10450, last = 0x1047F, otf="shaw", description = "Shavian" },
501 ["shorthandformatcontrols"] = { first = 0x1BCA0, last = 0x1BCAF, description = "Shorthand Format Controls" },
502 ["siddham"] = { first = 0x11580, last = 0x115FF, description = "Siddham" },
503 ["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" },
504 ["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" },
505 ["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" },
506 ["smallkanaextension"] = { first = 0x1B130, last = 0x1B16F, description = "Small Kana Extension" },
507 ["sogdian"] = { first = 0x10F30, last = 0x10F6F, description = "Sogdian" },
508 ["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" },
509 ["soyombo"] = { first = 0x11A50, last = 0x11AAF, description = "Soyombo" },
510 ["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" },
511 ["specials"] = { first = 0x0FFF0, last = 0x0FFFF, description = "Specials" },
512 ["sundanese"] = { first = 0x01B80, last = 0x01BBF, description = "Sundanese" },
513 ["sundanesesupplement"] = { first = 0x01CC0, last = 0x01CCF, description = "Sundanese Supplement" },
514 ["superscriptsandsubscripts"] = { first = 0x02070, last = 0x0209F, description = "Superscripts and Subscripts" },
515 ["supplementalarrowsa"] = { first = 0x027F0, last = 0x027FF, math = true, description = "Supplemental Arrows-A" },
516 ["supplementalarrowsb"] = { first = 0x02900, last = 0x0297F, math = true, description = "Supplemental Arrows-B" },
517 ["supplementalarrowsc"] = { first = 0x1F800, last = 0x1F8FF, math = true, description = "Supplemental Arrows-C" },
518 ["supplementalmathematicaloperators"] = { first = 0x02A00, last = 0x02AFF, math = true, description = "Supplemental Mathematical Operators" },
519 ["supplementalpunctuation"] = { first = 0x02E00, last = 0x02E7F, description = "Supplemental Punctuation" },
520 ["supplementalsymbolsandpictographs"] = { first = 0x1F900, last = 0x1F9FF, description = "Supplemental Symbols and Pictographs" },
521 ["supplementaryprivateuseareaa"] = { first = 0xF0000, last = 0xFFFFF, description = "Supplementary Private Use Area-A" },
522 ["supplementaryprivateuseareab"] = { first = 0x100000,last = 0x10FFFF, description = "Supplementary Private Use Area-B" },
523 ["suttonsignwriting"] = { first = 0x1D800, last = 0x1DAAF, description = "Sutton SignWriting" },
524 ["sylotinagri"] = { first = 0x0A800, last = 0x0A82F, otf="sylo", description = "Syloti Nagri" },
525 ["symbolsandpictographsextendeda"] = { first = 0x1FA70, last = 0x1FAFF, description = "Symbols and Pictographs Extended-A" },
526 ["symbolsforlegacycomputing"] = { first = 0x1FB00, last = 0x1FBFF, description = "Symbols for Legacy Computing" },
527 ["syriac"] = { first = 0x00700, last = 0x0074F, otf="syrc", description = "Syriac" },
528 ["syriacsupplement"] = { first = 0x00860, last = 0x0086F, description = "Syriac Supplement" },
529 ["tagalog"] = { first = 0x01700, last = 0x0171F, otf="tglg", description = "Tagalog" },
530 ["tagbanwa"] = { first = 0x01760, last = 0x0177F, otf="tagb", description = "Tagbanwa" },
531 ["tags"] = { first = 0xE0000, last = 0xE007F, description = "Tags" },
532 ["taile"] = { first = 0x01950, last = 0x0197F, otf="tale", description = "Tai Le" },
533 ["taitham"] = { first = 0x01A20, last = 0x01AAF, description = "Tai Tham" },
534 ["taiviet"] = { first = 0x0AA80, last = 0x0AADF, description = "Tai Viet" },
535 ["taixuanjingsymbols"] = { first = 0x1D300, last = 0x1D35F, description = "Tai Xuan Jing Symbols" },
536 ["takri"] = { first = 0x11680, last = 0x116CF, description = "Takri" },
537 ["tamil"] = { first = 0x00B80, last = 0x00BFF, otf="taml", description = "Tamil" },
538 ["tamilsupplement"] = { first = 0x11FC0, last = 0x11FFF, description = "Tamil Supplement" },
539 ["tangut"] = { first = 0x17000, last = 0x187FF, description = "Tangut" },
540 ["tangutsupplement"] = { first = 0x18D00, last = 0x18D7F, description = "Tangut Supplement" },
541 ["tangutcomponents"] = { first = 0x18800, last = 0x18AFF, description = "Tangut Components" },
542 ["tangsa"] = { first = 0x16A70, last = 0x16ACF, description = "Tangsa" },
543 ["telugu"] = { first = 0x00C00, last = 0x00C7F, otf="telu", description = "Telugu" },
544 ["thaana"] = { first = 0x00780, last = 0x007BF, otf="thaa", description = "Thaana" },
545 ["thai"] = { first = 0x00E00, last = 0x00E7F, otf="thai", description = "Thai" },
546 ["tibetan"] = { first = 0x00F00, last = 0x00FFF, otf="tibt", description = "Tibetan" },
547 ["tifinagh"] = { first = 0x02D30, last = 0x02D7F, otf="tfng", description = "Tifinagh" },
548 ["tirhuta"] = { first = 0x11480, last = 0x114DF, description = "Tirhuta" },
549 ["toto"] = { first = 0x1E290, last = 0x1E2BF, description = "Toto" },
550 ["transportandmapsymbols"] = { first = 0x1F680, last = 0x1F6FF, description = "Transport and Map Symbols" },
551 ["ugaritic"] = { first = 0x10380, last = 0x1039F, otf="ugar", description = "Ugaritic" },
552 ["unifiedcanadianaboriginalsyllabics"] = { first = 0x01400, last = 0x0167F, otf="cans", description = "Unified Canadian Aboriginal Syllabics" },
553 ["unifiedcanadianaboriginalsyllabicsextended"] = { first = 0x018B0, last = 0x018FF, description = "Unified Canadian Aboriginal Syllabics Extended" },
554 ["unifiedcanadianaboriginalsyllabicsextendeda"] = { first = 0x11AB0, last = 0x11ABF, description = "Unified Canadian Aboriginal Syllabics Extended-A" },
555 ["uppercasebold"] = { first = 0x1D400, last = 0x1D419, math = true },
556 ["uppercaseboldfraktur"] = { first = 0x1D56C, last = 0x1D585, math = true },
557 ["uppercasebolditalic"] = { first = 0x1D468, last = 0x1D481, math = true },
558 ["uppercaseboldscript"] = { first = 0x1D4D0, last = 0x1D4E9, math = true },
559 ["uppercasedoublestruck"] = { first = 0x1D538, last = 0x1D551, math = true },
560 ["uppercasefraktur"] = { first = 0x1D504, last = 0x1D51D, math = true },
561 ["uppercasegreekbold"] = { first = 0x1D6A8, last = 0x1D6C1, math = true },
562 ["uppercasegreekbolditalic"] = { first = 0x1D71C, last = 0x1D735, math = true },
563 ["uppercasegreekitalic"] = { first = 0x1D6E2, last = 0x1D6FB, math = true },
564 ["uppercasegreeknormal"] = { first = 0x00391, last = 0x003AA, math = true },
565 ["uppercasegreeksansserifbold"] = { first = 0x1D756, last = 0x1D76F, math = true },
566 ["uppercasegreeksansserifbolditalic"] = { first = 0x1D790, last = 0x1D7A9, math = true },
567 ["uppercaseitalic"] = { first = 0x1D434, last = 0x1D44D, math = true },
568 ["uppercasemonospace"] = { first = 0x1D670, last = 0x1D689, math = true },
569 ["uppercasenormal"] = { first = 0x00041, last = 0x0005A, math = true },
570 ["uppercasesansserifbold"] = { first = 0x1D5D4, last = 0x1D5ED, math = true },
571 ["uppercasesansserifbolditalic"] = { first = 0x1D63C, last = 0x1D655, math = true },
572 ["uppercasesansserifitalic"] = { first = 0x1D608, last = 0x1D621, math = true },
573 ["uppercasesansserifnormal"] = { first = 0x1D5A0, last = 0x1D5B9, math = true },
574 ["uppercasescript"] = { first = 0x1D49C, last = 0x1D4B5, math = true },
575 ["vai"] = { first = 0x0A500, last = 0x0A63F, description = "Vai" },
576 ["variationselectors"] = { first = 0x0FE00, last = 0x0FE0F, description = "Variation Selectors" },
577 ["variationselectorssupplement"] = { first = 0xE0100, last = 0xE01EF, description = "Variation Selectors Supplement" },
578 ["vedicextensions"] = { first = 0x01CD0, last = 0x01CFF, description = "Vedic Extensions" },
579 ["verticalforms"] = { first = 0x0FE10, last = 0x0FE1F, description = "Vertical Forms" },
580 ["vithkuqi"] = { first = 0x10570, last = 0x105BF, description = "Vithkuqi" },
581 ["wancho"] = { first = 0x1E2C0, last = 0x1E2FF, description = "Wancho" },
582 ["warangciti"] = { first = 0x118A0, last = 0x118FF, description = "Warang Citi" },
583 ["yezidi"] = { first = 0x10E80, last = 0x10EBF, description = "Yezidi" },
584 ["yijinghexagramsymbols"] = { first = 0x04DC0, last = 0x04DFF, otf="yi", description = "Yijing Hexagram Symbols" },
585 ["yiradicals"] = { first = 0x0A490, last = 0x0A4CF, otf="yi", description = "Yi Radicals" },
586 ["yisyllables"] = { first = 0x0A000, last = 0x0A48F, otf="yi", description = "Yi Syllables" },
587 ["zanabazarsquare"] = { first = 0x11A00, last = 0x11A4F, description = "Zanabazar Square" },
588 ["znamennymusicalnotation"] = { first = 0x1CF00, last = 0x1CFCF, description = "Znamenny Musical Notation" }
589}
590
591
592
593
594
595
596
597
-- Gap tables for some of the math alphabet blocks. Each entry maps a code point
-- that lies inside the block's first/last range to another code point;
-- characters.getrange hands the table back as its fourth result.
-- NOTE(review): presumably these are the slots that are unassigned in the
-- Mathematical Alphanumeric Symbols block because the letter already exists in
-- Letterlike Symbols -- confirm against the Unicode code charts.

blocks.lowercaseitalic.gaps = {
 [0x1D455] = 0x0210E,
}

blocks.uppercasescript.gaps = {
 [0x1D49D] = 0x0212C,
 [0x1D4A0] = 0x02130,
 [0x1D4A1] = 0x02131,
 [0x1D4A3] = 0x0210B,
 [0x1D4A4] = 0x02110,
 [0x1D4A7] = 0x02112,
 [0x1D4A8] = 0x02133,
 [0x1D4AD] = 0x0211B,
}

blocks.lowercasescript.gaps = {
 [0x1D4BA] = 0x0212F,
 [0x1D4BC] = 0x0210A,
 [0x1D4C4] = 0x02134,
}

blocks.uppercasefraktur.gaps = {
 [0x1D506] = 0x0212D,
 [0x1D50B] = 0x0210C,
 [0x1D50C] = 0x02111,
 [0x1D515] = 0x0211C,
 [0x1D51D] = 0x02128,
}

blocks.uppercasedoublestruck.gaps = {
 [0x1D53A] = 0x02102,
 [0x1D53F] = 0x0210D,
 [0x1D545] = 0x02115,
 [0x1D547] = 0x02119,
 [0x1D548] = 0x0211A,
 [0x1D549] = 0x0211D,
 [0x1D551] = 0x02124,
}

-- expose the block definitions through the characters namespace
characters.blocks = blocks
638
-- Returns the first and last code point of the block called `name`. The block
-- is fetched with a normal index on `blocks`, so any index metamethod on that
-- table takes part in the lookup. An unknown name gives 0, 0.
function characters.blockrange(name)
    local block = blocks[name]
    if not block then
        return 0, 0
    end
    return block.first, block.last
end
647
-- Fallback for block names that are not stored literally: everything but
-- letters and digits is removed and the result is lowercased before the table
-- is consulted again (raw, so there is no recursion). Digits are kept so that
-- the normalisation agrees with the one used by characters.getrange; stripping
-- them would mangle any block name that contains a number. Keys that are not
-- strings simply give nil instead of being fed to gsub.
setmetatableindex(blocks, function(t,k)
    if type(k) == "string" then
        -- gsub also returns a count, the parentheses keep only the string
        return rawget(t,lower((gsub(k,"[^a-zA-Z0-9]",""))))
    end
end)
651
local otfscripts = utilities.storage.allocate()
characters.otfscripts = otfscripts

-- Lazy lookup of the script tag that belongs to a code point. The blocks are
-- scanned (in whatever order `next` delivers them) for one whose first/last
-- range contains the code point. On a hit the block's `otf` tag, or "dflt" when
-- it has none, is stored for every slot of that block so later lookups in the
-- same block are plain table reads. Code points outside every block are cached
-- individually as "dflt".
setmetatableindex(otfscripts,function(t,unicode)
    for _, block in next, blocks do
        local first, last = block.first, block.last
        if first <= unicode and unicode <= last then
            local script = block.otf or "dflt"
            for slot=first,last do
                t[slot] = script
            end
            return script
        end
    end
    -- no block matched: this is the slow path, so remember the outcome
    t[unicode] = "dflt"
    return "dflt"
end)
671
local splitter1 = lpeg.splitat(S(":-"))
local splitter2 = lpeg.splitat(S(" +-"),true)

-- Turns a range specification into code points.
--
-- name       : a block name (anything but letters and digits is ignored and
--              case does not matter), a single number, or two numbers
--              separated by ":" or "-"; a double quote is read as "0x"
-- expression : when set, `name` may also be a number in Lua notation or a
--              block name followed by an offset expression
--
-- Returns first, last, description, gaps for a known block and first, last, nil
-- in all other cases (first and last are nil when nothing could be parsed).
-- Numbers are tried as hexadecimal first and as decimal after that.
--
-- NOTE(review): the offset expression is compiled with loadstring, so with
-- `expression` set this function must only see trusted input.
function characters.getrange(name,expression)
    local block = rawget(blocks,lower((gsub(name,"[^a-zA-Z0-9]",""))))
    if block then
        return block.first, block.last, block.description, block.gaps
    end
    name = gsub(name,'"',"0x")
    if expression then
        local number = tonumber(name)
        if number then
            return number, number, nil
        end
        -- split once: the part before the separator has to be a block name
        local blockname, offset = lpegmatch(splitter2,name)
        local shifted = rawget(blocks,lower((gsub(blockname,"[^a-zA-Z0-9]",""))))
        if shifted then
            local chunk = loadstring("return 0 " .. offset)
            local delta = type(chunk) == "function" and chunk()
            if type(delta) == "number" then
                return shifted.first + delta, shifted.last + delta, nil
            end
        end
    end
    local from, to = lpegmatch(splitter1,name)
    if from and to then
        from = tonumber(from,16) or tonumber(from)
        to   = tonumber(to,16) or tonumber(to)
        if from and to then
            return from, to, nil
        end
    end
    local slot = tonumber(name,16) or tonumber(name)
    return slot, slot, nil
end
711
712
713
714
-- Verbose names for the two-letter category tags. characters.category uses
-- this table to translate the `category` field of a character entry when it is
-- asked for the verbose form.
local categorytags = allocate {
 lu = "Letter Uppercase",
 ll = "Letter Lowercase",
 lt = "Letter Titlecase",
 lm = "Letter Modifier",
 lo = "Letter Other",
 mn = "Mark Nonspacing",
 mc = "Mark Spacing Combining",
 me = "Mark Enclosing",
 nd = "Number Decimal Digit",
 nl = "Number Letter",
 no = "Number Other",
 pc = "Punctuation Connector",
 pd = "Punctuation Dash",
 ps = "Punctuation Open",
 pe = "Punctuation Close",
 pi = "Punctuation Initial Quote",
 pf = "Punctuation Final Quote",
 po = "Punctuation Other",
 sm = "Symbol Math",
 sc = "Symbol Currency",
 sk = "Symbol Modifier",
 so = "Symbol Other",
 zs = "Separator Space",
 zl = "Separator Line",
 zp = "Separator Paragraph",
 cc = "Other Control",
 cf = "Other Format",
 cs = "Other Surrogate",
 co = "Other Private Use",
 cn = "Other Not Assigned",
}

-- Verbose names for a second set of two-letter tags.
-- NOTE(review): the field these tags come from is not visible in this part of
-- the file -- confirm where they are used.
local detailtags = allocate {
 sl = "small letter",
 bl = "big letter",
 im = "iteration mark",
 pm = "prolonged sound mark"
}

-- both tables are public
characters.categorytags = categorytags
characters.detailtags = detailtags
757
758
759
760
761
762
-- Category sets. Each table is a hash with category tags as keys, so a test is
-- just `is_letter[category]`.

local is_character = allocate ( tohash {
    "lu","ll","lt","lm","lo",
    "nd","nl","no",
    "mn",
    "pc","pd","ps","pe","pi","pf","po",
    "sm","sc","sk","so"
} )

local is_letter = allocate ( tohash {
    "ll","lm","lo","lt","lu"
} )

local is_command = allocate ( tohash {
    "cf","zs"
} )

local is_spacing = allocate ( tohash {
    "zs", "zl","zp",
} )

-- "mc" (Mark Spacing Combining) is the tag that categorytags knows about; the
-- set used to list "ms" instead, which matches no tag in categorytags, so
-- spacing combining marks were never recognized. "ms" is kept so that code
-- that (for whatever reason) tests that key keeps working. "me" is left out as
-- before.
local is_mark = allocate ( tohash {
    "mn", "mc", "ms",
} )

local is_punctuation = allocate ( tohash {
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
} )

local is_hyphenator = allocate ( tohash {
    "pd",
} )

local is_symbol = allocate ( tohash {
    "sm", "sc", "sk", "so",
} )

characters.is_character   = is_character
characters.is_letter      = is_letter
characters.is_command     = is_command
characters.is_spacing     = is_spacing
characters.is_mark        = is_mark
characters.is_punctuation = is_punctuation
characters.is_hyphenator  = is_hyphenator
characters.is_symbol      = is_symbol
810
-- Index metamethod shared by the is_* sets: it makes them accept a code point
-- as well as a category tag. A numeric key is translated into the category of
-- that character and the set is then consulted raw; any other missing key
-- gives nil. A code point without data (or without category) also gives nil
-- instead of raising an error.
local mti = function(t,k)
    if type(k) == "number" then
        local d = data[k]
        local c = d and d.category
        return c and rawget(t,c)
    end
end

-- All exported sets get the same treatment; is_mark and is_symbol used to be
-- left out, so indexing those two with a code point always gave nil.
setmetatableindex(characters.is_character,  mti)
setmetatableindex(characters.is_letter,     mti)
setmetatableindex(characters.is_command,    mti)
setmetatableindex(characters.is_spacing,    mti)
setmetatableindex(characters.is_mark,       mti)
setmetatableindex(characters.is_punctuation,mti)
setmetatableindex(characters.is_hyphenator, mti)
setmetatableindex(characters.is_symbol,     mti)
826
827
828
829
830
831
832
833
834
835
-- Descriptions of the (lowercased) line break class tags. The grouping below
-- follows the class table of Unicode Standard Annex #14.
characters.linebreaks = allocate {

 -- non-tailorable line breaking classes

 ["bk"] = "mandatory break",
 ["cr"] = "carriage return",
 ["lf"] = "line feed",
 ["cm"] = "combining mark",
 ["nl"] = "next line",
 ["sg"] = "surrogate",
 ["wj"] = "word joiner",
 ["zw"] = "zero width space",
 ["gl"] = "non-breaking (glue)",
 ["sp"] = "space",
 ["zwj"] = "zero width joiner",

 -- break opportunities

 ["b2"] = "break opportunity before and after",
 ["ba"] = "break after",
 ["bb"] = "break before",
 ["hy"] = "hyphen",
 ["cb"] = "contingent break opportunity",

 -- characters prohibiting certain breaks

 ["cl"] = "close punctuation",
 ["cp"] = "close parenthesis",
 ["ex"] = "exclamation/interrogation",
 ["in"] = "inseparable",
 ["ns"] = "nonstarter",
 ["op"] = "open punctuation",
 ["qu"] = "quotation",

 -- numeric context

 ["is"] = "infix numeric separator",
 ["nu"] = "numeric",
 ["po"] = "postfix numeric",
 ["pr"] = "prefix numeric",
 ["sy"] = "symbols allowing break after",

 -- other characters

 ["ai"] = "ambiguous (alphabetic or ideographic)",
 ["al"] = "alphabetic",
 ["cj"] = "conditional japanese starter",
 ["eb"] = "emoji base",
 ["em"] = "emoji modifier",
 ["h2"] = "hangul lv syllable",
 ["h3"] = "hangul lvt syllable",
 ["hl"] = "hebrew letter",
 ["id"] = "ideographic",
 ["jl"] = "hangul l jamo",
 ["jv"] = "hangul v jamo",
 ["jt"] = "hangul t jamo",
 ["ri"] = "regional indicator",
 ["sa"] = "complex context dependent (south east asian)",
 ["xx"] = "unknown",

}
897
898
899
900
901
-- Descriptions of the (lowercased) bidirectional class tags.
-- NOTE(review): presumably these are the values found in the `direction` field
-- of the character data -- confirm against char-def.lua.
characters.bidi = allocate {
 l = "Left-to-Right",
 lre = "Left-to-Right Embedding",
 lro = "Left-to-Right Override",
 r = "Right-to-Left",
 al = "Right-to-Left Arabic",
 rle = "Right-to-Left Embedding",
 rlo = "Right-to-Left Override",
 pdf = "Pop Directional Format",
 en = "European Number",
 es = "European Number Separator",
 et = "European Number Terminator",
 an = "Arabic Number",
 cs = "Common Number Separator",
 nsm = "Non-Spacing Mark",
 bn = "Boundary Neutral",
 b = "Paragraph Separator",
 s = "Segment Separator",
 ws = "Whitespace",
 on = "Other Neutrals",
}
923
924
928
-- Two-way mapping between combining accents and their spacing counterparts.
-- Every pair is entered in both directions. The table is only built when it is
-- not already there and is registered with the storage mechanism when that is
-- available.
if not characters.fallbacks then

    characters.fallbacks = allocate {
        [0x0308] = 0x00A8, [0x00A8] = 0x0308, -- diaeresis
        [0x0304] = 0x00AF, [0x00AF] = 0x0304, -- macron
        [0x0301] = 0x00B4, [0x00B4] = 0x0301, -- acute
        [0x0327] = 0x00B8, [0x00B8] = 0x0327, -- cedilla
        [0x0302] = 0x02C6, [0x02C6] = 0x0302, -- circumflex
        [0x030C] = 0x02C7, [0x02C7] = 0x030C, -- caron
        [0x0306] = 0x02D8, [0x02D8] = 0x0306, -- breve
        [0x0307] = 0x02D9, [0x02D9] = 0x0307, -- dot above
        [0x030A] = 0x02DA, [0x02DA] = 0x030A, -- ring above
        [0x0328] = 0x02DB, [0x02DB] = 0x0328, -- ogonek
        [0x0303] = 0x02DC, [0x02DC] = 0x0303, -- tilde
        [0x030B] = 0x02DD, [0x02DD] = 0x030B, -- double acute
        [0x0305] = 0x203E, [0x203E] = 0x0305, -- overline
        -- the reverse entry used to point at 0x0333 (combining double low
        -- line), which broke the pairing for the grave accent
        [0x0300] = 0x0060, [0x0060] = 0x0300, -- grave
    }

end

if storage then
    storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks")
end
958
-- Lazy per-character property tables. Indexing one of them with a key fetches
-- the matching field from the character data once and remembers the outcome;
-- a missing entry or a missing field is remembered (and returned) as false.
do

    -- builds the index metamethod for one data field
    local function fieldresolver(field)
        return function(t,k)
            local d = data[k]
            local v = d and d[field] or false
            t[k] = v
            return v
        end
    end

    characters.directions  = { }
    characters.mirrors     = { }
    characters.textclasses = { }

    setmetatableindex(characters.directions,  fieldresolver("direction"))
    setmetatableindex(characters.mirrors,     fieldresolver("mirror"))
    setmetatableindex(characters.textclasses, fieldresolver("textclass"))

end
1003
1004
1008
1009
1010
-- Simple accessors for string fields of a character entry. Each of them gives
-- an empty string when there is no entry for `n` or the field is not set.

function characters.contextname(n)
    local d = data[n]
    return d and d.contextname or ""
end

function characters.adobename(n)
    local d = data[n]
    return d and d.adobename or ""
end

function characters.description(n)
    local d = data[n]
    return d and d.description or ""
end
1014
1015
-- Returns the category tag of character `n`, or its verbose name (looked up in
-- categorytags) when `verbose` is set. An empty string is returned when the
-- character has no category. Like the other accessors this one now also copes
-- with a character that has no data entry at all instead of indexing nil.
function characters.category(n,verbose)
    local d = data[n]
    local c = d and d.category
    if not c then
        return ""
    elseif verbose then
        return categorytags[c]
    else
        return c
    end
end
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
-- Converts a code point, or a table with code points, into an utf string. The
-- function is also made available as utf.tostring.
local function toutfstring(s)
    if type(s) ~= "table" then
        return utfchar(s)
    end
    return utfchar(unpack(s))
end

utf.tostring = toutfstring
1046
local categories = allocate() characters.categories = categories

-- Lazy category lookup: the category of the character is cached under the key
-- that was used; when there is no entry or no category the key itself is
-- stored and returned. A nil or false key gives nothing.
setmetatableindex(categories, function(t,u)
    if not u then
        return
    end
    local d = data[u]
    local category = d and d.category or u
    t[u] = category
    return category
end)
1050
1051
1052
1053
-- Lazy case and shape mappings. The *codes tables deliver code points (whatever
-- is stored in the lccode, uccode, shcode or fscode field), the *chars tables
-- deliver utf strings. All of them cache what they find under the key that was
-- asked for and ignore a nil or false key.

local lccodes = allocate() characters.lccodes = lccodes
local uccodes = allocate() characters.uccodes = uccodes
local shcodes = allocate() characters.shcodes = shcodes
local fscodes = allocate() characters.fscodes = fscodes

do

    -- Without a mapping a string key falls back to utfbyte(key) and any other
    -- key to itself.
    local function coderesolver(field)
        return function(t,u)
            if u then
                local d = data[u]
                local code = d and d[field] or (type(u) == "string" and utfbyte(u)) or u
                t[u] = code
                return code
            end
        end
    end

    setmetatableindex(lccodes, coderesolver("lccode"))
    setmetatableindex(uccodes, coderesolver("uccode"))
    setmetatableindex(shcodes, coderesolver("shcode"))
    setmetatableindex(fscodes, coderesolver("fscode"))

end

local lcchars = allocate() characters.lcchars = lcchars
local ucchars = allocate() characters.ucchars = ucchars
local shchars = allocate() characters.shchars = shchars
local fschars = allocate() characters.fschars = fschars

do

    -- Without a mapping a numeric key falls back to utfchar(key) and any other
    -- key to itself.
    local function charresolver(field)
        return function(t,u)
            if u then
                local d = data[u]
                local code = d and d[field]
                local str = code and toutfstring(code) or (type(u) == "number" and utfchar(u)) or u
                t[u] = str
                return str
            end
        end
    end

    setmetatableindex(lcchars, charresolver("lccode"))
    setmetatableindex(ucchars, charresolver("uccode"))
    setmetatableindex(shchars, charresolver("shcode"))
    setmetatableindex(fschars, charresolver("fscode"))

end
1073
local decomposed = allocate() characters.decomposed = decomposed
local specials   = allocate() characters.specials   = specials

-- Lazy access to the `decomposed` field of a character; false is cached and
-- returned when there is no entry or no such field. A nil or false key gives
-- nothing.
setmetatableindex(decomposed, function(t,u)
    if not u then
        return
    end
    local d = data[u]
    local found = false
    if d then
        found = d.decomposed or false
    end
    t[u] = found
    return found
end)

-- Same as above, but for the `specials` field.
setmetatableindex(specials, function(t,u)
    if not u then
        return
    end
    local d = data[u]
    local found = false
    if d then
        found = d.specials or false
    end
    t[u] = found
    return found
end)
1094
local specialchars = allocate() characters.specialchars = specialchars
local descriptions = allocate() characters.descriptions = descriptions
local synonyms     = allocate() characters.synonyms     = synonyms

-- Lazy mapping from a character onto the letters it is made of. When the
-- character has a `specials` list, the entries after the first one (which is
-- the kind of special) are filtered: the ones that are letters are converted
-- to utf and concatenated. Without specials the character itself is returned
-- as utf string.
--
-- The result is always cached under the key that was asked for. Before, the
-- fallback branch converted a numeric key to a string first and then cached
-- under that string, so numeric lookups were never remembered. A component
-- without data is now skipped instead of raising an error.
setmetatableindex(specialchars, function(t,u)
    if u then
        local d = data[u]
        local s = d and d.specials
        local result
        if s then
            local tt  = { }
            local ttn = 0
            for i=2,#s do
                local si = s[i]
                local di = data[si]
                if di and is_letter[di.category] then
                    ttn = ttn + 1
                    tt[ttn] = utfchar(si)
                end
            end
            result = concat(tt)
        elseif type(u) == "number" then
            result = utfchar(u)
        else
            result = u
        end
        t[u] = result
        return result
    end
end)
1126
-- Reverse lookup from description to code point. A miss (which includes the
-- very first access) walks over all character data and stores each description,
-- lowercased and with the spaces removed, with its code point as value.
--
-- NOTE(review): when the key is still unknown after that, the key is stored as
-- its own value (which avoids a second scan for that key) but nil is returned,
-- so the first lookup of an unknown key gives nil and later lookups give the
-- key itself. Confirm that callers expect this.
setmetatableindex(descriptions, function(t,k)
    for unicode, chr in next, data do
        local description = chr.description
        if description then
            -- the parentheses drop the count that gsub returns
            t[lower((gsub(description," ","")))] = unicode
        end
    end
    local found = rawget(t,k)
    if not found then
        t[k] = k
    end
    return found
end)
1145
-- Reverse lookup from synonym to code point, set up like the descriptions
-- table: a miss walks over all character data and stores every synonym (spaces
-- removed, case untouched) with its code point as value.
--
-- Two problems are fixed here. A miss used to end in `t[s] = s` with s being
-- nil, which raises "table index is nil"; the key that was asked for is now
-- stored instead, as the descriptions table does (so here too the first miss
-- gives nil and later lookups of that key give the key itself). And the
-- `synonyms` field is accepted both as a single string and as a list of
-- strings, where before a list made `find` raise an error.
-- NOTE(review): char-def.lua appears to store synonyms as a list -- confirm.
setmetatableindex(synonyms, function(t,k)
    if k == nil then
        return
    end
    local function register(name,unicode)
        if type(name) == "string" then
            if find(name," ",1,true) then
                name = gsub(name," ","")
            end
            t[name] = unicode
        end
    end
    for u, c in next, data do
        local s = c.synonyms
        if type(s) == "table" then
            for i=1,#s do
                register(s[i],u)
            end
        elseif s then
            register(s,u)
        end
    end
    local s = rawget(t,k)
    if not s then
        t[k] = k
    end
    return s
end)
1163
-- Resolves what was asked into a code point. Anything that tonumber accepts is
-- returned as number. Other strings are looked up in the descriptions table,
-- first as given and, when that gives nothing, with the spaces removed.
-- Values that are neither give nothing.
function characters.unicodechar(asked)
    local number = tonumber(asked)
    if number then
        return number
    end
    if type(asked) == "string" then
        local found = descriptions[asked]
        if found then
            return found
        end
        local stripped = gsub(asked," ","")
        return descriptions[stripped]
    end
end
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
-- Substitution patterns that run over a whole string and hand every utf
-- character to the lcchars, ucchars or shchars table, using whatever that
-- table delivers as replacement.
local tolower = Cs((utf8character/lcchars)^0)
local toupper = Cs((utf8character/ucchars)^0)
local toshape = Cs((utf8character/shchars)^0)

-- the patterns are shared through lpeg.patterns
lpegpatterns.tolower = tolower
lpegpatterns.toupper = toupper
lpegpatterns.toshape = toshape
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
-- Builds (unless already present) two tables that map a code point onto the
-- list of components found in its `specials` field: `compat` for specials of
-- kind "compat" and `char` for kinds "char" and "with". Only specials with at
-- least two components are taken into account.
if not characters.splits then

    local char   = allocate()
    local compat = allocate()

    local splits = {
        char   = char,
        compat = compat,
    }

    characters.splits = splits

    -- which table a kind of special ends up in
    local targets = {
        compat = compat,
        char   = char,
        with   = char,
    }

    for unicode, chr in next, characters.data do
        local specials = chr.specials
        if specials and #specials > 2 then
            local target = targets[specials[1]]
            if target then
                -- drop the kind, keep the components
                target[unicode] = { unpack(specials,2) }
            end
        end
    end

    if storage then
        storage.register("characters/splits", splits, "characters.splits")
    end

end
1258
-- Builds (unless already present) string based case hashes: lhash maps a
-- character onto its lowercase, uhash onto its uppercase and shash onto its
-- shape, all as utf strings. The uppercase mapping is only looked at for
-- characters that have no lowercase mapping. A mapping is either one code
-- point or a list; only lists of exactly two code points are used.
if not characters.lhash then

    local lhash = allocate() characters.lhash = lhash
    local uhash = allocate() characters.uhash = uhash
    local shash = allocate() characters.shash = shash

    -- stores the utf form of `code` under the utf form of `unicode`
    local function register(hash,unicode,code)
        local str
        if type(code) == "number" then
            str = utfchar(code)
        elseif #code == 2 then
            str = utfchar(code[1]) .. utfchar(code[2])
        end
        if str then
            hash[utfchar(unicode)] = str
        end
    end

    for unicode, chr in next, characters.data do
        local lc = chr.lccode
        if lc then
            register(lhash,unicode,lc)
        else
            local uc = chr.uccode
            if uc then
                register(uhash,unicode,uc)
            end
        end
        local sh = chr.shcode
        if sh then
            register(shash,unicode,sh)
        end
    end

    if storage then
        storage.register("characters/lhash", lhash, "characters.lhash")
        storage.register("characters/uhash", uhash, "characters.uhash")
        storage.register("characters/shash", shash, "characters.shash")
    end

end
1310
-- At this point the hashes exist, either built by the code above or already
-- present before this file ran; in both cases they are passed to mark().
local lhash = characters.lhash mark(lhash)
local uhash = characters.uhash mark(uhash)
local shash = characters.shash mark(shash)

-- Single character patterns: the match is replaced by its entry in the hash.
-- NOTE(review): utfchartabletopattern presumably builds a pattern that matches
-- any key of the table -- confirm in the lpeg helpers.
local utf8lowercharacter = utfchartabletopattern(lhash) / lhash
local utf8uppercharacter = utfchartabletopattern(uhash) / uhash
local utf8shapecharacter = utfchartabletopattern(shash) / shash

-- Whole string converters: a character that the pattern above does not match
-- is taken as it is.
local utf8lower = Cs((utf8lowercharacter + utf8character)^0)
local utf8upper = Cs((utf8uppercharacter + utf8character)^0)
local utf8shape = Cs((utf8shapecharacter + utf8character)^0)

-- all six patterns are shared through lpeg.patterns
lpegpatterns.utf8lowercharacter = utf8lowercharacter
lpegpatterns.utf8uppercharacter = utf8uppercharacter
lpegpatterns.utf8shapecharacter = utf8shapecharacter

lpegpatterns.utf8lower = utf8lower
lpegpatterns.utf8upper = utf8upper
lpegpatterns.utf8shape = utf8shape
1330
-- String converters on top of the utf8lower, utf8upper and utf8shape patterns.
-- A nil or false argument gives an empty string.

function characters.lower(str)
    if str then
        return lpegmatch(utf8lower,str) or ""
    end
    return ""
end

function characters.upper(str)
    if str then
        return lpegmatch(utf8upper,str) or ""
    end
    return ""
end

function characters.shaped(str)
    if str then
        return lpegmatch(utf8shape,str) or ""
    end
    return ""
end

-- hand the casers over to the lpeg helpers
lpeg.setutfcasers(characters.lower,characters.upper)
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
-- Returns `str` with everything that is not a letter removed. When `spacing`
-- is set, a run of spacing characters between two letters is kept as one
-- single space; spacing at the start or the end of the string is dropped.
--
-- The spacing logic used to test `n > 1`, so the space after a single leading
-- letter was lost, and it left the flag set in that case, so the space showed
-- up one letter too late ("a bc" came out as "ab c"). The flag is now cleared
-- at every letter and a space is inserted as soon as there is something in
-- front of it. A character without data is skipped instead of raising an
-- error.
function characters.lettered(str,spacing)
    local new, n = { }, 0
    local pending = false -- spacing seen since the last letter
    for u in utfvalues(str) do
        local d = data[u]
        local c = d and d.category
        if is_letter[c] then
            if pending then
                if n > 0 then
                    n = n + 1
                    new[n] = " "
                end
                pending = false
            end
            n = n + 1
            new[n] = utfchar(u)
        elseif spacing and is_spacing[c] then
            pending = true
        end
    end
    return concat(new)
end
1399
1400
1403
-- function wrappers around the uccodes and lccodes tables

function characters.uccode(n)
    return uccodes[n]
end

function characters.lccode(n)
    return lccodes[n]
end

-- Returns the shape of character `n` as two values. When the shape is a list,
-- its first and last entries are returned; otherwise the shape (or `n` itself
-- when nothing is known) followed by nil.
function characters.shape(n)
    local shcode = shcodes[n]
    if shcode and type(shcode) == "table" then
        return shcode[1], shcode[#shcode]
    end
    return shcode or n, nil
end
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
-- Builds (unless already present) three tables from the `specials` field of
-- the character data: superscripts and subscripts map a code point onto the
-- one base character it stands for, fractions maps a code point onto the list
-- of its components. Super and subscripts that have more than one component
-- are ignored, which is reported when tracing is on.
if not characters.superscripts then

    local superscripts = allocate() characters.superscripts = superscripts
    local subscripts   = allocate() characters.subscripts   = subscripts
    local fractions    = allocate() characters.fractions    = fractions

    -- reports an entry that is not used (only when tracing)
    local function ignored(what,unicode,chr)
        if trace_defining then
            report_defining("ignoring %s %a, char %c, description %a",what,ustring(unicode),unicode,chr.description)
        end
    end

    for unicode, chr in next, data do
        local specials = chr.specials
        if specials then
            local kind  = specials[1]
            local count = #specials
            if kind == "super" then
                if count == 2 then
                    superscripts[unicode] = specials[2]
                else
                    ignored("superscript",unicode,chr)
                end
            elseif kind == "sub" then
                if count == 2 then
                    subscripts[unicode] = specials[2]
                else
                    ignored("subscript",unicode,chr)
                end
            elseif kind == "fraction" then
                if count > 1 then
                    fractions[unicode] = { unpack(specials,2) }
                else
                    ignored("fraction",unicode,chr)
                end
            end
        end
    end

    if storage then
        storage.register("characters/superscripts", superscripts, "characters.superscripts")
        storage.register("characters/subscripts",   subscripts,   "characters.subscripts")
        storage.register("characters/fractions",    fractions,    "characters.fractions")
    end

end
1537
-- Debugging aid: splits `str` with utotable and reports every element on a
-- line of its own, preceded by its index.
function characters.showstring(str)
    local parts = utotable(str)
    local count = #parts
    for index=1,count do
        report_defining("split % 3i : %C",index,parts[index])
    end
end
1544
1545do
1546
1547
1548
 -- Normalization of emoji descriptions; the resulting pattern is used for the
 -- keys of the lookup hash and by characters.emojized.
 local any = P(1)
 -- characters that are dropped from a description
 local special = S([['".,:;-+()]])
 + P('“') + P('”')
 local apostrofe = P("’") + P("'")

 -- Rewrites a description: "medium light" and "medium dark" get a hyphen when
 -- " skin tone" follows, an apostrophe followed by "s" and the special
 -- characters are removed, everything else is copied. The pattern needs at
 -- least one character, so it fails on an empty string.
 local pattern = Cs ( (
 (P("medium light") / "medium-light" + P("medium dark") / "medium-dark") * P(" skin tone")
 + (apostrofe * P("s"))/""
 + special/""
 + any
 )^1)
1560
1561 local function load()
1562 local name = resolvers.findfile("char-emj.lua")
1563 local data = name and name ~= "" and dofile(name) or { }
1564 local hash = { }
1565 for d, c in next, data do
1566 local k = lpegmatch(pattern,d) or d
1567 local u = { }
1568 for i=1,#c do
1569 u[i] = utfchar(c[i])
1570 end
1571 u = concat(u)
1572 hash[k] = u
1573 end
1574 return data, hash
1575 end
1576
1577 local data, hash = nil, nil
1578
1579 function characters.emojized(name)
1580 local t = lpegmatch(pattern,name)
1581 if t then
1582 return t
1583 else
1584 return { name }
1585 end
1586 end
1587
 -- Building blocks for simplifying a name: a skin tone or gender phrase is
 -- only recognized with a space in front of it, or with a space or the end of
 -- the string after it.
 local start = P(" ")
 local finish = P(-1) + P(" ")
 local skintone = P("medium ")^0 * (P("light ") + P("dark "))^0 * P("skin tone")
 local gender = P("woman") + P("man")
 -- shortcut -> full form, e.g. "m-l-s-t" becomes "medium-light skin tone"
 local expanded = (
 P("m-l-")/"medium-light"
 + P("m-d-")/"medium-dark"
 + P("l-") /"light"
 + P("m-") /"medium"
 + P("d-") /"dark"
 )
 * (P("s-t")/" skin tone")
 -- full form -> shortcut, the reverse of the above
 local compacted = (
 (P("medium-")/"m-" * (P("light")/"l" + P("dark")/"d"))
 + (P("medium")/"m" + P("light")/"l" + P("dark")/"d")
 )
 * (P(" skin tone")/"-s-t")

 -- pattern_0 expands shortcuts, pattern_1 removes skin tone phrases,
 -- pattern_2 removes gender words and pattern_4 compacts skin tone phrases;
 -- all of them copy the rest of the string and need at least one character
 local pattern_0 = Cs((expanded + any)^1)
 local pattern_1 = Cs(((start * skintone + skintone * finish)/"" + any)^1)
 local pattern_2 = Cs(((start * gender + gender * finish)/"" + any)^1)
 local pattern_4 = Cs((compacted + any)^1)







 -- words that are replaced by the emoji (modifier) they stand for
 local skin =
 P("light skin tone") / utfchar(0x1F3FB)
 + P("medium-light skin tone") / utfchar(0x1F3FC)
 + P("medium skin tone") / utfchar(0x1F3FD)
 + P("medium-dark skin tone") / utfchar(0x1F3FE)
 + P("dark skin tone") / utfchar(0x1F3FF)

 local parent =
 P("man") / utfchar(0x1F468)
 + P("woman") / utfchar(0x1F469)

 local child =
 P("baby") / utfchar(0x1F476)
 + P("boy") / utfchar(0x1F466)
 + P("girl") / utfchar(0x1F467)

 -- glue: the zero width joiner, and the sequences that go between the two
 -- persons of a "couple with heart" and of a "kiss"
 local zwj = utfchar(0x200D)
 local heart = utfchar(0x2764) .. utfchar(0xFE0F) .. zwj
 local kiss = utfchar(0x2764) .. utfchar(0xFE0F) .. utfchar(0x200D) .. utfchar(0x1F48B) .. zwj



 local space = P(" ")
 -- NOTE(review): `final` is not referenced by the patterns below
 local final = P(-1)

 -- after a person: spaces become a joiner, unless the string ends there
 local p_done = (space^1/zwj) + P(-1)
 -- a person can be followed by a skin tone, the space in between is dropped
 local p_rest = space/"" * (skin * p_done) + p_done
 local p_parent = parent * p_rest
 local p_child = child * p_rest

 -- composed sequences: the leading keyword is dropped, "family" takes up to
 -- two parents and up to two children, the other two take exactly two parents
 -- with the matching glue in between
 local p_family = Cs ( (P("family") * space^1)/"" * p_parent^-2 * p_child^-2 )
 local p_couple = Cs ( (P("couple with heart") * space^1)/"" * p_parent * Cc(heart) * p_parent )
 local p_kiss = Cs ( (P("kiss") * space^1)/"" * p_parent * Cc(kiss) * p_parent )

 local p_special = p_family + p_couple + p_kiss
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666 local emoji = { }
1667 characters.emoji = emoji
1668
1669local cache = setmetatable({ }, { __mode = "k" } )
1670
1671 function emoji.resolve(name)
1672 if not hash then
1673 data, hash = load()
1674 end
1675 local h = hash[name]
1676 if h then
1677 return h
1678 end
1679 local h = cache[name]
1680 if h then
1681 return h
1682 elseif h == false then
1683 return
1684 end
1685
1686 local name = lpegmatch(pattern_0,name) or name
1687
1688 local h = lpegmatch(p_special,name)
1689 if h then
1690 cache[name] = h
1691 return h
1692 end
1693
1694 local s = lpegmatch(pattern_1,name)
1695 local h = hash[s]
1696 if h then
1697 cache[name] = h
1698 return h
1699 end
1700
1701 local s = lpegmatch(pattern_2,name)
1702 local h = hash[s]
1703 if h then
1704 cache[name] = h
1705 return h
1706 end
1707 cache[name] = false
1708 end
1709
1710 function emoji.known()
1711 if not hash then
1712 data, hash = load()
1713 end
1714 return hash, data
1715 end
1716
1717 function emoji.compact(name)
1718 return lpegmatch(pattern_4,name) or name
1719 end
1720
1721end
1722
1723
1724
1725return characters
1726 |