-- Make sure the global module registry exists and file this module's
-- identification record (version, origin, license) under its name.
modules = modules or { }

modules['char-ini'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable
15local concat, unpack, tohash, insert = table.concat, table.unpack, table.tohash, table.insert
16local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
17local format, lower, gsub, find = string.format, string.lower, string.gsub, string.find
18local P, R, S, C, Cs, Ct, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.V
19local formatters = string.formatters
20
21if not characters then require("char-def") end
22
23local lpegpatterns = lpeg.patterns
24local lpegmatch = lpeg.match
25local utf8byte = lpegpatterns.utf8byte
26local utf8character = lpegpatterns.utf8character
27
28local utfchartabletopattern = lpeg.utfchartabletopattern
29
30local allocate = utilities.storage.allocate
31local mark = utilities.storage.mark
32
33local setmetatableindex = table.setmetatableindex
34
-- Tracing switch for character definitions, toggled through the
-- "characters.defining" tracker. The callback must assign the local declared
-- on this line: assigning any other name creates a stray global and leaves
-- this flag stuck at false.
local trace_defining = false  trackers.register("characters.defining", function(v) trace_defining = v end)
36
37local report_defining = logs.reporter("characters")
38
39
40
41
42
43
44
45
46
47
48
49
-- Set up the global characters namespace and take local handles on it and on
-- its data table. When the data table is absent we report and exit; otherwise
-- the table is handed to the storage 'mark' helper.
characters = characters or { }

local characters = characters
local data       = characters.data

if not data then
    report_defining("fatal error: 'char-def.lua' is not loaded")
    os.exit()
else
    mark(data)
end
60
61
62
-- Only when running inside context with CONTEXTLMTXMODE equal to 0: load the
-- private character definitions when they are not there yet (registering them
-- with the storage subsystem if that exists) and copy every private entry into
-- the main data table.
if context and CONTEXTLMTXMODE == 0 then

    if not characters.private then
        require("char-prv")
        if storage then
            storage.register("characters/private", characters.private, "characters.private")
        end
    end

    -- entries are copied by reference, an existing slot gets overwritten
    local privates = characters.private
    for unicode, entry in next, privates do
        data[unicode] = entry
    end

end
80
81
82
-- Parser for hexadecimal character notation: a "0x" or "U+" prefix followed by
-- one or more digits from 0-9 / A-F (uppercase only) up to the end of the
-- string. A successful match yields the value as a number.
local pattern  do

    local prefix    = P("0x") + P("U+")
    local hexdigits = R("09","AF")^1 * P(-1)

    local function tocodepoint(s)
        return tonumber(s,16)
    end

    pattern = prefix * (hexdigits / tocodepoint)

end

lpegpatterns.chartonumber = pattern
86
-- Convert a character specification into a number.
--
--   k : a string in "0x..." / "U+..." notation, any other string (passed to
--       utfbyte), or a non-string value
--
-- Returns the parsed hexadecimal value for the prefixed notation, the result
-- of utfbyte for other strings (0 when that yields nothing) and, for
-- non-strings, the value itself or 0 when it is nil or false.
local function chartonumber(k)
    if type(k) ~= "string" then
        return k or 0
    end
    local u = lpegmatch(pattern,k)
    if u then
        -- The pattern already delivers a number; feeding it to utfbyte would
        -- coerce it to its decimal string and return the byte of the first
        -- digit instead of the intended value.
        return u
    end
    return utfbyte(k) or 0
end
99
-- Convert a number or a "0x..." / "U+..." string into a character string.
--
--   k : a number (passed to utfchar; an empty string is returned when that
--       yields nothing) or a string; strings that are not in the prefixed
--       hexadecimal notation are returned unchanged
local function charfromnumber(k)
    if type(k) ~= "number" then
        local u = lpegmatch(pattern,k)
        if not u then
            return k
        end
        return utfchar(u)
    end
    return utfchar(k) or ""
end
112
113
114
-- Shared placeholder entry returned by the data table's fallback lookup when
-- nothing else applies.
local private = { description = "PRIVATE SLOT" }

-- List of range descriptors; the data table's fallback lookup walks it and
-- uses the first/last/extender fields of each entry.
local ranges = allocate()

characters.ranges     = ranges
characters.tonumber   = chartonumber
characters.fromnumber = charfromnumber
124
-- Fallback lookup for keys that are missing from the data table.
--
-- String keys are first turned into a number (prefixed hexadecimal notation,
-- else utfbyte) and retried as a raw lookup. Numbers below 0xF0000 are then
-- checked against the registered ranges: the first range that contains the
-- number and has an extender produces the entry, which is stored in the table
-- so that the next access finds it directly. Everything else resolves to the
-- shared private placeholder.
setmetatableindex(data, function(t,k)
    local kind = type(k)
    if kind == "string" then
        k = lpegmatch(pattern,k) or utfbyte(k)
        if not k then
            return private
        end
        local known = rawget(t,k)
        if known then
            return known
        end
        -- from here on the converted key is handled like a numeric one
        kind = "number"
    end
    if kind ~= "number" or not (k < 0xF0000) then
        return private
    end
    for r=1,#ranges do
        local range = ranges[r]
        if k >= range.first and k <= range.last then
            local extend = range.extender
            if extend then
                local entry = extend(k)
                t[k] = entry
                return entry
            end
        end
    end
    return private
end)
155
-- Properties shared by all variation selector entries that the range
-- extenders create; each entry gets this table as its metatable instead of
-- carrying its own copy of these fields.
local variant_selector_metatable = {
    category  = "mn",
    cjkwd     = "a",
    direction = "nsm",
    linebreak = "cm",
}

-- A metatable only supplies missing fields through __index. Without this line
-- the shared properties above are invisible on the entries. Pointing __index
-- at the table itself keeps the fields reachable on the table as before.
variant_selector_metatable.__index = variant_selector_metatable
162
163
164
-- Formatter for the descriptions of generated variation selector entries.
local f_variant = string.formatters["VARIATION SELECTOR-0x%04X"]

-- Range 0xFE00-0xFE0F: entries are built on demand; the description is
-- numbered from 1 at the first slot and the entry gets the shared variation
-- selector metatable.
insert(characters.ranges, {
    first    = 0xFE00,
    last     = 0xFE0F,
    name     = "variant selector",
    extender = function(k)
        return setmetatable({
            description = f_variant(k - 0xFE00 + 0x0001),
            unicodeslot = k,
        }, variant_selector_metatable)
    end,
})
180
-- Range 0xE0100-0xE01EF: entries are built on demand; the description is
-- numbered from 0x11 at the first slot and the entry gets the shared
-- variation selector metatable.
insert(characters.ranges, {
    first    = 0xE0100,
    last     = 0xE01EF,
    name     = "variant selector extension",
    extender = function(k)
        return setmetatable({
            description = f_variant(k - 0xE0100 + 0x0011),
            unicodeslot = k,
        }, variant_selector_metatable)
    end,
})
194
195local blocks = allocate {
196 ["adlam"] = { first = 0x1E900, last = 0x1E95F, description = "Adlam" },
197 ["aegeannumbers"] = { first = 0x10100, last = 0x1013F, description = "Aegean Numbers" },
198 ["ahom"] = { first = 0x11700, last = 0x1174F, description = "Ahom" },
199 ["alchemicalsymbols"] = { first = 0x1F700, last = 0x1F77F, description = "Alchemical Symbols" },
200 ["alphabeticpresentationforms"] = { first = 0x0FB00, last = 0x0FB4F, otf="latn", description = "Alphabetic Presentation Forms" },
201 ["anatolianhieroglyphs"] = { first = 0x14400, last = 0x1467F, description = "Anatolian Hieroglyphs" },
202 ["ancientgreekmusicalnotation"] = { first = 0x1D200, last = 0x1D24F, otf="grek", description = "Ancient Greek Musical Notation" },
203 ["ancientgreeknumbers"] = { first = 0x10140, last = 0x1018F, otf="grek", description = "Ancient Greek Numbers" },
204 ["ancientsymbols"] = { first = 0x10190, last = 0x101CF, otf="grek", description = "Ancient Symbols" },
205 ["arabic"] = { first = 0x00600, last = 0x006FF, otf="arab", description = "Arabic" },
206 ["arabicextendeda"] = { first = 0x008A0, last = 0x008FF, description = "Arabic Extended-A" },
207 ["arabicextendedb"] = { first = 0x00870, last = 0x0089F, description = "Arabic Extended-B" },
208 ["arabicextendedc"] = { first = 0x10EC0, last = 0x10EFF, description = "Arabic Extended-C" },
209 ["arabicmathematicalalphabeticsymbols"] = { first = 0x1EE00, last = 0x1EEFF, description = "Arabic Mathematical Alphabetic Symbols" },
210 ["arabicpresentationformsa"] = { first = 0x0FB50, last = 0x0FDFF, otf="arab", description = "Arabic Presentation Forms-A" },
211 ["arabicpresentationformsb"] = { first = 0x0FE70, last = 0x0FEFF, otf="arab", description = "Arabic Presentation Forms-B" },
212 ["arabicsupplement"] = { first = 0x00750, last = 0x0077F, otf="arab", description = "Arabic Supplement" },
213 ["armenian"] = { first = 0x00530, last = 0x0058F, otf="armn", description = "Armenian" },
214 ["arrows"] = { first = 0x02190, last = 0x021FF, description = "Arrows" },
215 ["avestan"] = { first = 0x10B00, last = 0x10B3F, description = "Avestan" },
216 ["balinese"] = { first = 0x01B00, last = 0x01B7F, otf="bali", description = "Balinese" },
217 ["bamum"] = { first = 0x0A6A0, last = 0x0A6FF, description = "Bamum" },
218 ["bamumsupplement"] = { first = 0x16800, last = 0x16A3F, description = "Bamum Supplement" },
219 ["basiclatin"] = { first = 0x00000, last = 0x0007F, otf="latn", description = "Basic Latin" },
220 ["bassavah"] = { first = 0x16AD0, last = 0x16AFF, description = "Bassa Vah" },
221 ["batak"] = { first = 0x01BC0, last = 0x01BFF, description = "Batak" },
222 ["bengali"] = { first = 0x00980, last = 0x009FF, otf="beng", description = "Bengali" },
223 ["bhaiksuki"] = { first = 0x11C00, last = 0x11C6F, description = "Bhaiksuki" },
224 ["blockelements"] = { first = 0x02580, last = 0x0259F, otf="bopo", description = "Block Elements" },
225 ["bopomofo"] = { first = 0x03100, last = 0x0312F, otf="bopo", description = "Bopomofo" },
226 ["bopomofoextended"] = { first = 0x031A0, last = 0x031BF, otf="bopo", description = "Bopomofo Extended" },
227 ["boxdrawing"] = { first = 0x02500, last = 0x0257F, description = "Box Drawing" },
228 ["brahmi"] = { first = 0x11000, last = 0x1107F, description = "Brahmi" },
229 ["braillepatterns"] = { first = 0x02800, last = 0x028FF, otf="brai", description = "Braille Patterns" },
230 ["buginese"] = { first = 0x01A00, last = 0x01A1F, otf="bugi", description = "Buginese" },
231 ["buhid"] = { first = 0x01740, last = 0x0175F, otf="buhd", description = "Buhid" },
232 ["byzantinemusicalsymbols"] = { first = 0x1D000, last = 0x1D0FF, otf="byzm", description = "Byzantine Musical Symbols" },
233 ["carian"] = { first = 0x102A0, last = 0x102DF, description = "Carian" },
234 ["caucasianalbanian"] = { first = 0x10530, last = 0x1056F, description = "Caucasian Albanian" },
235 ["chakma"] = { first = 0x11100, last = 0x1114F, description = "Chakma" },
236 ["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" },
237 ["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" },
238 ["cherokeesupplement"] = { first = 0x0AB70, last = 0x0ABBF, description = "Cherokee Supplement" },
239 ["chesssymbols"] = { first = 0x1FA00, last = 0x1FA6F, description = "Chess Symbols" },
240 ["chorasmian"] = { first = 0x10FB0, last = 0x10FDF, description = "Chorasmian" },
241 ["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" },
242 ["cjkcompatibilityforms"] = { first = 0x0FE30, last = 0x0FE4F, otf="hang", description = "CJK Compatibility Forms" },
243 ["cjkcompatibilityideographs"] = { first = 0x0F900, last = 0x0FAFF, otf="hang", description = "CJK Compatibility Ideographs" },
244 ["cjkcompatibilityideographssupplement"] = { first = 0x2F800, last = 0x2FA1F, otf="hang", description = "CJK Compatibility Ideographs Supplement" },
245 ["cjkradicalssupplement"] = { first = 0x02E80, last = 0x02EFF, otf="hang", description = "CJK Radicals Supplement" },
246 ["cjkstrokes"] = { first = 0x031C0, last = 0x031EF, otf="hang", description = "CJK Strokes" },
247 ["cjksymbolsandpunctuation"] = { first = 0x03000, last = 0x0303F, otf="hang", description = "CJK Symbols and Punctuation" },
248 ["cjkunifiedideographs"] = { first = 0x04E00, last = 0x09FFF, otf="hang", description = "CJK Unified Ideographs", catcode = "letter" },
249 ["cjkunifiedideographsextensiona"] = { first = 0x03400, last = 0x04DBF, otf="hang", description = "CJK Unified Ideographs Extension A" },
250 ["cjkunifiedideographsextensionb"] = { first = 0x20000, last = 0x2A6DF, otf="hang", description = "CJK Unified Ideographs Extension B" },
251 ["cjkunifiedideographsextensionc"] = { first = 0x2A700, last = 0x2B73F, description = "CJK Unified Ideographs Extension C" },
252 ["cjkunifiedideographsextensiond"] = { first = 0x2B740, last = 0x2B81F, description = "CJK Unified Ideographs Extension D" },
253 ["cjkunifiedideographsextensione"] = { first = 0x2B820, last = 0x2CEAF, description = "CJK Unified Ideographs Extension E" },
254 ["cjkunifiedideographsextensionf"] = { first = 0x2CEB0, last = 0x2EBEF, description = "CJK Unified Ideographs Extension F" },
255 ["cjkunifiedideographsextensiong"] = { first = 0x30000, last = 0x3134F, description = "CJK Unified Ideographs Extension G" },
256 ["cjkunifiedideographsextensionh"] = { first = 0x31350, last = 0x323AF, description = "CJK Unified Ideographs Extension H" },
257 ["cjkunifiedideographsextensioni"] = { first = 0x2EBF0, last = 0x2EE5F, description = "CJK Unified Ideographs Extension I" },
258 ["combiningdiacriticalmarks"] = { first = 0x00300, last = 0x0036F, description = "Combining Diacritical Marks" },
259 ["combiningdiacriticalmarksextended"] = { first = 0x01AB0, last = 0x01AFF, description = "Combining Diacritical Marks Extended" },
260 ["combiningdiacriticalmarksforsymbols"] = { first = 0x020D0, last = 0x020FF, description = "Combining Diacritical Marks for Symbols" },
261 ["combiningdiacriticalmarkssupplement"] = { first = 0x01DC0, last = 0x01DFF, description = "Combining Diacritical Marks Supplement" },
262 ["combininghalfmarks"] = { first = 0x0FE20, last = 0x0FE2F, description = "Combining Half Marks" },
263 ["commonindicnumberforms"] = { first = 0x0A830, last = 0x0A83F, description = "Common Indic Number Forms" },
264 ["controlpictures"] = { first = 0x02400, last = 0x0243F, description = "Control Pictures" },
265 ["coptic"] = { first = 0x02C80, last = 0x02CFF, otf="copt", description = "Coptic" },
266 ["copticepactnumbers"] = { first = 0x102E0, last = 0x102FF, description = "Coptic Epact Numbers" },
267 ["countingrodnumerals"] = { first = 0x1D360, last = 0x1D37F, description = "Counting Rod Numerals" },
268 ["cuneiform"] = { first = 0x12000, last = 0x123FF, otf="xsux", description = "Cuneiform" },
269 ["cuneiformnumbersandpunctuation"] = { first = 0x12400, last = 0x1247F, otf="xsux", description = "Cuneiform Numbers and Punctuation" },
270 ["currencysymbols"] = { first = 0x020A0, last = 0x020CF, description = "Currency Symbols" },
271 ["cypriotsyllabary"] = { first = 0x10800, last = 0x1083F, otf="cprt", description = "Cypriot Syllabary" },
272 ["cyprominoan"] = { first = 0x12F90, last = 0x12FFF, description = "Cypro-Minoan" },
273 ["cyrillic"] = { first = 0x00400, last = 0x004FF, otf="cyrl", description = "Cyrillic" },
274 ["cyrillicextendeda"] = { first = 0x02DE0, last = 0x02DFF, otf="cyrl", description = "Cyrillic Extended-A" },
275 ["cyrillicextendedb"] = { first = 0x0A640, last = 0x0A69F, otf="cyrl", description = "Cyrillic Extended-B" },
276 ["cyrillicextendedc"] = { first = 0x01C80, last = 0x01C8F, description = "Cyrillic Extended-C" },
277 ["cyrillicextendedd"] = { first = 0x1E030, last = 0x1E08F, description = "Cyrillic Extended-D" },
278 ["cyrillicsupplement"] = { first = 0x00500, last = 0x0052F, otf="cyrl", description = "Cyrillic Supplement" },
279 ["deseret"] = { first = 0x10400, last = 0x1044F, otf="dsrt", description = "Deseret" },
280 ["devanagari"] = { first = 0x00900, last = 0x0097F, otf="deva", description = "Devanagari" },
281 ["devanagariextended"] = { first = 0x0A8E0, last = 0x0A8FF, description = "Devanagari Extended" },
282 ["devanagariextendeda"] = { first = 0x11B00, last = 0x11B5F, description = "Devanagari Extended-A" },
283 ["digitsarabicindic"] = { first = 0x00660, last = 0x00669, math = true },
284
285 ["digitsbold"] = { first = 0x1D7CE, last = 0x1D7D7, math = true },
286
287 ["digitsdoublestruck"] = { first = 0x1D7D8, last = 0x1D7E1, math = true },
288
289 ["digitsextendedarabicindic"] = { first = 0x006F0, last = 0x006F9, math = true },
290
291
292
293
294
295 ["digitslatin"] = { first = 0x00030, last = 0x00039, math = true },
296
297
298 ["digitsmonospace"] = { first = 0x1D7F6, last = 0x1D7FF, math = true },
299
300 ["digitsnormal"] = { first = 0x00030, last = 0x00039, math = true },
301
302 ["digitssansserifbold"] = { first = 0x1D7EC, last = 0x1D7F5, math = true },
303 ["digitssansserifnormal"] = { first = 0x1D7E2, last = 0x1D7EB, math = true },
304
305
306
307
308 ["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" },
309 ["divesakuru"] = { first = 0x11900, last = 0x1195F, description = "Dives Akuru" },
310 ["dogra"] = { first = 0x11800, last = 0x1184F, description = "Dogra" },
311 ["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" },
312 ["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" },
313 ["earlydynasticcuneiform"] = { first = 0x12480, last = 0x1254F, description = "Early Dynastic Cuneiform" },
314 ["egyptianhieroglyphformatcontrols"] = { first = 0x13430, last = 0x1345F, description = "Egyptian Hieroglyph Format Controls" },
315 ["egyptianhieroglyphs"] = { first = 0x13000, last = 0x1342F, description = "Egyptian Hieroglyphs" },
316 ["egyptianhieroglyphsextendeda"] = { first = 0x13460, last = 0x143FF, description = "Egyptian Hieroglyphs Extended-A" },
317 ["elbasan"] = { first = 0x10500, last = 0x1052F, description = "Elbasan" },
318 ["elymaic"] = { first = 0x10FE0, last = 0x10FFF, description = "Elymaic" },
319 ["emoticons"] = { first = 0x1F600, last = 0x1F64F, description = "Emoticons" },
320 ["enclosedalphanumerics"] = { first = 0x02460, last = 0x024FF, description = "Enclosed Alphanumerics" },
321 ["enclosedalphanumericsupplement"] = { first = 0x1F100, last = 0x1F1FF, description = "Enclosed Alphanumeric Supplement" },
322 ["enclosedcjklettersandmonths"] = { first = 0x03200, last = 0x032FF, description = "Enclosed CJK Letters and Months" },
323 ["enclosedideographicsupplement"] = { first = 0x1F200, last = 0x1F2FF, description = "Enclosed Ideographic Supplement" },
324 ["ethiopic"] = { first = 0x01200, last = 0x0137F, otf="ethi", description = "Ethiopic" },
325 ["ethiopicextended"] = { first = 0x02D80, last = 0x02DDF, otf="ethi", description = "Ethiopic Extended" },
326 ["ethiopicextendeda"] = { first = 0x0AB00, last = 0x0AB2F, description = "Ethiopic Extended-A" },
327 ["ethiopicextendedb"] = { first = 0x1E7E0, last = 0x1E7FF, description = "Ethiopic Extended-B" },
328 ["ethiopicsupplement"] = { first = 0x01380, last = 0x0139F, otf="ethi", description = "Ethiopic Supplement" },
329 ["garay"] = { first = 0x10D40, last = 0x10D8F, description = "Garay" },
330 ["generalpunctuation"] = { first = 0x02000, last = 0x0206F, description = "General Punctuation" },
331 ["geometricshapes"] = { first = 0x025A0, last = 0x025FF, math = true, description = "Geometric Shapes" },
332 ["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" },
333 ["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" },
334 ["georgianextended"] = { first = 0x01C90, last = 0x01CBF, description = "Georgian Extended" },
335 ["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" },
336 ["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" },
337 ["glagoliticsupplement"] = { first = 0x1E000, last = 0x1E02F, description = "Glagolitic Supplement" },
338 ["gothic"] = { first = 0x10330, last = 0x1034F, otf="goth", description = "Gothic" },
339 ["grantha"] = { first = 0x11300, last = 0x1137F, description = "Grantha" },
340 ["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" },
341 ["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" },
342 ["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" },
343 ["gunjalagondi"] = { first = 0x11D60, last = 0x11DAF, description = "Gunjala Gondi" },
344 ["gurmukhi"] = { first = 0x00A00, last = 0x00A7F, otf="guru", description = "Gurmukhi" },
345 ["gurungkhema"] = { first = 0x16100, last = 0x1613F, description = "Gurung Khema" },
346 ["halfwidthandfullwidthforms"] = { first = 0x0FF00, last = 0x0FFEF, description = "Halfwidth and Fullwidth Forms" },
347 ["hangulcompatibilityjamo"] = { first = 0x03130, last = 0x0318F, otf="jamo", description = "Hangul Compatibility Jamo" },
348 ["hanguljamo"] = { first = 0x01100, last = 0x011FF, otf="jamo", description = "Hangul Jamo" },
349 ["hanguljamoextendeda"] = { first = 0x0A960, last = 0x0A97F, description = "Hangul Jamo Extended-A" },
350 ["hanguljamoextendedb"] = { first = 0x0D7B0, last = 0x0D7FF, description = "Hangul Jamo Extended-B" },
351 ["hangulsyllables"] = { first = 0x0AC00, last = 0x0D7AF, otf="hang", description = "Hangul Syllables" },
352 ["hanifirohingya"] = { first = 0x10D00, last = 0x10D3F, description = "Hanifi Rohingya" },
353 ["hanunoo"] = { first = 0x01720, last = 0x0173F, otf="hano", description = "Hanunoo" },
354 ["hatran"] = { first = 0x108E0, last = 0x108FF, description = "Hatran" },
355 ["hebrew"] = { first = 0x00590, last = 0x005FF, otf="hebr", description = "Hebrew" },
356 ["highprivateusesurrogates"] = { first = 0x0DB80, last = 0x0DBFF, description = "High Private Use Surrogates" },
357 ["highsurrogates"] = { first = 0x0D800, last = 0x0DB7F, description = "High Surrogates" },
358 ["hiragana"] = { first = 0x03040, last = 0x0309F, otf="kana", description = "Hiragana" },
359 ["ideographicdescriptioncharacters"] = { first = 0x02FF0, last = 0x02FFF, description = "Ideographic Description Characters" },
360 ["ideographicsymbolsandpunctuation"] = { first = 0x16FE0, last = 0x16FFF, description = "Ideographic Symbols and Punctuation" },
361 ["imperialaramaic"] = { first = 0x10840, last = 0x1085F, description = "Imperial Aramaic" },
362 ["indicsiyaqnumbers"] = { first = 0x1EC70, last = 0x1ECBF, description = "Indic Siyaq Numbers" },
363 ["inscriptionalpahlavi"] = { first = 0x10B60, last = 0x10B7F, description = "Inscriptional Pahlavi" },
364 ["inscriptionalparthian"] = { first = 0x10B40, last = 0x10B5F, description = "Inscriptional Parthian" },
365 ["ipaextensions"] = { first = 0x00250, last = 0x002AF, description = "IPA Extensions" },
366 ["javanese"] = { first = 0x0A980, last = 0x0A9DF, description = "Javanese" },
367 ["kaithi"] = { first = 0x11080, last = 0x110CF, description = "Kaithi" },
368 ["kaktoviknumerals"] = { first = 0x1D2C0, last = 0x1D2DF, description = "Kaktovik Numerals" },
369 ["kanaextendeda"] = { first = 0x1B100, last = 0x1B12F, description = "Kana Extended-A" },
370 ["kanaextendedb"] = { first = 0x1AFF0, last = 0x1AFFF, description = "Kana Extended-B" },
371 ["kanasupplement"] = { first = 0x1B000, last = 0x1B0FF, description = "Kana Supplement" },
372 ["kanbun"] = { first = 0x03190, last = 0x0319F, description = "Kanbun" },
373 ["kangxiradicals"] = { first = 0x02F00, last = 0x02FDF, description = "Kangxi Radicals" },
374 ["kannada"] = { first = 0x00C80, last = 0x00CFF, otf="knda", description = "Kannada" },
375 ["katakana"] = { first = 0x030A0, last = 0x030FF, otf="kana", description = "Katakana" },
376 ["katakanaphoneticextensions"] = { first = 0x031F0, last = 0x031FF, otf="kana", description = "Katakana Phonetic Extensions" },
377 ["kayahli"] = { first = 0x0A900, last = 0x0A92F, description = "Kayah Li" },
378 ["kawi"] = { first = 0x11F00, last = 0x11F5F, description = "Kawi" },
379 ["kharoshthi"] = { first = 0x10A00, last = 0x10A5F, otf="khar", description = "Kharoshthi" },
380 ["khitansmallscript"] = { first = 0x18B00, last = 0x18CFF, description = "Khitan Small Script" },
381 ["khmer"] = { first = 0x01780, last = 0x017FF, otf="khmr", description = "Khmer" },
382 ["khmersymbols"] = { first = 0x019E0, last = 0x019FF, otf="khmr", description = "Khmer Symbols" },
383 ["khojki"] = { first = 0x11200, last = 0x1124F, description = "Khojki" },
384 ["khudawadi"] = { first = 0x112B0, last = 0x112FF, description = "Khudawadi" },
385 ["kiratrai"] = { first = 0x16D40, last = 0x16D7F, description = "Kirat Rai" },
386 ["lao"] = { first = 0x00E80, last = 0x00EFF, otf="lao", description = "Lao" },
387 ["latinextendeda"] = { first = 0x00100, last = 0x0017F, otf="latn", description = "Latin Extended-A" },
388 ["latinextendedadditional"] = { first = 0x01E00, last = 0x01EFF, otf="latn", description = "Latin Extended Additional" },
389 ["latinextendedb"] = { first = 0x00180, last = 0x0024F, otf="latn", description = "Latin Extended-B" },
390 ["latinextendedc"] = { first = 0x02C60, last = 0x02C7F, otf="latn", description = "Latin Extended-C" },
391 ["latinextendedd"] = { first = 0x0A720, last = 0x0A7FF, otf="latn", description = "Latin Extended-D" },
392 ["latinextendede"] = { first = 0x0AB30, last = 0x0AB6F, description = "Latin Extended-E" },
393 ["latinextendedf"] = { first = 0x10780, last = 0x107BF, description = "Latin Extended-F" },
394 ["latinextendedg"] = { first = 0x1DF00, last = 0x1DFFF, description = "Latin Extended-G" },
395 ["latinsupplement"] = { first = 0x00080, last = 0x000FF, otf="latn", description = "Latin-1 Supplement" },
396 ["lepcha"] = { first = 0x01C00, last = 0x01C4F, description = "Lepcha" },
397 ["letterlikesymbols"] = { first = 0x02100, last = 0x0214F, math = true, description = "Letterlike Symbols" },
398 ["limbu"] = { first = 0x01900, last = 0x0194F, otf="limb", description = "Limbu" },
399 ["lineara"] = { first = 0x10600, last = 0x1077F, description = "Linear A" },
400 ["linearbideograms"] = { first = 0x10080, last = 0x100FF, otf="linb", description = "Linear B Ideograms" },
401 ["linearbsyllabary"] = { first = 0x10000, last = 0x1007F, otf="linb", description = "Linear B Syllabary" },
402 ["lisu"] = { first = 0x0A4D0, last = 0x0A4FF, description = "Lisu" },
403 ["lisusupplement"] = { first = 0x11FB0, last = 0x11FBF, description = "Lisu Supplement" },
404 ["lowercasebold"] = { first = 0x1D41A, last = 0x1D433, math = true },
405 ["lowercaseboldfraktur"] = { first = 0x1D586, last = 0x1D59F, math = true },
406 ["lowercasebolditalic"] = { first = 0x1D482, last = 0x1D49B, math = true, italic = true },
407 ["lowercaseboldscript"] = { first = 0x1D4EA, last = 0x1D503, math = true, italic = true },
408 ["lowercasedoublestruck"] = { first = 0x1D552, last = 0x1D56B, math = true },
409 ["lowercasefraktur"] = { first = 0x1D51E, last = 0x1D537, math = true },
410 ["lowercasegreekbold"] = { first = 0x1D6C2, last = 0x1D6DB, math = true },
411 ["lowercasegreekbolditalic"] = { first = 0x1D736, last = 0x1D74F, math = true, italic = true },
412 ["lowercasegreekitalic"] = { first = 0x1D6FC, last = 0x1D715, math = true, italic = true },
413 ["lowercasegreeknormal"] = { first = 0x003B1, last = 0x003C9, math = true },
414 ["lowercasegreeksansserifbold"] = { first = 0x1D770, last = 0x1D789, math = true },
415 ["lowercasegreeksansserifbolditalic"] = { first = 0x1D7AA, last = 0x1D7C3, math = true, italic = true },
416 ["lowercaseitalic"] = { first = 0x1D44E, last = 0x1D467, math = true, italic = true },
417 ["lowercasemonospace"] = { first = 0x1D68A, last = 0x1D6A3, math = true },
418 ["lowercasenormal"] = { first = 0x00061, last = 0x0007A, math = true },
419 ["lowercasesansserifbold"] = { first = 0x1D5EE, last = 0x1D607, math = true },
420 ["lowercasesansserifbolditalic"] = { first = 0x1D656, last = 0x1D66F, math = true, italic = true },
421 ["lowercasesansserifitalic"] = { first = 0x1D622, last = 0x1D63B, math = true, italic = true },
422 ["lowercasesansserifnormal"] = { first = 0x1D5BA, last = 0x1D5D3, math = true },
423 ["lowercasescript"] = { first = 0x1D4B6, last = 0x1D4CF, math = true, italic = true },
424 ["lowsurrogates"] = { first = 0x0DC00, last = 0x0DFFF, description = "Low Surrogates" },
425 ["lycian"] = { first = 0x10280, last = 0x1029F, description = "Lycian" },
426 ["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" },
427 ["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" },
428 ["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" },
429 ["makasar"] = { first = 0x11EE0, last = 0x11EFF, description = "Makasar" },
430 ["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" },
431 ["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
432 ["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" },
433 ["marchen"] = { first = 0x11C70, last = 0x11CBF, description = "Marchen" },
434 ["masaramgondi"] = { first = 0x11D00, last = 0x11D5F, description = "Masaram Gondi" },
435 ["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, math = true, description = "Mathematical Alphanumeric Symbols" },
436 ["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, math = true, description = "Mathematical Operators" },
437 ["mayannumerals"] = { first = 0x1D2E0, last = 0x1D2FF, description = "Mayan Numerals" },
438 ["medefaidrin"] = { first = 0x16E40, last = 0x16E9F, description = "Medefaidrin" },
439 ["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" },
440 ["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" },
441 ["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" },
442 ["meroiticcursive"] = { first = 0x109A0, last = 0x109FF, description = "Meroitic Cursive" },
443 ["meroitichieroglyphs"] = { first = 0x10980, last = 0x1099F, description = "Meroitic Hieroglyphs" },
444 ["miao"] = { first = 0x16F00, last = 0x16F9F, description = "Miao" },
445 ["miscellaneousmathematicalsymbolsa"] = { first = 0x027C0, last = 0x027EF, math = true, description = "Miscellaneous Mathematical Symbols-A" },
446 ["miscellaneousmathematicalsymbolsb"] = { first = 0x02980, last = 0x029FF, math = true, description = "Miscellaneous Mathematical Symbols-B" },
447 ["miscellaneoussymbols"] = { first = 0x02600, last = 0x026FF, math = true, description = "Miscellaneous Symbols" },
448 ["miscellaneoussymbolsandarrows"] = { first = 0x02B00, last = 0x02BFF, math = true, description = "Miscellaneous Symbols and Arrows" },
449 ["miscellaneoussymbolsandpictographs"] = { first = 0x1F300, last = 0x1F5FF, description = "Miscellaneous Symbols and Pictographs" },
450 ["miscellaneoustechnical"] = { first = 0x02300, last = 0x023FF, math = true, description = "Miscellaneous Technical" },
451 ["modi"] = { first = 0x11600, last = 0x1165F, description = "Modi" },
452 ["modifiertoneletters"] = { first = 0x0A700, last = 0x0A71F, description = "Modifier Tone Letters" },
453 ["mongolian"] = { first = 0x01800, last = 0x018AF, otf="mong", description = "Mongolian" },
454 ["mongoliansupplement"] = { first = 0x11660, last = 0x1167F, description = "Mongolian Supplement" },
455 ["mro"] = { first = 0x16A40, last = 0x16A6F, description = "Mro" },
456 ["multani"] = { first = 0x11280, last = 0x112AF, description = "Multani" },
457 ["musicalsymbols"] = { first = 0x1D100, last = 0x1D1FF, otf="musc", description = "Musical Symbols" },
458 ["myanmar"] = { first = 0x01000, last = 0x0109F, otf="mymr", description = "Myanmar" },
459 ["myanmarextendeda"] = { first = 0x0AA60, last = 0x0AA7F, description = "Myanmar Extended-A" },
460 ["myanmarextendedb"] = { first = 0x0A9E0, last = 0x0A9FF, description = "Myanmar Extended-B" },
461 ["myanmarextendedc"] = { first = 0x116D0, last = 0x116FF, description = "Myanmar Extended-C" },
462 ["nabataean"] = { first = 0x10880, last = 0x108AF, description = "Nabataean" },
463 ["nagmundari"] = { first = 0x1E4D0, last = 0x1E4FF, description = "Nag Mundari" },
464 ["nandinagari"] = { first = 0x119A0, last = 0x119FF, description = "Nandinagari" },
465 ["newa"] = { first = 0x11400, last = 0x1147F, description = "Newa" },
466 ["newtailue"] = { first = 0x01980, last = 0x019DF, description = "New Tai Lue" },
467 ["nko"] = { first = 0x007C0, last = 0x007FF, otf="nko", description = "NKo" },
468 ["numberforms"] = { first = 0x02150, last = 0x0218F, description = "Number Forms" },
469 ["nushu"] = { first = 0x1B170, last = 0x1B2FF, description = "Nushu" },
470 ["nyiakengpuachuehmong"] = { first = 0x1E100, last = 0x1E14F, description = "Nyiakeng Puachue Hmong" },
471 ["ogham"] = { first = 0x01680, last = 0x0169F, otf="ogam", description = "Ogham" },
472 ["olchiki"] = { first = 0x01C50, last = 0x01C7F, description = "Ol Chiki" },
473 ["oldhungarian"] = { first = 0x10C80, last = 0x10CFF, description = "Old Hungarian" },
474 ["olditalic"] = { first = 0x10300, last = 0x1032F, otf="ital", description = "Old Italic" },
475 ["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" },
476 ["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" },
477 ["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" },
478 ["oldsogdian"] = { first = 0x10F00, last = 0x10F2F, description = "Old Sogdian" },
479 ["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" },
480 ["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
481 ["olduyghur"] = { first = 0x10F70, last = 0x10FAF, description = "Old Uyghur" },
482 ["olonal"] = { first = 0x1E5D0, last = 0x1E5FF, description = "Ol Onal" },
483 ["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" },
484 ["oriya"] = { first = 0x00B00, last = 0x00B7F, otf="orya", description = "Oriya" },
485 ["ornamentaldingbats"] = { first = 0x1F650, last = 0x1F67F, description = "Ornamental Dingbats" },
486 ["osage"] = { first = 0x104B0, last = 0x104FF, description = "Osage" },
487 ["osmanya"] = { first = 0x10480, last = 0x104AF, otf="osma", description = "Osmanya" },
488 ["ottomansiyaqnumbers"] = { first = 0x1ED00, last = 0x1ED4F, description = "Ottoman Siyaq Numbers" },
489 ["pahawhhmong"] = { first = 0x16B00, last = 0x16B8F, description = "Pahawh Hmong" },
490 ["palmyrene"] = { first = 0x10860, last = 0x1087F, description = "Palmyrene" },
491 ["paucinhau"] = { first = 0x11AC0, last = 0x11AFF, description = "Pau Cin Hau" },
492 ["phagspa"] = { first = 0x0A840, last = 0x0A87F, otf="phag", description = "Phags-pa" },
493 ["phaistosdisc"] = { first = 0x101D0, last = 0x101FF, description = "Phaistos Disc" },
494 ["phoenician"] = { first = 0x10900, last = 0x1091F, otf="phnx", description = "Phoenician" },
495 ["phoneticextensions"] = { first = 0x01D00, last = 0x01D7F, description = "Phonetic Extensions" },
496 ["phoneticextensionssupplement"] = { first = 0x01D80, last = 0x01DBF, description = "Phonetic Extensions Supplement" },
497 ["playingcards"] = { first = 0x1F0A0, last = 0x1F0FF, description = "Playing Cards" },
498 ["privateusearea"] = { first = 0x0E000, last = 0x0F8FF, description = "Private Use Area" },
499 ["psalterpahlavi"] = { first = 0x10B80, last = 0x10BAF, description = "Psalter Pahlavi" },
500 ["rejang"] = { first = 0x0A930, last = 0x0A95F, description = "Rejang" },
501 ["ruminumeralsymbols"] = { first = 0x10E60, last = 0x10E7F, description = "Rumi Numeral Symbols" },
502 ["runic"] = { first = 0x016A0, last = 0x016FF, otf="runr", description = "Runic" },
503 ["samaritan"] = { first = 0x00800, last = 0x0083F, description = "Samaritan" },
504 ["saurashtra"] = { first = 0x0A880, last = 0x0A8DF, description = "Saurashtra" },
505 ["sharada"] = { first = 0x11180, last = 0x111DF, description = "Sharada" },
506 ["shavian"] = { first = 0x10450, last = 0x1047F, otf="shaw", description = "Shavian" },
507 ["shorthandformatcontrols"] = { first = 0x1BCA0, last = 0x1BCAF, description = "Shorthand Format Controls" },
508 ["siddham"] = { first = 0x11580, last = 0x115FF, description = "Siddham" },
509 ["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" },
510 ["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" },
511 ["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" },
512 ["smallkanaextension"] = { first = 0x1B130, last = 0x1B16F, description = "Small Kana Extension" },
513 ["sogdian"] = { first = 0x10F30, last = 0x10F6F, description = "Sogdian" },
514 ["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" },
515 ["soyombo"] = { first = 0x11A50, last = 0x11AAF, description = "Soyombo" },
516 ["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" },
517 ["specials"] = { first = 0x0FFF0, last = 0x0FFFF, description = "Specials" },
518 ["sundanese"] = { first = 0x01B80, last = 0x01BBF, description = "Sundanese" },
519 ["sundanesesupplement"] = { first = 0x01CC0, last = 0x01CCF, description = "Sundanese Supplement" },
520 ["sunuwar"] = { first = 0x11BC0, last = 0x11BFF, description = "Sunuwar" },
521 ["superscriptsandsubscripts"] = { first = 0x02070, last = 0x0209F, description = "Superscripts and Subscripts" },
522 ["supplementalarrowsa"] = { first = 0x027F0, last = 0x027FF, math = true, description = "Supplemental Arrows-A" },
523 ["supplementalarrowsb"] = { first = 0x02900, last = 0x0297F, math = true, description = "Supplemental Arrows-B" },
524 ["supplementalarrowsc"] = { first = 0x1F800, last = 0x1F8FF, math = true, description = "Supplemental Arrows-C" },
525 ["supplementalmathematicaloperators"] = { first = 0x02A00, last = 0x02AFF, math = true, description = "Supplemental Mathematical Operators" },
526 ["supplementalpunctuation"] = { first = 0x02E00, last = 0x02E7F, description = "Supplemental Punctuation" },
527 ["supplementalsymbolsandpictographs"] = { first = 0x1F900, last = 0x1F9FF, description = "Supplemental Symbols and Pictographs" },
528 ["supplementaryprivateuseareaa"] = { first = 0xF0000, last = 0xFFFFF, description = "Supplementary Private Use Area-A" },
529 ["supplementaryprivateuseareab"] = { first = 0x100000,last = 0x10FFFF, description = "Supplementary Private Use Area-B" },
530 ["suttonsignwriting"] = { first = 0x1D800, last = 0x1DAAF, description = "Sutton SignWriting" },
531 ["sylotinagri"] = { first = 0x0A800, last = 0x0A82F, otf="sylo", description = "Syloti Nagri" },
532 ["symbolsandpictographsextendeda"] = { first = 0x1FA70, last = 0x1FAFF, description = "Symbols and Pictographs Extended-A" },
533 ["symbolsforlegacycomputing"] = { first = 0x1FB00, last = 0x1FBFF, description = "Symbols for Legacy Computing" },
534 ["symbolsforlegacycomputingsupplement"] = { first = 0x1CC00, last = 0x1CEBF, description = "Symbols for Legacy Computing Supplement" },
535 ["syriac"] = { first = 0x00700, last = 0x0074F, otf="syrc", description = "Syriac" },
536 ["syriacsupplement"] = { first = 0x00860, last = 0x0086F, description = "Syriac Supplement" },
537 ["tagalog"] = { first = 0x01700, last = 0x0171F, otf="tglg", description = "Tagalog" },
538 ["tagbanwa"] = { first = 0x01760, last = 0x0177F, otf="tagb", description = "Tagbanwa" },
539 ["tags"] = { first = 0xE0000, last = 0xE007F, description = "Tags" },
540 ["taile"] = { first = 0x01950, last = 0x0197F, otf="tale", description = "Tai Le" },
541 ["taitham"] = { first = 0x01A20, last = 0x01AAF, description = "Tai Tham" },
542 ["taiviet"] = { first = 0x0AA80, last = 0x0AADF, description = "Tai Viet" },
543 ["taixuanjingsymbols"] = { first = 0x1D300, last = 0x1D35F, description = "Tai Xuan Jing Symbols" },
544 ["takri"] = { first = 0x11680, last = 0x116CF, description = "Takri" },
545 ["tamil"] = { first = 0x00B80, last = 0x00BFF, otf="taml", description = "Tamil" },
546 ["tamilsupplement"] = { first = 0x11FC0, last = 0x11FFF, description = "Tamil Supplement" },
547 ["tangut"] = { first = 0x17000, last = 0x187FF, description = "Tangut" },
548 ["tangutsupplement"] = { first = 0x18D00, last = 0x18D7F, description = "Tangut Supplement" },
549 ["tangutcomponents"] = { first = 0x18800, last = 0x18AFF, description = "Tangut Components" },
550 ["tangsa"] = { first = 0x16A70, last = 0x16ACF, description = "Tangsa" },
551 ["telugu"] = { first = 0x00C00, last = 0x00C7F, otf="telu", description = "Telugu" },
552 ["thaana"] = { first = 0x00780, last = 0x007BF, otf="thaa", description = "Thaana" },
553 ["thai"] = { first = 0x00E00, last = 0x00E7F, otf="thai", description = "Thai" },
554 ["tibetan"] = { first = 0x00F00, last = 0x00FFF, otf="tibt", description = "Tibetan" },
555 ["tifinagh"] = { first = 0x02D30, last = 0x02D7F, otf="tfng", description = "Tifinagh" },
556 ["tirhuta"] = { first = 0x11480, last = 0x114DF, description = "Tirhuta" },
557 ["todhri"] = { first = 0x105C0, last = 0x105FF, description = "Todhri" },
558 ["toto"] = { first = 0x1E290, last = 0x1E2BF, description = "Toto" },
559 ["transportandmapsymbols"] = { first = 0x1F680, last = 0x1F6FF, description = "Transport and Map Symbols" },
560 ["tulutigalari"] = { first = 0x11380, last = 0x113FF, description = "Tulu-Tigalari" },
561 ["ugaritic"] = { first = 0x10380, last = 0x1039F, otf="ugar", description = "Ugaritic" },
562 ["unifiedcanadianaboriginalsyllabics"] = { first = 0x01400, last = 0x0167F, otf="cans", description = "Unified Canadian Aboriginal Syllabics" },
563 ["unifiedcanadianaboriginalsyllabicsextended"] = { first = 0x018B0, last = 0x018FF, description = "Unified Canadian Aboriginal Syllabics Extended" },
564 ["unifiedcanadianaboriginalsyllabicsextendeda"] = { first = 0x11AB0, last = 0x11ABF, description = "Unified Canadian Aboriginal Syllabics Extended-A" },
565 ["uppercasebold"] = { first = 0x1D400, last = 0x1D419, math = true },
566 ["uppercaseboldfraktur"] = { first = 0x1D56C, last = 0x1D585, math = true },
567 ["uppercasebolditalic"] = { first = 0x1D468, last = 0x1D481, math = true, italic = true },
568 ["uppercaseboldscript"] = { first = 0x1D4D0, last = 0x1D4E9, math = true, italic = true },
569 ["uppercasedoublestruck"] = { first = 0x1D538, last = 0x1D551, math = true },
570 ["uppercasefraktur"] = { first = 0x1D504, last = 0x1D51D, math = true },
571 ["uppercasegreekbold"] = { first = 0x1D6A8, last = 0x1D6C1, math = true },
572 ["uppercasegreekbolditalic"] = { first = 0x1D71C, last = 0x1D735, math = true, italic = true },
573 ["uppercasegreekitalic"] = { first = 0x1D6E2, last = 0x1D6FB, math = true, italic = true },
574 ["uppercasegreeknormal"] = { first = 0x00391, last = 0x003AA, math = true },
575 ["uppercasegreeksansserifbold"] = { first = 0x1D756, last = 0x1D76F, math = true },
576 ["uppercasegreeksansserifbolditalic"] = { first = 0x1D790, last = 0x1D7A9, math = true, italic = true },
577 ["uppercaseitalic"] = { first = 0x1D434, last = 0x1D44D, math = true, italic = true },
578 ["uppercasemonospace"] = { first = 0x1D670, last = 0x1D689, math = true },
579 ["uppercasenormal"] = { first = 0x00041, last = 0x0005A, math = true },
580 ["uppercasesansserifbold"] = { first = 0x1D5D4, last = 0x1D5ED, math = true },
581 ["uppercasesansserifbolditalic"] = { first = 0x1D63C, last = 0x1D655, math = true, italic = true },
582 ["uppercasesansserifitalic"] = { first = 0x1D608, last = 0x1D621, math = true, italic = true },
583 ["uppercasesansserifnormal"] = { first = 0x1D5A0, last = 0x1D5B9, math = true },
584 ["uppercasescript"] = { first = 0x1D49C, last = 0x1D4B5, math = true, italic = true },
585 ["vai"] = { first = 0x0A500, last = 0x0A63F, description = "Vai" },
586 ["variationselectors"] = { first = 0x0FE00, last = 0x0FE0F, description = "Variation Selectors" },
587 ["variationselectorssupplement"] = { first = 0xE0100, last = 0xE01EF, description = "Variation Selectors Supplement" },
588 ["vedicextensions"] = { first = 0x01CD0, last = 0x01CFF, description = "Vedic Extensions" },
589 ["verticalforms"] = { first = 0x0FE10, last = 0x0FE1F, description = "Vertical Forms" },
590 ["vithkuqi"] = { first = 0x10570, last = 0x105BF, description = "Vithkuqi" },
591 ["wancho"] = { first = 0x1E2C0, last = 0x1E2FF, description = "Wancho" },
592 ["warangciti"] = { first = 0x118A0, last = 0x118FF, description = "Warang Citi" },
593 ["yezidi"] = { first = 0x10E80, last = 0x10EBF, description = "Yezidi" },
594 ["yijinghexagramsymbols"] = { first = 0x04DC0, last = 0x04DFF, otf="yi", description = "Yijing Hexagram Symbols" },
595 ["yiradicals"] = { first = 0x0A490, last = 0x0A4CF, otf="yi", description = "Yi Radicals" },
596 ["yisyllables"] = { first = 0x0A000, last = 0x0A48F, otf="yi", description = "Yi Syllables" },
597 ["zanabazarsquare"] = { first = 0x11A00, last = 0x11A4F, description = "Zanabazar Square" },
598 ["znamennymusicalnotation"] = { first = 0x1CF00, last = 0x1CFCF, description = "Znamenny Musical Notation" },
599
600
601
602
603
604 ["lowercasecalligraphic"] = { first = 0x100000, last = 0x100019, math = true },
605 ["uppercasecalligraphic"] = { first = 0x100020, last = 0x100039, math = true },
606 ["lowercaseboldcalligraphic"] = { first = 0x100040, last = 0x100059, math = true },
607 ["uppercaseboldcalligraphic"] = { first = 0x100060, last = 0x100079, math = true },
608
609
610
611
612 ["lowercasesansgreek"] = { first = 0x100080, last = 0x100099, math = true },
613 ["uppercasesansgreek"] = { first = 0x1000A0, last = 0x1000B9, math = true },
614 ["lowercaseitalicsansgreek"] = { first = 0x1000C0, last = 0x1000D9, math = true },
615 ["uppercaseitalicsansgreek"] = { first = 0x1000E0, last = 0x1000F9, math = true },
616
617
618
619
620
621 ["lowercaseblackboarditalic"] = { first = 0x100100, last = 0x100119, math = true },
622 ["uppercaseblackboarditalic"] = { first = 0x100120, last = 0x100139, math = true },
623
624
625
626
627
628
629
630
631
632
633}
634
635
636
637
638
639
640
641
-- Per-block "gaps" tables: each one maps a code point that lies inside the
-- block's first..last range to a replacement code point. The table is handed
-- out as the fourth return value of characters.getrange for a named block.
-- NOTE(review): presumably these are the reserved holes of the Mathematical
-- Alphanumeric Symbols block whose glyphs live in Letterlike Symbols --
-- confirm against the Unicode charts.

blocks.lowercaseitalic.gaps = {
    [0x1D455] = 0x0210E,
}

blocks.uppercasescript.gaps = {
    [0x1D49D] = 0x0212C,
    [0x1D4A0] = 0x02130,
    [0x1D4A1] = 0x02131,
    [0x1D4A3] = 0x0210B,
    [0x1D4A4] = 0x02110,
    [0x1D4A7] = 0x02112,
    [0x1D4A8] = 0x02133,
    [0x1D4AD] = 0x0211B,
}

blocks.lowercasescript.gaps = {
    [0x1D4BA] = 0x0212F,
    [0x1D4BC] = 0x0210A,
    [0x1D4C4] = 0x02134,
}

blocks.uppercasefraktur.gaps = {
    [0x1D506] = 0x0212D,
    [0x1D50B] = 0x0210C,
    [0x1D50C] = 0x02111,
    [0x1D515] = 0x0211C,
    [0x1D51D] = 0x02128,
}

blocks.uppercasedoublestruck.gaps = {
    [0x1D53A] = 0x02102,
    [0x1D53F] = 0x0210D,
    [0x1D545] = 0x02115,
    [0x1D547] = 0x02119,
    [0x1D548] = 0x0211A,
    [0x1D549] = 0x0211D,
    [0x1D551] = 0x02124,
}
680
characters.blocks = blocks

-- Returns the first and last code point of the block called 'name'. When no
-- such block can be found the pair 0, 0 is returned instead, so callers
-- always get two numbers back.

function characters.blockrange(name)
    local block = blocks[name]
    if not block then
        return 0, 0
    end
    return block.first, block.last
end
691
-- Fallback lookup for block names that are not stored literally: everything
-- that is not an ascii letter is stripped, the remainder is lowercased and
-- looked up with rawget (so a second miss does not recurse). A false or nil
-- key is handed back untouched.

setmetatableindex(blocks, function(t,k)
    if not k then
        return k
    end
    local stripped = gsub(k,"[^a-zA-Z]","")
    return rawget(t,lower(stripped))
end)
695
local otfscripts = utilities.storage.allocate()
characters.otfscripts = otfscripts

-- Lazy map from code point to the 'otf' tag of the block that contains it.
-- On a miss all blocks are scanned; when a block matches, its whole range is
-- cached with that block's tag (or "dflt" when the block has none). A code
-- point outside every block is cached individually as "dflt".

setmetatableindex(otfscripts,function(t,unicode)
    for _, block in next, blocks do
        local first, last = block.first, block.last
        if first <= unicode and unicode <= last then
            local script = block.otf or "dflt"
            for slot=first,last do
                t[slot] = script
            end
            return script
        end
    end
    -- no block matched, so only this single slot is remembered
    t[unicode] = "dflt"
    return "dflt"
end)
715
local splitter1 = lpeg.splitat(S(":-"))
local splitter2 = lpeg.splitat(S(" +-"),true)

-- Resolves 'name' into a range of code points.
--
-- The following forms are tried in this order:
--
--   * a block name (non alphanumerics are ignored, case is ignored); this
--     returns: first, last, description, gaps
--   * when 'expression' is set: a plain number (returned as n, n) or a block
--     name followed by an offset expression, which returns the block range
--     shifted by the evaluated offset
--   * two numbers separated by ':' or '-'; each is parsed as hexadecimal
--     first and as a regular Lua number otherwise
--   * a single number, parsed the same way
--
-- A double quote in 'name' is replaced by "0x" before numbers are parsed.
-- When nothing matches, the final return is nil, nil, nil.
--
-- NOTE(review): the offset is evaluated with loadstring, so 'name' must not
-- come from an untrusted source when 'expression' is set.

function characters.getrange(name,expression)
    local range = rawget(blocks,lower(gsub(name,"[^a-zA-Z0-9]","")))
    if range then
        return range.first, range.last, range.description, range.gaps
    end
    name = gsub(name,'"',"0x")
    if expression then
        local n = tonumber(name)
        if n then
            return n, n, nil
        end
        local first, rest = lpegmatch(splitter2,name)
        -- without a remainder there is no offset to evaluate (and
        -- concatenating nil below would raise an error)
        local range = first and rest and rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]","")))
        if range then
            local s = loadstring("return 0 " .. rest)
            if type(s) == "function" then
                local d = s()
                if type(d) == "number" then
                    return range.first + d, range.last + d, nil
                end
            end
        end
    end
    local start, stop = lpegmatch(splitter1,name)
    if start and stop then
        start = tonumber(start,16) or tonumber(start)
        stop  = tonumber(stop, 16) or tonumber(stop)
        if start and stop then
            return start, stop, nil
        end
    end
    local slot = tonumber(name,16) or tonumber(name)
    return slot, slot, nil
end
755
756
757
758
-- Verbose names for the two letter (lowercase) category codes that are found
-- in the 'category' field of a character entry. characters.category returns
-- these strings when it is called with verbose set.

local categorytags = allocate {
    lu = "Letter Uppercase",
    ll = "Letter Lowercase",
    lt = "Letter Titlecase",
    lm = "Letter Modifier",
    lo = "Letter Other",
    mn = "Mark Nonspacing",
    mc = "Mark Spacing Combining",
    me = "Mark Enclosing",
    nd = "Number Decimal Digit",
    nl = "Number Letter",
    no = "Number Other",
    pc = "Punctuation Connector",
    pd = "Punctuation Dash",
    ps = "Punctuation Open",
    pe = "Punctuation Close",
    pi = "Punctuation Initial Quote",
    pf = "Punctuation Final Quote",
    po = "Punctuation Other",
    sm = "Symbol Math",
    sc = "Symbol Currency",
    sk = "Symbol Modifier",
    so = "Symbol Other",
    zs = "Separator Space",
    zl = "Separator Line",
    zp = "Separator Paragraph",
    cc = "Other Control",
    cf = "Other Format",
    cs = "Other Surrogate",
    co = "Other Private Use",
    cn = "Other Not Assigned",
}

-- Verbose names for two letter detail codes.
-- NOTE(review): where these codes are stored is not visible in this part of
-- the file -- confirm against char-def.

local detailtags = allocate {
    sl = "small letter",
    bl = "big letter",
    im = "iteration mark",
    pm = "prolonged sound mark"
}

characters.categorytags = categorytags
characters.detailtags   = detailtags
801
802
803
804
805
806
-- Category sets: each table is built with tohash from a list of two letter
-- category codes and is meant to be indexed by such a code. Further down an
-- index metamethod (mti) is attached to most of them so that they can also
-- be indexed by a code point.

-- Note that "nl" and "no" are listed twice below.
-- NOTE(review): presumably harmless because tohash builds a set -- confirm.

local is_character = allocate ( tohash {
    "lu","ll","lt","lm","lo",
    "nd","nl","no",
    "mn",
    "nl","no",
    "pc","pd","ps","pe","pi","pf","po",
    "sm","sc","sk","so"
} )

-- all five letter categories

local is_letter = allocate ( tohash {
    "ll","lm","lo","lt","lu"
} )

-- format characters and space separators

local is_command = allocate ( tohash {
    "cf","zs"
} )

-- the three separator categories

local is_spacing = allocate ( tohash {
    "zs", "zl","zp",
} )
827
-- Set of mark categories: nonspacing ("mn") and spacing combining ("mc").
-- The second entry used to be "ms", which is not a category code that
-- occurs in categorytags, so it could never match anything.
-- NOTE(review): enclosing marks ("me") are not included -- confirm whether
-- that is intended.

local is_mark = allocate ( tohash {
    "mn", "mc",
} )
831
-- More category sets, built the same way: a list of two letter category
-- codes turned into a lookup table by tohash.

-- all seven punctuation categories

local is_punctuation = allocate ( tohash {
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
} )

-- only dash punctuation

local is_hyphenator = allocate ( tohash {
    "pd",
} )

-- all four symbol categories

local is_symbol = allocate ( tohash {
    "sm", "sc", "sk", "so",
} )

-- punctuation plus separators; in the code visible here no index metamethod
-- is attached to this set, so it can only be indexed by category code

local is_nothing = allocate ( tohash {
    "pc", "pd", "ps", "pe", "pi", "pf", "po",
    "zs", "zl","zp",
} )

-- letters, numbers, opening punctuation, initial quotes and symbols
-- NOTE(review): the exact meaning of "can have space" is decided by the
-- callers, which are not visible here -- confirm.

local can_have_space = allocate ( tohash {
    "lu", "ll", "lt", "lm", "lo",

    "nd", "nl", "no",
    "ps", "pi",


    "sm", "sc", "sk", "so",


} )
860
861
862
863
characters.is_character   = is_character
characters.is_letter      = is_letter
characters.is_command     = is_command
characters.is_spacing     = is_spacing
characters.is_mark        = is_mark
characters.is_punctuation = is_punctuation
characters.is_hyphenator  = is_hyphenator
characters.is_symbol      = is_symbol
characters.is_nothing     = is_nothing
characters.can_have_space = can_have_space

-- Index metamethod shared by the category sets: a numeric key is taken to be
-- a code point, its category is fetched from the character data and that
-- category is then looked up (raw) in the set. Any other missing key gives
-- no result.

local function mti(t,k)
    if type(k) ~= "number" then
        return
    end
    local category = data[k].category
    return category and rawget(t,category)
end

-- is_mark and is_nothing are left as they are: they only answer to category
-- codes

do

    local sets = {
        characters.is_character,
        characters.is_letter,
        characters.is_command,
        characters.is_spacing,
        characters.is_punctuation,
        characters.is_hyphenator,
        characters.is_symbol,
        characters.can_have_space,
    }

    for i=1,#sets do
        setmetatableindex(sets[i], mti)
    end

end
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
-- Verbose names for the (lowercase) line break class codes. The grouping
-- follows the class table of UAX #14 (Unicode Line Breaking Algorithm). The
-- descriptions of "ap" and "as" used to read "aksara pre-pase" and "ksara
-- start", which were typos.

characters.linebreaks = allocate {

    -- non-tailorable line breaking classes

    ["bk"]  = "mandatory break",
    ["cr"]  = "carriage return",
    ["lf"]  = "line feed",
    ["cm"]  = "combining mark",
    ["nl"]  = "next line",
    ["sg"]  = "surrogate",
    ["wj"]  = "word joiner",
    ["zw"]  = "zero width space",
    ["gl"]  = "non-breaking (glue)",
    ["sp"]  = "space",
    ["zwj"] = "zero width joiner",

    -- break opportunities

    ["b2"] = "break opportunity before and after",
    ["ba"] = "break after",
    ["bb"] = "break before",
    ["hy"] = "hyphen",
    ["cb"] = "contingent break opportunity",

    -- characters prohibiting certain breaks

    ["cl"] = "close punctuation",
    ["cp"] = "close parenthesis",
    ["ex"] = "exclamation/interrogation",
    ["in"] = "inseparable",
    ["ns"] = "nonstarter",
    ["op"] = "open punctuation",
    ["qu"] = "quotation",

    -- numeric context

    ["is"] = "infix numeric separator",
    ["nu"] = "numeric",
    ["po"] = "postfix numeric",
    ["pr"] = "prefix numeric",
    ["sy"] = "symbols allowing break after",

    -- other characters

    ["ai"] = "ambiguous (alphabetic or ideographic)",
    ["ak"] = "aksara",
    ["al"] = "alphabetic",
    ["ap"] = "aksara pre-base",
    ["as"] = "aksara start",
    ["cj"] = "conditional japanese starter",
    ["eb"] = "emoji base",
    ["em"] = "emoji modifier",
    ["h2"] = "hangul lv syllable",
    ["h3"] = "hangul lvt syllable",
    ["hl"] = "hebrew letter",
    ["id"] = "ideographic",
    ["jl"] = "hangul l jamo",
    ["jt"] = "hangul t jamo",
    ["jv"] = "hangul v jamo",
    ["ri"] = "regional indicator",
    ["sa"] = "complex context dependent (south east asian)",
    ["vf"] = "virama final",
    ["vi"] = "virama",
    ["xx"] = "unknown",

}
975
976
977
978
979
-- Verbose names for the (lowercase) bidirectional class codes.
-- NOTE(review): UAX #9 also defines the isolate classes LRI, RLI, FSI and
-- PDI, which are not listed here -- confirm whether they can show up in the
-- character data.

characters.bidi = allocate {
    l   = "Left-to-Right",
    lre = "Left-to-Right Embedding",
    lro = "Left-to-Right Override",
    r   = "Right-to-Left",
    al  = "Right-to-Left Arabic",
    rle = "Right-to-Left Embedding",
    rlo = "Right-to-Left Override",
    pdf = "Pop Directional Format",
    en  = "European Number",
    es  = "European Number Separator",
    et  = "European Number Terminator",
    an  = "Arabic Number",
    cs  = "Common Number Separator",
    nsm = "Non-Spacing Mark",
    bn  = "Boundary Neutral",
    b   = "Paragraph Separator",
    s   = "Segment Separator",
    ws  = "Whitespace",
    on  = "Other Neutrals",
}
1001
1002
1003
1004
-- Two way mapping between combining accents and their spacing counterparts:
-- every line maps the combining character to the spacing one and back. The
-- table is only built when it is not already present.
--
-- The last line used to map U+0060 to U+0333 (combining double low line),
-- which broke the symmetry with U+0300 (combining grave accent).

if not characters.fallbacks then

    characters.fallbacks = allocate {
        [0x0308] = 0x00A8, [0x00A8] = 0x0308, -- diaeresis
        [0x0304] = 0x00AF, [0x00AF] = 0x0304, -- macron
        [0x0301] = 0x00B4, [0x00B4] = 0x0301, -- acute
        [0x0327] = 0x00B8, [0x00B8] = 0x0327, -- cedilla
        [0x0302] = 0x02C6, [0x02C6] = 0x0302, -- circumflex
        [0x030C] = 0x02C7, [0x02C7] = 0x030C, -- caron
        [0x0306] = 0x02D8, [0x02D8] = 0x0306, -- breve
        [0x0307] = 0x02D9, [0x02D9] = 0x0307, -- dot above
        [0x030A] = 0x02DA, [0x02DA] = 0x030A, -- ring above
        [0x0328] = 0x02DB, [0x02DB] = 0x0328, -- ogonek
        [0x0303] = 0x02DC, [0x02DC] = 0x0303, -- tilde
        [0x030B] = 0x02DD, [0x02DD] = 0x030B, -- double acute
        [0x0305] = 0x203E, [0x203E] = 0x0305, -- overline
        [0x0300] = 0x0060, [0x0060] = 0x0300, -- grave
    }

end

if storage then
    storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks")
end
1034
-- Three lazy lookup tables that are indexed by code point. On a miss the
-- matching field ('direction', 'mirror' or 'textclass') is fetched from the
-- character data and remembered; when there is no entry or no such field,
-- false is remembered and returned.

characters.directions  = { }
characters.mirrors     = { }
characters.textclasses = { }

do

    local function cached(target,field)
        setmetatableindex(target,function(t,k)
            local entry = data[k]
            local value = entry and entry[field] or false
            t[k] = value
            return value
        end)
    end

    cached(characters.directions, "direction")
    cached(characters.mirrors,    "mirror")
    cached(characters.textclasses,"textclass")

end
1079
1080
1081
1082
1083
1084
-- Simple accessors: each returns one field of the character entry for code
-- point n, or an empty string when the entry or the field is missing.

function characters.contextname(n)
    local entry = data[n]
    return entry and entry.contextname or ""
end

function characters.adobename(n)
    local entry = data[n]
    return entry and entry.adobename or ""
end

function characters.description(n)
    local entry = data[n]
    return entry and entry.description or ""
end

-- Returns the category code of code point n, or its verbose name (taken
-- from categorytags) when 'verbose' is set. A missing category gives an
-- empty string. Unlike the accessors above this one indexes the entry
-- without checking that it exists.

function characters.category(n,verbose)
    local category = data[n].category
    if category and verbose then
        return categorytags[category]
    elseif category then
        return category
    end
    return ""
end
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
-- Converts a code point, or a table with code points, into an utf string.

local function toutfstring(s)
    if type(s) ~= "table" then
        return utfchar(s)
    end
    return utfchar(unpack(s))
end

utf.tostring = toutfstring
1120
local categories = allocate() characters.categories = categories

-- Lazy lookup of the category of a character: the result is cached, and when
-- there is no entry (or no category) the key itself is stored and returned.
-- A nil or false key gives no result and is not cached.

setmetatableindex(categories, function(t,u)
    if not u then
        return
    end
    local entry    = data[u]
    local category = entry and entry.category or u
    t[u] = category
    return category
end)
1124
1125
1126
1127
local lccodes = allocate() characters.lccodes = lccodes
local uccodes = allocate() characters.uccodes = uccodes
local shcodes = allocate() characters.shcodes = shcodes
local fscodes = allocate() characters.fscodes = fscodes

-- The code tables map a key onto the value of the given field of its
-- character entry. Without such a value a string key falls back to
-- utfbyte(key) and any other key to itself. Results are cached; nil and
-- false keys are ignored.

do

    local function codes(field)
        return function(t,u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field] or (type(u) == "string" and utfbyte(u)) or u
                t[u] = code
                return code
            end
        end
    end

    setmetatableindex(lccodes, codes("lccode"))
    setmetatableindex(uccodes, codes("uccode"))
    setmetatableindex(shcodes, codes("shcode"))
    setmetatableindex(fscodes, codes("fscode"))

end

local lcchars = allocate() characters.lcchars = lcchars
local ucchars = allocate() characters.ucchars = ucchars
local shchars = allocate() characters.shchars = shchars
local fschars = allocate() characters.fschars = fschars

-- The char tables do the same but deliver utf strings: the field value is
-- passed through toutfstring; without a value a numeric key becomes
-- utfchar(key) and any other key is returned as it is.

do

    local function chars(field)
        return function(t,u)
            if u then
                local entry = data[u]
                local code  = entry and entry[field]
                local str   = code and toutfstring(code) or (type(u) == "number" and utfchar(u)) or u
                t[u] = str
                return str
            end
        end
    end

    setmetatableindex(lcchars, chars("lccode"))
    setmetatableindex(ucchars, chars("uccode"))
    setmetatableindex(shchars, chars("shcode"))
    setmetatableindex(fschars, chars("fscode"))

end
1147
local decomposed = allocate() characters.decomposed = decomposed
local specials   = allocate() characters.specials   = specials

-- Both tables cache one field of the character entry ('decomposed' and
-- 'specials'); false is stored when the entry or the field is missing. Nil
-- and false keys give no result.

setmetatableindex(decomposed, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local value = entry and entry.decomposed or false
    t[u] = value
    return value
end)

setmetatableindex(specials, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local value = entry and entry.specials or false
    t[u] = value
    return value
end)
1168
local specialchars = allocate() characters.specialchars = specialchars
local descriptions = allocate() characters.descriptions = descriptions
local synonyms     = allocate() characters.synonyms     = synonyms

-- For a character with a 'specials' list this gives the string made from
-- those list members (from the second one on) that are letters, cached
-- under the key that was asked for. For other characters the character
-- itself is returned as string; mind that in that case the result is stored
-- under the string and not under a numeric key.

setmetatableindex(specialchars, function(t,u)
    if not u then
        return
    end
    local entry = data[u]
    local list  = entry and entry.specials
    if not list then
        local str = type(u) == "number" and utfchar(u) or u
        t[str] = str
        return str
    end
    local letters, n = { }, 0
    for i=2,#list do
        local slot = list[i]
        if is_letter[data[slot].category] then
            n = n + 1
            letters[n] = utfchar(slot)
        end
    end
    local result = concat(letters)
    t[u] = result
    return result
end)
1200
-- On a miss the whole character data is scanned and every description is
-- registered (spaces removed, lowercased) with its code point as value. A
-- key that is still unknown afterwards is stored as itself, so the scan is
-- not repeated for that key; the miss itself returns nil.

setmetatableindex(descriptions, function(t,k)
    for unicode, entry in next, data do
        local name = entry.description
        if name then
            if find(name," ",1,true) then
                name = gsub(name," ","")
            end
            t[lower(name)] = unicode
        end
    end
    local found = rawget(t,k)
    if not found then
        t[k] = k
    end
    return found
end)
1219
-- On a miss the whole character data is scanned and every synonym is
-- registered (spaces removed) with its code point as value. A key that is
-- still unknown afterwards is stored as itself so that the scan is not
-- repeated for it; the miss itself returns nil, like in the descriptions
-- table.
--
-- The unknown key used to be stored with t[s] = s where s is nil at that
-- point, which raises a "table index is nil" error on every miss.
--
-- NOTE(review): the 'synonyms' field may well be a list of strings instead
-- of a single string; both are accepted here -- confirm against char-def.

setmetatableindex(synonyms, function(t,k)
    local function register(name,unicode)
        if find(name," ",1,true) then
            name = gsub(name," ","")
        end
        t[name] = unicode
    end
    for u, c in next, data do
        local s = c.synonyms
        if type(s) == "table" then
            for i=1,#s do
                register(s[i],u)
            end
        elseif s then
            register(s,u)
        end
    end
    local s = rawget(t,k)
    if not s then
        t[k] = k
    end
    return s
end)
1237
-- Turns 'asked' into a code point: anything that tonumber accepts is
-- returned as number, a string is looked up in the descriptions table,
-- first as given and then with the spaces removed. Other values give no
-- result.

function characters.unicodechar(asked)
    local n = tonumber(asked)
    if n then
        return n
    end
    if type(asked) == "string" then
        local found = descriptions[asked]
        if found then
            return found
        end
        local compact = gsub(asked," ","")
        return descriptions[compact]
    end
end
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
-- Substitution patterns that replace every utf character of a string by its
-- entry in the lcchars, ucchars or shchars table.

local tolower, toupper, toshape

do

    local function remapped(mapping)
        return Cs((utf8character/mapping)^0)
    end

    tolower = remapped(lcchars)
    toupper = remapped(ucchars)
    toshape = remapped(shchars)

end

lpegpatterns.tolower = tolower
lpegpatterns.toupper = toupper
lpegpatterns.toshape = toshape
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
-- Builds (once) the tables characters.splits.char and .compat: for every
-- character whose 'specials' list has more than two members, the members
-- after the first are copied into 'compat' when the first one is "compat"
-- and into 'char' when it is "char" or "with".

if not characters.splits then

    local char   = allocate()
    local compat = allocate()

    local splits = {
        char   = char,
        compat = compat,
    }

    characters.splits = splits

    for unicode, entry in next, characters.data do
        local specials = entry.specials
        if specials and #specials > 2 then
            local kind   = specials[1]
            local target = nil
            if kind == "compat" then
                target = compat
            elseif kind == "char" or kind == "with" then
                target = char
            end
            if target then
                target[unicode] = { unpack(specials,2) }
            end
        end
    end

    if storage then
        storage.register("characters/splits", splits, "characters.splits")
    end

end
1332
-- Builds (once) three string to string hashes from the character data:
--
--   lhash : character -> its 'lccode' as string
--   uhash : character -> its 'uccode' as string (only looked at when the
--           entry has no 'lccode')
--   shash : character -> its 'shcode' as string
--
-- A code is either a number or a list; only lists with exactly two members
-- are used, others are skipped.

if not characters.lhash then

    local lhash = allocate() characters.lhash = lhash
    local uhash = allocate() characters.uhash = uhash
    local shash = allocate() characters.shash = shash

    -- gives the string for a code, or nothing when the code is not usable
    local function asstring(code)
        if type(code) == "number" then
            return utfchar(code)
        elseif #code == 2 then
            return utfchar(code[1]) .. utfchar(code[2])
        end
    end

    local function register(hash,unicode,code)
        local str = asstring(code)
        if str then
            hash[utfchar(unicode)] = str
        end
    end

    for unicode, entry in next, characters.data do
        local lccode = entry.lccode
        if lccode then
            register(lhash,unicode,lccode)
        else
            local uccode = entry.uccode
            if uccode then
                register(uhash,unicode,uccode)
            end
        end
        local shcode = entry.shcode
        if shcode then
            register(shash,unicode,shcode)
        end
    end

    if storage then
        storage.register("characters/lhash", lhash, "characters.lhash")
        storage.register("characters/uhash", uhash, "characters.uhash")
        storage.register("characters/shash", shash, "characters.shash")
    end

end
1384
local lhash = characters.lhash mark(lhash)
local uhash = characters.uhash mark(uhash)
local shash = characters.shash mark(shash)

-- single character patterns: a key of the hash is replaced by its value

local utf8lowercharacter = utfchartabletopattern(lhash) / lhash
local utf8uppercharacter = utfchartabletopattern(uhash) / uhash
local utf8shapecharacter = utfchartabletopattern(shash) / shash

-- whole string patterns: characters without a mapping pass unchanged

local utf8lower = Cs((utf8lowercharacter + utf8character)^0)
local utf8upper = Cs((utf8uppercharacter + utf8character)^0)
local utf8shape = Cs((utf8shapecharacter + utf8character)^0)

lpegpatterns.utf8lowercharacter = utf8lowercharacter
lpegpatterns.utf8uppercharacter = utf8uppercharacter
lpegpatterns.utf8shapecharacter = utf8shapecharacter

lpegpatterns.utf8lower = utf8lower
lpegpatterns.utf8upper = utf8upper
lpegpatterns.utf8shape = utf8shape

-- The string converters return an empty string when there is no string or
-- when the pattern does not match.

function characters.lower(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8lower,str) or ""
end

function characters.upper(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8upper,str) or ""
end

function characters.shaped(str)
    if not str then
        return ""
    end
    return lpegmatch(utf8shape,str) or ""
end

lpeg.setutfcasers(characters.lower,characters.upper)
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
-- Returns the letters of 'str' (characters whose category is in is_letter),
-- all other characters are dropped. When 'spacing' is set, a run of spacing
-- characters between two letters is kept as one single space; spacing
-- before the first letter and after the last one is dropped.
--
-- Two defects of the previous version are fixed here: the space after a one
-- letter first word got lost (the test was n > 1), and pending spacing was
-- only forgotten when a space was actually inserted, so leading spacing
-- showed up later inside the first word (" abc" became "ab c").

function characters.lettered(str,spacing)
    local new, n = { }, 0
    local pending = false -- spacing seen since the last letter
    for u in utfvalues(str) do
        local c = data[u].category
        if is_letter[c] then
            if pending then
                if n > 0 then
                    n = n + 1
                    new[n] = " "
                end
                pending = false
            end
            n = n + 1
            new[n] = utfchar(u)
        elseif spacing and is_spacing[c] then
            pending = true
        end
    end
    return concat(new)
end
1473
1474
1475
-- direct access to the lazy uppercase and lowercase code tables

function characters.uccode(n)
    return uccodes[n]
end

function characters.lccode(n)
    return lccodes[n]
end

-- Returns the shape code of n: for a list the first and the last member,
-- for a single code that code and nil, and n itself (and nil) when there is
-- no shape code.

function characters.shape(n)
    local shcode = shcodes[n]
    if type(shcode) == "table" then
        return shcode[1], shcode[#shcode]
    elseif shcode then
        return shcode, nil
    else
        return n, nil
    end
end
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
-- Builds (once) three tables from the 'specials' lists in the character
-- data, depending on the first member of such a list:
--
--   "super"    : superscripts[unicode] = second member (exactly two members)
--   "sub"      : subscripts[unicode]   = second member (exactly two members)
--   "fraction" : fractions[unicode]    = list with all but the first member
--
-- Entries that do not have the wanted number of members are skipped and,
-- when trace_defining is set, reported.

if not characters.superscripts then

    local superscripts = allocate() characters.superscripts = superscripts
    local subscripts   = allocate() characters.subscripts   = subscripts
    local fractions    = allocate() characters.fractions    = fractions

    local function ignored(what,unicode,entry)
        if trace_defining then
            report_defining("ignoring %s %a, char %c, description %a",what,ustring(unicode),unicode,entry.description)
        end
    end

    for unicode, entry in next, data do
        local specials = entry.specials
        if specials then
            local what = specials[1]
            if what == "super" then
                if #specials == 2 then
                    superscripts[unicode] = specials[2]
                else
                    ignored("superscript",unicode,entry)
                end
            elseif what == "sub" then
                if #specials == 2 then
                    subscripts[unicode] = specials[2]
                else
                    ignored("subscript",unicode,entry)
                end
            elseif what == "fraction" then
                if #specials > 1 then
                    fractions[unicode] = { unpack(specials,2) }
                else
                    ignored("fraction",unicode,entry)
                end
            end
        end
    end

    if storage then
        storage.register("characters/superscripts", superscripts, "characters.superscripts")
        storage.register("characters/subscripts",   subscripts,   "characters.subscripts")
        storage.register("characters/fractions",    fractions,    "characters.fractions")
    end

end
1609
-- Debugging aid: converts the string with utotable and reports every element
-- of the result on its own line, together with its index.

function characters.showstring(str)
    local split = utotable(str)
    local count = #split
    for index=1,count do
        local element = split[index]
        report_defining("split % 3i : %C",index,element)
    end
end
1616
-- Emoji support. Descriptions found in char-emj.lua are normalized and mapped
-- onto utf strings. The file is only loaded when emoji.resolve or emoji.known
-- is called for the first time.

do

    local any       = P(1)
    local special   = S([['".,:;-+()]])
                    + P('“') + P('”')
    local apostrofe = P("’") + P("'")

    -- Normalizes a description: "medium light" and "medium dark" get a hyphen
    -- when followed by " skin tone", an apostrophe followed by "s" is dropped
    -- and so are the characters in 'special'. The ^1 makes it fail on an
    -- empty string.

    local pattern = Cs ( (
        (P("medium light") / "medium-light" + P("medium dark") / "medium-dark") * P(" skin tone")
      + (apostrofe * P("s"))/""
      + special/""
      + any
    )^1)

    -- Reads char-emj.lua (an empty table is used when the file is not found)
    -- and returns the loaded table plus a hash from normalized description to
    -- the utf string built from the entry's codes.

    local function load()
        local name = resolvers.findfile("char-emj.lua")
        local data = name and name ~= "" and dofile(name) or { }
        local hash = { }
        for d, c in next, data do
            local k = lpegmatch(pattern,d) or d
            local u = { }
            for i=1,#c do
                u[i] = utfchar(c[i])
            end
            hash[k] = concat(u)
        end
        return data, hash
    end

    -- both stay nil until the first request (note that 'data' shadows the
    -- file level character data inside this scope)

    local data, hash = nil, nil

    local function initialize()
        if not hash then
            data, hash = load()
        end
    end

    -- Returns the normalized form of a description.
    -- NOTE(review): when the pattern does not match (that is: for an empty
    -- string) a table holding the name is returned instead of a string; this
    -- is kept as it was, confirm that callers expect it.

    function characters.emojized(name)
        local t = lpegmatch(pattern,name)
        if t then
            return t
        else
            return { name }
        end
    end

    local start     = P(" ")
    local finish    = P(-1) + P(" ")
    local skintone  = P("medium ")^0 * (P("light ") + P("dark "))^0 * P("skin tone")
    local gender    = P("woman") + P("man")
    local expanded  = (
                            P("m-l-")/"medium-light"
                          + P("m-d-")/"medium-dark"
                          + P("l-")  /"light"
                          + P("m-")  /"medium"
                          + P("d-")  /"dark"
                      )
                    * (P("s-t")/" skin tone")
    local compacted = (
                        (P("medium-")/"m-" * (P("light")/"l" + P("dark")/"d"))
                      + (P("medium")/"m" + P("light")/"l" + P("dark")/"d")
                      )
                    * (P(" skin tone")/"-s-t")

    local pattern_0 = Cs((expanded + any)^1)                                        -- expands shortcuts
    local pattern_1 = Cs(((start * skintone + skintone * finish)/"" + any)^1)      -- strips skin tones
    local pattern_2 = Cs(((start * gender   + gender   * finish)/"" + any)^1)      -- strips woman / man
    local pattern_4 = Cs((compacted + any)^1)                                       -- introduces shortcuts

    local skin =
        P("light skin tone")        / utfchar(0x1F3FB)
      + P("medium-light skin tone") / utfchar(0x1F3FC)
      + P("medium skin tone")       / utfchar(0x1F3FD)
      + P("medium-dark skin tone")  / utfchar(0x1F3FE)
      + P("dark skin tone")         / utfchar(0x1F3FF)

    local parent =
        P("man")   / utfchar(0x1F468)
      + P("woman") / utfchar(0x1F469)

    local child =
        P("baby")  / utfchar(0x1F476)
      + P("boy")   / utfchar(0x1F466)
      + P("girl")  / utfchar(0x1F467)

    local zwj   = utfchar(0x200D)
    local heart = utfchar(0x2764) .. utfchar(0xFE0F) .. zwj
    local kiss  = heart .. utfchar(0x1F48B) .. zwj

    local space = P(" ")
    local final = P(-1)

    -- a member is followed by spaces (replaced by a zwj) or by the end of the
    -- name, with an optional skin tone in between

    local p_done   = (space^1/zwj) + final
    local p_rest   = space/"" * (skin * p_done) + p_done
    local p_parent = parent * p_rest
    local p_child  = child  * p_rest

    local p_family = Cs ( (P("family")            * space^1)/"" * p_parent^-2 * p_child^-2 )
    local p_couple = Cs ( (P("couple with heart") * space^1)/"" * p_parent * Cc(heart) * p_parent )
    local p_kiss   = Cs ( (P("kiss")              * space^1)/"" * p_parent * Cc(kiss)  * p_parent )

    local p_special = p_family + p_couple + p_kiss

    local emoji      = { }
    characters.emoji = emoji

    -- Outcome per requested name: a utf string, or false for a name that is
    -- known not to resolve. The keys are strings and those are never removed
    -- from a weak table, so a plain table does the same as a weak keyed one.

    local cache = { }

    -- Returns the utf string for an emoji name, or nothing when the name
    -- cannot be resolved. The steps are:
    --
    --   1. the name as passed, looked up in the hash
    --   2. an earlier outcome for the name as passed
    --   3. the name with shortcuts expanded, looked up in the hash
    --   4. a composed family / couple with heart / kiss sequence
    --   5. the expanded name with skin tones stripped
    --   6. the expanded name with woman / man stripped
    --
    -- The outcome is remembered under the name as passed, so that a repeated
    -- request with the same (possibly abbreviated) name is answered by step 2.

    function emoji.resolve(name)
        initialize()
        local h = hash[name]
        if h then
            return h
        end
        h = cache[name]
        if h then
            return h
        elseif h == false then
            return
        end
        local full = lpegmatch(pattern_0,name) or name
        if full ~= name then
            -- an exact entry wins over the simplified lookups below
            h = hash[full]
        end
        if not h then
            h = lpegmatch(p_special,full)
        end
        if not h then
            local s = lpegmatch(pattern_1,full)
            h = s and hash[s]
        end
        if not h then
            local s = lpegmatch(pattern_2,full)
            h = s and hash[s]
        end
        if h then
            cache[name] = h
            return h
        end
        cache[name] = false
    end

    -- Returns the hash (normalized name to utf string) and the table as it
    -- was loaded from char-emj.lua, in that order.

    function emoji.known()
        initialize()
        return hash, data
    end

    -- Returns the name with skin tone phrases replaced by their shortcuts, or
    -- the name itself when the pattern does not match.

    function emoji.compact(name)
        return lpegmatch(pattern_4,name) or name
    end

end
1794
1795
1796
-- The file returns the characters namespace to whatever loaded it.
return characters
1798 |