if not modules then modules = { } end modules ['mtx-unicode'] = {
    version   = 1.002,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
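-- This script compares the ConTeXt character database (char-def.lua) with the
-- official Unicode data files (unicodedata.txt, casefolding.txt, linebreak.txt
-- and friends), reports the differences, patches the loaded table and saves the
-- result as char-def-new.lua; an updated emoji mapping is written to
-- char-emj-new.lua. It is normally run via mtxrun (mtxrun --script unicode).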
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-unicode</entry>
  <entry name="detail">Checker for char-def.lua</entry>
  <entry name="version">1.02</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="whatever"><short>do whatever</short></flag>
   </subcategory>
  </category>
 </flags>
</application>
]]

local application = logs.application {
    name     = "mtx-unicode",
    banner   = "Checker for char-def.lua 1.02",
    helpinfo = helpinfo,
}

local gmatch, match, gsub, find, lower, upper, format = string.gmatch, string.match, string.gsub, string.find, string.lower, string.upper, string.format
local concat, sort, sortedhash = table.concat, table.sort, table.sortedhash
local split, splitlines, strip = string.split, string.splitlines, string.strip
local are_equal = table.are_equal
local tonumber, tostring, rawget = tonumber, tostring, rawget
local lpegmatch = lpeg.match
local P, C, S, R, Cs, Ct, Cg, Cf, Cc = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cg, lpeg.Cf, lpeg.Cc
local formatters = string.formatters
local utfchar = utf.char

local report = application.report

scripts         = scripts         or { }
scripts.unicode = scripts.unicode or { }

characters      = characters      or { }
characters.data = characters.data or { }

fonts           = fonts           or { }
fonts.encodings = fonts.encodings or { }

local textfiles = { }
local textdata  = { }

local sparse = false

local split_space_table = lpeg.tsplitat(" ")
local split_space_two   = lpeg.splitat (" ")
local split_range_two   = lpeg.splitat ("..")
local split_colon_table = lpeg.tsplitat(P(" ")^0 * P(";") * P(" ")^0)

local skipped = {
    [0x002C6] = true,
    [0x002C7] = true,
}

for i=0x0FE00,0x0FE0F do skipped[i] = true end
for i=0xE0100,0xE01EF do skipped[i] = true end

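-- scripts.unicode.update walks UnicodeData and the related property files and
-- brings characters.data in sync: missing entries are added, and changes to
-- category, direction, mirror, linebreak, cjkwd, arabic shaping, combining
-- class, case folding and specials are reported and applied.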
function scripts.unicode.update()
    local unicodedata          = texttables.unicodedata
    local bidimirroring        = texttables.bidimirroring
    local linebreak            = texttables.linebreak
    local eastasianwidth       = texttables.eastasianwidth
    local standardizedvariants = texttables.standardizedvariants
    local arabicshaping        = texttables.arabicshaping
    local casefolding          = texttables.casefolding
    local index                = texttables.index
    local characterdata        = characters.data

    local descriptions = { }

    for unicode, ud in table.sortedpairs(unicodedata) do
        if not skipped[unicode] then
            local char        = rawget(characterdata,unicode)
            local description = ud[2] or formatters["UNICODE ENTRY %U"](unicode)
            if not find(description,"^<") then
                local ld        = linebreak[unicode]
                local bd        = bidimirroring[unicode]
                local ed        = eastasianwidth[unicode]
                local category  = lower(ud[3] or "?")
                local combining = tonumber(ud[4])
                local direction = lower(ud[5] or "l")
                local linebreak = ld and lower(ld[2] or "xx")
                local specials  = ud[6] or ""
                local cjkwd     = ed and lower(ed[2] or "n")
                local mirror    = bd and tonumber(bd[2],16)
                local arabic    = nil
                local lccode    = false
                local uccode    = false
                descriptions[description] = unicode
                if sparse and direction == "l" then
                    direction = nil
                end
                if linebreak == "xx" then
                    linebreak = nil
                end
                if specials == "" then
                    specials = nil
                else
                    specials = lpegmatch(split_space_table,specials)
                    if tonumber(specials[1],16) then
                        for i=#specials,1,-1 do
                            specials[i+1] = tonumber(specials[i],16)
                        end
                        specials[1] = "char"
                    else
                        specials[1] = lower(gsub(specials[1],"[<>]",""))
                        for i=2,#specials do
                            specials[i] = tonumber(specials[i],16)
                        end
                    end
                end
                if cjkwd == "n" then
                    cjkwd = nil
                end
                local comment
                if find(description,"MATHEMATICAL") then
                    comment = "check math properties"
                end

                local as = arabicshaping[unicode]
                if as then
                    arabic = lower(as[3])
                end

                if not combining or combining == 0 then
                    combining = nil
                end

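                -- CaseFolding.txt: status C and S entries carry a single folded
                -- codepoint, status F entries a full (multi character) folding;
                -- depending on the category the folding becomes the uccode or
                -- lccode of the entry.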
                local cf = casefolding[unicode]
                if cf and tonumber(cf[1],16) == unicode then
                    local how = cf[2]
                    if how == "C" or how == "S" then
                        local fold = tonumber(cf[3],16)
                        if fold == unicode then

                        elseif category == "ll" then
                            uccode = fold
                        elseif category == "lu" then
                            lccode = fold
                        end
                    elseif how == "F" then

                        local folding = { }
                        for s in gmatch(cf[3],"%S+") do
                            folding[#folding+1] = tonumber(s,16)
                        end
                        if category == "ll" then
                            uccode = folding
                        elseif category == "lu" then
                            lccode = folding
                        end
                    else

                    end
                end

                if not char then
                    report("%U : adding entry %a",unicode,description)
                    char = {
                        category    = category,
                        comment     = comment,
                        cjkwd       = cjkwd,
                        description = description,
                        direction   = direction,
                        mirror      = mirror,
                        linebreak   = linebreak,
                        unicodeslot = unicode,
                        specials    = specials,
                        arabic      = arabic,
                        combining   = combining,
                        uccode      = uccode and uccode or nil,
                        lccode      = lccode and lccode or nil,
                    }
                    characterdata[unicode] = char
                else
                    if lccode then
                        if type(lccode) == "table" then
                            if type(char.lccode) ~= "table" or not are_equal(lccode,char.lccode) then
                                report("%U : setting lccode to % t, %a",unicode,lccode,description)
                                char.lccode = lccode
                            end
                        elseif char.lccode ~= lccode then
                            report("%U : setting lccode to %a, %a",unicode,lccode,description)
                            char.lccode = lccode
                        end
                    end
                    if uccode then
                        if type(uccode) == "table" then
                            if type(char.uccode) ~= "table" or not are_equal(uccode,char.uccode) then
                                report("%U : setting uccode to % t, %a",unicode,uccode,description)
                                char.uccode = uccode
                            end
                        elseif char.uccode ~= uccode then
                            report("%U : setting uccode to %a, %a",unicode,uccode,description)
                            char.uccode = uccode
                        end
                    end
                    if direction then
                        if char.direction ~= direction then
                            report("%U : setting direction to %a, %a",unicode,direction,description)
                            char.direction = direction
                        end
                    else
                        if char.direction then
                            report("%U : resetting direction from %a, %a",unicode,char.direction,description)
                            char.direction = nil
                        end
                    end
                    if mirror then
                        if mirror ~= char.mirror then
                            report("%U : setting mirror to %a, %a",unicode,mirror,description)
                            char.mirror = mirror
                        end
                    else
                        if char.mirror then
                            report("%U : resetting mirror from %a, %a",unicode,char.mirror,description)
                            char.mirror = nil
                        end
                    end
                    if linebreak then
                        if linebreak ~= char.linebreak then
                            report("%U : setting linebreak to %a, %a",unicode,linebreak,description)
                            char.linebreak = linebreak
                        end
                    else
                        if char.linebreak then
                            report("%U : resetting linebreak from %a, %a",unicode,char.linebreak,description)
                            char.linebreak = nil
                        end
                    end
                    if cjkwd then
                        if cjkwd ~= char.cjkwd then
                            report("%U : setting cjkwd to %a, %a",unicode,cjkwd,description)
                            char.cjkwd = cjkwd
                        end
                    else
                        if char.cjkwd then
                            report("%U : resetting cjkwd from %a, %a",unicode,char.cjkwd,description)
                            char.cjkwd = nil
                        end
                    end
                    if arabic then
                        if arabic ~= char.arabic then
                            report("%U : setting arabic to %a, %a",unicode,arabic,description)
                            char.arabic = arabic
                        end
                    else
                        if char.arabic then
                            report("%U : resetting arabic from %a, %a",unicode,char.arabic,description)
                            char.arabic = nil
                        end
                    end
                    if combining then
                        if combining ~= char.combining then
                            report("%U : setting combining to %a, %a",unicode,combining,description)
                            char.combining = combining
                        end
                    else
                        if char.combining then
                            report("%U : resetting combining from %a, %a",unicode,char.combining,description)
                        end
                    end
                    if specials then
                        if not char.specials or not are_equal(specials,char.specials) then
                            local t = { specials[1] } for i=2,#specials do t[i] = formatters["%U"](specials[i]) end
                            report("%U : setting specials to % + t, %a",unicode,t,description)
                            char.specials = specials
                        end
                    else
                        local specials = char.specials
                        if specials then
                            local t = { } for i=2,#specials do t[i] = formatters["%U"](specials[i]) end
                            if false then
                                char.comment = nil
                                report("%U : resetting specials from % + t, %a",unicode,t,description)
                            else
                                local comment = char.comment
                                if not comment then
                                    char.comment = "check special"
                                elseif not find(comment,"check special") then
                                    char.comment = comment .. ", check special"
                                end
                            end
                        end
                    end
                end

                local visual = char.visual
                if not visual and find(description,"MATH") then
                    if find(description,"BOLD ITALIC") then
                        visual = "bi"
                    elseif find(description,"ITALIC") then
                        visual = "it"
                    elseif find(description,"BOLD") then
                        visual = "bf"
                    end
                    if visual then
                        report("%U : setting visual to %a, %a",unicode,visual,description)
                        char.visual = visual
                    end
                end

                if category == "sm" or (category == "so" and char.mathclass) then
                    local mathextensible = char.mathextensible
                    if mathextensible then

                    elseif find(description,"ABOVE") then

                    elseif find(description,"ARROWHEAD") then

                    elseif find(description,"HALFWIDTH") then

                    elseif find(description,"ANGLE") then

                    elseif find(description,"THROUGH") then

                    elseif find(description,"ARROW") then

                        local u = find(description,"UP")
                        local d = find(description,"DOWN")
                        local l = find(description,"LEFT")
                        local r = find(description,"RIGHT")
                        if find(description,"ARROWHEAD") then

                        elseif find(description,"HALFWIDTH") then

                        elseif u and d then
                            if l or r then
                                mathextensible = "m"
                            else
                                mathextensible = "v"
                            end
                        elseif u then
                            if l or r then
                                mathextensible = "m"
                            else
                                mathextensible = "u"
                            end
                        elseif d then
                            if l or r then
                                mathextensible = "m"
                            else
                                mathextensible = "d"
                            end
                        elseif l and r then
                            mathextensible = "h"
                        elseif r then
                            mathextensible = "r"
                        elseif l then
                            mathextensible = "l"
                        end
                        if mathextensible then
                            report("%U : setting mathextensible to %a, %a",unicode,mathextensible,description)
                            char.mathextensible = mathextensible
                        end
                    end
                end
            end
        end
    end

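    -- Characters that fall inside one of the ranges (as loaded from char-ran.lua)
    -- and carry nothing beyond the range's common properties are wiped from the
    -- explicit table; the range definition then covers them instead.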
    if true then

        for i=1,#characters.ranges do
            local data   = characters.ranges[i]
            local common = rawget(data,"common")
            if common then
                for unicode=data.first,data.last do
                    local chardata = rawget(characterdata,unicode)
                    if chardata then
                        local same = true
                        for k, v in next, common do
                            if k == "description" then

                            elseif v == chardata[k] then

                            else
                                same = false
                                break
                            end
                        end
                        if same then
                            report("%U : wipe %s",unicode,chardata.description)
                            rawset(characterdata,unicode,nil)
                        else
                            report("%U : keep %s",unicode,chardata.description)
                        end
                    end
                end
            end
        end
    end

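    -- For LATIN and CYRILLIC "<BASE> WITH <MARK>" characters that still lack a
    -- decomposition, derive a { "with", base, mark } special from the names
    -- (STROKE is looked up as SOLIDUS).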
    for unicode, data in sortedhash(characterdata) do
        if not data.specials or data.comment and find(data.comment,"check special") then
            local description = data.description
            local b, m = match(description,"^(.+) WITH (.+)$")
            if b and m and (find(b,"^LATIN") or find(b,"^CYRILLIC")) then
                local base = descriptions[b]
                local mark = descriptions[m]
                if not mark and m == "STROKE" then
                    mark = descriptions["SOLIDUS"]
                end
                if base and mark then
                    data.specials = { "with", base, mark }
                    data.comment  = nil
                end
            end
        end
    end

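    -- StandardizedVariants.txt: register each variation selector sequence as an
    -- entry in the variants table of its base character.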
    for i=1,#standardizedvariants do
        local si = standardizedvariants[i]
        local pair, addendum = si[1], strip(si[2])
        local first, second = lpegmatch(split_space_two,pair)
        first  = tonumber(first,16)
        second = tonumber(second,16)
        if first then
            local d = characterdata[first]
            if d then
                local v = rawget(d,"variants")
                if not v then
                    v = { }
                    d.variants = v
                end
                if not v[second] then
                    report("%U : adding variant %U as %s, %a",first,second,addendum,d.description)
                    v[second] = addendum
                end
            end
        end
    end
    for unicode, ud in table.sortedpairs(characterdata) do
        if not rawget(ud,"category") and rawget(ud,"variants") then
            characterdata[unicode] = nil
        end
    end
end

local preamble

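-- splitdefinition parses a semicolon separated Unicode data file. When index is
-- true the result is keyed by codepoint and ranges like 0000..007F are expanded;
-- otherwise it is a plain list of records. Trailing # comments are stripped.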
local function splitdefinition(filename,str,index)
    local l = splitlines(str)
    local t = { }
    if index then
        for i=1,#l do
            local s = gsub(l[i]," *#.*$","")
            if s ~= "" then
                local d = lpegmatch(split_colon_table,s)
                local o = d[1]
                local u = tonumber(o,16)
                if u then
                    t[u] = d
                else
                    local b, e = lpegmatch(split_range_two,o)
                    if b and e then
                        b = tonumber(b,16)
                        e = tonumber(e,16)
                        for k=b,e do
                            t[k] = d
                        end
                    else
                        report("problem: %i %s => %s",i,filename,s)
                    end
                end
            end
        end
    else
        local n = 0
        for i=1,#l do
            local s = gsub(l[i]," *#.*$","")
            if s ~= "" then
                n = n + 1
                t[n] = lpegmatch(split_colon_table,s)
            end
        end
    end
    return t
end

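-- splitindex parses the names index (index.txt), where a line looks like
-- "name, qualifier<TAB>codepoint", into a table that maps the recombined
-- "qualifier name" string onto its codepoint.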
local function splitindex(str)
    local l = splitlines(str)
    local n = { }
    for i=1,#l do
        local a, b, c = match(l[i],"([^%,]+)%,?(.-)\t(.*)")
        if a and b and c then
            local name = b .. " " .. a
            name = strip(name)
            name = gsub(name,"%s+"," ")
            n[name] = tonumber(c,16)
        end
    end
    return n
end

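-- scripts.unicode.load first runs char-def.lua (plus the char-* modules that
-- extend the loaded table) and then reads the raw Unicode text files, parsing
-- them into the texttables used by the other steps. The part of char-def.lua
-- that precedes the characters.data table is kept as preamble for saving later.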
function scripts.unicode.load()
    local fullname = resolvers.findfile("char-def.lua")
    report("using: %s",fullname)
    local data = io.loaddata(fullname)
    if data then
        loadstring(data)()

        local fullname = resolvers.findfile("char-ini.lua")
        report("using: %s",fullname)
        dofile(fullname)

        local fullname = resolvers.findfile("char-utf.lua")
        report("using: %s",fullname)
        dofile(fullname)

        local fullname = resolvers.findfile("char-cjk.lua")
        report("using: %s",fullname)
        dofile(fullname)

        local fullname = resolvers.findfile("char-ran.lua")
        report("using: %s",fullname)
        dofile(fullname)

        preamble = gsub(data,"characters%.data%s*=%s*%{.*","")

        textfiles = {
            unicodedata          = resolvers.findfile("unicodedata.txt") or "",
            bidimirroring        = resolvers.findfile("bidimirroring.txt") or "",
            linebreak            = resolvers.findfile("linebreak.txt") or "",
            eastasianwidth       = resolvers.findfile("eastasianwidth.txt") or "",
            standardizedvariants = resolvers.findfile("standardizedvariants.txt") or "",
            arabicshaping        = resolvers.findfile("arabicshaping.txt") or "",
            casefolding          = resolvers.findfile("casefolding.txt") or "",
            index                = resolvers.findfile("index.txt") or "",
        }

        textdata = {
            unicodedata          = textfiles.unicodedata          ~= "" and io.loaddata(textfiles.unicodedata)          or "",
            bidimirroring        = textfiles.bidimirroring        ~= "" and io.loaddata(textfiles.bidimirroring)        or "",
            linebreak            = textfiles.linebreak            ~= "" and io.loaddata(textfiles.linebreak)            or "",
            eastasianwidth       = textfiles.eastasianwidth       ~= "" and io.loaddata(textfiles.eastasianwidth)       or "",
            standardizedvariants = textfiles.standardizedvariants ~= "" and io.loaddata(textfiles.standardizedvariants) or "",
            arabicshaping        = textfiles.arabicshaping        ~= "" and io.loaddata(textfiles.arabicshaping)        or "",
            casefolding          = textfiles.casefolding          ~= "" and io.loaddata(textfiles.casefolding)          or "",
            index                = textfiles.index                ~= "" and io.loaddata(textfiles.index)                or "",
        }
        texttables = {
            unicodedata          = splitdefinition(textfiles.unicodedata,textdata.unicodedata,true),
            bidimirroring        = splitdefinition(textfiles.bidimirroring,textdata.bidimirroring,true),
            linebreak            = splitdefinition(textfiles.linebreak,textdata.linebreak,true),
            eastasianwidth       = splitdefinition(textfiles.eastasianwidth,textdata.eastasianwidth,true),
            standardizedvariants = splitdefinition(textfiles.standardizedvariants,textdata.standardizedvariants,false),
            arabicshaping        = splitdefinition(textfiles.arabicshaping,textdata.arabicshaping,true),
            casefolding          = splitdefinition(textfiles.casefolding,textdata.casefolding,true),
            index                = splitindex(textdata.index),
        }

        for k, v in sortedhash(textfiles) do
            report("using: %s",v)
        end
        return true
    else
        preamble = nil
        return false
    end
end
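-- scripts.unicode.save serializes the patched characters.data table after the
-- original preamble of char-def.lua. A few uccodes that the case folding data
-- does not provide are hardcoded first, and the most common variant subtables
-- are collapsed into named shortcuts (variants_emoji, variants_90 and so on) to
-- keep the file noticeably smaller.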
function scripts.unicode.save(filename)
    if preamble then

        characters.data[0x1FE3].uccode = { 0x3C5, 0x308, 0x301 }
        characters.data[0x1FD3].uccode = { 0x3B9, 0x308, 0x301 }
        characters.data[0x00DF].uccode = { 0x53, 0x53 }

        local data = table.serialize(characters.data,"characters.data", {
            hexify   = true,
            noquotes = true,
        })
        data = gsub(data,
            "%{%s+%[0xFE0E%]=\"text style\",%s+%[0xFE0F%]=\"emoji style\",%s+%}",
            "variants_emoji"
        )
        data = gsub(data,
            "%{%s+%[0xFE00%]=\"corner%-justified form\",%s+%[0xFE01%]=\"centered form\",%s+%}",
            "variants_forms"
        )
        data = gsub(data,
            "%{%s+%[0xFE00%]=\"chancery style\",%s+%[0xFE01%]=\"roundhand style\",%s+%}",
            "variants_style"
        )
        data = gsub(data,
            "%{%s+%[0xFE00%]=\"dotted form\",%s+%}",
            "variants_dotted"
        )
        data = gsub(data,
            "%{%s+%[0xFE00%]=\"expanded\",%s+%}",
            "variants_expanded"
        )
        data = gsub(data,
            "%{%s+%[0xFE0%d%]=\"rotated (%d+) degrees\",%s+%}",
            "variants_%1"
        )
        data = gsub(data,
            "%{%s+%[0xFE0%d%]=\"rotated (%d+) degrees\"," ..
            "%s*%[0xFE0%d%]=\"rotated (%d+) degrees\"," ..
            "%s+%}",
            "variants_%1_%2"
        )
        data = gsub(data,
            "%{%s+%[0xFE0%d%]=\"rotated (%d+) degrees\"," ..
            "%s*%[0xFE0%d%]=\"rotated (%d+) degrees\"," ..
            "%s*%[0xFE0%d%]=\"rotated (%d+) degrees\"," ..
            "%s+%}",
            "variants_%1_%2_%3"
        )
        io.savedata(filename,preamble .. data)
    end
end

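-- scripts.unicode.extras cross-checks characters.blocks against blocks.txt,
-- reporting new, changed and obsolete blocks, and adds the names from index.txt
-- as synonyms to the corresponding entries in characters.data.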
function scripts.unicode.extras()

    local fullname = resolvers.findfile("blocks.txt") or ""
    if fullname ~= "" then
        local data   = io.loaddata(fullname)
        local lines  = splitlines(data)
        local map    = { }
        local blocks = characters.blocks
        local result = { }
        for i=1,#lines do
            local line = gsub(lines[i]," *#.*$","")
            if line ~= "" then
                local specification = lpegmatch(split_colon_table,line)
                local range         = specification[1]
                local description   = specification[2]
                if range and description then
                    local start, stop = lpegmatch(split_range_two,range)
                    if start and stop then
                        local start = tonumber(start,16)
                        local stop  = tonumber(stop,16)
                        local name  = gsub(lower(description),"[^a-z]+","")
                        if start and stop then
                            local b = blocks[name]
                            if not b then
                                result[#result+1] = formatters[ [[+ block: ["%s"] = { first = 0x%05X, last = 0x%05X, description = "%S" }]] ](name,start,stop,description)
                                blocks[name] = { first = start, last = stop, description = description }
                            elseif b.first ~= start or b.last ~= stop or b.description ~= description then
                                result[#result+1] = formatters[ [[? block: ["%s"] = { first = 0x%05X, last = 0x%05X, description = "%S" }]] ](name,start,stop,description)
                            end
                        end
                        map[#map+1] = name
                    end
                end
            end
        end
        sort(result)
        for i=1,#result do
            report(result[i])
        end
        sort(map)
        for i=1,#map do
            local m = map[i]
            if not blocks[m] then
                report("obsolete block %a",m)
            end
        end
    end

    local index  = texttables.index
    local blocks = characters.blocks
    local data   = characters.data
    for k, v in next, index do
        if k ~= lower(k) then
            index[k] = nil
        end
    end

    for k, v in sortedhash(index) do
        local d = data[v]
        if d and d.description ~= upper(k) then
            local synonyms = d.synonyms
            if synonyms then
                local n = #synonyms
                local f = false
                for i=1,n do
                    if synonyms[i] == k then
                        f = true
                        break
                    end
                end
                if not f then
                    synonyms[n+1] = k
                end
                sort(synonyms)
            else
                d.synonyms = { k }
            end
        end
    end

end

do

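    -- Parser for emoji-test.txt: a relevant line looks like
    -- "<codepoints> ; fully-qualified # <emoji> E<version> <name>"; we keep the
    -- codepoints (components), the emoji itself (textstring) and the lowercased
    -- remainder of the line (description).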
    local space     = P(" ")
    local spaces    = space^0
    local semicolon = P(";")
    local hash      = P("#")
    local newline   = S("\n\r")

    local unicode    = Cs(R("09","AF")^1) / function(n) return tonumber(n,16) end
                     * spaces
    local components = Ct(unicode^1)

    local rubish_a   = semicolon
                     * spaces
                     * P("non-")^0 * P("fully-qualified")
                     * spaces
                     * hash
                     * spaces
    local textstring = C((1 - space)^1)
                     * spaces
    local description = ((1 - (spaces * newline))^1) / string.lower
    local rubish_b   = (1 - newline)^0
                     * newline^1

    local pattern_2 = Ct ( (
        Cf ( Ct("") *
              Cg (Cc("components")  * components)
            * rubish_a
            * Cg (Cc("textstring")  * textstring)
            * Cg (Cc("description") * description)
            * rubish_b
        , rawset)
      + P(1) )^1 )

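    -- scripts.unicode.emoji builds a name -> components hash from emoji-test.txt
    -- and grafts it onto the preamble of the current char-emj.lua; the release
    -- tag in the name (like "e1.0") is stripped and "#" and "*" are replaced by
    -- the words "hash" and "asterisk".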
    function scripts.unicode.emoji(filename)

        local name = resolvers.findfile("emoji-test.txt") or ""
        if name == "" then
            return
        end
        local l = io.loaddata(name)
        local t = lpegmatch(pattern_2,l)

        local hash = { }

        local crap = lpeg.P("e") * lpeg.R("09","..","09")^1 * lpeg.P(" ")^1

        local replace = lpeg.replacer {
            [crap] = "",
            ["#"]  = "hash",
            ["*"]  = "asterisk",
        }

        for i=1,#t do
            local v = t[i]
            local d = v.description
            local k = lpegmatch(replace,d) or d
            hash[k] = v.components
        end
        local new = table.serialize(hash,"return", { hexify = true })
        local old = io.loaddata(resolvers.findfile("char-emj.lua"))
        if old and old ~= "" then
            new = gsub(old,"^(.-)return .*$","%1" .. new)
        end
        io.savedata(filename,new)
    end

end

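-- Main driver: load char-def.lua and the Unicode data files, then run the
-- update, extras, save and emoji steps. The results go to char-def-new.lua and
-- char-emj-new.lua so that the files currently in use are never overwritten.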
local filename = environment.files[1]

if environment.arguments.exporthelp then
    application.export(environment.arguments.exporthelp,filename)
else
    report("start working on %a, input char-def.lua",lfs.currentdir())
    if scripts.unicode.load() then
        scripts.unicode.update()
        scripts.unicode.extras()
        scripts.unicode.save("char-def-new.lua")
        scripts.unicode.emoji("char-emj-new.lua")
        report("saved file %a","char-def-new.lua")
        report("saved file %a (current 15.1, check for updates, see above!)","char-emj-new.lua")
    else
        report("nothing to do")
    end
    report("stop working on %a\n",lfs.currentdir())
end