sort-ini.lua /size: 27 Kb    last modification: 2023-12-21 09:44
-- Register this file's metadata under the 'sort-ini' key of the global modules
-- table; the table is created first when it does not exist yet.
if not modules then modules = { } end modules ['sort-ini'] = {
    version   = 1.001,
    comment   = "companion to sort-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- It took a while to get there, but with Fleetwood Mac's "Don't Stop" playing in
10-- the background we sort of got it done.
11--
-- The code here evolved from the rather old mkii approach. There we concatenate the
-- key and (raw) entry into a new string. Numbers and special characters get some
-- treatment so that they sort ok. In addition some normalization (lowercasing,
-- accent stripping) takes place and again data is appended or prepended.
-- Eventually these strings are sorted using a regular string sorter. The relative
-- order of characters is dealt with by weighting them. It took a while to figure
-- this all out but eventually it worked ok for most languages, given that the right
-- datatables were provided.
20--
21-- Here we do follow a similar approach but this time we don't append the
22-- manipulated keys and entries but create tables for each of them with entries
23-- being tables themselves having different properties. In these tables characters
24-- are represented by numbers and sorting takes place using these numbers. Strings
25-- are simplified using lowercasing as well as shape codes. Numbers are filtered and
26-- after getting an offset they end up at the right end of the spectrum (more clever
27-- parser will be added some day). There are definitely more solutions to the
28-- problem and it is a nice puzzle to solve.
29--
30-- In the future more methods can be added, as there is practically no limit to what
31-- goes into the tables. For that we will provide hooks.
32--
33-- Todo: decomposition with specific order of accents, this is relatively easy to
34-- do.
35--
36-- Todo: investigate what standards and conventions there are and see how they map
37-- onto this mechanism. I've learned that users can come up with any demand so
38-- nothing here is frozen.
39--
-- Todo: I ran into the Unicode Collation document and noticed that there are some
-- similarities (like the weights) but using that method would still demand extra
-- code for language specifics. One option is to use the allkeys.txt file for the uc
-- vectors but then we would also use the collapsed key (sq, code is now commented).
-- In fact, we could just hook those into the replacer code that we run beforehand.
45--
46-- In the future index entries will become more clever, i.e. they will have language
47-- etc properties that then can be used.
48
49local gsub, find, rep, sub, sort, concat, tohash, format = string.gsub, string.find, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
50local utfbyte, utfchar, utfcharacters = utf.byte, utf.char, utf.characters
51local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
52local P, Cs, R, S, lpegmatch, lpegpatterns = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match, lpeg.patterns
53
54local allocate          = utilities.storage.allocate
55local setmetatableindex = table.setmetatableindex
56
-- Tracing flags; each one is toggled by the tracker that is registered on the
-- same line.
local trace_tests       = false  trackers.register("sorters.tests",        function(v) trace_tests        = v end)
local trace_methods     = false  trackers.register("sorters.methods",      function(v) trace_methods      = v end)
local trace_orders      = false  trackers.register("sorters.orders",       function(v) trace_orders       = v end)
local trace_replacements= false  trackers.register("sorters.replacements", function(v) trace_replacements = v end)

-- Reporter used for all messages issued by this module.
local report_sorters    = logs.reporter("languages","sorters")

-- Shared tables, exported further down as fields of the sorters namespace.
local comparers         = { }
local splitters         = { }
local definitions       = allocate()
local tracers           = allocate()
-- Code point offsets. Note that ignoredoffset and replacementoffset have the
-- same value. In this file: packch shows code points above ignoredoffset as
-- "[]", sorters.replacementlist creates characters at replacementoffset+i,
-- numify moves numbers to digitsoffset+number (clipped to digitsmaximum) and
-- splitters.utf treats every code point >= digitsoffset as such a number.
local ignoredoffset     = 0x10000 -- frozen
local replacementoffset = 0x10000 -- frozen
local digitsoffset      = 0x20000 -- frozen
local digitsmaximum     = 0xFFFFF -- frozen
72
73local lccodes           = characters.lccodes
74local uccodes           = characters.uccodes
75local lcchars           = characters.lcchars
76local ucchars           = characters.ucchars
77local shchars           = characters.shchars
78local fscodes           = characters.fscodes
79local fschars           = characters.fschars
80
81local decomposed        = characters.decomposed
82
83local variables         = interfaces.variables
84
85local v_numbers         = variables.numbers
86local v_default         = variables.default
87local v_before          = variables.before
88local v_after           = variables.after
89local v_first           = variables.first
90local v_last            = variables.last
91
-- The set of method names that setlanguage accepts in a sort sequence; anything
-- else in a method list is reported as invalid and skipped.
local validmethods      = tohash {
    "ch", -- raw character (for tracing)
    "mm", -- minus mapping
    "zm", -- zero  mapping
    "pm", -- plus  mapping
    "mc", -- lower case - 1
    "zc", -- lower case
    "pc", -- lower case + 1
    "uc", -- unicode
}

-- Named shortcuts that setlanguage expands into a comma separated list of the
-- method names above (the list order is the comparison order).
local predefinedmethods = {
    [v_default] = "zc,pc,zm,pm,uc",
    [v_before]  = "mm,mc,uc",
    [v_after]   = "pm,mc,uc",
    [v_first]   = "pc,mm,uc",
    [v_last]    = "mc,mm,uc",
}
110
-- The (global) sorters namespace: it exposes the shared tables declared above
-- plus a table with the constants used by this module.
sorters = {
    comparers    = comparers,
    splitters    = splitters,
    definitions  = definitions,
    tracers      = tracers,
    constants    = {
        ignoredoffset     = ignoredoffset,
        replacementoffset = replacementoffset,
        digitsoffset      = digitsoffset,
        digitsmaximum     = digitsmaximum,
        defaultlanguage   = v_default,
        defaultmethod     = v_default,
        defaultdigits     = v_numbers,
        validmethods      = validmethods,
    }
}

-- Local shortcuts to the namespace and its constants.
local sorters   = sorters
local constants = sorters.constants
130
-- Module state describing the currently active language. All of these are
-- (re)assigned by setlanguage, except thefirstofsplit which is set by
-- preparetables. The original declared 'method' twice, so that the second
-- declaration shadowed the first; it is declared only once here.
local data, language, method, digits
local replacements, m_mappings, z_mappings, p_mappings, entries, orders, lower, upper, sequence, usedinsequence
local thefirstofsplit
134
local mte = { -- todo: assign to t
    -- Fallback lookup for keys that are not in the entries table. For a non
    -- empty key below digitsoffset we try, in this order: the entry of the
    -- lowercased key, the entry of (the first byte of) its shape character and
    -- the entry of a lowercased variant of that; when nothing matches the key
    -- itself is returned. Other keys yield nil. Results are not cached.
    __index = function(t,k)
        -- k check really needed (see s-lan-02)
        if not k or k == "" or utfbyte(k) >= digitsoffset then
        --  rawset(t,k,k)
            return
        end
        local lowered = lower[k] or lcchars[k]
        local found   = rawget(t,lowered)
        if found then
        --  rawset(t,k,found)
            return found
        end
        local shape = shchars[k]
        if shape and shape ~= k then
            if #shape > 1 then
                shape = sub(shape,1,1) -- todo
            end
            found = rawget(t,shape)
            if not found then
                -- NOTE(review): lower[k] was already tried above, so possibly
                -- lower[shape] was meant here; kept as it was
                shape = lower[k] or lcchars[shape]
                if shape then
                    found = rawget(t,shape)
                end
            end
        end
    --  rawset(t,k,found or k)
        return found or k
    end
}
166
local noorder = false -- stored (and returned) for keys that have no order at all
local nothing = { 0 } -- shared weight vector for "\000" (what strip turns whitespace into)

-- Prepare the lookup tables of a language definition.
--
-- Each string in data.orders gets the weight vector { 2 * position } and that
-- very same table is stored in the minus, zero and plus mapping. Each mapping
-- gets a __delta field (-1, 0 and +1) plus a metatable that resolves unknown
-- characters on demand and caches the outcome in the mapping itself:
--
--   1/2 : the weights of the lowercased character, each shifted by __delta
--   3-5 : otherwise, when the character has a different shape string, the
--         weights of the characters of that shape (unshifted when found
--         directly, shifted by __delta when found after lowercasing)
--   6   : otherwise { 0 } for "\000" and a weight above all valid orders
--         (2 * #orders + code point) for anything else
--   9   : noorder (false) for a nil key
--
-- The function also installs the fallback metatable on data.entries and
-- remembers data.firstofsplit in the thefirstofsplit state variable.
--
-- data : a language definition providing orders, lower and entries
local function preparetables(data)
    local orders, lower, m_mappings, z_mappings, p_mappings = data.orders, data.lower, { }, { }, { }
    for i=1,#orders do
        local oi = orders[i]
        local n = { 2 * i } -- even values leave room for the -1/+1 deltas
        m_mappings[oi], z_mappings[oi], p_mappings[oi] = n, n, n
    end
    local mtm = {
        __index = function(t,k)
            local n, nn
            if k then
                if trace_orders then
                    report_sorters("simplifing character %C",k)
                end
                local l = lower[k] or lcchars[k]
                if l then
                    if trace_orders then
                        report_sorters(" 1 lower: %C",l)
                    end
                    local ml = rawget(t,l)
                    if ml then
                        n = { }
                        nn = 0
                        for i=1,#ml do
                            nn = nn + 1
                            n[nn] = ml[i] + (t.__delta or 0)
                        end
                        if trace_orders then
                            report_sorters(" 2 order: % t",n)
                        end
                    end
                end
                if not n then
                    local s = shchars[k] -- maybe all components?
                    if s and s ~= k then
                        if trace_orders then
                            report_sorters(" 3 shape: %C",s)
                        end
                        n = { }
                        nn = 0
                        for l in utfcharacters(s) do
                            local ml = rawget(t,l)
                            if ml then
                                if trace_orders then
                                    report_sorters(" 4 keep: %C",l)
                                end
                                if ml then
                                    for i=1,#ml do
                                        nn = nn + 1
                                        n[nn] = ml[i]
                                    end
                                end
                            else
                                l = lower[l] or lcchars[l]
                                if l then
                                    if trace_orders then
                                        report_sorters(" 5 lower: %C",l)
                                    end
                                    local ml = rawget(t,l)
                                    if ml then
                                        for i=1,#ml do
                                            nn = nn + 1
                                            n[nn] = ml[i] + (t.__delta or 0)
                                        end
                                    end
                                end
                            end
                        end
                    else
                        -- this is a kind of last resort branch that we might want to revise
                        -- one day
                        --
                        -- local b = utfbyte(k)
                        -- n = decomposed[b] or { b }
                        -- if trace_tests then
                        --     report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                        -- end
                        --
                        -- we need to move way above valid order (new per 2014-10-16) .. maybe we
                        -- need to move it even more up to get numbers right (not all have orders)
                        --
                        if k == "\000" then
                            n = nothing -- shared
                            if trace_orders then
                                report_sorters(" 6 split: space") -- todo
                            end
                        else
                            local b = 2 * #orders + utfbyte(k)
                            n = decomposed[b] or { b } -- could be shared tables
                            if trace_orders then
                                report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                            end
                        end
                    end
                    if n then
                        if trace_orders then
                            report_sorters(" 7 order: % t",n)
                        end
                    else
                        n = noorder
                        if trace_orders then
                            report_sorters(" 8 order: 0")
                        end
                    end
                end
            else
                n = noorder
                if trace_orders then
                    report_sorters(" 9 order: 0")
                end
            end
            -- cache the outcome, but never under a nil key: rawset raises an
            -- error ("table index is nil") for that, which made the nil branch
            -- above fail instead of returning noorder
            if k ~= nil then
                rawset(t,k,n)
            end
            return n
        end
    }
    data.m_mappings = m_mappings
    data.z_mappings = z_mappings
    data.p_mappings = p_mappings
    -- the deltas live in the mappings themselves, the resolver reads them as t.__delta
    m_mappings.__delta = -1
    z_mappings.__delta =  0
    p_mappings.__delta =  1
    setmetatable(data.entries,mte)
    setmetatable(data.m_mappings,mtm)
    setmetatable(data.z_mappings,mtm)
    setmetatable(data.p_mappings,mtm)
    thefirstofsplit = data.firstofsplit
end
297
-- Walk over all language definitions and (re)build the parent chains; this is
-- needed when new languages are added. Every definition except "default" falls
-- back on its parent (or on the default definition when that parent is not
-- defined). The name and parent are stored in the definition and the three
-- mapping tables are reset to empty tables.
local function update()
    for name, definition in next, definitions do
        local parentname = definition.parent
        if not parentname then
            parentname = "default"
        end
        if name ~= "default" then
            local fallback = definitions[parentname] or definitions.default
            setmetatableindex(definition,fallback)
        end
        definition.language = name
        definition.parent   = parentname
        -- free temp data
        definition.m_mappings = { }
        definition.z_mappings = { }
        definition.p_mappings = { }
    end
end
311
-- Make a language the active one and set up all state that the splitter and
-- comparer depend on. This will become a specification table (also keep this
-- one as it's used in manuals).
--
-- l : language name, an empty or missing value selects the default language
-- m : method (a predefined name or a comma separated list of method names),
--     when empty the method of the definition or the default method is used
-- d : digits mode, when empty the one of the definition or the default is used
-- u : not used
--
-- The resolved method and digits, the sequence and the set of used methods
-- are stored in the definition, which is also the return value.
local function setlanguage(l,m,d,u)
    local defaultlanguage = constants.defaultlanguage
    language = (l ~= "" and l) or defaultlanguage
    data     = definitions[language or defaultlanguage] or definitions[defaultlanguage]
    method   = (m ~= "" and m) or (data.method ~= "" and data.method) or constants.defaultmethod
    digits   = (d ~= "" and d) or (data.digits ~= "" and data.digits) or constants.defaultdigits
    if trace_tests then
        report_sorters("setting language %a, method %a, digits %a",language,method,digits)
    end
    -- shortcuts to the static parts of the definition
    replacements = data.replacements
    entries      = data.entries
    orders       = data.orders
    lower        = data.lower
    upper        = data.upper
    -- the mappings are (re)created by preparetables so we fetch them afterwards
    preparetables(data)
    m_mappings = data.m_mappings
    z_mappings = data.z_mappings
    p_mappings = data.p_mappings
    -- a predefined name expands into a list of methods
    method      = predefinedmethods[variables[method]] or method
    data.method = method
    data.digits = digits
    -- only valid methods end up in the sequence
    local requested = utilities.parsers.settings_to_array(method or "") -- check the list
    local accepted  = { }
    for i=1,#requested do
        local name = requested[i]
        if validmethods[name] then
            accepted[#accepted+1] = name
        else
            report_sorters("invalid sorter method %a in %a",name,method)
        end
    end
    sequence            = accepted
    usedinsequence      = tohash(accepted)
    data.sequence       = sequence
    data.usedinsequence = usedinsequence
-- usedinsequence.ch = true -- better just store the string
    if trace_tests then
        report_sorters("using sort sequence: % t",sequence)
    end
    return data
end
357
-- Rebuild the parent chains of all definitions and reactivate the current
-- language with the current method and digits mode. The original passed
-- 'numberorder' as third argument, which is not defined in this scope (so it
-- was an accidental global lookup that always gave nil); the current digits
-- mode is passed instead.
function sorters.update()
    update()
    setlanguage(language,method,digits) -- resync current language, method and digits
end
362
-- Public entry point for switching to a new language and method: the parent
-- chains of all definitions are refreshed first and then the given language,
-- method and number order become the active ones. Nothing is returned.
function sorters.setlanguage(language,method,numberorder)
    update() -- definitions might have been added
    setlanguage(language,method,numberorder)
end
367
368-- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them
369
370-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)
371
-- Compare two weight vectors slot by slot, looking only at the part that both
-- have in common. The result is 1 when the first differing slot is larger in
-- sort_a, -1 when it is smaller and 0 when the common part is equal or when
-- one of the vectors is missing.
local function basicsort(sort_a,sort_b)
    if not sort_a or not sort_b then
        return 0
    end
    local common = #sort_a
    local nb     = #sort_b
    if nb < common then
        common = nb
    end
    for i=1,common do
        local ai, bi = sort_a[i], sort_b[i]
        if ai > bi then
            return  1
        end
        if ai < bi then
            return -1
        end
    end
    return 0
end
392
393-- todo: compile compare function
394
-- Compare two split parts: the methods in the active sequence are tried in
-- order and the first one that sees a difference decides. When they all agree
-- the part with the longer unicode vector comes last. The unicode vector can
-- be absent (splitters.utf leaves it out for checked splits when "uc" is not
-- in the sequence); taking the length of nil raises an error, so an absent
-- vector counts as length zero.
local function comparepart(pa,pb)
    for j=1,#sequence do
        local m = sequence[j]
        local result = basicsort(pa[m],pb[m])
        if result ~= 0 then
            return result
        end
    end
    local ua, ub = pa.uc, pb.uc
    local la = ua and #ua or 0
    local lb = ub and #ub or 0
    if la > lb then
        return 1
    elseif lb > la then
        return -1
    else
        return 0
    end
end

-- Compare two entries by their split field and return -1, 0 or 1.
--
-- A split without array part is a single part (simple variant, single word).
-- Otherwise the split is a list of parts (complex variant, used in registers
-- with multiple words): the parts are compared pairwise and when all common
-- parts are equal the entry with the fewest parts comes first.
--
-- todo: compile compare function
local function basic(a,b) -- trace ea and eb
    if a == b then
        -- hashed (shared) entries
        return 0
    end
    local ea = a.split
    local eb = b.split
    local na = #ea
    local nb = #eb
    if na == 0 and nb == 0 then
        -- simple variant (single word)
        return comparepart(ea,eb)
    end
    -- complex variant, used in register (multiple words)
    for i=1,nb < na and nb or na do
        local result = comparepart(ea[i],eb[i])
        if result ~= 0 then
            return result
        end
    end
    if na > nb then
        return 1
    elseif nb > na then
        return -1
    else
        return 0
    end
end
463
464-- if we use sq:
465--
466-- local function basic(a,b) -- trace ea and eb
467--     local ea, eb = a.split, b.split
468--     local na, nb = #ea, #eb
469--     if na == 0 and nb == 0 then
470--         -- simple variant (single word)
471--         return basicsort(ea.sq,eb.sq)
472--     else
473--         -- complex variant, used in register (multiple words)
474--         local result = 0
475--         for i=1,nb < na and nb or na do
476--             local eai, ebi = ea[i], eb[i]
477--             result = basicsort(ea.sq,eb.sq)
478--             if result ~= 0 then
479--                 return result
480--             end
481--         end
482--         if result ~= 0 then
483--             return result
484--         elseif na > nb then
485--             return 1
486--         elseif nb > na then
487--             return -1
488--         else
489--             return 0
490--         end
491--     end
492-- end
493
comparers.basic = basic

-- A variant of the basic comparer that can be passed directly to table.sort:
-- it is true only when a has to come before b.
function sorters.basicsorter(a,b)
    local order = basic(a,b)
    return order == -1
end
499
-- Turn a string of digits into one character in the range that is reserved for
-- numbers: digitsoffset plus the number, clipped to digitsmaximum. This only
-- happens when the digits mode equals v_numbers (was swapped, fixed
-- 2014-11-10), otherwise the digits are returned untouched.
local function numify(old)
    if digits ~= v_numbers then
        return old
    end
    local code = digitsoffset + tonumber(old) -- alternatively we can create range
    if code > digitsmaximum then
        code = digitsmaximum
    end
    return utfchar(code)
end
511
local pattern = nil -- the strip pattern, built by prepare when first needed

-- Build (and remember) the substitution pattern that strip applies. The order
-- of the alternatives matters: the first one that matches wins.
local function prepare() -- todo: test \Ux{hex}
    local backslash  = P("\\")
    local uppercase  = R("AZ")
    -- whatever characters.tex.toutfpattern matches and rewrites
    local texcoded   = characters.tex.toutfpattern()
    -- whitespace becomes a null character
    local spacing    = lpeg.patterns.whitespace / "\000"
    -- \Ux{hex} becomes the character with that (hexadecimal) code point
    local hexcoded   = (P("\\Ux{") / "") * ((1-P("}"))^1 / function(s) return utfchar(tonumber(s,16)) end) * (P("}") / "")
    -- a backslash followed by only uppercase letters: the backslash is dropped
    local upcommand  = (backslash / "") * uppercase^0 * (P(-1) + #(1-uppercase))
    -- a backslash, one character and following letters: all is dropped
    local anycommand = (backslash * P(1) * R("az","AZ")^0) / ""
    -- these characters are dropped
    local delimiters = S("[](){}$\"'") / ""
    -- runs of digits are handed to numify
    local numbers    = R("09")^1 / numify
    -- anything else is kept as it is
    local anything   = P(1)
    pattern = Cs( (
        texcoded
      + spacing
      + hexcoded
      + upcommand
      + anycommand
      + delimiters
      + numbers
      + anything
    )^0 )
    return pattern
end
527
-- Clean up a string before it gets split: the (lazily prepared) strip pattern
-- is applied to it. An empty or missing string gives an empty string.
local function strip(str) -- todo: only letters and such
    if not str or str == "" then
        return ""
    end
    return lpegmatch(pattern or prepare(),str)
end

sorters.strip = strip
537
-- Give the first character of an entry, taken from the ch vector of its first
-- part (or of the split itself when it has no parts); an empty string is used
-- when there is none. When the active language provides its own firstofsplit
-- function that one decides what gets returned (it is called with the first
-- character, the language data and the entry), otherwise we return the first
-- character and its tag: the matching entry of the language or "\000".
local function firstofsplit(entry)
    -- numbers are left padded by spaces
    local split = entry.split
    local chars
    if #split > 0 then
        chars = split[1].ch
    else
        chars = split.ch
    end
    local first = chars and chars[1] or ""
    if not thefirstofsplit then
        return first, entries[first] or "\000" -- tag
    end
    return thefirstofsplit(first,data,entry) -- normally the first one is needed
end

sorters.firstofsplit = firstofsplit
555
556-- for the moment we use an inefficient bunch of tables but once
557-- we know what combinations make sense we can optimize this
558
-- Split a (stripped) string into the vectors that the comparers work with.
--
-- str     : the string to split (the replacements of the active language are
--           applied to it first)
-- checked : when set, only the vectors of methods that are in the active
--           sequence end up in the result (and "ch" only when tests are traced)
--
-- The result is a table with these fields, each a list:
--
--   ch         : the characters
--   uc         : the code points
--   mc, zc, pc : the lowercase code points minus one, as is and plus one (a
--                character that is its own lowercase has the same value in all)
--   mm, zm, pm : the weights from the minus, zero and plus mappings
function splitters.utf(str,checked) -- we could append m and u but this is cleaner, s is for tracing
    local nofreplacements = #replacements
    if nofreplacements > 0 then
        -- todo make an lpeg for this
        local replacer = replacements.replacer
        if not replacer then
            -- build the replacer once and keep it with the replacements; both
            -- sides of each replacement are stripped first
            local rep = { }
            for i=1,nofreplacements do
                local r = replacements[i]
                rep[strip(r[1])] = strip(r[2])
            end
            replacer = lpeg.utfchartabletopattern(rep)
            replacer = Cs((replacer/rep + lpegpatterns.utf8character)^0)
            replacements.replacer = replacer
        end
        local rep = lpegmatch(replacer,str)
        if rep and rep ~= str then
            if trace_replacements then
                report_sorters("original   : %s",str)
                report_sorters("replacement: %s",rep)
            end
            str = rep
        end
     -- for k=1,#replacements do
     --     local v = replacements[k]
     --     local s = v[1]
     --     if find(str,s) then
     --         str = gsub(str,s,v[2])
     --     end
     -- end
    end
    local m_case    = { }
    local z_case    = { }
    local p_case    = { }
    local m_mapping = { }
    local z_mapping = { }
    local p_mapping = { }
    local char      = { }
    local byte      = { }
    -- n counts the slots in the case, char and byte vectors; the mapping
    -- vectors have their own counters as one character can add several weights
    local n         = 0
    local nm        = 0
    local nz        = 0
    local np        = 0
    for sc in utfcharacters(str) do
        local b = utfbyte(sc)
        if b >= digitsoffset then
            -- a number (see numify): the code point itself is the weight
            if n == 0 then
                -- we need to force number to the top
                z_case[1] = 0
                m_case[1] = 0
                p_case[1] = 0
                char[1] = sc
                byte[1] = 0
                -- NOTE(review): nm, nz and np are still zero here, so these
                -- three zeros are overwritten by the assignments below; only
                -- the case and byte vectors keep their leading zero
                m_mapping[1] = 0
                z_mapping[1] = 0
                p_mapping[1] = 0
                n = 2
            else
                n = n + 1
            end
            z_case[n] = b
            m_case[n] = b
            p_case[n] = b
            char[n] = sc
            byte[n] = b
            nm = nm + 1
            nz = nz + 1
            np = np + 1
            m_mapping[nm] = b
            z_mapping[nz] = b
            p_mapping[np] = b
        else
            n = n + 1
            -- the language specific lowercase wins over the generic one
            local l = lower[sc]
            l = l and utfbyte(l) or lccodes[b] or b
         -- local u = upper[sc]
         -- u = u and utfbyte(u) or uccodes[b] or b
            if type(l) == "table" then
                l = l[1] -- there are currently no tables in lccodes but it can be some, day
            end
         -- if type(u) == "table" then
         --     u = u[1] -- there are currently no tables in lccodes but it can be some, day
         -- end
            z_case[n] = l
            if l ~= b then
                -- the character differs from its lowercase
                m_case[n] = l - 1
                p_case[n] = l + 1
            else
                m_case[n] = l
                p_case[n] = l
            end
            char[n], byte[n] = sc, b
            local fs = fscodes[b] or b
            -- the mappings resolve unknown characters themselves (see
            -- preparetables); noorder means that there are no weights to add
            -- NOTE(review): with that resolver installed a lookup gives a table
            -- or noorder, so the fallback on fs looks unreachable - confirm
            local msc = m_mappings[sc]
            if msc ~= noorder then
                if not msc then
                    msc = m_mappings[fs]
                end
                for i=1,#msc do
                    nm = nm + 1
                    m_mapping[nm] = msc[i]
                end
            end
            local zsc = z_mappings[sc]
            if zsc ~= noorder then
                if not zsc then
                    zsc = z_mappings[fs]
                end
                for i=1,#zsc do
                    nz = nz + 1
                    z_mapping[nz] = zsc[i]
                end
            end
            local psc = p_mappings[sc]
            if psc ~= noorder then
                if not psc then
                    psc = p_mappings[fs]
                end
                for i=1,#psc do
                    np = np + 1
                    p_mapping[np] = psc[i]
                end
            end
        end
    end
    -- -- only those needed that are part of a sequence
    --
    -- local b = byte[1]
    -- if b then
    --     -- we set them to the first split code (korean)
    --     local fs = fscodes[b] or b
    --     if #m_mapping == 0 then
    --         m_mapping = { m_mappings[fs][1] }
    --     end
    --     if #z_mapping == 0 then
    --         z_mapping = { z_mappings[fs][1] }
    --     end
    --     if #p_mapping == 0 then
    --         p_mapping = { p_mappings[fs][1] }
    --     end
    -- end
    local result
    if checked then
        -- keep only what the active sequence uses
        result = {
            ch = trace_tests       and char      or nil, -- not in sequence
            uc = usedinsequence.uc and byte      or nil,
            mc = usedinsequence.mc and m_case    or nil,
            zc = usedinsequence.zc and z_case    or nil,
            pc = usedinsequence.pc and p_case    or nil,
            mm = usedinsequence.mm and m_mapping or nil,
            zm = usedinsequence.zm and z_mapping or nil,
            pm = usedinsequence.pm and p_mapping or nil,
        }
    else
        result = {
            ch = char,
            uc = byte,
            mc = m_case,
            zc = z_case,
            pc = p_case,
            mm = m_mapping,
            zm = z_mapping,
            pm = p_mapping,
        }
    end
 -- local sq, n = { }, 0
 -- for i=1,#byte do
 --     for s=1,#sequence do
 --         n = n + 1
 --         sq[n] = result[sequence[s]][i]
 --     end
 -- end
 -- result.sq = sq
    return result
end
734
-- What to show for one character in a trace: "[]" for a code point above
-- ignoredoffset, a space for the null character and else the character itself.
local function packedcharacter(chr)
    local byt = utfbyte(chr)
    if byt > ignoredoffset then
        return "[]"
    elseif byt == 0 then
        return " "
    else
        return chr
    end
end

-- Turn a list of characters into one string for tracing.
local function packedcharacters(ch)
    local t = { }
    for i=1,#ch do
        t[i] = packedcharacter(ch[i])
    end
    return concat(t)
end

-- Give a printable version of the characters of an entry (for tracing). When
-- the split has parts these are packed one by one and joined by " + ",
-- otherwise the ch vector of the split or of the entry is packed (or the entry
-- itself when it has neither).
local function packch(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local parts = { }
        for i=1,#split do
            parts[i] = packedcharacters(split[i].ch)
        end
        return concat(parts," + ")
    end
    local ch = (split and split.ch) or entry.ch or entry
    if not ch then
        return ""
    end
    return packedcharacters(ch)
end
777
-- Give a printable version of the code points of an entry (for tracing): the
-- numbers are separated by spaces. When the split has parts these are packed
-- one by one and joined by " + ", otherwise the uc vector of the split or of
-- the entry is packed (or the entry itself when it has neither).
local function packuc(entry)
    local split = entry.split
    if not (split and #split > 0) then -- useless test
        local uc = (split and split.uc) or entry.uc or entry
        if not uc then
            return ""
        end
        return concat(uc," ") -- sq
    end
    local parts = { }
    for i=1,#split do
        local uc = split[i].uc
        parts[i] = concat(uc, " ") -- sq
    end
    return concat(parts," + ")
end

sorters.packch = packch
sorters.packuc = packuc
798
-- Sort a list of entries in place.
--
-- entries : the list, each entry has a split field (see the splitters)
-- cmp     : comparer that is called with two entries, an entry comes before
--           another one when the result is -1
--
-- When methods are traced the entries are reported first. When tests are
-- traced each comparison is reported and afterwards the sorted entries are
-- listed, with a marker line each time the first character changes.
function sorters.sort(entries,cmp)
    if trace_methods then
        local nofentries = #entries
        report_sorters("entries: %s, language: %s, method: %s, digits: %s",nofentries,language,method,tostring(digits))
        for i=1,nofentries do
            report_sorters("entry %s",table.serialize(entries[i].split,i,true,true,true))
        end
    end
    if not trace_tests then
        sort(entries,function(a,b)
            return cmp(a,b) == -1
        end)
        return
    end
    local function tracedcompare(a,b)
        local r = cmp(a,b)
        local e
        if not r then
            e = "?"
        elseif r < 0 then
            e = "<"
        elseif r > 0 then
            e = ">"
        else
            e = "="
        end
        report_sorters("%s %s %s | %s %s %s",packch(a),e,packch(b),packuc(a),e,packuc(b))
        return r == -1
    end
    sort(entries,tracedcompare)
    local previous
    for i=1,#entries do
        local entry = entries[i]
        local letter, first = firstofsplit(entry)
        if first ~= previous then
            previous = first
            if first and letter then
                report_sorters(">> %C (%C)",first,letter)
            end
        end
        report_sorters("   %s | %s",packch(entry),packuc(entry))
    end
end
834
835-- helper
836
-- Convert a list of strings into a list of replacement pairs: the i-th string
-- is paired with the character that has code point replacementoffset + i.
function sorters.replacementlist(list)
    local pairs = { }
    for index=1,#list do
        local original = list[index]
        local stand_in = utfchar(replacementoffset+index)
        pairs[index] = { original, stand_in }
    end
    return pairs
end
847