-- sort-ini.lua / size: 27 Kb / last modification: 2020-07-01 14:35
1if not modules then modules = { } end modules ['sort-ini'] = {
2    version   = 1.001,
3    comment   = "companion to sort-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- It took a while to get there, but with Fleetwood Mac's "Don't Stop"
10-- playing in the background we sort of got it done.
11
--[[<p>The code here evolved from the rather old mkii approach. There
we concatenate the key and (raw) entry into a new string. Numbers and
special characters get some treatment so that they sort ok. In
addition some normalization (lowercasing, accent stripping) takes
place and again data is appended or prepended. Eventually these
strings are sorted using a regular string sorter. The relative order
of characters is dealt with by weighting them. It took a while to
figure this all out but eventually it worked ok for most languages,
given that the right datatables were provided.</p>
21
22<p>Here we do follow a similar approach but this time we don't append
23the manipulated keys and entries but create tables for each of them
24with entries being tables themselves having different properties. In
25these tables characters are represented by numbers and sorting takes
26place using these numbers. Strings are simplified using lowercasing
27as well as shape codes. Numbers are filtered and after getting an offset
28they end up at the right end of the spectrum (more clever parser will
29be added some day). There are definitely more solutions to the problem
30and it is a nice puzzle to solve.</p>
31
32<p>In the future more methods can be added, as there is practically no
33limit to what goes into the tables. For that we will provide hooks.</p>
34
35<p>Todo: decomposition with specific order of accents, this is
36relatively easy to do.</p>
37
38<p>Todo: investigate what standards and conventions there are and see
39how they map onto this mechanism. I've learned that users can come up
40with any demand so nothing here is frozen.</p>
41
<p>Todo: I ran into the Unicode Collation document and noticed that
there are some similarities (like the weights) but using that method
would still demand extra code for language specifics. One option is
to use the allkeys.txt file for the uc vectors but then we would also
use the collapsed key (sq, code is now commented). In fact, we could
just hook those into the replacer code that we run beforehand.</p>
48
49<p>In the future index entries will become more clever, i.e. they will
50have language etc properties that then can be used.</p>
51]]--
52
53local gsub, find, rep, sub, sort, concat, tohash, format = string.gsub, string.find, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
54local utfbyte, utfchar, utfcharacters = utf.byte, utf.char, utf.characters
55local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
56local P, Cs, R, S, lpegmatch, lpegpatterns = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match, lpeg.patterns
57
58local allocate          = utilities.storage.allocate
59local setmetatableindex = table.setmetatableindex
60
61local trace_tests       = false  trackers.register("sorters.tests",        function(v) trace_tests        = v end)
62local trace_methods     = false  trackers.register("sorters.methods",      function(v) trace_methods      = v end)
63local trace_orders      = false  trackers.register("sorters.orders",       function(v) trace_orders       = v end)
64local trace_replacements= false  trackers.register("sorters.replacements", function(v) trace_replacements = v end)
65
66local report_sorters    = logs.reporter("languages","sorters")
67
68local comparers         = { }
69local splitters         = { }
70local definitions       = allocate()
71local tracers           = allocate()
72local ignoredoffset     = 0x10000 -- frozen
73local replacementoffset = 0x10000 -- frozen
74local digitsoffset      = 0x20000 -- frozen
75local digitsmaximum     = 0xFFFFF -- frozen
76
77local lccodes           = characters.lccodes
78local uccodes           = characters.uccodes
79local lcchars           = characters.lcchars
80local ucchars           = characters.ucchars
81local shchars           = characters.shchars
82local fscodes           = characters.fscodes
83local fschars           = characters.fschars
84
85local decomposed        = characters.decomposed
86
87local variables         = interfaces.variables
88
89local v_numbers         = variables.numbers
90local v_default         = variables.default
91local v_before          = variables.before
92local v_after           = variables.after
93local v_first           = variables.first
94local v_last            = variables.last
95
-- The vector names that are accepted in a method sequence; setlanguage
-- reports and drops every name that is not a key of this hash.
local validmethods = tohash({
    "ch", -- raw character (for tracing)
    "mm", -- minus mapping
    "zm", -- zero  mapping
    "pm", -- plus  mapping
    "mc", -- lower case - 1
    "zc", -- lower case
    "pc", -- lower case + 1
    "uc", -- unicode
})

-- Symbolic method names and the sequences they expand to.
local predefinedmethods = { }

predefinedmethods[v_default] = "zc,pc,zm,pm,uc"
predefinedmethods[v_before]  = "mm,mc,uc"
predefinedmethods[v_after]   = "pm,mc,uc"
predefinedmethods[v_first]   = "pc,mm,uc"
predefinedmethods[v_last]    = "mc,mm,uc"
114
-- The public namespace; this is a global on purpose.
sorters = { }

local sorters   = sorters
local constants = {
    ignoredoffset     = ignoredoffset,
    replacementoffset = replacementoffset,
    digitsoffset      = digitsoffset,
    digitsmaximum     = digitsmaximum,
    defaultlanguage   = v_default,
    defaultmethod     = v_default,
    defaultdigits     = v_numbers,
    validmethods      = validmethods,
}

sorters.comparers   = comparers
sorters.splitters   = splitters
sorters.definitions = definitions
sorters.tracers     = tracers
sorters.constants   = constants
134
-- Module state describing the currently selected language. These are all
-- (re)assigned by setlanguage, except thefirstofsplit which is set by
-- preparetables. The variable 'method' used to be declared twice (the
-- second declaration shadowed the first); it is now declared once.
local data, language, method, digits
local replacements, m_mappings, z_mappings, p_mappings, entries, orders, lower, upper, sequence, usedinsequence
local thefirstofsplit
138
-- Metatable for the 'entries' table of a language definition. A missing key
-- k is resolved by trying, in this order: the lowercased k, the first
-- character of the shape of k, and the lowercased variant of that. When
-- nothing is found k itself is returned. Nothing is cached in t.
--
-- Keys that are nil, false, empty or that have a code point at or above
-- digitsoffset yield nil.
local mte = { -- todo: assign to t
    __index = function(t,k)
        if not k or k == "" or utfbyte(k) >= digitsoffset then -- k check really needed (see s-lan-02)
         -- rawset(t,k,k)
            return
        end
        local l  = lower[k] or lcchars[k]
        local el = rawget(t,l)
        if not el then
            local s = shchars[k]
            if s and s ~= k then
                if #s > 1 then
                    -- Keep only the first character of the shape. We iterate
                    -- over utf characters because slicing off one byte would
                    -- turn a multibyte character into an invalid key.
                    for c in utfcharacters(s) do
                        s = c
                        break
                    end
                end
                el = rawget(t,s)
                if not el then
                    -- NOTE(review): lower[k] has already been tried above, so
                    -- lower[s] might have been intended here; kept as it was.
                    local ls = lower[k] or lcchars[s]
                    if ls then
                        el = rawget(t,ls)
                    end
                end
            end
            el = el or k
        end
     -- rawset(t,k,el)
        return el
    end
}
170
local noorder = false -- stored for keys that have no order at all
local nothing = { 0 } -- shared order vector for the "\000" character

-- Builds the three mapping tables (minus, zero, plus) for a language
-- definition and stores them in data.m_mappings, data.z_mappings and
-- data.p_mappings. Each character in data.orders gets the order vector
-- { 2 * index }, shared by the three tables. Other characters are resolved
-- on demand by the metatable below and then cached in the table that was
-- asked. The entries table gets the mte metatable and the file level
-- thefirstofsplit is set to data.firstofsplit.
local function preparetables(data)
    local orders     = data.orders
    local lower      = data.lower
    local m_mappings = { }
    local z_mappings = { }
    local p_mappings = { }
    for i=1,#orders do
        local oi = orders[i]
        local n  = { 2 * i }
        m_mappings[oi], z_mappings[oi], p_mappings[oi] = n, n, n
    end
    local mtm = {
        __index = function(t,k)
            local n, nn
            if k then
                if trace_orders then
                    report_sorters("simplifing character %C",k)
                end
                -- first try: the order of the lowercased character, shifted
                -- by the delta of the table (-1, 0 or +1)
                local l = lower[k] or lcchars[k]
                if l then
                    if trace_orders then
                        report_sorters(" 1 lower: %C",l)
                    end
                    local ml = rawget(t,l)
                    if ml then
                        n = { }
                        nn = 0
                        for i=1,#ml do
                            nn = nn + 1
                            n[nn] = ml[i] + (t.__delta or 0)
                        end
                        if trace_orders then
                            report_sorters(" 2 order: % t",n)
                        end
                    end
                end
                if not n then
                    local s = shchars[k] -- maybe all components?
                    if s and s ~= k then
                        -- second try: collect the orders of the characters
                        -- that make up the shape
                        if trace_orders then
                            report_sorters(" 3 shape: %C",s)
                        end
                        n = { }
                        nn = 0
                        for l in utfcharacters(s) do
                            local ml = rawget(t,l)
                            if ml then
                                if trace_orders then
                                    report_sorters(" 4 keep: %C",l)
                                end
                                for i=1,#ml do
                                    nn = nn + 1
                                    n[nn] = ml[i]
                                end
                            else
                                l = lower[l] or lcchars[l]
                                if l then
                                    if trace_orders then
                                        report_sorters(" 5 lower: %C",l)
                                    end
                                    local ml = rawget(t,l)
                                    if ml then
                                        for i=1,#ml do
                                            nn = nn + 1
                                            n[nn] = ml[i] + (t.__delta or 0)
                                        end
                                    end
                                end
                            end
                        end
                    else
                        -- this is a kind of last resort branch that we might want to revise
                        -- one day
                        --
                        -- local b = utfbyte(k)
                        -- n = decomposed[b] or { b }
                        -- if trace_tests then
                        --     report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                        -- end
                        --
                        -- we need to move way above valid order (new per 2014-10-16) .. maybe we
                        -- need to move it even more up to get numbers right (not all have orders)
                        --
                        if k == "\000" then
                            n = nothing -- shared
                            if trace_orders then
                                report_sorters(" 6 split: space") -- todo
                            end
                        else
                            local b = 2 * #orders + utfbyte(k)
                            n = decomposed[b] or { b } -- could be shared tables
                            if trace_orders then
                                report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                            end
                        end
                    end
                    if n then
                        if trace_orders then
                            report_sorters(" 7 order: % t",n)
                        end
                    else
                        n = noorder
                        if trace_orders then
                            report_sorters(" 8 order: 0")
                        end
                    end
                end
            else
                n = noorder
                if trace_orders then
                    report_sorters(" 9 order: 0")
                end
            end
            -- a nil key cannot be stored (rawset raises "table index is nil")
            -- so in that case we only return the marker
            if k ~= nil then
                rawset(t,k,n)
            end
            return n
        end
    }
    data.m_mappings = m_mappings
    data.z_mappings = z_mappings
    data.p_mappings = p_mappings
    m_mappings.__delta = -1
    z_mappings.__delta =  0
    p_mappings.__delta =  1
    setmetatable(data.entries,mte)
    setmetatable(data.m_mappings,mtm)
    setmetatable(data.z_mappings,mtm)
    setmetatable(data.p_mappings,mtm)
    thefirstofsplit = data.firstofsplit
end
301
-- Prepares the parent chains, which is needed when new languages are added.
-- Every definition except "default" falls back on its parent (or on the
-- default definition when the parent is not defined). The name and parent
-- are stored in the definition and the three mapping tables are reset.
local function update()
    local fallback = definitions.default
    for name, definition in next, definitions do
        local parentname = definition.parent or "default"
        if name ~= "default" then
            setmetatableindex(definition,definitions[parentname] or fallback)
        end
        definition.language = name
        definition.parent   = parentname
        -- free temp data
        definition.m_mappings = { }
        definition.z_mappings = { }
        definition.p_mappings = { }
    end
end
315
-- Selects the language l, the method m and the digits mode d and syncs the
-- file level state with the matching definition. An empty or missing
-- argument falls back on the value stored in the definition and then on the
-- default from 'constants'. The (expanded) method and the digits mode are
-- stored in the definition, as are the validated sequence and its hash. The
-- definition is returned. The argument u is not used.
local function setlanguage(l,m,d,u) -- this will become a specification table (also keep this one as it's used in manuals)
    if l and l ~= "" then
        language = l
    else
        language = constants.defaultlanguage
    end
    data = definitions[language or constants.defaultlanguage] or definitions[constants.defaultlanguage]
    if m and m ~= "" then
        method = m
    else
        local stored = data.method
        if stored and stored ~= "" then
            method = stored
        else
            method = constants.defaultmethod
        end
    end
    if d and d ~= "" then
        digits = d
    else
        local stored = data.digits
        if stored and stored ~= "" then
            digits = stored
        else
            digits = constants.defaultdigits
        end
    end
    if trace_tests then
        report_sorters("setting language %a, method %a, digits %a",language,method,digits)
    end
    replacements = data.replacements
    entries      = data.entries
    orders       = data.orders
    lower        = data.lower
    upper        = data.upper
    preparetables(data)
    m_mappings, z_mappings, p_mappings = data.m_mappings, data.z_mappings, data.p_mappings
    -- a symbolic method name is expanded into its sequence
    method      = predefinedmethods[variables[method]] or method
    data.method = method
    data.digits = digits
    -- only valid names end up in the sequence, others are reported
    local requested = utilities.parsers.settings_to_array(method or "") -- check the list
    local accepted  = { }
    local count     = 0
    for i=1,#requested do
        local name = requested[i]
        if validmethods[name] then
            count = count + 1
            accepted[count] = name
        else
            report_sorters("invalid sorter method %a in %a",name,method)
        end
    end
    sequence            = accepted
    usedinsequence      = tohash(accepted)
    data.sequence       = sequence
    data.usedinsequence = usedinsequence
 -- usedinsequence.ch = true -- better just store the string
    if trace_tests then
        report_sorters("using sort sequence: % t",sequence)
    end
    return data
end
361
-- Rebuilds the parent chains and resyncs the current language, method and
-- digits mode. The third argument used to be 'numberorder', which is not
-- defined in this scope (so an undefined global was read); we now pass the
-- current digits mode.
function sorters.update()
    update()
    setlanguage(language,method,digits) -- resync current language, method and digits
end

-- Rebuilds the parent chains and selects a new language, method and digits
-- mode (numberorder). Here the arguments shadow the file level state.
function sorters.setlanguage(language,method,numberorder)
    update()
    setlanguage(language,method,numberorder) -- new language and method
end
371
372-- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them
373
374-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)
375
-- Compares two vectors of numbers slot by slot, over the length of the
-- shortest one. Returns 1 when the first differing slot is larger in sort_a,
-- -1 when it is smaller, and 0 when there is no difference in the shared
-- part or when one of the vectors is missing.
local function basicsort(sort_a,sort_b)
    if not (sort_a and sort_b) then
        return 0
    end
    local shared = #sort_a
    local nb     = #sort_b
    if nb < shared then
        shared = nb
    end
    for i=1,shared do
        local ai, bi = sort_a[i], sort_b[i]
        if ai < bi then
            return -1
        elseif ai > bi then
            return  1
        end
    end
    return 0
end
396
397-- todo: compile compare function
398
-- Tie breaker: the longer uc vector wins. When a vector is missing (a split
-- made with 'checked' only carries uc when it is part of the sequence) we
-- cannot decide and return 0 instead of raising an error.
local function lengthorder(ua,ub)
    if ua and ub then
        local la = #ua
        local lb = #ub
        if la > lb then
            return 1
        elseif lb > la then
            return -1
        end
    end
    return 0
end

-- Compares two entries by their split and returns -1, 0 or 1. For each
-- vector name in the current sequence the vectors are compared with
-- basicsort and the first difference decides. When all are equal the length
-- of the uc vectors decides. A split with indexed parts (multiple words, as
-- used in registers) is compared part by part and when the shared parts are
-- equal the entry with more parts comes last.
local function basic(a,b) -- trace ea and eb
    if a == b then
        -- hashed (shared) entries
        return 0
    end
    local ea = a.split
    local eb = b.split
    local na = #ea
    local nb = #eb
    if na == 0 and nb == 0 then
        -- simple variant (single word)
        for j=1,#sequence do
            local m = sequence[j]
            local result = basicsort(ea[m],eb[m])
            if result ~= 0 then
                return result
            end
        end
        return lengthorder(ea.uc,eb.uc)
    end
    -- complex variant, used in register (multiple words)
    for i=1,nb < na and nb or na do
        local eai = ea[i]
        local ebi = eb[i]
        for j=1,#sequence do
            local m = sequence[j]
            local result = basicsort(eai[m],ebi[m])
            if result ~= 0 then
                return result
            end
        end
        local result = lengthorder(eai.uc,ebi.uc)
        if result ~= 0 then
            return result
        end
    end
    if na > nb then
        return 1
    elseif nb > na then
        return -1
    else
        return 0
    end
end
467
468-- if we use sq:
469--
470-- local function basic(a,b) -- trace ea and eb
471--     local ea, eb = a.split, b.split
472--     local na, nb = #ea, #eb
473--     if na == 0 and nb == 0 then
474--         -- simple variant (single word)
475--         return basicsort(ea.sq,eb.sq)
476--     else
477--         -- complex variant, used in register (multiple words)
478--         local result = 0
479--         for i=1,nb < na and nb or na do
480--             local eai, ebi = ea[i], eb[i]
481--             result = basicsort(ea.sq,eb.sq)
482--             if result ~= 0 then
483--                 return result
484--             end
485--         end
486--         if result ~= 0 then
487--             return result
488--         elseif na > nb then
489--             return 1
490--         elseif nb > na then
491--             return -1
492--         else
493--             return 0
494--         end
495--     end
496-- end
497
comparers.basic = basic

-- A 'less than' predicate on top of the basic comparer: true only when a
-- sorts before b.
function sorters.basicsorter(a,b)
    local order = basic(a,b)
    return order == -1
end
503
-- Turns a string of digits into one character with code point digitsoffset
-- plus the number, clipped at digitsmaximum. This only happens when the
-- digits mode equals v_numbers, otherwise the string is returned as it is.
local function numify(old)
    if digits ~= v_numbers then -- was swapped, fixed 2014-11-10
        return old
    end
    local code = digitsoffset + tonumber(old) -- alternatively we can create range
    return utfchar(code > digitsmaximum and digitsmaximum or code)
end
515
local pattern = nil

-- Converts the hexadecimal content of \Ux{...} into a utf character. Content
-- that is not a hexadecimal number is dropped; before, it made utfchar fail
-- on a nil argument.
local function hextoutf(s)
    local code = tonumber(s,16)
    return code and utfchar(code) or ""
end

-- Builds (once) the substitution pattern used by strip, stores it in the
-- file level 'pattern' and returns it. The alternatives are tried in the
-- order given:
--
--   tex input sequences   handled by characters.tex.toutfpattern
--   whitespace            becomes "\000"
--   \Ux{hex}              becomes the character with that code point
--   \ + uppercase letters the backslash is removed
--   \ + other command     removed
--   [](){}$"'             removed
--   a run of digits       passed to numify
--   anything else         kept
local function prepare() -- todo: test \Ux{hex}
    pattern = Cs( (
        characters.tex.toutfpattern()
      + lpegpatterns.whitespace / "\000"
      + (P("\\Ux{") / "" * ((1-P("}"))^1/hextoutf) * (P("}")/""))
      + (P("\\") / "") * R("AZ")^0 * (P(-1) + #(1-R("AZ")))
      + (P("\\") * P(1) * R("az","AZ")^0) / ""
      + S("[](){}$\"'") / ""
      + R("09")^1 / numify
      + P(1)
    )^0 )
    return pattern
end
531
-- Runs the substitution pattern (built on first use by prepare) over str.
-- A missing or empty string gives an empty string.
local function strip(str) -- todo: only letters and such
    if not str or str == "" then
        return ""
    end
    return lpegmatch(pattern or prepare(),str)
end

sorters.strip = strip
541
-- Returns the first character of an entry, taken from the ch vector of the
-- first part of the split (or of the split itself when it has no parts).
-- When the language provides its own firstofsplit, that one is called with
-- the character, the language data and the entry and its results are
-- returned. Otherwise the character and its tag are returned, where the tag
-- comes from the entries table of the language and defaults to "\000".
local function firstofsplit(entry)
    -- numbers are left padded by spaces
    local split = entry.split
    local chars
    if #split > 0 then
        chars = split[1].ch
    else
        chars = split.ch
    end
    local first = chars and chars[1] or ""
    if thefirstofsplit then
        return thefirstofsplit(first,data,entry) -- normally the first one is needed
    end
    return first, entries[first] or "\000" -- tag
end

sorters.firstofsplit = firstofsplit
559
560-- for the moment we use an inefficient bunch of tables but once
561-- we know what combinations make sense we can optimize this
562
-- Applies the replacements of the current language to str and returns the
-- result. The replacer pattern is built on first use from the stripped
-- replacement pairs and cached in replacements.replacer.
local function applyreplacements(str)
    -- todo make an lpeg for this
    local replacer = replacements.replacer
    if not replacer then
        local mapping = { }
        for i=1,#replacements do
            local r = replacements[i]
            mapping[strip(r[1])] = strip(r[2])
        end
        replacer = lpeg.utfchartabletopattern(mapping)
        replacer = Cs((replacer/mapping + lpegpatterns.utf8character)^0)
        replacements.replacer = replacer
    end
    local replaced = lpegmatch(replacer,str)
    if replaced and replaced ~= str then
        if trace_replacements then
            report_sorters("original   : %s",str)
            report_sorters("replacement: %s",replaced)
        end
        return replaced
    end
    return str
end

-- Appends the order vector that 'mappings' has for the character sc to
-- target, which currently has n slots, and returns the new number of slots.
-- Nothing is appended when the order is the noorder marker. The first split
-- code fs is only consulted when there is no order at all for sc.
local function appendorder(target,n,mappings,sc,fs)
    local order = mappings[sc]
    if order ~= noorder then
        if not order then
            order = mappings[fs]
        end
        for i=1,#order do
            n = n + 1
            target[n] = order[i]
        end
    end
    return n
end

-- for the moment we use an inefficient bunch of tables but once
-- we know what combinations make sense we can optimize this

-- Splits a (stripped) string into the vectors that the comparers use:
--
--   ch          the characters
--   uc          the code points
--   mc, zc, pc  the lowercase code points: minus one, as is, plus one (the
--               shift is only applied when the character is not lowercase)
--   mm, zm, pm  the orders according to the minus, zero and plus mappings
--
-- When 'checked' is set only the vectors that are used in the current
-- sequence are kept (and ch only when tests are traced).
function splitters.utf(str,checked) -- we could append m and u but this is cleaner, s is for tracing
    if #replacements > 0 then
        str = applyreplacements(str)
    end
    local m_case    = { }
    local z_case    = { }
    local p_case    = { }
    local m_mapping = { }
    local z_mapping = { }
    local p_mapping = { }
    local char      = { }
    local byte      = { }
    local n         = 0
    local nm        = 0
    local nz        = 0
    local np        = 0
    for sc in utfcharacters(str) do
        local b = utfbyte(sc)
        if b >= digitsoffset then
            -- a number, as produced by numify
            if n == 0 then
                -- we need to force number to the top
                z_case[1] = 0
                m_case[1] = 0
                p_case[1] = 0
                char[1] = sc
                byte[1] = 0
                -- NOTE(review): the three zeros below are overwritten right
                -- away because nm, nz and np are still 0 here, so only the
                -- case and byte vectors get the leading zero; kept as it was.
                m_mapping[1] = 0
                z_mapping[1] = 0
                p_mapping[1] = 0
                n = 2
            else
                n = n + 1
            end
            z_case[n] = b
            m_case[n] = b
            p_case[n] = b
            char[n] = sc
            byte[n] = b
            nm = nm + 1
            nz = nz + 1
            np = np + 1
            m_mapping[nm] = b
            z_mapping[nz] = b
            p_mapping[np] = b
        else
            n = n + 1
            local l = lower[sc]
            l = l and utfbyte(l) or lccodes[b] or b
            if type(l) == "table" then
                l = l[1] -- there are currently no tables in lccodes but it can be some, day
            end
            z_case[n] = l
            if l ~= b then
                m_case[n] = l - 1
                p_case[n] = l + 1
            else
                m_case[n] = l
                p_case[n] = l
            end
            char[n], byte[n] = sc, b
            local fs = fscodes[b] or b
            nm = appendorder(m_mapping,nm,m_mappings,sc,fs)
            nz = appendorder(z_mapping,nz,z_mappings,sc,fs)
            np = appendorder(p_mapping,np,p_mappings,sc,fs)
        end
    end
    -- -- only those needed that are part of a sequence
    --
    -- local b = byte[1]
    -- if b then
    --     -- we set them to the first split code (korean)
    --     local fs = fscodes[b] or b
    --     if #m_mapping == 0 then
    --         m_mapping = { m_mappings[fs][1] }
    --     end
    --     if #z_mapping == 0 then
    --         z_mapping = { z_mappings[fs][1] }
    --     end
    --     if #p_mapping == 0 then
    --         p_mapping = { p_mappings[fs][1] }
    --     end
    -- end
    local result
    if checked then
        result = {
            ch = trace_tests       and char      or nil, -- not in sequence
            uc = usedinsequence.uc and byte      or nil,
            mc = usedinsequence.mc and m_case    or nil,
            zc = usedinsequence.zc and z_case    or nil,
            pc = usedinsequence.pc and p_case    or nil,
            mm = usedinsequence.mm and m_mapping or nil,
            zm = usedinsequence.zm and z_mapping or nil,
            pm = usedinsequence.pm and p_mapping or nil,
        }
    else
        result = {
            ch = char,
            uc = byte,
            mc = m_case,
            zc = z_case,
            pc = p_case,
            mm = m_mapping,
            zm = z_mapping,
            pm = p_mapping,
        }
    end
 -- local sq, n = { }, 0
 -- for i=1,#byte do
 --     for s=1,#sequence do
 --         n = n + 1
 --         sq[n] = result[sequence[s]][i]
 --     end
 -- end
 -- result.sq = sq
    return result
end
738
-- Concatenates a vector of characters for tracing: the character with code
-- point zero is shown as a space and characters above ignoredoffset are
-- shown as "[]".
local function packchars(ch)
    local shown = { }
    for i=1,#ch do
        local chr = ch[i]
        local byt = utfbyte(chr)
        if byt == 0 then
            shown[i] = " "
        elseif byt > ignoredoffset then
            shown[i] = "[]"
        else
            shown[i] = chr
        end
    end
    return concat(shown)
end

-- Returns a readable string of the characters of an entry (for tracing). The
-- parts of a multi part split are separated by " + ". Without parts the ch
-- vector of the split or the entry is used, or the entry itself.
local function packch(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local parts = { }
        for i=1,#split do
            parts[i] = packchars(split[i].ch)
        end
        return concat(parts," + ")
    end
    local ch = (split and split.ch) or entry.ch or entry
    if ch then
        return packchars(ch)
    end
    return ""
end
781
-- Returns the code points of an entry as a space separated string (for
-- tracing). The parts of a multi part split are separated by " + ". Without
-- parts the uc vector of the split or the entry is used, or the entry
-- itself.
local function packuc(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local parts = { }
        for i=1,#split do
            local codes = split[i].uc
            parts[i] = concat(codes," ") -- sq
        end
        return concat(parts," + ")
    end
    local uc = (split and split.uc) or entry.uc or entry
    if not uc then
        return ""
    end
    return concat(uc," ") -- sq
end

sorters.packch = packch
sorters.packuc = packuc
802
-- Sorts the entries in place. The comparer cmp returns -1, 0 or 1 and an
-- entry comes first when cmp returns -1. With the methods tracker enabled
-- the splits are reported beforehand; with the tests tracker enabled each
-- comparison is reported, as is the sorted list with its first characters.
function sorters.sort(entries,cmp)
    if trace_methods then
        local nofentries = #entries
        report_sorters("entries: %s, language: %s, method: %s, digits: %s",nofentries,language,method,tostring(digits))
        for i=1,nofentries do
            report_sorters("entry %s",table.serialize(entries[i].split,i,true,true,true))
        end
    end
    if not trace_tests then
        local function before(a,b)
            return cmp(a,b) == -1
        end
        sort(entries,before)
        return
    end
    local function tracedbefore(a,b)
        local r = cmp(a,b)
        local e
        if not r then
            e = "?"
        elseif r < 0 then
            e = "<"
        elseif r > 0 then
            e = ">"
        else
            e = "="
        end
        report_sorters("%s %s %s | %s %s %s",packch(a),e,packch(b),packuc(a),e,packuc(b))
        return r == -1
    end
    sort(entries,tracedbefore)
    local current
    for i=1,#entries do
        local entry = entries[i]
        local letter, first = firstofsplit(entry)
        if first ~= current then
            -- a new group starts here
            current = first
            if first and letter then
                report_sorters(">> %C (%C)",first,letter)
            end
        end
        report_sorters("   %s | %s",packch(entry),packuc(entry))
    end
end
838
839-- helper
840
-- Turns a list of strings into a list of replacement pairs: the i-th string
-- is paired with the character that has code point replacementoffset + i.
function sorters.replacementlist(list)
    local pairs_ = { }
    for index=1,#list do
        local original    = list[index]
        local placeholder = utfchar(replacementoffset+index)
        pairs_[index] = { original, placeholder }
    end
    return pairs_
end
851