typo-cap.lua /size: 19 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['typo-cap'] = {
2    version   = 1.001,
3    optimize  = true,
4    comment   = "companion to typo-cap.mkiv",
5    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
6    copyright = "PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8    }
9
10local next, type, tonumber = next, type, tonumber
11local format, insert = string.format, table.insert
12local div, getrandom = math.div, utilities.randomizer.get
13
14local trace_casing = false  trackers  .register("typesetters.casing",            function(v) trace_casing = v end)
15local check_kerns  = true   directives.register("typesetters.casing.checkkerns", function(v) check_kerns  = v end)
16
17local report_casing = logs.reporter("typesetting","casing")
18
19local nodes, node = nodes, node
20
21local nuts            = nodes.nuts
22
23local getnext         = nuts.getnext
24local getprev         = nuts.getprev
25local getid           = nuts.getid
26----- getattr         = nuts.getattr
27local takeattr        = nuts.takeattr
28local getfont         = nuts.getfont
29local getsubtype      = nuts.getsubtype
30local getchar         = nuts.getchar
31local isglyph         = nuts.isglyph
32local getdisc         = nuts.getdisc
33
34local setattr         = nuts.setattr
35local setchar         = nuts.setchar
36local setfont         = nuts.setfont
37
38local copy_node       = nuts.copy
39local endofmath       = nuts.endofmath
40local insertafter     = nuts.insertafter
41local findattribute   = nuts.findattribute
42
43local nextglyph       = nuts.traversers.glyph
44
45local nodecodes       = nodes.nodecodes
46local kerncodes       = nodes.kerncodes
47
48local glyph_code      = nodecodes.glyph
49local kern_code       = nodecodes.kern
50local disc_code       = nodecodes.disc
51local math_code       = nodecodes.math
52
53local fontkern_code   = kerncodes.fontkern
54
55local enableaction    = nodes.tasks.enableaction
56
57local newkern         = nuts.pool.kern
58
59local fonthashes      = fonts.hashes
60local fontdata        = fonthashes.identifiers
61local fontchar        = fonthashes.characters
62
63local variables       = interfaces.variables
64local v_reset         = variables.reset
65
66local texsetattribute = tex.setattribute
67local unsetvalue      = attributes.unsetvalue
68
69typesetters           = typesetters or { }
70local typesetters     = typesetters
71
72typesetters.cases     = typesetters.cases or { }
73local cases           = typesetters.cases
74
75cases.actions         = { }
76local actions         = cases.actions
77local a_cases         = attributes.private("case")
78
79local extract         = bit32.extract
local run             = 0 -- a trick to make neighbouring ranges work
local blocked         = { }

-- Build the attribute value for a case tag used with a given font id. The
-- value is laid out as:
--
--   font * 0x10000 + tag * 0x100 + run
--
-- where run is a counter that cycles through 1 .. 0x40, so two consecutive
-- requests for the same tag and font still give different values. A freshly
-- made value is always marked as not blocked.
local function set(tag,font)
    run = (run == 0x40) and 1 or (run + 1) -- 0x40 == 2^6
    local a = font * 0x10000 + tag * 0x100 + run
    blocked[a] = false
    return a
end
93
-- Split an attribute value made by set into its three fields. The order of
-- the results is: tag (8 bits from bit 8), font (12 bits from bit 16) and
-- run (the low 8 bits).
local function get(a)
    local tag  = extract(a, 8, 8)
    local font = extract(a,16,12)
    local run  = extract(a, 0, 8)
    return tag, font, run
end
100
101-- a previous implementation used char(0) as placeholder for the larger font, so we needed
102-- to remove it before it can do further harm ... that was too tricky as we use char 0 for
103-- other cases too
104--
105-- we could do the whole glyph run here (till no more attributes match) but then we end up
106-- with more code .. maybe i will clean this up anyway as the lastfont hack is somewhat ugly
107-- ... on the other hand, we need to deal with cases like:
108--
109-- \WORD {far too \Word{many \WORD{more \word{pushed} in between} useless} words}
110
111local uccodes    = characters.uccodes
112local lccodes    = characters.lccodes
113local categories = characters.categories
114
115-- true false true == mixed
116
-- Replace the character of glyph node 'start' by its mapping in 'codes'.
--
-- A mapping can be a single code or a list of codes. A replacement is only
-- applied when the font of the glyph has every code involved. With a list
-- the first code ends up in 'start' itself and the remaining ones in copies
-- of 'start' that are linked in right after it, in list order.
--
-- Returns 'start'; when a list mapping is rejected because the font lacks
-- one of its codes, 'false' is returned as second value.
local function replacer(start,codes)
    local char, fnt = isglyph(start)
    local target = codes[char]
    if not target then
        return start
    end
    local available = fontchar[fnt]
    if type(target) ~= "table" then
        if available[target] then
            setchar(start,target)
        end
        return start
    end
    local last = #target
    for i=1,last do
        if not available[target[i]] then
            return start, false
        end
    end
    -- the tail goes in back to front, each copy directly after start, so
    -- the copies end up in list order; they are taken before start changes
    for i=last,2,-1 do
        local g = copy_node(start)
        setchar(g,target[i])
        insertafter(start,start,g)
    end
    if last >= 1 then
        setchar(start,target[1])
    end
    return start
end
144
local registered, n = { }, 0

-- Register a case action under 'name' and return its slot number.
--
-- When 'f' is a function it gets the next free slot in 'actions'. Any other
-- value is taken as the name of an earlier registration and 'name' becomes
-- an alias for the same slot (nil when that name is unknown).
local function register(name,f)
    local slot
    if type(f) == "function" then
        n = n + 1
        actions[n] = f
        slot = n
    else
        slot = registered[f]
    end
    registered[name] = slot
    return slot
end
159
160cases.register = register
161
-- Action: map the glyph ('first' when given, else 'start') through uccodes.
-- The remembered font for this tag is dropped. Returns what replacer returns.
local function WORD(start,attr,lastfont,n,count,where,first)
    local target = first or start
    lastfont[n] = false
    return replacer(target,uccodes)
end
166
-- Action: map the glyph ('first' when given, else 'start') through lccodes.
-- The remembered font for this tag is dropped. Returns what replacer returns.
local function word(start,attr,lastfont,n,count,where,first)
    local target = first or start
    lastfont[n] = false
    return replacer(target,lccodes)
end
171
-- Action: uppercase only the first glyph of a run (count == 1).
--
-- Nothing is done and nothing is returned for the "post" part of a
-- discretionary. In all other cases 'start' and 'true' are returned; the
-- second value makes the discretionary loops in cases.handler stop after
-- this glyph.
local function Words(start,attr,lastfont,n,count,where,first)
    if where == "post" then
        return
    end
    if count == 1 then
        replacer(first or start,uccodes)
    end
    return start, true
end
183
-- Action: like Words, but the attribute value is marked as blocked first, so
-- cases.handler skips every later node that carries the same value. The
-- results of Words are passed on unchanged.
local function Word(start,attr,lastfont,n,count,where,first)
    blocked[attr] = true
    return Words(start,attr,lastfont,n,count,where,first)
end
188
-- Action: run word (lowercase mapping) and then Words (uppercase mapping for
-- the first glyph of a run) on the same glyph. The results of both calls are
-- ignored; this action always returns start and true.
local function camel(start,attr,lastfont,n,count,where,first)
    word(start,attr,lastfont,n,count,where,first)
    Words(start,attr,lastfont,n,count,where,first)
    return start, true
end
194
195-- local function mixed(start,attr,lastfont,n,count,where,first)
196--     if where == "post" then
197--         return
198--     end
199--     local used = first or start
200--     local char = getchar(first)
201--     local dc   = uccodes[char]
202--     if not dc then
203--         -- quit
204--     elseif dc == char then
205--         local lfa = lastfont[n]
206--         if lfa then
207--             setfont(first,lfa)
208--         end
209--     else
210--         replacer(first or start,uccodes)
211--     end
212--     return start, true
213-- end
214
-- Action for mixed casing.
--
-- Nothing is done (and nothing returned) for the "post" part of a
-- discretionary. Otherwise the glyph ('first' when given, else 'start') is
-- handled according to its uccodes entry:
--
-- * no entry: the glyph is left alone
-- * the entry equals the character itself: the glyph gets the font that was
--   remembered for this tag, when there is one
-- * else: the glyph is mapped through uccodes; when check_kerns is set and
--   the previous node is a glyph in a different font, a kern between both
--   is looked up in the font of the previous glyph (using the lccodes entry
--   of the original character) and inserted after that glyph when nonzero
--
-- Returns start and true in all non "post" cases.
local function mixed(start,attr,lastfont,n,count,where,first)
    if where == "post" then
        return
    end
    local used  = first or start
    local char  = getchar(used)
    local upper = uccodes[char]
    if not upper then
        return start, true
    end
    if upper == char then
        local lfa = lastfont[n]
        if lfa then
            setfont(used,lfa)
        end
        return start, true
    end
    -- only look at the previous node when kern checking is enabled
    local before = check_kerns and getprev(used)
    if before and getid(before) == glyph_code then
        local lc    = lccodes[char]
        local lower = type(lc) == "table" and lc[1] or lc
        replacer(used,uccodes)
        local fp = getfont(before)
        local fc = getfont(used)
        if fp ~= fc then
            local k = fonts.getkern(fontdata[fp],getchar(before),lower)
            if k ~= 0 then
                insertafter(before,before,newkern(k))
            end
        end
    else
        replacer(used,uccodes)
    end
    return start, true
end
251
-- Action: map the glyph ('first' when given, else 'start') through uccodes.
--
-- For the first glyph of a run (count == 1, not in a "post" part) that has
-- an uccodes entry, the font remembered for this tag is applied first. When
-- 'once' is set the remembered font is dropped afterwards, so that only one
-- glyph can get it.
--
-- Returns start and the second result of replacer.
local function Capital(start,attr,lastfont,n,count,where,first,once)
    local used = first or start
    local lfa  = count == 1 and where ~= "post" and lastfont[n]
    if lfa and uccodes[getchar(used)] then
        setfont(used,lfa)
    end
    local _, changed = replacer(used,uccodes)
    if once then
        lastfont[n] = false -- here
    end
    return start, changed
end
269
-- Action: same as Capital, but with 'once' set, so the remembered font is
-- dropped after the first call for a tag.
--
-- The arguments come in the order used by cases.handler for every action:
-- start, attr, lastfont, n, count, where, first. They are handed to Capital
-- in that same order. (The parameter list used to name 'count' twice and
-- had 'where' and 'count' swapped, which made Capital always see a nil
-- 'where', so its check for the "post" part of a discretionary never fired.)
local function capital(start,attr,lastfont,n,count,where,first)
    return Capital(start,attr,lastfont,n,count,where,first,true)
end
273
-- Action: dummy that changes nothing. It returns start and true; the second
-- value makes the discretionary loops in cases.handler stop right away.
local function none(start,attr,lastfont,n,count,where,first)
    return start, true
end
277
-- Pick a random code in first .. last that is present in 'tfm' (the
-- character table of a font). Returns nil when the font has none of the
-- codes in that range; without this check the draw loop below would never
-- end for such a font.
local function randomcode(tfm,tag,first,last)
    local found = false
    for code=first,last do
        if tfm[code] then
            found = true
            break
        end
    end
    if not found then
        return nil
    end
    while true do
        local code = getrandom(tag,first,last)
        if tfm[code] then -- this also intercepts tables
            return code
        end
    end
end

-- Action: replace a letter by a random one of the same case.
--
-- A glyph ('first' when given, else 'start') whose category is "lu" gets a
-- random code from 0x41 .. 0x5A, one with category "ll" a random code from
-- 0x61 .. 0x7A; only codes that the font of the glyph provides are used.
-- Other glyphs, and glyphs in a font that has no code in the wanted range,
-- are left alone. The remembered font for this tag is dropped.
--
-- Returns start.
local function randomized(start,attr,lastfont,n,count,where,first)
    local used  = first or start
    local char  = getchar(used)
    local font  = getfont(used)
    local tfm   = fontchar[font]
    lastfont[n] = false
    local kind  = categories[char]
    local code
    if kind == "lu" then
        code = randomcode(tfm,"capital lu",0x41,0x5A)
    elseif kind == "ll" then
        code = randomcode(tfm,"capital ll",0x61,0x7A)
    end
    if code then
        setchar(used,code)
    end
    return start
end
304
-- The built-in actions. Slots are handed out by register in call order, so
-- the numbers in the comments below only hold as long as this order is kept.

register(variables.WORD,   WORD)              --   1
register(variables.word,   word)              --   2
register(variables.Word,   Word)              --   3
register(variables.Words,  Words)             --   4
register(variables.capital,capital)           --   5
register(variables.Capital,Capital)           --   6
register(variables.none,   none)              --   7 (dummy)
register(variables.random, randomized)        --   8
register(variables.mixed,  mixed)             --   9
register(variables.camel,  camel)             --  10

-- These pass a name instead of a function, which makes register reuse the
-- slot of the earlier registration.

register(variables.cap,    variables.capital) -- clone
register(variables.Cap,    variables.Capital) -- clone
318
319-- This can be more clever: when we unset we can actually use the same attr ref if
320-- needed. Using properties to block further usage is not faster.
321
-- Apply 'action' to the glyphs of one part ('list') of a discretionary.
-- 'where' is "replace", "pre" or "post" and 'count' the word position before
-- this part; every glyph gets the next position. The case attribute is taken
-- from each glyph. The loop stops as soon as the action returns a true
-- second value. A nil 'list' is ignored.
local function handledisclist(list,where,action,start,attr,lastfont,n,count)
    if not list then
        return
    end
    local cnt = count
    for g in nextglyph, list do
        cnt = cnt + 1
        takeattr(g,a_cases)
        local _, quit = action(start,attr,lastfont,n,cnt,where,g)
        if quit then
            break
        end
    end
end

-- Walk over the node list 'head' and apply the case action that belongs to
-- the case attribute of each glyph and discretionary. Returns 'head'.
--
-- * The attribute is taken from every glyph and discretionary visited;
--   values that are marked in 'blocked' are skipped.
-- * 'count' is the position of a glyph in the current run: it restarts at 1
--   when the attribute value changes, and is reset to 0 by math and by any
--   other node that is not a glyph, discretionary or font kern.
-- * 'lastfont' remembers per tag the font id stored in the first attribute
--   value seen for that tag; actions can clear or use it.
-- * Math is skipped as a whole.
--
-- Actions are called as action(start,attr,lastfont,n,count,where,first),
-- with 'where' and 'first' only set for glyphs inside a discretionary.
function cases.handler(head) -- not real fast but also not used on much data
    local start    = head
    local lastfont = { }
    local lastattr = nil
    local count    = 0
    while start do -- while because start can jump ahead
        local id = getid(start)
        if id == glyph_code then
            local attr = takeattr(start,a_cases)
            if attr and attr > 0 and not blocked[attr] then
                if attr ~= lastattr then
                    lastattr = attr
                    count    = 1
                else
                    count    = count + 1
                end
                local n, font, m = get(attr)
                if lastfont[n] == nil then
                    lastfont[n] = font
                end
                local action = actions[n] -- map back to low number
                if action then
                    local quit
                    start, quit = action(start,attr,lastfont,n,count)
                    if trace_casing then
                        report_casing("case trigger %a, instance %a, fontid %a, result %a",n,m,font,quit and "-" or "+")
                    end
                elseif trace_casing then
                    report_casing("unknown case trigger %a",n)
                end
            end
        elseif id == disc_code then
            local attr = takeattr(start,a_cases)
            if attr and attr > 0 and not blocked[attr] then
                if attr ~= lastattr then
                    lastattr = attr
                    count    = 0
                end
                local n, font, m = get(attr)
                if lastfont[n] == nil then
                    lastfont[n] = font
                end
                local action = actions[n] -- map back to low number
                if action then
                    local pre, post, replace = getdisc(start)
                    -- each part starts counting from the same position
                    handledisclist(replace,"replace",action,start,attr,lastfont,n,count)
                    handledisclist(pre,    "pre",    action,start,attr,lastfont,n,count)
                    handledisclist(post,   "post",   action,start,attr,lastfont,n,count)
                end
                count = count + 1
            end
        elseif id == math_code then
            start = endofmath(start)
            count = 0
        elseif id == kern_code and getsubtype(start) == fontkern_code then
            -- still inside a word ...normally kerns are added later
            --
            -- this test used to read an undeclared 'prev_id', which is always
            -- nil, so a font kern ended up in the branch below and reset the
            -- counter in the middle of a word
        else
            count = 0
        end
        if start then
            start = getnext(start)
        end
    end
    return head
end
428
429-- function cases.handler(head) -- not real fast but also not used on much data
430--     local attr, start = findattribute(head,a_cases)
431--     if not start then
432--         return head, false
433--     end
434--     local lastfont = { }
435--     local lastattr = nil
436--     local count    = 0
437--     local previd   = nil
438--     local prev     = nil
439--     while start do
440--         while start do -- while because start can jump ahead
441--             local id = getid(start)
442--             if id == glyph_code then
443--              -- local attr = getattr(start,a_cases)
444--                 local attr = takeattr(start,a_cases)
445--                 if attr and attr > 0 and not blocked[attr] then
446--                     if attr ~= lastattr then
447--                         lastattr = attr
448--                         count    = 1
449--                     else
450--                         count    = count + 1
451--                     end
452--                  -- setattr(start,a_cases,unsetvalue) -- not needed
453--                     local n, id, m = get(attr)
454--                     if lastfont[n] == nil then
455--                         lastfont[n] = id
456--                     end
457--                     local action = actions[n] -- map back to low number
458--                     if action then
459--                         start = action(start,attr,lastfont,n,count)
460--                         if trace_casing then
461--                             report_casing("case trigger %a, instance %a, fontid %a, result %a",n,m,id,ok)
462--                         end
463--                     elseif trace_casing then
464--                         report_casing("unknown case trigger %a",n)
465--                     end
466--                 end
467--             elseif id == disc_code then
468--              -- local attr = getattr(start,a_cases)
469--                 local attr = takeattr(start,a_cases)
470--                 if attr and attr > 0 and not blocked[attr] then
471--                     if attr ~= lastattr then
472--                         lastattr = attr
473--                         count    = 0
474--                     end
475--                  -- setattr(start,a_cases,unsetvalue) -- not needed
476--                     local n, id, m = get(attr)
477--                     if lastfont[n] == nil then
478--                         lastfont[n] = id
479--                     end
480--                     local action = actions[n] -- map back to low number
481--                     if action then
482--                         local pre, post, replace = getdisc(start)
483--                         if replace then
484--                             local cnt = count
485--                             for g in glyph_code, replace do
486--                                 cnt = cnt + 1
487--                                 takeattr(g,a_cases)
488--                              -- setattr(g,a_cases,unsetvalue)
489--                                 local h, quit = action(start,attr,lastfont,n,cnt,"replace",g)
490--                                 if quit then
491--                                      break
492--                                 end
493--                             end
494--                         end
495--                         if pre then
496--                             local cnt = count
497--                             for g in nextglyph, pre do
498--                                 cnt = cnt + 1
499--                                 takeattr(g,a_cases)
500--                              -- setattr(g,a_cases,unsetvalue)
501--                                 local h, quit = action(start,attr,lastfont,n,cnt,"pre",g)
502--                                 if quit then
503--                                      break
504--                                 end
505--                             end
506--                         end
507--                         if post then
508--                             local cnt = count
509--                             for g in nextglyph, post do
510--                                 cnt = cnt + 1
511--                                 takeattr(g,a_cases)
512--                              -- setattr(g,a_cases,unsetvalue)
513--                                 local h, quit = action(start,attr,lastfont,n,cnt,"post",g)
514--                                 if quit then
515--                                      break
516--                                 end
517--                             end
518--                         end
519--                     end
520--                     count = count + 1
521--                 end
522--             elseif id == math_code then
523--                 start = endofmath(start)
524--                 count = 0
525--             elseif prev_id == kern_code and getsubtype(prev) == fontkern_code then
526--                 -- still inside a word ...normally kerns are added later
527--             else
528--                 count = 0
529--                 start = getnext(start)
530--                 break
531--             end
532--             if start then
533--                 prev   = start
534--                 previd = id
535--                 start  = getnext(start)
536--             end
537--         end
538--         if start then
539--             attr, start = findattribute(start,a_cases)
540--         end
541--     end
542--     return head
543-- end
544
545-- function cases.handler(head) -- let's assume head doesn't change ... no reason
546--     local lastfont = { }
547--     for first, last, size, attr in nuts.words(head,a_cases) do
548--         local n, id, m = get(attr)
549--         if lastfont[n] == nil then
550--             lastfont[n] = id
551--         end
552--         local action = actions[n]
553--         if action then
554--             action(first,attr,lastfont,n)
555--         end
556--     end
557--     return head
558-- end
559
local enabled = false

-- Set the case attribute for what follows.
--
-- 'n' is the name of a registered action, a number (or a string that
-- tonumber accepts) or the reset keyword; 'id' is the font id that gets
-- stored in the attribute value. With the reset keyword, or with an 'n' that
-- is neither registered nor a number, the attribute is unset. The first
-- request that really sets a value also enables the handler in the
-- "processors" task list.
function cases.set(n,id)
    local value = unsetvalue
    if n ~= v_reset then
        local tag = registered[n] or tonumber(n)
        if tag then
            if not enabled then
                enableaction("processors","typesetters.cases.handler")
                if trace_casing then
                    report_casing("enabling case handler")
                end
                enabled = true
            end
            value = set(tag,id)
        end
    end
    texsetattribute(a_cases,value)
 -- return value -- bonus
end
583
584-- interface
585
-- Make cases.set available under the name "setcharactercasing". The two
-- declared arguments, a string and an integer, correspond to the 'n' and
-- 'id' parameters of cases.set.
interfaces.implement {
    name      = "setcharactercasing",
    actions   = cases.set,
    arguments = { "string", "integer" }
}
591