-- lang-hup.lmt / size: 8501 b / last modification: 2025-02-21 11:03
-- Make sure the global module registry exists, then record the metadata of
-- this file in it under its own name.
modules = modules or { }

modules['lang-hup'] = {
    version   = 1.001,
    comment   = "companion to lang-hup.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local type, next = type, next
10local utfchar = utf.char
11local concat, sortedhash = table.concat, table.sortedhash
12local basename = file.basename
13
14local status        = status
15local nodes         = nodes
16
17local is_letter     = characters.is_letter
18local is_hyphenator = characters.is_hyphenator
19
20local specialskips  = nodes.specialskipcodes
21
22local nodecodes     = nodes.nodecodes
23
24local disc_code     <const> = nodecodes.disc
25local glyph_code    <const> = nodecodes.glyph
26local glue_code     <const> = nodecodes.glue
27local hlist_code    <const> = nodecodes.hlist
28local kern_code     <const> = nodecodes.kern
29local par_code      <const> = nodecodes.par
30
31local line_code     <const> = nodes.listcodes.line
32local fontkern_code <const> = nodes.kerncodes.fontkern
33
34local nuts          = nodes.nuts
35local getlist       = nuts.getlist
36local getnext       = nuts.getnext
37local getprev       = nuts.getprev
38local getid         = nuts.getid
39local getsubtype    = nuts.getsubtype
40local getreplace    = nuts.getreplace
41local getdiscpart   = nuts.getdiscpart
42local isnextglyph   = nuts.isnextglyph
43local nexthlist     = nuts.traversers.list
44local nextglyph     = nuts.traversers.glyph
45local traverse      = nuts.traverse
46
47local setcolor      = nodes.tracers.colors.set
48local setaction     = nodes.tasks.setaction
49
-- Occurrence count per hyphenated word. The 'hash[result] + 1' update further
-- down relies on unknown keys reading as a number; presumably that is what the
-- "number" metatable index provides (TODO confirm).
local hash = table.setmetatableindex("number")

local report       = logs.reporter("hyphenated")
local trace_detail = false -- set by the "hyphenation.applied.console" tracker

-- From here on 'characters' is the per-font character hash; it shadows the
-- global 'characters' table that was used above for is_letter/is_hyphenator.
local characters = fonts.hashes.characters

-- Shared buffer holding the utf characters of the word being assembled; only
-- the first 'w' slots are meaningful.
local word, w = { }, 0
59
60----- function collect(word,head)
-- Walks the node list starting at 'head' and appends every letter or
-- hyphenator it finds to the shared 'word' buffer (advancing 'w'). Glyphs are
-- taken as they are, discretionaries contribute their replace list, font
-- kerns are stepped over, and any other node ends the walk.
--
-- Returns a true value when the last glyph seen at this level has disc part 1
-- (presumably the pre-break part, i.e. the word continues on the next line);
-- returns nil when no glyph was seen at all.
local function collect(head)
    local last = nil
    while head do
        -- for a glyph 'id' is used as index into the font character hash, so
        -- presumably it is the font id; otherwise 'char' is false/nil and
        -- 'id' is compared with node codes
        local nxt, char, id = isnextglyph(head)
        if char then
            -- a character can be missing from the font hash, so guard the
            -- lookup and fall back to the character code itself
            local data = characters[id][char]
            local u    = data and data.unicode -- we could cache it
            if type(u) == "table" then
                -- one glyph standing for several code points
                for i=1,#u do
                    local c = u[i]
                    if is_letter[c] or is_hyphenator[c] then
                        w = w + 1 ; word[w] = utfchar(c)
                    end
                end
            else
                local c = u or char
                if is_letter[c] or is_hyphenator[c] then
                    w = w + 1 ; word[w] = utfchar(c)
                end
            end
            last = head
        elseif id == disc_code then
            -- the result of the nested call is ignored on purpose: only
            -- glyphs at this level decide the return value
            collect(getreplace(head))
        elseif id == kern_code and getsubtype(head) == fontkern_code then
            -- we're ok: compare explicitly (as getprepart does), because a
            -- bare subtype number is always true in Lua and would let every
            -- kind of kern pass
        else
            break
        end
        head = nxt
    end
    return last and getdiscpart(last) == 1
end
95
-- Looks for the first glyph or discretionary in the list that starts at
-- 'current'. Par nodes and glue whose subtype is listed in 'specialskips' are
-- stepped over; any other node ends the search. Returns the node found, or
-- nothing.
local function getpostpart(current)
    for node, id, subtype in traverse(current) do
        if id == glyph_code or id == disc_code then
            return node
        elseif id == glue_code then
            if not specialskips[subtype] then
                -- ordinary glue: the line does not start with text
                return
            end
        elseif id ~= par_code then
            return
        end
    end
end
109
-- Looks for a glyph with disc part 1 (presumably the pre-break part of a
-- discretionary). The two extra 'true' arguments to traverse presumably make
-- it run backwards from the tail of the list -- TODO confirm. Glue with a
-- subtype listed in 'specialskips' is stepped over; anything else that is not
-- such a glyph ends the search. Returns the glyph, or nothing.
local function findprepart(current)
    for node, id, subtype in traverse(current,true,true) do
        if id == glue_code and specialskips[subtype] then
            -- special skip, keep looking
        elseif id == glyph_code and getdiscpart(node) == 1 then
            return node
        else
            return
        end
    end
end
123
-- Starting at the node before 'disc', follows the run of glyphs,
-- discretionaries and font kerns (presumably backwards, given the 'true'
-- argument to traverse) and returns the furthest node of that run. When the
-- run is empty 'disc' itself is returned.
-- NOTE(review): the caller passes the glyph found by findprepart, so 'disc'
-- is not necessarily a disc node.
local function getprepart(disc)
    local first = disc
    for node, id, subtype in traverse(getprev(disc),true) do
        local inword = id == glyph_code
                    or id == disc_code
                    or (id == kern_code and subtype == fontkern_code)
        if not inword then
            break
        end
        first = node
    end
    return first
end
135
-- Handler that counts the words that got broken over two (or more) lines.
--
-- For every hlist with the 'line' subtype: when a word fragment is pending
-- from an earlier line, the text at the start of this line is appended to it.
-- If that text again ends in a fragment, the word is still open and nothing
-- else happens for this line. Otherwise the completed word is counted in
-- 'hash' (and reported when console tracing is on), after which the end of
-- this line is inspected for a new fragment. The list is returned unchanged.
function nodes.handlers.showhyphenation(head)
    w = 0
    for current, id, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            local continued = false
            if w > 0 then
                local after = getpostpart(list)
                continued = collect(after)
                if not continued then
                    local result = concat(word,"",1,w)
                    if trace_detail then
                        local r = status.readstate
                        report("around line %s in file %s: %s",r.linenumber or "-",basename(r.filename),result)
                    end
                    hash[result] = hash[result] + 1
                    w = 0
                end
            end
            if not continued then
                -- start a new fragment when this line ends in one
                local last = findprepart(list)
                if last then
                    collect(getprepart(last))
                end
            end
        end
    end
    return head
end
171
-- One-time setup, run when one of the trackers gets enabled for the first
-- time. It registers a final action that logs the collected words and writes
-- those not present in the "old" list to the "new" list file, plus a
-- statistics line with the totals. Afterwards 'initialize' is set to false so
-- that the trackers do not run it again.
local function initialize()

    -- final action: report all words and save the unknown ones
    local function savewords()
        logs.startfilelogging(report,"hyphenated words")
        if not (hash and next(hash)) then
            report("nothing hyphenated")
            logs.stopfilelogging()
            return
        end
        local jobname = file.nameonly(tex.jobname)
        local oldname = jobname .. "-hyphenated-words-old.txt"
        local newname = jobname .. "-hyphenated-words-new.txt"
        -- a missing old file is treated as an empty list
        local olddata = io.loaddata(oldname) or ""
        local known   = table.tohash(string.splitlines(string.strip(olddata)) or { })
        local unknown = { }
        local u       = 0
        for word, count in sortedhash(hash) do
            report("%4i : %s",count,word)
            if not known[word] then
                u = u + 1
                unknown[u] = word
            end
        end
        logs.stopfilelogging()
        report("old word list : %a",oldname)
        report("new word list : %a",newname)
        report("to be checked : %a",u)
        io.savedata(newname,concat(unknown,"\n"))
    end

    -- statistics: total number of hyphenated words and number of distinct ones
    local function summary()
        local unique = 0
        local total  = 0
        for _, count in sortedhash(hash) do
            unique = unique + 1
            total  = total + count
        end
        return string.format("%i hyphenated words, %i unique words",total,unique)
    end

    logs.registerfinalactions(savewords)
    statistics.register("hyphenation",summary)
    initialize = false
end
209
-- Tracker that switches the showhyphenation finalizer on or off; the first
-- time it is enabled the one-time setup is run as well.
trackers.register("hyphenation.applied", function(enabled)
    setaction("finalizers","nodes.handlers.showhyphenation",enabled)
    if initialize and enabled then
        initialize()
    end
end)
216
-- Same as "hyphenation.applied", but additionally makes the handler report
-- each word (with input line and file) at the moment it is seen.
trackers.register("hyphenation.applied.console", function(enabled)
    trace_detail = enabled
    setaction("finalizers","nodes.handlers.showhyphenation",enabled)
    if initialize and enabled then
        initialize()
    end
end)
224
225-- local c, f = isglyph(current)
226-- local char = chardata[f][c]
227-- if char and type(char.unicode) == "table" then -- hackery test
228
-- Glyph subtype value that is taken to mark a ligature.
-- NOTE(review): the original author flagged this value with "still valid?".
local ligature_code <const> = 0x8000 + nodes.glyphcodes.ligature

-- When true, ligatures get their own colors in the visualizer.
local ligature_mode = false

-- Colors indexed by disc part (1, 2, 3): for normal glyphs ...
local color_n = {
    "red",
    "green",
    "blue",
}

-- ... and for ligatures.
local color_l = {
    "darkred",
    "darkgreen",
    "darkblue",
}
234
-- Handler that colors the glyphs of every line according to their disc part
-- (values 1, 2 and 3 index the color tables). In ligature mode, glyphs with
-- the ligature subtype use the darker palette, and ligatures with any other
-- disc part value become dark gray. The list is returned as it came in.
function nodes.handlers.visualizehyphenation(head)
    for current, id, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            for glyph in nextglyph, list do
                local part       = getdiscpart(glyph)
                -- the subtype is only looked at in ligature mode
                local isligature = ligature_mode and getsubtype(glyph) == ligature_code
                if part > 0 and part < 4 then
                    local palette = isligature and color_l or color_n
                    setcolor(glyph,palette[part])
                elseif isligature then
                    setcolor(glyph,"darkgray")
                end
            end
        end
    end
    return head
end
264
-- Tracker that switches the visualizer on or off; the value "ligatures"
-- enables it too and in addition selects the ligature aware coloring.
trackers.register("hyphenation.applied.visualize", { true, false, "ligatures" }, function(mode)
    ligature_mode = (mode == "ligatures")
    setaction("finalizers","nodes.handlers.visualizehyphenation",mode)
end)
269