lang-hup.lmt /size: 8409 b    last modification: 2021-10-28 13:51
1if not modules then modules = { } end modules ['lang-hup'] = {
2    version   = 1.001,
3    comment   = "companion to lang-hup.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local type, next = type, next
10local utfchar = utf.char
11local concat, sortedhash = table.concat, table.sortedhash
12local basename = file.basename
13
14local status        = status
15local nodes         = nodes
16
17local is_letter     = characters.is_letter
18local is_hyphenator = characters.is_hyphenator
19
20local specialskips  = nodes.specialskipcodes
21local nodecodes     = nodes.nodecodes
22local disc_code     = nodecodes.disc
23local glyph_code    = nodecodes.glyph
24local glue_code     = nodecodes.glue
25local hlist_code    = nodecodes.hlist
26local kern_code     = nodecodes.kern
27local par_code      = nodecodes.par
28local line_code     = nodes.listcodes.line
29local fontkern_code = nodes.kerncodes.fontkern
30local nuts          = nodes.nuts
31local getlist       = nuts.getlist
32local getnext       = nuts.getnext
33local getprev       = nuts.getprev
34local getid         = nuts.getid
35local getsubtype    = nuts.getsubtype
36local getreplace    = nuts.getreplace
37local getdiscpart   = nuts.getdiscpart
38local isnextglyph   = nuts.isnextglyph
39local nexthlist     = nuts.traversers.list
40local nextglyph     = nuts.traversers.glyph
41local traverse      = nuts.traverse
42
43local setcolor      = nodes.tracers.colors.set
44local setaction     = nodes.tasks.setaction
45
46local hash          = table.setmetatableindex("number")
47
48local report        = logs.reporter("hyphenated")
49local trace_detail  = false
50
51local characters    = fonts.hashes.characters
52
-- The characters of the word that is being collected end up in a shared
-- buffer: only the first 'w' slots of 'word' are valid, which permits reuse
-- of the table without wiping it.

local word = { }
local w    = 0

-- This one walks the list that starts at 'head' and appends the letters and
-- hyphenators that it encounters to the buffer. A discretionary contributes
-- its replacement list, font kerns are skipped and any other node ends the
-- scan. The result is true when the last glyph seen at this level has disc
-- part 1 (the value that findprepart also checks for, presumably the pre
-- break part -- to be confirmed), false when it has another value, and nil
-- when no glyph was seen at all.

local function collect(head)
    local last = nil
    while head do
        -- for a glyph the third value is used as index into the font
        -- character hash, in the other branches it is compared with node ids
        local nxt, char, id = isnextglyph(head)
        if char then
            -- a font can lack data for this slot, in which case we fall back
            -- on the character itself instead of triggering an error
            local data = characters[id][char]
            local u    = data and data.unicode -- we could cache it
            if type(u) == "table" then
                -- a multi character unicode (a table of code points)
                for i=1,#u do
                    local c = u[i]
                    if is_letter[c] or is_hyphenator[c] then
                        w = w + 1 ; word[w] = utfchar(c)
                    end
                end
            else
                local c = u or char
                if is_letter[c] or is_hyphenator[c] then
                    w = w + 1 ; word[w] = utfchar(c)
                end
            end
            last = head
        elseif id == disc_code then
            -- the result of the nested call is ignored on purpose: only
            -- glyphs at this level determine 'last'
            collect(getreplace(head))
        elseif id == kern_code and getsubtype(head) == fontkern_code then
            -- we're ok: a number (also zero) is always true in Lua so the
            -- subtype has to be compared explicitly, otherwise every kern
            -- would be accepted here
        else
            break
        end
        head = nxt
    end
    return last and getdiscpart(last) == 1
end
91
-- Returns the first glyph or disc node found when traversing from 'current',
-- stepping over par nodes and glue with a subtype listed in specialskips.
-- Nothing is returned when some other node (or an ordinary glue) shows up
-- first, or when the list runs out.

local function getpostpart(current)
    for node, id, subtype in traverse(current) do
        if id == glyph_code or id == disc_code then
            return node
        elseif id == glue_code then
            if not specialskips[subtype] then
                return
            end
        elseif id ~= par_code then
            return
        end
    end
end
105
-- Traverses with both extra traverse flags set (presumably backwards from
-- the tail of the list -- to be confirmed) and steps over glue with a subtype
-- listed in specialskips. The first other node decides: a glyph with disc
-- part 1 is returned, anything else yields nothing.

local function findprepart(current)
    for node, id, subtype in traverse(current,true,true) do
        if id == glyph_code then
            if getdiscpart(node) == 1 then
                return node
            end
            return
        elseif id ~= glue_code or not specialskips[subtype] then
            return
        end
    end
end
119
-- Starting at the node before 'disc' we traverse (with the second traverse
-- argument set, presumably in reverse -- to be confirmed) as long as we see
-- glyphs, discretionaries or font kerns. The last such node visited is
-- returned, or 'disc' itself when there is none.

local function getprepart(disc)
    local first = disc
    for node, id, subtype in traverse(getprev(disc),true) do
        local inword = id == glyph_code
                    or id == disc_code
                    or (id == kern_code and subtype == fontkern_code)
        if not inword then
            break
        end
        first = node
    end
    return first
end
131
-- The handler that counts the words that got hyphenated. For each line in
-- the list we first complete a word that was started at the end of a
-- previous line (when there is one) and then check if this line itself ends
-- in the middle of a word. Completed words are counted in 'hash'. The head
-- is returned untouched.

function nodes.handlers.showhyphenation(head)
    w = 0
    for _, _, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            -- set when the pending word also runs beyond this line, in which
            -- case we just keep collecting at the next line
            local unfinished = false
            if w > 0 then
                local start = getpostpart(list)
                unfinished = collect(start)
                if not unfinished then
                    local result = concat(word,"",1,w)
                    if trace_detail then
                        local r = status.readstate
                        report("around line %s in file %s: %s",r.linenumber or "-",basename(r.filename),result)
                    end
                    hash[result] = hash[result] + 1
                    w = 0
                end
            end
            if not unfinished then
                -- start a new word when this line ends with a broken one
                local tail = findprepart(list)
                if tail then
                    local start = getprepart(tail)
                    collect(start)
                end
            end
        end
    end
    return head
end
167
-- A one time setup, triggered by the trackers: we register a final action
-- that logs the collected words and saves the ones that are not in the old
-- word list, plus a statistics entry. Afterwards the variable is set to
-- false so that we don't register twice.

local initialize

initialize = function()

    -- logs all words with their counts and writes the new ones to file

    local function listwords()
        logs.startfilelogging(report,"hyphenated words")
        if not (hash and next(hash)) then
            report("nothing hyphenated")
            logs.stopfilelogging()
            return
        end
        local jobname = file.nameonly(tex.jobname)
        local oldname = jobname .. "-hyphenated-words-old.txt"
        local newname = jobname .. "-hyphenated-words-new.txt"
        local olddata = io.loaddata(oldname) or ""
        local known   = table.tohash(string.splitlines(string.strip(olddata)) or { })
        local fresh   = { }
        for entry, count in sortedhash(hash) do
            report("%4i : %s",count,entry)
            if not known[entry] then
                fresh[#fresh+1] = entry
            end
        end
        logs.stopfilelogging()
        report("old word list : %a",oldname)
        report("new word list : %a",newname)
        report("to be checked : %a",#fresh)
        io.savedata(newname,concat(fresh,"\n"))
    end

    -- reports the total number of hyphenations and the number of distinct words

    local function summary()
        local unique = 0
        local total  = 0
        for _, count in sortedhash(hash) do
            unique = unique + 1
            total  = total + count
        end
        return string.format("%i hyphenated words, %i unique words",total,unique)
    end

    logs.registerfinalactions(listwords)
    --
    statistics.register("hyphenation",summary)
    --
    initialize = false
end
205
-- Both trackers switch the showhyphenation finalizer on or off and take care
-- of the one time initialization; the console variant in addition controls
-- the per word reporting.

do

    local function toggle(enabled)
        setaction("finalizers","nodes.handlers.showhyphenation",enabled)
        if enabled and initialize then
            initialize()
        end
    end

    trackers.register("hyphenation.applied", toggle)

    trackers.register("hyphenation.applied.console", function(enabled)
        trace_detail = enabled
        toggle(enabled)
    end)

end
220
221-- local c, f = isglyph(current)
222-- local char = chardata[f][c]
223-- if char and type(char.unicode) == "table" then -- hackery test
224
-- The subtype value that we compare glyphs with in ligature mode (the
-- ligature glyph code plus 0x8000, presumably some flag bit -- to be
-- confirmed).

local ligature_code = 0x8000 + nodes.glyphcodes.ligature
local ligature_mode = false

-- Colors indexed by disc part (1, 2 or 3): the normal set and a darker set
-- that is used for ligatures.

local color_n = { "red",     "green",     "blue"     }
local color_l = { "darkred", "darkgreen", "darkblue" }

-- This handler colors the glyphs in each line that have a disc part in the
-- range 1 upto 3. In ligature mode ligatures get the darker variant and
-- ligatures outside that range become dark gray. The head is returned
-- untouched.

function nodes.handlers.visualizehyphenation(head)
    for _, _, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            for glyph in nextglyph, list do
                local part     = getdiscpart(glyph)
                local indisc   = part > 0 and part < 4
                -- the subtype is only consulted in ligature mode
                local ligature = ligature_mode and getsubtype(glyph) == ligature_code
                if indisc then
                    setcolor(glyph,(ligature and color_l or color_n)[part])
                elseif ligature then
                    setcolor(glyph,"darkgray")
                end
            end
        end
    end
    return head
end
260
-- The visualizer tracker: the value "ligatures" additionally enables the
-- ligature colors, any value is passed on to the finalizer switch.

trackers.register("hyphenation.applied.visualize", { true, false, "ligatures" }, function(mode)
    ligature_mode = (mode == "ligatures")
    setaction("finalizers","nodes.handlers.visualizehyphenation",mode)
end)
265