1if not modules then modules = { } end modules ['lang-hup'] = {
2 version = 1.001,
3 comment = "companion to lang-hup.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local type, next = type, next
10local utfchar = utf.char
11local concat, sortedhash = table.concat, table.sortedhash
12local basename = file.basename
13
14local status = status
15local nodes = nodes
16
17local is_letter = characters.is_letter
18local is_hyphenator = characters.is_hyphenator
19
20local specialskips = nodes.specialskipcodes
21local nodecodes = nodes.nodecodes
22local disc_code = nodecodes.disc
23local glyph_code = nodecodes.glyph
24local glue_code = nodecodes.glue
25local hlist_code = nodecodes.hlist
26local kern_code = nodecodes.kern
27local par_code = nodecodes.par
28local line_code = nodes.listcodes.line
29local fontkern_code = nodes.kerncodes.fontkern
30local nuts = nodes.nuts
31local getlist = nuts.getlist
32local getnext = nuts.getnext
33local getprev = nuts.getprev
34local getid = nuts.getid
35local getsubtype = nuts.getsubtype
36local getreplace = nuts.getreplace
37local getdiscpart = nuts.getdiscpart
38local isnextglyph = nuts.isnextglyph
39local nexthlist = nuts.traversers.list
40local nextglyph = nuts.traversers.glyph
41local traverse = nuts.traverse
42
43local setcolor = nodes.tracers.colors.set
44local setaction = nodes.tasks.setaction
45
-- Occurrence count per collected word; showhyphenation increments an entry
-- without initializing it first, so the "number" metatable index presumably
-- supplies a default of 0 (TODO confirm against table.setmetatableindex).
local hash = table.setmetatableindex("number")

local report = logs.reporter("hyphenated")
-- When true, every collected word is also reported together with the current
-- read state; set by the "hyphenation.applied.console" tracker.
local trace_detail = false

-- Note: from here on this local shadows the global 'characters' table that was
-- used above for is_letter and is_hyphenator.
local characters = fonts.hashes.characters

-- Buffer for the word that is being assembled: word[1..w] holds its utf
-- characters, w is the number of slots in use (the table itself is reused).
local word = { }
local w = 0
55
56
-- Appends the character with code point c to the shared word buffer, but only
-- when it is flagged as a letter or as a hyphenator.
local function addtoword(c)
    if is_letter[c] or is_hyphenator[c] then
        w = w + 1
        word[w] = utfchar(c)
    end
end

-- Walks the node list that starts at head and appends the letters and
-- hyphenators of the glyphs it meets to the word buffer (word, w).
-- Discretionaries contribute their replace list, font kerns are stepped over
-- and any other node ends the scan.
--
-- Returns a true value only when the last glyph seen at this level has disc
-- part 1; showhyphenation takes that as "the word is not complete yet".
local function collect(head)
    local last = nil
    while head do
        -- for a glyph 'char' is set and 'id' is used as index in the font
        -- character hash, otherwise 'id' is compared with the node codes
        local nxt, char, id = isnextglyph(head)
        if char then
            -- be tolerant with respect to glyphs without character data
            local data = characters[id][char]
            local u = data and data.unicode
            if type(u) == "table" then
                -- one glyph can map onto several code points
                for i=1,#u do
                    addtoword(u[i])
                end
            else
                addtoword(u or char)
            end
            last = head
        elseif id == disc_code then
            collect(getreplace(head))
        elseif id == kern_code and getsubtype(head) == fontkern_code then
            -- Font kerns are part of the word. The subtype has to be compared
            -- explicitly: any number (0 included) is true in Lua, so testing
            -- the bare result of getsubtype would accept every kern.
        else
            break
        end
        head = nxt
    end
    return last and getdiscpart(last) == 1
end
91
-- Looks for the first glyph or discretionary in the list that starts at
-- current. Par nodes and glue with a subtype listed in specialskips are
-- stepped over; any other node ends the search without a result.
local function getpostpart(current)
    for node, id, subtype in traverse(current) do
        if id == glyph_code or id == disc_code then
            return node
        elseif id == glue_code then
            if not specialskips[subtype] then
                return
            end
        elseif id ~= par_code then
            return
        end
    end
end
105
-- Traverses the list that starts at current, using the (true,true) traverse
-- mode, and returns the first node that is not a special skip when that node
-- is a glyph with disc part 1. In all other cases nothing is returned.
local function findprepart(current)
    for node, id, subtype in traverse(current,true,true) do
        local skippable = id == glue_code and specialskips[subtype]
        if not skippable then
            if id == glyph_code and getdiscpart(node) == 1 then
                return node
            end
            return
        end
    end
end
119
-- Goes back from the node before disc as long as glyphs, discretionaries and
-- font kerns are seen and returns the last such node that was visited. When
-- there is no such node, disc itself is returned.
local function getprepart(disc)
    local first = disc
    for node, id, subtype in traverse(getprev(disc),true) do
        local wordpart = id == glyph_code
                      or id == disc_code
                      or (id == kern_code and subtype == fontkern_code)
        if not wordpart then
            break
        end
        first = node
    end
    return first
end
131
-- Node list handler that registers words that are spread over two (or more)
-- lines. For every line in head: when a word is pending from the previous
-- line its continuation is collected and, once complete, the word is counted
-- in hash; after that the end of the line is checked for the start of a new
-- pending word. The list itself is returned unchanged.
function nodes.handlers.showhyphenation(head)
    -- every call starts with an empty word buffer
    w = 0
    for line, _, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            -- true when the pending word also runs into the next line
            local continued = false
            if w > 0 then
                continued = collect(getpostpart(list))
                if not continued then
                    -- the word is complete: report (optional) and count it
                    local result = concat(word,"",1,w)
                    if trace_detail then
                        local state = status.readstate
                        report("around line %s in file %s: %s",state.linenumber or "-",basename(state.filename),result)
                    end
                    hash[result] = hash[result] + 1
                    w = 0
                end
            end
            if not continued then
                -- does this line end with the first part of a word?
                local last = findprepart(list)
                if last then
                    collect(getprepart(last))
                end
            end
        end
    end
    return head
end
167
-- One-time setup that is triggered by the trackers below. It registers a
-- final action that logs the collected words and saves the ones that are not
-- in the old word list, plus a statistics entry with the totals. At the end
-- the variable is set to false so that the setup is not repeated.
local function initialize()

    logs.registerfinalactions(function()
        logs.startfilelogging(report,"hyphenated words")
        if not (hash and next(hash)) then
            report("nothing hyphenated")
            logs.stopfilelogging()
            return
        end
        local jobname = file.nameonly(tex.jobname)
        local oldname = jobname .. "-hyphenated-words-old.txt"
        local newname = jobname .. "-hyphenated-words-new.txt"
        -- a missing old file is treated as an empty list
        local olddata = io.loaddata(oldname) or ""
        local oldlist = string.splitlines(string.strip(olddata)) or { }
        local known   = table.tohash(oldlist)
        local fresh   = { }
        for entry, count in sortedhash(hash) do
            report("%4i : %s",count,entry)
            if not known[entry] then
                fresh[#fresh+1] = entry
            end
        end
        -- the summary below is reported after the file logging has stopped
        logs.stopfilelogging()
        report("old word list : %a",oldname)
        report("new word list : %a",newname)
        report("to be checked : %a",#fresh)
        io.savedata(newname,concat(fresh,"\n"))
    end)

    statistics.register("hyphenation",function()
        local unique = 0
        local total  = 0
        for _, count in sortedhash(hash) do
            unique = unique + 1
            total  = total + count
        end
        return string.format("%i hyphenated words, %i unique words",total,unique)
    end)

    initialize = false
end
205
-- Switches the collecting handler on or off; the first time it is enabled the
-- one-time setup is run as well.
trackers.register("hyphenation.applied", function(enabled)
    setaction("finalizers","nodes.handlers.showhyphenation",enabled)
    if initialize and enabled then
        initialize()
    end
end)
212
-- Same as "hyphenation.applied" but it also controls the detailed report that
-- is issued for every collected word.
trackers.register("hyphenation.applied.console", function(enabled)
    setaction("finalizers","nodes.handlers.showhyphenation",enabled)
    trace_detail = enabled
    if initialize and enabled then
        initialize()
    end
end)
220
221
222
223
224
-- Glyph subtype value that the visualizer compares against in order to spot
-- ligatures. NOTE(review): the meaning of the 0x8000 offset is not visible in
-- this file -- confirm against the engine documentation.
local ligature_code = 0x8000 + nodes.glyphcodes.ligature
-- Set by the "hyphenation.applied.visualize" tracker: true when its value is
-- "ligatures".
local ligature_mode = false

-- Colors indexed by the disc part value (1, 2 or 3) of a glyph: color_n is
-- used for regular glyphs and color_l for glyphs with the ligature subtype.
local color_n = { "red",     "green",     "blue"     }
local color_l = { "darkred", "darkgreen", "darkblue" }
230
-- Node list handler that colors the glyphs in every line of head according to
-- their disc part (values 1, 2 and 3 select an entry from the color tables).
-- In ligature mode, glyphs with the ligature subtype get the dark variant of
-- that color, or "darkgray" when their disc part is outside that range. The
-- list itself is returned unchanged.
function nodes.handlers.visualizehyphenation(head)
    for line, _, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            for glyph in nextglyph, list do
                local part = getdiscpart(glyph)
                -- the subtype is only looked at in ligature mode
                local isligature = ligature_mode and getsubtype(glyph) == ligature_code
                if part > 0 and part < 4 then
                    local colors = isligature and color_l or color_n
                    setcolor(glyph,colors[part])
                elseif isligature then
                    setcolor(glyph,"darkgray")
                end
            end
        end
    end
    return head
end
260
-- Switches the visualizer on or off; the value "ligatures" additionally makes
-- it treat ligatures in a special way.
trackers.register("hyphenation.applied.visualize", { true, false, "ligatures" }, function(value)
    setaction("finalizers","nodes.handlers.visualizehyphenation",value)
    ligature_mode = (value == "ligatures")
end)
265 |