1if not modules then modules = { } end modules ['lang-hup'] = {
2 version = 1.001,
3 comment = "companion to lang-hup.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local type, next = type, next
10local utfchar = utf.char
11local concat, sortedhash = table.concat, table.sortedhash
12local basename = file.basename
13
14local status = status
15local nodes = nodes
16
17local is_letter = characters.is_letter
18local is_hyphenator = characters.is_hyphenator
19
20local specialskips = nodes.specialskipcodes
21
22local nodecodes = nodes.nodecodes
23
24local disc_code <const> = nodecodes.disc
25local glyph_code <const> = nodecodes.glyph
26local glue_code <const> = nodecodes.glue
27local hlist_code <const> = nodecodes.hlist
28local kern_code <const> = nodecodes.kern
29local par_code <const> = nodecodes.par
30
31local line_code <const> = nodes.listcodes.line
32local fontkern_code <const> = nodes.kerncodes.fontkern
33
34local nuts = nodes.nuts
35local getlist = nuts.getlist
36local getnext = nuts.getnext
37local getprev = nuts.getprev
38local getid = nuts.getid
39local getsubtype = nuts.getsubtype
40local getreplace = nuts.getreplace
41local getdiscpart = nuts.getdiscpart
42local isnextglyph = nuts.isnextglyph
43local nexthlist = nuts.traversers.list
44local nextglyph = nuts.traversers.glyph
45local traverse = nuts.traverse
46
47local setcolor = nodes.tracers.colors.set
48local setaction = nodes.tasks.setaction
49
-- Per-word occurrence counter, keyed by the collected word string. The
-- increment in nodes.handlers.showhyphenation reads a key before it was ever
-- written, so unseen keys must read as a number.
-- NOTE(review): presumably the "number" index metatable yields 0 - confirm.
local hash = table.setmetatableindex("number")

local report = logs.reporter("hyphenated")
-- Set by the "hyphenation.applied.console" tracker; when true every counted
-- word is also reported together with the current read state.
local trace_detail = false

-- From here on 'characters' is the font character hash (indexed by font and
-- then by character in collect); it shadows the global 'characters' table that
-- was used above to fetch is_letter and is_hyphenator.
local characters = fonts.hashes.characters

-- Shared word buffer: 'word' holds utf strings, 'w' is the number of valid
-- entries. Only entries 1..w are concatenated, so stale entries beyond w are
-- harmless.
local word = { }
local w = 0
59
60
-- Appends the letters and hyphenators of the glyph run starting at 'head' to
-- the shared 'word' buffer (advancing 'w'). Scanning continues over glyphs,
-- over discretionaries (whose replace list is collected recursively) and over
-- font kerns; any other node ends the run.
--
-- Returns a true value only when at least one glyph was seen at this level and
-- the last such glyph has discpart 1; otherwise nil or false.
-- NOTE(review): discpart 1 presumably marks the pre-break part of a taken
-- discretionary - confirm against the engine documentation.
local function collect(head)
    local last = nil

    -- stores one code point when it is a letter or a hyphenator
    local function add(c)
        if is_letter[c] or is_hyphenator[c] then
            w = w + 1
            word[w] = utfchar(c)
        end
    end

    while head do
        -- for a glyph 'char' is set and 'id' is used as the font index,
        -- otherwise 'id' is compared against node codes
        local nxt, char, id = isnextglyph(head)
        if char then
            local u = characters[id][char].unicode
            if type(u) == "table" then
                -- one glyph that maps onto several code points
                for i=1,#u do
                    add(u[i])
                end
            else
                add(u or char)
            end
            last = head
        elseif id == disc_code then
            collect(getreplace(head))
        elseif id == kern_code and getsubtype(head) == fontkern_code then
            -- Font kerns belong to the word. The subtype has to be compared
            -- explicitly: a bare getsubtype(...) result is a number and every
            -- number (0 included) is true in Lua, which let all kerns pass.
        else
            break
        end
        head = nxt
    end
    return last and getdiscpart(last) == 1
end
95
-- Walks forward from 'current' and returns the first glyph or disc node.
-- Par nodes and glue whose subtype is listed in 'specialskips' are stepped
-- over; any other node (including other glue) ends the search without a
-- result.
local function getpostpart(current)
    for node, id, subtype in traverse(current) do
        if id == glyph_code or id == disc_code then
            return node
        elseif id == glue_code then
            if not specialskips[subtype] then
                return
            end
        elseif id ~= par_code then
            return
        end
    end
end
109
-- Looks for a glyph with discpart 1 in the list 'current', stepping over glue
-- whose subtype is listed in 'specialskips'. The first node that is neither
-- such glue nor such a glyph ends the search without a result.
-- NOTE(review): the two 'true' arguments presumably make traverse run
-- backwards starting at the tail of the list - confirm.
local function findprepart(current)
    for node, id, subtype in traverse(current,true,true) do
        if id ~= glue_code then
            if id == glyph_code and getdiscpart(node) == 1 then
                return node
            end
            return
        elseif not specialskips[subtype] then
            return
        end
    end
end
123
-- Starting at the node before 'disc', follows the traversal for as long as it
-- yields glyphs, discs or font kerns and returns the last such node reached.
-- When the very first node already fails that test, 'disc' itself is returned.
-- NOTE(review): the 'true' argument presumably makes traverse run backwards,
-- so the result would be the first node of the word - confirm.
local function getprepart(disc)
    local first = disc
    for node, id, subtype in traverse(getprev(disc),true) do
        local inword = id == glyph_code
                    or id == disc_code
                    or (id == kern_code and subtype == fontkern_code)
        if not inword then
            break
        end
        first = node
    end
    return first
end
135
-- Handler that counts the words that got broken at the end of a line.
--
-- For every line box in 'head': when a word is pending from the previous line
-- its continuation at the start of this line is collected first. If that
-- continuation ends in yet another break the line is done; otherwise the word
-- is counted in 'hash' (and reported when 'trace_detail' is set). After that
-- the line is checked for a glyph with discpart 1 (see findprepart), in which
-- case the word leading up to it becomes the new pending word.
--
-- The list itself is not modified; 'head' is returned as is.
function nodes.handlers.showhyphenation(head)
    -- every call starts with an empty word buffer
    w = 0
    for current, id, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            local continues = false
            if w > 0 then
                -- a word is pending: append what opens this line
                continues = collect(getpostpart(list))
                if not continues then
                    -- the word is complete, so count it and reset the buffer
                    local str = concat(word,"",1,w)
                    if trace_detail then
                        local state = status.readstate
                        report("around line %s in file %s: %s",state.linenumber or "-",basename(state.filename),str)
                    end
                    hash[str] = hash[str] + 1
                    w = 0
                end
            end
            if not continues then
                local tail = findprepart(list)
                if tail then
                    -- collect from the start of the word up to the break
                    collect(getprepart(tail))
                end
            end
        end
    end
    return head
end
171
-- One-shot setup, called by the trackers the first time they are enabled. It
-- registers
--
--   * a final action that logs every counted word with its count and writes
--     the words that are not in "<job>-hyphenated-words-old.txt" to
--     "<job>-hyphenated-words-new.txt", and
--   * a statistics entry with the total and unique word counts.
--
-- Afterwards the variable is set to false so that callers testing
-- 'initialize' do not register everything twice.
local initialize
initialize = function()
    logs.registerfinalactions(function()
        logs.startfilelogging(report,"hyphenated words")
        if hash and next(hash) then
            local jobname = file.nameonly(tex.jobname)
            local oldname = jobname .. "-hyphenated-words-old.txt"
            local newname = jobname .. "-hyphenated-words-new.txt"
            -- a missing old list is treated as an empty one
            local old = string.splitlines(string.strip(io.loaddata(oldname) or "")) or { }
            local hsh = table.tohash(old)
            local new = { }
            -- sorted, so that the log and the new word list are stable;
            -- 'str' avoids shadowing the file level 'word' buffer
            for str, count in sortedhash(hash) do
                report("%4i : %s",count,str)
                if not hsh[str] then
                    new[#new+1] = str
                end
            end
            logs.stopfilelogging()
            report("old word list : %a",oldname)
            report("new word list : %a",newname)
            report("to be checked : %a",#new)
            io.savedata(newname,concat(new,"\n"))
        else
            report("nothing hyphenated")
            logs.stopfilelogging()
        end
    end)

    statistics.register("hyphenation",function()
        local unique = 0
        local total  = 0
        -- order is irrelevant for counting, so there is no need to sort
        for _, count in next, hash do
            unique = unique + 1
            total  = total + count
        end
        return string.format("%i hyphenated words, %i unique words",total,unique)
    end)
    initialize = false
end
209
-- Shared by both trackers below: switches the showhyphenation finalizer on or
-- off and runs the one-time setup on first activation ('initialize' turns
-- itself into false once it has run).
local function toggleapplied(enabled)
    setaction("finalizers","nodes.handlers.showhyphenation",enabled)
    if enabled and initialize then
        initialize()
    end
end

-- Collect hyphenated words; they show up in the log at the end of the run.
trackers.register("hyphenation.applied", toggleapplied)

-- Same as above, but every counted word is also reported right away.
trackers.register("hyphenation.applied.console", function(enabled)
    trace_detail = enabled
    toggleapplied(enabled)
end)
224
225
226
227
228
-- Glyph subtype value that visualizehyphenation treats as "ligature".
-- NOTE(review): the 0x8000 offset is presumably a flag that the engine adds to
-- the subtype of processed glyphs - confirm.
local ligature_code <const> = 0x8000 + nodes.glyphcodes.ligature
-- Set by the "hyphenation.applied.visualize" tracker when called with "ligatures".
local ligature_mode = false

-- Colors indexed by discpart (1..3): color_n for regular glyphs, color_l for
-- glyphs whose subtype equals ligature_code.
local color_n = { "red", "green", "blue" }
local color_l = { "darkred", "darkgreen", "darkblue" }
234
-- Handler that colors the glyphs in every line box of 'head' according to
-- their discpart: values 1, 2 and 3 select an entry from color_n. In ligature
-- mode glyphs with the ligature subtype take their color from color_l instead,
-- and ligatures with any other discpart value become "darkgray".
--
-- Returns 'head' unchanged.
function nodes.handlers.visualizehyphenation(head)
    for current, id, subtype, list in nexthlist, head do
        if list and subtype == line_code then
            for glyph in nextglyph, list do
                local part   = getdiscpart(glyph)
                local marked = part > 0 and part < 4
                if not ligature_mode then
                    if marked then
                        setcolor(glyph,color_n[part])
                    end
                else
                    local isligature = getsubtype(glyph) == ligature_code
                    if marked then
                        local palette = isligature and color_l or color_n
                        setcolor(glyph,palette[part])
                    elseif isligature then
                        setcolor(glyph,"darkgray")
                    end
                end
            end
        end
    end
    return head
end
264
-- Switches the visualizer on or off; the value "ligatures" enables it too and
-- additionally selects the ligature aware coloring.
trackers.register("hyphenation.applied.visualize", { true, false, "ligatures" }, function(mode)
    setaction("finalizers","nodes.handlers.visualizehyphenation",mode)
    ligature_mode = (mode == "ligatures")
end)
269 |