lang-url.lmt /size: 7337 b    last modification: 2021-10-28 13:51
1if not modules then modules = { } end modules ['lang-url'] = {
2    version   = 1.001,
3    comment   = "companion to lang-url.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local next = next
10local utfcharacters, utfbyte, utfchar = utf.characters, utf.byte, utf.char
11local min, max = math.min, math.max
12local setmetatableindex = table.setmetatableindex
13
14local context          = context
15local ctx_pushcatcodes = context.pushcatcodes
16local ctx_popcatcodes  = context.popcatcodes
17
18local implement = interfaces.implement
19local variables = interfaces.variables
20
21local v_before  = variables.before
22local v_after   = variables.after
23
24local is_letter = characters.is_letter
25
26--[[
27<p>Hyphenating <l n='url'/>'s is somewhat tricky and a matter of taste. I did
28consider using a dedicated hyphenation pattern or dealing with it by node
29parsing, but the following solution suits as well. After all, we're mostly
30dealing with <l n='ascii'/> characters.</p>
31]]--
32
33local urls     = { }
34languages.urls = urls
35
-- Break class per character. The action function below compares these values
-- against v_before and v_after, so the same constants are stored here instead
-- of string literals; that way the table and its consumer cannot get out of
-- sync. Characters that are not listed get neither class.
local characters = utilities.storage.allocate {
    ["!"]  = v_before,
    ['"']  = v_before,
    ["#"]  = v_before,
    ["$"]  = v_before,
    ["%"]  = v_before,
    ["&"]  = v_before,
    ["("]  = v_before,
    ["*"]  = v_before,
    ["+"]  = v_before,
    [","]  = v_before,
    ["-"]  = v_before,
    ["."]  = v_before,
    ["/"]  = v_before,
    [":"]  = v_before,
    [";"]  = v_before,
    ["<"]  = v_before,
    ["="]  = v_before,
    [">"]  = v_before,
    ["?"]  = v_before,
    ["@"]  = v_before,
    ["["]  = v_before,
    ["\\"] = v_before,
    ["^"]  = v_before,
    ["_"]  = v_before,
    ["`"]  = v_before,
    ["{"]  = v_before,
    ["|"]  = v_before,
    ["~"]  = v_before,

    ["'"]  = v_after,
    [")"]  = v_after,
    ["]"]  = v_after,
    ["}"]  = v_after,
}
71
-- Character replacements that action applies (mapping[char] or char) before
-- a character is classified; empty by default.
local mapping = utilities.storage.allocate {
  -- [utfchar(0xA0)] = "~", -- nbsp (catch)
}

-- Public tables and the defaults that action falls back on.
urls.characters     = characters
urls.mapping        = mapping
urls.lefthyphenmin  = 2
urls.righthyphenmin = 3
urls.discretionary  = nil
urls.packslashes    = false

-- The directive toggles the packslashes default.
directives.register("hyphenators.urls.packslashes",function(v)
    urls.packslashes = v
end)

-- The tracker toggles the old/new reporting done in action.
local trace = false

trackers.register("hyphenators.urls",function(v)
    trace = v
end)

local report = logs.reporter("hyphenators","urls")
87
88-- local ctx_a = context.a
89-- local ctx_b = context.b
90-- local ctx_d = context.d
91-- local ctx_c = context.c
92-- local ctx_l = context.l
93-- local ctx_C = context.C
94-- local ctx_L = context.L
95
96-- local function action(hyphenatedurl,str,left,right,disc)
97--     --
98--     left  = max(      left  or urls.lefthyphenmin,    2)
99--     right = min(#str-(right or urls.righthyphenmin)+2,#str)
100--     disc  = disc or urls.discretionary
101--     --
102--     local word   = nil
103--     local prev   = nil
104--     local pack   = urls.packslashes
105--     local length = 0
106--     --
107--     for char in utfcharacters(str) do
108--         length  = length + 1
109--         char    = mapping[char] or char
110--         local b = utfbyte(char)
111--         if prev == char and prev == "/" then
112--             ctx_c(b)
113--         elseif char == disc then
114--             ctx_d()
115--         else
116--             if prev == "/" then
117--                 ctx_d()
118--             end
119--             local how = characters[char]
120--             if how == v_before then
121--                 word = false
122--                 ctx_b(b)
123--             elseif how == v_after then
124--                 word = false
125--                 ctx_a(b)
126--             else
127--                 local letter = is_letter[char]
128--                 if length <= left or length >= right then
129--                     if word and letter then
130--                         ctx_L(b)
131--                     else
132--                         ctx_C(b)
133--                     end
134--                 elseif word and letter then
135--                     ctx_l(b)
136--                 else
137--                     ctx_c(b)
138--                 end
139--                 word = letter
140--             end
141--         end
142--         if pack then
143--             prev = char
144--         else
145--             prev = nil
146--         end
147--     end
148-- end
149
-- Turn a url into a sequence of \lang_url_* macro calls and pass that to TeX
-- under the "prtcatcodes" catcode regime.
--
-- hyphenatedurl : not used in the body; action is installed as a call handler
--                 and therefore gets the called table as first argument
-- str           : the url (utf-8)
-- left          : number of leading characters that get the uppercase (C/L)
--                 variants; defaults to urls.lefthyphenmin, never below 2
-- right         : combined with the character count into the index from which
--                 on characters get the uppercase variants again; defaults to
--                 urls.righthyphenmin
-- disc          : character that is replaced by \lang_url_d; defaults to
--                 urls.discretionary
--
-- Per character one of these is emitted, with the code point as argument:
--
--   b / a : the character is listed as v_before / v_after in characters
--   c / C : any other character (C inside the left/right zones)
--   l / L : a letter that follows a letter (L inside the left/right zones)
--
-- What the macros do is decided at the TeX end.
local function action(hyphenatedurl,str,left,right,disc)
    local list = utf.split(str)
    local size = #list
    -- The boundaries are compared with a character index, so the right hand
    -- one has to be derived from the number of characters: the byte length
    -- (#str) is too large as soon as the url has multi-byte characters.
    left  = max(       left  or urls.lefthyphenmin,     2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local word = nil
    local pack = urls.packslashes
    local prev = nil

    for i=1,size do
        local char = list[i]
        char       = mapping[char] or char
        if char == disc then
            list[i] = "\\lang_url_d "
        else
            local what = nil
            -- list[i+1] is still the raw next character, list[i-1] has been
            -- replaced already, which is why prev is tracked
            if pack and char == "/" and (list[i+1] == "/" or prev == "/") then
                what = "c"
            else
                local how = characters[char]
                if how == v_before then
                    -- NOTE(review): the commented older variant of this
                    -- function also did word = false here; it is left as it
                    -- is because resetting changes where breaks show up
                    what = "b"
                elseif how == v_after then
                    word = false
                    what = "a"
                else
                    local letter = is_letter[char]
                    if i <= left or i >= right then
                        if word and letter then
                            what = "L"
                        else
                            what = "C"
                        end
                    elseif word and letter then
                        what = "l"
                    else
                        what = "c"
                    end
                    word = letter
                end
            end
            list[i] = "\\lang_url_" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    ctx_pushcatcodes("prtcatcodes")
    context("%t",list)
    ctx_popcatcodes()
end
211
212-- urls.action = function(_,...) action(...) end -- sort of obsolete
213
-- Presumably installs action as the call handler of hyphenatedurl.
-- NOTE(review): hyphenatedurl is not declared anywhere in this file, so unless
-- it is a global that is set elsewhere, nil is passed here and also as first
-- argument of action in the "hyphenatedurl" implement below -- confirm.
table.setmetatablecall(hyphenatedurl,action) -- watch out: a caller
215
216-- todo, no interface in mkiv yet
217
218local registerfunction   = context.functions.register
219local unregisterfunction = context.functions.unregister
220local savelua            = token.savelua
221
-- Put back the entries that urls.setcharacters changed and unregister the
-- restore function. A saved false means that the character had no entry
-- before, so its entry is removed again.
local function restorevalues(savedchars,restore)
    for k, v in next, savedchars do
        characters[k] = v or nil
    end
    unregisterfunction(restore)
end

-- Give every character in str the break class value (v_before when no value
-- is passed) and arrange for the old classes to come back later.
--
-- str   : the characters to change (utf-8)
-- value : the new class, compared elsewhere against v_before and v_after
--
-- The restore function is registered and handed to token.savelua, which
-- presumably calls it when the current TeX group ends -- TODO confirm.
function urls.setcharacters(str,value)
    local savedchars = { }
    local newvalue   = value or v_before
    for s in utfcharacters(str) do
        local oldvalue = characters[s]
        if oldvalue ~= newvalue then
            -- A nil can't be stored in a table, so a character without an
            -- entry is saved as false; otherwise the change would never be
            -- undone and, when only such characters change, no restore
            -- would be registered at all.
            savedchars[s] = oldvalue or false
            characters[s] = newvalue
        end
    end
    if next(savedchars) then
        local restore = nil
        restore = registerfunction(function() restorevalues(savedchars,restore) end)
        savelua(restore)
    end
end
245
246-- .urls.setcharacters("')]}",2)
247
-- Register urls.setcharacters through interfaces.implement; it gets two
-- string arguments.
implement {
    name      = "sethyphenatedurlcharacters",
    arguments = "2 strings",
    actions   = urls.setcharacters,
}

-- Register the private entry that feeds a string, two integers and another
-- string into action.
implement {
    name      = "hyphenatedurl",
    scope     = "private",
    arguments = {
        "string",
        "integer",
        "integer",
        "string",
    },
    actions   = function(...)
        action(hyphenatedurl,...)
    end,
}
260