lang-url.lmt /size: 7314 b    last modification: 2023-12-21 09:44
-- Register the metadata of this module in the global 'modules' table; the
-- table is created first when it does not exist yet.
if not modules then
    modules = { }
end

modules["lang-url"] = {
    version   = 1.001,
    comment   = "companion to lang-url.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local next = next
10local utfcharacters, utfbyte, utfchar = utf.characters, utf.byte, utf.char
11local min, max = math.min, math.max
12local setmetatableindex = table.setmetatableindex
13
14local context          = context
15local ctx_pushcatcodes = context.pushcatcodes
16local ctx_popcatcodes  = context.popcatcodes
17
18local implement = interfaces.implement
19local variables = interfaces.variables
20
21local v_before  = variables.before
22local v_after   = variables.after
23
24local is_letter = characters.is_letter
25
-- Hyphenating URLs is somewhat tricky and a matter of taste. I did consider using
-- a dedicated hyphenation pattern or dealing with it by node parsing, but the
-- following solution works as well. After all, we're mostly dealing with ASCII
-- characters.
30
-- Namespace table of this module: kept in a local for use in this file and
-- exported as 'languages.urls'.
local urls = { }

languages.urls = urls
33
-- Classes of the characters that get special treatment in a URL. The stored
-- value ("before" or "after") is what 'action' compares against
-- variables.before and variables.after; characters without an entry end up
-- in the letter / other character branch of 'action'.
local characters = { }

do
    local classes = {
        before = [[!"#$%&(*+,-./:;<=>?@[\^_`{|~]],
        after  = "')]}",
    }
    for class, members in next, classes do
        for member in utfcharacters(members) do
            characters[member] = class
        end
    end
end

characters = utilities.storage.allocate(characters)
69
-- Replacements that 'action' applies to each character before classifying
-- it; there are no entries by default.
--
-- possible entry: [utfchar(0xA0)] = "~" -- nbsp (catch)
local mapping = utilities.storage.allocate({ })
73
-- Export the lookup tables.
urls.characters = characters
urls.mapping    = mapping

-- Defaults that 'action' falls back on when an argument is not given.
urls.lefthyphenmin  = 2     -- used when 'left' is not passed
urls.righthyphenmin = 3     -- used when 'right' is not passed
urls.discretionary  = nil   -- used when 'disc' is not passed (nothing by default)
urls.packslashes    = false -- set by the "hyphenators.urls.packslashes" directive
80
-- The directive "hyphenators.urls.packslashes" sets urls.packslashes.
directives.register("hyphenators.urls.packslashes", function(v)
    urls.packslashes = v
end)

-- The tracker "hyphenators.urls" switches the reporting in 'action' on or off.
local trace = false

trackers.register("hyphenators.urls", function(v)
    trace = v
end)

local report = logs.reporter("hyphenators","urls")
85
86-- local ctx_a = context.a
87-- local ctx_b = context.b
88-- local ctx_d = context.d
89-- local ctx_c = context.c
90-- local ctx_l = context.l
91-- local ctx_C = context.C
92-- local ctx_L = context.L
93
94-- local function action(hyphenatedurl,str,left,right,disc)
95--     --
96--     left  = max(      left  or urls.lefthyphenmin,    2)
97--     right = min(#str-(right or urls.righthyphenmin)+2,#str)
98--     disc  = disc or urls.discretionary
99--     --
100--     local word   = nil
101--     local prev   = nil
102--     local pack   = urls.packslashes
103--     local length = 0
104--     --
105--     for char in utfcharacters(str) do
106--         length  = length + 1
107--         char    = mapping[char] or char
108--         local b = utfbyte(char)
109--         if prev == char and prev == "/" then
110--             ctx_c(b)
111--         elseif char == disc then
112--             ctx_d()
113--         else
114--             if prev == "/" then
115--                 ctx_d()
116--             end
117--             local how = characters[char]
118--             if how == v_before then
119--                 word = false
120--                 ctx_b(b)
121--             elseif how == v_after then
122--                 word = false
123--                 ctx_a(b)
124--             else
125--                 local letter = is_letter[char]
126--                 if length <= left or length >= right then
127--                     if word and letter then
128--                         ctx_L(b)
129--                     else
130--                         ctx_C(b)
131--                     end
132--                 elseif word and letter then
133--                     ctx_l(b)
134--                 else
135--                     ctx_c(b)
136--                 end
137--                 word = letter
138--             end
139--         end
140--         if pack then
141--             prev = char
142--         else
143--             prev = nil
144--         end
145--     end
146-- end
147
-- Convert a URL into a sequence of \lang_url_* macro calls and hand that
-- sequence to context() under the "prtcatcodes" catcode regime.
--
-- hyphenatedurl : first argument of the call; not used in the body
-- str           : the URL as UTF-8 string
-- left          : left boundary, counted in characters (default
--                 urls.lefthyphenmin, never less than 2)
-- right         : number of characters that determines the right boundary
--                 (default urls.righthyphenmin)
-- disc          : character that is replaced by \lang_url_d (default
--                 urls.discretionary)
--
-- Every (mapped) character is turned into one of:
--
--   \lang_url_d      the character equals 'disc'
--   \lang_url_b{n}   the character is classified as variables.before
--   \lang_url_a{n}   the character is classified as variables.after
--   \lang_url_l{n}   a letter that follows a letter
--   \lang_url_c{n}   any other character, or a packed slash
--   \lang_url_L{n}   as l, but at a position <= left or >= right
--   \lang_url_C{n}   as c, but at a position <= left or >= right
--
-- where n is the value that utfbyte returns for the character.
local function action(hyphenatedurl,str,left,right,disc)
    local list = utf.split(str)
    local size = #list
    -- Positions are counted in characters, so the right boundary has to be
    -- derived from the number of characters and not from #str, which is the
    -- length in bytes and therefore too large for non-ASCII input.
    left  = max(left or urls.lefthyphenmin,2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local word = nil -- true when the previous regular character was a letter
    local pack = urls.packslashes
    local prev = nil -- previous character, after mapping

    for i=1,size do
        local char = list[i]
        char = mapping[char] or char
        if char == disc then
            list[i] = "\\lang_url_d "
        else
            local what = nil
            -- list[i+1] has not been converted yet, so it still holds the
            -- next character of the input
            if pack and char == "/" and (list[i+1] == "/" or prev == "/") then
                what = "c"
            else
                local how = characters[char]
                if how == v_before then
                    -- NOTE(review): unlike the 'after' branch, 'word' is not
                    -- reset here, while the commented-out predecessor of this
                    -- function did reset it -- confirm that this is intended.
                    what = "b"
                elseif how == v_after then
                    word = false
                    what = "a"
                else
                    local letter  = is_letter[char]
                    local outside = i <= left or i >= right
                    if word and letter then
                        what = outside and "L" or "l"
                    else
                        what = outside and "C" or "c"
                    end
                    word = letter
                end
            end
            list[i] = "\\lang_url_" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    ctx_pushcatcodes("prtcatcodes")
    context("%t",list)
    ctx_popcatcodes()
end
209
210-- urls.action = function(_,...) action(...) end -- sort of obsolete
211
-- Install 'action' as the call handler of 'hyphenatedurl' (watch out: a caller).
-- NOTE(review): 'hyphenatedurl' is not defined in this file, so it is presumably
-- a global that is set elsewhere (or nil) -- confirm.
table.setmetatablecall(hyphenatedurl, action)
213
214-- todo, no interface in mkiv yet
215
216local registerfunction   = context.functions.register
217local unregisterfunction = context.functions.unregister
218local savelua            = token.savelua
219
-- Undo the changes made by one call to urls.setcharacters and unregister the
-- restore function afterwards.
--
-- savedchars : maps each changed character to its previous class, or to
--              false when the character had no class before the change
-- restore    : the value that registerfunction returned for this restore action
local function restorevalues(savedchars,restore)
    for k, v in next, savedchars do
        if v == false then
            characters[k] = nil -- there was no entry before the change
        else
            characters[k] = v
        end
    end
    unregisterfunction(restore)
end

-- Assign the class 'value' to every character in 'str' and pass a function
-- that restores the previous classes to savelua (presumably this runs when
-- the current TeX group ends -- confirm against the token library).
--
-- str   : UTF-8 string with the characters to (re)classify
-- value : the new class; defaults to variables.before
--
-- NOTE(review): the original remark here said "1, 2 == before, after", but
-- 'action' in this file only compares the stored class with variables.before
-- and variables.after -- confirm which values callers are expected to pass.
function urls.setcharacters(str,value)
    local savedchars = { }
    local newvalue   = value or v_before
    for s in utfcharacters(str) do
        local oldvalue = characters[s]
        if oldvalue ~= newvalue then
            -- A nil cannot be stored in a table, so a character without a
            -- previous class is remembered as false; otherwise it would not
            -- be restored (and no restore would be registered at all when
            -- only such characters are changed).
            savedchars[s] = oldvalue or false
            characters[s] = newvalue
        end
    end
    if next(savedchars) then
        local restore = nil
        restore = registerfunction(function() restorevalues(savedchars,restore) end)
        savelua(restore)
    end
end
243
244-- .urls.setcharacters("')]}",2)
245
-- Register urls.setcharacters through interfaces.implement under the name
-- "sethyphenatedurlcharacters"; it is declared with two string arguments.
implement {
    name      = "sethyphenatedurlcharacters",
    arguments = "2 strings",
    actions   = urls.setcharacters,
}
251
-- Register 'action' through interfaces.implement as the private command
-- "hyphenatedurl". The declared arguments (string, integer, integer, string)
-- are passed on after 'hyphenatedurl', so they line up with the str, left,
-- right and disc parameters of 'action'.
implement {
    name      = "hyphenatedurl",
    scope     = "private",
    arguments = { "string", "integer", "integer", "string" },
    actions   = function(...)
        action(hyphenatedurl,...)
    end,
}
258