lang-url.lua /size: 6575 b    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['lang-url'] = {
2    version   = 1.001,
3    comment   = "companion to lang-url.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local utfcharacters, utfbyte, utfchar = utf.characters, utf.byte, utf.char
10local min, max = math.min, math.max
11
12local context          = context
13local ctx_pushcatcodes = context.pushcatcodes
14local ctx_popcatcodes  = context.popcatcodes
15
16local implement = interfaces.implement
17local variables = interfaces.variables
18
19local v_before  = variables.before
20local v_after   = variables.after
21
22local is_letter = characters.is_letter
23
-- Hyphenating URLs is somewhat tricky and a matter of taste. I did consider using
-- a dedicated hyphenation pattern or dealing with it by node parsing, but the
-- following solution works as well. After all, we're mostly dealing with ASCII
-- characters.
28
-- The module's public table, reachable from elsewhere as languages.urls.

local urls = { }

languages.urls = urls

-- Per character category. The action function below compares these values with
-- v_before and v_after; a character that is not listed here is handled as a
-- regular character (letter or other). Presumably the category tells on which
-- side of the character a break is permitted; the real work is done by the
-- \lang_url_b and \lang_url_a macros at the TeX end.

local characters = utilities.storage.allocate {
    ["!"]  = "before", ["\""] = "before", ["#"]  = "before", ["$"]  = "before",
    ["%"]  = "before", ["&"]  = "before", ["("]  = "before", ["*"]  = "before",
    ["+"]  = "before", [","]  = "before", ["-"]  = "before", ["."]  = "before",
    ["/"]  = "before", [":"]  = "before", [";"]  = "before", ["<"]  = "before",
    ["="]  = "before", [">"]  = "before", ["?"]  = "before", ["@"]  = "before",
    ["["]  = "before", ["\\"] = "before", ["^"]  = "before", ["_"]  = "before",
    ["`"]  = "before", ["{"]  = "before", ["|"]  = "before", ["~"]  = "before",

    ["'"]  = "after",  [")"]  = "after",  ["]"]  = "after",  ["}"]  = "after",
}

-- Optional character substitutions that are applied before a character gets
-- categorized (empty by default).

local mapping = utilities.storage.allocate {
  -- [utfchar(0xA0)] = "~", -- nbsp (catch)
}

-- Defaults; the action function falls back on these when it gets no explicit
-- left, right or disc argument.

urls.characters     = characters
urls.mapping        = mapping
urls.lefthyphenmin  = 2
urls.righthyphenmin = 3
urls.discretionary  = nil
urls.packslashes    = false

-- The packslashes setting can be toggled with a directive.

directives.register("hyphenators.urls.packslashes",function(v)
    urls.packslashes = v
end)

-- When tracing is enabled the action function reports its input and output.

local trace = false

trackers.register("hyphenators.urls",function(v)
    trace = v
end)

local report = logs.reporter("hyphenators","urls")
83
84-- local ctx_a = context.a
85-- local ctx_b = context.b
86-- local ctx_d = context.d
87-- local ctx_c = context.c
88-- local ctx_l = context.l
89-- local ctx_C = context.C
90-- local ctx_L = context.L
91
92-- local function action(hyphenatedurl,str,left,right,disc)
93--     --
94--     left  = max(      left  or urls.lefthyphenmin,    2)
95--     right = min(#str-(right or urls.righthyphenmin)+2,#str)
96--     disc  = disc or urls.discretionary
97--     --
98--     local word   = nil
99--     local prev   = nil
100--     local pack   = urls.packslashes
101--     local length = 0
102--     --
103--     for char in utfcharacters(str) do
104--         length  = length + 1
105--         char    = mapping[char] or char
106--         local b = utfbyte(char)
107--         if prev == char and prev == "/" then
108--             ctx_c(b)
109--         elseif char == disc then
110--             ctx_d()
111--         else
112--             if prev == "/" then
113--                 ctx_d()
114--             end
115--             local how = characters[char]
116--             if how == v_before then
117--                 word = false
118--                 ctx_b(b)
119--             elseif how == v_after then
120--                 word = false
121--                 ctx_a(b)
122--             else
123--                 local letter = is_letter[char]
124--                 if length <= left or length >= right then
125--                     if word and letter then
126--                         ctx_L(b)
127--                     else
128--                         ctx_C(b)
129--                     end
130--                 elseif word and letter then
131--                     ctx_l(b)
132--                 else
133--                     ctx_c(b)
134--                 end
135--                 word = letter
136--             end
137--         end
138--         if pack then
139--             prev = char
140--         else
141--             prev = nil
142--         end
143--     end
144-- end
145
-- Convert a url into a sequence of \lang_url_<what>{<codepoint>} calls (one per
-- character) and pipe the result to TeX under the "prtcatcodes" catcode regime.
--
-- hyphenatedurl : not used in the body; it only occupies the first argument slot
-- str           : the url, a utf-8 encoded string
-- left          : characters at position <= left get the uppercase variants
--                 ("L" or "C"); defaults to urls.lefthyphenmin, never below 2
-- right         : together with the length of the url this determines the
--                 position from where on the uppercase variants are used again;
--                 defaults to urls.righthyphenmin
-- disc          : a character that gets replaced by \lang_url_d; defaults to
--                 urls.discretionary
--
-- The emitted variants are:
--
--   d : the disc character
--   b : a character registered as v_before
--   a : a character registered as v_after
--   l : a letter that follows a letter (L when in the left or right zone)
--   c : any other character, or a packed slash (C when in the left or right zone)
--
-- The macros themselves are defined at the TeX end.

local function action(hyphenatedurl,str,left,right,disc)
    --
    -- We split first because positions in the loop are counted in characters, so
    -- the thresholds have to be expressed in characters as well. Using #str here
    -- (a byte count) shifts the right hand zone as soon as the url contains
    -- multibyte utf-8 sequences.
    --
    local list = utf.split(str)
    local size = #list
    --
    left  = max(       left  or urls.lefthyphenmin,    2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local word = nil -- true when the previous regular character was a letter
    local pack = urls.packslashes
    local prev = nil -- the previous character (after mapping)

    for i=1,size do
        local what = nil
        local dodi = false
        local char = list[i]
        char       = mapping[char] or char
        if char == disc then
            dodi = true
        elseif pack and char == "/" and (list[i+1] == "/" or prev == "/") then
            -- a slash in a run of slashes: list[i+1] has not been replaced yet
            -- so we can safely look ahead
            what = "c"
        else
            local how = characters[char]
            if how == v_before then
                -- NOTE(review): contrary to the v_after branch (and to the older,
                -- commented implementation above) the word state is not reset
                -- here; confirm that this is intentional.
                what = "b"
            elseif how == v_after then
                word = false
                what = "a"
            else
                local letter = is_letter[char]
                if i <= left or i >= right then
                    if word and letter then
                        what = "L"
                    else
                        what = "C"
                    end
                elseif word and letter then
                    what = "l"
                else
                    what = "c"
                end
                word = letter
            end
        end
        -- the slot in the list is reused for the result
        if dodi then
            list[i] = "\\lang_url_d "
        else
            list[i] = "\\lang_url_" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    ctx_pushcatcodes("prtcatcodes")
    context("%t",list)
    ctx_popcatcodes()
end

-- urls.action = function(_,...) action(...) end -- sort of obsolete

-- NOTE(review): there is no assignment to hyphenatedurl visible in this file, so
-- unless it is a global that is defined elsewhere a nil is passed here; maybe
-- urls was meant. To be confirmed.

table.setmetatablecall(hyphenatedurl,action) -- watch out: a caller
211
212-- todo, no interface in mkiv yet
213
-- Register every (utf) character of str in the characters table.
--
-- str   : string with the characters to register; nothing happens when it is nil
-- value : the category to store, normally v_before or v_after because these are
--         the values that the action function tests for; the numbers 1 and 2
--         are accepted as shorthands for v_before and v_after; when nil or
--         false is passed v_before is used; any other value is stored as is

function urls.setcharacters(str,value) -- 1, 2 == before, after
    if not str then
        return
    end
    local how
    if value == 1 then
        how = v_before
    elseif value == 2 then
        how = v_after
    else
        how = value or v_before
    end
    for s in utfcharacters(str) do
        characters[s] = how
    end
end

-- languages.urls.setcharacters("')]}",2)
221
-- Make urls.setcharacters available under the name sethyphenatedurlcharacters;
-- the argument specification asks for two strings.

implement({
    name      = "sethyphenatedurlcharacters",
    arguments = "2 strings",
    actions   = urls.setcharacters,
})

-- The private hyphenatedurl command: the four arguments end up as the str, left,
-- right and disc parameters of the action function.

implement({
    name      = "hyphenatedurl",
    scope     = "private",
    arguments = { "string", "integer", "integer", "string" },
    actions   = function(...)
        action(hyphenatedurl,...)
    end,
})
234