-- Make sure the global module registry exists before adding to it.

if not modules then
    modules = { }
end

-- Administrative record for this file, stored under the key 'lang-url'.

modules['lang-url'] = {
    version   = 1.001,
    comment   = "companion to lang-url.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local utfcharacters, utfbyte, utfchar = utf.characters, utf.byte, utf.char
10local min, max = math.min, math.max
11
12local context = context
13local ctx_pushcatcodes = context.pushcatcodes
14local ctx_popcatcodes = context.popcatcodes
15
16local implement = interfaces.implement
17local variables = interfaces.variables
18
19local v_before = variables.before
20local v_after = variables.after
21
22local is_letter = characters.is_letter
23
24
25
26
27
28
-- The url namespace, published as languages.urls.

local urls     = { }
languages.urls = urls

-- Default classification of characters. The action function further down
-- compares the stored value with variables.before and variables.after; a
-- character that is not in this table gets no special treatment there.
--
-- Note that from here on the local 'characters' hides the global table of the
-- same name (is_letter has been taken from that global one before this point).

local characters = { }

for c in string.gmatch("!\"#$%&(*+,-./:;<=>?@[\\^_`{|~",".") do
    characters[c] = "before"
end

for c in string.gmatch("')]}",".") do
    characters[c] = "after"
end

characters = utilities.storage.allocate(characters)
67
-- Optional character replacements: the action function looks up every
-- character here first and continues with the replacement when there is one.
-- No replacements are predefined.

local mapping = utilities.storage.allocate { }

-- Public tables and defaults. The action function falls back on these values
-- when it gets no explicit left, right or discretionary argument.

urls.characters     = characters
urls.mapping        = mapping
urls.lefthyphenmin  = 2
urls.righthyphenmin = 3
urls.discretionary  = nil
urls.packslashes    = false

-- The directive sets urls.packslashes, the tracker sets the local trace flag.

directives.register("hyphenators.urls.packslashes",function(enabled)
    urls.packslashes = enabled
end)

local trace = false

trackers.register("hyphenators.urls",function(enabled)
    trace = enabled
end)

local report = logs.reporter("hyphenators","urls")
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
-- Convert a url into a list of \lang_url_* macro calls and pipe that list to
-- TeX under the "prtcatcodes" catcode regime.
--
-- Every utf character of str is first replaced by mapping[char] (when set) and
-- then turned into exactly one entry:
--
--   \lang_url_d      the character equals the discretionary
--   \lang_url_b{n}   urls.characters says variables.before
--   \lang_url_a{n}   urls.characters says variables.after
--   \lang_url_l{n}   a letter, seen while 'word' is set
--   \lang_url_c{n}   any other character, or a slash that gets packed
--   \lang_url_L{n}   like l, but within the left or right margin
--   \lang_url_C{n}   like c, but within the left or right margin
--
-- Here n is utfbyte(char) of the (mapped) character.
--
-- hyphenatedurl : not used in the body; the first argument is reserved because
--                 the function is installed with table.setmetatablecall
--                 (presumably the called table is passed first, to be confirmed
--                 against l-table.lua)
-- str           : the url
-- left          : optional left margin, default urls.lefthyphenmin, minimum 2
-- right         : optional right margin, default urls.righthyphenmin
-- disc          : optional discretionary character, default urls.discretionary
--
-- Nothing is returned.

local function action(hyphenatedurl,str,left,right,disc)
    local list = utf.split(str)
    local size = #list
    -- The margins are compared with character positions, so the right one has
    -- to be derived from the number of utf characters. Using #str here would
    -- count bytes and push the margin too far to the right as soon as the url
    -- has multi-byte characters.
    left  = max(left or urls.lefthyphenmin,2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local pack = urls.packslashes
    local word = nil -- only changed by ordinary and 'after' characters
    local prev = nil -- previous character, after mapping
    --
    for i=1,size do
        local char = list[i]
        char = mapping[char] or char
        if char == disc then
            list[i] = "\\lang_url_d "
        else
            local what
            if pack and char == "/" and (list[i+1] == "/" or prev == "/") then
                -- a slash next to another slash; the lookahead checks the raw
                -- next entry, so without mapping applied
                what = "c"
            else
                local how = characters[char]
                if how == v_before then
                    what = "b"
                elseif how == v_after then
                    word = false
                    what = "a"
                else
                    local letter = is_letter[char]
                    local margin = i <= left or i >= right
                    if word and letter then
                        what = margin and "L" or "l"
                    else
                        what = margin and "C" or "c"
                    end
                    word = letter
                end
            end
            list[i] = "\\lang_url_" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    ctx_pushcatcodes("prtcatcodes")
    context("%t",list)
    ctx_popcatcodes()
end

-- Install action as call handler. Watch out: hyphenatedurl is not declared
-- anywhere in the visible part of this file, so this depends on what the name
-- resolves to at load time.

table.setmetatablecall(hyphenatedurl,action)
211
212
213
-- Assign one classification to every utf character in str.
--
-- str   : the characters to (re)classify
-- value : stored as given; when nil or false, variables.before is used

function urls.setcharacters(str,value)
    local how = value or v_before
    for character in utfcharacters(str) do
        characters[character] = how
    end
end
219
220
221
-- Register urls.setcharacters under the name "sethyphenatedurlcharacters";
-- the argument specification asks for two strings.

implement {
    name      = "sethyphenatedurlcharacters",
    arguments = "2 strings",
    actions   = urls.setcharacters,
}
227
-- Register the private "hyphenatedurl" entry. The four arguments (string,
-- integer, integer, string) are passed on to action as str, left, right and
-- disc, preceded by hyphenatedurl, which fills the first parameter of action.

implement {
    name      = "hyphenatedurl",
    scope     = "private",
    arguments = { "string", "integer", "integer", "string" },
    actions   = function(...)
        action(hyphenatedurl,...)
    end,
}
234 |