1if not modules then modules = { } end modules ['bibl-bib'] = {
2 version = 1.001,
3 comment = "this module is the basis for the lxml-* ones",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
14
15local lower, format, gsub, concat = string.lower, string.format, string.gsub, table.concat
16local next = next
17local utfchar = utf.char
18local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
19local textoutf = characters and characters.tex.toutf
20local variables = interfaces and interfaces.variables
21local settings_to_hash = utilities.parsers.settings_to_hash
22local finalizers = xml.finalizers.tex
23local xmlfilter, xmltext, getid = xml.filter, xml.text, lxml.getid
24local formatters = string.formatters
25
26local P, R, S, C, Cc, Cs, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct
27
-- Tracker flag consulted by bibtex.load before reporting a conversion. The
-- callback has to assign to this very local: writing to any other name would
-- create a global and leave the flag false forever.
local trace_bibxml = false

trackers.register("publications.bibxml", function(v)
    trace_bibxml = v
end)

-- Reporter used for the messages issued while loading bib files.
local report_xml = logs.reporter("publications","xml")
31
-- The global bibtex namespace is created on demand and cached in a local.
bibtex = bibtex or { }
local bibtex = bibtex

-- Counters that are shown by the "bibtex load time" statistics entry; they
-- are reset each time this file is loaded.
local bibtexstats = bibtex.statistics or { }
bibtex.statistics = bibtexstats

bibtexstats.nofbytes, bibtexstats.nofdefinitions, bibtexstats.nofshortcuts = 0, 0, 0
41
-- Month abbreviations map onto their month number, stored as a string.
local defaultshortcuts = { }

do
    local months = {
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec",
    }
    for number=1,#months do
        defaultshortcuts[months[number]] = tostring(number)
    end
end

-- Parser state; bibtex.convert points these at the tables of the session
-- that is being processed.
local shortcuts = { }
local data = { }
local entries
60
61
62
63
64
-- Grammar action for a shortcut block. Every call is counted; the key/value
-- pair is only stored when the tag is "@string" (compared case insensitively).
local function do_shortcut(tag,key,value)
    local stats = bibtexstats
    stats.nofshortcuts = stats.nofshortcuts + 1
    if lower(tag) ~= "@string" then
        return
    end
    shortcuts[key] = value
end
71
-- Grammar action for an entry definition. The flat list tab holds alternating
-- field names and values; it is turned into a hash and stored in
-- data[tag][key]. When an entries filter is active, only keys present in that
-- filter are stored (and counted).
local function do_definition(tag,key,tab)
    if entries and not entries[key] then
        return
    end
    bibtexstats.nofdefinitions = bibtexstats.nofdefinitions + 1
    local fields = { }
    local index, last = 1, #tab
    while index <= last do
        fields[tab[index]] = tab[index+1]
        index = index + 2
    end
    local category = data[tag]
    if category then
        category[key] = fields
    else
        data[tag] = { [key] = fields }
    end
end
87
-- Expands a bare word: session shortcuts win over the default (month)
-- shortcuts; an unknown word is returned untouched.
local function resolve(s)
    local expansion = shortcuts[s]
    if expansion then
        return expansion
    end
    expansion = defaultshortcuts[s]
    if expansion then
        return expansion
    end
    return s
end
91
-- Single character (class) patterns.
local percent    = P("%")
local start      = P("@")
local comma      = P(",")
local hash       = P("#")
local escape     = P("\\")
local single     = P("'")
local double     = P('"')
local left       = P('{')
local right      = P('}')
local both       = left + right
local lineending = S("\n\r")
local space      = S(" \t\n\r\f")
local spacing    = space^0
local equal      = P("=")
local collapsed  = (space^1)/ " " -- a run of whitespace becomes one space

-- Content between two quote characters: escaped quotes are kept, whitespace
-- runs are collapsed, anything but the quote itself is accepted.
local function quotedcontent(quote)
    return ((escape*quote) + collapsed + (1-quote))^0
end

-- A delimited value of which the delimiters are dropped from the substitution.
local function delimited(opener,content,closer)
    return (opener/"") * content * (closer/"")
end

local keyword  = C((R("az","AZ","09") + S("@_:-"))^1)
local s_quoted = quotedcontent(single)
local d_quoted = quotedcontent(double)
local balanced = lpegpatterns.balanced

local s_value = delimited(single,s_quoted,single)
local d_value = delimited(double,d_quoted,double)
local b_value = delimited(left,balanced,right)
local r_value = keyword/resolve -- bare words go through the shortcut tables

-- Values can be glued together with #; the glue itself disappears.
local somevalue = s_value + d_value + b_value + r_value
local glue      = (spacing * hash * spacing)/""
local value     = Cs((somevalue * (glue * somevalue)^0))

local assignment  = spacing * keyword * spacing * equal * spacing * value * spacing
local assignments = (assignment * comma^0)^0
local opening     = keyword * spacing * left

local shortcut   = opening * spacing * assignments * spacing * right
local definition = opening * spacing * keyword * comma * Ct(assignments) * spacing * right
local comment    = opening * (1-right)^0 * spacing * right
local forget     = percent^1 * (1-lineending)^0

-- The alternatives are tried in this order at every position; a character
-- that fits none of them is skipped.
local grammar = (space + forget + shortcut/do_shortcut + definition/do_definition + comment + 1)^0
132
-- Parses bibtex source text into the given session.
--
-- session : table as returned by bibtex.new; its data, shortcuts and entries
--           tables become the targets of the grammar actions
-- content : string with bibtex source; nil is treated as an empty string
--
-- The byte counters of the module and of the session are both increased by
-- the size of the content.
function bibtex.convert(session,content)
    statistics.starttiming(bibtex)
    -- normalize first: taking the length of nil would raise an error before
    -- the (former) fallback in the match call was ever reached
    content = content or ""
    data, shortcuts, entries = session.data, session.shortcuts, session.entries
    local size = #content
    bibtexstats.nofbytes = bibtexstats.nofbytes + size
    session.nofbytes = session.nofbytes + size
    lpegmatch(grammar,content)
    statistics.stoptiming(bibtex)
end
141
-- Locates a bib file with the resolver and feeds its content to
-- bibtex.convert. Nothing is converted when the resolver reports an empty
-- name; an empty file is reported but still handed over.
function bibtex.load(session,filename)
    statistics.starttiming(bibtex)
    local fullname = resolvers.findfile(filename,"bib")
    if fullname ~= "" then
        local content = io.loaddata(fullname) or ""
        local empty = content == ""
        if empty then
            report_xml("empty file %a, no conversion to xml",fullname)
        end
        if not empty and trace_bibxml then
            report_xml("converting file %a to xml",fullname)
        end
        bibtex.convert(session,content)
    end
    statistics.stoptiming(bibtex)
end
156
-- Creates a fresh session: empty data and shortcut tables, an xml tree that
-- only has an empty bibtex root, a zero byte count, no entry filter and the
-- loaded flag cleared.
function bibtex.new()
    local session = { }
    session.data = { }
    session.shortcuts = { }
    session.xml = xml.convert("<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>")
    session.nofbytes = 0
    session.entries = nil
    session.loaded = false
    return session
end
167
local p_escaped = lpegpatterns.xml.escaped

-- Backslash plus one of these letters is put back as a literal backslash
-- sequence by the gsub in bibtex.toxml.
local ihatethis = { }

for letter in string.gmatch("fnrstvz",".") do
    ihatethis[letter] = "\\" .. letter
end

-- \name becomes \bibtexcommand{name}
local command = P("\\")/"" * Cc("\\bibtexcommand{") * (R("az","AZ")^1) * Cc("}")
local any     = P(1)
local done    = P(-1)

local one_l, one_r = P("{")  / "", P("}")  / ""
local two_l, two_r = P("{{") / "", P("}}") / ""

-- A value that is completely wrapped in the given delimiters; the delimiters
-- are removed and commands inside are rewritten.
local function wrapped(opener,closer)
    return opener * (command + any - closer - done)^0 * closer * done
end

-- Double braces are tried first, then single braces, then the plain value.
local filter = Cs(
    wrapped(two_l,two_r) +
    wrapped(one_l,one_r) +
    (command + any)^0
)
193
-- Serializes the collected definitions of a session to xml, converts that
-- into an xml tree stored in session.xml and then drops the raw data and
-- shortcut tables. A session is only processed once (guarded by its loaded
-- flag).
--
-- options : settings string; the keys convert and strip are looked at
function bibtex.toxml(session,options)
    if session.loaded then
        return
    end
    session.loaded = true
    statistics.starttiming(bibtex)
    local specification = settings_to_hash(options)
    local convert, strip = specification.convert, specification.strip
    local wanted = session.entries
    local lines = {
        "<?xml version='1.0' standalone='yes'?>",
        "<bibtex>",
    }
    local n = 2
    for category, definitions in next, session.data do
        -- the category is the lowercased tag without its leading @
        category = lower(gsub(category,"^@",""))
        for tag, fields in next, definitions do
            if not wanted or wanted[tag] then
                n = n + 1
                lines[n] = formatters["<entry tag='%s' category='%s'>"](lower(tag),category)
                for key, value in next, fields do
                    value = gsub(value,"\\(.)",ihatethis)
                    value = lpegmatch(p_escaped,value)
                    if value ~= "" then
                        if convert then
                            value = textoutf(value,true)
                        end
                        if strip then
                            -- remove outer braces and wrap commands
                            value = lpegmatch(filter,value) or value
                        end
                        n = n + 1
                        lines[n] = formatters[" <field name='%s'>%s</field>"](key,value)
                    end
                end
                n = n + 1
                lines[n] = "</entry>"
            end
        end
    end
    n = n + 1
    lines[n] = "</bibtex>"
    session.xml = xml.convert(concat(lines,"\n"), {
        resolve_entities            = true,
        resolve_predefined_entities = true,
        utfize_entities             = true,
    } )
    session.data = nil
    session.shortcuts = nil
    statistics.stoptiming(bibtex)
end
247
-- Reports timing and counters, but only when something has been loaded.
statistics.register("bibtex load time", function()
    local nofbytes = bibtexstats.nofbytes
    if not (nofbytes > 0) then
        return nil
    end
    local elapsed = statistics.elapsedtime(bibtex)
    return format("%s seconds, %s bytes, %s definitions, %s shortcuts",
        elapsed,nofbytes,bibtexstats.nofdefinitions,bibtexstats.nofshortcuts)
end)
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299if not characters then dofile(resolvers.findfile("char-def.lua")) end
300
301local chardata = characters.data
302local concat = table.concat
303
304local lpeg = lpeg
305
306local P, Ct, lpegmatch, lpegpatterns = lpeg.P, lpeg.Ct, lpeg.match, lpeg.patterns
307
local space = P(" ")
local comma = P(",")

local somespaces = space^1
local anyspaces  = space^0

-- Table splitters for a list of names ("and"), the parts of one name (comma)
-- and the words of one part (spaces).
local andsplitter    = lpeg.tsplitat(somespaces * "and" * somespaces)
local commasplitter  = lpeg.tsplitat(anyspaces * comma * anyspaces)
local spacesplitter  = lpeg.tsplitat(somespaces)
local firstcharacter = lpegpatterns.utf8byte
314
-- Tells if the first character of str has category "lu" in the character
-- data; a falsy value is returned when that character is unknown.
local function is_upper(str)
    local info = chardata[lpegmatch(firstcharacter,str)]
    if info then
        return info.category == "lu"
    end
    return info
end
320
-- Collects the leading words of a list that start with an uppercase
-- character. Returns the collected words and the index of the first word
-- that was not consumed.
local function leadinguppercase(words)
    local found, i, n = { }, 1, #words
    while i <= n and is_upper(words[i]) do
        found[i] = words[i]
        i = i + 1
    end
    return found, i
end

-- Splits a string with names separated by "and" into a list of author
-- tables. Each table has the fields original, firstnames, vons, surnames,
-- initials and juniors (lists of words, some may be nil); the list itself
-- gets the complete input string in its original field.
--
-- The number of comma separated parts of a name selects the interpretation:
--
--   one part   : leading uppercase words are first names, the following
--                non uppercase words are vons, the rest is the surname
--   two parts  : the first part is the surname; of the second part the
--                leading uppercase words are first names and the rest vons
--   more parts : surname, junior, first names (parts after the third are
--                ignored); this is the BibTeX "Last, Jr, First" order, which
--                also agrees with the two part case that has the surname
--                first
local function splitauthors(str)
    local authors = lpegmatch(andsplitter,str)
    for i=1,#authors do
        local firstnames, vons, surnames, initials, juniors
        local author = authors[i]
        local split = lpegmatch(commasplitter,author)
        local n = #split
        if n == 1 then
            local words = lpegmatch(spacesplitter,author)
            local nofwords = #words
            local w
            firstnames, w = leadinguppercase(words)
            vons, surnames = { }, { }
            while w <= nofwords and not is_upper(words[w]) do
                vons[#vons+1] = words[w]
                w = w + 1
            end
            while w <= nofwords do
                surnames[#surnames+1] = words[w]
                w = w + 1
            end
        elseif n == 2 then
            local words = lpegmatch(spacesplitter,split[2])
            local w
            surnames = lpegmatch(spacesplitter,split[1])
            firstnames, w = leadinguppercase(words)
            vons = { }
            for k=w,#words do
                vons[#vons+1] = words[k]
            end
        else
            surnames   = lpegmatch(spacesplitter,split[1])
            juniors    = lpegmatch(spacesplitter,split[2])
            firstnames = lpegmatch(spacesplitter,split[3])
        end
        if #surnames == 0 then
            -- only uppercase words were seen: the last one is the surname
            surnames[1] = firstnames[#firstnames]
            firstnames[#firstnames] = nil
        end
        initials = { }
        for k=1,#firstnames do
            -- an empty word has no first character to convert
            local first = lpegmatch(firstcharacter,firstnames[k])
            initials[k] = first and utfchar(first) or ""
        end
        authors[i] = {
            original   = author,
            firstnames = firstnames,
            vons       = vons,
            surnames   = surnames,
            initials   = initials,
            juniors    = juniors,
        }
    end
    authors.original = str
    return authors
end
400
-- Returns a new list in which every initial is followed by symbol (a period
-- when no symbol is given).
local function the_initials(initials,symbol)
    local suffix = symbol or "."
    local marked = { }
    local index, count = 1, #initials
    while index <= count do
        marked[index] = initials[index] .. suffix
        index = index + 1
    end
    return marked
end
408
409
410
-- Namespace for the name formatters.
local authors = bibtex.authors or { }
bibtex.authors = authors

-- Fallback separators for the formatters and for the list concatenation.
local defaultsettings = {
    -- between the words that make up one name
    firstnamesep        = " ",
    vonsep              = " ",
    surnamesep          = " ",
    juniorsep           = " ",
    -- between the parts of one name
    surnamejuniorsep    = ", ",
    juniorjuniorsep     = ", ",
    surnamefirstnamesep = ", ",
    surnameinitialsep   = ", ",
    -- between the names in a list
    namesep             = ", ",
    lastnamesep         = " and ",
    finalnamesep        = " and ",
}
428
-- Formats one split author as: first names, vons, surnames, juniors.
--
-- author   : table with optional firstnames, vons, surnames, juniors lists
-- settings : optional table with separators; missing ones are taken from
--            the default settings
--
-- Returns the concatenated string.
function authors.normal(author,settings)
    local firstnames, vons, surnames, juniors = author.firstnames, author.vons, author.surnames, author.juniors
    local result, settings = { }, settings or defaultsettings
    if firstnames and #firstnames > 0 then
        result[#result+1] = concat(firstnames," ")
        result[#result+1] = settings.firstnamesep or defaultsettings.firstnamesep
    end
    if vons and #vons > 0 then
        result[#result+1] = concat(vons," ")
        result[#result+1] = settings.vonsep or defaultsettings.vonsep
    end
    if surnames then
        result[#result+1] = concat(surnames," ")
    end
    if juniors and #juniors > 0 then
        -- the separator belongs between the surnames and the juniors; adding
        -- it after the juniors glued them to the surname and left a trailing
        -- separator
        result[#result+1] = settings.surnamesep or defaultsettings.surnamesep
        result[#result+1] = concat(juniors," ")
    end
    return concat(result)
end
449
-- Formats one split author as: first names, vons, surnames, juniors, with
-- the surname/junior separator between the last two.
--
-- author   : table with optional firstnames, vons, surnames, juniors lists
-- settings : optional table with separators; missing ones are taken from
--            the default settings
--
-- Returns the concatenated string.
--
-- NOTE(review): unlike authors.invertedshort this variant uses the full
-- first names and not the initials; confirm if that is intended.
function authors.normalshort(author,settings)
    local firstnames, vons, surnames, juniors = author.firstnames, author.vons, author.surnames, author.juniors
    local result, settings = { }, settings or defaultsettings
    if firstnames and #firstnames > 0 then
        result[#result+1] = concat(firstnames," ")
        result[#result+1] = settings.firstnamesep or defaultsettings.firstnamesep
    end
    if vons and #vons > 0 then
        result[#result+1] = concat(vons," ")
        result[#result+1] = settings.vonsep or defaultsettings.vonsep
    end
    if surnames then
        result[#result+1] = concat(surnames," ")
    end
    if juniors and #juniors > 0 then
        -- separator first: it sits between the surnames and the juniors
        result[#result+1] = settings.surnamejuniorsep or defaultsettings.surnamejuniorsep
        result[#result+1] = concat(juniors," ")
    end
    return concat(result)
end
470
-- Formats one split author as: vons, surnames, juniors, first names. The
-- juniors and the first names are each preceded by their separator.
function authors.inverted(author,settings)
    local firstnames, vons, surnames, juniors = author.firstnames, author.vons, author.surnames, author.juniors
    settings = settings or defaultsettings
    local parts = { }
    local function add(str)
        parts[#parts+1] = str
    end
    if vons and #vons > 0 then
        add(concat(vons," "))
        add(settings.vonsep or defaultsettings.vonsep)
    end
    if surnames then
        add(concat(surnames," "))
    end
    if juniors and #juniors > 0 then
        add(settings.juniorjuniorsep or defaultsettings.juniorjuniorsep)
        add(concat(juniors," "))
    end
    if firstnames and #firstnames > 0 then
        add(settings.surnamefirstnamesep or defaultsettings.surnamefirstnamesep)
        add(concat(firstnames," "))
    end
    return concat(parts)
end
491
-- Formats one split author as: vons, surnames, juniors, initials. The
-- initials get a period each and are separated by spaces.
function authors.invertedshort(author,settings)
    local vons, surnames, initials, juniors = author.vons, author.surnames, author.initials, author.juniors
    settings = settings or defaultsettings
    local parts = { }
    local function add(str)
        parts[#parts+1] = str
    end
    if vons and #vons > 0 then
        add(concat(vons," "))
        add(settings.vonsep or defaultsettings.vonsep)
    end
    if surnames then
        add(concat(surnames," "))
    end
    if juniors and #juniors > 0 then
        add(settings.juniorjuniorsep or defaultsettings.juniorjuniorsep)
        add(concat(juniors," "))
    end
    if initials and #initials > 0 then
        add(settings.surnameinitialsep or defaultsettings.surnameinitialsep)
        add(concat(the_initials(initials)," "))
    end
    return concat(parts)
end
512
-- Number of names in the most recently concatenated list; it is consulted by
-- bibtex.singularorplural.
local lastconcatsize = 1

-- Concatenates a list of formatted names.
--
-- t        : list of strings
-- settings : table with optional namesep, lastnamesep and finalnamesep
--
-- Two names are joined with lastnamesep. With more names, namesep follows
-- all but the last two and finalnamesep sits between the last two. An empty
-- list gives an empty string.
local function bibtexconcat(t,settings)
    local namesep      = settings.namesep      or defaultsettings.namesep      or ", "
    local lastnamesep  = settings.lastnamesep  or defaultsettings.lastnamesep  or namesep
    local finalnamesep = settings.finalnamesep or defaultsettings.finalnamesep or lastnamesep
    local size = #t
    -- update the shared counter; declaring a new local here would shadow it
    -- and leave the outer value at 1 forever
    lastconcatsize = size
    if size > 2 then
        local s = { }
        for i=1,size-2 do
            s[i] = t[i] .. namesep
        end
        s[size-1], s[size] = t[size-1] .. finalnamesep, t[size]
        return concat(s)
    elseif size > 1 then
        return concat(t,lastnamesep)
    elseif size > 0 then
        return t[1]
    else
        return ""
    end
end
535
-- Splits a string with author names, formats each name and joins them.
--
-- author   : string with names separated by "and"
-- combiner : formatter function or the name of one in the authors table; a
--            missing or unknown name falls back to authors.normal
-- what     : key in settings; only when settings[what] is set, the et al
--            fields of settings are used
-- settings : table with separators and optional etallimit, etaldisplay and
--            etaltext; the default settings are used when it is missing
--
-- When the list has more than etallimit names, only the first etaldisplay
-- names are shown and etaltext is appended.
function authors.concat(author,combiner,what,settings)
    settings = settings or defaultsettings
    if combiner == nil or type(combiner) == "string" then
        combiner = authors[combiner or "normal"] or authors.normal
    end
    local split = splitauthors(author)
    local total = #split
    local etallimit, etaldisplay, etaltext = 1000, 1000, ""
    if what ~= nil and settings[what] then
        -- the values may be strings when they come from a settings list
        etallimit   = tonumber(settings.etallimit)   or 1000
        etaldisplay = tonumber(settings.etaldisplay) or etallimit
        etaltext    = settings.etaltext or ""
    end
    local max = total
    if max > etallimit and etaldisplay < max then
        max = etaldisplay
    end
    -- only the names that are shown are handed over to the concatenator,
    -- the dropped ones are still tables and cannot be joined
    local names = { }
    for i=1,max do
        names[i] = combiner(split[i],settings)
    end
    local result = bibtexconcat(names,settings)
    if max < total then
        return result .. etaltext
    else
        return result
    end
end
562
-- Builds a short tag: for every author the initials followed by the first
-- character of each surname word, and after all authors the year (when
-- given).
function authors.short(author,year)
    local pieces = { }
    local function add(piece)
        pieces[#pieces+1] = piece
    end
    if author then
        local list = splitauthors(author)
        for a=1,#list do
            local entry = list[a]
            local initials = entry.initials
            for i=1,#initials do
                add(initials[i])
            end
            local surnames = entry.surnames
            for s=1,#surnames do
                add(utfchar(lpegmatch(firstcharacter,surnames[s])))
            end
        end
    end
    if year then
        add(year)
    end
    return concat(pieces)
end
584
585
586
587
-- Collects, for the entries whose tag is in list, the years per author
-- string. Returns a table that maps an author field to a list of years (in
-- no particular order).
--
-- id   : identifier of a registered xml tree, resolved with getid
-- list : settings string with the wanted entry tags
local function collectauthoryears(id,list)
    local wanted = settings_to_hash(list)
    local root = getid(id)
    -- found[author][year] is the list of entries of that author and year
    local found = { }
    for e in xml.collected(root,"/bibtex/entry") do
        if wanted[e.at.tag] then
            local year = xmlfilter(e,"xml:///field[@name='year']/text()")
            local author = xmlfilter(e,"xml:///field[@name='author']/text()")
            if author and year then
                local years = found[author]
                if not years then
                    years = { }
                    found[author] = years
                end
                local entries = years[year]
                if not entries then
                    entries = { }
                    years[year] = entries
                end
                entries[#entries+1] = e
            end
        end
    end
    local done = { }
    for author, years in next, found do
        local yrs = { }
        for year in next, years do
            -- nothing is collected when subyears is set
            if not subyears then
                yrs[#yrs+1] = year
            end
        end
        done[author] = yrs
    end
    return done
end
633
-- Formatter name and settings shared by the finalizers and reference
-- commands.
local method   = "normal"
local settings = { }

-- Replaces the shared settings table; a missing argument keeps the current
-- one.
function authors.setsettings(s)
    if s then
        settings = s
    end
end
639
640if commands then
641
642 local sessions = { }
643
644 function commands.definebibtexsession(name)
645 sessions[name] = bibtex.new()
646 end
647
648 function commands.preparebibtexsession(name,xmlname,options)
649 bibtex.toxml(sessions[name],options)
650 lxml.register(xmlname,sessions[name].xml)
651 end
652
653 function commands.registerbibtexfile(name,filename)
654 bibtex.load(sessions[name],filename)
655 end
656
657 function commands.registerbibtexentry(name,entry)
658 local session = sessions[name]
659 local entries = session.entries
660 if not entries then
661 session.entries = { [entry] = true }
662 else
663 entries[entry] = true
664 end
665 end
666
667
668
669
670
671 function finalizers.bibtexconcat(collected,method,what)
672 if collected then
673 local author = collected[1].dt[1] or ""
674 if author ~= "" then
675 context(authors.concat(author,method,what,settings))
676 end
677 end
678 end
679
680 function finalizers.bibtexshort(collected)
681 if collected then
682 local c = collected[1]
683 local year = xmlfilter(c,"xml://field[@name='year']/text()")
684 local author = xmlfilter(c,"xml://field[@name='author']/text()")
685 context(authors.short(author,year))
686 end
687 end
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716 function bibtex.authorref(id,list)
717 local result = collectauthoryears(id,list,method,what)
718 for author, years in next, result do
719 context(authors.concat(author,method,what,settings))
720 end
721 end
722
723 function bibtex.authoryearref(id,list)
724 local result = collectauthoryears(id,list,method,what)
725 for author, years in next, result do
726 context("%s (%s)",authors.concat(author,method,what,settings),concat(years,", "))
727 end
728 end
729
730 function bibtex.authoryearsref(id,list)
731 local result = collectauthoryears(id,list,method,what)
732 for author, years in next, result do
733 context("(%s, %s)",authors.concat(author,method,what,settings),concat(years,", "))
734 end
735 end
736
737 function bibtex.singularorplural(singular,plural)
738 if lastconcatsize and lastconcatsize > 1 then
739 context(plural)
740 else
741 context(singular)
742 end
743 end
744
745end
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769 |