1if not modules then modules = { } end modules ['bibl-bib'] = {
2 version = 1.001,
3 comment = "this module is the basis for the lxml-* ones",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13local lower, format, gsub, concat = string.lower, string.format, string.gsub, table.concat
14local next = next
15local utfchar = utf.char
16local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
17local textoutf = characters and characters.tex.toutf
18local variables = interfaces and interfaces.variables
19local settings_to_hash = utilities.parsers.settings_to_hash
20local finalizers = xml.finalizers.tex
21local xmlfilter, xmltext, getid = xml.filter, xml.text, lxml.getid
22local formatters = string.formatters
23
24local P, R, S, C, Cc, Cs, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct
25
-- Tracker flag for the "publications.bibxml" tracker. The callback has to
-- update this very local (bibtex.load tests it); assigning any other name
-- would only create a stray global and leave the flag permanently false.
local trace_bibxml = false  trackers.register("publications.bibxml", function(v) trace_bibxml = v end)
27
-- Reporter used for all messages issued by this module.
local report_xml = logs.reporter("publications","xml")

-- The module table is a global that is reused when it already exists.
bibtex       = bibtex or { }
local bibtex = bibtex

-- Counters that are shown by the statistics callback at the end of the file.
local bibtexstats = bibtex.statistics or { }
bibtex.statistics = bibtexstats

bibtexstats.nofbytes, bibtexstats.nofdefinitions, bibtexstats.nofshortcuts = 0, 0, 0

-- Fallback values for month abbreviations; consulted by resolve() after the
-- shortcuts of the current session.
local defaultshortcuts = {
    jan = "1",  feb = "2",  mar = "3",
    apr = "4",  may = "5",  jun = "6",
    jul = "7",  aug = "8",  sep = "9",
    oct = "10", nov = "11", dec = "12",
}

-- Parser state: bibtex.convert rebinds these to the tables of the session
-- that is being processed.
local shortcuts, data = { }, { }
local entries
58
59
60
61
62
-- Grammar action for a matched "tag{ key = value }" construct.
--
--   tag   : the keyword in front of the brace (compared case insensitively)
--   key   : first assignment key, nil when the braces hold no assignment
--   value : the already resolved value that belongs to key
--
-- Every call is counted; only "@string" constructs are stored as shortcuts.
local function do_shortcut(tag,key,value)
    bibtexstats.nofshortcuts = bibtexstats.nofshortcuts + 1
    -- An empty body like "@string{}" matches without any assignment, so key
    -- is nil; indexing a table with nil on assignment raises an error.
    if key ~= nil and lower(tag) == "@string" then
        shortcuts[key] = value
    end
end
69
-- Grammar action for a matched "tag{ key, field = value, ... }" construct.
--
--   tag : the keyword in front of the brace (used as is for the category)
--   key : the citation key
--   tab : flat list { name1, value1, name2, value2, ... }
--
-- When an entries filter is active, keys that are not listed are ignored.
local function do_definition(tag,key,tab)
    if entries and not entries[key] then
        return
    end
    bibtexstats.nofdefinitions = bibtexstats.nofdefinitions + 1
    -- turn the flat name/value list into a hash
    local fields = { }
    for i=1,#tab,2 do
        local name = tab[i]
        fields[name] = tab[i+1]
    end
    -- store under the category, creating the category on first use
    local category = data[tag]
    if category then
        category[key] = fields
    else
        data[tag] = { [key] = fields }
    end
end
85
-- Maps a bare keyword onto its replacement: session shortcuts win over the
-- default (month) shortcuts; an unknown keyword is returned unchanged.
local function resolve(s)
    local found = shortcuts[s]
    if not found then
        found = defaultshortcuts[s]
    end
    return found or s
end
89
-- Single character building blocks.

local percent    = P("%")
local start      = P("@")
local comma      = P(",")
local hash       = P("#")
local escape     = P("\\")
local single     = P("'")
local double     = P('"')
local left       = P('{')
local right      = P('}')
local both       = left + right
local lineending = S("\n\r")
local space      = S(" \t\n\r\f")
local spacing    = space^0
local equal      = P("=")

-- A run of whitespace is replaced by one space (inside a substitution capture).
local collapsed  = space^1 / " "

-- Keywords are used for tags, citation keys, field names and bare values.
local keychar    = R("az","AZ","09") + S("@_:-")
local keyword    = C(keychar^1)

-- Quoted content: escaped quotes are kept, whitespace is collapsed.
local s_quoted   = ((escape*single) + collapsed + (1-single))^0
local d_quoted   = ((escape*double) + collapsed + (1-double))^0
local balanced   = lpegpatterns.balanced

-- The four value forms; the delimiters themselves are dropped from the result
-- and a bare keyword goes through resolve().
local s_value    = (single/"") * s_quoted * (single/"")
local d_value    = (double/"") * d_quoted * (double/"")
local b_value    = (left /"") * balanced * (right /"")
local r_value    = keyword/resolve

-- Values can be glued together with "#"; the glue (and the spacing around it)
-- is removed.
local joiner     = (spacing * hash * spacing) / ""
local somevalue  = s_value + d_value + b_value + r_value
local value      = Cs(somevalue * (joiner * somevalue)^0)

local assignment = spacing * keyword * spacing * equal * spacing * value * spacing
local shortcut   = keyword * spacing * left * spacing * (assignment * comma^0)^0 * spacing * right
local definition = keyword * spacing * left * spacing * keyword * comma * Ct((assignment * comma^0)^0) * spacing * right
local comment    = keyword * spacing * left * (1-right)^0 * spacing * right
local forget     = percent^1 * (1-lineending)^0

-- The alternatives are tried in this order at each position; anything that
-- matches none of them is skipped one character at a time.
local grammar = (
    space
  + forget
  + shortcut   / do_shortcut
  + definition / do_definition
  + comment
  + 1
)^0
130
-- Parses bibtex source and stores the definitions and shortcuts that are
-- found in the given session.
--
--   session : table as returned by bibtex.new (its data, shortcuts, entries
--             and nofbytes fields are used)
--   content : string with bibtex source; nil is treated as an empty string
function bibtex.convert(session,content)
    -- normalize first: the byte counters below take the length of content
    content = content or ""
    statistics.starttiming(bibtex)
    -- the grammar actions work on these file level variables
    data, shortcuts, entries = session.data, session.shortcuts, session.entries
    local size = #content
    bibtexstats.nofbytes = bibtexstats.nofbytes + size
    session.nofbytes = session.nofbytes + size
    lpegmatch(grammar,content)
    statistics.stoptiming(bibtex)
end
139
-- Locates a bib file and feeds its content to bibtex.convert.
--
--   session  : table as returned by bibtex.new
--   filename : name that is looked up with resolvers.findfile (type "bib")
--
-- Nothing happens when the file cannot be located. An empty (or unreadable)
-- file is reported and, as the message states, not converted.
function bibtex.load(session,filename)
    statistics.starttiming(bibtex)
    -- separate names so that neither the argument nor the file level data
    -- table gets shadowed
    local fullname = resolvers.findfile(filename,"bib")
    if fullname and fullname ~= "" then
        local content = io.loaddata(fullname) or ""
        if content == "" then
            report_xml("empty file %a, no conversion to xml",fullname)
        else
            if trace_bibxml then
                report_xml("converting file %a to xml",fullname)
            end
            bibtex.convert(session,content)
        end
    end
    statistics.stoptiming(bibtex)
end
154
-- Returns a fresh session: empty data and shortcut tables, a zero byte count,
-- no entries filter, not yet converted, and an xml tree that only has an
-- empty <bibtex> root.
function bibtex.new()
    local session = {
        loaded    = false,
        nofbytes  = 0,
        data      = { },
        shortcuts = { },
        -- entries stays unset (nil) until an entry gets registered
    }
    session.xml = xml.convert("<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>")
    return session
end
165
local p_escaped = lpegpatterns.xml.escaped

-- Replacement table for gsub(value,"\\(.)",...): a backslash followed by one
-- of these letters is rewritten to the listed string, anything else is left
-- as it is.
local ihatethis = {
    f = "\\f", n = "\\n", r = "\\r", s = "\\s",
    t = "\\t", v = "\\v", z = "\\z",
}

-- A backslash followed by letters becomes \bibtexcommand{letters}.
local backslash = P("\\") / ""
local letters   = R("az","AZ")^1
local command   = backslash * Cc("\\bibtexcommand{") * letters * Cc("}")

local any       = P(1)
local done      = P(-1)

-- Brace delimiters that vanish from the substituted result.
local one_l     = P("{")  / ""
local one_r     = P("}")  / ""
local two_l     = P("{{") / ""
local two_r     = P("}}") / ""

local inner_two = (command + any - two_r - done)^0
local inner_one = (command + any - one_r - done)^0

-- Strips one pair of outer {{..}} or {..} when the pair spans the whole
-- string; commands are rewritten in all three alternatives.
local filter = Cs(
    two_l * inner_two * two_r * done
  + one_l * inner_one * one_r * done
  + (command + any)^0
)
191
-- Serializes the definitions of a session to xml and stores the converted
-- tree in session.xml. This is done only once per session; afterwards the
-- data and shortcuts tables of the session are released.
--
--   session : table as returned by bibtex.new
--   options : settings passed to settings_to_hash; the keys "convert" and
--             "strip" are looked at
function bibtex.toxml(session,options)
    if session.loaded then
        return
    end
    session.loaded = true
    statistics.starttiming(bibtex)
    local options = settings_to_hash(options)
    local convert, strip = options.convert, options.strip
    local wanted = session.entries
    local lines = {
        "<?xml version='1.0' standalone='yes'?>",
        "<bibtex>",
    }
    local n = 2
    for category, list in next, session.data do
        -- the category is the tag without its leading @, in lowercase
        category = lower(gsub(category,"^@",""))
        for name, fields in next, list do
            if not wanted or wanted[name] then
                n = n + 1
                lines[n] = formatters["<entry tag='%s' category='%s'>"](lower(name),category)
                for key, value in next, fields do
                    value = gsub(value,"\\(.)",ihatethis)
                    value = lpegmatch(p_escaped,value)
                    -- empty fields are not written at all
                    if value ~= "" then
                        if convert then
                            value = textoutf(value,true)
                        end
                        if strip then
                            value = lpegmatch(filter,value) or value
                        end
                        n = n + 1
                        lines[n] = formatters[" <field name='%s'>%s</field>"](key,value)
                    end
                end
                n = n + 1
                lines[n] = "</entry>"
            end
        end
    end
    n = n + 1
    lines[n] = "</bibtex>"
    session.xml = xml.convert(concat(lines,"\n"), {
        resolve_entities            = true,
        resolve_predefined_entities = true,
        utfize_entities             = true,
    } )
    session.data      = nil
    session.shortcuts = nil
    statistics.stoptiming(bibtex)
end
245
-- Statistics line for this module; nothing is reported (nil) as long as no
-- bytes have been processed.
statistics.register("bibtex load time", function()
    local nofbytes = bibtexstats.nofbytes
    if not (nofbytes > 0) then
        return nil
    end
    local elapsed = statistics.elapsedtime(bibtex)
    return format("%s seconds, %s bytes, %s definitions, %s shortcuts",
        elapsed,nofbytes,bibtexstats.nofdefinitions,bibtexstats.nofshortcuts)
end)
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
-- The character database is loaded on demand when it is not present yet.
if not characters then
    dofile(resolvers.findfile("char-def.lua"))
end

local chardata = characters.data
local concat   = table.concat

local lpeg = lpeg

local P, Ct, lpegmatch, lpegpatterns = lpeg.P, lpeg.Ct, lpeg.match, lpeg.patterns

-- From here on only a plain space counts as space.
local space = P(" ")
local comma = P(",")

-- Splitters used on author lists: on " and ", on commas (with optional
-- surrounding spaces) and on runs of spaces.
local tsplitat       = lpeg.tsplitat
local andsplitter    = tsplitat(space^1 * "and" * space^1)
local commasplitter  = tsplitat(space^0 * comma * space^0)
local spacesplitter  = tsplitat(space^1)
local firstcharacter = lpegpatterns.utf8byte
312
-- True when the character data of the first character of str has category
-- "lu"; a false value when there is no character data for it.
local function is_upper(str)
    local info = chardata[lpegmatch(firstcharacter,str)]
    if info then
        return info.category == "lu"
    end
    return info
end
318
-- Splits a bibtex author string into a list of name records.
--
--   str : authors separated by " and "
--
-- Returns a table with one record per author plus an "original" field that
-- holds str. Each record has the fields original, firstnames, vons, surnames,
-- initials and juniors (vons and juniors can be nil). The number of comma
-- separated parts of an author selects the interpretation:
--
--   1 part   : First von Last
--   2 parts  : Last, First (trailing words that do not start uppercase end
--              up in vons)
--   3+ parts : Last, Jr, First (parts beyond the third are ignored)
local function splitauthors(str)
    local authors = lpegmatch(andsplitter,str)
    for i=1,#authors do
        local author   = authors[i]
        local split    = lpegmatch(commasplitter,author)
        local nofparts = #split
        local firstnames, vons, surnames, juniors
        if nofparts == 1 then
            local words = lpegmatch(spacesplitter,author)
            firstnames, vons, surnames = { }, { }, { }
            local w, nofwords = 1, #words
            -- leading uppercase words are first names
            while w <= nofwords and is_upper(words[w]) do
                firstnames[#firstnames+1] = words[w]
                w = w + 1
            end
            -- the following non uppercase words form the von part
            while w <= nofwords and not is_upper(words[w]) do
                vons[#vons+1] = words[w]
                w = w + 1
            end
            -- whatever is left is the surname
            while w <= nofwords do
                surnames[#surnames+1] = words[w]
                w = w + 1
            end
        elseif nofparts == 2 then
            local words = lpegmatch(spacesplitter,split[2])
            surnames = lpegmatch(spacesplitter,split[1])
            firstnames, vons = { }, { }
            local w, nofwords = 1, #words
            while w <= nofwords and is_upper(words[w]) do
                firstnames[#firstnames+1] = words[w]
                w = w + 1
            end
            while w <= nofwords do
                vons[#vons+1] = words[w]
                w = w + 1
            end
        else
            -- In the three part bibtex form the surname comes first and the
            -- first names come last, so these two must not be swapped.
            surnames   = lpegmatch(spacesplitter,split[1])
            juniors    = lpegmatch(spacesplitter,split[2])
            firstnames = lpegmatch(spacesplitter,split[3])
        end
        -- a single name (like "Plato") is a surname, not a first name
        if #surnames == 0 and #firstnames > 0 then
            surnames[1] = firstnames[#firstnames]
            firstnames[#firstnames] = nil
        end
        local initials = { }
        for j=1,#firstnames do
            -- an empty word has no first character; skip it instead of
            -- passing nil to utfchar
            local first = lpegmatch(firstcharacter,firstnames[j])
            if first then
                initials[#initials+1] = utfchar(first)
            end
        end
        authors[i] = {
            original   = author,
            firstnames = firstnames,
            vons       = vons,
            surnames   = surnames,
            initials   = initials,
            juniors    = juniors,
        }
    end
    authors.original = str
    return authors
end
398
-- Returns a new list in which symbol (a period unless given) is appended to
-- each of the given initials.
local function the_initials(initials,symbol)
    if not symbol then
        symbol = "."
    end
    local dotted = { }
    for index=1,#initials do
        dotted[index] = initials[index] .. symbol
    end
    return dotted
end
406
407
408
-- Namespace for the author formatters; an existing table is reused.
local authors  = bibtex.authors or { }
bibtex.authors = authors

-- Separators that are used when the settings passed to a formatter lack them.
local defaultsettings = {
    -- between the parts of one name
    firstnamesep        = " ",
    vonsep              = " ",
    surnamesep          = " ",
    juniorsep           = " ",
    surnamejuniorsep    = ", ",
    juniorjuniorsep     = ", ",
    surnamefirstnamesep = ", ",
    surnameinitialsep   = ", ",
    -- between names in a list
    namesep             = ", ",
    lastnamesep         = " and ",
    finalnamesep        = " and ",
}
426
-- Formats one author record as: firstnames vons surnames juniors.
--
--   author   : record as produced by splitauthors
--   settings : optional table with separators (firstnamesep, vonsep,
--              surnamesep); missing ones are taken from defaultsettings
--
-- Returns the formatted string.
function authors.normal(author,settings)
    settings = settings or defaultsettings
    local firstnames, vons, surnames, juniors = author.firstnames, author.vons, author.surnames, author.juniors
    local result = { }
    if firstnames and #firstnames > 0 then
        result[#result+1] = concat(firstnames," ")
        result[#result+1] = settings.firstnamesep or defaultsettings.firstnamesep
    end
    if vons and #vons > 0 then
        result[#result+1] = concat(vons," ")
        result[#result+1] = settings.vonsep or defaultsettings.vonsep
    end
    if surnames then
        result[#result+1] = concat(surnames," ")
    end
    if juniors and #juniors > 0 then
        -- the separator belongs between surname and junior; putting it after
        -- the junior glues both together and leaves a trailing separator
        result[#result+1] = settings.surnamesep or defaultsettings.surnamesep
        result[#result+1] = concat(juniors," ")
    end
    return concat(result)
end
447
-- Formats one author record as: initials vons surnames, juniors.
--
--   author   : record as produced by splitauthors
--   settings : optional table with separators (firstnamesep, vonsep,
--              surnamejuniorsep); missing ones are taken from defaultsettings
--
-- Being the short variant (compare authors.invertedshort) the initials are
-- used; records that carry no initials fall back to the full first names.
-- Returns the formatted string.
function authors.normalshort(author,settings)
    settings = settings or defaultsettings
    local firstnames, vons, surnames, juniors = author.firstnames, author.vons, author.surnames, author.juniors
    local initials = author.initials
    local result = { }
    local given = firstnames
    if initials and #initials > 0 then
        given = the_initials(initials)
    end
    if given and #given > 0 then
        result[#result+1] = concat(given," ")
        result[#result+1] = settings.firstnamesep or defaultsettings.firstnamesep
    end
    if vons and #vons > 0 then
        result[#result+1] = concat(vons," ")
        result[#result+1] = settings.vonsep or defaultsettings.vonsep
    end
    if surnames then
        result[#result+1] = concat(surnames," ")
    end
    if juniors and #juniors > 0 then
        -- separator first, otherwise it trails the junior part
        result[#result+1] = settings.surnamejuniorsep or defaultsettings.surnamejuniorsep
        result[#result+1] = concat(juniors," ")
    end
    return concat(result)
end
468
-- Formats one author record as: vons surnames, juniors, firstnames.
--
--   author   : record with (optional) vons, surnames, juniors and firstnames
--   settings : optional table with separators (vonsep, juniorjuniorsep,
--              surnamefirstnamesep); missing ones come from defaultsettings
--
-- Returns the formatted string.
function authors.inverted(author,settings)
    settings = settings or defaultsettings
    local parts = { }

    local vons = author.vons
    if vons and #vons > 0 then
        parts[#parts+1] = concat(vons," ")
        parts[#parts+1] = settings.vonsep or defaultsettings.vonsep
    end

    local surnames = author.surnames
    if surnames then
        parts[#parts+1] = concat(surnames," ")
    end

    local juniors = author.juniors
    if juniors and #juniors > 0 then
        parts[#parts+1] = settings.juniorjuniorsep or defaultsettings.juniorjuniorsep
        parts[#parts+1] = concat(juniors," ")
    end

    local firstnames = author.firstnames
    if firstnames and #firstnames > 0 then
        parts[#parts+1] = settings.surnamefirstnamesep or defaultsettings.surnamefirstnamesep
        parts[#parts+1] = concat(firstnames," ")
    end

    return concat(parts)
end
489
-- Formats one author record as: vons surnames, juniors, initials (each
-- initial followed by a period).
--
--   author   : record with (optional) vons, surnames, juniors and initials
--   settings : optional table with separators (vonsep, juniorjuniorsep,
--              surnameinitialsep); missing ones come from defaultsettings
--
-- Returns the formatted string.
function authors.invertedshort(author,settings)
    settings = settings or defaultsettings
    local parts = { }

    local vons = author.vons
    if vons and #vons > 0 then
        parts[#parts+1] = concat(vons," ")
        parts[#parts+1] = settings.vonsep or defaultsettings.vonsep
    end

    local surnames = author.surnames
    if surnames then
        parts[#parts+1] = concat(surnames," ")
    end

    local juniors = author.juniors
    if juniors and #juniors > 0 then
        parts[#parts+1] = settings.juniorjuniorsep or defaultsettings.juniorjuniorsep
        parts[#parts+1] = concat(juniors," ")
    end

    local initials = author.initials
    if initials and #initials > 0 then
        parts[#parts+1] = settings.surnameinitialsep or defaultsettings.surnameinitialsep
        parts[#parts+1] = concat(the_initials(initials)," ")
    end

    return concat(parts)
end
510
-- Number of names handled by the most recent bibtexconcat call; read by
-- bibtex.singularorplural.
local lastconcatsize = 1

-- Joins a list of formatted names.
--
--   t        : list of strings
--   settings : optional table with namesep, lastnamesep and finalnamesep;
--              missing ones are taken from defaultsettings
--
-- Two names are joined with lastnamesep. With three or more names, namesep
-- follows all but the last two and finalnamesep sits between the last two.
-- Returns the joined string ("" for an empty list).
local function bibtexconcat(t,settings)
    settings = settings or defaultsettings
    local namesep      = settings.namesep      or defaultsettings.namesep      or ", "
    local lastnamesep  = settings.lastnamesep  or defaultsettings.lastnamesep  or namesep
    local finalnamesep = settings.finalnamesep or defaultsettings.finalnamesep or lastnamesep
    -- deliberately not a new local: the file level variable has to be updated
    lastconcatsize = #t
    local n = lastconcatsize
    if n > 2 then
        local s = { }
        for i=1,n-2 do
            s[i] = t[i] .. namesep
        end
        s[n-1], s[n] = t[n-1] .. finalnamesep, t[n]
        return concat(s)
    elseif n > 1 then
        return concat(t,lastnamesep)
    elseif n > 0 then
        return t[1]
    else
        return ""
    end
end
533
-- Splits an author string, formats each author and joins the results.
--
--   author   : bibtex author string (authors separated by " and ")
--   combiner : formatter function, or the name of one in the authors table;
--              nil or an unknown name selects authors.normal
--   what     : key into settings; the et al limits only apply when
--              settings[what] is set
--   settings : optional table with separators and with etallimit,
--              etaldisplay and etaltext; defaults to defaultsettings
--
-- When there are more than etallimit authors only the first etaldisplay are
-- shown and etaltext is appended. Returns the resulting string.
function authors.concat(author,combiner,what,settings)
    settings = settings or defaultsettings
    if type(combiner) == "string" then
        combiner = authors[combiner] or authors.normal
    elseif combiner == nil then
        combiner = authors.normal
    end
    local split = splitauthors(author)
    local etallimit, etaldisplay, etaltext = 1000, 1000, ""
    -- NOTE(review): the limits are read from settings itself, not from the
    -- settings[what] value, as before -- confirm that this is intended
    if settings[what] then
        -- values may arrive as strings, so force numbers before comparing
        etallimit   = tonumber(settings.etallimit)   or 1000
        etaldisplay = tonumber(settings.etaldisplay) or etallimit
        etaltext    = settings.etaltext or ""
    end
    local total = #split
    local max = total
    if max > etallimit and etaldisplay < max then
        max = etaldisplay
    end
    -- collect only the names that are shown; the remaining records are
    -- tables and must not end up in the string concatenation
    local names = { }
    for i=1,max do
        names[i] = combiner(split[i],settings)
    end
    local result = bibtexconcat(names,settings)
    if max < total then
        -- the list got truncated, so this is where the et al text goes
        return result .. etaltext
    else
        return result
    end
end
560
-- Builds a compact tag: for each author all initials followed by the first
-- character of each surname word, and finally the year (when given).
--
--   author : bibtex author string or nil
--   year   : optional value that is appended as is
function authors.short(author,year)
    local pieces = { }
    if author then
        local list = splitauthors(author)
        for a=1,#list do
            local entry = list[a]
            local initials = entry.initials
            for i=1,#initials do
                pieces[#pieces+1] = initials[i]
            end
            local surnames = entry.surnames
            for s=1,#surnames do
                local first = lpegmatch(firstcharacter,surnames[s])
                pieces[#pieces+1] = utfchar(first)
            end
        end
    end
    if year then
        pieces[#pieces+1] = year
    end
    return concat(pieces)
end
582
583
584
585
-- Collects, for the listed entry tags, which years occur per author string.
--
--   id   : identifier that getid resolves to the xml tree of the database
--   list : comma separated tags (converted with settings_to_hash)
--
-- Returns a table that maps each author string onto a list of years; the
-- order of the years is the traversal order of the intermediate hash.
local function collectauthoryears(id,list)
    list = settings_to_hash(list)
    id = getid(id)
    -- found[author][year] = list of entries
    local found = { }
    for e in xml.collected(id,"/bibtex/entry") do
        if list[e.at.tag] then
            local year   = xmlfilter(e,"xml:///field[@name='year']/text()")
            local author = xmlfilter(e,"xml:///field[@name='author']/text()")
            if author and year then
                local byyear = found[author] or { }
                found[author] = byyear
                local hits = byyear[year] or { }
                byyear[year] = hits
                hits[#hits+1] = e
            end
        end
    end
    local done = { }
    for author, years in next, found do
        local yrs = { }
        for year in next, years do
            -- subyears is not defined in this file; when it is set elsewhere
            -- no years are collected at all
            if not subyears then
                yrs[#yrs+1] = year
            end
        end
        done[author] = yrs
    end
    return done
end
631
-- Formatter name and settings that the reference commands further down pass
-- on to authors.concat.
local method   = "normal"
local settings = { }

-- Replaces the shared settings table; a nil (or false) argument keeps the
-- current one.
function authors.setsettings(s)
    if s then
        settings = s
    end
end
637
-- The interface to the TeX end is only set up when the commands namespace
-- is available.
if commands then

    -- Sessions by name; private to this block.
    local sessions = { }

    -- Starts a fresh session under the given name.
    function commands.definebibtexsession(name)
        sessions[name] = bibtex.new()
    end

    -- Converts the session to xml (done once, see bibtex.toxml) and registers
    -- the resulting tree under xmlname.
    function commands.preparebibtexsession(name,xmlname,options)
        local session = sessions[name]
        bibtex.toxml(session,options)
        lxml.register(xmlname,session.xml)
    end

    -- Loads a bib file into the named session.
    function commands.registerbibtexfile(name,filename)
        local session = sessions[name]
        bibtex.load(session,filename)
    end

    -- Marks an entry as wanted; as soon as one entry is registered only
    -- registered entries are processed for that session.
    function commands.registerbibtexentry(name,entry)
        local session = sessions[name]
        local wanted = session.entries
        if wanted then
            wanted[entry] = true
        else
            session.entries = { [entry] = true }
        end
    end

    -- Finalizer: typesets the first collected element's text as an author
    -- list, using the given formatter name and settings key.
    function finalizers.bibtexconcat(collected,method,what)
        if not collected then
            return
        end
        local author = collected[1].dt[1] or ""
        if author == "" then
            return
        end
        context(authors.concat(author,method,what,settings))
    end

    -- Finalizer: typesets the short tag (see authors.short) built from the
    -- author and year fields of the first collected element.
    function finalizers.bibtexshort(collected)
        if not collected then
            return
        end
        local first  = collected[1]
        local year   = xmlfilter(first,"xml://field[@name='year']/text()")
        local author = xmlfilter(first,"xml://field[@name='author']/text()")
        context(authors.short(author,year))
    end

    -- The three reference commands below differ only in what they typeset
    -- per author. Note that "what" is not defined in this file.

    function bibtex.authorref(id,list)
        local collected = collectauthoryears(id,list,method,what)
        for author, years in next, collected do
            context(authors.concat(author,method,what,settings))
        end
    end

    function bibtex.authoryearref(id,list)
        local collected = collectauthoryears(id,list,method,what)
        for author, years in next, collected do
            local names = authors.concat(author,method,what,settings)
            context("%s (%s)",names,concat(years,", "))
        end
    end

    function bibtex.authoryearsref(id,list)
        local collected = collectauthoryears(id,list,method,what)
        for author, years in next, collected do
            local names = authors.concat(author,method,what,settings)
            context("(%s, %s)",names,concat(years,", "))
        end
    end

    -- Typesets plural when the last concatenated author list had more than
    -- one name, else singular.
    function bibtex.singularorplural(singular,plural)
        if not lastconcatsize or lastconcatsize <= 1 then
            context(singular)
        else
            context(plural)
        end
    end

end
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767 |