if not modules then modules = { } end modules ['publ-dat'] = {
    version   = 1.001,
    comment   = "this module part of publication support",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

if not characters then
    dofile(resolvers.findfile("char-utf.lua"))
    dofile(resolvers.findfile("char-tex.lua"))
end

if not utilities.sequencers then
    dofile(resolvers.findfile("util-seq.lua"))
end

local lower, find, sub = string.lower, string.find, string.sub
local concat, copy, tohash = table.concat, table.copy, table.tohash
local next, type, rawget, tonumber = next, type, rawget, tonumber
local utfchar = utf.char
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
local textoutf = characters and characters.tex.toutf
local settings_to_hash, settings_to_array = utilities.parsers.settings_to_hash, utilities.parsers.settings_to_array
local formatters = string.formatters
local sortedkeys, sortedhash, keys, sort = table.sortedkeys, table.sortedhash, table.keys, table.sort
local xmlcollected, xmltext, xmlconvert = xml.collected, xml.text, xml.convert
local setmetatableindex = table.setmetatableindex

local P, R, S, V, C, Cc, Cs, Ct, Carg, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Carg, lpeg.Cmt, lpeg.Cp

local p_whitespace    = lpegpatterns.whitespace
local p_utf8character = lpegpatterns.utf8character

local trace            = false  trackers.register("publications",            function(v) trace            = v end)
local trace_duplicates = true   trackers.register("publications.duplicates", function(v) trace_duplicates = v end)
local trace_strings    = false  trackers.register("publications.strings",    function(v) trace_strings    = v end)

local report            = logs.reporter("publications")
local report_duplicates = logs.reporter("publications","duplicates")
local report_strings    = logs.reporter("publications","strings")

local allocate = utilities.storage.allocate

local commands  = commands
local implement = interfaces and interfaces.implement

publications       = publications or { }
local publications = publications

local datasets        = publications.datasets or { }
publications.datasets = datasets

local writers        = publications.writers or { }
publications.writers = writers

local tables        = publications.tables or { }
publications.tables = tables

publications.statistics = publications.statistics or { }
local publicationsstats = publications.statistics

local loaders        = publications.loaders or { }
publications.loaders = loaders

local casters        = { }
publications.casters = casters

local components        = { }
publications.components = components

local enhancers        = publications.enhancers or { }
publications.enhancers = enhancers

local enhancer        = publications.enhancer or utilities.sequencers.new { arguments = "dataset" }
publications.enhancer = enhancer

utilities.sequencers.appendgroup(enhancer,"system")

publicationsstats.nofbytes       = 0
publicationsstats.nofdefinitions = 0
publicationsstats.nofshortcuts   = 0
publicationsstats.nofdatasets    = 0

local privates = allocate {
    category      = true,
    tag           = true,
    index         = true,
    suffix        = true,
    specification = true,
}

local specials = allocate {
    key      = true,
    crossref = true,
    keywords = true,
    language = true,
    comment  = true,
}

local implicits = allocate {
    category = "implicit",
    tag      = "implicit",
    key      = "implicit",
    keywords = "implicit",
    language = "implicit",
    crossref = "implicit",
}

local origins = allocate {
    "optional",
    "extra",
    "required",
    "virtual",
}

local virtuals = allocate {
    "authoryear",
    "authoryears",
    "authornum",
    "num",
    "suffix",
}

local defaulttypes = allocate {
    author     = "author",
    editor     = "author",
    translator = "author",

    page       = "pagenumber",
    pages      = "pagenumber",
    keywords   = "keyword",
    doi        = "url",
    url        = "url",
}

local defaultsets = allocate {
    page = { "page", "pages" },
}

tables.implicits = implicits
tables.origins   = origins
tables.virtuals  = virtuals
tables.types     = defaulttypes
tables.sets      = defaultsets
tables.privates  = privates
tables.specials  = specials

local variables = interfaces and interfaces.variables or setmetatableindex("self")

local v_all     = variables.all
local v_default = variables.default

if not publications.usedentries then
    function publications.usedentries()
        return { }
    end
end

local xmlplaceholder = "<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>"

local defaultshortcuts = allocate {
    jan = "1",
    feb = "2",
    mar = "3",
    apr = "4",
    may = "5",
    jun = "6",
    jul = "7",
    aug = "8",
    sep = "9",
    oct = "10",
    nov = "11",
    dec = "12",
}
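
-- A hedged sketch: unquoted values in a bib file are resolved as shortcuts,
-- first against the dataset's own @string definitions, then against these
-- month defaults. So in
--
--   @article{knuth1984,
--       month = jan,
--   }
--
-- the month field ends up as "1", while an unknown shortcut is reported and
-- stored as "<invalid: ...>".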

local space      = p_whitespace^0
local separator  = space * "+" * space
local p_splitter = lpeg.tsplitat(separator)

local unknownfield = function(t,k)
    local v = "extra"
    t[k] = v -- remembered, so reported only once
    return v
end

local unknowntype = function(t,k)
    local v = "string"
    t[k] = v -- remembered, so reported only once
    return v
end

local unknowncategory = function(t,k)
    local v = {
        required = false,
        optional = false,
        virtual  = false,
        fields   = setmetatableindex(unknownfield), -- this will remember them
        types    = setmetatableindex(unknowntype),
        sets     = setmetatableindex(defaultsets),
    }
    t[k] = v -- new definition
    return v
end

local default = {
    name       = "default",
    version    = "1.00",
    comment    = "unknown specification.",
    author     = "anonymous",
    copyright  = "no one",
    categories = setmetatableindex(unknowncategory),
    types      = setmetatableindex(defaulttypes,unknowntype),
}

local function checkfield(specification,category,data)
    local list    = setmetatableindex({},implicits)
    data.fields   = list
    data.category = category
    local sets    = data.sets or { }
    for i=1,#origins do
        local t = origins[i]
        local d = data[t]
        if d then
            for j=1,#d do
                local dj = d[j]
                dj = sets[dj] or dj
                if type(dj) == "table" then
                    for k=1,#dj do
                        list[dj[k]] = t
                    end
                else
                    list[dj] = t
                end
            end
        else
            data[t] = { }
        end
    end
    return data
end
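
-- For reference, a hedged sketch (not a real file) of the kind of table a
-- publ-imp-*.lua specification file is expected to return; the names used
-- here are made up:
--
--   return {
--       name       = "example",
--       version    = "1.00",
--       comment    = "example specification",
--       author     = "anonymous",
--       categories = {
--           article = {
--               sets     = { page = { "page", "pages" } },
--               required = { "author", "title" },
--               optional = { "year", "page" },
--           },
--       },
--       types = {
--           author = "author",
--           page   = "pagenumber",
--       },
--   }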

local specifications = setmetatableindex(function(t,name)
    if not name then
        return default
    end
    local filename = formatters["publ-imp-%s.lua"](name)
    local fullname = resolvers.findfile(filename) or ""
    if fullname == "" then
        report("no data definition file %a for %a",filename,name)
        t[name] = default
        return default
    end
    local specification = table.load(fullname)
    if not specification then
        report("invalid data definition file %a for %a",fullname,name)
        t[name] = default
        return default
    end

    local categories = specification.categories
    if not categories then
        categories = { }
        specification.categories = categories
    end
    setmetatableindex(categories,unknowncategory)

    local types = specification.types
    if not types then
        types = defaulttypes
        specification.types = types
    end
    setmetatableindex(types,unknowntype)

    local fields = setmetatableindex(unknownfield)
    specification.fields = fields

    local virtual = specification.virtual
    if virtual == nil or virtual == false then
        virtual = { }
    elseif type(virtual) ~= "table" then
        virtual = virtuals
    end
    specification.virtual       = virtual
    specification.virtualfields = tohash(virtual)

    for category, data in next, categories do
        categories[category] = checkfield(specification,category,copy(data))
    end

    t[name] = specification

    return specification
end)

publications.specifications = specifications

function publications.setcategory(target,category,data)
    local specification = specifications[target]
    specification.categories[category] = checkfield(specification,category,data)
end
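
-- A hedged usage sketch; "apa" resolves to publ-imp-apa.lua and the data
-- table has the same shape as a category in such a specification:
--
--   publications.setcategory("apa","patent", {
--       required = { "author", "title", "year" },
--       optional = { "note" },
--   })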

function publications.parenttag(dataset,tag)
    if not dataset or not tag then
        report("error in specification, dataset %a, tag %a",dataset,tag)
    elseif find(tag,"+",1,true) then
        local tags    = lpegmatch(p_splitter,tag)
        local parent  = tags[1]
        local current = datasets[dataset]
        local luadata = current.luadata
        local details = current.details
        local first   = luadata[parent]
        if first then
            local detail   = details[parent]
            local children = detail.children
            if not children then
                children = { }
                detail.children = children
            end

            for i=2,#tags do
                local tag = tags[i]
                for j=1,#children do
                    if children[j] == tag then
                        tag = false
                    end
                end
                if tag then
                    local entry = luadata[tag]
                    if entry then
                        local detail = details[tag]
                        children[#children+1] = tag
                        if detail.parent then
                            report("error in combination, dataset %a, tag %a, parent %a, ignored %a",dataset,tag,detail.parent,parent)
                        else
                            report("combining, dataset %a, tag %a, parent %a",dataset,tag,parent)
                            detail.parent = parent
                        end
                    end
                end
            end
            return parent
        end
    end
    return tag or ""
end
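
-- A hedged sketch: citing a combined tag makes the trailing tags children
-- of the first one (the tags are made up):
--
--   local parent = publications.parenttag("default","knuth1984+knuth1986")
--   -- parent == "knuth1984", and the details of knuth1986 now record
--   -- knuth1984 as its parent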

function publications.new(name)
    publicationsstats.nofdatasets = publicationsstats.nofdatasets + 1
    local dataset = {
        name       = name or "dataset " .. publicationsstats.nofdatasets,
        nofentries = 0,
        shortcuts  = { },
        luadata    = { },
        suffixes   = { },
        xmldata    = xmlconvert(xmlplaceholder),
        details    = { },
        ordered    = { },
        nofbytes   = 0,
        entries    = nil,
        sources    = { },
        loaded     = { },
        fields     = { },
        userdata   = { },
        used       = { },
        commands   = { },
        citestate  = { },
        status     = {
            resources = false,
            userdata  = false,
        },
        specifications = { },
        suffixed   = false,
    }

    return dataset
end

setmetatableindex(datasets,function(t,k)
    if type(k) == "table" then
        return k -- so this accessor also works as a checker
    else
        local v = publications.new(k)
        datasets[k] = v
        return v
    end
end)
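
-- Datasets can be indexed by name (created on demand) as well as passed
-- around as tables:
--
--   local d = datasets["default"] -- creates the dataset when missing
--   assert(datasets[d] == d)      -- a dataset table resolves to itself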

local function getindex(dataset,luadata,tag)
    local found = luadata[tag]
    if found then
        local index = found.index or 0
        dataset.ordered[tag] = index
        return index
    else
        local index = dataset.nofentries + 1
        dataset.nofentries = index
        dataset.ordered[index] = tag
        return index
    end
end

publications.getindex = getindex

do

    local space     = S(" \t\n\r\f")
    local collapsed = space^1/" "
    local csletter  = lpegpatterns.csletter or R("az","AZ")

    local command = P("\\") * (Carg(1) * C(csletter^1) * space^0 / function(list,c)
        list[c] = (list[c] or 0) + 1
        return "\\btxcmd{" .. c .. "}"
    end)

    local whatever = P("\\") * P(" ")^1 / " "

    local somemath = P("$") * ((1-P("$"))^1) * P("$") -- pass inline math through as-is

    local any  = P(1)
    local done = P(-1)

    local zero_l_r = P("{}") / "" * #P(1)
    local special  = P("#")  / "\\letterhash "

    local filter_0 = S('\\{}#')
    local filter_1 = (1-filter_0)^0 * filter_0
    local filter_2 = Cs(
        (
            somemath  +
            whatever  +
            command   +
            special   +
            collapsed +
            zero_l_r  +
            any
        )^0
    )
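
    -- A hedged sketch of what this filter does (exact spacing aside):
    --
    --   lpegmatch(filter_2,[[\TeX is $x^2$ # fun]],1,{})
    --
    -- gives something like
    --
    --   \btxcmd{TeX}is $x^2$ \letterhash fun
    --
    -- known commands become \btxcmd calls (and are counted in the passed
    -- list), inline math passes through untouched, and hashes are made
    -- harmless for later typesetting.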

    local function do_shortcut(key,value,dataset)
        publicationsstats.nofshortcuts = publicationsstats.nofshortcuts + 1
        dataset.shortcuts[key] = value
    end

    local tags = table.setmetatableindex("table")

    local indirectcrossrefs = true

    local function do_definition(category,tag,tab,dataset)
        publicationsstats.nofdefinitions = publicationsstats.nofdefinitions + 1
        if tag == "" then
            tag = "no-tag-set"
        end
        local fields  = dataset.fields
        local luadata = dataset.luadata
        local hashtag = tag
        if luadata[tag] then
            local t = tags[tag]
            local d = dataset.name
            local n = (t[d] or 0) + 1
            t[d] = n
            hashtag = tag .. "-" .. n
            if trace_duplicates then
                local p = { }
                for k, v in sortedhash(t) do
                    p[#p+1] = formatters["%s:%s"](k,v)
                end
                report_duplicates("tag %a is present multiple times: % t, assigning hashtag %a",tag,p,hashtag)
            end
        end
        local index = getindex(dataset,luadata,hashtag)
        local entries = {
            category = lower(category),
            tag      = tag,
            index    = index,
        }
        for i=1,#tab,2 do
            local original   = tab[i]
            local normalized = fields[original]
            if not normalized then
                normalized = lower(original)
                fields[original] = normalized
            end

            if rawget(entries,normalized) then
                if trace_duplicates then
                    report_duplicates("redundant field %a is ignored for tag %a in dataset %a",normalized,tag,dataset.name)
                end
            else
                local value = tab[i+1]
                value = textoutf(value)
                if lpegmatch(filter_1,value) then
                    value = lpegmatch(filter_2,value,1,dataset.commands)
                end
                if normalized == "crossref" then
                    if indirectcrossrefs then
                        setmetatableindex(entries,function(t,k)
                            local parent = rawget(luadata,value)
                            if parent == entries then
                                report_duplicates("bad parent %a for %a in dataset %s",value,hashtag,dataset.name)
                                setmetatableindex(entries,nil)
                                return entries
                            elseif parent then
                                setmetatableindex(entries,parent)
                                return entries[k]
                            else
                                report_duplicates("no valid parent %a for %a in dataset %s",value,hashtag,dataset.name)
                                setmetatableindex(entries,nil)
                            end
                        end)
                    else
                        dataset.nofcrossrefs = dataset.nofcrossrefs + 1
                    end
                end
                entries[normalized] = value
            end
        end
        luadata[hashtag] = entries
    end

    local f_invalid = formatters["<invalid: %s>"]

    local function resolve(s,dataset)
        local e = dataset.shortcuts[s]
        if e then
            if trace_strings then
                report_strings("%a resolves to %a",s,e)
            end
            return e
        end
        e = defaultshortcuts[s]
        if e then
            if trace_strings then
                report_strings("%a resolves to default %a",s,e)
            end
            return e
        end
        if tonumber(s) then
            return s
        end
        report("error in database, invalid value %a",s)
        return f_invalid(s)
    end

    local pattern = p_whitespace^0
                  * C(P("message") + P("warning") + P("error") + P("comment")) * p_whitespace^0 * P(":")
                  * p_whitespace^0
                  * C(P(1)^1)

    local function do_comment(s,dataset)
        local how, what = lpegmatch(pattern,s)
        if how and what then
            local t = string.splitlines(utilities.strings.striplines(what))
            local b = file.basename(dataset.fullname or dataset.name or "unset")
            for i=1,#t do
                report("%s > %s : %s",b,how,t[i])
            end
        end
    end

    local percent    = P("%")
    local start      = P("@")
    local comma      = P(",")
    local hash       = P("#")
    local escape     = P("\\")
    local single     = P("'")
    local double     = P('"')
    local left       = P("{")
    local right      = P("}")
    local both       = left + right
    local lineending = S("\n\r")
    local space      = S(" \t\n\r\f")
    local spacing    = space^0
    local equal      = P("=")

    local collapsed  = p_whitespace^1/" "
    local nospaces   = p_whitespace^1/""

    local p_left     = (p_whitespace^0 * left ) / ""
    local p_right    = (right * p_whitespace^0) / ""

    local keyword    = C((R("az","AZ","09") + S("@_:-"))^1)
    local key        = C((1-space-equal)^1)
    local tag        = C((1-space-comma)^0)
    local category   = C((1-space-left)^1)
    local s_quoted   = ((escape*single) + collapsed + (1-single))^0
    local d_quoted   = ((escape*double) + collapsed + (1-double))^0

    local reference  = P("@{") * C((R("az","AZ","09") + S("_:-"))^1) * P("}")
    local r_value    = reference * Carg(1) / resolve

    local balanced   = P {
        ((escape * (left+right)) + (collapsed + r_value + 1 - (left+right))^1 + V(2))^0,
        left * V(1) * right,
    }
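
    -- A hedged sketch of the grammar above: rule 1 is the body, V(2)
    -- recurses into rule 2 for each nesting level, and escaped braces pass
    -- through, so something like
    --
    --   lpegmatch(Cs(balanced),[[foo {bar {baz}} gnu]],1,dataset)
    --
    -- consumes the whole string, braces included (the trailing dataset
    -- argument is what @{...} references resolve against via Carg(1)).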

    local unbalanced = (left/"") * balanced * (right/"") * P(-1)

    local reference  = C((R("az","AZ","09") + S("_:-"))^1)
    local b_value    = p_left * balanced * p_right
    local s_value    = (single/"") * (unbalanced + s_quoted) * (single/"")
    local d_value    = (double/"") * (unbalanced + d_quoted) * (double/"")
    local r_value    = P("@") * reference * Carg(1) / resolve
                     +          reference * Carg(1) / resolve
    local n_value    = C(R("09")^1)

    local e_value    = Cs((left * balanced * right + (1 - S(",}")))^0) * Carg(1) / function(s,dataset)
        return resolve(s,dataset)
    end

    local somevalue  = d_value + b_value + s_value + r_value + n_value + e_value
    local value      = Cs((somevalue * ((spacing * hash * spacing)/"" * somevalue)^0))

    local stripper   = lpegpatterns.collapser
    local stripped   = value / function(s) return lpegmatch(stripper,s) end

    local forget     = percent^1 * (1-lineending)^0
    local spacing    = spacing * forget^0 * spacing
    local replacement = spacing * key * spacing * equal * spacing * value    * spacing
    local assignment  = spacing * key * spacing * equal * spacing * stripped * spacing
    local definition  = category * spacing * left * spacing * tag * spacing * comma * Ct((assignment * comma^0)^0) * spacing * right * Carg(1) / do_definition

    local crapword   = C((1-space-left)^1)
    local shortcut   = Cmt(crapword,function(_,p,s) return lower(s) == "string"  and p end) * spacing * left * ((replacement * Carg(1))/do_shortcut * comma^0)^0 * spacing * right
    local comment    = Cmt(crapword,function(_,p,s) return lower(s) == "comment" and p end) * spacing * lpegpatterns.argument * Carg(1) / do_comment

    local casecrap   = #S("sScC") * (shortcut + comment)

    local bibtotable = (space + forget + P("@") * (casecrap + definition) + 1)^0
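
    -- For orientation, a hedged sketch of input this parser accepts (a made
    -- up snippet):
    --
    --   @string{acme = {Acme Publishers}}
    --
    --   @comment{message : ignore me}
    --
    --   @article{knuth1984,
    --       author    = "Knuth, Donald",
    --       title     = {The {\TeX}book},
    --       publisher = acme,
    --       year      = 1984,
    --   }
    --
    -- @string values become shortcuts, @comment bodies get reported, real
    -- entries run through do_definition and anything else is skipped.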

    function publications.loadbibdata(dataset,content,source,kind)
        if not source then
            report("invalid source for dataset %a",dataset)
            return
        end
        local current = datasets[dataset]
        local size = #content
        if size == 0 then
            report("empty source %a for dataset %a",source,current.name)
        else
            report("adding bib data to set %a from source %a",current.name,source)
        end
        statistics.starttiming(publications)
        publicationsstats.nofbytes = publicationsstats.nofbytes + size
        current.nofbytes = current.nofbytes + size
        current.nofcrossrefs = 0
        if source then
            table.insert(current.sources, { filename = source, checksum = md5.HEX(content) })
            current.loaded[source] = kind or true
        end
        local luadata = current.luadata
        current.newtags = #luadata > 0 and { } or current.newtags
        lpegmatch(bibtotable,content or "",1,current)
        if current.nofcrossrefs > 0 then
            -- direct crossrefs: inherit missing fields from the parent entry
            for tag, entries in next, luadata do
                local value = entries.crossref
                if value then
                    local parent = luadata[value]
                    if parent == entries then
                        report_duplicates("bad parent %a for %a in dataset %s",value,tag,current.name)
                    elseif parent then
                        local t = { }
                        for k, v in next, parent do
                            if not entries[k] then
                                entries[k] = v
                                t[#t+1] = k
                            end
                        end
                        sort(t)
                        entries.inherited = concat(t,",")
                    else
                        report_duplicates("no valid parent %a for %a in dataset %s",value,tag,current.name)
                    end
                end
            end
        end
        statistics.stoptiming(publications)
    end

end
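
-- A hedged usage sketch (the filename is made up):
--
--   local data = io.loaddata("mybibliography.bib") or ""
--   publications.loadbibdata("default",data,"mybibliography.bib","bib")
--
-- which is also what loaders.bib below boils down to after resolving the
-- filename.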

do

    local cleaner_0 = S('<>&')
    local cleaner_1 = (1-cleaner_0)^0 * cleaner_0
    local cleaner_2 = Cs ( (
        P("<") / "&lt;"  +
        P(">") / "&gt;"  +
        P("&") / "&amp;" +
        P(1)
    )^0)

    local compact = false

    function publications.converttoxml(dataset,nice,dontstore,usedonly,subset,noversion,rawtoo)
        local current = datasets[dataset]
        local luadata = subset or (current and current.luadata)
        if luadata then
            statistics.starttiming(publications)

            local result, r, n = { }, 0, 0
            if usedonly then
                usedonly = publications.usedentries()
                usedonly = usedonly[current.name]
            end

            r = r + 1 ; result[r] = "<?xml version='1.0' standalone='yes'?>"
            r = r + 1 ; result[r] = formatters["<bibtex dataset='%s'>"](current.name)

            if nice then
                local f_entry_start = formatters[" <entry tag='%s' category='%s' index='%s'>"]
                local s_entry_stop  = " </entry>"
                local f_field       = formatters[" <field name='%s'>%s</field>"]
                local f_cdata       = formatters[" <field name='rawbibtex'><![CDATA[%s]]></field>"]

                for tag, entry in sortedhash(luadata) do
                    if not usedonly or usedonly[tag] then
                        r = r + 1 ; result[r] = f_entry_start(tag,entry.category,entry.index)
                        for key, value in sortedhash(entry) do
                            if key ~= "tag" and key ~= "category" and key ~= "index" then
                                if lpegmatch(cleaner_1,value) then
                                    value = lpegmatch(cleaner_2,value)
                                end
                                if value ~= "" then
                                    r = r + 1 ; result[r] = f_field(key,value)
                                end
                            end
                        end
                        if rawtoo then
                            local s = publications.savers.bib(current,false,{ [tag] = entry })
                            s = utilities.strings.striplines(s,"prune and collapse")
                            r = r + 1 ; result[r] = f_cdata(s)
                        end
                        r = r + 1 ; result[r] = s_entry_stop
                        n = n + 1
                    end
                end
            else
                local f_entry_start = formatters["<entry tag='%s' category='%s' index='%s'>"]
                local s_entry_stop  = "</entry>"
                local f_field       = formatters["<field name='%s'>%s</field>"]
                for tag, entry in next, luadata do
                    if not usedonly or usedonly[tag] then
                        r = r + 1 ; result[r] = f_entry_start(entry.tag,entry.category,entry.index)
                        for key, value in next, entry do
                            if key ~= "tag" and key ~= "category" and key ~= "index" then
                                if lpegmatch(cleaner_1,value) then
                                    value = lpegmatch(cleaner_2,value)
                                end
                                if value ~= "" then
                                    r = r + 1 ; result[r] = f_field(key,value)
                                end
                            end
                        end
                        r = r + 1 ; result[r] = s_entry_stop
                        n = n + 1
                    end
                end
            end

            r = r + 1 ; result[r] = "</bibtex>"

            result = concat(result,nice and "\n" or nil,noversion and 2 or 1,#result)

            if not dontstore then
                statistics.starttiming(xml)
                current.xmldata = xmlconvert(result, {
                    resolve_entities            = true,
                    resolve_predefined_entities = true,
                    utfize_entities             = true,
                } )
                statistics.stoptiming(xml)
                if lxml then
                    lxml.register(formatters["btx:%s"](current.name),current.xmldata)
                end
            end
            statistics.stoptiming(publications)
            return result, n
        end
    end

end
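
-- A hedged usage sketch: produce a nicely indented xml rendering of a
-- dataset without storing it (arguments as in the definition above):
--
--   local result, n = publications.converttoxml("default",true,true)
--   -- result : the xml string, n : the number of converted entries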

do

    local function resolvedname(dataset,filename)
        local current = datasets[dataset]
        if type(filename) ~= "string" then
            report("invalid filename %a",tostring(filename))
        end
        local fullname = resolvers.findfile(filename,"bib")
        if fullname == "" then
            fullname = resolvers.findfile(filename)
        end
        if not fullname or fullname == "" then
            report("no file %a",filename)
            current.fullname = filename
            return current, false
        else
            current.fullname = fullname
            return current, fullname
        end
    end

    publications.resolvedname = resolvedname
    local cleaner = false
    local cleaned = false

    function loaders.registercleaner(what,fullname)
        if not fullname or fullname == "" then
            report("no %s file %a",what,fullname)
            return
        end
        local list = table.load(fullname)
        if not list then
            report("invalid %s file %a",what,fullname)
            return
        end
        list = list.replacements
        if not list then
            report("no replacement table in %a",fullname)
            return
        end
        if cleaned then
            report("adding replacements from %a",fullname)
            for k, v in next, list do
                cleaned[k] = v
            end
        else
            report("using replacements from %a",fullname)
            cleaned = list
        end
        cleaner = true
    end
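
    -- A hedged sketch of the kind of file registercleaner expects (the
    -- content is made up); replacements are applied to bib files before
    -- they are parsed:
    --
    --   return {
    --       replacements = {
    --           ["â€™"] = "’", -- repair some mojibake
    --       },
    --   }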

    function loaders.bib(dataset,filename,kind)
        local dataset, fullname = resolvedname(dataset,filename)
        if not fullname then
            return
        end
        local data = io.loaddata(fullname) or ""
        if data == "" then
            report("empty file %a, nothing loaded",fullname)
            return
        end
        if cleaner == true then
            -- build the replacement pattern once, on first use
            cleaner = Cs((lpeg.utfchartabletopattern(keys(cleaned)) / cleaned + p_utf8character)^1)
        end
        if cleaner ~= false then
            data = lpegmatch(cleaner,data)
        end
        if trace then
            report("loading file %a",fullname)
        end
        publications.loadbibdata(dataset,data,fullname,kind)
    end

    function loaders.lua(dataset,filename,loader)
        local current, data, fullname
        if type(filename) == "table" then
            current = datasets[dataset]
            data = filename
        else
            dataset, fullname = resolvedname(dataset,filename)
            if not fullname then
                return
            end
            current = datasets[dataset]
            data = (loader or table.load)(fullname)
        end
        if data then
            local luadata = current.luadata
            for tag, entry in sortedhash(data) do
                if type(entry) == "table" then
                    entry.index  = getindex(current,luadata,tag)
                    entry.tag    = tag
                    luadata[tag] = entry
                end
            end
        end
    end

    function loaders.json(dataset,filename)
        loaders.lua(dataset,filename,utilities.json.load)
    end

    function loaders.buffer(dataset,name)
        local current  = datasets[dataset]
        local barename = file.removesuffix(name)
        local data     = buffers.getcontent(barename) or ""
        if data == "" then
            report("empty buffer %a, nothing loaded",barename)
            return
        end
        if trace then
            report("loading buffer %a",barename)
        end
        publications.loadbibdata(current,data,barename,"bib")
    end

    function loaders.xml(dataset,filename)
        local dataset, fullname = resolvedname(dataset,filename)
        if not fullname then
            return
        end
        local current = datasets[dataset]
        local luadata = current.luadata
        local root    = xml.load(fullname)
        for bibentry in xmlcollected(root,"/bibtex/entry") do
            local attributes = bibentry.at
            local tag = attributes.tag
            local entry = {
                category = attributes.category,
                tag      = tag,
                index    = 0,
            }
            for field in xmlcollected(bibentry,"/field") do
                entry[field.at.name] = field.dt[1]
            end
            entry.index  = getindex(current,luadata,tag)
            entry.tag    = tag
            luadata[tag] = entry
        end
    end
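
    -- This loader expects the flat format that converttoxml produces, so a
    -- hedged sketch of valid input is:
    --
    --   <?xml version='1.0' standalone='yes'?>
    --   <bibtex dataset='default'>
    --     <entry tag='knuth1984' category='book' index='1'>
    --       <field name='author'>Knuth, Donald</field>
    --     </entry>
    --   </bibtex>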

    setmetatableindex(loaders,function(t,filetype)
        local v = function(dataset,filename)
            report("no loader for file %a with filetype %a",filename,filetype)
        end
        t[filetype] = v
        return v
    end)

    local done = setmetatableindex("table")

    function publications.load(specification)
        local name     = specification.dataset or v_default
        local current  = datasets[name]
        local files    = settings_to_array(specification.filename)
        local kind     = specification.kind
        local dataspec = specification.specification
        statistics.starttiming(publications)
        local somedone = false
        for i=1,#files do
            local filetype, filename = string.splitup(files[i],"::")
            if not filename then
                filename = filetype
                filetype = file.suffix(filename)
            end
            if filename then
                if not filetype or filetype == "" then
                    filetype = "bib"
                end
                if file.suffix(filename) == "" then
                    filename = file.addsuffix(filename,filetype)
                end
                if done[current][filename] then
                    report("file %a is already loaded in dataset %a",filename,name)
                else
                    loaders[filetype](current,filename)
                    done[current][filename] = true
                    somedone = true
                end
                if kind then
                    current.loaded[current.fullname or filename] = kind
                end
                if dataspec then
                    current.specifications[dataspec] = true
                end
            end
        end
        if somedone then
            local runner = enhancer.runner
            if runner then
                runner(current)
            end
        end
        statistics.stoptiming(publications)
        return current
    end

end
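
-- A hedged usage sketch; entries can be prefixed with an explicit filetype
-- (the names are made up):
--
--   publications.load {
--       dataset  = "default",
--       filename = "lua::extra.lua,mybibliography", -- the second one defaults to bib
--       kind     = "bibliography",
--   }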

do

    function enhancers.order(dataset)
        local luadata = dataset.luadata
        local ordered = dataset.ordered
        for i=1,#ordered do
            local tag = ordered[i]
            if type(tag) == "string" then
                ordered[i] = luadata[tag]
            end
        end
    end

    function enhancers.details(dataset)
        local luadata = dataset.luadata
        local details = dataset.details
        for tag, entry in next, luadata do
            if not details[tag] then
                details[tag] = { }
            end
        end
    end

    utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.order")
    utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.details")

end

do

    local checked  = function(s,d) d[s] = (d[s] or 0) + 1 end
    local checktex = ((1-P("\\"))^1 + P("\\") * ((C(R("az","AZ")^1) * Carg(1))/checked))^0

    function publications.analyze(dataset)
        local current    = datasets[dataset]
        local data       = current.luadata
        local categories = { }
        local fields     = { }
        local commands   = { }
        for k, v in next, data do
            categories[v.category] = (categories[v.category] or 0) + 1
            for field, value in next, v do
                fields[field] = (fields[field] or 0) + 1
                lpegmatch(checktex,value,1,commands)
            end
        end
        current.analysis = {
            categories = categories,
            fields     = fields,
            commands   = commands,
        }
    end

end
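
-- A hedged usage sketch:
--
--   publications.analyze("default")
--   inspect(datasets["default"].analysis) -- counts per category, field and command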

function publications.tags(dataset)
    return sortedkeys(datasets[dataset].luadata)
end

function publications.sortedentries(dataset)
    return sortedhash(datasets[dataset].luadata)
end

function publications.concatstate(i,n)
    if i == 0 then
        return 0
    elseif i == 1 then
        return 1
    elseif i == 2 and n == 2 then
        return 4
    elseif i == n then
        return 3
    else
        return 2
    end
end
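
-- The returned state encodes a position in a concatenated list: 0 nothing,
-- 1 first (or only), 2 somewhere in the middle, 3 last, 4 the second of
-- exactly two. A hedged sketch of how a rendering might use it:
--
--   publications.concatstate(1,3) -- 1 : no separator before the first item
--   publications.concatstate(2,3) -- 2 : e.g. ", "
--   publications.concatstate(3,3) -- 3 : e.g. " and "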

do

    local savers = { }

    local s_preamble = [[
% this is an export from context mkiv

@preamble{
    \ifdefined\btxcmd
        % we're probably in context
    \else
        \def\btxcmd#1{\begincsname#1\endcsname}
    \fi
}

]]

    function savers.bib(dataset,filename,tobesaved)
        local f_start = formatters["@%s{%s,\n"]
        local f_field = formatters[" %s = {%s},\n"]
        local s_stop  = "}\n\n"
        local result  = { }
        local n, r    = 0, 0
        for tag, data in sortedhash(tobesaved) do
            r = r + 1 ; result[r] = f_start(data.category or "article",tag)
            for key, value in sortedhash(data) do
                if not privates[key] then
                    r = r + 1 ; result[r] = f_field(key,value)
                end
            end
            r = r + 1 ; result[r] = s_stop
            n = n + 1
        end
        result = concat(result)
        if find(result,"\\btxcmd") then
            result = s_preamble .. result
        end
        if filename then
            report("%s entries from dataset %a saved in %a",n,dataset,filename)
            io.savedata(filename,result)
        else
            return result
        end
    end

    function savers.lua(dataset,filename,tobesaved)
        local list = { }
        local n    = 0
        for tag, data in next, tobesaved do
            local t = { }
            for key, value in next, data do
                if not privates[key] then
                    t[key] = value
                end
            end
            list[tag] = t
            n = n + 1
        end
        report("%s entries from dataset %a saved in %a",n,dataset,filename)
        table.save(filename,list)
    end

    function savers.xml(dataset,filename,tobesaved,rawtoo)
        local result, n = publications.converttoxml(dataset,true,true,false,tobesaved,false,rawtoo)
        report("%s entries from dataset %a saved in %a",n,dataset,filename)
        io.savedata(filename,result)
    end

    function publications.save(specification)
        local dataset   = specification.dataset
        local filename  = specification.filename
        local filetype  = specification.filetype
        local criterium = specification.criterium
        statistics.starttiming(publications)
        if not filename or filename == "" then
            report("no filename for saving given")
            return
        end
        if not filetype or filetype == "" then
            filetype = file.suffix(filename)
        end
        if not criterium or criterium == "" then
            criterium = v_all
        end
        local saver = savers[filetype]
        if saver then
            local current   = datasets[dataset]
            local luadata   = current.luadata or { }
            local tobesaved = { }
            local result    = structures.lists.filter({ criterium = criterium, names = "btx" }) or { }
            for i=1,#result do
                local userdata = result[i].userdata
                if userdata then
                    local set = userdata.btxset or v_default
                    if set == dataset then
                        local tag = userdata.btxref
                        if tag then
                            tobesaved[tag] = luadata[tag]
                        end
                    end
                end
            end
            saver(dataset,filename,tobesaved)
        else
            report("unknown format %a for saving %a",filetype,dataset)
        end
        statistics.stoptiming(publications)
        return dataset
    end

    publications.savers = savers

    if implement then

        implement {
            name      = "btxsavedataset",
            actions   = publications.save,
            arguments = {
                {
                    { "dataset" },
                    { "filename" },
                    { "filetype" },
                    { "criterium" },
                }
            }
        }

    end

end
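
-- A hedged usage sketch: save the entries of a dataset that were cited in
-- the current document:
--
--   publications.save {
--       dataset   = "default",
--       filename  = "used.bib",
--       filetype  = "bib", -- derived from the suffix when omitted
--       criterium = "all", -- also the fallback
--   }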

do

    publications.detailed = setmetatableindex(function(detailed,kind)
        local values = setmetatableindex(function(values,value)
            local caster = casters[kind]
            local cast   = caster and caster(value) or value
            values[value] = cast
            return cast
        end)
        detailed[kind] = values
        return values
    end)

    local keywordsplitter = utilities.parsers.groupedsplitat(";,")

    casters.keyword = function(str)
        return lpegmatch(keywordsplitter,str)
    end

    writers.keyword = function(k)
        if type(k) == "table" then
            return concat(k,";")
        else
            return k
        end
    end

    local pagessplitter = lpeg.splitat((
        P("-") +
        P("—") +
        P("–") +
        P("‒")
    )^1)

    casters.range = function(str)
        local first, last = lpegmatch(pagessplitter,str)
        return first and last and { first, last } or str
    end

    writers.range = function(p)
        if type(p) == "table" then
            return concat(p,"-")
        else
            return p
        end
    end

    casters.pagenumber = casters.range
    writers.pagenumber = writers.range

end
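
-- A hedged sketch of the cast/write round trip (keyword splitting details
-- may differ slightly):
--
--   casters.keyword("tex;luatex;context")  -- { "tex", "luatex", "context" }
--   casters.range("123–135")               -- { "123", "135" }
--   writers.range({ "123", "135" })        -- "123-135"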

if implement then

    implement {
        name      = "btxshortcut",
        arguments = "2 strings",
        actions   = function(instance,key)
            local d = publications.datasets[instance]
            context(d and d.shortcuts[key] or "?")
        end,
    }

end