-- ConTeXt module registration: publication (bibliography) database support.
if not modules then modules = { } end modules ['publ-dat'] = {
    version = 1.001,
    comment = "this module part of publication support",
    author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license = "see context related readme files"
}
8
9
10
11
12
13
14if not characters then
15 dofile(resolvers.findfile("char-utf.lua"))
16 dofile(resolvers.findfile("char-tex.lua"))
17end
18
19if not utilities.sequencers then
20 dofile(resolvers.findfile("util-seq.lua"))
21end
22
23local lower, find, sub = string.lower, string.find, string.sub
24local concat, copy, tohash = table.concat, table.copy, table.tohash
25local next, type, rawget, tonumber = next, type, rawget, tonumber
26local utfchar = utf.char
27local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
28local textoutf = characters and characters.tex.toutf
29local settings_to_hash, settings_to_array = utilities.parsers.settings_to_hash, utilities.parsers.settings_to_array
30local formatters = string.formatters
31local sortedkeys, sortedhash, keys, sort = table.sortedkeys, table.sortedhash, table.keys, table.sort
32local xmlcollected, xmltext, xmlconvert = xml.collected, xml.text, xml.convert
33local setmetatableindex = table.setmetatableindex
34
35
36
37local P, R, S, V, C, Cc, Cs, Ct, Carg, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Carg, lpeg.Cmt, lpeg.Cp
38
39local p_whitespace = lpegpatterns.whitespace
40local p_utf8character = lpegpatterns.utf8character
41
-- Tracing flags, toggled at runtime via the trackers mechanism. Each
-- callback must update its own flag; the original callbacks for the
-- "duplicates" and "strings" trackers wrongly assigned to 'trace'.
local trace = false trackers.register("publications", function(v) trace = v end)
local trace_duplicates = true trackers.register("publications.duplicates", function(v) trace_duplicates = v end)
local trace_strings = false trackers.register("publications.strings", function(v) trace_strings = v end)
45
-- Reporters for console/log output.
local report = logs.reporter("publications")
local report_duplicates = logs.reporter("publications","duplicates")
local report_strings = logs.reporter("publications","strings")

local allocate = utilities.storage.allocate

local commands = commands
local implement = interfaces and interfaces.implement

-- The public namespace; subtables are reused when the module is reloaded.
publications = publications or { }
local publications = publications

local datasets = publications.datasets or { }
publications.datasets = datasets

local writers = publications.writers or { }
publications.writers = writers

local tables = publications.tables or { }
publications.tables = tables

publications.statistics = publications.statistics or { }
local publicationsstats = publications.statistics

local loaders = publications.loaders or { }
publications.loaders = loaders

-- Casters turn raw field strings into structured values (see end of file).
local casters = { }
publications.casters = casters

local components = { }
publications.components = components

-- The enhancer is a sequencer that post-processes a dataset after loading.
local enhancers = publications.enhancers or { }
publications.enhancers = enhancers

local enhancer = publications.enhancer or utilities.sequencers.new { arguments = "dataset" }
publications.enhancer = enhancer

utilities.sequencers.appendgroup(enhancer,"system")

-- Global counters, also shown in the statistics at the end of a run.
publicationsstats.nofbytes = 0
publicationsstats.nofdefinitions = 0
publicationsstats.nofshortcuts = 0
publicationsstats.nofdatasets = 0
97
-- Fields managed by the machinery itself; never exported as user fields.
local privates = allocate {
    category = true,
    tag = true,
    index = true,
    suffix = true,
    specification = true,
}

-- Fields that get special treatment while processing entries.
local specials = allocate {
    key = true,
    crossref = true,
    keywords = true,
    language = true,
    comment = true,
}

-- Fields that always exist, regardless of the specification.
local implicits = allocate {
    category = "implicit",
    tag = "implicit",
    key = "implicit",
    keywords = "implicit",
    language = "implicit",
    crossref = "implicit",
}

-- Possible origins of a field in a category definition (checkfield).
local origins = allocate {
    "optional",
    "extra",
    "required",
    "virtual",
}

-- Virtual (computed) fields that can be enabled per specification.
local virtuals = allocate {
    "authoryear",
    "authoryears",
    "authornum",
    "num",
    "suffix",
}

-- Default field -> type mapping (drives the casters).
local defaulttypes = allocate {
    author = "author",
    editor = "author",
    translator = "author",

    page = "pagenumber",
    pages = "pagenumber",
    keywords = "keyword",
    doi = "url",
    url = "url",
}

-- Default field sets: "page" expands to both "page" and "pages".
local defaultsets = allocate {
    page = { "page", "pages" },
}

tables.implicits = implicits
tables.origins = origins
tables.virtuals = virtuals
tables.types = defaulttypes
tables.sets = defaultsets
tables.privates = privates
tables.specials = specials
161
-- Interface variables; outside ConTeXt an identity lookup is used.
local variables = interfaces and interfaces.variables or setmetatableindex("self")

local v_all = variables.all
local v_default = variables.default

-- Overloaded elsewhere; default: no entries are marked as used.
if not publications.usedentries then
    function publications.usedentries()
        return { }
    end
end

-- Skeleton document for a freshly created xml view of a dataset.
local xmlplaceholder = "<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>"

-- Built-in @string shortcuts for the month abbreviations.
local defaultshortcuts = allocate {
    jan = "1",
    feb = "2",
    mar = "3",
    apr = "4",
    may = "5",
    jun = "6",
    jul = "7",
    aug = "8",
    sep = "9",
    oct = "10",
    nov = "11",
    dec = "12",
}

-- Splits "parent + child1 + child2" combined tags (see parenttag).
local space = p_whitespace^0
local separator = space * "+" * space
local p_splitter = lpeg.tsplitat(separator)
193
-- Unknown fields are remembered as origin "extra".
local unknownfield = function(t,k)
    local v = "extra"
    t[k] = v
    return v
end

-- Unknown categories get a permissive skeleton on first access.
local unknowncategory = function(t,k)
    local v = {
        required = false,
        optional = false,
        virtual = false,
        fields = setmetatableindex(unknownfield),
        types = unknowntypes, -- NOTE(review): 'unknowntypes' is not defined anywhere (nil); possibly 'unknowntype' was intended -- confirm
        sets = setmetatableindex(defaultsets),
    }
    t[k] = v
    return v
end

-- Unknown field types degrade to plain "string".
local unknowntype = function(t,k)
    local v = "string"
    t[k] = v
    return v
end
218
-- Fallback specification, used when no (valid) definition file is found.
local default = {
    name = name, -- NOTE(review): 'name' is an undefined global here, so this is effectively nil -- confirm intent
    version = "1.00",
    comment = "unknown specification.",
    author = "anonymous",
    copyright = "no one",
    categories = setmetatableindex(unknowncategory),
    types = setmetatableindex(defaulttypes,unknowntype),
}
228
229
230
231
-- Normalize one category definition of a specification: build its
-- field -> origin map (layered over the implicit fields) and make sure
-- every origin list exists. Field sets expand to their member fields.
local function checkfield(specification,category,data)
    local fieldmap = setmetatableindex({ },implicits)
    data.fields = fieldmap
    data.category = category
    local sets = data.sets or { }
    for index=1,#origins do
        local origin = origins[index]
        local fieldlist = data[origin]
        if not fieldlist then
            data[origin] = { }
        else
            for f=1,#fieldlist do
                local field = fieldlist[f]
                local expanded = sets[field] or field
                if type(expanded) ~= "table" then
                    fieldmap[expanded] = origin
                else
                    for e=1,#expanded do
                        fieldmap[expanded[e]] = origin
                    end
                end
            end
        end
    end
    return data
end
258
-- Lazy cache of publication specifications: indexing with a name loads
-- "publ-imp-<name>.lua", normalizes it and memoizes the result. A nil
-- name or a missing/invalid file falls back to the permissive default.
local specifications = setmetatableindex(function(t,name)
    if not name then
        return default
    end
    local filename = formatters["publ-imp-%s.lua"](name)
    local fullname = resolvers.findfile(filename) or ""
    if fullname == "" then
        report("no data definition file %a for %a",filename,name)
        t[name] = default
        return default
    end
    local specification = table.load(fullname)
    if not specification then
        report("invalid data definition file %a for %a",fullname,name)
        t[name] = default
        return default
    end
    -- unknown categories are created on demand
    local categories = specification.categories
    if not categories then
        categories = { }
        specification.categories = categories
    end
    setmetatableindex(categories,unknowncategory)
    -- unknown field types degrade to "string"
    local types = specification.types
    if not types then
        types = defaulttypes
        specification.types = types
    end
    setmetatableindex(types,unknowntype)

    local fields = setmetatableindex(unknownfield)
    specification.fields = fields
    -- virtual: nil/false mean none, an explicit table is taken as-is,
    -- anything else (e.g. true) enables the full set. The original
    -- compared type(virtual) against the table library itself instead of
    -- the string "table", so explicit lists were always replaced.
    local virtual = specification.virtual
    if virtual == nil then
        virtual = { }
    elseif virtual == false then
        virtual = { }
    elseif type(virtual) ~= "table" then
        virtual = virtuals
    end
    specification.virtual = virtual
    specification.virtualfields = tohash(virtual)

    for category, data in next, categories do
        categories[category] = checkfield(specification,category,copy(data))
    end

    t[name] = specification

    return specification
end)
313
publications.specifications = specifications

-- Register (or overwrite) one category in a named specification.
function publications.setcategory(target,category,data)
    local specification = specifications[target]
    specification.categories[category] = checkfield(specification,category,data)
end
320
-- Handle a combined "parent+child1+child2" tag: register the children
-- with the parent entry and return the parent tag. A plain tag passes
-- through unchanged; error cases fall back to the tag (or "").
function publications.parenttag(dataset,tag)
    if not dataset or not tag then
        report("error in specification, dataset %a, tag %a",dataset,tag)
    elseif find(tag,"+",1,true) then
        local tags = lpegmatch(p_splitter,tag)
        local parent = tags[1]
        local current = datasets[dataset]
        local luadata = current.luadata
        local details = current.details
        local first = luadata[parent]
        if first then
            local detail = details[parent]
            local children = detail.children
            if not children then
                children = { }
                detail.children = children
            end
            -- register each child once; a child can have only one parent
            for i=2,#tags do
                local tag = tags[i]
                for j=1,#children do
                    if children[j] == tag then
                        tag = false -- already a child of this parent
                    end
                end
                if tag then
                    local entry = luadata[tag]
                    if entry then
                        local detail = details[tag]
                        children[#children+1] = tag
                        if detail.parent then
                            report("error in combination, dataset %a, tag %a, parent %a, ignored %a",dataset,tag,detail.parent,parent)
                        else
                            report("combining, dataset %a, tag %a, parent %a",dataset,tag,parent)
                            detail.parent = parent
                        end
                    end
                end
            end
            return parent
        end
    end
    return tag or ""
end
365
-- Create a fresh, empty dataset; the name defaults to "dataset <n>".
function publications.new(name)
    publicationsstats.nofdatasets = publicationsstats.nofdatasets + 1
    local dataset = {
        name = name or "dataset " .. publicationsstats.nofdatasets,
        nofentries = 0,
        shortcuts = { }, -- @string shortcuts
        luadata = { }, -- tag -> entry table
        suffixes = { },
        xmldata = xmlconvert(xmlplaceholder), -- parallel xml view
        details = { }, -- tag -> bookkeeping (children, parent, ...)
        missing = { },
        ordered = { }, -- index -> tag (load order)
        nofbytes = 0,
        entries = nil, -- empty == all
        sources = { }, -- { filename, checksum } records
        loaded = { }, -- source -> kind
        fields = { }, -- original -> normalized field names
        userdata = { },
        used = { },
        commands = { }, -- \command usage counts from field values
        citestate = { },
        status = {
            resources = false,
            userdata = false,
        },
        specifications = {
            -- used specifications
        },
        suffixed = false,
    }

    return dataset
end
401
-- Accessing an unknown dataset name creates the dataset on the fly;
-- indexing with an already-made dataset table just returns that table.
setmetatableindex(datasets,function(t,k)
    if type(k) ~= "table" then
        local dataset = publications.new(k)
        datasets[k] = dataset
        return dataset
    end
    return k
end)
411
-- Return the stable index of 'tag' in the dataset; new tags get the next
-- free slot and are appended to dataset.ordered.
local function getindex(dataset,luadata,tag)
    local found = luadata[tag]
    if found then
        local index = found.index or 0
        dataset.ordered[tag] = index -- NOTE(review): keyed by tag here but by index in the other branch -- confirm asymmetry is intended
        return index
    else
        local index = dataset.nofentries + 1
        dataset.nofentries = index
        dataset.ordered[index] = tag
        return index
    end
end

publications.getindex = getindex
427
do

    -- Cleanup/normalization of raw bib field values: collapse whitespace,
    -- turn \commands into btxcmd{...} (counting each command per dataset),
    -- keep $math$ untouched, map # to \letterhash, drop empty {} groups.

    local space = S(" \t\n\r\f")
    local collapsed = space^1/" "
    local csletter = lpegpatterns.csletter or R("az","AZ")

    -- \foo -> btxcmd{foo}; the Carg(1) table collects usage counts
    local command = P("\\") * (Carg(1) * C(csletter^1) * space^0 / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
    local whatever = P("\\") * P(" ")^1 / " "

    local somemath = P("$") * ((1-P("$"))^1) * P("$")

    local any = P(1)
    local done = P(-1)

    local zero_l_r = P("{}") / "" * #P(1)
    local special = P("#") / "\\letterhash "

    -- filter_1 is a cheap pre-test whether filtering is needed at all
    local filter_0 = S('\\{}#')
    local filter_1 = (1-filter_0)^0 * filter_0
    local filter_2 = Cs(
        (
            somemath +
            whatever +
            command +
            special +
            collapsed +
            zero_l_r +
            any
        )^0
    )
469
470
471
472
473
474 local function do_shortcut(key,value,dataset)
475 publicationsstats.nofshortcuts = publicationsstats.nofshortcuts + 1
476 dataset.shortcuts[key] = value
477 end
478
479
480
481
    -- Per-tag duplicate counters per dataset name (drives unique hashtags).
    local tags = table.setmetatableindex("table")

    -- When true, crossref inheritance is done lazily via metatables; when
    -- false it is resolved after parsing (see loadbibdata).
    local indirectcrossrefs = true

    -- Add one parsed @category{tag, key = value, ...} entry to 'dataset'.
    local function do_definition(category,tag,tab,dataset)
        publicationsstats.nofdefinitions = publicationsstats.nofdefinitions + 1
        if tag == "" then
            tag = "no-tag-set"
        end
        local fields = dataset.fields
        local luadata = dataset.luadata
        local hashtag = tag
        if luadata[tag] then
            -- duplicate tag: store under a unique "tag-n" hashtag instead
            local t = tags[tag]
            local d = dataset.name
            local n = (t[d] or 0) + 1
            t[d] = n
            hashtag = tag .. "-" .. n
            if trace_duplicates then
                local p = { }
                for k, v in sortedhash(t) do
                    p[#p+1] = formatters["%s:%s"](k,v)
                end
                report_duplicates("tag %a is present multiple times: % t, assigning hashtag %a",tag,p,hashtag)
            end
        end
        local index = getindex(dataset,luadata,hashtag)
        local entries = {
            category = lower(category),
            tag = tag,
            index = index,
        }
        -- tab holds alternating raw key / value pairs
        for i=1,#tab,2 do
            local original = tab[i]
            local normalized = fields[original]
            if not normalized then
                normalized = lower(original)
                fields[original] = normalized
            end
            -- first occurrence of a field wins
            if rawget(entries,normalized) then
                if trace_duplicates then
                    report_duplicates("redundant field %a is ignored for tag %a in dataset %a",normalized,tag,dataset.name)
                end
            else
                local value = tab[i+1]
                value = textoutf(value)
                if lpegmatch(filter_1,value) then
                    -- needs cleanup; commands get counted in dataset.commands
                    value = lpegmatch(filter_2,value,1,dataset.commands)
                end
                if normalized == "crossref" then
                    if indirectcrossrefs then
                        -- lazily inherit missing fields from the parent entry
                        setmetatableindex(entries,function(t,k)
                            local parent = rawget(luadata,value)
                            if parent == entries then
                                report_duplicates("bad parent %a for %a in dataset %s",value,hashtag,dataset.name)
                                setmetatableindex(entries,nil)
                                return entries
                            elseif parent then
                                setmetatableindex(entries,parent)
                                return entries[k]
                            else
                                report_duplicates("no valid parent %a for %a in dataset %s",value,hashtag,dataset.name)
                                setmetatableindex(entries,nil)
                            end
                        end)
                    else
                        dataset.nofcrossrefs = dataset.nofcrossrefs +1
                    end
                end
                entries[normalized] = value
            end
        end
        luadata[hashtag] = entries
    end
557
558 local f_invalid = formatters["<invalid: %s>"]
559
560 local function resolve(s,dataset)
561 local e = dataset.shortcuts[s]
562 if e then
563 if trace_strings then
564 report_strings("%a resolves to %a",s,e)
565 end
566 return e
567 end
568 e = defaultshortcuts[s]
569 if e then
570 if trace_strings then
571 report_strings("%a resolves to default %a",s,e)
572 end
573 return e
574 end
575 if tonumber(s) then
576 return s
577 end
578 report("error in database, invalid value %a",s)
579 return f_invalid(s)
580 end
581
    -- Matches "message: ...", "warning: ...", "error: ..." or
    -- "comment: ..." inside an @comment entry.
    local pattern = p_whitespace^0
    * C(P("message") + P("warning") + P("error") + P("comment")) * p_whitespace^0 * P(":")
    * p_whitespace^0
    * C(P(1)^1)

    -- Relay the tagged lines of an @comment entry to the console/log.
    local function do_comment(s,dataset)
        local how, what = lpegmatch(pattern,s)
        if how and what then
            local t = string.splitlines(utilities.strings.striplines(what))
            local b = file.basename(dataset.fullname or dataset.name or "unset")
            for i=1,#t do
                report("%s > %s : %s",b,how,t[i])
            end
        end
    end
597
    -- The bibtex grammar. Values can be {braced}, "quoted", 'quoted',
    -- plain numbers or @string references; # concatenates pieces.
    -- @string (shortcut) and @comment entries get special handling.

    local percent = P("%")
    local start = P("@")
    local comma = P(",")
    local hash = P("#")
    local escape = P("\\")
    local single = P("'")
    local double = P('"')
    local left = P('{')
    local right = P('}')
    local both = left + right
    local lineending = S("\n\r")
    local space = S(" \t\n\r\f")
    local spacing = space^0
    local equal = P("=")

    local collapsed = p_whitespace^1/" "
    local nospaces = p_whitespace^1/""

    local p_left = (p_whitespace^0 * left) / ""
    local p_right = (right * p_whitespace^0) / ""

    local keyword = C((R("az","AZ","09") + S("@_:-"))^1)
    local key = C((1-space-equal)^1)
    local tag = C((1-space-comma)^0)
    local category = C((1-space-left)^1)
    local s_quoted = ((escape*single) + collapsed + (1-single))^0
    local d_quoted = ((escape*double) + collapsed + (1-double))^0

    -- @{name} references embedded in braced values
    local reference = P("@{") * C((R("az","AZ","09") + S("_:-"))^1) * P("}")
    local r_value = reference * Carg(1) / resolve

    -- balanced brace groups (whitespace collapsed, references resolved)
    local balanced = P {
        ((escape * (left+right)) + (collapsed + r_value + 1 - (left+right))^1 + V(2))^0,
        left * V(1) * right,
    }

    -- a fully braced value inside quotes
    local unbalanced = (left/"") * balanced * (right/"") * P(-1)

    local reference = C((R("az","AZ","09") + S("_:-"))^1)
    local b_value = p_left * balanced * p_right
    local s_value = (single/"") * (unbalanced + s_quoted) * (single/"")
    local d_value = (double/"") * (unbalanced + d_quoted) * (double/"")
    local r_value = P("@") * reference * Carg(1) / resolve
    + reference * Carg(1) / resolve
    local n_value = C(R("09")^1)

    -- fallback: anything up to , or } resolved as a shortcut
    local e_value = Cs((left * balanced * right + (1 - S(",}")))^0) * Carg(1) / function(s,dataset)
        return resolve(s,dataset)
    end

    local somevalue = d_value + b_value + s_value + r_value + n_value + e_value
    local value = Cs((somevalue * ((spacing * hash * spacing)/"" * somevalue)^0))

    local stripper = lpegpatterns.collapser
    local stripped = value / function(s) return lpegmatch(stripper,s) end

    -- % starts a comment up to the line end
    local forget = percent^1 * (1-lineending)^0
    local spacing = spacing * forget^0 * spacing
    local replacement= spacing * key * spacing * equal * spacing * value * spacing
    local assignment = spacing * key * spacing * equal * spacing * stripped * spacing
    local definition = category * spacing * left * spacing * tag * spacing * comma * Ct((assignment * comma^0)^0) * spacing * right * Carg(1) / do_definition

    -- @string{...} and @comment{...} (case insensitive first letter check)
    local crapword = C((1-space-left)^1)
    local shortcut = Cmt(crapword,function(_,p,s) return lower(s) == "string" and p end) * spacing * left * ((replacement * Carg(1))/do_shortcut * comma^0)^0 * spacing * right
    local comment = Cmt(crapword,function(_,p,s) return lower(s) == "comment" and p end) * spacing * lpegpatterns.argument * Carg(1) / do_comment

    local casecrap = #S("sScC") * (shortcut + comment)

    local bibtotable = (space + forget + P("@") * (casecrap + definition) + 1)^0
672
673
674
675
676
677
678
679
680
681
682
683
684 function publications.loadbibdata(dataset,content,source,kind)
685 if not source then
686 report("invalid source for dataset %a",dataset)
687 return
688 end
689 local current = datasets[dataset]
690 local size = #content
691 if size == 0 then
692 report("empty source %a for dataset %a",source,current.name)
693 else
694 report("adding bib data to set %a from source %a",current.name,source)
695 end
696 statistics.starttiming(publications)
697 publicationsstats.nofbytes = publicationsstats.nofbytes + size
698 current.nofbytes = current.nofbytes + size
699 current.nofcrossrefs = 0
700 if source then
701 table.insert(current.sources, { filename = source, checksum = md5.HEX(content) })
702 current.loaded[source] = kind or true
703 end
704 local luadata = current.luadata
705 current.newtags = #luadata > 0 and { } or current.newtags
706 lpegmatch(bibtotable,content or "",1,current)
707 if current.nofcrossrefs > 0 then
708 for tag, entries in next, luadata do
709 local value = entries.crossref
710 if value then
711 local parent = luadata[value]
712 if parent == entries then
713 report_duplicates("bad parent %a for %a in dataset %s",value,hashtag,dataset.name)
714 elseif parent then
715 local t = { }
716 for k, v in next, parent do
717 if not entries[k] then
718 entries[k] = v
719 t[#t+1] = k
720 end
721 end
722 sort(t)
723 entries.inherited = concat(t,",")
724 else
725 report_duplicates("no valid parent %a for %a in dataset %s",value,hashtag,dataset.name)
726 end
727 end
728 end
729 end
730
731 statistics.stoptiming(publications)
732 end
733
734end
735
736do
737
738
739
740 local cleaner_0 = S('<>&')
741 local cleaner_1 = (1-cleaner_0)^0 * cleaner_0
742 local cleaner_2 = Cs ( (
743 P("<") / "<" +
744 P(">") / ">" +
745 P("&") / "&" +
746 P(1)
747 )^0)
748
749 local compact = false
750
    -- Convert (a subset of) a dataset to xml. nice: indented, sorted
    -- output; dontstore: do not keep/register the xml tree; usedonly:
    -- restrict to entries used in the document; subset: explicit luadata
    -- table to convert; noversion: drop the xml declaration; rawtoo:
    -- embed the raw bibtex as CDATA. Returns serialized xml and a count.
    function publications.converttoxml(dataset,nice,dontstore,usedonly,subset,noversion,rawtoo)
        local current = datasets[dataset]
        local luadata = subset or (current and current.luadata)
        if luadata then
            statistics.starttiming(publications)

            local result, r, n = { }, 0, 0
            if usedonly then
                usedonly = publications.usedentries()
                usedonly = usedonly[current.name]
            end

            r = r + 1 ; result[r] = "<?xml version='1.0' standalone='yes'?>"
            r = r + 1 ; result[r] = formatters["<bibtex dataset='%s'>"](current.name)

            if nice then
                -- pretty variant: indented and in sorted tag/field order
                local f_entry_start = formatters[" <entry tag='%s' category='%s' index='%s'>"]
                local s_entry_stop = " </entry>"
                local f_field = formatters[" <field name='%s'>%s</field>"]
                local f_cdata = formatters[" <field name='rawbibtex'><![CDATA[%s]]></field>"]

                for tag, entry in sortedhash(luadata) do
                    if not usedonly or usedonly[tag] then
                        r = r + 1 ; result[r] = f_entry_start(tag,entry.category,entry.index)
                        for key, value in sortedhash(entry) do
                            if key ~= "tag" and key ~= "category" and key ~= "index" then
                                if lpegmatch(cleaner_1,value) then
                                    value = lpegmatch(cleaner_2,value)
                                end
                                if value ~= "" then
                                    r = r + 1 ; result[r] = f_field(key,value)
                                end
                            end
                        end
                        if rawtoo then
                            -- round-trip through the bib saver for the raw form
                            local s = publications.savers.bib(current,false,{ [tag] = entry })
                            s = utilities.strings.striplines(s,"prune and collapse")
                            r = r + 1 ; result[r] = f_cdata(s)
                        end
                        r = r + 1 ; result[r] = s_entry_stop
                        n = n + 1
                    end
                end
            else
                -- compact variant: unsorted, no indentation
                local f_entry_start = formatters["<entry tag='%s' category='%s' index='%s'>"]
                local s_entry_stop = "</entry>"
                local f_field = formatters["<field name='%s'>%s</field>"]
                for tag, entry in next, luadata do
                    if not usedonly or usedonly[tag] then
                        r = r + 1 ; result[r] = f_entry_start(entry.tag,entry.category,entry.index)
                        for key, value in next, entry do
                            if key ~= "tag" and key ~= "category" and key ~= "index" then
                                if lpegmatch(cleaner_1,value) then
                                    value = lpegmatch(cleaner_2,value)
                                end
                                if value ~= "" then
                                    r = r + 1 ; result[r] = f_field(key,value)
                                end
                            end
                        end
                        r = r + 1 ; result[r] = s_entry_stop
                        n = n + 1
                    end
                end
            end

            r = r + 1 ; result[r] = "</bibtex>"

            -- noversion skips the first (declaration) line
            result = concat(result,nice and "\n" or nil,noversion and 2 or 1,#result)

            if dontstore then
                -- indeed
            else
                statistics.starttiming(xml)
                current.xmldata = xmlconvert(result, {
                    resolve_entities = true,
                    resolve_predefined_entities = true,
                    -- unify_predefined_entities = true,
                    utfize_entities = true,
                } )
                statistics.stoptiming(xml)
                if lxml then
                    lxml.register(formatters["btx:%s"](current.name),current.xmldata)
                end
            end
            statistics.stoptiming(publications)
            return result, n
        end
    end
840
841end
842
843do
844
845 local function resolvedname(dataset,filename)
846 local current = datasets[dataset]
847 if type(filename) ~= "string" then
848 report("invalid filename %a",tostring(filename))
849 end
850 local fullname = resolvers.findfile(filename,"bib")
851 if fullname == "" then
852 fullname = resolvers.findfile(filename)
853 end
854 if not fullname or fullname == "" then
855 report("no file %a",filename)
856 current.fullname = filename
857 return current, false
858 else
859 current.fullname = fullname
860 return current, fullname
861 end
862 end
863
864 publications.resolvedname = resolvedname
865
    -- Optional pre-load replacements: 'cleaner' flags that the lpeg has to
    -- be (re)built, 'cleaned' accumulates the replacement map.
    local cleaner = false
    local cleaned = false

    -- Register extra text replacements from a lua file that contains a
    -- 'replacements' table; applied to raw bib data in loaders.bib.
    function loaders.registercleaner(what,fullname)
        if not fullname or fullname == "" then
            report("no %s file %a",what,fullname)
            return
        end
        local list = table.load(fullname)
        if not list then
            report("invalid %s file %a",what,fullname)
            return
        end
        list = list.replacements
        if not list then
            report("no replacement table in %a",fullname)
            return
        end
        if cleaned then
            report("adding replacements from %a",fullname)
            for k, v in next, list do
                cleaned[k] = v
            end
        else
            report("using replacements from %a",fullname)
            cleaned = list
        end
        cleaner = true -- signal loaders.bib to rebuild the lpeg cleaner
    end
895
    -- Load and parse a bib file into the dataset, first applying the
    -- registered replacement cleaner (if any) to the raw data.
    function loaders.bib(dataset,filename,kind)
        local dataset, fullname = resolvedname(dataset,filename)
        if not fullname then
            return
        end
        local data = io.loaddata(fullname) or ""
        if data == "" then
            report("empty file %a, nothing loaded",fullname)
            return
        end
        if cleaner == true then
            -- build the replacement pattern once, on first use
            cleaner = Cs((lpeg.utfchartabletopattern(keys(cleaned)) / cleaned + p_utf8character)^1)
        end
        if cleaner ~= false then
            data = lpegmatch(cleaner,data)
        end
        if trace then
            report("loading file %a",fullname)
        end
        publications.loadbibdata(dataset,data,fullname,kind)
    end
917
    -- Load a lua table dataset; 'filename' can also be a ready-made table.
    -- An alternative 'loader' (e.g. the json reader) can be passed.
    function loaders.lua(dataset,filename,loader)
        local current, data, fullname
        if type(filename) == "table" then
            current = datasets[dataset]
            data = filename
        else
            dataset, fullname = resolvedname(dataset,filename)
            if not fullname then
                return
            end
            current = datasets[dataset] -- resolvedname returned the dataset table; indexing with it returns it
            data = (loader or table.load)(fullname)
        end
        if data then
            local luadata = current.luadata
            -- sorted so that indices are assigned deterministically
            for tag, entry in sortedhash(data) do
                if type(entry) == "table" then
                    entry.index = getindex(current,luadata,tag)
                    entry.tag = tag
                    luadata[tag] = entry
                end
            end
        end
    end
943
944 function loaders.json(dataset,filename)
945 loaders.lua(dataset,filename,utilities.json.load)
946 end
947
948 function loaders.buffer(dataset,name)
949 local current = datasets[dataset]
950 local barename = file.removesuffix(name)
951 local data = buffers.getcontent(barename) or ""
952 if data == "" then
953 report("empty buffer %a, nothing loaded",barename)
954 return
955 end
956 if trace then
957 report("loading buffer",barename)
958 end
959 publications.loadbibdata(current,data,barename,"bib")
960 end
961
    -- Load a dataset from our own xml export format (bibtex/entry/field).
    function loaders.xml(dataset,filename)
        local dataset, fullname = resolvedname(dataset,filename)
        if not fullname then
            return
        end
        local current = datasets[dataset]
        local luadata = current.luadata
        local root = xml.load(fullname)
        for bibentry in xmlcollected(root,"/bibtex/entry") do
            local attributes = bibentry.at
            local tag = attributes.tag
            local entry = {
                category = attributes.category,
                tag = tag, -- also set below, kept for clarity
                index = 0, -- prelocated, assigned properly below
            }
            for field in xmlcollected(bibentry,"/field") do
                entry[field.at.name] = field.dt[1]
            end
            entry.index = getindex(current,luadata,tag)
            entry.tag = tag
            luadata[tag] = entry
        end
    end
986
987 setmetatableindex(loaders,function(t,filetype)
988 local v = function(dataset,filename)
989 report("no loader for file %a with filetype %a",filename,filetype)
990 end
991 t[filetype] = v
992 return v
993 end)
994
995 local done = setmetatableindex("table")
996
997 function publications.load(specification)
998 local name = specification.dataset or v_default
999 local current = datasets[name]
1000 local files = settings_to_array(specification.filename)
1001 local kind = specification.kind
1002 local dataspec = specification.specification
1003 statistics.starttiming(publications)
1004 local somedone = false
1005 for i=1,#files do
1006 local filetype, filename = string.splitup(files[i],"::")
1007 if not filename then
1008 filename = filetype
1009 filetype = file.suffix(filename)
1010 end
1011 if filename then
1012 if not filetype or filetype == "" then
1013 filetype = "bib"
1014 end
1015 if file.suffix(filename) == "" then
1016 file.addsuffix(filename,filetype)
1017 end
1018 if done[current][filename] then
1019 report("file %a is already loaded in dataset %a",filename,name)
1020 else
1021 loaders[filetype](current,filename)
1022 done[current][filename] = true
1023 somedone = true
1024 end
1025 if kind then
1026 current.loaded[current.fullname or filename] = kind
1027 end
1028 if dataspec then
1029 current.specifications[dataspec] = true
1030 end
1031 end
1032 end
1033 if somedone then
1034 local runner = enhancer.runner
1035 if runner then
1036 runner(current)
1037 end
1038 end
1039 statistics.stoptiming(publications)
1040 return current
1041 end
1042
1043end
1044
1045do
1046
1047 function enhancers.order(dataset)
1048 local luadata = dataset.luadata
1049 local ordered = dataset.ordered
1050 for i=1,#ordered do
1051 local tag = ordered[i]
1052 if type(tag) == "string" then
1053 ordered[i] = luadata[tag]
1054 end
1055 end
1056 end
1057
1058 function enhancers.details(dataset)
1059 local luadata = dataset.luadata
1060 local details = dataset.details
1061 for tag, entry in next, luadata do
1062 if not details[tag] then
1063 details[tag] = { }
1064 end
1065 end
1066 end
1067
    -- Default enhancer steps, run after each load (see publications.load).
    utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.order")
    utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.details")
1070
1071end
1072
1073do
1074
    -- Counts \commands occurring in field values (helper for analyze).
    local checked = function(s,d) d[s] = (d[s] or 0) + 1 end
    local checktex = ( (1-P("\\"))^1 + P("\\") * ((C(R("az","AZ")^1) * Carg(1))/checked))^0

    -- Collect per-dataset statistics: category counts, field counts and
    -- used TeX commands; the result is stored in current.analysis.
    function publications.analyze(dataset)
        local current = datasets[dataset]
        local data = current.luadata
        local categories = { }
        local fields = { }
        local commands = { }
        for k, v in next, data do
            categories[v.category] = (categories[v.category] or 0) + 1
            for k, v in next, v do
                fields[k] = (fields[k] or 0) + 1
                lpegmatch(checktex,v,1,commands)
            end
        end
        current.analysis = {
            categories = categories,
            fields = fields,
            commands = commands,
        }
    end
1097
1098end
1099
-- All tags of a dataset, alphabetically sorted.
function publications.tags(dataset)
    local current = datasets[dataset]
    return sortedkeys(current.luadata)
end
1103
-- Iterator over (tag,entry) pairs of a dataset in sorted tag order.
function publications.sortedentries(dataset)
    local current = datasets[dataset]
    return sortedhash(current.luadata)
end
1107
1108
1109
-- Map position i of n onto a connector state used when typesetting
-- name/cite lists: 0 none, 1 first, 2 middle, 3 last, 4 second-of-two.
function publications.concatstate(i,n)
    if i == 0 then
        return 0
    end
    if i == 1 then
        return 1
    end
    if i == n then
        return n == 2 and 4 or 3
    end
    return 2
end
1123
1124
1125
1126do
1127
    local savers = { }

    -- Prepended to bib exports that use \btxcmd so the file can also be
    -- processed outside ConTeXt.
    local s_preamble = [[
% this is an export from context mkiv

@preamble{
    \ifdefined\btxcmd
        % we're probably in context
    \else
        \def\btxcmd#1{\begincsname#1\endcsname}
    \fi
}

]]
1142
1143 function savers.bib(dataset,filename,tobesaved)
1144 local f_start = formatters["@%s{%s,\n"]
1145 local f_field = formatters[" %s = {%s},\n"]
1146 local s_stop = "}\n\n"
1147 local result = { }
1148 local n, r = 0, 0
1149 for tag, data in sortedhash(tobesaved) do
1150 r = r + 1 ; result[r] = f_start(data.category or "article",tag)
1151 for key, value in sortedhash(data) do
1152 if not privates[key] then
1153 r = r + 1 ; result[r] = f_field(key,value)
1154 end
1155 end
1156 r = r + 1 ; result[r] = s_stop
1157 n = n + 1
1158 end
1159 result = concat(result)
1160 if find(result,"\\btxcmd") then
1161 result = s_preamble .. result
1162 end
1163 if filename then
1164 report("%s entries from dataset %a saved in %a",n,dataset,filename)
1165 io.savedata(filename,result)
1166 else
1167 return result
1168 end
1169 end
1170
    -- Export 'tobesaved' as a lua table file. With options.category the
    -- entries are grouped per category.
    function savers.lua(dataset,filename,tobesaved,options)
        local list = { }
        local n = 0

        -- strip private fields; 'category' is (re)assigned explicitly
        local function totable(data,category)
            local t = { }
            for key, value in next, data do
                if not privates[key] then
                    t[key] = value
                end
            end
            t.category = category
            n = n + 1
            return t
        end

        if options.category then
            setmetatableindex(list,"table")
            for tag, data in next, tobesaved do
                -- category is the grouping key, so not repeated per entry
                list[data.category or "unknown"][tag] = totable(data)
            end
        else
            for tag, data in next, tobesaved do
                list[tag] = totable(data,data.category)
            end
        end
        report("%s entries from dataset %a saved in %a",n,dataset,filename)
        table.save(filename,list)
    end
1200
1201 function savers.xml(dataset,filename,tobesaved,rawtoo)
1202 local result, n = publications.converttoxml(dataset,true,true,false,tobesaved,false,rawtoo)
1203 report("%s entries from dataset %a saved in %a",n,dataset,filename)
1204 io.savedata(filename,result)
1205 end
1206
    -- Save (part of) a dataset: specification = { dataset, filename,
    -- filetype, criterium, options }. Only entries actually referenced in
    -- the document (registered in the btx structure list) are saved; the
    -- output format comes from filetype or the filename suffix.
    function publications.save(specification)
        local dataset = specification.dataset
        local filename = specification.filename
        local filetype = specification.filetype
        local criterium = specification.criterium
        local options = settings_to_hash(specification.options or "")
        statistics.starttiming(publications)
        if not filename or filename == "" then
            report("no filename for saving given")
            return
        end
        if not filetype or filetype == "" then
            filetype = file.suffix(filename)
        end
        if not criterium or criterium == "" then
            criterium = v_all
        end
        local saver = savers[filetype]
        if saver then
            local current = datasets[dataset]
            local luadata = current.luadata or { }
            local tobesaved = { }
            -- collect referenced tags from the structure lists
            local result = structures.lists.filter({criterium = criterium, names = "btx"}) or { }
            for i=1,#result do
                local userdata = result[i].userdata
                if userdata then
                    local set = userdata.btxset or v_default
                    if set == dataset then
                        local tag = userdata.btxref
                        if tag then
                            tobesaved[tag] = luadata[tag]
                        end
                    end
                end
            end
            saver(dataset,filename,tobesaved,options)
        else
            report("unknown format %a for saving %a",filetype,dataset)
        end
        statistics.stoptiming(publications)
        return dataset
    end
1249
    publications.savers = savers

    -- TeX interface: \btxsavedataset and friends end up here.
    if implement then

        implement {
            name = "btxsavedataset",
            actions = publications.save,
            arguments = {
                {
                    { "dataset" },
                    { "filename" },
                    { "filetype" },
                    { "criterium" },
                    { "options" },
                }
            }
        }

    end
1269
1270end
1271
1272
1273
1274do
1275
    -- publications.detailed[kind][value] lazily caches the cast of 'value'
    -- for the given field kind (using the casters below); values without a
    -- caster pass through unchanged.
    publications.detailed = setmetatableindex(function(detailed,kind)
        local values = setmetatableindex(function(values,value)
            local caster = casters[kind]
            local cast = caster and caster(value) or value
            values[value] = cast
            return cast
        end)
        detailed[kind] = values
        return values
    end)

    -- keywords: "a;b,c" -> list (the splitter honours {grouping})
    local keywordsplitter = utilities.parsers.groupedsplitat(";,")

    casters.keyword = function(str)
        return lpegmatch(keywordsplitter,str)
    end
1292
1293
1294 writers.keyword = function(k)
1295 if type(k) == "table" then
1296 return concat(p,";")
1297 else
1298 return k
1299 end
1300 end
1301
    -- Split "first-last" page ranges on one or more hyphen/dash variants.
    local pagessplitter = lpeg.splitat((
        P("-") +
        P("—") + -- emdash
        P("–") + -- endash
        P("‒")   -- figure dash
    )^1)

    -- "12-34" -> { "12", "34" }; anything unsplittable stays a string.
    casters.range = function(str)
        local first, last = lpegmatch(pagessplitter,str)
        return first and last and { first, last } or str
    end
1313
1314 writers.range = function(p)
1315 if type(p) == "table" then
1316 return concat(p,"-")
1317 else
1318 return p
1319 end
1320 end
1321
1322 casters.pagenumber = casters.range
1323 writers.pagenumber = writers.range
1324
1325end
1326
-- TeX interface: typeset the value of a @string shortcut (or "?").
if implement then

    implement {
        name = "btxshortcut",
        arguments = "2 strings",
        actions = function(instance,key)
            local d = publications.datasets[instance]
            context(d and d.shortcuts[key] or "?")
        end,
    }

end
1339
1340
1341 |