if not modules then modules = { } end modules ['publ-dat'] = {
version = 1.001,
comment = "this module part of publication support",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
-- todo: strip the @ in the lpeg instead of on do_definition and do_shortcut
-- todo: store bibroot and bibrootdt
-- todo: dataset = datasets[dataset] => current = datasets[dataset]
-- todo: maybe split this file
--[[ldx--
This is a prelude to integrated bibliography support. This file just loads
bibtex files and converts them to xml so that we can access the content
in a convenient way. Actually handling the data takes place elsewhere.
--ldx]]--
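-- A minimal usage sketch (hypothetical dataset and file names, assuming a
-- regular ConTeXt run where the resolvers are available):
--
-- local dataset = publications.datasets["example"] -- autocreated on access
-- publications.load { dataset = "example", filename = "example.bib" }
-- local xmltext = publications.converttoxml("example",true,true)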
if not characters then
dofile(resolvers.findfile("char-utf.lua"))
dofile(resolvers.findfile("char-tex.lua"))
end
local chardata = characters.data
local lowercase = characters.lower
local lower, find, sub = string.lower, string.find, string.sub
local concat, copy, tohash = table.concat, table.copy, table.tohash
local next, type, rawget = next, type, rawget
local utfchar = utf.char
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
local textoutf = characters and characters.tex.toutf
local settings_to_hash, settings_to_array = utilities.parsers.settings_to_hash, utilities.parsers.settings_to_array
local formatters = string.formatters
local sortedkeys, sortedhash, keys = table.sortedkeys, table.sortedhash, table.keys
local xmlcollected, xmltext, xmlconvert = xml.collected, xml.text, xml.convert
local setmetatableindex = table.setmetatableindex
-- todo: more allocate
local P, R, S, V, C, Cc, Cs, Ct, Carg, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Carg, lpeg.Cmt, lpeg.Cp
local p_whitespace = lpegpatterns.whitespace
local p_utf8character = lpegpatterns.utf8character
local trace = false trackers.register("publications", function(v) trace = v end)
local trace_duplicates = true trackers.register("publications.duplicates", function(v) trace_duplicates = v end)
local report = logs.reporter("publications")
local report_duplicates = logs.reporter("publications","duplicates")
local allocate = utilities.storage.allocate
local commands = commands
local implement = interfaces.implement
publications = publications or { }
local publications = publications
local datasets = publications.datasets or { }
publications.datasets = datasets
local writers = publications.writers or { }
publications.writers = writers
local tables = publications.tables or { }
publications.tables = tables
publications.statistics = publications.statistics or { }
local publicationsstats = publications.statistics
local loaders = publications.loaders or { }
publications.loaders = loaders
local casters = { }
publications.casters = casters
-- local sorters = { }
-- publications.sorters = sorters
--
-- local indexers = { }
-- publications.indexers = indexers
local components = { }
publications.components = components -- register components
local enhancers = publications.enhancers or { }
publications.enhancers = enhancers
local enhancer = publications.enhancer or utilities.sequencers.new { arguments = "dataset" }
publications.enhancer = enhancer
utilities.sequencers.appendgroup(enhancer,"system") -- private
publicationsstats.nofbytes = 0
publicationsstats.nofdefinitions = 0
publicationsstats.nofshortcuts = 0
publicationsstats.nofdatasets = 0
local privates = allocate {
category = true,
tag = true,
index = true,
suffix = true,
specification = true,
}
local specials = allocate {
key = true,
crossref = true,
keywords = true,
language = true,
comment = true,
}
local implicits = allocate {
category = "implicit",
tag = "implicit",
key = "implicit",
keywords = "implicit",
language = "implicit",
crossref = "implicit",
}
local origins = allocate {
"optional",
"extra",
"required",
"virtual",
}
local virtuals = allocate {
"authoryear",
"authoryears",
"authornum",
"num",
"suffix",
}
local defaulttypes = allocate {
author = "author",
editor = "author",
publisher = "author",
page = "pagenumber",
pages = "pagenumber",
keywords = "keyword",
doi = "url",
url = "url",
}
local defaultsets = allocate {
page = { "page", "pages" },
}
tables.implicits = implicits
tables.origins = origins
tables.virtuals = virtuals
tables.types = defaulttypes
tables.sets = defaultsets
tables.privates = privates
tables.specials = specials
local variables = interfaces and interfaces.variables or setmetatableindex("self")
local v_all = variables.all
local v_default = variables.default
if not publications.usedentries then
function publications.usedentries()
return { }
end
end
local xmlplaceholder = "<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>\n"
local defaultshortcuts = allocate {
jan = "1",
feb = "2",
mar = "3",
apr = "4",
may = "5",
jun = "6",
jul = "7",
aug = "8",
sep = "9",
oct = "10",
nov = "11",
dec = "12",
}
local space = p_whitespace^0
local separator = space * "+" * space
local l_splitter = lpeg.tsplitat(separator)
local d_splitter = lpeg.splitat (separator)
local unknownfield = function(t,k)
local v = "extra"
t[k] = v
return v
end
local unknowntype = function(t,k)
local v = "string"
t[k] = v
return v
end
local unknowncategory = function(t,k)
local v = {
required = false,
optional = false,
virtual = false,
fields = setmetatableindex(unknownfield), -- this will remember them
types = setmetatableindex(unknowntype),
sets = setmetatableindex(defaultsets), -- new, but rather small
}
t[k] = v
return v
end
local default = {
name = "default",
version = "1.00",
comment = "unknown specification.",
author = "anonymous",
copyright = "no one",
categories = setmetatableindex(unknowncategory),
types = setmetatableindex(defaulttypes,unknowntype),
}
-- maybe at some point we can have a handlers table with per field
-- a found, fetch, ... method
local function checkfield(specification,category,data)
local list = setmetatableindex({},implicits)
data.fields = list
data.category = category
local sets = data.sets or { }
for i=1,#origins do
local t = origins[i]
local d = data[t]
if d then
for j=1,#d do
local di = d[j]
di = sets[di] or di
if type(di) == "table" then
for k=1,#di do
list[di[k]] = t
end
else
list[di] = t
end
end
else
data[t] = { }
end
end
return data
end
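-- For the record a worked example (hypothetical category data, in the format
-- of the publ-imp-*.lua files):
--
-- local data = checkfield(specification,"article",{
--     sets     = { page = { "page", "pages" } },
--     required = { "author", "title" },
--     optional = { "page", "doi" },
-- })
--
-- after which data.fields maps author and title to "required" and page,
-- pages and doi to "optional" (the page set being expanded), while the
-- implicit fields (tag, key, crossref ...) resolve via the metatable.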
local specifications = setmetatableindex(function(t,name)
if not name then
return default -- initializer
end
local filename = formatters["publ-imp-%s.lua"](name)
local fullname = resolvers.findfile(filename) or ""
if fullname == "" then
report("no data definition file %a for %a",filename,name)
return default
end
local specification = table.load(fullname)
if not specification then
report("invalid data definition file %a for %a",fullname,name)
return default
end
--
local categories = specification.categories
if not categories then
categories = { }
specification.categories = categories
end
setmetatableindex(categories,unknowncategory)
--
local types = specification.types
if not types then
types = defaulttypes
specification.types = types
end
setmetatableindex(types,unknowntype)
--
local fields = setmetatableindex(unknownfield)
specification.fields = fields
--
local virtual = specification.virtual
if virtual == nil then -- so false is valid
virtual = { }
elseif virtual == false then
virtual = { }
elseif type(virtual) ~= "table" then
virtual = virtuals
end
specification.virtual = virtual
specification.virtualfields = tohash(virtual)
--
for category, data in next, categories do
categories[category] = checkfield(specification,category,copy(data)) -- we make sure we have no clones
end
--
t[name] = specification
--
return specification
end)
publications.specifications = specifications
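-- So specifications["apa"] triggers loading of publ-imp-apa.lua (when found)
-- and caches the result; a sketch of what such a definition file returns
-- (hypothetical, the real file is more elaborate):
--
-- return {
--     name       = "apa",
--     version    = "1.00",
--     categories = {
--         article = {
--             required = { "author", "title", "journal", "year" },
--             optional = { "volume", "number", "pages", "doi" },
--         },
--     },
-- }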
function publications.setcategory(target,category,data)
local specification = specifications[target]
specification.categories[category] = checkfield(specification,category,data)
end
function publications.parenttag(dataset,tag)
if not dataset or not tag then
report("error in specification, dataset %a, tag %a",dataset,tag)
elseif find(tag,"%+") then
local tags = lpegmatch(l_splitter,tag)
local parent = tags[1]
local current = datasets[dataset]
local luadata = current.luadata
local details = current.details
local first = luadata[parent]
if first then
local detail = details[parent]
local children = detail.children
if not children then
children = { }
detail.children = children
end
-- add new ones but only once
for i=2,#tags do
local tag = tags[i]
for j=1,#children do
if children[j] == tag then
tag = false
end
end
if tag then
local entry = luadata[tag]
if entry then
local detail = details[tag]
children[#children+1] = tag
if detail.parent then
report("error in combination, dataset %a, tag %a, parent %a, ignored %a",dataset,tag,detail.parent,parent)
else
report("combining, dataset %a, tag %a, parent %a",dataset,tag,parent)
detail.parent = parent
end
end
end
end
return parent
end
end
return tag or ""
end
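-- An example of the combination syntax: in "demo-1+demo-2" the first tag is
-- the parent and the rest become its children (tags are hypothetical and
-- must already be present in the dataset):
--
-- local parent = publications.parenttag("default","demo-1+demo-2")
-- -- parent == "demo-1", details["demo-1"].children == { "demo-2" }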
function publications.new(name)
publicationsstats.nofdatasets = publicationsstats.nofdatasets + 1
local dataset = {
name = name or "dataset " .. publicationsstats.nofdatasets,
nofentries = 0,
shortcuts = { },
luadata = { },
suffixes = { },
xmldata = xmlconvert(xmlplaceholder),
details = { },
ordered = { },
nofbytes = 0,
entries = nil, -- empty == all
sources = { },
loaded = { },
fields = { },
userdata = { },
used = { },
commands = { }, -- for statistical purposes
status = {
resources = false,
userdata = false,
},
specifications = {
-- used specifications
},
suffixed = false,
}
-- we delay details till we need it (maybe we just delay the
-- individual fields but that is tricky as there can be some
-- dependencies)
return dataset
end
setmetatableindex(datasets,function(t,k)
if type(k) == "table" then
return k -- so we can use this accessor as checker
else
local v = publications.new(k)
datasets[k] = v
return v
end
end)
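-- Thanks to this index method a dataset materializes on first access, so one
-- can safely do (name is hypothetical):
--
-- local current = publications.datasets["standard"]
-- print(current.name,current.nofentries) -- standard 0
--
-- and passing an already resolved dataset table just gives that table back.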
local function getindex(dataset,luadata,tag)
local found = luadata[tag]
if found then
local index = found.index or 0
dataset.ordered[tag] = index
return index
else
local index = dataset.nofentries + 1
dataset.nofentries = index
dataset.ordered[index] = tag
return index
end
end
publications.getindex = getindex
do
-- we apply some normalization
local space = S(" \t\n\r\f") -- / " "
local collapsed = space^1/" "
----- csletter = R("az","AZ")
local csletter = lpegpatterns.csletter
----- command = P("\\") * Cc("btxcmd{") * (R("az","AZ")^1) * Cc("}")
----- command = P("\\") * (Carg(1) * C(R("az","AZ")^1) / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
----- command = P("\\") * (Carg(1) * C(R("az","AZ")^1) * space^0 / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
local command = P("\\") * (Carg(1) * C(csletter^1) * space^0 / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
local whatever = P("\\") * P(" ")^1 / " "
+ P("\\") * ( P("hbox") + P("raise") ) -- bah
local somemath = P("$") * ((1-P("$"))^1) * P("$") -- let's not assume nested math
----- character = lpegpatterns.utf8character
local any = P(1)
local done = P(-1)
-- local one_l = P("{") / ""
-- local one_r = P("}") / ""
-- local two_l = P("{{") / ""
-- local two_r = P("}}") / ""
local zero_l_r = P("{}") / "" * #P(1)
local special = P("#") / "\\letterhash "
local filter_0 = S('\\{}#')
local filter_1 = (1-filter_0)^0 * filter_0
local filter_2 = Cs(
-- {{...}} ... {{...}}
-- two_l * (command + special + any - two_r - done)^0 * two_r * done +
-- one_l * (command + special + any - one_r - done)^0 * one_r * done +
(
somemath +
whatever +
command +
special +
collapsed +
zero_l_r +
any
)^0
)
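-- A quick sanity check of this normalizer (hypothetical input): commands
-- become btxcmd calls and are counted, hashes get escaped, spaces collapse
-- and math is left alone:
--
-- local counts = { }
-- print(lpegmatch(filter_2,[[\TeX{} is   #1 in $x^2$]],1,counts))
-- -- btxcmd{TeX} is \letterhash 1 in $x^2$   (counts.TeX == 1)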
-- Currently we expand shortcuts and for large ones (like the acknowledgements
-- in tugboat.bib) this is not that efficient. However, eventually strings get
-- hashed again.
local function do_shortcut(key,value,dataset)
publicationsstats.nofshortcuts = publicationsstats.nofshortcuts + 1
dataset.shortcuts[key] = value
end
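-- These shortcuts are the @string abbreviations from the bib source; a
-- (hypothetical) snippet like
--
-- @string{jgr = "J. Geophys. Res."}
-- @article{demo, journal = jgr, month = jan }
--
-- stores jgr via do_shortcut, after which the r_value rule defined below
-- resolves bare keywords through dataset.shortcuts and then through
-- defaultshortcuts, so month ends up as "1".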
-- todo: categories : metatable that lowers and also counts
-- todo: fields : metatable that lowers
local tags = table.setmetatableindex("table")
local function do_definition(category,tag,tab,dataset)
publicationsstats.nofdefinitions = publicationsstats.nofdefinitions + 1
if tag == "" then
tag = "no-tag-set"
end
local fields = dataset.fields
local luadata = dataset.luadata
local hashtag = tag
if luadata[tag] then
local t = tags[tag]
local d = dataset.name
local n = (t[d] or 0) + 1
t[d] = n
hashtag = tag .. "-" .. n
if trace_duplicates then
local p = { }
for k, v in sortedhash(t) do
p[#p+1] = formatters["%s:%s"](k,v)
end
report_duplicates("tag %a is present multiple times: % t, assigning hashtag %a",tag,p,hashtag)
end
end
local index = getindex(dataset,luadata,hashtag)
local entries = {
category = lower(category),
tag = tag,
index = index,
}
for i=1,#tab,2 do
local original = tab[i]
local normalized = fields[original]
if not normalized then
normalized = lower(original) -- we assume ascii fields
fields[original] = normalized
end
-- if entries[normalized] then
if rawget(entries,normalized) then
if trace_duplicates then
report_duplicates("redundant field %a is ignored for tag %a in dataset %a",normalized,tag,dataset.name)
end
else
local value = tab[i+1]
value = textoutf(value)
if lpegmatch(filter_1,value) then
value = lpegmatch(filter_2,value,1,dataset.commands) -- we need to start at 1 for { }
end
if normalized == "crossref" then
local parent = luadata[value]
if parent then
setmetatableindex(entries,parent)
else
-- warning
end
end
entries[normalized] = value
end
end
luadata[hashtag] = entries
end
local function resolve(s,dataset)
return dataset.shortcuts[s] or defaultshortcuts[s] or s -- can be number
end
local pattern = p_whitespace^0
* C(P("message") + P("warning") + P("error") + P("comment")) * p_whitespace^0 * P(":")
* p_whitespace^0
* C(P(1)^1)
local function do_comment(s,dataset)
local how, what = lpegmatch(pattern,s)
if how and what then
local t = string.splitlines(utilities.strings.striplines(what))
local b = file.basename(dataset.fullname or dataset.name or "unset")
for i=1,#t do
report("%s > %s : %s",b,how,t[i])
end
end
end
local percent = P("%")
local start = P("@")
local comma = P(",")
local hash = P("#")
local escape = P("\\")
local single = P("'")
local double = P('"')
local left = P('{')
local right = P('}')
local both = left + right
local lineending = S("\n\r")
local space = S(" \t\n\r\f") -- / " "
local spacing = space^0
local equal = P("=")
----- collapsed = (space^1)/ " "
local collapsed = p_whitespace^1/" "
local nospaces = p_whitespace^1/""
local p_left = (p_whitespace^0 * left) / ""
local p_right = (right * p_whitespace^0) / ""
local balanced = P {
[1] = ((escape * (left+right)) + (collapsed + 1 - (left+right))^1 + V(2))^0,
[2] = left * V(1) * right,
}
-- local unbalanced = P {
-- [1] = left * V(2) * right,
-- [2] = ((escape * (left+right)) + (collapsed + 1 - (left+right))^1 + V(1))^0,
-- }
local unbalanced = (left/"") * balanced * (right/"") * P(-1)
local keyword = C((R("az","AZ","09") + S("@_:-"))^1)
local key = C((1-space-equal)^1)
local tag = C((1-space-comma)^0)
local reference = keyword
local category = C((1-space-left)^1)
local s_quoted = ((escape*single) + collapsed + (1-single))^0
local d_quoted = ((escape*double) + collapsed + (1-double))^0
local b_value = p_left * balanced * p_right
-- local u_value = p_left * unbalanced * p_right -- get rid of outer { }
-- local s_value = (single/"") * (u_value + s_quoted) * (single/"")
-- local d_value = (double/"") * (u_value + d_quoted) * (double/"")
local s_value = (single/"") * (unbalanced + s_quoted) * (single/"")
local d_value = (double/"") * (unbalanced + d_quoted) * (double/"")
local r_value = reference * Carg(1) /resolve
local somevalue = d_value + b_value + s_value + r_value
local value = Cs((somevalue * ((spacing * hash * spacing)/"" * somevalue)^0))
value = value / function(s) return lpegmatch(lpegpatterns.stripper,s) end
local forget = percent^1 * (1-lineending)^0
local spacing = spacing * forget^0 * spacing
local assignment = spacing * key * spacing * equal * spacing * value * spacing
local definition = category * spacing * left * spacing * tag * spacing * comma * Ct((assignment * comma^0)^0) * spacing * right * Carg(1) / do_definition
local crapword = C((1-space-left)^1)
local shortcut = Cmt(crapword,function(_,p,s) return lower(s) == "string" and p end) * spacing * left * ((assignment * Carg(1))/do_shortcut * comma^0)^0 * spacing * right
local comment = Cmt(crapword,function(_,p,s) return lower(s) == "comment" and p end) * spacing * lpegpatterns.argument * Carg(1) / do_comment
local casecrap = #S("sScC") * (shortcut + comment)
local bibtotable = (space + forget + P("@") * (casecrap + definition) + 1)^0
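-- The net effect: matching bibtotable against a bib source calls the handlers
-- above for each blob; a (hypothetical) round trip:
--
-- local current = publications.datasets["scratch"]
-- lpegmatch(bibtotable,[[@book{demo, title = {A {Demo}}, year = 1984}]],1,current)
-- -- current.luadata.demo : { category = "book", tag = "demo", index = 1,
-- --                          title = "A {Demo}", year = "1984" }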
-- todo \%
-- loadbibdata -> dataset.luadata
-- loadtexdata -> dataset.luadata
-- loadluadata -> dataset.luadata
-- converttoxml -> dataset.xmldata from dataset.luadata
function publications.loadbibdata(dataset,content,source,kind)
if not source then
report("invalid source for dataset %a",dataset)
return
end
local current = datasets[dataset]
local size = content and #content or 0
if size == 0 then
report("empty source %a for dataset %a",source,current.name)
else
report("adding bib data to set %a from source %a",current.name,source)
end
statistics.starttiming(publications)
publicationsstats.nofbytes = publicationsstats.nofbytes + size
current.nofbytes = current.nofbytes + size
if source then
table.insert(current.sources, { filename = source, checksum = md5.HEX(content) })
current.loaded[source] = kind or true
end
current.newtags = #current.luadata > 0 and { } or current.newtags
lpegmatch(bibtotable,content or "",1,current)
statistics.stoptiming(publications)
end
end
do
-- we could use xmlescape again
local cleaner_0 = S('<>&')
local cleaner_1 = (1-cleaner_0)^0 * cleaner_0
local cleaner_2 = Cs ( (
P("<") / "&lt;" +
P(">") / "&gt;" +
P("&") / "&amp;" +
P(1)
)^0)
local compact = false -- can be a directive but then we also need to deal with newlines ... not now
function publications.converttoxml(dataset,nice,dontstore,usedonly,subset) -- we have fields !
local current = datasets[dataset]
local luadata = subset or (current and current.luadata)
if luadata then
statistics.starttiming(publications)
--
local result, r, n = { }, 0, 0
local usedonly = usedonly and publications.usedentries()
--
r = r + 1 ; result[r] = ""
r = r + 1 ; result[r] = ""
--
if nice then -- will be default
local f_entry_start = formatters[" "]
local s_entry_stop = " "
local f_field = formatters[" %s"]
for tag, entry in sortedhash(luadata) do
if not usedonly or usedonly[tag] then
r = r + 1 ; result[r] = f_entry_start(tag,entry.category,entry.index)
for key, value in sortedhash(entry) do
if key ~= "tag" and key ~= "category" and key ~= "index" then
if lpegmatch(cleaner_1,value) then
value = lpegmatch(cleaner_2,value)
end
if value ~= "" then
r = r + 1 ; result[r] = f_field(key,value)
end
end
end
r = r + 1 ; result[r] = s_entry_stop
n = n + 1
end
end
else
local f_entry_start = formatters[""]
local s_entry_stop = ""
local f_field = formatters["%s"]
for tag, entry in next, luadata do
if not usedonly or usedonly[tag] then
r = r + 1 ; result[r] = f_entry_start(entry.tag,entry.category,entry.index)
for key, value in next, entry do
if key ~= "tag" and key ~= "category" and key ~= "index" then
if lpegmatch(cleaner_1,value) then
value = lpegmatch(cleaner_2,value)
end
if value ~= "" then
r = r + 1 ; result[r] = f_field(key,value)
end
end
end
r = r + 1 ; result[r] = s_entry_stop
n = n + 1
end
end
end
--
r = r + 1 ; result[r] = ""
--
result = concat(result,nice and "\n" or nil)
--
if dontstore then
-- indeed
else
statistics.starttiming(xml)
current.xmldata = xmlconvert(result, {
resolve_entities = true,
resolve_predefined_entities = true, -- in case we have escaped entities
-- unify_predefined_entities = true, -- &#038; -> &amp;
utfize_entities = true,
} )
statistics.stoptiming(xml)
if lxml then
lxml.register(formatters["btx:%s"](current.name),current.xmldata)
end
end
statistics.stoptiming(publications)
return result, n
end
end
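-- In nice mode the generated xml looks like this (hypothetical entry):
--
-- <?xml version='1.0' standalone='yes'?>
-- <bibtex>
--  <entry tag='demo' category='book' index='1'>
--   <field name='title'>A Demo</field>
--  </entry>
-- </bibtex>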
end
do
local function resolvedname(dataset,filename)
local current = datasets[dataset]
if type(filename) ~= "string" then
report("invalid filename %a",tostring(filename))
end
local fullname = resolvers.findfile(filename,"bib")
if fullname == "" then
fullname = resolvers.findfile(filename) -- let's not be too picky
end
if not fullname or fullname == "" then
report("no file %a",filename)
current.fullname = filename
return current, false
else
current.fullname = fullname
return current, fullname
end
end
publications.resolvedname = resolvedname
local cleaner = false
local cleaned = false
function loaders.registercleaner(what,fullname)
if not fullname or fullname == "" then
report("no %s file %a",what,fullname)
return
end
local list = table.load(fullname)
if not list then
report("invalid %s file %a",what,fullname)
return
end
list = list.replacements
if not list then
report("no replacement table in %a",fullname)
return
end
if cleaned then
report("adding replacements from %a",fullname)
for k, v in next, list do
cleaned[k] = v
end
else
report("using replacements from %a",fullname)
cleaned = list
end
cleaner = true
end
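-- A cleaner file is just a loadable table with a replacements mapping; a
-- minimal sketch (hypothetical filename and content):
--
-- -- publ-cleaner-demo.lua:
-- return {
--     replacements = {
--         ["’"] = "'",
--         ["–"] = "--",
--     },
-- }
--
-- loaders.registercleaner("demo",resolvers.findfile("publ-cleaner-demo.lua"))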
function loaders.bib(dataset,filename,kind)
local dataset, fullname = resolvedname(dataset,filename)
if not fullname then
return
end
local data = io.loaddata(fullname) or ""
if data == "" then
report("empty file %a, nothing loaded",fullname)
return
end
if cleaner == true then
cleaner = Cs((lpeg.utfchartabletopattern(keys(cleaned)) / cleaned + p_utf8character)^1)
end
if cleaner ~= false then
data = lpegmatch(cleaner,data)
end
if trace then
report("loading file %a",fullname)
end
publications.loadbibdata(dataset,data,fullname,kind)
end
function loaders.lua(dataset,filename) -- if filename is a table we load that one
local current, data, fullname
if type(filename) == "table" then
current = datasets[dataset]
data = filename
else
dataset, fullname = resolvedname(dataset,filename)
if not fullname then
return
end
current = datasets[dataset]
data = table.load(fullname)
end
if data then
local luadata = current.luadata
-- we want the same index each run
for tag, entry in sortedhash(data) do
if type(entry) == "table" then
entry.index = getindex(current,luadata,tag)
entry.tag = tag
luadata[tag] = entry -- no cleaning yet
end
end
end
end
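-- So a lua source is a table of entries keyed by tag, either stored in a
-- file or passed directly (hypothetical data):
--
-- loaders.lua("default", {
--     demo = { category = "book", title = "A Demo", year = "1984" },
-- })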
function loaders.buffer(dataset,name) -- name is a buffer name, not a file
local current = datasets[dataset]
local barename = file.removesuffix(name)
local data = buffers.getcontent(barename) or ""
if data == "" then
report("empty buffer %a, nothing loaded",barename)
return
end
if trace then
report("loading buffer",barename)
end
publications.loadbibdata(current,data,barename,"bib")
end
function loaders.xml(dataset,filename)
local dataset, fullname = resolvedname(dataset,filename)
if not fullname then
return
end
local current = datasets[dataset]
local luadata = current.luadata
local root = xml.load(fullname)
for bibentry in xmlcollected(root,"/bibtex/entry") do
local attributes = bibentry.at
local tag = attributes.tag
local entry = {
category = attributes.category,
tag = tag, -- afterwards also set, to prevent overload
index = 0, -- prelocated
}
for field in xmlcollected(bibentry,"/field") do
entry[field.at.name] = field.dt[1] -- no cleaning yet | xmltext(field)
end
entry.index = getindex(current,luadata,tag)
entry.tag = tag
luadata[tag] = entry
end
end
setmetatableindex(loaders,function(t,filetype)
local v = function(dataset,filename)
report("no loader for file %a with filetype %a",filename,filetype)
end
t[filetype] = v
return v
end)
function publications.load(specification)
local current = datasets[specification.dataset or v_default]
local files = settings_to_array(specification.filename)
local kind = specification.kind
local dataspec = specification.specification
statistics.starttiming(publications)
for i=1,#files do
local filetype, filename = string.splitup(files[i],"::")
if not filename then
filename = filetype
filetype = file.suffix(filename)
end
if filename then
if not filetype or filetype == "" then
filetype = "bib"
end
if file.suffix(filename) == "" then
filename = file.addsuffix(filename,filetype)
end
loaders[filetype](current,filename)
if kind then
current.loaded[current.fullname or filename] = kind
end
if dataspec then
current.specifications[dataspec] = true
end
end
end
local runner = enhancer.runner
if runner then
runner(current)
end
statistics.stoptiming(publications)
return current
end
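-- Typical invocations; the filetype can be forced with a prefix, otherwise
-- the suffix decides, with bib as fallback (filenames are hypothetical):
--
-- publications.load { dataset = "default", filename = "demo.bib" }
-- publications.load { dataset = "default", filename = "lua::demo-extra" }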
end
do
function enhancers.order(dataset)
local luadata = dataset.luadata
local ordered = dataset.ordered
for i=1,#ordered do
local tag = ordered[i]
if type(tag) == "string" then
ordered[i] = luadata[tag]
end
end
end
function enhancers.details(dataset)
local luadata = dataset.luadata
local details = dataset.details
for tag, entry in next, luadata do
if not details[tag] then
details[tag] = { }
end
end
end
utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.order")
utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.details")
end
do
local checked = function(s,d) d[s] = (d[s] or 0) + 1 end
local checktex = ( (1-P("\\"))^1 + P("\\") * ((C(R("az","AZ")^1) * Carg(1))/checked))^0
function publications.analyze(dataset)
local current = datasets[dataset]
local data = current.luadata
local categories = { }
local fields = { }
local commands = { }
for tag, entry in next, data do
categories[entry.category] = (categories[entry.category] or 0) + 1
for field, value in next, entry do
fields[field] = (fields[field] or 0) + 1
if type(value) == "string" then
lpegmatch(checktex,value,1,commands)
end
end
end
current.analysis = {
categories = categories,
fields = fields,
commands = commands,
}
end
end
function publications.tags(dataset)
return sortedkeys(datasets[dataset].luadata)
end
function publications.sortedentries(dataset)
return sortedhash(datasets[dataset].luadata)
end
-- a helper:
function publications.concatstate(i,n)
if i == 0 then
return 0
elseif i == 1 then
return 1
elseif i == 2 and n == 2 then
return 4
elseif i == n then
return 3
else
return 2
end
end
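-- This state drives the separators when rendering lists (author names and
-- such): 0 means an empty list, 1 a first item, 2 a middle one, 3 the last
-- of more than two and 4 the last of exactly two, e.g.
--
-- for i=1,3 do print(publications.concatstate(i,3)) end -- 1 2 3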
-- savers
do
local savers = { }
local s_preamble = [[
% this is an export from context mkiv
@preamble{
\ifdefined\btxcmd
% we're probably in context
\else
\def\btxcmd#1{\csname#1\endcsname}
\fi
}
]]
function savers.bib(dataset,filename,tobesaved)
local f_start = formatters["@%s{%s,\n"]
local f_field = formatters[" %s = {%s},\n"]
local s_stop = "}\n\n"
local result = { s_preamble }
local n, r = 0, 1
for tag, data in sortedhash(tobesaved) do
r = r + 1 ; result[r] = f_start(data.category or "article",tag)
for key, value in sortedhash(data) do
if not privates[key] then
r = r + 1 ; result[r] = f_field(key,value)
end
end
r = r + 1 ; result[r] = s_stop
n = n + 1
end
report("%s entries from dataset %a saved in %a",n,dataset,filename)
io.savedata(filename,concat(result))
end
function savers.lua(dataset,filename,tobesaved)
local list = { }
local n = 0
for tag, data in next, tobesaved do
local t = { }
for key, value in next, data do
if not privates[key] then
t[key] = value
end
end
list[tag] = t
n = n + 1
end
report("%s entries from dataset %a saved in %a",n,dataset,filename)
table.save(filename,list)
end
function savers.xml(dataset,filename,tobesaved)
local result, n = publications.converttoxml(dataset,true,true,false,tobesaved)
report("%s entries from dataset %a saved in %a",n,dataset,filename)
io.savedata(filename,result)
end
function publications.save(specification)
local dataset = specification.dataset
local filename = specification.filename
local filetype = specification.filetype
local criterium = specification.criterium
statistics.starttiming(publications)
if not filename or filename == "" then
report("no filename for saving given")
return
end
if not filetype or filetype == "" then
filetype = file.suffix(filename)
end
if not criterium or criterium == "" then
criterium = v_all
end
local saver = savers[filetype]
if saver then
local current = datasets[dataset]
local luadata = current.luadata or { }
local tobesaved = { }
local result = structures.lists.filter({criterium = criterium, names = "btx"}) or { }
for i=1,#result do
local userdata = result[i].userdata
if userdata then
local set = userdata.btxset or v_default
if set == dataset then
local tag = userdata.btxref
if tag then
tobesaved[tag] = luadata[tag]
end
end
end
end
saver(dataset,filename,tobesaved)
else
report("unknown format %a for saving %a",filetype,dataset)
end
statistics.stoptiming(publications)
return dataset
end
implement {
name = "btxsavedataset",
actions = publications.save,
arguments = {
{
{ "dataset" },
{ "filename" },
{ "filetype" },
{ "criterium" },
}
}
}
end
-- casters
do
publications.detailed = setmetatableindex(function(detailed,kind)
local values = setmetatableindex(function(values,value)
local caster = casters[kind]
local cast = caster and caster(value) or value
values[value] = cast
return cast
end)
detailed[kind] = values
return values
end)
local keywordsplitter = utilities.parsers.groupedsplitat(";,")
casters.keyword = function(str)
return lpegmatch(keywordsplitter,str)
end
writers.keyword = function(k)
if type(k) == "table" then
return concat(p,";")
else
return k
end
end
local pagessplitter = lpeg.splitat(P("-")^1)
casters.range = function(str)
local first, last = lpegmatch(pagessplitter,str)
return first and last and { first, last } or str
end
writers.range = function(p)
if type(p) == "table" then
return concat(p,"-")
else
return p
end
end
casters.pagenumber = casters.range
writers.pagenumber = writers.range
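-- Casting and writing back (hypothetical values): keywords split into lists,
-- page ranges into pairs:
--
-- local k = casters.keyword("first;second,third") -- { "first", "second", "third" }
-- print(writers.keyword(k))                       -- first;second;third
-- local p = casters.range("123--145")             -- { "123", "145" }
-- print(writers.range(p))                         -- 123-145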
end