This repository provides a user manual for setting up a Docker environment tailored for testing DGTD code.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

598 lines
22 KiB

"""
Classes and methods to maintain any bibtex information that is stored
outside the doctree.

.. autoclass:: Citation
    :members:

.. autoclass:: BibtexDomain
    :members:
"""
import ast
import re
from typing import (
TYPE_CHECKING,
Dict,
Iterable,
List,
NamedTuple,
Optional,
Set,
Tuple,
cast,
)
import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import pybtex.plugin
import pybtex_docutils
import sphinx.util
from pybtex.richtext import Tag
from pybtex.style import FormattedEntry
from pybtex.style.template import FieldIsMissing
from sphinx.domains import Domain, ObjType
from sphinx.errors import ExtensionError
from sphinx.locale import _
import sphinxcontrib.bibtex.plugin
from .bibfile import BibData, normpath_filename, process_bibdata
from .citation_target import CitationTarget, parse_citation_targets
from .roles import CiteRole
from .style.referencing import BaseReferenceStyle, format_references
from .style.template import SphinxReferenceInfo
if TYPE_CHECKING:
from pybtex.database import Entry
from pybtex.style.formatting import BaseStyle
from sphinx.addnodes import pending_xref
from sphinx.application import Sphinx
from sphinx.builders import Builder
from sphinx.environment import BuildEnvironment
from .directives import BibliographyKey, BibliographyValue
from .roles import CitationRef
logger = sphinx.util.logging.getLogger(__name__)
def _raise_invalid_node(node):
"""Helper method to raise an exception when an invalid node is
visited.
"""
raise ValueError("invalid node %s in filter expression" % node)
class _FilterVisitor(ast.NodeVisitor):
"""Visit the abstract syntax tree of a parsed filter expression."""
entry = None
"""The bibliographic entry to which the filter must be applied."""
cited_docnames = False
"""The documents where the entry is cited (empty if not cited)."""
def __init__(self, entry, docname, cited_docnames):
self.entry = entry
self.docname = docname
self.cited_docnames = cited_docnames
def visit_Module(self, node):
if len(node.body) != 1:
raise ValueError("filter expression cannot contain multiple expressions")
return self.visit(node.body[0])
def visit_Expr(self, node):
return self.visit(node.value)
def visit_BoolOp(self, node):
outcomes = (self.visit(value) for value in node.values)
if isinstance(node.op, ast.And):
return all(outcomes)
elif isinstance(node.op, ast.Or):
return any(outcomes)
else: # pragma: no cover
# there are no other boolean operators
# so this code should never execute
assert False, "unexpected boolean operator %s" % node.op
def visit_UnaryOp(self, node):
if isinstance(node.op, ast.Not):
return not self.visit(node.operand)
else:
_raise_invalid_node(node)
def visit_BinOp(self, node):
left = self.visit(node.left)
op = node.op
right = self.visit(node.right)
if isinstance(op, ast.Mod):
# modulo operator is used for regular expression matching
if not isinstance(left, str):
raise ValueError("expected a string on left side of %s" % node.op)
if not isinstance(right, str):
raise ValueError("expected a string on right side of %s" % node.op)
return re.search(right, left, re.IGNORECASE)
elif isinstance(op, ast.BitOr):
return left | right
elif isinstance(op, ast.BitAnd):
return left & right
else:
_raise_invalid_node(node)
def visit_Compare(self, node):
# keep it simple: binary comparators only
if len(node.ops) != 1:
raise ValueError("syntax for multiple comparators not supported")
left = self.visit(node.left)
op = node.ops[0]
right = self.visit(node.comparators[0])
if isinstance(op, ast.Eq):
return left == right
elif isinstance(op, ast.NotEq):
return left != right
elif isinstance(op, ast.Lt):
return left < right
elif isinstance(op, ast.LtE):
return left <= right
elif isinstance(op, ast.Gt):
return left > right
elif isinstance(op, ast.GtE):
return left >= right
elif isinstance(op, ast.In):
return left in right
elif isinstance(op, ast.NotIn):
return left not in right
else:
# not used currently: ast.Is | ast.IsNot
_raise_invalid_node(op)
def visit_Name(self, node):
"""Calculate the value of the given identifier."""
id_ = node.id
if id_ == "type":
return self.entry.type.lower()
elif id_ == "key":
return self.entry.key.lower()
elif id_ == "cited":
return bool(self.cited_docnames)
elif id_ == "docname":
return self.docname
elif id_ == "docnames":
return self.cited_docnames
elif id_ == "author" or id_ == "editor":
if id_ in self.entry.persons:
return " and ".join(
str(person) # XXX needs fix in pybtex?
for person in self.entry.persons[id_]
)
else:
return ""
else:
return self.entry.fields.get(id_, "")
def visit_Set(self, node):
return frozenset(self.visit(elt) for elt in node.elts)
# NameConstant is Python 3.4 only
def visit_NameConstant(self, node):
return node.value # pragma: no cover
# Constant is Python 3.6+ only
# Since 3.8 Num, Str, Bytes, NameConstant and Ellipsis are just Constant
def visit_Constant(self, node):
return node.value
# Not used on 3.8+
def visit_Str(self, node):
return node.s # pragma: no cover
def generic_visit(self, node):
_raise_invalid_node(node)
def get_docnames(env):
    """Yield every document name of *env*, in order.

    The chain of "next" documents from ``env.collect_relations()`` is
    followed first, starting at the root document. All documents in
    ``env.found_docs`` not reached that way are then yielded in sorted
    order.
    """
    relations = env.collect_relations()
    # the root document setting was renamed in Sphinx 4.0
    if sphinx.version_info < (4, 0):
        current = env.config.master_doc
    else:
        current = env.config.root_doc
    visited = set()
    while current is not None:
        visited.add(current)
        yield current
        _parent, _previous, current = relations[current]
    yield from sorted(env.found_docs - visited)
class Citation(NamedTuple):
    """Record describing one citation listed by a bibliography directive."""

    #: Unique id of this citation.
    citation_id: str
    #: Key of its bibliography directive.
    bibliography_key: "BibliographyKey"
    #: Key (with prefix).
    key: str
    #: Entry from pybtex.
    entry: "Entry"
    #: Formatted entry for bibliography.
    formatted_entry: "FormattedEntry"
    #: Formatted entry for tooltip.
    tooltip_entry: Optional["FormattedEntry"]
def env_updated(app: "Sphinx", env: "BuildEnvironment") -> Iterable[str]:
    """Forward the call to the ``cite`` domain of *env*.

    :param app: Unused.
    :param env: Environment whose ``"cite"`` domain is looked up.
    :return: Whatever the domain's own ``env_updated`` method returns.
    """
    return cast(BibtexDomain, env.get_domain("cite")).env_updated()
def parse_header(header: str, source_path: str):
    """Parse *header* as reStructuredText and return its first node.

    :param header: reStructuredText source to parse.
    :param source_path: Source path recorded on the new document.
    :return: The first child of the parsed document.
    :raises IndexError: If parsing *header* produces no nodes.
    """
    parser_class = docutils.parsers.rst.Parser
    # docutils 0.19 introduced get_default_settings() and deprecated
    # OptionParser; prefer the new function and keep the old path as a
    # fallback for older docutils
    get_default_settings = getattr(docutils.frontend, "get_default_settings", None)
    if get_default_settings is not None:
        settings = get_default_settings(parser_class)
    else:
        # note: types stub for docutils doesn't know about components argument
        settings = docutils.frontend.OptionParser(
            components=(parser_class,)  # type: ignore
        ).get_default_values()
    document = docutils.utils.new_document(source_path, settings)
    parser_class().parse(header, document)
    return document[0]
class BibtexDomain(Domain):
    """Sphinx domain for the bibtex extension.

    The domain data holds the parsed bib files, the bibliography header,
    the bibliography directives, the citation references, and the
    citations computed from them in :meth:`env_updated`.
    """

    name = "cite"
    label = "BibTeX Citations"
    data_version = 4
    initial_data = dict(
        bibdata=BibData(
            encoding="", bibfiles={}, data=pybtex.database.BibliographyData()
        ),
        bibliography_header=docutils.nodes.container(),
        bibliographies={},
        citations=[],
        citation_refs=[],
    )
    backend = pybtex_docutils.Backend()
    reference_style: BaseReferenceStyle

    @property
    def bibdata(self) -> BibData:
        """Information about the bibliography files."""
        return self.data["bibdata"]

    @property
    def bibliography_header(self) -> docutils.nodes.Element:
        """Container node holding the parsed bibliography header."""
        return self.data["bibliography_header"]

    @property
    def bibliographies(self) -> Dict["BibliographyKey", "BibliographyValue"]:
        """Map storing information about each bibliography directive."""
        return self.data["bibliographies"]

    @property
    def citations(self) -> List[Citation]:
        """Citation data."""
        return self.data["citations"]

    @property
    def citation_refs(self) -> List["CitationRef"]:
        """Citation reference data."""
        return self.data["citation_refs"]

    def __init__(self, env: "BuildEnvironment"):
        """Set up the referencing style, roles and bib file data.

        :raises ExtensionError: If ``bibtex_bibfiles`` is not configured.
        """
        # set up referencing style
        style = sphinxcontrib.bibtex.plugin.find_plugin(
            "sphinxcontrib.bibtex.style.referencing",
            env.app.config.bibtex_reference_style,
        )
        self.reference_style = style()
        # set up object types and roles for referencing style
        role_names = self.reference_style.role_names()
        self.object_types = dict(
            citation=ObjType(_("citation"), *role_names, searchprio=-1),
        )
        self.roles = dict((name, CiteRole()) for name in role_names)
        # initialize the domain
        super().__init__(env)
        # connect env-updated
        env.app.connect("env-updated", env_updated)
        # check config
        if env.app.config.bibtex_bibfiles is None:
            raise ExtensionError("You must configure the bibtex_bibfiles setting")
        # update bib file information in the cache
        bibfiles = [
            normpath_filename(env, "/" + bibfile)
            for bibfile in env.app.config.bibtex_bibfiles
        ]
        self.data["bibdata"] = process_bibdata(
            self.bibdata, bibfiles, env.app.config.bibtex_encoding
        )
        # parse bibliography header
        header = env.app.config.bibtex_bibliography_header
        if header:
            # start from a fresh container so the parsed header is not
            # appended to a previously stored one
            self.data["bibliography_header"] = docutils.nodes.container()
            self.data["bibliography_header"] += parse_header(
                header, "bibliography_header"
            )

    def clear_doc(self, docname: str) -> None:
        """Drop all citations, references and bibliographies of *docname*."""
        self.data["citations"] = [
            citation
            for citation in self.citations
            if citation.bibliography_key.docname != docname
        ]
        self.data["citation_refs"] = [
            ref for ref in self.citation_refs if ref.docname != docname
        ]
        # iterate over a snapshot of the keys: the dict is modified in place
        for bib_key in list(self.bibliographies):
            if bib_key.docname == docname:
                del self.bibliographies[bib_key]

    def merge_domaindata(self, docnames: List[str], otherdata: Dict) -> None:
        """Copy bibliographies and citation references for *docnames*
        from *otherdata* into this domain.
        """
        for bib_key, bib_value in otherdata["bibliographies"].items():
            if bib_key.docname in docnames:
                self.bibliographies[bib_key] = bib_value
        for citation_ref in otherdata["citation_refs"]:
            if citation_ref.docname in docnames:
                self.citation_refs.append(citation_ref)
        # 'citations' domain data calculated in env_updated

    def env_updated(self) -> Iterable[str]:
        """Recompute :attr:`citations` for all bibliography directives.

        Warns about duplicate citation keys and duplicate labels among
        bibliographies whose list type is ``"citation"``.

        :return: An empty list (no documents are marked as updated).
        """
        # This function is called when all doctrees are parsed,
        # but before any post transforms are applied. We use it to
        # determine which citations will be added to which bibliography
        # directive, and also to format the labels. We need to format
        # the labels here because they must be known when resolve_xref is
        # called.
        self.citations.clear()  # might have been restored from pickle
        docnames = list(get_docnames(self.env))
        # we keep track of this to quickly check for duplicates
        used_keys: Set[str] = set()
        used_labels: Dict[str, str] = {}
        for bibliography_key, bibliography in self.bibliographies.items():
            for entry, formatted_entry, tooltip_entry in self.get_formatted_entries(
                bibliography_key,
                docnames,
                self.env.app.config.bibtex_tooltips,
                self.env.app.config.bibtex_tooltips_style,
            ):
                key = bibliography.keyprefix + formatted_entry.key
                if bibliography.list_ == "citation" and key in used_keys:
                    logger.warning(
                        'duplicate citation for key "%s"' % key,
                        location=(bibliography_key.docname, bibliography.line),
                        type="bibtex",
                        subtype="duplicate_citation",
                    )
                self.citations.append(
                    Citation(
                        citation_id=bibliography.citation_nodes[key]["ids"][0],
                        bibliography_key=bibliography_key,
                        key=key,
                        entry=entry,
                        formatted_entry=formatted_entry,
                        tooltip_entry=tooltip_entry,
                    )
                )
                if bibliography.list_ == "citation":
                    used_keys.add(key)
                    if formatted_entry.label not in used_labels:
                        used_labels[formatted_entry.label] = key
                    elif used_labels[formatted_entry.label] != key:
                        # if used_label[label] == key then already
                        # duplicate key warning
                        logger.warning(
                            'duplicate label "%s" for keys "%s" and "%s"'
                            % (
                                formatted_entry.label,
                                used_labels[formatted_entry.label],
                                key,
                            ),
                            location=(bibliography_key.docname, bibliography.line),
                            type="bibtex",
                            subtype="duplicate_label",
                        )
        return []  # expects list of updated docnames

    def resolve_xref(
        self,
        env: "BuildEnvironment",
        fromdocname: str,
        builder: "Builder",
        typ: str,
        target: str,
        node: "pending_xref",
        contnode: docutils.nodes.Element,
    ) -> docutils.nodes.Element:
        """Replace node by list of citation references (one for each key).

        A warning is logged for every key in *target* without a matching
        citation in a bibliography whose list type is ``"citation"``.
        """
        targets = parse_citation_targets(target)
        keys: Dict[str, CitationTarget] = {target2.key: target2 for target2 in targets}
        citations: Dict[str, Citation] = {
            cit.key: cit
            for cit in self.citations
            if cit.key in keys
            and self.bibliographies[cit.bibliography_key].list_ == "citation"
        }
        for key in keys:
            if key not in citations:
                logger.warning(
                    'could not find bibtex key "%s"' % key,
                    location=node,
                    type="bibtex",
                    subtype="key_not_found",
                )
        # the tooltip is rendered as plain text
        plaintext = pybtex.plugin.find_plugin("pybtex.backends", "plaintext")()
        references = [
            (
                citation.entry,
                citation.formatted_entry,
                SphinxReferenceInfo(
                    builder=builder,
                    fromdocname=fromdocname,
                    todocname=citation.bibliography_key.docname,
                    citation_id=citation.citation_id,
                    title=(
                        citation.tooltip_entry.text.render(plaintext).replace(
                            "\\url ", ""
                        )
                        if citation.tooltip_entry
                        else None
                    ),
                    pre_text=keys[citation.key].pre,
                    post_text=keys[citation.key].post,
                ),
            )
            for citation in citations.values()
        ]
        formatted_references = format_references(self.reference_style, typ, references)
        result_node = docutils.nodes.inline(rawsource=target)
        result_node += formatted_references.render(self.backend)
        return result_node

    def resolve_any_xref(
        self,
        env: "BuildEnvironment",
        fromdocname: str,
        builder: "Builder",
        target: str,
        node: "pending_xref",
        contnode: docutils.nodes.Element,
    ) -> List[Tuple[str, docutils.nodes.Element]]:
        """Replace node by list of citation references (one for each key),
        provided that the target has citation keys.

        :return: A single ``("p", node)`` pair when at least one of the
            comma-separated keys in *target* is a known citation, and an
            empty list otherwise.
        """
        keys = [key.strip() for key in target.split(",")]
        citations: Set[str] = {
            cit.key
            for cit in self.citations
            if cit.key in keys
            and self.bibliographies[cit.bibliography_key].list_ == "citation"
        }
        if any(key in citations for key in keys):
            result_node = self.resolve_xref(
                env, fromdocname, builder, "p", target, node, contnode
            )
            return [("p", result_node)]
        else:
            return []

    def get_all_cited_keys(self, docnames):
        """Yield all citation keys for given *docnames* in order, then
        ordered by citation order.

        :raises KeyError: If a citation reference belongs to a document
            that is not listed in *docnames*.
        """
        # map each docname to its first position once, instead of a linear
        # docnames.index() search for every citation reference
        position: Dict[str, int] = {}
        for index, docname in enumerate(docnames):
            position.setdefault(docname, index)
        # sorted() is stable, so references within one document keep the
        # order in which they were recorded
        for citation_ref in sorted(
            self.citation_refs, key=lambda ref: position[ref.docname]
        ):
            for target in citation_ref.targets:
                yield target.key

    def get_entries(self, bibfiles: List[str]) -> Iterable["Entry"]:
        """Return all bibliography entries from the bib files, unsorted
        (i.e. in order of appearance in the bib files).
        """
        for bibfile in bibfiles:
            for key in self.bibdata.bibfiles[bibfile].keys:
                yield self.bibdata.data.entries[key]

    def get_filtered_entries(
        self, bibliography_key: "BibliographyKey"
    ) -> Iterable[Tuple[str, "Entry"]]:
        """Return unsorted bibliography entries filtered by the filter
        expression.

        An entry is yielded, as a ``(prefixed key, entry)`` pair, when the
        filter accepts it or when its key is listed explicitly in the
        bibliography's ``keys``. If the filter cannot be evaluated, a
        warning is logged and the entry is accepted only if it is cited.
        """
        bibliography = self.bibliographies[bibliography_key]
        # index the citation references once (key -> citing documents)
        # rather than rescanning every reference for every entry
        docnames_by_key: Dict[str, Set[str]] = {}
        for citation_ref in self.citation_refs:
            for target in citation_ref.targets:
                docnames_by_key.setdefault(target.key, set()).add(
                    citation_ref.docname
                )
        for entry in self.get_entries(bibliography.bibfiles):
            key = bibliography.keyprefix + entry.key
            # hand every visitor its own copy of the set
            cited_docnames = set(docnames_by_key.get(key, ()))
            visitor = _FilterVisitor(
                entry=entry,
                docname=bibliography_key.docname,
                cited_docnames=cited_docnames,
            )
            try:
                success = visitor.visit(bibliography.filter_)
            except ValueError as err:
                logger.warning(
                    "syntax error in :filter: expression; %s" % err,
                    location=(bibliography_key.docname, bibliography.line),
                    type="bibtex",
                    subtype="filter_syntax_error",
                )
                # recover by falling back to the default
                success = bool(cited_docnames)
            if success or entry.key in bibliography.keys:
                yield key, entry

    def get_sorted_entries(
        self, bibliography_key: "BibliographyKey", docnames: List[str]
    ) -> Iterable[Tuple[str, "Entry"]]:
        """Return filtered bibliography entries sorted by citation order."""
        entries = dict(self.get_filtered_entries(bibliography_key))
        for key in self.get_all_cited_keys(docnames):
            try:
                # pop, so a key cited several times is yielded only once
                entry = entries.pop(key)
            except KeyError:
                pass
            else:
                yield key, entry
        # then all remaining keys, in order of bibliography file
        for key, entry in entries.items():
            yield key, entry

    def get_formatted_entries(
        self,
        bibliography_key: "BibliographyKey",
        docnames: List[str],
        tooltips: bool,
        tooltips_style: str,
    ) -> Iterable[Tuple["Entry", "FormattedEntry", Optional["FormattedEntry"]]]:
        """Get sorted bibliography entries along with their pybtex labels,
        with additional sorting and formatting applied from the pybtex style.

        :param tooltips: Whether to also format a tooltip entry.
        :param tooltips_style: Name of the pybtex style for tooltips; when
            empty, the bibliography's own style is used.
        :return: ``(entry, formatted entry, tooltip entry)`` triples. The
            tooltip entry is ``None`` when *tooltips* is false or when
            formatting failed on a missing field.
        """
        bibliography = self.bibliographies[bibliography_key]
        entries = dict(self.get_sorted_entries(bibliography_key, docnames))
        style: BaseStyle = cast(
            "BaseStyle",
            pybtex.plugin.find_plugin("pybtex.style.formatting", bibliography.style)(),
        )
        style2: Optional[BaseStyle] = (
            (
                cast(
                    "BaseStyle",
                    pybtex.plugin.find_plugin(
                        "pybtex.style.formatting", tooltips_style
                    )(),
                )
                if tooltips_style
                else style
            )
            if tooltips
            else None
        )
        sorted_entries: Iterable[Entry] = style.sort(entries.values())
        labels = style.format_labels(sorted_entries)
        for label, entry in zip(labels, sorted_entries):
            full_label = bibliography.labelprefix + label
            # only the formatting calls are guarded; the yield is kept
            # outside the try body
            try:
                formatted_entry = style.format_entry(full_label, entry)
                tooltip_entry = (
                    style2.format_entry(full_label, entry) if style2 else None
                )
            except FieldIsMissing as exc:
                logger.warning(
                    str(exc),
                    location=(bibliography_key.docname, bibliography.line),
                    type="bibtex",
                    subtype="missing_field",
                )
                # show the error text in place of the entry
                formatted_error_entry = FormattedEntry(
                    entry.key, Tag("b", str(exc)), full_label
                )
                yield entry, formatted_error_entry, None
            else:
                yield entry, formatted_entry, tooltip_entry