This repository provides a user manual for setting up a Docker environment tailored for testing DGTD code.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

219 lines
7.2 KiB

8 months ago
"""Highlight code blocks using Pygments."""
from __future__ import annotations
from functools import partial
from importlib import import_module
from typing import TYPE_CHECKING, Any
from pygments import highlight
from pygments.filters import ErrorToken
from pygments.formatters import HtmlFormatter, LatexFormatter
from pygments.lexers import (
CLexer,
PythonConsoleLexer,
PythonLexer,
RstLexer,
TextLexer,
get_lexer_by_name,
guess_lexer,
)
from pygments.styles import get_style_by_name
from pygments.util import ClassNotFound
from sphinx.locale import __
from sphinx.pygments_styles import NoneStyle, SphinxStyle
from sphinx.util import logging, texescape
if TYPE_CHECKING:
from pygments.formatter import Formatter
from pygments.lexer import Lexer
from pygments.style import Style
logger = logging.getLogger(__name__)

# Lexer instances keyed by language name.  ``PygmentsBridge.get_lexer``
# returns an entry from this mapping unchanged when the language matches.
lexers: dict[str, Lexer] = {}

# Lexer factories for the specially handled languages; every one of them is
# created with ``stripnl=False``.
lexer_classes: dict[str, type[Lexer] | partial[Lexer]] = {
    alias: partial(lexer_cls, stripnl=False)
    for alias, lexer_cls in (
        ('none', TextLexer),
        ('python', PythonLexer),
        ('pycon', PythonConsoleLexer),
        ('rest', RstLexer),
        ('c', CLexer),
    )
}
# Table keyed by code point (the shape ``str.translate`` accepts) that maps
# the backslash and both braces to their Pygments LaTeX escape macros.
escape_hl_chars = str.maketrans({
    '\\': '\\PYGZbs{}',
    '{': '\\PYGZob{}',
    '}': '\\PYGZcb{}',
})
# LaTeX definitions appended to the Pygments-generated style definitions when
# the output is not HTML (see ``PygmentsBridge.get_stylesheet``).
# Used if Pygments is available.
# MEMO: \protected is not used for the \PYGZxx escapes, to avoid having to
# do hyperref extras (if in future code highlighting in sectioning titles is
# activated): the definitions here use only robust, protected or chardef
# tokens, which are all known to the hyperref re-encoding for bookmarks.
# The " is troublesome because we would like to use \text\textquotedbl
# but \textquotedbl is *defined to raise an error* (!) if the font
# encoding is OT1. This however could happen from the 'fontenc' key.
# MEMO: the Pygments escapes with \char`\<char> syntax, if the document
# uses the old OT1 font encoding, work correctly only in monospace font.
# MEMO: the Pygmentize output mark-up always has a {} after it.
_LATEX_ADD_STYLES = r"""
% Sphinx redefinitions
% Originally to obtain a straight single quote via package textcomp, then
% to fix problems for the 5.0.0 inline code highlighting (captions!).
% The \text is from amstext, a dependency of sphinx.sty. It is here only
% to avoid build errors if for some reason expansion is in math mode.
\def\PYGZbs{\text\textbackslash}
\def\PYGZus{\_}
\def\PYGZob{\{}
\def\PYGZcb{\}}
\def\PYGZca{\text\textasciicircum}
\def\PYGZam{\&}
\def\PYGZlt{\text\textless}
\def\PYGZgt{\text\textgreater}
\def\PYGZsh{\#}
\def\PYGZpc{\%}
\def\PYGZdl{\$}
\def\PYGZhy{\sphinxhyphen}% defined in sphinxlatexstyletext.sty
\def\PYGZsq{\text\textquotesingle}
\def\PYGZdq{"}
\def\PYGZti{\text\textasciitilde}
\makeatletter
% use \protected to allow syntax highlighting in captions
\protected\def\PYG#1#2{\PYG@reset\PYG@toks#1+\relax+{\PYG@do{#2}}}
\makeatother
"""
class PygmentsBridge:
    """Highlight source text with Pygments for HTML or LaTeX output.

    The ``dest`` passed to the constructor selects the formatter:
    ``'html'`` uses ``html_formatter``, any other value uses
    ``latex_formatter``.
    """

    # Set these attributes if you want to have different Pygments formatters
    # than the default ones.
    html_formatter = HtmlFormatter
    latex_formatter = LatexFormatter

    def __init__(
        self, dest: str = 'html', stylename: str = 'sphinx', latex_engine: str | None = None
    ) -> None:
        """Select the formatter class and its fixed arguments.

        :param dest: output kind; ``'html'`` picks the HTML formatter,
            anything else the LaTeX formatter.
        :param stylename: style name, resolved through :meth:`get_style`.
        :param latex_engine: passed to ``texescape.hlescape`` when
            highlighting for non-HTML output.
        """
        self.dest = dest
        self.latex_engine = latex_engine
        self.formatter_args: dict[str, Any] = {'style': self.get_style(stylename)}
        if dest != 'html':
            self.formatter = self.latex_formatter
            # The LaTeX output uses the ``PYG`` command prefix.
            self.formatter_args['commandprefix'] = 'PYG'
        else:
            self.formatter = self.html_formatter

    def get_style(self, stylename: str) -> type[Style]:
        """Resolve *stylename* to a Pygments style class.

        An empty name or ``'sphinx'`` gives ``SphinxStyle``; ``'none'``
        gives ``NoneStyle``; a dotted name is imported as
        ``module.attribute``; any other name is looked up with
        ``get_style_by_name``.
        """
        if not stylename or stylename == 'sphinx':
            return SphinxStyle
        if stylename == 'none':
            return NoneStyle
        if '.' not in stylename:
            return get_style_by_name(stylename)
        # Dotted path: everything before the last dot is the module.
        module_name, _, attr_name = stylename.rpartition('.')
        return getattr(import_module(module_name), attr_name)

    def get_formatter(self, **kwargs: Any) -> Formatter:
        """Instantiate the formatter selected at construction time.

        The bridge's own ``formatter_args`` take precedence over *kwargs*.
        """
        options = {**kwargs, **self.formatter_args}
        return self.formatter(**options)

    def get_lexer(
        self,
        source: str,
        lang: str,
        opts: dict | None = None,
        force: bool = False,
        location: Any = None,
    ) -> Lexer:
        """Return the lexer to use for *source* written in *lang*.

        :param source: the text to highlight; inspected to detect
            interactive Python sessions and handed to ``guess_lexer``
            when *lang* is ``'guess'``.
        :param lang: the requested language name.
        :param opts: keyword options forwarded to the lexer constructor.
        :param force: when false, the ``raiseonerror`` filter is added to
            the lexer.
        :param location: forwarded to the logger when the lexer name is
            unknown.
        """
        opts = opts or {}

        # find out which lexer to use
        if lang in {'py', 'python', 'py3', 'python3', 'default'}:
            # a leading prompt marks an interactive session
            lang = 'pycon' if source.startswith('>>>') else 'python'
        elif lang == 'pycon3':
            lang = 'pycon'

        if lang in lexers:
            # just return custom lexers here (without installing raiseonerror filter)
            return lexers[lang]

        if lang in lexer_classes:
            lexer = lexer_classes[lang](**opts)
        else:
            try:
                lexer = (
                    guess_lexer(source, **opts)
                    if lang == 'guess'
                    else get_lexer_by_name(lang, **opts)
                )
            except ClassNotFound:
                logger.warning(
                    __('Pygments lexer name %r is not known'), lang, location=location
                )
                # Fall back to the plain-text lexer.
                lexer = lexer_classes['none'](**opts)

        if not force:
            lexer.add_filter('raiseonerror')
        return lexer

    def highlight_block(
        self,
        source: str,
        lang: str,
        opts: dict | None = None,
        force: bool = False,
        location: Any = None,
        **kwargs: Any,
    ) -> str:
        """Highlight *source* as *lang* and return the formatted markup.

        When lexing raises ``ErrorToken`` the block is highlighted a second
        time: ``'default'`` silently falls back to ``'none'``; any other
        language logs a warning and is retried in relaxed mode, or as
        ``'none'`` if *force* was already set.

        :param source: the text to highlight; a non-``str`` value is
            decoded first.
        :param lang: the requested language name.
        :param opts: keyword options forwarded to the lexer constructor.
        :param force: passed to :meth:`get_lexer`.
        :param location: forwarded to the logger for warnings.
        :param kwargs: extra arguments for :meth:`get_formatter`.
        """
        if not isinstance(source, str):
            source = source.decode()

        lexer = self.get_lexer(source, lang, opts, force, location)

        # highlight via Pygments
        formatter = self.get_formatter(**kwargs)
        try:
            highlighted = highlight(source, lexer, formatter)
        except ErrorToken as err:
            # this is most probably not the selected language,
            # so let it pass un highlighted
            if lang == 'default':
                lang = 'none'  # automatic highlighting failed.
            else:
                logger.warning(
                    __(
                        'Lexing literal_block %r as "%s" resulted in an error at token: %r. '
                        'Retrying in relaxed mode.'
                    ),
                    source,
                    lang,
                    str(err),
                    type='misc',
                    subtype='highlighting_failure',
                    location=location,
                )
                if force:
                    lang = 'none'
                else:
                    force = True
            lexer = self.get_lexer(source, lang, opts, force, location)
            highlighted = highlight(source, lexer, formatter)

        if self.dest != 'html':
            # MEMO: this is done to escape Unicode chars with non-Unicode engines
            return texescape.hlescape(highlighted, self.latex_engine)
        return highlighted

    def get_stylesheet(self) -> str:
        """Return the style definitions for the configured output.

        HTML output is scoped to the ``.highlight`` selector; any other
        output gets ``_LATEX_ADD_STYLES`` appended.
        """
        formatter = self.get_formatter()
        if self.dest != 'html':
            return formatter.get_style_defs() + _LATEX_ADD_STYLES
        return formatter.get_style_defs('.highlight')