You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
7.2 KiB
219 lines
7.2 KiB
8 months ago
|
"""Highlight code blocks using Pygments."""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from functools import partial
|
||
|
from importlib import import_module
|
||
|
from typing import TYPE_CHECKING, Any
|
||
|
|
||
|
from pygments import highlight
|
||
|
from pygments.filters import ErrorToken
|
||
|
from pygments.formatters import HtmlFormatter, LatexFormatter
|
||
|
from pygments.lexers import (
|
||
|
CLexer,
|
||
|
PythonConsoleLexer,
|
||
|
PythonLexer,
|
||
|
RstLexer,
|
||
|
TextLexer,
|
||
|
get_lexer_by_name,
|
||
|
guess_lexer,
|
||
|
)
|
||
|
from pygments.styles import get_style_by_name
|
||
|
from pygments.util import ClassNotFound
|
||
|
|
||
|
from sphinx.locale import __
|
||
|
from sphinx.pygments_styles import NoneStyle, SphinxStyle
|
||
|
from sphinx.util import logging, texescape
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from pygments.formatter import Formatter
|
||
|
from pygments.lexer import Lexer
|
||
|
from pygments.style import Style
|
||
|
|
||
|
# Module-level logger used for lexer-lookup and highlighting warnings.
logger = logging.getLogger(__name__)

# Ready-made lexer instances keyed by language name.  ``PygmentsBridge.get_lexer``
# hands these back unchanged, without attaching the ``raiseonerror`` filter.
lexers: dict[str, Lexer] = {}

# Lexer factories keyed by language name.  Each factory pre-binds
# ``stripnl=False``; further options are supplied by the caller at
# instantiation time.
lexer_classes: dict[str, type[Lexer] | partial[Lexer]] = {
    alias: partial(lexer_cls, stripnl=False)
    for alias, lexer_cls in (
        ('none', TextLexer),
        ('python', PythonLexer),
        ('pycon', PythonConsoleLexer),
        ('rest', RstLexer),
        ('c', CLexer),
    )
}
|
||
|
|
||
|
|
||
|
# Translation table (code point -> replacement) mapping the characters
# backslash, '{' and '}' to their \PYGZ..{} macro spellings.
escape_hl_chars = str.maketrans({
    '\\': '\\PYGZbs{}',
    '{': '\\PYGZob{}',
    '}': '\\PYGZcb{}',
})
|
||
|
|
||
|
# used if Pygments is available
|
||
|
# MEMO: no use of \protected here to avoid having to do hyperref extras,
|
||
|
# (if in future code highlighting in sectioning titles is activated):
|
||
|
# the definitions here use only robust, protected or chardef tokens,
|
||
|
# which are all known to the hyperref re-encoding for bookmarks.
|
||
|
# The " is troublesome because we would like to use \text\textquotedbl
|
||
|
# but \textquotedbl is *defined to raise an error* (!) if the font
|
||
|
# encoding is OT1. This however could happen from 'fontenc' key.
|
||
|
# MEMO: the Pygments escapes with \char`\<char> syntax, if the document
|
||
|
# uses old OT1 font encoding, work correctly only in monospace font.
|
||
|
# MEMO: the Pygmentize output mark-up is always with a {} after.
|
||
|
_LATEX_ADD_STYLES = r"""
|
||
|
% Sphinx redefinitions
|
||
|
% Originally to obtain a straight single quote via package textcomp, then
|
||
|
% to fix problems for the 5.0.0 inline code highlighting (captions!).
|
||
|
% The \text is from amstext, a dependency of sphinx.sty. It is here only
|
||
|
% to avoid build errors if for some reason expansion is in math mode.
|
||
|
\def\PYGZbs{\text\textbackslash}
|
||
|
\def\PYGZus{\_}
|
||
|
\def\PYGZob{\{}
|
||
|
\def\PYGZcb{\}}
|
||
|
\def\PYGZca{\text\textasciicircum}
|
||
|
\def\PYGZam{\&}
|
||
|
\def\PYGZlt{\text\textless}
|
||
|
\def\PYGZgt{\text\textgreater}
|
||
|
\def\PYGZsh{\#}
|
||
|
\def\PYGZpc{\%}
|
||
|
\def\PYGZdl{\$}
|
||
|
\def\PYGZhy{\sphinxhyphen}% defined in sphinxlatexstyletext.sty
|
||
|
\def\PYGZsq{\text\textquotesingle}
|
||
|
\def\PYGZdq{"}
|
||
|
\def\PYGZti{\text\textasciitilde}
|
||
|
\makeatletter
|
||
|
% use \protected to allow syntax highlighting in captions
|
||
|
\protected\def\PYG#1#2{\PYG@reset\PYG@toks#1+\relax+{\PYG@do{#2}}}
|
||
|
\makeatother
|
||
|
"""
|
||
|
|
||
|
|
||
|
class PygmentsBridge:
    """Highlight source code with Pygments for an HTML or LaTeX destination.

    The bridge picks a formatter class according to ``dest``, resolves the
    requested style, and exposes helpers to obtain lexers, formatters,
    highlighted markup and the matching style definitions.
    """

    # Formatter classes used per destination.  Override these attributes to
    # plug in Pygments formatters other than the default ones.
    html_formatter = HtmlFormatter
    latex_formatter = LatexFormatter

    def __init__(
        self, dest: str = 'html', stylename: str = 'sphinx', latex_engine: str | None = None
    ) -> None:
        """Configure the bridge.

        :param dest: ``'html'`` selects :attr:`html_formatter`; any other
            value selects :attr:`latex_formatter`.
        :param stylename: style identifier, resolved by :meth:`get_style`.
        :param latex_engine: forwarded to ``texescape.hlescape`` when the
            destination is not HTML.
        """
        self.dest = dest
        self.latex_engine = latex_engine

        self.formatter_args: dict[str, Any] = {'style': self.get_style(stylename)}
        if dest != 'html':
            self.formatter = self.latex_formatter
            # Non-HTML output gets the 'PYG' command prefix.
            self.formatter_args['commandprefix'] = 'PYG'
        else:
            self.formatter = self.html_formatter

    def get_style(self, stylename: str) -> type[Style]:
        """Resolve *stylename* to a style class.

        An empty name or ``'sphinx'`` yields ``SphinxStyle`` and ``'none'``
        yields ``NoneStyle``.  A dotted name is split at its last dot into a
        module to import and an attribute to fetch from it.  Anything else is
        looked up with ``get_style_by_name``.
        """
        if not stylename or stylename == 'sphinx':
            return SphinxStyle
        if stylename == 'none':
            return NoneStyle

        module_name, dot, attr_name = stylename.rpartition('.')
        if dot:
            return getattr(import_module(module_name), attr_name)
        return get_style_by_name(stylename)

    def get_formatter(self, **kwargs: Any) -> Formatter:
        """Instantiate the configured formatter.

        Keyword arguments given here are passed through, but the bridge's own
        ``formatter_args`` take precedence on conflicting keys.
        """
        return self.formatter(**{**kwargs, **self.formatter_args})

    def get_lexer(
        self,
        source: str,
        lang: str,
        opts: dict | None = None,
        force: bool = False,
        location: Any = None,
    ) -> Lexer:
        """Return the lexer to use for *source* written in *lang*.

        :param source: the code to be lexed; inspected to tell Python code
            from an interactive session, and used for ``'guess'``.
        :param lang: language name; the Python aliases and ``'default'`` are
            normalised to ``'python'`` or ``'pycon'``.
        :param opts: options forwarded to the lexer constructor.
        :param force: when true, the ``raiseonerror`` filter is not attached.
        :param location: passed to the warning emitted for unknown names.
        """
        options = opts or {}

        # Normalise the language name.
        if lang in {'py', 'python', 'py3', 'python3', 'default'}:
            # A leading prompt marks an interactive session.
            lang = 'pycon' if source.startswith('>>>') else 'python'
        elif lang == 'pycon3':
            lang = 'pycon'

        if lang in lexers:
            # Custom lexers are returned untouched (no raiseonerror filter).
            return lexers[lang]

        if lang in lexer_classes:
            lexer = lexer_classes[lang](**options)
        else:
            try:
                lexer = (
                    guess_lexer(source, **options)
                    if lang == 'guess'
                    else get_lexer_by_name(lang, **options)
                )
            except ClassNotFound:
                logger.warning(
                    __('Pygments lexer name %r is not known'), lang, location=location
                )
                # Unknown language: fall back to the plain 'none' lexer.
                lexer = lexer_classes['none'](**options)

        if not force:
            lexer.add_filter('raiseonerror')
        return lexer

    def highlight_block(
        self,
        source: str,
        lang: str,
        opts: dict | None = None,
        force: bool = False,
        location: Any = None,
        **kwargs: Any,
    ) -> str:
        """Highlight *source* as *lang* and return the formatted markup.

        Extra keyword arguments go to :meth:`get_formatter`.  If lexing
        raises ``ErrorToken``, highlighting is attempted a second time:
        ``'default'`` silently falls back to ``'none'``; any other language
        logs a warning and is retried with ``force=True``, or as ``'none'``
        if ``force`` was already set.  For non-HTML destinations the result
        is passed through ``texescape.hlescape``.
        """
        if not isinstance(source, str):
            source = source.decode()

        lexer = self.get_lexer(source, lang, opts, force, location)
        formatter = self.get_formatter(**kwargs)

        # highlight via Pygments
        try:
            highlighted = highlight(source, lexer, formatter)
        except ErrorToken as err:
            # This is most probably not the selected language, so let it
            # pass unhighlighted (or retry without error checking).
            if lang != 'default':
                logger.warning(
                    __(
                        'Lexing literal_block %r as "%s" resulted in an error at token: %r. '
                        'Retrying in relaxed mode.'
                    ),
                    source,
                    lang,
                    str(err),
                    type='misc',
                    subtype='highlighting_failure',
                    location=location,
                )
                if force:
                    lang = 'none'
                else:
                    force = True
            else:
                lang = 'none'  # automatic highlighting failed.
            lexer = self.get_lexer(source, lang, opts, force, location)
            highlighted = highlight(source, lexer, formatter)

        if self.dest != 'html':
            # MEMO: this is done to escape Unicode chars with non-Unicode engines
            return texescape.hlescape(highlighted, self.latex_engine)
        return highlighted

    def get_stylesheet(self) -> str:
        """Return the style definitions for the configured destination.

        HTML output is scoped to the ``.highlight`` selector; other output
        gets the formatter's definitions followed by ``_LATEX_ADD_STYLES``.
        """
        formatter = self.get_formatter()
        if self.dest != 'html':
            return formatter.get_style_defs() + _LATEX_ADD_STYLES
        return formatter.get_style_defs('.highlight')
|