This repository provides a user manual for setting up a Docker environment tailored for testing DGTD code.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

611 lines
18 KiB

# Copyright (c) 2006-2021 Andrey Golovizin
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from __future__ import absolute_import, unicode_literals
import re
import six
from pybtex.bibtex.exceptions import BibTeXError
from pybtex.py3compat import fix_unicode_literals_in_doctest
from pybtex.utils import pairwise
from pybtex import py3compat
# Matches a single whitespace character; wrap() uses the match positions as
# candidate line-break points.
whitespace_re = re.compile(r'(\s)')
# Matches a leading TeX control word (a backslash followed by ASCII letters);
# bibtex_purify() strips it from "special characters".
purify_special_char_re = re.compile(r'^\\[A-Za-z]+')
def wrap(string, width=79, subsequent_indent='  '):
    r"""
    Wrap a long string into multiple lines by inserting line breaks.

    The string is broken at whitespace characters so that each line is as long
    as possible, but no longer than ``width`` characters.

    If there are no possible break points in the first ``width`` characters, a
    longer line will be produced, with the line break inserted at the first
    possible whitespace character after ``width``.

    After each line break, the subsequent line is indented with
    ``subsequent_indent`` (two spaces by default).

    The lines are not allowed to be shorter than ``len(subsequent_indent) + 1``
    (3 characters by default), so that each line contains at least one
    non-whitespace character after the indent.

    Trailing whitespace is stripped from every produced line.

    :param string: the text to wrap.
    :param width: the preferred maximum line length.
    :param subsequent_indent: the prefix of every line after the first one.
    :return: the wrapped text, with lines joined by ``'\n'``.

    >>> print(wrap('', width=3))
    <BLANKLINE>
    >>> print(wrap('0123456789 12345', width=10))
    0123456789
      12345
    >>> print(wrap('01234 6789 12345', width=10))
    01234 6789
      12345
    >>> print(wrap('01234 6789 12345', width=11))
    01234 6789
      12345
    >>> print(wrap('01234 6789 12345', width=9))
    01234
      6789
      12345
    >>> print(wrap(' a b c', width=3))
     a b
      c
    >>> print(wrap('aa bb c', width=3))
    aa bb
      c
    """
    min_width = len(subsequent_indent)

    def find_break(text):
        # Each whitespace match is examined together with its successor; the
        # code relies on pairwise() pairing the last match with None.
        for candidate, following in pairwise(whitespace_re.finditer(text)):
            # ``candidate`` is the last break that still fits when there is
            # no later whitespace or the next one lies beyond ``width``.
            is_last_fit = following is None or following.start() > width
            # Breaking inside the indent would produce a line without any
            # visible content, hence the ``min_width`` guard.
            if is_last_fit and candidate.start() > min_width:
                return candidate.start()
        return None

    def iter_lines(text):
        while len(text) > width:
            break_pos = find_break(text)
            if not break_pos:
                # No usable break point: emit the rest as one overlong line.
                yield text
                return
            yield text[:break_pos]
            # Drop the whitespace character we broke at and indent the rest.
            text = subsequent_indent + text[break_pos + 1:]
        if text:
            yield text

    return '\n'.join(line.rstrip() for line in iter_lines(string))
@py3compat.python_2_unicode_compatible
class BibTeXString(object):
    """A string parsed into a tree of brace-delimited groups.

    ``contents`` holds the items found at this brace level: plain characters
    are stored as one-character strings, and every ``{...}`` group is stored
    as a nested ``BibTeXString`` whose ``level`` is one greater.

    A group at level 1 whose first item is a backslash is a "special
    character"; ``traverse()`` reports it as a single token.
    """

    def __init__(self, chars, level=0, max_level=100):
        """Parse ``chars`` (a string or an iterator over characters).

        :param chars: the characters to consume.  Nested groups share the same
            iterator, so each group consumes up to its own closing brace.
        :param level: the brace level of this group (0 for the whole string).
        :param max_level: the deepest brace level allowed.
        :raises BibTeXError: if ``level`` exceeds ``max_level``.
        """
        if level > max_level:
            raise BibTeXError('too many nested braces')
        self.level = level
        # Remembered so that nested groups are held to the same limit.
        self.max_level = max_level
        # Set to True once the closing brace of this group has been seen.
        self.is_closed = False
        self.contents = list(self.find_closing_brace(iter(chars)))

    def __iter__(self):
        return self.traverse()

    def find_closing_brace(self, chars):
        """Yield the items of this group, consuming ``chars`` as it goes.

        Stops at the closing brace of this group (marking it as closed) or
        when ``chars`` is exhausted.  At level 0 a stray ``}`` is kept as an
        ordinary character.
        """
        for char in chars:
            if char == '{':
                # The nested group reads from the same iterator and leaves it
                # positioned just after its own closing brace.
                yield BibTeXString(chars, self.level + 1, self.max_level)
            elif char == '}' and self.level > 0:
                self.is_closed = True
                return
            else:
                yield char

    def is_special_char(self):
        """Return a true value if this is a level-1 group starting with a backslash."""
        return self.level == 1 and self.contents and self.contents[0] == '\\'

    def traverse(self, open=None, f=lambda char, string: char, close=None):
        """Walk the tree depth-first, yielding the results of the callbacks.

        :param open: called with a group when its opening brace is reached;
            skipped if ``None``.  Not called for the level-0 root.
        :param f: called as ``f(char, group)`` for every plain character, and
            as ``f(inner_string, group)`` for every special character.
        :param close: called with a group when its closing brace is reached;
            skipped if ``None``.  For ordinary groups it is called only if the
            group was actually closed; for special characters it is always
            called.
        """
        if open is not None and self.level > 0:
            yield open(self)

        for child in self.contents:
            if hasattr(child, 'traverse'):
                if child.is_special_char():
                    # A special character is reported as a single token.
                    if open is not None:
                        yield open(child)
                    yield f(child.inner_string(), child)
                    if close is not None:
                        yield close(child)
                else:
                    for result in child.traverse(open, f, close):
                        yield result
            else:
                yield f(child, self)

        if close is not None and self.level > 0 and self.is_closed:
            yield close(self)

    def __str__(self):
        return ''.join(self.traverse(open=lambda string: '{', close=lambda string: '}'))

    def inner_string(self):
        """Return the text of this group without its own enclosing braces."""
        return ''.join(six.text_type(child) for child in self.contents)
def change_case(string, mode):
    r"""
    Change the case of ``string`` the way BibTeX's ``change.case$`` does.

    ``mode`` is ``'l'`` (lower case), ``'u'`` (upper case) or ``'t'`` (title
    case: everything is lowered except the first character and the first
    character after a colon followed by whitespace).  Any other value raises
    ``KeyError``.

    Text inside braces is left alone, except for "special characters" (level-1
    groups starting with a backslash): their words are converted unless the
    word itself starts with a backslash.

    >>> print(change_case('aBcD', 'l'))
    abcd
    >>> print(change_case('aBcD', 'u'))
    ABCD
    >>> print(change_case('ABcD', 't'))
    Abcd
    >>> print(change_case(r'The {\TeX book \noop}', 'u'))
    THE {\TeX BOOK \noop}
    >>> print(change_case(r'And Now: BOOO!!!', 't'))
    And now: Booo!!!
    >>> print(change_case(r'And {Now: BOOO!!!}', 't'))
    And {Now: BOOO!!!}
    >>> print(change_case(r'And {Now: {BOOO}!!!}', 'l'))
    and {Now: {BOOO}!!!}
    >>> print(change_case(r'And {\Now: BOOO!!!}', 't'))
    And {\Now: booo!!!}
    >>> print(change_case(r'And {\Now: {BOOO}!!!}', 'l'))
    and {\Now: {booo}!!!}
    >>> print(change_case(r'{\TeX\ and databases\Dash\TeX DBI}', 't'))
    {\TeX\ and databases\Dash\TeX DBI}
    """

    def to_title(text, state):
        # Only text at the start of a "sentence" keeps its original case.
        return text if state == 'start' else text.lower()

    def to_lower(text, state):
        return text.lower()

    def to_upper(text, state):
        return text.upper()

    # Looked up eagerly so that an unknown mode fails before any work is done.
    convert = {'l': to_lower, 'u': to_upper, 't': to_title}[mode]

    def convert_special_char(special_char, state):
        # FIXME BibTeX treats some accented and foreign characters specially
        return ' '.join(
            word if word.startswith('\\') else convert(word, state)
            for word in special_char.split(' ')
        )

    def converted_tokens():
        state = 'start'
        for token, brace_level in scan_bibtex_string(string):
            if brace_level != 0:
                # Braced text is protected; only special characters change,
                # and they never advance the state machine.
                if brace_level == 1 and token.startswith('\\'):
                    yield convert_special_char(token, state)
                else:
                    yield token
                continue

            yield convert(token, state)
            if token == ':':
                state = 'after colon'
            elif state == 'after colon' and token.isspace():
                state = 'start'
            else:
                state = 'normal'

    return ''.join(converted_tokens())
def bibtex_substring(string, start, length):
    r"""
    Return a substring of the given length, starting from the given position.

    ``start`` is 1-based.  If ``start`` is negative, it is counted from the
    end of the string and the substring extends *backwards* from there, i.e.
    it ends at that position.  The result is clipped to the bounds of the
    string.

    An empty string is returned if ``start`` is 0, if ``length`` is not
    positive, or if ``start`` points outside of the string.

    :param string: the source string.
    :param start: 1-based position; negative values count from the end.
    :param length: the maximum number of characters to return.
    :return: the selected substring.

    >>> print(bibtex_substring('abcdef', 1, 3))
    abc
    >>> print(bibtex_substring('abcdef', 2, 3))
    bcd
    >>> print(bibtex_substring('abcdef', 2, 1000))
    bcdef
    >>> print(bibtex_substring('abcdef', 0, 1000))
    <BLANKLINE>
    >>> print(bibtex_substring('abcdef', -1, 1))
    f
    >>> print(bibtex_substring('abcdef', -1, 2))
    ef
    >>> print(bibtex_substring('abcdef', -2, 3))
    cde
    >>> print(bibtex_substring('abcdef', -2, 6))
    abcde
    >>> print(bibtex_substring('abcdef', -2, 1000))
    abcde
    >>> print(bibtex_substring('abcdef', -10, 2))
    <BLANKLINE>
    >>> print(bibtex_substring('abcdef', 1, -1))
    <BLANKLINE>
    """
    # A non-positive length must not reach the slice below: a negative end
    # index would be interpreted by Python as "counted from the end".
    if start == 0 or length <= 0:
        return u''

    if start > 0:
        start0 = start - 1
        end0 = start0 + length
    else:
        end0 = len(string) + start + 1
        if end0 <= 0:
            # ``start`` points before the beginning of the string.
            return u''
        # Clamp at 0: a slightly negative start index would otherwise wrap
        # around to the end of the string and yield a wrong (often empty)
        # result.
        start0 = max(end0 - length, 0)
    return string[start0:end0]
def bibtex_len(string):
    r"""Return the number of characters in the string.

    Braces are not counted.  A "special character" counts as a single
    character; it is a substring at brace level 1 whose first character after
    the opening brace is a backslash, like in "de la Vall{\'e}e Poussin".

    >>> print(bibtex_len(r"de la Vall{\'e}e Poussin"))
    20
    >>> print(bibtex_len(r"de la Vall{e}e Poussin"))
    20
    >>> print(bibtex_len(r"de la Vallee Poussin"))
    20
    >>> print(bibtex_len(r'\ABC 123'))
    8
    >>> print(bibtex_len(r'{\abc}'))
    1
    >>> print(bibtex_len(r'{\abc'))
    1
    >>> print(bibtex_len(r'}\abc'))
    4
    >>> print(bibtex_len(r'\abc}'))
    4
    >>> print(bibtex_len(r'\abc{'))
    4
    >>> print(bibtex_len(r'level 0 {1 {2}}'))
    11
    >>> print(bibtex_len(r'level 0 {\1 {2}}'))
    9
    >>> print(bibtex_len(r'level 0 {1 {\2}}'))
    12
    """
    # Every scanned token that is not a brace counts as one character.
    return sum(
        1
        for token, _level in scan_bibtex_string(string)
        if token not in '{}'
    )
def bibtex_width(string):
    r"""
    Determine the width of the given string, in relative units.

    The width is the sum of the per-character widths from
    ``pybtex.charwidths``; characters missing from that table count as 0.
    For a "special character" the backslash, the character following it and
    any inner braces are skipped.

    >>> bibtex_width('')
    0
    >>> bibtex_width('abc')
    1500
    >>> bibtex_width('ab{c}')
    2500
    >>> bibtex_width(r"ab{\'c}")
    1500
    >>> bibtex_width(r"ab{\'c{}}")
    1500
    >>> bibtex_width(r"ab{\'c{}")
    1500
    >>> bibtex_width(r"ab{\'c{d}}")
    2056
    """
    from pybtex.charwidths import charwidths

    total = 0
    for token, brace_level in scan_bibtex_string(string):
        is_special_char = brace_level == 1 and token.startswith('\\')
        if not is_special_char:
            total += charwidths.get(token, 0)
            continue

        # token[2:] drops the backslash and the character right after it.
        total += sum(
            charwidths.get(char, 0)
            for char in token[2:]
            if char not in '{}'
        )
        total -= 1000  # two braces
    return total
def bibtex_prefix(string, num_chars):
    r"""Return the first num_chars characters of the string.

    Braces are not counted and a "special character" counts as one character,
    as in bibtex_len.  If the resulting prefix ends at brace level > 0, the
    missing closing braces are appended.

    An empty string is returned for an empty input or a non-positive
    ``num_chars``.

    :param string: the source string.
    :param num_chars: the number of characters to keep.
    :return: the prefix, with balanced closing braces.

    >>> print(bibtex_prefix('abc', 1))
    a
    >>> print(bibtex_prefix('abc', 5))
    abc
    >>> print(bibtex_prefix('ab{c}d', 3))
    ab{c}
    >>> print(bibtex_prefix('ab{cd}', 3))
    ab{c}
    >>> print(bibtex_prefix('ab{cd', 3))
    ab{c}
    >>> print(bibtex_prefix(r'ab{\cd}', 3))
    ab{\cd}
    >>> print(bibtex_prefix(r'ab{\cd', 3))
    ab{\cd}
    >>> print(bibtex_prefix('', 3))
    <BLANKLINE>
    >>> print(bibtex_prefix('abc', 0))
    <BLANKLINE>
    """
    # The loop below always emits a token before checking the count, so a
    # request for zero characters has to be answered up front.
    if num_chars <= 0:
        return ''

    def prefix():
        length = 0
        # Pre-bound so that an empty string (zero loop iterations) does not
        # leave the name undefined for the brace-closing step below.
        brace_level = 0
        for char, brace_level in scan_bibtex_string(string):
            yield char
            if char not in '{}':
                length += 1
            if length >= num_chars:
                break
        # Close every group that is still open where the prefix was cut.
        for _ in range(brace_level):
            yield '}'

    return ''.join(prefix())
def bibtex_purify(string):
    r"""Strip special characters from the string.

    Alphanumeric characters are kept; whitespace, ``-`` and ``~`` each become
    a single space; everything else (braces included) is dropped.  For a
    "special character" (a level-1 group starting with a backslash) the
    leading control word is removed and only the alphanumeric characters of
    the remainder are kept.

    >>> print(bibtex_purify('Abc 1234'))
    Abc 1234
    >>> print(bibtex_purify('Abc  1234'))
    Abc  1234
    >>> print(bibtex_purify('Abc-Def'))
    Abc Def
    >>> print(bibtex_purify('Abc-~-Def'))
    Abc   Def
    >>> print(bibtex_purify('{XXX YYY}'))
    XXX YYY
    >>> print(bibtex_purify('{XXX {YYY}}'))
    XXX YYY
    >>> print(bibtex_purify(r'XXX {\YYY} XXX'))
    XXX  XXX
    >>> print(bibtex_purify(r'{XXX {\YYY} XXX}'))
    XXX YYY XXX
    >>> print(bibtex_purify(r'\\abc def'))
    abc def
    >>> print(bibtex_purify('a@#$@#$b@#$@#$c'))
    abc
    >>> print(bibtex_purify(r'{\noopsort{1973b}}1973'))
    1973b1973
    >>> print(bibtex_purify(r'{sort{1973b}}1973'))
    sort1973b1973
    >>> print(bibtex_purify(r'{sort{\abc1973b}}1973'))
    sortabc1973b1973
    >>> print(bibtex_purify(r'{\noopsort{1973a}}{\switchargs{--90}{1968}}'))
    1973a901968
    """
    # FIXME BibTeX treats some accented and foreign characters specially
    purified = []
    for token, brace_level in scan_bibtex_string(string):
        if brace_level == 1 and token.startswith('\\'):
            # Special character: drop the leading control word, then keep
            # whatever alphanumeric characters remain.
            remainder = purify_special_char_re.sub('', token)
            purified.extend(char for char in remainder if char.isalnum())
        elif token.isalnum():
            purified.append(token)
        elif token.isspace() or token in '-~':
            purified.append(' ')
    return ''.join(purified)
def scan_bibtex_string(string):
    """Yield (token, brace_level) tuples for the given string.

    "Special characters", as in bibtex_len, are treated as a single token.
    An opening brace is reported with the level of the group it opens, a
    closing brace with the level of the enclosing group.
    """

    def on_open(group):
        return '{', group.level

    def on_char(char, group):
        return char, group.level

    def on_close(group):
        return '}', group.level - 1

    return BibTeXString(string).traverse(open=on_open, f=on_char, close=on_close)
@fix_unicode_literals_in_doctest
def split_name_list(string):
    r"""
    Split a list of names, separated by ' and '.

    The separator is matched case-insensitively and only at brace level 0.

    >>> split_name_list('Johnson and Peterson')
    [u'Johnson', u'Peterson']
    >>> split_name_list('Johnson AND Peterson')
    [u'Johnson', u'Peterson']
    >>> split_name_list('Johnson AnD Peterson')
    [u'Johnson', u'Peterson']
    >>> split_name_list('Armand and Peterson')
    [u'Armand', u'Peterson']
    >>> split_name_list('Armand and anderssen')
    [u'Armand', u'anderssen']
    >>> split_name_list('{Armand and Anderssen}')
    [u'{Armand and Anderssen}']
    >>> split_name_list('What a Strange{ }and Bizzare Name! and Peterson')
    [u'What a Strange{ }and Bizzare Name!', u'Peterson']
    >>> split_name_list('What a Strange and{ }Bizzare Name! and Peterson')
    [u'What a Strange and{ }Bizzare Name!', u'Peterson']
    """
    # "and" in any letter case, surrounded by single spaces.
    name_separator = ' [Aa][Nn][Dd] '
    return split_tex_string(string, name_separator)
@fix_unicode_literals_in_doctest
def _find_closing_brace(string):
    r"""
    Split ``string`` just after the brace that closes an already open group.

    The scan starts at brace level 1, i.e. ``string`` is the text following
    an opening brace.  Returns ``(up_to_brace, rest)``.  If the group is never
    closed, the split happens after the last brace found; if there are no
    braces at all, the whole string is returned as the first element.

    >>> _find_closing_brace('')
    (u'', u'')
    >>> _find_closing_brace('no braces')
    (u'no braces', u'')
    >>> _find_closing_brace('brace at the end}')
    (u'brace at the end}', u'')
    >>> _find_closing_brace('two closing braces}}')
    (u'two closing braces}', u'}')
    >>> _find_closing_brace('two closing} braces} and some text')
    (u'two closing}', u' braces} and some text')
    >>> _find_closing_brace('more {nested{}}{braces}} and the rest}')
    (u'more {nested{}}{braces}}', u' and the rest}')
    """
    depth = 1
    split_at = 0  # index just past the last brace examined
    for brace in BRACE_RE.finditer(string):
        split_at = brace.end()
        if brace.group() == '{':
            depth += 1
        else:
            depth -= 1
        if depth < 1:
            break

    if split_at == 0:
        # No brace was found at all.
        return string, ''
    return string[:split_at], string[split_at:]
# "\ " is a "control space" in TeX, i.e. "a space that is not to be ignored"
# -- The TeXbook, Chapter 3: Controlling TeX, p. 8
# ~ is a space character, according to BibTeX
# \~ is not a space character (hence the negative lookbehind in the pattern)
# The pattern matches a run of one or more such space characters and is the
# default separator used by split_tex_string().
BIBTEX_SPACE_RE = re.compile(r'(?:\\ |\s|(?<!\\)~)+')
# Matches a single opening or closing brace.
BRACE_RE = re.compile(r'{|}')
@fix_unicode_literals_in_doctest
def split_tex_string(string, sep=None, strip=True, filter_empty=False):
    r"""Split a string using the given separator (regexp).

    Everything at brace level > 0 is ignored.

    :param sep: a regular expression (string or compiled).  If ``None``, the
        string is split on BibTeX whitespace and empty parts are always
        dropped.
    :param strip: strip leading and trailing whitespace from every part.
    :param filter_empty: drop empty parts from the result.
    :return: a list of parts.

    >>> split_tex_string('')
    []
    >>> split_tex_string('     ')
    []
    >>> split_tex_string('.a.b.c.', r'\.')
    [u'', u'a', u'b', u'c', u'']
    >>> split_tex_string('.a.b.c.{d.}.', r'\.')
    [u'', u'a', u'b', u'c', u'{d.}', u'']
    >>> split_tex_string('Matsui Fuuka')
    [u'Matsui', u'Fuuka']
    >>> split_tex_string('{Matsui Fuuka}')
    [u'{Matsui Fuuka}']
    >>> split_tex_string(r'Matsui\ Fuuka')
    [u'Matsui', u'Fuuka']
    >>> split_tex_string(r'{Matsui\ Fuuka}')
    [u'{Matsui\\ Fuuka}']
    >>> split_tex_string('a')
    [u'a']
    >>> split_tex_string('on a')
    [u'on', u'a']
    >>> split_tex_string(r'Qui\~{n}onero-Candela, J.')
    [u'Qui\\~{n}onero-Candela,', u'J.']
    """
    drop_empty = filter_empty or sep is None
    pattern = re.compile(BIBTEX_SPACE_RE if sep is None else sep)

    parts = []
    pending = []  # fragments of the part that is currently being assembled
    remainder = string
    while True:
        before, brace, remainder = remainder.partition('{')
        if before:
            # Only text outside of braces is searched for separators.
            pieces = pattern.split(before)
            for piece in pieces[:-1]:
                parts.append(''.join(pending + [piece]))
                pending = []
            # The last piece may continue with a braced group.
            pending.append(pieces[-1])
        if not brace:
            break
        # Copy the braced group verbatim, closing brace included.
        group, remainder = _find_closing_brace(remainder)
        pending.extend((brace, group))

    if pending:
        parts.append(''.join(pending))
    if strip:
        parts = [part.strip() for part in parts]
    if drop_empty:
        parts = [part for part in parts if part]
    return parts
def bibtex_first_letter(string):
    r"""Return the first letter or special character of the string.

    A special character is returned wrapped in braces.  An empty string is
    returned if the string contains neither.

    >>> print(bibtex_first_letter('Andrew Blake'))
    A
    >>> print(bibtex_first_letter('{Andrew} Blake'))
    A
    >>> print(bibtex_first_letter('1Andrew'))
    A
    >>> print(bibtex_first_letter(r'{\TeX} markup'))
    {\TeX}
    >>> print(bibtex_first_letter(''))
    <BLANKLINE>
    >>> print(bibtex_first_letter('123 123 123 {}'))
    <BLANKLINE>
    >>> print(bibtex_first_letter(r'\LaTeX Project Team'))
    L
    """
    for token in BibTeXString(string):
        # A lone backslash is an ordinary character; anything longer that
        # starts with a backslash is a special character.
        is_special_char = token != '\\' and token.startswith('\\')
        if is_special_char:
            return u'{{{0}}}'.format(token)
        if token.isalpha():
            return token
    return ''
def bibtex_abbreviate(string, delimiter=None, separator='-'):
    r"""
    Abbreviate string.

    The string is split on ``separator`` (a regular expression), each part is
    reduced to its first letter, and the letters are joined with
    ``delimiter`` (``'.-'`` if not given).  Parts without a letter are
    skipped.

    >>> print(bibtex_abbreviate('Andrew Blake'))
    A
    >>> print(bibtex_abbreviate('Jean-Pierre'))
    J.-P
    >>> print(bibtex_abbreviate('Jean--Pierre'))
    J.-P
    """
    if delimiter is None:
        delimiter = '.-'
    tokens = split_tex_string(string, sep=separator)
    # filter(None, ...) drops the empty results of letterless tokens.
    return delimiter.join(filter(None, map(bibtex_first_letter, tokens)))