mirror of
https://github.com/drewcassidy/yaclog.git
synced 2024-09-01 14:58:58 +00:00
reworked 2-step parser that can handle setext headers
This commit is contained in:
parent
fa97e9154b
commit
849438a5f5
@ -23,6 +23,13 @@ from typing import List, Tuple, Optional
|
|||||||
bullets = '+-*'
|
bullets = '+-*'
|
||||||
brackets = '[]'
|
brackets = '[]'
|
||||||
|
|
||||||
|
code_regex = re.compile(r'^```')
|
||||||
|
header_regex = re.compile(r'^(?P<hashes>#+)\s+(?P<contents>[^#]+)(?:\s+#+)?$')
|
||||||
|
under1_regex = re.compile(r'^=+\s*$')
|
||||||
|
under2_regex = re.compile(r'^-+\s*$')
|
||||||
|
bullet_regex = re.compile(r'^[-+*]')
|
||||||
|
linkid_regex = re.compile(r'^\[(?P<link_id>\S*)]:\s*(?P<link>.*)')
|
||||||
|
|
||||||
|
|
||||||
def _strip_link(token):
|
def _strip_link(token):
|
||||||
if link_literal := re.fullmatch(r'\[(.*?)]\((.*?)\)', token):
|
if link_literal := re.fullmatch(r'\[(.*?)]\((.*?)\)', token):
|
||||||
@ -36,6 +43,22 @@ def _strip_link(token):
|
|||||||
return token, None, None
|
return token, None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _join_markdown(segments: List[str]) -> str:
|
||||||
|
text: List[str] = []
|
||||||
|
last_bullet = False
|
||||||
|
for segment in segments:
|
||||||
|
is_bullet = bullet_regex.match(segment) and '\n' not in segment
|
||||||
|
|
||||||
|
if not is_bullet or not last_bullet:
|
||||||
|
text.append('')
|
||||||
|
|
||||||
|
text.append(segment)
|
||||||
|
|
||||||
|
last_bullet = is_bullet
|
||||||
|
|
||||||
|
return '\n'.join(text).strip()
|
||||||
|
|
||||||
|
|
||||||
class VersionEntry:
|
class VersionEntry:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.sections = {'': []}
|
self.sections = {'': []}
|
||||||
@ -64,7 +87,6 @@ class Changelog:
|
|||||||
self.path = path
|
self.path = path
|
||||||
self.header = ''
|
self.header = ''
|
||||||
self.versions = []
|
self.versions = []
|
||||||
self.links = {}
|
|
||||||
|
|
||||||
# Read file
|
# Read file
|
||||||
with open(path, 'r') as fp:
|
with open(path, 'r') as fp:
|
||||||
@ -74,69 +96,104 @@ class Changelog:
|
|||||||
in_block = False
|
in_block = False
|
||||||
in_code = False
|
in_code = False
|
||||||
|
|
||||||
# loop over lines in the file
|
self.links = {}
|
||||||
|
|
||||||
|
links = {}
|
||||||
|
segments: List[Tuple[int, List[str], str]] = []
|
||||||
|
header_segments = []
|
||||||
|
|
||||||
for line_no, line in enumerate(self.lines):
|
for line_no, line in enumerate(self.lines):
|
||||||
if in_code:
|
if in_code:
|
||||||
if re.match(r'^```', line):
|
# this is the contents of a code block
|
||||||
line = '```'
|
segments[-1][1].append(line)
|
||||||
|
if code_regex.match(line):
|
||||||
in_code = False
|
in_code = False
|
||||||
in_block = False
|
in_block = False
|
||||||
|
|
||||||
if len(self.versions) == 0:
|
elif code_regex.match(line):
|
||||||
self.header += line
|
# this is the start of a code block
|
||||||
else:
|
|
||||||
self.versions[-1].sections[section][-1] += line
|
|
||||||
|
|
||||||
elif re.match(r'^```', line):
|
|
||||||
in_code = True
|
in_code = True
|
||||||
if len(self.versions) == 0:
|
segments.append((line_no, [line], 'code'))
|
||||||
self.header += line
|
|
||||||
else:
|
|
||||||
self.versions[-1].sections[section].append(line)
|
|
||||||
|
|
||||||
elif match := re.fullmatch(
|
elif under1_regex.match(line) and in_block and len(segments[-1][1]) == 1 and segments[-1][2] == 'p':
|
||||||
r'^##\s+(?P<name>\S*)(?:\s+-\s+(?P<date>\S+))?\s*?(?P<extra>.*?)\s*#*$', line):
|
# this is an underline for a setext-style H1
|
||||||
# this is a version header in the form '## Name (- date) (tags*) (#*)'
|
# ugly but it works
|
||||||
section = ''
|
last = segments.pop()
|
||||||
|
segments.append((last[0], last[1] + [line], 'h1'))
|
||||||
|
|
||||||
|
elif under2_regex.match(line) and in_block and len(segments[-1][1]) == 1 and segments[-1][2] == 'p':
|
||||||
|
# this is an underline for a setext-style H2
|
||||||
|
# ugly but it works
|
||||||
|
last = segments.pop()
|
||||||
|
segments.append((last[0], last[1] + [line], 'h2'))
|
||||||
|
|
||||||
|
elif bullet_regex.match(line):
|
||||||
|
in_block = True
|
||||||
|
segments.append((line_no, [line], 'li'))
|
||||||
|
|
||||||
|
elif match := header_regex.match(line):
|
||||||
|
# this is a header
|
||||||
|
kind = f'h{len(match["hashes"])}'
|
||||||
|
segments.append((line_no, [line], kind))
|
||||||
in_block = False
|
in_block = False
|
||||||
self._add_version_header(match['name'], match['date'], match['extra'], line_no)
|
|
||||||
|
|
||||||
elif match := re.fullmatch(r'\[(\S*)]:\s*(\S*)\n', line):
|
elif match := linkid_regex.match(line):
|
||||||
# this is a link definition in the form '[id]: link', so add it to the link table
|
# this is a link definition in the form '[id]: link', so add it to the link table
|
||||||
self.links[match[1].lower()] = match[2]
|
links[match['link_id'].lower()] = match['link']
|
||||||
|
|
||||||
elif len(self.versions) == 0:
|
|
||||||
# we haven't encountered any version headers yet,
|
|
||||||
# so its best to just add this line to the header string
|
|
||||||
self.header += line
|
|
||||||
|
|
||||||
elif line.isspace():
|
elif line.isspace():
|
||||||
# skip empty lines
|
# skip empty lines
|
||||||
in_block = False
|
in_block = False
|
||||||
pass
|
|
||||||
|
|
||||||
elif match := re.fullmatch(r'###\s+(\S*?)(\s+#*)?', line):
|
|
||||||
# this is a version section header in the form '### Name' or '### Name ###'
|
|
||||||
section = match[1].title()
|
|
||||||
if section not in self.versions[-1].sections.keys():
|
|
||||||
self.versions[-1].sections[section] = []
|
|
||||||
in_block = False
|
|
||||||
|
|
||||||
elif line[0] in '+-*#':
|
|
||||||
# bullet point or subheader
|
|
||||||
# subheaders are mostly preserved for round-trip accuracy, and are discouraged in favor of bullet points
|
|
||||||
# bullet points are preserved since some people like to use '+', '-' or '*' for different things
|
|
||||||
self.versions[-1].sections[section].append(line.strip())
|
|
||||||
in_block = True
|
|
||||||
|
|
||||||
elif in_block:
|
elif in_block:
|
||||||
# not a bullet point, header, etc, and in a block, so this line should be appended to the last
|
# this is a line to be added to a paragraph
|
||||||
self.versions[-1].sections[section][-1] += '\n' + line.strip()
|
segments[-1][1].append(line)
|
||||||
|
else:
|
||||||
|
# this is a new paragraph
|
||||||
|
in_block = True
|
||||||
|
segments.append((line_no, [line], 'p'))
|
||||||
|
|
||||||
|
for segment in segments:
|
||||||
|
text = ''.join(segment[1]).strip()
|
||||||
|
|
||||||
|
if segment[2] == 'h2':
|
||||||
|
# start of a version
|
||||||
|
|
||||||
|
split = text.rstrip('-').strip('#').strip().split()
|
||||||
|
if '-' in split:
|
||||||
|
split.remove('-')
|
||||||
|
|
||||||
|
version = VersionEntry()
|
||||||
|
section = ''
|
||||||
|
|
||||||
|
version.name, version.link, version.link_id = _strip_link(split[0])
|
||||||
|
version.line_no = segment[0]
|
||||||
|
|
||||||
|
if len(split) > 1:
|
||||||
|
try:
|
||||||
|
version.date = datetime.date.fromisoformat(split[1].strip(string.punctuation))
|
||||||
|
except ValueError:
|
||||||
|
version.date = None
|
||||||
|
|
||||||
|
if len(split) > 2:
|
||||||
|
version.tags = [s.strip('[]') for s in split[2:]]
|
||||||
|
|
||||||
|
self.versions.append(version)
|
||||||
|
|
||||||
|
elif len(self.versions) == 0:
|
||||||
|
# we haven't encountered any version headers yet,
|
||||||
|
# so its best to just add this line to the header string
|
||||||
|
header_segments.append(text)
|
||||||
|
|
||||||
|
elif segment[2] == 'h3':
|
||||||
|
# start of a version section
|
||||||
|
section = text.strip('#').strip()
|
||||||
|
if section not in self.versions[-1].sections.keys():
|
||||||
|
self.versions[-1].sections[section] = []
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# not a bullet point, header, etc, and not in a block, so this is the start of a new paragraph
|
# change log entry
|
||||||
self.versions[-1].sections[section].append(line.strip())
|
self.versions[-1].sections[section].append(text)
|
||||||
in_block = True
|
|
||||||
|
|
||||||
# handle links
|
# handle links
|
||||||
for version in self.versions:
|
for version in self.versions:
|
||||||
@ -153,7 +210,7 @@ class Changelog:
|
|||||||
version.link = self.links.pop(version.link_id)
|
version.link = self.links.pop(version.link_id)
|
||||||
|
|
||||||
# strip whitespace from header
|
# strip whitespace from header
|
||||||
self.header = self.header.strip()
|
self.header = _join_markdown(header_segments)
|
||||||
|
|
||||||
def write(self, path: os.PathLike = None):
|
def write(self, path: os.PathLike = None):
|
||||||
if path is None:
|
if path is None:
|
||||||
@ -176,13 +233,9 @@ class Changelog:
|
|||||||
if section:
|
if section:
|
||||||
fp.write(f'### {section}\n\n')
|
fp.write(f'### {section}\n\n')
|
||||||
|
|
||||||
for entry in version.sections[section]:
|
|
||||||
fp.write(entry + '\n')
|
|
||||||
if entry[0] not in '-+*':
|
|
||||||
fp.write('\n')
|
|
||||||
|
|
||||||
if len(version.sections[section]) > 0:
|
if len(version.sections[section]) > 0:
|
||||||
fp.write('\n')
|
fp.write(_join_markdown(version.sections[section]))
|
||||||
|
fp.write('\n\n')
|
||||||
|
|
||||||
for link_id, link in v_links.items():
|
for link_id, link in v_links.items():
|
||||||
fp.write(f'[{link_id.lower()}]: {link}\n')
|
fp.write(f'[{link_id.lower()}]: {link}\n')
|
||||||
|
Loading…
Reference in New Issue
Block a user