Improve parsing and add write() method

The tool now round-trips the test file accurately!
This commit is contained in:
Andrew Cassidy 2021-04-18 03:02:33 -07:00
parent b5c4a1757e
commit 9ee8096e33

View File

@ -43,9 +43,13 @@ class VersionEntry:
self.date: Optional[datetime.date] = None
self.tags: List[str] = []
self.link: str = ''
self.line_no = -1
def __str__(self) -> str:
segments = ['##', self.name]
if self.link:
segments = [f'[{self.name}]']
else:
segments = [self.name]
if self.date:
segments += ['-', self.date.isoformat()]
@ -62,75 +66,126 @@ class Changelog:
self.versions = []
self.links = {}
# Read file
with open(path, 'r') as fp:
# Read file
line = fp.readline()
while line and not line.startswith('##'):
self.lines = fp.readlines()
section = ''
last_line = ''
in_block = False
# loop over lines in the file
for line_no, line in enumerate(self.lines):
if match := re.fullmatch(
r'^##\s+(?P<name>\S*)(?:\s+-\s+(?P<date>\S+))?\s*?(?P<extra>.*?)\s*#*$', line):
# this is a version header in the form '## Name (- date) (tags*) (#*)'
section = ''
in_block = False
self._add_version_header(match['name'], match['date'], match['extra'], line_no)
elif match := re.fullmatch(r'\[(\S*)]:\s*(\S*)\n', line):
# this is a link definition in the form '[id]: link', so add it to the link table
self.links[match[1].lower()] = match[2]
elif len(self.versions) == 0:
# we haven't encountered any version headers yet,
# so it's best to just add this line to the header string
self.header += line
line = fp.readline()
version = None
section = ''
last_line = ''
elif line.isspace():
# skip empty lines
pass
while line:
if line.isspace():
# skip empty lines
pass
elif match := re.fullmatch(
r'^##\s+(?P<name>\S*)(?:\s+-\s+(?P<date>\S+))?\s*?(?P<extra>.*?)\s*#*$', line):
# this is a version header in the form '## Name (- date) (tags*) (#*)'
version = VersionEntry()
section = ''
elif match := re.fullmatch(r'###\s+(\S*?)(\s+#*)?', line):
# this is a version section header in the form '### Name' or '### Name ###'
section = match[1].title()
if section not in self.versions[-1].sections.keys():
self.versions[-1].sections[section] = []
in_block = False
version.name, version.link, version.link_id = _strip_link(match['name'])
elif line[0] in '+-*#':
# bullet point or subheader
# subheaders are mostly preserved for round-trip accuracy, and are discouraged in favor of bullet points
# bullet points are preserved since some people like to use '+', '-' or '*' for different things
self.versions[-1].sections[section].append(line.strip())
in_block = True
if match['date']:
try:
version.date = datetime.date.fromisoformat(match['date'].strip(string.punctuation))
except ValueError:
version.date = None
elif in_block:
# not a bullet point, header, etc, and in a block, so this line should be appended to the last
self.versions[-1].sections[section][-1] += '\n' + line.strip()
if match['extra']:
version.tags = [s.strip('[]') for s in re.findall(r'\[.*?]', match['extra'])]
else:
# not a bullet point, header, etc, and not in a block, so this is the start of a new paragraph
self.versions[-1].sections[section].append(line.strip())
in_block = True
self.versions.append(version)
last_line = line
elif match := re.fullmatch(r'###\s+(\S*?)(\s+#*)?', line):
# this is a version section header in the form '### Name' or '### Name ###'
section = match[1].title()
if section not in version.sections.keys():
version.sections[section] = []
# handle links
for version in self.versions:
if match := re.fullmatch(r'\[(.*)]', version.name):
# ref-matched link
link_id = match[1].lower()
if link_id in self.links:
version.link = self.links.pop(link_id)
version.link_id = None
version.name = match[1]
elif match := re.fullmatch(r'\[(\S*)]:\s*(\S*)\n', line):
# this is a link definition in the form '[id]: link', so add it to the link table
self.links[match[1].lower()] = match[2]
elif version.link_id in self.links:
# id-matched link
version.link = self.links.pop(version.link_id)
elif line[0] in bullets or last_line.isspace():
# bullet point or new paragraph
# bullet points are preserved since some people like to use '+', '-' or '*' for different things
version.sections[section].append(line.strip())
# strip whitespace from header
self.header = self.header.strip()
else:
# not a bullet point, and no whitespace on last line, so append to the last entry
version.sections[section][-1] += '\n' + line.strip()
# Serialize the changelog to `path`: header text, then each version with its sections and
# entries, then the link definitions. When `path` is None, self.path is used instead.
def write(self, path: os.PathLike = None):
if path is None:
path = self.path
# NOTE(review): the next two lines use `line` and `fp` before anything in this method binds
# them; they look like removed lines of the old read loop that the flattened diff
# interleaved here -- confirm against the repository before relying on them.
last_line = line
line = fp.readline()
# work on a copy of the link table so links added below do not end up in self.links
v_links = {}
v_links.update(self.links)
with open(path, 'w') as fp:
# header text followed by two newlines
fp.write(self.header)
fp.write('\n\n')
for version in self.versions:
# NOTE(review): the link-resolution statements below (through `version.name = match[1]`)
# duplicate the "handle links" statements that appear earlier in this listing; they are
# presumably removed lines of the old parser, not part of write() -- confirm.
# handle links
if match := re.fullmatch(r'\[(.*)]', version.name):
# ref-matched link
link_id = match[1].lower()
if link_id in self.links:
version.link = self.links.pop(link_id)
version.link_id = None
version.name = match[1]
# version heading; the f-string renders the entry through its string form
fp.write(f'## {version}\n\n')
# NOTE(review): this `elif` has no matching `if` directly above it; presumably another
# interleaved removed line (it repeats the id-matched branch seen earlier) -- confirm.
elif version.link_id in self.links:
# id-matched link
version.link = self.links.pop(version.link_id)
# remember this version's link, keyed by version name, for the link table written at the end
if version.link:
v_links[version.name] = version.link
for section in version.sections:
# sections with an empty name get no '###' heading
if section:
fp.write(f'### {section}\n\n')
for entry in version.sections[section]:
fp.write(entry + '\n')
# entries that do not start with a bullet character get an extra newline after them
if entry[0] not in '-+*':
fp.write('\n')
# one more newline after every section that has at least one entry
if len(version.sections[section]) > 0:
fp.write('\n')
# link definitions in the form '[id]: link', with the id lowercased
for link_id, link in v_links.items():
fp.write(f'[{link_id.lower()}]: {link}\n')
def _add_version_header(self, name, date, extra, line_no):
    """Create a VersionEntry from parsed header fields and append it to self.versions.

    name:    raw name token of the header; it is passed through _strip_link,
             whose three results become the entry's name, link and link_id.
    date:    optional date token. Leading/trailing punctuation is stripped
             before ISO-format parsing; a token that does not parse leaves
             the entry's date as None.
    extra:   optional trailing header text; every '[...]' group found in it
             becomes one tag, stored without the brackets.
    line_no: line index supplied by the caller, stored on the entry.
    """
    entry = VersionEntry()
    entry.name, entry.link, entry.link_id = _strip_link(name)
    entry.line_no = line_no

    if date:
        cleaned = date.strip(string.punctuation)
        try:
            entry.date = datetime.date.fromisoformat(cleaned)
        except ValueError:
            # not an ISO date: keep the header, just without a date
            entry.date = None

    if extra:
        bracketed = re.findall(r'\[.*?]', extra)
        entry.tags = [item.strip('[]') for item in bracketed]

    self.versions.append(entry)
def read_version_header(line: str) -> Tuple[str, datetime.date, List[str]]: