mirror of
https://github.com/drewcassidy/yaclog.git
synced 2024-09-01 14:58:58 +00:00
Refactor changelog class and make tokenizer separate
This commit is contained in:
parent
1676b28f03
commit
c09df3a770
@ -1,6 +1,9 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import os.path
|
import os.path
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
import changelog
|
||||||
|
import version
|
||||||
import yaclog.changelog
|
import yaclog.changelog
|
||||||
|
|
||||||
log_segments = [
|
log_segments = [
|
||||||
@ -47,7 +50,7 @@ log_text = '\n\n'.join(log_segments)
|
|||||||
log = yaclog.Changelog()
|
log = yaclog.Changelog()
|
||||||
log.header = '# Changelog\n\nThis changelog is for testing the parser, and has many things in it that might trip it up.'
|
log.header = '# Changelog\n\nThis changelog is for testing the parser, and has many things in it that might trip it up.'
|
||||||
log.links = {'id': 'http://www.koalastothemax.com'}
|
log.links = {'id': 'http://www.koalastothemax.com'}
|
||||||
log.versions = [yaclog.changelog.VersionEntry(), yaclog.changelog.VersionEntry(), yaclog.changelog.VersionEntry()]
|
log.versions = [changelog.VersionEntry(), changelog.VersionEntry(), changelog.VersionEntry()]
|
||||||
|
|
||||||
log.versions[0].name = '[Tests]'
|
log.versions[0].name = '[Tests]'
|
||||||
log.versions[0].sections = {
|
log.versions[0].sections = {
|
||||||
|
@ -2,6 +2,8 @@ import unittest
|
|||||||
import os.path
|
import os.path
|
||||||
import git
|
import git
|
||||||
|
|
||||||
|
import changelog
|
||||||
|
import version
|
||||||
import yaclog
|
import yaclog
|
||||||
from yaclog.cli.__main__ import cli
|
from yaclog.cli.__main__ import cli
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
@ -64,7 +66,7 @@ class TestTagging(unittest.TestCase):
|
|||||||
|
|
||||||
with runner.isolated_filesystem():
|
with runner.isolated_filesystem():
|
||||||
in_log = yaclog.Changelog(location)
|
in_log = yaclog.Changelog(location)
|
||||||
in_log.versions = [yaclog.changelog.VersionEntry(), yaclog.changelog.VersionEntry()]
|
in_log.versions = [changelog.VersionEntry(), changelog.VersionEntry()]
|
||||||
|
|
||||||
in_log.versions[0].name = '1.0.0'
|
in_log.versions[0].name = '1.0.0'
|
||||||
in_log.versions[1].name = '0.9.0'
|
in_log.versions[1].name = '0.9.0'
|
||||||
@ -92,7 +94,7 @@ class TestTagging(unittest.TestCase):
|
|||||||
with runner.isolated_filesystem():
|
with runner.isolated_filesystem():
|
||||||
in_log = yaclog.Changelog(location)
|
in_log = yaclog.Changelog(location)
|
||||||
in_log.versions = [None, None]
|
in_log.versions = [None, None]
|
||||||
in_log.versions = [yaclog.changelog.VersionEntry(), yaclog.changelog.VersionEntry()]
|
in_log.versions = [changelog.VersionEntry(), changelog.VersionEntry()]
|
||||||
|
|
||||||
in_log.versions[0].name = '1.0.0'
|
in_log.versions[0].name = '1.0.0'
|
||||||
in_log.versions[0].tags = ['TAG1']
|
in_log.versions[0].tags = ['TAG1']
|
||||||
|
@ -17,49 +17,13 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from typing import List, Tuple, Optional, Dict
|
from typing import List, Optional, Dict
|
||||||
|
|
||||||
bullets = '+-*'
|
import markdown
|
||||||
brackets = '[]'
|
|
||||||
|
|
||||||
code_regex = re.compile(r'^```')
|
|
||||||
header_regex = re.compile(r'^(?P<hashes>#+)\s+(?P<contents>[^#]+)(?:\s+#+)?$')
|
|
||||||
under1_regex = re.compile(r'^=+\s*$')
|
|
||||||
under2_regex = re.compile(r'^-+\s*$')
|
|
||||||
bullet_regex = re.compile(r'^[-+*]')
|
|
||||||
linkid_regex = re.compile(r'^\[(?P<link_id>\S*)]:\s*(?P<link>.*)')
|
|
||||||
|
|
||||||
default_header = '# Changelog\n\nAll notable changes to this project will be documented in this file'
|
default_header = '# Changelog\n\nAll notable changes to this project will be documented in this file'
|
||||||
|
|
||||||
|
|
||||||
def _strip_link(token):
|
|
||||||
if link_literal := re.fullmatch(r'\[(.*?)]\((.*?)\)', token):
|
|
||||||
# in the form [name](link)
|
|
||||||
return link_literal[1], link_literal[2], None
|
|
||||||
|
|
||||||
if link_id := re.fullmatch(r'\[(.*?)]\[(.*?)]', token):
|
|
||||||
# in the form [name][id] where id is hopefully linked somewhere else in the document
|
|
||||||
return link_id[1], None, link_id[2].lower()
|
|
||||||
|
|
||||||
return token, None, None
|
|
||||||
|
|
||||||
|
|
||||||
def _join_markdown(segments: List[str]) -> str:
|
|
||||||
text: List[str] = []
|
|
||||||
last_bullet = False
|
|
||||||
for segment in segments:
|
|
||||||
is_bullet = bullet_regex.match(segment)
|
|
||||||
|
|
||||||
if not is_bullet or not last_bullet:
|
|
||||||
text.append('')
|
|
||||||
|
|
||||||
text.append(segment)
|
|
||||||
|
|
||||||
last_bullet = is_bullet
|
|
||||||
|
|
||||||
return '\n'.join(text).strip()
|
|
||||||
|
|
||||||
|
|
||||||
class VersionEntry:
|
class VersionEntry:
|
||||||
"""Holds a single version entry in a :py:class:`Changelog`"""
|
"""Holds a single version entry in a :py:class:`Changelog`"""
|
||||||
|
|
||||||
@ -73,16 +37,32 @@ class VersionEntry:
|
|||||||
:param date: When the version was released
|
:param date: When the version was released
|
||||||
:param tags: The version's tags
|
:param tags: The version's tags
|
||||||
:param link: The version's URL
|
:param link: The version's URL
|
||||||
:param link_id: The version's link ID, uses the version name by default when writing
|
:param link_id: The version's link ID
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.name: str = name
|
self.name: str = name
|
||||||
|
"""The version's name"""
|
||||||
|
|
||||||
self.date: Optional[datetime.date] = date
|
self.date: Optional[datetime.date] = date
|
||||||
|
"""WHen the version was released"""
|
||||||
|
|
||||||
self.tags: List[str] = tags if tags else []
|
self.tags: List[str] = tags if tags else []
|
||||||
|
"""The version's tags"""
|
||||||
|
|
||||||
self.link: Optional[str] = link
|
self.link: Optional[str] = link
|
||||||
|
"""The version's URL"""
|
||||||
|
|
||||||
self.link_id: Optional[str] = link_id
|
self.link_id: Optional[str] = link_id
|
||||||
self.line_no: int = -1
|
"""The version's link ID, uses the version name by default when writing"""
|
||||||
|
|
||||||
|
self.line_no: Optional[int] = None
|
||||||
|
"""What line the version occurs at in the file, or None if the version was not read from a file.
|
||||||
|
This is not guaranteed to be correct after the changelog has been modified,
|
||||||
|
and it has no effect on the written file"""
|
||||||
|
|
||||||
self.sections: Dict[str, List[str]] = {'': []}
|
self.sections: Dict[str, List[str]] = {'': []}
|
||||||
|
"""The dictionary of change entries in the version, organized by section.
|
||||||
|
Uncategorized changes have a section of an empty string."""
|
||||||
|
|
||||||
def add_entry(self, contents: str, section: str = '') -> None:
|
def add_entry(self, contents: str, section: str = '') -> None:
|
||||||
"""
|
"""
|
||||||
@ -118,7 +98,7 @@ class VersionEntry:
|
|||||||
if len(entries) > 0:
|
if len(entries) > 0:
|
||||||
segments += entries
|
segments += entries
|
||||||
|
|
||||||
return _join_markdown(segments)
|
return markdown.join(segments)
|
||||||
|
|
||||||
def header(self, md: bool = True) -> str:
|
def header(self, md: bool = True) -> str:
|
||||||
"""
|
"""
|
||||||
@ -167,6 +147,8 @@ class VersionEntry:
|
|||||||
|
|
||||||
|
|
||||||
class Changelog:
|
class Changelog:
|
||||||
|
"""A changelog made up of a header, several versions, and a link table"""
|
||||||
|
|
||||||
def __init__(self, path=None, header: str = default_header):
|
def __init__(self, path=None, header: str = default_header):
|
||||||
"""
|
"""
|
||||||
Create a new changelog object. Contents will be automatically read from disk if the file exists
|
Create a new changelog object. Contents will be automatically read from disk if the file exists
|
||||||
@ -174,10 +156,17 @@ class Changelog:
|
|||||||
:param path: The changelog's path on disk
|
:param path: The changelog's path on disk
|
||||||
:param header: The header at the top of the changelog to use if the file does not exist
|
:param header: The header at the top of the changelog to use if the file does not exist
|
||||||
"""
|
"""
|
||||||
self.path = path
|
self.path = os.path.abspath(path) if path else None
|
||||||
|
"""The path of the changelog's file on disk"""
|
||||||
|
|
||||||
self.header: str = header
|
self.header: str = header
|
||||||
|
"""Any text at the top of the changelog before any H2s"""
|
||||||
|
|
||||||
self.versions: List[VersionEntry] = []
|
self.versions: List[VersionEntry] = []
|
||||||
|
"""A list of versions in the changelog"""
|
||||||
|
|
||||||
self.links = {}
|
self.links = {}
|
||||||
|
"""Link IDs at the end of the changelog"""
|
||||||
|
|
||||||
if path and os.path.exists(path):
|
if path and os.path.exists(path):
|
||||||
self.read()
|
self.read()
|
||||||
@ -195,70 +184,15 @@ class Changelog:
|
|||||||
|
|
||||||
# Read file
|
# Read file
|
||||||
with open(path, 'r') as fp:
|
with open(path, 'r') as fp:
|
||||||
lines = fp.readlines()
|
tokens, self.links = markdown.tokenize(fp.read())
|
||||||
|
|
||||||
section = ''
|
section = ''
|
||||||
in_block = False
|
|
||||||
in_code = False
|
|
||||||
|
|
||||||
segments: List[Tuple[int, List[str], str]] = []
|
|
||||||
header_segments = []
|
header_segments = []
|
||||||
|
|
||||||
for line_no, line in enumerate(lines):
|
for token in tokens:
|
||||||
if in_code:
|
text = '\n'.join(token.lines)
|
||||||
# this is the contents of a code block
|
|
||||||
segments[-1][1].append(line)
|
|
||||||
if code_regex.match(line):
|
|
||||||
in_code = False
|
|
||||||
in_block = False
|
|
||||||
|
|
||||||
elif code_regex.match(line):
|
if token.kind == 'h2':
|
||||||
# this is the start of a code block
|
|
||||||
in_code = True
|
|
||||||
segments.append((line_no, [line], 'code'))
|
|
||||||
|
|
||||||
elif under1_regex.match(line) and in_block and len(segments[-1][1]) == 1 and segments[-1][2] == 'p':
|
|
||||||
# this is an underline for a setext-style H1
|
|
||||||
# ugly but it works
|
|
||||||
last = segments.pop()
|
|
||||||
segments.append((last[0], last[1] + [line], 'h1'))
|
|
||||||
|
|
||||||
elif under2_regex.match(line) and in_block and len(segments[-1][1]) == 1 and segments[-1][2] == 'p':
|
|
||||||
# this is an underline for a setext-style H2
|
|
||||||
# ugly but it works
|
|
||||||
last = segments.pop()
|
|
||||||
segments.append((last[0], last[1] + [line], 'h2'))
|
|
||||||
|
|
||||||
elif bullet_regex.match(line):
|
|
||||||
in_block = True
|
|
||||||
segments.append((line_no, [line], 'li'))
|
|
||||||
|
|
||||||
elif match := header_regex.match(line):
|
|
||||||
# this is a header
|
|
||||||
kind = f'h{len(match["hashes"])}'
|
|
||||||
segments.append((line_no, [line], kind))
|
|
||||||
in_block = False
|
|
||||||
|
|
||||||
elif match := linkid_regex.match(line):
|
|
||||||
# this is a link definition in the form '[id]: link', so add it to the link table
|
|
||||||
self.links[match['link_id'].lower()] = match['link']
|
|
||||||
|
|
||||||
elif line.isspace():
|
|
||||||
# skip empty lines
|
|
||||||
in_block = False
|
|
||||||
|
|
||||||
elif in_block:
|
|
||||||
# this is a line to be added to a paragraph
|
|
||||||
segments[-1][1].append(line)
|
|
||||||
else:
|
|
||||||
# this is a new paragraph
|
|
||||||
in_block = True
|
|
||||||
segments.append((line_no, [line], 'p'))
|
|
||||||
|
|
||||||
for segment in segments:
|
|
||||||
text = ''.join(segment[1]).strip()
|
|
||||||
|
|
||||||
if segment[2] == 'h2':
|
|
||||||
# start of a version
|
# start of a version
|
||||||
|
|
||||||
slug = text.rstrip('-').strip('#').strip()
|
slug = text.rstrip('-').strip('#').strip()
|
||||||
@ -270,7 +204,7 @@ class Changelog:
|
|||||||
section = ''
|
section = ''
|
||||||
|
|
||||||
version.name = slug
|
version.name = slug
|
||||||
version.line_no = segment[0]
|
version.line_no = token.line_no
|
||||||
tags = []
|
tags = []
|
||||||
date = None
|
date = None
|
||||||
|
|
||||||
@ -288,7 +222,7 @@ class Changelog:
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
# matches the schema
|
# matches the schema
|
||||||
version.name, version.link, version.link_id = _strip_link(split[0])
|
version.name, version.link, version.link_id = markdown.strip_link(split[0])
|
||||||
version.date = date
|
version.date = date
|
||||||
version.tags = tags
|
version.tags = tags
|
||||||
|
|
||||||
@ -299,7 +233,7 @@ class Changelog:
|
|||||||
# so its best to just add this line to the header string
|
# so its best to just add this line to the header string
|
||||||
header_segments.append(text)
|
header_segments.append(text)
|
||||||
|
|
||||||
elif segment[2] == 'h3':
|
elif token.kind == 'h3':
|
||||||
# start of a version section
|
# start of a version section
|
||||||
section = text.strip('#').strip()
|
section = text.strip('#').strip()
|
||||||
if section not in self.versions[-1].sections.keys():
|
if section not in self.versions[-1].sections.keys():
|
||||||
@ -324,7 +258,7 @@ class Changelog:
|
|||||||
version.link = self.links[version.link_id]
|
version.link = self.links[version.link_id]
|
||||||
|
|
||||||
# strip whitespace from header
|
# strip whitespace from header
|
||||||
self.header = _join_markdown(header_segments)
|
self.header = markdown.join(header_segments)
|
||||||
|
|
||||||
def write(self, path: os.PathLike = None) -> None:
|
def write(self, path: os.PathLike = None) -> None:
|
||||||
"""
|
"""
|
||||||
@ -348,7 +282,19 @@ class Changelog:
|
|||||||
|
|
||||||
segments += [f'[{link_id}]: {link}' for link_id, link in v_links.items()]
|
segments += [f'[{link_id}]: {link}' for link_id, link in v_links.items()]
|
||||||
|
|
||||||
text = _join_markdown(segments)
|
text = markdown.join(segments)
|
||||||
|
|
||||||
with open(path, 'w') as fp:
|
with open(path, 'w') as fp:
|
||||||
fp.write(text)
|
fp.write(text)
|
||||||
|
|
||||||
|
def add_version(self, index: int = 0, *args, **kwargs) -> VersionEntry:
    """
    Create a new version entry and insert it into this changelog's version list

    :param index: Position in ``self.versions`` to insert the new entry at; 0 puts it first
    :param args: Positional arguments forwarded to :py:class:`VersionEntry`
    :param kwargs: Keyword arguments forwarded to :py:class:`VersionEntry`
    :return: The newly created version entry
    """
    entry = VersionEntry(*args, **kwargs)
    self.versions.insert(index, entry)
    return entry
|
||||||
|
|
||||||
|
def current(self, new_version_name='Unreleased') -> VersionEntry:
    """
    Get the first version in the changelog, creating one if the changelog has no versions

    :param new_version_name: Name given to the version that is created when the changelog is empty
    :return: The first entry of ``self.versions``
    """
    if self.versions:
        return self.versions[0]

    # nothing to return yet, so create a version and hand that back
    return self.add_version(name=new_version_name)
|
||||||
|
@ -18,7 +18,9 @@ import click
|
|||||||
import os.path
|
import os.path
|
||||||
import datetime
|
import datetime
|
||||||
import git
|
import git
|
||||||
import yaclog.cli.version_util
|
|
||||||
|
import changelog
|
||||||
|
import yaclog.version
|
||||||
from yaclog import Changelog
|
from yaclog import Changelog
|
||||||
|
|
||||||
|
|
||||||
@ -141,7 +143,7 @@ def entry(obj: Changelog, bullets, paragraphs, section_name, version_name):
|
|||||||
else:
|
else:
|
||||||
matches = [v for v in obj.versions if v.name.lower() == 'unreleased']
|
matches = [v for v in obj.versions if v.name.lower() == 'unreleased']
|
||||||
if len(matches) == 0:
|
if len(matches) == 0:
|
||||||
version = yaclog.changelog.VersionEntry()
|
version = changelog.VersionEntry()
|
||||||
obj.versions.insert(0, version)
|
obj.versions.insert(0, version)
|
||||||
else:
|
else:
|
||||||
version = matches[0]
|
version = matches[0]
|
||||||
@ -192,11 +194,11 @@ def release(obj: Changelog, v_flag, commit):
|
|||||||
|
|
||||||
if v_flag:
|
if v_flag:
|
||||||
if v_flag[0] == '+':
|
if v_flag[0] == '+':
|
||||||
new_name = yaclog.cli.version_util.increment_version(version, v_flag)
|
new_name = yaclog.version.increment_version(version, v_flag)
|
||||||
else:
|
else:
|
||||||
new_name = v_flag
|
new_name = v_flag
|
||||||
|
|
||||||
if yaclog.cli.version_util.is_release(cur_version.name):
|
if yaclog.version.is_release(cur_version.name):
|
||||||
click.confirm(f'Rename release version "{cur_version.name}" to "{new_name}"?', abort=True)
|
click.confirm(f'Rename release version "{cur_version.name}" to "{new_name}"?', abort=True)
|
||||||
|
|
||||||
cur_version.name = new_name
|
cur_version.name = new_name
|
||||||
@ -213,7 +215,7 @@ def release(obj: Changelog, v_flag, commit):
|
|||||||
|
|
||||||
repo.index.add(obj.path)
|
repo.index.add(obj.path)
|
||||||
|
|
||||||
version_type = '' if yaclog.cli.version_util.is_release(cur_version.name) else 'non-release '
|
version_type = '' if yaclog.version.is_release(cur_version.name) else 'non-release '
|
||||||
tracked = len(repo.index.diff(repo.head.commit))
|
tracked = len(repo.index.diff(repo.head.commit))
|
||||||
tracked_warning = 'Create tag'
|
tracked_warning = 'Create tag'
|
||||||
untracked = len(repo.index.diff(None))
|
untracked = len(repo.index.diff(None))
|
||||||
|
153
yaclog/markdown.py
Normal file
153
yaclog/markdown.py
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# yaclog: yet another changelog tool
|
||||||
|
# Copyright (c) 2021. Andrew Cassidy
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as
|
||||||
|
# published by the Free Software Foundation, either version 3 of the
|
||||||
|
# License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
bullets = '+-*'
brackets = '[]'

# opening or closing fence of a code block
code_regex = re.compile(r'^```')

# ATX-style header: leading hashes, the header text, and an optional closing run of hashes.
# The contents are matched lazily rather than as "anything but #" so that headers
# whose text contains a hash (e.g. "## Fix issue #12") are still recognized.
header_regex = re.compile(r'^(?P<hashes>#+)\s+(?P<contents>.+?)(?:\s+#+)?$')

# any list item, bulleted or numbered. The alternation is grouped so that the
# start-of-line anchor applies to both forms, not just the bulleted one.
li_regex = re.compile(r'^(?:[-+*]|\d+\.) ')
numbered_regex = re.compile(r'^\d+\. ')
bullet_regex = re.compile(r'^[-+*] ')

link_id_regex = re.compile(r'^\[(?P<link_id>\S*)]:\s*(?P<link>.*)')  # link definition in the form [id]: url
link_def_regex = re.compile(r'\[(?P<text>.*?)]\[(?P<link_id>.*?)]')  # deferred link in the form [name][id]
link_lit_regex = re.compile(r'\[(?P<text>.*?)]\((?P<link>.*?)\)')  # literal link in the form [name](url)

# setext-style headers: a line of text followed by a line of only '=' (H1) or '-' (H2).
# The header may sit on the very first line of the text, and the underline may be the
# very last line, so both ends accept the string boundary as well as a newline.
setext_h1_replace_regex = re.compile(r'(?:^|(?<=\n))(?P<header>[^\n]+?)\n=+[ \t]*(?=\n|\Z)')
setext_h2_replace_regex = re.compile(r'(?:^|(?<=\n))(?P<header>[^\n]+?)\n-+[ \t]*(?=\n|\Z)')
|
||||||
|
|
||||||
|
|
||||||
|
def strip_link(token):
    """
    Split a token that may be a markdown link into its text and its target

    :param token: An input token which may be a markdown link, either literal or an ID
    :return: A tuple of (name, url, id). For a literal link the id is None, for a deferred link
        the url is None and the id is lowercased, and for anything else the token is returned
        unchanged as the name with both url and id set to None
    """

    literal = link_lit_regex.fullmatch(token)
    if literal is not None:
        # in the form [name](link)
        return literal.group('text'), literal.group('link'), None

    deferred = link_def_regex.fullmatch(token)
    if deferred is not None:
        # in the form [name][id], the id is expected to be defined somewhere else in the document
        return deferred.group('text'), None, deferred.group('link_id').lower()

    return token, None, None
|
||||||
|
|
||||||
|
|
||||||
|
def join(segments: List[str]) -> str:
    """
    Combine markdown segments into one string

    Segments are separated by a blank line, except for consecutive bulleted list items
    or consecutive numbered list items, which are kept on adjacent lines.

    :param segments: A list of strings to join
    :return: A joined markdown string with leading and trailing whitespace stripped
    """

    output: List[str] = []
    previous = ''

    for current in segments:
        # two neighbouring items of the same list type belong to the same list
        same_list = any(
            pattern.match(current) and pattern.match(previous)
            for pattern in (bullet_regex, numbered_regex)
        )

        if not same_list:
            output.append('')

        output.append(current)
        previous = current

    return '\n'.join(output).strip()
|
||||||
|
|
||||||
|
|
||||||
|
class Token:
    """A single block-level piece of a markdown document"""

    def __init__(self, line_no: int, lines: List[str], kind: str):
        """
        Create a new token

        :param line_no: Index of the line the token starts on
        :param lines: The lines of text that make up the token
        :param kind: The kind of block this token holds, as a short string such as 'p' or 'h2'
        """
        self.kind: str = kind
        self.lines: List[str] = lines
        self.line_no: int = line_no

    def __str__(self):
        """Describe the token as its kind followed by its list of lines"""
        return f'{self.kind}: {self.lines}'
|
||||||
|
|
||||||
|
|
||||||
|
def tokenize(text: str):
    """
    Tokenize a markdown string

    The tokenizer is very basic, and only cares about the highest-level blocks
    (Headers, top-level list items, links, code blocks, paragraphs).

    :param text: input text to tokenize
    :return: A tuple of (tokens, links). ``tokens`` is the list of block tokens in document order,
        and ``links`` is a dictionary mapping each lowercased link ID to its target.
        Link definition lines only end up in ``links``; no token is created for them.
    """

    # convert setext-style headers
    # The extra newline is to preserve line numbers
    text = setext_h1_replace_regex.sub(r'# \g<header>\n', text)
    text = setext_h2_replace_regex.sub(r'## \g<header>\n', text)

    lines = text.split('\n')
    tokens: List[Token] = []
    links = {}

    # Kind of the multi-line block that following lines may still be appended to
    # ('code', 'li' or 'p'), or None when the next line has to start a new token.
    # Invariant: whenever this is not None, it equals the kind of the last token.
    block = None

    for line_no, line in enumerate(lines):
        if block == 'code':
            # this is the contents of a code block
            assert block == tokens[-1].kind, 'block state variable in invalid state!'
            tokens[-1].lines.append(line)
            if code_regex.match(line):
                # closing fence, which is still part of the code token
                block = None

        elif code_regex.match(line):
            # this is the start of a code block
            tokens.append(Token(line_no, [line], 'code'))
            block = 'code'

        elif li_regex.match(line):
            # this is a list item
            tokens.append(Token(line_no, [line], 'li'))
            block = 'li'

        elif match := header_regex.match(line):
            # this is a header
            kind = f'h{len(match["hashes"])}'
            tokens.append(Token(line_no, [line], kind))
            # a header is always a single line and ends whatever block came before it.
            # Without this reset, a header directly after a paragraph or list item would
            # leave the stale block kind set and the next line would be treated as a
            # continuation of the header token.
            block = None

        elif match := link_id_regex.match(line):
            # this is a link definition in the form '[id]: link'
            links[match['link_id'].lower()] = match['link']
            block = None

        elif not line or line.isspace():
            # skip empty lines and reset block
            block = None

        elif block:
            # this is a line to be added to a paragraph or list item
            assert block == tokens[-1].kind, f'block state variable in invalid state! {block} != {tokens[-1].kind}'
            tokens[-1].lines.append(line)

        else:
            # this is a new paragraph
            tokens.append(Token(line_no, [line], 'p'))
            block = 'p'

    return tokens, links
|
@ -81,3 +81,5 @@ def join_version(epoch, release, pre, post, dev, local) -> str:
|
|||||||
parts.append(f"+{local}")
|
parts.append(f"+{local}")
|
||||||
|
|
||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user