Update and add more tool scripts

2022-09-18 23:08:43 -04:00
parent 711f5c87b3
commit 6c26e2617c
9 changed files with 355 additions and 199 deletions
--- a/tools/toc.py
+++ b/tools/toc.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-

 """
-Usage: python3 toc.py [-n] files.md...
+Usage: python toc.py file.md...

 Replace a "## TOC" heading in a Markdown file with a table of contents,
 generated from the other headings in the file. Supports multiple files.
@@ -18,17 +18,18 @@ toc_name = 'Contents'
 valid_toc_headings = {'## TOC', '##TOC'}

 TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])
-punctuation_regexp = re.compile(r'[^\w\- ]+')
-specialchar_regexp = re.compile(r'[⅔]+')
+punctuation_rx = re.compile(r'[^\w\- ]+')
+numbered_heading_rx = re.compile(r'^[0-9]+\. ')
+specialchar_rx = re.compile(r'[⅔]+')

 def name_to_anchor(name):
 	# GitHub's algorithm for generating anchors from headings
 	# https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
-	anchor = name.strip().lower()                   # lowercase
-	anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation
-	anchor = anchor.replace(' ', '-')               # replace spaces with dash
-	anchor = re.sub(specialchar_regexp, '', anchor) # remove misc special chars
-	anchor = quote(anchor)                          # url encode
+	anchor = name.strip().lower()               # lowercase
+	anchor = re.sub(punctuation_rx, '', anchor) # remove punctuation
+	anchor = anchor.replace(' ', '-')           # replace spaces with dash
+	anchor = re.sub(specialchar_rx, '', anchor) # remove misc special chars
+	anchor = quote(anchor)                      # url encode
 	return anchor

 def get_toc_index(lines):
@@ -51,42 +52,46 @@ def get_toc_items(lines, toc_index):
 			yield TocItem(name, anchor, level)

 def toc_string(toc_items):
-	lines = ['## %s' % toc_name, '']
+	lines = [f'## {toc_name}', '']
 	for name, anchor, level in toc_items:
 		padding = '  ' * level
-		line = '%s- [%s](#%s)' % (padding, name, anchor)
-		lines.append(line)
+		if re.match(numbered_heading_rx, name):
+			bullet, name = name.split('.', 1)
+			bullet += '.'
+			name = name.lstrip()
+		else:
+			bullet = '-'
+		lines.append(f'{padding}{bullet} [{name}](#{anchor})')
 	return '\n'.join(lines) + '\n'

 def add_toc(filename):
-	with open(filename, 'r', encoding='utf-8') as f:
-		lines = f.readlines()
+	with open(filename, 'r', encoding='utf-8') as file:
+		lines = file.readlines()
 	toc_index = get_toc_index(lines)
 	if toc_index is None:
 		return None # no TOC heading
 	toc_items = list(get_toc_items(lines, toc_index))
 	if not toc_items:
 		return False # no content headings
-	with open(filename, 'w', encoding='utf-8') as f:
+	with open(filename, 'w', encoding='utf-8') as file:
 		for i, line in enumerate(lines):
 			if i == toc_index:
-				f.write(toc_string(toc_items))
+				file.write(toc_string(toc_items))
 			else:
-				f.write(line)
+				file.write(line)
 	return True # OK

 def main():
 	if len(sys.argv) < 2:
-		print('*** ERROR: No filenames specified')
-		print(__doc__)
-		exit(1)
+		print(f'Usage: {sys.argv[0]} file.md', file=sys.stderr)
+		sys.exit(1)
 	for filename in sys.argv[1:]:
 		print(filename)
 		result = add_toc(filename)
 		if result is None:
-			print('*** WARNING: No "## TOC" heading found')
+			print('Warning: No "## TOC" heading found', file=sys.stderr)
 		elif result is False:
-			print('*** WARNING: No content headings found')
+			print('Warning: No content headings found', file=sys.stderr)
 		else:
 			print('OK')