body, b, strong, u {
font-family: 'Roboto', sans-serif;
}
-a.tgt { font-face: symbol; font-weight: 400; font-size: 70%; visibility: hidden; text-decoration: none; color: #ddd; padding: 0 4px; border: 0; vertical-align: top; }
+a.tgt { font-face: symbol; font-weight: 400; font-size: 70%; visibility: hidden; text-decoration: none; color: #ddd; padding: 0 4px; border: 0; }
a.tgt:after { content: '🔗'; }
a.tgt:hover { color: #444; background-color: #eaeaea; }
h1:hover > a.tgt, h2:hover > a.tgt, h3:hover > a.tgt, dt:hover > a.tgt { visibility: visible; }
FILENAME_RE = re.compile(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$')
ASSIGNMENT_RE = re.compile(r'^(\w+)=(.+)')
-QUOTED_RE = re.compile(r'"(.+?)"')
+VER_RE = re.compile(r'^#define\s+RSYNC_VERSION\s+"(\d.+?)"', re.M)  # group 1: the quoted RSYNC_VERSION value, which must begin with a digit
+TZ_RE = re.compile(r'^#define\s+MAINTAINER_TZ_OFFSET\s+(-?\d+(\.\d+)?)', re.M)  # group 1: signed, optionally fractional MAINTAINER_TZ_OFFSET number (used as an hour offset)
VAR_REF_RE = re.compile(r'\$\{(\w+)\}')
VERSION_RE = re.compile(r' (\d[.\d]+)[, ]')
BIN_CHARS_RE = re.compile(r'[\1-\7]+')
fi = argparse.Namespace(**fi.groupdict())
fi.want_manpage = not not fi.sect
if fi.want_manpage:
- fi.title = fi.prog + '(' + fi.sect + ') man page'
+ fi.title = fi.prog + '(' + fi.sect + ') manpage'
else:
fi.title = fi.prog + ' for rsync'
env_subs['VERSION'] = '1.0.0'
env_subs['bindir'] = '/usr/bin'
env_subs['libdir'] = '/usr/lib/rsync'
+ tz_offset = 0
else:
for fn in (srcdir + 'version.h', 'Makefile'):
try:
with open(srcdir + 'version.h', 'r', encoding='utf-8') as fh:
txt = fh.read()
- m = QUOTED_RE.search(txt)
+ m = VER_RE.search(txt)
env_subs['VERSION'] = m.group(1)
+ m = TZ_RE.search(txt) # the tzdata lib may not be installed, so we use a simple hour offset
+ tz_offset = float(m.group(1)) * 60 * 60
with open('Makefile', 'r', encoding='utf-8') as fh:
for line in fh:
if var == 'srcdir':
break
- env_subs['date'] = time.strftime('%d %b %Y', time.localtime(mtime))
+ env_subs['date'] = time.strftime('%d %b %Y', time.gmtime(mtime + tz_offset)).lstrip('0')
def html_via_commonmark(txt):
bad_hashtags = set(),
latest_targets = [ ],
opt_prefix = 'opt',
+ a_href = None,
+ a_href_external = False,
a_txt_start = None,
+ after_a_tag = False,
target_suf = '',
)
for bad in st.referenced_hashtags - st.created_hashtags:
warn('Unknown hashtag link in', self.fn + ':', '#' + bad)
+ def handle_UE(self):  # Settle the ".UE" line left pending after an external link, then clear after_a_tag.
+ st = self.state
+ if st.txt.startswith(('.', ',', '!', '?', ';', ':')):  # punctuation immediately follows the link
+ st.man_out[-1] = ".UE " + st.txt[0] + "\n"  # overwrite the last man_out entry (presumably the bare ".UE\n") so the punctuation becomes the .UE argument
+ st.txt = st.txt[1:]  # the punctuation now lives on the .UE line, so drop it from the pending text
+ st.after_a_tag = False  # the pending-link state has been dealt with
+
def handle_starttag(self, tag, attrs_list):
st = self.state
if args.debug:
st.txt += BOLD_FONT[0]
elif tag == 'em' or tag == 'i':
if st.want_manpage:
- tag = 'u' # Change it into underline to be more like the man page
+ tag = 'u' # Change it into underline to be more like the manpage
st.txt += UNDR_FONT[0]
elif tag == 'ol':
start = 1
for var, val in attrs_list:
if var == 'href':
if val.startswith(('https://', 'http://', 'mailto:', 'ftp:')):
- pass # nothing to check
+ if st.after_a_tag:
+ self.handle_UE()
+ st.man_out.append(manify(st.txt.strip()) + "\n")
+ st.man_out.append(".UR " + val + "\n")
+ st.txt = ''
+ st.a_href = val
+ st.a_href_external = True
elif '#' in val:
- pg, tgt = val.split('#', 2)
+ pg, tgt = val.split('#', 1)
if pg and pg not in VALID_PAGES or '#' in tgt:
st.bad_hashtags.add(val)
elif tgt in ('', 'opt', 'dopt'):
st.a_href = val
+ st.a_href_external = False
elif pg == '':
st.referenced_hashtags.add(tgt)
if tgt in st.latest_targets:
st = self.state
if args.debug:
self.output_debug('END', (tag,))
+ if st.after_a_tag:
+ self.handle_UE()
if tag in CONSUMES_TXT or st.dt_from == tag:
txt = st.txt.strip()
st.txt = ''
if m:
tgt = m.group(1)
st.target_suf = '-' + tgt
- self.add_targets(tgt)
+ self.add_targets(tag, tgt)
elif tag == 'h2':
st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
- self.add_targets(txt, st.target_suf)
+ self.add_targets(tag, txt, st.target_suf)
st.opt_prefix = 'dopt' if txt == 'DAEMON OPTIONS' else 'opt'
elif tag == 'h3':
st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
- self.add_targets(txt, st.target_suf)
+ self.add_targets(tag, txt, st.target_suf)
elif tag == 'p':
if st.dt_from == 'p':
tag = 'dt'
st.man_out.append('.IP "' + manify(txt) + '"\n')
if txt.startswith(BOLD_FONT[0]):
- self.add_targets(txt)
+ self.add_targets(tag, txt)
st.dt_from = None
elif txt != '':
st.man_out.append(manify(txt) + "\n")
add_to_txt = NORM_FONT[0]
elif tag == 'em' or tag == 'i':
if st.want_manpage:
- tag = 'u' # Change it into underline to be more like the man page
+ tag = 'u' # Change it into underline to be more like the manpage
add_to_txt = NORM_FONT[0]
elif tag == 'ol' or tag == 'ul':
if st.list_state.pop() == 'dl':
elif tag == 'hr':
return
elif tag == 'a':
- if st.a_href:
+ if st.a_href_external:
+ st.txt = st.txt.strip()
+ if args.force_link_text or st.a_href != st.txt:
+ st.man_out.append(manify(st.txt) + "\n")
+ st.man_out.append(".UE\n") # This might get replaced with a punctuation version in handle_UE()
+ st.after_a_tag = True
+ st.a_href_external = False
+ st.txt = ''
+ elif st.a_href:
atxt = st.txt[st.a_txt_start:]
find = 'href="' + st.a_href + '"'
for j in range(len(st.html_out)-1, 0, -1):
if find in st.html_out[j]:
- pg, tgt = st.a_href.split('#', 2)
+ pg, tgt = st.a_href.split('#', 1)
derived = txt2target(atxt, tgt)
if pg == '':
if derived in st.latest_targets:
st.txt += txt
- def add_targets(self, txt, suf=None):
+ def add_targets(self, tag, txt, suf=None):  # tag: element name (e.g. 'h2') whose opening tag in html_out receives the first target's id
st = self.state
+ tag = '<' + tag + '>'  # literal text of the bare opening tag to look for in html_out
targets = CODE_BLOCK_RE.findall(txt)
if not targets:
targets = [ txt ]
- first_one = True
+ tag_pos = 0  # 0 means the opening tag has not been located yet
for txt in targets:
txt = txt2target(txt, st.opt_prefix)
if not txt:
print('Made link target unique:', chk)
txt = chk
break
- if first_one:
- st.html_out.append('<a id="' + txt + '" href="#' + txt + '" class="tgt"></a>')
- first_one = False
+ if tag_pos == 0:  # first target: find the opening tag and attach the id to it
+ tag_pos -= 1  # start the search at the last html_out entry
+ while st.html_out[tag_pos] != tag:  # scan backwards for the entry that is exactly the bare opening tag
+ tag_pos -= 1
+ st.html_out[tag_pos] = tag[:-1] + ' id="' + txt + '">'  # rewrite the opening tag so it carries the target as its id
+ st.html_out.append('<a href="#' + txt + '" class="tgt"></a>')  # empty self-link anchor with the "tgt" class
+ tag_pos -= 1 # take into account the append (the opening tag is now one slot further from the end)
else:
- st.html_out.append('<span id="' + txt + '"></span>')
+ st.html_out[tag_pos] = '<span id="' + txt + '"></span>' + st.html_out[tag_pos]  # later targets: prepend an id-bearing <span> to the opening-tag entry
st.created_hashtags.add(txt)
st.latest_targets = targets
if __name__ == '__main__':
- parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
+ parser = argparse.ArgumentParser(description="Convert markdown into html and (optionally) nroff. Each input filename must have a .md suffix, which is changed to .html for the output filename. If the input filename ends with .num.md (e.g. foo.1.md) then a nroff file is also output with the input filename's .md suffix removed (e.g. foo.1).", add_help=False)
parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
- parser.add_argument('--dest', metavar='DIR', help="Put files into DIR instead of the current directory.")
+ parser.add_argument('--dest', metavar='DIR', help="Create files in DIR instead of the current directory.")
+ parser.add_argument('--force-link-text', action='store_true', help="Don't remove the link text if it matches the link href. Useful when nroff doesn't understand .UR and .UE.")
parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
- parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
+ parser.add_argument("mdfiles", metavar='FILE.md', nargs='+', help="One or more .md files to convert.")
args = parser.parse_args()
try: