The new manual pages converted from rst using sphinx or rst2man
have somewhat different syntax. man-to-mdwn.pl is now adjusted
to produce even better output from this syntax. The changes also
include using a utf-8 locale (e.g. for tables and generated hyphens)
and quite a few bug fixes.
This tool still produces better results than just using the
html pages generated using sphinx / rst2html. For example, those
tools don't create inter-page hyperlinks -- and the preformatted
pages written by man-to-mdwn.pl just work well with manual page
content.
# This program is used to generate mdwn-formatted notmuch manual pages
# for notmuch wiki. Example run:
#
# This program is used to generate mdwn-formatted notmuch manual pages
# for notmuch wiki. Example run:
#
-# $ ./devel/man-to-mdwn.pl man ../notmuch-wiki
+# $ ./devel/man-to-mdwn.pl doc/_build/man ../notmuch-wiki
#
# In case taken into more generic use, modify these comments and examples.
#
# In case taken into more generic use, modify these comments and examples.
use strict;
use warnings;
unless (@ARGV == 2) {
warn "\n$0 <source-directory> <destination-directory>\n\n";
# Remove/edit this comment if this script is taken into generic use.
use strict;
use warnings;
unless (@ARGV == 2) {
warn "\n$0 <source-directory> <destination-directory>\n\n";
# Remove/edit this comment if this script is taken into generic use.
- warn "Example: ./devel/man-to-mdwn.pl man ../notmuch-wiki\n\n";
+ warn "Example: ./devel/man-to-mdwn.pl doc/_build/man ../notmuch-wiki\n\n";
-#undef $ENV{'GROFF_NO_SGR'};
-#delete $ENV{'GROFF_NO_SGR'};
-$ENV{'GROFF_NO_SGR'} = '1';
-$ENV{'TERM'} = 'vt100'; # does this matter ?
-
my %htmlqh = qw/& & < < > > ' ' " "/;
# do html quotation to $_[0] (which is an alias to the given arg)
sub htmlquote($)
my %htmlqh = qw/& & < < > > ' ' " "/;
# do html quotation to $_[0] (which is an alias to the given arg)
sub htmlquote($)
#next if -l $v; # skip symlinks here. -- not... references there may be.
my @lines;
#next if -l $v; # skip symlinks here. -- not... references there may be.
my @lines;
- #open I, '-|', qw/groff -man -T utf8/, $v;
- open I, '-|', qw/groff -man -T latin1/, $v; # this and GROFF_NO_SGR='1'
+ open I, '-|', qw/env -i/, "PATH=$ENV{PATH}",
+ qw/TERM=vt100 LANG=en_US.utf8 LC_ALL=en_US.utf8/,
+ qw/GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 MANWIDTH=80/,
+ qw/man/, $v or die "$!";
+ binmode I, ':utf8';
my ($emptyline, $pre, $hl) = (0, 0, 'h1');
while (<I>) {
my ($emptyline, $pre, $hl) = (0, 0, 'h1');
while (<I>) {
- s/(?<=\S)\s{8,}.*//; # $hl = 'h1' if s/(?<=\S)\s{8,}.*//;
- htmlquote $_;
+ # keep only leftmost in lines like 'NOTMUCH(1) notmuch NOTMUCH(1)'
+ s/\S\K\s{8,}\S.+\s{8,}\S.*//; # $hl = 'h1' if s/(?<=\S)\s{8,}.*//;
- s/((?:_\010[^_])+)/<u>$1<\/u>/g;
+ s/((?:_\010[^_])+)/\001u\002$1\001\/u\002/g;
- s/((?:.\010.)+)/<b>$1<\/b>/g;
+ s/((?:.\010.)+)/\001b\002$1\001\/b\002/g;
+ htmlquote $_;
+ s/\001/</g; s/\002/>/g;
if (/^\S/) {
$pre = 0, push @lines, "</pre>\n" if $pre;
if (/^\S/) {
$pre = 0, push @lines, "</pre>\n" if $pre;
$lines[0] =~ s/^\n//;
$k = "$ARGV[1]/manpages/$k.mdwn";
open O, '>', $k or die;
$lines[0] =~ s/^\n//;
$k = "$ARGV[1]/manpages/$k.mdwn";
open O, '>', $k or die;
print STDOUT 'Writing ', "'$k'\n";
select O;
print STDOUT 'Writing ', "'$k'\n";
select O;
+ my ($pe, $hyphen) = ('', '');
- if (s/^(\s+)<b>([^<]+)<\/b>\((\d+)\)//) {
+ if (s/^(\s+)<b>([^<]+)\((\d+)\)<\/b>//) {
my $link = maymakelink "$pe-$2-$3";
$link = maymakelink "$pe$2-$3" unless $link;
if ($link) {
my $link = maymakelink "$pe-$2-$3";
$link = maymakelink "$pe$2-$3" unless $link;
if ($link) {
- print "<a href='$link'>$pe-</a>\n";
+ print "<a href='$link'>$pe$hyphen</a>\n";
print "$1<a href='$link'>$2</a>($3)";
}
else {
print "$1<a href='$link'>$2</a>($3)";
}
else {
- s/<b>([^<]+)<\/b>\((\d+)\)/mayconvert($1, $2)/ge;
- $pe = $1 if s/<b>([^<]+)-<\/b>\s*$//;
+ s/<b>([^<]+)\((\d+)\)<\/b>/mayconvert($1, $2)/ge;
+ ($pe, $hyphen) = ($1, $2) if s/<b>([^<]+)([-\x{2010}])<\/b>\s*$//;
open I, '<', $in or die $!;
my $s;
while (<I>) {
open I, '<', $in or die $!;
my $s;
while (<I>) {
- if (/^\s*[.]TH\s+\S+\s+(\S+)/) {
+ if (/^\s*[.]TH\s+\S+\s+"?(\S+?)"?\s/) {