# path: root/share/misc/nanpa.sed
# blob: 342969abae7a6b9be1a0f1c680f920e494cee8bd (plain)
# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
#
# Parse HTML tables output by 
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each html table row (TR),
# print the <TD> elements separated by colons.
#
# This could break on HTML comments.
#
:top
# Strip carriage returns (^M) left behind by CRLF line endings.
# The whole s command has to sit on one line: sed does not accept a raw
# line break inside the pattern of an s command.
# \r is the escape GNU sed documents for a carriage return; it is not
# defined by POSIX.
# NOTE(review): confirm the sed that runs this script understands \r;
# if it does not, put a literal carriage-return byte between the first
# two slashes instead.
s/\r//g
# Join lines until every HTML tag opened in the pattern space is closed:
# while a "<" has no ">" after it, append the next input line (keeping
# the newline) and restart from the top.
/<[^>]*$/{
	N
	b top
}
# Turn every </TR> into a "$" end-of-row marker.
s;</[Tt][Rr]>;$;g
# The pattern space ends with a <TR ...> tag: append the next line,
# dropping the newline, and restart from the top.
/<[Tt][Rr][^>]*>$/{
	N
	s/\n//g
	b top
}
# A <TR ...> tag with no "$" marker after it is a row whose </TR> has not
# been seen yet: keep appending lines until it is.
# Both letters are matched separately ([Tt][Rr]); a single class [TtRr]
# would also fire on <TABLE>, <TITLE>, <TD>, <TH>, ... and glue lines
# that precede a row onto that row.
/<[Tt][Rr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
# Drop the end-of-row marker at the end of the pattern space.
s/\$$//
# Discard anything that does not contain a <TR> tag.
/<[Tt][Rr][^>]*>/!d
# Replace each <TD ...> tag, in either letter case, together with the
# blanks around it, by a colon field separator.
s/[ 	]*<[Tt][Dd][^>]*> */:/g
# Remove every remaining HTML tag.
s/<[^>]*>//g
# Decode the non-breaking-space entity.
s/&nbsp;/ /g
# Collapse each run of spaces/tabs into one space.
s/[ 	][ 	]*/ /g
# Drop the colon produced by the first <TD> of the row.
s/^://
# Trim one leading and one trailing space.
s/^ //
s/ $//