Skip to content

Commit 8933c9e

Browse files
author
scritch
committed
Added back convert_html.rb
git-svn-id: http://rhg.rubyforge.org/svn@34 2ba632a7-620d-0410-bd84-d74392fff1da
1 parent cd949d6 commit 8933c9e

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

ja/convert_html.rb

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
# This script automatically converts the HTML pages
# of the Ruby Hacking Guide to the Textile-based text
# format used for the translation.
# It is far from perfect and won't be useful in any other
# case because it depends on the way these HTML pages are
# written, but it's still better than doing this by hand.
#
# Usage: convert_html.rb input_html output_txt

# Ruby 1.8 needs $KCODE for UTF-8 aware regexps. On 1.9+ (where the Encoding
# class exists) the assignment has no effect and only emits a warning.
$KCODE = 'u' unless defined?(Encoding)

# HTML entities that are turned back into plain characters inside code.
CODE_ENTITIES = { '&amp;' => '&', '&gt;' => '>', '&lt;' => '<' }.freeze

# Ordered [pattern, replacement] pairs applied to the whole document.
# Order matters: each rule runs on the output of the previous ones.
SUBSTITUTIONS = [
  [ /.*?<body>(.*?)<\/body>.*/m, '\1' ], # we only want the body
  [ /<\/?(table|p( class=".+?")?)>|<\/(li|h\d|ol|ul)>/, '' ], # remove useless tags
  [ /▼/, '▼ ' ], # just add a space after the arrow
  [ /<h(\d)>/, 'h\1. ' ], # headers
  [ /<a href="(.+?)">(.+?)<\/a>/m, '"\2":\1' ], # links
  # tables: cell boundaries become '|'. Both unclosed cells (<td><td>) and
  # properly closed ones (</td><td>) are accepted.
  [ %r{<tr><td>|</?td><td>|</?td></tr>}, '|' ],
  [ /<img src="(.+?)" alt=".+?"><br>\n\d+: (.*)/, '!\1(\2)!' ], # images and captions
  [ /[ \t]+$/, '' ], # trims line ends
  [ /\A\n+|\n+\Z/, '' ], # remove beginning and ending empty lines
  [ /\n\n+/, "\n\n" ], # succession of empty lines
].freeze

# Returns a copy of +text+ where <code> and </code> are replaced by a backtick
# and where &amp; &gt; &lt; are decoded while inside a <pre> or <code> element.
# <pre> tags themselves are left in place; entities outside code are untouched.
def decode_code_entities(text)
  # Tracked separately so that a </code> inside a <pre> block does not stop
  # the decoding for the rest of that <pre> block.
  in_pre = false
  in_code = false
  text.gsub(%r{&(amp|gt|lt);|</?pre\b|</?code>}) do |m|
    if m[0, 1] == '<'
      opening = (m[1, 1] != '/')
      if m.include?('code')
        in_code = opening
        '`'
      else
        in_pre = opening
        m
      end
    elsif in_pre || in_code
      CODE_ENTITIES[m]
    else
      m
    end
  end
end

# Returns a copy of +text+ where <ul> and <ol> are removed and each <li> is
# replaced by '# ' when the most recently opened list was <ol>, '* ' otherwise.
def convert_list_tags(text)
  list_type = nil
  text.gsub(/<(ul|ol|li)>/) do |tag|
    if tag == '<li>'
      list_type == '<ol>' ? '# ' : '* '
    else
      list_type = tag # remember the kind of list for the following items
      ''
    end
  end
end

# Converts the full HTML document +html+ (already UTF-8) to the Textile-based
# text and returns it as a new string.
def convert_html(html)
  text = convert_list_tags(decode_code_entities(html))
  SUBSTITUTIONS.inject(text) { |acc, (re, str)| acc.gsub(re, str) }
end

if ARGV.length != 2
  puts "syntax: #{$0} input_html output_txt"
  exit
end

# Loaded after the argument check so the usage message does not depend on it.
require 'nkf'

input_path, output_path = ARGV

# read the file, convert it to UTF-8 and transform full width characters in ASCII
html = NKF.nkf('-w -Z', File.read(input_path))

# Convert first, so the output file is only created once conversion succeeded.
text = convert_html(html)
File.open(output_path, 'w') { |output| output.puts(text) }

0 commit comments

Comments
 (0)