1
1
#!/usr/bin/env ruby
2
2
# -*- coding: utf-8 -*- vim:set encoding=utf-8:
3
3
# TODO:
4
- # - cleanup (and remove dependency with rhg_html_gen)
4
+ # - cleanup
5
5
# - images
6
- # - when generating the output data, if Japanese = English , add in the English something like "(To translate)"
6
+ # - when generating the output data, if source language = destination language , add in the destination language something like "(to translate)" (and make it depend on the language)
7
7
$KCODE = 'u'
8
8
9
- ISOLanguage = 'en-US'
10
-
11
9
$LOAD_PATH. unshift ( '../lib' )
12
- require 'rhg_html_gen'
10
+ require 'redcloth'
11
+ require 'yaml'
12
+
13
+ Languages = YAML ::load ( IO . read ( 'languages.yml' ) )
14
+ AvailableDestinationLanguages = Languages . keys . select { |lang | Languages [ lang ] [ :can_be_destination_language ] } . sort
15
+ AvailableSourceLanguages = Languages . keys . sort
16
+
17
# Prints the command-line usage, listing the accepted source and
# destination languages, then terminates the script with exit status 1.
def syntax
  source_list = AvailableSourceLanguages.join(', ')
  destination_list = AvailableDestinationLanguages.join(', ')
  # puts with several arguments writes each one on its own line
  puts "syntax: #{$0} source_language destination_language chapter_number",
       "where the source language is one of the following: #{source_list} ",
       "and the destination language is one of the following: #{destination_list} "
  exit 1
end
23
+
24
# Command-line validation: exactly three arguments are required, the first
# must be a known source language, the second a language allowed as a
# destination, and the third must convert to a non-zero integer (to_i gives
# 0 for non-numeric input, so a chapter number of 0 is rejected as well).
# On any failure, syntax prints the usage and exits.
+ syntax if ARGV . length != 3 or not AvailableSourceLanguages . include? ( ARGV [ 0 ] ) or not AvailableDestinationLanguages . include? ( ARGV [ 1 ] ) or ARGV [ 2 ] . to_i == 0
25
# Validated arguments, kept as top-level locals for the rest of the script.
+ src_lang = ARGV [ 0 ]
26
+ dst_lang = ARGV [ 1 ]
27
+ chapter_num = ARGV [ 2 ] . to_i
28
+
29
+ $tags = { }
30
+
31
+ HEADER = <<EOS
32
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
33
+ <html lang="#{ Languages [ dst_lang ] [ :iso_language ] } ">
34
+ <head>
35
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
36
+ <meta http-equiv="Content-Language" content="#{ Languages [ dst_lang ] [ :iso_language ] } ">
37
+ <link rel="stylesheet" type="text/css" href="rhg.css">
38
+ <title>$tag(title)$</title>
39
+ </head>
40
+ <body>
41
+ EOS
42
+ FOOTER = Languages [ dst_lang ] [ :footer ]
13
43
14
44
COMMENT_RE = /\$ comment\( (.+?)\) \$ /
15
45
AUTOLINK_RE = %r{(^|[^:])\b ((?:ht|f)tp://\S +?)([^\w \/ ;]*?)(?=\s |<|$)}
16
46
NEW_CODE_RE = /`([^<]*?)`/m
17
47
TAG_RE = /\$ tag\( (.+?)\) \$ /
48
+ BLOCK_REGROUPING_RE = /^(h[1-9]\. |<pre\b |<p\b |▼)/
49
+
50
+ # manages tags
51
# Substitutes every tag matched by TAG_RE in +text+ with the value stored
# under the captured tag name in the global $tags hash.
#
# A tag with no (or a false) value prints a warning on standard output and
# is replaced by an empty string. Returns a new string; +text+ itself is
# not modified.
def replace_tags(text)
  text.gsub(TAG_RE) do
    name = Regexp.last_match(1)
    value = $tags[name]
    next value if value

    puts "Warning: The tag #{name} is not defined"
    ''
  end
end
18
62
19
63
AUTO_CONV_ENDING = <<END
20
64
<hr>
29
73
Copyright (c) 2002-2004 Minero Aoki, All rights reserved.
30
74
END
31
75
32
- TranslatedByRE = /^Translated by (.+)$/
33
-
34
- def rhg_redcloth_replace ( text )
35
- text = text . dup
36
- if md = TranslatedByRE . match ( text )
37
- $tags[ 'translated by' ] = md [ 1 ]
38
- text . sub! ( TranslatedByRE , '' )
39
- end
40
- text . sub! ( AUTO_CONV_ENDING , '' ) # remove the ending in the automatically generated Japanese files
41
- text . gsub! ( COMMENT_RE ) { |m | '' } # remove comments
42
- text . gsub ( TAG_RE ) do |m | # manages tags
43
- tag_name = $~[ 1 ]
44
- if $tags[ tag_name ]
45
- $tags[ tag_name ]
46
- else
47
- puts "Warning: The tag #{ tag_name } is not defined"
48
- ''
49
- end
50
- end
51
- fig_counter = 0
52
- text . gsub! ( RedCloth ::IMAGE_RE ) do |m | # must be done before the `` replacement
53
- fig_counter += 1
54
- stln , algn , atts , url , title , href , href_a1 , href_a2 = $~[ 1 ..8 ]
55
- #puts "Warning: the images used the the RHG should be PNGs, not JPEGs" if /\.jpe?g$/i.match(url)
56
- "\n \n <p style=\" text-align:center;\" >\n #{ m . gsub ( /`/ , '' ) } <br />Figure #{ fig_counter } : #{ title } \n </p>\n \n "
57
- end
58
- text . gsub! ( NEW_CODE_RE ) { |m | "<code>#{ $~[ 1 ] } </code>" }
59
- text . gsub! ( AUTOLINK_RE ) do |m |
60
- before , address , after = $~[ 1 ..3 ]
61
- "#{ before } \" #{ address } \" :#{ address } #{ after } "
62
- end
63
- text
64
- end
65
-
66
76
class Blocks
67
- def initialize ( filename )
68
- @data = rhg_redcloth_replace ( IO . read ( filename ) ) . split ( /\n / ) . map { |l | l . rstrip }
69
- @boundaries = [ ]
77
# Reads +filename+ and prepares its blocks.
#
# filename::            path of the chapter file to load
# lang::                language key of that file; used to look up the
#                       per-language settings in Languages
# is_destination_lang:: true when the file is the translation; only then is
#                       the translator name found in the file stored in $tags
def initialize(filename, lang, is_destination_lang)
  @boundaries = []
  # both settings must be in place before the file is converted,
  # because rhg_redcloth_replace reads them
  @lang, @is_destination_lang = lang, is_destination_lang
  @data = rhg_redcloth_replace(filename)
  find_boundaries
end
73
85
@@ -79,6 +91,10 @@ def [](i)
79
91
@data [ @boundaries [ i ] ] . join ( "\n " )
80
92
end
81
93
94
# Yields each block in turn, starting with the block at index +i+ and
# ending with the last one.
#
# i:: index of the first block to yield; nothing is yielded when +i+ is
#     past the last block.
#
# The caller may +break+ out of its block to stop the walk early.
def each_from(i)
  # The moving index has to come from the block parameter: indexing with
  # the method argument yielded block +i+ again on every iteration.
  i.upto(length - 1) { |index| yield self[index] }
end
97
+
82
98
def regroup_with_following ( i )
83
99
@data [ @boundaries [ i ] . last ] << "\n <==================================>"
84
100
@boundaries [ i ] = @boundaries [ i ] . first ..@boundaries [ i +1 ] . last
@@ -114,85 +130,119 @@ def find_boundaries
114
130
end
115
131
end
116
132
end
133
+
134
+ # transforms the modified RHG RedCloth syntax to normal RedCloth
135
+ # and returns an array of lines (without end of lines)
136
+ def rhg_redcloth_replace ( filename )
# Reads the whole file, applies the rewriting steps below in order, and
# returns the result as an array of right-stripped lines.
137
+ text = IO . read ( filename )
138
+ translated_by_re = Languages [ @lang ] [ :translated_by_re ] # note: translated_by_re is not defined for Japanese
139
# When the language defines a "translated by" pattern and it matches, the
# first match is removed from the text; its first capture is stored in $tags
# only when this file is the destination-language file.
+ if translated_by_re and md = translated_by_re . match ( text )
140
+ $tags[ 'translated by' ] = md [ 1 ] if @is_destination_lang
141
+ text . sub! ( translated_by_re , '' )
142
+ end
143
+ text . sub! ( AUTO_CONV_ENDING , '' ) if @lang == 'ja' # remove the ending in the automatically generated Japanese files if it's there
144
+ text . gsub! ( COMMENT_RE ) { |m | '' } # remove comments
145
# Tags are substituted now, using whatever $tags contains at this point.
+ text = replace_tags ( text )
146
# Each image is wrapped in a centered paragraph with a numbered
# "Figure N: title" caption; backquotes inside the image markup are dropped.
+ fig_counter = 0
147
+ text . gsub! ( RedCloth ::IMAGE_RE ) do |m | # must be done before the `` replacement
148
+ fig_counter += 1
149
# Only title is used below; the other captures are unpacked but unused.
+ stln , algn , atts , url , title , href , href_a1 , href_a2 = $~[ 1 ..8 ]
150
+ #puts "Warning: the images used the the RHG should be PNGs, not JPEGs" if /\.jpe?g$/i.match(url)
151
+ "\n \n <p style=\" text-align:center;\" >\n #{ m . gsub ( /`/ , '' ) } <br />Figure #{ fig_counter } : #{ title } \n </p>\n \n "
152
+ end
153
# Backquoted spans become <code> elements.
+ text . gsub! ( NEW_CODE_RE ) { |m | "<code>#{ $~[ 1 ] } </code>" }
154
# Bare http/ftp addresses are rewritten as quoted-text:address links,
# keeping the text captured before and after the address.
+ text . gsub! ( AUTOLINK_RE ) do |m |
155
+ before , address , after = $~[ 1 ..3 ]
156
+ "#{ before } \" #{ address } \" :#{ address } #{ after } "
157
+ end
158
+ text . split ( /\n / ) . map { |l | l . rstrip }
159
+ end
117
160
end
118
161
119
- chapter_num = sprintf ( "%02d" , ARGV [ 0 ] . to_i )
162
+ dst_lang_file_name = "../#{ dst_lang } /#{ sprintf ( Languages [ dst_lang ] [ :chapter_name ] , chapter_num ) } "
163
+ src_lang_file_name = "../#{ src_lang } /#{ sprintf ( Languages [ src_lang ] [ :chapter_name ] , chapter_num ) } "
120
164
121
- en_file_name = "../en/chapter#{ chapter_num } .txt"
122
- ja_file_name = "../ja/chapter#{ chapter_num } .txt"
123
- # if the English file does not exist yet, just use the Japanese one as source
124
- if File . exists? ( en_file_name )
125
- blocks_en = Blocks . new ( en_file_name )
165
+ blocks_src_lang = Blocks . new ( src_lang_file_name , src_lang , false )
166
+ # if the file in the destination language does not exist yet, just use the one in the source language as source
167
+ if File . exists? ( dst_lang_file_name )
168
+ blocks_dst_lang = Blocks . new ( dst_lang_file_name , dst_lang , true )
126
169
else
127
- $tags[ 'translated by' ] = '(not translated yet)'
128
- blocks_en = Blocks . new ( ja_file_name )
170
+ puts "warning: the translation is not available for this chapter"
171
+ blocks_dst_lang = Blocks . new ( src_lang_file_name , src_lang , false )
172
+ $tags[ 'translated by' ] = Languages [ dst_lang ] [ :not_translated ]
129
173
end
130
- blocks_ja = Blocks . new ( ja_file_name )
131
-
132
- BLOCK_REGROUPING_RE = /^(h[1-9]\. |<pre|▼)/
133
174
175
+ # the following code tries to end up with the same number of blocks of text in each language
176
+ # it searches for anchors (defined by the BLOCK_REGROUPING_RE regexp) and tries to align the anchors in both languages
134
177
i = 0
135
178
regroup_pos = 0
136
- while i < blocks_ja . length and i < blocks_en . length
137
- block_ja = blocks_ja [ i ]
138
- block_en = blocks_en [ i ]
139
- if BLOCK_REGROUPING_RE . match ( block_ja )
140
- if BLOCK_REGROUPING_RE . match ( block_en )
141
- regroup_pos = i
142
- i += 1
179
+ while i < blocks_src_lang . length and i < blocks_dst_lang . length
180
+ block_src_lang = blocks_src_lang [ i ]
181
+ block_dst_lang = blocks_dst_lang [ i ]
182
+ if md_src = BLOCK_REGROUPING_RE . match ( block_src_lang )
183
+ if md_dst = BLOCK_REGROUPING_RE . match ( block_dst_lang )
184
+ if md_src [ 0 ] != md_dst [ 0 ]
185
+ # if the anchors found at the current position are different in the two languages,
186
+ # we search for the next anchor to know which side is more likely to need regrouping
187
+ next_md_src = nil
188
+ next_md_dst = nil
189
+ blocks_src_lang . each_from ( i +1 ) { |block | break if next_md_src = BLOCK_REGROUPING_RE . match ( block ) }
190
+ blocks_dst_lang . each_from ( i +1 ) { |block | break if next_md_dst = BLOCK_REGROUPING_RE . match ( block ) }
191
+ if next_md_src and next_md_src [ 0 ] == md_dst [ 0 ]
192
+ blocks_src_lang . regroup_with_following ( regroup_pos )
193
+ elsif next_md_dst and next_md_dst [ 0 ] == md_src [ 0 ]
194
+ blocks_dst_lang . regroup_with_following ( regroup_pos )
195
+ else
196
+ i += 1
197
+ regroup_pos = i
198
+ end
199
+ else
200
+ i += 1
201
+ regroup_pos = i
202
+ end
143
203
else
144
- blocks_en . regroup_with_following ( regroup_pos )
204
+ blocks_dst_lang . regroup_with_following ( regroup_pos )
145
205
end
146
- elsif BLOCK_REGROUPING_RE . match ( block_en )
147
- blocks_ja . regroup_with_following ( regroup_pos )
206
+ elsif md_dst = BLOCK_REGROUPING_RE . match ( block_dst_lang )
207
+ blocks_src_lang . regroup_with_following ( regroup_pos )
148
208
else
149
209
i += 1
150
210
end
151
211
end
152
212
153
213
# regroup the last blocks to have the same number of blocks in both
154
- blocks_en . regroup_with_following ( blocks_en . length -2 ) while blocks_ja . length < blocks_en . length
155
- blocks_ja . regroup_with_following ( blocks_ja . length -2 ) while blocks_en . length < blocks_ja . length
214
+ blocks_dst_lang . regroup_with_following ( blocks_dst_lang . length -2 ) while blocks_src_lang . length < blocks_dst_lang . length
215
+ blocks_src_lang . regroup_with_following ( blocks_src_lang . length -2 ) while blocks_dst_lang . length < blocks_src_lang . length
156
216
157
- blocks_en . each do |b |
217
+ blocks_dst_lang . each do |b |
158
218
if md = /h1\. \s *(.+)$/ . match ( b )
159
- $tags[ 'title' ] = md [ 1 ] . gsub ( /(<[^>]*>|`)/ , '' ) # remove markup and backquotes from the title
219
+ $tags[ 'title' ] = md [ 1 ] . gsub ( /(<[^>]*>|`)/ , '' ) # remove markup and backquotes from the title
160
220
break
161
- end
221
+ end
162
222
end
163
223
if not $tags[ 'title' ]
164
- STDERR . puts "error: no h1 section in source file"
224
+ STDERR . puts "error: no h1 section in the file in the destination language "
165
225
return
166
226
end
167
227
168
- File . open ( "chapter#{ chapter_num } .txt" , "w" ) do |f |
169
- f . puts "<table>"
170
- blocks_ja . length . times do |i |
171
- f . puts "<tr><td>"
172
- f . puts
173
- f . puts blocks_en [ i ]
174
- f . puts
175
- f . puts "</td><td>"
176
- f . puts
177
- f . puts blocks_ja [ i ]
178
- f . puts
179
- f . print "</td></tr>"
180
- end
181
- f . puts
182
- f . puts "</table>"
183
- end
184
-
185
- FOOTER = <<EOS
186
- <hr>
228
+ base_file_name = sprintf ( "chapter%02d_#{ src_lang } _#{ dst_lang } " , chapter_num )
229
+ html_file = "#{ base_file_name } .html"
230
+ redcloth_file = "#{ base_file_name } .redcloth.txt"
187
231
188
- The original work is Copyright © 2002 - 2004 Minero AOKI.<br />
189
- Translated by #{ $tags[ 'translated by' ] } <br />
190
- <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/2.5/"><img alt="Creative Commons License" border="0" src="images/somerights20.png"/></a><br/>This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/2.5/">Creative Commons Attribution-NonCommercial-ShareAlike2.5 License</a>.
232
+ redcloth_text = '<table>'
233
+ blocks_src_lang . length . times do |i |
234
+ redcloth_text << "<tr><td>\n \n #{ blocks_dst_lang [ i ] } \n \n </td>"
235
+ redcloth_text << "<td>\n \n #{ blocks_src_lang [ i ] } \n \n </td></tr>\n "
236
+ end
237
+ redcloth_text << "\n </table>\n "
191
238
192
- </body>
193
- </html>
194
- EOS
239
+ File . open ( redcloth_file , "w" ) do |f | f . puts redcloth_text end
195
240
196
- RedClothRules = [ :textile ]
241
+ r = RedCloth . new ( redcloth_text )
197
242
198
- generate_html ( "chapter#{ chapter_num } .html" , "chapter#{ chapter_num } .txt" )
243
+ File . open ( html_file , 'w' ) do |io |
244
+ puts "Generating '#{ $tags[ 'title' ] } ' - #{ html_file } ..."
245
+ io . write ( replace_tags ( HEADER ) )
246
+ io . write ( r . to_html )
247
+ io . write ( replace_tags ( FOOTER ) )
248
+ end
0 commit comments