Warning, /sdk/kde-dev-scripts/extractrc is written in an unsupported language. File is not indexed.
0001 #! /usr/bin/env perl 0002 0003 ### TODO: other copyrights, license? 0004 # Copyright (c) 2004 Richard Evans <rich@ridas.com> 0005 0006 sub usage 0007 { 0008 warn <<"EOF"; 0009 0010 extractrc [flags] filenames 0011 0012 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and 0013 writes on standard output (usually redirected to rc.cpp) the equivalent 0014 i18n() calls so that xgettext can parse them. 0015 0016 --tag=name : Also extract the tag name(s). Repeat the flag to specify 0017 multiple names: --tag=tag_one --tag=tag_two 0018 0019 --tag-group=group : Use a group of tags - uses 'default' if omitted. 0020 Valid groups are: @{[TAG_GROUPS()]} 0021 0022 --context=name : Give i18n calls a context name: i18nc("name", ...) 0023 --lines : Include source line numbers in comments (deprecated, it is switched on by default now) 0024 --cstart=chars : Start of to-EOL style comments in output, defaults to // 0025 --language=lang : Create i18n calls appropriate for KDE bindings 0026 in the given language. Currently known languages: 0027 C++ (default), Python 0028 --ignore-no-input : Do not warn if there were no filenames specified 0029 --help|? : Display this summary 0030 --no-unescape-xml : Don't do xml unescaping 0031 0032 EOF 0033 0034 exit; 0035 } 0036 0037 ########################################################################################### 0038 0039 use strict; 0040 use warnings; 0041 use Getopt::Long; 0042 use Data::Dumper; 0043 0044 use constant TAG_GROUP => 0045 { 0046 default => "[tT][eE][xX][tT]|title|string|whatsthis|tooltip|label", 0047 koffice => "Example|GroupName|Text|Comment|Syntax|TypeName", 0048 none => "", 0049 }; 0050 0051 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP}; 0052 0053 # Specification to extract nice element-context for strings. 0054 use constant ECTX_SPEC => 0055 { 0056 # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...} 0057 # Order of attributes determines their order in the extracted comment. 0058 "ui" => { 0059 "widget" => [10, ["class", "name"]], 0060 "item" => [15, []], 0061 "property" => [20, ["name"]], 0062 "attribute" => [20, ["name"]], 0063 }, 0064 "rc" => { 0065 "Menu" => [10, ["name"]], 0066 "ToolBar" => [10, ["name"]], 0067 }, 0068 "kcfg" => { 0069 "group" => [10, ["name"]], 0070 "entry" => [20, ["name"]], 0071 "whatsthis" => [30, []], 0072 "tooltip" => [30, []], 0073 "label" => [30, []], 0074 }, 0075 }; 0076 0077 # Specification to exclude strings by trailing section of element-context. 0078 use constant ECTX_EXCLUDE => 0079 [ 0080 # Data structure: [[tag, attribute, attrvalue], [...]] 0081 # Empty ("") attribute means all elements with given tag, 0082 # empty attrvalue means element with given tag and attribute of any value. 0083 [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]], 0084 [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]], 0085 [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]], 0086 [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]], 0087 [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]], 0088 [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]], 0089 [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]], 0090 ]; 0091 0092 # The parts between the tags of the extensions will be copied verbatim 0093 # Same data structure as in ECTX_EXCLUDE, but per extension. 0094 my %EXTENSION_VERBATIM_TAGS = ( 0095 "kcfg" => [["code", "", ""], ["default", "code", "true"], 0096 ["min", "code", "true"], ["max", "code", "true"]], 0097 ); 0098 0099 # Add attribute lists as hashes, for membership checks. 0100 for my $ext ( keys %{&ECTX_SPEC} ) { 0101 for my $tag ( keys %{ECTX_SPEC->{$ext}} ) { 0102 my $arr = ECTX_SPEC->{$ext}{$tag}[1]; 0103 ECTX_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}}; 0104 } 0105 } 0106 0107 ########################################################################################### 0108 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions 0109 0110 GetOptions ( "tag=s" => \my @opt_extra_tags, 0111 "tag-group=s" => \my $opt_tag_group, 0112 "context=s" => \my $opt_context, # I18N context 0113 "lines" => \my $opt_lines, 0114 "cstart=s" => \my $opt_cstart, 0115 "language=s" => \my $opt_language, 0116 "ignore-no-input" => \my $opt_ignore_no_input, 0117 "no-unescape-xml" => \my $opt_no_unescape_xml, 0118 "help|?" => \&usage ); 0119 0120 unless( @ARGV ) 0121 { 0122 warn "No filename specified" unless $opt_ignore_no_input; 0123 exit; 0124 } 0125 0126 $opt_tag_group ||= "default"; 0127 0128 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS 0129 unless exists TAG_GROUP->{$opt_tag_group}; 0130 0131 my $tags = TAG_GROUP->{$opt_tag_group}; 0132 my $extra_tags = join "", map "|" . quotemeta, @opt_extra_tags; 0133 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/; # Precompile regexp 0134 my $cstart = $opt_cstart; # no default, selected by language if not given 0135 my $language = $opt_language || "C++"; 0136 my $ectx_known_exts = join "|", keys %{&ECTX_SPEC}; 0137 0138 ########################################################################################### 0139 0140 # Unescape basic XML entities. 0141 sub unescape_xml ($) { 0142 my $text = shift; 0143 0144 if (not $opt_no_unescape_xml) { 0145 $text =~ s/</</g; 0146 $text =~ s/>/>/g; 0147 $text =~ s/"/"/g; 0148 $text =~ s/&/&/g; 0149 } 0150 0151 return $text; 0152 } 0153 0154 # Convert uic to C escaping. 0155 sub escape_uic_to_c ($) { 0156 my $text = shift; 0157 0158 $text = unescape_xml($text); 0159 0160 $text =~ s/\\/\\\\/g; # escape \ 0161 $text =~ s/\"/\\\"/g; # escape " 0162 $text =~ s/\r//g; # remove CR (Carriage Return) 0163 $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that. 0164 0165 return $text; 0166 } 0167 0168 ########################################################################################### 0169 0170 sub dummy_call_infix { 0171 my ($cstart, $stend, $ctxt, $text, @cmnts) = @_; 0172 for my $cmnt (@cmnts) { 0173 print qq|$cstart $cmnt\n|; 0174 } 0175 if (defined $text) { 0176 $text = escape_uic_to_c($text); 0177 if (defined $ctxt) { 0178 $ctxt = escape_uic_to_c($ctxt); 0179 print qq|i18nc("$ctxt", "$text")$stend\n|; 0180 } else { 0181 print qq|i18n("$text")$stend\n|; 0182 } 0183 } 0184 } 0185 0186 my %dummy_calls = ( 0187 "C++" => sub { 0188 dummy_call_infix($cstart || "//", ";", @_); 0189 }, 0190 "Python" => sub { 0191 dummy_call_infix($cstart || "#", "", @_); 0192 }, 0193 ); 0194 0195 die "unknown language '$language'" if not defined $dummy_calls{$language}; 0196 my $dummy_call = $dummy_calls{$language}; 0197 0198 # Program start proper - NB $. is the current line number 0199 0200 for my $file_name ( @ARGV ) 0201 { 0202 my $fh; 0203 0204 unless ( open $fh, "<", $file_name ) 0205 { 0206 # warn "Failed to open: '$file_name': $!"; 0207 next; 0208 } 0209 0210 # Ready element-context extraction. 0211 my $ectx_ext; 0212 my $ectx_string; 0213 if ( $file_name =~ /\.($ectx_known_exts)(\.(in|cmake))?$/ ) { 0214 $ectx_ext = $1; 0215 my $ectx_tag_gr = join "|", keys %{ECTX_SPEC->{$ectx_ext}}; 0216 $ectx_string = qr/($ectx_tag_gr)( [^>]*)?>/; # precompile regexp 0217 } 0218 0219 my $string = ""; 0220 my $origstring = ""; 0221 my $in_text = 0; 0222 my $start_line_no = 0; 0223 my $in_skipped_prop = 0; 0224 my $tag = ""; 0225 my $attr = ""; 0226 my $context = ""; 0227 my $notr = ""; 0228 0229 # Element-context data: [[level, tag, [[attribute, value], ...]], ...] 0230 # such that subarrays are ordered increasing by level. 0231 my @ectx = (); 0232 0233 # All comments to pending dummy call. 0234 my @comments = (); 0235 0236 while ( <$fh> ) 0237 { 0238 if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ ) 0239 { 0240 print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n"; 0241 last; 0242 } 0243 0244 chomp; 0245 0246 $string .= "\n" . $_; 0247 $origstring = $string; 0248 0249 # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated 0250 0251 if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ ) 0252 { 0253 $in_skipped_prop = 1; 0254 } 0255 elsif ( $in_skipped_prop and $string =~ /<\/property/ ) 0256 { 0257 $string = ""; 0258 $in_skipped_prop = 0; 0259 } 0260 0261 $context = $opt_context unless $in_text; 0262 $notr = "" unless $in_text; 0263 0264 unless ( $in_skipped_prop or $in_text ) 0265 { 0266 # Check if this line contains context-worthy element. 0267 if ( $ectx_ext 0268 and ( ($tag, $attr) = $string =~ /<$ectx_string/ ) # no /o here 0269 and exists ECTX_SPEC->{$ectx_ext}{$tag} ) 0270 { 0271 my @atts; 0272 for my $ectx_att ( @{ECTX_SPEC->{$ectx_ext}{$tag}[1]} ) 0273 { 0274 if ( $attr and $attr =~ /\b$ectx_att\s*=\s*(["'])([^"']*?)\1/ ) 0275 { 0276 my $aval = $2; 0277 push @atts, [$ectx_att, $aval]; 0278 } 0279 } 0280 # Kill all tags in element-context with level higer or equal to this, 0281 # and add it to the end. 0282 my $clevel = ECTX_SPEC->{$ectx_ext}{$tag}[0]; 0283 for ( my $i = 0; $i < @ectx; ++$i ) 0284 { 0285 if ( $clevel <= $ectx[$i][0] ) 0286 { 0287 @ectx = @ectx[0 .. ($i - 1)]; 0288 last; 0289 } 0290 } 0291 push @ectx, [$clevel, $tag, [@atts]]; 0292 } 0293 0294 if ( ($tag, $attr) = $string =~ /<$text_string/o ) 0295 { 0296 my ($attr_comment) = $attr =~ /\bcomment=\"([^\"]*)\"/ if $attr; 0297 $context = $attr_comment if $attr_comment; 0298 my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr; 0299 $context = $attr_context if $attr_context; 0300 # It is unlikely that both attributes 'context' and 'comment' 0301 # will be present, but if so happens, 'context' has priority. 0302 my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr; 0303 push @comments, "i18n: $attr_extracomment" if $attr_extracomment; 0304 0305 my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr; 0306 $notr = $attr_notr if $attr_notr; 0307 0308 my $nongreedystring = $string; 0309 $string =~ s/^.*<$text_string//so; 0310 $nongreedystring =~ s/^.*?<$text_string//so; 0311 if ($string cmp $nongreedystring) 0312 { 0313 print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n"; 0314 } 0315 if ( not $attr or $attr !~ /\/ *$/ ) 0316 { 0317 $in_text = 1; 0318 $start_line_no = $.; 0319 } 0320 } 0321 else 0322 { 0323 @comments = (); 0324 $string = ""; 0325 } 0326 } 0327 0328 next unless $in_text; 0329 next unless $string =~ /<\/$text_string/o; 0330 0331 my $text = $string; 0332 $text =~ s/<\/$text_string.*$//o; 0333 0334 if ( $text cmp "" ) 0335 { 0336 # See if the string should be excluded by trailing element-context. 0337 my $exclude_by_ectx = 0; 0338 my @rev_ectx = reverse @ectx; 0339 for my $ectx_tail (@{&ECTX_EXCLUDE}) 0340 { 0341 my @rev_ectx_tail = reverse @{$ectx_tail}; 0342 my $i = 0; 0343 $exclude_by_ectx = (@rev_ectx > 0 and @rev_ectx_tail > 0); 0344 while ($i < @rev_ectx and $i < @rev_ectx_tail) 0345 { 0346 my ($tag, $attr, $aval) = @{$rev_ectx_tail[$i]}; 0347 $exclude_by_ectx = (not $tag or ($tag eq $rev_ectx[$i][1])); 0348 if ($exclude_by_ectx and $attr) 0349 { 0350 $exclude_by_ectx = 0; 0351 for my $ectx_attr_aval (@{$rev_ectx[$i][2]}) 0352 { 0353 if ($attr eq $ectx_attr_aval->[0]) 0354 { 0355 $exclude_by_ectx = $aval ? $aval eq $ectx_attr_aval->[1] : 1; 0356 last; 0357 } 0358 } 0359 } 0360 last if not $exclude_by_ectx; 0361 ++$i; 0362 } 0363 last if $exclude_by_ectx; 0364 } 0365 0366 if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true")) 0367 { 0368 push @comments, "Manually excluded message at $file_name line $."; 0369 } 0370 elsif ( $exclude_by_ectx ) 0371 { 0372 push @comments, "Automatically excluded message at $file_name line $."; 0373 } 0374 else 0375 { 0376 (my $norm_fname = $file_name) =~ s/^\.\///; 0377 push @comments, "i18n: file: $norm_fname:$."; 0378 if ( @ectx ) { 0379 # Format element-context. 0380 my @tag_gr; 0381 for my $tgr (reverse @ectx) 0382 { 0383 my @attr_gr; 0384 for my $agr ( @{$tgr->[2]} ) 0385 { 0386 #push @attr_gr, "$agr->[0]=$agr->[1]"; 0387 push @attr_gr, "$agr->[1]"; # no real nead for attribute name 0388 } 0389 my $attr = join(", ", @attr_gr); 0390 push @tag_gr, "$tgr->[1] ($attr)" if $attr; 0391 push @tag_gr, "$tgr->[1]" if not $attr; 0392 } 0393 my $ectx_str = join ", ", @tag_gr; 0394 push @comments, "i18n: ectx: $ectx_str"; 0395 } 0396 push @comments, "xgettext: no-c-format" if $text =~ /%/o; 0397 $dummy_call->($context, $text, @comments); 0398 @comments = (); 0399 } 0400 } 0401 else 0402 { 0403 push @comments, "Skipped empty message at $file_name line $."; 0404 } 0405 0406 $string =~ s/^.*<\/$text_string//o; 0407 $in_text = 0; 0408 0409 # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files. 0410 0411 warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i; 0412 } 0413 0414 close $fh or warn "Failed to close: '$file_name': $!"; 0415 0416 die "parsing error in $file_name" if $in_text; 0417 0418 if ($ectx_ext && exists $EXTENSION_VERBATIM_TAGS{$ectx_ext}) 0419 { 0420 unless ( open $fh, "<", $file_name ) 0421 { 0422 # warn "Failed to open: '$file_name': $!"; 0423 next; 0424 } 0425 0426 while ( <$fh> ) 0427 { 0428 chomp; 0429 $string .= "\n" . $_; 0430 0431 for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$ectx_ext} }) 0432 { 0433 my ($tag, $attr, $aval) = @{$elspec}; 0434 my $rx; 0435 if ($attr and $aval) { 0436 $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s 0437 } elsif ($attr) { 0438 $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s 0439 } else { 0440 $rx = qr/<$tag>(.*)<\/$tag>/s 0441 } 0442 if ($string =~ $rx) 0443 { 0444 # Add comment before any line that has an i18n substring in it. 0445 my @matched = split /\n/, $1; 0446 my $mlno = $.; 0447 (my $norm_fname = $file_name) =~ s/^\.\///; 0448 for my $mline (@matched) { 0449 # Assume verbatim code is in language given by --language. 0450 # Therefore format only comment, and write code line as-is. 0451 if ($mline =~ /i18n/) { 0452 $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno")); 0453 } 0454 $mline = unescape_xml($mline); 0455 print "$mline\n"; 0456 ++$mlno; 0457 } 0458 $string = ""; 0459 } 0460 } 0461 } 0462 0463 close $fh or warn "Failed to close: '$file_name': $!"; 0464 } 0465 }