Warning, /sdk/kde-dev-scripts/extractrc is written in an unsupported language. File is not indexed.
0001 #! /usr/bin/env perl
0002
0003 ### TODO: other copyrights, license?
0004 # Copyright (c) 2004 Richard Evans <rich@ridas.com>
0005
0006 sub usage
0007 {
0008 warn <<"EOF";
0009
0010 extractrc [flags] filenames
0011
0012 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and
0013 writes on standard output (usually redirected to rc.cpp) the equivalent
0014 i18n() calls so that xgettext can parse them.
0015
0016 --tag=name : Also extract the tag name(s). Repeat the flag to specify
0017 multiple names: --tag=tag_one --tag=tag_two
0018
0019 --tag-group=group : Use a group of tags - uses 'default' if omitted.
0020 Valid groups are: @{[TAG_GROUPS()]}
0021
0022 --context=name : Give i18n calls a context name: i18nc("name", ...)
0023 --lines : Include source line numbers in comments (deprecated, it is switched on by default now)
0024 --cstart=chars : Start of to-EOL style comments in output, defaults to //
0025 --language=lang : Create i18n calls appropriate for KDE bindings
0026 in the given language. Currently known languages:
0027 C++ (default), Python
0028 --ignore-no-input : Do not warn if there were no filenames specified
0029 --help|? : Display this summary
0030 --no-unescape-xml : Don't do xml unescaping
0031
0032 EOF
0033
0034 exit;
0035 }
0036
0037 ###########################################################################################
0038
0039 use strict;
0040 use warnings;
0041 use Getopt::Long;
0042 use Data::Dumper;
0043
0044 use constant TAG_GROUP =>
0045 {
0046 default => "[tT][eE][xX][tT]|title|string|whatsthis|tooltip|label",
0047 koffice => "Example|GroupName|Text|Comment|Syntax|TypeName",
0048 none => "",
0049 };
0050
0051 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP};
0052
0053 # Specification to extract nice element-context for strings.
0054 use constant ECTX_SPEC =>
0055 {
0056 # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...}
0057 # Order of attributes determines their order in the extracted comment.
0058 "ui" => {
0059 "widget" => [10, ["class", "name"]],
0060 "item" => [15, []],
0061 "property" => [20, ["name"]],
0062 "attribute" => [20, ["name"]],
0063 },
0064 "rc" => {
0065 "Menu" => [10, ["name"]],
0066 "ToolBar" => [10, ["name"]],
0067 },
0068 "kcfg" => {
0069 "group" => [10, ["name"]],
0070 "entry" => [20, ["name"]],
0071 "whatsthis" => [30, []],
0072 "tooltip" => [30, []],
0073 "label" => [30, []],
0074 },
0075 };
0076
0077 # Specification to exclude strings by trailing section of element-context.
0078 use constant ECTX_EXCLUDE =>
0079 [
0080 # Data structure: [[tag, attribute, attrvalue], [...]]
0081 # Empty ("") attribute means all elements with given tag,
0082 # empty attrvalue means element with given tag and attribute of any value.
0083 [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]],
0084 [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]],
0085 [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]],
0086 [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]],
0087 [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]],
0088 [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]],
0089 [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]],
0090 ];
0091
0092 # The parts between the tags of the extensions will be copied verbatim
0093 # Same data structure as in ECTX_EXCLUDE, but per extension.
0094 my %EXTENSION_VERBATIM_TAGS = (
0095 "kcfg" => [["code", "", ""], ["default", "code", "true"],
0096 ["min", "code", "true"], ["max", "code", "true"]],
0097 );
0098
0099 # Add attribute lists as hashes, for membership checks.
0100 for my $ext ( keys %{&ECTX_SPEC} ) {
0101 for my $tag ( keys %{ECTX_SPEC->{$ext}} ) {
0102 my $arr = ECTX_SPEC->{$ext}{$tag}[1];
0103 ECTX_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}};
0104 }
0105 }
0106
0107 ###########################################################################################
0108 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions
0109
0110 GetOptions ( "tag=s" => \my @opt_extra_tags,
0111 "tag-group=s" => \my $opt_tag_group,
0112 "context=s" => \my $opt_context, # I18N context
0113 "lines" => \my $opt_lines,
0114 "cstart=s" => \my $opt_cstart,
0115 "language=s" => \my $opt_language,
0116 "ignore-no-input" => \my $opt_ignore_no_input,
0117 "no-unescape-xml" => \my $opt_no_unescape_xml,
0118 "help|?" => \&usage );
0119
0120 unless( @ARGV )
0121 {
0122 warn "No filename specified" unless $opt_ignore_no_input;
0123 exit;
0124 }
0125
0126 $opt_tag_group ||= "default";
0127
0128 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS
0129 unless exists TAG_GROUP->{$opt_tag_group};
0130
0131 my $tags = TAG_GROUP->{$opt_tag_group};
0132 my $extra_tags = join "", map "|" . quotemeta, @opt_extra_tags;
0133 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/; # Precompile regexp
0134 my $cstart = $opt_cstart; # no default, selected by language if not given
0135 my $language = $opt_language || "C++";
0136 my $ectx_known_exts = join "|", keys %{&ECTX_SPEC};
0137
0138 ###########################################################################################
0139
0140 # Unescape basic XML entities.
0141 sub unescape_xml ($) {
0142 my $text = shift;
0143
0144 if (not $opt_no_unescape_xml) {
0145 $text =~ s/</</g;
0146 $text =~ s/>/>/g;
0147 $text =~ s/"/"/g;
0148 $text =~ s/&/&/g;
0149 }
0150
0151 return $text;
0152 }
0153
0154 # Convert uic to C escaping.
0155 sub escape_uic_to_c ($) {
0156 my $text = shift;
0157
0158 $text = unescape_xml($text);
0159
0160 $text =~ s/\\/\\\\/g; # escape \
0161 $text =~ s/\"/\\\"/g; # escape "
0162 $text =~ s/\r//g; # remove CR (Carriage Return)
0163 $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that.
0164
0165 return $text;
0166 }
0167
0168 ###########################################################################################
0169
0170 sub dummy_call_infix {
0171 my ($cstart, $stend, $ctxt, $text, @cmnts) = @_;
0172 for my $cmnt (@cmnts) {
0173 print qq|$cstart $cmnt\n|;
0174 }
0175 if (defined $text) {
0176 $text = escape_uic_to_c($text);
0177 if (defined $ctxt) {
0178 $ctxt = escape_uic_to_c($ctxt);
0179 print qq|i18nc("$ctxt", "$text")$stend\n|;
0180 } else {
0181 print qq|i18n("$text")$stend\n|;
0182 }
0183 }
0184 }
0185
0186 my %dummy_calls = (
0187 "C++" => sub {
0188 dummy_call_infix($cstart || "//", ";", @_);
0189 },
0190 "Python" => sub {
0191 dummy_call_infix($cstart || "#", "", @_);
0192 },
0193 );
0194
0195 die "unknown language '$language'" if not defined $dummy_calls{$language};
0196 my $dummy_call = $dummy_calls{$language};
0197
0198 # Program start proper - NB $. is the current line number
0199
0200 for my $file_name ( @ARGV )
0201 {
0202 my $fh;
0203
0204 unless ( open $fh, "<", $file_name )
0205 {
0206 # warn "Failed to open: '$file_name': $!";
0207 next;
0208 }
0209
0210 # Ready element-context extraction.
0211 my $ectx_ext;
0212 my $ectx_string;
0213 if ( $file_name =~ /\.($ectx_known_exts)(\.(in|cmake))?$/ ) {
0214 $ectx_ext = $1;
0215 my $ectx_tag_gr = join "|", keys %{ECTX_SPEC->{$ectx_ext}};
0216 $ectx_string = qr/($ectx_tag_gr)( [^>]*)?>/; # precompile regexp
0217 }
0218
0219 my $string = "";
0220 my $origstring = "";
0221 my $in_text = 0;
0222 my $start_line_no = 0;
0223 my $in_skipped_prop = 0;
0224 my $tag = "";
0225 my $attr = "";
0226 my $context = "";
0227 my $notr = "";
0228
0229 # Element-context data: [[level, tag, [[attribute, value], ...]], ...]
0230 # such that subarrays are ordered increasing by level.
0231 my @ectx = ();
0232
0233 # All comments to pending dummy call.
0234 my @comments = ();
0235
0236 while ( <$fh> )
0237 {
0238 if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ )
0239 {
0240 print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n";
0241 last;
0242 }
0243
0244 chomp;
0245
0246 $string .= "\n" . $_;
0247 $origstring = $string;
0248
0249 # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated
0250
0251 if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ )
0252 {
0253 $in_skipped_prop = 1;
0254 }
0255 elsif ( $in_skipped_prop and $string =~ /<\/property/ )
0256 {
0257 $string = "";
0258 $in_skipped_prop = 0;
0259 }
0260
0261 $context = $opt_context unless $in_text;
0262 $notr = "" unless $in_text;
0263
0264 unless ( $in_skipped_prop or $in_text )
0265 {
0266 # Check if this line contains context-worthy element.
0267 if ( $ectx_ext
0268 and ( ($tag, $attr) = $string =~ /<$ectx_string/ ) # no /o here
0269 and exists ECTX_SPEC->{$ectx_ext}{$tag} )
0270 {
0271 my @atts;
0272 for my $ectx_att ( @{ECTX_SPEC->{$ectx_ext}{$tag}[1]} )
0273 {
0274 if ( $attr and $attr =~ /\b$ectx_att\s*=\s*(["'])([^"']*?)\1/ )
0275 {
0276 my $aval = $2;
0277 push @atts, [$ectx_att, $aval];
0278 }
0279 }
0280 # Kill all tags in element-context with level higer or equal to this,
0281 # and add it to the end.
0282 my $clevel = ECTX_SPEC->{$ectx_ext}{$tag}[0];
0283 for ( my $i = 0; $i < @ectx; ++$i )
0284 {
0285 if ( $clevel <= $ectx[$i][0] )
0286 {
0287 @ectx = @ectx[0 .. ($i - 1)];
0288 last;
0289 }
0290 }
0291 push @ectx, [$clevel, $tag, [@atts]];
0292 }
0293
0294 if ( ($tag, $attr) = $string =~ /<$text_string/o )
0295 {
0296 my ($attr_comment) = $attr =~ /\bcomment=\"([^\"]*)\"/ if $attr;
0297 $context = $attr_comment if $attr_comment;
0298 my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr;
0299 $context = $attr_context if $attr_context;
0300 # It is unlikely that both attributes 'context' and 'comment'
0301 # will be present, but if so happens, 'context' has priority.
0302 my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr;
0303 push @comments, "i18n: $attr_extracomment" if $attr_extracomment;
0304
0305 my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr;
0306 $notr = $attr_notr if $attr_notr;
0307
0308 my $nongreedystring = $string;
0309 $string =~ s/^.*<$text_string//so;
0310 $nongreedystring =~ s/^.*?<$text_string//so;
0311 if ($string cmp $nongreedystring)
0312 {
0313 print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n";
0314 }
0315 if ( not $attr or $attr !~ /\/ *$/ )
0316 {
0317 $in_text = 1;
0318 $start_line_no = $.;
0319 }
0320 }
0321 else
0322 {
0323 @comments = ();
0324 $string = "";
0325 }
0326 }
0327
0328 next unless $in_text;
0329 next unless $string =~ /<\/$text_string/o;
0330
0331 my $text = $string;
0332 $text =~ s/<\/$text_string.*$//o;
0333
0334 if ( $text cmp "" )
0335 {
0336 # See if the string should be excluded by trailing element-context.
0337 my $exclude_by_ectx = 0;
0338 my @rev_ectx = reverse @ectx;
0339 for my $ectx_tail (@{&ECTX_EXCLUDE})
0340 {
0341 my @rev_ectx_tail = reverse @{$ectx_tail};
0342 my $i = 0;
0343 $exclude_by_ectx = (@rev_ectx > 0 and @rev_ectx_tail > 0);
0344 while ($i < @rev_ectx and $i < @rev_ectx_tail)
0345 {
0346 my ($tag, $attr, $aval) = @{$rev_ectx_tail[$i]};
0347 $exclude_by_ectx = (not $tag or ($tag eq $rev_ectx[$i][1]));
0348 if ($exclude_by_ectx and $attr)
0349 {
0350 $exclude_by_ectx = 0;
0351 for my $ectx_attr_aval (@{$rev_ectx[$i][2]})
0352 {
0353 if ($attr eq $ectx_attr_aval->[0])
0354 {
0355 $exclude_by_ectx = $aval ? $aval eq $ectx_attr_aval->[1] : 1;
0356 last;
0357 }
0358 }
0359 }
0360 last if not $exclude_by_ectx;
0361 ++$i;
0362 }
0363 last if $exclude_by_ectx;
0364 }
0365
0366 if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true"))
0367 {
0368 push @comments, "Manually excluded message at $file_name line $.";
0369 }
0370 elsif ( $exclude_by_ectx )
0371 {
0372 push @comments, "Automatically excluded message at $file_name line $.";
0373 }
0374 else
0375 {
0376 (my $norm_fname = $file_name) =~ s/^\.\///;
0377 push @comments, "i18n: file: $norm_fname:$.";
0378 if ( @ectx ) {
0379 # Format element-context.
0380 my @tag_gr;
0381 for my $tgr (reverse @ectx)
0382 {
0383 my @attr_gr;
0384 for my $agr ( @{$tgr->[2]} )
0385 {
0386 #push @attr_gr, "$agr->[0]=$agr->[1]";
0387 push @attr_gr, "$agr->[1]"; # no real nead for attribute name
0388 }
0389 my $attr = join(", ", @attr_gr);
0390 push @tag_gr, "$tgr->[1] ($attr)" if $attr;
0391 push @tag_gr, "$tgr->[1]" if not $attr;
0392 }
0393 my $ectx_str = join ", ", @tag_gr;
0394 push @comments, "i18n: ectx: $ectx_str";
0395 }
0396 push @comments, "xgettext: no-c-format" if $text =~ /%/o;
0397 $dummy_call->($context, $text, @comments);
0398 @comments = ();
0399 }
0400 }
0401 else
0402 {
0403 push @comments, "Skipped empty message at $file_name line $.";
0404 }
0405
0406 $string =~ s/^.*<\/$text_string//o;
0407 $in_text = 0;
0408
0409 # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files.
0410
0411 warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i;
0412 }
0413
0414 close $fh or warn "Failed to close: '$file_name': $!";
0415
0416 die "parsing error in $file_name" if $in_text;
0417
0418 if ($ectx_ext && exists $EXTENSION_VERBATIM_TAGS{$ectx_ext})
0419 {
0420 unless ( open $fh, "<", $file_name )
0421 {
0422 # warn "Failed to open: '$file_name': $!";
0423 next;
0424 }
0425
0426 while ( <$fh> )
0427 {
0428 chomp;
0429 $string .= "\n" . $_;
0430
0431 for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$ectx_ext} })
0432 {
0433 my ($tag, $attr, $aval) = @{$elspec};
0434 my $rx;
0435 if ($attr and $aval) {
0436 $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s
0437 } elsif ($attr) {
0438 $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s
0439 } else {
0440 $rx = qr/<$tag>(.*)<\/$tag>/s
0441 }
0442 if ($string =~ $rx)
0443 {
0444 # Add comment before any line that has an i18n substring in it.
0445 my @matched = split /\n/, $1;
0446 my $mlno = $.;
0447 (my $norm_fname = $file_name) =~ s/^\.\///;
0448 for my $mline (@matched) {
0449 # Assume verbatim code is in language given by --language.
0450 # Therefore format only comment, and write code line as-is.
0451 if ($mline =~ /i18n/) {
0452 $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno"));
0453 }
0454 $mline = unescape_xml($mline);
0455 print "$mline\n";
0456 ++$mlno;
0457 }
0458 $string = "";
0459 }
0460 }
0461 }
0462
0463 close $fh or warn "Failed to close: '$file_name': $!";
0464 }
0465 }