File indexing completed on 2024-11-10 04:07:30
0001 #! /usr/bin/env perl 0002 # 0003 # SPDX-FileCopyrightText: 2004 Richard Evans <rich@ridas.com> 0004 # 0005 # SPDX-License-Identifier: GPL-3.0-or-later 0006 # 0007 0008 0009 sub usage 0010 { 0011 warn <<"EOF"; 0012 0013 extractrc [flags] filenames 0014 0015 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and 0016 writes on standard output (usually redirected to rc.cpp) the equivalent 0017 i18n() calls so that xgettext can parse them. 0018 0019 --tag=name : Also extract the tag name(s). Repeat the flag to specify 0020 multiple names: --tag=tag_one --tag=tag_two 0021 0022 --tag-group=group : Use a group of tags - uses 'default' if omitted. 0023 Valid groups are: @{[TAG_GROUPS()]} 0024 0025 --context=name : Give i18n calls a context name: i18nc("name", ...) 0026 --lines : Include source line numbers in comments (deprecated, it is switched on by default now) 0027 --cstart=chars : Start of to-EOL style comments in output, defaults to // 0028 --language=lang : Create i18n calls appropriate for KDE bindings 0029 in the given language. Currently known languages: 0030 C++ (default), Python 0031 --ignore-no-input : Do not warn if there were no filenames specified 0032 --help|? : Display this summary 0033 --no-unescape-xml : Don't do xml unescaping 0034 0035 EOF 0036 0037 exit; 0038 } 0039 0040 ########################################################################################### 0041 0042 use strict; 0043 use warnings; 0044 use Getopt::Long; 0045 use Data::Dumper; # Provides debugging command: print Dumper(\%hash); 0046 0047 use constant TAG_GROUP => 0048 { 0049 krita => "[tT][eE][xX][tT]|title|string|whatsThis|toolTip|iconText", 0050 none => "", 0051 }; 0052 0053 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP}; 0054 0055 # Specification to extract nice element-context for strings. 0056 use constant CONTEXT_SPEC => 0057 { 0058 # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...} 0059 # Order of attributes determines their order in the extracted comment. 0060 "ui" => { 0061 "widget" => [10, ["class", "name"]], 0062 "item" => [15, []], 0063 "property" => [20, ["name"]], 0064 "attribute" => [20, ["name"]], 0065 }, 0066 "rc" => { 0067 "Menu" => [10, ["name"]], 0068 "ToolBar" => [10, ["name"]], 0069 }, 0070 "kcfg" => { 0071 "group" => [10, ["name"]], 0072 "entry" => [20, ["name"]], 0073 "whatsthis" => [30, []], 0074 "tooltip" => [30, []], 0075 "label" => [30, []], 0076 }, 0077 "action" => { 0078 "ActionCollection" => [10, ["name"]], 0079 "Actions" => [20, ["category"]], 0080 "Action" => [30, ["name"]], 0081 } 0082 }; 0083 0084 # Specification to exclude strings by trailing section of element-context. 0085 use constant CONTEXT_EXCLUDE => 0086 [ 0087 # Data structure: [[tag, attribute, attrvalue], [...]] 0088 # Empty ("") attribute means all elements with given tag, 0089 # empty attrvalue means element with given tag and attribute of any value. 0090 [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]], 0091 [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]], 0092 [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]], 0093 [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]], 0094 [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]], 0095 [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]], 0096 [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]], 0097 ]; 0098 0099 # The parts between the tags of the extensions will be copied verbatim 0100 # Same data structure as in CONTEXT_EXCLUDE, but per extension. 0101 my %EXTENSION_VERBATIM_TAGS = ( 0102 "kcfg" => [["code", "", ""], ["default", "code", "true"], 0103 ["min", "code", "true"], ["max", "code", "true"]], 0104 ); 0105 0106 # Add attribute lists as hashes, for membership checks. 0107 for my $ext ( keys %{&CONTEXT_SPEC} ) { 0108 for my $tag ( keys %{CONTEXT_SPEC->{$ext}} ) { 0109 my $arr = CONTEXT_SPEC->{$ext}{$tag}[1]; 0110 CONTEXT_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}}; 0111 } 0112 } 0113 0114 ########################################################################################### 0115 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions 0116 0117 GetOptions ( "tag=s" => \my @opt_extra_tags, 0118 "tag-group=s" => \my $opt_tag_group, 0119 "context=s" => \my $opt_context, # I18N context 0120 "lines" => \my $opt_lines, 0121 "cstart=s" => \my $opt_cstart, 0122 "language=s" => \my $opt_language, 0123 "ignore-no-input" => \my $opt_ignore_no_input, 0124 "no-unescape-xml" => \my $opt_no_unescape_xml, 0125 "help|?" => \&usage ); 0126 0127 unless( @ARGV ) 0128 { 0129 warn "No filename specified" unless $opt_ignore_no_input; 0130 exit; 0131 } 0132 0133 $opt_tag_group ||= "krita"; 0134 0135 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS 0136 unless exists TAG_GROUP->{$opt_tag_group}; 0137 0138 my $tags = TAG_GROUP->{$opt_tag_group}; 0139 my $extra_tags = join "", map "|" . quotemeta, @opt_extra_tags; 0140 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/; # Precompile regexp 0141 my $cstart = $opt_cstart; # no default, selected by language if not given 0142 my $language = $opt_language || "C++"; 0143 my $context_known_exts = join "|", keys %{&CONTEXT_SPEC}; 0144 0145 ########################################################################################### 0146 0147 # Unescape basic XML entities. 0148 sub unescape_xml ($) { 0149 my $text = shift; 0150 0151 if (not $opt_no_unescape_xml) { 0152 $text =~ s/</</g; 0153 $text =~ s/>/>/g; 0154 $text =~ s/&/&/g; 0155 $text =~ s/"/"/g; 0156 } 0157 0158 return $text; 0159 } 0160 0161 # Convert uic to C escaping. 0162 sub escape_uic_to_c ($) { 0163 my $text = shift; 0164 0165 $text = unescape_xml($text); 0166 0167 $text =~ s/\\/\\\\/g; # escape \ 0168 $text =~ s/\"/\\\"/g; # escape " 0169 $text =~ s/\r//g; # remove CR (Carriage Return) 0170 $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that. 0171 0172 return $text; 0173 } 0174 0175 ########################################################################################### 0176 0177 sub dummy_call_infix { 0178 my ($cstart, $stend, $ctxt, $text, @cmnts) = @_; 0179 for my $cmnt (@cmnts) { 0180 print qq|$cstart $cmnt\n|; 0181 } 0182 if (defined $text) { 0183 $text = escape_uic_to_c($text); 0184 if (defined $ctxt) { 0185 $ctxt = escape_uic_to_c($ctxt); 0186 print qq|i18nc("$ctxt", "$text")$stend\n|; 0187 } else { 0188 print qq|i18n("$text")$stend\n|; 0189 } 0190 } 0191 } 0192 0193 my %dummy_calls = ( 0194 "C++" => sub { 0195 dummy_call_infix($cstart || "//", ";", @_); 0196 }, 0197 "Python" => sub { 0198 dummy_call_infix($cstart || "#", "", @_); 0199 }, 0200 ); 0201 0202 die "unknown language '$language'" if not defined $dummy_calls{$language}; 0203 my $dummy_call = $dummy_calls{$language}; 0204 0205 # Program start proper - outer loop runs once for each file in the argument list. 0206 for my $file_name ( @ARGV ) 0207 { 0208 my $fh; 0209 0210 unless ( open $fh, "<", $file_name ) 0211 { 0212 # warn "Failed to open: '$file_name': $!"; 0213 next; 0214 } 0215 0216 # Ready element-context extraction. 0217 my $context_ext; 0218 my $context_string; # Regexp used to validate context 0219 if ( $file_name =~ /\.($context_known_exts)(\.(in|cmake))?$/ ) { 0220 $context_ext = $1; 0221 my $context_tag_gr = join "|", keys %{CONTEXT_SPEC->{$context_ext}}; 0222 $context_string = qr/($context_tag_gr)( [^>]*)?>/; # precompile regexp 0223 } 0224 0225 my $string = ""; 0226 my $origstring = ""; 0227 my $in_text = 0; # Are we currently inside a block of raw text? 0228 my $start_line_no = 0; 0229 my $in_skipped_prop = 0; # Are we currently inside XML property that shouldn't be translated? 0230 my $tag = ""; 0231 my $attr = ""; 0232 my $context = ""; 0233 my $notr = ""; 0234 0235 # Element-context data: [[level, tag, [[attribute, value], ...]], ...] 0236 # such that subarrays are ordered increasing by level. 0237 my @context = (); 0238 0239 # All comments to pending dummy call. 0240 my @comments = (); 0241 0242 # Begin looping through the file 0243 while ( <$fh> ) 0244 { 0245 # If your Perl is a bit rusty: $. is the current line number 0246 # Also, =~ and !~ are pattern-matching operators. :) 0247 if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ ) 0248 { 0249 print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n"; 0250 last; 0251 } 0252 0253 chomp; 0254 0255 $string .= "\n" . $_; 0256 $origstring = $string; 0257 0258 # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated 0259 if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ ) 0260 { 0261 $in_skipped_prop = 1; 0262 } 0263 elsif ( $in_skipped_prop and $string =~ /<\/property/ ) 0264 { 0265 $string = ""; 0266 $in_skipped_prop = 0; 0267 } 0268 0269 $context = $opt_context unless $in_text; 0270 $notr = "" unless $in_text; 0271 0272 # print "context = " . $opt_context . "\n"; 0273 0274 unless ( $in_skipped_prop or $in_text ) 0275 { 0276 # Check if this line contains context-worthy element. 0277 if ( $context_ext 0278 and ( ($tag, $attr) = $string =~ /<$context_string/ ) # no /o here 0279 and exists CONTEXT_SPEC->{$context_ext}{$tag} ) 0280 { 0281 my @atts; 0282 for my $context_att ( @{CONTEXT_SPEC->{$context_ext}{$tag}[1]} ) 0283 { 0284 if ( $attr and $attr =~ /\b$context_att\s*=\s*(["'])([^"']*?)\1/ ) 0285 { 0286 my $aval = $2; 0287 push @atts, [$context_att, $aval]; 0288 } 0289 } 0290 # Kill all tags in element-context with level higher or equal to this, 0291 # and add it to the end. 0292 my $clevel = CONTEXT_SPEC->{$context_ext}{$tag}[0]; 0293 for ( my $i = 0; $i < @context; ++$i ) 0294 { 0295 if ( $clevel <= $context[$i][0] ) 0296 { 0297 @context = @context[0 .. ($i - 1)]; 0298 last; 0299 } 0300 } 0301 push @context, [$clevel, $tag, [@atts]]; 0302 } 0303 0304 if ( ($tag, $attr) = $string =~ /<$text_string/o ) 0305 { 0306 # Only treat "context" as the gettext i18n context, not "comment". 0307 my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr; 0308 $context = $attr_context if $attr_context; 0309 my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr; 0310 push @comments, "i18n: $attr_extracomment" if $attr_extracomment; 0311 0312 my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr; 0313 $notr = $attr_notr if $attr_notr; 0314 0315 my $nongreedystring = $string; 0316 $string =~ s/^.*<$text_string//so; 0317 $nongreedystring =~ s/^.*?<$text_string//so; 0318 if ($string cmp $nongreedystring) 0319 { 0320 print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n"; 0321 } 0322 if ( not $attr or $attr !~ /\/ *$/ ) 0323 { 0324 $in_text = 1; 0325 $start_line_no = $.; 0326 } 0327 } 0328 else 0329 { 0330 @comments = (); 0331 $string = ""; 0332 } 0333 } 0334 0335 next unless $in_text; 0336 next unless $string =~ /<\/$text_string/o; 0337 0338 my $text = $string; 0339 $text =~ s/<\/$text_string.*$//o; 0340 0341 if ( $text cmp "" ) 0342 { 0343 # See if the string should be excluded by trailing element-context. 0344 my $exclude_by_context = 0; 0345 my @rev_context = reverse @context; 0346 for my $context_tail (@{&CONTEXT_EXCLUDE}) 0347 { 0348 my @rev_context_tail = reverse @{$context_tail}; 0349 my $i = 0; 0350 $exclude_by_context = (@rev_context > 0 and @rev_context_tail > 0); 0351 while ($i < @rev_context and $i < @rev_context_tail) 0352 { 0353 my ($tag, $attr, $aval) = @{$rev_context_tail[$i]}; 0354 $exclude_by_context = (not $tag or ($tag eq $rev_context[$i][1])); 0355 if ($exclude_by_context and $attr) 0356 { 0357 $exclude_by_context = 0; 0358 for my $context_attr_aval (@{$rev_context[$i][2]}) 0359 { 0360 if ($attr eq $context_attr_aval->[0]) 0361 { 0362 $exclude_by_context = $aval ? $aval eq $context_attr_aval->[1] : 1; 0363 last; 0364 } 0365 } 0366 } 0367 last if not $exclude_by_context; 0368 ++$i; 0369 } 0370 last if $exclude_by_context; 0371 } 0372 0373 if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true")) 0374 { 0375 push @comments, "Manually excluded message at $file_name line $."; 0376 } 0377 elsif ( $exclude_by_context ) 0378 { 0379 push @comments, "Automatically excluded message at $file_name line $."; 0380 } 0381 else 0382 { 0383 # Write everything to file 0384 (my $clean_file_name = $file_name) =~ s/^\.\///; 0385 push @comments, "i18n: file: $clean_file_name:$."; 0386 if ( @context ) { 0387 # Format element-context. 0388 my @tag_gr; 0389 for my $tgr (reverse @context) 0390 { 0391 my @attr_gr; 0392 for my $agr ( @{$tgr->[2]} ) 0393 { 0394 #push @attr_gr, "$agr->[0]=$agr->[1]"; 0395 push @attr_gr, "$agr->[1]"; # no real need for attribute name 0396 } 0397 my $attr = join(", ", @attr_gr); 0398 push @tag_gr, "$tgr->[1] ($attr)" if $attr; 0399 push @tag_gr, "$tgr->[1]" if not $attr; 0400 } 0401 my $context_str = join ", ", @tag_gr; 0402 push @comments, "i18n: context: $tag @ $context_str"; 0403 } 0404 push @comments, "xgettext: no-c-format" if $text =~ /%/o; 0405 $dummy_call->($context, $text, @comments); 0406 @comments = (); 0407 } 0408 } 0409 else 0410 { 0411 push @comments, "Skipped empty message at $file_name line $."; 0412 } 0413 0414 $string =~ s/^.*<\/$text_string//o; 0415 $in_text = 0; 0416 0417 # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files. 0418 0419 warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i; 0420 } 0421 0422 close $fh or warn "Failed to close: '$file_name': $!"; 0423 0424 die "parsing error in $file_name" if $in_text; 0425 0426 if ($context_ext && exists $EXTENSION_VERBATIM_TAGS{$context_ext}) 0427 { 0428 unless ( open $fh, "<", $file_name ) 0429 { 0430 # warn "Failed to open: '$file_name': $!"; 0431 next; 0432 } 0433 0434 while ( <$fh> ) 0435 { 0436 chomp; 0437 $string .= "\n" . $_; 0438 0439 for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$context_ext} }) 0440 { 0441 my ($tag, $attr, $aval) = @{$elspec}; 0442 my $rx; 0443 if ($attr and $aval) { 0444 $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s 0445 } elsif ($attr) { 0446 $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s 0447 } else { 0448 $rx = qr/<$tag>(.*)<\/$tag>/s 0449 } 0450 if ($string =~ $rx) 0451 { 0452 # Add comment before any line that has an i18n substring in it. 0453 my @matched = split /\n/, $1; 0454 my $mlno = $.; 0455 (my $norm_fname = $file_name) =~ s/^\.\///; 0456 for my $mline (@matched) { 0457 # Assume verbatim code is in language given by --language. 0458 # Therefore format only comment, and write code line as-is. 0459 if ($mline =~ /i18n/) { 0460 $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno")); 0461 } 0462 $mline = unescape_xml($mline); 0463 print "$mline\n"; 0464 ++$mlno; 0465 } 0466 $string = ""; 0467 } 0468 } 0469 } 0470 0471 close $fh or warn "Failed to close: '$file_name': $!"; 0472 } 0473 }