File indexing completed on 2024-04-14 14:45:32

0001 #! /usr/bin/env perl
0002 #
0003 # SPDX-FileCopyrightText: 2004 Richard Evans <rich@ridas.com>
0004 #
0005 # SPDX-License-Identifier: GPL-3.0-or-later
0006 #
0007 
0008 
0009 sub usage
0010 {
0011   warn <<"EOF";
0012 
0013 extractrc [flags] filenames
0014 
0015 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and
0016 writes on standard output (usually redirected to rc.cpp) the equivalent
0017 i18n() calls so that xgettext can parse them.
0018 
0019 --tag=name        : Also extract the tag name(s). Repeat the flag to specify
0020                     multiple names: --tag=tag_one --tag=tag_two
0021 
0022 --tag-group=group : Use a group of tags - uses 'default' if omitted.
0023                     Valid groups are: @{[TAG_GROUPS()]}
0024 
0025 --context=name    : Give i18n calls a context name: i18nc("name", ...)
0026 --lines           : Include source line numbers in comments (deprecated, it is switched on by default now)
0027 --cstart=chars    : Start of to-EOL style comments in output, defaults to //
0028 --language=lang   : Create i18n calls appropriate for KDE bindings
0029                     in the given language. Currently known languages:
0030                     C++ (default), Python
0031 --ignore-no-input : Do not warn if there were no filenames specified
0032 --help|?          : Display this summary
0033 --no-unescape-xml : Don't do xml unescaping
0034 
0035 EOF
0036 
0037   exit;
0038 }
0039 
0040 ###########################################################################################
0041 
0042 use strict;
0043 use warnings;
0044 use Getopt::Long;
0045 use Data::Dumper; # Provides debugging command: print Dumper(\%hash);
0046 
0047 use constant TAG_GROUP =>
0048 {
0049   krita   => "[tT][eE][xX][tT]|title|string|whatsThis|toolTip|iconText",
0050   none    => "",
0051 };
0052 
0053 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP};
0054 
0055 # Specification to extract nice element-context for strings.
0056 use constant CONTEXT_SPEC =>
0057 {
0058   # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...}
0059   # Order of attributes determines their order in the extracted comment.
0060   "ui" => {
0061     "widget" => [10, ["class", "name"]],
0062     "item" => [15, []],
0063     "property" => [20, ["name"]],
0064     "attribute" => [20, ["name"]],
0065   },
0066   "rc" => {
0067     "Menu" => [10, ["name"]],
0068     "ToolBar" => [10, ["name"]],
0069   },
0070   "kcfg" => {
0071     "group" => [10, ["name"]],
0072     "entry" => [20, ["name"]],
0073     "whatsthis" => [30, []],
0074     "tooltip" => [30, []],
0075     "label" => [30, []],
0076   },
0077   "action" => {
0078       "ActionCollection" => [10, ["name"]],
0079       "Actions" => [20, ["category"]],
0080       "Action" => [30, ["name"]],
0081   }
0082 };
0083 
0084 # Specification to exclude strings by trailing section of element-context.
0085 use constant CONTEXT_EXCLUDE =>
0086 [
0087     # Data structure: [[tag, attribute, attrvalue], [...]]
0088     # Empty ("") attribute means all elements with given tag,
0089     # empty attrvalue means element with given tag and attribute of any value.
0090     [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]],
0091     [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]],
0092     [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]],
0093     [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]],
0094     [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]],
0095     [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]],
0096     [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]],
0097 ];
0098 
0099 # The parts between the tags of the extensions will be copied verbatim
0100 # Same data structure as in CONTEXT_EXCLUDE, but per extension.
0101 my %EXTENSION_VERBATIM_TAGS = (
0102        "kcfg" => [["code", "", ""], ["default", "code", "true"],
0103                   ["min", "code", "true"], ["max", "code", "true"]],
0104      );
0105 
0106 # Add attribute lists as hashes, for membership checks.
0107 for my $ext ( keys %{&CONTEXT_SPEC} ) {
0108   for my $tag ( keys %{CONTEXT_SPEC->{$ext}} ) {
0109     my $arr = CONTEXT_SPEC->{$ext}{$tag}[1];
0110     CONTEXT_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}};
0111   }
0112 }
0113 
0114 ###########################################################################################
0115 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions
0116 
0117 GetOptions ( "tag=s"       => \my @opt_extra_tags,
0118              "tag-group=s" => \my $opt_tag_group,
0119              "context=s"   => \my $opt_context,       # I18N context
0120              "lines"       => \my $opt_lines,
0121              "cstart=s"    => \my $opt_cstart,
0122              "language=s"  => \my $opt_language,
0123              "ignore-no-input" => \my $opt_ignore_no_input,
0124              "no-unescape-xml" => \my $opt_no_unescape_xml,
0125              "help|?"      => \&usage );
0126 
0127 unless( @ARGV )
0128 {
0129   warn "No filename specified" unless $opt_ignore_no_input;
0130   exit;
0131 }
0132 
0133 $opt_tag_group ||= "krita";
0134 
0135 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS
0136     unless exists TAG_GROUP->{$opt_tag_group};
0137 
0138 my $tags = TAG_GROUP->{$opt_tag_group};
0139 my $extra_tags  = join "", map "|" . quotemeta, @opt_extra_tags;
0140 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/;    # Precompile regexp
0141 my $cstart = $opt_cstart; # no default, selected by language if not given
0142 my $language = $opt_language || "C++";
0143 my $context_known_exts = join "|", keys %{&CONTEXT_SPEC};
0144 
0145 ###########################################################################################
0146 
0147 # Unescape basic XML entities.
0148 sub unescape_xml ($) {
0149     my $text = shift;
0150 
0151     if (not $opt_no_unescape_xml) {
0152         $text =~ s/&lt;/</g;
0153         $text =~ s/&gt;/>/g;
0154         $text =~ s/&amp;/&/g;
0155         $text =~ s/&quot;/"/g;
0156     }
0157 
0158     return $text;
0159 }
0160 
0161 # Convert uic to C escaping.
0162 sub escape_uic_to_c ($) {
0163     my $text = shift;
0164 
0165     $text = unescape_xml($text);
0166 
0167     $text =~ s/\\/\\\\/g; # escape \
0168     $text =~ s/\"/\\\"/g; # escape "
0169     $text =~ s/\r//g; # remove CR (Carriage Return)
0170     $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that.
0171 
0172     return $text;
0173 }
0174 
0175 ###########################################################################################
0176 
0177 sub dummy_call_infix {
0178     my ($cstart, $stend, $ctxt, $text, @cmnts) = @_;
0179     for my $cmnt (@cmnts) {
0180         print qq|$cstart $cmnt\n|;
0181     }
0182     if (defined $text) {
0183         $text = escape_uic_to_c($text);
0184         if (defined $ctxt) {
0185             $ctxt = escape_uic_to_c($ctxt);
0186             print qq|i18nc("$ctxt", "$text")$stend\n|;
0187         } else {
0188             print qq|i18n("$text")$stend\n|;
0189         }
0190     }
0191 }
0192 
0193 my %dummy_calls = (
0194     "C++" => sub {
0195         dummy_call_infix($cstart || "//", ";", @_);
0196     },
0197     "Python" => sub {
0198         dummy_call_infix($cstart || "#", "", @_);
0199     },
0200 );
0201 
0202 die "unknown language '$language'" if not defined $dummy_calls{$language};
0203 my $dummy_call = $dummy_calls{$language};
0204 
0205 # Program start proper - outer loop runs once for each file in the argument list.
0206 for my $file_name ( @ARGV )
0207 {
0208   my $fh;
0209 
0210   unless ( open $fh, "<", $file_name )
0211   {
0212     # warn "Failed to open: '$file_name': $!";
0213     next;
0214   }
0215 
0216   # Ready element-context extraction.
0217   my $context_ext;
0218   my $context_string;  # Regexp used to validate context
0219   if ( $file_name =~ /\.($context_known_exts)(\.(in|cmake))?$/ ) {
0220     $context_ext = $1;
0221     my $context_tag_gr = join "|", keys %{CONTEXT_SPEC->{$context_ext}};
0222     $context_string = qr/($context_tag_gr)( [^>]*)?>/; # precompile regexp
0223   }
0224 
0225   my $string          = "";
0226   my $origstring      = "";
0227   my $in_text         = 0;   # Are we currently inside a block of raw text?
0228   my $start_line_no   = 0;
0229   my $in_skipped_prop = 0;   # Are we currently inside XML property that shouldn't be translated?
0230   my $tag = "";
0231   my $attr = "";
0232   my $context = "";
0233   my $notr = "";
0234 
0235   # Element-context data: [[level, tag, [[attribute, value], ...]], ...]
0236   # such that subarrays are ordered increasing by level.
0237   my @context = ();
0238 
0239   # All comments to pending dummy call.
0240   my @comments = ();
0241 
0242   # Begin looping through the file
0243   while ( <$fh> )
0244   {
0245      # If your Perl is a bit rusty: $. is the current line number
0246      # Also, =~ and !~ are pattern-matching operators. :)
0247      if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ )
0248      {
0249        print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n";
0250        last;
0251      }
0252 
0253      chomp;
0254 
0255      $string .= "\n" . $_;
0256      $origstring = $string;
0257 
0258      # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated
0259      if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ )
0260      {
0261        $in_skipped_prop = 1;
0262      }
0263      elsif ( $in_skipped_prop and $string =~ /<\/property/ )
0264      {
0265        $string          = "";
0266        $in_skipped_prop = 0;
0267      }
0268 
0269      $context = $opt_context unless $in_text;
0270      $notr = "" unless $in_text;
0271 
0272      # print "context = " . $opt_context . "\n";
0273 
0274      unless ( $in_skipped_prop or $in_text )
0275      {
0276        # Check if this line contains context-worthy element.
0277        if (    $context_ext
0278            and ( ($tag, $attr) = $string =~ /<$context_string/ ) # no /o here
0279            and exists CONTEXT_SPEC->{$context_ext}{$tag} )
0280        {
0281          my @atts;
0282          for my $context_att ( @{CONTEXT_SPEC->{$context_ext}{$tag}[1]} )
0283          {
0284            if ( $attr and $attr =~ /\b$context_att\s*=\s*(["'])([^"']*?)\1/ )
0285            {
0286              my $aval = $2;
0287              push @atts, [$context_att, $aval];
0288            }
0289          }
0290          # Kill all tags in element-context with level higher or equal to this,
0291          # and add it to the end.
0292          my $clevel = CONTEXT_SPEC->{$context_ext}{$tag}[0];
0293          for ( my $i = 0; $i < @context; ++$i )
0294          {
0295            if ( $clevel <= $context[$i][0] )
0296            {
0297              @context = @context[0 .. ($i - 1)];
0298              last;
0299            }
0300          }
0301          push @context, [$clevel, $tag, [@atts]];
0302        }
0303 
0304        if ( ($tag, $attr) = $string =~ /<$text_string/o )
0305        {
0306          # Only treat "context" as the gettext i18n context, not "comment".
0307          my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr;
0308          $context = $attr_context if $attr_context;
0309          my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr;
0310          push @comments, "i18n: $attr_extracomment" if $attr_extracomment;
0311 
0312          my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr;
0313          $notr = $attr_notr if $attr_notr;
0314 
0315          my $nongreedystring = $string;
0316          $string        =~ s/^.*<$text_string//so;
0317          $nongreedystring  =~ s/^.*?<$text_string//so;
0318          if ($string cmp $nongreedystring)
0319          {
0320             print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n";
0321          }
0322          if ( not $attr or $attr !~ /\/ *$/ )
0323          {
0324            $in_text       =  1;
0325            $start_line_no =  $.;
0326          }
0327        }
0328        else
0329        {
0330          @comments = ();
0331          $string = "";
0332        }
0333      }
0334 
0335      next unless $in_text;
0336      next unless $string =~ /<\/$text_string/o;
0337 
0338      my $text = $string;
0339      $text =~ s/<\/$text_string.*$//o;
0340 
0341      if ( $text cmp "" )
0342      {
0343        # See if the string should be excluded by trailing element-context.
0344        my $exclude_by_context = 0;
0345        my @rev_context = reverse @context;
0346        for my $context_tail (@{&CONTEXT_EXCLUDE})
0347        {
0348          my @rev_context_tail = reverse @{$context_tail};
0349          my $i = 0;
0350          $exclude_by_context = (@rev_context > 0 and @rev_context_tail > 0);
0351          while ($i < @rev_context and $i < @rev_context_tail)
0352          {
0353            my ($tag, $attr, $aval) = @{$rev_context_tail[$i]};
0354            $exclude_by_context = (not $tag or ($tag eq $rev_context[$i][1]));
0355            if ($exclude_by_context and $attr)
0356            {
0357              $exclude_by_context = 0;
0358              for my $context_attr_aval (@{$rev_context[$i][2]})
0359              {
0360                if ($attr eq $context_attr_aval->[0])
0361                {
0362                  $exclude_by_context = $aval ? $aval eq $context_attr_aval->[1] : 1;
0363                  last;
0364                }
0365              }
0366            }
0367            last if not $exclude_by_context;
0368            ++$i;
0369          }
0370          last if $exclude_by_context;
0371        }
0372 
0373        if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true"))
0374        {
0375          push @comments, "Manually excluded message at $file_name line $.";
0376        }
0377        elsif ( $exclude_by_context )
0378        {
0379          push @comments, "Automatically excluded message at $file_name line $.";
0380        }
0381        else
0382        {
0383          # Write everything to file
0384          (my $clean_file_name = $file_name) =~ s/^\.\///;
0385          push @comments, "i18n: file: $clean_file_name:$.";
0386          if ( @context ) {
0387            # Format element-context.
0388            my @tag_gr;
0389            for my $tgr (reverse @context)
0390            {
0391              my @attr_gr;
0392              for my $agr ( @{$tgr->[2]} )
0393              {
0394                #push @attr_gr, "$agr->[0]=$agr->[1]";
0395                push @attr_gr, "$agr->[1]"; # no real need for attribute name
0396              }
0397              my $attr = join(", ", @attr_gr);
0398              push @tag_gr, "$tgr->[1] ($attr)" if $attr;
0399              push @tag_gr, "$tgr->[1]" if not $attr;
0400            }
0401            my $context_str = join ", ", @tag_gr;
0402            push @comments, "i18n: context: $tag @ $context_str";
0403          }
0404          push @comments, "xgettext: no-c-format" if $text =~ /%/o;
0405          $dummy_call->($context, $text, @comments);
0406          @comments = ();
0407        }
0408      }
0409      else
0410      {
0411        push @comments, "Skipped empty message at $file_name line $.";
0412      }
0413 
0414      $string  =~ s/^.*<\/$text_string//o;
0415      $in_text =  0;
0416 
0417      # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files.
0418 
0419      warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i;
0420   }
0421 
0422   close $fh or warn "Failed to close: '$file_name': $!";
0423 
0424   die "parsing error in $file_name" if $in_text;
0425 
0426   if ($context_ext && exists $EXTENSION_VERBATIM_TAGS{$context_ext})
0427   {
0428     unless ( open $fh, "<", $file_name )
0429     {
0430       # warn "Failed to open: '$file_name': $!";
0431       next;
0432     }
0433 
0434     while ( <$fh> )
0435     {
0436       chomp;
0437       $string .= "\n" . $_;
0438 
0439       for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$context_ext} })
0440       {
0441         my ($tag, $attr, $aval) = @{$elspec};
0442         my $rx;
0443         if ($attr and $aval) {
0444             $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s
0445         } elsif ($attr) {
0446             $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s
0447         } else {
0448             $rx = qr/<$tag>(.*)<\/$tag>/s
0449         }
0450         if ($string =~ $rx)
0451         {
0452           # Add comment before any line that has an i18n substring in it.
0453           my @matched = split /\n/, $1;
0454           my $mlno = $.;
0455           (my $norm_fname = $file_name) =~ s/^\.\///;
0456           for my $mline (@matched) {
0457             # Assume verbatim code is in language given by --language.
0458             # Therefore format only comment, and write code line as-is.
0459             if ($mline =~ /i18n/) {
0460               $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno"));
0461             }
0462             $mline = unescape_xml($mline);
0463             print "$mline\n";
0464             ++$mlno;
0465           }
0466           $string = "";
0467         }
0468       }
0469     }
0470 
0471     close $fh or warn "Failed to close: '$file_name': $!";
0472   }
0473 }