/kf6-qt6/sdk/kde-dev-scripts/extractrc

0001 #! /usr/bin/env perl
0002
0003 ### TODO: other copyrights, license?
0004 # Copyright (c) 2004 Richard Evans <rich@ridas.com>
0005
0006 sub usage
0007 {
0008   warn <<"EOF";
0009
0010 extractrc [flags] filenames
0011
0012 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and
0013 writes on standard output (usually redirected to rc.cpp) the equivalent
0014 i18n() calls so that xgettext can parse them.
0015
0016 --tag=name        : Also extract the tag name(s). Repeat the flag to specify
0017                     multiple names: --tag=tag_one --tag=tag_two
0018
0019 --tag-group=group : Use a group of tags - uses 'default' if omitted.
0020                     Valid groups are: @{[TAG_GROUPS()]}
0021
0022 --context=name    : Give i18n calls a context name: i18nc("name", ...)
0023 --lines           : Include source line numbers in comments (deprecated, it is switched on by default now)
0024 --cstart=chars    : Start of to-EOL style comments in output, defaults to //
0025 --language=lang   : Create i18n calls appropriate for KDE bindings
0026                     in the given language. Currently known languages:
0027                     C++ (default), Python
0028 --ignore-no-input : Do not warn if there were no filenames specified
0029 --help|?          : Display this summary
0030 --no-unescape-xml : Don't do xml unescaping
0031
0032 EOF
0033
0034   exit;
0035 }
0036
0037 ###########################################################################################
0038
0039 use strict;
0040 use warnings;
0041 use Getopt::Long;
0042 use Data::Dumper;
0043
0044 use constant TAG_GROUP =>
0045 {
0046   default => "[tT][eE][xX][tT]|title|string|whatsthis|tooltip|label",
0047   koffice => "Example|GroupName|Text|Comment|Syntax|TypeName",
0048   none    => "",
0049 };
0050
0051 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP};
0052
0053 # Specification to extract nice element-context for strings.
0054 use constant ECTX_SPEC =>
0055 {
0056   # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...}
0057   # Order of attributes determines their order in the extracted comment.
0058   "ui" => {
0059     "widget" => [10, ["class", "name"]],
0060     "item" => [15, []],
0061     "property" => [20, ["name"]],
0062     "attribute" => [20, ["name"]],
0063   },
0064   "rc" => {
0065     "Menu" => [10, ["name"]],
0066     "ToolBar" => [10, ["name"]],
0067   },
0068   "kcfg" => {
0069     "group" => [10, ["name"]],
0070     "entry" => [20, ["name"]],
0071     "whatsthis" => [30, []],
0072     "tooltip" => [30, []],
0073     "label" => [30, []],
0074   },
0075 };
0076
0077 # Specification to exclude strings by trailing section of element-context.
0078 use constant ECTX_EXCLUDE =>
0079 [
0080     # Data structure: [[tag, attribute, attrvalue], [...]]
0081     # Empty ("") attribute means all elements with given tag,
0082     # empty attrvalue means element with given tag and attribute of any value.
0083     [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]],
0084     [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]],
0085     [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]],
0086     [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]],
0087     [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]],
0088     [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]],
0089     [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]],
0090 ];
0091
0092 # The parts between the tags of the extensions will be copied verbatim
0093 # Same data structure as in ECTX_EXCLUDE, but per extension.
0094 my %EXTENSION_VERBATIM_TAGS = (
0095        "kcfg" => [["code", "", ""], ["default", "code", "true"],
0096                   ["min", "code", "true"], ["max", "code", "true"]],
0097      );
0098
0099 # Add attribute lists as hashes, for membership checks.
0100 for my $ext ( keys %{&ECTX_SPEC} ) {
0101   for my $tag ( keys %{ECTX_SPEC->{$ext}} ) {
0102     my $arr = ECTX_SPEC->{$ext}{$tag}[1];
0103     ECTX_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}};
0104   }
0105 }
0106
0107 ###########################################################################################
0108 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions
0109
0110 GetOptions ( "tag=s"       => \my @opt_extra_tags,
0111              "tag-group=s" => \my $opt_tag_group,
0112              "context=s"   => \my $opt_context,       # I18N context
0113              "lines"       => \my $opt_lines,
0114              "cstart=s"    => \my $opt_cstart,
0115              "language=s"  => \my $opt_language,
0116              "ignore-no-input" => \my $opt_ignore_no_input,
0117              "no-unescape-xml" => \my $opt_no_unescape_xml,
0118              "help|?"      => \&usage );
0119
0120 unless( @ARGV )
0121 {
0122   warn "No filename specified" unless $opt_ignore_no_input;
0123   exit;
0124 }
0125
0126 $opt_tag_group ||= "default";
0127
0128 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS
0129     unless exists TAG_GROUP->{$opt_tag_group};
0130
0131 my $tags = TAG_GROUP->{$opt_tag_group};
0132 my $extra_tags  = join "", map "|" . quotemeta, @opt_extra_tags;
0133 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/;    # Precompile regexp
0134 my $cstart = $opt_cstart; # no default, selected by language if not given
0135 my $language = $opt_language || "C++";
0136 my $ectx_known_exts = join "|", keys %{&ECTX_SPEC};
0137
0138 ###########################################################################################
0139
0140 # Unescape basic XML entities.
0141 sub unescape_xml ($) {
0142     my $text = shift;
0143
0144     if (not $opt_no_unescape_xml) {
0145         $text =~ s/&lt;/</g;
0146         $text =~ s/&gt;/>/g;
0147         $text =~ s/&quot;/"/g;
0148         $text =~ s/&amp;/&/g;
0149     }
0150
0151     return $text;
0152 }
0153
0154 # Convert uic to C escaping.
0155 sub escape_uic_to_c ($) {
0156     my $text = shift;
0157
0158     $text = unescape_xml($text);
0159
0160     $text =~ s/\\/\\\\/g; # escape \
0161     $text =~ s/\"/\\\"/g; # escape "
0162     $text =~ s/\r//g; # remove CR (Carriage Return)
0163     $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that.
0164
0165     return $text;
0166 }
0167
0168 ###########################################################################################
0169
0170 sub dummy_call_infix {
0171     my ($cstart, $stend, $ctxt, $text, @cmnts) = @_;
0172     for my $cmnt (@cmnts) {
0173         print qq|$cstart $cmnt\n|;
0174     }
0175     if (defined $text) {
0176         $text = escape_uic_to_c($text);
0177         if (defined $ctxt) {
0178             $ctxt = escape_uic_to_c($ctxt);
0179             print qq|i18nc("$ctxt", "$text")$stend\n|;
0180         } else {
0181             print qq|i18n("$text")$stend\n|;
0182         }
0183     }
0184 }
0185
0186 my %dummy_calls = (
0187     "C++" => sub {
0188         dummy_call_infix($cstart || "//", ";", @_);
0189     },
0190     "Python" => sub {
0191         dummy_call_infix($cstart || "#", "", @_);
0192     },
0193 );
0194
0195 die "unknown language '$language'" if not defined $dummy_calls{$language};
0196 my $dummy_call = $dummy_calls{$language};
0197
0198 # Program start proper - NB $. is the current line number
0199
0200 for my $file_name ( @ARGV )
0201 {
0202   my $fh;
0203
0204   unless ( open $fh, "<", $file_name )
0205   {
0206     # warn "Failed to open: '$file_name': $!";
0207     next;
0208   }
0209
0210   # Ready element-context extraction.
0211   my $ectx_ext;
0212   my $ectx_string;
0213   if ( $file_name =~ /\.($ectx_known_exts)(\.(in|cmake))?$/ ) {
0214     $ectx_ext = $1;
0215     my $ectx_tag_gr = join "|", keys %{ECTX_SPEC->{$ectx_ext}};
0216     $ectx_string = qr/($ectx_tag_gr)( [^>]*)?>/; # precompile regexp
0217   }
0218
0219   my $string          = "";
0220   my $origstring      = "";
0221   my $in_text         = 0;
0222   my $start_line_no   = 0;
0223   my $in_skipped_prop = 0;
0224   my $tag = "";
0225   my $attr = "";
0226   my $context = "";
0227   my $notr = "";
0228
0229   # Element-context data: [[level, tag, [[attribute, value], ...]], ...]
0230   # such that subarrays are ordered increasing by level.
0231   my @ectx = ();
0232
0233   # All comments to pending dummy call.
0234   my @comments = ();
0235
0236   while ( <$fh> )
0237   {
0238      if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ )
0239      {
0240              print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n";
0241              last;
0242      }
0243
0244      chomp;
0245
0246      $string .= "\n" . $_;
0247      $origstring = $string;
0248
0249      # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated
0250
0251      if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ )
0252      {
0253        $in_skipped_prop = 1;
0254      }
0255      elsif ( $in_skipped_prop and $string =~ /<\/property/ )
0256      {
0257        $string          = "";
0258        $in_skipped_prop = 0;
0259      }
0260
0261      $context = $opt_context unless $in_text;
0262      $notr = "" unless $in_text;
0263
0264      unless ( $in_skipped_prop or $in_text )
0265      {
0266        # Check if this line contains context-worthy element.
0267        if (    $ectx_ext
0268            and ( ($tag, $attr) = $string =~ /<$ectx_string/ ) # no /o here
0269            and exists ECTX_SPEC->{$ectx_ext}{$tag} )
0270        {
0271          my @atts;
0272          for my $ectx_att ( @{ECTX_SPEC->{$ectx_ext}{$tag}[1]} )
0273          {
0274            if ( $attr and $attr =~ /\b$ectx_att\s*=\s*(["'])([^"']*?)\1/ )
0275            {
0276              my $aval = $2;
0277              push @atts, [$ectx_att, $aval];
0278            }
0279          }
0280          # Kill all tags in element-context with level higer or equal to this,
0281          # and add it to the end.
0282          my $clevel = ECTX_SPEC->{$ectx_ext}{$tag}[0];
0283          for ( my $i = 0; $i < @ectx; ++$i )
0284          {
0285            if ( $clevel <= $ectx[$i][0] )
0286            {
0287              @ectx = @ectx[0 .. ($i - 1)];
0288              last;
0289            }
0290          }
0291          push @ectx, [$clevel, $tag, [@atts]];
0292        }
0293
0294        if ( ($tag, $attr) = $string =~ /<$text_string/o )
0295        {
0296          my ($attr_comment) = $attr =~ /\bcomment=\"([^\"]*)\"/ if $attr;
0297          $context = $attr_comment if $attr_comment;
0298          my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr;
0299          $context = $attr_context if $attr_context;
0300          # It is unlikely that both attributes 'context' and 'comment'
0301          # will be present, but if so happens, 'context' has priority.
0302          my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr;
0303          push @comments, "i18n: $attr_extracomment" if $attr_extracomment;
0304
0305          my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr;
0306          $notr = $attr_notr if $attr_notr;
0307
0308          my $nongreedystring = $string;
0309          $string        =~ s/^.*<$text_string//so;
0310          $nongreedystring  =~ s/^.*?<$text_string//so;
0311          if ($string cmp $nongreedystring)
0312          {
0313             print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n";
0314          }
0315          if ( not $attr or $attr !~ /\/ *$/ )
0316          {
0317            $in_text       =  1;
0318            $start_line_no =  $.;
0319          }
0320        }
0321        else
0322        {
0323          @comments = ();
0324          $string = "";
0325        }
0326      }
0327
0328      next unless $in_text;
0329      next unless $string =~ /<\/$text_string/o;
0330
0331      my $text = $string;
0332      $text =~ s/<\/$text_string.*$//o;
0333
0334      if ( $text cmp "" )
0335      {
0336        # See if the string should be excluded by trailing element-context.
0337        my $exclude_by_ectx = 0;
0338        my @rev_ectx = reverse @ectx;
0339        for my $ectx_tail (@{&ECTX_EXCLUDE})
0340        {
0341          my @rev_ectx_tail = reverse @{$ectx_tail};
0342          my $i = 0;
0343          $exclude_by_ectx = (@rev_ectx > 0 and @rev_ectx_tail > 0);
0344          while ($i < @rev_ectx and $i < @rev_ectx_tail)
0345          {
0346            my ($tag, $attr, $aval) = @{$rev_ectx_tail[$i]};
0347            $exclude_by_ectx = (not $tag or ($tag eq $rev_ectx[$i][1]));
0348            if ($exclude_by_ectx and $attr)
0349            {
0350              $exclude_by_ectx = 0;
0351              for my $ectx_attr_aval (@{$rev_ectx[$i][2]})
0352              {
0353                if ($attr eq $ectx_attr_aval->[0])
0354                {
0355                  $exclude_by_ectx = $aval ? $aval eq $ectx_attr_aval->[1] : 1;
0356                  last;
0357                }
0358              }
0359            }
0360            last if not $exclude_by_ectx;
0361            ++$i;
0362          }
0363          last if $exclude_by_ectx;
0364        }
0365
0366        if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true"))
0367        {
0368          push @comments, "Manually excluded message at $file_name line $.";
0369        }
0370        elsif ( $exclude_by_ectx )
0371        {
0372          push @comments, "Automatically excluded message at $file_name line $.";
0373        }
0374        else
0375        {
0376          (my $norm_fname = $file_name) =~ s/^\.\///;
0377          push @comments, "i18n: file: $norm_fname:$.";
0378          if ( @ectx ) {
0379            # Format element-context.
0380            my @tag_gr;
0381            for my $tgr (reverse @ectx)
0382            {
0383              my @attr_gr;
0384              for my $agr ( @{$tgr->[2]} )
0385              {
0386                #push @attr_gr, "$agr->[0]=$agr->[1]";
0387                push @attr_gr, "$agr->[1]"; # no real nead for attribute name
0388              }
0389              my $attr = join(", ", @attr_gr);
0390              push @tag_gr, "$tgr->[1] ($attr)" if $attr;
0391              push @tag_gr, "$tgr->[1]" if not $attr;
0392            }
0393            my $ectx_str = join ", ", @tag_gr;
0394            push @comments, "i18n: ectx: $ectx_str";
0395          }
0396          push @comments, "xgettext: no-c-format" if $text =~ /%/o;
0397          $dummy_call->($context, $text, @comments);
0398          @comments = ();
0399        }
0400      }
0401      else
0402      {
0403        push @comments, "Skipped empty message at $file_name line $.";
0404      }
0405
0406      $string  =~ s/^.*<\/$text_string//o;
0407      $in_text =  0;
0408
0409      # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files.
0410
0411      warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i;
0412   }
0413
0414   close $fh or warn "Failed to close: '$file_name': $!";
0415
0416   die "parsing error in $file_name" if $in_text;
0417
0418   if ($ectx_ext && exists $EXTENSION_VERBATIM_TAGS{$ectx_ext})
0419   {
0420     unless ( open $fh, "<", $file_name )
0421     {
0422       # warn "Failed to open: '$file_name': $!";
0423       next;
0424     }
0425
0426     while ( <$fh> )
0427     {
0428       chomp;
0429       $string .= "\n" . $_;
0430
0431       for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$ectx_ext} })
0432       {
0433         my ($tag, $attr, $aval) = @{$elspec};
0434         my $rx;
0435         if ($attr and $aval) {
0436             $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s
0437         } elsif ($attr) {
0438             $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s
0439         } else {
0440             $rx = qr/<$tag>(.*)<\/$tag>/s
0441         }
0442         if ($string =~ $rx)
0443         {
0444           # Add comment before any line that has an i18n substring in it.
0445           my @matched = split /\n/, $1;
0446           my $mlno = $.;
0447           (my $norm_fname = $file_name) =~ s/^\.\///;
0448           for my $mline (@matched) {
0449             # Assume verbatim code is in language given by --language.
0450             # Therefore format only comment, and write code line as-is.
0451             if ($mline =~ /i18n/) {
0452               $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno"));
0453             }
0454             $mline = unescape_xml($mline);
0455             print "$mline\n";
0456             ++$mlno;
0457           }
0458           $string = "";
0459         }
0460       }
0461     }
0462
0463     close $fh or warn "Failed to close: '$file_name': $!";
0464   }
0465 }