Warning, /sdk/kde-dev-scripts/extractrc is written in an unsupported language. File is not indexed.

0001 #! /usr/bin/env perl
0002 
0003 ### TODO: other copyrights, license?
0004 # Copyright (c) 2004 Richard Evans <rich@ridas.com>
0005 
0006 sub usage
0007 {
0008   warn <<"EOF";
0009 
0010 extractrc [flags] filenames
0011 
0012 This script extracts messages from designer (.ui) and XMLGUI (.rc) files and
0013 writes on standard output (usually redirected to rc.cpp) the equivalent
0014 i18n() calls so that xgettext can parse them.
0015 
0016 --tag=name        : Also extract the tag name(s). Repeat the flag to specify 
0017                     multiple names: --tag=tag_one --tag=tag_two
0018 
0019 --tag-group=group : Use a group of tags - uses 'default' if omitted.
0020                     Valid groups are: @{[TAG_GROUPS()]}
0021 
0022 --context=name    : Give i18n calls a context name: i18nc("name", ...)
0023 --lines           : Include source line numbers in comments (deprecated, it is switched on by default now)
0024 --cstart=chars    : Start of to-EOL style comments in output, defaults to //
0025 --language=lang   : Create i18n calls appropriate for KDE bindings
0026                     in the given language. Currently known languages:
0027                     C++ (default), Python
0028 --ignore-no-input : Do not warn if there were no filenames specified
0029 --help|?          : Display this summary
0030 --no-unescape-xml : Don't do xml unescaping
0031 
0032 EOF
0033 
0034   exit;
0035 }
0036 
0037 ###########################################################################################
0038 
0039 use strict;
0040 use warnings;
0041 use Getopt::Long;
0042 use Data::Dumper;
0043 
0044 use constant TAG_GROUP => 
0045 {
0046   default => "[tT][eE][xX][tT]|title|string|whatsthis|tooltip|label",
0047   koffice => "Example|GroupName|Text|Comment|Syntax|TypeName",
0048   none    => "",
0049 };
0050 
0051 use constant TAG_GROUPS => join ", ", map "'$_'", sort keys %{&TAG_GROUP};
0052 
0053 # Specification to extract nice element-context for strings.
0054 use constant ECTX_SPEC =>
0055 {
0056   # Data structure: extension => {tag => [ctxlevel, [attribute, ...]], ...}
0057   # Order of attributes determines their order in the extracted comment.
0058   "ui" => {
0059     "widget" => [10, ["class", "name"]],
0060     "item" => [15, []],
0061     "property" => [20, ["name"]],
0062     "attribute" => [20, ["name"]],
0063   },
0064   "rc" => {
0065     "Menu" => [10, ["name"]],
0066     "ToolBar" => [10, ["name"]],
0067   },
0068   "kcfg" => {
0069     "group" => [10, ["name"]],
0070     "entry" => [20, ["name"]],
0071     "whatsthis" => [30, []],
0072     "tooltip" => [30, []],
0073     "label" => [30, []],
0074   },
0075 };
0076 
0077 # Specification to exclude strings by trailing section of element-context.
0078 use constant ECTX_EXCLUDE =>
0079 [
0080     # Data structure: [[tag, attribute, attrvalue], [...]]
0081     # Empty ("") attribute means all elements with given tag,
0082     # empty attrvalue means element with given tag and attribute of any value.
0083     [["widget", "class", "KFontComboBox"], ["item", "", ""], ["property", "", ""]],
0084     [["widget", "class", "KPushButton"], ["attribute", "name", "buttonGroup"]],
0085     [["widget", "class", "QRadioButton"], ["attribute", "name", "buttonGroup"]],
0086     [["widget", "class", "QToolButton"], ["attribute", "name", "buttonGroup"]],
0087     [["widget", "class", "QCheckBox"], ["attribute", "name", "buttonGroup"]],
0088     [["widget", "class", "QPushButton"], ["attribute", "name", "buttonGroup"]],
0089     [["widget", "class", "KTimeZoneWidget"], ["property", "name", "text"]],
0090 ];
0091 
0092 # The parts between the tags of the extensions will be copied verbatim
0093 # Same data structure as in ECTX_EXCLUDE, but per extension.
0094 my %EXTENSION_VERBATIM_TAGS = (
0095        "kcfg" => [["code", "", ""], ["default", "code", "true"],
0096                   ["min", "code", "true"], ["max", "code", "true"]],
0097      );
0098 
0099 # Add attribute lists as hashes, for membership checks.
0100 for my $ext ( keys %{&ECTX_SPEC} ) {
0101   for my $tag ( keys %{ECTX_SPEC->{$ext}} ) {
0102     my $arr = ECTX_SPEC->{$ext}{$tag}[1];
0103     ECTX_SPEC->{$ext}{$tag}[2] = {map {$_ => 1} @{$arr}};
0104   }
0105 }
0106 
0107 ###########################################################################################
0108 # Add options here as necessary - perldoc Getopt::Long for details on GetOptions
0109 
0110 GetOptions ( "tag=s"       => \my @opt_extra_tags,
0111              "tag-group=s" => \my $opt_tag_group,
0112              "context=s"   => \my $opt_context,       # I18N context
0113              "lines"       => \my $opt_lines,
0114              "cstart=s"    => \my $opt_cstart,
0115              "language=s"  => \my $opt_language,
0116              "ignore-no-input" => \my $opt_ignore_no_input,
0117              "no-unescape-xml" => \my $opt_no_unescape_xml,
0118              "help|?"      => \&usage );
0119 
0120 unless( @ARGV )
0121 {
0122   warn "No filename specified" unless $opt_ignore_no_input;
0123   exit;
0124 }
0125 
0126 $opt_tag_group ||= "default";
0127 
0128 die "Unknown tag group: '$opt_tag_group', should be one of " . TAG_GROUPS
0129     unless exists TAG_GROUP->{$opt_tag_group};
0130 
0131 my $tags = TAG_GROUP->{$opt_tag_group};
0132 my $extra_tags  = join "", map "|" . quotemeta, @opt_extra_tags;
0133 my $text_string = qr/($tags$extra_tags)( [^>]*)?>/;    # Precompile regexp
0134 my $cstart = $opt_cstart; # no default, selected by language if not given
0135 my $language = $opt_language || "C++";
0136 my $ectx_known_exts = join "|", keys %{&ECTX_SPEC};
0137 
0138 ###########################################################################################
0139 
0140 # Unescape basic XML entities.
0141 sub unescape_xml ($) {
0142     my $text = shift;
0143 
0144     if (not $opt_no_unescape_xml) {
0145         $text =~ s/&lt;/</g;
0146         $text =~ s/&gt;/>/g;
0147         $text =~ s/&quot;/"/g;
0148         $text =~ s/&amp;/&/g;
0149     }
0150 
0151     return $text;
0152 }
0153 
0154 # Convert uic to C escaping.
0155 sub escape_uic_to_c ($) {
0156     my $text = shift;
0157 
0158     $text = unescape_xml($text);
0159 
0160     $text =~ s/\\/\\\\/g; # escape \
0161     $text =~ s/\"/\\\"/g; # escape "
0162     $text =~ s/\r//g; # remove CR (Carriage Return)
0163     $text =~ s/\n/\\n\"\n\"/g; # escape LF (Line Feed). uic also change the code line at a LF, we do not do that.
0164 
0165     return $text;
0166 }
0167 
0168 ###########################################################################################
0169 
0170 sub dummy_call_infix {
0171     my ($cstart, $stend, $ctxt, $text, @cmnts) = @_;
0172     for my $cmnt (@cmnts) {
0173         print qq|$cstart $cmnt\n|;
0174     }
0175     if (defined $text) {
0176         $text = escape_uic_to_c($text);
0177         if (defined $ctxt) {
0178             $ctxt = escape_uic_to_c($ctxt);
0179             print qq|i18nc("$ctxt", "$text")$stend\n|;
0180         } else {
0181             print qq|i18n("$text")$stend\n|;
0182         }
0183     }
0184 }
0185 
0186 my %dummy_calls = (
0187     "C++" => sub {
0188         dummy_call_infix($cstart || "//", ";", @_);
0189     },
0190     "Python" => sub {
0191         dummy_call_infix($cstart || "#", "", @_);
0192     },
0193 );
0194 
0195 die "unknown language '$language'" if not defined $dummy_calls{$language};
0196 my $dummy_call = $dummy_calls{$language};
0197 
0198 # Program start proper - NB $. is the current line number
0199 
0200 for my $file_name ( @ARGV )
0201 {
0202   my $fh;
0203 
0204   unless ( open $fh, "<", $file_name )
0205   {
0206     # warn "Failed to open: '$file_name': $!";
0207     next;
0208   }
0209 
0210   # Ready element-context extraction.
0211   my $ectx_ext;
0212   my $ectx_string;
0213   if ( $file_name =~ /\.($ectx_known_exts)(\.(in|cmake))?$/ ) {
0214     $ectx_ext = $1;
0215     my $ectx_tag_gr = join "|", keys %{ECTX_SPEC->{$ectx_ext}};
0216     $ectx_string = qr/($ectx_tag_gr)( [^>]*)?>/; # precompile regexp
0217   }
0218 
0219   my $string          = "";
0220   my $origstring      = "";
0221   my $in_text         = 0;
0222   my $start_line_no   = 0;
0223   my $in_skipped_prop = 0;
0224   my $tag = "";
0225   my $attr = "";
0226   my $context = "";
0227   my $notr = "";
0228 
0229   # Element-context data: [[level, tag, [[attribute, value], ...]], ...]
0230   # such that subarrays are ordered increasing by level.
0231   my @ectx = ();
0232 
0233   # All comments to pending dummy call.
0234   my @comments = ();
0235 
0236   while ( <$fh> )
0237   {
0238      if ( $. == 1 and $_ !~ /^(?:<!DOCTYPE|<\?xml|<!--|<ui version=)/ )
0239      {
0240              print STDERR "Warning: $file_name does not have a recognised first line and texts won't be extracted\n";
0241              last;
0242      }
0243 
0244      chomp;
0245 
0246      $string .= "\n" . $_;
0247      $origstring = $string;
0248      
0249      # 'database', 'associations', 'populationText' and 'styleSheet' properties contain strings that shouldn't be translated
0250 
0251      if ( $in_skipped_prop == 0 and $string =~ /<property name=\"(?:database|associations|populationText|styleSheet)\"/ )
0252      {
0253        $in_skipped_prop = 1;
0254      }
0255      elsif ( $in_skipped_prop and $string =~ /<\/property/ )
0256      {
0257        $string          = "";
0258        $in_skipped_prop = 0;
0259      }
0260 
0261      $context = $opt_context unless $in_text;
0262      $notr = "" unless $in_text;
0263 
0264      unless ( $in_skipped_prop or $in_text )
0265      {
0266        # Check if this line contains context-worthy element.
0267        if (    $ectx_ext
0268            and ( ($tag, $attr) = $string =~ /<$ectx_string/ ) # no /o here
0269            and exists ECTX_SPEC->{$ectx_ext}{$tag} )
0270        {
0271          my @atts;
0272          for my $ectx_att ( @{ECTX_SPEC->{$ectx_ext}{$tag}[1]} )
0273          {
0274            if ( $attr and $attr =~ /\b$ectx_att\s*=\s*(["'])([^"']*?)\1/ )
0275            {
0276              my $aval = $2;
0277              push @atts, [$ectx_att, $aval];
0278            }
0279          }
0280          # Kill all tags in element-context with level higer or equal to this,
0281          # and add it to the end.
0282          my $clevel = ECTX_SPEC->{$ectx_ext}{$tag}[0];
0283          for ( my $i = 0; $i < @ectx; ++$i )
0284          {
0285            if ( $clevel <= $ectx[$i][0] )
0286            {
0287              @ectx = @ectx[0 .. ($i - 1)];
0288              last;
0289            }
0290          }
0291          push @ectx, [$clevel, $tag, [@atts]];
0292        }
0293 
0294        if ( ($tag, $attr) = $string =~ /<$text_string/o )
0295        {
0296          my ($attr_comment) = $attr =~ /\bcomment=\"([^\"]*)\"/ if $attr;
0297          $context = $attr_comment if $attr_comment;
0298          my ($attr_context) = $attr =~ /\bcontext=\"([^\"]*)\"/ if $attr;
0299          $context = $attr_context if $attr_context;
0300          # It is unlikely that both attributes 'context' and 'comment'
0301          # will be present, but if so happens, 'context' has priority.
0302          my ($attr_extracomment) = $attr =~ /\bextracomment=\"([^\"]*)\"/ if $attr;
0303          push @comments, "i18n: $attr_extracomment" if $attr_extracomment;
0304 
0305          my ($attr_notr) = $attr =~ /\bnotr=\"([^\"]*)\"/ if $attr;
0306          $notr = $attr_notr if $attr_notr;
0307 
0308          my $nongreedystring = $string;
0309          $string        =~ s/^.*<$text_string//so;
0310          $nongreedystring  =~ s/^.*?<$text_string//so;
0311          if ($string cmp $nongreedystring)
0312          {
0313             print STDERR "Warning: Line $origstring in file $file_name has more than one tag to extract on the same line, that is not supported by extractrc\n";
0314          }
0315          if ( not $attr or $attr !~ /\/ *$/ )
0316          {
0317            $in_text       =  1;
0318            $start_line_no =  $.;
0319          }
0320        }
0321        else
0322        {
0323          @comments = ();
0324          $string = "";
0325        }
0326      }
0327 
0328      next unless $in_text;
0329      next unless $string =~ /<\/$text_string/o;
0330 
0331      my $text = $string;
0332      $text =~ s/<\/$text_string.*$//o;
0333 
0334      if ( $text cmp "" )
0335      {
0336        # See if the string should be excluded by trailing element-context.
0337        my $exclude_by_ectx = 0;
0338        my @rev_ectx = reverse @ectx;
0339        for my $ectx_tail (@{&ECTX_EXCLUDE})
0340        {
0341          my @rev_ectx_tail = reverse @{$ectx_tail};
0342          my $i = 0;
0343          $exclude_by_ectx = (@rev_ectx > 0 and @rev_ectx_tail > 0);
0344          while ($i < @rev_ectx and $i < @rev_ectx_tail)
0345          {
0346            my ($tag, $attr, $aval) = @{$rev_ectx_tail[$i]};
0347            $exclude_by_ectx = (not $tag or ($tag eq $rev_ectx[$i][1]));
0348            if ($exclude_by_ectx and $attr)
0349            {
0350              $exclude_by_ectx = 0;
0351              for my $ectx_attr_aval (@{$rev_ectx[$i][2]})
0352              {
0353                if ($attr eq $ectx_attr_aval->[0])
0354                {
0355                  $exclude_by_ectx = $aval ? $aval eq $ectx_attr_aval->[1] : 1;
0356                  last;
0357                }
0358              }
0359            }
0360            last if not $exclude_by_ectx;
0361            ++$i;
0362          }
0363          last if $exclude_by_ectx;
0364        }
0365 
0366        if (($context and $context eq "KDE::DoNotExtract") or ($notr eq "true"))
0367        {
0368          push @comments, "Manually excluded message at $file_name line $.";
0369        }
0370        elsif ( $exclude_by_ectx )
0371        {
0372          push @comments, "Automatically excluded message at $file_name line $.";
0373        }
0374        else
0375        {
0376          (my $norm_fname = $file_name) =~ s/^\.\///;
0377          push @comments, "i18n: file: $norm_fname:$.";
0378          if ( @ectx ) {
0379            # Format element-context.
0380            my @tag_gr;
0381            for my $tgr (reverse @ectx)
0382            {
0383              my @attr_gr;
0384              for my $agr ( @{$tgr->[2]} )
0385              {
0386                #push @attr_gr, "$agr->[0]=$agr->[1]";
0387                push @attr_gr, "$agr->[1]"; # no real nead for attribute name
0388              }
0389              my $attr = join(", ", @attr_gr);
0390              push @tag_gr, "$tgr->[1] ($attr)" if $attr;
0391              push @tag_gr, "$tgr->[1]" if not $attr;
0392            }
0393            my $ectx_str = join ", ", @tag_gr;
0394            push @comments, "i18n: ectx: $ectx_str";
0395          }
0396          push @comments, "xgettext: no-c-format" if $text =~ /%/o;
0397          $dummy_call->($context, $text, @comments);
0398          @comments = ();
0399        }
0400      }
0401      else
0402      {
0403        push @comments, "Skipped empty message at $file_name line $.";
0404      }
0405 
0406      $string  =~ s/^.*<\/$text_string//o;
0407      $in_text =  0;
0408 
0409      # Text can be multiline in .ui files (possibly), but we warn about it in XMLGUI .rc files.
0410 
0411      warn "there is <text> floating in: '$file_name'" if $. != $start_line_no and $file_name =~ /\.rc$/i;
0412   }
0413 
0414   close $fh or warn "Failed to close: '$file_name': $!";
0415 
0416   die "parsing error in $file_name" if $in_text;
0417 
0418   if ($ectx_ext && exists $EXTENSION_VERBATIM_TAGS{$ectx_ext})
0419   {
0420     unless ( open $fh, "<", $file_name )
0421     {
0422       # warn "Failed to open: '$file_name': $!";
0423       next;
0424     }
0425 
0426     while ( <$fh> )
0427     {
0428       chomp;
0429       $string .= "\n" . $_;
0430 
0431       for my $elspec (@{ $EXTENSION_VERBATIM_TAGS{$ectx_ext} })
0432       {
0433         my ($tag, $attr, $aval) = @{$elspec};
0434         my $rx;
0435         if ($attr and $aval) {
0436             $rx = qr/<$tag[^<]*$attr=["']$aval["'][^<]*>(.*)<\/$tag>/s
0437         } elsif ($attr) {
0438             $rx = qr/<$tag[^<]*$attr=[^<]*>(.*)<\/$tag>/s
0439         } else {
0440             $rx = qr/<$tag>(.*)<\/$tag>/s
0441         }
0442         if ($string =~ $rx)
0443         {
0444           # Add comment before any line that has an i18n substring in it.
0445           my @matched = split /\n/, $1;
0446           my $mlno = $.;
0447           (my $norm_fname = $file_name) =~ s/^\.\///;
0448           for my $mline (@matched) {
0449             # Assume verbatim code is in language given by --language.
0450             # Therefore format only comment, and write code line as-is.
0451             if ($mline =~ /i18n/) {
0452               $dummy_call->(undef, undef, ("i18n: file: $norm_fname:$mlno"));
0453             }
0454             $mline = unescape_xml($mline);
0455             print "$mline\n";
0456             ++$mlno;
0457           }
0458           $string = "";
0459         }
0460       }
0461     }
0462 
0463     close $fh or warn "Failed to close: '$file_name': $!";
0464   }
0465 }