File indexing completed on 2024-11-03 13:21:47

0001 #!/usr/bin/perl
0002 
0003 # converts a KPresenter document from the old format to the new one (v2)
0004 # due to the new text object
0005 
0006 use Time::Local;
0007 
use strict;
use warnings;

# Usage: <script> OLD_FILE NEW_FILE
#
# Reads OLD_FILE line by line (the old format is expected to have one tag per
# line) and writes the converted document to NEW_FILE:
#   * <DOC> gets a syntaxVersion="2" attribute,
#   * <TEXTOBJ gap=...> becomes margin=...,
#   * <PARAGRAPH> becomes <P> with the new alignment values and the text
#     object's type,
#   * the <LINE>/<OBJ>/<TYPE>/<FONT>/<COLOR>/<TEXT> structure inside a
#     paragraph is flattened into <TEXT font-attributes color="#rrggbb">...</TEXT>,
#   * <KEY> elements inside <PIXMAPS> get a generated name attribute.
# Everything else is copied unchanged.

@ARGV >= 2 or die "Usage: $0 <old-file> <new-file>\n";
my ($in_path, $out_path) = @ARGV;

# Three-argument open: the file names are taken literally, whatever
# characters they start or end with.
open(my $in,  '<', $in_path)  or die "Cannot open $in_path: $!";
open(my $out, '>', $out_path) or die "Cannot create $out_path: $!";

my $url            = '';  # url attribute of <DOC>; prefix of the generated pixmap names
my $obj_type       = '';  # objType attribute of the last <TEXTOBJ>, quotes included
my $inside_parag   = 0;   # are we between <PARAGRAPH> and </PARAGRAPH>?
my $inside_obj     = 0;   # are we between <OBJ> and </OBJ>?
my $inside_pixmaps = 0;   # are we inside the <PIXMAPS> tags?
my $current_text   = '';  # <TEXT ...> element being assembled, not yet closed
my $current_type   = 0;   # <TYPE value>: 0 = normal text, non-zero = white space

while (my $line = <$in>) {
    if ($line =~ /<DOC/) {
        # Store the url because this is a part of the "path" for the images
        $url = $1 if $line =~ /url=\"(.*?)\"/;
        $line =~ s/>$/ syntaxVersion=\"2\">/;
    }
    elsif ($line =~ /<PIXMAPS>/) {
        $inside_pixmaps = 1;
    }
    elsif ($line =~ /<\/PIXMAPS>/) {
        $inside_pixmaps = 0;
    }
    elsif ($line =~ /<TEXTOBJ/) {
        # Save object type of the TEXTOBJ tag; it is written to every <P> below
        $obj_type = $1 if $line =~ /objType=(\"[0-9]+\")/;
        $line =~ s/gap=/margin=/;
    }
    elsif ($line =~ /<PARAGRAPH/) {
        $inside_parag = 1;
        # \b instead of a literal space: a tag without attributes must be
        # renamed too, because its closing tag always becomes </P>.
        $line =~ s/<PARAGRAPH\b/<P/;
        # In the old format we had horzAlign="[0|1|2]" 0=left, 1=center, 2=right
        # In the new one it's align="..." and uses the Qt::AlignmentFlags enums.
        # Qt::AlignLeft=1, AlignRight=2, AlignHCenter=4
        if ($line =~ /horzAlign=\"([0-2]+)\"/) {
            (my $align = $1) =~ tr/01/14/;   # 0 -> 1, 1 -> 4, 2 stays 2
            $line =~ s/horzAlign=\"[0-2]+\"/align=\"$align\"/;
        }
        $line =~ s/>$/ type=$obj_type>/;
    }
    elsif ($line =~ /<\/PARAGRAPH>/) {
        $inside_parag = 0;
        $line =~ s/<\/PARAGRAPH/<\/P/;
        # Flush the last text element in front of the closing tag
        $line = $current_text . "</TEXT>\n" . $line if $current_text;
        $current_text = '';
    }
    elsif ($line =~ /<LINE/ || $line =~ /<\/LINE/) {
        $line = '' if $inside_parag;   # dropped inside a paragraph
    }
    elsif ($line =~ /<OBJ>/) {
        $inside_obj = 1;
        $line       = '';              # dropped
    }
    elsif ($line =~ /<\/OBJ>/) {
        $inside_obj = 0;
        $line       = '';              # dropped
    }
    elsif ($inside_obj) {
        # Lines inside <OBJ> are never copied; they only feed $current_text.
        # $to_print is whatever finished <TEXT> element is ready to be written.
        my $to_print = '';

        if ($line =~ /<TYPE value="([0-9]+)"/) {
            $current_type = $1;
            # White space is merged into the previous text element. If there
            # is none, it is the first element -> treat it as normal text.
            $current_type = 0 if $current_type && !$current_text;
            if (!$current_type) {
                # Write out the previous text element, if any ...
                $to_print = $current_text . "</TEXT>\n" if $current_text;
                # ... and start a new one
                $current_text = "      <TEXT ";
            }
        }
        elsif ($line =~ /<FONT/ && !$current_type) {
            # Keep only the attributes of the <FONT .../> tag
            $line =~ s/\s*<FONT //;
            $line =~ s/\/>//;
            chomp $line;
            $current_text .= $line;
        }
        elsif ($line =~ /<COLOR/ && !$current_type) {
            # A missing component counts as 0 instead of inheriting the value
            # of a previous <COLOR> tag.
            my $red   = $line =~ /red=\"([0-9]+)\"/   ? $1 : 0;
            my $green = $line =~ /green=\"([0-9]+)\"/ ? $1 : 0;
            my $blue  = $line =~ /blue=\"([0-9]+)\"/  ? $1 : 0;
            # Convert color to HTML representation
            $current_text .= sprintf(" color=\"#%02x%02x%02x\"", $red, $green, $blue);
        }
        elsif ($line =~ /<TEXT>(.*)<\/TEXT>/) {
            my $content = $1;
            if (!$current_type) {
                # Normal text: close the opening tag and append the text, but
                # do not write it yet - white space may still be merged in.
                my $text = '';
                foreach my $piece (split /(&[a-z]+;)/, $content) {
                    # Replace & by &amp; everywhere except in &lt; and &gt;
                    $piece =~ s/&/&amp;/g
                        unless $piece =~ /&lt;/ || $piece =~ /&gt;/;
                    $text .= $piece;
                }
                $current_text .= ">" . $text;
            }
            else {
                # White space: append it and close the text element
                $to_print     = $current_text . $content . "</TEXT>\n";
                $current_text = '';
            }
        }

        $line = $to_print;
    }
    elsif ($inside_pixmaps) {
        if ($line =~ /\s+name=\"/) {
            # This file is not version 2 but was created as "proper" tgz storage
            print "Found a name attribute, no need to create one.\n";
        }
        elsif ($line =~ /<KEY(.*)\/>/) {
            # Plain old key: build the name from url, filename and timestamp
            my $key = $1;
            # Note: the .*? is needed because it would be too greedy otherwise
            my ($filename) = $key =~ /filename=\"(.*?)\"/;
            my %stamp;
            foreach my $field (qw(year month day hour minute second)) {
                ($stamp{$field}) = $key =~ /$field=\"(\d+)\"/;
            }

            if (defined $filename && !grep { !defined } values %stamp) {
                # In Perl the month is <0...11>!!!
                my $timestamp = timegm($stamp{second}, $stamp{minute}, $stamp{hour},
                                       $stamp{day}, $stamp{month} - 1, $stamp{year});
                my $timestring = scalar gmtime($timestamp);
                # gmtime pads days < 10 with a second space; remove it
                $timestring =~ s/  / /;
                my $name = $url . $filename . "_" . $timestring;
                $line =~ s/\/>/ name=\"$name\" \/>/;
            }
            else {
                # Never build a name from the values of a previous <KEY>
                warn "Incomplete <KEY> element at line $., left unchanged\n";
            }
        }
    }

    print {$out} $line or die "Cannot write to $out_path: $!";
}

close($in);
# Buffered write errors only show up when the handle is closed
close($out) or die "Cannot write to $out_path: $!";