File indexing completed on 2024-06-09 05:04:43

0001 /*  This is RTF to HTML converter, implemented as a text filter, generally.
0002     Copyright (C) 2003 Valentin Lavrinenko,
0004     available at
0006     Original available under the terms of the GNU LGPL2, and according
0007     to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */
0009 /***************************************************************************
0010  *                                                                         *
0011  *   This program is free software; you can redistribute it and/or         *
0012  *   modify it under the terms of the GNU General Public License as        *
0013  *   published by the Free Software Foundation; either version 2 of        *
0014  *   the License or (at your option) version 3 or any later version        *
0015  *   accepted by the membership of KDE e.V. (or its successor approved     *
0016  *   by the membership of KDE e.V.), which shall act as a proxy            *
0017  *   defined in Section 14 of version 3 of the license.                    *
0018  *                                                                         *
0019  *   This program is distributed in the hope that it will be useful,       *
0020  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0021  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0022  *   GNU General Public License for more details.                          *
0023  *                                                                         *
0024  *   You should have received a copy of the GNU General Public License     *
0025  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0026  *                                                                         *
0027  ***************************************************************************/
0029 #include "rtf2html.h"
0030 #include "rtf_table.h"
0031 #include "rtf_tools.h"
0032 #include "rtf_keyword.h"
0033 #include "fmt_opts.h"
0035 #include <cstdlib>
0036 #include <stdexcept>
0037 #include <fstream>
0038 #include <iostream>
0039 #include <string>
0041 using Tellico::RTF2HTML;
0042 using namespace rtf;
0044 RTF2HTML::RTF2HTML(const QString& text) : m_text(text) {
0045 }
0047 QString RTF2HTML::toHTML() const {
0048    std::string str_in = m_text.toStdString();
0050    std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end();
0051    colorvect colortbl;
0052    fontmap fonttbl;
0053    std::string title;
0055    bool bAsterisk=false;
0056    fo_stack foStack;
0057    formatting_options cur_options;
0058    std::string html;
0059    html_text par_html(cur_options);
0061    /* CellDefs in rtf are really queer. We'll keep a list of them in main()
0062       and will give an iterator into this list to a row */
0063    table_cell_defs_list CellDefsList;
0064    table_cell_defs_list::iterator CurCellDefs;
0065    table_cell_def* tcdCurCellDef=new table_cell_def;
0066    table_cell* tcCurCell=new table_cell;
0067    table_row* trCurRow=new table_row;
0068    table* tblCurTable=new table;
0069    int iLastRowLeft=0, iLastRowHeight=0;
0070    std::string t_str;
0072    bool bInTable=false;
0073 //   int iDocWidth=12240;
0074 //   int iMarginLeft=1800;
0075    while(buf_in!=buf_in_end)
0076    {
0077       switch (*buf_in)
0078       {
0079       case '\\':
0080       {
0081          rtf_keyword kw(++buf_in);
0082          if (kw.is_control_char())
0083             switch (kw.control_char())
0084             {
0085             case '\\': case '{': case '}':
0086                par_html.write(kw.control_char());
0087                break;
0088             case '\'':
0089             {
0090                std::string stmp(1,*buf_in++);
0091                stmp+=*buf_in++;
0092                long int code=std::strtol(stmp.c_str(), NULL, 16);
0093                switch (code)
0094                {
0095                   case 167:
0096                      par_html.write("&bull;");
0097                      break;
0098                   case 188:
0099                      par_html.write("&hellip;");
0100                      break;
0101                   default:
0102                      par_html.write((char)code);
0103                }
0104                break;
0105             }
0106             case '*':
0107                bAsterisk=true;
0108                break;
0109             case '~':
0110                par_html.write("&nbsp;");
0111                break;
0112             case '\n':
0113                par_html.write("<br><br>");
0114                break;
0115             }
0116          else //kw.is_control_char
0117             if (bAsterisk)
0118             {
0119                bAsterisk=false;
0120                skip_group(buf_in);
0121             }
0122             else
0123             {
0124                switch (kw.keyword())
0125                {
0126                case rtf_keyword::rkw_filetbl:
0127                case rtf_keyword::rkw_stylesheet:
0128                case rtf_keyword::rkw_header:
0129                case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf:
0130                case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict:
0131                case rtf_keyword::rkw_object:
0132                   // we'll skip such groups
0133                   skip_group(buf_in);
0134                   break;
0135                // document title
0136                case rtf_keyword::rkw_info:
0137                {
0138                   int depth=1;
0139                   bool in_title=false;
0140                   while (depth>0)
0141                   {
0142 //                     std::cout<<std::string(buf_in).substr(0,20)<<"\t"<<depth<<std::endl;
0143                      switch (*buf_in)
0144                      {
0145                      case '\\':
0146                      {
0147                         rtf_keyword kw(++buf_in);
0148                         if (kw.keyword()==rtf_keyword::rkw_title)
0149                            in_title=true;
0150                         break;
0151                      }
0152                      case '{': ++depth; ++buf_in; break;
0153                      case '}': --depth; ++buf_in; in_title=false; break;
0154                      default: if (in_title) title+=*buf_in; ++buf_in; break;
0155                      }
0156                   }
0157                   break;
0158                }
0159                // color table
0160                case rtf_keyword::rkw_colortbl:
0161                {
0162                   color clr;
0163                   while (*buf_in!='}')
0164                   {
0165                      switch (*buf_in)
0166                      {
0167                      case '\\':
0168                      {
0169                         rtf_keyword kw(++buf_in);
0170                         switch (kw.keyword())
0171                         {
0172                         case rtf_keyword::rkw_red:
0173                            clr.r=kw.parameter();
0174                            break;
0175                         case rtf_keyword::rkw_green:
0176                            clr.g=kw.parameter();
0177                            break;
0178                         case rtf_keyword::rkw_blue:
0179                            clr.b=kw.parameter();
0180                            break;
0181                         default: break;
0182                         }
0183                         break;
0184                      }
0185                      case ';':
0186                         colortbl.push_back(clr);
0187                         ++buf_in;
0188                         break;
0189                      default:
0190                         ++buf_in;
0191                         break;
0192                      }
0193                   }
0194                   ++buf_in;
0195                   break;
0196                }
0197                // font table
0198                case rtf_keyword::rkw_fonttbl:
0199                {
0200                   font fnt;
0201                   int font_num=0;
0202                   bool full_name=false;
0203                   bool in_font=false;
0204                   while (! (*buf_in=='}' && !in_font))
0205                   {
0206                      switch (*buf_in)
0207                      {
0208                      case '\\':
0209                      {
0210                         rtf_keyword kw(++buf_in);
0211                         if (kw.is_control_char() && kw.control_char()=='*')
0212                            skip_group(buf_in);
0213                         else
0214                            switch (kw.keyword())
0215                            {
0216                            case rtf_keyword::rkw_f:
0217                               font_num=kw.parameter();
0218                               break;
0219                            case rtf_keyword::rkw_fprq:
0220                               fnt.pitch=kw.parameter();
0221                               break;
0222                            case rtf_keyword::rkw_fcharset:
0223                               fnt.charset=kw.parameter();
0224                               break;
0225                            case rtf_keyword::rkw_fnil:
0226                     ;
0227                               break;
0228                            case rtf_keyword::rkw_froman:
0229                     ;
0230                               break;
0231                            case rtf_keyword::rkw_fswiss:
0232                     ;
0233                               break;
0234                            case rtf_keyword::rkw_fmodern:
0235                     ;
0236                               break;
0237                            case rtf_keyword::rkw_fscript:
0238                     ;
0239                               break;
0240                            case rtf_keyword::rkw_fdecor:
0241                     ;
0242                               break;
0243                            default: break;
0244                            }
0245                         break;
0246                      }
0247                      case '{':
0248                         in_font=true;
0249                         ++buf_in;
0250                         break;
0251                      case '}':
0252                         in_font=false;
0253                         fonttbl.insert(std::make_pair(font_num, fnt));
0254                         fnt=font();
0255                         full_name=false;
0256                         ++buf_in;
0257                         break;
0258                      case ';':
0259                         full_name=true;
0260                         ++buf_in;
0261                         break;
0262                      default:
0263                         if (!full_name && in_font)
0264                  *buf_in;
0265                         ++buf_in;
0266                         break;
0267                      }
0268                   }
0269                   ++buf_in;
0270                   break;
0271                }
0272                // special characters
0273                case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline:
0274                   par_html.write("<br>");
0275                   break;
0276                case rtf_keyword::rkw_tab:
0277                   par_html.write("&nbsp;&nbsp;");  // maybe, this can be done better
0278                   break;
0279                case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace:
0280                   par_html.write("&nbsp;");
0281                   break;
0282                case rtf_keyword::rkw_qmspace:
0283                   par_html.write("&thinsp;");
0284                   break;
0285                case rtf_keyword::rkw_endash:
0286                   par_html.write("&ndash;");
0287                   break;
0288                case rtf_keyword::rkw_emdash:
0289                   par_html.write("&mdash;");
0290                   break;
0291                case rtf_keyword::rkw_bullet:
0292                   par_html.write("&bull;");
0293                   break;
0294                case rtf_keyword::rkw_lquote:
0295                   par_html.write("&lsquo;");
0296                   break;
0297                case rtf_keyword::rkw_rquote:
0298                   par_html.write("&rsquo;");
0299                   break;
0300                case rtf_keyword::rkw_ldblquote:
0301                   par_html.write("&ldquo;");
0302                   break;
0303                case rtf_keyword::rkw_rdblquote:
0304                   par_html.write("&rdquo;");
0305                   break;
0306                // paragraph formatting
0307                case rtf_keyword::rkw_ql:
0308                   cur_options.papAlign=formatting_options::align_left;
0309                   break;
0310                case rtf_keyword::rkw_qr:
0311                   cur_options.papAlign=formatting_options::align_right;
0312                   break;
0313                case rtf_keyword::rkw_qc:
0314                   cur_options.papAlign=formatting_options::align_center;
0315                   break;
0316                case rtf_keyword::rkw_qj:
0317                   cur_options.papAlign=formatting_options::align_justify;
0318                   break;
0319                case rtf_keyword::rkw_fi:
0320                   cur_options.papFirst=(int)rint(kw.parameter()/20);
0321                   break;
0322                case rtf_keyword::rkw_li:
0323                   cur_options.papLeft=(int)rint(kw.parameter()/20);
0324                   break;
0325                case rtf_keyword::rkw_ri:
0326                   cur_options.papRight=(int)rint(kw.parameter()/20);
0327                   break;
0328                case rtf_keyword::rkw_sb:
0329                   cur_options.papBefore=(int)rint(kw.parameter()/20);
0330                   break;
0331                case rtf_keyword::rkw_sa:
0332                   cur_options.papAfter=(int)rint(kw.parameter()/20);
0333                   break;
0334                case rtf_keyword::rkw_pard:
0335                   cur_options.papBefore=cur_options.papAfter=0;
0336                   cur_options.papLeft=cur_options.papRight=0;
0337                   cur_options.papFirst=0;
0338                   cur_options.papAlign=formatting_options::align_left;
0339                   cur_options.papInTbl=false;
0340                   break;
0341                case rtf_keyword::rkw_par:
0342                case rtf_keyword::rkw_sect:
0343                   t_str=cur_options.get_par_str()+par_html.str()
0344                         +"&nbsp;"+par_html.close()+"</p>\n";
0345                   if (!bInTable)
0346                   {
0347                      html+=t_str;
0348                   }
0349                   else
0350                   {
0351                      if (cur_options.papInTbl)
0352                      {
0353                         tcCurCell->Text+=t_str;
0354                      }
0355                      else
0356                      {
0357                         html+=tblCurTable->make()+t_str;
0358                         bInTable=false;
0359                         tblCurTable=new table;
0360                      }
0361                   }
0362                   par_html.clear();
0363                   break;
0364                // character formatting
0365                case rtf_keyword::rkw_super:
0366                   cur_options.chpVAlign=
0367                      kw.parameter()==0?formatting_options::va_normal
0368                                       :formatting_options::va_sup;
0369                   break;
0370                case rtf_keyword::rkw_sub:
0371                   cur_options.chpVAlign=
0372                      kw.parameter()==0?formatting_options::va_normal
0373                                       :formatting_options::va_sub;
0374                   break;
0375                case rtf_keyword::rkw_b:
0376                   cur_options.chpBold=!(kw.parameter()==0);
0377                   break;
0378                case rtf_keyword::rkw_i:
0379                   cur_options.chpItalic=!(kw.parameter()==0);
0380                   break;
0381                case rtf_keyword::rkw_ul:
0382                   cur_options.chpUnderline=!(kw.parameter()==0);
0383                   break;
0384                case rtf_keyword::rkw_ulnone:
0385                   cur_options.chpUnderline=false;
0386                   break;
0387                case rtf_keyword::rkw_fs:
0388                   cur_options.chpFontSize=kw.parameter();
0389                   break;
0390                case rtf_keyword::rkw_cf:
0391                   cur_options.chpFColor=colortbl[kw.parameter()];
0392                   break;
0393                case rtf_keyword::rkw_cb:
0394                   cur_options.chpBColor=colortbl[kw.parameter()];
0395                   break;
0396                case rtf_keyword::rkw_highlight:
0397                   cur_options.chpHighlight=kw.parameter();
0398                   break;
0399                case rtf_keyword::rkw_f:
0400                   cur_options.chpFont=fonttbl[kw.parameter()];
0401                   break;
0402                case rtf_keyword::rkw_plain:
0403                   cur_options.chpBold=cur_options.chpItalic
0404                     =cur_options.chpUnderline=false;
0405                   cur_options.chpVAlign=formatting_options::va_normal;
0406                   cur_options.chpFontSize=cur_options.chpHighlight=0;
0407                   cur_options.chpFColor=cur_options.chpBColor=color();
0408                   cur_options.chpFont=font();
0409                   break;
0410                // table formatting
0411                case rtf_keyword::rkw_intbl:
0412                   cur_options.papInTbl=true;
0413                   break;
0414                case rtf_keyword::rkw_trowd:
0415                   CurCellDefs=CellDefsList.insert(CellDefsList.end(),
0416                                                   table_cell_defs());
0417                case rtf_keyword::rkw_row:
0418                   if (!trCurRow->Cells.empty())
0419                   {
0420                      trCurRow->CellDefs=CurCellDefs;
0421                      if (trCurRow->Left==-1000)
0422                         trCurRow->Left=iLastRowLeft;
0423                      if (trCurRow->Height==-1000)
0424                         trCurRow->Height=iLastRowHeight;
0425                      tblCurTable->push_back(trCurRow);
0426                      trCurRow=new table_row;
0427                   }
0428                   bInTable=true;
0429                   break;
0430                case rtf_keyword::rkw_cell:
0431                   t_str=cur_options.get_par_str()+par_html.str()
0432                         +"&nbsp;"+par_html.close()+"</p>\n";
0433                   tcCurCell->Text+=t_str;
0434                   par_html.clear();
0435                   trCurRow->Cells.push_back(tcCurCell);
0436                   tcCurCell=new table_cell;
0437                   break;
0438                case rtf_keyword::rkw_cellx:
0439                   tcdCurCellDef->Right=kw.parameter();
0440                   CurCellDefs->push_back(tcdCurCellDef);
0441                   tcdCurCellDef=new table_cell_def;
0442                   break;
0443                case rtf_keyword::rkw_trleft:
0444                   trCurRow->Left=kw.parameter();
0445                   iLastRowLeft=kw.parameter();
0446                   break;
0447                case rtf_keyword::rkw_trrh:
0448                   trCurRow->Height=kw.parameter();
0449                   iLastRowHeight=kw.parameter();
0450                   break;
0451                case rtf_keyword::rkw_clvmgf:
0452                   tcdCurCellDef->FirstMerged=true;
0453                   break;
0454                case rtf_keyword::rkw_clvmrg:
0455                   tcdCurCellDef->Merged=true;
0456                   break;
0457                case rtf_keyword::rkw_clbrdrb:
0458                   tcdCurCellDef->BorderBottom=true;
0459                   tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom);
0460                   break;
0461                case rtf_keyword::rkw_clbrdrt:
0462                   tcdCurCellDef->BorderTop=true;
0463                   tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop);
0464                   break;
0465                case rtf_keyword::rkw_clbrdrl:
0466                   tcdCurCellDef->BorderLeft=true;
0467                   tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft);
0468                   break;
0469                case rtf_keyword::rkw_clbrdrr:
0470                   tcdCurCellDef->BorderRight=true;
0471                   tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight);
0472                   break;
0473                case rtf_keyword::rkw_brdrnone:
0474                   if (tcdCurCellDef->ActiveBorder!=NULL)
0475                   {
0476                      *(tcdCurCellDef->ActiveBorder)=false;
0477                   }
0478                   break;
0479                case rtf_keyword::rkw_clvertalt:
0480                   tcdCurCellDef->VAlign=table_cell_def::valign_top;
0481                   break;
0482                case rtf_keyword::rkw_clvertalc:
0483                   tcdCurCellDef->VAlign=table_cell_def::valign_center;
0484                   break;
0485                case rtf_keyword::rkw_clvertalb:
0486                   tcdCurCellDef->VAlign=table_cell_def::valign_bottom;
0487                   break;
0488                // page formatting
0489                case rtf_keyword::rkw_paperw:
0490 //                  iDocWidth=kw.parameter();
0491                   break;
0492                case rtf_keyword::rkw_margl:
0493 //                  iMarginLeft=kw.parameter();
0494                   break;
0495                default: break;
0496                }
0497             }
0498          break;
0499       }
0500       case '{':
0501          // perform group opening actions here
0502          foStack.push(cur_options);
0503          ++buf_in;
0504          break;
0505       case '}':
0506          // perform group closing actions here
0508          foStack.pop();
0509          ++buf_in;
0510          break;
0511       case 13:
0512       case 10:
0513          ++buf_in;
0514          break;
0515       case '<':
0516          par_html.write("&lt;");
0517          ++buf_in;
0518          break;
0519       case '>':
0520          par_html.write("&gt;");
0521          ++buf_in;
0522          break;
0523 /*      case ' ':
0524          par_html.write("&ensp;");
0525          ++buf_in;
0526          break;*/
0527       default:
0528          par_html.write(*buf_in++);
0529       }
0530    }
0532    t_str=cur_options.get_par_str()+par_html.str()
0533         +"&nbsp;"+par_html.close()+"</p>\n";
0534    html+=t_str;
0536    delete tcCurCell;
0537    delete trCurRow;
0538    delete tblCurTable;
0539    delete tcdCurCellDef;
0541    return QString::fromStdString(html);
0542 }