File indexing completed on 2024-06-09 05:04:43
0001 /* This is RTF to HTML converter, implemented as a text filter, generally. 0002 Copyright (C) 2003 Valentin Lavrinenko, vlavrinenko@users.sourceforge.net 0003 0004 available at http://rtf2html.sf.net 0005 0006 Original available under the terms of the GNU LGPL2, and according 0007 to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */ 0008 0009 /*************************************************************************** 0010 * * 0011 * This program is free software; you can redistribute it and/or * 0012 * modify it under the terms of the GNU General Public License as * 0013 * published by the Free Software Foundation; either version 2 of * 0014 * the License or (at your option) version 3 or any later version * 0015 * accepted by the membership of KDE e.V. (or its successor approved * 0016 * by the membership of KDE e.V.), which shall act as a proxy * 0017 * defined in Section 14 of version 3 of the license. * 0018 * * 0019 * This program is distributed in the hope that it will be useful, * 0020 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0021 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0022 * GNU General Public License for more details. * 0023 * * 0024 * You should have received a copy of the GNU General Public License * 0025 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0026 * * 0027 ***************************************************************************/ 0028 0029 #include "rtf2html.h" 0030 #include "rtf_table.h" 0031 #include "rtf_tools.h" 0032 #include "rtf_keyword.h" 0033 #include "fmt_opts.h" 0034 0035 #include <cstdlib> 0036 #include <stdexcept> 0037 #include <fstream> 0038 #include <iostream> 0039 #include <string> 0040 0041 using Tellico::RTF2HTML; 0042 using namespace rtf; 0043 0044 RTF2HTML::RTF2HTML(const QString& text) : m_text(text) { 0045 } 0046 0047 QString RTF2HTML::toHTML() const { 0048 std::string str_in = m_text.toStdString(); 0049 0050 std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end(); 0051 colorvect colortbl; 0052 fontmap fonttbl; 0053 std::string title; 0054 0055 bool bAsterisk=false; 0056 fo_stack foStack; 0057 formatting_options cur_options; 0058 std::string html; 0059 html_text par_html(cur_options); 0060 0061 /* CellDefs in rtf are really queer. We'll keep a list of them in main() 0062 and will give an iterator into this list to a row */ 0063 table_cell_defs_list CellDefsList; 0064 table_cell_defs_list::iterator CurCellDefs; 0065 table_cell_def* tcdCurCellDef=new table_cell_def; 0066 table_cell* tcCurCell=new table_cell; 0067 table_row* trCurRow=new table_row; 0068 table* tblCurTable=new table; 0069 int iLastRowLeft=0, iLastRowHeight=0; 0070 std::string t_str; 0071 0072 bool bInTable=false; 0073 // int iDocWidth=12240; 0074 // int iMarginLeft=1800; 0075 while(buf_in!=buf_in_end) 0076 { 0077 switch (*buf_in) 0078 { 0079 case '\\': 0080 { 0081 rtf_keyword kw(++buf_in); 0082 if (kw.is_control_char()) 0083 switch (kw.control_char()) 0084 { 0085 case '\\': case '{': case '}': 0086 par_html.write(kw.control_char()); 0087 break; 0088 case '\'': 0089 { 0090 std::string stmp(1,*buf_in++); 0091 stmp+=*buf_in++; 0092 long int code=std::strtol(stmp.c_str(), NULL, 16); 0093 switch (code) 0094 { 0095 case 167: 0096 par_html.write("•"); 0097 break; 0098 case 188: 0099 par_html.write("…"); 0100 break; 0101 default: 0102 par_html.write((char)code); 0103 } 0104 break; 0105 } 0106 case '*': 0107 bAsterisk=true; 0108 break; 0109 case '~': 0110 par_html.write(" "); 0111 break; 0112 case '\n': 0113 par_html.write("<br><br>"); 0114 break; 0115 } 0116 else //kw.is_control_char 0117 if (bAsterisk) 0118 { 0119 bAsterisk=false; 0120 skip_group(buf_in); 0121 } 0122 else 0123 { 0124 switch (kw.keyword()) 0125 { 0126 case rtf_keyword::rkw_filetbl: 0127 case rtf_keyword::rkw_stylesheet: 0128 case rtf_keyword::rkw_header: 0129 case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf: 0130 case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict: 0131 case rtf_keyword::rkw_object: 0132 // we'll skip such groups 0133 skip_group(buf_in); 0134 break; 0135 // document title 0136 case rtf_keyword::rkw_info: 0137 { 0138 int depth=1; 0139 bool in_title=false; 0140 while (depth>0) 0141 { 0142 // std::cout<<std::string(buf_in).substr(0,20)<<"\t"<<depth<<std::endl; 0143 switch (*buf_in) 0144 { 0145 case '\\': 0146 { 0147 rtf_keyword kw(++buf_in); 0148 if (kw.keyword()==rtf_keyword::rkw_title) 0149 in_title=true; 0150 break; 0151 } 0152 case '{': ++depth; ++buf_in; break; 0153 case '}': --depth; ++buf_in; in_title=false; break; 0154 default: if (in_title) title+=*buf_in; ++buf_in; break; 0155 } 0156 } 0157 break; 0158 } 0159 // color table 0160 case rtf_keyword::rkw_colortbl: 0161 { 0162 color clr; 0163 while (*buf_in!='}') 0164 { 0165 switch (*buf_in) 0166 { 0167 case '\\': 0168 { 0169 rtf_keyword kw(++buf_in); 0170 switch (kw.keyword()) 0171 { 0172 case rtf_keyword::rkw_red: 0173 clr.r=kw.parameter(); 0174 break; 0175 case rtf_keyword::rkw_green: 0176 clr.g=kw.parameter(); 0177 break; 0178 case rtf_keyword::rkw_blue: 0179 clr.b=kw.parameter(); 0180 break; 0181 default: break; 0182 } 0183 break; 0184 } 0185 case ';': 0186 colortbl.push_back(clr); 0187 ++buf_in; 0188 break; 0189 default: 0190 ++buf_in; 0191 break; 0192 } 0193 } 0194 ++buf_in; 0195 break; 0196 } 0197 // font table 0198 case rtf_keyword::rkw_fonttbl: 0199 { 0200 font fnt; 0201 int font_num=0; 0202 bool full_name=false; 0203 bool in_font=false; 0204 while (! (*buf_in=='}' && !in_font)) 0205 { 0206 switch (*buf_in) 0207 { 0208 case '\\': 0209 { 0210 rtf_keyword kw(++buf_in); 0211 if (kw.is_control_char() && kw.control_char()=='*') 0212 skip_group(buf_in); 0213 else 0214 switch (kw.keyword()) 0215 { 0216 case rtf_keyword::rkw_f: 0217 font_num=kw.parameter(); 0218 break; 0219 case rtf_keyword::rkw_fprq: 0220 fnt.pitch=kw.parameter(); 0221 break; 0222 case rtf_keyword::rkw_fcharset: 0223 fnt.charset=kw.parameter(); 0224 break; 0225 case rtf_keyword::rkw_fnil: 0226 fnt.family=font::ff_none; 0227 break; 0228 case rtf_keyword::rkw_froman: 0229 fnt.family=font::ff_serif; 0230 break; 0231 case rtf_keyword::rkw_fswiss: 0232 fnt.family=font::ff_sans_serif; 0233 break; 0234 case rtf_keyword::rkw_fmodern: 0235 fnt.family=font::ff_monospace; 0236 break; 0237 case rtf_keyword::rkw_fscript: 0238 fnt.family=font::ff_cursive; 0239 break; 0240 case rtf_keyword::rkw_fdecor: 0241 fnt.family=font::ff_fantasy; 0242 break; 0243 default: break; 0244 } 0245 break; 0246 } 0247 case '{': 0248 in_font=true; 0249 ++buf_in; 0250 break; 0251 case '}': 0252 in_font=false; 0253 fonttbl.insert(std::make_pair(font_num, fnt)); 0254 fnt=font(); 0255 full_name=false; 0256 ++buf_in; 0257 break; 0258 case ';': 0259 full_name=true; 0260 ++buf_in; 0261 break; 0262 default: 0263 if (!full_name && in_font) 0264 fnt.name+=*buf_in; 0265 ++buf_in; 0266 break; 0267 } 0268 } 0269 ++buf_in; 0270 break; 0271 } 0272 // special characters 0273 case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline: 0274 par_html.write("<br>"); 0275 break; 0276 case rtf_keyword::rkw_tab: 0277 par_html.write(" "); // maybe, this can be done better 0278 break; 0279 case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace: 0280 par_html.write(" "); 0281 break; 0282 case rtf_keyword::rkw_qmspace: 0283 par_html.write(" "); 0284 break; 0285 case rtf_keyword::rkw_endash: 0286 par_html.write("–"); 0287 break; 0288 case rtf_keyword::rkw_emdash: 0289 par_html.write("—"); 0290 break; 0291 case rtf_keyword::rkw_bullet: 0292 par_html.write("•"); 0293 break; 0294 case rtf_keyword::rkw_lquote: 0295 par_html.write("‘"); 0296 break; 0297 case rtf_keyword::rkw_rquote: 0298 par_html.write("’"); 0299 break; 0300 case rtf_keyword::rkw_ldblquote: 0301 par_html.write("“"); 0302 break; 0303 case rtf_keyword::rkw_rdblquote: 0304 par_html.write("”"); 0305 break; 0306 // paragraph formatting 0307 case rtf_keyword::rkw_ql: 0308 cur_options.papAlign=formatting_options::align_left; 0309 break; 0310 case rtf_keyword::rkw_qr: 0311 cur_options.papAlign=formatting_options::align_right; 0312 break; 0313 case rtf_keyword::rkw_qc: 0314 cur_options.papAlign=formatting_options::align_center; 0315 break; 0316 case rtf_keyword::rkw_qj: 0317 cur_options.papAlign=formatting_options::align_justify; 0318 break; 0319 case rtf_keyword::rkw_fi: 0320 cur_options.papFirst=(int)rint(kw.parameter()/20); 0321 break; 0322 case rtf_keyword::rkw_li: 0323 cur_options.papLeft=(int)rint(kw.parameter()/20); 0324 break; 0325 case rtf_keyword::rkw_ri: 0326 cur_options.papRight=(int)rint(kw.parameter()/20); 0327 break; 0328 case rtf_keyword::rkw_sb: 0329 cur_options.papBefore=(int)rint(kw.parameter()/20); 0330 break; 0331 case rtf_keyword::rkw_sa: 0332 cur_options.papAfter=(int)rint(kw.parameter()/20); 0333 break; 0334 case rtf_keyword::rkw_pard: 0335 cur_options.papBefore=cur_options.papAfter=0; 0336 cur_options.papLeft=cur_options.papRight=0; 0337 cur_options.papFirst=0; 0338 cur_options.papAlign=formatting_options::align_left; 0339 cur_options.papInTbl=false; 0340 break; 0341 case rtf_keyword::rkw_par: 0342 case rtf_keyword::rkw_sect: 0343 t_str=cur_options.get_par_str()+par_html.str() 0344 +" "+par_html.close()+"</p>\n"; 0345 if (!bInTable) 0346 { 0347 html+=t_str; 0348 } 0349 else 0350 { 0351 if (cur_options.papInTbl) 0352 { 0353 tcCurCell->Text+=t_str; 0354 } 0355 else 0356 { 0357 html+=tblCurTable->make()+t_str; 0358 bInTable=false; 0359 tblCurTable=new table; 0360 } 0361 } 0362 par_html.clear(); 0363 break; 0364 // character formatting 0365 case rtf_keyword::rkw_super: 0366 cur_options.chpVAlign= 0367 kw.parameter()==0?formatting_options::va_normal 0368 :formatting_options::va_sup; 0369 break; 0370 case rtf_keyword::rkw_sub: 0371 cur_options.chpVAlign= 0372 kw.parameter()==0?formatting_options::va_normal 0373 :formatting_options::va_sub; 0374 break; 0375 case rtf_keyword::rkw_b: 0376 cur_options.chpBold=!(kw.parameter()==0); 0377 break; 0378 case rtf_keyword::rkw_i: 0379 cur_options.chpItalic=!(kw.parameter()==0); 0380 break; 0381 case rtf_keyword::rkw_ul: 0382 cur_options.chpUnderline=!(kw.parameter()==0); 0383 break; 0384 case rtf_keyword::rkw_ulnone: 0385 cur_options.chpUnderline=false; 0386 break; 0387 case rtf_keyword::rkw_fs: 0388 cur_options.chpFontSize=kw.parameter(); 0389 break; 0390 case rtf_keyword::rkw_cf: 0391 cur_options.chpFColor=colortbl[kw.parameter()]; 0392 break; 0393 case rtf_keyword::rkw_cb: 0394 cur_options.chpBColor=colortbl[kw.parameter()]; 0395 break; 0396 case rtf_keyword::rkw_highlight: 0397 cur_options.chpHighlight=kw.parameter(); 0398 break; 0399 case rtf_keyword::rkw_f: 0400 cur_options.chpFont=fonttbl[kw.parameter()]; 0401 break; 0402 case rtf_keyword::rkw_plain: 0403 cur_options.chpBold=cur_options.chpItalic 0404 =cur_options.chpUnderline=false; 0405 cur_options.chpVAlign=formatting_options::va_normal; 0406 cur_options.chpFontSize=cur_options.chpHighlight=0; 0407 cur_options.chpFColor=cur_options.chpBColor=color(); 0408 cur_options.chpFont=font(); 0409 break; 0410 // table formatting 0411 case rtf_keyword::rkw_intbl: 0412 cur_options.papInTbl=true; 0413 break; 0414 case rtf_keyword::rkw_trowd: 0415 CurCellDefs=CellDefsList.insert(CellDefsList.end(), 0416 table_cell_defs()); 0417 case rtf_keyword::rkw_row: 0418 if (!trCurRow->Cells.empty()) 0419 { 0420 trCurRow->CellDefs=CurCellDefs; 0421 if (trCurRow->Left==-1000) 0422 trCurRow->Left=iLastRowLeft; 0423 if (trCurRow->Height==-1000) 0424 trCurRow->Height=iLastRowHeight; 0425 tblCurTable->push_back(trCurRow); 0426 trCurRow=new table_row; 0427 } 0428 bInTable=true; 0429 break; 0430 case rtf_keyword::rkw_cell: 0431 t_str=cur_options.get_par_str()+par_html.str() 0432 +" "+par_html.close()+"</p>\n"; 0433 tcCurCell->Text+=t_str; 0434 par_html.clear(); 0435 trCurRow->Cells.push_back(tcCurCell); 0436 tcCurCell=new table_cell; 0437 break; 0438 case rtf_keyword::rkw_cellx: 0439 tcdCurCellDef->Right=kw.parameter(); 0440 CurCellDefs->push_back(tcdCurCellDef); 0441 tcdCurCellDef=new table_cell_def; 0442 break; 0443 case rtf_keyword::rkw_trleft: 0444 trCurRow->Left=kw.parameter(); 0445 iLastRowLeft=kw.parameter(); 0446 break; 0447 case rtf_keyword::rkw_trrh: 0448 trCurRow->Height=kw.parameter(); 0449 iLastRowHeight=kw.parameter(); 0450 break; 0451 case rtf_keyword::rkw_clvmgf: 0452 tcdCurCellDef->FirstMerged=true; 0453 break; 0454 case rtf_keyword::rkw_clvmrg: 0455 tcdCurCellDef->Merged=true; 0456 break; 0457 case rtf_keyword::rkw_clbrdrb: 0458 tcdCurCellDef->BorderBottom=true; 0459 tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom); 0460 break; 0461 case rtf_keyword::rkw_clbrdrt: 0462 tcdCurCellDef->BorderTop=true; 0463 tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop); 0464 break; 0465 case rtf_keyword::rkw_clbrdrl: 0466 tcdCurCellDef->BorderLeft=true; 0467 tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft); 0468 break; 0469 case rtf_keyword::rkw_clbrdrr: 0470 tcdCurCellDef->BorderRight=true; 0471 tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight); 0472 break; 0473 case rtf_keyword::rkw_brdrnone: 0474 if (tcdCurCellDef->ActiveBorder!=NULL) 0475 { 0476 *(tcdCurCellDef->ActiveBorder)=false; 0477 } 0478 break; 0479 case rtf_keyword::rkw_clvertalt: 0480 tcdCurCellDef->VAlign=table_cell_def::valign_top; 0481 break; 0482 case rtf_keyword::rkw_clvertalc: 0483 tcdCurCellDef->VAlign=table_cell_def::valign_center; 0484 break; 0485 case rtf_keyword::rkw_clvertalb: 0486 tcdCurCellDef->VAlign=table_cell_def::valign_bottom; 0487 break; 0488 // page formatting 0489 case rtf_keyword::rkw_paperw: 0490 // iDocWidth=kw.parameter(); 0491 break; 0492 case rtf_keyword::rkw_margl: 0493 // iMarginLeft=kw.parameter(); 0494 break; 0495 default: break; 0496 } 0497 } 0498 break; 0499 } 0500 case '{': 0501 // perform group opening actions here 0502 foStack.push(cur_options); 0503 ++buf_in; 0504 break; 0505 case '}': 0506 // perform group closing actions here 0507 cur_options=foStack.top(); 0508 foStack.pop(); 0509 ++buf_in; 0510 break; 0511 case 13: 0512 case 10: 0513 ++buf_in; 0514 break; 0515 case '<': 0516 par_html.write("<"); 0517 ++buf_in; 0518 break; 0519 case '>': 0520 par_html.write(">"); 0521 ++buf_in; 0522 break; 0523 /* case ' ': 0524 par_html.write(" "); 0525 ++buf_in; 0526 break;*/ 0527 default: 0528 par_html.write(*buf_in++); 0529 } 0530 } 0531 0532 t_str=cur_options.get_par_str()+par_html.str() 0533 +" "+par_html.close()+"</p>\n"; 0534 html+=t_str; 0535 0536 delete tcCurCell; 0537 delete trCurRow; 0538 delete tblCurTable; 0539 delete tcdCurCellDef; 0540 0541 return QString::fromStdString(html); 0542 }