File indexing completed on 2024-04-28 11:21:08
0001 /* markdown: a C implementation of John Gruber's Markdown markup language. 0002 * 0003 * Copyright (C) 2007 David L Parsons. 0004 * The redistribution terms are provided in the COPYRIGHT file that must 0005 * be distributed with this source code. 0006 */ 0007 #include <stdio.h> 0008 #include <string.h> 0009 #include <stdarg.h> 0010 #include <stdlib.h> 0011 #include <time.h> 0012 #include <ctype.h> 0013 0014 #include "config.h" 0015 0016 #include "cstring.h" 0017 #include "markdown.h" 0018 #include "amalloc.h" 0019 0020 typedef int (*stfu)(const void*,const void*); 0021 typedef void (*spanhandler)(MMIOT*,int); 0022 0023 /* forward declarations */ 0024 static void text(MMIOT *f); 0025 static Paragraph *display(Paragraph*, MMIOT*); 0026 0027 /* externals from markdown.c */ 0028 int __mkd_footsort(Footnote *, Footnote *); 0029 0030 /* 0031 * push text into the generator input buffer 0032 */ 0033 static void 0034 push(char *bfr, int size, MMIOT *f) 0035 { 0036 while ( size-- > 0 ) 0037 EXPAND(f->in) = *bfr++; 0038 } 0039 0040 0041 /* 0042 * push a character into the generator input buffer 0043 */ 0044 static void 0045 pushc(char c, MMIOT *f) 0046 { 0047 EXPAND(f->in) = c; 0048 } 0049 0050 0051 /* look <i> characters ahead of the cursor. 0052 */ 0053 static inline int 0054 peek(MMIOT *f, int i) 0055 { 0056 0057 i += (f->isp-1); 0058 0059 return (i >= 0) && (i < S(f->in)) ? (unsigned char)T(f->in)[i] : EOF; 0060 } 0061 0062 0063 /* pull a byte from the input buffer 0064 */ 0065 static inline unsigned int 0066 pull(MMIOT *f) 0067 { 0068 return ( f->isp < S(f->in) ) ? (unsigned char)T(f->in)[f->isp++] : EOF; 0069 } 0070 0071 0072 /* return a pointer to the current position in the input buffer. 0073 */ 0074 static inline char* 0075 cursor(MMIOT *f) 0076 { 0077 return T(f->in) + f->isp; 0078 } 0079 0080 0081 static inline int 0082 isthisspace(MMIOT *f, int i) 0083 { 0084 int c = peek(f, i); 0085 0086 if ( c == EOF ) 0087 return 1; 0088 if ( c & 0x80 ) 0089 return 0; 0090 return isspace(c) || (c < ' '); 0091 } 0092 0093 0094 static inline int 0095 isthisalnum(MMIOT *f, int i) 0096 { 0097 int c = peek(f, i); 0098 0099 return (c != EOF) && isalnum(c); 0100 } 0101 0102 0103 static inline int 0104 isthisnonword(MMIOT *f, int i) 0105 { 0106 return isthisspace(f, i) || ispunct(peek(f,i)); 0107 } 0108 0109 0110 /* return/set the current cursor position 0111 * (when setting the current cursor position we also need to flush the 0112 * last character written cache) 0113 */ 0114 #define mmiotseek(f,x) ((f->isp = x), (f->last = 0)) 0115 #define mmiottell(f) (f->isp) 0116 0117 0118 /* move n characters forward ( or -n characters backward) in the input buffer. 0119 */ 0120 static void 0121 shift(MMIOT *f, int i) 0122 { 0123 if (f->isp + i >= 0 ) 0124 f->isp += i; 0125 } 0126 0127 0128 /* Qchar() 0129 */ 0130 static void 0131 Qchar(int c, MMIOT *f) 0132 { 0133 block *cur; 0134 0135 if ( S(f->Q) == 0 ) { 0136 cur = &EXPAND(f->Q); 0137 memset(cur, 0, sizeof *cur); 0138 cur->b_type = bTEXT; 0139 } 0140 else 0141 cur = &T(f->Q)[S(f->Q)-1]; 0142 0143 EXPAND(cur->b_text) = c; 0144 } 0145 0146 0147 /* Qstring() 0148 */ 0149 static void 0150 Qstring(char *s, MMIOT *f) 0151 { 0152 while (*s) 0153 Qchar(*s++, f); 0154 } 0155 0156 0157 /* Qwrite() 0158 */ 0159 static void 0160 Qwrite(char *s, int size, MMIOT *f) 0161 { 0162 while (size-- > 0) 0163 Qchar(*s++, f); 0164 } 0165 0166 0167 /* Qprintf() 0168 */ 0169 static void 0170 Qprintf(MMIOT *f, char *fmt, ...) 0171 { 0172 char bfr[80]; 0173 va_list ptr; 0174 0175 va_start(ptr,fmt); 0176 vsnprintf(bfr, sizeof bfr, fmt, ptr); 0177 va_end(ptr); 0178 Qstring(bfr, f); 0179 } 0180 0181 0182 /* Qanchor() prints out a suitable-for-id-tag version of a string 0183 */ 0184 static void 0185 Qanchor(struct line *p, MMIOT *f) 0186 { 0187 mkd_string_to_anchor(T(p->text), S(p->text), 0188 (mkd_sta_function_t)Qchar, f, 1, f); 0189 } 0190 0191 0192 /* Qem() 0193 */ 0194 static void 0195 Qem(MMIOT *f, char c, int count) 0196 { 0197 block *p = &EXPAND(f->Q); 0198 0199 memset(p, 0, sizeof *p); 0200 p->b_type = (c == '*') ? bSTAR : bUNDER; 0201 p->b_char = c; 0202 p->b_count = count; 0203 0204 memset(&EXPAND(f->Q), 0, sizeof(block)); 0205 } 0206 0207 0208 /* generate html from a markup fragment 0209 */ 0210 void 0211 ___mkd_reparse(char *bfr, int size, mkd_flag_t flags, MMIOT *f, char *esc) 0212 { 0213 MMIOT sub; 0214 struct escaped e; 0215 0216 ___mkd_initmmiot(&sub, f->footnotes); 0217 0218 sub.flags = f->flags | flags; 0219 sub.cb = f->cb; 0220 sub.ref_prefix = f->ref_prefix; 0221 0222 if ( esc ) { 0223 sub.esc = &e; 0224 e.up = f->esc; 0225 e.text = esc; 0226 } 0227 else 0228 sub.esc = f->esc; 0229 0230 push(bfr, size, &sub); 0231 pushc(0, &sub); 0232 S(sub.in)--; 0233 0234 text(&sub); 0235 ___mkd_emblock(&sub); 0236 0237 Qwrite(T(sub.out), S(sub.out), f); 0238 /* inherit the last character printed from the reparsed 0239 * text; this way superscripts can work when they're 0240 * applied to something embedded in a link 0241 */ 0242 f->last = sub.last; 0243 0244 ___mkd_freemmiot(&sub, f->footnotes); 0245 } 0246 0247 0248 /* 0249 * check the escape list for special cases 0250 */ 0251 static int 0252 escaped(MMIOT *f, char c) 0253 { 0254 struct escaped *thing = f->esc; 0255 0256 while ( thing ) { 0257 if ( strchr(thing->text, c) ) 0258 return 1; 0259 thing = thing->up; 0260 } 0261 return 0; 0262 } 0263 0264 0265 /* 0266 * write out a url, escaping problematic characters 0267 */ 0268 static void 0269 puturl(char *s, int size, MMIOT *f, int display) 0270 { 0271 unsigned char c; 0272 0273 while ( size-- > 0 ) { 0274 c = *s++; 0275 0276 if ( c == '\\' && size-- > 0 ) { 0277 c = *s++; 0278 0279 if ( !( ispunct(c) || isspace(c) ) ) 0280 Qchar('\\', f); 0281 } 0282 0283 if ( c == '&' ) 0284 Qstring("&", f); 0285 else if ( c == '<' ) 0286 Qstring("<", f); 0287 else if ( c == '"' ) 0288 Qstring("%22", f); 0289 else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) ) 0290 Qchar(c, f); 0291 else if ( c == MKD_EOLN ) /* untokenize hard return */ 0292 Qstring(" ", f); 0293 else 0294 Qprintf(f, "%%%02X", c); 0295 } 0296 } 0297 0298 0299 /* advance forward until the next character is not whitespace 0300 */ 0301 static int 0302 eatspace(MMIOT *f) 0303 { 0304 int c; 0305 0306 for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) ) 0307 ; 0308 return c; 0309 } 0310 0311 0312 /* (match (a (nested (parenthetical (string.))))) 0313 */ 0314 static int 0315 parenthetical(int in, int out, MMIOT *f) 0316 { 0317 int size, indent, c; 0318 0319 for ( indent=1,size=0; indent; size++ ) { 0320 if ( (c = pull(f)) == EOF ) 0321 return EOF; 0322 else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) { 0323 ++size; 0324 pull(f); 0325 } 0326 else if ( c == in ) 0327 ++indent; 0328 else if ( c == out ) 0329 --indent; 0330 } 0331 return size ? (size-1) : 0; 0332 } 0333 0334 0335 /* extract a []-delimited label from the input stream. 0336 */ 0337 static int 0338 linkylabel(MMIOT *f, Cstring *res) 0339 { 0340 char *ptr = cursor(f); 0341 int size; 0342 0343 if ( (size = parenthetical('[',']',f)) != EOF ) { 0344 T(*res) = ptr; 0345 S(*res) = size; 0346 return 1; 0347 } 0348 return 0; 0349 } 0350 0351 0352 /* see if the quote-prefixed linky segment is actually a title. 0353 */ 0354 static int 0355 linkytitle(MMIOT *f, char quote, Footnote *ref) 0356 { 0357 int whence = mmiottell(f); 0358 char *title = cursor(f); 0359 char *e; 0360 register int c; 0361 0362 while ( (c = pull(f)) != EOF ) { 0363 e = cursor(f); 0364 if ( c == quote ) { 0365 if ( (c = eatspace(f)) == ')' ) { 0366 T(ref->title) = 1+title; 0367 S(ref->title) = (e-title)-2; 0368 return 1; 0369 } 0370 } 0371 } 0372 mmiotseek(f, whence); 0373 return 0; 0374 } 0375 0376 0377 /* extract a =HHHxWWW size from the input stream 0378 */ 0379 static int 0380 linkysize(MMIOT *f, Footnote *ref) 0381 { 0382 int height=0, width=0; 0383 int whence = mmiottell(f); 0384 int c; 0385 0386 if ( isspace(peek(f,0)) ) { 0387 pull(f); /* eat '=' */ 0388 0389 for ( c = pull(f); isdigit(c); c = pull(f)) 0390 width = (width * 10) + (c - '0'); 0391 0392 if ( c == 'x' ) { 0393 for ( c = pull(f); isdigit(c); c = pull(f)) 0394 height = (height*10) + (c - '0'); 0395 0396 if ( isspace(c) ) 0397 c = eatspace(f); 0398 0399 if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) { 0400 ref->height = height; 0401 ref->width = width; 0402 return 1; 0403 } 0404 } 0405 } 0406 mmiotseek(f, whence); 0407 return 0; 0408 } 0409 0410 0411 /* extract a <...>-encased url from the input stream. 0412 * (markdown 1.0.2b8 compatibility; older versions 0413 * of markdown treated the < and > as syntactic 0414 * sugar that didn't have to be there. 1.0.2b8 0415 * requires a closing >, and then falls into the 0416 * title or closing ) 0417 */ 0418 static int 0419 linkybroket(MMIOT *f, int image, Footnote *p) 0420 { 0421 int c; 0422 int good = 0; 0423 0424 T(p->link) = cursor(f); 0425 for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) { 0426 /* pull in all input until a '>' is found, or die trying. 0427 */ 0428 if ( c == EOF ) 0429 return 0; 0430 else if ( (c == '\\') && ispunct(peek(f,2)) ) { 0431 ++S(p->link); 0432 pull(f); 0433 } 0434 } 0435 0436 c = eatspace(f); 0437 0438 /* next nonspace needs to be a title, a size, or ) 0439 */ 0440 if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) ) 0441 good=1; 0442 else if ( image && (c == '=') && linkysize(f,p) ) 0443 good=1; 0444 else 0445 good=( c == ')' ); 0446 0447 if ( good ) { 0448 if ( peek(f, 1) == ')' ) 0449 pull(f); 0450 0451 ___mkd_tidy(&p->link); 0452 } 0453 0454 return good; 0455 } /* linkybroket */ 0456 0457 0458 /* extract a (-prefixed url from the input stream. 0459 * the label is either of the format `<link>`, where I 0460 * extract until I find a >, or it is of the format 0461 * `text`, where I extract until I reach a ')', a quote, 0462 * or (if image) a '=' 0463 */ 0464 static int 0465 linkyurl(MMIOT *f, int image, Footnote *p) 0466 { 0467 int c; 0468 int mayneedtotrim=0; 0469 0470 if ( (c = eatspace(f)) == EOF ) 0471 return 0; 0472 0473 if ( c == '<' ) { 0474 pull(f); 0475 if ( !is_flag_set(f->flags, MKD_1_COMPAT) ) 0476 return linkybroket(f,image,p); 0477 mayneedtotrim=1; 0478 } 0479 0480 T(p->link) = cursor(f); 0481 for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) { 0482 if ( c == EOF ) 0483 return 0; 0484 else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) ) 0485 break; 0486 else if ( image && (c == '=') && linkysize(f, p) ) 0487 break; 0488 else if ( (c == '\\') && ispunct(peek(f,2)) ) { 0489 ++S(p->link); 0490 pull(f); 0491 } 0492 pull(f); 0493 } 0494 if ( peek(f, 1) == ')' ) 0495 pull(f); 0496 0497 ___mkd_tidy(&p->link); 0498 0499 if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') ) 0500 --S(p->link); 0501 0502 return 1; 0503 } 0504 0505 0506 0507 /* prefixes for <automatic links> 0508 */ 0509 static struct _protocol { 0510 char *name; 0511 int nlen; 0512 } protocol[] = { 0513 #define _aprotocol(x) { x, (sizeof x)-1 } 0514 _aprotocol( "https:" ), 0515 _aprotocol( "http:" ), 0516 _aprotocol( "news:" ), 0517 _aprotocol( "ftp:" ), 0518 #undef _aprotocol 0519 }; 0520 #define NRPROTOCOLS (sizeof protocol / sizeof protocol[0]) 0521 0522 0523 static int 0524 isautoprefix(char *text, int size) 0525 { 0526 int i; 0527 struct _protocol *p; 0528 0529 for (i=0, p=protocol; i < NRPROTOCOLS; i++, p++) 0530 if ( (size >= p->nlen) && strncasecmp(text, p->name, p->nlen) == 0 ) 0531 return 1; 0532 return 0; 0533 } 0534 0535 0536 /* 0537 * all the tag types that linkylinky can produce are 0538 * defined by this structure. 0539 */ 0540 typedef struct linkytype { 0541 char *pat; 0542 int szpat; 0543 char *link_pfx; /* tag prefix and link pointer (eg: "<a href="\"" */ 0544 char *link_sfx; /* link suffix (eg: "\"" */ 0545 int WxH; /* this tag allows width x height arguments */ 0546 char *text_pfx; /* text prefix (eg: ">" */ 0547 char *text_sfx; /* text suffix (eg: "</a>" */ 0548 int flags; /* reparse flags */ 0549 int kind; /* tag is url or something else? */ 0550 #define IS_URL 0x01 0551 } linkytype; 0552 0553 static linkytype imaget = { 0, 0, "<img src=\"", "\"", 0554 1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL }; 0555 static linkytype linkt = { 0, 0, "<a href=\"", "\"", 0556 0, ">", "</a>", MKD_NOLINKS, IS_URL }; 0557 0558 /* 0559 * pseudo-protocols for [][]; 0560 * 0561 * id: generates <a id="link">tag</a> 0562 * class: generates <span class="link">tag</span> 0563 * raw: just dump the link without any processing 0564 */ 0565 static linkytype specials[] = { 0566 { "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 }, 0567 { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 }, 0568 { "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 }, 0569 { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 }, 0570 { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 }, 0571 } ; 0572 0573 #define NR(x) (sizeof x / sizeof x[0]) 0574 0575 /* see if t contains one of our pseudo-protocols. 0576 */ 0577 static linkytype * 0578 pseudo(Cstring t) 0579 { 0580 int i; 0581 linkytype *r; 0582 0583 for ( i=0, r=specials; i < NR(specials); i++,r++ ) { 0584 if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) ) 0585 return r; 0586 } 0587 return 0; 0588 } 0589 0590 0591 /* print out the start of an `img' or `a' tag, applying callbacks as needed. 0592 */ 0593 static void 0594 printlinkyref(MMIOT *f, linkytype *tag, char *link, int size) 0595 { 0596 char *edit; 0597 0598 if ( is_flag_set(f->flags, IS_LABEL) ) 0599 return; 0600 0601 Qstring(tag->link_pfx, f); 0602 0603 if ( tag->kind & IS_URL ) { 0604 if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) { 0605 puturl(edit, strlen(edit), f, 0); 0606 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data); 0607 } 0608 else 0609 puturl(link + tag->szpat, size - tag->szpat, f, 0); 0610 } 0611 else 0612 ___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0); 0613 0614 Qstring(tag->link_sfx, f); 0615 0616 if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) { 0617 Qchar(' ', f); 0618 Qstring(edit, f); 0619 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data); 0620 } 0621 } /* printlinkyref */ 0622 0623 0624 /* helper function for php markdown extra footnotes; allow the user to 0625 * define a prefix tag instead of just `fn` 0626 */ 0627 static char * 0628 p_or_nothing(p) 0629 MMIOT *p; 0630 { 0631 return p->ref_prefix ? p->ref_prefix : "fn"; 0632 } 0633 0634 0635 /* php markdown extra/daring fireball style print footnotes 0636 */ 0637 static int 0638 extra_linky(MMIOT *f, Cstring text, Footnote *ref) 0639 { 0640 if ( ref->flags & REFERENCED ) 0641 return 0; 0642 0643 if ( f->flags & IS_LABEL ) 0644 ___mkd_reparse(T(text), S(text), linkt.flags, f, 0); 0645 else { 0646 ref->flags |= REFERENCED; 0647 ref->refnumber = ++ f->footnotes->reference; 0648 Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>", 0649 p_or_nothing(f), ref->refnumber, 0650 p_or_nothing(f), ref->refnumber, ref->refnumber); 0651 } 0652 return 1; 0653 } /* extra_linky */ 0654 0655 0656 0657 /* check a url (or url fragment to see that it begins with a known good 0658 * protocol (or no protocol at all) 0659 */ 0660 static int 0661 safelink(Cstring link) 0662 { 0663 char *p, *colon; 0664 0665 if ( T(link) == 0 ) /* no link; safe */ 0666 return 1; 0667 0668 p = T(link); 0669 if ( (colon = memchr(p, ':', S(link))) == 0 ) 0670 return 1; /* no protocol specified: safe */ 0671 0672 if ( !isalpha(*p) ) /* protocol/method is [alpha][alnum or '+.-'] */ 0673 return 1; 0674 while ( ++p < colon ) 0675 if ( !(isalnum(*p) || *p == '.' || *p == '+' || *p == '-') ) 0676 return 1; 0677 0678 return isautoprefix(T(link), S(link)); 0679 } 0680 0681 0682 /* print out a linky (or fail if it's Not Allowed) 0683 */ 0684 static int 0685 linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref) 0686 { 0687 linkytype *tag; 0688 0689 0690 if ( image ) 0691 tag = &imaget; 0692 else if ( tag = pseudo(ref->link) ) { 0693 if ( is_flag_set(f->flags, MKD_NO_EXT) || is_flag_set(f->flags, MKD_SAFELINK) ) 0694 return 0; 0695 } 0696 else if ( is_flag_set(f->flags, MKD_SAFELINK) && !safelink(ref->link) ) 0697 /* if MKD_SAFELINK, only accept links that are local or 0698 * a well-known protocol 0699 */ 0700 return 0; 0701 else 0702 tag = &linkt; 0703 0704 if ( f->flags & tag->flags ) 0705 return 0; 0706 0707 if ( is_flag_set(f->flags, IS_LABEL) ) 0708 ___mkd_reparse(T(text), S(text), tag->flags, f, 0); 0709 else if ( tag->link_pfx ) { 0710 printlinkyref(f, tag, T(ref->link), S(ref->link)); 0711 0712 if ( tag->WxH ) { 0713 if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height); 0714 if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width); 0715 } 0716 0717 if ( S(ref->title) ) { 0718 Qstring(" title=\"", f); 0719 ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0); 0720 Qchar('"', f); 0721 } 0722 0723 Qstring(tag->text_pfx, f); 0724 ___mkd_reparse(T(text), S(text), tag->flags, f, 0); 0725 Qstring(tag->text_sfx, f); 0726 } 0727 else 0728 Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f); 0729 0730 return 1; 0731 } /* linkyformat */ 0732 0733 0734 /* 0735 * process embedded links and images 0736 */ 0737 static int 0738 linkylinky(int image, MMIOT *f) 0739 { 0740 int start = mmiottell(f); 0741 Cstring name; 0742 Footnote key, *ref; 0743 0744 int status = 0; 0745 int extra_footnote = 0; 0746 0747 CREATE(name); 0748 memset(&key, 0, sizeof key); 0749 0750 if ( linkylabel(f, &name) ) { 0751 if ( peek(f,1) == '(' ) { 0752 pull(f); 0753 if ( linkyurl(f, image, &key) ) 0754 status = linkyformat(f, name, image, &key); 0755 } 0756 else { 0757 int goodlink, implicit_mark = mmiottell(f); 0758 0759 if ( isspace(peek(f,1)) ) 0760 pull(f); 0761 0762 if ( peek(f,1) == '[' ) { 0763 pull(f); /* consume leading '[' */ 0764 goodlink = linkylabel(f, &key.tag); 0765 } 0766 else { 0767 /* new markdown implicit name syntax doesn't 0768 * require a second [] 0769 */ 0770 mmiotseek(f, implicit_mark); 0771 goodlink = !is_flag_set(f->flags, MKD_1_COMPAT); 0772 0773 if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' ) 0774 extra_footnote = 1; 0775 } 0776 0777 if ( goodlink ) { 0778 if ( !S(key.tag) ) { 0779 DELETE(key.tag); 0780 T(key.tag) = T(name); 0781 S(key.tag) = S(name); 0782 } 0783 0784 if ( ref = bsearch(&key, T(f->footnotes->note), 0785 S(f->footnotes->note), 0786 sizeof key, (stfu)__mkd_footsort) ) { 0787 if ( extra_footnote ) 0788 status = extra_linky(f,name,ref); 0789 else 0790 status = linkyformat(f, name, image, ref); 0791 } 0792 } 0793 } 0794 } 0795 0796 DELETE(name); 0797 ___mkd_freefootnote(&key); 0798 0799 if ( status == 0 ) 0800 mmiotseek(f, start); 0801 0802 return status; 0803 } 0804 0805 0806 /* write a character to output, doing text escapes ( & -> &, 0807 * > -> > < -> < ) 0808 */ 0809 static void 0810 cputc(int c, MMIOT *f) 0811 { 0812 switch (c) { 0813 case '&': Qstring("&", f); break; 0814 case '>': Qstring(">", f); break; 0815 case '<': Qstring("<", f); break; 0816 default : Qchar(c, f); break; 0817 } 0818 } 0819 0820 0821 /* 0822 * convert an email address to a string of nonsense 0823 */ 0824 static void 0825 mangle(char *s, int len, MMIOT *f) 0826 { 0827 while ( len-- > 0 ) { 0828 #if DEBIAN_GLITCH 0829 Qprintf(f, "&#%02d;", *((unsigned char*)(s++)) ); 0830 #else 0831 Qstring("&#", f); 0832 Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) ); 0833 #endif 0834 } 0835 } 0836 0837 0838 /* nrticks() -- count up a row of tick marks 0839 */ 0840 static int 0841 nrticks(int offset, int tickchar, MMIOT *f) 0842 { 0843 int tick = 0; 0844 0845 while ( peek(f, offset+tick) == tickchar ) tick++; 0846 0847 return tick; 0848 } /* nrticks */ 0849 0850 0851 /* matchticks() -- match a certain # of ticks, and if that fails 0852 * match the largest subset of those ticks. 0853 * 0854 * if a subset was matched, return the # of ticks 0855 * that were matched. 0856 */ 0857 static int 0858 matchticks(MMIOT *f, int tickchar, int ticks, int *endticks) 0859 { 0860 int size, count, c; 0861 int subsize=0, subtick=0; 0862 0863 *endticks = ticks; 0864 for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) { 0865 if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) { 0866 if ( count == ticks ) 0867 return size; 0868 else if ( count ) { 0869 if ( (count > subtick) && (count < ticks) ) { 0870 subsize = size; 0871 subtick = count; 0872 } 0873 size += count; 0874 } 0875 } 0876 } 0877 if ( subsize ) { 0878 *endticks = subtick; 0879 return subsize; 0880 } 0881 return 0; 0882 } /* matchticks */ 0883 0884 0885 /* code() -- write a string out as code. The only characters that have 0886 * special meaning in a code block are * `<' and `&' , which 0887 * are /always/ expanded to < and & 0888 */ 0889 static void 0890 code(MMIOT *f, char *s, int length) 0891 { 0892 int i,c; 0893 0894 for ( i=0; i < length; i++ ) 0895 if ( (c = s[i]) == MKD_EOLN) /* expand back to 2 spaces */ 0896 Qstring(" ", f); 0897 else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) ) 0898 cputc(s[++i], f); 0899 else 0900 cputc(c, f); 0901 } /* code */ 0902 0903 /* delspan() -- write out a chunk of text, blocking with <del>...</del> 0904 */ 0905 static void 0906 delspan(MMIOT *f, int size) 0907 { 0908 Qstring("<del>", f); 0909 ___mkd_reparse(cursor(f)-1, size, 0, f, 0); 0910 Qstring("</del>", f); 0911 } 0912 0913 0914 /* codespan() -- write out a chunk of text as code, trimming one 0915 * space off the front and/or back as appropriate. 0916 */ 0917 static void 0918 codespan(MMIOT *f, int size) 0919 { 0920 int i=0; 0921 0922 if ( size > 1 && peek(f, size-1) == ' ' ) --size; 0923 if ( peek(f,i) == ' ' ) ++i, --size; 0924 0925 Qstring("<code>", f); 0926 code(f, cursor(f)+(i-1), size); 0927 Qstring("</code>", f); 0928 } /* codespan */ 0929 0930 0931 /* before letting a tag through, validate against 0932 * MKD_NOLINKS and MKD_NOIMAGE 0933 */ 0934 static int 0935 forbidden_tag(MMIOT *f) 0936 { 0937 int c = toupper(peek(f, 1)); 0938 0939 if ( is_flag_set(f->flags, MKD_NOHTML) ) 0940 return 1; 0941 0942 if ( c == 'A' && is_flag_set(f->flags, MKD_NOLINKS) && !isthisalnum(f,2) ) 0943 return 1; 0944 if ( c == 'I' && is_flag_set(f->flags, MKD_NOIMAGE) 0945 && strncasecmp(cursor(f)+1, "MG", 2) == 0 0946 && !isthisalnum(f,4) ) 0947 return 1; 0948 return 0; 0949 } 0950 0951 0952 /* Check a string to see if it looks like a mail address 0953 * "looks like a mail address" means alphanumeric + some 0954 * specials, then a `@`, then alphanumeric + some specials, 0955 * but with a `.` 0956 */ 0957 static int 0958 maybe_address(char *p, int size) 0959 { 0960 int ok = 0; 0961 0962 for ( ;size && (isalnum(*p) || strchr("._-+*", *p)); ++p, --size) 0963 ; 0964 0965 if ( ! (size && *p == '@') ) 0966 return 0; 0967 0968 --size, ++p; 0969 0970 if ( size && *p == '.' ) return 0; 0971 0972 for ( ;size && (isalnum(*p) || strchr("._-+", *p)); ++p, --size ) 0973 if ( *p == '.' && size > 1 ) ok = 1; 0974 0975 return size ? 0 : ok; 0976 } 0977 0978 0979 /* The size-length token at cursor(f) is either a mailto:, an 0980 * implicit mailto:, one of the approved url protocols, or just 0981 * plain old text. If it's a mailto: or an approved protocol, 0982 * linkify it, otherwise say "no" 0983 */ 0984 static int 0985 process_possible_link(MMIOT *f, int size) 0986 { 0987 int address= 0; 0988 int mailto = 0; 0989 char *text = cursor(f); 0990 0991 if ( is_flag_set(f->flags, MKD_NOLINKS) ) return 0; 0992 0993 if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) { 0994 /* if it says it's a mailto, it's a mailto -- who am 0995 * I to second-guess the user? 0996 */ 0997 address = 1; 0998 mailto = 7; /* 7 is the length of "mailto:"; we need this */ 0999 } 1000 else 1001 address = maybe_address(text, size); 1002 1003 if ( address ) { 1004 Qstring("<a href=\"", f); 1005 if ( !mailto ) { 1006 /* supply a mailto: protocol if one wasn't attached */ 1007 mangle("mailto:", 7, f); 1008 } 1009 mangle(text, size, f); 1010 Qstring("\">", f); 1011 mangle(text+mailto, size-mailto, f); 1012 Qstring("</a>", f); 1013 return 1; 1014 } 1015 else if ( isautoprefix(text, size) ) { 1016 printlinkyref(f, &linkt, text, size); 1017 Qchar('>', f); 1018 puturl(text,size,f, 1); 1019 Qstring("</a>", f); 1020 return 1; 1021 } 1022 return 0; 1023 } /* process_possible_link */ 1024 1025 1026 /* a < may be just a regular character, the start of an embedded html 1027 * tag, or the start of an <automatic link>. If it's an automatic 1028 * link, we also need to know if it's an email address because if it 1029 * is we need to mangle it in our futile attempt to cut down on the 1030 * spaminess of the rendered page. 1031 */ 1032 static int 1033 maybe_tag_or_link(MMIOT *f) 1034 { 1035 int c, size; 1036 int maybetag = 1; 1037 1038 if ( is_flag_set(f->flags, MKD_TAGTEXT) ) 1039 return 0; 1040 1041 for ( size=0; (c = peek(f, size+1)) != '>'; size++) { 1042 if ( c == EOF ) 1043 return 0; 1044 else if ( c == '\\' ) { 1045 maybetag=0; 1046 if ( peek(f, size+2) != EOF ) 1047 size++; 1048 } 1049 else if ( isspace(c) ) 1050 break; 1051 else if ( ! (c == '/' 1052 || (is_flag_set(f->flags, MKD_GITHUBTAGS) && (c == '-' || c == '_')) 1053 || isalnum(c) ) ) 1054 maybetag=0; 1055 } 1056 1057 if ( size ) { 1058 if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) { 1059 1060 /* It is not a html tag unless we find the closing '>' in 1061 * the same block. 1062 */ 1063 while ( (c = peek(f, size+1)) != '>' ) 1064 if ( c == EOF ) 1065 return 0; 1066 else 1067 size++; 1068 1069 if ( forbidden_tag(f) ) 1070 return 0; 1071 1072 Qchar('<', f); 1073 while ( ((c = peek(f, 1)) != EOF) && (c != '>') ) 1074 Qchar(pull(f), f); 1075 return 1; 1076 } 1077 else if ( !isspace(c) && process_possible_link(f, size) ) { 1078 shift(f, size+1); 1079 return 1; 1080 } 1081 } 1082 1083 return 0; 1084 } 1085 1086 1087 /* autolinking means that all inline html is <a href'ified>. A 1088 * autolink url is alphanumerics, slashes, periods, underscores, 1089 * the at sign, colon, and the % character. 1090 */ 1091 static int 1092 maybe_autolink(MMIOT *f) 1093 { 1094 register int c; 1095 int size; 1096 1097 /* greedily scan forward for the end of a legitimate link. 1098 */ 1099 for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) { 1100 if ( c == '\\' ) { 1101 if ( peek(f, size+2) != EOF ) 1102 ++size; 1103 } 1104 else if ( c & 0x80 ) /* HACK: ignore utf-8 extended characters */ 1105 continue; 1106 else if ( isspace(c) || strchr("'\"()[]{}<>`", c) || c == MKD_EOLN ) 1107 break; 1108 } 1109 1110 if ( (size > 1) && process_possible_link(f, size) ) { 1111 shift(f, size); 1112 return 1; 1113 } 1114 return 0; 1115 } 1116 1117 1118 /* smartyquote code that's common for single and double quotes 1119 */ 1120 static int 1121 smartyquote(int *flags, char typeofquote, MMIOT *f) 1122 { 1123 int bit = (typeofquote == 's') ? 0x01 : 0x02; 1124 1125 if ( bit & (*flags) ) { 1126 if ( isthisnonword(f,1) ) { 1127 Qprintf(f, "&r%cquo;", typeofquote); 1128 (*flags) &= ~bit; 1129 return 1; 1130 } 1131 } 1132 else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) { 1133 Qprintf(f, "&l%cquo;", typeofquote); 1134 (*flags) |= bit; 1135 return 1; 1136 } 1137 return 0; 1138 } 1139 1140 1141 static int 1142 islike(MMIOT *f, char *s) 1143 { 1144 int len; 1145 int i; 1146 1147 if ( s[0] == '|' ) { 1148 if ( !isthisnonword(f, -1) ) 1149 return 0; 1150 ++s; 1151 } 1152 1153 if ( !(len = strlen(s)) ) 1154 return 0; 1155 1156 if ( s[len-1] == '|' ) { 1157 if ( !isthisnonword(f,len-1) ) 1158 return 0; 1159 len--; 1160 } 1161 1162 for (i=1; i < len; i++) 1163 if (tolower(peek(f,i)) != s[i]) 1164 return 0; 1165 return 1; 1166 } 1167 1168 1169 static struct smarties { 1170 char c0; 1171 char *pat; 1172 char *entity; 1173 int shift; 1174 } smarties[] = { 1175 { '\'', "'s|", "rsquo", 0 }, 1176 { '\'', "'t|", "rsquo", 0 }, 1177 { '\'', "'re|", "rsquo", 0 }, 1178 { '\'', "'ll|", "rsquo", 0 }, 1179 { '\'', "'ve|", "rsquo", 0 }, 1180 { '\'', "'m|", "rsquo", 0 }, 1181 { '\'', "'d|", "rsquo", 0 }, 1182 { '-', "---", "mdash", 2 }, 1183 { '-', "--", "ndash", 1 }, 1184 { '.', "...", "hellip", 2 }, 1185 { '.', ". . .", "hellip", 4 }, 1186 { '(', "(c)", "copy", 2 }, 1187 { '(', "(r)", "reg", 2 }, 1188 { '(', "(tm)", "trade", 3 }, 1189 { '3', "|3/4|", "frac34", 2 }, 1190 { '3', "|3/4ths|", "frac34", 2 }, 1191 { '1', "|1/2|", "frac12", 2 }, 1192 { '1', "|1/4|", "frac14", 2 }, 1193 { '1', "|1/4th|", "frac14", 2 }, 1194 { '&', "�", 0, 3 }, 1195 } ; 1196 #define NRSMART ( sizeof smarties / sizeof smarties[0] ) 1197 1198 1199 /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm) 1200 */ 1201 static int 1202 smartypants(int c, int *flags, MMIOT *f) 1203 { 1204 int i; 1205 1206 if ( is_flag_set(f->flags, MKD_NOPANTS) 1207 || is_flag_set(f->flags, MKD_TAGTEXT) 1208 || is_flag_set(f->flags, IS_LABEL) ) 1209 return 0; 1210 1211 for ( i=0; i < NRSMART; i++) 1212 if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) { 1213 if ( smarties[i].entity ) 1214 Qprintf(f, "&%s;", smarties[i].entity); 1215 shift(f, smarties[i].shift); 1216 return 1; 1217 } 1218 1219 switch (c) { 1220 case '<' : return 0; 1221 case '\'': if ( smartyquote(flags, 's', f) ) return 1; 1222 break; 1223 1224 case '"': if ( smartyquote(flags, 'd', f) ) return 1; 1225 break; 1226 1227 case '`': if ( peek(f, 1) == '`' ) { 1228 int j = 2; 1229 1230 while ( (c=peek(f,j)) != EOF ) { 1231 if ( c == '\\' ) 1232 j += 2; 1233 else if ( c == '`' ) 1234 break; 1235 else if ( c == '\'' && peek(f, j+1) == '\'' ) { 1236 Qstring("“", f); 1237 ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0); 1238 Qstring("”", f); 1239 shift(f,j+1); 1240 return 1; 1241 } 1242 else ++j; 1243 } 1244 1245 } 1246 break; 1247 } 1248 return 0; 1249 } /* smartypants */ 1250 1251 1252 /* process latex with arbitrary 2-character ( $$ .. $$, \[ .. \], \( .. \) 1253 * delimiters 1254 */ 1255 static int 1256 mathhandler(MMIOT *f, int e1, int e2) 1257 { 1258 int i = 0; 1259 1260 while(peek(f, ++i) != EOF) { 1261 if (peek(f, i) == e1 && peek(f, i+1) == e2) { 1262 cputc(peek(f,-1), f); 1263 cputc(peek(f, 0), f); 1264 cputc(6, f); 1265 EXPAND(f->latex) = peek(f,-1); 1266 EXPAND(f->latex) = peek(f,0); 1267 EXPAND(f->latex) = 6; 1268 while ( i-- > -1 ) { 1269 char c = pull(f); 1270 EXPAND(f->latex) = c; 1271 cputc(c, f); 1272 } 1273 EXPAND(f->latex) = 31; 1274 return 1; 1275 } 1276 } 1277 return 0; 1278 } 1279 1280 /* 1281 * process latex with arbitrary custom delimiters 1282 */ 1283 static int 1284 mathhandlerExtended(MMIOT *f, char* begin, char* end) 1285 { 1286 int beginLength = strlen(begin); 1287 int endLength = strlen(end); 1288 1289 for (int i = 0; i < beginLength; i++) 1290 if (peek(f, i) != begin[i]) 1291 return 0; 1292 1293 int i = beginLength; 1294 while(peek(f, ++i) != EOF) { 1295 int matchEnd = 1; 1296 for (int j = 0; j < endLength; j++) 1297 if (peek(f, i + j) != end[j]) 1298 matchEnd = 0; 1299 1300 if (matchEnd == 1) { 1301 i += endLength; 1302 1303 cputc(6, f); 1304 EXPAND(f->latex) = '\\'; 1305 EXPAND(f->latex) = 6; 1306 1307 while ( --i > 0 ) 1308 { 1309 char c = pull(f); 1310 EXPAND(f->latex) = c; 1311 cputc(c, f); 1312 } 1313 1314 EXPAND(f->latex) = 31; 1315 return 1; 1316 } 1317 } 1318 return 0; 1319 } 1320 1321 1322 /* process a body of text encased in some sort of tick marks. If it 1323 * works, generate the output and return 1, otherwise just return 0 and 1324 * let the caller figure it out. 1325 */ 1326 static int 1327 tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner) 1328 { 1329 int endticks, size; 1330 int tick = nrticks(0, tickchar, f); 1331 1332 if ( !allow_space && isspace(peek(f,tick)) ) 1333 return 0; 1334 1335 if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) { 1336 if ( endticks < tick ) { 1337 size += (tick - endticks); 1338 tick = endticks; 1339 } 1340 1341 shift(f, tick); 1342 (*spanner)(f,size); 1343 shift(f, size+tick-1); 1344 return 1; 1345 } 1346 return 0; 1347 } 1348 1349 #define tag_text(f) is_flag_set(f->flags, MKD_TAGTEXT) 1350 1351 1352 static void 1353 text(MMIOT *f) 1354 { 1355 int c, j; 1356 int rep; 1357 int smartyflags = 0; 1358 1359 while (1) { 1360 if ( is_flag_set(f->flags, MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) ) 1361 maybe_autolink(f); 1362 1363 c = pull(f); 1364 1365 if (c == EOF) 1366 break; 1367 1368 if ( smartypants(c, &smartyflags, f) ) 1369 continue; 1370 switch (c) { 1371 case 0: break; 1372 1373 case MKD_EOLN: 1374 Qstring(tag_text(f) ? " " : "<br/>", f); 1375 break; 1376 1377 case '>': if ( tag_text(f) ) 1378 Qstring(">", f); 1379 else 1380 Qchar(c, f); 1381 break; 1382 1383 case '"': if ( tag_text(f) ) 1384 Qstring(""", f); 1385 else 1386 Qchar(c, f); 1387 break; 1388 1389 case '!': if ( peek(f,1) == '[' ) { 1390 pull(f); 1391 if ( tag_text(f) || !linkylinky(1, f) ) 1392 Qstring("![", f); 1393 } 1394 else 1395 Qchar(c, f); 1396 break; 1397 1398 case '[': if ( tag_text(f) || !linkylinky(0, f) ) 1399 Qchar(c, f); 1400 break; 1401 /* A^B -> A<sup>B</sup> */ 1402 case '^': if ( is_flag_set(f->flags, MKD_NOSUPERSCRIPT) 1403 || is_flag_set(f->flags, MKD_STRICT) 1404 || is_flag_set(f->flags, MKD_TAGTEXT) 1405 || (f->last == 0) 1406 || ((ispunct(f->last) || isspace(f->last)) 1407 && f->last != ')') 1408 || isthisspace(f,1) ) 1409 Qchar(c,f); 1410 else { 1411 char *sup = cursor(f); 1412 int len = 0; 1413 1414 if ( peek(f,1) == '(' ) { 1415 int here = mmiottell(f); 1416 pull(f); 1417 1418 if ( (len = parenthetical('(',')',f)) <= 0 ) { 1419 mmiotseek(f,here); 1420 Qchar(c, f); 1421 break; 1422 } 1423 sup++; 1424 } 1425 else { 1426 while ( isthisalnum(f,1+len) ) 1427 ++len; 1428 if ( !len ) { 1429 Qchar(c,f); 1430 break; 1431 } 1432 shift(f,len); 1433 } 1434 Qstring("<sup>",f); 1435 ___mkd_reparse(sup, len, 0, f, "()"); 1436 Qstring("</sup>", f); 1437 } 1438 break; 1439 case '_': 1440 /* Underscores don't count if they're in the middle of a word */ 1441 if ( !(is_flag_set(f->flags, MKD_NORELAXED) || is_flag_set(f->flags, MKD_STRICT)) 1442 && isthisalnum(f,-1) && isthisalnum(f,1) ) { 1443 Qchar(c, f); 1444 break; 1445 } 1446 case '*': 1447 /* Underscores & stars don't count if they're out in the middle 1448 * of whitespace */ 1449 if ( isthisspace(f,-1) && isthisspace(f,1) ) { 1450 Qchar(c, f); 1451 break; 1452 } 1453 /* else fall into the regular old emphasis case */ 1454 if ( tag_text(f) ) 1455 Qchar(c, f); 1456 else { 1457 for (rep = 1; peek(f,1) == c; pull(f) ) 1458 ++rep; 1459 Qem(f,c,rep); 1460 } 1461 break; 1462 1463 case '~': if ( is_flag_set(f->flags, MKD_NOSTRIKETHROUGH) 1464 || is_flag_set(f->flags, MKD_STRICT) 1465 || is_flag_set(f->flags, MKD_TAGTEXT) 1466 || ! tickhandler(f,c,2,0, delspan) ) 1467 Qchar(c, f); 1468 break; 1469 1470 case '`': if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) ) 1471 Qchar(c, f); 1472 break; 1473 1474 case '\\': switch ( c = pull(f) ) { 1475 case '&': Qstring("&", f); 1476 break; 1477 case '<': c = peek(f,1); 1478 if ( (c == EOF) || isspace(c) ) 1479 Qstring("<", f); 1480 else { 1481 /* Markdown.pl does not escape <[nonwhite] 1482 * sequences */ 1483 Qchar('\\', f); 1484 shift(f, -1); 1485 } 1486 1487 break; 1488 case '^': if ( is_flag_set(f->flags, MKD_STRICT) 1489 || is_flag_set(f->flags, MKD_NOSUPERSCRIPT) ) { 1490 Qchar('\\', f); 1491 shift(f,-1); 1492 break; 1493 } 1494 Qchar(c, f); 1495 break; 1496 1497 case ':': case '|': 1498 if ( is_flag_set(f->flags, MKD_NOTABLES) ) { 1499 Qchar('\\', f); 1500 shift(f,-1); 1501 break; 1502 } 1503 Qchar(c, f); 1504 break; 1505 1506 case EOF: Qchar('\\', f); 1507 break; 1508 1509 case '[': 1510 case '(': 1511 Qchar(c, f); 1512 break; 1513 1514 case '$': if ( is_flag_set(f->flags, MKD_LATEX) ) { 1515 Qchar(c, f); 1516 break; 1517 } 1518 1519 default: 1520 if ( escaped(f,c) || 1521 strchr(">#.-+{}]![*_\\()`", c) ) 1522 Qchar(c, f); 1523 else { 1524 Qchar('\\', f); 1525 shift(f, -1); 1526 if ( is_flag_set(f->flags, MKD_LATEX) ) { 1527 mathhandlerExtended(f, "\\begin{equation}", "\\end{equation}") 1528 || mathhandlerExtended(f, "\\begin{equation*}", "\\end{equation*}") 1529 || mathhandlerExtended(f, "\\begin{align}", "\\end{align}") 1530 || mathhandlerExtended(f, "\\begin{align*}", "\\end{align*}") 1531 || mathhandlerExtended(f, "\\begin{bmatrix}", "\\end{bmatrix}") 1532 || mathhandlerExtended(f, "\\begin{cases}", "\\end{cases}"); 1533 } 1534 } 1535 break; 1536 } 1537 break; 1538 1539 case '<': if ( !maybe_tag_or_link(f) ) 1540 Qstring("<", f); 1541 break; 1542 1543 case '&': j = (peek(f,1) == '#' ) ? 2 : 1; 1544 while ( isthisalnum(f,j) ) 1545 ++j; 1546 1547 if ( peek(f,j) != ';' ) 1548 Qstring("&", f); 1549 else 1550 Qchar(c, f); 1551 break; 1552 1553 case '$': if ( is_flag_set(f->flags, MKD_LATEX) ) { 1554 if (peek(f, 1) == '$' ) { 1555 pull(f); 1556 if ( mathhandler(f, '$', '$') ) 1557 break; 1558 Qchar('$', f); 1559 } 1560 else { 1561 int c2; 1562 int i = 1; 1563 1564 while ( ((c2=peek(f,i)) != '$') && (c2 != EOF) ) 1565 i++; 1566 if ( c2 != EOF ) { 1567 Qchar('$', f); 1568 cputc(6, f); 1569 EXPAND(f->latex) = '$'; 1570 EXPAND(f->latex) = 6; 1571 while (i-- > 0 ) { 1572 char sym = pull(f); 1573 EXPAND(f->latex) = sym; 1574 Qchar(sym, f); 1575 } 1576 EXPAND(f->latex) = 31; 1577 break; 1578 } 1579 } 1580 } 1581 /* fall through to default */ 1582 1583 default: f->last = c; 1584 Qchar(c, f); 1585 break; 1586 } 1587 } 1588 /* truncate the input string after we've finished processing it */ 1589 S(f->in) = f->isp = 0; 1590 } /* text */ 1591 1592 1593 /* print a header block 1594 */ 1595 static void 1596 printheader(Paragraph *pp, MMIOT *f) 1597 { 1598 if ( is_flag_set(f->flags, MKD_IDANCHOR) ) { 1599 Qprintf(f, "<h%d", pp->hnumber); 1600 if ( is_flag_set(f->flags, MKD_TOC) ) { 1601 Qstring(" id=\"", f); 1602 Qanchor(pp->text, f); 1603 Qchar('"', f); 1604 } 1605 Qchar('>', f); 1606 } else { 1607 if ( is_flag_set(f->flags, MKD_TOC) ) { 1608 Qstring("<a name=\"", f); 1609 Qanchor(pp->text, f); 1610 Qstring("\"></a>\n", f); 1611 } 1612 Qprintf(f, "<h%d>", pp->hnumber); 1613 } 1614 push(T(pp->text->text), S(pp->text->text), f); 1615 text(f); 1616 Qprintf(f, "</h%d>", pp->hnumber); 1617 } 1618 1619 1620 enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT }; 1621 1622 static char* alignments[] = { "", " style=\"text-align:center;\"", 1623 " style=\"text-align:left;\"", 1624 " style=\"text-align:right;\"" }; 1625 1626 typedef STRING(int) Istring; 1627 1628 static int 1629 splat(Line *p, char *block, Istring align, int force, MMIOT *f) 1630 { 1631 int first, 1632 idx = p->dle, 1633 colno = 0; 1634 1635 1636 ___mkd_tidy(&p->text); 1637 if ( T(p->text)[S(p->text)-1] == '|' ) 1638 --S(p->text); 1639 1640 Qstring("<tr>\n", f); 1641 while ( idx < S(p->text) ) { 1642 first = idx; 1643 if ( force && (colno >= S(align)-1) ) 1644 idx = S(p->text); 1645 else 1646 while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) { 1647 if ( T(p->text)[idx] == '\\' ) 1648 ++idx; 1649 ++idx; 1650 } 1651 1652 Qprintf(f, "<%s%s>", 1653 block, 1654 alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]); 1655 ___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|"); 1656 Qprintf(f, "</%s>\n", block); 1657 idx++; 1658 colno++; 1659 } 1660 if ( force ) 1661 while (colno < S(align) ) { 1662 Qprintf(f, "<%s></%s>\n", block, block); 1663 ++colno; 1664 } 1665 Qstring("</tr>\n", f); 1666 return colno; 1667 } 1668 1669 1670 static int 1671 printtable(Paragraph *pp, MMIOT *f) 1672 { 1673 /* header, dashes, then lines of content */ 1674 1675 Line *hdr, *dash, *body; 1676 Istring align; 1677 int hcols,start; 1678 char *p; 1679 enum e_alignments it; 1680 1681 hdr = pp->text; 1682 dash= hdr->next; 1683 body= dash->next; 1684 1685 if ( T(hdr->text)[hdr->dle] == '|' ) { 1686 /* trim leading pipe off all lines 1687 */ 1688 Line *r; 1689 for ( r = pp->text; r; r = r->next ) 1690 r->dle ++; 1691 } 1692 1693 /* figure out cell alignments */ 1694 1695 CREATE(align); 1696 1697 for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) { 1698 char first, last; 1699 int end; 1700 1701 last=first=0; 1702 for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) { 1703 if ( p[end] == '\\' ) 1704 ++ end; 1705 else if ( !isspace(p[end]) ) { 1706 if ( !first) first = p[end]; 1707 last = p[end]; 1708 } 1709 } 1710 it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT) 1711 : (( last == ':') ? a_RIGHT : a_NONE ); 1712 1713 EXPAND(align) = it; 1714 start = 1+end; 1715 } 1716 1717 Qstring("<table>\n", f); 1718 Qstring("<thead>\n", f); 1719 hcols = splat(hdr, "th", align, 0, f); 1720 Qstring("</thead>\n", f); 1721 1722 if ( hcols < S(align) ) 1723 S(align) = hcols; 1724 else 1725 while ( hcols > S(align) ) 1726 EXPAND(align) = a_NONE; 1727 1728 Qstring("<tbody>\n", f); 1729 for ( ; body; body = body->next) 1730 splat(body, "td", align, 1, f); 1731 Qstring("</tbody>\n", f); 1732 Qstring("</table>\n", f); 1733 1734 DELETE(align); 1735 return 1; 1736 } 1737 1738 1739 static int 1740 printblock(Paragraph *pp, MMIOT *f) 1741 { 1742 static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">" }; 1743 static char *End[] = { "", "</p>","</p>" }; 1744 Line *t = pp->text; 1745 int align = pp->align; 1746 1747 while (t) { 1748 if ( S(t->text) ) { 1749 if ( t->next && S(t->text) > 2 1750 && T(t->text)[S(t->text)-2] == ' ' 1751 && T(t->text)[S(t->text)-1] == ' ' ) { 1752 push(T(t->text), S(t->text)-2, f); 1753 pushc(MKD_EOLN, f); 1754 pushc('\n', f); 1755 } 1756 else { 1757 ___mkd_tidy(&t->text); 1758 push(T(t->text), S(t->text), f); 1759 if ( t->next ) 1760 pushc('\n', f); 1761 } 1762 } 1763 t = t->next; 1764 } 1765 Qstring(Begin[align], f); 1766 text(f); 1767 Qstring(End[align], f); 1768 return 1; 1769 } 1770 1771 1772 static void 1773 printcode(Line *t, char *lang, MMIOT *f) 1774 { 1775 int blanks; 1776 1777 if ( f->cb->e_codefmt ) { 1778 /* external code block formatter; copy the text into a buffer, 1779 * call the formatter to style it, then dump that styled text 1780 * directly to the queue 1781 */ 1782 char *text; 1783 char *fmt; 1784 int size, copy_p; 1785 Line *p; 1786 1787 for (size=0, p = t; p; p = p->next ) 1788 size += 1+S(p->text); 1789 1790 text = malloc(1+size); 1791 1792 for ( copy_p = 0; t ; t = t->next ) { 1793 memcpy(text+copy_p, T(t->text), S(t->text)); 1794 copy_p += S(t->text); 1795 text[copy_p++] = '\n'; 1796 } 1797 text[copy_p] = 0; 1798 1799 fmt = (*(f->cb->e_codefmt))(text, copy_p, (lang && lang[0]) ? lang : 0); 1800 free(text); 1801 1802 if ( fmt ) { 1803 Qwrite(fmt, strlen(fmt), f); 1804 if ( f->cb->e_free ) 1805 (*(f->cb->e_free))(fmt, f->cb->e_data); 1806 return; 1807 } 1808 /* otherwise the external formatter failed and we need to 1809 * fall back to the traditional codeblock format 1810 */ 1811 } 1812 1813 Qstring("<pre><code", f); 1814 if (lang && lang[0]) { 1815 Qstring(" class=\"", f); 1816 Qstring(lang, f); 1817 Qstring("\"", f); 1818 } 1819 Qstring(">", f); 1820 for ( blanks = 0; t ; t = t->next ) { 1821 if ( S(t->text) > t->dle ) { 1822 while ( blanks ) { 1823 Qchar('\n', f); 1824 --blanks; 1825 } 1826 code(f, T(t->text), S(t->text)); 1827 Qchar('\n', f); 1828 } 1829 else blanks++; 1830 } 1831 Qstring("</code></pre>", f); 1832 } 1833 1834 1835 static void 1836 printhtml(Line *t, MMIOT *f) 1837 { 1838 int blanks; 1839 1840 for ( blanks=0; t ; t = t->next ) 1841 if ( S(t->text) ) { 1842 for ( ; blanks; --blanks ) 1843 Qchar('\n', f); 1844 1845 Qwrite(T(t->text), S(t->text), f); 1846 Qchar('\n', f); 1847 } 1848 else 1849 blanks++; 1850 } 1851 1852 1853 static void 1854 htmlify_paragraphs(Paragraph *p, MMIOT *f) 1855 { 1856 ___mkd_emblock(f); 1857 1858 while (( p = display(p, f) )) { 1859 ___mkd_emblock(f); 1860 Qstring("\n\n", f); 1861 } 1862 } 1863 1864 1865 #ifdef GITHUB_CHECKBOX 1866 static void 1867 li_htmlify(Paragraph *p, char *arguments, mkd_flag_t flags, MMIOT *f) 1868 { 1869 ___mkd_emblock(f); 1870 1871 Qprintf(f, "<li"); 1872 if ( arguments ) 1873 Qprintf(f, " %s", arguments); 1874 if ( flags & GITHUB_CHECK ) 1875 Qprintf(f, " class=\"github_checkbox\""); 1876 Qprintf(f, ">"); 1877 #if CHECKBOX_AS_INPUT 1878 if ( flags & GITHUB_CHECK ) { 1879 Qprintf(f, "<input disabled=\"\" type=\"checkbox\""); 1880 if ( flags & IS_CHECKED ) 1881 Qprintf(f, " checked=\"checked\""); 1882 Qprintf(f, "/>"); 1883 } 1884 #else 1885 if ( flags & GITHUB_CHECK ) 1886 Qprintf(f, flags & IS_CHECKED ? "☑" : "☐"); 1887 #endif 1888 1889 htmlify_paragraphs(p, f); 1890 1891 Qprintf(f, "</li>"); 1892 ___mkd_emblock(f); 1893 } 1894 #endif 1895 1896 1897 static void 1898 htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f) 1899 { 1900 ___mkd_emblock(f); 1901 if ( block ) 1902 Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments); 1903 1904 htmlify_paragraphs(p, f); 1905 1906 if ( block ) 1907 Qprintf(f, "</%s>", block); 1908 ___mkd_emblock(f); 1909 } 1910 1911 1912 static void 1913 definitionlist(Paragraph *p, MMIOT *f) 1914 { 1915 Line *tag; 1916 1917 if ( p ) { 1918 Qstring("<dl>\n", f); 1919 1920 for ( ; p ; p = p->next) { 1921 for ( tag = p->text; tag; tag = tag->next ) { 1922 Qstring("<dt>", f); 1923 ___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0); 1924 Qstring("</dt>\n", f); 1925 } 1926 1927 htmlify(p->down, "dd", p->ident, f); 1928 Qchar('\n', f); 1929 } 1930 1931 Qstring("</dl>", f); 1932 } 1933 } 1934 1935 1936 static void 1937 listdisplay(int typ, Paragraph *p, MMIOT* f) 1938 { 1939 if ( p ) { 1940 Qprintf(f, "<%cl", (typ==UL)?'u':'o'); 1941 if ( typ == AL ) 1942 Qprintf(f, " type=\"a\""); 1943 Qprintf(f, ">\n"); 1944 1945 for ( ; p ; p = p->next ) { 1946 #ifdef GITHUB_CHECKBOX 1947 li_htmlify(p->down, p->ident, p->flags, f); 1948 #else 1949 htmlify(p->down, "li", p->ident, f); 1950 #endif 1951 Qchar('\n', f); 1952 } 1953 1954 Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o'); 1955 } 1956 } 1957 1958 1959 /* dump out a Paragraph in the desired manner 1960 */ 1961 static Paragraph* 1962 display(Paragraph *p, MMIOT *f) 1963 { 1964 if ( !p ) return 0; 1965 1966 switch ( p->typ ) { 1967 case STYLE: 1968 case WHITESPACE: 1969 break; 1970 1971 case HTML: 1972 printhtml(p->text, f); 1973 break; 1974 1975 case CODE: 1976 printcode(p->text, p->lang, f); 1977 break; 1978 1979 case QUOTE: 1980 htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f); 1981 break; 1982 1983 case UL: 1984 case OL: 1985 case AL: 1986 listdisplay(p->typ, p->down, f); 1987 break; 1988 1989 case DL: 1990 definitionlist(p->down, f); 1991 break; 1992 1993 case HR: 1994 Qstring("<hr />", f); 1995 break; 1996 1997 case HDR: 1998 printheader(p, f); 1999 break; 2000 2001 case TABLE: 2002 printtable(p, f); 2003 break; 2004 2005 case SOURCE: 2006 htmlify(p->down, 0, 0, f); 2007 break; 2008 2009 default: 2010 printblock(p, f); 2011 break; 2012 } 2013 return p->next; 2014 } 2015 2016 2017 /* dump out a list of footnotes 2018 */ 2019 static void 2020 mkd_extra_footnotes(MMIOT *m) 2021 { 2022 int j, i; 2023 Footnote *t; 2024 2025 if ( m->footnotes->reference == 0 ) 2026 return; 2027 2028 Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n"); 2029 2030 for ( i=1; i <= m->footnotes->reference; i++ ) { 2031 for ( j=0; j < S(m->footnotes->note); j++ ) { 2032 t = &T(m->footnotes->note)[j]; 2033 if ( (t->refnumber == i) && (t->flags & REFERENCED) ) { 2034 Csprintf(&m->out, "<li id=\"%s:%d\">\n", 2035 p_or_nothing(m), t->refnumber); 2036 htmlify(t->text, 0, 0, m); 2037 Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">↩</a>", 2038 p_or_nothing(m), t->refnumber); 2039 Csprintf(&m->out, "</li>\n"); 2040 } 2041 } 2042 } 2043 Csprintf(&m->out, "</ol>\n</div>\n"); 2044 } 2045 2046 2047 /* return a pointer to the compiled markdown 2048 * document. 2049 */ 2050 int 2051 mkd_document(Document *p, char **res) 2052 { 2053 int size; 2054 2055 if ( p && p->compiled ) { 2056 if ( ! p->html ) { 2057 htmlify(p->code, 0, 0, p->ctx); 2058 if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) ) 2059 mkd_extra_footnotes(p->ctx); 2060 p->html = 1; 2061 size = S(p->ctx->out); 2062 2063 if ( (size == 0) || T(p->ctx->out)[size-1] ) { 2064 /* Add a null byte at the end of the generated html, 2065 * but pretend it doesn't exist. 2066 */ 2067 EXPAND(p->ctx->out) = 0; 2068 --S(p->ctx->out); 2069 } 2070 } 2071 2072 *res = T(p->ctx->out); 2073 return S(p->ctx->out); 2074 } 2075 return EOF; 2076 } 2077 2078 /* Return list of founded latex textes (only textes, without positions) separeted by ASCII unit separator (code - 31) 2079 * Ugly, but works 2080 */ 2081 int 2082 mkd_latextext(Document *p, char **res) 2083 { 2084 int size; 2085 2086 if ( p && p->compiled ) { 2087 if ( ! p->html ) { 2088 htmlify(p->code, 0, 0, p->ctx); 2089 if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) ) 2090 mkd_extra_footnotes(p->ctx); 2091 p->html = 1; 2092 size = S(p->ctx->latex); 2093 2094 if ( (size == 0) || T(p->ctx->latex)[size-1] ) { 2095 /* Add a null byte at the end of the generated html, 2096 * but pretend it doesn't exist. 2097 */ 2098 EXPAND(p->ctx->latex) = 0; 2099 --S(p->ctx->latex); 2100 } 2101 } 2102 2103 *res = T(p->ctx->latex); 2104 return S(p->ctx->latex); 2105 } 2106 return EOF; 2107 }