File indexing completed on 2024-04-28 11:21:09
0001 /* markdown: a C implementation of John Gruber's Markdown markup language. 0002 * 0003 * Copyright (C) 2007 David L Parsons. 0004 * The redistribution terms are provided in the COPYRIGHT file that must 0005 * be distributed with this source code. 0006 */ 0007 #include "config.h" 0008 0009 #include <stdio.h> 0010 #include <string.h> 0011 #include <stdarg.h> 0012 #include <stdlib.h> 0013 #include <time.h> 0014 #include <ctype.h> 0015 0016 #include "cstring.h" 0017 #include "markdown.h" 0018 #include "amalloc.h" 0019 #include "tags.h" 0020 0021 typedef int (*stfu)(const void*,const void*); 0022 0023 typedef ANCHOR(Paragraph) ParagraphRoot; 0024 0025 static Paragraph *Pp(ParagraphRoot *, Line *, int); 0026 static Paragraph *compile(Line *, int, MMIOT *); 0027 0028 /* case insensitive string sort for Footnote tags. 0029 */ 0030 int 0031 __mkd_footsort(Footnote *a, Footnote *b) 0032 { 0033 int i; 0034 char ac, bc; 0035 0036 if ( S(a->tag) != S(b->tag) ) 0037 return S(a->tag) - S(b->tag); 0038 0039 for ( i=0; i < S(a->tag); i++) { 0040 ac = tolower(T(a->tag)[i]); 0041 bc = tolower(T(b->tag)[i]); 0042 0043 if ( isspace(ac) && isspace(bc) ) 0044 continue; 0045 if ( ac != bc ) 0046 return ac - bc; 0047 } 0048 return 0; 0049 } 0050 0051 0052 /* find the first blank character after position <i> 0053 */ 0054 static int 0055 nextblank(Line *t, int i) 0056 { 0057 while ( (i < S(t->text)) && !isspace(T(t->text)[i]) ) 0058 ++i; 0059 return i; 0060 } 0061 0062 0063 /* find the next nonblank character after position <i> 0064 */ 0065 static int 0066 nextnonblank(Line *t, int i) 0067 { 0068 while ( (i < S(t->text)) && isspace(T(t->text)[i]) ) 0069 ++i; 0070 return i; 0071 } 0072 0073 0074 /* find the first nonblank character on the Line. 0075 */ 0076 int 0077 mkd_firstnonblank(Line *p) 0078 { 0079 return nextnonblank(p,0); 0080 } 0081 0082 0083 static inline int 0084 blankline(Line *p) 0085 { 0086 return ! (p && (S(p->text) > p->dle) ); 0087 } 0088 0089 0090 static Line * 0091 skipempty(Line *p) 0092 { 0093 while ( p && (p->dle == S(p->text)) ) 0094 p = p->next; 0095 return p; 0096 } 0097 0098 0099 void 0100 ___mkd_tidy(Cstring *t) 0101 { 0102 while ( S(*t) && isspace(T(*t)[S(*t)-1]) ) 0103 --S(*t); 0104 } 0105 0106 0107 static struct kw comment = { "!--", 3, 0 }; 0108 0109 static struct kw * 0110 isopentag(Line *p) 0111 { 0112 int i=0, len; 0113 char *line; 0114 0115 if ( !p ) return 0; 0116 0117 line = T(p->text); 0118 len = S(p->text); 0119 0120 if ( len < 3 || line[0] != '<' ) 0121 return 0; 0122 0123 if ( line[1] == '!' && line[2] == '-' && line[3] == '-' ) 0124 /* comments need special case handling, because 0125 * the !-- doesn't need to end in a whitespace 0126 */ 0127 return &comment; 0128 0129 /* find how long the tag is so we can check to see if 0130 * it's a block-level tag 0131 */ 0132 for ( i=1; i < len && T(p->text)[i] != '>' 0133 && T(p->text)[i] != '/' 0134 && !isspace(T(p->text)[i]); ++i ) 0135 ; 0136 0137 0138 return mkd_search_tags(T(p->text)+1, i-1); 0139 } 0140 0141 0142 typedef struct _flo { 0143 Line *t; 0144 int i; 0145 } FLO; 0146 0147 #define floindex(x) (x.i) 0148 0149 0150 static unsigned int 0151 flogetc(FLO *f) 0152 { 0153 if ( f && f->t ) { 0154 if ( f->i < S(f->t->text) ) 0155 return (unsigned char)T(f->t->text)[f->i++]; 0156 f->t = f->t->next; 0157 f->i = 0; 0158 return flogetc(f); 0159 } 0160 return EOF; 0161 } 0162 0163 0164 static void 0165 splitline(Line *t, int cutpoint) 0166 { 0167 if ( t && (cutpoint < S(t->text)) ) { 0168 Line *tmp = calloc(1, sizeof *tmp); 0169 0170 tmp->next = t->next; 0171 t->next = tmp; 0172 0173 SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint); 0174 EXPAND(tmp->text) = 0; 0175 S(tmp->text)--; 0176 0177 S(t->text) = cutpoint; 0178 } 0179 } 0180 0181 #define UNCHECK(l) ((l)->flags &= ~CHECKED) 0182 0183 #define UNLESS_FENCED(t) if (fenced) { \ 0184 other = 1; l->count += (c == ' ' ? 0 : -1); \ 0185 } else { t; } 0186 0187 /* 0188 * walk a line, seeing if it's any of half a dozen interesting regular 0189 * types. 0190 */ 0191 static void 0192 checkline(Line *l, mkd_flag_t flags) 0193 { 0194 int eol, i; 0195 int dashes = 0, spaces = 0, 0196 equals = 0, underscores = 0, 0197 stars = 0, tildes = 0, other = 0, 0198 backticks = 0, fenced = 0; 0199 0200 l->flags |= CHECKED; 0201 l->kind = chk_text; 0202 l->count = 0; 0203 0204 if (l->dle >= 4) { l->kind=chk_code; return; } 0205 0206 for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol ) 0207 ; 0208 0209 for (i=l->dle; i<eol; i++) { 0210 register int c = T(l->text)[i]; 0211 int is_fence_char = 0; 0212 0213 if ( c != ' ' ) l->count++; 0214 0215 switch (c) { 0216 case '-': UNLESS_FENCED(dashes = 1); break; 0217 case ' ': UNLESS_FENCED(spaces = 1); break; 0218 case '=': equals = 1; break; 0219 case '_': UNLESS_FENCED(underscores = 1); break; 0220 case '*': stars = 1; break; 0221 default: 0222 if ( is_flag_set(flags, MKD_FENCEDCODE) ) { 0223 switch (c) { 0224 case '~': if (other) return; is_fence_char = 1; tildes = 1; break; 0225 case '`': if (other) return; is_fence_char = 1; backticks = 1; break; 0226 } 0227 if (is_fence_char) { 0228 fenced = 1; 0229 break; 0230 } 0231 } 0232 other = 1; 0233 l->count--; 0234 if (!fenced) return; 0235 } 0236 } 0237 0238 if ( dashes + equals + underscores + stars + tildes + backticks > 1 ) 0239 return; 0240 0241 if ( spaces ) { 0242 if ( (underscores || stars || dashes) ) 0243 l->kind = chk_hr; 0244 return; 0245 } 0246 0247 if ( stars || underscores ) { l->kind = chk_hr; } 0248 else if ( dashes ) { l->kind = chk_dash; } 0249 else if ( equals ) { l->kind = chk_equal; } 0250 else if ( tildes ) { l->kind = chk_tilde; } 0251 else if ( backticks ) { l->kind = chk_backtick; } 0252 } 0253 0254 0255 0256 /* markdown only does special handling of comments if the comment end 0257 * is at the end of a line 0258 */ 0259 static Line * 0260 commentblock(Paragraph *p, int *unclosed) 0261 { 0262 Line *t, *ret; 0263 char *end; 0264 0265 for ( t = p->text; t ; t = t->next) { 0266 if ( end = strstr(T(t->text), "-->") ) { 0267 if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) ) 0268 continue; 0269 /*splitline(t, 3 + (end - T(t->text)) );*/ 0270 ret = t->next; 0271 t->next = 0; 0272 return ret; 0273 } 0274 } 0275 0276 *unclosed = 1; 0277 return t; 0278 0279 } 0280 0281 0282 static Line * 0283 htmlblock(Paragraph *p, struct kw *tag, int *unclosed) 0284 { 0285 Line *ret; 0286 FLO f = { p->text, 0 }; 0287 int c; 0288 int i, closing, depth=0; 0289 0290 *unclosed = 0; 0291 0292 if ( tag == &comment ) 0293 return commentblock(p, unclosed); 0294 0295 if ( tag->selfclose ) { 0296 ret = f.t->next; 0297 f.t->next = 0; 0298 return ret; 0299 } 0300 0301 while ( (c = flogetc(&f)) != EOF ) { 0302 if ( c == '<' ) { 0303 /* tag? */ 0304 c = flogetc(&f); 0305 if ( c == '!' ) { /* comment? */ 0306 if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) { 0307 /* yes */ 0308 while ( (c = flogetc(&f)) != EOF ) { 0309 if ( c == '-' && flogetc(&f) == '-' 0310 && flogetc(&f) == '>') 0311 /* consumed whole comment */ 0312 break; 0313 } 0314 } 0315 } 0316 else { 0317 if ( closing = (c == '/') ) c = flogetc(&f); 0318 0319 for ( i=0; i < tag->size; i++, c=flogetc(&f) ) { 0320 if ( tag->id[i] != toupper(c) ) 0321 break; 0322 } 0323 0324 if ( (i == tag->size) && !isalnum(c) ) { 0325 depth = depth + (closing ? -1 : 1); 0326 if ( depth == 0 ) { 0327 while ( c != EOF && c != '>' ) { 0328 /* consume trailing gunk in close tag */ 0329 c = flogetc(&f); 0330 } 0331 if ( c == EOF ) 0332 break; 0333 if ( !f.t ) 0334 return 0; 0335 splitline(f.t, floindex(f)); 0336 ret = f.t->next; 0337 f.t->next = 0; 0338 return ret; 0339 } 0340 } 0341 } 0342 } 0343 } 0344 *unclosed = 1; 0345 return 0; 0346 } 0347 0348 0349 /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$ 0350 */ 0351 static int 0352 isfootnote(Line *t) 0353 { 0354 int i; 0355 0356 if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') ) 0357 return 0; 0358 0359 for ( ++i; i < S(t->text) ; ++i ) { 0360 if ( T(t->text)[i] == '[' ) 0361 return 0; 0362 else if ( T(t->text)[i] == ']' ) 0363 return ( T(t->text)[i+1] == ':' ) ; 0364 } 0365 return 0; 0366 } 0367 0368 0369 static inline int 0370 isquote(Line *t) 0371 { 0372 return (t->dle < 4 && T(t->text)[t->dle] == '>'); 0373 } 0374 0375 0376 static inline int 0377 iscode(Line *t) 0378 { 0379 return (t->dle >= 4); 0380 } 0381 0382 0383 static inline int 0384 ishr(Line *t, mkd_flag_t flags) 0385 { 0386 if ( ! (t->flags & CHECKED) ) 0387 checkline(t, flags); 0388 0389 if ( t->count > 2 ) 0390 return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal; 0391 return 0; 0392 } 0393 0394 0395 static int 0396 issetext(Line *t, int *htyp, mkd_flag_t flags) 0397 { 0398 Line *n; 0399 0400 /* check for setext-style HEADER 0401 * ====== 0402 */ 0403 0404 if ( (n = t->next) ) { 0405 if ( !(n->flags & CHECKED) ) 0406 checkline(n, flags); 0407 0408 if ( n->kind == chk_dash || n->kind == chk_equal ) { 0409 *htyp = SETEXT; 0410 return 1; 0411 } 0412 } 0413 return 0; 0414 } 0415 0416 0417 static int 0418 ishdr(Line *t, int *htyp, mkd_flag_t flags) 0419 { 0420 /* ANY leading `#`'s make this into an ETX header 0421 */ 0422 if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) { 0423 *htyp = ETX; 0424 return 1; 0425 } 0426 0427 /* And if not, maybe it's a SETEXT header instead 0428 */ 0429 return issetext(t, htyp, flags); 0430 } 0431 0432 0433 static inline int 0434 end_of_block(Line *t, mkd_flag_t flags) 0435 { 0436 int dummy; 0437 0438 if ( !t ) 0439 return 0; 0440 0441 return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) ); 0442 } 0443 0444 0445 static Line* 0446 is_discount_dt(Line *t, int *clip, mkd_flag_t flags) 0447 { 0448 if ( !is_flag_set(flags, MKD_NODLDISCOUNT) 0449 && t 0450 && t->next 0451 && (S(t->text) > 2) 0452 && (t->dle == 0) 0453 && (T(t->text)[0] == '=') 0454 && (T(t->text)[S(t->text)-1] == '=') ) { 0455 if ( t->next->dle >= 4 ) { 0456 *clip = 4; 0457 return t; 0458 } 0459 else 0460 return is_discount_dt(t->next, clip, flags); 0461 } 0462 return 0; 0463 } 0464 0465 0466 static int 0467 is_extra_dd(Line *t) 0468 { 0469 return (t->dle < 4) && (T(t->text)[t->dle] == ':') 0470 && isspace(T(t->text)[t->dle+1]); 0471 } 0472 0473 0474 static Line* 0475 is_extra_dt(Line *t, int *clip, mkd_flag_t flags) 0476 { 0477 if ( is_flag_set(flags, MKD_DLEXTRA) 0478 && t 0479 && t->next && S(t->text) && T(t->text)[0] != '=' 0480 && T(t->text)[S(t->text)-1] != '=') { 0481 Line *x; 0482 0483 if ( iscode(t) || end_of_block(t, flags) ) 0484 return 0; 0485 0486 if ( (x = skipempty(t->next)) && is_extra_dd(x) ) { 0487 *clip = x->dle+2; 0488 return t; 0489 } 0490 0491 if ( x=is_extra_dt(t->next, clip, flags) ) 0492 return x; 0493 } 0494 return 0; 0495 } 0496 0497 0498 static Line* 0499 isdefinition(Line *t, int *clip, int *kind, mkd_flag_t flags) 0500 { 0501 Line *ret; 0502 0503 *kind = 1; 0504 if ( ret = is_discount_dt(t,clip,flags) ) 0505 return ret; 0506 0507 *kind=2; 0508 return is_extra_dt(t,clip,flags); 0509 } 0510 0511 0512 static int 0513 islist(Line *t, int *clip, mkd_flag_t flags, int *list_type) 0514 { 0515 int i, j; 0516 char *q; 0517 0518 if ( end_of_block(t, flags) ) 0519 return 0; 0520 0521 if ( !(is_flag_set(flags, MKD_NODLIST) || is_flag_set(flags, MKD_STRICT)) 0522 && isdefinition(t,clip,list_type,flags) ) 0523 return DL; 0524 0525 if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) { 0526 i = nextnonblank(t, t->dle+1); 0527 *clip = (i > 4) ? 4 : i; 0528 *list_type = UL; 0529 return is_flag_set(flags, MKD_EXPLICITLIST) ? UL : AL; 0530 } 0531 0532 if ( (j = nextblank(t,t->dle)) > t->dle ) { 0533 if ( T(t->text)[j-1] == '.' ) { 0534 0535 if ( !(is_flag_set(flags, MKD_NOALPHALIST) || is_flag_set(flags, MKD_STRICT)) 0536 && (j == t->dle + 2) 0537 && isalpha(T(t->text)[t->dle]) ) { 0538 j = nextnonblank(t,j); 0539 *clip = (j > 4) ? 4 : j; 0540 *list_type = AL; 0541 return AL; 0542 } 0543 0544 strtoul(T(t->text)+t->dle, &q, 10); 0545 if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) { 0546 j = nextnonblank(t,j); 0547 *clip = j; 0548 *list_type = OL; 0549 return AL; 0550 } 0551 } 0552 } 0553 return 0; 0554 } 0555 0556 0557 static Line * 0558 headerblock(Paragraph *pp, int htyp) 0559 { 0560 Line *ret = 0; 0561 Line *p = pp->text; 0562 int i, j; 0563 0564 switch (htyp) { 0565 case SETEXT: 0566 /* p->text is header, p->next->text is -'s or ='s 0567 */ 0568 pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2; 0569 0570 ret = p->next->next; 0571 ___mkd_freeLine(p->next); 0572 p->next = 0; 0573 break; 0574 0575 case ETX: 0576 /* p->text is ###header###, so we need to trim off 0577 * the leading and trailing `#`'s 0578 */ 0579 0580 for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1) 0581 && (i < 6); i++) 0582 ; 0583 0584 pp->hnumber = i; 0585 0586 while ( (i < S(p->text)) && isspace(T(p->text)[i]) ) 0587 ++i; 0588 0589 CLIP(p->text, 0, i); 0590 UNCHECK(p); 0591 0592 for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j) 0593 ; 0594 0595 while ( j && isspace(T(p->text)[j-1]) ) 0596 --j; 0597 0598 S(p->text) = j; 0599 0600 ret = p->next; 0601 p->next = 0; 0602 break; 0603 } 0604 return ret; 0605 } 0606 0607 0608 static Line * 0609 codeblock(Paragraph *p) 0610 { 0611 Line *t = p->text, *r; 0612 0613 for ( ; t; t = r ) { 0614 __mkd_trim_line(t,4); 0615 0616 if ( !( (r = skipempty(t->next)) && iscode(r)) ) { 0617 ___mkd_freeLineRange(t,r); 0618 t->next = 0; 0619 return r; 0620 } 0621 } 0622 return t; 0623 } 0624 0625 0626 static int 0627 iscodefence(Line *r, int size, line_type kind, mkd_flag_t flags) 0628 { 0629 if ( !is_flag_set(flags, MKD_FENCEDCODE) ) 0630 return 0; 0631 0632 if ( !(r->flags & CHECKED) ) 0633 checkline(r, flags); 0634 0635 if ( kind ) 0636 return (r->kind == kind) && (r->count >= size); 0637 else 0638 return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size); 0639 } 0640 0641 0642 static Paragraph * 0643 fencedcodeblock(ParagraphRoot *d, Line **ptr, mkd_flag_t flags) 0644 { 0645 Line *first, *r; 0646 Paragraph *ret; 0647 0648 first = (*ptr); 0649 0650 /* don't allow zero-length code fences 0651 */ 0652 if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) ) 0653 return 0; 0654 0655 /* find the closing fence, discard the fences, 0656 * return a Paragraph with the contents 0657 */ 0658 for ( r = first; r && r->next; r = r->next ) 0659 if ( iscodefence(r->next, first->count, first->kind, flags) ) { 0660 (*ptr) = r->next->next; 0661 ret = Pp(d, first->next, CODE); 0662 if (S(first->text) - first->count > 0) { 0663 char *lang_attr = T(first->text) + first->count; 0664 while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++; 0665 ret->lang = strdup(lang_attr); 0666 } 0667 else { 0668 ret->lang = 0; 0669 } 0670 ___mkd_freeLine(first); 0671 ___mkd_freeLine(r->next); 0672 r->next = 0; 0673 return ret; 0674 } 0675 return 0; 0676 } 0677 0678 0679 static int 0680 centered(Line *first, Line *last) 0681 { 0682 0683 if ( first&&last ) { 0684 int len = S(last->text); 0685 0686 if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0) 0687 && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) { 0688 CLIP(first->text, 0, 2); 0689 S(last->text) -= 2; 0690 return CENTER; 0691 } 0692 } 0693 return 0; 0694 } 0695 0696 0697 static int 0698 endoftextblock(Line *t, int toplevelblock, mkd_flag_t flags) 0699 { 0700 int z; 0701 0702 if ( end_of_block(t, flags) || isquote(t) ) 0703 return 1; 0704 0705 /* HORRIBLE STANDARDS KLUDGES: 0706 * 1. non-toplevel paragraphs absorb adjacent code blocks 0707 * 2. Toplevel paragraphs eat absorb adjacent list items, 0708 * but sublevel blocks behave properly. 0709 * (What this means is that we only need to check for code 0710 * blocks at toplevel, and only check for list items at 0711 * nested levels.) 0712 */ 0713 return toplevelblock ? 0 : islist(t,&z,flags,&z); 0714 } 0715 0716 0717 static Line * 0718 textblock(Paragraph *p, int toplevel, mkd_flag_t flags) 0719 { 0720 Line *t, *next; 0721 0722 for ( t = p->text; t ; t = next ) { 0723 if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) { 0724 p->align = centered(p->text, t); 0725 t->next = 0; 0726 return next; 0727 } 0728 } 0729 return t; 0730 } 0731 0732 0733 /* length of the id: or class: kind in a special div-not-quote block 0734 */ 0735 static int 0736 szmarkerclass(char *p) 0737 { 0738 if ( strncasecmp(p, "id:", 3) == 0 ) 0739 return 3; 0740 if ( strncasecmp(p, "class:", 6) == 0 ) 0741 return 6; 0742 return 0; 0743 } 0744 0745 0746 /* 0747 * check if the first line of a quoted block is the special div-not-quote 0748 * marker %[kind:]name% 0749 */ 0750 #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') ) 0751 0752 static int 0753 isdivmarker(Line *p, int start, mkd_flag_t flags) 0754 { 0755 char *s; 0756 int last, i; 0757 0758 if ( is_flag_set(flags, MKD_NODIVQUOTE) || is_flag_set(flags, MKD_STRICT) ) 0759 return 0; 0760 0761 start = nextnonblank(p, start); 0762 last= S(p->text) - (1 + start); 0763 s = T(p->text) + start; 0764 0765 if ( (last <= 0) || (*s != '%') || (s[last] != '%') ) 0766 return 0; 0767 0768 i = szmarkerclass(s+1); 0769 0770 if ( !iscsschar(s[i+1]) ) 0771 return 0; 0772 while ( ++i < last ) 0773 if ( !(isdigit(s[i]) || iscsschar(s[i])) ) 0774 return 0; 0775 0776 return 1; 0777 } 0778 0779 0780 /* 0781 * accumulate a blockquote. 0782 * 0783 * one sick horrible thing about blockquotes is that even though 0784 * it just takes ^> to start a quote, following lines, if quoted, 0785 * assume that the prefix is ``> ''. This means that code needs 0786 * to be indented *5* spaces from the leading '>', but *4* spaces 0787 * from the start of the line. This does not appear to be 0788 * documented in the reference implementation, but it's the 0789 * way the markdown sample web form at Daring Fireball works. 0790 */ 0791 static Line * 0792 quoteblock(Paragraph *p, mkd_flag_t flags) 0793 { 0794 Line *t, *q; 0795 int qp; 0796 0797 for ( t = p->text; t ; t = q ) { 0798 if ( isquote(t) ) { 0799 /* clip leading spaces */ 0800 for (qp = 0; T(t->text)[qp] != '>'; qp ++) 0801 /* assert: the first nonblank character on this line 0802 * will be a > 0803 */; 0804 /* clip '>' */ 0805 qp++; 0806 /* clip next space, if any */ 0807 if ( T(t->text)[qp] == ' ' ) 0808 qp++; 0809 __mkd_trim_line(t,qp); 0810 UNCHECK(t); 0811 } 0812 0813 q = skipempty(t->next); 0814 0815 if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) { 0816 ___mkd_freeLineRange(t, q); 0817 t = q; 0818 break; 0819 } 0820 } 0821 if ( isdivmarker(p->text,0,flags) ) { 0822 char *prefix = "class"; 0823 int i; 0824 0825 q = p->text; 0826 p->text = p->text->next; 0827 0828 if ( (i = szmarkerclass(1+T(q->text))) == 3 ) 0829 /* and this would be an "%id:" prefix */ 0830 prefix="id"; 0831 0832 if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) ) 0833 sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2), 0834 T(q->text)+(i+1) ); 0835 0836 ___mkd_freeLine(q); 0837 } 0838 return t; 0839 } 0840 0841 0842 typedef int (*linefn)(Line *); 0843 0844 0845 /* 0846 * pull in a list block. A list block starts with a list marker and 0847 * runs until the next list marker, the next non-indented paragraph, 0848 * or EOF. You do not have to indent nonblank lines after the list 0849 * marker, but multiple paragraphs need to start with a 4-space indent. 0850 */ 0851 static Line * 0852 listitem(Paragraph *p, int indent, mkd_flag_t flags, linefn check) 0853 { 0854 Line *t, *q; 0855 int clip = indent; 0856 int z; 0857 #ifdef GITHUB_CHECKBOX 0858 int firstpara = 1; 0859 int ischeck; 0860 #define CHECK_NOT 0 0861 #define CHECK_NO 1 0862 #define CHECK_YES 2 0863 #endif 0864 0865 for ( t = p->text; t ; t = q) { 0866 UNCHECK(t); 0867 __mkd_trim_line(t, clip); 0868 0869 #ifdef GITHUB_CHECKBOX 0870 if ( firstpara ) { 0871 ischeck = CHECK_NOT; 0872 if ( strncmp(T(t->text)+t->dle, "[ ]", 3) == 0 ) 0873 ischeck = CHECK_NO; 0874 else if ( strncasecmp(T(t->text)+t->dle, "[x]", 3) == 0 ) 0875 ischeck = CHECK_YES; 0876 0877 if ( ischeck != CHECK_NOT ) { 0878 __mkd_trim_line(t, 3); 0879 p->flags |= GITHUB_CHECK; 0880 if ( ischeck == CHECK_YES ) 0881 p->flags |= IS_CHECKED; 0882 } 0883 firstpara = 0; 0884 } 0885 #endif 0886 0887 /* even though we had to trim a long leader off this item, 0888 * the indent for trailing paragraphs is still 4... 0889 */ 0890 if (indent > 4) { 0891 indent = 4; 0892 } 0893 if ( (q = skipempty(t->next)) == 0 ) { 0894 ___mkd_freeLineRange(t,q); 0895 return 0; 0896 } 0897 0898 /* after a blank line, the next block needs to start with a line 0899 * that's indented 4(? -- reference implementation allows a 1 0900 * character indent, but that has unfortunate side effects here) 0901 * spaces, but after that the line doesn't need any indentation 0902 */ 0903 if ( q != t->next ) { 0904 if (q->dle < indent) { 0905 q = t->next; 0906 t->next = 0; 0907 return q; 0908 } 0909 /* indent at least 2, and at most as 0910 * as far as the initial line was indented. */ 0911 indent = clip ? clip : 2; 0912 } 0913 0914 if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z) 0915 || (check && (*check)(q))) 0916 && !issetext(q,&z,flags) ) { 0917 q = t->next; 0918 t->next = 0; 0919 return q; 0920 } 0921 0922 clip = (q->dle > indent) ? indent : q->dle; 0923 } 0924 return t; 0925 } 0926 0927 0928 static Line * 0929 definition_block(Paragraph *top, int clip, MMIOT *f, int kind) 0930 { 0931 ParagraphRoot d = { 0, 0 }; 0932 Paragraph *p; 0933 Line *q = top->text, *text = 0, *labels; 0934 int z, para; 0935 0936 while (( labels = q )) { 0937 0938 if ( (q = isdefinition(labels, &z, &kind, f->flags)) == 0 ) 0939 break; 0940 0941 if ( (text = skipempty(q->next)) == 0 ) 0942 break; 0943 0944 if ( para = (text != q->next) ) 0945 ___mkd_freeLineRange(q, text); 0946 0947 q->next = 0; 0948 if ( kind == 1 /* discount dl */ ) 0949 for ( q = labels; q; q = q->next ) { 0950 CLIP(q->text, 0, 1); 0951 UNCHECK(q); 0952 S(q->text)--; 0953 } 0954 0955 dd_block: 0956 p = Pp(&d, text, LISTITEM); 0957 0958 text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0); 0959 p->down = compile(p->text, 0, f); 0960 p->text = labels; labels = 0; 0961 0962 if ( para && p->down ) p->down->align = PARA; 0963 0964 if ( (q = skipempty(text)) == 0 ) 0965 break; 0966 0967 if ( para = (q != text) ) { 0968 Line anchor; 0969 0970 anchor.next = text; 0971 ___mkd_freeLineRange(&anchor,q); 0972 text = q; 0973 0974 } 0975 0976 if ( kind == 2 && is_extra_dd(q) ) 0977 goto dd_block; 0978 } 0979 top->text = 0; 0980 top->down = T(d); 0981 return text; 0982 } 0983 0984 0985 static Line * 0986 enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class) 0987 { 0988 ParagraphRoot d = { 0, 0 }; 0989 Paragraph *p; 0990 Line *q = top->text, *text; 0991 int para = 0, z; 0992 0993 while (( text = q )) { 0994 0995 p = Pp(&d, text, LISTITEM); 0996 text = listitem(p, clip, f->flags, 0); 0997 0998 p->down = compile(p->text, 0, f); 0999 p->text = 0; 1000 1001 if ( para && p->down ) p->down->align = PARA; 1002 1003 if ( (q = skipempty(text)) == 0 1004 || islist(q, &clip, f->flags, &z) != list_class ) 1005 break; 1006 1007 if ( para = (q != text) ) { 1008 Line anchor; 1009 1010 anchor.next = text; 1011 ___mkd_freeLineRange(&anchor, q); 1012 1013 if ( p->down ) p->down->align = PARA; 1014 } 1015 } 1016 top->text = 0; 1017 top->down = T(d); 1018 return text; 1019 } 1020 1021 1022 static int 1023 tgood(char c) 1024 { 1025 switch (c) { 1026 case '\'': 1027 case '"': return c; 1028 case '(': return ')'; 1029 } 1030 return 0; 1031 } 1032 1033 1034 /* 1035 * eat lines for a markdown extra footnote 1036 */ 1037 static Line * 1038 extrablock(Line *p) 1039 { 1040 Line *np; 1041 1042 while ( p && p->next ) { 1043 np = p->next; 1044 1045 if ( np->dle < 4 && np->dle < S(np->text) ) { 1046 p->next = 0; 1047 return np; 1048 } 1049 __mkd_trim_line(np,4); 1050 p = np; 1051 } 1052 return 0; 1053 } 1054 1055 1056 /* 1057 * add a new (image or link) footnote to the footnote table 1058 */ 1059 static Line* 1060 addfootnote(Line *p, MMIOT* f) 1061 { 1062 int j, i; 1063 int c; 1064 Line *np = p->next; 1065 1066 Footnote *foot = &EXPAND(f->footnotes->note); 1067 1068 CREATE(foot->tag); 1069 CREATE(foot->link); 1070 CREATE(foot->title); 1071 foot->text = 0; 1072 foot->flags = foot->height = foot->width = 0; 1073 1074 /* keep the footnote label */ 1075 for (j=i=p->dle+1; T(p->text)[j] != ']'; j++) 1076 EXPAND(foot->tag) = T(p->text)[j]; 1077 EXPAND(foot->tag) = 0; 1078 S(foot->tag)--; 1079 1080 /* consume the closing ]: */ 1081 j = nextnonblank(p, j+2); 1082 1083 if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) { 1084 /* markdown extra footnote: All indented lines past this point; 1085 * the first line includes the footnote reference, so we need to 1086 * snip that out as we go. 1087 */ 1088 foot->flags |= EXTRA_FOOTNOTE; 1089 __mkd_trim_line(p,j); 1090 1091 np = extrablock(p); 1092 1093 foot->text = compile(p, 0, f); 1094 1095 return np; 1096 } 1097 1098 while ( (j < S(p->text)) && !isspace(T(p->text)[j]) ) 1099 EXPAND(foot->link) = T(p->text)[j++]; 1100 EXPAND(foot->link) = 0; 1101 S(foot->link)--; 1102 j = nextnonblank(p,j); 1103 1104 if ( T(p->text)[j] == '=' ) { 1105 sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height); 1106 j = nextblank(p, j); 1107 j = nextnonblank(p,j); 1108 } 1109 1110 1111 if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) { 1112 ___mkd_freeLine(p); 1113 p = np; 1114 np = p->next; 1115 j = p->dle; 1116 } 1117 1118 if ( (c = tgood(T(p->text)[j])) ) { 1119 /* Try to take the rest of the line as a comment; read to 1120 * EOL, then shrink the string back to before the final 1121 * quote. 1122 */ 1123 ++j; /* skip leading quote */ 1124 1125 while ( j < S(p->text) ) 1126 EXPAND(foot->title) = T(p->text)[j++]; 1127 1128 while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c ) 1129 --S(foot->title); 1130 if ( S(foot->title) ) /* skip trailing quote */ 1131 --S(foot->title); 1132 EXPAND(foot->title) = 0; 1133 --S(foot->title); 1134 } 1135 1136 ___mkd_freeLine(p); 1137 return np; 1138 } 1139 1140 1141 /* 1142 * allocate a paragraph header, link it to the 1143 * tail of the current document 1144 */ 1145 static Paragraph * 1146 Pp(ParagraphRoot *d, Line *ptr, int typ) 1147 { 1148 Paragraph *ret = calloc(sizeof *ret, 1); 1149 1150 ret->text = ptr; 1151 ret->typ = typ; 1152 1153 return ATTACH(*d, ret); 1154 } 1155 1156 1157 1158 static Line* 1159 consume(Line *ptr, int *eaten) 1160 { 1161 Line *next; 1162 int blanks=0; 1163 1164 for (; ptr && blankline(ptr); ptr = next, blanks++ ) { 1165 next = ptr->next; 1166 ___mkd_freeLine(ptr); 1167 } 1168 if ( ptr ) *eaten = blanks; 1169 return ptr; 1170 } 1171 1172 1173 typedef ANCHOR(Line) Cache; 1174 1175 static void 1176 uncache(Cache *cache, ParagraphRoot *d, MMIOT *f) 1177 { 1178 Paragraph *p; 1179 1180 if ( T(*cache) ) { 1181 E(*cache)->next = 0; 1182 p = Pp(d, 0, SOURCE); 1183 p->down = compile(T(*cache), 1, f); 1184 T(*cache) = E(*cache) = 0; 1185 } 1186 } 1187 1188 1189 /* 1190 * top-level compilation; break the document into 1191 * style, html, and source blocks with footnote links 1192 * weeded out. 1193 */ 1194 static Paragraph * 1195 compile_document(Line *ptr, MMIOT *f) 1196 { 1197 ParagraphRoot d = { 0, 0 }; 1198 Cache source = { 0, 0 }; 1199 Paragraph *p = 0; 1200 struct kw *tag; 1201 int eaten, unclosed; 1202 int previous_was_break = 1; 1203 1204 while ( ptr ) { 1205 if ( !is_flag_set(f->flags, MKD_NOHTML) && (tag = isopentag(ptr)) ) { 1206 int blocktype; 1207 /* If we encounter a html/style block, compile and save all 1208 * of the cached source BEFORE processing the html/style. 1209 */ 1210 uncache(&source, &d, f); 1211 1212 if (is_flag_set(f->flags, MKD_NOSTYLE) ) 1213 blocktype = HTML; 1214 else 1215 blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML; 1216 p = Pp(&d, ptr, blocktype); 1217 ptr = htmlblock(p, tag, &unclosed); 1218 if ( unclosed ) { 1219 p->typ = SOURCE; 1220 p->down = compile(p->text, 1, f); 1221 p->text = 0; 1222 } 1223 previous_was_break = 1; 1224 } 1225 else if ( isfootnote(ptr) ) { 1226 /* footnotes, like cats, sleep anywhere; pull them 1227 * out of the input stream and file them away for 1228 * later processing 1229 */ 1230 ptr = consume(addfootnote(ptr, f), &eaten); 1231 previous_was_break = 1; 1232 } 1233 else if ( previous_was_break && iscodefence(ptr,3,0,f->flags)) { 1234 uncache(&source, &d, f); 1235 if ( !fencedcodeblock(&d, &ptr, f->flags) ) /* just source */ 1236 goto attach; 1237 } 1238 else { 1239 attach: 1240 /* source; cache it up to wait for eof or the 1241 * next html/style block 1242 */ 1243 ATTACH(source,ptr); 1244 previous_was_break = blankline(ptr); 1245 ptr = ptr->next; 1246 } 1247 } 1248 /* if there's any cached source at EOF, compile 1249 * it now. 1250 */ 1251 uncache(&source, &d, f); 1252 return T(d); 1253 } 1254 1255 1256 static int 1257 first_nonblank_before(Line *j, int dle) 1258 { 1259 return (j->dle < dle) ? j->dle : dle; 1260 } 1261 1262 1263 static int 1264 actually_a_table(MMIOT *f, Line *pp) 1265 { 1266 Line *r; 1267 int j; 1268 int c; 1269 1270 /* tables need to be turned on */ 1271 if ( is_flag_set(f->flags, MKD_STRICT) || is_flag_set(f->flags, MKD_NOTABLES) ) 1272 return 0; 1273 1274 /* tables need three lines */ 1275 if ( !(pp && pp->next && pp->next->next) ) { 1276 return 0; 1277 } 1278 1279 /* all lines must contain |'s */ 1280 for (r = pp; r; r = r->next ) 1281 if ( !(r->flags & PIPECHAR) ) { 1282 return 0; 1283 } 1284 1285 /* if the header has a leading |, all lines must have leading |'s */ 1286 if ( T(pp->text)[pp->dle] == '|' ) { 1287 for ( r = pp; r; r = r->next ) 1288 if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) { 1289 return 0; 1290 } 1291 } 1292 1293 /* second line must be only whitespace, -, |, or : */ 1294 r = pp->next; 1295 1296 for ( j=r->dle; j < S(r->text); ++j ) { 1297 c = T(r->text)[j]; 1298 1299 if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) { 1300 return 0; 1301 } 1302 } 1303 1304 return 1; 1305 } 1306 1307 1308 /* 1309 * break a collection of markdown input into 1310 * blocks of lists, code, html, and text to 1311 * be marked up. 1312 */ 1313 static Paragraph * 1314 compile(Line *ptr, int toplevel, MMIOT *f) 1315 { 1316 ParagraphRoot d = { 0, 0 }; 1317 Paragraph *p = 0; 1318 Line *r; 1319 int para = toplevel; 1320 int blocks = 0; 1321 int hdr_type, list_type, list_class, indent; 1322 1323 ptr = consume(ptr, ¶); 1324 1325 while ( ptr ) { 1326 1327 if ( iscode(ptr) ) { 1328 p = Pp(&d, ptr, CODE); 1329 1330 if ( is_flag_set(f->flags, MKD_1_COMPAT) ) { 1331 /* HORRIBLE STANDARDS KLUDGE: the first line of every block 1332 * has trailing whitespace trimmed off. 1333 */ 1334 ___mkd_tidy(&p->text->text); 1335 } 1336 1337 ptr = codeblock(p); 1338 } 1339 else if ( iscodefence(ptr,3,0,f->flags) && (p=fencedcodeblock(&d, &ptr, f->flags)) ) 1340 /* yay, it's already done */ ; 1341 else if ( ishr(ptr, f->flags) ) { 1342 p = Pp(&d, 0, HR); 1343 r = ptr; 1344 ptr = ptr->next; 1345 ___mkd_freeLine(r); 1346 } 1347 else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) { 1348 if ( list_class == DL ) { 1349 p = Pp(&d, ptr, DL); 1350 ptr = definition_block(p, indent, f, list_type); 1351 } 1352 else { 1353 p = Pp(&d, ptr, list_type); 1354 ptr = enumerated_block(p, indent, f, list_class); 1355 } 1356 } 1357 else if ( isquote(ptr) ) { 1358 p = Pp(&d, ptr, QUOTE); 1359 ptr = quoteblock(p, f->flags); 1360 p->down = compile(p->text, 1, f); 1361 p->text = 0; 1362 } 1363 else if ( ishdr(ptr, &hdr_type, f->flags) ) { 1364 p = Pp(&d, ptr, HDR); 1365 ptr = headerblock(p, hdr_type); 1366 } 1367 else { 1368 /* either markup or an html block element 1369 */ 1370 struct kw *tag; 1371 int unclosed = 1; 1372 1373 p = Pp(&d, ptr, MARKUP); /* default to regular markup, 1374 * then check if it's an html 1375 * block. If it IS an html 1376 * block, htmlblock() will 1377 * populate this paragraph & 1378 * all we need to do is reset 1379 * the paragraph type to HTML, 1380 * otherwise the paragraph 1381 * remains empty and ready for 1382 * processing with textblock() 1383 */ 1384 1385 if ( !is_flag_set(f->flags, MKD_NOHTML) && (tag = isopentag(ptr)) ) { 1386 /* possibly an html block 1387 */ 1388 1389 ptr = htmlblock(p, tag, &unclosed); 1390 if ( ! unclosed ) { 1391 p->typ = HTML; 1392 } 1393 } 1394 if ( unclosed ) { 1395 ptr = textblock(p, toplevel, f->flags); 1396 /* tables are a special kind of paragraph */ 1397 if ( actually_a_table(f, p->text) ) 1398 p->typ = TABLE; 1399 } 1400 } 1401 if ( (para||toplevel) && !p->align ) 1402 p->align = PARA; 1403 1404 blocks++; 1405 para = toplevel || (blocks > 1); 1406 ptr = consume(ptr, ¶); 1407 1408 if ( para && !p->align ) 1409 p->align = PARA; 1410 1411 } 1412 return T(d); 1413 } 1414 1415 1416 /* 1417 * the guts of the markdown() function, ripped out so I can do 1418 * debugging. 1419 */ 1420 1421 /* 1422 * prepare and compile `text`, returning a Paragraph tree. 1423 */ 1424 int 1425 mkd_compile(Document *doc, mkd_flag_t flags) 1426 { 1427 if ( !doc ) 1428 return 0; 1429 1430 flags &= USER_FLAGS; 1431 1432 if ( doc->compiled ) { 1433 if ( doc->ctx->flags == flags && !doc->dirty) 1434 return 1; 1435 else { 1436 doc->compiled = doc->dirty = 0; 1437 if ( doc->code) 1438 ___mkd_freeParagraph(doc->code); 1439 if ( doc->ctx->footnotes ) 1440 ___mkd_freefootnotes(doc->ctx); 1441 } 1442 } 1443 1444 doc->compiled = 1; 1445 memset(doc->ctx, 0, sizeof(MMIOT) ); 1446 doc->ctx->ref_prefix= doc->ref_prefix; 1447 doc->ctx->cb = &(doc->cb); 1448 doc->ctx->flags = flags; 1449 CREATE(doc->ctx->in); 1450 doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]); 1451 doc->ctx->footnotes->reference = 0; 1452 CREATE(doc->ctx->footnotes->note); 1453 1454 mkd_initialize(); 1455 1456 doc->code = compile_document(T(doc->content), doc->ctx); 1457 qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note), 1458 sizeof T(doc->ctx->footnotes->note)[0], 1459 (stfu)__mkd_footsort); 1460 memset(&doc->content, 0, sizeof doc->content); 1461 return 1; 1462 } 1463