File indexing completed on 2024-04-28 11:21:09

0001 /* markdown: a C implementation of John Gruber's Markdown markup language.
0002  *
0003  * Copyright (C) 2007 David L Parsons.
0004  * The redistribution terms are provided in the COPYRIGHT file that must
0005  * be distributed with this source code.
0006  */
0007 #include "config.h"
0008 
0009 #include <stdio.h>
0010 #include <string.h>
0011 #include <stdarg.h>
0012 #include <stdlib.h>
0013 #include <time.h>
0014 #include <ctype.h>
0015 
0016 #include "cstring.h"
0017 #include "markdown.h"
0018 #include "amalloc.h"
0019 #include "tags.h"
0020 
/* function-pointer type with the (const void*, const void*) -> int
 * signature that qsort()/bsearch() expect of a comparator.
 * NOTE(review): its users are not visible in this part of the file.
 */
0021 typedef int (*stfu)(const void*,const void*);
0022 
/* anchor type for a chain of Paragraphs, produced by the ANCHOR() macro;
 * the functions below fill one in through Pp() and read its head with T().
 */
0023 typedef ANCHOR(Paragraph) ParagraphRoot;
0024 
/* forward declarations: the block builders below call both of these
 * before their definitions appear.
 */
0025 static Paragraph *Pp(ParagraphRoot *, Line *, int);
0026 static Paragraph *compile(Line *, int, MMIOT *);
0027 
0028 /* case insensitive string sort for Footnote tags.
0029  */
0030 int
0031 __mkd_footsort(Footnote *a, Footnote *b)
0032 {
0033     int i;
0034     char ac, bc;
0035 
0036     if ( S(a->tag) != S(b->tag) )
0037     return S(a->tag) - S(b->tag);
0038 
0039     for ( i=0; i < S(a->tag); i++) {
0040     ac = tolower(T(a->tag)[i]);
0041     bc = tolower(T(b->tag)[i]);
0042 
0043     if ( isspace(ac) && isspace(bc) )
0044         continue;
0045     if ( ac != bc )
0046         return ac - bc;
0047     }
0048     return 0;
0049 }
0050 
0051 
0052 /* find the first blank character after position <i>
0053  */
0054 static int
0055 nextblank(Line *t, int i)
0056 {
0057     while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
0058     ++i;
0059     return i;
0060 }
0061 
0062 
0063 /* find the next nonblank character after position <i>
0064  */
0065 static int
0066 nextnonblank(Line *t, int i)
0067 {
0068     while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
0069     ++i;
0070     return i;
0071 }
0072 
0073 
0074 /* find the first nonblank character on the Line.
0075  */
0076 int
0077 mkd_firstnonblank(Line *p)
0078 {
0079     return nextnonblank(p,0);
0080 }
0081 
0082 
0083 static inline int
0084 blankline(Line *p)
0085 {
0086     return ! (p && (S(p->text) > p->dle) );
0087 }
0088 
0089 
0090 static Line *
0091 skipempty(Line *p)
0092 {
0093     while ( p && (p->dle == S(p->text)) )
0094     p = p->next;
0095     return p;
0096 }
0097 
0098 
0099 void
0100 ___mkd_tidy(Cstring *t)
0101 {
0102     while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
0103     --S(*t);
0104 }
0105 
0106 
0107 static struct kw comment = { "!--", 3, 0 };
0108 
0109 static struct kw *
0110 isopentag(Line *p)
0111 {
0112     int i=0, len;
0113     char *line;
0114 
0115     if ( !p ) return 0;
0116 
0117     line = T(p->text);
0118     len = S(p->text);
0119 
0120     if ( len < 3 || line[0] != '<' )
0121     return 0;
0122 
0123     if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
0124     /* comments need special case handling, because
0125      * the !-- doesn't need to end in a whitespace
0126      */
0127     return &comment;
0128     
0129     /* find how long the tag is so we can check to see if
0130      * it's a block-level tag
0131      */
0132     for ( i=1; i < len && T(p->text)[i] != '>' 
0133                && T(p->text)[i] != '/'
0134                && !isspace(T(p->text)[i]); ++i )
0135     ;
0136 
0137 
0138     return mkd_search_tags(T(p->text)+1, i-1);
0139 }
0140 
0141 
0142 typedef struct _flo {
0143     Line *t;
0144     int i;
0145 } FLO;
0146 
0147 #define floindex(x) (x.i)
0148 
0149 
0150 static unsigned int
0151 flogetc(FLO *f)
0152 {
0153     if ( f && f->t ) {
0154     if ( f->i < S(f->t->text) )
0155         return (unsigned char)T(f->t->text)[f->i++];
0156     f->t = f->t->next;
0157     f->i = 0;
0158     return flogetc(f);
0159     }
0160     return EOF;
0161 }
0162 
0163 
0164 static void
0165 splitline(Line *t, int cutpoint)
0166 {
0167     if ( t && (cutpoint < S(t->text)) ) {
0168     Line *tmp = calloc(1, sizeof *tmp);
0169 
0170     tmp->next = t->next;
0171     t->next = tmp;
0172 
0173     SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
0174     EXPAND(tmp->text) = 0;
0175     S(tmp->text)--;
0176     
0177     S(t->text) = cutpoint;
0178     }
0179 }
0180 
/* forget an earlier checkline() verdict: with the CHECKED bit cleared,
 * ishr()/issetext()/iscodefence() will classify the line again.
 */
0181 #define UNCHECK(l) ((l)->flags &= ~CHECKED)
0182 
/* only usable inside checkline(); it reads and writes that function's
 * locals `fenced`, `other`, `l` and `c`.   Once a fence character has
 * been seen, the current character is booked as "other" and (unless it
 * is a space) the count increment already made for it is taken back;
 * before any fence character has been seen, statement <t> is executed.
 */
0183 #define UNLESS_FENCED(t) if (fenced) { \
0184     other = 1; l->count += (c == ' ' ? 0 : -1); \
0185   } else { t; }
0186 
0187 /*
0188  * walk a line, seeing if it's any of half a dozen interesting regular
0189  * types.
0190  */
/* The line is marked CHECKED and the verdict is left in l->kind, which
 * stays chk_text unless something more specific is recognised.   l->count
 * accumulates the non-space characters seen, less the ones booked as
 * "other".   <flags> is only consulted for MKD_FENCEDCODE.
 */
0191 static void
0192 checkline(Line *l, mkd_flag_t flags)
0193 {
0194     int eol, i;
0195     int dashes = 0, spaces = 0,
0196     equals = 0, underscores = 0,
0197     stars = 0, tildes = 0, other = 0,
0198     backticks = 0, fenced = 0;
0199 
0200     l->flags |= CHECKED;
0201     l->kind = chk_text;
0202     l->count = 0;
0203     
     /* four or more columns of leading whitespace make it a code line */
0204     if (l->dle >= 4) { l->kind=chk_code; return; }
0205 
     /* leave trailing whitespace out of the scan */
0206     for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
0207     ;
0208 
0209     for (i=l->dle; i<eol; i++) {
0210     register int c = T(l->text)[i];
0211     int is_fence_char = 0;
0212 
0213     if ( c != ' ' ) l->count++;
0214 
0215     switch (c) {
0216     case '-':  UNLESS_FENCED(dashes = 1); break;
0217     case ' ':  UNLESS_FENCED(spaces = 1); break;
0218     case '=':  equals = 1; break;
0219     case '_':  UNLESS_FENCED(underscores = 1); break;
0220     case '*':  stars = 1; break;
0221     default:
0222         if ( is_flag_set(flags, MKD_FENCEDCODE) ) {
         /* a fence character that follows "other" text leaves
          * the line as plain text
          */
0223         switch (c) {
0224         case '~':  if (other) return; is_fence_char = 1; tildes = 1; break;
0225         case '`':  if (other) return; is_fence_char = 1; backticks = 1; break;
0226         }
0227         if (is_fence_char) {
0228             fenced = 1;
0229             break;
0230         }
0231         }
         /* anything else is uncounted; it is only tolerated after
          * a fence character has been seen
          */
0232         other = 1;
0233         l->count--;
0234         if (!fenced) return;
0235     }
0236     }
0237 
     /* more than one kind of marker character: leave it as plain text */
0238     if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
0239     return;
0240 
     /* markers with spaces between them can only be a horizontal rule,
      * and only '_', '*' and '-' qualify for that
      */
0241     if ( spaces ) {
0242     if ( (underscores || stars || dashes) )
0243         l->kind = chk_hr;
0244     return;
0245     }
0246 
0247     if ( stars || underscores ) { l->kind = chk_hr; }
0248     else if ( dashes ) { l->kind = chk_dash; }
0249     else if ( equals ) { l->kind = chk_equal; }
0250     else if ( tildes ) { l->kind = chk_tilde; }
0251     else if ( backticks ) { l->kind = chk_backtick; }
0252 }
0253 
0254 
0255 
0256 /* markdown only does special handling of comments if the comment end
0257  * is at the end of a line
0258  */
0259 static Line *
0260 commentblock(Paragraph *p, int *unclosed)
0261 {
0262     Line *t, *ret;
0263     char *end;
0264 
0265        for ( t = p->text; t ; t = t->next) {
0266        if ( end = strstr(T(t->text), "-->") ) {
0267            if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) )
0268            continue;
0269            /*splitline(t, 3 + (end - T(t->text)) );*/
0270            ret = t->next;
0271            t->next = 0;
0272            return ret;
0273        }
0274     }
0275 
0276     *unclosed = 1;
0277     return t;
0278 
0279 }
0280 
0281 
/*
 * find the end of the html block that paragraph <p> starts with.
 *
 * <tag> describes the opening tag (compile_document() passes in the
 * result of isopentag().)   The chain of lines is cut after the block
 * and the return value is the first line after it, or 0 if nothing
 * follows.   *unclosed is cleared on entry and set to 1 when the end
 * of the block cannot be found.
 */
0282 static Line *
0283 htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
0284 {
0285     Line *ret;
0286     FLO f = { p->text, 0 };
0287     int c;
0288     int i, closing, depth=0;
0289 
0290     *unclosed = 0;
0291     
0292     if ( tag == &comment )
0293     return commentblock(p, unclosed);
0294     
     /* a self-closing tag makes a block out of just the first line */
0295     if ( tag->selfclose ) {
0296     ret = f.t->next;
0297     f.t->next = 0;
0298     return ret;
0299     }
0300 
     /* read character by character, across line boundaries */
0301     while ( (c = flogetc(&f)) != EOF ) {
0302     if ( c == '<' ) {
0303         /* tag? */
0304         c = flogetc(&f);
0305         if ( c == '!' ) { /* comment? */
0306         if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
0307             /* yes */
0308             while ( (c = flogetc(&f)) != EOF ) {
0309             if ( c == '-' && flogetc(&f) == '-'
0310                       && flogetc(&f) == '>')
0311                   /* consumed whole comment */
0312                   break;
0313             }
0314         }
0315         }
0316         else { 
0317         if ( closing = (c == '/') ) c = flogetc(&f);
0318 
         /* match the tag name against tag->id; the input is
          * upper-cased before the comparison
          */
0319         for ( i=0; i < tag->size; i++, c=flogetc(&f) ) {
0320             if ( tag->id[i] != toupper(c) )
0321             break;
0322         }
0323 
         /* the whole name matched and the name ends here */
0324         if ( (i == tag->size) && !isalnum(c) ) {
             /* nested tags of the same name are counted, so only
              * the matching close tag ends the block
              */
0325             depth = depth + (closing ? -1 : 1);
0326             if ( depth == 0 ) {
0327             while ( c != EOF && c != '>' ) {
0328                 /* consume trailing gunk in close tag */
0329                 c = flogetc(&f);
0330             }
0331             if ( c == EOF )
0332                 break;
0333             if ( !f.t )
0334                 return 0;
             /* whatever follows the '>' goes onto a line of its
              * own, which becomes the first line after the block
              */
0335             splitline(f.t, floindex(f));
0336             ret = f.t->next;
0337             f.t->next = 0;
0338             return ret;
0339             }
0340         }
0341         }
0342     }
0343     }
     /* ran out of input before the block was closed */
0344     *unclosed = 1;
0345     return 0;
0346 }
0347 
0348 
0349 /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
0350  */
0351 static int
0352 isfootnote(Line *t)
0353 {
0354     int i;
0355 
0356     if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
0357     return 0;
0358 
0359     for ( ++i; i < S(t->text) ; ++i ) {
0360     if ( T(t->text)[i] == '[' )
0361         return 0;
0362     else if ( T(t->text)[i] == ']' )
0363         return ( T(t->text)[i+1] == ':' ) ;
0364     }
0365     return 0;
0366 }
0367 
0368 
0369 static inline int
0370 isquote(Line *t)
0371 {
0372     return (t->dle < 4 && T(t->text)[t->dle] == '>');
0373 }
0374 
0375 
0376 static inline int
0377 iscode(Line *t)
0378 {
0379     return (t->dle >= 4);
0380 }
0381 
0382 
0383 static inline int
0384 ishr(Line *t, mkd_flag_t flags)
0385 {
0386     if ( ! (t->flags & CHECKED) )
0387     checkline(t, flags);
0388 
0389     if ( t->count > 2 )
0390     return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
0391     return 0;
0392 }
0393 
0394 
0395 static int
0396 issetext(Line *t, int *htyp, mkd_flag_t flags)
0397 {
0398     Line *n;
0399     
0400     /* check for setext-style HEADER
0401      *                        ======
0402      */
0403 
0404     if ( (n = t->next) ) {
0405     if ( !(n->flags & CHECKED) )
0406         checkline(n, flags);
0407 
0408     if ( n->kind == chk_dash || n->kind == chk_equal ) {
0409         *htyp = SETEXT;
0410         return 1;
0411     }
0412     }
0413     return 0;
0414 }
0415 
0416 
0417 static int
0418 ishdr(Line *t, int *htyp, mkd_flag_t flags)
0419 {
0420     /* ANY leading `#`'s make this into an ETX header
0421      */
0422     if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
0423     *htyp = ETX;
0424     return 1;
0425     }
0426 
0427     /* And if not, maybe it's a SETEXT header instead
0428      */
0429     return issetext(t, htyp, flags);
0430 }
0431 
0432 
0433 static inline int
0434 end_of_block(Line *t, mkd_flag_t flags)
0435 {
0436     int dummy;
0437     
0438     if ( !t )
0439     return 0;
0440     
0441     return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) );
0442 }
0443 
0444 
0445 static Line*
0446 is_discount_dt(Line *t, int *clip, mkd_flag_t flags)
0447 {
0448     if ( !is_flag_set(flags, MKD_NODLDISCOUNT)
0449        && t
0450        && t->next
0451        && (S(t->text) > 2)
0452        && (t->dle == 0)
0453        && (T(t->text)[0] == '=')
0454        && (T(t->text)[S(t->text)-1] == '=') ) {
0455     if ( t->next->dle >= 4 ) {
0456         *clip = 4;
0457         return t;
0458     }
0459     else
0460         return is_discount_dt(t->next, clip, flags);
0461     }
0462     return 0;
0463 }
0464 
0465 
0466 static int
0467 is_extra_dd(Line *t)
0468 {
0469     return (t->dle < 4) && (T(t->text)[t->dle] == ':')
0470             && isspace(T(t->text)[t->dle+1]);
0471 }
0472 
0473 
0474 static Line*
0475 is_extra_dt(Line *t, int *clip, mkd_flag_t flags)
0476 {
0477     if ( is_flag_set(flags, MKD_DLEXTRA)
0478        && t
0479        && t->next && S(t->text) && T(t->text)[0] != '='
0480               && T(t->text)[S(t->text)-1] != '=') {
0481     Line *x;
0482     
0483     if ( iscode(t) || end_of_block(t, flags) )
0484         return 0;
0485 
0486     if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
0487         *clip = x->dle+2;
0488         return t;
0489     }
0490     
0491     if ( x=is_extra_dt(t->next, clip, flags) )
0492         return x;
0493     }
0494     return 0;
0495 }
0496 
0497 
0498 static Line*
0499 isdefinition(Line *t, int *clip, int *kind, mkd_flag_t flags)
0500 {
0501     Line *ret;
0502 
0503     *kind = 1;
0504     if ( ret = is_discount_dt(t,clip,flags) )
0505     return ret;
0506 
0507     *kind=2;
0508     return is_extra_dt(t,clip,flags);
0509 }
0510 
0511 
/*
 * does line <t> start a list item?
 *
 * Returns 0 if it doesn't, DL for a definition list, and UL or AL for
 * the other list kinds (see the individual cases.)   On a match *clip
 * and *list_type are filled in; for a definition list both are set by
 * isdefinition(), so *list_type then holds that function's kind code.
 */
0512 static int
0513 islist(Line *t, int *clip, mkd_flag_t flags, int *list_type)
0514 {
0515     int i, j;
0516     char *q;
0517     
0518     if ( end_of_block(t, flags) )
0519     return 0;
0520 
0521     if ( !(is_flag_set(flags, MKD_NODLIST) || is_flag_set(flags, MKD_STRICT))
0522                       && isdefinition(t,clip,list_type,flags) )
0523     return DL;
0524 
     /* bullet: one of * - + followed by whitespace.   The clip is where
      * the item text starts, but never more than 4.
      */
0525     if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
0526     i = nextnonblank(t, t->dle+1);
0527     *clip = (i > 4) ? 4 : i;
0528     *list_type = UL;
0529     return is_flag_set(flags, MKD_EXPLICITLIST) ? UL : AL;
0530     }
0531 
     /* otherwise the first word has to end with a '.' */
0532     if ( (j = nextblank(t,t->dle)) > t->dle ) {
0533     if ( T(t->text)[j-1] == '.' ) {
0534 
         /* a single letter and a '.': alphabetic list */
0535         if ( !(is_flag_set(flags, MKD_NOALPHALIST) || is_flag_set(flags, MKD_STRICT))
0536               && (j == t->dle + 2)
0537               && isalpha(T(t->text)[t->dle]) ) {
0538         j = nextnonblank(t,j);
0539         *clip = (j > 4) ? 4 : j;
0540         *list_type = AL;
0541         return AL;
0542         }
0543 
         /* a number running right up to the '.': ordered list.
          * strtoul() is only used to find where the digits stop;
          * its value is thrown away.
          */
0544         strtoul(T(t->text)+t->dle, &q, 10);
0545         if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
0546         j = nextnonblank(t,j);
0547         *clip = j;
0548         *list_type = OL;
0549         return AL;
0550         }
0551     }
0552     }
0553     return 0;
0554 }
0555 
0556 
0557 static Line *
0558 headerblock(Paragraph *pp, int htyp)
0559 {
0560     Line *ret = 0;
0561     Line *p = pp->text;
0562     int i, j;
0563 
0564     switch (htyp) {
0565     case SETEXT:
0566         /* p->text is header, p->next->text is -'s or ='s
0567          */
0568         pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
0569         
0570         ret = p->next->next;
0571         ___mkd_freeLine(p->next);
0572         p->next = 0;
0573         break;
0574 
0575     case ETX:
0576         /* p->text is ###header###, so we need to trim off
0577          * the leading and trailing `#`'s
0578          */
0579 
0580         for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
0581                                && (i < 6); i++)
0582         ;
0583 
0584         pp->hnumber = i;
0585 
0586         while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
0587         ++i;
0588 
0589         CLIP(p->text, 0, i);
0590         UNCHECK(p);
0591 
0592         for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
0593         ;
0594 
0595         while ( j && isspace(T(p->text)[j-1]) )
0596         --j;
0597 
0598         S(p->text) = j;
0599 
0600         ret = p->next;
0601         p->next = 0;
0602         break;
0603     }
0604     return ret;
0605 }
0606 
0607 
0608 static Line *
0609 codeblock(Paragraph *p)
0610 {
0611     Line *t = p->text, *r;
0612 
0613     for ( ; t; t = r ) {
0614     __mkd_trim_line(t,4);
0615 
0616     if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
0617         ___mkd_freeLineRange(t,r);
0618         t->next = 0;
0619         return r;
0620     }
0621     }
0622     return t;
0623 }
0624 
0625 
0626 static int
0627 iscodefence(Line *r, int size, line_type kind, mkd_flag_t flags)
0628 {
0629     if ( !is_flag_set(flags, MKD_FENCEDCODE) )
0630     return 0;
0631 
0632     if ( !(r->flags & CHECKED) )
0633     checkline(r, flags);
0634 
0635     if ( kind )
0636     return (r->kind == kind) && (r->count >= size);
0637     else
0638     return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
0639 }
0640 
0641 
0642 static Paragraph *
0643 fencedcodeblock(ParagraphRoot *d, Line **ptr, mkd_flag_t flags)
0644 {
0645     Line *first, *r;
0646     Paragraph *ret;
0647 
0648     first = (*ptr);
0649 
0650     /* don't allow zero-length code fences
0651     */
0652     if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) )
0653     return 0;
0654 
0655     /* find the closing fence, discard the fences,
0656     * return a Paragraph with the contents
0657     */
0658     for ( r = first; r && r->next; r = r->next )
0659     if ( iscodefence(r->next, first->count, first->kind, flags) ) {
0660         (*ptr) = r->next->next;
0661         ret = Pp(d, first->next, CODE);
0662         if (S(first->text) - first->count > 0) {
0663         char *lang_attr = T(first->text) + first->count;
0664         while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++;
0665         ret->lang = strdup(lang_attr);
0666         }
0667         else {
0668         ret->lang = 0;
0669     }
0670     ___mkd_freeLine(first);
0671     ___mkd_freeLine(r->next);
0672     r->next = 0;
0673     return ret;
0674     }
0675     return 0;
0676 }
0677 
0678 
0679 static int
0680 centered(Line *first, Line *last)
0681 {
0682 
0683     if ( first&&last ) {
0684     int len = S(last->text);
0685 
0686     if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
0687                && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
0688         CLIP(first->text, 0, 2);
0689         S(last->text) -= 2;
0690         return CENTER;
0691     }
0692     }
0693     return 0;
0694 }
0695 
0696 
0697 static int
0698 endoftextblock(Line *t, int toplevelblock, mkd_flag_t flags)
0699 {
0700     int z;
0701 
0702     if ( end_of_block(t, flags) || isquote(t) )
0703     return 1;
0704 
0705     /* HORRIBLE STANDARDS KLUDGES:
0706      * 1. non-toplevel paragraphs absorb adjacent code blocks
0707      * 2. Toplevel paragraphs eat absorb adjacent list items,
0708      *    but sublevel blocks behave properly.
0709      * (What this means is that we only need to check for code
0710      *  blocks at toplevel, and only check for list items at
0711      *  nested levels.)
0712      */
0713     return toplevelblock ? 0 : islist(t,&z,flags,&z);
0714 }
0715 
0716 
0717 static Line *
0718 textblock(Paragraph *p, int toplevel, mkd_flag_t flags)
0719 {
0720     Line *t, *next;
0721 
0722     for ( t = p->text; t ; t = next ) {
0723     if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
0724         p->align = centered(p->text, t);
0725         t->next = 0;
0726         return next;
0727     }
0728     }
0729     return t;
0730 }
0731 
0732 
/* length of the id: or class: kind in a special div-not-quote block
 * (0 if the string starts with neither; case is ignored.)
 */
static int
szmarkerclass(char *p)
{
    static const struct { const char *name; int len; } kinds[] = {
        { "id:",    3 },
        { "class:", 6 },
    };
    unsigned k;

    for ( k = 0; k < sizeof kinds / sizeof kinds[0]; k++ )
        if ( strncasecmp(p, kinds[k].name, kinds[k].len) == 0 )
            return kinds[k].len;

    return 0;
}
0744 
0745 
0746 /*
0747  * check if the first line of a quoted block is the special div-not-quote
0748  * marker %[kind:]name%
0749  */
0750 #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
0751 
0752 static int
0753 isdivmarker(Line *p, int start, mkd_flag_t flags)
0754 {
0755     char *s;
0756     int last, i;
0757 
0758     if ( is_flag_set(flags, MKD_NODIVQUOTE) || is_flag_set(flags, MKD_STRICT) )
0759     return 0;
0760 
0761     start = nextnonblank(p, start);
0762     last= S(p->text) - (1 + start);
0763     s   = T(p->text) + start;
0764 
0765     if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
0766     return 0;
0767 
0768     i = szmarkerclass(s+1);
0769 
0770     if ( !iscsschar(s[i+1]) )
0771     return 0;
0772     while ( ++i < last )
0773     if ( !(isdigit(s[i]) || iscsschar(s[i])) )
0774         return 0;
0775 
0776     return 1;
0777 }
0778 
0779 
0780 /*
0781  * accumulate a blockquote.
0782  *
0783  * one sick horrible thing about blockquotes is that even though
0784  * it just takes ^> to start a quote, following lines, if quoted,
0785  * assume that the prefix is ``> ''.   This means that code needs
0786  * to be indented *5* spaces from the leading '>', but *4* spaces
0787  * from the start of the line.   This does not appear to be 
0788  * documented in the reference implementation, but it's the
0789  * way the markdown sample web form at Daring Fireball works.
0790  */
/* The quote prefix is stripped from every quoted line in the block and
 * the first line after the block is returned.   If the first line of
 * the stripped block is a %[kind:]name% marker, it is removed and turned
 * into an attribute string in p->ident.
 */
0791 static Line *
0792 quoteblock(Paragraph *p, mkd_flag_t flags)
0793 {
0794     Line *t, *q;
0795     int qp;
0796 
0797     for ( t = p->text; t ; t = q ) {
0798     if ( isquote(t) ) {
0799         /* clip leading spaces */
0800         for (qp = 0; T(t->text)[qp] != '>'; qp ++)
0801         /* assert: the first nonblank character on this line
0802          * will be a >
0803          */;
0804         /* clip '>' */
0805         qp++;
0806         /* clip next space, if any */
0807         if ( T(t->text)[qp] == ' ' )
0808         qp++;
0809         __mkd_trim_line(t,qp);
         /* the text changed, so any earlier checkline() verdict is stale */
0810         UNCHECK(t);
0811     }
0812 
0813     q = skipempty(t->next);
0814 
     /* the quote ends at the end of the input, or after a run of empty
      * lines unless the line after them is a quoted line that isn't a
      * div marker.   The empty lines are freed.
      */
0815     if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
0816         ___mkd_freeLineRange(t, q);
0817         t = q;
0818         break;
0819     }
0820     }
0821     if ( isdivmarker(p->text,0,flags) ) {
0822     char *prefix = "class";
0823     int i;
0824     
     /* unhook the marker line from the block */
0825     q = p->text;
0826     p->text = p->text->next;
0827 
0828     if ( (i = szmarkerclass(1+T(q->text))) == 3 )
0829         /* and this would be an "%id:" prefix */
0830         prefix="id";
0831         
     /* build prefix="name", leaving out the %'s and the kind: part */
0832     if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
0833         sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
0834                              T(q->text)+(i+1) );
0835 
0836     ___mkd_freeLine(q);
0837     }
0838     return t;
0839 }
0840 
0841 
/* predicate on a single Line; listitem() takes one as an optional extra
 * end-of-item test.
 */
0842 typedef int (*linefn)(Line *);
0843 
0844 
0845 /*
0846  * pull in a list block.  A list block starts with a list marker and
0847  * runs until the next list marker, the next non-indented paragraph,
0848  * or EOF.   You do not have to indent nonblank lines after the list
0849  * marker, but multiple paragraphs need to start with a 4-space indent.
0850  */
/* <p>->text is the first line of the item and <indent> is how much to
 * clip off that line.   <check>, if given, is an additional test that
 * ends the item when it accepts an under-indented line.   The chain is
 * cut after the item and the first line after it is returned (0 if the
 * item runs to the end of the input.)
 */
0851 static Line *
0852 listitem(Paragraph *p, int indent, mkd_flag_t flags, linefn check)
0853 {
0854     Line *t, *q;
0855     int clip = indent;
0856     int z;
0857 #ifdef GITHUB_CHECKBOX
0858     int firstpara = 1;
0859     int ischeck;
0860 #define CHECK_NOT 0
0861 #define CHECK_NO 1
0862 #define CHECK_YES 2
0863 #endif
0864 
0865     for ( t = p->text; t ; t = q) {
0866     UNCHECK(t);
0867     __mkd_trim_line(t, clip);
0868 
0869 #ifdef GITHUB_CHECKBOX
     /* only the very first line of the item is tested for a leading
      * [ ] or [x]; the box is cut off and remembered in p->flags
      */
0870     if ( firstpara ) {
0871         ischeck = CHECK_NOT;
0872         if ( strncmp(T(t->text)+t->dle, "[ ]", 3) == 0 )
0873         ischeck = CHECK_NO;
0874         else if ( strncasecmp(T(t->text)+t->dle, "[x]", 3) == 0 )
0875         ischeck = CHECK_YES;
0876 
0877         if ( ischeck != CHECK_NOT ) {
0878         __mkd_trim_line(t, 3);
0879         p->flags |= GITHUB_CHECK;
0880         if ( ischeck == CHECK_YES )
0881             p->flags |= IS_CHECKED;
0882         }
0883         firstpara = 0;
0884     }
0885 #endif
0886 
0887         /* even though we had to trim a long leader off this item,
0888          * the indent for trailing paragraphs is still 4...
0889      */
0890     if (indent > 4) {
0891         indent = 4;
0892     }
     /* nothing but empty lines left: free them, the item ends here */
0893     if ( (q = skipempty(t->next)) == 0 ) {
0894         ___mkd_freeLineRange(t,q);
0895         return 0;
0896     }
0897 
0898     /* after a blank line, the next block needs to start with a line
0899      * that's indented 4(? -- reference implementation allows a 1
0900      * character indent, but that has unfortunate side effects here)
0901      * spaces, but after that the line doesn't need any indentation
0902      */
0903     if ( q != t->next ) {
0904         if (q->dle < indent) {
             /* the empty lines stay in front of what is returned */
0905         q = t->next;
0906         t->next = 0;
0907         return q;
0908         }
0909         /* indent at least 2, and at most
0910          * as far as the initial line was indented. */
0911         indent = clip ? clip : 2;
0912     }
0913 
     /* an under-indented rule, list marker, or <check> hit ends the item,
      * unless the line is the text of a setext header
      */
0914     if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z)
0915                        || (check && (*check)(q)))
0916                    && !issetext(q,&z,flags) ) {
0917         q = t->next;
0918         t->next = 0;
0919         return q;
0920     }
0921 
     /* clip the next line by its own indent, up to <indent> */
0922     clip = (q->dle > indent) ? indent : q->dle;
0923     }
0924     return t;
0925 }
0926 
0927 
/*
 * build a definition list out of the lines that <top> points at.
 *
 * Each pass of the loop picks up one group of label lines plus the
 * definition(s) that follow it; every definition becomes a LISTITEM
 * paragraph whose ->down is the compiled definition and whose ->text
 * is the chain of label lines (0 for second and later definitions of
 * the same labels.)   <clip> is handed to listitem(); <kind> is
 * overwritten by isdefinition().   On return top->text is 0, top->down
 * is the list of items, and the return value is the unprocessed
 * remainder of the input.
 */
0928 static Line *
0929 definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
0930 {
0931     ParagraphRoot d = { 0, 0 };
0932     Paragraph *p;
0933     Line *q = top->text, *text = 0, *labels; 
0934     int z, para;
0935 
0936     while (( labels = q )) {
0937 
     /* q becomes the last label line of this group */
0938     if ( (q = isdefinition(labels, &z, &kind, f->flags)) == 0 )
0939         break;
0940 
0941     if ( (text = skipempty(q->next)) == 0 )
0942         break;
0943 
     /* empty lines between labels and definition are dropped, but
      * remembered: they make the definition a paragraph
      */
0944     if ( para = (text != q->next) )
0945         ___mkd_freeLineRange(q, text);
0946     
0947     q->next = 0; 
     /* strip the leading and trailing = off every label line */
0948     if ( kind == 1 /* discount dl */ )
0949         for ( q = labels; q; q = q->next ) {
0950         CLIP(q->text, 0, 1);
0951         UNCHECK(q);
0952         S(q->text)--;
0953         }
0954 
0955     dd_block:
0956     p = Pp(&d, text, LISTITEM);
0957 
0958     text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
0959     p->down = compile(p->text, 0, f);
0960     p->text = labels; labels = 0;
0961 
0962     if ( para && p->down ) p->down->align = PARA;
0963 
0964     if ( (q = skipempty(text)) == 0 )
0965         break;
0966 
     /* drop empty lines after the definition; the stack-allocated
      * anchor only serves as the predecessor that
      * ___mkd_freeLineRange() is given
      */
0967     if ( para = (q != text) ) {
0968         Line anchor;
0969 
0970         anchor.next = text;
0971         ___mkd_freeLineRange(&anchor,q);
0972         text = q;
0973         
0974     }
0975 
     /* another `:` definition for the same labels? */
0976     if ( kind == 2 && is_extra_dd(q) )
0977         goto dd_block;
0978     }
0979     top->text = 0;
0980     top->down = T(d);
0981     return text;
0982 }
0983 
0984 
/*
 * build a list out of the lines that <top> points at.
 *
 * Each pass of the loop pulls in one item with listitem() and compiles
 * it into a LISTITEM paragraph.   The list goes on for as long as the
 * next nonempty line is a list marker for which islist() returns
 * <list_class>; islist() also supplies the clip for the next item.   On
 * return top->text is 0, top->down is the list of items, and the return
 * value is the unprocessed remainder of the input.
 */
0985 static Line *
0986 enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
0987 {
0988     ParagraphRoot d = { 0, 0 };
0989     Paragraph *p;
0990     Line *q = top->text, *text;
0991     int para = 0, z;
0992 
0993     while (( text = q )) {
0994     
0995     p = Pp(&d, text, LISTITEM);
0996     text = listitem(p, clip, f->flags, 0);
0997 
0998     p->down = compile(p->text, 0, f);
0999     p->text = 0;
1000 
     /* empty lines came before this item */
1001     if ( para && p->down ) p->down->align = PARA;
1002 
1003     if ( (q = skipempty(text)) == 0
1004                  || islist(q, &clip, f->flags, &z) != list_class )
1005         break;
1006 
     /* empty lines follow this item: free them (the stack-allocated
      * anchor only serves as the predecessor that
      * ___mkd_freeLineRange() is given) and mark the item as a paragraph
      */
1007     if ( para = (q != text) ) {
1008         Line anchor;
1009 
1010         anchor.next = text;
1011         ___mkd_freeLineRange(&anchor, q);
1012 
1013         if ( p->down ) p->down->align = PARA;
1014     }
1015     }
1016     top->text = 0;
1017     top->down = T(d);
1018     return text;
1019 }
1020 
1021 
/* if <c> can open a quoted title, return the character that closes
 * it: quotes close themselves and '(' is closed by ')'.   Returns 0
 * for anything else.
 */
static int
tgood(char c)
{
    if ( c == '\'' || c == '"' )
        return c;
    if ( c == '(' )
        return ')';
    return 0;
}
1032 
1033 
1034 /*
1035  * eat lines for a markdown extra footnote
1036  */
1037 static Line *
1038 extrablock(Line *p)
1039 {
1040     Line *np;
1041     
1042     while ( p && p->next ) {
1043     np = p->next;
1044 
1045     if ( np->dle < 4 && np->dle < S(np->text) ) {
1046         p->next = 0;
1047         return np;
1048     }
1049     __mkd_trim_line(np,4);
1050     p = np;
1051     }
1052     return 0;
1053 }
1054 
1055 
1056 /*
1057  * add a new (image or link) footnote to the footnote table
1058  */
/* <p> is a line that isfootnote() accepted.   A new entry is appended to
 * f->footnotes->note and filled in from the line: the tag, and then
 * either a compiled block of text (markdown extra footnotes) or a link
 * with an optional =WxH size and an optional quoted title, which may sit
 * on the following line.   Returns the first line after the footnote.
 */
1059 static Line*
1060 addfootnote(Line *p, MMIOT* f)
1061 {
1062     int j, i;
1063     int c;
1064     Line *np = p->next;
1065 
1066     Footnote *foot = &EXPAND(f->footnotes->note);
1067     
1068     CREATE(foot->tag);
1069     CREATE(foot->link);
1070     CREATE(foot->title);
1071     foot->text = 0;
1072     foot->flags = foot->height = foot->width = 0;
1073 
1074     /* keep the footnote label */
1075     for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
1076     EXPAND(foot->tag) = T(p->text)[j];
     /* null-terminate, but don't count the terminator in the size */
1077     EXPAND(foot->tag) = 0;
1078     S(foot->tag)--;
1079 
1080     /* consume the closing ]: */
1081     j = nextnonblank(p, j+2);
1082 
1083     if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
1084     /* markdown extra footnote: All indented lines past this point;
1085      * the first line includes the footnote reference, so we need to
1086      * snip that out as we go.
1087      */
1088     foot->flags |= EXTRA_FOOTNOTE;
1089     __mkd_trim_line(p,j);
1090 
1091     np = extrablock(p);
1092 
     /* the lines stay with the compiled footnote, so <p> is not freed */
1093     foot->text = compile(p, 0, f);
1094 
1095     return np;
1096     }
1097 
     /* the link is everything up to the next whitespace */
1098     while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
1099     EXPAND(foot->link) = T(p->text)[j++];
1100     EXPAND(foot->link) = 0;
1101     S(foot->link)--;
1102     j = nextnonblank(p,j);
1103 
     /* optional =<width>x<height> */
1104     if ( T(p->text)[j] == '=' ) {
1105     sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
1106     j = nextblank(p, j);
1107     j = nextnonblank(p,j);
1108     }
1109 
1110 
     /* nothing left on this line, and the next line is indented and
      * opens a title: carry on with that line instead
      */
1111     if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
1112     ___mkd_freeLine(p);
1113     p = np;
1114     np = p->next;
1115     j = p->dle;
1116     }
1117 
1118     if ( (c = tgood(T(p->text)[j])) ) {
1119     /* Try to take the rest of the line as a comment; read to
1120      * EOL, then shrink the string back to before the final
1121      * quote.
1122      */
1123     ++j;    /* skip leading quote */
1124 
1125     while ( j < S(p->text) )
1126         EXPAND(foot->title) = T(p->text)[j++];
1127 
1128     while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
1129         --S(foot->title);
1130     if ( S(foot->title) )   /* skip trailing quote */
1131         --S(foot->title);
1132     EXPAND(foot->title) = 0;
1133     --S(foot->title);
1134     }
1135 
1136     ___mkd_freeLine(p);
1137     return np;
1138 }
1139 
1140 
1141 /*
1142  * allocate a paragraph header, link it to the
1143  * tail of the current document
1144  */
1145 static Paragraph *
1146 Pp(ParagraphRoot *d, Line *ptr, int typ)
1147 {
1148     Paragraph *ret = calloc(sizeof *ret, 1);
1149 
1150     ret->text = ptr;
1151     ret->typ = typ;
1152 
1153     return ATTACH(*d, ret);
1154 }
1155 
1156 
1157 
1158 static Line*
1159 consume(Line *ptr, int *eaten)
1160 {
1161     Line *next;
1162     int blanks=0;
1163 
1164     for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
1165     next = ptr->next;
1166     ___mkd_freeLine(ptr);
1167     }
1168     if ( ptr ) *eaten = blanks;
1169     return ptr;
1170 }
1171 
1172 
1173 typedef ANCHOR(Line) Cache;
1174 
1175 static void
1176 uncache(Cache *cache, ParagraphRoot *d, MMIOT *f)
1177 {
1178     Paragraph *p;
1179 
1180     if ( T(*cache) ) {
1181     E(*cache)->next = 0;
1182     p = Pp(d, 0, SOURCE);
1183     p->down = compile(T(*cache), 1, f);
1184     T(*cache) = E(*cache) = 0;
1185     }
1186 }
1187 
1188 
1189 /*
1190  * top-level compilation; break the document into
1191  * style, html, and source blocks with footnote links
1192  * weeded out.
1193  */
/* Ordinary lines are collected in a cache and compiled as one SOURCE
 * paragraph whenever an html/style block or a fenced code block cuts in,
 * and again at the end of the input.   Returns the head of the resulting
 * paragraph list.
 */
1194 static Paragraph *
1195 compile_document(Line *ptr, MMIOT *f)
1196 {
1197     ParagraphRoot d = { 0, 0 };
1198     Cache source = { 0, 0 };
1199     Paragraph *p = 0;
1200     struct kw *tag;
1201     int eaten, unclosed;
     /* set when the previous line was blank or closed off a block; a code
      * fence is only recognised in that position
      */
1202     int previous_was_break = 1;
1203 
1204     while ( ptr ) {
1205     if ( !is_flag_set(f->flags, MKD_NOHTML) && (tag = isopentag(ptr)) ) {
1206         int blocktype;
1207         /* If we encounter a html/style block, compile and save all
1208          * of the cached source BEFORE processing the html/style.
1209          */
1210         uncache(&source, &d, f);
1211         
1212         if (is_flag_set(f->flags, MKD_NOSTYLE) )
1213         blocktype = HTML;
1214         else
1215         blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
1216         p = Pp(&d, ptr, blocktype);
1217         ptr = htmlblock(p, tag, &unclosed);
         /* a block that never closes is compiled as ordinary source */
1218         if ( unclosed ) {
1219         p->typ = SOURCE;
1220         p->down = compile(p->text, 1, f);
1221         p->text = 0;
1222         }
1223         previous_was_break = 1;
1224     }
1225     else if ( isfootnote(ptr) ) {
1226         /* footnotes, like cats, sleep anywhere; pull them
1227          * out of the input stream and file them away for
1228          * later processing
1229          */
1230         ptr = consume(addfootnote(ptr, f), &eaten);
1231         previous_was_break = 1;
1232     }
1233     else if ( previous_was_break && iscodefence(ptr,3,0,f->flags)) {
1234         uncache(&source, &d, f);
         /* on success fencedcodeblock() has moved ptr past the block;
          * on failure the fence line is cached like any other line
          */
1235         if ( !fencedcodeblock(&d, &ptr, f->flags) ) /* just source */
1236         goto attach;
1237     }
1238     else {
1239     attach:
1240         /* source; cache it up to wait for eof or the
1241          * next html/style block
1242          */
1243         ATTACH(source,ptr);
1244         previous_was_break = blankline(ptr);
1245         ptr = ptr->next;
1246     }
1247     }
1248     /* if there's any cached source at EOF, compile
1249      * it now.
1250      */
1251     uncache(&source, &d, f);
1252     return T(d);
1253 }
1254 
1255 
1256 static int
1257 first_nonblank_before(Line *j, int dle)
1258 {
1259     return (j->dle < dle) ? j->dle : dle;
1260 }
1261 
1262 
1263 static int
1264 actually_a_table(MMIOT *f, Line *pp)
1265 {
1266     Line *r;
1267     int j;
1268     int c;
1269 
1270     /* tables need to be turned on */
1271     if ( is_flag_set(f->flags, MKD_STRICT) || is_flag_set(f->flags, MKD_NOTABLES) )
1272     return 0;
1273 
1274     /* tables need three lines */
1275     if ( !(pp && pp->next && pp->next->next) ) {
1276     return 0;
1277     }
1278 
1279     /* all lines must contain |'s */
1280     for (r = pp; r; r = r->next )
1281     if ( !(r->flags & PIPECHAR) ) {
1282         return 0;
1283     }
1284 
1285     /* if the header has a leading |, all lines must have leading |'s */
1286     if ( T(pp->text)[pp->dle] == '|' ) {
1287     for ( r = pp; r; r = r->next )
1288         if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
1289         return 0;
1290         }
1291     }
1292 
1293     /* second line must be only whitespace, -, |, or : */
1294     r = pp->next;
1295 
1296     for ( j=r->dle; j < S(r->text); ++j ) {
1297     c = T(r->text)[j];
1298 
1299     if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
1300         return 0;
1301     }
1302     }
1303 
1304     return 1;
1305 }
1306 
1307 
1308 /*
1309  * break a collection of markdown input into
1310  * blocks of lists, code, html, and text to
1311  * be marked up.
1312  */
1313 static Paragraph *
1314 compile(Line *ptr, int toplevel, MMIOT *f)
1315 {
1316     ParagraphRoot d = { 0, 0 };
1317     Paragraph *p = 0;
1318     Line *r;
1319     int para = toplevel;
1320     int blocks = 0;
1321     int hdr_type, list_type, list_class, indent;
1322 
1323     ptr = consume(ptr, &para);
1324 
1325     while ( ptr ) {
1326 
1327     if ( iscode(ptr) ) {
1328         p = Pp(&d, ptr, CODE);
1329         
1330         if ( is_flag_set(f->flags, MKD_1_COMPAT) ) {
1331         /* HORRIBLE STANDARDS KLUDGE: the first line of every block
1332          * has trailing whitespace trimmed off.
1333          */
1334         ___mkd_tidy(&p->text->text);
1335         }
1336         
1337         ptr = codeblock(p);
1338     }
1339     else if ( iscodefence(ptr,3,0,f->flags) && (p=fencedcodeblock(&d, &ptr, f->flags)) )
1340         /* yay, it's already done */ ;
1341     else if ( ishr(ptr, f->flags) ) {
1342         p = Pp(&d, 0, HR);
1343         r = ptr;
1344         ptr = ptr->next;
1345         ___mkd_freeLine(r);
1346     }
1347     else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
1348         if ( list_class == DL ) {
1349         p = Pp(&d, ptr, DL);
1350         ptr = definition_block(p, indent, f, list_type);
1351         }
1352         else {
1353         p = Pp(&d, ptr, list_type);
1354         ptr = enumerated_block(p, indent, f, list_class);
1355         }
1356     }
1357     else if ( isquote(ptr) ) {
1358         p = Pp(&d, ptr, QUOTE);
1359         ptr = quoteblock(p, f->flags);
1360         p->down = compile(p->text, 1, f);
1361         p->text = 0;
1362     }
1363     else if ( ishdr(ptr, &hdr_type, f->flags) ) {
1364         p = Pp(&d, ptr, HDR);
1365         ptr = headerblock(p, hdr_type);
1366     }
1367     else {
1368         /* either markup or an html block element
1369          */
1370         struct kw *tag;
1371         int unclosed = 1;
1372 
1373         p = Pp(&d, ptr, MARKUP);    /* default to regular markup,
1374                      * then check if it's an html
1375                      * block.   If it IS an html
1376                      * block, htmlblock() will
1377                      * populate this paragraph &
1378                      * all we need to do is reset
1379                      * the paragraph type to HTML,
1380                      * otherwise the paragraph
1381                      * remains empty and ready for
1382                      * processing with textblock()
1383                      */
1384         
1385         if ( !is_flag_set(f->flags, MKD_NOHTML) && (tag = isopentag(ptr)) ) {
1386         /* possibly an html block
1387          */
1388         
1389         ptr = htmlblock(p, tag, &unclosed);
1390         if ( ! unclosed ) {
1391             p->typ = HTML;
1392         }
1393         }
1394         if ( unclosed ) {
1395         ptr = textblock(p, toplevel, f->flags);
1396         /* tables are a special kind of paragraph */
1397         if ( actually_a_table(f, p->text) )
1398             p->typ = TABLE;
1399         }
1400     }
1401     if ( (para||toplevel) && !p->align )
1402         p->align = PARA;
1403 
1404     blocks++;
1405     para = toplevel || (blocks > 1);
1406     ptr = consume(ptr, &para);
1407 
1408     if ( para && !p->align )
1409         p->align = PARA;
1410 
1411     }
1412     return T(d);
1413 }
1414 
1415 
1416 /*
1417  * the guts of the markdown() function, ripped out so I can do
1418  * debugging.
1419  */
1420 
1421 /*
1422  * prepare and compile `text`, returning a Paragraph tree.
1423  */
1424 int
1425 mkd_compile(Document *doc, mkd_flag_t flags)
1426 {
1427     if ( !doc )
1428     return 0;
1429 
1430     flags &= USER_FLAGS;
1431     
1432     if ( doc->compiled ) {
1433     if ( doc->ctx->flags == flags && !doc->dirty)
1434         return 1;
1435     else {
1436         doc->compiled = doc->dirty = 0;
1437         if ( doc->code)
1438         ___mkd_freeParagraph(doc->code);
1439         if ( doc->ctx->footnotes )
1440         ___mkd_freefootnotes(doc->ctx);
1441     }
1442     }
1443 
1444     doc->compiled = 1;
1445     memset(doc->ctx, 0, sizeof(MMIOT) );
1446     doc->ctx->ref_prefix= doc->ref_prefix;
1447     doc->ctx->cb        = &(doc->cb);
1448     doc->ctx->flags     = flags;
1449     CREATE(doc->ctx->in);
1450     doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
1451     doc->ctx->footnotes->reference = 0;
1452     CREATE(doc->ctx->footnotes->note);
1453 
1454     mkd_initialize();
1455 
1456     doc->code = compile_document(T(doc->content), doc->ctx);
1457     qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note),
1458                 sizeof T(doc->ctx->footnotes->note)[0],
1459                        (stfu)__mkd_footsort);
1460     memset(&doc->content, 0, sizeof doc->content);
1461     return 1;
1462 }
1463