Warning, file /education/cantor/thirdparty/discount-2.2.6-patched/mkdio.c was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002  * mkdio -- markdown front end input functions
0003  *
0004  * Copyright (C) 2007 David L Parsons.
0005  * The redistribution terms are provided in the COPYRIGHT file that must
0006  * be distributed with this source code.
0007  */
0008 #include "config.h"
0009 #include <stdio.h>
0010 #include <stdlib.h>
0011 #include <ctype.h>
0012 
0013 #include "cstring.h"
0014 #include "markdown.h"
0015 #include "amalloc.h"
0016 
0017 typedef ANCHOR(Line) LineAnchor;
0018 
0019 
0020 /* create a new blank Document
0021  */
0022 Document*
0023 __mkd_new_Document()
0024 {
0025     Document *ret = calloc(sizeof(Document), 1);
0026 
0027     if ( ret ) {
0028     if ( ret->ctx = calloc(sizeof(MMIOT), 1) ) {
0029         ret->magic = VALID_DOCUMENT;
0030         return ret;
0031     }
0032     free(ret);
0033     }
0034     return 0;
0035 }
0036 
0037 
0038 /* add a line to the markdown input chain, expanding tabs and
0039  * noting the presence of special characters as we go.
0040  */
0041 void
0042 __mkd_enqueue(Document* a, Cstring *line)
0043 {
0044     Line *p = calloc(sizeof *p, 1);
0045     unsigned char c;
0046     int xp = 0;
0047     int           size = S(*line);
0048     unsigned char *str = (unsigned char*)T(*line);
0049 
0050     CREATE(p->text);
0051     ATTACH(a->content, p);
0052 
0053     while ( size-- ) {
0054     if ( (c = *str++) == '\t' ) {
0055         /* expand tabs into ->tabstop spaces.  We use ->tabstop
0056          * because the ENTIRE FREAKING COMPUTER WORLD uses editors
0057          * that don't do ^T/^D, but instead use tabs for indentation,
0058          * and, of course, set their tabs down to 4 spaces 
0059          */
0060         do {
0061         EXPAND(p->text) = ' ';
0062         } while ( ++xp % a->tabstop );
0063     }
0064     else if ( c >= ' ' ) {
0065         if ( c == '|' )
0066         p->flags |= PIPECHAR;
0067         EXPAND(p->text) = c;
0068         ++xp;
0069     }
0070     }
0071     EXPAND(p->text) = 0;
0072     S(p->text)--;
0073     p->dle = mkd_firstnonblank(p);
0074 }
0075 
0076 
0077 /* trim leading characters from a line, then adjust the dle.
0078  */
0079 void
0080 __mkd_trim_line(Line *p, int clip)
0081 {
0082     if ( clip >= S(p->text) ) {
0083     S(p->text) = p->dle = 0;
0084     T(p->text)[0] = 0;
0085     }
0086     else if ( clip > 0 ) {
0087     CLIP(p->text, 0, clip);
0088     p->dle = mkd_firstnonblank(p);
0089     }
0090 }
0091 
0092 
0093 /* build a Document from any old input.
0094  */
0095 typedef int (*getc_func)(void*);
0096 
0097 Document *
0098 populate(getc_func getc, void* ctx, mkd_flag_t flags)
0099 {
0100     Cstring line;
0101     Document *a = __mkd_new_Document();
0102     int c;
0103     int pandoc = 0;
0104 
0105     if ( !a ) return 0;
0106 
0107     a->tabstop = is_flag_set(flags, MKD_TABSTOP) ? 4 : TABSTOP;
0108 
0109     CREATE(line);
0110 
0111     while ( (c = (*getc)(ctx)) != EOF ) {
0112     if ( c == '\n' ) {
0113         if ( pandoc != EOF && pandoc < 3 ) {
0114         if ( S(line) && (T(line)[0] == '%') )
0115             pandoc++;
0116         else
0117             pandoc = EOF;
0118         }
0119         __mkd_enqueue(a, &line);
0120         S(line) = 0;
0121     }
0122     else if ( isprint(c) || isspace(c) || (c & 0x80) )
0123         EXPAND(line) = c;
0124     }
0125 
0126     if ( S(line) )
0127     __mkd_enqueue(a, &line);
0128 
0129     DELETE(line);
0130 
0131     if ( (pandoc == 3) && !(is_flag_set(flags, MKD_NOHEADER) || is_flag_set(flags, MKD_STRICT)) ) {
0132     /* the first three lines started with %, so we have a header.
0133      * clip the first three lines out of content and hang them
0134      * off header.
0135      */
0136     Line *headers = T(a->content);
0137 
0138     a->title = headers;             __mkd_trim_line(a->title, 1);
0139     a->author= headers->next;       __mkd_trim_line(a->author, 1);
0140     a->date  = headers->next->next; __mkd_trim_line(a->date, 1);
0141 
0142     T(a->content) = headers->next->next->next;
0143     }
0144 
0145     return a;
0146 }
0147 
0148 
0149 /* convert a file into a linked list
0150  */
0151 Document *
0152 mkd_in(FILE *f, mkd_flag_t flags)
0153 {
0154     return populate((getc_func)fgetc, f, flags & INPUT_MASK);
0155 }
0156 
0157 
0158 /* return a single character out of a buffer
0159  */
0160 int
0161 __mkd_io_strget(struct string_stream *in)
0162 {
0163     if ( !in->size ) return EOF;
0164 
0165     --(in->size);
0166 
0167     return *(in->data)++;
0168 }
0169 
0170 
0171 /* convert a block of text into a linked list
0172  */
0173 Document *
0174 mkd_string(const char *buf, int len, mkd_flag_t flags)
0175 {
0176     struct string_stream about;
0177 
0178     about.data = buf;
0179     about.size = len;
0180 
0181     return populate((getc_func)__mkd_io_strget, &about, flags & INPUT_MASK);
0182 }
0183 
0184 
0185 /* write the html to a file (xmlified if necessary)
0186  */
0187 int
0188 mkd_generatehtml(Document *p, FILE *output)
0189 {
0190     char *doc;
0191     int szdoc;
0192 
0193     DO_OR_DIE( szdoc = mkd_document(p,&doc) );
0194     if ( is_flag_set(p->ctx->flags, MKD_CDATA) )
0195     DO_OR_DIE( mkd_generatexml(doc, szdoc, output) );
0196     else if ( fwrite(doc, szdoc, 1, output) != 1 )
0197     return EOF;
0198     DO_OR_DIE( putc('\n', output) );
0199     return 0;
0200 }
0201 
0202 
0203 /* convert some markdown text to html
0204  */
0205 int
0206 markdown(Document *document, FILE *out, mkd_flag_t flags)
0207 {
0208     if ( mkd_compile(document, flags) ) {
0209     mkd_generatehtml(document, out);
0210     mkd_cleanup(document);
0211     return 0;
0212     }
0213     return -1;
0214 }
0215 
0216 
0217 /* anchor_format a string, returning the formatted string in malloc()ed space
0218  * MKD_URLENCODEDANCHOR is now perverted to being a html5 anchor
0219  *
0220  * !labelformat:  print all characters
0221  * labelformat && h4anchor: prefix nonalpha label with L,
0222  *                          expand all nonalnum, _, ':', '.' to hex
0223  *                          except space which maps to -
0224  * labelformat && !h4anchor:expand space to -, other isspace() & '%' to hex
0225  */
0226 static char *
0227 mkd_anchor_format(char *s, int len, int labelformat, mkd_flag_t flags)
0228 {
0229     char *res;
0230     unsigned char c;
0231     int i, needed, out = 0;
0232     int h4anchor = !is_flag_set(flags, MKD_URLENCODEDANCHOR);
0233     static const unsigned char hexchars[] = "0123456789abcdef";
0234 
0235     needed = labelformat ? (4*len) : len;
0236 
0237     if ( (res = malloc(needed)) == NULL )
0238     return NULL;
0239 
0240     if ( h4anchor && labelformat && !isalpha(s[0]) )
0241     res[out++] = 'L';
0242     
0243     
0244     for ( i=0; i < len ; i++ ) {
0245     c = s[i];
0246     if ( labelformat ) {
0247         if ( h4anchor
0248             ? (isalnum(c) || (c == '_') || (c == ':') || (c == '.' ) )
0249             : !(isspace(c) || c == '%') )
0250         res[out++] = c;
0251         else if ( c == ' ' )
0252         res[out++] = '-';
0253         else {
0254             res[out++] = h4anchor ? '-' : '%';
0255             res[out++] = hexchars[c >> 4 & 0xf];
0256             res[out++] = hexchars[c      & 0xf];
0257             if ( h4anchor )
0258             res[out++] = '-';
0259         }
0260     }
0261     else
0262         res[out++] = c;
0263     }
0264     
0265     res[out++] = 0;
0266     return res;
0267 } /* mkd_anchor_format */
0268 
0269 
0270 /* write out a Cstring, mangled into a form suitable for `<a href=` or `<a id=`
0271  */
0272 void
0273 mkd_string_to_anchor(char *s, int len, mkd_sta_function_t outchar,
0274                        void *out, int labelformat,
0275                        MMIOT *f)
0276 {
0277     char *res;
0278     char *line;
0279     int size;
0280 
0281     int i;
0282 
0283     size = mkd_line(s, len, &line, IS_LABEL);
0284 
0285     if ( !line )
0286     return;
0287 
0288     if ( f->cb->e_anchor )
0289     res = (*(f->cb->e_anchor))(line, size, f->cb->e_data);
0290     else
0291     res = mkd_anchor_format(line, size, labelformat, f->flags);
0292 
0293     free(line);
0294 
0295     if ( !res )
0296     return;
0297 
0298     for ( i=0; res[i]; i++ )
0299     (*outchar)(res[i], out);
0300 
0301     if ( f->cb->e_anchor ) {
0302     if ( f->cb->e_free )
0303         (*(f->cb->e_free))(res, f->cb->e_data);
0304     }
0305     else 
0306     free(res);
0307 }
0308 
0309 
0310 /*  ___mkd_reparse() a line
0311  */
0312 static void
0313 mkd_parse_line(char *bfr, int size, MMIOT *f, mkd_flag_t flags)
0314 {
0315     ___mkd_initmmiot(f, 0);
0316     f->flags = flags & USER_FLAGS;
0317     ___mkd_reparse(bfr, size, 0, f, 0);
0318     ___mkd_emblock(f);
0319 }
0320 
0321 
0322 /* ___mkd_reparse() a line, returning it in malloc()ed memory
0323  */
0324 int
0325 mkd_line(char *bfr, int size, char **res, mkd_flag_t flags)
0326 {
0327     MMIOT f;
0328     int len;
0329     
0330     mkd_parse_line(bfr, size, &f, flags);
0331 
0332     if ( len = S(f.out) ) {
0333     EXPAND(f.out) = 0;
0334     /* strdup() doesn't use amalloc(), so in an amalloc()ed
0335      * build this copies the string safely out of our memory
0336      * paranoia arena.  In a non-amalloc world, it's a spurious
0337      * memory allocation, but it avoids unintentional hilarity
0338      * with amalloc()
0339      */
0340     *res = strdup(T(f.out));
0341     }
0342     else {
0343      *res = 0;
0344      len = EOF;
0345      }
0346     ___mkd_freemmiot(&f, 0);
0347     return len;
0348 }
0349 
0350 
0351 /* ___mkd_reparse() a line, writing it to a FILE
0352  */
0353 int
0354 mkd_generateline(char *bfr, int size, FILE *output, mkd_flag_t flags)
0355 {
0356     MMIOT f;
0357     int status;
0358 
0359     mkd_parse_line(bfr, size, &f, flags);
0360     if ( is_flag_set(flags, MKD_CDATA) )
0361     status = mkd_generatexml(T(f.out), S(f.out), output) != EOF;
0362     else
0363     status = fwrite(T(f.out), S(f.out), 1, output) == S(f.out);
0364 
0365     ___mkd_freemmiot(&f, 0);
0366     return status ? 0 : EOF;
0367 }
0368 
0369 
0370 /* set the url display callback
0371  */
0372 void
0373 mkd_e_url(Document *f, mkd_callback_t edit)
0374 {
0375     if ( f ) {
0376     if ( f->cb.e_url != edit )
0377         f->dirty = 1;
0378     f->cb.e_url = edit;
0379     }
0380 }
0381 
0382 
0383 /* set the url options callback
0384  */
0385 void
0386 mkd_e_flags(Document *f, mkd_callback_t edit)
0387 {
0388     if ( f ) {
0389     if ( f->cb.e_flags != edit )
0390         f->dirty = 1;
0391     f->cb.e_flags = edit;
0392     }
0393 }
0394 
0395 
0396 /* set the anchor formatter
0397  */
0398 void
0399 mkd_e_anchor(Document *f, mkd_callback_t format)
0400 {
0401     if ( f ) {
0402     if ( f->cb.e_anchor != format )
0403         f->dirty = 1;
0404     f->cb.e_anchor = format;
0405     }
0406 }
0407 
0408 
0409 /* set the url display/options deallocator
0410  */
0411 void
0412 mkd_e_free(Document *f, mkd_free_t dealloc)
0413 {
0414     if ( f ) {
0415     if ( f->cb.e_free != dealloc )
0416         f->dirty = 1;
0417     f->cb.e_free = dealloc;
0418     }
0419 }
0420 
0421 
0422 /* set the url display/options context data field
0423  */
0424 void
0425 mkd_e_data(Document *f, void *data)
0426 {
0427     if ( f ) {
0428     if ( f->cb.e_data != data )
0429         f->dirty = 1;
0430     f->cb.e_data = data;
0431     }
0432 }
0433 
0434 
0435 /* set the code block display callback
0436  */
0437 void
0438 mkd_e_code_format(Document *f, mkd_callback_t codefmt)
0439 {
0440     if ( f && (f->cb.e_codefmt != codefmt) ) {
0441     f->dirty = 1;
0442     f->cb.e_codefmt = codefmt;
0443     }
0444 }
0445 
0446 
0447 /* set the href prefix for markdown extra style footnotes
0448  */
0449 void
0450 mkd_ref_prefix(Document *f, char *data)
0451 {
0452     if ( f ) {
0453     if ( f->ref_prefix != data )
0454         f->dirty = 1;
0455     f->ref_prefix = data;
0456     }
0457 }