md source code

1 /*******************************************************************************
2  * Markdown parser implementation.
3  * 
4  * Copyright: (c) 2012-2019 RejectedSoftware e.K. and the D community
5  * License: Subject to the terms of the MIT license.
6  * Repository: https://github.com/dlang-community/dmarkdown
7  * 
8  * This library was forked and modified in 2021 for the `hgen` project.
9  * hgen: https://gitlab.com/vindexbit/hgen
10  * Author: Eugene 'Vindex' Stulin <tech.vindex@gmail.com>
11  * 
12  * MIT License (Expat version)
13  * 
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this software and associated documentation files (the "Software"), to deal
16  * in the Software without restriction, including without limitation the rights
17  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18  * copies of the Software, and to permit persons to whom the Software is
19  * furnished to do so, subject to the following conditions:
20  * 
21  * The above copyright notice and this permission notice shall be included
22  * in all copies or substantial portions of the Software.
23  * 
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 module md;
34 
35 import std.algorithm;
36 import std.array;
37 import std.ascii;
38 import std.conv;
39 import std.format;
40 import std.uni;
41 import std.utf;
42 import core.exception;
43 import std.range;
44 import std.string;
45 import std.stdio;
46 
47 alias UrlFilterFn = string delegate(string urlOrPath, bool isImage);
48 alias ProcessCodeFn = string delegate(string) @safe nothrow;
49 
/*******************************************************************************
 * Converts one Markdown document to HTML.
 *
 * The handler is constructed with the Markdown source, optionally configured
 * through the setter/enable/disable methods, and then asked for the HTML
 * rendering with `convertToHTML`.
 */
class MarkdownHandler {

    /// Creates a handler for the given Markdown source text.
    this(string markdownText) {
        this.markdownText = markdownText;
    }

    /***************************************************************************
     * Installs a delegate that is called for every link/image URL before it
     * is written to the output. A `null` filter leaves URLs untouched.
     */
    void setUrlFilterFunction(UrlFilterFn filter) {
        this.urlFilter = filter;
    }

    /***************************************************************************
     * Installs a delegate that post-processes the (already HTML-escaped)
     * contents of code blocks and inline code.
     */
    void setProcessCodeFunction(ProcessCodeFn process) {
        this.processCode = process;
    }

    /// Makes the parser use '*' instead of '-' as the sub-header underline.
    void enableAlternateSubheaders() {
        this.flags |= MarkdownFlags.alternateSubheaders;
    }

    /// Makes the parser treat '_' as a literal character ('*' still works).
    void disableUnderscoreEmphasis() {
        this.flags |= MarkdownFlags.disableUnderscoreEmphasis;
    }

    /***************************************************************************
     * Parses the Markdown text given to the constructor.
     * Returns: the generated HTML.
     */
    string convertToHTML() {
        auto allLines = std.string.splitLines(this.markdownText);
        auto links = scanForReferences(allLines);
        auto lines = this.parseLines(allLines);
        Block rootBlock;
        this.parseBlocks(rootBlock, lines, null);
        auto dst = appender!string();
        this.writeBlock(dst, rootBlock, links);
        return dst.data;
    }

private:

    string markdownText;

    /// Controls the capabilities of the parser.
    MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

    /// Heading tags will start at this level.
    size_t headingBaseLevel = 1;

    /// Called for every link/image URL to perform arbitrary transformations.
    UrlFilterFn urlFilter;

    /***************************************************************************
     * An optional delegate to post-process code blocks and inline code.
     * Useful to e.g. add code highlighting.
     */
    ProcessCodeFn processCode = null;

    /***************************************************************************
     * Fills `lninfo.indent` and `lninfo.unindented` from `lninfo.text`.
     *
     * A leading tab or a run of four spaces counts as one `White` level,
     * a '>' (optionally preceded by whitespace) as one `Quote` level.
     */
    void determineIndent(ref Line lninfo) @safe {
        string ln = lninfo.text;
        while (ln.length > 0) {
            if (ln[0] == '\t') {
                lninfo.indent ~= IndentType.White;
                ln.popFront();
            } else if (ln.startsWith("    ")) {
                lninfo.indent ~= IndentType.White;
                ln.popFrontN(4);
            } else {
                ln = ln.stripLeft();
                if (ln.startsWith(">")) {
                    lninfo.indent ~= IndentType.Quote;
                    ln.popFront();
                } else {
                    break;
                }
            }
        }
        lninfo.unindented = ln;
    }

    /***************************************************************************
     * Classifies an unindented line. The checks are ordered; the first
     * matching one wins and `LineType.Plain` is the fallback.
     */
    LineType determineType(string ln, lazy char subHeaderChar)
    pure @safe {
        alias MF = MarkdownFlags;
        if ((flags & MF.backtickCodeBlocks) && isCodeBlockDelimiter(ln)) {
            return LineType.CodeBlockDelimiter;
        } else if (isAtxHeaderLine(ln)) {
            return LineType.AtxHeader;
        } else if (isSetextHeaderLine(ln, subHeaderChar)) {
            return LineType.SetextHeader;
        } else if ((flags & MF.supportTables) && isTableRowLine!false(ln)) {
            return LineType.Table;
        } else if (isHlineLine(ln)) {
            return LineType.Hline;
        } else if (isOListLine(ln)) {
            return LineType.OList;
        } else if (isUListLine(ln)) {
            return LineType.UList;
        } else if (isLineBlank(ln)) {
            return LineType.Blank;
        } else if (!(flags & MF.noInlineHtml) && isHtmlBlockLine(ln)) {
            return LineType.HtmlBlock;
        }
        return LineType.Plain;
    }

    /***************************************************************************
     * Turns raw text lines into `Line` records (indentation + type).
     * The input array is consumed: `lines` is empty on return.
     */
    Line[] parseLines(ref string[] lines) @safe {
        Line[] ret;
        char subHeaderChar = '-';
        if (this.flags & MarkdownFlags.alternateSubheaders) {
            subHeaderChar = '*';
        }
        while (!lines.empty) {
            Line lninfo;
            lninfo.text = lines.front;
            lines.popFront();
            determineIndent(lninfo);
            lninfo.type = determineType(lninfo.unindented, subHeaderChar);
            ret ~= lninfo;
        }
        return ret;
    }

    /***************************************************************************
     * Builds the block tree below `root` from `lines`.
     *
     * Lines are consumed from the front of `lines`. Parsing stops when the
     * input is exhausted or when a line no longer belongs to `baseIndent`
     * (so the caller can continue with it).
     */
    void parseBlocks(ref Block root,
                     ref Line[] lines,
                     IndentType[] baseIndent)
    pure @safe {
        if (baseIndent.length == 0) {
            root.type = BlockType.Text;
        } else if (baseIndent[$-1] == IndentType.Quote) {
            root.type = BlockType.Quote;
        }

        while (!lines.empty) {
            auto ln = lines.front;

            if (ln.type == LineType.Blank) {
                lines.popFront();
                continue;
            }

            if (ln.indent != baseIndent) {
                // The line belongs to an outer block: hand it back.
                if (ln.indent.length < baseIndent.length ||
                    ln.indent[0 .. baseIndent.length] != baseIndent) {
                    return;
                }

                auto cindent = baseIndent ~ IndentType.White;
                if (ln.indent == cindent) {
                    // Exactly one extra whitespace level: indented code.
                    Block cblock;
                    cblock.type = BlockType.Code;
                    while (!lines.empty &&
                        lines.front.indent.length >= cindent.length &&
                        lines.front.indent[0 .. cindent.length] == cindent) {
                        cblock.text ~= lines.front.unindent(cindent.length);
                        lines.popFront();
                    }
                    root.blocks ~= cblock;
                } else {
                    Block subblock;
                    this.parseBlocks(subblock,
                                    lines,
                                    ln.indent[0 .. baseIndent.length+1]);
                    root.blocks ~= subblock;
                }
                // Keep going: the lines that follow the nested block may
                // still belong to this block. Returning here would drop
                // the remainder of the document at root level.
                continue;
            }

            Block b;
            // Consumes a run of text lines starting at lines.front and
            // stores it in `b` as a paragraph.
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }
            final switch(ln.type) {
                case LineType.Undefined: assert(false);
                case LineType.Blank: assert(false);
                case LineType.Plain:
                    if (lines.length >= 2 &&
                        lines[1].type == LineType.SetextHeader) {
                        auto setln = lines[1].unindented;
                        b.type = BlockType.Header;
                        b.text = [ln.unindented];
                        b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                        lines.popFrontN(2);
                    } else {
                        processPlain();
                    }
                    break;
                case LineType.Hline:
                    b.type = BlockType.Plain;
                    b.text = ["<hr>"];
                    lines.popFront();
                    break;
                case LineType.AtxHeader:
                    b.type = BlockType.Header;
                    string hl = ln.unindented;
                    b.headerLevel = 0;
                    // Leading '#' characters give the header level ...
                    while (hl.length > 0 && hl[0] == '#') {
                        b.headerLevel++;
                        hl = hl[1 .. $];
                    }
                    // ... trailing '#' and spaces are decoration.
                    while (hl.length > 0
                        && (hl[$-1] == '#' || hl[$-1] == ' ')) {
                        hl = hl[0 .. $-1];
                    }
                    b.text = [hl];
                    lines.popFront();
                    break;
                case LineType.SetextHeader:
                    // An underline without a preceding text line: skip it.
                    lines.popFront();
                    break;
                case LineType.UList:
                case LineType.OList:
                    b.type = ln.type == LineType.UList ? BlockType.UList
                                                        : BlockType.OList;
                    auto itemindent = baseIndent ~ IndentType.White;
                    bool firstItem = true, paraMode = false;
                    while (!lines.empty && lines.front.type == ln.type
                        && lines.front.indent == baseIndent) {
                        Block itm;
                        itm.text = skipText(lines, itemindent);
                        itm.text[0] = removeListPrefix(
                            itm.text[0], ln.type
                        );

                        // emit <p></p> if there are blank lines
                        // between the items
                        if (firstItem && !lines.empty
                            && lines.front.type == LineType.Blank) {
                            paraMode = true;
                        }
                        firstItem = false;
                        if (paraMode) {
                            Block para;
                            para.type = BlockType.Paragraph;
                            para.text = itm.text;
                            itm.blocks ~= para;
                            itm.text = null;
                        }

                        this.parseBlocks(itm, lines, itemindent);
                        itm.type = BlockType.ListItem;
                        b.blocks ~= itm;
                    }
                    break;
                case LineType.HtmlBlock:
                    int nestlevel = 0;
                    auto starttag = parseHtmlBlockLine(ln.unindented);
                    if (!starttag.isHtmlBlock || !starttag.open) {
                        // Consume the line as ordinary text. Leaving it in
                        // `lines` would make the enclosing loop see the
                        // very same line again on every iteration.
                        processPlain();
                        break;
                    }

                    b.type = BlockType.Plain;
                    while (!lines.empty) {
                        auto frontIndLen = lines.front.indent.length;
                        auto baseIndLen = baseIndent.length;
                        if (frontIndLen < baseIndLen) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndLen] != baseIndent) {
                            break;
                        }

                        auto str = lines.front.unindent(baseIndent.length);
                        auto taginfo = parseHtmlBlockLine(str);
                        b.text ~= str;
                        lines.popFront();
                        // Track nesting of the opening tag only; the block
                        // ends once that tag is balanced.
                        if (taginfo.isHtmlBlock
                        && taginfo.tagName == starttag.tagName) {
                            nestlevel += taginfo.open ? 1 : -1;
                        }
                        if (nestlevel <= 0) {
                            break;
                        }
                    }
                    break;
                case LineType.CodeBlockDelimiter:
                    lines.popFront(); // TODO: get language from line
                    b.type = BlockType.Code;
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length ) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }
                        if (lines.front.type == LineType.CodeBlockDelimiter) {
                            lines.popFront();
                            break;
                        }
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                    }
                    break;
                case LineType.Table:
                    // A table needs a header row followed by a separator
                    // row. Peek at the next line without consuming
                    // anything, so that a row which turns out not to start
                    // a table is still emitted as paragraph text.
                    immutable bool isTableHeader = (
                        lines.length >= 2
                        && (lines[1].type == LineType.Table)
                        && (lines[1].text.indexOf(" -") >= 0)
                        && (lines[1].text.indexOf("- ") >= 0)
                        && lines[1].text.allOf("-:| ")
                    );
                    if (!isTableHeader) {
                        // Not a valid table header,
                        // so let's assume it's plain markdown
                        processPlain();
                        break;
                    }
                    b.type = BlockType.Table;
                    // Parse header
                    b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                    // Skip the header row and the separator row
                    lines.popFrontN(2);
                    // Parse table rows
                    while (!lines.empty) {
                        ln = lines.front;
                        if (ln.type != LineType.Table) {
                            // not a table row: end of the table
                            break;
                        }
                        b.blocks ~= splitTableRow(ln);
                        lines.popFront();
                    }
                    break;
            }
            root.blocks ~= b;

        }
    }

    /***************************************************************************
     * Writes the HTML for `block` (and, recursively, its children) to `dst`.
     * `links` holds the reference-style link definitions of the document.
     */
    void writeBlock(R)(ref R dst,
                       ref const Block block,
                       LinkRef[string] links) {
        final switch(block.type) {
            case BlockType.Plain:
                // Raw lines are emitted verbatim.
                foreach (ln; block.text) {
                    dst.put(ln);
                    dst.put("\n");
                }
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                break;
            case BlockType.Text:
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                break;
            case BlockType.Paragraph:
                assert(block.blocks.length == 0);
                dst.put("<p>");
                writeMarkdownEscaped(dst, block, links);
                dst.put("</p>\n");
                break;
            case BlockType.Header:
                assert(block.blocks.length == 0);
                // Checked before block.text[0] is read below.
                assert(block.text.length == 1);
                auto hlvl = block.headerLevel + this.headingBaseLevel-1;
                dst.formattedWrite(
                    "<h%s id=\"%s\">", hlvl, block.text[0].asSlug
                );
                writeMarkdownEscaped(dst, block.text[0], links);
                dst.formattedWrite("</h%s>\n", hlvl);
                break;
            case BlockType.OList:
                dst.put("<ol>\n");
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</ol>\n");
                break;
            case BlockType.UList:
                dst.put("<ul>\n");
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</ul>\n");
                break;
            case BlockType.ListItem:
                dst.put("<li>");
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</li>\n");
                break;
            case BlockType.Code:
                assert(block.blocks.length == 0);
                dst.put("<pre class=\"prettyprint\"><code>");
                if (this.processCode is null) {
                    foreach (ln; block.text) {
                        filterHTMLEscape(dst, ln);
                        dst.put("\n");
                    }
                } else {
                    // Escape into a temporary buffer first so that the
                    // delegate receives the whole block at once.
                    auto temp = appender!string();
                    foreach(ln; block.text){
                        filterHTMLEscape(temp, ln);
                        temp.put("\n");
                    }
                    dst.put(this.processCode(temp.data));
                }
                dst.put("</code></pre>");
                break;
            case BlockType.Quote:
                dst.put("<blockquote>");
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</blockquote>\n");
                break;
            case BlockType.Table:
                assert(block.blocks.length > 0);
                assert(block.blocks[0].type == BlockType.TableRow);
                // The first row holds the header cells.
                dst.put("<table>\n<tr>");
                foreach (b; block.blocks[0].blocks) {
                    assert(b.type == BlockType.TableHeader);
                    dst.put("<th>");
                    writeMarkdownEscaped(dst, b.text[0], links);
                    dst.put("</th>");
                }
                dst.put("</tr>\n");
                if (block.blocks.length > 1) {
                    foreach(row; block.blocks[1 .. $]) {
                        assert(row.type == BlockType.TableRow);
                        dst.put("<tr>");
                        foreach(b; row.blocks) {
                            assert(b.type == BlockType.TableData);
                            dst.put("<td>");
                            writeMarkdownEscaped(dst, b.text[0], links);
                            dst.put("</td>");
                        }
                        dst.put("</tr>\n");
                    }
                }
                dst.put("</table>\n");
                break;
            case BlockType.TableRow:
            case BlockType.TableData:
            case BlockType.TableHeader:
                // Only ever written as part of a Table block.
                assert(0);
        }
    }

    /***************************************************************************
     * Writes the text lines of `block` with inline Markdown converted.
     * Lines are joined with "<br>" when `keepLineBreaks` is set and with a
     * newline otherwise; a trailing newline is added if there was any text.
     */
    void writeMarkdownEscaped(R)(ref R dst,
                                 ref const Block block,
                                 in LinkRef[string] links) {
        auto lines = cast(string[])block.text;
        auto text = this.flags & MarkdownFlags.keepLineBreaks
            ? lines.join("<br>") : lines.join("\n");
        writeMarkdownEscaped(dst, text, links);
        if (lines.length) dst.put("\n");
    }


    /***************************************************************************
     * Writes `ln` to `dst`, converting inline Markdown (escapes, emphasis,
     * inline code, links, images, auto-links) to HTML.
     * Text ending in two spaces gets a trailing "<br/>".
     */
    void writeMarkdownEscaped(R)(ref R dst,
                                 string ln,
                                 in LinkRef[string] linkrefs) {
        string filterLink(string lnk, bool isImage) {
            return this.urlFilter ? this.urlFilter(lnk, isImage) : lnk;
        }

        bool br = ln.endsWith("  ");
        while (ln.length > 0) {
            switch (ln[0]) {
                default:
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                    break;
                case '\\':
                    if (ln.length >= 2 ){
                        switch(ln[1]){
                            default:
                                // Not an escapable character:
                                // keep the backslash.
                                dst.put(ln[0 .. 2]);
                                ln = ln[2 .. $];
                                break;
                            case '\'', '`', '*', '_', '{', '}', '[', ']',
                                '(', ')', '#', '+', '-', '.', '!':
                                dst.put(ln[1]);
                                ln = ln[2 .. $];
                                break;
                        }
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '_':
                    if (this.flags & MarkdownFlags.disableUnderscoreEmphasis) {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                        break;
                    }
                    goto case;
                case '*':
                    string text;
                    if (auto em = parseEmphasis(ln, text)) {
                        if (em == 1) {
                            dst.put("<em>");
                        } else if (em == 2) {
                            dst.put("<strong>");
                        } else {
                            dst.put("<strong><em>");
                        }
                        filterHTMLEscape(
                            dst, text, HTMLEscapeFlags.escapeMinimal
                        );
                        if (em == 1) {
                            dst.put("</em>");
                        } else if (em == 2) {
                            dst.put("</strong>");
                        } else {
                            // Close in reverse order of opening.
                            dst.put("</em></strong>");
                        }
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '`':
                    string code;
                    if (parseInlineCode(ln, code)) {
                        dst.put("<code class=\"prettyprint\">");
                        if (this.processCode is null) {
                            filterHTMLEscape(
                                dst, code, HTMLEscapeFlags.escapeMinimal
                            );
                        } else {
                            auto temp = appender!string();
                            filterHTMLEscape(
                                temp, code, HTMLEscapeFlags.escapeMinimal
                            );
                            dst.put(this.processCode(temp.data));
                        }
                        dst.put("</code>");
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '[':
                    Link link;
                    if (parseLink(ln, link, linkrefs)) {
                        dst.put("<a href=\"");
                        filterHTMLAttribEscape(dst, filterLink(link.url, false));
                        dst.put("\"");
                        if (link.title.length ){
                            dst.put(" title=\"");
                            filterHTMLAttribEscape(dst, link.title);
                            dst.put("\"");
                        }
                        dst.put(">");
                        // The link text may itself contain inline markup.
                        writeMarkdownEscaped(dst, link.text, linkrefs);
                        dst.put("</a>");
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '!':
                    Link link;
                    if (parseLink(ln, link, linkrefs)) {
                        dst.put("<img src=\"");
                        filterHTMLAttribEscape(dst, filterLink(link.url, true));
                        dst.put("\" alt=\"");
                        filterHTMLAttribEscape(dst, link.text);
                        dst.put("\"");
                        if (link.title.length ){
                            dst.put(" title=\"");
                            filterHTMLAttribEscape(dst, link.title);
                            dst.put("\"");
                        }
                        dst.put(">");
                    } else if (ln.length >= 2) {
                        dst.put(ln[0 .. 2]);
                        ln = ln[2 .. $];
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '>':
                    if (this.flags & MarkdownFlags.noInlineHtml) {
                        dst.put("&gt;");
                    }
                    else dst.put(ln[0]);
                    ln = ln[1 .. $];
                    break;
                case '<':
                    string url;
                    if (parseAutoLink(ln, url)) {
                        bool isEmail = url.startsWith("mailto:");
                        dst.put("<a href=\"");
                        if (isEmail) {
                            filterHTMLAllEscape(dst, url);
                        } else {
                            filterHTMLAttribEscape(dst, filterLink(url, false));
                        }
                        dst.put("\">");
                        if (isEmail) {
                            // Show the address without the "mailto:" prefix.
                            filterHTMLAllEscape(dst, url[7 .. $]);
                        } else {
                            filterHTMLEscape(
                                dst, url, HTMLEscapeFlags.escapeMinimal
                            );
                        }
                        dst.put("</a>");
                    } else {
                        if (ln.startsWith("<br>")) {
                            // always support line breaks,
                            // since we embed them here ourselves!
                            dst.put("<br/>");
                            ln = ln[4 .. $];
                        } else if (ln.startsWith("<br/>")) {
                            dst.put("<br/>");
                            ln = ln[5 .. $];
                        } else {
                            if (this.flags & MarkdownFlags.noInlineHtml) {
                                dst.put("&lt;");
                            } else {
                                dst.put(ln[0]);
                            }
                            ln = ln[1 .. $];
                        }
                    }
                    break;
            }
        }
        if (br) {
            dst.put("<br/>");
        }
    }
}
681 
682 
/// Bit flags that switch individual parser features on or off.
enum MarkdownFlags {
    /// No optional feature is enabled.
    none = 0,
    /// Join the lines of a text block with "<br>" instead of a newline.
    keepLineBreaks = 1 << 0,
    /// Recognize code block delimiter lines.
    backtickCodeBlocks = 1 << 1,
    /// Do not recognize HTML block lines; write '<' and '>' as entities.
    noInlineHtml = 1 << 2,
    // Bits 3 and 4 are currently unused:
    //noLinks = 1 << 3,
    //allowUnsafeHtml = 1 << 4,
    /// If used, subheadings are underlined by stars ('*') instead of dashes ('-')
    alternateSubheaders = 1 << 5,
    /// If used, '_' may not be used for emphasis ('*' may still be used)
    disableUnderscoreEmphasis = 1 << 6,
    /// Recognize table row lines.
    supportTables = 1 << 7,

    /// Preset: no optional features.
    vanillaMarkdown = none,
    /// Preset: line breaks kept, backtick code blocks, no inline HTML.
    forumDefault = keepLineBreaks | backtickCodeBlocks | noInlineHtml,
    /// Preset: backtick code blocks and tables.
    githubInspired = backtickCodeBlocks | supportTables,
}
699 
700 
701 
// Smoke test: prints a small document and its HTML rendering.
unittest {
    immutable separator = "===========";
    string markdown =
`=======
Heading
=======

**bold** *italic*

List:

  * a
  * b
  * c
`;

    writeln(separator);
    writeln(markdown);
    writeln(separator);
    writeln(convertMarkdownToHTML(markdown));
}
722 
// A line indented by fewer than four spaces stays part of the paragraph.
unittest {
    string markdown =
`Merged prototype.
The prototype is not locked, allowing to add more components.
  To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().`;
    string wanted =
`<p>Merged prototype.
The prototype is not locked, allowing to add more components.
  To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().
</p>
`;
    assert(convertMarkdownToHTML(markdown) == wanted);
}
737 
738 
// Underscore emphasis can be switched off; star emphasis always works.
unittest {
    string markdown = `*stars* under_score_s`;
    string wantedWithUnderscores = `<p><em>stars</em> under<em>score</em>s
</p>
`;
    string wantedWithoutUnderscores = `<p><em>stars</em> under_score_s
</p>
`;

    string htmlDefault = convertMarkdownToHTML(markdown);
    string htmlNoUnderscores = convertMarkdownToHTML(
        markdown, MarkdownFlags.disableUnderscoreEmphasis
    );

    // Compares a rendering with its expectation, reporting both on failure.
    void check(string actual, string wanted) {
        assert(actual == wanted, "'%s' != '%s'".format(actual, wanted));
    }

    check(htmlDefault, wantedWithUnderscores);
    check(htmlNoUnderscores, wantedWithoutUnderscores);
}
762 
763 
764 // Unittest for code post-processing
// The code post-processing delegate is applied to inline code and code blocks.
unittest {
    string markdown =
"`inline code`" ~ `
block:

    code block
`;
    string wanted =
`<p><code class="prettyprint">AAAAAAAAAAA</code>
block:
</p>
<pre class="prettyprint"><code>AAAAAAAAAA</code></pre>`;

    // Replaces every character except newlines with 'A'.
    string maskCode(string input) @safe nothrow {
        import std.exception: assumeWontThrow;
        // ignore newlines generated by code block processing
        string withoutNewlines =
            input.filter!(c => c != '\n').array.to!string.assumeWontThrow;
        return 'A'.repeat(withoutNewlines.length)
            .array.to!string.assumeWontThrow;
    }

    auto settings = new MarkdownSettings;
    settings.processCode = &maskCode;
    auto html = convertMarkdownToHTML(markdown, settings);

    assert(
        html == wanted,
        format!"Unexpected code processing result:\n%s\nExpected:\n%s"(
            html, wanted
        )
    );
}
793 
794 
795 
/// A document section: a heading plus the sections nested below it.
struct Section {
    /// Level of the heading that opens the section.
    size_t headingLevel;
    /// Caption text of the heading.
    string caption;
    /// Anchor associated with the heading.
    string anchor;
    /// Sections nested inside this one.
    Section[] subSections;
}
802 
// Tag names that parseHtmlBlockLine() accepts as HTML block tags.
private immutable s_blockTags = [
    "div", "ol", "p", "pre", "section", "table", "ul"
];
806 
807 
// One level of line indentation, as consumed by Line.unindent().
private enum IndentType {
    // Plain whitespace: Line.unindent() drops four characters when the line
    // starts with a space and one character otherwise.
    White,
    // Quote marker: Line.unindent() drops leading whitespace plus one
    // character.
    Quote,
}
812 
813 
// Classification stored in Line.type.
private enum LineType {
    Undefined,
    Blank,
    Plain,
    Hline,
    AtxHeader,
    SetextHeader,
    UList,
    OList,
    HtmlBlock,
    CodeBlockDelimiter,
    Table
}
827 
// A single input line together with its classification and indentation.
private struct Line {
    LineType type;
    IndentType[] indent;
    string text;
    string unindented;

    // Returns `text` with the first `n` indentation levels removed.
    // `n` must not exceed `indent.length`.
    string unindent(size_t n) pure @safe {
        assert(n <= indent.length);
        string rest = text;
        foreach (level; indent[0 .. n]) {
            final switch (level) {
                case IndentType.White:
                    // four characters for a space indent, one otherwise
                    rest = rest[(rest[0] == ' ' ? 4 : 1) .. $];
                    break;
                case IndentType.Quote:
                    // skip whitespace, then the marker character itself
                    rest = rest.stripLeft()[1 .. $];
                    break;
            }
        }
        return rest;
    }
}
850 
851 
// Kind of a Block node; writeBlock() dispatches on this value.
private enum BlockType {
    Plain,
    Text,
    Paragraph,
    Header,
    OList,
    UList,
    ListItem,
    Code,
    Quote,
    Table,
    TableRow,
    TableHeader,
    TableData
}
867 
868 
// A node of the parsed document: its own text lines plus nested blocks.
private struct Block {
    BlockType type;
    string[] text;
    Block[] blocks;
    size_t headerLevel;

    // A human-readable toString for debugging.
    string toString() {
        return toStringNested();
    }

    // Dumps type, text, nested blocks and header level, one item per line;
    // every nesting level is indented by two more spaces.
    string toStringNested(uint depth = 0) {
        string pad = ' '.repeat(depth * 2).array.to!string;
        auto dump = appender!string();
        dump.put(pad);
        dump.put("%s\n".format(type));
        dump.put(pad);
        dump.put("%s\n".format(text));
        foreach (ref child; blocks) {
            dump.put(child.toStringNested(depth + 1));
        }
        dump.put(pad);
        dump.put("%s\n".format(headerLevel));
        return dump.data;
    }
}
890 
891 
// Consumes the front line of `lines` plus every following plain line that
// still belongs to the same run of text, and returns the consumed lines
// with up to `indent.length` indentation levels removed.
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
    // Decides whether a line indented as `indent` continues text that
    // started at `baseIndent`.
    static bool matchesIndent(IndentType[] indent, IndentType[] baseIndent) {
        // A deeper indent is deliberately accepted: text such as
        // ---
        // First line
        //         Second line
        // ---
        // must stay one paragraph instead of becoming a paragraph followed
        // by a code block, which is how other Markdown processors read it.
        if (indent.length > baseIndent.length) {
            return true;
        }
        if (indent != baseIndent[0 .. indent.length]) {
            return false;
        }
        // Look for the last quote level of the base indentation; the line
        // must be longer than that level's distance from the end.
        foreach_reverse (pos, kind; baseIndent) {
            if (kind == IndentType.Quote) {
                immutable size_t fromEnd = baseIndent.length - 1 - pos;
                return indent.length > fromEnd;
            }
        }
        return true;
    }

    string[] collected;
    do {
        immutable levels = min(indent.length, lines.front.indent.length);
        collected ~= lines.front.unindent(levels);
        lines.popFront();
    } while (!lines.empty
          && matchesIndent(lines.front.indent, indent)
          && lines.front.type == LineType.Plain);
    return collected;
}
938 
939 
// Splits one table line into a TableRow block holding one cell block per
// " | "-separated column.
//
// The cell blocks get the type `dataType` (TableHeader or TableData). An
// optional leading "| " and trailing " |" are removed before splitting.
private Block splitTableRow(BlockType dataType = BlockType.TableData)(Line line)
pure @safe {
    static assert(
        dataType == BlockType.TableHeader || dataType == BlockType.TableData
    );

    string ln = line.text.strip();
    // Use startsWith/endsWith instead of fixed-width slices: a stripped row
    // can be shorter than two characters (" | " strips to "|"), and in
    // "| |" the two delimiters overlap, so blind slicing would go out of
    // range.
    if (ln.startsWith("| ")) {
        ln = ln[2 .. $];
    }
    if (ln.endsWith(" |")) {
        ln = ln[0 .. $ - 2];
    }

    Block ret;
    ret.type = BlockType.TableRow;
    foreach (txt; ln.split(" | ")) {
        Block d;
        d.text = [txt.strip(" ")];
        d.type = dataType;
        ret.blocks ~= d;
    }
    return ret;
}
960 
961 
// Renders `block` and everything nested inside it as HTML into `dst`.
//
// `links` holds the reference definitions used when resolving links in
// inline text; `settings` supplies the heading base level and the optional
// code post-processing hook. Table rows and cells are written as part of
// their Table block and must never be passed in directly.
private void writeBlock(R)(ref R dst,
                           ref const Block block,
                           LinkRef[string] links,
                           scope MarkdownSettings settings) {
    // Renders all nested blocks in document order.
    void writeChildren() {
        foreach (ref child; block.blocks) {
            writeBlock(dst, child, links, settings);
        }
    }

    // Renders one table row whose cells must all have type `cellType`.
    void writeRow(ref const Block row, BlockType cellType,
                  string openTag, string closeTag) {
        assert(row.type == BlockType.TableRow);
        dst.put("<tr>");
        foreach (ref cell; row.blocks) {
            assert(cell.type == cellType);
            dst.put(openTag);
            writeMarkdownEscaped(dst, cell.text[0], links, settings);
            dst.put(closeTag);
        }
        dst.put("</tr>\n");
    }

    final switch (block.type) {
        case BlockType.Plain:
            // plain text is copied verbatim, one line at a time
            foreach (rawLine; block.text) {
                dst.put(rawLine);
                dst.put("\n");
            }
            writeChildren();
            break;

        case BlockType.Text:
            writeMarkdownEscaped(dst, block, links, settings);
            writeChildren();
            break;

        case BlockType.Paragraph:
            assert(block.blocks.length == 0);
            dst.put("<p>");
            writeMarkdownEscaped(dst, block, links, settings);
            dst.put("</p>\n");
            break;

        case BlockType.Header:
            assert(block.blocks.length == 0);
            // shift the level by the configured base level, if any
            auto level = block.headerLevel
                + (settings ? settings.headingBaseLevel-1 : 0);
            dst.formattedWrite(
                "<h%s id=\"%s\">", level, block.text[0].asSlug
            );
            assert(block.text.length == 1);
            writeMarkdownEscaped(dst, block.text[0], links, settings);
            dst.formattedWrite("</h%s>\n", level);
            break;

        case BlockType.OList:
            dst.put("<ol>\n");
            writeChildren();
            dst.put("</ol>\n");
            break;

        case BlockType.UList:
            dst.put("<ul>\n");
            writeChildren();
            dst.put("</ul>\n");
            break;

        case BlockType.ListItem:
            dst.put("<li>");
            writeMarkdownEscaped(dst, block, links, settings);
            writeChildren();
            dst.put("</li>\n");
            break;

        case BlockType.Code:
            assert(block.blocks.length == 0);
            dst.put("<pre class=\"prettyprint\"><code>");
            if (settings.processCode !is null) {
                // collect the escaped code first so that the hook sees the
                // complete block at once
                auto escaped = appender!string();
                foreach (codeLine; block.text) {
                    filterHTMLEscape(escaped, codeLine);
                    escaped.put("\n");
                }
                dst.put(settings.processCode(escaped.data));
            } else {
                foreach (codeLine; block.text) {
                    filterHTMLEscape(dst, codeLine);
                    dst.put("\n");
                }
            }
            dst.put("</code></pre>");
            break;

        case BlockType.Quote:
            dst.put("<blockquote>");
            writeMarkdownEscaped(dst, block, links, settings);
            writeChildren();
            dst.put("</blockquote>\n");
            break;

        case BlockType.Table:
            assert(block.blocks.length > 0);
            assert(block.blocks[0].type == BlockType.TableRow);
            dst.put("<table>\n");
            // the first row is the header row, all others are data rows
            writeRow(block.blocks[0], BlockType.TableHeader, "<th>", "</th>");
            foreach (ref row; block.blocks[1 .. $]) {
                writeRow(row, BlockType.TableData, "<td>", "</td>");
            }
            dst.put("</table>\n");
            break;

        case BlockType.TableRow:
        case BlockType.TableData:
        case BlockType.TableHeader:
            assert(0);
    }
}
1075 
1076 
// Renders all text lines of `block` as inline Markdown into `dst`.
//
// The lines are joined with "<br>" when MarkdownFlags.keepLineBreaks is set
// and with a newline otherwise; a trailing newline is written whenever the
// block has at least one line.
private void writeMarkdownEscaped(R)(ref R dst,
                                     ref const Block block,
                                     in LinkRef[string] links,
                                     scope MarkdownSettings settings) {
    auto rows = cast(string[])block.text;
    string separator = "\n";
    if (settings.flags & MarkdownFlags.keepLineBreaks) {
        separator = "<br>";
    }
    writeMarkdownEscaped(dst, rows.join(separator), links, settings);
    if (rows.length > 0) {
        dst.put("\n");
    }
}
1089 
1090 
// Converts one run of inline Markdown (`ln`) to HTML and writes it to `dst`.
//
// Handles backslash escapes, emphasis, inline code, links, images,
// autolinks and "<br>"; every other character is copied through unchanged.
// `linkrefs` is used to resolve reference-style links. `settings` supplies
// the flags (disableUnderscoreEmphasis, noInlineHtml), the optional URL
// filter and the optional code post-processing hook. Text ending in two
// spaces is followed by a "<br/>".
private void writeMarkdownEscaped(R)(ref R dst,
                                     string ln,
                                     in LinkRef[string] linkrefs,
                                     scope MarkdownSettings settings) {
    // Runs the configured URL filter, if there is one.
    string filterLink(string lnk, bool isImage) {
        return settings.urlFilter ? settings.urlFilter(lnk, isImage) : lnk;
    }

    bool br = ln.endsWith("  ");
    while (ln.length > 0) {
        switch (ln[0]) {
            default:
                dst.put(ln[0]);
                ln = ln[1 .. $];
                break;
            case '\\':
                if (ln.length >= 2 ){
                    switch(ln[1]){
                        default:
                            // unknown escape: keep both characters as-is
                            dst.put(ln[0 .. 2]);
                            ln = ln[2 .. $];
                            break;
                        case '<', '>':
                            // Not an escape either, but the angle bracket
                            // must still go through the main loop;
                            // copying it raw would bypass the noInlineHtml
                            // escaping.
                            dst.put(ln[0]);
                            ln = ln[1 .. $];
                            break;
                        case '\'', '`', '*', '_', '{', '}', '[', ']',
                            '(', ')', '#', '+', '-', '.', '!':
                            // escaped character: drop the backslash
                            dst.put(ln[1]);
                            ln = ln[2 .. $];
                            break;
                    }
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '_':
                if(settings.flags & MarkdownFlags.disableUnderscoreEmphasis)
                {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                    break;
                }
                goto case;
            case '*':
                string text;
                // parseEmphasis returns the delimiter length (1 to 3)
                if (auto em = parseEmphasis(ln, text)){
                    dst.put(em == 1 ? "<em>"
                                    : em == 2 ? "<strong>" : "<strong><em>");
                    filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
                    dst.put(em == 1 ? "</em>"
                                    : em == 2 ? "</strong>": "</em></strong>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '`':
                string code;
                if (parseInlineCode(ln, code)) {
                    dst.put("<code class=\"prettyprint\">");
                    if (settings.processCode is null) {
                        filterHTMLEscape(
                            dst, code, HTMLEscapeFlags.escapeMinimal
                        );
                    } else {
                        // escape first, then hand the result to the hook
                        auto temp = appender!string();
                        filterHTMLEscape(
                            temp, code, HTMLEscapeFlags.escapeMinimal
                        );
                        dst.put(settings.processCode(temp.data));
                    }
                    dst.put("</code>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '[':
                Link link;
                if (parseLink(ln, link, linkrefs)) {
                    dst.put("<a href=\"");
                    filterHTMLAttribEscape(dst, filterLink(link.url, false));
                    dst.put("\"");
                    if (link.title.length ){
                        dst.put(" title=\"");
                        filterHTMLAttribEscape(dst, link.title);
                        dst.put("\"");
                    }
                    dst.put(">");
                    // the link text may itself contain inline Markdown
                    writeMarkdownEscaped(dst, link.text, linkrefs, settings);
                    dst.put("</a>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '!':
                Link link;
                if (parseLink(ln, link, linkrefs)) {
                    dst.put("<img src=\"");
                    filterHTMLAttribEscape(dst, filterLink(link.url, true));
                    dst.put("\" alt=\"");
                    filterHTMLAttribEscape(dst, link.text);
                    dst.put("\"");
                    if (link.title.length ){
                        dst.put(" title=\"");
                        filterHTMLAttribEscape(dst, link.title);
                        dst.put("\"");
                    }
                    dst.put(">");
                } else if (ln.length >= 2 && ln[1] == '[') {
                    // parseLink() ignores the '!' prefix, so the text
                    // after it would fail as a plain link in the same way;
                    // emit both characters at once.
                    dst.put(ln[0 .. 2]);
                    ln = ln[2 .. $];
                } else {
                    // Emit only the '!'. The next character must be handled
                    // by the main loop, otherwise emphasis, escapes and the
                    // noInlineHtml escaping of '<' and '>' are skipped for
                    // whatever follows an exclamation mark.
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '>':
                if (settings.flags & MarkdownFlags.noInlineHtml) {
                    dst.put("&gt;");
                }
                else dst.put(ln[0]);
                ln = ln[1 .. $];
                break;
            case '<':
                string url;
                if (parseAutoLink(ln, url)) {
                    bool isEmail = url.startsWith("mailto:");
                    dst.put("<a href=\"");
                    if (isEmail) {
                        filterHTMLAllEscape(dst, url);
                    } else {
                        filterHTMLAttribEscape(dst, filterLink(url, false));
                    }
                    dst.put("\">");
                    if (isEmail) {
                        // display the address without the "mailto:" prefix
                        filterHTMLAllEscape(dst, url[7 .. $]);
                    } else {
                        filterHTMLEscape(
                            dst, url, HTMLEscapeFlags.escapeMinimal
                        );
                    }
                    dst.put("</a>");
                } else {
                    if (ln.startsWith("<br>")) {
                        // always support line breaks,
                        // since we embed them here ourselves!
                        dst.put("<br/>");
                        ln = ln[4 .. $];
                    } else if (ln.startsWith("<br/>")) {
                        dst.put("<br/>");
                        ln = ln[5 .. $];
                    } else {
                        if (settings.flags & MarkdownFlags.noInlineHtml) {
                            dst.put("&lt;");
                        } else {
                            dst.put(ln[0]);
                        }
                        ln = ln[1 .. $];
                    }
                }
                break;
        }
    }
    if (br) {
        dst.put("<br/>");
    }
}
1258 
1259 
// Tells whether `ln` passes the allOf() check against spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
    return ln.allOf(" \t");
}
1264 
1265 
// Tells whether `ln` is a setext header underline: optional leading
// whitespace, a run of '=' or of `subHeaderChar`, and then a remainder that
// passes the allOf() check against spaces and tabs.
private bool isSetextHeaderLine(string ln, char subHeaderChar) pure @safe {
    ln = stripLeft(ln);
    if (ln.empty) {
        return false;
    }
    immutable char marker = ln[0];
    if (marker != '=' && marker != subHeaderChar) {
        return false;
    }
    // drop the whole run of marker characters
    while (!ln.empty && ln.front == marker) {
        ln.popFront();
    }
    return allOf(ln, " \t");
}
1285 
1286 
// Tells whether `ln` is an ATX header line: optional leading whitespace,
// one to six '#' characters, and a space directly after them.
private bool isAtxHeaderLine(string ln) pure @safe {
    auto rest = stripLeft(ln);
    size_t hashes = 0;
    foreach (ch; rest) {
        if (ch != '#') {
            break;
        }
        hashes++;
    }
    return hashes >= 1
        && hashes <= 6
        && hashes < rest.length
        && rest[hashes] == ' ';
}
1298 
1299 
// Tells whether `ln` is a horizontal rule: the line passes the allOf()
// check against spaces plus one of '-', '*' or '_', and that character
// occurs at least three times.
private bool isHlineLine(string ln) pure @safe {
    static immutable string[3] alphabets = [" -", " *", " _"];
    foreach (alphabet; alphabets) {
        if (allOf(ln, alphabet) && count(ln, alphabet[1]) >= 3) {
            return true;
        }
    }
    return false;
}
1306 
1307 
// Tells whether the first non-whitespace character of `ln` is '>'.
private bool isQuoteLine(string ln) pure @safe {
    auto rest = stripLeft(ln);
    return rest.length > 0 && rest[0] == '>';
}
1311 
1312 
// Counts the leading '>' markers of `ln`; whitespace before and between
// the markers is skipped.
private size_t getQuoteLevel(string ln) pure @safe {
    size_t depth = 0;
    for (ln = stripLeft(ln);
         ln.length > 0 && ln[0] == '>';
         ln = stripLeft(ln[1 .. $])) {
        depth++;
    }
    return depth;
}
1322 
1323 
// Tells whether `ln` opens an unordered list item: optional leading
// whitespace, one of '*', '+' or '-', then a space or a tab.
private bool isUListLine(string ln) pure @safe {
    auto rest = stripLeft(ln);
    return rest.length >= 2
        && canFind("*+-", rest[0])
        && (rest[1] == ' ' || rest[1] == '\t');
}
1331 
1332 
// Tells whether `ln` opens an ordered list item: optional leading
// whitespace, at least one ASCII digit, a '.', then a space or a tab.
private bool isOListLine(string ln) pure @safe {
    ln = stripLeft(ln);

    // length of the leading run of digits
    size_t digits = 0;
    while (digits < ln.length && ln[digits] >= '0' && ln[digits] <= '9') {
        digits++;
    }
    if (digits == 0) {
        return false;
    }

    auto rest = ln[digits .. $];
    return rest.length >= 2
        && rest[0] == '.'
        && (rest[1] == ' ' || rest[1] == '\t');
}
1356 
1357 
// Tells whether `ln` contains the column separator " | ".
//
// With `proper` set, a line that is also an ordered list item, an
// unordered list item or an ATX header is rejected.
private bool isTableRowLine(bool proper = false)(string ln) pure @safe {
    immutable bool hasSeparator = ln.indexOf(" | ") >= 0;
    static if (proper) {
        return hasSeparator
            && !ln.isOListLine
            && !ln.isUListLine
            && !ln.isAtxHeaderLine;
    } else {
        return hasSeparator;
    }
}
1370 
1371 
// Strips the list marker and the whitespace after it from a list item line.
//
// `tp` must be LineType.OList or LineType.UList. For an ordered item,
// everything up to and including the first '.' is removed; for an unordered
// item, the first non-whitespace character is removed.
private string removeListPrefix(string str, LineType tp) pure @safe {
    if (tp == LineType.OList) {
        // skip bullets and output using normal escaping
        auto dot = str.indexOfCT('.');
        assert(dot > 0);
        return str[dot+1 .. $].stripLeft();
    }
    if (tp == LineType.UList) {
        return stripLeft(str.stripLeft()[1 .. $]);
    }
    assert(false);
}
1383 
1384 
// Inspects `ln` for a single HTML tag filling the whole line.
//
// The result reports whether the line is such a tag with a name listed in
// s_blockTags (`isHtmlBlock`), the tag name as far as it was parsed
// (`tagName`), and whether it is an opening tag (`open`, false for "</…>").
private auto parseHtmlBlockLine(string ln) pure @safe {
    struct HtmlBlockInfo {
        bool isHtmlBlock;
        string tagName;
        bool open;
    }

    HtmlBlockInfo info;
    info.isHtmlBlock = false;
    info.open = true;

    ln = strip(ln);
    if (ln.length < 3 || ln[0] != '<') {
        return info;
    }
    if (ln[1] == '/') {
        // closing tag: step over the '<' so that ln[1] is the name start
        info.open = false;
        ln = ln[1 .. $];
    }
    if (!std.ascii.isAlpha(ln[1])) {
        return info;
    }
    ln = ln[1 .. $];

    // the tag name runs up to the first space or '>'
    size_t nameEnd = 0;
    while (nameEnd < ln.length && ln[nameEnd] != ' ' && ln[nameEnd] != '>') {
        nameEnd++;
    }
    info.tagName = ln[0 .. nameEnd];
    ln = ln[nameEnd .. $];

    // the first '>' must exist and must be the last character of the line
    auto closePos = ln.indexOf('>');
    if (closePos < 0 || closePos != ln.length - 1) {
        return info;
    }

    info.isHtmlBlock = s_blockTags.canFind(info.tagName);
    return info;
}
1422 
1423 
// Tells whether `ln` is an opening HTML block tag line.
private bool isHtmlBlockLine(string ln) pure @safe {
    auto info = parseHtmlBlockLine(ln);
    if (!info.isHtmlBlock) {
        return false;
    }
    return info.open;
}
1428 
1429 
// Tells whether `ln` is a closing HTML block tag line.
private bool isHtmlBlockCloseLine(string ln) pure @safe {
    auto info = parseHtmlBlockLine(ln);
    if (!info.isHtmlBlock) {
        return false;
    }
    return !info.open;
}
1434 
1435 
// Tells whether `ln` begins with three backticks.
private bool isCodeBlockDelimiter(string ln) pure @safe {
    return ln.length >= 3 && ln[0 .. 3] == "```";
}
1439 
1440 
1441 // private string getHtmlTagName(string ln) pure @safe {
1442 //     return parseHtmlBlockLine(ln).tagName;
1443 // }
1444 
1445 
// Tells whether `ln` begins with a tab or with four spaces.
private bool isLineIndented(string ln) pure @safe {
    return ln.startsWith("\t", "    ") != 0;
}
1449 
1450 
1451 // private string unindentLine(string ln) pure @safe {
1452 //     if (ln.startsWith("\t")) return ln[1 .. $];
1453 //     if (ln.startsWith("    ")) return ln[4 .. $];
1454 //     assert(false);
1455 // }
1456 
1457 
// Tries to read an emphasis span from the start of `str`.
//
// The delimiter is a run of one to three '*' or '_' characters. On success
// the emphasized text is stored in `text`, `str` is advanced past the
// closing delimiter and the delimiter length (1 to 3) is returned. On
// failure 0 is returned and both arguments are left untouched. Empty spans
// are not accepted.
private int parseEmphasis(ref string str, ref string text) pure @safe {
    if (str.length < 3) {
        return 0;
    }
    immutable char marker = str[0];
    if (marker != '*' && marker != '_') {
        return 0;
    }

    // length of the opening delimiter, capped at three characters
    size_t run = 1;
    while (run < 3 && str[run] == marker) {
        run++;
    }
    string delimiter = str[0 .. run];
    string rest = str[run .. $];

    auto closeIdx = () @trusted {
        return rest.indexOf(delimiter); }();
    if (closeIdx < 1) {
        return 0;
    }

    text = rest[0 .. closeIdx];
    str = rest[closeIdx + run .. $];
    return cast(int)run;
}
1482 
1483 
// Tries to read an inline code span from the start of `str`.
//
// The span is delimited by one backtick or by two backticks. On success the
// code is stored in `code`, `str` is advanced past the closing delimiter
// and true is returned; otherwise both arguments are left untouched. Empty
// spans are not accepted.
private bool parseInlineCode(ref string str, ref string code) pure @safe {
    if (str.length < 3 || str[0] != '`') {
        return false;
    }
    immutable size_t ticks = (str[1] == '`') ? 2 : 1;
    string delimiter = str[0 .. ticks];
    string rest = str[ticks .. $];

    auto closeIdx = () @trusted { return rest.indexOf(delimiter); }();
    if (closeIdx < 1) {
        return false;
    }

    code = rest[0 .. closeIdx];
    str = rest[closeIdx + ticks .. $];
    return true;
}
1500 
1501 
// Tries to read a link or image reference from the start of `str`.
//
// Accepted forms are "[text](url)", "[text](url "title")", "[text][refid]"
// and "[text][]" (the text doubles as reference id); a leading '!' is
// skipped. Reference ids are looked up in lower case in `linkrefs`. On
// success `dst` holds text, url and title, `str` is advanced past the link
// and true is returned. On failure `str` is unchanged, but `dst` may
// already have been modified.
private bool parseLink(ref string str,
                       ref Link dst,
                       in LinkRef[string] linkrefs)
pure @safe {
    string rest = str;
    if (rest.length < 3) {
        return false;
    }
    // ignore img-link prefix
    if (rest[0] == '!') {
        rest = rest[1 .. $];
    }

    // parse the text part [text]
    if (rest[0] != '[') {
        return false;
    }
    auto closeIdx = rest.matchBracket();
    if (closeIdx < 1) {
        return false;
    }
    dst.text = rest[1 .. closeIdx];
    rest = rest[closeIdx+1 .. $];

    // parse either (link '['"title"']') or '[' ']'[refid]
    if (rest.length < 2) {
        return false;
    }
    string refid;
    if (rest[0] == '(') {
        closeIdx = rest.matchBracket();
        if (closeIdx < 1) {
            return false;
        }
        auto inner = rest[1 .. closeIdx];
        immutable quoteIdx = inner.indexOfCT('"');
        // a title only starts at a quote that follows whitespace
        if (quoteIdx > 1 && std.ascii.isWhite(inner[quoteIdx - 1])) {
            dst.url = inner[0 .. quoteIdx].stripRight();
            immutable lastQuote = inner[quoteIdx .. $].lastIndexOf('"');
            if (lastQuote == 0) {
                // opening quote without a closing one
                return false;
            }
            assert(lastQuote > 0);
            dst.title = inner[quoteIdx + 1 .. quoteIdx + lastQuote];
        } else {
            dst.url = inner.stripRight();
            dst.title = null;
        }
        // "<url>" is accepted as well; drop the angle brackets
        if (dst.url.startsWith("<") && dst.url.endsWith(">")) {
            dst.url = dst.url[1 .. $-1];
        }
        rest = rest[closeIdx+1 .. $];
    } else {
        // one optional space between "[text]" and "[refid]"
        if (rest[0] == ' ') {
            rest = rest[1 .. $];
        }
        if (rest[0] != '[') {
            return false;
        }
        rest = rest[1 .. $];
        closeIdx = rest.indexOfCT(']');
        if (closeIdx < 0) {
            return false;
        }
        refid = (closeIdx == 0) ? dst.text : rest[0 .. closeIdx];
        rest = rest[closeIdx+1 .. $];
    }

    if (refid.length > 0) {
        if (auto found = toLower(refid) in linkrefs) {
            dst.url = found.url;
            dst.title = found.title;
        } else {
            return false;
        }
    }

    str = rest;
    return true;
}
1564 
1565 
@safe unittest {
    // Asserts that `input` parses as a link and yields exactly `wanted`.
    static void expectLink(string input, Link wanted, in LinkRef[string] known)
    {
        Link parsed;
        assert(parseLink(input, parsed, known), input);
        assert(parsed == wanted);
    }

    LinkRef[string] refs;
    refs["ref"] = LinkRef("ref", "target", "title");

    // inline links, with and without title
    expectLink(`[link](target)`, Link("link", "target"), null);
    expectLink(`[link](target "title")`, Link("link", "target", "title"), null);
    expectLink(`[link](target  "title")`, Link("link", "target", "title"), null);
    expectLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

    expectLink(`[link](target)`, Link("link", "target"), null);
    expectLink(`[link](target "title")`, Link("link", "target", "title"), null);

    // reference links
    expectLink(`[link][ref]`, Link("link", "target", "title"), refs);
    expectLink(`[ref][]`, Link("ref", "target", "title"), refs);

    // nested brackets in the link text
    expectLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
    expectLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

    // whitespace inside the target and around the title
    expectLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
    expectLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

    expectLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
    expectLink(`[link](tabs    "around title"    )`, Link("link", "tabs", "around title"), null);

    // empty title, and a quote that does not start a title
    expectLink(`[link](target "")`, Link("link", "target", ""), null);
    expectLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

    expectLink(`[link](<target>)`, Link("link", "target"), null);

    // none of these may be accepted as a link
    auto rejected = [
        `text`, `[link](target`, `[link]target)`, `[link]`,
        `[link(target)`, `link](target)`, `[link] (target)`,
        `[link][noref]`, `[noref][]`
    ];
    Link scratch;
    foreach (candidate; rejected) {
        assert(!parseLink(candidate, scratch, refs), candidate);
    }
}
1610 
1611 
// Tries to read an autolink of the form "<target>" from the start of `str`.
//
// The target must not contain spaces or tabs and must contain a ':' or an
// '@'. On success `str` is advanced past the closing '>', `url` receives
// the target and true is returned. A target without any ':' that has an '@'
// after its first character is taken for an e-mail address and gets a
// "mailto:" prefix. On failure `str` is unchanged, but `url` may already
// have been overwritten.
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
    string pstr = str;
    if (pstr.length < 3 ) return false;
    if (pstr[0] != '<' ) return false;
    pstr = pstr[1 .. $];
    auto cidx = pstr.indexOf('>');
    if (cidx < 0 ) return false;
    url = pstr[0 .. cidx];
    if (anyOf(url, " \t")) return false;
    if (!anyOf(url, ":@")) return false;
    str = pstr[cidx+1 .. $];
    // Only a bare address is turned into a mailto link. A target with a
    // ':' already carries a scheme: "https://host/@user" must stay a
    // normal URL and "mailto:a@b" must not get a second prefix.
    if (url.indexOf(':') < 0 && url.indexOf('@') > 0) {
        url = "mailto:"~url;
    }
    return true;
}
1627 
1628 
// Collects all link reference definitions from `lines` and removes the
// defining lines from the array.
//
// Recognized forms (the line must not be indented):
//   [refid]: link "opt text"
//   [refid]: <link> "opt text"
// The optional title may be written as "opt text", 'opt text' or
// (opt text). The result maps each lower-cased reference id to its
// definition.
private LinkRef[string] scanForReferences(ref string[] lines) pure @safe {
    LinkRef[string] ret;
    bool[size_t] reflines;

    foreach (i, ln; lines) {
        if (isLineIndented(ln)) continue;
        ln = strip(ln);
        if (!ln.startsWith("[")) continue;
        ln = ln[1 .. $];

        auto idx = ln.indexOf("]:");
        if (idx < 0) continue;
        string refid = ln[0 .. idx];
        ln = stripLeft(ln[idx+2 .. $]);

        string url;
        if (ln.startsWith("<")) {
            idx = ln.indexOfCT('>');
            if (idx < 0 ) continue;
            url = ln[1 .. idx];
            ln = ln[idx+1 .. $];
        } else {
            // The URL ends at the first whitespace character, whichever of
            // space and tab comes first. Looking for a space before a tab
            // would split `url<TAB>"two words"` inside the title.
            typeof(idx) spaceIdx = ln.indexOfCT(' ');
            typeof(idx) tabIdx = ln.indexOfCT('\t');
            if (spaceIdx >= 0 && (tabIdx < 0 || spaceIdx < tabIdx)) {
                idx = spaceIdx;
            } else {
                idx = tabIdx;
            }
            if (idx < 0) {
                // no whitespace at all: the rest of the line is the URL
                url = ln;
                ln = ln[$ .. $];
            } else {
                url = ln[0 .. idx];
                ln = ln[idx+1 .. $];
            }
        }
        ln = stripLeft(ln);

        string title;
        if (ln.length >= 3) {
            if (ln[0] == '(' && ln[$-1] == ')' ||
                ln[0] == '\"' && ln[$-1] == '\"' ||
                ln[0] == '\'' && ln[$-1] == '\'' )
            {
                title = ln[1 .. $-1];
            }
        }

        ret[toLower(refid)] = LinkRef(refid, url, title);
        reflines[i] = true;
    }

    // remove all lines containing references
    auto nonreflines = appender!(string[])();
    nonreflines.reserve(lines.length);
    foreach( i, ln; lines )
        if (i !in reflines )
            nonreflines.put(ln);
    lines = nonreflines.data();

    return ret;
}
1699 
1700 
/*******************************************************************************
 * Lazily converts a text into a "slug" that can be used inside a URL.
 *
 * Only ASCII letters and digits are kept; upper case ASCII letters are folded
 * to lower case. Every run of other characters that sits between two kept
 * characters is emitted as a single dash (-). Runs at the very beginning or
 * the very end of the input are dropped, so the result never starts or ends
 * with a dash.
 *
 * Params:
 *     text = input range whose elements are `dchar`
 * Returns:
 *     An input range of `char` producing the slug.
 */
auto asSlug(R)(R text)
if (isInputRange!R && is(typeof(R.init.front) == dchar)) {
    static struct SlugRange {
        private {
            R _source;
            bool _pendingDash;
        }

        this(R source)
        {
            _source = source;
            // Leading separators never produce a dash.
            dropSeparators();
        }

        @property bool empty() const {
            return !_pendingDash && _source.empty;
        }

        @property char front() const {
            if (_pendingDash) return '-';

            // Only ASCII alphanumerics can be at the front here, so the
            // narrowing cast is lossless.
            char c = cast(char)_source.front;
            return (c >= 'A' && c <= 'Z') ? cast(char)(c + ('a' - 'A')) : c;
        }

        void popFront()
        {
            if (_pendingDash) {
                // The dash was the current element; the next alphanumeric
                // character is already at the front of the source.
                _pendingDash = false;
                return;
            }

            _source.popFront();
            immutable skipped = dropSeparators();
            // A dash is only due when something follows the skipped run.
            _pendingDash = skipped && !_source.empty;
        }

        /// Pops non-alphanumeric elements; returns whether any were popped.
        private bool dropSeparators()
        {
            static bool isSlugChar(dchar c) {
                return (c >= 'a' && c <= 'z')
                    || (c >= 'A' && c <= 'Z')
                    || (c >= '0' && c <= '9');
            }

            bool dropped = false;
            for (; !_source.empty && !isSlugChar(_source.front); _source.popFront())
                dropped = true;
            return dropped;
        }
    }
    return SlugRange(text);
}
// asSlug: separators collapse to one dash, edges are trimmed, case is folded.
unittest {
    import std.algorithm : equal;

    // Each entry is [input, expected slug].
    static immutable string[2][] cases = [
        ["", ""],
        [".,-", ""],
        ["abc", "abc"],
        ["aBc123", "abc123"],
        ["....aBc...123...", "abc-123"],
    ];
    foreach (pair; cases) {
        string input = pair[0];
        string expected = pair[1];
        assert(input.asSlug.equal(expected));
    }
}
1773 
1774 
/// A link reference definition: an identifier with its URL and title.
private struct LinkRef {
    /// Reference identifier.
    string id;
    /// Target URL.
    string url;
    /// Link title.
    string title;
}
1780 
1781 
/// A link: its text together with the URL and title.
private struct Link {
    /// Link text.
    string text;
    /// Target URL.
    string url;
    /// Link title.
    string title;
}
1787 
1788 
// Images: alt and title attributes.
@safe unittest {
    auto withoutTitle = convertMarkdownToHTML("![alt](http://example.org/image)");
    assert(withoutTitle
        == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");

    auto withTitle = convertMarkdownToHTML("![alt](http://example.org/image \"Title\")");
    assert(withTitle
        == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1795 
1796 
// Complex links: multi-line link text with a <...> URL, and an image nested
// inside a link.
@safe unittest {
    auto multiLine = convertMarkdownToHTML("their [install\ninstructions](<http://www.brew.sh>) and");
    assert(multiLine
        == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");

    auto badge = convertMarkdownToHTML("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)");
    assert(badge
        == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1803 
1804 
// The converter must be usable at compile time (CTFE).
@safe unittest {
    // `enum` forces the conversion to be evaluated by the compiler.
    enum html = convertMarkdownToHTML("### some markdown\n[foo][]\n[foo]: /bar");
    enum expected = "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n";
    assert(html == expected, html);
}
1809 
1810 
// Restrictive (forum) mode: a single newline becomes a <br/>.
@safe unittest {
    auto html = convertMarkdownToHTML("hello\nworld", MarkdownFlags.forumDefault);
    assert(html == "<p>hello<br/>world\n</p>\n", html);
}
1815 
1816 /*@safe unittest { // code blocks and blockquotes
1817     assert(convertMarkdownToHTML("\tthis\n\tis\n\tcode") ==
1818         "<pre><code>this\nis\ncode</code></pre>\n");
1819     assert(convertMarkdownToHTML("    this\n    is\n    code") ==
1820         "<pre><code>this\nis\ncode</code></pre>\n");
1821     assert(convertMarkdownToHTML("    this\n    is\n\tcode") ==
1822         "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1823     assert(convertMarkdownToHTML("\tthis\n\n\tcode") ==
1824         "<pre><code>this\n\ncode</code></pre>\n");
1825     assert(convertMarkdownToHTML("\t> this") ==
1826         "<pre><code>&gt; this</code></pre>\n");
1827     assert(convertMarkdownToHTML(">     this") ==
1828         "<blockquote><pre><code>this</code></pre></blockquote>\n");
1829     assert(convertMarkdownToHTML(">     this\n    is code") ==
1830         "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1831 }*/
1832 
1833 
// Simple table without outer border pipes.
@safe unittest {
    enum source = "Col 1 | Col 2 | Col 3\n -- | -- | --\n val 1 | val 2 | val 3\n *val 4* | val 5 | value 6";
    enum expected = "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n";

    auto html = convertMarkdownToHTML(source, MarkdownFlags.supportTables);
    assert(html == expected, html);
}
1841 
1842 
// Simple table with outer border pipes.
@safe unittest {
    enum source = "| Col 1 | Col 2 | Col 3 |\n| -- | -- | -- |\n| val 1 | val 2 | val 3 |\n| *val 4* | val 5 | value 6 |";
    enum expected = "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n";

    auto html = convertMarkdownToHTML(source, MarkdownFlags.supportTables);
    assert(html == expected, html);
}
1850 
1851 
// A table that follows a paragraph, with padded cells and a trailing space.
// On failure the actual output is reported through the assert message, so the
// test prints nothing when it passes.
@safe unittest {
    string input = `
Table:

ID | Name  | Address
 - | ----  | ---------
 1 | Foo   | Somewhere
 2 | Bar   | Nowhere `;
    auto res = convertMarkdownToHTML(input, MarkdownFlags.supportTables);
    assert(res == "<p>Table:\n</p>\n<table>\n<tr><th>ID</th><th>Name</th><th>Address</th></tr>\n<tr><td>1</td><td>Foo</td><td>Somewhere</td></tr>\n<tr><td>2</td><td>Bar</td><td>Nowhere</td></tr>\n</table>\n", res);
}
1864 
1865 
1866 package:
1867 
1868 
/// Functions for working with HTML.
1870 
/*******************************************************************************
 * Writes the HTML escaped form of every element of `str` to an output range.
 *
 * Each element is forwarded to the single-character overload of
 * `filterHTMLEscape` together with `flags`.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range with the text to escape
 *     flags = escaping options applied to every element
 */
void filterHTMLEscape(R, S)(ref R dst,
                            S str,
                            HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline)
if (isOutputRange!(R, dchar) && isInputRange!S) {
    while (!str.empty) {
        filterHTMLEscape(dst, str.front, flags);
        str.popFront();
    }
}
1883 
1884 
/*******************************************************************************
 * Writes the HTML escaped form of `str` to an output range, using the
 * `escapeNewline` and `escapeQuotes` flags for every element.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range with the text to escape
 */
void filterHTMLAttribEscape(R, S)(ref R dst, S str)
if (isOutputRange!(R, dchar) && isInputRange!S) {
    enum attribFlags = HTMLEscapeFlags.escapeNewline | HTMLEscapeFlags.escapeQuotes;
    while (!str.empty) {
        filterHTMLEscape(dst, str.front, attribFlags);
        str.popFront();
    }
}
1899 
1900 
/*******************************************************************************
 * Writes every element of `str` to an output range as a decimal numeric
 * character reference (`&#NNN;`), regardless of which character it is.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range with the text to escape
 */
void filterHTMLAllEscape(R, S)(ref R dst, S str)
if (isOutputRange!(R, dchar) && isInputRange!S) {
    while (!str.empty) {
        immutable codePoint = cast(uint)str.front;
        dst.put("&#");
        dst.put(to!string(codePoint));
        dst.put(';');
        str.popFront();
    }
}
1913 
1914 
/*******************************************************************************
 * Writes the HTML escaped version of a character to an output range.
 *
 * `<`, `>` and `&` are always replaced by their named entities. Quotes and
 * line breaks are replaced only when the matching flag is set. ASCII letters,
 * digits and a fixed list of punctuation characters are always written
 * unchanged. Any other character is written as a decimal numeric character
 * reference when `escapeUnknown` is set and unchanged otherwise.
 *
 * Params:
 *     dst = output range receiving the result
 *     ch = character to escape
 *     flags = escaping options, see `HTMLEscapeFlags`
 */
void filterHTMLEscape(R)
                     (ref R dst,
                      dchar ch,
                      HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline ) {
    // Emits `ch` as a decimal numeric character reference.
    void putNumericEntity() {
        dst.put("&#");
        dst.put(to!string(cast(uint)ch));
        dst.put(';');
    }

    switch (ch) {
        default:
            if (flags & HTMLEscapeFlags.escapeUnknown) putNumericEntity();
            else dst.put(ch);
            break;
        case '"':
            if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("&quot;");
            else dst.put('"');
            break;
        case '\'':
            if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("&#39;");
            else dst.put('\'');
            break;
        case '\r', '\n':
            if (flags & HTMLEscapeFlags.escapeNewline) putNumericEntity();
            else dst.put(ch);
            break;
        case 'a': .. case 'z': goto case;
        case 'A': .. case 'Z': goto case;
        case '0': .. case '9': goto case;
        case ' ', '\t', '-', '_', '.', ':', ',', ';',
             '#', '+', '*', '?', '=', '(', ')', '/', '!',
             '%' , '{', '}', '[', ']', '`', '\u00B4', '$', '^', '~':
            // Written as dchar on purpose: U+00B4 (acute accent) is not
            // ASCII, and narrowing it to `char` would emit the lone byte
            // 0xB4, which is not valid UTF-8 in a char-based sink.
            dst.put(ch);
            break;
        case '<': dst.put("&lt;"); break;
        case '>': dst.put("&gt;"); break;
        case '&': dst.put("&amp;"); break;
    }
}


/// Flags for HTML-escaping some symbols (bit flags, combinable with `|`).
enum HTMLEscapeFlags {
    /// No optional escaping.
    escapeMinimal = 0,
    /// Escape double and single quotes.
    escapeQuotes = 1<<0,
    /// Escape carriage return and line feed as numeric references.
    escapeNewline = 1<<1,
    /// Escape characters without a dedicated rule as numeric references.
    escapeUnknown = 1<<2
}
1967 
1968 
/// Functions for working with string data.
1970 
/*******************************************************************************
 * Tells whether `str` consists solely of characters that occur in `chars`.
 *
 * Params:
 *     str = string to examine; an empty string yields `true`
 *     chars = set of permitted characters
 * Returns:
 *     `false` as soon as a character of `str` is missing from `chars`,
 *     `true` otherwise.
 */
bool allOf(string str, string chars)
@safe pure {
    bool everyFound = true;
    foreach (dchar symbol; str) {
        everyFound = chars.canFind(symbol);
        if (!everyFound) break;
    }
    return everyFound;
}
1983 
/*******************************************************************************
 * Finds the first occurrence of a character in a string, also at compile time.
 *
 * During CTFE the string is searched with a plain loop; at run time the call
 * is forwarded to `std.string.indexOf`.
 *
 * Params:
 *     s = string to search in
 *     c = character to look for
 *     cs = whether the comparison is case sensitive
 * Returns:
 *     The index of the first match in `s`, or -1 if `c` does not occur.
 */
ptrdiff_t indexOfCT(Char)(in Char[] s,
                          dchar c,
                          CaseSensitive cs = CaseSensitive.yes)
@safe pure {
    if (__ctfe) {
        if (cs == CaseSensitive.yes) {
            foreach (i, dchar ch; s) {
                if (ch == c) {
                    return i;
                }
            }
        } else {
            // Fold the needle once, then fold each decoded character.
            c = std.uni.toLower(c);
            foreach (i, dchar ch; s) {
                if (std.uni.toLower(ch) == c) {
                    return i;
                }
            }
        }
        return -1;
    }
    return std.string.indexOf(s, c, cs);
}
2007 
2008 
/*******************************************************************************
 * Checks if any character in 'str' is contained in 'chars'.
 *
 * Both strings are compared code point by code point, so non-ASCII characters
 * are matched as whole characters rather than as individual UTF-8 bytes.
 *
 * Params:
 *     str = string whose characters are looked up; empty yields `false`
 *     chars = set of characters to look for
 * Returns:
 *     `true` as soon as one character of `str` occurs in `chars`.
 */
bool anyOf(string str, string chars)
@safe pure {
    // Decode to dchar: iterating raw code units would compare single UTF-8
    // bytes against the decoded characters of `chars`.
    foreach (dchar ch; str) {
        if (chars.canFind(ch)) {
            return true;
        }
    }
    return false;
}
2021 
2022 
/*******************************************************************************
 * Locates the bracket that closes the one `str` starts with
 * (supports '[', '$(LPAREN)', '<' and '{').
 *
 * Params:
 *     str = input string; its first character must be the opening bracket
 *     nested = when `true`, further opening brackets of the same kind must
 *              be closed before the match; when `false`, the first closing
 *              bracket wins
 * Returns:
 *     The index of the closing bracket within `str`, or -1 if `str` is
 *     shorter than two characters, does not start with a supported bracket
 *     or is unbalanced.
 */
sizediff_t matchBracket(string str, bool nested = true)
@safe pure nothrow {
    if (str.length < 2) return -1;

    immutable char opener = str[0];
    char closer;
    if (opener == '[') closer = ']';
    else if (opener == '(') closer = ')';
    else if (opener == '<') closer = '>';
    else if (opener == '{') closer = '}';
    else return -1;

    // Number of brackets that are currently open, the first one included.
    size_t depth = 1;
    for (size_t pos = 1; pos < str.length; ++pos) {
        immutable char c = str[pos];
        if (nested && c == opener) {
            ++depth;
        } else if (c == closer) {
            --depth;
        }
        if (depth == 0) return pos;
    }
    return -1;
}
2054 
2055 
2056 
2057 ////////////////////////////////////////////////////////////////////////////////
2058 ////                        DEPRECATED FUNCTIONS                            ////
2059 ////////////////////////////////////////////////////////////////////////////////
2060 
2061 
/*******************************************************************************
 * Builds the section hierarchy of a Markdown document from its headers.
 *
 * Only header blocks directly below the document root are considered. A
 * header is nested under the most recent section as long as that section has
 * a smaller heading level.
 *
 * Params:
 *     markdown_source = Markdown text to analyse
 *     settings = parser settings; a default instance is created when null
 * Returns:
 *     The top-level sections, each carrying its nested sub-sections.
 */
Section[] getMarkdownOutline(string markdown_source,
                             scope MarkdownSettings settings = null) {
    if (settings is null) settings = new MarkdownSettings;

    auto sourceLines = splitLines(markdown_source);
    auto parsedLines = parseLines(sourceLines, settings);
    Block document;
    parseBlocks(document, parsedLines, null, settings);

    Section outline;
    foreach (ref blk; document.blocks) {
        if (blk.type != BlockType.Header) continue;

        // Walk down along the last sub-section of each level while that
        // sub-section ranks above the new header.
        auto parent = &outline;
        while (parent.subSections.length != 0
               && parent.subSections[$-1].headingLevel < blk.headerLevel) {
            parent = &parent.subSections[$-1];
        }

        auto title = blk.text[0];
        parent.subSections ~= Section(
            blk.headerLevel, title, title.asSlug.to!string
        );
    }

    return outline.subSections;
}
///
unittest {
    auto source = "## first\n## second\n### third\n# fourth\n### fifth";

    // "third" nests under "second", "fifth" under "fourth".
    auto second = Section(2, " second", "second", [Section(3, " third", "third")]);
    auto fourth = Section(1, " fourth", "fourth", [Section(3, " fifth", "fifth")]);
    auto outline = [Section(2, " first", "first"), second, fourth];

    assert(getMarkdownOutline(source) == outline);
}
2101 
2102 
/// Options that control how Markdown text is parsed.
final class MarkdownSettings {
    /// Feature flags of the parser; vanilla Markdown by default.
    MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

    /// Level at which heading tags start (1 by default).
    size_t headingBaseLevel = 1;

    /// Optional hook invoked for every link/image URL; may rewrite the URL.
    string delegate(string urlOrPath, bool isImage) urlFilter;

    /***************************************************************************
     * Optional hook to post-process code blocks and inline code,
     * e.g. to add code highlighting. Not set by default.
     */
    string delegate(string) @safe nothrow processCode = null;
}
2119 
2120 
/*******************************************************************************
 * Classifies every input line and records its indentation.
 *
 * For each line the leading indentation is decomposed into white-space levels
 * (one tab or four spaces each) and quote levels (`>`). The remaining text is
 * then matched against the known line kinds in a fixed order; the first
 * matching kind wins and `Plain` is the fallback.
 *
 * Params:
 *     lines = source lines; all of them are consumed (popped) by this call
 *     settings = parser settings; `flags` selects the optional line kinds and
 *                the character used for second-level setext underlines
 * Returns:
 *     One `Line` per input line, in input order.
 */
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
    Line[] ret;

    // Bit test with '&', like every other flag check below: the alternate
    // character must only be used when this particular flag is set.
    char subHeaderChar;
    if (settings.flags & MarkdownFlags.alternateSubheaders) {
        subHeaderChar = '*';
    } else {
        subHeaderChar = '-';
    }

    while( !lines.empty ) {
        auto ln = lines.front;
        lines.popFront();

        Line lninfo;
        lninfo.text = ln;

        // Strips the indentation from `ln`, recording one entry per level.
        void determineIndent() {
            while (ln.length > 0) {
                if (ln[0] == '\t' ) {
                    lninfo.indent ~= IndentType.White;
                    ln.popFront();
                } else if (ln.startsWith("    ")) {
                    lninfo.indent ~= IndentType.White;
                    ln.popFrontN(4);
                } else {
                    // Fewer than four spaces do not form a level of their
                    // own; they are dropped before looking for a quote mark.
                    ln = ln.stripLeft();
                    if (ln.startsWith(">")) {
                        lninfo.indent ~= IndentType.Quote;
                        ln.popFront();
                    } else {
                        break;
                    }
                }
            }
            lninfo.unindented = ln;
        }

        determineIndent();

        // The order of the checks matters: the first match decides the type.
        if ((settings.flags & MarkdownFlags.backtickCodeBlocks)
         && isCodeBlockDelimiter(ln)) {
            lninfo.type = LineType.CodeBlockDelimiter;
        } else if (isAtxHeaderLine(ln)) {
            lninfo.type = LineType.AtxHeader;
        } else if ( isSetextHeaderLine(ln, subHeaderChar)) {
            lninfo.type = LineType.SetextHeader;
        } else if ((settings.flags & MarkdownFlags.supportTables)
         && isTableRowLine!false(ln)) {
            lninfo.type = LineType.Table;
        } else if (isHlineLine(ln)) {
            lninfo.type = LineType.Hline;
        } else if (isOListLine(ln)) {
            lninfo.type = LineType.OList;
        } else if (isUListLine(ln)) {
            lninfo.type = LineType.UList;
        } else if (isLineBlank(ln)) {
            lninfo.type = LineType.Blank;
        } else if (!(settings.flags & MarkdownFlags.noInlineHtml)
         && isHtmlBlockLine(ln)) {
            lninfo.type = LineType.HtmlBlock;
        }
        else lninfo.type = LineType.Plain;

        ret ~= lninfo;
    }
    return ret;
}
2188 
2189 
/*******************************************************************************
 * Groups classified lines into a tree of blocks below `root`.
 *
 * Lines are consumed from the front of `lines` for as long as their indent
 * starts with `baseIndent`; the function returns at the first line whose
 * indent does not, leaving that line for the caller. Blank lines are skipped.
 * Lines indented deeper than `baseIndent` become a code block (exactly one
 * additional white-space level) or a nested block that is parsed recursively.
 *
 * Params:
 *     root = block receiving the parsed children; its type is set to Text
 *            for an empty `baseIndent` and to Quote when `baseIndent` ends
 *            with a quote level
 *     lines = remaining input lines; consumed lines are popped off the front
 *     baseIndent = indentation shared by all lines that belong to `root`
 *     settings = parser settings, passed on to nested invocations
 */
private void parseBlocks(ref Block root,
                         ref Line[] lines,
                         IndentType[] baseIndent,
                         scope MarkdownSettings settings)
pure @safe {
    if (baseIndent.length == 0) {
        root.type = BlockType.Text;
    } else if (baseIndent[$-1] == IndentType.Quote) {
        root.type = BlockType.Quote;
    }

    while (!lines.empty) {
        auto ln = lines.front;

        if (ln.type == LineType.Blank) {
            lines.popFront();
            continue;
        }

        if (ln.indent != baseIndent) {
            // A line that does not extend the base indent belongs to an
            // enclosing block.
            if (ln.indent.length < baseIndent.length ||
                ln.indent[0 .. baseIndent.length] != baseIndent) {
                return;
            }

            auto cindent = baseIndent ~ IndentType.White;
            if (ln.indent == cindent) {
                // Exactly one extra white-space level: indented code block.
                Block cblock;
                cblock.type = BlockType.Code;
                while (!lines.empty &&
                       lines.front.indent.length >= cindent.length &&
                       lines.front.indent[0 .. cindent.length] == cindent) {
                    cblock.text ~= lines.front.unindent(cindent.length);
                    lines.popFront();
                }
                root.blocks ~= cblock;
            } else {
                // Any other deeper indent: parse one level further down.
                Block subblock;
                parseBlocks(subblock,
                            lines,
                            ln.indent[0 .. baseIndent.length+1],
                            settings);
                root.blocks ~= subblock;
            }
        } else {
            Block b;
            // Turns the line at the front of `lines` (and whatever skipText
            // takes along with it) into a paragraph.
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }
            final switch(ln.type) {
                case LineType.Undefined: assert(false);
                case LineType.Blank: assert(false);
                case LineType.Plain:
                    // A plain line followed by a setext underline is a header.
                    if (lines.length >= 2 &&
                        lines[1].type == LineType.SetextHeader) {
                        auto setln = lines[1].unindented;
                        b.type = BlockType.Header;
                        b.text = [ln.unindented];
                        b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                        lines.popFrontN(2);
                    } else {
                        processPlain();
                    }
                    break;
                case LineType.Hline:
                    b.type = BlockType.Plain;
                    b.text = ["<hr>"];
                    lines.popFront();
                    break;
                case LineType.AtxHeader:
                    b.type = BlockType.Header;
                    string hl = ln.unindented;
                    // The number of leading '#' gives the header level.
                    b.headerLevel = 0;
                    while (hl.length > 0 && hl[0] == '#') {
                        b.headerLevel++;
                        hl = hl[1 .. $];
                    }
                    // Strip closing '#' characters and trailing spaces.
                    while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
                        hl = hl[0 .. $-1];
                    b.text = [hl];
                    lines.popFront();
                    break;
                case LineType.SetextHeader:
                    // An underline without a preceding plain line is dropped.
                    lines.popFront();
                    break;
                case LineType.UList:
                case LineType.OList:
                    b.type = ln.type == LineType.UList ? BlockType.UList
                                                       : BlockType.OList;
                    auto itemindent = baseIndent ~ IndentType.White;
                    bool firstItem = true, paraMode = false;
                    while (!lines.empty && lines.front.type == ln.type &&
                           lines.front.indent == baseIndent) {
                        Block itm;
                        itm.text = skipText(lines, itemindent);
                        itm.text[0] = removeListPrefix(itm.text[0], ln.type);

                        // emit <p></p> if there are blank lines between the items
                        if (firstItem && !lines.empty &&
                            lines.front.type == LineType.Blank) {
                            paraMode = true;
                        }
                        firstItem = false;
                        if (paraMode) {
                            Block para;
                            para.type = BlockType.Paragraph;
                            para.text = itm.text;
                            itm.blocks ~= para;
                            itm.text = null;
                        }

                        // Nested content of the item lives one level deeper.
                        parseBlocks(itm, lines, itemindent, settings);
                        itm.type = BlockType.ListItem;
                        b.blocks ~= itm;
                    }
                    break;
                case LineType.HtmlBlock:
                    int nestlevel = 0;
                    auto starttag = parseHtmlBlockLine(ln.unindented);
                    if (!starttag.isHtmlBlock || !starttag.open)
                        break;

                    b.type = BlockType.Plain;
                    // Copy lines verbatim until the start tag is balanced by
                    // a closing tag of the same name.
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }

                        auto str = lines.front.unindent(baseIndent.length);
                        auto taginfo = parseHtmlBlockLine(str);
                        b.text ~= str;
                        lines.popFront();
                        if (taginfo.isHtmlBlock
                         && taginfo.tagName == starttag.tagName) {
                            nestlevel += taginfo.open ? 1 : -1;
                        }
                        if (nestlevel <= 0) {
                            break;
                        }
                    }
                    break;
                case LineType.CodeBlockDelimiter:
                    lines.popFront(); // TODO: get language from line
                    b.type = BlockType.Code;
                    // Collect lines up to the closing delimiter (consumed as
                    // well) or up to the end of the enclosing block.
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length ) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }
                        if (lines.front.type == LineType.CodeBlockDelimiter) {
                            lines.popFront();
                            break;
                        }
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                    }
                    break;
                case LineType.Table:
                    // Decide by peeking at the next line BEFORE consuming
                    // anything: this is a table only if a header separator
                    // row follows. Otherwise processPlain() must still find
                    // the current line at the front of `lines` (as in the
                    // Plain case), so that its text ends up in the paragraph
                    // and skipText is never handed an empty array.
                    immutable bool isTableHeader = (
                        lines.length >= 2
                        && (lines[1].type == LineType.Table)
                        && (lines[1].text.indexOf(" -") >= 0)
                        && (lines[1].text.indexOf("- ") >= 0)
                        && lines[1].text.allOf("-:| ")
                    );
                    if (!isTableHeader) {
                        // Not a valid table header, so let's assume it's plain markdown
                        processPlain();
                        break;
                    }
                    b.type = BlockType.Table;
                    // Parse header
                    b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                    // Drop the header row and the separator row
                    lines.popFrontN(2);
                    // Parse table rows
                    while (!lines.empty) {
                        ln = lines.front;
                        if (ln.type != LineType.Table)
                            break; // not a table row, so let's assume it's the end of the table
                        b.blocks ~= splitTableRow(ln);
                        lines.popFront();
                    }
                    break;
            }
            root.blocks ~= b;
        }
    }
}
2390