1 /*******************************************************************************
2  * Markdown parser implementation.
3  * 
4  * Copyright: (c) 2012-2019 RejectedSoftware e.K. and the D community
5  * License: Subject to the terms of the MIT license.
6  * Repository: https://github.com/dlang-community/dmarkdown
7  * 
8  * This library was forked and modified in 2021-2022 for the `hgen` project.
9  * hgen: https://gitlab.com/os-18/hgen
10  * Author: Eugene 'Vindex' Stulin <tech.vindex@gmail.com>
11  * 
12  * MIT License (Expat version)
13  * 
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this software and associated documentation files (the "Software"), to deal
16  * in the Software without restriction, including without limitation the rights
17  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18  * copies of the Software, and to permit persons to whom the Software is
19  * furnished to do so, subject to the following conditions:
20  * 
21  * The above copyright notice and this permission notice shall be included
22  * in all copies or substantial portions of the Software.
23  * 
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 module md;
34 
35 import std.algorithm;
36 import std.array;
37 import std.ascii;
38 import std.conv;
39 import std.exception;
40 import std.format;
41 import std.uni;
42 import std.utf;
43 import core.exception;
44 import std.range;
45 import std.string;
46 import std.stdio;
47 
48 alias UrlFilterFn = string delegate(string urlOrPath, bool isImage);
49 alias ProcessCodeFn = string delegate(string) @safe nothrow;
50 
51 
/// Bit flags that switch individual parser features on or off.
enum MarkdownFlags {
    /// No optional feature enabled.
    none = 0,
    /// Lines of a text block are joined with "<br>" instead of a newline.
    keepLineBreaks = 1 << 0,
    /// Code block delimiter lines are recognised when classifying lines.
    backtickCodeBlocks = 1 << 1,
    /// '<' and '>' in inline text are escaped and
    /// HTML block lines are not detected.
    noInlineHtml = 1 << 2,
    //noLinks = 1<<3,
    //allowUnsafeHtml = 1<<4,
    // (disabled) If used,
    // subheadings are underlined by stars ('*') instead of dashes ('-')
    // alternateSubheaders = 1 << 5,
    /// If used, '_' may not be used for emphasis ('*' may still be used)
    disableUnderscoreEmphasis = 1 << 6,
    /// Table row lines are recognised when classifying lines.
    supportTables = 1 << 7,
    /// Preset: plain Markdown without any extension.
    vanillaMarkdown = none,
    /// Preset: line breaks kept, backtick code blocks, no inline HTML.
    forumDefault = keepLineBreaks | backtickCodeBlocks | noInlineHtml,
    /// Preset: backtick code blocks and tables.
    githubInspired = backtickCodeBlocks | supportTables,
}
69 
70 
/// Classification assigned to every input line (after its indentation has
/// been stripped) before lines are grouped into blocks.
private enum LineType {
    Undefined,           // never expected during block parsing (asserted)
    Blank,               // skipped between blocks
    Plain,               // ordinary text; starts a paragraph or a setext header
    Hline,               // horizontal rule, rendered as "<hr>"
    AtxHeader,           // header introduced by leading '#' characters
    SetextHeader,        // '=' or '-' underline that follows a header text line
    UList,               // unordered list item
    OList,               // ordered list item
    HtmlBlock,           // opening line of a raw HTML block
    CodeBlockDelimiter,  // delimiter line of a delimited code block
    Table,               // table row candidate
}
84 
85 
/// Kind of a node in the block tree produced by the block parser and
/// consumed by the HTML writer.
private enum BlockType {
    Plain,        // lines are written verbatim, followed by the child blocks
    Text,         // container used for the root of the document
    Paragraph,    // rendered inside <p>...</p>
    Header,       // rendered as <hN id="...">
    OList,        // rendered as <ol>, children are list items
    UList,        // rendered as <ul>, children are list items
    ListItem,     // rendered as <li>
    Code,         // rendered as <pre class="prettyprint"><code>
    Quote,        // rendered as <blockquote>
    Table,        // rendered as <table>; first child is the header row
    TableRow,     // only valid as a child of Table
    TableHeader,  // only valid inside the first row of a Table
    TableData,    // only valid inside the non-header rows of a Table
}
101 
102 
/// A link reference definition of the form `[id]: url "title"`.
private struct LinkRef {
    string id;     // label as written between '[' and "]:"
    string url;    // target, taken either from <...> or up to the first blank
    string title;  // optional text enclosed in (), "" or ''; empty if absent
}
108 
109 
/*******************************************************************************
 * Tells whether a string both starts and ends with the given character.
 *
 * Params:
 *     l = string to inspect
 *     c = character expected at both edges
 * Returns: `true` if the first and the last code unit of `l` equal `c`;
 *          `false` otherwise, including for an empty string.
 */
private bool edgesAreEqual(string l, char c) pure @safe {
    // An empty string has no edges; guard against indexing out of range.
    return l.length > 0 && l[0] == c && l[$ - 1] == c;
}
113 
114 
/*******************************************************************************
 * Cuts the URL off the front of a reference definition tail.
 *
 * The URL is either enclosed in angle brackets or ends at the first blank
 * (a space is preferred, a tab is the fallback). If no separator is found,
 * the whole remainder is the URL.
 *
 * Params:
 *     ln = text starting at the URL; on return it holds what follows the URL
 *          with leading whitespace removed
 * Returns: the extracted URL.
 * Throws: Exception if the URL starts with '<' but has no closing '>'.
 */
private string extractUrl(ref string ln) pure @safe {
    string url;
    if (ln.startsWith("<")) {
        immutable closing = ln.indexOfCT('>');
        enforce(closing >= 0, "No reference line.");
        url = ln[1 .. closing];
        ln = ln[closing + 1 .. $];
    } else {
        // A space at position 0 does not count as a separator,
        // in that case the tab position decides.
        auto sep = ln.indexOfCT(' ');
        if (sep <= 0) {
            sep = ln.indexOfCT('\t');
        }
        if (sep >= 0) {
            url = ln[0 .. sep];
            ln = ln[sep + 1 .. $];
        } else {
            url = ln;
            ln = ln[$ .. $];
        }
    }
    ln = stripLeft(ln);
    return url;
}
141 
142 
/*******************************************************************************
 * Parses a link reference definition (`[id]: url "title"`).
 *
 * Params:
 *     ln = candidate line; it is consumed while parsing
 * Returns: the parsed reference; `title` stays empty unless the remainder
 *          after the URL is at least three characters long and enclosed in
 *          parentheses, double quotes or single quotes.
 * Throws: Exception if the line is indented, does not start with '[' or
 *         contains no "]:" (and whatever `extractUrl` throws).
 */
private LinkRef extractLinkRef(ref string ln) pure @safe {
    enum notAReference = "No reference line.";
    enforce(!isLineIndented(ln), notAReference);

    ln = strip(ln);
    enforce(ln.startsWith("["), notAReference);
    ln = ln[1 .. $];  // drop the opening bracket

    immutable labelEnd = ln.indexOf("]:");
    enforce(labelEnd >= 0, notAReference);

    LinkRef parsed;
    parsed.id = ln[0 .. labelEnd];
    ln = stripLeft(ln[labelEnd + 2 .. $]);
    parsed.url = extractUrl(ln);

    if (ln.length >= 3) {
        immutable char first = ln[0];
        immutable char last = ln[$ - 1];
        immutable bool enclosed =
            (first == '(' && last == ')') ||
            (first == '"' && last == '"') ||
            (first == '\'' && last == '\'');
        if (enclosed) {
            parsed.title = ln[1 .. $ - 1];
        }
    }
    return parsed;
}
167 
168 
/*******************************************************************************
 * Collects all link reference definitions and removes them from the text.
 *
 * Params:
 *     lines = all lines of the document; on return only the lines that are
 *             not reference definitions remain, in their original order
 * Returns: the references, keyed by their lower-cased id. A later definition
 *          with the same key replaces an earlier one.
 */
private LinkRef[string] scanForReferences(ref string[] lines) pure @safe {
    LinkRef[string] refs;
    auto kept = appender!(string[])();
    kept.reserve(lines.length);
    foreach (original; lines) {
        // Parsing consumes its argument, so work on a copy of the slice.
        string candidate = original;
        try {
            auto parsed = extractLinkRef(candidate);
            refs[toLower(parsed.id)] = parsed;
        } catch (Exception) {
            // Not a reference definition: the line stays in the document.
            kept.put(original);
        }
    }
    lines = kept.data();
    return refs;
}
193 
194 
/// Settings bundle accepted by the deprecated `convertMarkdownToHTML`
/// overload; every field is copied into a `MarkdownHandler`.
final class MarkdownSettings {
    /// Controls the capabilities of the parser.
    MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

    /// Heading tags will start at this level.
    size_t headingBaseLevel = 1;

    /// Called for every link/image URL to perform arbitrary transformations.
    /// May be left unset (null).
    string delegate(string urlOrPath, bool isImage) urlFilter;

    /***************************************************************************
     * An optional delegate to post-process code blocks and inline code.
     * Useful to e.g. add code highlighting.
     */
    string delegate(string) @safe nothrow processCode = null;
}
211 
212 
/*******************************************************************************
 * Converts Markdown text to HTML (deprecated convenience wrapper around
 * `MarkdownHandler`).
 *
 * Params:
 *     markdownText = Markdown source
 *     settings = optional settings; `null` keeps the handler defaults
 * Returns: the generated HTML.
 */
deprecated string convertMarkdownToHTML(
    string markdownText, MarkdownSettings settings = null
) @trusted {
    auto converter = new MarkdownHandler(markdownText);
    if (settings is null) {
        return converter.convertToHTML();
    }
    converter.setFlags(settings.flags);
    converter.setHeadingBaseLevel(settings.headingBaseLevel);
    converter.setUrlFilterFunction(settings.urlFilter);
    converter.setProcessCodeFunction(settings.processCode);
    return converter.convertToHTML();
}
225 
226 
/*******************************************************************************
 * Converts Markdown text to HTML using the given parser flags (deprecated
 * convenience wrapper around `MarkdownHandler`).
 *
 * Params:
 *     markdownText = Markdown source
 *     flags = parser features to enable
 * Returns: the generated HTML.
 */
deprecated string convertMarkdownToHTML(
    string markdownText, MarkdownFlags flags
) @trusted {
    auto converter = new MarkdownHandler(markdownText);
    converter.setFlags(flags);
    return converter.convertToHTML();
}
234 
235 
/// One input line together with its classification and indentation info.
private struct Line {
    LineType type;        // classification of the unindented text
    IndentType[] indent;  // indentation levels found at the line start
    string text;          // the line exactly as read
    string unindented;    // the text with all indentation levels removed

    /***************************************************************************
     * Removes the first `n` indentation levels from the original text.
     *
     * A whitespace level is either four characters (when the remaining text
     * starts with a space) or a single character otherwise; a quote level is
     * any leading whitespace plus one character.
     *
     * Params:
     *     n = number of levels to strip, at most `indent.length`
     * Returns: the remaining text.
     */
    string unindent(size_t n) pure @safe {
        assert(n <= indent.length);
        string rest = text;
        foreach (level; indent[0 .. n]) {
            final switch (level) {
                case IndentType.White:
                    immutable size_t width = (rest[0] == ' ') ? 4 : 1;
                    rest = rest[width .. $];
                    break;
                case IndentType.Quote:
                    rest = rest.stripLeft()[1 .. $];
                    break;
            }
        }
        return rest;
    }
}
258 
259 
260 class MarkdownHandler {
    /// Creates a handler for the given Markdown source text.
    /// The text is only stored here; it is parsed by `convertToHTML`.
    this(string markdownText) @safe {
        this.markdownText = markdownText;
    }
264 
265     void setProcessCodeFunction(ProcessCodeFn process) {
266         this.processCode = process;
267     }
268 
269     void setHeadingBaseLevel(size_t newHeadingBaseLevel) {
270         this.headingBaseLevel = newHeadingBaseLevel;
271     }
272 
273     void setFlags(MarkdownFlags newFlags) {
274         this.flags = newFlags;
275     }
276 
277     void disableUnderscoreEmphasis() {
278         this.flags |= MarkdownFlags.disableUnderscoreEmphasis;
279     }
280 
281     string convertToHTML() {
282         string[] allLines = std..string.splitLines(this.markdownText);
283         LinkRef[string] links = scanForReferences(allLines);
284         Line[] lines = this.parseLines(allLines);
285         Block rootBlock;
286         this.parseBlocks(rootBlock, lines);
287         auto dst = appender!string();
288         this.writeBlock(dst, rootBlock, links);
289         return dst.data;
290     }
291 
292     void setUrlFilterFunction(UrlFilterFn filter) {
293         this.urlFilter = filter;
294     }
295 
296 private:
297 
298     Line[] parseLines(ref string[] lines) @safe {
299         Line[] ret;
300         while (!lines.empty) {
301             Line lninfo;
302             lninfo.text = lines.front;
303             lines.popFront();
304             determineIndent(lninfo);
305             lninfo.type = determineType(lninfo.unindented);
306             ret ~= lninfo;
307         }
308         return ret;
309     }
310 
311     void determineIndent(ref Line lninfo) @safe {
312         auto ln = lninfo.text.idup;
313         while (ln.length > 0) {
314             if (ln[0] == '\t') {
315                 lninfo.indent ~= IndentType.White;
316                 ln.popFront();
317             } else if (ln.startsWith("    ")) {
318                 lninfo.indent ~= IndentType.White;
319                 ln.popFrontN(4);
320             } else {
321                 ln = ln.stripLeft();
322                 if (ln.startsWith(">")) {
323                     lninfo.indent ~= IndentType.Quote;
324                     ln.popFront();
325                 } else {
326                     break;
327                 }
328             }
329         }
330         lninfo.unindented = ln;
331     }
332 
333     LineType determineType(string ln)
334     pure @safe {
335         alias MF = MarkdownFlags;
336         if ((flags & MF.backtickCodeBlocks) && isCodeBlockDelimiter(ln)) {
337             return LineType.CodeBlockDelimiter;
338         } else if (isAtxHeaderLine(ln)) {
339             return LineType.AtxHeader;
340         } else if (isSetextHeaderLine(ln, '-') || isSetextHeaderLine(ln, '=')) {
341             return LineType.SetextHeader;
342         } else if ((flags & MF.supportTables) && isTableRowLine(ln)) {
343             return LineType.Table;
344         } else if (isHlineLine(ln)) {
345             return LineType.Hline;
346         } else if (isOListLine(ln)) {
347             return LineType.OList;
348         } else if (isUListLine(ln)) {
349             return LineType.UList;
350         } else if (isLineBlank(ln)) {
351             return LineType.Blank;
352         } else if (!(flags & MF.noInlineHtml) && isHtmlBlockLine(ln)) {
353             return LineType.HtmlBlock;
354         }
355         return LineType.Plain;
356     }
357 
358     void parseBlocks(ref Block root,
359                      ref Line[] lines,
360                      IndentType[] baseIndent = null)
361     pure @safe {
362         if (baseIndent.empty) {
363             root.type = BlockType.Text;
364         } else if (baseIndent[$ - 1] == IndentType.Quote) {
365             root.type = BlockType.Quote;
366         }
367 
368         while (!lines.empty) {
369             auto ln = lines.front;
370 
371             if (ln.type == LineType.Blank) {
372                 lines.popFront();
373                 continue;
374             }
375 
376             if (ln.indent != baseIndent) {
377                 if (ln.indent.length < baseIndent.length ||
378                     ln.indent[0 .. baseIndent.length] != baseIndent
379                 ) {
380                     return;
381                 }
382 
383                 auto cindent = baseIndent ~ IndentType.White;
384                 if (ln.indent == cindent) {
385                     Block cblock;
386                     cblock.type = BlockType.Code;
387                     while (
388                         !lines.empty &&
389                         lines.front.indent.length >= cindent.length &&
390                         lines.front.indent[0 .. cindent.length] == cindent
391                     ) {
392                         cblock.text ~= lines.front.unindent(cindent.length);
393                         lines.popFront();
394                     }
395                     root.blocks ~= cblock;
396                 } else {
397                     Block subblock;
398                     this.parseBlocks(
399                         subblock,
400                         lines,
401                         ln.indent[0 .. baseIndent.length + 1]
402                     );
403                     root.blocks ~= subblock;
404                 }
405                 return;
406             }
407 
408             Block b;
409             void processPlain() {
410                 b.type = BlockType.Paragraph;
411                 b.text = skipText(lines, baseIndent);
412             }
413 
414             final switch (ln.type) {
415                 case LineType.Undefined:
416                     assert(false);
417                 case LineType.Blank:
418                     assert(false);
419                 case LineType.Plain:
420                     if (lines.length >= 2 && lines[1].type == LineType.SetextHeader) {
421                         auto setln = lines[1].unindented;
422                         b.type = BlockType.Header;
423                         b.text = [ln.unindented];
424                         b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
425                         lines.popFrontN(2);
426                     } else {
427                         processPlain();
428                     }
429                     break;
430                 case LineType.Hline:
431                     b.type = BlockType.Plain;
432                     b.text = ["<hr>"];
433                     lines.popFront();
434                     break;
435                 case LineType.AtxHeader:
436                     b.type = BlockType.Header;
437                     string hl = ln.unindented;
438                     b.headerLevel = 0;
439                     while (hl.length > 0 && hl[0] == '#') {
440                         b.headerLevel++;
441                         hl = hl[1 .. $];
442                     }
443                     while (hl.length > 0 && (hl[$ - 1] == '#' || hl[$ - 1] == ' ')) {
444                         hl = hl[0 .. $ - 1];
445                     }
446                     b.text = [hl];
447                     lines.popFront();
448                     break;
449                 case LineType.SetextHeader:
450                     lines.popFront();
451                     break;
452                 case LineType.UList:
453                 case LineType.OList:
454                     b.type = ln.type == LineType.UList ? BlockType.UList
455                                                        : BlockType.OList;
456                     auto itemindent = baseIndent ~ IndentType.White;
457                     bool firstItem = true, paraMode = false;
458                     while (
459                         !lines.empty &&
460                         lines.front.type == ln.type &&
461                         lines.front.indent == baseIndent
462                     ) {
463                         Block itm;
464                         itm.text = skipText(lines, itemindent);
465                         itm.text[0] = removeListPrefix(itm.text[0], ln.type);
466 
467                         // emit <p></p> if there are blank lines
468                         // between the items
469                         if (firstItem && !lines.empty
470                             && lines.front.type == LineType.Blank) {
471                             paraMode = true;
472                         }
473                         firstItem = false;
474                         if (paraMode) {
475                             Block para;
476                             para.type = BlockType.Paragraph;
477                             para.text = itm.text;
478                             itm.blocks ~= para;
479                             itm.text = null;
480                         }
481 
482                         this.parseBlocks(itm, lines, itemindent);
483                         itm.type = BlockType.ListItem;
484                         b.blocks ~= itm;
485                     }
486                     break;
487                 case LineType.HtmlBlock:
488                     int nestlevel = 0;
489                     auto starttag = parseHtmlBlockLine(ln.unindented);
490                     if (!starttag.isHtmlBlock || !starttag.open)
491                         break;
492 
493                     b.type = BlockType.Plain;
494                     while (!lines.empty) {
495                         auto frontIndLen = lines.front.indent.length;
496                         auto baseIndLen = baseIndent.length;
497                         if (frontIndLen < baseIndLen) {
498                             break;
499                         }
500                         if (lines.front.indent[0 .. baseIndLen] != baseIndent) {
501                             break;
502                         }
503 
504                         auto str = lines.front.unindent(baseIndent.length);
505                         auto taginfo = parseHtmlBlockLine(str);
506                         b.text ~= lines.front.unindent(baseIndent.length);
507                         lines.popFront();
508                         if (taginfo.isHtmlBlock
509                             && taginfo.tagName == starttag.tagName
510                         ) {
511                             nestlevel += taginfo.open ? 1 : -1;
512                         }
513                         if (nestlevel <= 0) {
514                             break;
515                         }
516                     }
517                     break;
518                 case LineType.CodeBlockDelimiter:
519                     lines.popFront();  // TODO: get language from line
520                     b.type = BlockType.Code;
521                     while (!lines.empty) {
522                         if (lines.front.indent.length < baseIndent.length) {
523                             break;
524                         }
525                         if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
526                             break;
527                         }
528                         if (lines.front.type == LineType.CodeBlockDelimiter) {
529                             lines.popFront();
530                             break;
531                         }
532                         b.text ~= lines.front.unindent(baseIndent.length);
533                         lines.popFront();
534                     }
535                     break;
536                 case LineType.Table:
537                     lines.popFront();
538                     // Can this be a valid table (is there a next line
539                     // that could be a header separator)?
540                     if (lines.empty) {
541                         processPlain();
542                         break;
543                     }
544                     Line lnNext = lines.front;
545                     immutable bool isTableHeader =
546                         lnNext.type ==
547                             LineType.Table &&
548                             lnNext.text.indexOf(" -") >= 0 &&
549                             lnNext.text.indexOf("- ") >= 0 &&
550                             lnNext.text.allOf("-:| ");
551                     if (!isTableHeader) {
552                         // Not a valid table header,
553                         // so let's assume it's plain markdown
554                         processPlain();
555                         break;
556                     }
557                     b.type = BlockType.Table;
558                     // Parse header
559                     b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
560                     // Parse table rows
561                     lines.popFront();
562                     while (!lines.empty) {
563                         ln = lines.front;
564                         if (ln.type != LineType.Table)
565                             break; // not a table row, so let's assume it's the end of the table
566                         b.blocks ~= splitTableRow(ln);
567                         lines.popFront();
568                     }
569                     break;
570             }
571             root.blocks ~= b;
572 
573         }
574     }
575 
576     void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links)
577     do {
578         final switch (block.type) {
579         case BlockType.Plain:
580             foreach (ln; block.text) {
581                 dst.put(ln);
582                 dst.put("\n");
583             }
584             foreach (b; block.blocks) {
585                 this.writeBlock(dst, b, links);
586             }
587             break;
588         case BlockType.Text:
589             writeMarkdownEscaped(dst, block, links);
590             foreach (b; block.blocks) {
591                 this.writeBlock(dst, b, links);
592             }
593             break;
594         case BlockType.Paragraph:
595             assert(block.blocks.length == 0);
596             dst.put("<p>");
597             writeMarkdownEscaped(dst, block, links);
598             dst.put("</p>\n");
599             break;
600         case BlockType.Header:
601             assert(block.blocks.length == 0);
602             auto hlvl = block.headerLevel + this.headingBaseLevel - 1;
603             dst.formattedWrite(
604                 "<h%s id=\"%s\">", hlvl, block.text[0].asSlug
605             );
606             assert(block.text.length == 1);
607             writeMarkdownEscaped(dst, block.text[0], links);
608             dst.formattedWrite("</h%s>\n", hlvl);
609             break;
610         case BlockType.OList:
611             dst.put("<ol>\n");
612             foreach (b; block.blocks) {
613                 this.writeBlock(dst, b, links);
614             }
615             dst.put("</ol>\n");
616             break;
617         case BlockType.UList:
618             dst.put("<ul>\n");
619             foreach (b; block.blocks) {
620                 this.writeBlock(dst, b, links);
621             }
622             dst.put("</ul>\n");
623             break;
624         case BlockType.ListItem:
625             dst.put("<li>");
626             writeMarkdownEscaped(dst, block, links);
627             foreach (b; block.blocks) {
628                 this.writeBlock(dst, b, links);
629             }
630             dst.put("</li>\n");
631             break;
632         case BlockType.Code:
633             assert(block.blocks.length == 0);
634             dst.put("<pre class=\"prettyprint\"><code>");
635             if (this.processCode is null) {
636                 foreach (ln; block.text) {
637                     filterHTMLEscape(dst, ln);
638                     dst.put("\n");
639                 }
640             } else {
641                 auto temp = appender!string();
642                 foreach (ln; block.text) {
643                     filterHTMLEscape(temp, ln);
644                     temp.put("\n");
645                 }
646                 dst.put(this.processCode(temp.data));
647             }
648             dst.put("</code></pre>");
649             break;
650         case BlockType.Quote:
651             dst.put("<blockquote>");
652             writeMarkdownEscaped(dst, block, links);
653             foreach (b; block.blocks) {
654                 this.writeBlock(dst, b, links);
655             }
656             dst.put("</blockquote>\n");
657             break;
658         case BlockType.Table:
659             assert(block.blocks.length > 0);
660             assert(block.blocks[0].type == BlockType.TableRow);
661             dst.put("<table>\n<tr>");
662             foreach (b; block.blocks[0].blocks) {
663                 assert(b.type == BlockType.TableHeader);
664                 dst.put("<th>");
665                 writeMarkdownEscaped(dst, b.text[0], links);
666                 dst.put("</th>");
667             }
668             dst.put("</tr>\n");
669             if (block.blocks.length > 1) {
670                 foreach (row; block.blocks[1 .. $]) {
671                     assert(row.type == BlockType.TableRow);
672                     dst.put("<tr>");
673                     foreach (b; row.blocks) {
674                         assert(b.type == BlockType.TableData);
675                         dst.put("<td>");
676                         writeMarkdownEscaped(dst, b.text[0], links);
677                         dst.put("</td>");
678                     }
679                     dst.put("</tr>\n");
680                 }
681             }
682             dst.put("</table>\n");
683             break;
684         case BlockType.TableRow:
685         case BlockType.TableData:
686         case BlockType.TableHeader:
687             assert(0);
688         }
689     }
690 
691     void writeMarkdownEscaped(R)(ref R dst,
692                                  ref const Block block,
693                                  in LinkRef[string] links) {
694         auto lines = cast(string[]) block.text;
695         auto text = this.flags & MarkdownFlags.keepLineBreaks
696             ? lines.join("<br>") : lines.join("\n");
697         writeMarkdownEscaped(dst, text, links);
698         if (lines.length) {
699             dst.put("\n");
700         }
701     }
702 
703     void writeMarkdownEscaped(R)(ref R dst,
704                                  string ln,
705                                  in LinkRef[string] linkrefs) {
706         string filterLink(string lnk, bool isImage) {
707             return this.urlFilter ? this.urlFilter(lnk, isImage) : lnk;
708         }
709 
710         bool br = ln.endsWith("  ");
711         while (ln.length > 0) {
712             switch (ln[0]) {
713             default:
714                 dst.put(ln[0]);
715                 ln = ln[1 .. $];
716                 break;
717             case '\\':
718                 if (ln.length >= 2) {
719                     switch (ln[1]) {
720                     default:
721                         dst.put(ln[0 .. 2]);
722                         ln = ln[2 .. $];
723                         break;
724                     case '\'', '`', '*', '_', '{', '}', '[', ']',
725                         '(', ')', '#', '+', '-', '.', '!':
726                         dst.put(ln[1]);
727                         ln = ln[2 .. $];
728                         break;
729                     }
730                 } else {
731                     dst.put(ln[0]);
732                     ln = ln[1 .. $];
733                 }
734                 break;
735             case '_':
736                 if (this.flags & MarkdownFlags.disableUnderscoreEmphasis) {
737                     dst.put(ln[0]);
738                     ln = ln[1 .. $];
739                     break;
740                 }
741                 goto case;
742             case '*':
743                 string text;
744                 if (auto em = parseEmphasis(ln, text)) {
745                     if (em == 1) {
746                         dst.put("<em>");
747                     } else if (em == 2) {
748                         dst.put("<strong>");
749                     } else {
750                         dst.put("<strong><em>");
751                     }
752                     filterHTMLEscape(
753                         dst, text, HTMLEscapeFlags.escapeMinimal
754                     );
755                     if (em == 1) {
756                         dst.put("</em>");
757                     } else if (em == 2) {
758                         dst.put("</strong>");
759                     } else {
760                         dst.put("</strong></em>");
761                     }
762                 } else {
763                     dst.put(ln[0]);
764                     ln = ln[1 .. $];
765                 }
766                 break;
767             case '`':
768                 string code;
769                 if (parseInlineCode(ln, code)) {
770                     dst.put("<code class=\"prettyprint\">");
771                     if (this.processCode is null) {
772                         filterHTMLEscape(
773                             dst, code, HTMLEscapeFlags.escapeMinimal
774                         );
775                     } else {
776                         auto temp = appender!string();
777                         filterHTMLEscape(
778                             temp, code, HTMLEscapeFlags.escapeMinimal
779                         );
780                         dst.put(this.processCode(temp.data));
781                     }
782                     dst.put("</code>");
783                 } else {
784                     dst.put(ln[0]);
785                     ln = ln[1 .. $];
786                 }
787                 break;
788             case '[':
789                 Link link;
790                 if (parseLink(ln, link, linkrefs)) {
791                     dst.put("<a href=\"");
792                     filterHTMLAttribEscape(dst, filterLink(link.url, false));
793                     dst.put("\"");
794                     if (link.title.length) {
795                         dst.put(" title=\"");
796                         filterHTMLAttribEscape(dst, link.title);
797                         dst.put("\"");
798                     }
799                     dst.put(">");
800                     writeMarkdownEscaped(dst, link.text, linkrefs);
801                     dst.put("</a>");
802                 } else {
803                     dst.put(ln[0]);
804                     ln = ln[1 .. $];
805                 }
806                 break;
807             case '!':
808                 Link link;
809                 if (parseLink(ln, link, linkrefs)) {
810                     dst.put("<img src=\"");
811                     filterHTMLAttribEscape(dst, filterLink(link.url, true));
812                     dst.put("\" alt=\"");
813                     filterHTMLAttribEscape(dst, link.text);
814                     dst.put("\"");
815                     if (link.title.length) {
816                         dst.put(" title=\"");
817                         filterHTMLAttribEscape(dst, link.title);
818                         dst.put("\"");
819                     }
820                     dst.put(">");
821                 } else if (ln.length >= 2) {
822                     dst.put(ln[0 .. 2]);
823                     ln = ln[2 .. $];
824                 } else {
825                     dst.put(ln[0]);
826                     ln = ln[1 .. $];
827                 }
828                 break;
829             case '>':
830                 if (this.flags & MarkdownFlags.noInlineHtml) {
831                     dst.put("&gt;");
832                 } else
833                     dst.put(ln[0]);
834                 ln = ln[1 .. $];
835                 break;
836             case '<':
837                 string url;
838                 if (parseAutoLink(ln, url)) {
839                     bool isEmail = url.startsWith("mailto:");
840                     dst.put("<a href=\"");
841                     if (isEmail) {
842                         filterHTMLAllEscape(dst, url);
843                     } else {
844                         filterHTMLAttribEscape(dst, filterLink(url, false));
845                     }
846                     dst.put("\">");
847                     if (isEmail) {
848                         filterHTMLAllEscape(dst, url[7 .. $]);
849                     } else {
850                         filterHTMLEscape(
851                             dst, url, HTMLEscapeFlags.escapeMinimal
852                         );
853                     }
854                     dst.put("</a>");
855                 } else {
856                     if (ln.startsWith("<br>")) {
857                         // always support line breaks,
858                         // since we embed them here ourselves!
859                         dst.put("<br/>");
860                         ln = ln[4 .. $];
861                     } else if (ln.startsWith("<br/>")) {
862                         dst.put("<br/>");
863                         ln = ln[5 .. $];
864                     } else {
865                         if (this.flags & MarkdownFlags.noInlineHtml) {
866                             dst.put("&lt;");
867                         } else {
868                             dst.put(ln[0]);
869                         }
870                         ln = ln[1 .. $];
871                     }
872                 }
873                 break;
874             }
875         }
876         if (br) {
877             dst.put("<br/>");
878         }
879     }
880 
881     string markdownText;
882 
883     /// Controls the capabilities of the parser.
884     MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;
885 
886     /// Heading tags will start at this level.
887     size_t headingBaseLevel = 1;
888 
889     /// Called for every link/image URL to perform arbitrary transformations.
890     UrlFilterFn urlFilter;
891 
892     /***************************************************************************
893      * An optional delegate to post-process code blocks and inline code.
894      * Useful to e.g. add code highlighting.
895      */
896     ProcessCodeFn processCode = null;
897 }
898 
899 // unittest {
900 //     auto text =
901 // `=======
902 // Heading
903 // =======
904 
905 // **bold** *italic*
906 
907 // List:
908 
909 //   * a
910 //   * b
911 //   * c
912 // `;
913 
914 //     writeln("~~~~~~~~~~~");
915 //     writeln(text);
916 //     writeln("~~~~~~~~~~~");
917 //     writeln(convertMarkdownToHTML(text));
918 // }
919 
// A plain line with a small extra indent stays part of the paragraph.
unittest {
    string markdown =
        `Merged prototype.
The prototype is not locked, allowing to add more components.
  To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().`;
    string html =
        `<p>Merged prototype.
The prototype is not locked, allowing to add more components.
  To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().
</p>
`;
    assert(convertMarkdownToHTML(markdown) == html);
}
934 
// Underscore emphasis is on by default and can be switched off with
// MarkdownFlags.disableUnderscoreEmphasis; star emphasis is unaffected.
unittest {
    // Compares actual against expected HTML, reporting both on mismatch.
    static void expectHtml(string actual, string wanted) {
        assert(actual == wanted, "'%s' != '%s'".format(actual, wanted));
    }

    string source = `*stars* under_score_s`;

    expectHtml(
        convertMarkdownToHTML(source),
        `<p><em>stars</em> under<em>score</em>s
</p>
`
    );
    expectHtml(
        convertMarkdownToHTML(source, MarkdownFlags.disableUnderscoreEmphasis),
        `<p><em>stars</em> under_score_s
</p>
`
    );
}
958 
// Checks that the processCode hook is applied to both inline code and
// indented code blocks.
unittest {
    // Stand-in "highlighter": replaces every character except newlines
    // (which the code block writer appends) with 'A'.
    string maskCode(string input) @safe nothrow {
        import std.exception : assumeWontThrow;

        string visible = input.filter!(c => c != '\n')
            .array
            .to!string
            .assumeWontThrow;
        return 'A'.repeat(visible.length).array.to!string.assumeWontThrow;
    }

    auto markdown =
        "`inline code`" ~ `
block:

    code block
`;
    auto wanted =
        `<p><code class="prettyprint">AAAAAAAAAAA</code>
block:
</p>
<pre class="prettyprint"><code>AAAAAAAAAA</code></pre>`;

    auto settings = new MarkdownSettings;
    settings.processCode = &maskCode;
    auto actual = convertMarkdownToHTML(markdown, settings);

    assert(
        actual == wanted,
        format!"Unexpected code processing result:\n%s\nExpected:\n%s"(
            actual, wanted
        )
    );
}
993 
/*******************************************************************************
 * One heading of a document together with the headings nested below it.
 *
 * NOTE(review): presumably produced while collecting a document outline;
 * the code that fills it is outside this part of the file - confirm there.
 */
struct Section {
    /// Level of the heading this section belongs to.
    size_t headingLevel;
    /// Heading text.
    string caption;
    /// Anchor name for the heading (presumably its slug - TODO confirm).
    string anchor;
    /// Sections nested directly below this one.
    Section[] subSections;
}
1000 
private {
    // Tag names that parseHtmlBlockLine() accepts as block-level HTML;
    // a line with any other tag name is not reported as an HTML block.
    immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}
1004 
// Kind of a single indentation step. A line's indentation is handled as an
// IndentType[] (see skipText), one entry per step.
private enum IndentType {
    White, // presumably a whitespace indent step - confirm where lines are parsed
    Quote  // presumably a '>' quote marker - confirm where lines are parsed
}
1009 
1010 
// A node of the block tree that writeBlock() renders to HTML.
private struct Block {
    BlockType type;
    string[] text;
    Block[] blocks;
    size_t headerLevel;

    // Human-readable dump for debugging, starting at nesting depth 0.
    string toString() {
        return toStringNested(0);
    }

    // Dumps this block and, recursively, its children. Every nesting level
    // is indented by two more spaces. Per block the order is: type, text,
    // all children, header level.
    string toStringNested(uint depth = 0) {
        string pad = replicate(" ", depth * 2);
        auto dump = appender!string();

        dump.put(pad);
        dump.put("%s\n".format(type));
        dump.put(pad);
        dump.put("%s\n".format(text));
        foreach (ref child; blocks) {
            dump.put(child.toStringNested(depth + 1));
        }
        dump.put(pad);
        dump.put("%s\n".format(headerLevel));

        return dump.data;
    }
}
1037 
1038 
/*******************************************************************************
 * Consumes a run of text lines from the front of `lines` and returns them
 * unindented.
 *
 * The first line is always consumed. Further lines are consumed as long as
 * they are of type LineType.Plain and their indentation is compatible with
 * `indent`. Each returned line has at most `indent.length` indentation
 * steps removed.
 */
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
    // Decides whether a following line with indentation `candidate` still
    // belongs to text that started at indentation `base`.
    static bool matchesIndent(IndentType[] candidate, IndentType[] base) {
        // A *plain* line that is indented deeper than the text it follows
        // is still part of the same paragraph. Rejecting it here made
        //
        //     First line
        //             Second line
        //
        // come out as a paragraph followed by a code block, whereas other
        // Markdown processors treat it as one paragraph.
        if (candidate.length > base.length) {
            return true;
        }
        if (candidate != base[0 .. candidate.length]) {
            return false;
        }
        // Number of steps that follow the last quote step of the base
        // indentation, or -1 when the base contains no quote step.
        immutable afterLastQuote = base.retro.countUntil(IndentType.Quote);
        if (afterLastQuote >= 0 && candidate.length <= afterLastQuote) {
            return false;
        }
        return true;
    }

    string[] collected;

    do {
        collected ~= lines.front.unindent(
            min(indent.length, lines.front.indent.length)
        );
        lines.popFront();
    } while (
        !lines.empty &&
        matchesIndent(lines.front.indent, indent) &&
        lines.front.type == LineType.Plain
    );

    return collected;
}
1091 
1092 
/*******************************************************************************
 * Splits one table line into a TableRow block with one child per cell.
 *
 * The line is stripped, an optional leading "| " and trailing " |" are
 * dropped, and the remainder is split on " | ". Every cell has surrounding
 * spaces removed and gets the block type `dataType`, which must be
 * BlockType.TableHeader or BlockType.TableData.
 *
 * Lines that are too short to carry both border markers (for example "|"
 * or "| |") are handled without slicing out of bounds; a line that consists
 * of border markers only yields a row without cells.
 */
private Block splitTableRow(BlockType dataType = BlockType.TableData)(Line line)
pure @safe {
    static assert(
        dataType == BlockType.TableHeader || dataType == BlockType.TableData
    );

    string ln = line.text.strip();
    // startsWith/endsWith are safe on strings shorter than the marker,
    // unlike fixed-width slices.
    immutable size_t b = ln.startsWith("| ") ? 2 : 0;
    size_t e = ln.endsWith(" |") ? (ln.length - 2) : ln.length;
    // In "| |" both markers share the single space; never let the end of
    // the cell area move in front of its start.
    if (e < b) {
        e = b;
    }

    Block ret;
    ret.type = BlockType.TableRow;
    foreach (txt; ln[b .. e].split(" | ")) {
        Block d;
        d.text = [txt.strip(" ")];
        d.type = dataType;
        ret.blocks ~= d;
    }
    return ret;
}
1112 
/*******************************************************************************
 * Renders `block` and everything nested in it as HTML into `dst`.
 *
 * Params:
 *     dst = output range receiving the HTML
 *     block = block to render
 *     links = link reference definitions used for inline links and images
 *     settings = parser settings (heading base level, code post-processing,
 *                inline formatting flags)
 *
 * TableRow, TableData and TableHeader blocks are only valid inside a Table
 * block and must never be passed in directly.
 */
private void writeBlock(R)(ref R dst,
    ref const Block block,
    LinkRef[string] links,
    scope MarkdownSettings settings) {
    // Container-like blocks differ only in their wrapping tags and in
    // whether they carry inline text of their own. They record that here
    // and share the code after the switch; all other block types are
    // written completely inside their case and return.
    string openTag;
    string closeTag;
    bool hasInlineText = false;

    final switch (block.type) {
    case BlockType.Plain:
        // Raw lines are copied through without any inline processing.
        foreach (rawLine; block.text) {
            dst.put(rawLine);
            dst.put("\n");
        }
        break;
    case BlockType.Text:
        hasInlineText = true;
        break;
    case BlockType.OList:
        openTag = "<ol>\n";
        closeTag = "</ol>\n";
        break;
    case BlockType.UList:
        openTag = "<ul>\n";
        closeTag = "</ul>\n";
        break;
    case BlockType.ListItem:
        openTag = "<li>";
        closeTag = "</li>\n";
        hasInlineText = true;
        break;
    case BlockType.Quote:
        openTag = "<blockquote>";
        closeTag = "</blockquote>\n";
        hasInlineText = true;
        break;
    case BlockType.Paragraph:
        assert(block.blocks.length == 0);
        dst.put("<p>");
        writeMarkdownEscaped(dst, block, links, settings);
        dst.put("</p>\n");
        return;
    case BlockType.Header:
        assert(block.blocks.length == 0);
        // Shift the level so that top-level headings start at the
        // configured base level.
        size_t level = block.headerLevel;
        if (settings) {
            level += settings.headingBaseLevel - 1;
        }
        dst.formattedWrite("<h%s id=\"%s\">", level, block.text[0].asSlug);
        assert(block.text.length == 1);
        writeMarkdownEscaped(dst, block.text[0], links, settings);
        dst.formattedWrite("</h%s>\n", level);
        return;
    case BlockType.Code:
        assert(block.blocks.length == 0);
        dst.put("<pre class=\"prettyprint\"><code>");
        if (settings.processCode !is null) {
            // The hook receives the already HTML-escaped code in one piece.
            auto escaped = appender!string();
            foreach (codeLine; block.text) {
                filterHTMLEscape(escaped, codeLine);
                escaped.put("\n");
            }
            dst.put(settings.processCode(escaped.data));
        } else {
            foreach (codeLine; block.text) {
                filterHTMLEscape(dst, codeLine);
                dst.put("\n");
            }
        }
        dst.put("</code></pre>");
        return;
    case BlockType.Table:
        assert(block.blocks.length > 0);
        assert(block.blocks[0].type == BlockType.TableRow);
        // The first row holds the header cells ...
        dst.put("<table>\n<tr>");
        foreach (ref cell; block.blocks[0].blocks) {
            assert(cell.type == BlockType.TableHeader);
            dst.put("<th>");
            writeMarkdownEscaped(dst, cell.text[0], links, settings);
            dst.put("</th>");
        }
        dst.put("</tr>\n");
        // ... all remaining rows hold data cells.
        foreach (ref row; block.blocks[1 .. $]) {
            assert(row.type == BlockType.TableRow);
            dst.put("<tr>");
            foreach (ref cell; row.blocks) {
                assert(cell.type == BlockType.TableData);
                dst.put("<td>");
                writeMarkdownEscaped(dst, cell.text[0], links, settings);
                dst.put("</td>");
            }
            dst.put("</tr>\n");
        }
        dst.put("</table>\n");
        return;
    case BlockType.TableRow:
    case BlockType.TableData:
    case BlockType.TableHeader:
        assert(0);
    }

    // Shared tail of the container-like blocks:
    // opening tag, own inline text, nested blocks, closing tag.
    if (openTag.length) {
        dst.put(openTag);
    }
    if (hasInlineText) {
        writeMarkdownEscaped(dst, block, links, settings);
    }
    foreach (ref child; block.blocks) {
        writeBlock(dst, child, links, settings);
    }
    if (closeTag.length) {
        dst.put(closeTag);
    }
}
1226 
/*******************************************************************************
 * Writes the text lines of `block` as inline-formatted HTML into `dst`.
 *
 * The lines are joined with "<br>" when MarkdownFlags.keepLineBreaks is
 * set and with a newline otherwise, then rendered as one string. A newline
 * is appended unless the block has no text lines at all.
 */
private void writeMarkdownEscaped(R)(ref R dst,
    ref const Block block,
    in LinkRef[string] links,
    scope MarkdownSettings settings) {
    auto textLines = cast(string[]) block.text;
    string separator = "\n";
    if (settings.flags & MarkdownFlags.keepLineBreaks) {
        separator = "<br>";
    }
    writeMarkdownEscaped(dst, textLines.join(separator), links, settings);
    if (textLines.length != 0) {
        dst.put("\n");
    }
}
1239 
/*******************************************************************************
 * Renders one piece of inline Markdown as HTML into `dst`.
 *
 * Handles backslash escapes, emphasis, inline code, links, images,
 * automatic links and "<br>" line breaks; everything else is copied
 * through. A trailing double space produces a final "<br/>".
 *
 * Params:
 *     dst = output range receiving the HTML
 *     ln = inline Markdown text
 *     linkrefs = link reference definitions for reference-style links
 *     settings = supplies the flags, the URL filter and the optional
 *                code post-processing hook
 *
 * Backslash escapes: the backslash is dropped in front of the characters
 * listed by the Markdown syntax (\ ` * _ { } [ ] ( ) # + - . !) and, as
 * before, in front of an apostrophe. In front of any other character the
 * backslash is kept. With MarkdownFlags.noInlineHtml set, a '<' or '>'
 * following such a kept backslash is still entity-escaped, so a backslash
 * cannot be used to smuggle raw HTML into the output.
 */
private void writeMarkdownEscaped(R)(ref R dst,
    string ln,
    in LinkRef[string] linkrefs,
    scope MarkdownSettings settings) {
    // Applies the user-supplied URL filter, if there is one.
    string filterLink(string lnk, bool isImage) {
        return settings.urlFilter ? settings.urlFilter(lnk, isImage) : lnk;
    }

    // Two trailing spaces request a hard line break at the end.
    bool br = ln.endsWith("  ");
    while (ln.length > 0) {
        switch (ln[0]) {
        default:
            dst.put(ln[0]);
            ln = ln[1 .. $];
            break;
        case '\\':
            if (ln.length >= 2) {
                switch (ln[1]) {
                default:
                    immutable isAngle = ln[1] == '<' || ln[1] == '>';
                    if (isAngle &&
                        (settings.flags & MarkdownFlags.noInlineHtml)
                    ) {
                        // Keep the backslash, but hand the angle bracket
                        // back to the main loop so that it is escaped like
                        // any other one instead of being copied verbatim.
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    } else {
                        dst.put(ln[0 .. 2]);
                        ln = ln[2 .. $];
                    }
                    break;
                // '\\' makes "\\" produce a single literal backslash, as
                // the Markdown syntax specifies; '\'' is kept for backward
                // compatibility.
                case '\\', '\'', '`', '*', '_', '{', '}', '[', ']',
                    '(', ')', '#', '+', '-', '.', '!':
                    dst.put(ln[1]);
                    ln = ln[2 .. $];
                    break;
                }
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '_':
            if (settings.flags & MarkdownFlags.disableUnderscoreEmphasis) {
                dst.put(ln[0]);
                ln = ln[1 .. $];
                break;
            }
            goto case;
        case '*':
            string text;
            // parseEmphasis returns the marker length: 1 = em, 2 = strong,
            // 3 = both; 0 means "no emphasis here".
            if (auto em = parseEmphasis(ln, text)) {
                dst.put(em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
                filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
                dst.put(em == 1 ? "</em>" : em == 2 ? "</strong>" : "</em></strong>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '`':
            string code;
            if (parseInlineCode(ln, code)) {
                dst.put("<code class=\"prettyprint\">");
                if (settings.processCode is null) {
                    filterHTMLEscape(
                        dst, code, HTMLEscapeFlags.escapeMinimal
                    );
                } else {
                    // The hook receives the already escaped code.
                    auto temp = appender!string();
                    filterHTMLEscape(
                        temp, code, HTMLEscapeFlags.escapeMinimal
                    );
                    dst.put(settings.processCode(temp.data));
                }
                dst.put("</code>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '[':
            Link link;
            if (parseLink(ln, link, linkrefs)) {
                dst.put("<a href=\"");
                filterHTMLAttribEscape(dst, filterLink(link.url, false));
                dst.put("\"");
                if (link.title.length) {
                    dst.put(" title=\"");
                    filterHTMLAttribEscape(dst, link.title);
                    dst.put("\"");
                }
                dst.put(">");
                // The link text may itself contain inline Markdown.
                writeMarkdownEscaped(dst, link.text, linkrefs, settings);
                dst.put("</a>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '!':
            Link link;
            if (parseLink(ln, link, linkrefs)) {
                dst.put("<img src=\"");
                filterHTMLAttribEscape(dst, filterLink(link.url, true));
                dst.put("\" alt=\"");
                filterHTMLAttribEscape(dst, link.text);
                dst.put("\"");
                if (link.title.length) {
                    dst.put(" title=\"");
                    filterHTMLAttribEscape(dst, link.title);
                    dst.put("\"");
                }
                dst.put(">");
            } else if (ln.length >= 2) {
                dst.put(ln[0 .. 2]);
                ln = ln[2 .. $];
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '>':
            if (settings.flags & MarkdownFlags.noInlineHtml) {
                dst.put("&gt;");
            } else {
                dst.put(ln[0]);
            }
            ln = ln[1 .. $];
            break;
        case '<':
            string url;
            if (parseAutoLink(ln, url)) {
                bool isEmail = url.startsWith("mailto:");
                dst.put("<a href=\"");
                if (isEmail) {
                    filterHTMLAllEscape(dst, url);
                } else {
                    filterHTMLAttribEscape(dst, filterLink(url, false));
                }
                dst.put("\">");
                if (isEmail) {
                    // Show the address without the "mailto:" prefix.
                    filterHTMLAllEscape(dst, url[7 .. $]);
                } else {
                    filterHTMLEscape(
                        dst, url, HTMLEscapeFlags.escapeMinimal
                    );
                }
                dst.put("</a>");
            } else {
                if (ln.startsWith("<br>")) {
                    // always support line breaks,
                    // since we embed them here ourselves!
                    dst.put("<br/>");
                    ln = ln[4 .. $];
                } else if (ln.startsWith("<br/>")) {
                    dst.put("<br/>");
                    ln = ln[5 .. $];
                } else {
                    if (settings.flags & MarkdownFlags.noInlineHtml) {
                        dst.put("&lt;");
                    } else {
                        dst.put(ln[0]);
                    }
                    ln = ln[1 .. $];
                }
            }
            break;
        }
    }
    if (br) {
        dst.put("<br/>");
    }
}
1404 
1405 
// Tells whether `ln` is made up of nothing but the characters space and tab
// (as decided by allOf).
private bool isLineBlank(string ln)
pure @safe {
    immutable blank = ln.allOf(" \t");
    return blank;
}
1410 
1411 
// Tells whether `ln` is a Setext underline: optional leading whitespace,
// one or more `subHeaderChar` characters, then only spaces and tabs.
private bool isSetextHeaderLine(string ln, char subHeaderChar) pure @safe {
    string rest = stripLeft(ln);
    if (rest.length < 1 || rest[0] != subHeaderChar) {
        return false;
    }
    // Drop the run of underline characters ...
    while (!rest.empty && rest.front == subHeaderChar) {
        rest.popFront();
    }
    // ... after which only blanks may follow.
    return allOf(rest, " \t");
}
1425 
1426 
// Tells whether `ln` is an ATX heading: optional leading whitespace,
// one to six '#' characters, then a space.
private bool isAtxHeaderLine(string ln) pure @safe {
    string trimmed = stripLeft(ln);

    size_t hashes = 0;
    foreach (char c; trimmed) {
        if (c != '#') {
            break;
        }
        hashes++;
    }

    return hashes >= 1 &&
        hashes <= 6 &&
        hashes < trimmed.length &&
        trimmed[hashes] == ' ';
}
1438 
1439 
// Tells whether `ln` is a horizontal rule: nothing but spaces and one kind
// of rule character ('-', '*' or '_'), the latter at least three times.
private bool isHlineLine(string ln) pure @safe {
    // Checks the rule condition for one rule character; `allowed` is that
    // character plus the space.
    static bool isRuleOf(string line, string allowed, char mark) {
        return allOf(line, allowed) && count(line, mark) >= 3;
    }

    return isRuleOf(ln, " -", '-') ||
        isRuleOf(ln, " *", '*') ||
        isRuleOf(ln, " _", '_');
}
1450 
1451 
// Tells whether the first non-whitespace character of `ln` is '>'.
private bool isQuoteLine(string ln) pure @safe {
    string trimmed = stripLeft(ln);
    return trimmed.length > 0 && trimmed[0] == '>';
}
1455 
1456 
// Counts the '>' markers at the start of `ln`; whitespace before and
// between the markers is ignored.
private size_t getQuoteLevel(string ln) pure @safe {
    size_t markers = 0;
    for (string rest = stripLeft(ln);
        rest.length > 0 && rest[0] == '>';
        rest = stripLeft(rest[1 .. $])
    ) {
        markers++;
    }
    return markers;
}
1466 
1467 
// Tells whether `ln` starts an unordered list item: optional leading
// whitespace, one of the bullets '*', '+' or '-', then a space or a tab.
private bool isUListLine(string ln) pure @safe {
    string trimmed = stripLeft(ln);
    if (trimmed.length < 2) {
        return false;
    }

    immutable char bullet = trimmed[0];
    immutable char gap = trimmed[1];
    immutable hasBullet = bullet == '*' || bullet == '+' || bullet == '-';
    return hasBullet && (gap == ' ' || gap == '\t');
}
1481 
1482 
// Tells whether `ln` starts an ordered list item: optional leading
// whitespace, one or more ASCII digits, a '.', then a space or a tab.
private bool isOListLine(string ln) pure @safe {
    string rest = stripLeft(ln);

    // Length of the leading run of digits.
    size_t digits = 0;
    while (digits < rest.length && rest[digits] >= '0' && rest[digits] <= '9') {
        digits++;
    }
    if (digits == 0) {
        return false;
    }

    rest = rest[digits .. $];
    return rest.length >= 2 &&
        rest[0] == '.' &&
        (rest[1] == ' ' || rest[1] == '\t');
}
1506 
1507 
// Tells whether `ln` is treated as a table row: it must contain the cell
// separator " | " and must not be a list item or an ATX heading.
private bool isTableRowLine(string ln) pure @safe {
    if (ln.indexOf(" | ") < 0) {
        return false;
    }
    return !(ln.isOListLine || ln.isUListLine || ln.isAtxHeaderLine);
}
1515 
1516 
// Strips the list marker from a list item line and returns the item text
// without leading whitespace. `tp` must be LineType.OList or
// LineType.UList; any other value is a programming error.
private string removeListPrefix(string str, LineType tp) pure @safe {
    if (tp == LineType.OList) {
        // Ordered item: everything up to and including the first '.'
        // is the marker.
        auto dot = str.indexOfCT('.');
        assert(dot > 0);
        return str[dot + 1 .. $].stripLeft();
    }
    if (tp == LineType.UList) {
        // Unordered item: the marker is the first non-whitespace character.
        string fromBullet = str.stripLeft();
        return fromBullet[1 .. $].stripLeft();
    }
    assert(false);
}
1529 
1530 
/*******************************************************************************
 * Examines whether `ln` consists of exactly one HTML tag that opens or
 * closes a block-level element.
 *
 * Returns: a struct with
 *     isHtmlBlock = true if the stripped line is a single tag whose only
 *                   '>' is its last character and whose name is listed
 *                   in s_blockTags
 *     tagName = the tag name as far as it could be read
 *     open = false for a closing tag ("</..."), true otherwise
 */
private auto parseHtmlBlockLine(string ln) pure @safe {
    struct HtmlBlockInfo {
        bool isHtmlBlock;
        string tagName;
        bool open;
    }

    HtmlBlockInfo info;
    info.isHtmlBlock = false;
    info.open = true;

    string rest = strip(ln);
    if (rest.length < 3 || rest[0] != '<') {
        return info;
    }

    // Position of the first character of the tag name:
    // right after "<", or after "</" for a closing tag.
    size_t nameStart = 1;
    if (rest[1] == '/') {
        info.open = false;
        nameStart = 2;
    }
    if (!std.ascii.isAlpha(rest[nameStart])) {
        return info;
    }
    rest = rest[nameStart .. $];

    // The name extends up to the first space or '>'.
    size_t nameEnd = rest.length;
    foreach (i, char c; rest) {
        if (c == ' ' || c == '>') {
            nameEnd = i;
            break;
        }
    }
    info.tagName = rest[0 .. nameEnd];
    rest = rest[nameEnd .. $];

    // The first '>' must exist and must end the line.
    immutable closing = rest.indexOf('>');
    if (closing < 0 || closing != rest.length - 1) {
        return info;
    }

    info.isHtmlBlock = s_blockTags.canFind(info.tagName);
    return info;
}
1579 
1580 
// Tells whether `ln` is the opening tag of a block-level HTML element.
private bool isHtmlBlockLine(string ln) pure @safe {
    auto info = parseHtmlBlockLine(ln);
    if (!info.isHtmlBlock) {
        return false;
    }
    return info.open;
}
1585 
1586 
// Tells whether `ln` is the closing tag of a block-level HTML element.
private bool isHtmlBlockCloseLine(string ln) pure @safe {
    auto info = parseHtmlBlockLine(ln);
    if (!info.isHtmlBlock) {
        return false;
    }
    return !info.open;
}
1591 
1592 
// Tells whether `ln` begins with a ``` code fence (no leading whitespace).
private bool isCodeBlockDelimiter(string ln) pure @safe {
    return ln.length >= 3 && ln[0 .. 3] == "```";
}
1596 
1597 // private string getHtmlTagName(string ln) pure @safe {
1598 //     return parseHtmlBlockLine(ln).tagName;
1599 // }
1600 
// Tells whether `ln` begins with one indentation step:
// a tab or four spaces.
private bool isLineIndented(string ln) pure @safe {
    immutable tabbed = ln.length >= 1 && ln[0] == '\t';
    immutable spaced = ln.length >= 4 && ln[0 .. 4] == "    ";
    return tabbed || spaced;
}
1604 
1605 // private string unindentLine(string ln) pure @safe {
1606 //     if (ln.startsWith("\t")) return ln[1 .. $];
1607 //     if (ln.startsWith("    ")) return ln[4 .. $];
1608 //     assert(false);
1609 // }
1610 
/*******************************************************************************
 * Tries to read an emphasis span from the start of `str`.
 *
 * The span starts with one to three '*' or one to three '_' and ends at the
 * next occurrence of the same marker; it must not be empty.
 *
 * Params:
 *     str = input; on success advanced past the closing marker,
 *           otherwise left untouched
 *     text = on success receives the text between the markers
 *
 * Returns: the marker length (1, 2 or 3), or 0 if `str` does not start
 *          with an emphasis span.
 */
private int parseEmphasis(ref string str, ref string text) pure @safe {
    if (str.length < 3) {
        return 0;
    }

    immutable char marker = str[0];
    if (marker != '*' && marker != '_') {
        return 0;
    }

    // Length of the opening marker, capped at three characters.
    // str has at least three characters, so the indices stay in range.
    size_t run = 1;
    while (run < 3 && str[run] == marker) {
        run++;
    }
    string ctag = str[0 .. run];
    string rest = str[run .. $];

    auto closeAt = () @trusted { return rest.indexOf(ctag); }();
    if (closeAt < 1) {
        return 0;
    }

    text = rest[0 .. closeAt];
    str = rest[closeAt + run .. $];
    return cast(int) run;
}
1643 
1644 
/*******************************************************************************
 * Tries to read an inline code span from the start of `str`.
 *
 * The span is delimited by single or double backticks and must not be
 * empty.
 *
 * Params:
 *     str = input; on success advanced past the closing delimiter,
 *           otherwise left untouched
 *     code = on success receives the text between the delimiters
 *
 * Returns: true if a code span was read.
 */
private bool parseInlineCode(ref string str, ref string code) pure @safe {
    if (str.length < 3 || str[0] != '`') {
        return false;
    }

    // A second backtick makes the delimiter two characters wide.
    immutable size_t width = (str[1] == '`') ? 2 : 1;
    string fence = str[0 .. width];
    string rest = str[width .. $];

    auto closeAt = () @trusted { return rest.indexOf(fence); }();
    if (closeAt < 1) {
        return false;
    }

    code = rest[0 .. closeAt];
    str = rest[closeAt + width .. $];
    return true;
}
1666 
1667 
/*******************************************************************************
 * Tries to read a link or image from the start of `str`.
 *
 * Accepted forms (an image has a leading '!'):
 *     [text](url), [text](url "title"), [text](<url>),
 *     [text][refid], [text] [refid] and [refid][]
 *
 * Params:
 *     str = input; advanced past the link on success, untouched otherwise
 *     dst = receives text, URL and title; may be partially written even
 *           when parsing fails
 *     linkrefs = reference definitions, keyed by lower-case reference id
 *
 * Returns: true if a link was read. A reference-style link whose id is
 *          not contained in `linkrefs` is not a link.
 */
private bool parseLink(
    ref string str, ref Link dst, in LinkRef[string] linkrefs
) pure @safe {
    string rest = str;
    if (rest.length < 3) {
        return false;
    }
    // An image only differs by its '!' prefix.
    if (rest[0] == '!') {
        rest = rest[1 .. $];
    }

    // The text part: [text]
    if (rest[0] != '[') {
        return false;
    }
    auto closeAt = rest.matchBracket();
    if (closeAt < 1) {
        return false;
    }
    string refid;
    dst.text = rest[1 .. closeAt];
    rest = rest[closeAt + 1 .. $];

    // The target part: either (url "title") or [refid]
    if (rest.length < 2) {
        return false;
    }
    if (rest[0] != '(') {
        // Reference style; a single space may separate text and id.
        if (rest[0] == ' ') {
            rest = rest[1 .. $];
        }
        if (rest[0] != '[') {
            return false;
        }
        rest = rest[1 .. $];
        closeAt = rest.indexOfCT(']');
        if (closeAt < 0) {
            return false;
        }
        // An empty id means that the link text is the id.
        refid = (closeAt == 0) ? dst.text : rest[0 .. closeAt];
        rest = rest[closeAt + 1 .. $];
    } else {
        closeAt = rest.matchBracket();
        if (closeAt < 1) {
            return false;
        }
        auto inner = rest[1 .. closeAt];
        // A title is a quoted string separated from the URL by whitespace.
        immutable quoteAt = inner.indexOfCT('"');
        if (quoteAt > 1 && std.ascii.isWhite(inner[quoteAt - 1])) {
            dst.url = inner[0 .. quoteAt].stripRight();
            immutable titleLen = inner[quoteAt .. $].lastIndexOf('"');
            if (titleLen == 0) {
                // There is no closing quote.
                return false;
            }
            assert(titleLen > 0);
            dst.title = inner[quoteAt + 1 .. quoteAt + titleLen];
        } else {
            dst.url = inner.stripRight();
            dst.title = null;
        }
        // The URL may be wrapped in angle brackets.
        if (dst.url.startsWith("<") && dst.url.endsWith(">")) {
            dst.url = dst.url[1 .. $ - 1];
        }
        rest = rest[closeAt + 1 .. $];
    }

    // Resolve a reference id into URL and title.
    if (refid.length > 0) {
        auto found = toLower(refid) in linkrefs;
        if (!found) {
            return false;
        }
        dst.url = found.url;
        dst.title = found.title;
    }

    str = rest;
    return true;
}
1740 
1741 
1742 /* UNITTESTS */
1743 
1744 
// parseLink: accepted link forms and inputs that must be rejected.
@safe unittest {
    // One input that must parse, with the link it must produce.
    static struct Case {
        string input;
        Link expected;
        bool withRefs; // parse with the reference table instead of null
    }

    LinkRef[string] refs;
    refs["ref"] = LinkRef("ref", "target", "title");

    auto accepted = [
        Case(`[link](target)`, Link("link", "target")),
        Case(`[link](target "title")`, Link("link", "target", "title")),
        Case(`[link](target  "title")`, Link("link", "target", "title")),
        Case(`[link](target "title"  )`, Link("link", "target", "title")),

        Case(`[link](target)`, Link("link", "target")),
        Case(`[link](target "title")`, Link("link", "target", "title")),

        Case(`[link][ref]`, Link("link", "target", "title"), true),
        Case(`[ref][]`, Link("ref", "target", "title"), true),

        Case(`[link[with brackets]](target)`, Link("link[with brackets]", "target")),
        Case(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), true),

        Case(`[link](/target with spaces )`, Link("link", "/target with spaces")),
        Case(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title")),

        Case(`[link](white-space  "around title" )`, Link("link", "white-space", "around title")),
        Case(`[link](tabs    "around title"    )`, Link("link", "tabs", "around title")),

        Case(`[link](target "")`, Link("link", "target", "")),
        Case(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", "")),

        Case(`[link](<target>)`, Link("link", "target")),
    ];
    foreach (c; accepted) {
        Link parsed;
        string s = c.input;
        assert(parseLink(s, parsed, c.withRefs ? refs : null), c.input);
        assert(parsed == c.expected);
    }

    auto rejected = [
        `text`, `[link](target`, `[link]target)`, `[link]`,
        `[link(target)`, `link](target)`, `[link] (target)`,
        `[link][noref]`, `[noref][]`
    ];
    Link scratch;
    foreach (s; rejected) {
        assert(!parseLink(s, scratch, refs), s);
    }
}
1789 
/*******************************************************************************
 * Tries to read an automatic link of the form <url> or <address@host> from
 * the start of `str`.
 *
 * The text between the angle brackets must not contain spaces or tabs and
 * must contain a ':' or an '@'.
 *
 * Params:
 *     str = input; advanced past the closing '>' on success,
 *           untouched otherwise
 *     url = receives the link target. A bare e-mail address (an '@' that
 *           is not the first character, and no ':') gets "mailto:"
 *           prepended. Anything that already carries a scheme, such as
 *           <https://host/@user> or <mailto:me@host>, is returned as
 *           written. May be overwritten even when false is returned.
 *
 * Returns: true if an automatic link was read.
 */
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
    string pstr = str;
    if (pstr.length < 3)
        return false;
    if (pstr[0] != '<')
        return false;
    pstr = pstr[1 .. $];
    auto cidx = pstr.indexOf('>');
    if (cidx < 0)
        return false;
    url = pstr[0 .. cidx];
    if (anyOf(url, " \t"))
        return false;
    if (!anyOf(url, ":@"))
        return false;
    str = pstr[cidx + 1 .. $];
    // Only a scheme-less address is an e-mail address. A URL that merely
    // contains an '@' (user info, path segment) must keep its own scheme,
    // and an explicit "mailto:" must not be doubled.
    if (url.indexOf('@') > 0 && url.indexOf(':') < 0)
        url = "mailto:" ~ url;
    return true;
}
1811 
/*******************************************************************************
 * Generates an identifier suitable for use within a URL.
 *
 * The returned range yields only ASCII lower case letters, digits and
 * dashes (-). Each run of non-alphanumeric characters between two
 * alphanumeric ones collapses into a single dash; leading and trailing runs
 * are dropped, so the result never starts or ends with a dash.
 */
auto asSlug(R)(R text) if (isInputRange!R && is(typeof(R.init.front) == dchar)) {
    static struct SlugRange {
        private {
            R _input;
            bool _dash; // a separator dash is pending before the next character
        }

        this(R input) {
            _input = input;
            // drop leading separators so the slug never starts with a dash
            skipNonAlphaNum();
        }

        @property bool empty() const {
            return !_dash && _input.empty;
        }

        @property char front() const {
            if (_dash)
                return '-';

            // the input is always positioned on an ASCII alphanumeric here
            immutable c = cast(char) _input.front;
            return (c >= 'A' && c <= 'Z') ? cast(char)(c + ('a' - 'A')) : c;
        }

        void popFront() {
            if (_dash) {
                _dash = false;
                return;
            }

            _input.popFront();
            immutable skipped = skipNonAlphaNum();
            // emit a dash only when more slug characters follow
            _dash = skipped && !_input.empty;
        }

        // Advances the input to the next ASCII alphanumeric character (or to
        // its end) and reports whether anything had to be skipped.
        private bool skipNonAlphaNum() {
            bool skipped = false;
            for (; !_input.empty; _input.popFront()) {
                immutable dchar c = _input.front;
                if ((c >= 'a' && c <= 'z')
                    || (c >= 'A' && c <= 'Z')
                    || (c >= '0' && c <= '9'))
                    break;
                skipped = true;
            }
            return skipped;
        }
    }

    return SlugRange(text);
}
1878 
unittest {
    import std.algorithm : equal;

    // true when slugging `input` yields exactly `expected`
    static bool slugIs(string input, string expected) {
        return input.asSlug.equal(expected);
    }

    assert(slugIs("", ""));
    assert(slugIs(".,-", ""));
    assert(slugIs("abc", "abc"));
    assert(slugIs("aBc123", "abc123"));
    assert(slugIs("....aBc...123...", "abc-123"));
}
1888 
// Components of a parsed inline link such as `[text](url "title")`.
private struct Link {
    string text; // the bracketed link text
    string url; // the link destination
    string title; // optional title; stays empty when none is given
}
1894 
@safe unittest { // alt and title attributes
    // image without a title
    auto untitled = convertMarkdownToHTML("![alt](http://example.org/image)");
    assert(untitled == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");

    // image with a title
    auto titled = convertMarkdownToHTML("![alt](http://example.org/image \"Title\")");
    assert(titled == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1901 
@safe unittest { // complex links
    // link text spanning two lines, target wrapped in angle brackets
    auto multiline = convertMarkdownToHTML("their [install\ninstructions](<http://www.brew.sh>) and");
    assert(multiline == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");

    // image nested inside a link
    auto badge = convertMarkdownToHTML("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)");
    assert(badge == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1908 
@safe unittest { // check CTFE-ability
    // `enum` forces the conversion to run at compile time
    enum html = convertMarkdownToHTML("### some markdown\n[foo][]\n[foo]: /bar");
    enum expected = "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n";
    assert(html == expected, html);
}
1914 
@safe unittest { // correct line breaks in restrictive mode
    auto html = convertMarkdownToHTML("hello\nworld", MarkdownFlags.forumDefault);
    enum expected = "<p>hello<br/>world\n</p>\n";
    assert(html == expected, html);
}
1919 
1920 /*@safe unittest { // code blocks and blockquotes
1921     assert(convertMarkdownToHTML("\tthis\n\tis\n\tcode") ==
1922         "<pre><code>this\nis\ncode</code></pre>\n");
1923     assert(convertMarkdownToHTML("    this\n    is\n    code") ==
1924         "<pre><code>this\nis\ncode</code></pre>\n");
1925     assert(convertMarkdownToHTML("    this\n    is\n\tcode") ==
1926         "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1927     assert(convertMarkdownToHTML("\tthis\n\n\tcode") ==
1928         "<pre><code>this\n\ncode</code></pre>\n");
1929     assert(convertMarkdownToHTML("\t> this") ==
1930         "<pre><code>&gt; this</code></pre>\n");
1931     assert(convertMarkdownToHTML(">     this") ==
1932         "<blockquote><pre><code>this</code></pre></blockquote>\n");
1933     assert(convertMarkdownToHTML(">     this\n    is code") ==
1934         "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1935 }*/
1936 
@safe unittest { // test simple border-less table
    enum source = "Col 1 | Col 2 | Col 3\n -- | -- | --\n val 1 | val 2 | val 3\n *val 4* | val 5 | value 6";
    enum expected = "<table>\n"
        ~ "<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n"
        ~ "<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n"
        ~ "<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n"
        ~ "</table>\n";
    auto html = convertMarkdownToHTML(source, MarkdownFlags.supportTables);
    assert(html == expected, html);
}
1944 
@safe unittest { // test simple border'ed table
    enum source = "| Col 1 | Col 2 | Col 3 |\n| -- | -- | -- |\n| val 1 | val 2 | val 3 |\n| *val 4* | val 5 | value 6 |";
    enum expected = "<table>\n"
        ~ "<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n"
        ~ "<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n"
        ~ "<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n"
        ~ "</table>\n";
    auto html = convertMarkdownToHTML(source, MarkdownFlags.supportTables);
    assert(html == expected, html);
}
1952 
@safe unittest { // table preceded by a paragraph, with padded cells
    string source = `
Table:

ID  | Name  | Address
 -- | ----  | ---------
 1  | Foo   | Somewhere
 2  | Bar   | Nowhere `;
    auto expected = "<p>Table:\n</p>\n<table>\n<tr><th>ID</th><th>Name</th><th>Address</th></tr>\n<tr><td>1</td><td>Foo</td><td>Somewhere</td></tr>\n<tr><td>2</td><td>Bar</td><td>Nowhere</td></tr>\n</table>\n";
    auto html = convertMarkdownToHTML(source, MarkdownFlags.supportTables);
    assert(html == expected, html);
}
1965 
1966 package:
1967 
/// Functions for working with HTML.
1969 
/*******************************************************************************
 * Writes the HTML escaped version of every element of an input range to an
 * output range.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range of characters to escape
 *     flags = escaping options, forwarded to the per-character overload
 */
void filterHTMLEscape(R, S)(ref R dst,
    S str,
    HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline)
        if (isOutputRange!(R, dchar) && isInputRange!S) {
    while (!str.empty) {
        filterHTMLEscape(dst, str.front, flags);
        str.popFront();
    }
}
1981 
/*******************************************************************************
 * Writes the HTML escaped version of a given string to an output range,
 * escaping quotes as well as newlines.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range of characters to escape
 */
void filterHTMLAttribEscape(R, S)(ref R dst, S str)
        if (isOutputRange!(R, dchar) && isInputRange!S) {
    // the same flag set applies to every character
    enum attribFlags = HTMLEscapeFlags.escapeNewline | HTMLEscapeFlags.escapeQuotes;
    while (!str.empty) {
        filterHTMLEscape(dst, str.front, attribFlags);
        str.popFront();
    }
}
1996 
/*******************************************************************************
 * Writes every element of an input range to an output range as a decimal
 * numeric character reference (`&#NNN;`), regardless of the character.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     str = input range of characters to escape
 */
void filterHTMLAllEscape(R, S)(ref R dst, S str)
        if (isOutputRange!(R, dchar) && isInputRange!S) {
    while (!str.empty) {
        immutable uint code = cast(uint) str.front;
        dst.put("&#");
        dst.put(to!string(code));
        dst.put(';');
        str.popFront();
    }
}
2009 
/*******************************************************************************
 * Writes the HTML escaped version of a character to an output range.
 *
 * '<', '>' and '&' are always replaced by their named entities. Quotes,
 * newlines and characters outside the known-safe set are escaped only when
 * the corresponding flag is set; everything else is written verbatim.
 *
 * Params:
 *     dst = output range receiving the escaped text
 *     ch = character to escape
 *     flags = bitwise combination of `HTMLEscapeFlags` values
 */
void filterHTMLEscape(R)(ref R dst,
    dchar ch,
    HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline) {
    switch (ch) {
    default:
        if (flags & HTMLEscapeFlags.escapeUnknown) {
            dst.put("&#");
            dst.put(to!string(cast(uint) ch));
            dst.put(';');
        } else
            dst.put(ch);
        break;
    case '"':
        if (flags & HTMLEscapeFlags.escapeQuotes)
            dst.put("&quot;");
        else
            dst.put('"');
        break;
    case '\'':
        if (flags & HTMLEscapeFlags.escapeQuotes)
            dst.put("&#39;");
        else
            dst.put('\'');
        break;
    case '\r', '\n':
        if (flags & HTMLEscapeFlags.escapeNewline) {
            dst.put("&#");
            dst.put(to!string(cast(uint) ch));
            dst.put(';');
        } else
            dst.put(ch);
        break;
    // Known-safe characters: written verbatim even with escapeUnknown.
    case 'a': .. case 'z':
    case 'A': .. case 'Z':
    case '0': .. case '9':
    case ' ', '\t', '-', '_', '.', ':', ',', ';',
        '#', '+', '*', '?', '=', '(', ')', '/', '!',
        '%', '{', '}', '[', ']', '`', '´', '$', '^', '~':
        // Put the dchar itself: '´' (U+00B4) is not ASCII, so narrowing it
        // to `char` would emit a lone 0xB4 byte, which is invalid UTF-8.
        dst.put(ch);
        break;
    case '<':
        dst.put("&lt;");
        break;
    case '>':
        dst.put("&gt;");
        break;
    case '&':
        dst.put("&amp;");
        break;
    }
}

/// Flags for HTML-escaping some symbols.
enum HTMLEscapeFlags {
    /// Escape only '<', '>' and '&'.
    escapeMinimal = 0,
    /// Additionally escape double and single quotes.
    escapeQuotes = 1 << 0,
    /// Additionally write '\r' and '\n' as numeric character references.
    escapeNewline = 1 << 1,
    /// Additionally write every character outside the known-safe set as a
    /// numeric character reference.
    escapeUnknown = 1 << 2
}
2075 
/// Functions for working with string data.
2077 
/*******************************************************************************
 * Tells whether `str` consists solely of characters that occur in `chars`.
 *
 * An empty `str` yields `true`.
 */
bool allOf(string str, string chars)
@safe pure {
    bool onlyAllowed = true;
    foreach (dchar c; str) {
        if (!canFind(chars, c)) {
            onlyAllowed = false;
            break;
        }
    }
    return onlyAllowed;
}
2090 
/*******************************************************************************
 * Finds the first occurrence of a character in a string; also usable during
 * compile-time function evaluation.
 *
 * During CTFE the search is done with a plain loop; at run time the call is
 * forwarded to `std.string.indexOf`.
 *
 * Params:
 *     s = string to search in
 *     c = character to look for
 *     cs = whether the comparison is case sensitive
 * Returns:
 *     The code unit index of the first match, or -1 if there is none.
 */
ptrdiff_t indexOfCT(Char)(in Char[] s, dchar c, CaseSensitive cs = CaseSensitive.yes)
@safe pure {
    if (__ctfe) {
        if (cs == CaseSensitive.yes) {
            foreach (i, dchar ch; s) {
                if (ch == c) {
                    return i;
                }
            }
        } else {
            c = std.uni.toLower(c);
            foreach (i, dchar ch; s) {
                if (std.uni.toLower(ch) == c) {
                    return i;
                }
            }
        }
        return -1;
    }
    // Fixed: the module path was misspelled as `std..string`, which does
    // not compile.
    return std.string.indexOf(s, c, cs);
}
2112 
/*******************************************************************************
 * Checks if any character in 'str' is contained in 'chars'.
 *
 * Params:
 *     str = string whose characters are tested
 *     chars = set of characters to look for
 * Returns:
 *     `true` if at least one code point of `str` occurs in `chars`;
 *     `false` otherwise, including for an empty `str`.
 */
bool anyOf(string str, string chars)
@safe pure {
    // Iterate by code point (like `allOf` does). Iterating by UTF-8 code
    // unit would compare the individual bytes of a multi-byte character
    // against `chars`, producing both false positives and false negatives
    // for non-ASCII text.
    foreach (dchar ch; str) {
        if (chars.canFind(ch)) {
            return true;
        }
    }
    return false;
}
2125 
/*******************************************************************************
 * Locates the bracket that closes the one `str` starts with
 * (supports '[', '$(LPAREN)', '<' and '{').
 *
 * Params:
 *     str = input string, expected to begin with an opening bracket
 *     nested = if set, inner brackets of the same kind are counted and
 *              must be balanced before the closing one is accepted
 * Returns:
 *     The index of the closing bracket, or -1 if the string is unbalanced,
 *     shorter than two characters or does not start with a bracket.
 */
sizediff_t matchBracket(string str, bool nested = true)
@safe pure nothrow {
    if (str.length < 2)
        return -1;

    immutable char opening = str[0];
    char closing;
    if (opening == '[')
        closing = ']';
    else if (opening == '(')
        closing = ')';
    else if (opening == '<')
        closing = '>';
    else if (opening == '{')
        closing = '}';
    else
        return -1;

    // depth starts at 1 for the bracket at index 0
    size_t depth = 1;
    for (size_t pos = 1; pos < str.length; ++pos) {
        immutable char c = str[pos];
        if (nested && c == opening)
            ++depth;
        else if (c == closing && --depth == 0)
            return pos;
    }
    return -1;
}
2170 
2171 ////////////////////////////////////////////////////////////////////////////////
2172 ////                        DEPRECATED FUNCTIONS                            ////
2173 ////////////////////////////////////////////////////////////////////////////////
2174 
/*******************************************************************************
 * Extracts the outline (the tree of headings) of a Markdown document.
 *
 * Only headers among the top-level blocks are considered. Each header is
 * attached below the most recent header chain for as long as that chain's
 * levels are strictly smaller than its own.
 *
 * Params:
 *     markdown_source = Markdown text to analyze
 *     settings = parser settings; a default instance is created when null
 * Returns:
 *     The top-level sections, each carrying its nested sub-sections.
 */
Section[] getMarkdownOutline(string markdown_source,
    scope MarkdownSettings settings = null) {
    if (settings is null)
        settings = new MarkdownSettings;

    auto rawLines = splitLines(markdown_source);
    auto lines = parseLines(rawLines, settings);
    Block document;
    parseBlocks(document, lines, null, settings);

    Section root;
    foreach (ref blk; document.blocks) {
        if (blk.type == BlockType.Header) {
            // descend along the last sub-section while it ranks above this header
            auto parent = &root;
            while (parent.subSections.length > 0
                && parent.subSections[$ - 1].headingLevel < blk.headerLevel) {
                parent = &parent.subSections[$ - 1];
            }
            auto caption = blk.text[0];
            parent.subSections ~= Section(
                blk.headerLevel, caption, caption.asSlug.to!string
            );
        }
    }

    return root.subSections;
}
///
unittest {
    string source = "## first\n## second\n### third\n# fourth\n### fifth";

    // "third" nests below "second"; "fifth" nests below "fourth"
    auto third = Section(3, " third", "third");
    auto fifth = Section(3, " fifth", "fifth");
    auto outline = [
        Section(2, " first", "first"),
        Section(2, " second", "second", [third]),
        Section(1, " fourth", "fourth", [fifth])
    ];
    assert(getMarkdownOutline(source) == outline);
}
2217 
// Classifies every raw input line: records its indentation markers, its
// text with the indentation removed, and its line type. `lines` is consumed
// from the front and is empty when the function returns.
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
    Line[] parsed;
    char subHeaderChar = '-';

    while (!lines.empty) {
        string rest = lines.front;
        lines.popFront();

        Line info;
        info.text = rest;

        // Peel indentation off the front: a tab or four spaces count as one
        // White level, a '>' (after optional blanks) as one Quote level.
        while (rest.length > 0) {
            if (rest[0] == '\t') {
                info.indent ~= IndentType.White;
                rest = rest[1 .. $];
                continue;
            }
            if (rest.startsWith("    ")) {
                info.indent ~= IndentType.White;
                rest = rest[4 .. $];
                continue;
            }
            rest = rest.stripLeft();
            if (!rest.startsWith(">"))
                break;
            info.indent ~= IndentType.Quote;
            rest = rest[1 .. $];
        }
        info.unindented = rest;

        // The first matching test wins, so the order below is significant.
        if ((settings.flags & MarkdownFlags.backtickCodeBlocks)
            && isCodeBlockDelimiter(rest))
            info.type = LineType.CodeBlockDelimiter;
        else if (isAtxHeaderLine(rest))
            info.type = LineType.AtxHeader;
        else if (isSetextHeaderLine(rest, subHeaderChar))
            info.type = LineType.SetextHeader;
        else if ((settings.flags & MarkdownFlags.supportTables)
            && isTableRowLine(rest))
            info.type = LineType.Table;
        else if (isHlineLine(rest))
            info.type = LineType.Hline;
        else if (isOListLine(rest))
            info.type = LineType.OList;
        else if (isUListLine(rest))
            info.type = LineType.UList;
        else if (isLineBlank(rest))
            info.type = LineType.Blank;
        else if (!(settings.flags & MarkdownFlags.noInlineHtml)
            && isHtmlBlockLine(rest))
            info.type = LineType.HtmlBlock;
        else
            info.type = LineType.Plain;

        parsed ~= info;
    }
    return parsed;
}
2280 
// Groups classified lines into a tree of blocks below `root`.
//
// Lines are consumed from the front of `lines`. Parsing stops when `lines`
// is exhausted or when a line is reached whose indentation does not start
// with `baseIndent`; that line is left in `lines` for the caller.
//
// Params:
//     root = block receiving the parsed child blocks
//     lines = remaining classified lines, consumed from the front
//     baseIndent = indentation shared by all lines belonging to `root`
//     settings = parser settings, passed on to recursive calls
private void parseBlocks(ref Block root,
    ref Line[] lines,
    IndentType[] baseIndent,
    scope MarkdownSettings settings)
pure @safe {
    // The innermost indentation marker decides the kind of the root block;
    // for a trailing White marker the type is left as the caller set it.
    if (baseIndent.length == 0) {
        root.type = BlockType.Text;
    } else if (baseIndent[$ - 1] == IndentType.Quote) {
        root.type = BlockType.Quote;
    }

    while (!lines.empty) {
        auto ln = lines.front;

        // blank lines between blocks are dropped
        if (ln.type == LineType.Blank) {
            lines.popFront();
            continue;
        }

        if (ln.indent != baseIndent) {
            // the line no longer belongs to this block: hand back to the caller
            if (ln.indent.length < baseIndent.length ||
                ln.indent[0 .. baseIndent.length] != baseIndent) {
                return;
            }

            // exactly one additional White level introduces an indented code block
            auto cindent = baseIndent ~ IndentType.White;
            if (ln.indent == cindent) {
                Block cblock;
                cblock.type = BlockType.Code;
                // collect every following line that is indented at least as deep
                while (!lines.empty &&
                    lines.front.indent.length >= cindent.length &&
                    lines.front.indent[0 .. cindent.length] == cindent) {
                    cblock.text ~= lines.front.unindent(cindent.length);
                    lines.popFront();
                }
                root.blocks ~= cblock;
            } else {
                // any other deeper indentation: parse a nested block one level down
                Block subblock;
                parseBlocks(subblock,
                    lines,
                    ln.indent[0 .. baseIndent.length + 1],
                    settings);
                root.blocks ~= subblock;
            }
        } else {
            Block b;
            // turns the upcoming lines into a paragraph via skipText
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }

            final switch (ln.type) {
            case LineType.Undefined:
                assert(false);
            case LineType.Blank:
                // blank lines were already skipped above
                assert(false);
            case LineType.Plain:
                // a plain line followed by a setext underline is a header
                if (lines.length >= 2 &&
                    lines[1].type == LineType.SetextHeader) {
                    auto setln = lines[1].unindented;
                    b.type = BlockType.Header;
                    b.text = [ln.unindented];
                    // '=' underline gives level 1, anything else level 2
                    b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                    lines.popFrontN(2);
                } else {
                    processPlain();
                }
                break;
            case LineType.Hline:
                b.type = BlockType.Plain;
                b.text = ["<hr>"];
                lines.popFront();
                break;
            case LineType.AtxHeader:
                b.type = BlockType.Header;
                string hl = ln.unindented;
                b.headerLevel = 0;
                // the number of leading '#' characters is the header level
                while (hl.length > 0 && hl[0] == '#') {
                    b.headerLevel++;
                    hl = hl[1 .. $];
                }
                // strip trailing '#' and spaces; leading spaces are kept
                while (hl.length > 0 && (hl[$ - 1] == '#' || hl[$ - 1] == ' '))
                    hl = hl[0 .. $ - 1];
                b.text = [hl];
                lines.popFront();
                break;
            case LineType.SetextHeader:
                // An underline without a preceding plain line is dropped.
                // The default-initialized `b` is still appended to
                // root.blocks after the switch.
                lines.popFront();
                break;
            case LineType.UList:
            case LineType.OList:
                b.type = ln.type == LineType.UList ? BlockType.UList : BlockType.OList;
                auto itemindent = baseIndent ~ IndentType.White;
                bool firstItem = true, paraMode = false;
                // one iteration per list item of the same kind at this indentation
                while (!lines.empty && lines.front.type == ln.type &&
                    lines.front.indent == baseIndent) {
                    Block itm;
                    itm.text = skipText(lines, itemindent);
                    itm.text[0] = removeListPrefix(itm.text[0], ln.type);

                    // emit <p></p> if there are blank lines between the items
                    // (decided once, from what follows the first item)
                    if (firstItem && !lines.empty &&
                        lines.front.type == LineType.Blank) {
                        paraMode = true;
                    }
                    firstItem = false;
                    if (paraMode) {
                        // move the item text into a nested paragraph block
                        Block para;
                        para.type = BlockType.Paragraph;
                        para.text = itm.text;
                        itm.blocks ~= para;
                        itm.text = null;
                    }

                    // nested content of the item lives one White level deeper
                    parseBlocks(itm, lines, itemindent, settings);
                    // set the type afterwards: parseBlocks may assign root.type
                    itm.type = BlockType.ListItem;
                    b.blocks ~= itm;
                }
                break;
            case LineType.HtmlBlock:
                int nestlevel = 0;
                auto starttag = parseHtmlBlockLine(ln.unindented);
                // NOTE(review): this early exit does not consume the current
                // line; presumably lines classified as HtmlBlock always
                // start with an opening block tag, so it is never taken.
                // Confirm against isHtmlBlockLine.
                if (!starttag.isHtmlBlock || !starttag.open)
                    break;

                b.type = BlockType.Plain;
                while (!lines.empty) {
                    // stop at the first line that is outside of this block's indentation
                    if (lines.front.indent.length < baseIndent.length) {
                        break;
                    }
                    if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                        break;
                    }

                    auto str = lines.front.unindent(baseIndent.length);
                    auto taginfo = parseHtmlBlockLine(str);
                    b.text ~= lines.front.unindent(baseIndent.length);
                    lines.popFront();
                    // track nesting of tags with the same name as the start tag
                    if (taginfo.isHtmlBlock
                        && taginfo.tagName == starttag.tagName) {
                        nestlevel += taginfo.open ? 1 : -1;
                    }
                    // the block ends once the start tag has been closed
                    if (nestlevel <= 0) {
                        break;
                    }
                }
                break;
            case LineType.CodeBlockDelimiter:
                lines.popFront(); // TODO: get language from line
                b.type = BlockType.Code;
                while (!lines.empty) {
                    // stop at the first line that is outside of this block's indentation
                    if (lines.front.indent.length < baseIndent.length) {
                        break;
                    }
                    if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                        break;
                    }
                    // the closing delimiter is consumed but not part of the code
                    if (lines.front.type == LineType.CodeBlockDelimiter) {
                        lines.popFront();
                        break;
                    }
                    b.text ~= lines.front.unindent(baseIndent.length);
                    lines.popFront();
                }
                break;
            case LineType.Table:
                lines.popFront();
                // Can this be a valid table (is there a next line that could be a header separator)?
                // NOTE(review): the candidate header row has already been
                // popped here, so both processPlain() fallbacks below start
                // reading at the following line (or at an empty `lines`).
                // The popped row's text appears to be lost - confirm
                // against skipText.
                if (lines.empty) {
                    processPlain();
                    break;
                }
                Line lnNext = lines.front;
                // the separator row consists only of '-', ':', '|' and spaces
                // and contains both " -" and "- "
                immutable bool isTableHeader = (
                    (lnNext.type == LineType.Table)
                        && (lnNext.text.indexOf(" -") >= 0)
                        && (lnNext.text.indexOf("- ") >= 0)
                        && lnNext.text.allOf("-:| ")
                );
                if (!isTableHeader) {
                    // Not a valid table header, so let's assume it's plain markdown
                    processPlain();
                    break;
                }
                b.type = BlockType.Table;
                // Parse header
                b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                // Parse table rows
                lines.popFront(); // skips the separator row
                while (!lines.empty) {
                    ln = lines.front;
                    if (ln.type != LineType.Table)
                        break; // not a table row, so let's assume it's the end of the table
                    b.blocks ~= splitTableRow(ln);
                    lines.popFront();
                }
                break;
            }
            root.blocks ~= b;
        }
    }
}