/*******************************************************************************
 * Markdown parser implementation.
 *
 * Copyright: (c) 2012-2019 RejectedSoftware e.K. and the D community
 * License: Subject to the terms of the MIT license.
 * Repository: https://github.com/dlang-community/dmarkdown
 *
 * This library was forked and modified in 2021 for the `hgen` project.
 * hgen: https://gitlab.com/vindexbit/hgen
 * Author: Eugene 'Vindex' Stulin <tech.vindex@gmail.com>
 *
 * MIT License (Expat version)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

module md;

import std.algorithm;
import std.array;
import std.ascii;
import std.conv;
import std.format;
import std.uni;
import std.utf;
import core.exception;
import std.range;
import std.string;
import std.stdio;

/// Callback applied to every link/image URL before it is written out.
alias UrlFilterFn = string delegate(string urlOrPath, bool isImage);
/// Callback applied to (already HTML-escaped) code before it is written out.
alias ProcessCodeFn = string delegate(string) @safe nothrow;

/*******************************************************************************
 * Converts one Markdown document into HTML.
 *
 * Typical use: construct with the Markdown text, optionally adjust behaviour
 * through the setter/enable methods, then call `convertToHTML()`.
 */
class MarkdownHandler {

    /// Stores the Markdown source; no parsing happens until `convertToHTML`.
    this(string markdownText) {
        this.markdownText = markdownText;
    }

    /// Installs a delegate that rewrites link and image URLs.
    void setUrlFilterFunction(UrlFilterFn filter) {
        this.urlFilter = filter;
    }

    /// Installs a delegate that post-processes code blocks and inline code.
    void setProcessCodeFunction(ProcessCodeFn process) {
        this.processCode = process;
    }

    /// Sets `MarkdownFlags.alternateSubheaders`.
    void enableAlternateSubheaders() {
        this.flags |= MarkdownFlags.alternateSubheaders;
    }

    /// Sets `MarkdownFlags.disableUnderscoreEmphasis`.
    void disableUnderscoreEmphasis() {
        this.flags |= MarkdownFlags.disableUnderscoreEmphasis;
    }

    /***************************************************************************
     * Runs the full pipeline: split into lines, collect link references,
     * classify lines, build the block tree and render it.
     *
     * Returns: the generated HTML.
     */
    string convertToHTML() {
        // Was `std..string.splitLines` (doubled dot), which does not compile.
        auto allLines = std.string.splitLines(this.markdownText);
        auto links = scanForReferences(allLines);
        auto lines = this.parseLines(allLines);
        Block rootBlock;
        this.parseBlocks(rootBlock, lines, null);
        auto dst = appender!string();
        this.writeBlock(dst, rootBlock, links);
        return dst.data;
    }

private:

    string markdownText;

    /// Controls the capabilities of the parser.
    MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

    /// Heading tags will start at this level.
    size_t headingBaseLevel = 1;

    /// Called for every link/image URL to perform arbitrary transformations.
    UrlFilterFn urlFilter;

    /***************************************************************************
     * An optional delegate to post-process code blocks and inline code.
     * Useful to e.g. add code highlighting.
     */
    ProcessCodeFn processCode = null;

    /***************************************************************************
     * Fills `lninfo.indent` and `lninfo.unindented` from `lninfo.text`.
     *
     * One tab or one run of four spaces counts as a `White` level; a `>`
     * (after optional leading blanks) counts as a `Quote` level.
     */
    void determineIndent(ref Line lninfo) @safe {
        auto ln = lninfo.text.idup;
        while (ln.length > 0) {
            if (ln[0] == '\t') {
                lninfo.indent ~= IndentType.White;
                ln.popFront();
            } else if (ln.startsWith("    ")) {
                // The prefix must be exactly as long as what is popped below
                // (and as what Line.unindent strips: `ln[4 .. $]`); testing a
                // shorter prefix would eat non-blank characters.
                lninfo.indent ~= IndentType.White;
                ln.popFrontN(4);
            } else {
                ln = ln.stripLeft();
                if (ln.startsWith(">")) {
                    lninfo.indent ~= IndentType.Quote;
                    ln.popFront();
                } else {
                    break;
                }
            }
        }
        lninfo.unindented = ln;
    }

    /***************************************************************************
     * Classifies one unindented line. The order of the checks matters: the
     * first matching predicate wins.
     *
     * Params:
     *     ln = line text with its indentation already removed
     *     subHeaderChar = underline character for second-level setext headers
     */
    LineType determineType(string ln, lazy char subHeaderChar)
    pure @safe {
        alias MF = MarkdownFlags;
        if ((flags & MF.backtickCodeBlocks) && isCodeBlockDelimiter(ln)) {
            return LineType.CodeBlockDelimiter;
        } else if (isAtxHeaderLine(ln)) {
            return LineType.AtxHeader;
        } else if (isSetextHeaderLine(ln, subHeaderChar)) {
            return LineType.SetextHeader;
        } else if ((flags & MF.supportTables) && isTableRowLine!false(ln)) {
            return LineType.Table;
        } else if (isHlineLine(ln)) {
            return LineType.Hline;
        } else if (isOListLine(ln)) {
            return LineType.OList;
        } else if (isUListLine(ln)) {
            return LineType.UList;
        } else if (isLineBlank(ln)) {
            return LineType.Blank;
        } else if (!(flags & MF.noInlineHtml) && isHtmlBlockLine(ln)) {
            return LineType.HtmlBlock;
        }
        return LineType.Plain;
    }

    /***************************************************************************
     * Turns raw text lines into classified `Line` records.
     * Consumes `lines` (it is empty on return).
     */
    Line[] parseLines(ref string[] lines) @safe {
        Line[] ret;
        char subHeaderChar = '-';
        if (this.flags & MarkdownFlags.alternateSubheaders) {
            subHeaderChar = '*';
        }
        while (!lines.empty) {
            Line lninfo;
            lninfo.text = lines.front;
            lines.popFront();
            determineIndent(lninfo);
            lninfo.type = determineType(lninfo.unindented, subHeaderChar);
            ret ~= lninfo;
        }
        return ret;
    }

    /***************************************************************************
     * Builds the block tree below `root` from the front of `lines`.
     *
     * Consumes lines as long as they belong to the indentation context
     * `baseIndent`; returns as soon as a line with a smaller or different
     * indentation is met, or after one deeper-indented sub-block was parsed.
     *
     * Params:
     *     root = block that receives the parsed child blocks
     *     lines = remaining classified lines; consumed from the front
     *     baseIndent = indentation prefix of the current nesting level
     */
    void parseBlocks(ref Block root,
                     ref Line[] lines,
                     IndentType[] baseIndent)
    pure @safe {
        if (baseIndent.length == 0) {
            root.type = BlockType.Text;
        } else if (baseIndent[$-1] == IndentType.Quote) {
            root.type = BlockType.Quote;
        }

        while (!lines.empty) {
            auto ln = lines.front;

            if (ln.type == LineType.Blank) {
                lines.popFront();
                continue;
            }

            if (ln.indent != baseIndent) {
                // Less indented, or indented differently: not ours.
                if (ln.indent.length < baseIndent.length ||
                    ln.indent[0 .. baseIndent.length] != baseIndent) {
                    return;
                }

                auto cindent = baseIndent ~ IndentType.White;
                if (ln.indent == cindent) {
                    // Exactly one extra whitespace level: indented code block.
                    Block cblock;
                    cblock.type = BlockType.Code;
                    while (!lines.empty &&
                           lines.front.indent.length >= cindent.length &&
                           lines.front.indent[0 .. cindent.length] == cindent) {
                        cblock.text ~= lines.front.unindent(cindent.length);
                        lines.popFront();
                    }
                    root.blocks ~= cblock;
                } else {
                    Block subblock;
                    this.parseBlocks(subblock,
                                     lines,
                                     ln.indent[0 .. baseIndent.length+1]);
                    root.blocks ~= subblock;
                }
                return;
            }
            // else
            Block b;
            // Reads a paragraph starting at lines.front (always consumes
            // at least that one line).
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }
            final switch(ln.type) {
                case LineType.Undefined: assert(false);
                case LineType.Blank: assert(false);
                case LineType.Plain:
                    if (lines.length >= 2 &&
                        lines[1].type == LineType.SetextHeader) {
                        auto setln = lines[1].unindented;
                        b.type = BlockType.Header;
                        b.text = [ln.unindented];
                        b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                        lines.popFrontN(2);
                    } else {
                        processPlain();
                    }
                    break;
                case LineType.Hline:
                    b.type = BlockType.Plain;
                    b.text = ["<hr>"];
                    lines.popFront();
                    break;
                case LineType.AtxHeader:
                    b.type = BlockType.Header;
                    string hl = ln.unindented;
                    b.headerLevel = 0;
                    // Count and strip the leading '#' markers ...
                    while (hl.length > 0 && hl[0] == '#') {
                        b.headerLevel++;
                        hl = hl[1 .. $];
                    }
                    // ... then drop optional closing '#' and trailing blanks.
                    while (hl.length > 0
                           && (hl[$-1] == '#' || hl[$-1] == ' ')) {
                        hl = hl[0 .. $-1];
                    }
                    b.text = [hl];
                    lines.popFront();
                    break;
                case LineType.SetextHeader:
                    // An underline without a preceding text line is dropped.
                    lines.popFront();
                    break;
                case LineType.UList:
                case LineType.OList:
                    b.type = ln.type == LineType.UList ? BlockType.UList
                                                       : BlockType.OList;
                    auto itemindent = baseIndent ~ IndentType.White;
                    bool firstItem = true, paraMode = false;
                    while (!lines.empty && lines.front.type == ln.type
                           && lines.front.indent == baseIndent) {
                        Block itm;
                        itm.text = skipText(lines, itemindent);
                        itm.text[0] = removeListPrefix(
                            itm.text[0], ln.type
                        );

                        // emit <p></p> if there are blank lines
                        // between the items
                        if (firstItem && !lines.empty
                            && lines.front.type == LineType.Blank) {
                            paraMode = true;
                        }
                        firstItem = false;
                        if (paraMode) {
                            Block para;
                            para.type = BlockType.Paragraph;
                            para.text = itm.text;
                            itm.blocks ~= para;
                            itm.text = null;
                        }

                        this.parseBlocks(itm, lines, itemindent);
                        itm.type = BlockType.ListItem;
                        b.blocks ~= itm;
                    }
                    break;
                case LineType.HtmlBlock:
                    int nestlevel = 0;
                    auto starttag = parseHtmlBlockLine(ln.unindented);
                    if (!starttag.isHtmlBlock || !starttag.open) {
                        // Leaving the switch without consuming a line would
                        // make the enclosing loop spin forever on the same
                        // line; treat the line as ordinary text instead.
                        processPlain();
                        break;
                    }

                    b.type = BlockType.Plain;
                    while (!lines.empty) {
                        auto frontIndLen = lines.front.indent.length;
                        auto baseIndLen = baseIndent.length;
                        if (frontIndLen < baseIndLen) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndLen] != baseIndent) {
                            break;
                        }

                        auto str = lines.front.unindent(baseIndent.length);
                        auto taginfo = parseHtmlBlockLine(str);
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                        // Track nesting of the tag that opened the block.
                        if (taginfo.isHtmlBlock
                            && taginfo.tagName == starttag.tagName) {
                            nestlevel += taginfo.open ? 1 : -1;
                        }
                        if (nestlevel <= 0) {
                            break;
                        }
                    }
                    break;
                case LineType.CodeBlockDelimiter:
                    lines.popFront(); // TODO: get language from line
                    b.type = BlockType.Code;
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }
                        if (lines.front.type == LineType.CodeBlockDelimiter) {
                            lines.popFront();
                            break;
                        }
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                    }
                    break;
                case LineType.Table:
                    // Can this be a valid table (is there a next line
                    // that could be a header separator)?
                    // Peek instead of popping first: the candidate header row
                    // must still be at the front when we fall back to plain
                    // text, otherwise it is lost (or, when it was the last
                    // line, skipText would read from an empty array).
                    immutable bool isTableHeader = (
                        lines.length >= 2
                        && (lines[1].type == LineType.Table)
                        && (lines[1].text.indexOf(" -") >= 0)
                        && (lines[1].text.indexOf("- ") >= 0)
                        && lines[1].text.allOf("-:| ")
                    );
                    if (!isTableHeader) {
                        // Not a valid table header,
                        // so let's assume it's plain markdown
                        processPlain();
                        break;
                    }
                    b.type = BlockType.Table;
                    // Parse header
                    b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                    // Skip the header row and the separator row
                    lines.popFrontN(2);
                    // Parse table rows
                    while (!lines.empty) {
                        ln = lines.front;
                        if (ln.type != LineType.Table) {
                            // not a table row, so let's assume
                            // it's the end of the table
                            break;
                        }
                        b.blocks ~= splitTableRow(ln);
                        lines.popFront();
                    }
                    break;
            }
            root.blocks ~= b;

        }
    }

    /***************************************************************************
     * Renders `block` (and, recursively, its children) as HTML into `dst`.
     *
     * Params:
     *     dst = output range receiving the HTML
     *     block = block to render
     *     links = reference-style link definitions of the document
     */
    void writeBlock(R)(ref R dst,
                       ref const Block block,
                       LinkRef[string] links) {
        final switch(block.type) {
            case BlockType.Plain:
                // Plain blocks are emitted verbatim (no Markdown processing).
                foreach (ln; block.text) {
                    dst.put(ln);
                    dst.put("\n");
                }
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                break;
            case BlockType.Text:
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                break;
            case BlockType.Paragraph:
                assert(block.blocks.length == 0);
                dst.put("<p>");
                writeMarkdownEscaped(dst, block, links);
                dst.put("</p>\n");
                break;
            case BlockType.Header:
                assert(block.blocks.length == 0);
                // Check before block.text[0] is used, not after.
                assert(block.text.length == 1);
                auto hlvl = block.headerLevel + this.headingBaseLevel-1;
                dst.formattedWrite(
                    "<h%s id=\"%s\">", hlvl, block.text[0].asSlug
                );
                writeMarkdownEscaped(dst, block.text[0], links);
                dst.formattedWrite("</h%s>\n", hlvl);
                break;
            case BlockType.OList:
                dst.put("<ol>\n");
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</ol>\n");
                break;
            case BlockType.UList:
                dst.put("<ul>\n");
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</ul>\n");
                break;
            case BlockType.ListItem:
                dst.put("<li>");
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</li>\n");
                break;
            case BlockType.Code:
                assert(block.blocks.length == 0);
                dst.put("<pre class=\"prettyprint\"><code>");
                if (this.processCode is null) {
                    foreach (ln; block.text) {
                        filterHTMLEscape(dst, ln);
                        dst.put("\n");
                    }
                } else {
                    // Escape into a buffer first so that the delegate sees
                    // the complete code block at once.
                    auto temp = appender!string();
                    foreach(ln; block.text){
                        filterHTMLEscape(temp, ln);
                        temp.put("\n");
                    }
                    dst.put(this.processCode(temp.data));
                }
                dst.put("</code></pre>");
                break;
            case BlockType.Quote:
                dst.put("<blockquote>");
                writeMarkdownEscaped(dst, block, links);
                foreach(b; block.blocks) {
                    this.writeBlock(dst, b, links);
                }
                dst.put("</blockquote>\n");
                break;
            case BlockType.Table:
                assert(block.blocks.length > 0);
                assert(block.blocks[0].type == BlockType.TableRow);
                // First row holds the header cells.
                dst.put("<table>\n<tr>");
                foreach (b; block.blocks[0].blocks) {
                    assert(b.type == BlockType.TableHeader);
                    dst.put("<th>");
                    writeMarkdownEscaped(dst, b.text[0], links);
                    dst.put("</th>");
                }
                dst.put("</tr>\n");
                if (block.blocks.length > 1) {
                    foreach(row; block.blocks[1 .. $]) {
                        assert(row.type == BlockType.TableRow);
                        dst.put("<tr>");
                        foreach(b; row.blocks) {
                            assert(b.type == BlockType.TableData);
                            dst.put("<td>");
                            writeMarkdownEscaped(dst, b.text[0], links);
                            dst.put("</td>");
                        }
                        dst.put("</tr>\n");
                    }
                }
                dst.put("</table>\n");
                break;
            case BlockType.TableRow:
            case BlockType.TableData:
            case BlockType.TableHeader:
                // Only ever rendered as part of a Table block.
                assert(0);
        }
    }

    /***************************************************************************
     * Renders the text lines of `block` with inline Markdown processing.
     * Lines are joined with "<br>" when `keepLineBreaks` is set, with a
     * newline otherwise; a trailing newline is written if there was any line.
     */
    void writeMarkdownEscaped(R)(ref R dst,
                                 ref const Block block,
                                 in LinkRef[string] links) {
        auto lines = cast(string[])block.text;
        auto text = this.flags & MarkdownFlags.keepLineBreaks
                    ? lines.join("<br>") : lines.join("\n");
        writeMarkdownEscaped(dst, text, links);
        if (lines.length) dst.put("\n");
    }


    /***************************************************************************
     * Renders one run of inline Markdown (`ln`) as HTML into `dst`:
     * backslash escapes, emphasis, inline code, links, images and autolinks.
     *
     * Params:
     *     dst = output range receiving the HTML
     *     ln = inline Markdown text
     *     linkrefs = reference-style link definitions of the document
     */
    void writeMarkdownEscaped(R)(ref R dst,
                                 string ln,
                                 in LinkRef[string] linkrefs) {
        string filterLink(string lnk, bool isImage) {
            return this.urlFilter ? this.urlFilter(lnk, isImage) : lnk;
        }

        // Two trailing spaces request a hard line break. A single trailing
        // space must not, or nearly every hand-typed line would get a <br/>.
        bool br = ln.endsWith("  ");
        while (ln.length > 0) {
            switch (ln[0]) {
                default:
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                    break;
                case '\\':
                    if (ln.length >= 2){
                        switch(ln[1]){
                            default:
                                // Not an escapable character: keep both.
                                dst.put(ln[0 .. 2]);
                                ln = ln[2 .. $];
                                break;
                            case '\'', '`', '*', '_', '{', '}', '[', ']',
                                '(', ')', '#', '+', '-', '.', '!':
                                dst.put(ln[1]);
                                ln = ln[2 .. $];
                                break;
                        }
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '_':
                    if (this.flags & MarkdownFlags.disableUnderscoreEmphasis) {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                        break;
                    }
                    goto case;
                case '*':
                    string text;
                    // NOTE(review): parseEmphasis is defined elsewhere; it is
                    // presumably responsible for advancing `ln` on success.
                    if (auto em = parseEmphasis(ln, text)) {
                        if (em == 1) {
                            dst.put("<em>");
                        } else if (em == 2) {
                            dst.put("<strong>");
                        } else {
                            dst.put("<strong><em>");
                        }
                        filterHTMLEscape(
                            dst, text, HTMLEscapeFlags.escapeMinimal
                        );
                        if (em == 1) {
                            dst.put("</em>");
                        } else if (em == 2) {
                            dst.put("</strong>");
                        } else {
                            // Close in reverse order of opening.
                            dst.put("</em></strong>");
                        }
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '`':
                    string code;
                    if (parseInlineCode(ln, code)) {
                        dst.put("<code class=\"prettyprint\">");
                        if (this.processCode is null) {
                            filterHTMLEscape(
                                dst, code, HTMLEscapeFlags.escapeMinimal
                            );
                        } else {
                            auto temp = appender!string();
                            filterHTMLEscape(
                                temp, code, HTMLEscapeFlags.escapeMinimal
                            );
                            dst.put(this.processCode(temp.data));
                        }
                        dst.put("</code>");
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '[':
                    Link link;
                    if (parseLink(ln, link, linkrefs)) {
                        dst.put("<a href=\"");
                        filterHTMLAttribEscape(dst, filterLink(link.url, false));
                        dst.put("\"");
                        if (link.title.length){
                            dst.put(" title=\"");
                            filterHTMLAttribEscape(dst, link.title);
                            dst.put("\"");
                        }
                        dst.put(">");
                        // Link text may itself contain inline Markdown.
                        writeMarkdownEscaped(dst, link.text, linkrefs);
                        dst.put("</a>");
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '!':
                    Link link;
                    if (parseLink(ln, link, linkrefs)) {
                        dst.put("<img src=\"");
                        filterHTMLAttribEscape(dst, filterLink(link.url, true));
                        dst.put("\" alt=\"");
                        filterHTMLAttribEscape(dst, link.text);
                        dst.put("\"");
                        if (link.title.length){
                            dst.put(" title=\"");
                            filterHTMLAttribEscape(dst, link.title);
                            dst.put("\"");
                        }
                        dst.put(">");
                    } else if (ln.length >= 2) {
                        dst.put(ln[0 .. 2]);
                        ln = ln[2 .. $];
                    } else {
                        dst.put(ln[0]);
                        ln = ln[1 .. $];
                    }
                    break;
                case '>':
                    if (this.flags & MarkdownFlags.noInlineHtml) {
                        // Inline HTML is disabled: neutralise the bracket.
                        dst.put("&gt;");
                    } else {
                        dst.put(ln[0]);
                    }
                    ln = ln[1 .. $];
                    break;
                case '<':
                    string url;
                    if (parseAutoLink(ln, url)) {
                        bool isEmail = url.startsWith("mailto:");
                        dst.put("<a href=\"");
                        if (isEmail) {
                            filterHTMLAllEscape(dst, url);
                        } else {
                            filterHTMLAttribEscape(dst, filterLink(url, false));
                        }
                        dst.put("\">");
                        if (isEmail) {
                            // Show the address without the "mailto:" scheme.
                            filterHTMLAllEscape(dst, url[7 .. $]);
                        } else {
                            filterHTMLEscape(
                                dst, url, HTMLEscapeFlags.escapeMinimal
                            );
                        }
                        dst.put("</a>");
                    } else {
                        if (ln.startsWith("<br>")) {
                            // always support line breaks,
                            // since we embed them here ourselves!
                            dst.put("<br/>");
                            ln = ln[4 .. $];
                        } else if (ln.startsWith("<br/>")) {
                            dst.put("<br/>");
                            ln = ln[5 .. $];
                        } else {
                            if (this.flags & MarkdownFlags.noInlineHtml) {
                                // Inline HTML is disabled: neutralise it.
                                dst.put("&lt;");
                            } else {
                                dst.put(ln[0]);
                            }
                            ln = ln[1 .. $];
                        }
                    }
                    break;
            }
        }
        if (br) {
            dst.put("<br/>");
        }
    }
}


/// Bit flags selecting optional parser features.
enum MarkdownFlags {
    none = 0,
    keepLineBreaks = 1<<0,
    backtickCodeBlocks = 1<<1,
    noInlineHtml = 1<<2,
    //noLinks = 1<<3,
    //allowUnsafeHtml = 1<<4,
    /// If used, subheadings are underlined by stars ('*') instead of dashes ('-')
    alternateSubheaders = 1 << 5,
    /// If used, '_' may not be used for emphasis ('*' may still be used)
    disableUnderscoreEmphasis = 1 << 6,
    supportTables = 1 << 7,
    vanillaMarkdown = none,
    forumDefault = keepLineBreaks | backtickCodeBlocks | noInlineHtml,
    githubInspired = backtickCodeBlocks | supportTables,
}



unittest {
    auto text =
`=======
Heading
=======

**bold** *italic*

List:

* a
* b
* c
`;

    writeln("===========");
    writeln(text);
    writeln("===========");
    writeln(convertMarkdownToHTML(text));
}

unittest {
    auto source =
`Merged prototype.
The prototype is not locked, allowing to add more components.
To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().`;
    auto expected =
`<p>Merged prototype.
The prototype is not locked, allowing to add more components.
To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().
</p>
`;
    string result = convertMarkdownToHTML(source);
    assert(result == expected);
}


unittest {
    auto source = `*stars* under_score_s`;
    auto expectedUnderscores = `<p><em>stars</em> under<em>score</em>s
</p>
`;
    auto expectedNoUnderscores = `<p><em>stars</em> under_score_s
</p>
`;

    string resultUnderscores = convertMarkdownToHTML(source);
    string resultNoUnderscores = convertMarkdownToHTML(
        source, MarkdownFlags.disableUnderscoreEmphasis
    );

    assert(
        resultUnderscores == expectedUnderscores,
        "'%s' != '%s'".format(resultUnderscores, expectedUnderscores)
    );
    assert(
        resultNoUnderscores == expectedNoUnderscores,
        "'%s' != '%s'".format(resultNoUnderscores, expectedNoUnderscores)
    );
}


// Unittest for code post-processing
unittest {
    // The last line is indented by four spaces so that it forms a code block.
    auto text =
"`inline code`" ~ `
block:

    code block
`;
    auto expected =
`<p><code class="prettyprint">AAAAAAAAAAA</code>
block:
</p>
<pre class="prettyprint"><code>AAAAAAAAAA</code></pre>`;

    string processCode(string input) @safe nothrow {
        import std.exception: assumeWontThrow;
        // ignore newlines generated by code block processing
        input = input.filter!(c => c != '\n').array.to!string.assumeWontThrow;
        return 'A'.repeat(input.length).array.to!string.assumeWontThrow;
    }
    auto settings = new MarkdownSettings;
    settings.processCode = &processCode;
    auto result = convertMarkdownToHTML(text, settings);

    auto err = format!"Unexpected code processing result:\n%s\nExpected:\n%s"(
        result, expected
    );
    assert(result == expected, err);
}



/// One heading of a document outline, with its nested sub-headings.
struct Section {
    size_t headingLevel;
    string caption;
    string anchor;
    Section[] subSections;
}

private {
    immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}


/// Kind of one indentation level of a line.
private enum IndentType {
    White,
    Quote
}
812 813 814 private enum LineType { 815 Undefined, 816 Blank, 817 Plain, 818 Hline, 819 AtxHeader, 820 SetextHeader, 821 UList, 822 OList, 823 HtmlBlock, 824 CodeBlockDelimiter, 825 Table, 826 } 827 828 private struct Line { 829 LineType type; 830 IndentType[] indent; 831 string text; 832 string unindented; 833 834 string unindent(size_t n) pure @safe { 835 assert(n <= indent.length); 836 string ln = text; 837 foreach (i; 0 .. n) { 838 final switch(indent[i]) { 839 case IndentType.White: 840 ln = (ln[0] == ' ') ? ln[4 .. $] : ln[1 .. $]; 841 break; 842 case IndentType.Quote: 843 ln = ln.stripLeft()[1 .. $]; 844 break; 845 } 846 } 847 return ln; 848 } 849 } 850 851 852 private enum BlockType { 853 Plain, 854 Text, 855 Paragraph, 856 Header, 857 OList, 858 UList, 859 ListItem, 860 Code, 861 Quote, 862 Table, 863 TableRow, 864 TableHeader, 865 TableData, 866 } 867 868 869 private struct Block { 870 BlockType type; 871 string[] text; 872 Block[] blocks; 873 size_t headerLevel; 874 875 // A human-readable toString for debugging. 876 string toString() { 877 return toStringNested; 878 } 879 880 // toString implementation; capable of indenting nested blocks. 881 string toStringNested(uint depth = 0) { 882 string indent = " ".repeat(depth * 2).joiner.array.to!string; 883 return indent ~ "%s\n".format(type) 884 ~ indent ~ "%s\n".format(text) 885 ~ blocks.map!((ref b) => b.toStringNested(depth + 1)) 886 .joiner.array.to!string 887 ~ indent ~ "%s\n".format(headerLevel); 888 } 889 } 890 891 892 private string[] skipText(ref Line[] lines, IndentType[] indent) 893 pure @safe { 894 static bool matchesIndent(IndentType[] indent, IndentType[] baseIndent) { 895 // Any *plain* line with a higher indent should still be a part of 896 // a paragraph read by skipText(). 
Returning false here resulted in 897 // text such as: 898 // --- 899 // First line 900 // Second line 901 // --- 902 // being interpreted as a paragraph followed by a code block, even though 903 // other Markdown processors would interpret it as a single paragraph. 904 905 // if (indent.length > baseIndent.length ) return false; 906 if (indent.length > baseIndent.length ) return true; 907 if (indent != baseIndent[0 .. indent.length] ) return false; 908 sizediff_t qidx = -1; 909 foreach_reverse (i, tp; baseIndent) { 910 if (tp == IndentType.Quote) { 911 qidx = i; 912 break; 913 } 914 } 915 if (qidx >= 0) { 916 qidx = baseIndent.length - 1 - qidx; 917 if (indent.length <= qidx) { 918 return false; 919 } 920 } 921 return true; 922 } 923 924 string[] ret; 925 926 while (true) { 927 ret ~= lines.front.unindent( 928 min(indent.length, lines.front.indent.length) 929 ); 930 lines.popFront(); 931 932 if (lines.empty || !matchesIndent(lines.front.indent, indent) 933 || lines.front.type != LineType.Plain) { 934 return ret; 935 } 936 } 937 } 938 939 940 private Block splitTableRow(BlockType dataType = BlockType.TableData)(Line line) 941 pure @safe { 942 static assert( 943 dataType == BlockType.TableHeader || dataType == BlockType.TableData 944 ); 945 946 string ln = line.text.strip(); 947 immutable size_t b = (ln[0..2] == "| ") ? 2 : 0; 948 immutable size_t e = (ln[($ - 2) .. $] == " |") ? (ln.length - 2) 949 : ln.length; 950 Block ret; 951 ret.type = BlockType.TableRow; 952 foreach (txt; ln[b .. 
e].split(" | ")) { 953 Block d; 954 d.text = [txt.strip(" ")]; 955 d.type = dataType; 956 ret.blocks ~= d; 957 } 958 return ret; 959 } 960 961 962 private void writeBlock(R)(ref R dst, 963 ref const Block block, 964 LinkRef[string] links, 965 scope MarkdownSettings settings) { 966 final switch(block.type) { 967 case BlockType.Plain: 968 foreach (ln; block.text) { 969 dst.put(ln); 970 dst.put("\n"); 971 } 972 foreach(b; block.blocks) { 973 writeBlock(dst, b, links, settings); 974 } 975 break; 976 case BlockType.Text: 977 writeMarkdownEscaped(dst, block, links, settings); 978 foreach(b; block.blocks) { 979 writeBlock(dst, b, links, settings); 980 } 981 break; 982 case BlockType.Paragraph: 983 assert(block.blocks.length == 0); 984 dst.put("<p>"); 985 writeMarkdownEscaped(dst, block, links, settings); 986 dst.put("</p>\n"); 987 break; 988 case BlockType.Header: 989 assert(block.blocks.length == 0); 990 auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0); 991 dst.formattedWrite("<h%s id=\"%s\">", hlvl, block.text[0].asSlug); 992 assert(block.text.length == 1); 993 writeMarkdownEscaped(dst, block.text[0], links, settings); 994 dst.formattedWrite("</h%s>\n", hlvl); 995 break; 996 case BlockType.OList: 997 dst.put("<ol>\n"); 998 foreach(b; block.blocks) { 999 writeBlock(dst, b, links, settings); 1000 } 1001 dst.put("</ol>\n"); 1002 break; 1003 case BlockType.UList: 1004 dst.put("<ul>\n"); 1005 foreach(b; block.blocks) { 1006 writeBlock(dst, b, links, settings); 1007 } 1008 dst.put("</ul>\n"); 1009 break; 1010 case BlockType.ListItem: 1011 dst.put("<li>"); 1012 writeMarkdownEscaped(dst, block, links, settings); 1013 foreach(b; block.blocks) { 1014 writeBlock(dst, b, links, settings); 1015 } 1016 dst.put("</li>\n"); 1017 break; 1018 case BlockType.Code: 1019 assert(block.blocks.length == 0); 1020 dst.put("<pre class=\"prettyprint\"><code>"); 1021 if (settings.processCode is null) { 1022 foreach (ln; block.text) { 1023 filterHTMLEscape(dst, ln); 1024 
dst.put("\n"); 1025 } 1026 } else { 1027 auto temp = appender!string(); 1028 foreach(ln; block.text){ 1029 filterHTMLEscape(temp, ln); 1030 temp.put("\n"); 1031 } 1032 dst.put(settings.processCode(temp.data)); 1033 } 1034 dst.put("</code></pre>"); 1035 break; 1036 case BlockType.Quote: 1037 dst.put("<blockquote>"); 1038 writeMarkdownEscaped(dst, block, links, settings); 1039 foreach(b; block.blocks) 1040 writeBlock(dst, b, links, settings); 1041 dst.put("</blockquote>\n"); 1042 break; 1043 case BlockType.Table: 1044 assert(block.blocks.length > 0); 1045 assert(block.blocks[0].type == BlockType.TableRow); 1046 dst.put("<table>\n<tr>"); 1047 foreach (b; block.blocks[0].blocks) { 1048 assert(b.type == BlockType.TableHeader); 1049 dst.put("<th>"); 1050 writeMarkdownEscaped(dst, b.text[0], links, settings); 1051 dst.put("</th>"); 1052 } 1053 dst.put("</tr>\n"); 1054 if (block.blocks.length > 1) { 1055 foreach(row; block.blocks[1 .. $]) { 1056 assert(row.type == BlockType.TableRow); 1057 dst.put("<tr>"); 1058 foreach(b; row.blocks) { 1059 assert(b.type == BlockType.TableData); 1060 dst.put("<td>"); 1061 writeMarkdownEscaped(dst, b.text[0], links, settings); 1062 dst.put("</td>"); 1063 } 1064 dst.put("</tr>\n"); 1065 } 1066 } 1067 dst.put("</table>\n"); 1068 break; 1069 case BlockType.TableRow: 1070 case BlockType.TableData: 1071 case BlockType.TableHeader: 1072 assert(0); 1073 } 1074 } 1075 1076 1077 private void writeMarkdownEscaped(R)(ref R dst, 1078 ref const Block block, 1079 in LinkRef[string] links, 1080 scope MarkdownSettings settings) { 1081 auto lines = cast(string[])block.text; 1082 auto text = settings.flags & MarkdownFlags.keepLineBreaks 1083 ? 
lines.join("<br>") : lines.join("\n");
    writeMarkdownEscaped(dst, text, links, settings);
    if (lines.length) {
        dst.put("\n");
    }
}


/*******************************************************************************
 * Renders a run of inline Markdown as HTML and appends it to `dst`.
 *
 * The text is consumed from left to right. Backslash escapes, emphasis,
 * inline code, links, images, auto-links and `<br>` tags are recognised;
 * every other character is copied through unchanged.
 *
 * Params:
 *     dst = output range receiving the generated HTML
 *     ln = inline Markdown text to render
 *     linkrefs = reference-style link definitions handed on to parseLink
 *     settings = parser flags plus the optional URL filter and code hook
 */
private void writeMarkdownEscaped(R)(ref R dst,
                                     string ln,
                                     in LinkRef[string] linkrefs,
                                     scope MarkdownSettings settings) {
    // Runs the user supplied URL filter, if there is one.
    string filterLink(string lnk, bool isImage) {
        return settings.urlFilter ? settings.urlFilter(lnk, isImage) : lnk;
    }

    // Markdown asks for a hard line break with two (or more) trailing spaces;
    // a single trailing space must not produce one.
    bool br = ln.endsWith("  ");
    while (ln.length > 0) {
        switch (ln[0]) {
            default:
                dst.put(ln[0]);
                ln = ln[1 .. $];
                break;
            case '\\':
                if (ln.length >= 2) {
                    switch (ln[1]) {
                        default:
                            // not an escapable character: keep the backslash
                            dst.put(ln[0 .. 2]);
                            ln = ln[2 .. $];
                            break;
                        // '\\' lets a literal backslash be written as "\\"
                        case '\\', '\'', '`', '*', '_', '{', '}', '[', ']',
                             '(', ')', '#', '+', '-', '.', '!':
                            dst.put(ln[1]);
                            ln = ln[2 .. $];
                            break;
                    }
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '_':
                if (settings.flags & MarkdownFlags.disableUnderscoreEmphasis) {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                    break;
                }
                goto case;
            case '*':
                string text;
                if (auto em = parseEmphasis(ln, text)) {
                    // em is the marker length: 1 = <em>, 2 = <strong>, 3 = both
                    dst.put(em == 1 ? "<em>"
                            : em == 2 ? "<strong>" : "<strong><em>");
                    filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
                    dst.put(em == 1 ? "</em>"
                            : em == 2 ? "</strong>" : "</em></strong>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '`':
                string code;
                if (parseInlineCode(ln, code)) {
                    dst.put("<code class=\"prettyprint\">");
                    if (settings.processCode is null) {
                        filterHTMLEscape(
                            dst, code, HTMLEscapeFlags.escapeMinimal
                        );
                    } else {
                        // the hook receives the already escaped code
                        auto temp = appender!string();
                        filterHTMLEscape(
                            temp, code, HTMLEscapeFlags.escapeMinimal
                        );
                        dst.put(settings.processCode(temp.data));
                    }
                    dst.put("</code>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '[':
                Link link;
                if (parseLink(ln, link, linkrefs)) {
                    dst.put("<a href=\"");
                    filterHTMLAttribEscape(dst, filterLink(link.url, false));
                    dst.put("\"");
                    if (link.title.length) {
                        dst.put(" title=\"");
                        filterHTMLAttribEscape(dst, link.title);
                        dst.put("\"");
                    }
                    dst.put(">");
                    // the link text may itself contain inline Markdown
                    writeMarkdownEscaped(dst, link.text, linkrefs, settings);
                    dst.put("</a>");
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '!':
                Link link;
                if (parseLink(ln, link, linkrefs)) {
                    dst.put("<img src=\"");
                    filterHTMLAttribEscape(dst, filterLink(link.url, true));
                    dst.put("\" alt=\"");
                    filterHTMLAttribEscape(dst, link.text);
                    dst.put("\"");
                    if (link.title.length) {
                        dst.put(" title=\"");
                        filterHTMLAttribEscape(dst, link.title);
                        dst.put("\"");
                    }
                    dst.put(">");
                } else if (ln.length >= 2) {
                    dst.put(ln[0 .. 2]);
                    ln = ln[2 .. $];
                } else {
                    dst.put(ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '>':
                if (settings.flags & MarkdownFlags.noInlineHtml) {
                    // inline HTML is disabled: neutralise the bracket
                    dst.put("&gt;");
                } else {
                    dst.put(ln[0]);
                }
                ln = ln[1 .. $];
                break;
            case '<':
                string url;
                if (parseAutoLink(ln, url)) {
                    bool isEmail = url.startsWith("mailto:");
                    dst.put("<a href=\"");
                    if (isEmail) {
                        filterHTMLAllEscape(dst, url);
                    } else {
                        filterHTMLAttribEscape(dst, filterLink(url, false));
                    }
                    dst.put("\">");
                    if (isEmail) {
                        // show the address without the "mailto:" prefix
                        filterHTMLAllEscape(dst, url[7 .. $]);
                    } else {
                        filterHTMLEscape(
                            dst, url, HTMLEscapeFlags.escapeMinimal
                        );
                    }
                    dst.put("</a>");
                } else {
                    if (ln.startsWith("<br>")) {
                        // always support line breaks,
                        // since we embed them here ourselves!
                        dst.put("<br/>");
                        ln = ln[4 .. $];
                    } else if (ln.startsWith("<br/>")) {
                        dst.put("<br/>");
                        ln = ln[5 .. $];
                    } else {
                        if (settings.flags & MarkdownFlags.noInlineHtml) {
                            // inline HTML is disabled: neutralise the bracket
                            dst.put("&lt;");
                        } else {
                            dst.put(ln[0]);
                        }
                        ln = ln[1 .. $];
                    }
                }
                break;
        }
    }
    if (br) {
        dst.put("<br/>");
    }
}


/// Tells whether `ln` consists of spaces and tabs only (an empty line counts).
private bool isLineBlank(string ln)
pure @safe {
    return allOf(ln, " \t");
}


/*******************************************************************************
 * Tells whether `ln` is a setext underline: after leading whitespace, a run
 * of '=' or a run of `subHeaderChar`, followed by nothing but spaces/tabs.
 */
private bool isSetextHeaderLine(string ln, char subHeaderChar) pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 1) {
        return false;
    }
    if (ln[0] == '=') {
        while (!ln.empty && ln.front == '=') {
            ln.popFront();
        }
        return allOf(ln, " \t");
    }
    if (ln[0] == subHeaderChar) {
        while (!ln.empty && ln.front == subHeaderChar) {
            ln.popFront();
        }
        return allOf(ln, " \t");
    }
    return false;
}


/// Tells whether `ln` starts (after leading whitespace) with one to six '#'
/// characters that are directly followed by a space.
private bool isAtxHeaderLine(string ln) pure @safe {
    ln = stripLeft(ln);
    size_t i = 0;
    while (i < ln.length && ln[i] == '#') {
        i++;
    }
    if (i < 1 || i > 6 || i >= ln.length) {
        return false;
    }
    return ln[i] == ' ';
}


/// Tells whether `ln` is a horizontal rule: only spaces plus at least three
/// occurrences of a single rule character ('-', '*' or '_').
private bool isHlineLine(string ln) pure @safe {
    if (allOf(ln, " -") && count(ln, '-') >= 3) return true;
    if (allOf(ln, " *") && count(ln, '*') >= 3) return true;
    if (allOf(ln, " _") && count(ln, '_') >= 3) return true;
    return false;
}


/// Tells whether the first non-blank character of `ln` is '>'.
private bool isQuoteLine(string ln) pure @safe {
    return ln.stripLeft().startsWith(">");
}


/// Counts the leading '>' markers of `ln`; whitespace before and between
/// the markers is skipped.
private size_t getQuoteLevel(string ln) pure @safe {
    size_t level = 0;
    ln = stripLeft(ln);
    while (ln.length > 0 && ln[0] == '>') {
        level++;
        ln = stripLeft(ln[1 ..
$]);
    }
    return level;
}


/// Tells whether `ln` starts an unordered list item: a '*', '+' or '-'
/// bullet (after leading whitespace) followed by a space or a tab.
private bool isUListLine(string ln) pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 2) return false;
    if (!canFind("*+-", ln[0])) return false;
    if (ln[1] != ' ' && ln[1] != '\t') return false;
    return true;
}


/// Tells whether `ln` starts an ordered list item: one or more ASCII digits
/// (after leading whitespace), a '.', then a space or a tab.
private bool isOListLine(string ln) pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 1) {
        return false;
    }
    if (ln[0] < '0' || ln[0] > '9') {
        return false;
    }
    ln = ln[1 .. $];
    // skip the remaining digits of the item number
    while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9') {
        ln = ln[1 .. $];
    }
    if (ln.length < 2) {
        return false;
    }
    if (ln[0] != '.') {
        return false;
    }
    if (ln[1] != ' ' && ln[1] != '\t') {
        return false;
    }
    return true;
}


/*******************************************************************************
 * Tells whether `ln` looks like a table row, i.e. contains " | ".
 *
 * With `proper == true` the line must additionally not be an ordered list
 * item, an unordered list item or an ATX header.
 */
private bool isTableRowLine(bool proper = false)(string ln) pure @safe {
    static if (proper) {
        return (
            (ln.indexOf(" | ") >= 0)
            && !ln.isOListLine
            && !ln.isUListLine
            && !ln.isAtxHeaderLine
        );
    } else {
        return (ln.indexOf(" | ") >= 0);
    }
}


/*******************************************************************************
 * Strips the list marker from the first line of a list item.
 *
 * For ordered lists everything up to and including the first '.' is removed,
 * for unordered lists the single bullet character; whitespace following the
 * marker is removed as well. Any other line type is a programming error.
 */
private string removeListPrefix(string str, LineType tp) pure @safe {
    switch (tp) {
        default: assert(false);
        case LineType.OList: // skip bullets and output using normal escaping
            auto idx = str.indexOfCT('.');
            assert(idx > 0);
            return str[idx+1 .. $].stripLeft();
        case LineType.UList:
            return stripLeft(str.stripLeft()[1 .. $]);
    }
}


/*******************************************************************************
 * Analyses a line that may consist of a single block-level HTML tag.
 *
 * Returns a struct with `isHtmlBlock` (the stripped line is exactly one tag
 * whose name is listed in `s_blockTags`), `tagName`, and `open` (false for a
 * closing tag such as `</div>`).
 */
private auto parseHtmlBlockLine(string ln) pure @safe {
    struct HtmlBlockInfo {
        bool isHtmlBlock;
        string tagName;
        bool open;
    }

    HtmlBlockInfo ret;
    ret.isHtmlBlock = false;
    ret.open = true;

    ln = strip(ln);
    if (ln.length < 3) return ret;
    if (ln[0] != '<') return ret;
    if (ln[1] == '/') {
        ret.open = false;
        ln = ln[1 .. $];
    }
    if (!std.ascii.isAlpha(ln[1])) {
        return ret;
    }
    ln = ln[1 .. $];
    // the tag name runs up to the first space or '>'
    size_t idx = 0;
    while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>')
        idx++;
    ret.tagName = ln[0 .. idx];
    ln = ln[idx .. $];

    // the tag must be closed and nothing may follow it on the line
    auto eidx = ln.indexOf('>');
    if (eidx < 0) return ret;
    if (eidx != ln.length - 1) return ret;

    if (!s_blockTags.canFind(ret.tagName)) return ret;

    ret.isHtmlBlock = true;
    return ret;
}


/// Tells whether `ln` is a lone opening block-level HTML tag.
private bool isHtmlBlockLine(string ln) pure @safe {
    auto bi = parseHtmlBlockLine(ln);
    return bi.isHtmlBlock && bi.open;
}


/// Tells whether `ln` is a lone closing block-level HTML tag.
private bool isHtmlBlockCloseLine(string ln) pure @safe {
    auto bi = parseHtmlBlockLine(ln);
    return bi.isHtmlBlock && !bi.open;
}


/// Tells whether `ln` starts with a ``` code fence.
private bool isCodeBlockDelimiter(string ln) pure @safe {
    return ln.startsWith("```");
}


// private string getHtmlTagName(string ln) pure @safe {
//     return parseHtmlBlockLine(ln).tagName;
// }


/// Tells whether `ln` is indented by one tab or by four spaces, the unit
/// that parseLines also consumes per indentation level.
private bool isLineIndented(string ln) pure @safe {
    return ln.startsWith("\t") || ln.startsWith("    ");
}


// private string unindentLine(string ln) pure @safe {
//     if (ln.startsWith("\t")) return ln[1 .. $];
//     if (ln.startsWith("    ")) return ln[4 .. $];
//     assert(false);
// }


/*******************************************************************************
 * Tries to parse an emphasis span ("*", "**", "***" or the '_' variants) at
 * the start of `str`.
 *
 * Returns the length of the marker (1 to 3) on success and 0 otherwise. On
 * success `text` receives the enclosed text and `str` is advanced past the
 * closing marker.
 */
private int parseEmphasis(ref string str, ref string text) pure @safe {
    string pstr = str;
    if (pstr.length < 3) return false;

    string ctag;
    if (pstr.startsWith("***")) ctag = "***";
    else if (pstr.startsWith("**")) ctag = "**";
    else if (pstr.startsWith("*")) ctag = "*";
    else if (pstr.startsWith("___")) ctag = "___";
    else if (pstr.startsWith("__")) ctag = "__";
    else if (pstr.startsWith("_")) ctag = "_";
    else return false;

    pstr = pstr[ctag.length ..
$];

    auto cidx = () @trusted {
        return pstr.indexOf(ctag); }();
    if (cidx < 1) return false;

    text = pstr[0 .. cidx];

    str = pstr[cidx+ctag.length .. $];
    return cast(int)ctag.length;
}


/*******************************************************************************
 * Tries to parse an inline code span at the start of `str`.
 *
 * The span is delimited by a single or a double backtick and must not be
 * empty. On success `code` receives the enclosed text and `str` is advanced
 * past the closing delimiter; on failure `str` is left untouched.
 */
private bool parseInlineCode(ref string str, ref string code) pure @safe {
    if (str.length < 3) {
        return false;
    }

    // a double backtick takes precedence over a single one
    string fence;
    if (str.startsWith("``")) {
        fence = "``";
    } else if (str.startsWith("`")) {
        fence = "`";
    } else {
        return false;
    }

    string rest = str[fence.length .. $];
    immutable closeIdx = () @trusted { return rest.indexOf(fence); }();
    if (closeIdx < 1) {
        return false;
    }

    code = rest[0 .. closeIdx];
    str = rest[closeIdx + fence.length .. $];
    return true;
}


/*******************************************************************************
 * Tries to parse a link or image at the start of `str`.
 *
 * Accepted forms are `[text](url "title")`, `[text][refid]` and `[text][]`,
 * each optionally preceded by '!'. Reference ids are looked up lower-cased
 * in `linkrefs`; an unknown id makes the parse fail.
 *
 * Returns true on success, in which case `dst` is filled in and `str` is
 * advanced past the link. On failure `str` is unchanged, but `dst` may have
 * been partially written.
 */
private bool parseLink(ref string str,
                       ref Link dst,
                       in LinkRef[string] linkrefs)
pure @safe {
    if (str.length < 3) {
        return false;
    }

    // an image is an ordinary link with a leading '!'
    string rest = (str[0] == '!') ? str[1 .. $] : str;

    // the text part: [text]
    if (rest[0] != '[') {
        return false;
    }
    auto closeIdx = rest.matchBracket();
    if (closeIdx < 1) {
        return false;
    }
    string refid;
    dst.text = rest[1 .. closeIdx];
    rest = rest[closeIdx + 1 .. $];

    // what follows is either (url "title") or an optional space and [refid]
    if (rest.length < 2) {
        return false;
    }
    if (rest[0] == '(') {
        closeIdx = rest.matchBracket();
        if (closeIdx < 1) {
            return false;
        }
        auto inner = rest[1 .. closeIdx];
        immutable quoteIdx = inner.indexOfCT('"');
        if (quoteIdx > 1 && std.ascii.isWhite(inner[quoteIdx - 1])) {
            // a quote preceded by whitespace starts the title
            dst.url = inner[0 .. quoteIdx].stripRight();
            immutable titleLen = inner[quoteIdx .. $].lastIndexOf('"');
            if (titleLen == 0) {
                return false;
            }
            assert(titleLen > 0);
            dst.title = inner[quoteIdx + 1 .. quoteIdx + titleLen];
        } else {
            dst.url = inner.stripRight();
            dst.title = null;
        }
        if (dst.url.startsWith("<") && dst.url.endsWith(">")) {
            dst.url = dst.url[1 .. $-1];
        }
        rest = rest[closeIdx + 1 .. $];
    } else {
        if (rest[0] == ' ') {
            rest = rest[1 .. $];
        }
        if (rest[0] != '[') {
            return false;
        }
        rest = rest[1 .. $];
        closeIdx = rest.indexOfCT(']');
        if (closeIdx < 0) {
            return false;
        }
        // an empty id ("[text][]") means the text doubles as the id
        refid = (closeIdx == 0) ? dst.text : rest[0 .. closeIdx];
        rest = rest[closeIdx + 1 .. $];
    }

    if (refid.length > 0) {
        auto found = toLower(refid) in linkrefs;
        if (!found) {
            return false;
        }
        dst.url = found.url;
        dst.title = found.title;
    }

    str = rest;
    return true;
}


@safe unittest {
    static void testLink(string s, Link exp, in LinkRef[string] refs)
    {
        Link link;
        assert(parseLink(s, link, refs), s);
        assert(link == exp);
    }
    LinkRef[string] refs;
    refs["ref"] = LinkRef("ref", "target", "title");

    testLink(`[link](target)`, Link("link", "target"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);
    testLink(`[link](target "title" )`, Link("link", "target", "title"), null);

    testLink(`[link](target)`, Link("link", "target"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);

    testLink(`[link][ref]`, Link("link", "target", "title"), refs);
    testLink(`[ref][]`, Link("ref", "target", "title"), refs);

    testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
    testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

    testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
    testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

    testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null);
1594 testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null); 1595 1596 testLink(`[link](target "")`, Link("link", "target", ""), null); 1597 testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null); 1598 1599 testLink(`[link](<target>)`, Link("link", "target"), null); 1600 1601 auto failing = [ 1602 `text`, `[link](target`, `[link]target)`, `[link]`, 1603 `[link(target)`, `link](target)`, `[link] (target)`, 1604 `[link][noref]`, `[noref][]` 1605 ]; 1606 Link link; 1607 foreach (s; failing) 1608 assert(!parseLink(s, link, refs), s); 1609 } 1610 1611 1612 private bool parseAutoLink(ref string str, ref string url) 1613 pure @safe { 1614 string pstr = str; 1615 if (pstr.length < 3 ) return false; 1616 if (pstr[0] != '<' ) return false; 1617 pstr = pstr[1 .. $]; 1618 auto cidx = pstr.indexOf('>'); 1619 if (cidx < 0 ) return false; 1620 url = pstr[0 .. cidx]; 1621 if (anyOf(url, " \t")) return false; 1622 if (!anyOf(url, ":@")) return false; 1623 str = pstr[cidx+1 .. $]; 1624 if (url.indexOf('@') > 0 ) url = "mailto:"~url; 1625 return true; 1626 } 1627 1628 1629 private LinkRef[string] scanForReferences(ref string[] lines) pure @safe { 1630 LinkRef[string] ret; 1631 bool[size_t] reflines; 1632 1633 // search for reference definitions: 1634 // [refid] link "opt text" 1635 // [refid] <link> "opt text" 1636 // "opt text", 'opt text', (opt text) 1637 // line must not be indented 1638 foreach (i, ln; lines) { 1639 if (isLineIndented(ln)) continue; 1640 ln = strip(ln); 1641 if (!ln.startsWith("[")) continue; 1642 ln = ln[1 .. $]; 1643 1644 auto idx = ln.indexOf("]:"); 1645 if (idx < 0) continue; 1646 string refid = ln[0 .. idx]; 1647 ln = stripLeft(ln[idx+2 .. $]); 1648 1649 string url; 1650 if (ln.startsWith("<")) { 1651 idx = ln.indexOfCT('>'); 1652 if (idx < 0 ) continue; 1653 url = ln[1 .. idx]; 1654 ln = ln[idx+1 .. $]; 1655 } else { 1656 idx = ln.indexOfCT(' '); 1657 if (idx > 0) { 1658 url = ln[0 .. 
idx]; 1659 ln = ln[idx+1 .. $]; 1660 } else { 1661 idx = ln.indexOfCT('\t'); 1662 if (idx < 0) { 1663 url = ln; 1664 ln = ln[$ .. $]; 1665 } else { 1666 url = ln[0 .. idx]; 1667 ln = ln[idx+1 .. $]; 1668 } 1669 } 1670 } 1671 ln = stripLeft(ln); 1672 1673 string title; 1674 if (ln.length >= 3) { 1675 if (ln[0] == '(' && ln[$-1] == ')' || 1676 ln[0] == '\"' && ln[$-1] == '\"' || 1677 ln[0] == '\'' && ln[$-1] == '\'' ) 1678 { 1679 title = ln[1 .. $-1]; 1680 } 1681 } 1682 1683 ret[toLower(refid)] = LinkRef(refid, url, title); 1684 reflines[i] = true; 1685 1686 // debug if (!__ctfe) logTrace("[detected ref on line %d]", i+1); 1687 } 1688 1689 // remove all lines containing references 1690 auto nonreflines = appender!(string[])(); 1691 nonreflines.reserve(lines.length); 1692 foreach( i, ln; lines ) 1693 if (i !in reflines ) 1694 nonreflines.put(ln); 1695 lines = nonreflines.data(); 1696 1697 return ret; 1698 } 1699 1700 1701 /******************************************************************************* 1702 * Generates an identifier suitable to use as within a URL. 1703 * 1704 * The resulting string will contain only ASCII lower case alphabetic or 1705 * numeric characters, as well as dashes (-). Every sequence of 1706 * non-alphanumeric characters will be replaced by a single dash. No dashes 1707 * will be at either the front or the back of the result string. 1708 */ 1709 auto asSlug(R)(R text) 1710 if (isInputRange!R && is(typeof(R.init.front) == dchar)) { 1711 static struct SlugRange { 1712 private { 1713 R _input; 1714 bool _dash; 1715 } 1716 1717 this(R input) 1718 { 1719 _input = input; 1720 skipNonAlphaNum(); 1721 } 1722 1723 @property bool empty() const { return _dash ? 
false : _input.empty; }
        @property char front() const {
            if (_dash) return '-';

            char r = cast(char)_input.front;
            if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A'));
            return r;
        }

        void popFront()
        {
            if (_dash) {
                _dash = false;
                return;
            }

            _input.popFront();
            auto na = skipNonAlphaNum();
            if (na && !_input.empty)
                _dash = true;
        }

        private bool skipNonAlphaNum()
        {
            bool have_skipped = false;
            while (!_input.empty) {
                switch (_input.front) {
                    default:
                        _input.popFront();
                        have_skipped = true;
                        break;
                    case 'a': .. case 'z':
                    case 'A': .. case 'Z':
                    case '0': .. case '9':
                        return have_skipped;
                }
            }
            return have_skipped;
        }
    }
    return SlugRange(text);
}
unittest {
    import std.algorithm : equal;
    assert("".asSlug.equal(""));
    assert(".,-".asSlug.equal(""));
    assert("abc".asSlug.equal("abc"));
    assert("aBc123".asSlug.equal("abc123"));
    assert("....aBc...123...".asSlug.equal("abc-123"));
}


/// A link reference definition (`[id]: url "title"`) found in the document.
private struct LinkRef {
    string id;
    string url;
    string title;
}


/// A parsed inline link or image.
private struct Link {
    string text;
    string url;
    string title;
}


@safe unittest { // alt and title attributes
    assert(convertMarkdownToHTML("![alt](http://example.org/image)")
        == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
    assert(convertMarkdownToHTML("![alt](http://example.org/image \"Title\")")
        == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}


@safe unittest { // complex links
    assert(convertMarkdownToHTML("their [install\ninstructions](<http://www.brew.sh>) and")
        == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
    // an image used as the text of a link
    assert(convertMarkdownToHTML("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
        == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}


@safe unittest { // check CTFE-ability
    enum res = convertMarkdownToHTML("### some markdown\n[foo][]\n[foo]: /bar");
    assert(res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}


@safe unittest { // correct line breaks in restrictive mode
    auto res = convertMarkdownToHTML("hello\nworld", MarkdownFlags.forumDefault);
    assert(res == "<p>hello<br/>world\n</p>\n", res);
}

// NOTE(review): the leading whitespace inside the literals of this disabled
// test looks collapsed (an indented code line needs a tab or four spaces) -
// restore it before re-enabling the test.
/*@safe unittest { // code blocks and blockquotes
    assert(convertMarkdownToHTML("\tthis\n\tis\n\tcode") ==
        "<pre><code>this\nis\ncode</code></pre>\n");
    assert(convertMarkdownToHTML(" this\n is\n code") ==
        "<pre><code>this\nis\ncode</code></pre>\n");
    assert(convertMarkdownToHTML(" this\n is\n\tcode") ==
        "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
    assert(convertMarkdownToHTML("\tthis\n\n\tcode") ==
        "<pre><code>this\n\ncode</code></pre>\n");
    assert(convertMarkdownToHTML("\t> this") ==
        "<pre><code>&gt; this</code></pre>\n");
    assert(convertMarkdownToHTML("> this") ==
        "<blockquote><pre><code>this</code></pre></blockquote>\n");
    assert(convertMarkdownToHTML("> this\n is code") ==
        "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
}*/


@safe unittest { // test simple border-less table
    auto res = convertMarkdownToHTML(
        "Col 1 | Col 2 | Col 3\n -- | -- | --\n val 1 | val 2 | val 3\n *val 4* | val 5 | value 6",
        MarkdownFlags.supportTables
    );
    assert(res == "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n", res);
}


@safe unittest { // test simple border'ed table
    auto res = convertMarkdownToHTML(
        "| Col 1 | Col 2 | Col 3 |\n| -- | -- | -- |\n| val 1 | val 2 | val 3 |\n| *val 4* | val 5 | value 6 |",
        MarkdownFlags.supportTables
    );
    assert(res == "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n", res);
}


@safe unittest { // a table that follows a paragraph
    string input = `
Table:

ID | Name | Address
- | ---- | ---------
1 | Foo | Somewhere
2 | Bar | Nowhere `;
    auto res = convertMarkdownToHTML(input, MarkdownFlags.supportTables);
    assert(res == "<p>Table:\n</p>\n<table>\n<tr><th>ID</th><th>Name</th><th>Address</th></tr>\n<tr><td>1</td><td>Foo</td><td>Somewhere</td></tr>\n<tr><td>2</td><td>Bar</td><td>Nowhere</td></tr>\n</table>\n", res);
}


package:


/// Function for work with HTML.

/*******************************************************************************
 * Writes the HTML escaped version of a given string to an output range.
 */
void filterHTMLEscape(R, S)
                     (ref R dst,
                      S str,
                      HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline)
if (isOutputRange!(R, dchar) && isInputRange!S) {
    for (; !str.empty; str.popFront()) {
        filterHTMLEscape(dst, str.front, flags);
    }
}


/*******************************************************************************
 * Writes the HTML escaped version of a given string to an output range
 * (also escapes double quotes).
1888 */ 1889 void filterHTMLAttribEscape(R, S)(ref R dst, S str) 1890 if (isOutputRange!(R, dchar) && isInputRange!S) { 1891 for (; !str.empty; str.popFront()) { 1892 filterHTMLEscape( 1893 dst, 1894 str.front, 1895 HTMLEscapeFlags.escapeNewline | HTMLEscapeFlags.escapeQuotes 1896 ); 1897 } 1898 } 1899 1900 1901 /******************************************************************************* 1902 * Writes the HTML escaped version of a given string to an output range 1903 * (escapes every character). 1904 */ 1905 void filterHTMLAllEscape(R, S)(ref R dst, S str) 1906 if (isOutputRange!(R, dchar) && isInputRange!S) { 1907 for (; !str.empty; str.popFront()) { 1908 dst.put("&#"); 1909 dst.put(to!string(cast(uint)str.front)); 1910 dst.put(';'); 1911 } 1912 } 1913 1914 1915 /******************************************************************************* 1916 * Writes the HTML escaped version of a character to an output range. 1917 */ 1918 void filterHTMLEscape(R) 1919 (ref R dst, 1920 dchar ch, 1921 HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline ) { 1922 switch (ch) { 1923 default: 1924 if (flags & HTMLEscapeFlags.escapeUnknown) { 1925 dst.put("&#"); 1926 dst.put(to!string(cast(uint)ch)); 1927 dst.put(';'); 1928 } else dst.put(ch); 1929 break; 1930 case '"': 1931 if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("""); 1932 else dst.put('"'); 1933 break; 1934 case '\'': 1935 if (flags & HTMLEscapeFlags.escapeQuotes) dst.put("'"); 1936 else dst.put('\''); 1937 break; 1938 case '\r', '\n': 1939 if (flags & HTMLEscapeFlags.escapeNewline) { 1940 dst.put("&#"); 1941 dst.put(to!string(cast(uint)ch)); 1942 dst.put(';'); 1943 } else dst.put(ch); 1944 break; 1945 case 'a': .. case 'z': goto case; 1946 case 'A': .. case 'Z': goto case; 1947 case '0': .. 
case '9': goto case; 1948 case ' ', '\t', '-', '_', '.', ':', ',', ';', 1949 '#', '+', '*', '?', '=', '(', ')', '/', '!', 1950 '%' , '{', '}', '[', ']', '`', '´', '$', '^', '~': 1951 dst.put(cast(char)ch); 1952 break; 1953 case '<': dst.put("<"); break; 1954 case '>': dst.put(">"); break; 1955 case '&': dst.put("&"); break; 1956 } 1957 } 1958 1959 1960 /// Flags for HTML-escaping some symbols. 1961 enum HTMLEscapeFlags { 1962 escapeMinimal = 0, 1963 escapeQuotes = 1<<0, 1964 escapeNewline = 1<<1, 1965 escapeUnknown = 1<<2 1966 } 1967 1968 1969 /// Functions for work with string data 1970 1971 /******************************************************************************* 1972 * Checks if all characters in 'str' are contained in 'chars'. 1973 */ 1974 bool allOf(string str, string chars) 1975 @safe pure { 1976 foreach (dchar ch; str) { 1977 if (!chars.canFind(ch)) { 1978 return false; 1979 } 1980 } 1981 return true; 1982 } 1983 1984 ptrdiff_t indexOfCT(Char)(in Char[] s, 1985 dchar c, 1986 CaseSensitive cs = CaseSensitive.yes) 1987 @safe pure { 1988 if (__ctfe) { 1989 if (cs == CaseSensitive.yes) { 1990 foreach (i, dchar ch; s) { 1991 if (ch == c) { 1992 return i; 1993 } 1994 } 1995 } else { 1996 c = std.uni.toLower(c); 1997 foreach (i, dchar ch; s) { 1998 if (std.uni.toLower(ch) == c) { 1999 return i; 2000 } 2001 } 2002 } 2003 return -1; 2004 } 2005 return std..string.indexOf(s, c, cs); 2006 } 2007 2008 2009 /******************************************************************************* 2010 * Checks if any character in 'str' is contained in 'chars'. 2011 */ 2012 bool anyOf(string str, string chars) 2013 @safe pure { 2014 foreach (ch; str) { 2015 if (chars.canFind(ch)) { 2016 return true; 2017 } 2018 } 2019 return false; 2020 } 2021 2022 2023 /******************************************************************************* 2024 * Finds the closing bracket (works with any of '[', '$(LPAREN)', '<', '{'). 
*
 * Params:
 *     str = input string
 *     nested = whether to skip nested brackets
 * Returns:
 *     The index of the closing bracket or -1 for unbalanced strings
 *     and strings that don't start with a bracket.
 */
sizediff_t matchBracket(string str, bool nested = true)
@safe pure nothrow {
    if (str.length < 2) {
        return -1;
    }

    // the first character selects the bracket pair to balance
    immutable char opening = str[0];
    char closing;
    if (opening == '[') {
        closing = ']';
    } else if (opening == '(') {
        closing = ')';
    } else if (opening == '<') {
        closing = '>';
    } else if (opening == '{') {
        closing = '}';
    } else {
        return -1;
    }

    size_t depth = 1;
    for (size_t pos = 1; pos < str.length; ++pos) {
        immutable char current = str[pos];
        if (nested && current == opening) {
            ++depth;
        } else if (current == closing) {
            --depth;
        }
        if (depth == 0) {
            return pos;
        }
    }
    return -1;
}



////////////////////////////////////////////////////////////////////////////////
////                         DEPRECATED FUNCTIONS                           ////
////////////////////////////////////////////////////////////////////////////////


/*******************************************************************************
 * Returns the hierarchy of sections.
2064 */ 2065 Section[] getMarkdownOutline(string markdown_source, 2066 scope MarkdownSettings settings = null) { 2067 if (!settings) settings = new MarkdownSettings; 2068 auto all_lines = splitLines(markdown_source); 2069 auto lines = parseLines(all_lines, settings); 2070 Block root_block; 2071 parseBlocks(root_block, lines, null, settings); 2072 Section root; 2073 2074 foreach (ref sb; root_block.blocks) { 2075 if (sb.type != BlockType.Header) { 2076 continue; 2077 } 2078 auto s = &root; 2079 while (true) { 2080 if (s.subSections.length == 0) break; 2081 if (s.subSections[$-1].headingLevel >= sb.headerLevel) break; 2082 s = &s.subSections[$-1]; 2083 } 2084 s.subSections ~= Section( 2085 sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string 2086 ); 2087 } 2088 2089 return root.subSections; 2090 } 2091 /// 2092 unittest { 2093 auto mdText = "## first\n## second\n### third\n# fourth\n### fifth"; 2094 auto expected = [ 2095 Section(2, " first", "first"), 2096 Section(2, " second", "second", [Section(3, " third", "third")]), 2097 Section(1, " fourth", "fourth", [Section(3, " fifth", "fifth")]) 2098 ]; 2099 assert(getMarkdownOutline(mdText) == expected); 2100 } 2101 2102 2103 final class MarkdownSettings { 2104 /// Controls the capabilities of the parser. 2105 MarkdownFlags flags = MarkdownFlags.vanillaMarkdown; 2106 2107 /// Heading tags will start at this level. 2108 size_t headingBaseLevel = 1; 2109 2110 /// Called for every link/image URL to perform arbitrary transformations. 2111 string delegate(string urlOrPath, bool isImage) urlFilter; 2112 2113 /*************************************************************************** 2114 * An optional delegate to post-process code blocks and inline code. 2115 * Useful to e.g. add code highlighting. 
2116 */ 2117 string delegate(string) @safe nothrow processCode = null; 2118 } 2119 2120 2121 private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings) 2122 pure @safe { 2123 Line[] ret; 2124 char subHeaderChar; 2125 if (settings.flags * MarkdownFlags.alternateSubheaders) { 2126 subHeaderChar = '*'; 2127 } else { 2128 subHeaderChar = '-'; 2129 } 2130 while( !lines.empty ) { 2131 auto ln = lines.front; 2132 lines.popFront(); 2133 2134 Line lninfo; 2135 lninfo.text = ln; 2136 2137 void determineIndent() { 2138 while (ln.length > 0) { 2139 if (ln[0] == '\t' ) { 2140 lninfo.indent ~= IndentType.White; 2141 ln.popFront(); 2142 } else if (ln.startsWith(" ")) { 2143 lninfo.indent ~= IndentType.White; 2144 ln.popFrontN(4); 2145 } else { 2146 ln = ln.stripLeft(); 2147 if (ln.startsWith(">")) { 2148 lninfo.indent ~= IndentType.Quote; 2149 ln.popFront(); 2150 } else { 2151 break; 2152 } 2153 } 2154 } 2155 lninfo.unindented = ln; 2156 } 2157 2158 determineIndent(); 2159 2160 if ((settings.flags & MarkdownFlags.backtickCodeBlocks) 2161 && isCodeBlockDelimiter(ln)) { 2162 lninfo.type = LineType.CodeBlockDelimiter; 2163 } else if (isAtxHeaderLine(ln)) { 2164 lninfo.type = LineType.AtxHeader; 2165 } else if ( isSetextHeaderLine(ln, subHeaderChar)) { 2166 lninfo.type = LineType.SetextHeader; 2167 } else if ((settings.flags & MarkdownFlags.supportTables) 2168 && isTableRowLine!false(ln)) { 2169 lninfo.type = LineType.Table; 2170 } else if (isHlineLine(ln)) { 2171 lninfo.type = LineType.Hline; 2172 } else if (isOListLine(ln)) { 2173 lninfo.type = LineType.OList; 2174 } else if (isUListLine(ln)) { 2175 lninfo.type = LineType.UList; 2176 } else if (isLineBlank(ln)) { 2177 lninfo.type = LineType.Blank; 2178 } else if (!(settings.flags & MarkdownFlags.noInlineHtml) 2179 && isHtmlBlockLine(ln)) { 2180 lninfo.type = LineType.HtmlBlock; 2181 } 2182 else lninfo.type = LineType.Plain; 2183 2184 ret ~= lninfo; 2185 } 2186 return ret; 2187 } 2188 2189 2190 private void 
parseBlocks(ref Block root,
            ref Line[] lines,
            IndentType[] baseIndent,
            scope MarkdownSettings settings)
pure @safe {
    // Consumes lines from the front of `lines` and appends the blocks built
    // from them to `root.blocks`. Deeper indented runs become code blocks or
    // nested blocks (parsed recursively); the function returns as soon as a
    // line no longer carries `baseIndent` as a prefix of its indentation.
    if (baseIndent.length == 0) {
        root.type = BlockType.Text;
    } else if (baseIndent[$-1] == IndentType.Quote) {
        root.type = BlockType.Quote;
    }

    while (!lines.empty) {
        auto ln = lines.front;

        if (ln.type == LineType.Blank) {
            lines.popFront();
            continue;
        }

        if (ln.indent != baseIndent) {
            // the line belongs to an enclosing block: let the caller go on
            if (ln.indent.length < baseIndent.length ||
                ln.indent[0 .. baseIndent.length] != baseIndent) {
                return;
            }

            auto cindent = baseIndent ~ IndentType.White;
            if (ln.indent == cindent) {
                // exactly one extra level of white space: a code block
                Block cblock;
                cblock.type = BlockType.Code;
                while (!lines.empty &&
                       lines.front.indent.length >= cindent.length &&
                       lines.front.indent[0 .. cindent.length] == cindent) {
                    cblock.text ~= lines.front.unindent(cindent.length);
                    lines.popFront();
                }
                root.blocks ~= cblock;
            } else {
                // any other deeper indentation opens a nested block
                Block subblock;
                parseBlocks(subblock,
                            lines,
                            ln.indent[0 .. baseIndent.length+1],
                            settings);
                root.blocks ~= subblock;
            }
        } else {
            Block b;
            // Turns the line at the front of `lines` (and what skipText
            // takes along with it) into a paragraph. The current line must
            // not have been popped yet.
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }
            final switch (ln.type) {
                case LineType.Undefined: assert(false);
                case LineType.Blank: assert(false);
                case LineType.Plain:
                    if (lines.length >= 2 &&
                        lines[1].type == LineType.SetextHeader) {
                        // a plain line underlined with '=' or the
                        // sub-header character
                        auto setln = lines[1].unindented;
                        b.type = BlockType.Header;
                        b.text = [ln.unindented];
                        b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                        lines.popFrontN(2);
                    } else {
                        processPlain();
                    }
                    break;
                case LineType.Hline:
                    b.type = BlockType.Plain;
                    b.text = ["<hr>"];
                    lines.popFront();
                    break;
                case LineType.AtxHeader:
                    b.type = BlockType.Header;
                    string hl = ln.unindented;
                    b.headerLevel = 0;
                    // the number of leading '#' is the header level
                    while (hl.length > 0 && hl[0] == '#') {
                        b.headerLevel++;
                        hl = hl[1 .. $];
                    }
                    // drop closing '#' characters and trailing spaces
                    while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
                        hl = hl[0 .. $-1];
                    b.text = [hl];
                    lines.popFront();
                    break;
                case LineType.SetextHeader:
                    // an underline without a plain line above it
                    lines.popFront();
                    break;
                case LineType.UList:
                case LineType.OList:
                    b.type = ln.type == LineType.UList ? BlockType.UList
                                                       : BlockType.OList;
                    auto itemindent = baseIndent ~ IndentType.White;
                    bool firstItem = true, paraMode = false;
                    while (!lines.empty && lines.front.type == ln.type &&
                           lines.front.indent == baseIndent) {
                        Block itm;
                        itm.text = skipText(lines, itemindent);
                        itm.text[0] = removeListPrefix(itm.text[0], ln.type);

                        // emit <p></p> if there are blank lines between the items
                        if (firstItem && !lines.empty &&
                            lines.front.type == LineType.Blank) {
                            paraMode = true;
                        }
                        firstItem = false;
                        if (paraMode) {
                            Block para;
                            para.type = BlockType.Paragraph;
                            para.text = itm.text;
                            itm.blocks ~= para;
                            itm.text = null;
                        }

                        parseBlocks(itm, lines, itemindent, settings);
                        itm.type = BlockType.ListItem;
                        b.blocks ~= itm;
                    }
                    break;
                case LineType.HtmlBlock:
                    int nestlevel = 0;
                    auto starttag = parseHtmlBlockLine(ln.unindented);
                    if (!starttag.isHtmlBlock || !starttag.open)
                        break;

                    b.type = BlockType.Plain;
                    // copy lines verbatim until the start tag is balanced
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }

                        auto str = lines.front.unindent(baseIndent.length);
                        auto taginfo = parseHtmlBlockLine(str);
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                        if (taginfo.isHtmlBlock
                            && taginfo.tagName == starttag.tagName) {
                            nestlevel += taginfo.open ? 1 : -1;
                        }
                        if (nestlevel <= 0) {
                            break;
                        }
                    }
                    break;
                case LineType.CodeBlockDelimiter:
                    lines.popFront(); // TODO: get language from line
                    b.type = BlockType.Code;
                    while (!lines.empty) {
                        if (lines.front.indent.length < baseIndent.length) {
                            break;
                        }
                        if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                            break;
                        }
                        if (lines.front.type == LineType.CodeBlockDelimiter) {
                            lines.popFront();
                            break;
                        }
                        b.text ~= lines.front.unindent(baseIndent.length);
                        lines.popFront();
                    }
                    break;
                case LineType.Table:
                    // Can this be a valid table (is there a next line that
                    // could be a header separator)? Only peek at that line:
                    // the current one has to stay at the front so that the
                    // paragraph fallback below still sees its text.
                    if (lines.length < 2) {
                        processPlain();
                        break;
                    }
                    Line lnNext = lines[1];
                    immutable bool isTableHeader = (
                        (lnNext.type == LineType.Table)
                        && (lnNext.text.indexOf(" -") >= 0)
                        && (lnNext.text.indexOf("- ") >= 0)
                        && lnNext.text.allOf("-:| ")
                    );
                    if (!isTableHeader) {
                        // Not a valid table header, so let's assume it's plain markdown
                        processPlain();
                        break;
                    }
                    b.type = BlockType.Table;
                    // Parse header
                    b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                    // Skip the header row and the separator row
                    lines.popFrontN(2);
                    // Parse table rows
                    while (!lines.empty) {
                        ln = lines.front;
                        if (ln.type != LineType.Table)
                            break; // not a table row, so let's assume it's the end of the table
                        b.blocks ~= splitTableRow(ln);
                        lines.popFront();
                    }
                    break;
            }
            root.blocks ~= b;
        }
    }
}