1 /******************************************************************************* 2 * Markdown parser implementation. 3 * 4 * Copyright: (c) 2012-2019 RejectedSoftware e.K. and the D community 5 * License: Subject to the terms of the MIT license. 6 * Repository: https://github.com/dlang-community/dmarkdown 7 * 8 * This library was forked and modified in 2021-2022 for the `hgen` project. 9 * hgen: https://gitlab.com/os-18/hgen 10 * Author: Eugene 'Vindex' Stulin <tech.vindex@gmail.com> 11 * 12 * MIT License (Expat version) 13 * 14 * Permission is hereby granted, free of charge, to any person obtaining a copy 15 * of this software and associated documentation files (the "Software"), to deal 16 * in the Software without restriction, including without limitation the rights 17 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 * copies of the Software, and to permit persons to whom the Software is 19 * furnished to do so, subject to the following conditions: 20 * 21 * The above copyright notice and this permission notice shall be included 22 * in all copies or substantial portions of the Software. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 
31 */ 32 33 module md; 34 35 import std.algorithm; 36 import std.array; 37 import std.ascii; 38 import std.conv; 39 import std.exception; 40 import std.format; 41 import std.uni; 42 import std.utf; 43 import core.exception; 44 import std.range; 45 import std.string; 46 import std.stdio; 47 48 alias UrlFilterFn = string delegate(string urlOrPath, bool isImage); 49 alias ProcessCodeFn = string delegate(string) @safe nothrow; 50 51 52 enum MarkdownFlags { 53 none = 0, 54 keepLineBreaks = 1 << 0, 55 backtickCodeBlocks = 1 << 1, 56 noInlineHtml = 1 << 2, 57 //noLinks = 1<<3, 58 //allowUnsafeHtml = 1<<4, 59 /// If used, 60 /// subheadings are underlined by stars ('*') instead of dashes ('-') 61 // alternateSubheaders = 1 << 5, 62 /// If used, '_' may not be used for emphasis ('*' may still be used) 63 disableUnderscoreEmphasis = 1 << 6, 64 supportTables = 1 << 7, 65 vanillaMarkdown = none, 66 forumDefault = keepLineBreaks | backtickCodeBlocks | noInlineHtml, 67 githubInspired = backtickCodeBlocks | supportTables, 68 } 69 70 71 private enum LineType { 72 Undefined, 73 Blank, 74 Plain, 75 Hline, 76 AtxHeader, 77 SetextHeader, 78 UList, 79 OList, 80 HtmlBlock, 81 CodeBlockDelimiter, 82 Table, 83 } 84 85 86 private enum BlockType { 87 Plain, 88 Text, 89 Paragraph, 90 Header, 91 OList, 92 UList, 93 ListItem, 94 Code, 95 Quote, 96 Table, 97 TableRow, 98 TableHeader, 99 TableData, 100 } 101 102 103 private struct LinkRef { 104 string id; 105 string url; 106 string title; 107 } 108 109 110 private bool edgesAreEqual(string l, char c) pure @safe { 111 return l[0] == c && l[$-1] == c; 112 } 113 114 115 private string extractUrl(ref string ln) pure @safe { 116 string url; 117 if (ln.startsWith("<")) { 118 auto idx = ln.indexOfCT('>'); 119 enforce(idx >= 0, "No reference line."); 120 url = ln[1 .. idx]; 121 ln = ln[idx + 1 .. $]; 122 } else { 123 auto idx = ln.indexOfCT(' '); 124 if (idx > 0) { 125 url = ln[0 .. idx]; 126 ln = ln[idx + 1 .. 
$]; 127 } else { 128 idx = ln.indexOfCT('\t'); 129 if (idx < 0) { 130 url = ln; 131 ln = ln[$ .. $]; 132 } else { 133 url = ln[0 .. idx]; 134 ln = ln[idx+1 .. $]; 135 } 136 } 137 } 138 ln = stripLeft(ln); 139 return url; 140 } 141 142 143 private LinkRef extractLinkRef(ref string ln) pure @safe { 144 enforce(!isLineIndented(ln), "No reference line."); 145 146 ln = strip(ln); 147 enforce(ln.startsWith("["), "No reference line."); 148 ln = ln[1 .. $]; // without '[' 149 150 auto idx = ln.indexOf("]:"); 151 enforce(idx >= 0, "No reference line."); 152 string refid = ln[0 .. idx]; 153 ln = ln[idx + 2 .. $].stripLeft; 154 155 string url = extractUrl(ln); 156 157 string title; 158 if (ln.length >= 3) { 159 if (ln[0] == '(' && ln[$ - 1] == ')' || 160 edgesAreEqual(ln, '"') || edgesAreEqual(ln, '\'') 161 ) { 162 title = ln[1 .. $-1]; 163 } 164 } 165 return LinkRef(refid, url, title); 166 } 167 168 169 private LinkRef[string] scanForReferences(ref string[] lines) pure @safe { 170 LinkRef[string] ret; 171 bool[size_t] reflines; 172 // line must not be indented 173 foreach (i, ln; lines) { 174 try { 175 auto r = extractLinkRef(ln); 176 ret[toLower(r.id)] = r; 177 } catch (Exception) { 178 continue; 179 } 180 reflines[i] = true; 181 } 182 // remove all lines containing references 183 auto nonreflines = appender!(string[])(); 184 nonreflines.reserve(lines.length); 185 foreach (i, ln; lines) { 186 if (i !in reflines) { 187 nonreflines.put(ln); 188 } 189 } 190 lines = nonreflines.data(); 191 return ret; 192 } 193 194 195 final class MarkdownSettings { 196 /// Controls the capabilities of the parser. 197 MarkdownFlags flags = MarkdownFlags.vanillaMarkdown; 198 199 /// Heading tags will start at this level. 200 size_t headingBaseLevel = 1; 201 202 /// Called for every link/image URL to perform arbitrary transformations. 
203 string delegate(string urlOrPath, bool isImage) urlFilter; 204 205 /*************************************************************************** 206 * An optional delegate to post-process code blocks and inline code. 207 * Useful to e.g. add code highlighting. 208 */ 209 string delegate(string) @safe nothrow processCode = null; 210 } 211 212 213 deprecated string convertMarkdownToHTML( 214 string markdownText, MarkdownSettings settings = null 215 ) @trusted { 216 auto handler = new MarkdownHandler(markdownText); 217 if (settings !is null) { 218 handler.flags = settings.flags; 219 handler.setHeadingBaseLevel(settings.headingBaseLevel); 220 handler.urlFilter = settings.urlFilter; 221 handler.processCode = settings.processCode; 222 } 223 return handler.convertToHTML(); 224 } 225 226 227 deprecated string convertMarkdownToHTML( 228 string markdownText, MarkdownFlags flags 229 ) @trusted { 230 auto handler = new MarkdownHandler(markdownText); 231 handler.setFlags(flags); 232 return handler.convertToHTML(); 233 } 234 235 236 private struct Line { 237 LineType type; 238 IndentType[] indent; 239 string text; 240 string unindented; 241 242 string unindent(size_t n) pure @safe { 243 assert(n <= indent.length); 244 string ln = text; 245 foreach (i; 0 .. n) { 246 final switch (indent[i]) { 247 case IndentType.White: 248 ln = (ln[0] == ' ') ? ln[4 .. $] : ln[1 .. $]; 249 break; 250 case IndentType.Quote: 251 ln = ln.stripLeft()[1 .. 
$]; 252 break; 253 } 254 } 255 return ln; 256 } 257 } 258 259 260 class MarkdownHandler { 261 this(string markdownText) @safe { 262 this.markdownText = markdownText; 263 } 264 265 void setProcessCodeFunction(ProcessCodeFn process) { 266 this.processCode = process; 267 } 268 269 void setHeadingBaseLevel(size_t newHeadingBaseLevel) { 270 this.headingBaseLevel = newHeadingBaseLevel; 271 } 272 273 void setFlags(MarkdownFlags newFlags) { 274 this.flags = newFlags; 275 } 276 277 void disableUnderscoreEmphasis() { 278 this.flags |= MarkdownFlags.disableUnderscoreEmphasis; 279 } 280 281 string convertToHTML() { 282 string[] allLines = std..string.splitLines(this.markdownText); 283 LinkRef[string] links = scanForReferences(allLines); 284 Line[] lines = this.parseLines(allLines); 285 Block rootBlock; 286 this.parseBlocks(rootBlock, lines); 287 auto dst = appender!string(); 288 this.writeBlock(dst, rootBlock, links); 289 return dst.data; 290 } 291 292 void setUrlFilterFunction(UrlFilterFn filter) { 293 this.urlFilter = filter; 294 } 295 296 private: 297 298 Line[] parseLines(ref string[] lines) @safe { 299 Line[] ret; 300 while (!lines.empty) { 301 Line lninfo; 302 lninfo.text = lines.front; 303 lines.popFront(); 304 determineIndent(lninfo); 305 lninfo.type = determineType(lninfo.unindented); 306 ret ~= lninfo; 307 } 308 return ret; 309 } 310 311 void determineIndent(ref Line lninfo) @safe { 312 auto ln = lninfo.text.idup; 313 while (ln.length > 0) { 314 if (ln[0] == '\t') { 315 lninfo.indent ~= IndentType.White; 316 ln.popFront(); 317 } else if (ln.startsWith(" ")) { 318 lninfo.indent ~= IndentType.White; 319 ln.popFrontN(4); 320 } else { 321 ln = ln.stripLeft(); 322 if (ln.startsWith(">")) { 323 lninfo.indent ~= IndentType.Quote; 324 ln.popFront(); 325 } else { 326 break; 327 } 328 } 329 } 330 lninfo.unindented = ln; 331 } 332 333 LineType determineType(string ln) 334 pure @safe { 335 alias MF = MarkdownFlags; 336 if ((flags & MF.backtickCodeBlocks) && 
isCodeBlockDelimiter(ln)) { 337 return LineType.CodeBlockDelimiter; 338 } else if (isAtxHeaderLine(ln)) { 339 return LineType.AtxHeader; 340 } else if (isSetextHeaderLine(ln, '-') || isSetextHeaderLine(ln, '=')) { 341 return LineType.SetextHeader; 342 } else if ((flags & MF.supportTables) && isTableRowLine(ln)) { 343 return LineType.Table; 344 } else if (isHlineLine(ln)) { 345 return LineType.Hline; 346 } else if (isOListLine(ln)) { 347 return LineType.OList; 348 } else if (isUListLine(ln)) { 349 return LineType.UList; 350 } else if (isLineBlank(ln)) { 351 return LineType.Blank; 352 } else if (!(flags & MF.noInlineHtml) && isHtmlBlockLine(ln)) { 353 return LineType.HtmlBlock; 354 } 355 return LineType.Plain; 356 } 357 358 void parseBlocks(ref Block root, 359 ref Line[] lines, 360 IndentType[] baseIndent = null) 361 pure @safe { 362 if (baseIndent.empty) { 363 root.type = BlockType.Text; 364 } else if (baseIndent[$ - 1] == IndentType.Quote) { 365 root.type = BlockType.Quote; 366 } 367 368 while (!lines.empty) { 369 auto ln = lines.front; 370 371 if (ln.type == LineType.Blank) { 372 lines.popFront(); 373 continue; 374 } 375 376 if (ln.indent != baseIndent) { 377 if (ln.indent.length < baseIndent.length || 378 ln.indent[0 .. baseIndent.length] != baseIndent 379 ) { 380 return; 381 } 382 383 auto cindent = baseIndent ~ IndentType.White; 384 if (ln.indent == cindent) { 385 Block cblock; 386 cblock.type = BlockType.Code; 387 while ( 388 !lines.empty && 389 lines.front.indent.length >= cindent.length && 390 lines.front.indent[0 .. cindent.length] == cindent 391 ) { 392 cblock.text ~= lines.front.unindent(cindent.length); 393 lines.popFront(); 394 } 395 root.blocks ~= cblock; 396 } else { 397 Block subblock; 398 this.parseBlocks( 399 subblock, 400 lines, 401 ln.indent[0 .. 
baseIndent.length + 1] 402 ); 403 root.blocks ~= subblock; 404 } 405 return; 406 } 407 408 Block b; 409 void processPlain() { 410 b.type = BlockType.Paragraph; 411 b.text = skipText(lines, baseIndent); 412 } 413 414 final switch (ln.type) { 415 case LineType.Undefined: 416 assert(false); 417 case LineType.Blank: 418 assert(false); 419 case LineType.Plain: 420 if (lines.length >= 2 && lines[1].type == LineType.SetextHeader) { 421 auto setln = lines[1].unindented; 422 b.type = BlockType.Header; 423 b.text = [ln.unindented]; 424 b.headerLevel = setln.strip()[0] == '=' ? 1 : 2; 425 lines.popFrontN(2); 426 } else { 427 processPlain(); 428 } 429 break; 430 case LineType.Hline: 431 b.type = BlockType.Plain; 432 b.text = ["<hr>"]; 433 lines.popFront(); 434 break; 435 case LineType.AtxHeader: 436 b.type = BlockType.Header; 437 string hl = ln.unindented; 438 b.headerLevel = 0; 439 while (hl.length > 0 && hl[0] == '#') { 440 b.headerLevel++; 441 hl = hl[1 .. $]; 442 } 443 while (hl.length > 0 && (hl[$ - 1] == '#' || hl[$ - 1] == ' ')) { 444 hl = hl[0 .. $ - 1]; 445 } 446 b.text = [hl]; 447 lines.popFront(); 448 break; 449 case LineType.SetextHeader: 450 lines.popFront(); 451 break; 452 case LineType.UList: 453 case LineType.OList: 454 b.type = ln.type == LineType.UList ? 
BlockType.UList 455 : BlockType.OList; 456 auto itemindent = baseIndent ~ IndentType.White; 457 bool firstItem = true, paraMode = false; 458 while ( 459 !lines.empty && 460 lines.front.type == ln.type && 461 lines.front.indent == baseIndent 462 ) { 463 Block itm; 464 itm.text = skipText(lines, itemindent); 465 itm.text[0] = removeListPrefix(itm.text[0], ln.type); 466 467 // emit <p></p> if there are blank lines 468 // between the items 469 if (firstItem && !lines.empty 470 && lines.front.type == LineType.Blank) { 471 paraMode = true; 472 } 473 firstItem = false; 474 if (paraMode) { 475 Block para; 476 para.type = BlockType.Paragraph; 477 para.text = itm.text; 478 itm.blocks ~= para; 479 itm.text = null; 480 } 481 482 this.parseBlocks(itm, lines, itemindent); 483 itm.type = BlockType.ListItem; 484 b.blocks ~= itm; 485 } 486 break; 487 case LineType.HtmlBlock: 488 int nestlevel = 0; 489 auto starttag = parseHtmlBlockLine(ln.unindented); 490 if (!starttag.isHtmlBlock || !starttag.open) 491 break; 492 493 b.type = BlockType.Plain; 494 while (!lines.empty) { 495 auto frontIndLen = lines.front.indent.length; 496 auto baseIndLen = baseIndent.length; 497 if (frontIndLen < baseIndLen) { 498 break; 499 } 500 if (lines.front.indent[0 .. baseIndLen] != baseIndent) { 501 break; 502 } 503 504 auto str = lines.front.unindent(baseIndent.length); 505 auto taginfo = parseHtmlBlockLine(str); 506 b.text ~= lines.front.unindent(baseIndent.length); 507 lines.popFront(); 508 if (taginfo.isHtmlBlock 509 && taginfo.tagName == starttag.tagName 510 ) { 511 nestlevel += taginfo.open ? 1 : -1; 512 } 513 if (nestlevel <= 0) { 514 break; 515 } 516 } 517 break; 518 case LineType.CodeBlockDelimiter: 519 lines.popFront(); // TODO: get language from line 520 b.type = BlockType.Code; 521 while (!lines.empty) { 522 if (lines.front.indent.length < baseIndent.length) { 523 break; 524 } 525 if (lines.front.indent[0 .. 
baseIndent.length] != baseIndent) { 526 break; 527 } 528 if (lines.front.type == LineType.CodeBlockDelimiter) { 529 lines.popFront(); 530 break; 531 } 532 b.text ~= lines.front.unindent(baseIndent.length); 533 lines.popFront(); 534 } 535 break; 536 case LineType.Table: 537 lines.popFront(); 538 // Can this be a valid table (is there a next line 539 // that could be a header separator)? 540 if (lines.empty) { 541 processPlain(); 542 break; 543 } 544 Line lnNext = lines.front; 545 immutable bool isTableHeader = 546 lnNext.type == 547 LineType.Table && 548 lnNext.text.indexOf(" -") >= 0 && 549 lnNext.text.indexOf("- ") >= 0 && 550 lnNext.text.allOf("-:| "); 551 if (!isTableHeader) { 552 // Not a valid table header, 553 // so let's assume it's plain markdown 554 processPlain(); 555 break; 556 } 557 b.type = BlockType.Table; 558 // Parse header 559 b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln); 560 // Parse table rows 561 lines.popFront(); 562 while (!lines.empty) { 563 ln = lines.front; 564 if (ln.type != LineType.Table) 565 break; // not a table row, so let's assume it's the end of the table 566 b.blocks ~= splitTableRow(ln); 567 lines.popFront(); 568 } 569 break; 570 } 571 root.blocks ~= b; 572 573 } 574 } 575 576 void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links) 577 do { 578 final switch (block.type) { 579 case BlockType.Plain: 580 foreach (ln; block.text) { 581 dst.put(ln); 582 dst.put("\n"); 583 } 584 foreach (b; block.blocks) { 585 this.writeBlock(dst, b, links); 586 } 587 break; 588 case BlockType.Text: 589 writeMarkdownEscaped(dst, block, links); 590 foreach (b; block.blocks) { 591 this.writeBlock(dst, b, links); 592 } 593 break; 594 case BlockType.Paragraph: 595 assert(block.blocks.length == 0); 596 dst.put("<p>"); 597 writeMarkdownEscaped(dst, block, links); 598 dst.put("</p>\n"); 599 break; 600 case BlockType.Header: 601 assert(block.blocks.length == 0); 602 auto hlvl = block.headerLevel + this.headingBaseLevel - 1; 603 
dst.formattedWrite( 604 "<h%s id=\"%s\">", hlvl, block.text[0].asSlug 605 ); 606 assert(block.text.length == 1); 607 writeMarkdownEscaped(dst, block.text[0], links); 608 dst.formattedWrite("</h%s>\n", hlvl); 609 break; 610 case BlockType.OList: 611 dst.put("<ol>\n"); 612 foreach (b; block.blocks) { 613 this.writeBlock(dst, b, links); 614 } 615 dst.put("</ol>\n"); 616 break; 617 case BlockType.UList: 618 dst.put("<ul>\n"); 619 foreach (b; block.blocks) { 620 this.writeBlock(dst, b, links); 621 } 622 dst.put("</ul>\n"); 623 break; 624 case BlockType.ListItem: 625 dst.put("<li>"); 626 writeMarkdownEscaped(dst, block, links); 627 foreach (b; block.blocks) { 628 this.writeBlock(dst, b, links); 629 } 630 dst.put("</li>\n"); 631 break; 632 case BlockType.Code: 633 assert(block.blocks.length == 0); 634 dst.put("<pre class=\"prettyprint\"><code>"); 635 if (this.processCode is null) { 636 foreach (ln; block.text) { 637 filterHTMLEscape(dst, ln); 638 dst.put("\n"); 639 } 640 } else { 641 auto temp = appender!string(); 642 foreach (ln; block.text) { 643 filterHTMLEscape(temp, ln); 644 temp.put("\n"); 645 } 646 dst.put(this.processCode(temp.data)); 647 } 648 dst.put("</code></pre>"); 649 break; 650 case BlockType.Quote: 651 dst.put("<blockquote>"); 652 writeMarkdownEscaped(dst, block, links); 653 foreach (b; block.blocks) { 654 this.writeBlock(dst, b, links); 655 } 656 dst.put("</blockquote>\n"); 657 break; 658 case BlockType.Table: 659 assert(block.blocks.length > 0); 660 assert(block.blocks[0].type == BlockType.TableRow); 661 dst.put("<table>\n<tr>"); 662 foreach (b; block.blocks[0].blocks) { 663 assert(b.type == BlockType.TableHeader); 664 dst.put("<th>"); 665 writeMarkdownEscaped(dst, b.text[0], links); 666 dst.put("</th>"); 667 } 668 dst.put("</tr>\n"); 669 if (block.blocks.length > 1) { 670 foreach (row; block.blocks[1 .. 
$]) { 671 assert(row.type == BlockType.TableRow); 672 dst.put("<tr>"); 673 foreach (b; row.blocks) { 674 assert(b.type == BlockType.TableData); 675 dst.put("<td>"); 676 writeMarkdownEscaped(dst, b.text[0], links); 677 dst.put("</td>"); 678 } 679 dst.put("</tr>\n"); 680 } 681 } 682 dst.put("</table>\n"); 683 break; 684 case BlockType.TableRow: 685 case BlockType.TableData: 686 case BlockType.TableHeader: 687 assert(0); 688 } 689 } 690 691 void writeMarkdownEscaped(R)(ref R dst, 692 ref const Block block, 693 in LinkRef[string] links) { 694 auto lines = cast(string[]) block.text; 695 auto text = this.flags & MarkdownFlags.keepLineBreaks 696 ? lines.join("<br>") : lines.join("\n"); 697 writeMarkdownEscaped(dst, text, links); 698 if (lines.length) { 699 dst.put("\n"); 700 } 701 } 702 703 void writeMarkdownEscaped(R)(ref R dst, 704 string ln, 705 in LinkRef[string] linkrefs) { 706 string filterLink(string lnk, bool isImage) { 707 return this.urlFilter ? this.urlFilter(lnk, isImage) : lnk; 708 } 709 710 bool br = ln.endsWith(" "); 711 while (ln.length > 0) { 712 switch (ln[0]) { 713 default: 714 dst.put(ln[0]); 715 ln = ln[1 .. $]; 716 break; 717 case '\\': 718 if (ln.length >= 2) { 719 switch (ln[1]) { 720 default: 721 dst.put(ln[0 .. 2]); 722 ln = ln[2 .. $]; 723 break; 724 case '\'', '`', '*', '_', '{', '}', '[', ']', 725 '(', ')', '#', '+', '-', '.', '!': 726 dst.put(ln[1]); 727 ln = ln[2 .. $]; 728 break; 729 } 730 } else { 731 dst.put(ln[0]); 732 ln = ln[1 .. $]; 733 } 734 break; 735 case '_': 736 if (this.flags & MarkdownFlags.disableUnderscoreEmphasis) { 737 dst.put(ln[0]); 738 ln = ln[1 .. 
$]; 739 break; 740 } 741 goto case; 742 case '*': 743 string text; 744 if (auto em = parseEmphasis(ln, text)) { 745 if (em == 1) { 746 dst.put("<em>"); 747 } else if (em == 2) { 748 dst.put("<strong>"); 749 } else { 750 dst.put("<strong><em>"); 751 } 752 filterHTMLEscape( 753 dst, text, HTMLEscapeFlags.escapeMinimal 754 ); 755 if (em == 1) { 756 dst.put("</em>"); 757 } else if (em == 2) { 758 dst.put("</strong>"); 759 } else { 760 dst.put("</strong></em>"); 761 } 762 } else { 763 dst.put(ln[0]); 764 ln = ln[1 .. $]; 765 } 766 break; 767 case '`': 768 string code; 769 if (parseInlineCode(ln, code)) { 770 dst.put("<code class=\"prettyprint\">"); 771 if (this.processCode is null) { 772 filterHTMLEscape( 773 dst, code, HTMLEscapeFlags.escapeMinimal 774 ); 775 } else { 776 auto temp = appender!string(); 777 filterHTMLEscape( 778 temp, code, HTMLEscapeFlags.escapeMinimal 779 ); 780 dst.put(this.processCode(temp.data)); 781 } 782 dst.put("</code>"); 783 } else { 784 dst.put(ln[0]); 785 ln = ln[1 .. $]; 786 } 787 break; 788 case '[': 789 Link link; 790 if (parseLink(ln, link, linkrefs)) { 791 dst.put("<a href=\""); 792 filterHTMLAttribEscape(dst, filterLink(link.url, false)); 793 dst.put("\""); 794 if (link.title.length) { 795 dst.put(" title=\""); 796 filterHTMLAttribEscape(dst, link.title); 797 dst.put("\""); 798 } 799 dst.put(">"); 800 writeMarkdownEscaped(dst, link.text, linkrefs); 801 dst.put("</a>"); 802 } else { 803 dst.put(ln[0]); 804 ln = ln[1 .. $]; 805 } 806 break; 807 case '!': 808 Link link; 809 if (parseLink(ln, link, linkrefs)) { 810 dst.put("<img src=\""); 811 filterHTMLAttribEscape(dst, filterLink(link.url, true)); 812 dst.put("\" alt=\""); 813 filterHTMLAttribEscape(dst, link.text); 814 dst.put("\""); 815 if (link.title.length) { 816 dst.put(" title=\""); 817 filterHTMLAttribEscape(dst, link.title); 818 dst.put("\""); 819 } 820 dst.put(">"); 821 } else if (ln.length >= 2) { 822 dst.put(ln[0 .. 2]); 823 ln = ln[2 .. 
$]; 824 } else { 825 dst.put(ln[0]); 826 ln = ln[1 .. $]; 827 } 828 break; 829 case '>': 830 if (this.flags & MarkdownFlags.noInlineHtml) { 831 dst.put(">"); 832 } else 833 dst.put(ln[0]); 834 ln = ln[1 .. $]; 835 break; 836 case '<': 837 string url; 838 if (parseAutoLink(ln, url)) { 839 bool isEmail = url.startsWith("mailto:"); 840 dst.put("<a href=\""); 841 if (isEmail) { 842 filterHTMLAllEscape(dst, url); 843 } else { 844 filterHTMLAttribEscape(dst, filterLink(url, false)); 845 } 846 dst.put("\">"); 847 if (isEmail) { 848 filterHTMLAllEscape(dst, url[7 .. $]); 849 } else { 850 filterHTMLEscape( 851 dst, url, HTMLEscapeFlags.escapeMinimal 852 ); 853 } 854 dst.put("</a>"); 855 } else { 856 if (ln.startsWith("<br>")) { 857 // always support line breaks, 858 // since we embed them here ourselves! 859 dst.put("<br/>"); 860 ln = ln[4 .. $]; 861 } else if (ln.startsWith("<br/>")) { 862 dst.put("<br/>"); 863 ln = ln[5 .. $]; 864 } else { 865 if (this.flags & MarkdownFlags.noInlineHtml) { 866 dst.put("<"); 867 } else { 868 dst.put(ln[0]); 869 } 870 ln = ln[1 .. $]; 871 } 872 } 873 break; 874 } 875 } 876 if (br) { 877 dst.put("<br/>"); 878 } 879 } 880 881 string markdownText; 882 883 /// Controls the capabilities of the parser. 884 MarkdownFlags flags = MarkdownFlags.vanillaMarkdown; 885 886 /// Heading tags will start at this level. 887 size_t headingBaseLevel = 1; 888 889 /// Called for every link/image URL to perform arbitrary transformations. 890 UrlFilterFn urlFilter; 891 892 /*************************************************************************** 893 * An optional delegate to post-process code blocks and inline code. 894 * Useful to e.g. add code highlighting. 
895 */ 896 ProcessCodeFn processCode = null; 897 } 898 899 // unittest { 900 // auto text = 901 // `======= 902 // Heading 903 // ======= 904 905 // **bold** *italic* 906 907 // List: 908 909 // * a 910 // * b 911 // * c 912 // `; 913 914 // writeln("~~~~~~~~~~~"); 915 // writeln(text); 916 // writeln("~~~~~~~~~~~"); 917 // writeln(convertMarkdownToHTML(text)); 918 // } 919 920 unittest { 921 auto source = 922 `Merged prototype. 923 The prototype is not locked, allowing to add more components. 924 To be used it must be locked by calling EntityPrototype.lockAndTrimMemory().`; 925 auto expected = 926 `<p>Merged prototype. 927 The prototype is not locked, allowing to add more components. 928 To be used it must be locked by calling EntityPrototype.lockAndTrimMemory(). 929 </p> 930 `; 931 string result = convertMarkdownToHTML(source); 932 assert(result == expected); 933 } 934 935 unittest { 936 auto source = `*stars* under_score_s`; 937 auto expectedUnderscores = `<p><em>stars</em> under<em>score</em>s 938 </p> 939 `; 940 auto expectedNoUnderscores = `<p><em>stars</em> under_score_s 941 </p> 942 `; 943 944 string resultUnderscores = convertMarkdownToHTML(source); 945 string resultNoUnderscores = convertMarkdownToHTML( 946 source, MarkdownFlags.disableUnderscoreEmphasis 947 ); 948 949 assert( 950 resultUnderscores == expectedUnderscores, 951 "'%s' != '%s'".format(resultUnderscores, expectedUnderscores) 952 ); 953 assert( 954 resultNoUnderscores == expectedNoUnderscores, 955 "'%s' != '%s'".format(resultNoUnderscores, expectedNoUnderscores) 956 ); 957 } 958 959 // Unittest for code post-processing 960 unittest { 961 auto text = 962 "`inline code`" ~ ` 963 block: 964 965 code block 966 `; 967 auto expected = 968 `<p><code class="prettyprint">AAAAAAAAAAA</code> 969 block: 970 </p> 971 <pre class="prettyprint"><code>AAAAAAAAAA</code></pre>`; 972 973 string processCode(string input) @safe nothrow { 974 import std.exception : assumeWontThrow; 975 976 // ignore newlines 
generated by code block processing 977 input = input.filter!(c => c != '\n') 978 .array 979 .to!string 980 .assumeWontThrow; 981 return 'A'.repeat(input.length).array.to!string.assumeWontThrow; 982 } 983 984 auto settings = new MarkdownSettings; 985 settings.processCode = &processCode; 986 auto result = convertMarkdownToHTML(text, settings); 987 988 auto err = format!"Unexpected code processing result:\n%s\nExpected:\n%s"( 989 result, expected 990 ); 991 assert(result == expected, err); 992 } 993 994 struct Section { 995 size_t headingLevel; 996 string caption; 997 string anchor; 998 Section[] subSections; 999 } 1000 1001 private { 1002 immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"]; 1003 } 1004 1005 private enum IndentType { 1006 White, 1007 Quote 1008 } 1009 1010 1011 private struct Block { 1012 BlockType type; 1013 string[] text; 1014 Block[] blocks; 1015 size_t headerLevel; 1016 1017 // A human-readable toString for debugging. 1018 string toString() { 1019 return toStringNested; 1020 } 1021 1022 // toString implementation; capable of indenting nested blocks. 1023 string toStringNested(uint depth = 0) { 1024 string indent = " ".repeat(depth * 2).joiner.array.to!string; 1025 return indent ~ 1026 "%s\n".format(type) ~ 1027 indent ~ "%s\n".format(text) ~ 1028 blocks 1029 .map!((ref b) => b.toStringNested(depth + 1)) 1030 .joiner 1031 .array 1032 .to!string ~ 1033 indent ~ 1034 "%s\n".format(headerLevel); 1035 } 1036 } 1037 1038 1039 private string[] skipText(ref Line[] lines, IndentType[] indent) 1040 pure @safe { 1041 static bool matchesIndent(IndentType[] indent, IndentType[] baseIndent) { 1042 // Any *plain* line with a higher indent should still be a part of 1043 // a paragraph read by skipText(). 
Returning false here resulted in 1044 // text such as: 1045 // --- 1046 // First line 1047 // Second line 1048 // --- 1049 // being interpreted as a paragraph followed by a code block, even though 1050 // other Markdown processors would interpret it as a single paragraph. 1051 1052 // if (indent.length > baseIndent.length ) return false; 1053 if (indent.length > baseIndent.length) { 1054 return true; 1055 } 1056 if (indent != baseIndent[0 .. indent.length]) { 1057 return false; 1058 } 1059 sizediff_t qidx = -1; 1060 foreach_reverse (i, tp; baseIndent) { 1061 if (tp == IndentType.Quote) { 1062 qidx = i; 1063 break; 1064 } 1065 } 1066 if (qidx >= 0) { 1067 qidx = baseIndent.length - 1 - qidx; 1068 if (indent.length <= qidx) { 1069 return false; 1070 } 1071 } 1072 return true; 1073 } 1074 1075 string[] ret; 1076 1077 while (true) { 1078 ret ~= lines.front.unindent( 1079 min(indent.length, lines.front.indent.length) 1080 ); 1081 lines.popFront(); 1082 1083 if (lines.empty || 1084 !matchesIndent(lines.front.indent, indent) || 1085 lines.front.type != LineType.Plain 1086 ) { 1087 return ret; 1088 } 1089 } 1090 } 1091 1092 1093 private Block splitTableRow(BlockType dataType = BlockType.TableData)(Line line) 1094 pure @safe { 1095 static assert( 1096 dataType == BlockType.TableHeader || dataType == BlockType.TableData 1097 ); 1098 1099 string ln = line.text.strip(); 1100 immutable size_t b = (ln[0 .. 2] == "| ") ? 2 : 0; 1101 immutable size_t e = (ln[($ - 2) .. $] == " |") ? (ln.length - 2) : ln.length; 1102 Block ret; 1103 ret.type = BlockType.TableRow; 1104 foreach (txt; ln[b .. 
e].split(" | ")) { 1105 Block d; 1106 d.text = [txt.strip(" ")]; 1107 d.type = dataType; 1108 ret.blocks ~= d; 1109 } 1110 return ret; 1111 } 1112 1113 private void writeBlock(R)(ref R dst, 1114 ref const Block block, 1115 LinkRef[string] links, 1116 scope MarkdownSettings settings) { 1117 final switch (block.type) { 1118 case BlockType.Plain: 1119 foreach (ln; block.text) { 1120 dst.put(ln); 1121 dst.put("\n"); 1122 } 1123 foreach (b; block.blocks) { 1124 writeBlock(dst, b, links, settings); 1125 } 1126 break; 1127 case BlockType.Text: 1128 writeMarkdownEscaped(dst, block, links, settings); 1129 foreach (b; block.blocks) { 1130 writeBlock(dst, b, links, settings); 1131 } 1132 break; 1133 case BlockType.Paragraph: 1134 assert(block.blocks.length == 0); 1135 dst.put("<p>"); 1136 writeMarkdownEscaped(dst, block, links, settings); 1137 dst.put("</p>\n"); 1138 break; 1139 case BlockType.Header: 1140 assert(block.blocks.length == 0); 1141 auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel - 1 : 0); 1142 dst.formattedWrite("<h%s id=\"%s\">", hlvl, block.text[0].asSlug); 1143 assert(block.text.length == 1); 1144 writeMarkdownEscaped(dst, block.text[0], links, settings); 1145 dst.formattedWrite("</h%s>\n", hlvl); 1146 break; 1147 case BlockType.OList: 1148 dst.put("<ol>\n"); 1149 foreach (b; block.blocks) { 1150 writeBlock(dst, b, links, settings); 1151 } 1152 dst.put("</ol>\n"); 1153 break; 1154 case BlockType.UList: 1155 dst.put("<ul>\n"); 1156 foreach (b; block.blocks) { 1157 writeBlock(dst, b, links, settings); 1158 } 1159 dst.put("</ul>\n"); 1160 break; 1161 case BlockType.ListItem: 1162 dst.put("<li>"); 1163 writeMarkdownEscaped(dst, block, links, settings); 1164 foreach (b; block.blocks) { 1165 writeBlock(dst, b, links, settings); 1166 } 1167 dst.put("</li>\n"); 1168 break; 1169 case BlockType.Code: 1170 assert(block.blocks.length == 0); 1171 dst.put("<pre class=\"prettyprint\"><code>"); 1172 if (settings.processCode is null) { 1173 foreach (ln; 
block.text) { 1174 filterHTMLEscape(dst, ln); 1175 dst.put("\n"); 1176 } 1177 } else { 1178 auto temp = appender!string(); 1179 foreach (ln; block.text) { 1180 filterHTMLEscape(temp, ln); 1181 temp.put("\n"); 1182 } 1183 dst.put(settings.processCode(temp.data)); 1184 } 1185 dst.put("</code></pre>"); 1186 break; 1187 case BlockType.Quote: 1188 dst.put("<blockquote>"); 1189 writeMarkdownEscaped(dst, block, links, settings); 1190 foreach (b; block.blocks) 1191 writeBlock(dst, b, links, settings); 1192 dst.put("</blockquote>\n"); 1193 break; 1194 case BlockType.Table: 1195 assert(block.blocks.length > 0); 1196 assert(block.blocks[0].type == BlockType.TableRow); 1197 dst.put("<table>\n<tr>"); 1198 foreach (b; block.blocks[0].blocks) { 1199 assert(b.type == BlockType.TableHeader); 1200 dst.put("<th>"); 1201 writeMarkdownEscaped(dst, b.text[0], links, settings); 1202 dst.put("</th>"); 1203 } 1204 dst.put("</tr>\n"); 1205 if (block.blocks.length > 1) { 1206 foreach (row; block.blocks[1 .. $]) { 1207 assert(row.type == BlockType.TableRow); 1208 dst.put("<tr>"); 1209 foreach (b; row.blocks) { 1210 assert(b.type == BlockType.TableData); 1211 dst.put("<td>"); 1212 writeMarkdownEscaped(dst, b.text[0], links, settings); 1213 dst.put("</td>"); 1214 } 1215 dst.put("</tr>\n"); 1216 } 1217 } 1218 dst.put("</table>\n"); 1219 break; 1220 case BlockType.TableRow: 1221 case BlockType.TableData: 1222 case BlockType.TableHeader: 1223 assert(0); 1224 } 1225 } 1226 1227 private void writeMarkdownEscaped(R)(ref R dst, 1228 ref const Block block, 1229 in LinkRef[string] links, 1230 scope MarkdownSettings settings) { 1231 auto lines = cast(string[]) block.text; 1232 auto text = settings.flags & MarkdownFlags.keepLineBreaks 1233 ? 
lines.join("<br>") : lines.join("\n");
    // Render the joined text as inline Markdown; a block that had any text
    // lines is terminated by a newline.
    writeMarkdownEscaped(dst, text, links, settings);
    if (lines.length) {
        dst.put("\n");
    }
}

/*******************************************************************************
 * Writes a piece of inline Markdown to `dst` as HTML.
 *
 * Handles backslash escapes, emphasis, inline code, links, images,
 * autolinks and `<br>` tags. When `MarkdownFlags.noInlineHtml` is set, bare
 * `<` and `>` characters are written as character references instead of
 * being passed through.
 *
 * Params:
 *     dst = output range receiving the HTML
 *     ln = Markdown text to render
 *     linkrefs = reference-style link definitions, passed on to `parseLink`
 *     settings = flags, optional URL filter and optional code post-processor
 */
private void writeMarkdownEscaped(R)(ref R dst,
                                     string ln,
                                     in LinkRef[string] linkrefs,
                                     scope MarkdownSettings settings) {
    string filterLink(string lnk, bool isImage) {
        return settings.urlFilter ? settings.urlFilter(lnk, isImage) : lnk;
    }

    // Markdown hard line break: the text ends with two (or more) spaces.
    bool br = ln.endsWith("  ");
    while (ln.length > 0) {
        switch (ln[0]) {
        default:
            dst.put(ln[0]);
            ln = ln[1 .. $];
            break;
        case '\\':
            if (ln.length >= 2) {
                switch (ln[1]) {
                default:
                    // not an escapable character: keep the backslash
                    dst.put(ln[0 .. 2]);
                    ln = ln[2 .. $];
                    break;
                case '\'', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '#', '+', '-', '.', '!':
                    dst.put(ln[1]);
                    ln = ln[2 .. $];
                    break;
                }
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '_':
            if (settings.flags & MarkdownFlags.disableUnderscoreEmphasis) {
                dst.put(ln[0]);
                ln = ln[1 .. $];
                break;
            }
            goto case;
        case '*':
            string text;
            // parseEmphasis returns the delimiter length (1, 2 or 3) or 0
            if (auto em = parseEmphasis(ln, text)) {
                dst.put(em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
                filterHTMLEscape(dst, text, HTMLEscapeFlags.escapeMinimal);
                dst.put(em == 1 ? "</em>" : em == 2 ? "</strong>" : "</em></strong>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '`':
            string code;
            if (parseInlineCode(ln, code)) {
                dst.put("<code class=\"prettyprint\">");
                if (settings.processCode is null) {
                    filterHTMLEscape(
                        dst, code, HTMLEscapeFlags.escapeMinimal
                    );
                } else {
                    // escape first, then hand the result to the user hook
                    auto temp = appender!string();
                    filterHTMLEscape(
                        temp, code, HTMLEscapeFlags.escapeMinimal
                    );
                    dst.put(settings.processCode(temp.data));
                }
                dst.put("</code>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '[':
            Link link;
            if (parseLink(ln, link, linkrefs)) {
                dst.put("<a href=\"");
                filterHTMLAttribEscape(dst, filterLink(link.url, false));
                dst.put("\"");
                if (link.title.length) {
                    dst.put(" title=\"");
                    filterHTMLAttribEscape(dst, link.title);
                    dst.put("\"");
                }
                dst.put(">");
                // the link text may itself contain inline Markdown
                writeMarkdownEscaped(dst, link.text, linkrefs, settings);
                dst.put("</a>");
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '!':
            Link link;
            if (parseLink(ln, link, linkrefs)) {
                dst.put("<img src=\"");
                filterHTMLAttribEscape(dst, filterLink(link.url, true));
                dst.put("\" alt=\"");
                filterHTMLAttribEscape(dst, link.text);
                dst.put("\"");
                if (link.title.length) {
                    dst.put(" title=\"");
                    filterHTMLAttribEscape(dst, link.title);
                    dst.put("\"");
                }
                dst.put(">");
            } else if (ln.length >= 2) {
                dst.put(ln[0 .. 2]);
                ln = ln[2 .. $];
            } else {
                dst.put(ln[0]);
                ln = ln[1 .. $];
            }
            break;
        case '>':
            if (settings.flags & MarkdownFlags.noInlineHtml) {
                dst.put("&gt;");
            } else {
                dst.put(ln[0]);
            }
            ln = ln[1 .. $];
            break;
        case '<':
            string url;
            if (parseAutoLink(ln, url)) {
                bool isEmail = url.startsWith("mailto:");
                dst.put("<a href=\"");
                if (isEmail) {
                    filterHTMLAllEscape(dst, url);
                } else {
                    filterHTMLAttribEscape(dst, filterLink(url, false));
                }
                dst.put("\">");
                if (isEmail) {
                    // visible text omits the "mailto:" prefix
                    filterHTMLAllEscape(dst, url[7 .. $]);
                } else {
                    filterHTMLEscape(
                        dst, url, HTMLEscapeFlags.escapeMinimal
                    );
                }
                dst.put("</a>");
            } else {
                if (ln.startsWith("<br>")) {
                    // always support line breaks,
                    // since we embed them here ourselves!
                    dst.put("<br/>");
                    ln = ln[4 .. $];
                } else if (ln.startsWith("<br/>")) {
                    dst.put("<br/>");
                    ln = ln[5 .. $];
                } else {
                    if (settings.flags & MarkdownFlags.noInlineHtml) {
                        dst.put("&lt;");
                    } else {
                        dst.put(ln[0]);
                    }
                    ln = ln[1 .. $];
                }
            }
            break;
        }
    }
    if (br) {
        dst.put("<br/>");
    }
}


/// Returns true if `ln` consists only of spaces and tabs (or is empty).
private bool isLineBlank(string ln)
pure @safe {
    return allOf(ln, " \t");
}


/// Returns true if `ln`, after leading whitespace, is a run of
/// `subHeaderChar` followed only by spaces and tabs.
private bool isSetextHeaderLine(string ln, char subHeaderChar) pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 1) {
        return false;
    }
    if (ln[0] == subHeaderChar) {
        while (!ln.empty && ln.front == subHeaderChar) {
            ln.popFront();
        }
        return allOf(ln, " \t");
    }
    return false;
}


/// Returns true if `ln`, after leading whitespace, starts with one to six
/// '#' characters followed by a space.
private bool isAtxHeaderLine(string ln) pure @safe {
    ln = stripLeft(ln);
    size_t i = 0;
    while (i < ln.length && ln[i] == '#') {
        i++;
    }
    if (i < 1 || i > 6 || i >= ln.length) {
        return false;
    }
    return ln[i] == ' ';
}


/// Returns true if `ln` contains only spaces plus at least three of one of
/// the characters '-', '*' or '_'.
private bool isHlineLine(string ln) pure @safe {
    if (allOf(ln, " -") && count(ln, '-') >= 3) {
        return true;
    } else if (allOf(ln, " *") && count(ln, '*') >= 3) {
        return true;
    } else if (allOf(ln, " _") && count(ln, '_') >= 3) {
        return true;
    }
    return false;
}


/// Returns true if the first non-whitespace character of `ln` is '>'.
private bool isQuoteLine(string ln) pure @safe {
    return ln.stripLeft().startsWith(">");
}


private size_t getQuoteLevel(string ln) pure @safe {
    size_t level = 0;
    ln = stripLeft(ln);
    while (ln.length > 0 && ln[0] == '>') {
        level++;
        ln = stripLeft(ln[1 ..
$]); 1463 } 1464 return level; 1465 } 1466 1467 1468 private bool isUListLine(string ln) pure @safe { 1469 ln = stripLeft(ln); 1470 if (ln.length < 2) { 1471 return false; 1472 } 1473 if (!canFind("*+-", ln[0])) { 1474 return false; 1475 } 1476 if (ln[1] != ' ' && ln[1] != '\t') { 1477 return false; 1478 } 1479 return true; 1480 } 1481 1482 1483 private bool isOListLine(string ln) pure @safe { 1484 ln = stripLeft(ln); 1485 if (ln.length < 1) { 1486 return false; 1487 } 1488 if (ln[0] < '0' || ln[0] > '9') { 1489 return false; 1490 } 1491 ln = ln[1 .. $]; 1492 while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9') { 1493 ln = ln[1 .. $]; 1494 } 1495 if (ln.length < 2) { 1496 return false; 1497 } 1498 if (ln[0] != '.') { 1499 return false; 1500 } 1501 if (ln[1] != ' ' && ln[1] != '\t') { 1502 return false; 1503 } 1504 return true; 1505 } 1506 1507 1508 private bool isTableRowLine(string ln) pure @safe { 1509 return 1510 ln.indexOf(" | ") >= 0 && 1511 !ln.isOListLine && 1512 !ln.isUListLine && 1513 !ln.isAtxHeaderLine; 1514 } 1515 1516 1517 private string removeListPrefix(string str, LineType tp) pure @safe { 1518 switch (tp) { 1519 default: 1520 assert(false); 1521 case LineType.OList: // skip bullets and output using normal escaping 1522 auto idx = str.indexOfCT('.'); 1523 assert(idx > 0); 1524 return str[idx + 1 .. $].stripLeft(); 1525 case LineType.UList: 1526 return stripLeft(str.stripLeft()[1 .. $]); 1527 } 1528 } 1529 1530 1531 private auto parseHtmlBlockLine(string ln) pure @safe { 1532 struct HtmlBlockInfo { 1533 bool isHtmlBlock; 1534 string tagName; 1535 bool open; 1536 } 1537 1538 HtmlBlockInfo ret; 1539 ret.isHtmlBlock = false; 1540 ret.open = true; 1541 1542 ln = strip(ln); 1543 if (ln.length < 3) { 1544 return ret; 1545 } 1546 if (ln[0] != '<') { 1547 return ret; 1548 } 1549 if (ln[1] == '/') { 1550 ret.open = false; 1551 ln = ln[1 .. $]; 1552 } 1553 if (!std.ascii.isAlpha(ln[1])) { 1554 return ret; 1555 } 1556 ln = ln[1 .. 
$]; 1557 size_t idx = 0; 1558 while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>') { 1559 idx++; 1560 } 1561 ret.tagName = ln[0 .. idx]; 1562 ln = ln[idx .. $]; 1563 1564 auto eidx = ln.indexOf('>'); 1565 if (eidx < 0) { 1566 return ret; 1567 } 1568 if (eidx != ln.length-1) { 1569 return ret; 1570 } 1571 1572 if (!s_blockTags.canFind(ret.tagName)) { 1573 return ret; 1574 } 1575 1576 ret.isHtmlBlock = true; 1577 return ret; 1578 } 1579 1580 1581 private bool isHtmlBlockLine(string ln) pure @safe { 1582 auto bi = parseHtmlBlockLine(ln); 1583 return bi.isHtmlBlock && bi.open; 1584 } 1585 1586 1587 private bool isHtmlBlockCloseLine(string ln) pure @safe { 1588 auto bi = parseHtmlBlockLine(ln); 1589 return bi.isHtmlBlock && !bi.open; 1590 } 1591 1592 1593 private bool isCodeBlockDelimiter(string ln) pure @safe { 1594 return ln.startsWith("```"); 1595 } 1596 1597 // private string getHtmlTagName(string ln) pure @safe { 1598 // return parseHtmlBlockLine(ln).tagName; 1599 // } 1600 1601 private bool isLineIndented(string ln) pure @safe { 1602 return ln.startsWith("\t") || ln.startsWith(" "); 1603 } 1604 1605 // private string unindentLine(string ln) pure @safe { 1606 // if (ln.startsWith("\t")) return ln[1 .. $]; 1607 // if (ln.startsWith(" ")) return ln[4 .. $]; 1608 // assert(false); 1609 // } 1610 1611 private int parseEmphasis(ref string str, ref string text) pure @safe { 1612 string pstr = str; 1613 if (pstr.length < 3) 1614 return false; 1615 1616 string ctag; 1617 if (pstr.startsWith("***")) 1618 ctag = "***"; 1619 else if (pstr.startsWith("**")) 1620 ctag = "**"; 1621 else if (pstr.startsWith("*")) 1622 ctag = "*"; 1623 else if (pstr.startsWith("___")) 1624 ctag = "___"; 1625 else if (pstr.startsWith("__")) 1626 ctag = "__"; 1627 else if (pstr.startsWith("_")) 1628 ctag = "_"; 1629 else 1630 return false; 1631 1632 pstr = pstr[ctag.length .. 
$]; 1633 1634 auto cidx = () @trusted { return pstr.indexOf(ctag); }(); 1635 if (cidx < 1) 1636 return false; 1637 1638 text = pstr[0 .. cidx]; 1639 1640 str = pstr[cidx + ctag.length .. $]; 1641 return cast(int) ctag.length; 1642 } 1643 1644 1645 private bool parseInlineCode(ref string str, ref string code) pure @safe { 1646 string pstr = str; 1647 if (pstr.length < 3) 1648 return false; 1649 string ctag; 1650 if (pstr.startsWith("``")) 1651 ctag = "``"; 1652 else if (pstr.startsWith("`")) 1653 ctag = "`"; 1654 else 1655 return false; 1656 pstr = pstr[ctag.length .. $]; 1657 1658 auto cidx = () @trusted { return pstr.indexOf(ctag); }(); 1659 if (cidx < 1) 1660 return false; 1661 1662 code = pstr[0 .. cidx]; 1663 str = pstr[cidx + ctag.length .. $]; 1664 return true; 1665 } 1666 1667 1668 private bool parseLink( 1669 ref string str, ref Link dst, in LinkRef[string] linkrefs 1670 ) pure @safe { 1671 string pstr = str; 1672 if (pstr.length < 3) 1673 return false; 1674 // ignore img-link prefix 1675 if (pstr[0] == '!') 1676 pstr = pstr[1 .. $]; 1677 1678 // parse the text part [text] 1679 if (pstr[0] != '[') 1680 return false; 1681 auto cidx = pstr.matchBracket(); 1682 if (cidx < 1) 1683 return false; 1684 string refid; 1685 dst.text = pstr[1 .. cidx]; 1686 pstr = pstr[cidx + 1 .. $]; 1687 1688 // parse either (link '['"title"']') or '[' ']'[refid] 1689 if (pstr.length < 2) 1690 return false; 1691 if (pstr[0] == '(') { 1692 cidx = pstr.matchBracket(); 1693 if (cidx < 1) 1694 return false; 1695 auto inner = pstr[1 .. cidx]; 1696 immutable qidx = inner.indexOfCT('"'); 1697 if (qidx > 1 && std.ascii.isWhite(inner[qidx - 1])) { 1698 dst.url = inner[0 .. qidx].stripRight(); 1699 immutable len = inner[qidx .. $].lastIndexOf('"'); 1700 if (len == 0) 1701 return false; 1702 assert(len > 0); 1703 dst.title = inner[qidx + 1 .. 
qidx + len]; 1704 } else { 1705 dst.url = inner.stripRight(); 1706 dst.title = null; 1707 } 1708 if (dst.url.startsWith("<") && dst.url.endsWith(">")) 1709 dst.url = dst.url[1 .. $ - 1]; 1710 pstr = pstr[cidx + 1 .. $]; 1711 } else { 1712 if (pstr[0] == ' ') 1713 pstr = pstr[1 .. $]; 1714 if (pstr[0] != '[') 1715 return false; 1716 pstr = pstr[1 .. $]; 1717 cidx = pstr.indexOfCT(']'); 1718 if (cidx < 0) 1719 return false; 1720 if (cidx == 0) 1721 refid = dst.text; 1722 else 1723 refid = pstr[0 .. cidx]; 1724 pstr = pstr[cidx + 1 .. $]; 1725 } 1726 1727 if (refid.length > 0) { 1728 auto pr = toLower(refid) in linkrefs; 1729 if (!pr) { 1730 // debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid); 1731 return false; 1732 } 1733 dst.url = pr.url; 1734 dst.title = pr.title; 1735 } 1736 1737 str = pstr; 1738 return true; 1739 } 1740 1741 1742 /* UNITTESTS */ 1743 1744 1745 @safe unittest { 1746 static void testLink(string s, Link exp, in LinkRef[string] refs) { 1747 Link link; 1748 assert(parseLink(s, link, refs), s); 1749 assert(link == exp); 1750 } 1751 1752 LinkRef[string] refs; 1753 refs["ref"] = LinkRef("ref", "target", "title"); 1754 1755 testLink(`[link](target)`, Link("link", "target"), null); 1756 testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1757 testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1758 testLink(`[link](target "title" )`, Link("link", "target", "title"), null); 1759 1760 testLink(`[link](target)`, Link("link", "target"), null); 1761 testLink(`[link](target "title")`, Link("link", "target", "title"), null); 1762 1763 testLink(`[link][ref]`, Link("link", "target", "title"), refs); 1764 testLink(`[ref][]`, Link("ref", "target", "title"), refs); 1765 1766 testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null); 1767 testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs); 1768 1769 testLink(`[link](/target with spaces 
)`, Link("link", "/target with spaces"), null); 1770 testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null); 1771 1772 testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null); 1773 testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null); 1774 1775 testLink(`[link](target "")`, Link("link", "target", ""), null); 1776 testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null); 1777 1778 testLink(`[link](<target>)`, Link("link", "target"), null); 1779 1780 auto failing = [ 1781 `text`, `[link](target`, `[link]target)`, `[link]`, 1782 `[link(target)`, `link](target)`, `[link] (target)`, 1783 `[link][noref]`, `[noref][]` 1784 ]; 1785 Link link; 1786 foreach (s; failing) 1787 assert(!parseLink(s, link, refs), s); 1788 } 1789 1790 private bool parseAutoLink(ref string str, ref string url) 1791 pure @safe { 1792 string pstr = str; 1793 if (pstr.length < 3) 1794 return false; 1795 if (pstr[0] != '<') 1796 return false; 1797 pstr = pstr[1 .. $]; 1798 auto cidx = pstr.indexOf('>'); 1799 if (cidx < 0) 1800 return false; 1801 url = pstr[0 .. cidx]; 1802 if (anyOf(url, " \t")) 1803 return false; 1804 if (!anyOf(url, ":@")) 1805 return false; 1806 str = pstr[cidx + 1 .. $]; 1807 if (url.indexOf('@') > 0) 1808 url = "mailto:" ~ url; 1809 return true; 1810 } 1811 1812 /******************************************************************************* 1813 * Generates an identifier suitable to use as within a URL. 1814 * 1815 * The resulting string will contain only ASCII lower case alphabetic or 1816 * numeric characters, as well as dashes (-). Every sequence of 1817 * non-alphanumeric characters will be replaced by a single dash. No dashes 1818 * will be at either the front or the back of the result string. 
*/
/**
 * Params:
 *     text = input range of `dchar` to convert
 * Returns:
 *     A lazily evaluated input range of `char`.
 */
auto asSlug(R)(R text) if (isInputRange!R && is(typeof(R.init.front) == dchar)) {
    static struct SlugRange {
        private {
            R _input;
            bool _dash; // a pending '-' has to be emitted before _input.front
        }

        this(R input) {
            _input = input;
            // drop leading separators so the slug never starts with a dash
            skipNonAlphaNum();
        }

        @property bool empty() const {
            return _dash ? false : _input.empty;
        }

        @property char front() const {
            if (_dash)
                return '-';

            // _input.front is always an ASCII letter or digit here, because
            // skipNonAlphaNum() stops only on those
            char r = cast(char) _input.front;
            if (r >= 'A' && r <= 'Z')
                return cast(char)(r + ('a' - 'A'));
            return r;
        }

        void popFront() {
            if (_dash) {
                _dash = false;
                return;
            }

            _input.popFront();
            auto na = skipNonAlphaNum();
            // emit a dash only between two alphanumeric runs, never at the end
            if (na && !_input.empty)
                _dash = true;
        }

        /// Skips to the next ASCII letter/digit; returns true if anything
        /// was skipped.
        private bool skipNonAlphaNum() {
            bool have_skipped = false;
            while (!_input.empty) {
                switch (_input.front) {
                default:
                    _input.popFront();
                    have_skipped = true;
                    break;
                case 'a': .. case 'z':
                case 'A': .. case 'Z':
                case '0': .. case '9':
                    return have_skipped;
                }
            }
            return have_skipped;
        }
    }

    return SlugRange(text);
}

unittest {
    import std.algorithm : equal;

    assert("".asSlug.equal(""));
    assert(".,-".asSlug.equal(""));
    assert("abc".asSlug.equal("abc"));
    assert("aBc123".asSlug.equal("abc123"));
    assert("....aBc...123...".asSlug.equal("abc-123"));
}

/// Parsed inline link or image: visible text, target URL and optional title.
private struct Link {
    string text;
    string url;
    string title;
}

@safe unittest { // alt and title attributes
    assert(convertMarkdownToHTML("![alt](http://example.org/image)")
        == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
    assert(convertMarkdownToHTML("![alt](http://example.org/image \"Title\")")
        == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}

@safe unittest { // complex links
    assert(convertMarkdownToHTML("their [install\ninstructions](<http://www.brew.sh>) and")
        == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
    assert(convertMarkdownToHTML("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
        == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}

@safe unittest { // check CTFE-ability
    enum res = convertMarkdownToHTML("### some markdown\n[foo][]\n[foo]: /bar");
    assert(
        res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}

@safe unittest { // correct line breaks in restrictive mode
    auto res = convertMarkdownToHTML("hello\nworld", MarkdownFlags.forumDefault);
    assert(res == "<p>hello<br/>world\n</p>\n", res);
}

/*@safe unittest { // code blocks and blockquotes
    assert(convertMarkdownToHTML("\tthis\n\tis\n\tcode") ==
        "<pre><code>this\nis\ncode</code></pre>\n");
    assert(convertMarkdownToHTML(" this\n is\n code") ==
1924 "<pre><code>this\nis\ncode</code></pre>\n"); 1925 assert(convertMarkdownToHTML(" this\n is\n\tcode") == 1926 "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n"); 1927 assert(convertMarkdownToHTML("\tthis\n\n\tcode") == 1928 "<pre><code>this\n\ncode</code></pre>\n"); 1929 assert(convertMarkdownToHTML("\t> this") == 1930 "<pre><code>> this</code></pre>\n"); 1931 assert(convertMarkdownToHTML("> this") == 1932 "<blockquote><pre><code>this</code></pre></blockquote>\n"); 1933 assert(convertMarkdownToHTML("> this\n is code") == 1934 "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n"); 1935 }*/ 1936 1937 @safe unittest { // test simple border-less table 1938 auto res = convertMarkdownToHTML( 1939 "Col 1 | Col 2 | Col 3\n -- | -- | --\n val 1 | val 2 | val 3\n *val 4* | val 5 | value 6", 1940 MarkdownFlags.supportTables 1941 ); 1942 assert(res == "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n", res); 1943 } 1944 1945 @safe unittest { // test simple border'ed table 1946 auto res = convertMarkdownToHTML( 1947 "| Col 1 | Col 2 | Col 3 |\n| -- | -- | -- |\n| val 1 | val 2 | val 3 |\n| *val 4* | val 5 | value 6 |", 1948 MarkdownFlags.supportTables 1949 ); 1950 assert(res == "<table>\n<tr><th>Col 1</th><th>Col 2</th><th>Col 3</th></tr>\n<tr><td>val 1</td><td>val 2</td><td>val 3</td></tr>\n<tr><td><em>val 4</em></td><td>val 5</td><td>value 6</td></tr>\n</table>\n", res); 1951 } 1952 1953 @safe unittest { 1954 string input = ` 1955 Table: 1956 1957 ID | Name | Address 1958 -- | ---- | --------- 1959 1 | Foo | Somewhere 1960 2 | Bar | Nowhere `; 1961 auto res = convertMarkdownToHTML(input, MarkdownFlags.supportTables); 1962 auto exp = 
"<p>Table:\n</p>\n<table>\n<tr><th>ID</th><th>Name</th><th>Address</th></tr>\n<tr><td>1</td><td>Foo</td><td>Somewhere</td></tr>\n<tr><td>2</td><td>Bar</td><td>Nowhere</td></tr>\n</table>\n"; 1963 assert(res == exp, res); 1964 } 1965 1966 package: 1967 1968 /// Function for work with HTML. 1969 1970 /******************************************************************************* 1971 * Writes the HTML escaped version of a given string to an output range. 1972 */ 1973 void filterHTMLEscape(R, S)(ref R dst, 1974 S str, 1975 HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline) 1976 if (isOutputRange!(R, dchar) && isInputRange!S) { 1977 for (; !str.empty; str.popFront()) { 1978 filterHTMLEscape(dst, str.front, flags); 1979 } 1980 } 1981 1982 /******************************************************************************* 1983 * Writes the HTML escaped version of a given string to an output range 1984 * (also escapes double quotes). 1985 */ 1986 void filterHTMLAttribEscape(R, S)(ref R dst, S str) 1987 if (isOutputRange!(R, dchar) && isInputRange!S) { 1988 for (; !str.empty; str.popFront()) { 1989 filterHTMLEscape( 1990 dst, 1991 str.front, 1992 HTMLEscapeFlags.escapeNewline | HTMLEscapeFlags.escapeQuotes 1993 ); 1994 } 1995 } 1996 1997 /******************************************************************************* 1998 * Writes the HTML escaped version of a given string to an output range 1999 * (escapes every character). 2000 */ 2001 void filterHTMLAllEscape(R, S)(ref R dst, S str) 2002 if (isOutputRange!(R, dchar) && isInputRange!S) { 2003 for (; !str.empty; str.popFront()) { 2004 dst.put("&#"); 2005 dst.put(to!string(cast(uint) str.front)); 2006 dst.put(';'); 2007 } 2008 } 2009 2010 /******************************************************************************* 2011 * Writes the HTML escaped version of a character to an output range. 
2012 */ 2013 void filterHTMLEscape(R)(ref R dst, 2014 dchar ch, 2015 HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline) { 2016 switch (ch) { 2017 default: 2018 if (flags & HTMLEscapeFlags.escapeUnknown) { 2019 dst.put("&#"); 2020 dst.put(to!string(cast(uint) ch)); 2021 dst.put(';'); 2022 } else 2023 dst.put(ch); 2024 break; 2025 case '"': 2026 if (flags & HTMLEscapeFlags.escapeQuotes) 2027 dst.put("""); 2028 else 2029 dst.put('"'); 2030 break; 2031 case '\'': 2032 if (flags & HTMLEscapeFlags.escapeQuotes) 2033 dst.put("'"); 2034 else 2035 dst.put('\''); 2036 break; 2037 case '\r', '\n': 2038 if (flags & HTMLEscapeFlags.escapeNewline) { 2039 dst.put("&#"); 2040 dst.put(to!string(cast(uint) ch)); 2041 dst.put(';'); 2042 } else 2043 dst.put(ch); 2044 break; 2045 case 'a': .. case 'z': 2046 goto case; 2047 case 'A': .. case 'Z': 2048 goto case; 2049 case '0': .. case '9': 2050 goto case; 2051 case ' ', '\t', '-', '_', '.', ':', ',', ';', 2052 '#', '+', '*', '?', '=', '(', ')', '/', '!', 2053 '%', '{', '}', '[', ']', '`', '´', '$', '^', '~': 2054 dst.put(cast(char) ch); 2055 break; 2056 case '<': 2057 dst.put("<"); 2058 break; 2059 case '>': 2060 dst.put(">"); 2061 break; 2062 case '&': 2063 dst.put("&"); 2064 break; 2065 } 2066 } 2067 2068 /// Flags for HTML-escaping some symbols. 2069 enum HTMLEscapeFlags { 2070 escapeMinimal = 0, 2071 escapeQuotes = 1 << 0, 2072 escapeNewline = 1 << 1, 2073 escapeUnknown = 1 << 2 2074 } 2075 2076 /// Functions for work with string data 2077 2078 /******************************************************************************* 2079 * Checks if all characters in 'str' are contained in 'chars'. 
*/
bool allOf(string str, string chars)
@safe pure {
    foreach (dchar ch; str) {
        if (!chars.canFind(ch)) {
            return false;
        }
    }
    return true;
}

/*******************************************************************************
 * CTFE-friendly variant of `std.string.indexOf` for a single character.
 *
 * At compile time a plain loop is used; at run time the call is forwarded
 * to `std.string.indexOf`.
 *
 * Params:
 *     s = string to search
 *     c = character to look for
 *     cs = whether the comparison is case sensitive
 * Returns:
 *     The index of the first match, or -1 if there is none.
 */
ptrdiff_t indexOfCT(Char)(in Char[] s, dchar c, CaseSensitive cs = CaseSensitive.yes)
@safe pure {
    if (__ctfe) {
        if (cs == CaseSensitive.yes) {
            foreach (i, dchar ch; s) {
                if (ch == c) {
                    return i;
                }
            }
        } else {
            c = std.uni.toLower(c);
            foreach (i, dchar ch; s) {
                if (std.uni.toLower(ch) == c) {
                    return i;
                }
            }
        }
        return -1;
    }
    return std.string.indexOf(s, c, cs);
}

/*******************************************************************************
 * Checks if any character in 'str' is contained in 'chars'.
 */
bool anyOf(string str, string chars)
@safe pure {
    // iterate code points (like allOf) so that a UTF-8 continuation byte is
    // never compared against a character of 'chars'
    foreach (dchar ch; str) {
        if (chars.canFind(ch)) {
            return true;
        }
    }
    return false;
}

/*******************************************************************************
 * Finds the closing bracket (works with any of '[', '$(LPAREN)', '<', '{').
 *
 * Params:
 *     str = input string
 *     nested = whether to skip nested brackets
 * Returns:
 *     The index of the closing bracket or -1 for unbalanced strings
 *     and strings that don't start with a bracket.
 */
sizediff_t matchBracket(string str, bool nested = true)
@safe pure nothrow {
    if (str.length < 2)
        return -1;

    char open = str[0], close = void;
    switch (str[0]) {
    case '[':
        close = ']';
        break;
    case '(':
        close = ')';
        break;
    case '<':
        close = '>';
        break;
    case '{':
        close = '}';
        break;
    default:
        return -1;
    }

    size_t level = 1;
    foreach (i, char c; str[1 .. $]) {
        if (nested && c == open)
            ++level;
        else if (c == close)
            --level;
        if (level == 0)
            return i + 1; // i is relative to str[1 .. $]
    }
    return -1;
}

////////////////////////////////////////////////////////////////////////////////
////                        DEPRECATED FUNCTIONS                            ////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
 * Returns the hierarchy of sections.
 *
 * Only header blocks at the top level of the parsed document are considered.
 * A header is attached below the most recent header that has a smaller
 * heading level.
 *
 * Params:
 *     markdown_source = Markdown text
 *     settings = parser settings; a default instance is created when null
 */
Section[] getMarkdownOutline(string markdown_source,
                             scope MarkdownSettings settings = null) {
    if (!settings)
        settings = new MarkdownSettings;
    auto all_lines = splitLines(markdown_source);
    auto lines = parseLines(all_lines, settings);
    Block root_block;
    parseBlocks(root_block, lines, null, settings);
    Section root;

    foreach (ref sb; root_block.blocks) {
        if (sb.type != BlockType.Header) {
            continue;
        }
        // descend along the last sub-section while it is a shallower heading
        auto s = &root;
        while (true) {
            if (s.subSections.length == 0)
                break;
            if (s.subSections[$ - 1].headingLevel >= sb.headerLevel)
                break;
            s = &s.subSections[$ - 1];
        }
        s.subSections ~= Section(
            sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string
        );
    }

    return root.subSections;
}
///
unittest {
    auto mdText = "## first\n## second\n### third\n# fourth\n### fifth";
    auto expected = [
        Section(2, " first", "first"),
        Section(2, " second", "second", [Section(3, " third", "third")]),
        Section(1, " fourth", "fourth", [Section(3, " fifth", "fifth")])
    ];
    assert(getMarkdownOutline(mdText) == expected);
}

/*******************************************************************************
 * Classifies every input line.
 *
 * For each line the indentation prefix (tab / four spaces / '>' quote
 * markers) is recorded and stripped, and the remainder is assigned a
 * `LineType`. `lines` is consumed in the process.
 */
private Line[] parseLines(ref string[] lines, scope MarkdownSettings settings)
pure @safe {
    Line[] ret;
    char subHeaderChar = '-';
    while (!lines.empty) {
        auto ln = lines.front;
        lines.popFront();

        Line lninfo;
        lninfo.text = ln;

        void determineIndent() {
            while (ln.length > 0) {
                if (ln[0] == '\t') {
                    lninfo.indent ~= IndentType.White;
                    ln.popFront();
                } else if (ln.startsWith("    ")) {
                    // one indentation level is a tab or exactly four spaces
                    lninfo.indent ~= IndentType.White;
                    ln.popFrontN(4);
                } else {
                    ln = ln.stripLeft();
                    if (ln.startsWith(">")) {
                        lninfo.indent ~= IndentType.Quote;
                        ln.popFront();
                    } else {
                        break;
                    }
                }
            }
            lninfo.unindented = ln;
        }

        determineIndent();

        // the order of the checks matters: the first matching type wins
        if ((settings.flags & MarkdownFlags.backtickCodeBlocks)
                && isCodeBlockDelimiter(ln)) {
            lninfo.type = LineType.CodeBlockDelimiter;
        } else if (isAtxHeaderLine(ln)) {
            lninfo.type = LineType.AtxHeader;
        } else if (isSetextHeaderLine(ln, subHeaderChar)) {
            lninfo.type = LineType.SetextHeader;
        } else if ((settings.flags & MarkdownFlags.supportTables)
                && isTableRowLine(ln)) {
            lninfo.type = LineType.Table;
        } else if (isHlineLine(ln)) {
            lninfo.type = LineType.Hline;
        } else if (isOListLine(ln)) {
            lninfo.type = LineType.OList;
        } else if (isUListLine(ln)) {
            lninfo.type = LineType.UList;
        } else if (isLineBlank(ln)) {
            lninfo.type = LineType.Blank;
        } else if (!(settings.flags & MarkdownFlags.noInlineHtml)
                && isHtmlBlockLine(ln)) {
            lninfo.type = LineType.HtmlBlock;
        } else
            lninfo.type = LineType.Plain;

        ret ~= lninfo;
    }
    return ret;
}

/*******************************************************************************
 * Builds the block tree below `root` from classified lines.
 *
 * Lines are consumed from the front of `lines` for as long as their
 * indentation starts with `baseIndent`; the function returns at the first
 * line that does not belong to this indentation level.
 *
 * Params:
 *     root = block receiving the parsed child blocks
 *     lines = remaining lines, consumed from the front
 *     baseIndent = indentation prefix of the block being parsed
 *     settings = parser settings, passed on to nested calls
 */
private void parseBlocks(ref Block root,
                         ref Line[] lines,
                         IndentType[] baseIndent,
                         scope MarkdownSettings settings)
pure @safe {
    if (baseIndent.length == 0) {
        root.type = BlockType.Text;
    } else if (baseIndent[$ - 1] == IndentType.Quote) {
        root.type = BlockType.Quote;
    }

    while (!lines.empty) {
        auto ln = lines.front;

        if (ln.type == LineType.Blank) {
            lines.popFront();
            continue;
        }

        if (ln.indent != baseIndent) {
            // a line outside of this indentation level ends the block
            if (ln.indent.length < baseIndent.length ||
                    ln.indent[0 .. baseIndent.length] != baseIndent) {
                return;
            }

            auto cindent = baseIndent ~ IndentType.White;
            if (ln.indent == cindent) {
                // exactly one extra whitespace level: indented code block
                Block cblock;
                cblock.type = BlockType.Code;
                while (!lines.empty &&
                        lines.front.indent.length >= cindent.length &&
                        lines.front.indent[0 .. cindent.length] == cindent) {
                    cblock.text ~= lines.front.unindent(cindent.length);
                    lines.popFront();
                }
                root.blocks ~= cblock;
            } else {
                Block subblock;
                parseBlocks(subblock,
                            lines,
                            ln.indent[0 .. baseIndent.length + 1],
                            settings);
                root.blocks ~= subblock;
            }
        } else {
            Block b;
            void processPlain() {
                b.type = BlockType.Paragraph;
                b.text = skipText(lines, baseIndent);
            }

            final switch (ln.type) {
            case LineType.Undefined:
                assert(false);
            case LineType.Blank:
                assert(false);
            case LineType.Plain:
                // a plain line followed by an underline is a setext header
                if (lines.length >= 2 &&
                        lines[1].type == LineType.SetextHeader) {
                    auto setln = lines[1].unindented;
                    b.type = BlockType.Header;
                    b.text = [ln.unindented];
                    b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                    lines.popFrontN(2);
                } else {
                    processPlain();
                }
                break;
            case LineType.Hline:
                b.type = BlockType.Plain;
                b.text = ["<hr>"];
                lines.popFront();
                break;
            case LineType.AtxHeader:
                b.type = BlockType.Header;
                string hl = ln.unindented;
                b.headerLevel = 0;
                while (hl.length > 0 && hl[0] == '#') {
                    b.headerLevel++;
                    hl = hl[1 .. $];
                }
                // drop the optional closing '#' run and trailing spaces
                while (hl.length > 0 && (hl[$ - 1] == '#' || hl[$ - 1] == ' '))
                    hl = hl[0 .. $ - 1];
                b.text = [hl];
                lines.popFront();
                break;
            case LineType.SetextHeader:
                lines.popFront();
                break;
            case LineType.UList:
            case LineType.OList:
                b.type = ln.type == LineType.UList ? BlockType.UList : BlockType.OList;
                auto itemindent = baseIndent ~ IndentType.White;
                bool firstItem = true, paraMode = false;
                while (!lines.empty && lines.front.type == ln.type &&
                        lines.front.indent == baseIndent) {
                    Block itm;
                    itm.text = skipText(lines, itemindent);
                    itm.text[0] = removeListPrefix(itm.text[0], ln.type);

                    // emit <p></p> if there are blank lines between the items
                    if (firstItem && !lines.empty &&
                            lines.front.type == LineType.Blank) {
                        paraMode = true;
                    }
                    firstItem = false;
                    if (paraMode) {
                        Block para;
                        para.type = BlockType.Paragraph;
                        para.text = itm.text;
                        itm.blocks ~= para;
                        itm.text = null;
                    }

                    parseBlocks(itm, lines, itemindent, settings);
                    itm.type = BlockType.ListItem;
                    b.blocks ~= itm;
                }
                break;
            case LineType.HtmlBlock:
                int nestlevel = 0;
                auto starttag = parseHtmlBlockLine(ln.unindented);
                if (!starttag.isHtmlBlock || !starttag.open)
                    break;

                b.type = BlockType.Plain;
                while (!lines.empty) {
                    if (lines.front.indent.length < baseIndent.length) {
                        break;
                    }
                    if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                        break;
                    }

                    auto str = lines.front.unindent(baseIndent.length);
                    auto taginfo = parseHtmlBlockLine(str);
                    b.text ~= lines.front.unindent(baseIndent.length);
                    lines.popFront();
                    // track nesting of the tag that opened the block
                    if (taginfo.isHtmlBlock
                            && taginfo.tagName == starttag.tagName) {
                        nestlevel += taginfo.open ? 1 : -1;
                    }
                    if (nestlevel <= 0) {
                        break;
                    }
                }
                break;
            case LineType.CodeBlockDelimiter:
                lines.popFront(); // TODO: get language from line
                b.type = BlockType.Code;
                while (!lines.empty) {
                    if (lines.front.indent.length < baseIndent.length) {
                        break;
                    }
                    if (lines.front.indent[0 .. baseIndent.length] != baseIndent) {
                        break;
                    }
                    if (lines.front.type == LineType.CodeBlockDelimiter) {
                        lines.popFront();
                        break;
                    }
                    b.text ~= lines.front.unindent(baseIndent.length);
                    lines.popFront();
                }
                break;
            case LineType.Table:
                lines.popFront();
                // Can this be a valid table (is there a next line that
                // could be a header separator)?
                if (lines.empty) {
                    processPlain();
                    break;
                }
                Line lnNext = lines.front;
                immutable bool isTableHeader = (
                    (lnNext.type == LineType.Table)
                    && (lnNext.text.indexOf(" -") >= 0)
                    && (lnNext.text.indexOf("- ") >= 0)
                    && lnNext.text.allOf("-:| ")
                );
                if (!isTableHeader) {
                    // Not a valid table header, so let's assume it's plain markdown
                    processPlain();
                    break;
                }
                b.type = BlockType.Table;
                // Parse header
                b.blocks ~= splitTableRow!(BlockType.TableHeader)(ln);
                // Parse table rows
                lines.popFront();
                while (!lines.empty) {
                    ln = lines.front;
                    if (ln.type != LineType.Table)
                        break; // not a table row, so let's assume it's the end of the table
                    b.blocks ~= splitTableRow(ln);
                    lines.popFront();
                }
                break;
            }
            root.blocks ~= b;
        }
    }
}