亲手打造一份类 Markdown 规则解析器
Markdown 的语法规则在一些细节上总是不尽如人意,于是 2019 年春我尝试定制了一份自定义语法的解析器,命名为 Kuder;本站 yifeishu.com 亦是依托于 Kuder 实现的文章托管平台。
这是在线语法例子。也可以通过 NPM 包管理器安装使用,这里一直包含最新代码。
以下是第一版代码:
/**
 * kuder - a kude parser
 * Base ES5
 * Copyright (c) 2019, hu@kude.me (MIT Licensed)
 *
 * Usage: kuder(content, hook, isDebug) -> HTML string.
 * The module is exported through CommonJS, AMD, or as `root.kuder`.
 */
; (function (root) {
  'use strict';

  var log, logTable, logTime, logTimeEnd;

  /**
   * Switches the internal logging helpers between the console methods and
   * no-ops.
   * @param {boolean} enable - truthy to log through `console`.
   */
  function setDebug(enable) {
    if (enable) {
      log = console.log;
      logTable = console.table;
      logTime = console.time;
      logTimeEnd = console.timeEnd;
    } else {
      log = logTable = logTime = logTimeEnd = function () { };
    }
  }

  setDebug(false);

  var NORMAL = 'normal';

  /**
   * Own-property test that ignores keys inherited from Object.prototype
   * (for example "constructor" or "toString").
   * @param {Object} obj - object to inspect; may be null/undefined.
   * @param {string} key - property name.
   * @returns {boolean} true only when `key` is an own property of `obj`.
   */
  function hasOwn(obj, key) {
    return obj !== undefined && obj !== null &&
      Object.prototype.hasOwnProperty.call(obj, key);
  }

  /**
   * Makes a value safe to place inside a double-quoted HTML attribute.
   * Only `"` is rewritten so that entities produced earlier (such as `&lt;`)
   * are not double-escaped.
   * @param {*} value - attribute value; null/undefined become ''.
   * @returns {string} the escaped value.
   */
  function escapeAttr(value) {
    if (value === undefined || value === null) {
      return '';
    }
    return String(value).replace(/"/g, '&quot;');
  }

  /**
   * ------- Parser -------
   */

  /**
   * Builds the rule-name lists and the combined inline-token regex from
   * `this.rules`.
   */
  function Parser() {
    this.inlineRuleNames = Object.keys(this.rules.inline);
    this.blockRuleNames = Object.keys(this.rules.block);
    this.inlineRuleNameCache = {};

    // Every inline rule except `link` is a single delimiter character; the
    // `link` entry maps to undefined, which join('') turns into ''.
    var inlineTokens = this.inlineRuleNames.map(
      function (key) {
        if (key === 'link') return;
        return this.rules.inline[key];
      }.bind(this)
    ).join('');

    // Matches "<token> ... <same token>".
    this.inlineTokenRE = new RegExp('([' + inlineTokens + ']).*?\\1');

    log('Inline Rules: ', this.inlineRuleNames);
    log('Inline tokens: ', this.inlineTokenRE.toString());
    log('Block Rules: ', this.blockRuleNames);
  }

  /* rules */
  Parser.prototype.rules = {
    inline: {
      'code': '`',
      'bold': '*',
      'italic': '/',
      'highlight': '^',
      'underline': '_',
      'lineThrough': '-',
      'link': /\[@\s*(\S+)\s*(\S\s*?\S*)?\s*\]/
    },
    block: {
      'newLine': /^$/,
      'horizontal': /^-{3,}/,
      'title': /^(#{1,6})(!)?\s(.*)$/,
      'unorderList': /^\+\s(.*)$/,
      'orderList': /^\d+?\.\s(.*)$/,
      'table': /^\d+\s*(\|\s*\d+\s*){2,}$/,
      'image': /^\[!\s*(\S+)\s*(\S\s*?\S*)?\s*\]$/
    },
    blockCode: /(^|\r?\n)`{3}((.|\r|\n)*?)`{3}\r?\n/g,
    footerLink: /^\[\s*(\S+)\s*\]\(\s*(\S+)\s*(.*)\)$/
  };

  /**
   * Finds the inline rule whose token is identical (===) to `token`.
   * @param {string|RegExp} token - a delimiter character or the link regex.
   * @returns {string|undefined} the rule name, or undefined when unknown.
   */
  Parser.prototype.getInlineRuleName = function (token) {
    var cache = this.inlineRuleNameCache || (this.inlineRuleNameCache = {});
    var cacheKey = String(token);
    if (hasOwn(cache, cacheKey)) {
      return cache[cacheKey];
    }
    var names = this.inlineRuleNames;
    for (var i = 0; i < names.length; i++) {
      if (this.rules.inline[names[i]] === token) {
        cache[cacheKey] = names[i];
        return names[i];
      }
    }
  };

  /**
   * Splits text on LF / CRLF and trims every line.
   * @param {string} src - text to split.
   * @returns {string[]} trimmed lines.
   */
  Parser.prototype.splitLines = function (src) {
    return src.split(/\r?\n/).map(function (text) { return text.trim(); });
  };

  /**
   * Parses source text into a token tree plus the footer-link table.
   * Also stores the link table on `this.links` for the block/inline matchers.
   * @param {string} content - source text.
   * @returns {{links: Object, tree: Array}} parsed result.
   */
  Parser.prototype.parse = function (content) {
    logTime('Parser time');

    // Escape every '<' so raw markup in the source cannot reach the output.
    content = content.replace(/</g, '&lt;');

    var chunks = this.seekPreformatted(content);
    var length = chunks.length;
    var lastNormalChunk;
    var index;

    // String chunks are plain text: replace them in place by their lines.
    for (index = 0; index < length; index++) {
      if (typeof chunks[index] === 'string') {
        lastNormalChunk = chunks[index] = this.splitLines(chunks[index]);
        log('Split chunks to lines:');
        logTable(lastNormalChunk);
      }
    }

    // Footer links are only looked for at the end of the last text chunk.
    // There may be no text chunk at all (empty content), hence the guard.
    this.links = lastNormalChunk ? this.seekFooterLink(lastNormalChunk) : {};
    log('%cAll links:', 'color: green');
    logTable(this.links);

    var lexTree = [];
    for (index = 0; index < length; index++) {
      if (chunks[index].type) {
        // code chunk
        lexTree.push(chunks[index]);
      } else {
        // lines
        lexTree = lexTree.concat(this.lex(chunks[index]));
      }
    }

    log('%cParsed tree:\n', 'color: green', lexTree);
    logTimeEnd('Parser time');
    return { links: this.links, tree: lexTree };
  };

  /**
   * Cuts the content into alternating text chunks (strings) and
   * `{ type: 'preformatted', text }` objects for ``` fenced blocks.
   * @param {string} content - source text.
   * @returns {Array<string|Object>} chunks in document order.
   */
  Parser.prototype.seekPreformatted = function (content) {
    var blockCode = this.rules.blockCode;
    var chunks = [];
    var prevEnd = 0;
    var cap;

    // The regex is global and shared through the prototype; start clean in
    // case an earlier scan was interrupted before exec() returned null.
    blockCode.lastIndex = 0;

    while ((cap = blockCode.exec(content))) {
      chunks.push(content.substring(prevEnd, cap.index));
      prevEnd = cap.index + cap[0].length;
      chunks.push({ type: 'preformatted', text: cap[2] });
    }

    var suffixes = content.substring(prevEnd);
    if (suffixes.length) {
      chunks.push(suffixes);
    }
    return chunks;
  };

  /**
   * Collects footer link definitions from the end of `lines`, scanning
   * upwards. Blank lines are skipped; the scan stops at the first non-blank
   * line that is not a link definition. Matched lines are REMOVED from
   * `lines`.
   * @param {string[]} lines - lines of the last text chunk (mutated).
   * @returns {Object} map of link id -> { url, title }.
   */
  Parser.prototype.seekFooterLink = function (lines) {
    var links = {};
    if (!lines) {
      return links;
    }

    var index = lines.length - 1;
    var linkDetail;
    while (index >= 0) {
      if (lines[index].length) {
        linkDetail = this.matchFooterLink(lines[index]);
        if (!linkDetail) {
          break;
        }
        links[linkDetail.id] = { url: linkDetail.url, title: linkDetail.title };
        lines.splice(index, 1);
      }
      index--;
    }
    return links;
  };

  /**
   * Matches one `[id](url optional title)` footer line.
   * @param {string} line - a trimmed line.
   * @returns {{id: string, url: string, title: (string|undefined)}|null}
   */
  Parser.prototype.matchFooterLink = function (line) {
    var cap = this.rules.footerLink.exec(line);
    if (!cap) {
      return null;
    }
    return {
      id: cap[1],
      url: cap[2],
      title: cap[3].length ? cap[3] : undefined
    };
  };

  /**
   * Converts lines into block tokens, attaches inline children to every
   * token that has text, then groups lists and tables.
   * @param {string[]} lines - trimmed lines of one text chunk.
   * @returns {Array} block tokens.
   */
  Parser.prototype.lex = function (lines) {
    var lexTree = lines.map(function (line) {
      var vo = this.seekBlock(line);
      if (vo.text) {
        if (vo.type === NORMAL) vo.type = 'paragraph';
        vo.children = this.seekInline(vo.text);
      }
      return vo;
    }.bind(this));

    return this.lexAmend(lexTree);
  };

  /**
   * Tries each block rule in declaration order until one matches.
   * @param {string} line - a trimmed line.
   * @returns {Object} block token; `type` is NORMAL when no rule matched.
   */
  Parser.prototype.seekBlock = function (line) {
    var length = this.blockRuleNames.length;
    var count = 0;
    var vo = { text: line };
    do {
      var ruleName = this.blockRuleNames[count++];
      vo = this.matchBlock(vo.text, ruleName);
    } while (count < length && vo.type === NORMAL);
    return vo;
  };

  /**
   * Applies a single block rule to a line.
   * An image whose id has no footer link definition is treated as NORMAL.
   * @param {string} src - the line.
   * @param {string} ruleName - a key of `rules.block`.
   * @returns {Object} the token for that rule, or `{ type: NORMAL, text: src }`.
   */
  Parser.prototype.matchBlock = function (src, ruleName) {
    var cap = this.rules.block[ruleName].exec(src);
    if (!cap || (ruleName === 'image' && !hasOwn(this.links, cap[1]))) {
      return { type: NORMAL, text: src };
    }

    var vo = { type: ruleName };
    switch (ruleName) {
      case 'title':
        vo.level = cap[1].length; // title level
        vo.isCenter = !!cap[2]; // align center
        vo.text = cap[3];
        break;
      case 'table':
        vo.text = src;
        break;
      case 'image':
        vo.linkId = cap[1]; // image id
        vo.text = cap[2]; // image alt
        break;
      default:
        vo.text = cap[1];
    }
    return vo;
  };

  /**
   * Splits text into inline tokens (code, bold, italic, ...). Text outside a
   * delimiter pair is passed to seekInlineLink.
   * @param {string} text - text to scan.
   * @param {Array} [groups] - accumulator used by the recursion.
   * @returns {Array} inline tokens in order.
   */
  Parser.prototype.seekInline = function (text, groups) {
    groups = groups || [];
    var match = this.inlineTokenRE.exec(text);
    if (match) {
      var left = text.substring(0, match.index);
      var right = text.substring(match.index);
      if (left.length) {
        this.seekInlineLink(left, groups);
      }
      var token = match[1];
      // `right` starts with the token, so this re-match captures the inner text.
      var RE = new RegExp('\\' + token + '(.*?)' + '\\' + token);
      var tokenMatch = RE.exec(right);
      groups.push({
        type: this.getInlineRuleName(token),
        text: tokenMatch[1]
      });
      this.seekInline(right.substring(tokenMatch[0].length), groups);
    } else if (text.length) {
      this.seekInlineLink(text, groups);
    }
    return groups;
  };

  /**
   * Splits text into NORMAL tokens and `link` tokens (`[@ id text]`).
   * A reference only becomes a link when its id is defined in `this.links`;
   * otherwise the whole remaining text is emitted as NORMAL.
   * @param {string} text - text to scan.
   * @param {Array} groups - accumulator the tokens are pushed onto.
   */
  Parser.prototype.seekInlineLink = function (text, groups) {
    var match = this.rules.inline.link.exec(text);
    if (match && hasOwn(this.links, match[1])) {
      var left = text.substring(0, match.index);
      var right = text.substring(match.index);
      if (left.length) {
        groups.push({ type: NORMAL, text: left });
      }
      groups.push({
        type: this.getInlineRuleName(this.rules.inline.link),
        text: match[2],
        linkId: match[1]
      });
      this.seekInlineLink(right.substring(match[0].length), groups);
    } else if (text.length) {
      groups.push({ type: NORMAL, text: text });
    }
  };

  /**
   * Post-processes the flat token list in place: consecutive list items are
   * wrapped in a list box, and the paragraphs following a table marker
   * become its rows.
   * @param {Array} tree - block tokens (mutated).
   * @returns {Array} the same array.
   */
  Parser.prototype.lexAmend = function (tree) {
    var tableCellRE = /[^|]+/g;
    var index = 0;
    var count, vo, cap, detail;

    while (index < tree.length) {
      switch (tree[index].type) {
        case 'unorderList':
          count = 1;
          while (index + count < tree.length && tree[index + count].type === 'unorderList') count++;
          // The inner splice removes the run; the outer one inserts the box.
          tree.splice(index, 0, {
            type: 'unorderListBox',
            children: tree.splice(index, count)
          });
          break;
        case 'orderList':
          count = 1;
          while (index + count < tree.length && tree[index + count].type === 'orderList') count++;
          tree.splice(index, 0, {
            type: 'orderListBox',
            children: tree.splice(index, count)
          });
          break;
        case 'table':
          count = 1;
          // Turn each following paragraph into a table row; the first row
          // (count === 1) holds the header cells.
          while (index + count < tree.length && (vo = tree[index + count], vo.type === 'paragraph')) {
            detail = [];
            while ((cap = tableCellRE.exec(vo.text))) detail.push(cap[0]);
            if (!detail.length) {
              break;
            }
            vo.type = 'tableRow';
            vo.children = detail.map(function (text) {
              return {
                type: count === 1 ? 'tableHead' : 'tableCell',
                text: text
              };
            });
            count++;
          }
          tree[index].children = tree.splice(index + 1, count - 1);
          break;
      }
      index++;
    }
    return tree;
  };

  /**
   * ------- Renderer -------
   */
  function Renderer() { }

  /**
   * Stores the link table used when rendering images and links.
   * @param {Object} links - map of link id -> { url, title }.
   */
  Renderer.prototype.setLinks = function (links) {
    this.links = links;
  };

  /**
   * Renders a single token.
   * @param {Object} vo - the token.
   * @param {string} [covered] - already rendered HTML of the token's children.
   * @returns {string|undefined} HTML; undefined for types without a case
   *   (for example 'newLine'), which cross() joins as an empty string.
   */
  Renderer.prototype.output = function (vo, covered) {
    var template, link;
    switch (vo.type) {
      case NORMAL:
        return covered || vo.text;
      case 'preformatted':
        return '<pre>' + vo.text + '</pre>';
      case 'code':
        return '<code>' + vo.text + '</code>';
      case 'bold':
        return '<strong>' + vo.text + '</strong>';
      case 'italic':
        return '<em>' + vo.text + '</em>';
      case 'highlight':
        return '<mark>' + vo.text + '</mark>';
      case 'underline':
        return '<span class="underline">' + vo.text + '</span>';
      case 'lineThrough':
        return '<span class="line-through">' + vo.text + '</span>';
      case 'paragraph':
        return '<p>' + covered + '</p>';
      case 'horizontal':
        return '<hr />';
      case 'orderListBox':
        return '<ol>' + covered + '</ol>';
      case 'unorderListBox':
        return '<ul>' + covered + '</ul>';
      case 'orderList':
      case 'unorderList':
        return '<li>' + vo.text + '</li>';
      case 'table':
        return '<table>' + covered + '</table >';
      case 'tableRow':
        return '<tr>' + covered + '</tr>';
      case 'tableHead':
        return '<th>' + vo.text + '</th>';
      case 'tableCell':
        return '<td>' + vo.text + '</td>';
      case 'title':
        template = [];
        template.push('<h', vo.level);
        vo.isCenter && template.push(' class="center"');
        template.push('>');
        template.push(covered || vo.text, '</h', vo.level, '>');
        return template.join('');
      case 'image':
        link = this.links[vo.linkId];
        template = [];
        template.push('<img src="', escapeAttr(link.url), '"');
        link.title && template.push(' title="', escapeAttr(link.title), '"');
        (vo.text || link.title) && template.push(' alt="', escapeAttr(vo.text || link.title), '"');
        template.push('/>');
        return template.join('');
      case 'link':
        link = this.links[vo.linkId];
        template = [];
        template.push('<a href="', escapeAttr(link.url), '"');
        link.title && template.push(' title="', escapeAttr(link.title), '"');
        template.push('>');
        template.push(vo.text || link.url);
        template.push('</a>');
        return template.join('');
    }
  };

  /**
   * Renders a list of tokens depth-first and concatenates the results.
   * @param {Array} tree - tokens.
   * @returns {string} HTML.
   */
  Renderer.prototype.cross = function (tree) {
    return tree.map(function (group) {
      if (group.children) {
        return this.output(group, this.cross(group.children));
      }
      return this.output(group);
    }.bind(this)).join('');
  };

  /**
   * Renders a token tree wrapped in `<div class="kuder">`.
   * @param {Array} tree - block tokens.
   * @returns {string} HTML.
   */
  Renderer.prototype.render = function (tree) {
    logTime('Render time');
    var html = '<div class="kuder">' + this.cross(tree) + '</div>';
    log('%cPrint html:', 'color:brown');
    log(html);
    logTimeEnd('Render time');
    return html;
  };

  /**
   * ------- kuder -------
   */

  /**
   * Parses `content` and renders it to HTML.
   * @param {string} content - source text; empty/falsy input is returned as is.
   * @param {Function} [hook] - receives `{ links, tree }` and returns the
   *   (possibly modified) object to render.
   * @param {boolean} [isDebug] - enables console logging for this call.
   * @returns {string} HTML, or `content` itself when it is empty/falsy.
   */
  function kuder(content, hook, isDebug) {
    setDebug(isDebug);
    logTime('Kuder time');
    var html;
    try {
      if (content && content.length) {
        var parsed = kuder.parser.parse(content);
        if (hook) {
          parsed = hook(parsed);
          log('%cModify parsed data:\n', 'color: green', parsed);
        }
        kuder.renderer.setLinks(parsed.links);
        html = kuder.renderer.render(parsed.tree);
      } else {
        html = content;
      }
    } finally {
      // Always switch logging back off, even when parsing/rendering throws.
      logTimeEnd('Kuder time');
      setDebug(false);
    }
    return html;
  }

  kuder.parser = new Parser;
  kuder.renderer = new Renderer;

  /*
   * ------- export -------
   */
  if (typeof module !== 'undefined' && typeof exports === 'object') {
    module.exports = kuder;
  } else if (typeof define === 'function' && define.amd) {
    define(function () { return kuder; });
  } else {
    root.kuder = kuder;
  }
  // `this` is undefined at the top level of ES modules, and referencing an
  // undeclared `window`/`global` throws, so every candidate is typeof-guarded.
})(this || (typeof globalThis !== 'undefined' ? globalThis
  : typeof window !== 'undefined' ? window
  : typeof self !== 'undefined' ? self
  : typeof global !== 'undefined' ? global
  : {}));