亲手打造一份类Markdown规则解析器
Markdown语法规则在一些细节上总是不尽如人意,于是2019年春我尝试定制了一份自定义语法的解析器,命名为Kuder。本站yifeishu.com也是依托Kuder实现的文章托管平台。
这是在线语法示例。也可以通过NPM包管理器安装使用,这里始终包含最新代码。
以下是第一版代码:
/**
* kuder - a kude parser
* Base ES5
* Copyright (c) 2019, hu@kude.me (MIT Licensed)
*/
; (function (root) {
'use strict';
var log, logTable, logTime, logTimeEnd;
function setDebug(enable) {
if (enable) {
log = console.log;
logTable = console.table;
logTime = console.time;
logTimeEnd = console.timeEnd;
} else {
log = logTable = logTime = logTimeEnd = function () { };
}
}
setDebug(false);
var NORMAL = 'normal';
/**
* ------- Parser -------
*/
/**
 * Parser constructor.
 *
 * Caches the inline and block rule names (in declaration order) and builds
 * `inlineTokenRE`, a regex that finds the first "delimiter ... same delimiter"
 * pair in a line. The `link` rule is a regex, not a delimiter, so it is left
 * out of the delimiter set.
 */
function Parser() {
    var inlineRules = this.rules.inline;
    this.inlineRuleNames = Object.keys(inlineRules);
    this.blockRuleNames = Object.keys(this.rules.block);
    var tokenClass = this.inlineRuleNames
        .filter(function (name) {
            return name !== 'link' && typeof inlineRules[name] === 'string';
        })
        .map(function (name) {
            // Escape characters that are special inside [...] so the result
            // does not depend on where '^', '-', ']' or '\' happen to land.
            return inlineRules[name].replace(/[\\\]^-]/g, '\\$&');
        })
        .join('');
    this.inlineTokenRE = new RegExp('([' + tokenClass + ']).*?\\1');
    log('Inline Rules: ', this.inlineRuleNames);
    log('Inline tokens: ', this.inlineTokenRE.toString());
    log('Block Rules: ', this.blockRuleNames);
}
// Syntax rule table, stored on the prototype so every Parser reads the same rules.
Parser.prototype.rules = {
// Inline rules. Every value except `link` is the delimiter character that
// wraps a styled span (delimiter, text, same delimiter). The rule name is
// used as the node type of the span.
inline: {
'code': '`',
'bold': '*',
'italic': '/',
'highlight': '^',
'underline': '_',
'lineThrough': '-',
// Inline link reference "[@id text]": capture 1 is the link id,
// capture 2 the optional link text.
'link': /\[@\s*(\S+)\s*(\S\s*?\S*)?\s*\]/
},
// Block rules, tested against a whole line. seekBlock tries them in the
// order they are declared here and stops at the first one that matches.
block: {
// An empty line.
'newLine': /^$/,
// A line starting with three or more '-'.
'horizontal': /^-{3,}/,
// One to six '#', an optional '!' (read by matchBlock as the center flag),
// one whitespace character, then the title text.
'title': /^(#{1,6})(!)?\s(.*)$/,
// "+ item"
'unorderList': /^\+\s(.*)$/,
// "1. item"
'orderList': /^\d+?\.\s(.*)$/,
// Table marker line: at least three numbers separated by '|'.
'table': /^\d+\s*(\|\s*\d+\s*){2,}$/,
// Image reference "[!id alt]" filling the whole line: capture 1 is the
// link id, capture 2 the optional alt text.
'image': /^\[!\s*(\S+)\s*(\S\s*?\S*)?\s*\]$/
},
// Fenced code block delimited by three backticks; the closing fence must be
// followed by a line break. Capture 2 is the text between the fences.
// The regex is global because seekPreformatted walks all matches with exec().
blockCode: /(^|\r?\n)`{3}((.|\r|\n)*?)`{3}\r?\n/g,
// Link definition line "[id](url optional title)": captures are id, url, title.
footerLink: /^\[\s*(\S+)\s*\]\(\s*(\S+)\s*(.*)\)$/
};
/**
 * Find the name of the inline rule whose value is `token`.
 *
 * Results are memoised as properties of this function object, keyed by the
 * string form of the token.
 *
 * @param {string|RegExp} token - a delimiter character or the link regex.
 * @returns {string|undefined} the rule name, or undefined when no rule matches.
 */
Parser.prototype.getInlineRuleName = function (token) {
    var cache = this.getInlineRuleName;
    if (cache[token]) {
        return cache[token];
    }
    var names = this.inlineRuleNames;
    var name;
    // Stop at the first falsy name, like a shift()-driven scan would.
    for (var i = 0; i < names.length && (name = names[i]); i++) {
        if (this.rules.inline[name] === token) {
            cache[token] = name;
            return name;
        }
    }
};
/**
 * Break text into lines and strip surrounding whitespace from each one.
 *
 * @param {string} src - text using "\n" or "\r\n" line endings.
 * @returns {string[]} the trimmed lines, in order.
 */
Parser.prototype.splitLines = function (src) {
    var rawLines = src.split(/\r?\n/);
    var trimmed = [];
    for (var i = 0; i < rawLines.length; i++) {
        trimmed.push(rawLines[i].trim());
    }
    return trimmed;
};
/**
 * Parse source text into a node tree plus the table of link definitions.
 *
 * Steps: escape '<', cut out fenced code blocks, split the remaining text
 * chunks into lines, collect link definitions from the end of the last text
 * chunk, then lex every text chunk.
 *
 * @param {string} content - the source text.
 * @returns {{links: Object, tree: Array}} the link table and the node tree.
 */
Parser.prototype.parse = function (content) {
    logTime('Parser time');
    // Escape every '<' so markup typed in the source is shown as text.
    // A string pattern would only replace the first occurrence, hence /g.
    content = content.replace(/</g, '&lt;');
    var chunks = this.seekPreformatted(content);
    var length = chunks.length;
    var lastNormalChunk;
    var index;
    for (index = 0; index < length; index++) {
        if (typeof chunks[index] === 'string') {
            // Text chunks are replaced in place by their array of lines.
            lastNormalChunk = chunks[index] = this.splitLines(chunks[index]);
            log('Split chunks to lines:');
            logTable(lastNormalChunk);
        }
    }
    // Without any text chunk (e.g. empty content) there are no links to collect.
    this.links = lastNormalChunk ? this.seekFooterLink(lastNormalChunk) : {};
    log('%cAll links:', 'color: green');
    logTable(this.links);
    var lexTree = [];
    for (index = 0; index < length; index++) {
        if (chunks[index].type) {
            // code chunk: already a finished node
            lexTree.push(chunks[index]);
        } else {
            // lines
            lexTree = lexTree.concat(this.lex(chunks[index]));
        }
    }
    log('%cParsed tree:\n', 'color: green', lexTree);
    logTimeEnd('Parser time');
    return { links: this.links, tree: lexTree };
};
/**
 * Cut fenced code blocks out of the content.
 *
 * @param {string} content - the full source text.
 * @returns {Array} text chunks (strings) interleaved with
 *   `{ type: 'preformatted', text }` nodes, in source order. The text before
 *   each code block is always pushed (possibly as an empty string); the text
 *   after the last block is pushed only when it is non-empty.
 */
Parser.prototype.seekPreformatted = function (content) {
    var pieces = [];
    var cursor = 0;
    var found = this.rules.blockCode.exec(content);
    while (found) {
        pieces.push(content.substring(cursor, found.index));
        cursor = found.index + found[0].length;
        pieces.push({ type: 'preformatted', text: found[2] });
        found = this.rules.blockCode.exec(content);
    }
    var tail = content.substring(cursor);
    if (tail.length) {
        pieces.push(tail);
    }
    return pieces;
};
/**
 * Collect the link definitions found at the end of `lines`.
 *
 * Scans upwards from the last line, skipping empty lines, and stops at the
 * first non-empty line that is not a link definition. Every definition line
 * found is removed from `lines` (the array is modified in place).
 *
 * @param {string[]} lines - lines of the last text chunk.
 * @returns {Object} map of link id to `{ url, title }`.
 */
Parser.prototype.seekFooterLink = function (lines) {
    var found = {};
    for (var i = lines.length - 1; i >= 0; i--) {
        if (!lines[i].length) {
            continue;
        }
        var detail = this.matchFooterLink(lines[i]);
        if (!detail) {
            break;
        }
        found[detail.id] = {
            url: detail.url,
            title: detail.title
        };
        lines.splice(i, 1);
    }
    return found;
};
/**
 * Try to read one line as a link definition.
 *
 * @param {string} line - a single trimmed line.
 * @returns {{id: string, url: string, title: (string|undefined)}|null}
 *   the parsed definition, or null when the line is not one. `title` is
 *   undefined when the definition carries no title text.
 */
Parser.prototype.matchFooterLink = function (line) {
    var parts = this.rules.footerLink.exec(line);
    if (!parts) {
        return null;
    }
    var title;
    if (parts[3].length) {
        title = parts[3];
    }
    return { id: parts[1], url: parts[2], title: title };
};
/**
 * Turn the lines of one text chunk into block nodes.
 *
 * Each line is classified by seekBlock. A node that has text gets its inline
 * children from seekInline, and unmatched text becomes a 'paragraph'.
 * The flat list is then regrouped by lexAmend.
 *
 * @param {string[]} lines - trimmed lines of a text chunk.
 * @returns {Array} the block nodes for this chunk.
 */
Parser.prototype.lex = function (lines) {
    var nodes = lines.map(function (line) {
        var node = this.seekBlock(line);
        if (!node.text) {
            return node;
        }
        if (node.type === NORMAL) {
            node.type = 'paragraph';
        }
        node.children = this.seekInline(node.text);
        return node;
    }, this);
    return this.lexAmend(nodes);
};
/**
 * Classify one line by trying the block rules in their declared order.
 *
 * @param {string} line - a single trimmed line.
 * @returns {Object} the node from the first rule that matched, or the
 *   NORMAL node returned for the last rule when none matched.
 */
Parser.prototype.seekBlock = function (line) {
    var total = this.blockRuleNames.length;
    var cursor = 0;
    // The first rule is always attempted; later ones only while unmatched.
    var node = this.matchBlock(line, this.blockRuleNames[cursor]);
    cursor += 1;
    while (cursor < total && node.type === NORMAL) {
        node = this.matchBlock(node.text, this.blockRuleNames[cursor]);
        cursor += 1;
    }
    return node;
};
/**
 * Test one line against one block rule.
 *
 * @param {string} src - the line to test.
 * @param {string} ruleName - a key of `rules.block`.
 * @returns {Object} `{ type: NORMAL, text: src }` when the rule does not
 *   apply; otherwise a node typed with the rule name:
 *   - title: `level`, `isCenter`, `text`
 *   - table: `text` is the whole line
 *   - image: `linkId`, `text` (alt text)
 *   - anything else: `text` is the rule's first capture
 */
Parser.prototype.matchBlock = function (src, ruleName) {
    var cap = this.rules.block[ruleName].exec(src);
    // An image only counts when its id was defined as a link. Use an
    // own-property test: `in` would also accept inherited names such as
    // "constructor" or "toString".
    var isUnknownImage = !!cap && ruleName === 'image' &&
        !Object.prototype.hasOwnProperty.call(this.links || {}, cap[1]);
    if (!cap || isUnknownImage) {
        return { type: NORMAL, text: src };
    }
    var vo = { type: ruleName };
    switch (ruleName) {
        case 'title':
            vo.level = cap[1].length; // title level
            vo.isCenter = !!cap[2]; // align center
            vo.text = cap[3];
            break;
        case 'table':
            vo.text = src;
            break;
        case 'image':
            vo.linkId = cap[1]; // image id
            vo.text = cap[2]; // image alt
            break;
        default:
            vo.text = cap[1];
    }
    return vo;
};
/**
 * Split a line of text into inline nodes.
 *
 * Repeatedly finds the next delimiter pair. Text in front of it is handed to
 * seekInlineLink, the wrapped text becomes a node typed after the delimiter's
 * rule, and scanning continues behind the pair. Whatever is left at the end
 * also goes to seekInlineLink.
 *
 * @param {string} text - the text to scan.
 * @param {Array} [groups] - array to append to; a new one is created if omitted.
 * @returns {Array} the array the nodes were appended to.
 */
Parser.prototype.seekInline = function (text, groups) {
    var out = groups || [];
    var rest = text;
    var hit = this.inlineTokenRE.exec(rest);
    while (hit) {
        var before = rest.substring(0, hit.index);
        var tail = rest.substring(hit.index);
        if (before.length) {
            this.seekInlineLink(before, out);
        }
        var token = hit[1];
        var pairRE = new RegExp('\\' + token + '(.*?)' + '\\' + token);
        var pair = pairRE.exec(tail);
        out.push({ type: this.getInlineRuleName(token), text: pair[1] });
        rest = tail.substring(pair[0].length);
        hit = this.inlineTokenRE.exec(rest);
    }
    if (rest.length) {
        this.seekInlineLink(rest, out);
    }
    return out;
};
/**
 * Split text into plain-text nodes and inline link nodes, appending them to
 * `groups`.
 *
 * A "[@id text]" reference becomes a link node only when `id` is an own key
 * of `this.links`. A reference to an unknown id stays part of the plain text,
 * and scanning continues behind its opening "[@", so a valid reference later
 * in the same text is still recognised.
 *
 * @param {string} text - text that contains no inline delimiter pair.
 * @param {Array} groups - array the resulting nodes are appended to.
 */
Parser.prototype.seekInlineLink = function (text, groups) {
    var linkRE = this.rules.inline.link;
    var links = this.links || {};
    var hasOwn = Object.prototype.hasOwnProperty;
    var pending = ''; // plain text collected since the last emitted node
    var rest = text;
    var match;
    while (rest.length && (match = linkRE.exec(rest))) {
        // Own-property test: `in` would accept inherited names such as
        // "constructor" and produce a link without a real definition.
        if (hasOwn.call(links, match[1])) {
            pending += rest.substring(0, match.index);
            if (pending.length) {
                groups.push({ type: NORMAL, text: pending });
                pending = '';
            }
            groups.push({
                type: this.getInlineRuleName(linkRE),
                text: match[2],
                linkId: match[1]
            });
            rest = rest.substring(match.index + match[0].length);
        } else {
            // Unknown id: keep the opening "[@" as plain text and rescan
            // from right behind it.
            pending += rest.substring(0, match.index + 2);
            rest = rest.substring(match.index + 2);
        }
    }
    pending += rest;
    if (pending.length) {
        groups.push({ type: NORMAL, text: pending });
    }
};
/**
 * Regroup a flat list of block nodes (the array is modified in place).
 *
 * - A run of consecutive 'unorderList' / 'orderList' nodes is wrapped in one
 *   'unorderListBox' / 'orderListBox' node whose `children` are the items.
 * - The 'paragraph' nodes directly after a 'table' node become its rows:
 *   each is retyped 'tableRow' and its text is split on '|' into cell nodes,
 *   'tableHead' for the first row and 'tableCell' for the others. The rows
 *   are moved into the table node's `children`.
 *
 * @param {Array} tree - block nodes of one text chunk.
 * @returns {Array} the same array, regrouped.
 */
Parser.prototype.lexAmend = function (tree) {
    var cellRE = /[^|]+/g;
    // Every run of characters between '|' separators, in order.
    function splitCells(text) {
        var pieces = [];
        var hit;
        while (hit = cellRE.exec(text)) {
            pieces.push(hit[0]);
        }
        return pieces;
    }
    function toCells(pieces, cellType) {
        return pieces.map(function (piece) {
            return { type: cellType, text: piece };
        });
    }
    var pos, kind, run, items, row, pieces, rowCount;
    for (pos = 0; pos < tree.length; pos++) {
        kind = tree[pos].type;
        if (kind === 'unorderList' || kind === 'orderList') {
            run = 1;
            while (pos + run < tree.length && tree[pos + run].type === kind) {
                run++;
            }
            // Take the run out, then put a single box node in its place.
            items = tree.splice(pos, run);
            tree.splice(pos, 0, {
                type: kind === 'unorderList' ? 'unorderListBox' : 'orderListBox',
                children: items
            });
        } else if (kind === 'table') {
            rowCount = 0;
            while (pos + 1 + rowCount < tree.length) {
                row = tree[pos + 1 + rowCount];
                if (row.type !== 'paragraph') {
                    break;
                }
                pieces = splitCells(row.text);
                if (!pieces.length) {
                    break;
                }
                row.type = 'tableRow';
                row.children = toCells(pieces, rowCount === 0 ? 'tableHead' : 'tableCell');
                rowCount++;
            }
            tree[pos].children = tree.splice(pos + 1, rowCount);
        }
    }
    return tree;
};
/**
* ------- Renderer -------
*/
/**
 * Renderer constructor. It sets nothing up itself; the link table is
 * supplied later through setLinks().
 */
function Renderer() { }
/**
 * Store the link table that output() reads when it renders 'image' and
 * 'link' nodes.
 *
 * @param {Object} links - map of link id to its definition; output() reads
 *   the `url` and `title` of each entry.
 */
Renderer.prototype.setLinks = function (links) {
this.links = links;
};
/**
 * Render a single node to HTML.
 *
 * @param {Object} vo - the node; `vo.type` selects the markup.
 * @param {string} [covered] - the already rendered children of the node.
 *   Container nodes wrap it; nodes that carry inline children (paragraph,
 *   title, list items) prefer it over their raw `text`.
 * @returns {string} the HTML for the node. Node types without markup of
 *   their own (for example 'newLine') render as an empty string.
 */
Renderer.prototype.output = function (vo, covered) {
    var links = this.links || {};
    // Rendered children when there are any, otherwise the node's raw text.
    var inner = covered || vo.text;
    // Build ` name="value"`, escaping double quotes so the value cannot
    // close the attribute early.
    function attr(name, value) {
        return ' ' + name + '="' + String(value).replace(/"/g, '&quot;') + '"';
    }
    // Look the definition up as an own property only, so ids such as
    // "constructor" never resolve to something inherited.
    function findLink(id) {
        return Object.prototype.hasOwnProperty.call(links, id) ? links[id] : null;
    }
    var link, html;
    switch (vo.type) {
        case NORMAL:
            return inner;
        case 'preformatted':
            return '<pre>' + vo.text + '</pre>';
        case 'code':
            return '<code>' + vo.text + '</code>';
        case 'bold':
            return '<strong>' + vo.text + '</strong>';
        case 'italic':
            return '<em>' + vo.text + '</em>';
        case 'highlight':
            return '<mark>' + vo.text + '</mark>';
        case 'underline':
            return '<span class="underline">' + vo.text + '</span>';
        case 'lineThrough':
            return '<span class="line-through">' + vo.text + '</span>';
        case 'paragraph':
            return '<p>' + inner + '</p>';
        case 'horizontal':
            return '<hr />';
        case 'orderListBox':
            return '<ol>' + covered + '</ol>';
        case 'unorderListBox':
            return '<ul>' + covered + '</ul>';
        case 'orderList':
        case 'unorderList':
            // List items carry inline children just like paragraphs do;
            // render those instead of the raw, unformatted text.
            return '<li>' + inner + '</li>';
        case 'table':
            return '<table>' + covered + '</table >';
        case 'tableRow':
            return '<tr>' + covered + '</tr>';
        case 'tableHead':
            return '<th>' + vo.text + '</th>';
        case 'tableCell':
            return '<td>' + vo.text + '</td>';
        case 'title':
            return '<h' + vo.level + (vo.isCenter ? ' class="center"' : '') + '>' +
                inner + '</h' + vo.level + '>';
        case 'image':
            link = findLink(vo.linkId);
            if (!link) {
                // No definition for this id: fall back to the alt text.
                return vo.text || '';
            }
            html = '<img' + attr('src', link.url);
            if (link.title) {
                html += attr('title', link.title);
            }
            if (vo.text || link.title) {
                html += attr('alt', vo.text || link.title);
            }
            return html + '/>';
        case 'link':
            link = findLink(vo.linkId);
            if (!link) {
                // No definition for this id: fall back to the link text.
                return vo.text || '';
            }
            html = '<a' + attr('href', link.url);
            if (link.title) {
                html += attr('title', link.title);
            }
            return html + '>' + (vo.text || link.url) + '</a>';
        default:
            return '';
    }
};
/**
 * Render a list of nodes depth-first and concatenate the results.
 *
 * A node with `children` has them rendered first; that HTML is passed to
 * output() as the node's `covered` content.
 *
 * @param {Array} tree - nodes to render.
 * @returns {string} the combined HTML.
 */
Renderer.prototype.cross = function (tree) {
    var parts = [];
    for (var i = 0; i < tree.length; i++) {
        var node = tree[i];
        if (node.children) {
            parts.push(this.output(node, this.cross(node.children)));
        } else {
            parts.push(this.output(node));
        }
    }
    // join() turns an undefined result into an empty string.
    return parts.join('');
};
/**
 * Render a whole tree and wrap it in the `kuder` container element.
 *
 * @param {Array} tree - the top-level nodes.
 * @returns {string} the complete HTML.
 */
Renderer.prototype.render = function (tree) {
    logTime('Render time');
    var body = this.cross(tree);
    var html = '<div class="kuder">' + body + '</div>';
    log('%cPrint html:', 'color:brown');
    log(html);
    logTimeEnd('Render time');
    return html;
};
/**
* ------- kuder -------
*/
/**
 * Convert source text to HTML.
 *
 * @param {string} content - the source text. Empty or falsy content is
 *   returned unchanged.
 * @param {Function} [hook] - receives the parsed `{ links, tree }` object
 *   and returns the object to render.
 * @param {boolean} [isDebug] - truthy to log progress to the console for the
 *   duration of this call.
 * @returns {string} the rendered HTML, or `content` itself when it is empty.
 */
function kuder(content, hook, isDebug) {
    setDebug(isDebug);
    logTime('Kuder time');
    try {
        if (!content || !content.length) {
            return content;
        }
        var parsed = kuder.parser.parse(content);
        if (hook) {
            parsed = hook(parsed);
            log('%cModify parsed data:\n', 'color: green', parsed);
        }
        kuder.renderer.setLinks(parsed.links);
        return kuder.renderer.render(parsed.tree);
    } finally {
        // Runs on errors too, so a failing parse, hook or render cannot
        // leave debug logging switched on for later calls.
        logTimeEnd('Kuder time');
        setDebug(false);
    }
}
// Shared parser and renderer instances used by every kuder() call.
kuder.parser = new Parser();
kuder.renderer = new Renderer();
/*
* ------- export -------
*/
// Publish the API: CommonJS when `module`/`exports` exist, otherwise an AMD
// module when a loader is present, otherwise a `kuder` property on the root
// object handed to the wrapper function.
(function (api) {
    if (typeof module !== 'undefined' && typeof exports === 'object') {
        module.exports = api;
        return;
    }
    if (typeof define === 'function' && define.amd) {
        define(function () { return api; });
        return;
    }
    root.kuder = api;
})(kuder);
})(this || window || global);