Watch Print
Author: hu@yifeishu.com Last modified: 0000-00-00 00:00 Helpful: 3 Visits: 1027

亲手打造一份类Markdown规则解析器

Markdown 的语法规则在一些细节上总是不尽如人意，于是 2019 年春我尝试定制了一份自定义语法的解析器，命名为 Kuder。本站 yifeishu.com 也是依托 Kuder 实现的文章托管平台。

这是在线语法例子。也可以通过NPM包管理器安装使用,这里一直包含最新代码。

以下是第一版代码:

/**
 * kuder - a kude parser
 * Base ES5
 * Copyright (c) 2019, hu@kude.me (MIT Licensed)
 */

; (function (root) {
   'use strict';

   // Debug helpers used throughout the file; setDebug() points them either
   // at the console or at a no-op.
   var log, logTable, logTime, logTimeEnd;

   /**
    * Switch the internal debug helpers on or off.
    *
    * When enabled, each helper forwards to the console method of the same
    * purpose, always calling it with `console` as the receiver (some hosts
    * reject detached console methods). A console method that the host does
    * not provide is replaced by a no-op, so debug mode never crashes the
    * parser.
    *
    * @param {*} enable - truthy to forward to the console, falsy to silence
    */
   function setDebug(enable) {
      var noop = function () { };

      // Build a forwarding wrapper for console[name], or fall back to noop.
      function pick(name) {
         if (typeof console === 'undefined' || typeof console[name] !== 'function') {
            return noop;
         }
         return function () {
            return console[name].apply(console, arguments);
         };
      }

      if (enable) {
         log = pick('log');
         logTable = pick('table');
         logTime = pick('time');
         logTimeEnd = pick('timeEnd');
      } else {
         log = logTable = logTime = logTimeEnd = noop;
      }
   }

   // Silent by default.
   setDebug(false);


   // Type tag for plain content: un-styled inline text fragments, and lines
   // that match none of the block rules.
   var NORMAL = 'normal';

   /**
    * ------- Parser -------
    */
   /**
    * Parser constructor.
    *
    * Caches the rule names from `rules.inline` / `rules.block` and builds
    * `inlineTokenRE`, a regular expression that finds the first span wrapped
    * in a pair of identical single-character inline markers. The 'link' rule
    * is a RegExp rather than a marker character, so it is left out of the
    * character class.
    */
   function Parser() {
      var inlineRules = this.rules.inline;

      this.inlineRuleNames = Object.keys(inlineRules);
      this.blockRuleNames = Object.keys(this.rules.block);

      var markers = [];
      for (var i = 0; i < this.inlineRuleNames.length; i++) {
         var ruleName = this.inlineRuleNames[i];
         if (ruleName !== 'link') {
            markers.push(inlineRules[ruleName]);
         }
      }
      this.inlineTokenRE = new RegExp('([' + markers.join('') + ']).*?\\1');

      log('Inline Rules: ', this.inlineRuleNames);
      log('Inline tokens: ', this.inlineTokenRE.toString());
      log('Block Rules: ', this.blockRuleNames);
   }

   /**
    * Syntax rules shared by every Parser instance.
    *
    * inline     - single-character markers that wrap a span (marker, text,
    *              same marker), plus `link`, which is a RegExp for
    *              `[@id text]` (id required, text optional).
    * block      - whole-line patterns, tried in the order listed here.
    * blockCode  - fenced preformatted block (global flag: it is exec()-ed
    *              repeatedly over the whole document).
    * footerLink - link definition line of the form `[id](url optional title)`.
    */
   Parser.prototype.rules = {
      inline: {
         'code': '`',
         'bold': '*',
         'italic': '/',
         'highlight': '^',
         'underline': '_',
         'lineThrough': '-',
         'link': /\[@\s*(\S+)\s*(\S\s*?\S*)?\s*\]/
      },
      block: {
         // an empty line
         'newLine': /^$/,
         // a line starting with three or more '-'
         'horizontal': /^-{3,}/,
         // one to six '#', an optional '!', one whitespace character, then the text
         'title': /^(#{1,6})(!)?\s(.*)$/,
         // '+', one whitespace character, then the item text
         'unorderList': /^\+\s(.*)$/,
         // digits, '.', one whitespace character, then the item text
         'orderList': /^\d+?\.\s(.*)$/,
         // three or more numbers separated by '|'
         // NOTE(review): the meaning of the numbers is not visible here - confirm
         'table': /^\d+\s*(\|\s*\d+\s*){2,}$/,
         // a whole line of the form `[!id alt]` (alt optional)
         'image': /^\[!\s*(\S+)\s*(\S\s*?\S*)?\s*\]$/
      },
      // Three backticks at the start of the input or of a line, then the
      // shortest possible content (capture group 2), then three backticks
      // followed by a line break OR the end of the input, so a fenced block
      // that closes on the very last line is still recognised.
      blockCode: /(^|\r?\n)`{3}((.|\r|\n)*?)`{3}(\r?\n|$)/g,
      footerLink: /^\[\s*(\S+)\s*\]\(\s*(\S+)\s*(.*)\)$/
   };

   /**
    * Resolve an inline token back to the name of the rule that owns it.
    *
    * Successful lookups are memoised as properties on this function object,
    * keyed by the token (a RegExp token is keyed by its string form).
    *
    * @param {string|RegExp} token - a value taken from `rules.inline`
    * @returns {string|undefined} the rule name, or undefined when no rule matches
    */
   Parser.prototype.getInlineRuleName = function (token) {
      var memo = this.getInlineRuleName;
      if (memo[token]) {
         return memo[token];
      }

      var inlineRules = this.rules.inline;
      var names = this.inlineRuleNames;
      for (var i = 0; i < names.length; i++) {
         var name = names[i];
         // A falsy (empty) rule name ends the scan.
         if (!name) {
            break;
         }
         if (inlineRules[name] === token) {
            memo[token] = name;
            return name;
         }
      }
   };

   /**
    * Break a text chunk into lines (LF or CRLF) and strip the leading and
    * trailing whitespace of every line.
    *
    * @param {string} src - text chunk
    * @returns {string[]} trimmed lines, empty lines included
    */
   Parser.prototype.splitLines = function (src) {
      var rawLines = src.split(/\r?\n/);
      var trimmed = [];
      for (var i = 0; i < rawLines.length; i++) {
         trimmed.push(rawLines[i].trim());
      }
      return trimmed;
   };

   /**
    * Parse a whole document into a node tree.
    *
    * Steps: escape '<', cut out the fenced preformatted blocks, split the
    * remaining text chunks into lines, collect the footer link definitions
    * from the last text chunk, then lex every text chunk.
    *
    * @param {string} content - source document
    * @returns {{links: Object, tree: Array}} the link table and the node tree
    */
   Parser.prototype.parse = function (content) {
      logTime('Parser time');
      // Escape EVERY '<'. A string pattern passed to replace() only replaces
      // the first occurrence, so a global regular expression is required.
      content = content.replace(/</g, '&lt;');

      var chunks = this.seekPreformatted(content);

      // Turn each text chunk into an array of lines; preformatted chunks are
      // objects and are left untouched.
      var lastNormalChunk, index = 0, length = chunks.length;
      while (index < length) {
         if (typeof chunks[index] === 'string') {
            lastNormalChunk = chunks[index] = this.splitLines(chunks[index]);
            log('Split chunks to lines:');
            logTable(lastNormalChunk);
         }
         index++;
      }

      // Links must be known before lexing, because the image and inline-link
      // rules only fire for known ids. With no text chunk at all (empty
      // input) there is nothing to scan, so the table is simply empty.
      this.links = lastNormalChunk ? this.seekFooterLink(lastNormalChunk) : {};
      log('%cAll links:', 'color: green');
      logTable(this.links);

      var lexTree = [], count = 0;
      while (count < length) {
         if (chunks[count].type) {
            // code chunk
            lexTree.push(chunks[count]);
         } else {
            // lines
            lexTree = lexTree.concat(this.lex(chunks[count]));
         }
         count++;
      }
      log('%cParsed tree:\n', 'color: green', lexTree);
      logTimeEnd('Parser time');

      return { links: this.links, tree: lexTree };
   };

   /**
    * Cut the document into alternating pieces: the text before each fenced
    * block (a string, possibly empty) followed by a
    * `{ type: 'preformatted', text }` object for the block itself. Any
    * non-empty text after the last block is appended as a final string.
    *
    * @param {string} content - whole document
    * @returns {Array<string|Object>} pieces in document order
    */
   Parser.prototype.seekPreformatted = function (content) {
      var fenceRE = this.rules.blockCode;
      var pieces = [];
      var cursor = 0;
      var hit = fenceRE.exec(content);

      while (hit) {
         pieces.push(
            content.substring(cursor, hit.index),
            { type: 'preformatted', text: hit[2] }
         );
         cursor = hit.index + hit[0].length;
         hit = fenceRE.exec(content);
      }

      var tail = content.substring(cursor);
      if (tail.length) {
         pieces.push(tail);
      }

      return pieces;
   };

   /**
    * Collect the link definitions found at the bottom of a list of lines.
    *
    * Walks upward from the last line: empty lines are skipped, every line
    * that is a link definition is recorded and REMOVED from `lines`, and the
    * walk stops at the first non-empty line that is not a definition.
    *
    * @param {string[]} lines - lines of a text chunk; mutated in place
    * @returns {Object} map of link id to `{ url, title }`
    */
   Parser.prototype.seekFooterLink = function (lines) {
      var found = {};

      for (var i = lines.length - 1; i >= 0; i--) {
         if (!lines[i].length) {
            continue;
         }

         var detail = this.matchFooterLink(lines[i]);
         if (!detail) {
            break;
         }

         found[detail.id] = { url: detail.url, title: detail.title };
         lines.splice(i, 1);
      }

      return found;
   };

   /**
    * Test a single line against the footer link rule.
    *
    * @param {string} line - one trimmed line
    * @returns {{id: string, url: string, title: (string|undefined)}|null}
    *          the parsed definition, or null when the line is not one;
    *          `title` is undefined when the definition has no title text
    */
   Parser.prototype.matchFooterLink = function (line) {
      var parts = this.rules.footerLink.exec(line);
      if (!parts) {
         return null;
      }

      var detail = { id: parts[1], url: parts[2], title: undefined };
      if (parts[3].length) {
         detail.title = parts[3];
      }
      return detail;
   };

   /**
    * Lex the lines of one text chunk into block nodes.
    *
    * Every line is classified by `seekBlock`. A node that carries text gets
    * its inline content parsed into `children`, and a plain (NORMAL) line
    * with text becomes a 'paragraph'. The flat list is then handed to
    * `lexAmend` for grouping.
    *
    * @param {string[]} lines - trimmed lines
    * @returns {Array} block nodes
    */
   Parser.prototype.lex = function (lines) {
      var self = this;

      var nodes = lines.map(function (line) {
         var node = self.seekBlock(line);
         if (!node.text) {
            return node;
         }
         if (node.type === NORMAL) {
            node.type = 'paragraph';
         }
         node.children = self.seekInline(node.text);
         return node;
      });

      return this.lexAmend(nodes);
   };

   /**
    * Classify one line by trying the block rules in their declared order and
    * stopping at the first rule that yields something other than NORMAL.
    *
    * @param {string} line - one trimmed line
    * @returns {Object} the node produced by `matchBlock`; its type is NORMAL
    *          when no block rule matched
    */
   Parser.prototype.seekBlock = function (line) {
      var names = this.blockRuleNames;

      // The first rule is always tried; the others only while nothing matched.
      var node = this.matchBlock(line, names[0]);
      for (var i = 1; i < names.length && node.type === NORMAL; i++) {
         node = this.matchBlock(node.text, names[i]);
      }

      return node;
   };

   /**
    * Try a single block rule against a line.
    *
    * @param {string} src - one trimmed line
    * @param {string} ruleName - a key of `rules.block`
    * @returns {Object} `{ type: NORMAL, text: src }` when the rule does not
    *          apply; otherwise a node whose `type` is the rule name, with
    *          `text` (and `level` / `isCenter` for titles, `linkId` for
    *          images) taken from the match
    */
   Parser.prototype.matchBlock = function (src, ruleName) {
      var cap = this.rules.block[ruleName].exec(src);

      // An image only counts when its id was defined as a footer link. The
      // check must look at OWN properties only: with the `in` operator, ids
      // such as "constructor" or "toString" would be found on
      // Object.prototype and treated as known links. A missing link table is
      // treated as empty.
      var isUnknownImage = !!cap && ruleName === 'image' &&
         !(this.links && Object.prototype.hasOwnProperty.call(this.links, cap[1]));

      if (!cap || isUnknownImage) {
         return { type: NORMAL, text: src };
      }

      var vo = { type: ruleName };
      switch (ruleName) {
         case 'title':
            vo.level = cap[1].length; // title level
            vo.isCenter = !!cap[2]; // align center
            vo.text = cap[3];
            break;
         case 'table':
            vo.text = src;
            break;
         case 'image':
            vo.linkId = cap[1]; // image id
            vo.text = cap[2]; // image alt
            break;
         default:
            // rules without a capture group (newLine, horizontal) leave text undefined
            vo.text = cap[1];
      }
      return vo;
   };

   /**
    * Split a text into inline groups.
    *
    * Repeatedly finds the next span wrapped in a pair of identical inline
    * markers. The text in front of the span is passed to `seekInlineLink`,
    * the span itself becomes a group typed after its marker, and the search
    * continues behind the span. Whatever is left at the end also goes to
    * `seekInlineLink`.
    *
    * @param {string} text - text to scan
    * @param {Array} [groups] - array to append to; a new one is created when omitted
    * @returns {Array} the groups array
    */
   Parser.prototype.seekInline = function (text, groups) {
      var collected = groups || [];
      var rest = text;
      var hit;

      while ((hit = this.inlineTokenRE.exec(rest))) {
         var before = rest.substring(0, hit.index);
         var fromMarker = rest.substring(hit.index);

         if (before.length) {
            this.seekInlineLink(before, collected);
         }

         var marker = hit[1];
         var pairRE = new RegExp('\\' + marker + '(.*?)' + '\\' + marker);
         var pair = pairRE.exec(fromMarker);

         collected.push({ type: this.getInlineRuleName(marker), text: pair[1] });
         rest = fromMarker.substring(pair[0].length);
      }

      if (rest.length) {
         this.seekInlineLink(rest, collected);
      }

      return collected;
   };

   /**
    * Split a text into plain fragments and inline links, appending the
    * resulting groups to `groups`.
    *
    * A `[@id text]` reference becomes a link group only when `id` is an OWN
    * key of the link table. A reference to an unknown id stays in the output
    * as literal text, and scanning continues behind it so that later, valid
    * references on the same line are still turned into links.
    *
    * @param {string} text - text without inline markers
    * @param {Array} groups - array to append to (mutated)
    */
   Parser.prototype.seekInlineLink = function (text, groups) {
      var linkRE = this.rules.inline.link;
      var hasOwn = Object.prototype.hasOwnProperty;
      // A missing link table is treated as empty.
      var links = this.links || {};

      var rest = text;
      var plain = ''; // literal text collected since the last emitted group
      var match;

      while (rest.length && (match = linkRE.exec(rest))) {
         var end = match.index + match[0].length;

         // hasOwnProperty instead of `in`: ids like "constructor" must not
         // be resolved through Object.prototype.
         if (hasOwn.call(links, match[1])) {
            plain += rest.substring(0, match.index);
            if (plain.length) {
               groups.push({ type: NORMAL, text: plain });
               plain = '';
            }
            groups.push({
               type: this.getInlineRuleName(linkRE),
               text: match[2],
               linkId: match[1]
            });
         } else {
            // Unknown id: keep the reference verbatim and look further.
            plain += rest.substring(0, end);
         }

         rest = rest.substring(end);
      }

      plain += rest;
      if (plain.length) {
         groups.push({ type: NORMAL, text: plain });
      }
   };

   /**
    * Post-process a flat list of block nodes, in place:
    *
    * - a run of consecutive 'unorderList' (or 'orderList') nodes is wrapped
    *   in one 'unorderListBox' (or 'orderListBox') node;
    * - the 'paragraph' nodes directly following a 'table' node are turned
    *   into 'tableRow' nodes, split into cells on '|', and moved into the
    *   table's `children`. Cells of the first row are 'tableHead', all
    *   others 'tableCell'.
    *
    * @param {Array} tree - block nodes; mutated
    * @returns {Array} the same array
    */
   Parser.prototype.lexAmend = function (tree) {
      var cellRE = /[^|]+/g;
      var boxTypeOf = { unorderList: 'unorderListBox', orderList: 'orderListBox' };
      var pos, span, kind, row, piece, cells;

      for (pos = 0; pos < tree.length; pos++) {
         kind = tree[pos].type;

         if (kind === 'unorderList' || kind === 'orderList') {
            // Measure the run of same-typed items, then swap it for one box.
            span = 1;
            while (pos + span < tree.length && tree[pos + span].type === kind) {
               span++;
            }
            tree.splice(pos, 0, {
               type: boxTypeOf[kind],
               children: tree.splice(pos, span)
            });
         } else if (kind === 'table') {
            // `span` counts the table line itself plus the rows adopted so far.
            span = 1;
            while (pos + span < tree.length) {
               row = tree[pos + span];
               if (row.type !== 'paragraph') {
                  break;
               }

               cells = [];
               while ((piece = cellRE.exec(row.text))) {
                  cells.push(piece[0]);
               }
               if (!cells.length) {
                  break;
               }

               row.type = 'tableRow';
               row.children = cells.map(function (text) {
                  return { type: span === 1 ? 'tableHead' : 'tableCell', text: text };
               });
               span++;
            }
            tree[pos].children = tree.splice(pos + 1, span - 1);
         }
      }

      return tree;
   };


   /**
    * ------- Renderer -------
    */
   /**
    * Renderer constructor. Takes no arguments and sets up no state; the link
    * table is supplied later through `setLinks`.
    */
   function Renderer() { }

   /**
    * Store the link table that `output` reads when it renders 'image' and
    * 'link' nodes.
    *
    * @param {Object} links - map of link id to link details (`output` reads
    *        `url` and `title` from each entry)
    */
   Renderer.prototype.setLinks = function (links) {
      this.links = links;
   };

   /**
    * Make a value safe inside a double-quoted HTML attribute by escaping
    * double quotes. null / undefined become an empty string.
    */
   function escapeAttr(value) {
      if (value === undefined || value === null) {
         return '';
      }
      return String(value).replace(/"/g, '&quot;');
   }

   /**
    * Render a single node to HTML.
    *
    * @param {Object} vo - node to render; `vo.type` selects the markup
    * @param {string} [covered] - already rendered HTML of the node's children
    * @returns {string|undefined} HTML for the node, or undefined for a type
    *          without markup (for example 'newLine')
    */
   Renderer.prototype.output = function (vo, covered) {
      var link, template;

      switch (vo.type) {
         case NORMAL:
            return covered || vo.text;
         case 'preformatted':
            return '<pre>' + vo.text + '</pre>';
         case 'code':
            return '<code>' + vo.text + '</code>';
         case 'bold':
            return '<strong>' + vo.text + '</strong>';
         case 'italic':
            return '<em>' + vo.text + '</em>';
         case 'highlight':
            return '<mark>' + vo.text + '</mark>';
         case 'underline':
            return '<span class="underline">' + vo.text + '</span>';
         case 'lineThrough':
            return '<span class="line-through">' + vo.text + '</span>';
         case 'paragraph':
            return '<p>' + covered + '</p>';
         case 'horizontal':
            return '<hr />';
         case 'orderListBox':
            return '<ol>' + covered + '</ol>';
         case 'unorderListBox':
            return '<ul>' + covered + '</ul>';
         case 'orderList':
         case 'unorderList':
            // Prefer the rendered children so inline markup and links inside
            // a list item show up, the same way titles are handled.
            return '<li>' + (covered || vo.text) + '</li>';
         case 'table':
            return '<table>' + covered + '</table>';
         case 'tableRow':
            return '<tr>' + covered + '</tr>';
         case 'tableHead':
            return '<th>' + vo.text + '</th>';
         case 'tableCell':
            return '<td>' + vo.text + '</td>';
         case 'title':
            template = [];
            template.push('<h', vo.level);
            vo.isCenter && template.push(' class="center"');
            template.push('>');
            template.push(covered || vo.text, '</h', vo.level, '>');
            return template.join('');
         case 'image':
            link = this.links[vo.linkId];
            template = [];
            // Attribute values come from the document, so quotes are escaped
            // to keep them from terminating the attribute early.
            template.push('<img src="', escapeAttr(link.url), '"');
            link.title && template.push(' title="', escapeAttr(link.title), '"');
            (vo.text || link.title) && template.push(' alt="', escapeAttr(vo.text || link.title), '"');
            template.push('/>');
            return template.join('');
         case 'link':
            link = this.links[vo.linkId];
            template = [];
            template.push('<a href="', escapeAttr(link.url), '"');
            link.title && template.push(' title="', escapeAttr(link.title), '"');
            template.push('>');
            // The link falls back to showing its url when it has no text.
            template.push(vo.text || link.url);
            template.push('</a>');
            return template.join('');
      }
   };

   /**
    * Render a list of nodes depth-first and concatenate the results.
    *
    * A node with `children` has them rendered first; their HTML is passed to
    * `output` as the second argument. A node without children is passed to
    * `output` alone. Nodes for which `output` returns nothing contribute an
    * empty string.
    *
    * @param {Array} tree - nodes to render
    * @returns {string} concatenated HTML
    */
   Renderer.prototype.cross = function (tree) {
      var parts = [];

      for (var i = 0; i < tree.length; i++) {
         var node = tree[i];
         if (node.children) {
            parts.push(this.output(node, this.cross(node.children)));
         } else {
            parts.push(this.output(node));
         }
      }

      // join() renders undefined entries as '', which string concatenation
      // would not.
      return parts.join('');
   };

   /**
    * Render a node tree into the final HTML, wrapped in a
    * `<div class="kuder">` container.
    *
    * @param {Array} tree - nodes to render
    * @returns {string} HTML document fragment
    */
   Renderer.prototype.render = function (tree) {
      logTime('Render time');

      var body = this.cross(tree);
      var html = '<div class="kuder">'.concat(body, '</div>');

      log('%cPrint html:', 'color:brown');
      log(html);
      logTimeEnd('Render time');

      return html;
   };


   /**
    * ------- kuder -------
    */
   /**
    * Convert a kuder document to HTML.
    *
    * @param {string} content - source document; an empty or falsy value is
    *        returned unchanged
    * @param {Function} [hook] - receives the parse result
    *        (`{ links, tree }`) before rendering. It may return a replacement
    *        result, or modify the given one in place and return nothing.
    * @param {*} [isDebug] - truthy to log debug output for this call
    * @returns {string} the rendered HTML (or `content` itself when empty)
    */
   function kuder(content, hook, isDebug) {
      setDebug(isDebug);
      logTime('Kuder time');
      var html;
      try {
         if (content && content.length) {
            var parsed = kuder.parser.parse(content);

            if (hook) {
               // Keep the original result when the hook returns nothing.
               parsed = hook(parsed) || parsed;
               log('%cModify parsed data:\n', 'color: green', parsed);
            }

            kuder.renderer.setLinks(parsed.links);
            html = kuder.renderer.render(parsed.tree);
         } else {
            html = content;
         }
         logTimeEnd('Kuder time');
      } finally {
         // Debug output is per call: switch it off again even when parsing,
         // the hook or rendering throws.
         setDebug(false);
      }

      return html;
   }

   // One shared Parser and one shared Renderer, created once and used by
   // every kuder() call; they are exposed as properties of the kuder function.
   kuder.parser = new Parser;
   kuder.renderer = new Renderer;


   /*
    * ------- export -------
    */
   // Pick the export mechanism of the host: CommonJS first, then an AMD
   // loader, otherwise a `kuder` property on the root object.
   var isCommonJS = typeof module !== 'undefined' && typeof exports === 'object';
   var isAMD = !isCommonJS && typeof define === 'function' && define.amd;

   if (isCommonJS) {
      module.exports = kuder;
   } else if (isAMD) {
      define(function () { return kuder; });
   } else {
      root.kuder = kuder;
   }

})(this || window || global);
×
Is this page helpful?
Yes No