build(aio): implement the remark renderer

The implementation adds three plugins to the remark processor: * remove support for code blocks triggered by indented text, so that only GFM triple-backtick fences are supported (the plugins below also add support for dgeni inline tags). * ignore content within `code-example` and `code-tabs` elements, which prevents that content from being accidentally treated as markdown. * ignore dgeni inline tags, e.g. `{@link ... }`, which prevents the content of the tags from being accidentally treated as markdown.
2017-04-11 18:24:08 +01:00
parent 540581da3e
commit 374bf1ed98
6 changed files with 659 additions and 5 deletions
--- a/aio/transforms/angular.io-package/index.js
+++ b/aio/transforms/angular.io-package/index.js
@ -17,7 +17,7 @@ const linksPackage = require('../links-package');
 const examplesPackage = require('../examples-package');
 const targetPackage = require('../target-package');
 const contentPackage = require('../content-package');
-const rhoPackage = require('../rho-package');
+const remarkPackage = require('../remark-package');

 const PROJECT_ROOT = path.resolve(__dirname, '../../..');
 const API_SOURCE_PATH = path.resolve(PROJECT_ROOT, 'packages');
@ -31,7 +31,7 @@ module.exports =
    new Package(
        'angular.io', [
          jsdocPackage, nunjucksPackage, typescriptPackage, linksPackage, examplesPackage,
-          gitPackage, targetPackage, contentPackage, rhoPackage
+          gitPackage, targetPackage, contentPackage, remarkPackage
        ])

        // Register the processors
--- a/aio/transforms/remark-package/index.js
+++ b/aio/transforms/remark-package/index.js
@ -0,0 +1,9 @@
+var Package = require('dgeni').Package;
+
+/**
+ * @dgPackage remark
+ * @description
+ * Overrides the renderMarkdown service with an implementation based on remark.
+ * The package is named `remark`, lists `nunjucks` as its dependency and registers
+ * the factory exported by `./services/renderMarkdown`.
+ */
+const remarkPackage = new Package('remark', ['nunjucks']);
+const renderMarkdownFactory = require('./services/renderMarkdown');
+
+// Export whatever `factory()` returns, exactly as the chained form would.
+module.exports = remarkPackage.factory(renderMarkdownFactory);
--- a/aio/transforms/remark-package/services/renderMarkdown.js
+++ b/aio/transforms/remark-package/services/renderMarkdown.js
@ -0,0 +1,209 @@
+const remark = require('remark');
+const html = require('remark-html');
+
+/**
+ * @dgService renderMarkdown
+ * @description
+ * Render the markdown in the given string as HTML.
+ *
+ * The returned function runs a remark processor that has been extended with
+ * three local plugins (`inlineTagDefs`, `noIndentedCodeBlocks` and
+ * `plainHTMLBlocks`) followed by `remark-html`.
+ *
+ * @returns {function(string): string} `renderMarkdownImpl(content)`, which processes
+ *     `content` synchronously and returns the result converted to a string.
+ */
+module.exports = function renderMarkdown() {
+  const renderer = remark()
+                    .use(inlineTagDefs)
+                    .use(noIndentedCodeBlocks)
+                    .use(plainHTMLBlocks)
+                    // USEFUL DEBUGGING CODE
+                    // .use(() => tree => {
+                    //   console.log(require('util').inspect(tree, { colors: true, depth: 4 }));
+                    // })
+                    .use(html);
+
+  return function renderMarkdownImpl(content) {
+    return renderer.processSync(content).toString();
+  };
+
+  /**
+   * Insert `method` into the tokenizer method list `methods`, directly before `anchor`.
+   * If `anchor` is not in the list the method is appended, because handing the `-1`
+   * returned by `indexOf` to `splice` would insert before the last entry instead.
+   */
+  function insertMethodBefore(methods, anchor, method) {
+    const anchorIndex = methods.indexOf(anchor);
+    methods.splice(anchorIndex === -1 ? methods.length : anchorIndex, 0, method);
+  }
+
+  /**
+   * Teach remark not to render indented codeblocks
+   */
+  function noIndentedCodeBlocks() {
+    const blockMethods = this.Parser.prototype.blockMethods;
+    const indentedCodeIndex = blockMethods.indexOf('indentedCode');
+    // Only remove the method when it is registered: `splice(-1, 1)` would drop the
+    // last block method instead.
+    if (indentedCodeIndex !== -1) {
+      blockMethods.splice(indentedCodeIndex, 1);
+    }
+  }
+
+
+  /**
+   * Teach remark about inline tags, so that it neither wraps block level
+   * tags in paragraphs nor processes the text within the tag.
+   *
+   * The same tokenizer is registered twice: as a block tokenizer placed before
+   * `paragraph`, and as an inline tokenizer placed before `text`.
+   */
+  function inlineTagDefs() {
+    const parserProto = this.Parser.prototype;
+
+    parserProto.blockTokenizers.inlineTag = tokenizeInlineTag;
+    insertMethodBefore(parserProto.blockMethods, 'paragraph', 'inlineTag');
+
+    parserProto.inlineTokenizers.inlineTag = tokenizeInlineTag;
+    // The `text` anchor is looked up in `inlineMethods`, i.e. the list being modified.
+    insertMethodBefore(parserProto.inlineMethods, 'text', 'inlineTag');
+    tokenizeInlineTag.notInLink = true;
+    tokenizeInlineTag.locator = inlineTagLocator;
+
+    /**
+     * Tokenizer for `{@tagName ...}`: the tag must start at the beginning of `value`
+     * and runs up to the first `}`. In silent mode only reports whether a tag is there;
+     * otherwise eats the tag and emits an `inlineTag` node holding the raw tag text.
+     */
+    function tokenizeInlineTag(eat, value, silent) {
+      const match = /^\{@[^\s\}]+[^\}]*\}/.exec(value);
+
+      if (!match) {
+        return;
+      }
+      if (silent) {
+        return true;
+      }
+      return eat(match[0])({
+        'type': 'inlineTag',
+        'value': match[0]
+      });
+    }
+
+    /**
+     * Report the index of the next `{@` at or after `fromIndex` (or -1 if there is none).
+     */
+    function inlineTagLocator(value, fromIndex) {
+      return value.indexOf('{@', fromIndex);
+    }
+  }
+
+  /**
+   * Teach remark that some HTML blocks never include markdown
+   *
+   * A block tokenizer is registered before the `html` tokenizer. It consumes a whole
+   * `code-example` or `code-tabs` element (from its opening tag to the matching closing
+   * tag, allowing nested elements of the same name) as a single `html` node.
+   */
+  function plainHTMLBlocks() {
+
+    const plainBlocks = ['code-example', 'code-tabs'];
+
+    // Matches the opening tag of any plain block at the start of the input and
+    // captures the element name.
+    const anyBlockMatcher = new RegExp('^' + createOpenMatcher(`(${plainBlocks.join('|')})`));
+
+    const parserProto = this.Parser.prototype;
+
+    parserProto.blockTokenizers.plainHTMLBlocks = tokenizePlainHTMLBlocks;
+    insertMethodBefore(parserProto.blockMethods, 'html', 'plainHTMLBlocks');
+
+    function tokenizePlainHTMLBlocks(eat, value, silent) {
+      const openMatch = anyBlockMatcher.exec(value);
+      if (!openMatch) {
+        return;
+      }
+
+      const blockName = openMatch[1];
+      const fullMatch = matchRecursiveRegExp(value, createOpenMatcher(blockName), createCloseMatcher(blockName))[0];
+      if (silent || !fullMatch) {
+        // either we are not eating (silent) or the match failed
+        return !!fullMatch;
+      }
+      // fullMatch[0] is the whole element, including its opening and closing tags
+      return eat(fullMatch[0])({
+        type: 'html',
+        value: fullMatch[0]
+      });
+    }
+  }
+};
+
+
+
+
+
+
+
+
+/**
+ * matchRecursiveRegExp
+ *
+ * (c) 2007 Steven Levithan <stevenlevithan.com>
+ * MIT License
+ *
+ * Finds delimited spans in a string, where the delimiters may be nested.
+ * `left` and `right` are the opening and closing delimiters given as regex
+ * pattern sources, and `flags` is an optional string of regex flags. All
+ * four arguments are handed to `rgxFindMatchPos`, which locates the spans;
+ * this function only turns the positions it reports into substrings.
+ *
+ * Each span is returned as an array of four strings:
+ *   [0] the whole span, delimiters included
+ *   [1] the text between the delimiters
+ *   [2] the text matched by the left delimiter
+ *   [3] the text matched by the right delimiter
+ *
+ * Use the "g" flag to return all matches, otherwise only the first is
+ * returned. Be careful to ensure that the left and right delimiters produce
+ * mutually exclusive matches. Backreferences are not supported within the
+ * right delimiter due to how it is internally combined with the left
+ * delimiter.
+ *
+ * examples:
+ * matchRecursiveRegExp("test", "\\(", "\\)")
+ * returns: []
+ * matchRecursiveRegExp("<t<<e>><s>>t<>", "<", ">", "g")
+ * returns: [["<t<<e>><s>>", "t<<e>><s>", "<", ">"], ["<>", "", "<", ">"]]
+ */
+function matchRecursiveRegExp(str, left, right, flags) {
+  'use strict';
+
+  // Cut the piece of `str` described by a {start, end} range.
+  const cut = (range) => str.slice(range.start, range.end);
+
+  return rgxFindMatchPos(str, left, right, flags).map((found) => [
+    cut(found.wholeMatch),
+    cut(found.match),
+    cut(found.left),
+    cut(found.right)
+  ]);
+}
+
+/**
+ * Locate spans of `str` that are enclosed by a `left` and a `right` delimiter,
+ * allowing nested delimiter pairs inside a span.
+ *
+ * @param {string} str the text to search
+ * @param {string} left regex pattern source for the opening delimiter
+ * @param {string} right regex pattern source for the closing delimiter
+ * @param {string} [flags] regex flags; when it contains "g" every span is returned,
+ *     otherwise the search stops after the first one
+ * @returns {Array<Object>} one entry per span, each holding four `{start, end}` index
+ *     ranges into `str`: `left` (opening delimiter), `match` (text between the
+ *     delimiters), `right` (closing delimiter) and `wholeMatch` (everything)
+ */
+function rgxFindMatchPos(str, left, right, flags) {
+  'use strict';
+  const flagText = flags || '';
+  const global = flagText.indexOf('g') > -1;
+  const flagsWithoutG = flagText.replace(/g/g, '');
+  // `bothMatcher` walks through every delimiter; `leftMatcher` classifies each hit.
+  const bothMatcher = new RegExp(left + '|' + right, 'g' + flagsWithoutG);
+  const leftMatcher = new RegExp(left, flagsWithoutG);
+  const pos = [];
+  let index, match, start, end;
+  let count;
+
+  do {
+    // Every scan starts with no open delimiters. Without this reset, a scan that ends
+    // with an unclosed opening delimiter would carry its depth into the retry, never
+    // record a new `index`, and repeat the same retry forever.
+    count = 0;
+    while ((match = bothMatcher.exec(str))) {
+      if (leftMatcher.test(match[0])) {
+        if (!(count++)) {
+          // outermost opening delimiter of a new span
+          index = bothMatcher.lastIndex;
+          start = index - match[0].length;
+        }
+      } else if (count) {
+        if (!--count) {
+          // the closing delimiter that balances the outermost opening one
+          end = match.index + match[0].length;
+          const obj = {
+            left: {start: start, end: index},
+            match: {start: index, end: match.index},
+            right: {start: match.index, end: end},
+            wholeMatch: {start: start, end: end}
+          };
+          pos.push(obj);
+          if (!global) {
+            return pos;
+          }
+        }
+      }
+    }
+    // The scan ran off the end of `str`. If delimiters are still open, retry from just
+    // after the outermost unclosed opening delimiter to look for a balanced span inside.
+  } while (count && (bothMatcher.lastIndex = index));
+
+  return pos;
+}
+
+/**
+ * Build the regex pattern source for an opening tag: `<`, the given element name
+ * pattern, any number of attributes (a name optionally followed by `=` and an
+ * unquoted, single-quoted or double-quoted value), optional whitespace and `>`.
+ *
+ * @param {string} elementNameMatcher regex pattern source for the element name
+ * @returns {string} the pattern source (not a RegExp object)
+ */
+function createOpenMatcher(elementNameMatcher) {
+  // The three accepted spellings of an attribute value.
+  const valueForms = [
+    '[^"\'=<>`\\u0000-\\u0020]+',
+    '\'[^\']*\'',
+    '"[^"]*"'
+  ];
+  const name = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
+  const value = `(?:${valueForms.join('|')})`;
+  const attribute = `(?:\\s+${name}(?:\\s*=\\s*${value})?)`;
+  return `<${elementNameMatcher}${attribute}*\\s*>`;
+}
+
+/**
+ * Build the regex pattern source for a closing tag: `</`, the given element name
+ * pattern, then `>`.
+ *
+ * @param {string} elementNameMatcher regex pattern source for the element name
+ * @returns {string} the pattern source (not a RegExp object)
+ */
+function createCloseMatcher(elementNameMatcher) {
+  return '</'.concat(elementNameMatcher, '>');
+}
--- a/aio/transforms/remark-package/services/renderMarkdown.spec.js
+++ b/aio/transforms/remark-package/services/renderMarkdown.spec.js
@ -0,0 +1,70 @@
+const renderMarkdownFactory = require('./renderMarkdown');
+
+// Specs for the remark based renderMarkdown service. Each spec renders a markdown
+// snippet with a freshly created service and compares the complete HTML output.
+describe('remark: renderMarkdown service', () => {
+  let renderMarkdown;
+
+  // A new service instance is created for every spec.
+  beforeEach(() => { renderMarkdown = renderMarkdownFactory(); });
+
+  it('should convert markdown to HTML', () => {
+    const markdown = '# heading 1\n' +
+        '\n' +
+        'A paragraph with **bold** and _italic_.\n' +
+        '\n' +
+        '* List item 1\n' +
+        '* List item 2';
+    const expectedHtml =
+        '<h1>heading 1</h1>\n' +
+        '<p>A paragraph with <strong>bold</strong> and <em>italic</em>.</p>\n' +
+        '<ul>\n' +
+        '<li>List item 1</li>\n' +
+        '<li>List item 2</li>\n' +
+        '</ul>\n';
+
+    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
+  });
+
+  it('should not process markdown inside inline tags', () => {
+    const rendered = renderMarkdown('# heading {@link some_url_path}');
+
+    expect(rendered).toEqual('<h1>heading {@link some_url_path}</h1>\n');
+  });
+
+  it('should not put block level inline tags inside paragraphs', () => {
+    // The middle "paragraph" consists only of an inline tag.
+    const markdown = 'A paragraph.\n' +
+        '\n' +
+        '{@example blah **blah** blah }\n' +
+        '\n' +
+        'Another paragraph {@link _containing_ } an inline tag';
+    const expectedHtml =
+        '<p>A paragraph.</p>\n' +
+        '{@example blah **blah** blah }\n' +
+        '<p>Another paragraph {@link _containing_ } an inline tag</p>\n';
+
+    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
+  });
+
+  it('should not format the contents of tags marked as unformatted ', () => {
+    const markdown = '<code-example>\n\n  **abc**\n\n  def\n</code-example>\n\n<code-tabs><code-pane>\n\n  **abc**\n\n  def\n</code-pane></code-tabs>';
+    const expectedHtml = '<code-example>\n\n  **abc**\n\n  def\n</code-example>\n<code-tabs><code-pane>\n\n  **abc**\n\n  def\n</code-pane></code-tabs>\n';
+
+    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
+  });
+
+  it('should not remove spaces after anchor tags', () => {
+    const markdown =
+        'A aa aaa aaaa aaaaa aaaaaa aaaaaaa aaaaaaaa aaaaaaaaa aaaaaaaaaa aaaaaaaaaaa\n' +
+        '[foo](path/to/foo) bbb.';
+    const expectedHtml =
+        '<p>' +
+        'A aa aaa aaaa aaaaa aaaaaa aaaaaaa aaaaaaaa aaaaaaaaa aaaaaaaaaa aaaaaaaaaaa\n' +
+        '<a href="path/to/foo">foo</a> bbb.' +
+        '</p>\n';
+
+    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
+  });
+
+  it('should not format indented text as code', () => {
+    const rendered = renderMarkdown('some text\n\n    indented text\n\nother text');
+
+    expect(rendered).toEqual('<p>some text</p>\n<p>    indented text</p>\n<p>other text</p>\n');
+  });
+});