build(aio): implement the remark renderer

The implementation adds three plugins to the remark processor:

* remove support for code blocks triggered by indented
text - only gfm triple backticks are supported (support for
dgeni inline tags is added by a separate plugin, described below).

* ignore content within `code-example` and `code-tabs` elements. This prevents
the content from being accidentally treated as markdown.

* ignore dgeni inline tags, e.g. `{@link ... }` to prevent the content of
the links from being accidentally treated as markdown
This commit is contained in:
Peter Bacon Darwin
2017-04-11 18:24:08 +01:00
committed by Igor Minar
parent 540581da3e
commit 374bf1ed98
6 changed files with 659 additions and 5 deletions

View File

@ -17,7 +17,7 @@ const linksPackage = require('../links-package');
const examplesPackage = require('../examples-package');
const targetPackage = require('../target-package');
const contentPackage = require('../content-package');
const rhoPackage = require('../rho-package');
const remarkPackage = require('../remark-package');
const PROJECT_ROOT = path.resolve(__dirname, '../../..');
const API_SOURCE_PATH = path.resolve(PROJECT_ROOT, 'packages');
@ -31,7 +31,7 @@ module.exports =
new Package(
'angular.io', [
jsdocPackage, nunjucksPackage, typescriptPackage, linksPackage, examplesPackage,
gitPackage, targetPackage, contentPackage, rhoPackage
gitPackage, targetPackage, contentPackage, remarkPackage
])
// Register the processors

View File

@ -0,0 +1,9 @@
var Package = require('dgeni').Package;
/**
 * @dgPackage remark
 * @description Overrides the renderMarkdown service with an implementation based on remark
 *
 * The package is named 'remark', declares a dependency on the 'nunjucks'
 * package and registers the factory exported by ./services/renderMarkdown.
 */
var remarkPackage = new Package('remark', ['nunjucks']);
var renderMarkdownFactory = require('./services/renderMarkdown');

module.exports = remarkPackage.factory(renderMarkdownFactory);

View File

@ -0,0 +1,209 @@
const remark = require('remark');
const html = require('remark-html');
/**
* @dgService renderMarkdown
* @description
* Render the markdown in the given string as HTML.
*/
module.exports = function renderMarkdown() {
const renderer = remark()
.use(inlineTagDefs)
.use(noIndentedCodeBlocks)
.use(plainHTMLBlocks)
// USEFUL DEBUGGING CODE
// .use(() => tree => {
// console.log(require('util').inspect(tree, { colors: true, depth: 4 }));
// })
.use(html);
return function renderMarkdownImpl(content) {
return renderer.processSync(content).toString();
};
/**
* Teach remark not to render indented codeblocks
*/
function noIndentedCodeBlocks() {
const blockMethods = this.Parser.prototype.blockMethods;
blockMethods.splice(blockMethods.indexOf('indentedCode'), 1);
}
/**
* Teach remark about inline tags, so that it neither wraps block level
* tags in paragraphs nor processes the text within the tag.
*/
function inlineTagDefs() {
const Parser = this.Parser;
const inlineTokenizers = Parser.prototype.inlineTokenizers;
const inlineMethods = Parser.prototype.inlineMethods;
const blockTokenizers = Parser.prototype.blockTokenizers;
const blockMethods = Parser.prototype.blockMethods;
blockTokenizers.inlineTag = tokenizeInlineTag;
blockMethods.splice(blockMethods.indexOf('paragraph'), 0, 'inlineTag');
inlineTokenizers.inlineTag = tokenizeInlineTag;
inlineMethods.splice(blockMethods.indexOf('text'), 0, 'inlineTag');
tokenizeInlineTag.notInLink = true;
tokenizeInlineTag.locator = inlineTagLocator;
function tokenizeInlineTag(eat, value, silent) {
const match = /^\{@[^\s\}]+[^\}]*\}/.exec(value);
if (match) {
if (silent) {
return true;
}
return eat(match[0])({
'type': 'inlineTag',
'value': match[0]
});
}
}
function inlineTagLocator(value, fromIndex) {
return value.indexOf('{@', fromIndex);
}
}
/**
* Teach remark that some HTML blocks never include markdown
*/
function plainHTMLBlocks() {
const plainBlocks = ['code-example', 'code-tabs'];
// Create matchers for each block
const anyBlockMatcher = new RegExp('^' + createOpenMatcher(`(${plainBlocks.join('|')})`));
const Parser = this.Parser;
const blockTokenizers = Parser.prototype.blockTokenizers;
const blockMethods = Parser.prototype.blockMethods;
blockTokenizers.plainHTMLBlocks = tokenizePlainHTMLBlocks;
blockMethods.splice(blockMethods.indexOf('html'), 0, 'plainHTMLBlocks');
function tokenizePlainHTMLBlocks(eat, value, silent) {
const openMatch = anyBlockMatcher.exec(value);
if (openMatch) {
const blockName = openMatch[1];
const fullMatch = matchRecursiveRegExp(value, createOpenMatcher(blockName), createCloseMatcher(blockName))[0];
if (silent || !fullMatch) {
// either we are not eating (silent) or the match failed
return !!fullMatch;
}
return eat(fullMatch[0])({
type: 'html',
value: fullMatch[0]
});
}
}
}
};
/**
 * matchRecursiveRegExp
 *
 * (c) 2007 Steven Levithan <stevenlevithan.com>
 * MIT License
 *
 * Accepts a string to search, a left and right format delimiter
 * as regex patterns, and optional regex flags. Returns an array
 * of matches, allowing nested instances of left/right delimiters.
 * Use the "g" flag to return all matches, otherwise only the
 * first is returned. Be careful to ensure that the left and
 * right format delimiters produce mutually exclusive matches.
 * Backreferences are not supported within the right delimiter
 * due to how it is internally combined with the left delimiter.
 *
 * Each entry of the returned array is itself an array of four strings,
 * sliced out of `str` at the positions reported by rgxFindMatchPos:
 *   [0] the whole match, delimiters included
 *   [1] the content between the delimiters
 *   [2] the text matched by the left delimiter
 *   [3] the text matched by the right delimiter
 *
 * Strings whose delimiters are unbalanced are meant to be treated as a
 * conventional regex library with recursion support would treat them, i.e.
 * by matching the balanced instance they contain (e.g. the "<x>" inside
 * "<x>>" when using "<" and ">" as the delimiters). The scanning itself is
 * done by rgxFindMatchPos.
 *
 * examples:
 * matchRecursiveRegExp("test", "\\(", "\\)")
 * returns: []
 * matchRecursiveRegExp("<t<<e>><s>>t<>", "<", ">", "g")
 * returns: [["<t<<e>><s>>", "t<<e>><s>", "<", ">"], ["<>", "", "<", ">"]]
 * matchRecursiveRegExp("<div id=\"x\">test</div>", "<div\\b[^>]*>", "</div>", "gi")
 * returns: [["<div id=\"x\">test</div>", "test", "<div id=\"x\">", "</div>"]]
 */
function matchRecursiveRegExp(str, left, right, flags) {
  'use strict';

  // Turn every position record into the four substrings it describes.
  return rgxFindMatchPos(str, left, right, flags).map(function (found) {
    const spans = [found.wholeMatch, found.match, found.left, found.right];
    return spans.map(function (span) {
      return str.slice(span.start, span.end);
    });
  });
}
/**
 * Find the positions of balanced `left` ... `right` delimited regions in
 * `str`, allowing nested instances of the delimiters.
 *
 * @param {string} str the string to search
 * @param {string} left regex source of the opening delimiter
 * @param {string} right regex source of the closing delimiter
 * @param {string} [flags] regex flags; include "g" to collect every match
 *   instead of returning after the first one
 * @returns {Array<Object>} one record per outermost balanced region, each of
 *   the form `{left, match, right, wholeMatch}` where every property is a
 *   `{start, end}` pair of indices into `str` (`end` exclusive)
 */
function rgxFindMatchPos(str, left, right, flags) {
  'use strict';
  const givenFlags = flags || '';
  const global = givenFlags.indexOf('g') > -1;
  const otherFlags = givenFlags.replace(/g/g, '');
  // `bothMatcher` always scans globally; `leftMatcher` only classifies a hit.
  const bothMatcher = new RegExp(left + '|' + right, 'g' + otherFlags);
  const leftMatcher = new RegExp(left, otherFlags);
  const pos = [];
  let index, match, start, count;

  do {
    // Nesting depth must restart at zero on every pass. Carrying the depth
    // of an unclosed delimiter into the rescan means it can never reach zero
    // and the outer loop would rescan from the same index forever.
    count = 0;
    while ((match = bothMatcher.exec(str))) {
      if (leftMatcher.test(match[0])) {
        if (!(count++)) {
          // Outermost opening delimiter of this candidate region.
          index = bothMatcher.lastIndex;
          start = index - match[0].length;
        }
      } else if (count) {
        if (!--count) {
          // Depth is back to zero: the region opened at `start` is balanced.
          const end = match.index + match[0].length;
          pos.push({
            left: {start: start, end: index},
            match: {start: index, end: match.index},
            right: {start: match.index, end: end},
            wholeMatch: {start: start, end: end}
          });
          if (!global) {
            return pos;
          }
        }
      }
    }
    // Reaching here with count > 0 means the last outermost opening delimiter
    // was never closed: rescan from just after it so that a balanced region
    // nested inside it can still be found.
  } while (count && (bothMatcher.lastIndex = index));

  return pos;
}
/**
 * Build the regex source that matches an opening HTML tag, with any number of
 * attributes, for the element name(s) described by `elementNameMatcher`.
 *
 * @param {string} elementNameMatcher regex source matching the element name;
 *   it is inserted as-is, so it may contain a capturing group
 * @returns {string} regex source (not a RegExp) for the opening tag
 */
function createOpenMatcher(elementNameMatcher) {
  // The three accepted forms of an attribute value.
  const valueForms = [
    '[^"\'=<>`\\u0000-\\u0020]+',  // unquoted
    '\'[^\']*\'',                   // single quoted
    '"[^"]*"'                       // double quoted
  ];
  const valuePattern = `(?:${valueForms.join('|')})`;
  const namePattern = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
  // An attribute is whitespace, a name and an optional `= value` part.
  const attributePattern = `(?:\\s+${namePattern}(?:\\s*=\\s*${valuePattern})?)`;

  return `<${elementNameMatcher}${attributePattern}*\\s*>`;
}
/**
 * Build the regex source that matches the closing HTML tag for the element
 * name(s) described by `elementNameMatcher`.
 *
 * @param {string} elementNameMatcher regex source matching the element name
 * @returns {string} regex source (not a RegExp) for the closing tag
 */
function createCloseMatcher(elementNameMatcher) {
  return '</'.concat(elementNameMatcher, '>');
}

View File

@ -0,0 +1,70 @@
const renderMarkdownFactory = require('./renderMarkdown');
// Specs for the remark based renderMarkdown service. A fresh renderer is
// created from the factory before every spec.
describe('remark: renderMarkdown service', () => {
  let renderMarkdown;

  beforeEach(() => {
    renderMarkdown = renderMarkdownFactory();
  });

  it('should convert markdown to HTML', () => {
    const markdown = [
      '# heading 1',
      '',
      'A paragraph with **bold** and _italic_.',
      '',
      '* List item 1',
      '* List item 2'
    ].join('\n');

    // The trailing empty entry yields the final newline of the rendered HTML.
    const expectedHtml = [
      '<h1>heading 1</h1>',
      '<p>A paragraph with <strong>bold</strong> and <em>italic</em>.</p>',
      '<ul>',
      '<li>List item 1</li>',
      '<li>List item 2</li>',
      '</ul>',
      ''
    ].join('\n');

    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
  });

  it('should not process markdown inside inline tags', () => {
    const rendered = renderMarkdown('# heading {@link some_url_path}');
    expect(rendered).toEqual('<h1>heading {@link some_url_path}</h1>\n');
  });

  it('should not put block level inline tags inside paragraphs', () => {
    const markdown = [
      'A paragraph.',
      '',
      '{@example blah **blah** blah }',
      '',
      'Another paragraph {@link _containing_ } an inline tag'
    ].join('\n');

    const expectedHtml = [
      '<p>A paragraph.</p>',
      '{@example blah **blah** blah }',
      '<p>Another paragraph {@link _containing_ } an inline tag</p>',
      ''
    ].join('\n');

    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
  });

  it('should not format the contents of tags marked as unformatted ', () => {
    const markdown = '<code-example>\n\n **abc**\n\n def\n</code-example>\n\n<code-tabs><code-pane>\n\n **abc**\n\n def\n</code-pane></code-tabs>';
    const expectedHtml = '<code-example>\n\n **abc**\n\n def\n</code-example>\n<code-tabs><code-pane>\n\n **abc**\n\n def\n</code-pane></code-tabs>\n';

    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
  });

  it('should not remove spaces after anchor tags', () => {
    // The link starts a new source line, right after a long first line.
    const firstLine = 'A aa aaa aaaa aaaaa aaaaaa aaaaaaa aaaaaaaa aaaaaaaaa aaaaaaaaaa aaaaaaaaaaa';
    const markdown = `${firstLine}\n[foo](path/to/foo) bbb.`;
    const expectedHtml = `<p>${firstLine}\n<a href="path/to/foo">foo</a> bbb.</p>\n`;

    expect(renderMarkdown(markdown)).toEqual(expectedHtml);
  });

  it('should not format indented text as code', () => {
    const rendered = renderMarkdown('some text\n\n indented text\n\nother text');
    expect(rendered).toEqual('<p>some text</p>\n<p> indented text</p>\n<p>other text</p>\n');
  });
});