492 lines
12 KiB
Plaintext
492 lines
12 KiB
Plaintext
const Tokenizer = require('./Tokenizer.js');
|
|
const { defaults } = require('./defaults.js');
|
|
const { block, inline } = require('./rules.js');
|
|
const { repeatString } = require('./helpers.js');
|
|
|
|
/**
 * smartypants text replacement
 *
 * Swaps plain ASCII punctuation for its typographic counterpart: dash runs
 * become em/en dashes, straight quotes become curly quotes and three dots
 * become an ellipsis.
 *
 * @param {string} text Text to convert.
 * @returns {string} The converted text.
 */
function smartypants(text) {
  // Order matters: '---' must be consumed before '--', and each "opening"
  // quote rule must run before the catch-all "closing" rule for the same
  // quote character.
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  return substitutions.reduce(
    (result, [pattern, glyph]) => result.replace(pattern, glyph),
    text
  );
}
|
|
|
|
/**
 * mangle email addresses
 *
 * Rewrites every character of `text` as an HTML numeric character
 * reference. For each character the notation is picked at random: decimal
 * (`&#64;`) or hexadecimal (`&#x40;`), so the output differs between calls
 * while decoding back to the same text.
 *
 * @param {string} text Text to encode.
 * @returns {string} `text` as a sequence of numeric character references.
 */
function mangle(text) {
  let out = '';

  // Walk the string by code point rather than by UTF-16 code unit. A
  // character outside the BMP would otherwise be emitted as two surrogate
  // references, which HTML parsers reject and replace with U+FFFD.
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    const reference = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + reference + ';';
  }

  return out;
}
|
|
|
|
/**
|
|
* Block Lexer
|
|
*/
|
|
module.exports = class Lexer {
|
|
constructor(options) {
|
|
this.tokens = [];
|
|
this.tokens.links = Object.create(null);
|
|
this.options = options || defaults;
|
|
this.options.tokenizer = this.options.tokenizer || new Tokenizer();
|
|
this.tokenizer = this.options.tokenizer;
|
|
this.tokenizer.options = this.options;
|
|
|
|
const rules = {
|
|
block: block.normal,
|
|
inline: inline.normal
|
|
};
|
|
|
|
if (this.options.pedantic) {
|
|
rules.block = block.pedantic;
|
|
rules.inline = inline.pedantic;
|
|
} else if (this.options.gfm) {
|
|
rules.block = block.gfm;
|
|
if (this.options.breaks) {
|
|
rules.inline = inline.breaks;
|
|
} else {
|
|
rules.inline = inline.gfm;
|
|
}
|
|
}
|
|
this.tokenizer.rules = rules;
|
|
}
|
|
|
|
/**
|
|
* Expose Rules
|
|
*/
|
|
static get rules() {
|
|
return {
|
|
block,
|
|
inline
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Static Lex Method
|
|
*/
|
|
static lex(src, options) {
|
|
const lexer = new Lexer(options);
|
|
return lexer.lex(src);
|
|
}
|
|
|
|
/**
|
|
* Static Lex Inline Method
|
|
*/
|
|
static lexInline(src, options) {
|
|
const lexer = new Lexer(options);
|
|
return lexer.inlineTokens(src);
|
|
}
|
|
|
|
/**
|
|
* Preprocessing
|
|
*/
|
|
lex(src) {
|
|
src = src
|
|
.replace(/\r\n|\r/g, '\n')
|
|
.replace(/\t/g, ' ');
|
|
|
|
this.blockTokens(src, this.tokens, true);
|
|
|
|
this.inline(this.tokens);
|
|
|
|
return this.tokens;
|
|
}
|
|
|
|
/**
|
|
* Lexing
|
|
*/
|
|
blockTokens(src, tokens = [], top = true) {
|
|
if (this.options.pedantic) {
|
|
src = src.replace(/^ +$/gm, '');
|
|
}
|
|
let token, i, l, lastToken;
|
|
|
|
while (src) {
|
|
// newline
|
|
if (token = this.tokenizer.space(src)) {
|
|
src = src.substring(token.raw.length);
|
|
if (token.type) {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// code
|
|
if (token = this.tokenizer.code(src)) {
|
|
src = src.substring(token.raw.length);
|
|
lastToken = tokens[tokens.length - 1];
|
|
// An indented code block cannot interrupt a paragraph.
|
|
if (lastToken && lastToken.type === 'paragraph') {
|
|
lastToken.raw += '\n' + token.raw;
|
|
lastToken.text += '\n' + token.text;
|
|
} else {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// fences
|
|
if (token = this.tokenizer.fences(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// heading
|
|
if (token = this.tokenizer.heading(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// table no leading pipe (gfm)
|
|
if (token = this.tokenizer.nptable(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// hr
|
|
if (token = this.tokenizer.hr(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// blockquote
|
|
if (token = this.tokenizer.blockquote(src)) {
|
|
src = src.substring(token.raw.length);
|
|
token.tokens = this.blockTokens(token.text, [], top);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// list
|
|
if (token = this.tokenizer.list(src)) {
|
|
src = src.substring(token.raw.length);
|
|
l = token.items.length;
|
|
for (i = 0; i < l; i++) {
|
|
token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
|
|
}
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// html
|
|
if (token = this.tokenizer.html(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// def
|
|
if (top && (token = this.tokenizer.def(src))) {
|
|
src = src.substring(token.raw.length);
|
|
if (!this.tokens.links[token.tag]) {
|
|
this.tokens.links[token.tag] = {
|
|
href: token.href,
|
|
title: token.title
|
|
};
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// table (gfm)
|
|
if (token = this.tokenizer.table(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// lheading
|
|
if (token = this.tokenizer.lheading(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// top-level paragraph
|
|
if (top && (token = this.tokenizer.paragraph(src))) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// text
|
|
if (token = this.tokenizer.text(src)) {
|
|
src = src.substring(token.raw.length);
|
|
lastToken = tokens[tokens.length - 1];
|
|
if (lastToken && lastToken.type === 'text') {
|
|
lastToken.raw += '\n' + token.raw;
|
|
lastToken.text += '\n' + token.text;
|
|
} else {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (src) {
|
|
const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
|
if (this.options.silent) {
|
|
console.error(errMsg);
|
|
break;
|
|
} else {
|
|
throw new Error(errMsg);
|
|
}
|
|
}
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
|
|
inline(tokens) {
|
|
let i,
|
|
j,
|
|
k,
|
|
l2,
|
|
row,
|
|
token;
|
|
|
|
const l = tokens.length;
|
|
for (i = 0; i < l; i++) {
|
|
token = tokens[i];
|
|
switch (token.type) {
|
|
case 'paragraph':
|
|
case 'text':
|
|
case 'heading': {
|
|
token.tokens = [];
|
|
this.inlineTokens(token.text, token.tokens);
|
|
break;
|
|
}
|
|
case 'table': {
|
|
token.tokens = {
|
|
header: [],
|
|
cells: []
|
|
};
|
|
|
|
// header
|
|
l2 = token.header.length;
|
|
for (j = 0; j < l2; j++) {
|
|
token.tokens.header[j] = [];
|
|
this.inlineTokens(token.header[j], token.tokens.header[j]);
|
|
}
|
|
|
|
// cells
|
|
l2 = token.cells.length;
|
|
for (j = 0; j < l2; j++) {
|
|
row = token.cells[j];
|
|
token.tokens.cells[j] = [];
|
|
for (k = 0; k < row.length; k++) {
|
|
token.tokens.cells[j][k] = [];
|
|
this.inlineTokens(row[k], token.tokens.cells[j][k]);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
case 'blockquote': {
|
|
this.inline(token.tokens);
|
|
break;
|
|
}
|
|
case 'list': {
|
|
l2 = token.items.length;
|
|
for (j = 0; j < l2; j++) {
|
|
this.inline(token.items[j].tokens);
|
|
}
|
|
break;
|
|
}
|
|
default: {
|
|
// do nothing
|
|
}
|
|
}
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
|
|
/**
|
|
* Lexing/Compiling
|
|
*/
|
|
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
|
|
let token, lastToken;
|
|
|
|
// String with links masked to avoid interference with em and strong
|
|
let maskedSrc = src;
|
|
let match;
|
|
let keepPrevChar, prevChar;
|
|
|
|
// Mask out reflinks
|
|
if (this.tokens.links) {
|
|
const links = Object.keys(this.tokens.links);
|
|
if (links.length > 0) {
|
|
while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
|
|
if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
|
|
maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Mask out other blocks
|
|
while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
|
|
maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
|
|
}
|
|
|
|
// Mask out escaped em & strong delimiters
|
|
while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
|
|
maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
|
|
}
|
|
|
|
while (src) {
|
|
if (!keepPrevChar) {
|
|
prevChar = '';
|
|
}
|
|
keepPrevChar = false;
|
|
|
|
// escape
|
|
if (token = this.tokenizer.escape(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// tag
|
|
if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
|
|
src = src.substring(token.raw.length);
|
|
inLink = token.inLink;
|
|
inRawBlock = token.inRawBlock;
|
|
const lastToken = tokens[tokens.length - 1];
|
|
if (lastToken && token.type === 'text' && lastToken.type === 'text') {
|
|
lastToken.raw += token.raw;
|
|
lastToken.text += token.text;
|
|
} else {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// link
|
|
if (token = this.tokenizer.link(src)) {
|
|
src = src.substring(token.raw.length);
|
|
if (token.type === 'link') {
|
|
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
|
}
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// reflink, nolink
|
|
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
|
|
src = src.substring(token.raw.length);
|
|
const lastToken = tokens[tokens.length - 1];
|
|
if (token.type === 'link') {
|
|
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
|
|
tokens.push(token);
|
|
} else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
|
|
lastToken.raw += token.raw;
|
|
lastToken.text += token.text;
|
|
} else {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// em & strong
|
|
if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
|
|
src = src.substring(token.raw.length);
|
|
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// code
|
|
if (token = this.tokenizer.codespan(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// br
|
|
if (token = this.tokenizer.br(src)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// del (gfm)
|
|
if (token = this.tokenizer.del(src)) {
|
|
src = src.substring(token.raw.length);
|
|
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// autolink
|
|
if (token = this.tokenizer.autolink(src, mangle)) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// url (gfm)
|
|
if (!inLink && (token = this.tokenizer.url(src, mangle))) {
|
|
src = src.substring(token.raw.length);
|
|
tokens.push(token);
|
|
continue;
|
|
}
|
|
|
|
// text
|
|
if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
|
|
src = src.substring(token.raw.length);
|
|
if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
|
|
prevChar = token.raw.slice(-1);
|
|
}
|
|
keepPrevChar = true;
|
|
lastToken = tokens[tokens.length - 1];
|
|
if (lastToken && lastToken.type === 'text') {
|
|
lastToken.raw += token.raw;
|
|
lastToken.text += token.text;
|
|
} else {
|
|
tokens.push(token);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (src) {
|
|
const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
|
|
if (this.options.silent) {
|
|
console.error(errMsg);
|
|
break;
|
|
} else {
|
|
throw new Error(errMsg);
|
|
}
|
|
}
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
};
|