// File-viewer metadata captured with the source (not program text): 731 lines, 19 KiB, Plaintext
// Option set the Tokenizer falls back to when constructed without options.
const { defaults } = require('./defaults.js');
// Helper functions called by the tokenizer rules below.
const {
  rtrim,
  splitCells,
  escape,
  findClosingBracket
} = require('./helpers.js');
/**
 * Build a `link` or `image` token from an inline match.
 *
 * @param {Array<string>} cap - Match array: `cap[0]` is the full match (a leading
 *   `!` selects the image form) and `cap[1]` is the bracketed label text.
 * @param {{href: string, title: (string|undefined|null)}} link - Destination and
 *   optional title; a truthy title is passed through `escape`, otherwise `null` is used.
 * @param {string} raw - Source text to record on the token.
 * @returns {{type: string, raw: string, href: string, title: ?string, text: string}}
 *   A token whose `type` is `'link'` or `'image'`.
 */
function outputLink(cap, link, raw) {
  const href = link.href;
  const title = link.title ? escape(link.title) : null;
  // Drop the backslash from escaped square brackets (`\[` and `\]`) in the label.
  const text = cap[1].replace(/\\([\[\]])/g, '$1');

  if (cap[0].charAt(0) !== '!') {
    return {
      type: 'link',
      raw,
      href,
      title,
      text
    };
  }

  // Image form: the label is additionally passed through `escape`.
  return {
    type: 'image',
    raw,
    href,
    title,
    text: escape(text)
  };
}
/**
 * Remove the opening fence's indentation from the lines of a fenced code block.
 *
 * If `raw` does not begin with whitespace followed by three backticks, `text`
 * is returned unchanged. Otherwise every line of `text` whose leading
 * whitespace is at least as long as the fence's indentation has that many
 * characters sliced off its front; lines with less (or no) leading whitespace
 * are kept as they are.
 *
 * @param {string} raw - Full source of the fenced block, including the fence.
 * @param {string} text - Code content found between the fences.
 * @returns {string} `text` with the fence indentation removed where possible.
 */
function indentCodeCompensation(raw, text) {
  const matchIndentToCode = raw.match(/^(\s+)(?:```)/);

  if (matchIndentToCode === null) {
    return text;
  }

  const indentToCode = matchIndentToCode[1];

  return text
    .split('\n')
    .map(node => {
      const matchIndentInNode = node.match(/^\s+/);
      if (matchIndentInNode === null) {
        return node;
      }

      const [indentInNode] = matchIndentInNode;

      // Only strip when the line is indented at least as deeply as the fence.
      if (indentInNode.length >= indentToCode.length) {
        return node.slice(indentToCode.length);
      }

      return node;
    })
    .join('\n');
}
/**
|
|
* Tokenizer
|
|
*/
|
|
module.exports = class Tokenizer {
|
|
constructor(options) {
|
|
this.options = options || defaults;
|
|
}
|
|
|
|
space(src) {
|
|
const cap = this.rules.block.newline.exec(src);
|
|
if (cap) {
|
|
if (cap[0].length > 1) {
|
|
return {
|
|
type: 'space',
|
|
raw: cap[0]
|
|
};
|
|
}
|
|
return { raw: '\n' };
|
|
}
|
|
}
|
|
|
|
code(src) {
|
|
const cap = this.rules.block.code.exec(src);
|
|
if (cap) {
|
|
const text = cap[0].replace(/^ {1,4}/gm, '');
|
|
return {
|
|
type: 'code',
|
|
raw: cap[0],
|
|
codeBlockStyle: 'indented',
|
|
text: !this.options.pedantic
|
|
? rtrim(text, '\n')
|
|
: text
|
|
};
|
|
}
|
|
}
|
|
|
|
fences(src) {
|
|
const cap = this.rules.block.fences.exec(src);
|
|
if (cap) {
|
|
const raw = cap[0];
|
|
const text = indentCodeCompensation(raw, cap[3] || '');
|
|
|
|
return {
|
|
type: 'code',
|
|
raw,
|
|
lang: cap[2] ? cap[2].trim() : cap[2],
|
|
text
|
|
};
|
|
}
|
|
}
|
|
|
|
heading(src) {
|
|
const cap = this.rules.block.heading.exec(src);
|
|
if (cap) {
|
|
let text = cap[2].trim();
|
|
|
|
// remove trailing #s
|
|
if (/#$/.test(text)) {
|
|
const trimmed = rtrim(text, '#');
|
|
if (this.options.pedantic) {
|
|
text = trimmed.trim();
|
|
} else if (!trimmed || / $/.test(trimmed)) {
|
|
// CommonMark requires space before trailing #s
|
|
text = trimmed.trim();
|
|
}
|
|
}
|
|
|
|
return {
|
|
type: 'heading',
|
|
raw: cap[0],
|
|
depth: cap[1].length,
|
|
text: text
|
|
};
|
|
}
|
|
}
|
|
|
|
nptable(src) {
|
|
const cap = this.rules.block.nptable.exec(src);
|
|
if (cap) {
|
|
const item = {
|
|
type: 'table',
|
|
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
|
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
|
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
|
|
raw: cap[0]
|
|
};
|
|
|
|
if (item.header.length === item.align.length) {
|
|
let l = item.align.length;
|
|
let i;
|
|
for (i = 0; i < l; i++) {
|
|
if (/^ *-+: *$/.test(item.align[i])) {
|
|
item.align[i] = 'right';
|
|
} else if (/^ *:-+: *$/.test(item.align[i])) {
|
|
item.align[i] = 'center';
|
|
} else if (/^ *:-+ *$/.test(item.align[i])) {
|
|
item.align[i] = 'left';
|
|
} else {
|
|
item.align[i] = null;
|
|
}
|
|
}
|
|
|
|
l = item.cells.length;
|
|
for (i = 0; i < l; i++) {
|
|
item.cells[i] = splitCells(item.cells[i], item.header.length);
|
|
}
|
|
|
|
return item;
|
|
}
|
|
}
|
|
}
|
|
|
|
hr(src) {
|
|
const cap = this.rules.block.hr.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'hr',
|
|
raw: cap[0]
|
|
};
|
|
}
|
|
}
|
|
|
|
blockquote(src) {
|
|
const cap = this.rules.block.blockquote.exec(src);
|
|
if (cap) {
|
|
const text = cap[0].replace(/^ *> ?/gm, '');
|
|
|
|
return {
|
|
type: 'blockquote',
|
|
raw: cap[0],
|
|
text
|
|
};
|
|
}
|
|
}
|
|
|
|
list(src) {
|
|
const cap = this.rules.block.list.exec(src);
|
|
if (cap) {
|
|
let raw = cap[0];
|
|
const bull = cap[2];
|
|
const isordered = bull.length > 1;
|
|
|
|
const list = {
|
|
type: 'list',
|
|
raw,
|
|
ordered: isordered,
|
|
start: isordered ? +bull.slice(0, -1) : '',
|
|
loose: false,
|
|
items: []
|
|
};
|
|
|
|
// Get each top-level item.
|
|
const itemMatch = cap[0].match(this.rules.block.item);
|
|
|
|
let next = false,
|
|
item,
|
|
space,
|
|
bcurr,
|
|
bnext,
|
|
addBack,
|
|
loose,
|
|
istask,
|
|
ischecked,
|
|
endMatch;
|
|
|
|
let l = itemMatch.length;
|
|
bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
|
|
for (let i = 0; i < l; i++) {
|
|
item = itemMatch[i];
|
|
raw = item;
|
|
|
|
if (!this.options.pedantic) {
|
|
// Determine if current item contains the end of the list
|
|
endMatch = item.match(new RegExp('\\n\\s*\\n {0,' + (bcurr[0].length - 1) + '}\\S'));
|
|
if (endMatch) {
|
|
addBack = item.length - endMatch.index + itemMatch.slice(i + 1).join('\n').length;
|
|
list.raw = list.raw.substring(0, list.raw.length - addBack);
|
|
|
|
item = item.substring(0, endMatch.index);
|
|
raw = item;
|
|
l = i + 1;
|
|
}
|
|
}
|
|
|
|
// Determine whether the next list item belongs here.
|
|
// Backpedal if it does not belong in this list.
|
|
if (i !== l - 1) {
|
|
bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
|
|
if (
|
|
!this.options.pedantic
|
|
? bnext[1].length >= bcurr[0].length || bnext[1].length > 3
|
|
: bnext[1].length > bcurr[1].length
|
|
) {
|
|
// nested list or continuation
|
|
itemMatch.splice(i, 2, itemMatch[i] + (!this.options.pedantic && bnext[1].length < bcurr[0].length && !itemMatch[i].match(/\n$/) ? '' : '\n') + itemMatch[i + 1]);
|
|
i--;
|
|
l--;
|
|
continue;
|
|
} else if (
|
|
// different bullet style
|
|
!this.options.pedantic || this.options.smartLists
|
|
? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
|
|
: isordered === (bnext[2].length === 1)
|
|
) {
|
|
addBack = itemMatch.slice(i + 1).join('\n').length;
|
|
list.raw = list.raw.substring(0, list.raw.length - addBack);
|
|
i = l - 1;
|
|
}
|
|
bcurr = bnext;
|
|
}
|
|
|
|
// Remove the list item's bullet
|
|
// so it is seen as the next token.
|
|
space = item.length;
|
|
item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');
|
|
|
|
// Outdent whatever the
|
|
// list item contains. Hacky.
|
|
if (~item.indexOf('\n ')) {
|
|
space -= item.length;
|
|
item = !this.options.pedantic
|
|
? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
|
|
: item.replace(/^ {1,4}/gm, '');
|
|
}
|
|
|
|
// trim item newlines at end
|
|
item = rtrim(item, '\n');
|
|
if (i !== l - 1) {
|
|
raw = raw + '\n';
|
|
}
|
|
|
|
// Determine whether item is loose or not.
|
|
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
|
|
// for discount behavior.
|
|
loose = next || /\n\n(?!\s*$)/.test(raw);
|
|
if (i !== l - 1) {
|
|
next = raw.slice(-2) === '\n\n';
|
|
if (!loose) loose = next;
|
|
}
|
|
|
|
if (loose) {
|
|
list.loose = true;
|
|
}
|
|
|
|
// Check for task list items
|
|
if (this.options.gfm) {
|
|
istask = /^\[[ xX]\] /.test(item);
|
|
ischecked = undefined;
|
|
if (istask) {
|
|
ischecked = item[1] !== ' ';
|
|
item = item.replace(/^\[[ xX]\] +/, '');
|
|
}
|
|
}
|
|
|
|
list.items.push({
|
|
type: 'list_item',
|
|
raw,
|
|
task: istask,
|
|
checked: ischecked,
|
|
loose: loose,
|
|
text: item
|
|
});
|
|
}
|
|
|
|
return list;
|
|
}
|
|
}
|
|
|
|
html(src) {
|
|
const cap = this.rules.block.html.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: this.options.sanitize
|
|
? 'paragraph'
|
|
: 'html',
|
|
raw: cap[0],
|
|
pre: !this.options.sanitizer
|
|
&& (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
|
|
text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
|
|
};
|
|
}
|
|
}
|
|
|
|
def(src) {
|
|
const cap = this.rules.block.def.exec(src);
|
|
if (cap) {
|
|
if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
|
|
const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
|
return {
|
|
type: 'def',
|
|
tag,
|
|
raw: cap[0],
|
|
href: cap[2],
|
|
title: cap[3]
|
|
};
|
|
}
|
|
}
|
|
|
|
table(src) {
|
|
const cap = this.rules.block.table.exec(src);
|
|
if (cap) {
|
|
const item = {
|
|
type: 'table',
|
|
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
|
|
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
|
|
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
|
|
};
|
|
|
|
if (item.header.length === item.align.length) {
|
|
item.raw = cap[0];
|
|
|
|
let l = item.align.length;
|
|
let i;
|
|
for (i = 0; i < l; i++) {
|
|
if (/^ *-+: *$/.test(item.align[i])) {
|
|
item.align[i] = 'right';
|
|
} else if (/^ *:-+: *$/.test(item.align[i])) {
|
|
item.align[i] = 'center';
|
|
} else if (/^ *:-+ *$/.test(item.align[i])) {
|
|
item.align[i] = 'left';
|
|
} else {
|
|
item.align[i] = null;
|
|
}
|
|
}
|
|
|
|
l = item.cells.length;
|
|
for (i = 0; i < l; i++) {
|
|
item.cells[i] = splitCells(
|
|
item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
|
|
item.header.length);
|
|
}
|
|
|
|
return item;
|
|
}
|
|
}
|
|
}
|
|
|
|
lheading(src) {
|
|
const cap = this.rules.block.lheading.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'heading',
|
|
raw: cap[0],
|
|
depth: cap[2].charAt(0) === '=' ? 1 : 2,
|
|
text: cap[1]
|
|
};
|
|
}
|
|
}
|
|
|
|
paragraph(src) {
|
|
const cap = this.rules.block.paragraph.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'paragraph',
|
|
raw: cap[0],
|
|
text: cap[1].charAt(cap[1].length - 1) === '\n'
|
|
? cap[1].slice(0, -1)
|
|
: cap[1]
|
|
};
|
|
}
|
|
}
|
|
|
|
text(src) {
|
|
const cap = this.rules.block.text.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'text',
|
|
raw: cap[0],
|
|
text: cap[0]
|
|
};
|
|
}
|
|
}
|
|
|
|
escape(src) {
|
|
const cap = this.rules.inline.escape.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'escape',
|
|
raw: cap[0],
|
|
text: escape(cap[1])
|
|
};
|
|
}
|
|
}
|
|
|
|
tag(src, inLink, inRawBlock) {
|
|
const cap = this.rules.inline.tag.exec(src);
|
|
if (cap) {
|
|
if (!inLink && /^<a /i.test(cap[0])) {
|
|
inLink = true;
|
|
} else if (inLink && /^<\/a>/i.test(cap[0])) {
|
|
inLink = false;
|
|
}
|
|
if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
|
inRawBlock = true;
|
|
} else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
|
inRawBlock = false;
|
|
}
|
|
|
|
return {
|
|
type: this.options.sanitize
|
|
? 'text'
|
|
: 'html',
|
|
raw: cap[0],
|
|
inLink,
|
|
inRawBlock,
|
|
text: this.options.sanitize
|
|
? (this.options.sanitizer
|
|
? this.options.sanitizer(cap[0])
|
|
: escape(cap[0]))
|
|
: cap[0]
|
|
};
|
|
}
|
|
}
|
|
|
|
link(src) {
|
|
const cap = this.rules.inline.link.exec(src);
|
|
if (cap) {
|
|
const trimmedUrl = cap[2].trim();
|
|
if (!this.options.pedantic && /^</.test(trimmedUrl)) {
|
|
// commonmark requires matching angle brackets
|
|
if (!(/>$/.test(trimmedUrl))) {
|
|
return;
|
|
}
|
|
|
|
// ending angle bracket cannot be escaped
|
|
const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\');
|
|
if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) {
|
|
return;
|
|
}
|
|
} else {
|
|
// find closing parenthesis
|
|
const lastParenIndex = findClosingBracket(cap[2], '()');
|
|
if (lastParenIndex > -1) {
|
|
const start = cap[0].indexOf('!') === 0 ? 5 : 4;
|
|
const linkLen = start + cap[1].length + lastParenIndex;
|
|
cap[2] = cap[2].substring(0, lastParenIndex);
|
|
cap[0] = cap[0].substring(0, linkLen).trim();
|
|
cap[3] = '';
|
|
}
|
|
}
|
|
let href = cap[2];
|
|
let title = '';
|
|
if (this.options.pedantic) {
|
|
// split pedantic href and title
|
|
const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
|
|
|
if (link) {
|
|
href = link[1];
|
|
title = link[3];
|
|
}
|
|
} else {
|
|
title = cap[3] ? cap[3].slice(1, -1) : '';
|
|
}
|
|
|
|
href = href.trim();
|
|
if (/^</.test(href)) {
|
|
if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
|
|
// pedantic allows starting angle bracket without ending angle bracket
|
|
href = href.slice(1);
|
|
} else {
|
|
href = href.slice(1, -1);
|
|
}
|
|
}
|
|
return outputLink(cap, {
|
|
href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
|
|
title: title ? title.replace(this.rules.inline._escapes, '$1') : title
|
|
}, cap[0]);
|
|
}
|
|
}
|
|
|
|
reflink(src, links) {
|
|
let cap;
|
|
if ((cap = this.rules.inline.reflink.exec(src))
|
|
|| (cap = this.rules.inline.nolink.exec(src))) {
|
|
let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
|
link = links[link.toLowerCase()];
|
|
if (!link || !link.href) {
|
|
const text = cap[0].charAt(0);
|
|
return {
|
|
type: 'text',
|
|
raw: text,
|
|
text
|
|
};
|
|
}
|
|
return outputLink(cap, link, cap[0]);
|
|
}
|
|
}
|
|
|
|
emStrong(src, maskedSrc, prevChar = '') {
|
|
let match = this.rules.inline.emStrong.lDelim.exec(src);
|
|
if (!match) return;
|
|
|
|
if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return; // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
|
|
|
|
const nextChar = match[1] || match[2] || '';
|
|
|
|
if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
|
|
const lLength = match[0].length - 1;
|
|
let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
|
|
|
|
const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
|
|
endReg.lastIndex = 0;
|
|
|
|
maskedSrc = maskedSrc.slice(-1 * src.length + lLength); // Bump maskedSrc to same section of string as src (move to lexer?)
|
|
|
|
while ((match = endReg.exec(maskedSrc)) != null) {
|
|
rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6];
|
|
|
|
if (!rDelim) continue; // matched the first alternative in rules.js (skip the * in __abc*abc__)
|
|
|
|
rLength = rDelim.length;
|
|
|
|
if (match[3] || match[4]) { // found another Left Delim
|
|
delimTotal += rLength;
|
|
continue;
|
|
} else if (match[5] || match[6]) { // either Left or Right Delim
|
|
if (lLength % 3 && !((lLength + rLength) % 3)) {
|
|
midDelimTotal += rLength;
|
|
continue; // CommonMark Emphasis Rules 9-10
|
|
}
|
|
}
|
|
|
|
delimTotal -= rLength;
|
|
|
|
if (delimTotal > 0) continue; // Haven't found enough closing delimiters
|
|
|
|
// If this is the last rDelimiter, remove extra characters. *a*** -> *a*
|
|
if (delimTotal + midDelimTotal - rLength <= 0 && !maskedSrc.slice(endReg.lastIndex).match(endReg)) {
|
|
rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
|
|
}
|
|
|
|
if (Math.min(lLength, rLength) % 2) {
|
|
return {
|
|
type: 'em',
|
|
raw: src.slice(0, lLength + match.index + rLength + 1),
|
|
text: src.slice(1, lLength + match.index + rLength)
|
|
};
|
|
}
|
|
if (Math.min(lLength, rLength) % 2 === 0) {
|
|
return {
|
|
type: 'strong',
|
|
raw: src.slice(0, lLength + match.index + rLength + 1),
|
|
text: src.slice(2, lLength + match.index + rLength - 1)
|
|
};
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
codespan(src) {
|
|
const cap = this.rules.inline.code.exec(src);
|
|
if (cap) {
|
|
let text = cap[2].replace(/\n/g, ' ');
|
|
const hasNonSpaceChars = /[^ ]/.test(text);
|
|
const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
|
|
if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
|
|
text = text.substring(1, text.length - 1);
|
|
}
|
|
text = escape(text, true);
|
|
return {
|
|
type: 'codespan',
|
|
raw: cap[0],
|
|
text
|
|
};
|
|
}
|
|
}
|
|
|
|
br(src) {
|
|
const cap = this.rules.inline.br.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'br',
|
|
raw: cap[0]
|
|
};
|
|
}
|
|
}
|
|
|
|
del(src) {
|
|
const cap = this.rules.inline.del.exec(src);
|
|
if (cap) {
|
|
return {
|
|
type: 'del',
|
|
raw: cap[0],
|
|
text: cap[2]
|
|
};
|
|
}
|
|
}
|
|
|
|
autolink(src, mangle) {
|
|
const cap = this.rules.inline.autolink.exec(src);
|
|
if (cap) {
|
|
let text, href;
|
|
if (cap[2] === '@') {
|
|
text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
|
|
href = 'mailto:' + text;
|
|
} else {
|
|
text = escape(cap[1]);
|
|
href = text;
|
|
}
|
|
|
|
return {
|
|
type: 'link',
|
|
raw: cap[0],
|
|
text,
|
|
href,
|
|
tokens: [
|
|
{
|
|
type: 'text',
|
|
raw: text,
|
|
text
|
|
}
|
|
]
|
|
};
|
|
}
|
|
}
|
|
|
|
url(src, mangle) {
|
|
let cap;
|
|
if (cap = this.rules.inline.url.exec(src)) {
|
|
let text, href;
|
|
if (cap[2] === '@') {
|
|
text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
|
|
href = 'mailto:' + text;
|
|
} else {
|
|
// do extended autolink path validation
|
|
let prevCapZero;
|
|
do {
|
|
prevCapZero = cap[0];
|
|
cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
|
|
} while (prevCapZero !== cap[0]);
|
|
text = escape(cap[0]);
|
|
if (cap[1] === 'www.') {
|
|
href = 'http://' + text;
|
|
} else {
|
|
href = text;
|
|
}
|
|
}
|
|
return {
|
|
type: 'link',
|
|
raw: cap[0],
|
|
text,
|
|
href,
|
|
tokens: [
|
|
{
|
|
type: 'text',
|
|
raw: text,
|
|
text
|
|
}
|
|
]
|
|
};
|
|
}
|
|
}
|
|
|
|
inlineText(src, inRawBlock, smartypants) {
|
|
const cap = this.rules.inline.text.exec(src);
|
|
if (cap) {
|
|
let text;
|
|
if (inRawBlock) {
|
|
text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
|
|
} else {
|
|
text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
|
|
}
|
|
return {
|
|
type: 'text',
|
|
raw: cap[0],
|
|
text
|
|
};
|
|
}
|
|
}
|
|
};
|