123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491 |
- const Tokenizer = require('./Tokenizer.js');
- const { defaults } = require('./defaults.js');
- const { block, inline } = require('./rules.js');
- const { repeatString } = require('./helpers.js');
/**
 * Apply "smartypants" typographic substitutions to a piece of text.
 *
 * The replacements run in a fixed order, and the order matters:
 *  - `---` must be consumed before `--`, otherwise an em-dash would be
 *    read as an en-dash followed by a hyphen;
 *  - opening quotes are resolved before the catch-all closing-quote pass,
 *    so whatever quote is left over is a closing quote or an apostrophe;
 *  - single quotes are handled before double quotes, which is why the
 *    opening-double pattern accepts an already-converted \u2018 before it.
 *
 * @param {string} text - Plain text to transform.
 * @returns {string} The text with dashes, quotes and ellipses replaced by
 *   their typographic Unicode equivalents.
 */
function smartypants(text) {
  return text
    // em-dashes
    .replace(/---/g, '\u2014')
    // en-dashes
    .replace(/--/g, '\u2013')
    // opening singles: a quote at the start or after a dash, slash,
    // opening bracket, straight double quote or whitespace
    .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
    // closing singles & apostrophes: every remaining straight single quote
    .replace(/'/g, '\u2019')
    // opening doubles: same positions as above, or after an opening single
    .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
    // closing doubles: every remaining straight double quote
    .replace(/"/g, '\u201d')
    // ellipses
    .replace(/\.{3}/g, '\u2026');
}
/**
 * Mangle a string (used for email addresses) by rewriting every character
 * as an HTML numeric character reference.
 *
 * Each character is emitted at random as either a decimal (`&#64;`) or a
 * hexadecimal (`&#x40;`) reference, so the output differs between calls
 * while decoding back to the same text.
 *
 * @param {string} text - The text to obfuscate.
 * @returns {string} A sequence of numeric character references.
 */
function mangle(text) {
  let out = '';
  // Walk the string by code point rather than by UTF-16 unit. Splitting an
  // astral character into its two surrogates would emit references such as
  // `&#55357;`, which HTML parsers replace with U+FFFD.
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    const reference = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + reference + ';';
  }
  return out;
}
- /**
- * Block Lexer
- */
- module.exports = class Lexer {
- constructor(options) {
- this.tokens = [];
- this.tokens.links = Object.create(null);
- this.options = options || defaults;
- this.options.tokenizer = this.options.tokenizer || new Tokenizer();
- this.tokenizer = this.options.tokenizer;
- this.tokenizer.options = this.options;
- const rules = {
- block: block.normal,
- inline: inline.normal
- };
- if (this.options.pedantic) {
- rules.block = block.pedantic;
- rules.inline = inline.pedantic;
- } else if (this.options.gfm) {
- rules.block = block.gfm;
- if (this.options.breaks) {
- rules.inline = inline.breaks;
- } else {
- rules.inline = inline.gfm;
- }
- }
- this.tokenizer.rules = rules;
- }
- /**
- * Expose Rules
- */
- static get rules() {
- return {
- block,
- inline
- };
- }
- /**
- * Static Lex Method
- */
- static lex(src, options) {
- const lexer = new Lexer(options);
- return lexer.lex(src);
- }
- /**
- * Static Lex Inline Method
- */
- static lexInline(src, options) {
- const lexer = new Lexer(options);
- return lexer.inlineTokens(src);
- }
- /**
- * Preprocessing
- */
- lex(src) {
- src = src
- .replace(/\r\n|\r/g, '\n')
- .replace(/\t/g, ' ');
- this.blockTokens(src, this.tokens, true);
- this.inline(this.tokens);
- return this.tokens;
- }
- /**
- * Lexing
- */
- blockTokens(src, tokens = [], top = true) {
- if (this.options.pedantic) {
- src = src.replace(/^ +$/gm, '');
- }
- let token, i, l, lastToken;
- while (src) {
- // newline
- if (token = this.tokenizer.space(src)) {
- src = src.substring(token.raw.length);
- if (token.type) {
- tokens.push(token);
- }
- continue;
- }
- // code
- if (token = this.tokenizer.code(src)) {
- src = src.substring(token.raw.length);
- lastToken = tokens[tokens.length - 1];
- // An indented code block cannot interrupt a paragraph.
- if (lastToken && lastToken.type === 'paragraph') {
- lastToken.raw += '\n' + token.raw;
- lastToken.text += '\n' + token.text;
- } else {
- tokens.push(token);
- }
- continue;
- }
- // fences
- if (token = this.tokenizer.fences(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // heading
- if (token = this.tokenizer.heading(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // table no leading pipe (gfm)
- if (token = this.tokenizer.nptable(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // hr
- if (token = this.tokenizer.hr(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // blockquote
- if (token = this.tokenizer.blockquote(src)) {
- src = src.substring(token.raw.length);
- token.tokens = this.blockTokens(token.text, [], top);
- tokens.push(token);
- continue;
- }
- // list
- if (token = this.tokenizer.list(src)) {
- src = src.substring(token.raw.length);
- l = token.items.length;
- for (i = 0; i < l; i++) {
- token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
- }
- tokens.push(token);
- continue;
- }
- // html
- if (token = this.tokenizer.html(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // def
- if (top && (token = this.tokenizer.def(src))) {
- src = src.substring(token.raw.length);
- if (!this.tokens.links[token.tag]) {
- this.tokens.links[token.tag] = {
- href: token.href,
- title: token.title
- };
- }
- continue;
- }
- // table (gfm)
- if (token = this.tokenizer.table(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // lheading
- if (token = this.tokenizer.lheading(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // top-level paragraph
- if (top && (token = this.tokenizer.paragraph(src))) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // text
- if (token = this.tokenizer.text(src)) {
- src = src.substring(token.raw.length);
- lastToken = tokens[tokens.length - 1];
- if (lastToken && lastToken.type === 'text') {
- lastToken.raw += '\n' + token.raw;
- lastToken.text += '\n' + token.text;
- } else {
- tokens.push(token);
- }
- continue;
- }
- if (src) {
- const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
- if (this.options.silent) {
- console.error(errMsg);
- break;
- } else {
- throw new Error(errMsg);
- }
- }
- }
- return tokens;
- }
- inline(tokens) {
- let i,
- j,
- k,
- l2,
- row,
- token;
- const l = tokens.length;
- for (i = 0; i < l; i++) {
- token = tokens[i];
- switch (token.type) {
- case 'paragraph':
- case 'text':
- case 'heading': {
- token.tokens = [];
- this.inlineTokens(token.text, token.tokens);
- break;
- }
- case 'table': {
- token.tokens = {
- header: [],
- cells: []
- };
- // header
- l2 = token.header.length;
- for (j = 0; j < l2; j++) {
- token.tokens.header[j] = [];
- this.inlineTokens(token.header[j], token.tokens.header[j]);
- }
- // cells
- l2 = token.cells.length;
- for (j = 0; j < l2; j++) {
- row = token.cells[j];
- token.tokens.cells[j] = [];
- for (k = 0; k < row.length; k++) {
- token.tokens.cells[j][k] = [];
- this.inlineTokens(row[k], token.tokens.cells[j][k]);
- }
- }
- break;
- }
- case 'blockquote': {
- this.inline(token.tokens);
- break;
- }
- case 'list': {
- l2 = token.items.length;
- for (j = 0; j < l2; j++) {
- this.inline(token.items[j].tokens);
- }
- break;
- }
- default: {
- // do nothing
- }
- }
- }
- return tokens;
- }
- /**
- * Lexing/Compiling
- */
- inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
- let token, lastToken;
- // String with links masked to avoid interference with em and strong
- let maskedSrc = src;
- let match;
- let keepPrevChar, prevChar;
- // Mask out reflinks
- if (this.tokens.links) {
- const links = Object.keys(this.tokens.links);
- if (links.length > 0) {
- while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
- if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
- maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
- }
- }
- }
- }
- // Mask out other blocks
- while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
- maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
- }
- // Mask out escaped em & strong delimiters
- while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
- maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
- }
- while (src) {
- if (!keepPrevChar) {
- prevChar = '';
- }
- keepPrevChar = false;
- // escape
- if (token = this.tokenizer.escape(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // tag
- if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
- src = src.substring(token.raw.length);
- inLink = token.inLink;
- inRawBlock = token.inRawBlock;
- const lastToken = tokens[tokens.length - 1];
- if (lastToken && token.type === 'text' && lastToken.type === 'text') {
- lastToken.raw += token.raw;
- lastToken.text += token.text;
- } else {
- tokens.push(token);
- }
- continue;
- }
- // link
- if (token = this.tokenizer.link(src)) {
- src = src.substring(token.raw.length);
- if (token.type === 'link') {
- token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
- }
- tokens.push(token);
- continue;
- }
- // reflink, nolink
- if (token = this.tokenizer.reflink(src, this.tokens.links)) {
- src = src.substring(token.raw.length);
- const lastToken = tokens[tokens.length - 1];
- if (token.type === 'link') {
- token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
- tokens.push(token);
- } else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
- lastToken.raw += token.raw;
- lastToken.text += token.text;
- } else {
- tokens.push(token);
- }
- continue;
- }
- // em & strong
- if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
- src = src.substring(token.raw.length);
- token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
- tokens.push(token);
- continue;
- }
- // code
- if (token = this.tokenizer.codespan(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // br
- if (token = this.tokenizer.br(src)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // del (gfm)
- if (token = this.tokenizer.del(src)) {
- src = src.substring(token.raw.length);
- token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
- tokens.push(token);
- continue;
- }
- // autolink
- if (token = this.tokenizer.autolink(src, mangle)) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // url (gfm)
- if (!inLink && (token = this.tokenizer.url(src, mangle))) {
- src = src.substring(token.raw.length);
- tokens.push(token);
- continue;
- }
- // text
- if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
- src = src.substring(token.raw.length);
- if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
- prevChar = token.raw.slice(-1);
- }
- keepPrevChar = true;
- lastToken = tokens[tokens.length - 1];
- if (lastToken && lastToken.type === 'text') {
- lastToken.raw += token.raw;
- lastToken.text += token.text;
- } else {
- tokens.push(token);
- }
- continue;
- }
- if (src) {
- const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
- if (this.options.silent) {
- console.error(errMsg);
- break;
- } else {
- throw new Error(errMsg);
- }
- }
- }
- return tokens;
- }
- };
|