/*
 * This file is a modified version of Nixinova/Wikity, whose license is given below:
 * Original: https://www.npmjs.com/package/wikity
 *
 * > ISC License
 * >
 * > Copyright © 2021 Nixinova
 * >
 * > Permission to use, copy, modify, and/or distribute this software for any purpose with or
 * > without fee is hereby granted, provided that the above copyright notice and this
 * > permission notice appear in all copies.
 * >
 * > The software is provided "as is" and the author disclaims all warranties with regard to
 * > this software including all implied warranties of merchantability and fitness. In no
 * > event shall the author be liable for any special, direct, indirect, or consequential
 * > damages or any damages whatsoever resulting from loss of use, data or profits, whether
 * > in an action of contract, negligence or other tortious action, arising out of or in
 * > connection with the use or performance of this software.
 *
 * Additionally, this project and my modifications are also licensed under the ISC license.
 */

import dateFormat from 'dateformat';
import htmlEscape from 'escape-html';

/** Outcome of {@link parse}: the rendered HTML plus the metadata collected while parsing. */
export class Result {
    public html: string;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- open-ended bag read by callers
    public metadata: any;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see field above
    constructor(html: string, metadata: any) {
        this.html = html;
        this.metadata = metadata;
    }
}

/**
 * Minimal shape `parse` needs from its template store.
 * NOTE(review): inferred from the calls `directory.get(page)`, `.html` and `.raw`; confirm
 * against the real directory/page types used by callers.
 */
interface TemplatePage {
    html?: unknown;
    raw: string;
}
interface TemplateDirectory {
    get(page: string): TemplatePage | null | undefined;
}

/** Options collected from the `|`-separated parameters of a `[[File:...]]` link. */
interface ImageOptions {
    float?: string;
    align?: string;
    type?: string;
    hasCaption?: boolean;
    width?: string | number;
    height?: string | number;
    link?: string;
    alt?: string;
    style?: string;
    class?: string;
}

/**
 * Builds a RegExp from a "spaced-out" pattern.
 *
 * Every literal space is removed (write `\s` to match whitespace) and `||comment||`
 * sections are dropped, so patterns can be laid out readably.
 *
 * @param regex Pattern source, normally written with `String.raw`.
 * @param flag RegExp flags; defaults to multiline, global, case-insensitive.
 */
const re = (regex: string, flag = 'mgi'): RegExp => {
    return RegExp(regex.replace(/ /g, '').replace(/\|\|.+?\|\|/g, ''), flag);
};

const r = String.raw;

/** Pattern fragment capturing one whitespace-trimmed argument (no `|` or `}` inside). */
const arg = r`\s*([^|}]+?)\s*`;

/** Pass limit used when `PARSER_MAX_RECURSION` is unset or not a number. */
const DEFAULT_MAX_RECURSION = 20;

/** Reads the pass limit from `PARSER_MAX_RECURSION`, falling back to the default. */
function maxRecursion(): number {
    const limit = parseInt(process.env.PARSER_MAX_RECURSION ?? '', 10);
    // Without the fallback an unset variable yields NaN and `l < NaN` never runs the loop.
    return Number.isNaN(limit) ? DEFAULT_MAX_RECURSION : limit;
}

/** Escapes regex metacharacters so user-supplied names can be embedded in a pattern. */
function escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Applies a URI transform, returning the input untouched if the transform throws. */
function safeUri(transform: (text: string) => string, text: string): string {
    try {
        return transform(text);
    } catch {
        return text;
    }
}

const SANITISE_TAGS = r`<(/?) \s* (?= script|link|meta|iframe|frameset|object|embed|applet|form|input|button|textarea )`;
const SANITISE_HANDLERS = r`(?<= <[^>]+ ) (\bon(\w+))`;
const PARSER_FUNCTION = r`{{ \s* (#\w+) \s* : \s* ( [^{}]+ ) \s* }} ( ?!} )`;
const TEMPLATE = r`{{ \s* ([^#}|]+?) (\|[^}]+)? }} (?!})`;

/** Magic words and functions that `findDependencies` deletes, in application order. */
const NON_TEMPLATE_WORDS: readonly RegExp[] = [
    re(r`{{ \s* [Rr]eflist \s* }}`),
    re(r`{{ \s* #? urlencode: ${arg} }}`),
    re(r`{{ \s* #? urldecode: ${arg} }}`),
    re(r`{{ \s* #? lc: ${arg} }}`),
    re(r`{{ \s* #? uc: ${arg} }}`),
    re(r`{{ \s* #? lcfirst: ${arg} }}`),
    re(r`{{ \s* #? ucfirst: ${arg} }}`),
    re(r`{{ \s* #? len: ${arg} }}`),
    re(r`{{ \s* #? pos: ${arg} \|${arg} (?: \s*\|${arg} )? }}`),
    re(r`{{ \s* #? sub: ${arg} \|${arg} (?:\|${arg})? }}`),
    re(r`{{ \s* #? padleft: ${arg} \|${arg} \|${arg} }}`),
    re(r`{{ \s* #? padright: ${arg} \|${arg} \|${arg} }}`),
    re(r`{{ \s* #? replace: ${arg} \|${arg} \|${arg} }}`),
    re(r`{{ \s* #? explode: ${arg} \|${arg} \|${arg} }}`),
    re(PARSER_FUNCTION),
];

/**
 * Collects the template pages transcluded directly by a piece of wikitext.
 *
 * Magic words and parser functions are stripped first so only real `{{template}}` calls
 * remain; the text is re-scanned until it stops changing (or the pass limit is hit) so
 * that templates nested in another template's parameters are found too. Templates used
 * *inside* the referenced pages are not followed.
 *
 * @param data Raw wikitext.
 * @returns Page names; titles without a namespace are prefixed with `Template:`.
 */
export function findDependencies(data: string): Set<string> {
    const pages = new Set<string>();
    const limit = maxRecursion();

    let outText = data;
    for (let l = 0, last = ''; l < limit; l++) {
        if (last === outText) break;
        last = outText;

        // Remove non-template magic words
        outText = outText
            .replace(re(SANITISE_TAGS), '&lt;$1')
            .replace(re(SANITISE_HANDLERS), 'data-$2')
            .replace(/<!--[^]+?-->/g, '')
            .replace(re(r`{{ \s* displayTitle: ([^}]+) }}`), '')
            .replace(re(r`{{ \s* navbarSortOrder: ([^}]+) }}`), '')
            .replace(re(r`{{ \s* ! \s* }}`), '&vert;')
            .replace(re(r`{{ \s* = \s* }}`), '&equals;');
        for (const word of NON_TEMPLATE_WORDS) {
            outText = outText.replace(word, '');
        }

        // Templates: {{template}}
        outText = outText.replace(re(TEMPLATE), (whole: string, title: string, params = '') => {
            // Wait for nested templates in the parameters to be consumed first.
            if (/{{/.test(params)) return whole;
            const name = title.trim();
            pages.add(name.includes(':') ? name : `Template:${name}`);
            return '';
        });
    }

    return pages;
}

/**
 * Evaluates one `{{#name: a | b | ...}}` parser function.
 *
 * @param whole The full matched text, returned when evaluation must be deferred or the
 *   function is unknown.
 * @param name Function name including the leading `#` (matched case-insensitively).
 * @param args Pipe-separated, trimmed arguments.
 * @param vars Variable store shared by `#vardefine` / `#var` within one parse.
 * @param pageText Text as it stood at the start of the current pass.
 */
function evalParserFunction(
    whole: string,
    name: string,
    args: string[],
    vars: Record<string, string>,
    pageText: string,
): string {
    const subject = args[0] ?? '';
    switch (name.toLowerCase()) {
        case '#if':
            return (subject ? args[1] : args[2]) || '';
        case '#ifeq':
            return (subject === args[1] ? args[2] : args[3]) || '';
        case '#vardefine':
            vars[subject] = args[1] || '';
            return '';
        case '#var':
            // Wait until the matching #vardefine has been consumed by an earlier pass.
            if (re(r`{{ \s* #vardefine \s* : \s* ${escapeRegExp(subject)}`).test(pageText)) return whole;
            return vars[subject] || args[1] || '';
        case '#switch': {
            const hit = args.slice(1)
                .map((branch) => branch.split(/\s*=\s*/))
                // `#default` is rewritten to the subject so it matches anything.
                .find((duo) => subject === (duo[0] ?? '').replace('#default', subject));
            return hit?.[1] ?? '';
        }
        case '#time':
        case '#date':
        case '#datetime': {
            const when = args[1] ? new Date(args[1]) : new Date();
            // dateFormat throws on an invalid date; render nothing instead of aborting.
            return Number.isNaN(when.getTime()) ? '' : dateFormat(when, subject);
        }
        default:
            // Unknown function: leave the source text visible rather than emit "undefined".
            return whole;
    }
}

/**
 * Expands one `{{title|params}}` call using the page stored in `directory`.
 *
 * @param directory Template store queried with the full page name.
 * @param title Text before the first `|`; `Template:` is assumed when it has no namespace.
 * @param params Everything from the first `|` onwards, or `''`.
 * @returns The template text with `{{{parameters}}}` substituted, or a red link when the
 *   page is missing.
 */
function expandTemplate(directory: TemplateDirectory, title: string, params: string): string {
    const name = title.trim();
    const page = name.includes(':') ? name : `Template:${name}`;

    // Retrieve template content
    const entry = directory.get(page);
    if (!entry?.html) {
        // NOTE(review): link markup/URL scheme assumed; confirm against the site's routes.
        return `<a class="internal-link redlink" title="Template:${name}" href="/Template:${name}">Template:${name}</a>`;
    }

    // Remove non-template sections
    let content = entry.raw
        .replace(/<noinclude>.*?<\/noinclude>/gs, '')
        .replace(/.*<(includeonly|onlyinclude)>|<\/(includeonly|onlyinclude)>.*/gs, '');

    // Substitute supplied arguments; unnamed ones are keyed by their 1-based position.
    const argMatch = (key: string): RegExp => re(r`{{{ \s* ${key} (?:\|([^}]*))? \s* }}}`);
    params.split('|').slice(1).forEach((raw, index) => {
        const eq = raw.indexOf('=');
        // Split on the first `=` only, so values may themselves contain `=`.
        const [key, val] = eq > 0 ? [raw.slice(0, eq), raw.slice(eq + 1)] : [String(index + 1), raw];
        content = content.replace(argMatch(escapeRegExp(key.trim())), (_: string, fallback?: string) => val || fallback || '');
    });

    // Any parameter still present was not supplied: replace it with its default (group 2).
    // Repeated so defaults that themselves contain a parameter are resolved as well.
    const anyParameter = argMatch(arg);
    for (let pass = 0; pass < 10; pass++) {
        content = content.replace(anyParameter, '$2');
    }

    return content;
}

const IMAGE_FLOATS = ['left', 'right', 'center', 'none'];
const IMAGE_ALIGNS = ['baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom'];
const IMAGE_FRAMES = ['border', 'frameless', 'frame', 'framed', 'thumb', 'thumbnail'];
const IMAGE_FRAME_ALIASES: Record<string, string> = { framed: 'frame', thumbnail: 'thumb' };

/**
 * Renders `[[File:name|options|caption]]`.
 *
 * @param whole Full matched text, returned while the parameters still contain a template.
 * @param file File name as written in the link.
 * @param params Everything from the first `|` onwards, if any.
 */
function renderImage(whole: string, file: string, params: string | undefined): string {
    if (params !== undefined && /{{/.test(params)) return whole;

    const path = file.trim().replace(/ /g, '_');
    const image: ImageOptions = {};
    let caption = '';

    // Quotes are escaped because option values end up inside HTML attributes.
    const options = params?.split('|').map((option) => option.replace(/"/g, '&quot;')) ?? [];
    for (const option of options) {
        if (IMAGE_FLOATS.includes(option)) {
            image.float = option;
        } else if (IMAGE_ALIGNS.includes(option)) {
            image.align = option;
        } else if (IMAGE_FRAMES.includes(option)) {
            image.type = IMAGE_FRAME_ALIASES[option] || option;
            if (image.type === 'thumb') image.hasCaption = true;
        } else if (option.endsWith('px')) {
            // Accepts `WIDTHpx`, `xHEIGHTpx` and `WIDTHxHEIGHTpx`.
            const size = /(?:(\w+)?(x))?(\w+)px/.exec(option);
            if (size) {
                const [, first, cross, second] = size;
                if (first) Object.assign(image, { width: first, height: second });
                else if (cross) Object.assign(image, { width: 'auto', height: second });
                else Object.assign(image, { width: second, height: 'auto' });
            }
        } else if (option.startsWith('upright=')) {
            image.width = +option.replace('upright=', '') * 300;
        } else if (option.startsWith('link=')) {
            image.link = option.replace('link=', '');
        } else if (option.startsWith('alt=')) {
            image.alt = option.replace('alt=', '');
        } else if (option.startsWith('style=')) {
            image.style = option.replace('style=', '');
        } else if (option.startsWith('class=')) {
            image.class = option.replace('class=', '');
        } else {
            // Anything unrecognised is the caption; the last one wins.
            caption = option;
        }
    }

    // NOTE(review): element structure, class names and the `src`/`href` prefixes are
    // assumed; confirm against the stylesheet and upload route.
    let content =
        `<figure class="${image.class || ''} image-container image-${image.type || 'default'}"` +
        ` style="float:${image.float || 'none'};vertical-align:${image.align || 'unset'};${image.style || ''}">` +
        `<img src="${path}" alt="${image.alt || file}" width="${image.width || 300}" height="${image.height || 300}">` +
        `${image.hasCaption ? `<figcaption>${caption}</figcaption>` : ''}` +
        `</figure>`;
    if (image.link) content = `<a href="/${image.link}" title="${image.link}">${content}</a>`;
    return content;
}

/**
 * Converts wikitext to HTML.
 *
 * The rule chain is applied repeatedly until the text stops changing or the pass limit
 * (`PARSER_MAX_RECURSION`, default 20) is reached, which is what lets nested templates
 * and parser functions resolve from the inside out. `<nowiki>` content is swapped for
 * placeholders on the way in and restored, HTML-escaped, only after the last pass so it
 * is never interpreted as wikitext.
 *
 * NOTE(review): class names, ids and URL prefixes on the emitted elements should be
 * confirmed against the consuming stylesheet/router.
 *
 * @param directory Template store; queried with names such as `Template:Foo`.
 * @param data Raw wikitext.
 * @returns Rendered HTML and collected metadata (`displayTitle`, `sortOrder`, `noindex`,
 *   `notoc`, `toc`, `primary`, `notitle`, `buildTime`).
 */
export function parse(directory: TemplateDirectory, data: string): Result {
    const vars: Record<string, string> = {};
    const metadata: Record<string, unknown> = {};
    const nowikis: string[] = [];
    const refs: string[] = [];
    let rawExtLinkCount = 0;
    const limit = maxRecursion();

    /** Replacer that records a boolean metadata flag and removes the magic word. */
    const setFlag = (key: string) => (): string => {
        metadata[key] = true;
        return '';
    };

    let outText = data;
    for (let l = 0, last = ''; l < limit; l++) {
        if (last === outText) break;
        last = outText;

        outText = outText

            // Nowiki: <nowiki></nowiki>
            .replace(re(r`<nowiki> ([^]+?) </nowiki>`), (_, m) => `%NOWIKI#${nowikis.push(m) - 1}%`)

            // Sanitise unacceptable HTML
            .replace(re(SANITISE_TAGS), '&lt;$1')
            .replace(re(SANITISE_HANDLERS), 'data-$2')

            // Comments: <!-- -->
            .replace(/<!--[^]+?-->/g, '')

            // Lines: ----
            .replace(/^-{4,}/gm, '<hr>')

            // Metadata: displayTitle, __NOTOC__, etc
            .replace(re(r`{{ \s* displayTitle: ([^}]+) }}`), (_, title) => (metadata.displayTitle = title, ''))
            .replace(re(r`{{ \s* navbarSortOrder: ([^}]+) }}`), (_, order) => (metadata.sortOrder = parseInt(order, 10), ''))
            .replace(re(r`__NOINDEX__`), setFlag('noindex'))
            .replace(re(r`__NOTOC__`), setFlag('notoc'))
            .replace(re(r`__FORCETOC__`), setFlag('toc'))
            .replace(re(r`__TOC__`), setFlag('toc'))
            .replace(re(r`__PRIMARY__`), setFlag('primary'))
            .replace(re(r`__NOTITLE__`), setFlag('notitle'))

            // Magic words: {{!}}, {{reflist}}, etc
            .replace(re(r`{{ \s* ! \s* }}`), '&vert;')
            .replace(re(r`{{ \s* = \s* }}`), '&equals;')
            .replace(re(r`{{ \s* [Rr]eflist \s* }}`), '<references/>')

            // String functions: {{lc:}}, {{ucfirst:}}, {{len:}}, etc
            .replace(re(r`{{ \s* #? urlencode: ${arg} }}`), (_, m) => safeUri(encodeURI, m))
            .replace(re(r`{{ \s* #? urldecode: ${arg} }}`), (_, m) => safeUri(decodeURI, m))
            .replace(re(r`{{ \s* #? lc: ${arg} }}`), (_, m) => m.toLowerCase())
            .replace(re(r`{{ \s* #? uc: ${arg} }}`), (_, m) => m.toUpperCase())
            .replace(re(r`{{ \s* #? lcfirst: ${arg} }}`), (_, m) => m[0].toLowerCase() + m.substr(1))
            .replace(re(r`{{ \s* #? ucfirst: ${arg} }}`), (_, m) => m[0].toUpperCase() + m.substr(1))
            .replace(re(r`{{ \s* #? len: ${arg} }}`), (_, m) => String(m.length))
            .replace(re(r`{{ \s* #? pos: ${arg} \|${arg} (?: \s*\|${arg} )? }}`), (_, find, str, n = 0) => String(find.substr(+n).indexOf(str)))
            .replace(re(r`{{ \s* #? sub: ${arg} \|${arg} (?:\|${arg})? }}`), (_, str, from, len) =>
                // An omitted length means "to the end"; `+undefined` would give an empty string.
                len === undefined ? str.substr(+from - 1) : str.substr(+from - 1, +len))
            .replace(re(r`{{ \s* #? padleft: ${arg} \|${arg} \|${arg} }}`), (_, str, n, char) => str.padStart(+n, char))
            .replace(re(r`{{ \s* #? padright: ${arg} \|${arg} \|${arg} }}`), (_, str, n, char) => str.padEnd(+n, char))
            .replace(re(r`{{ \s* #? replace: ${arg} \|${arg} \|${arg} }}`), (_, str, find, rep) => str.split(find).join(rep))
            .replace(re(r`{{ \s* #? explode: ${arg} \|${arg} \|${arg} }}`), (_, str, delim, pos) => str.split(delim)[+pos] ?? '')

            // Parser functions: {{#if:}}, {{#switch:}}, etc
            .replace(re(PARSER_FUNCTION), (whole: string, name: string, content: string) => {
                if (/{{\s*#/.test(content)) return whole;
                // `outText` still holds the text from the start of this pass here.
                return evalParserFunction(whole, name, content.trim().split(/\s*\|\s*/), vars, outText);
            })

            // Templates: {{template}}
            .replace(re(TEMPLATE), (whole: string, title: string, params = '') => {
                // Expand innermost templates first.
                if (/{{/.test(params)) return whole;
                return expandTemplate(directory, title, params);
            })

            // Images: [[File:Image.png|options|caption]]
            .replace(re(r`\[\[ (?:File|Image): (.+?) (\|.+?)? \]\]`), (whole: string, file: string, params?: string) =>
                renderImage(whole, file, params))

            // Markup: '''bold''' and ''italic''
            .replace(re(r`''' ([^']+?) '''`), '<b>$1</b>')
            .replace(re(r`'' ([^']+?) ''`), '<i>$1</i>')

            // Headings: ==heading==
            .replace(re(r`^ (=+) \s* (.+?) \s* \1 \s* $`), (_, lvl, txt) =>
                `<h${lvl.length} id="${encodeURI(txt.replace(/ /g, '_'))}">${txt}</h${lvl.length}>`)

            // Internal links: [[Page]] and [[Page|Text]]
            .replace(re(r`\[\[ ([^\]|]+?) \]\]`), '<a class="internal-link" title="$1" href="/$1">$1</a>')
            .replace(re(r`\[\[ ([^\]|]+?) \| ([^\]]+?) \]\]`), '<a class="internal-link" title="$1" href="/$1">$2</a>')
            // Pull trailing letters into the link: [[cat]]s renders as one link "cats".
            .replace(re(r`(</a>)([a-z]+)`), '$2$1')

            // External links: [href Page] and just [href]
            .replace(re(r`\[ ((?:\w+:)?\/\/ [^\s\]]+) (\s [^\]]+?)? \]`), (_, href, txt) =>
                `<a class="external-link" href="${href}">${txt || '[' + (++rawExtLinkCount) + ']'}</a>`)

            // Bulleted list: *item (adjacent lists are then merged)
            .replace(re(r`^ (\*+) (.+?) $`), (_, lvl, txt) => `${'<ul>'.repeat(lvl.length)}<li>${txt}</li>${'</ul>'.repeat(lvl.length)}`)
            .replace(re(r`</ul> (\s*?) <ul>`), '$1')

            // Numbered list: #item
            .replace(re(r`^ (#+) (.+?) $`), (_, lvl, txt) => `${'<ol>'.repeat(lvl.length)}<li>${txt}</li>${'</ol>'.repeat(lvl.length)}`)
            .replace(re(r`</ol> (\s*?) <ol>`), '$1')

            // Definition list: ;head, :item
            .replace(re(r`^ ; (.+) $`), '<dl><dt>$1</dt></dl>')
            .replace(re(r`^ (:+) (.+?) $`), (_, lvl, txt) => `${'<dl>'.repeat(lvl.length)}<dd>${txt}</dd>${'</dl>'.repeat(lvl.length)}`)
            .replace(re(r`</dl> (\s*?) <dl>`), '$1')

            // Tables: {|, |+, !, |-, |, |}
            .replace(re(r`^ \{\| (.*?) $`), (_, attrs) => `<table ${attrs}><tr>`)
            .replace(re(r`^ ! ([^]+?) (?= \n^[!|] )`), (_, content) => `<th>${content}</th>`)
            .replace(re(r`^ \|\+ (.*?) $`), (_, content) => `<caption>${content}</caption>`)
            .replace(re(r`^ \|[^-+}] ([^]*?) (?= \n^[!|] )`), (_, content) => `<td>${content}</td>`)
            .replace(re(r`^ \|- (.*?) $`), (_, attrs) => `</tr><tr ${attrs}>`)
            .replace(re(r`^ \|\}`), '</tr></table>')

            // References: <ref></ref>, <references/>
            .replace(re(r`<ref> (.+?) </ref>`), (_, text) => {
                const refCount = refs.push(text);
                return `<sup><a id="cite-${refCount}" class="ref" href="#ref-${refCount}">[${refCount}]</a></sup>`;
            })
            // A function replacer keeps `$` sequences in reference text literal.
            .replace(re(r`<references \s* /?>`), () =>
                '<ol>' + refs.map((ref, i) => `<li id="ref-${i + 1}">${ref}</li>`).join('\n') + '</ol>')

            // Nonstandard: ```code blocks``` and bare <pre>
            .replace(re(r` \`\`\` ([^\`]+?) \`\`\` `), '<pre class="code-block">$1</pre>')
            .replace(re(r` <pre> ([^\`]+?) </pre> `), '<pre class="code-block">$1</pre>')

            // Spacing
            .replace(/(\r?\n){2}/g, '\n</p><p>\n');
    }

    // Restore nowiki contents only now, so they are never re-parsed by a later pass.
    outText = outText.replace(/%NOWIKI#(\d+)%/g, (_, n) => htmlEscape(nowikis[+n] ?? ''));

    metadata.buildTime = new Date();

    return new Result(outText, metadata);
}