From f4a3e9bf53c657c3e6b9330eb6ad644094f75e61 Mon Sep 17 00:00:00 2001 From: LMBishop <13875753+LMBishop@users.noreply.github.com> Date: Sat, 20 Nov 2021 17:46:20 +0000 Subject: Initial commit --- app/wikiparser.mjs | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 app/wikiparser.mjs (limited to 'app/wikiparser.mjs') diff --git a/app/wikiparser.mjs b/app/wikiparser.mjs new file mode 100644 index 0000000..c48dca3 --- /dev/null +++ b/app/wikiparser.mjs @@ -0,0 +1,254 @@ +'use strict'; + +import { PARSER_MAX_RECURSION, TEMPLATE_DIR, IMAGES_DIR } from './constants.mjs'; +import * as fs from 'fs'; + +const re = (regex, flag = 'mgi') => { + return RegExp(regex.replace(/ /g, '').replace(/\|\|.+?\|\|/g, ''), flag); +}; +const r = String.raw; +const arg = r`\s*([^|}]+?)\s*`; + +export function parse(data) { + const vars = {}; + const metadata = {}; + let nowikis = []; + let nowikiCount = 0; + let rawExtLinkCount = 0; + let refCount = 0; + let refs = []; + + let outText = data; + + for (let l = 0, last = ''; l < PARSER_MAX_RECURSION; l++) { + if (last === outText) break; last = outText; + + outText = outText + + // Nowiki: + .replace(re(r` ([^]+?) `), (_, m) => `%NOWIKI#${nowikis.push(m), nowikiCount++}%`) + + // Sanitise unacceptable HTML + .replace(re(r`<(/?) \s* (?= script|link|meta|iframe|frameset|object|embed|applet|form|input|button|textarea )`), '<$1') + .replace(re(r`(?<= <[^>]+ ) (\bon(\w+))`), 'data-$2') + + // Comments: + .replace(//g, '') + + // Lines: ---- + .replace(/^-{4,}/gm, '
') + + // Metadata: displayTitle, __NOTOC__, etc + .replace(re(r`{{ \s* displayTitle: ([^}]+) }}`), (_, title) => (metadata.displayTitle = title, '')) + .replace(re(r`__NOINDEX__`), () => (metadata.noindex = true, '')) + .replace(re(r`__NOTOC__`), () => (metadata.notoc = true, '')) + .replace(re(r`__FORCETOC__`), () => (metadata.toc = true, '')) + .replace(re(r`__TOC__`), () => (metadata.toc = true, '')) + .replace(re(r`__HIDDEN__`), () => (metadata.hidden = true, '')) + + // Magic words: {{!}}, {{reflist}}, etc + .replace(re(r`{{ \s* ! \s* }}`), '|') + .replace(re(r`{{ \s* = \s* }}`), '=') + .replace(re(r`{{ \s* [Rr]eflist \s* }}`), '') + + // String functions: {{lc:}}, {{ucfirst:}}, {{len:}}, etc + .replace(re(r`{{ \s* #? urlencode: ${arg} }}`), (_, m) => encodeURI(m)) + .replace(re(r`{{ \s* #? urldecode: ${arg} }}`), (_, m) => decodeURI(m)) + .replace(re(r`{{ \s* #? lc: ${arg} }}`), (_, m) => m.toLowerCase()) + .replace(re(r`{{ \s* #? uc: ${arg} }}`), (_, m) => m.toUpperCase()) + .replace(re(r`{{ \s* #? lcfirst: ${arg} }}`), (_, m) => m[0].toLowerCase() + m.substr(1)) + .replace(re(r`{{ \s* #? ucfirst: ${arg} }}`), (_, m) => m[0].toUpperCase() + m.substr(1)) + .replace(re(r`{{ \s* #? len: ${arg} }}`), (_, m) => m.length) + .replace(re(r`{{ \s* #? pos: ${arg} \|${arg} (?: \s*\|${arg} )? }}`), (_, find, str, n = 0) => find.substr(n).indexOf(str)) + .replace(re(r`{{ \s* #? sub: ${arg} \|${arg} (?:\|${arg})? }}`), (_, str, from, len) => str.substr(+from - 1, +len)) + .replace(re(r`{{ \s* #? padleft: ${arg} \|${arg} \|${arg} }}`), (_, str, n, char) => str.padStart(+n, char)) + .replace(re(r`{{ \s* #? padright: ${arg} \|${arg} \|${arg} }}`), (_, str, n, char) => str.padEnd(+n, char)) + .replace(re(r`{{ \s* #? replace: ${arg} \|${arg} \|${arg} }}`), (_, str, find, rep) => str.split(find).join(rep)) + .replace(re(r`{{ \s* #? explode: ${arg} \|${arg} \|${arg} }}`), (_, str, delim, pos) => str.split(delim)[+pos]) + + // Parser functions: {{#if:}}, {{#switch:}}, etc + .replace(re(r`{{ \s* (#\w+) \s* : \s* ( [^{}]+ ) \s* }} ( ?!} )`), (_, name, content) => { + if (/{{\s*#/.test(content)) return _; + const args = content.trim().split(/\s*\|\s*/); + switch (name) { + case '#if': + return (args[0] ? args[1] : args[2]) || ''; + case '#ifeq': + return (args[0] === args[1] ? args[2] : args[3]) || ''; + case '#vardefine': + vars[args[0]] = args[1] || ''; + return ''; + case '#var': + if (re(r`{{ \s* #vardefine \s* : \s* ${args[0]}`).test(outText)) return _; // wait until var is set + return vars[args[0]] || args[1] || ''; + case '#switch': + return args.slice(1) + .map(arg => arg.split(/\s*=\s*/)) + .filter(duo => args[0] === duo[0].replace('#default', args[0]))[0][1]; + case '#time': + case '#date': + case '#datetime': + return dateFormat(args[1] ? new Date(args[1]) : new Date(), args[0]); + } + }) + + // Templates: {{template}} + .replace(re(r`{{ \s* ([^#}|]+?) (\|[^}]+)? }} (?!})`), (_, title, params = '') => { + if (/{{/.test(params)) return _; + const page = TEMPLATE_DIR + '/' + title.trim().replace(/ /g, '_'); + + // Retrieve template content + let content = ''; + try { + content = fs.readFileSync(page + '.wiki', 'utf8' ); + } + catch { + return `${title}`; + } + + // Remove non-template sections + content = content + .replace(/.*?<\/noinclude>/gs, '') + .replace(/.*<(includeonly|onlyinclude)>|<\/(includeonly|onlyinclude)>.*/gs, ''); + + // Substitite arguments + const argMatch = (arg) => re(r`{{{ \s* ${arg} (?:\|([^}]*))? \s* }}}`); + let args = params.split('|').slice(1); + for (let i in args) { + let parts = args[i].split('='); + let [arg, val] = parts[1] ? [parts[0], ...parts.slice(1)] : [(+i + 1) + '', parts[0]]; + content = content.replace(argMatch(arg), (_, m) => val || m || ''); + } + for (let i = 1; i <= 10; i++) { + content = content.replace(argMatch(arg), '$2'); + } + + return content; + }) + + // Images: [[File:Image.png|options|caption]] + .replace(re(r`\[\[ (?:File|Image): (.+?) (\|.+?)? \]\]`), (_, file, params) => { + if (/{{/.test(params)) return _; + const path = IMAGES_DIR + '/' + file.trim().replace(/ /g, '_'); + let caption = ''; + let imageData = {}; + let imageArgs = params.split('|').map((arg) => arg.replace(/"/g, '"')); + for (const param of imageArgs) { + if (['left', 'right', 'center', 'none'].includes(param)) { + imageData.float = param; + } + if (['baseline', 'sub', 'super', 'top', 'text-bottom', 'middle', 'bottom', 'text-bottom'].includes(param)) { + imageData.align = param; + } + else if (['border', 'frameless', 'frame', 'framed', 'thumb', 'thumbnail'].includes(param)) { + imageData.type = { framed: 'frame', thumbnail: 'thumb' }[param] || param; + if (imageData.type === 'thumb') imageData.hasCaption = true; + } + else if (param.endsWith('px')) { + param.replace(/(?:(\w+)?(x))?(\w+)px/, (_, size1, auto, size2) => { + if (size1) Object.assign(imageData, { width: size1, height: size2 }); + else if (auto) Object.assign(imageData, { width: 'auto', height: size2 }); + else Object.assign(imageData, { width: size2, height: 'auto' }); + return ''; + }); + } + else if (param.startsWith('upright=')) { + imageData.width = +param.replace('upright=', '') * 300; + } + else if (param.startsWith('link=')) { + imageData.link = param.replace('link=', ''); + } + else if (param.startsWith('alt=')) { + imageData.alt = param.replace('alt=', ''); + } + else if (param.startsWith('style=')) { + imageData.style = param.replace('style=', ''); + } + else if (param.startsWith('class=')) { + imageData.class = param.replace('class=', ''); + } + else { + caption = param; + } + } + let content = ` +
+ ${imageData.alt || file} + ${imageData.hasCaption ? `
${caption}
` : ''} +
+ `; + if (imageData.link) content = `${content}`; + return content; + }) + + // Markup: '''bold''' and '''italic''' + .replace(re(r`''' ([^']+?) '''`), '$1') + .replace(re(r`'' ([^']+?) ''`), '$1') + + // Headings: ==heading== + .replace(re(r`^ (=+) \s* (.+?) \s* \1 \s* $`), (_, lvl, txt) => `${txt}`) + + // Internal links: [[Page]] and [[Page|Text]] + .replace(re(r`\[\[ ([^\]|]+?) \]\]`), '$1') + .replace(re(r`\[\[ ([^\]|]+?) \| ([^\]]+?) \]\]`), '$2') + .replace(re(r`()([a-z]+)`), '$2$1') + + // External links: [href Page] and just [href] + .replace(re(r`\[ ((?:\w+:)?\/\/ [^\s\]]+) (\s [^\]]+?)? \]`), (_, href, txt) => `${txt || '[' + (++rawExtLinkCount) + ']'}`) + + // Bulleted list: *item + .replace(re(r`^ (\*+) (.+?) $`), (_, lvl, txt) => `${''.repeat(lvl.length)}`) + .replace(re(r` (\s*?)