From 951984fb55d552d9c816a30069e2321f3602d305 Mon Sep 17 00:00:00 2001
From: LMBishop <13875753+LMBishop@users.noreply.github.com>
Date: Wed, 22 Dec 2021 20:35:16 +0000
Subject: Add circular dependency detection and logging library

---
 app/wikiparser.ts | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

(limited to 'app/wikiparser.ts')

diff --git a/app/wikiparser.ts b/app/wikiparser.ts
index 544b6e5..f52d646 100644
--- a/app/wikiparser.ts
+++ b/app/wikiparser.ts
@@ -38,6 +38,49 @@ const re = (regex, flag = 'mgi') => {
 const r = String.raw;
 const arg = r`\s*([^|}]+?)\s*`;
 
+export function findDependencies(data: string): Set<string> {
+    const pages = new Set<string>();
+
+    let outText = data;
+    for (let l = 0, last = ''; l < parseInt(process.env.PARSER_MAX_RECURSION, 10); l++) {
+        if (last === outText) break; last = outText;
+
+        outText = outText
+            // Remove non-template magic words
+            .replace(re(r`<(/?) \s* (?= script|link|meta|iframe|frameset|object|embed|applet|form|input|button|textarea )`), '&lt;$1')
+            .replace(re(r`(?<= <[^>]+ ) (\bon(\w+))`), 'data-$2')
+            .replace(/<!--[^]+?-->/g, '')
+            .replace(re(r`{{ \s* displayTitle: ([^}]+) }}`), '')
+            .replace(re(r`{{ \s* navbarSortOrder: ([^}]+) }}`), '')
+            .replace(re(r`{{ \s* ! \s* }}`), '|')
+            .replace(re(r`{{ \s* = \s* }}`), '=')
+            .replace(re(r`{{ \s* [Rr]eflist \s* }}`), '')
+            .replace(re(r`{{ \s* #? urlencode: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? urldecode: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? lc: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? uc: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? lcfirst: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? ucfirst: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? len: ${arg} }}`), '')
+            .replace(re(r`{{ \s* #? pos: ${arg} \|${arg} (?: \s*\|${arg} )? }}`), '')
+            .replace(re(r`{{ \s* #? sub: ${arg} \|${arg} (?:\|${arg})? }}`), '')
+            .replace(re(r`{{ \s* #? padleft: ${arg} \|${arg} \|${arg} }}`), '')
+            .replace(re(r`{{ \s* #? padright: ${arg} \|${arg} \|${arg} }}`), '')
+            .replace(re(r`{{ \s* #? replace: ${arg} \|${arg} \|${arg} }}`), '')
+            .replace(re(r`{{ \s* #? explode: ${arg} \|${arg} \|${arg} }}`), '')
+            .replace(re(r`{{ \s* (#\w+) \s* : \s* ( [^{}]+ ) \s* }} ( ?!} )`), '')
+
+            // Templates: {{template}}
+            .replace(re(r`{{ \s* ([^#}|]+?) (\|[^}]+)? }} (?!})`), (_, title, params = '') => {
+                if (/{{/.test(params)) return _;
+                const page = title.includes(':') ? title : `Template:${title}`
+                pages.add(page);
+                return '';
+            })
+    }
+    return pages;
+}
+
 export function parse(directory, data): Result {
     const vars = {};
     const metadata: any = {};
@@ -131,8 +174,8 @@ export function parse(directory, data): Result {
 
                 // Retrieve template content
                 let content = directory.get(page);
-                if (!content) {
-                    return `Template:${title}`;
+                if (!content?.html) {
+                    return `Template:${title}`;
                 }
 
                 // Remove non-template sections
-- 
cgit v1.2.3-70-g09d2