From 951984fb55d552d9c816a30069e2321f3602d305 Mon Sep 17 00:00:00 2001 From: LMBishop <13875753+LMBishop@users.noreply.github.com> Date: Wed, 22 Dec 2021 20:35:16 +0000 Subject: Add circular dependency detection and logging library --- app/directory.ts | 140 +++++++++++++++++++++++++++++++++++-------- app/index.ts | 7 +-- app/logger.ts | 23 +++++++ app/routes/page/router.ts | 19 +++++- app/routes/special/router.ts | 3 + app/wikiparser.ts | 47 ++++++++++++++- 6 files changed, 205 insertions(+), 34 deletions(-) create mode 100644 app/logger.ts (limited to 'app') diff --git a/app/directory.ts b/app/directory.ts index fd9c008..ee6a5a9 100644 --- a/app/directory.ts +++ b/app/directory.ts @@ -1,9 +1,9 @@ -import { parse } from './wikiparser.js'; +import * as parser from './wikiparser.js'; import { readFileSync } from 'fs'; import glob from 'glob'; +import { logger } from './logger.js' export class PageDirectory { - pages: Record; primaryPages: Page[]; pagePath: string; @@ -33,28 +33,74 @@ export class PageDirectory { const pages = glob.sync(`**/*.wiki`, { cwd: this.pagePath }) pages.forEach(page => { - page = page.replace('.wiki', '').replace('/', ':').replace(/[^a-z0-9:]/gi, '_').toLowerCase(); + page = this.convertNameToStandard(page.replace('.wiki', '').replace('/', ':')); this.pages[page] = { standardName: page, + raw: this.loadRaw(page), buildTime: 0, - metadata: {} + metadata: { + dependencies: [], + dependents: [], + errors: [] + } } }); - // Build templates first - Object.keys(this.pages).forEach(name => { - if (name.includes('Template:')) { - this.pages[name] = this.buildPage(name); + const dependencyGraph: Record = {}; + + Object.keys(this.pages).forEach(name => dependencyGraph[name] = Array.from(parser.findDependencies(this.pages[name].raw)).map(e => this.convertNameToStandard(e))); + + function traverse(dependents: string[], dependencies: string[], recursionCount: number) { + if (recursionCount > parseInt(process.env.PARSER_MAX_RECURSION, 10)) { + throw new RecursionError('max recursion reached'); } + dependencies?.forEach((dependency: string) => { + if (dependencyGraph[dependency]?.length != 0) { + dependents.forEach((dependent: string) => { + if (dependencyGraph[dependency]?.includes(dependent)) { + throw new DependencyError(`circular dependency between ${dependent} and ${dependency}`, [dependent, dependency]); + } + }); + traverse([...dependents, dependency], dependencyGraph[dependency], recursionCount + 1); + } + }); + } + + Object.keys(dependencyGraph).forEach(name => { + dependencyGraph[name].forEach(dependency => { + try { + traverse([name, dependency], dependencyGraph[dependency], 1); + } catch (e) { + if (e instanceof RecursionError) { + this.pages[name].metadata.errors.push({ + identifier: 'max-recursion-reached', + message: `maximum dependency depth of ${process.env.PARSER_MAX_RECURSION} reached` + }) + logger.warn(`max recursion for ${name} reached`) + } else if (e instanceof DependencyError) { + if (e.pages.includes(name)) { + this.pages[name].metadata.errors.push({ + identifier: 'circular-dependency', + message: e.message + }) + logger.warn(`${e.pages[0]} has a circular dependency with ${e.pages[1]}`) + } else { + logger.warn(`transclusions on page ${name} may not resolve due to dependency errors in its dependency tree`) + } + } else { + throw e; + } + } + }); }); const primaryPages = []; Object.keys(this.pages).forEach(name => { - if (!name.includes('Template:')) { + if (this.pages[name].metadata.errors.length == 0) { this.pages[name] = this.buildPage(name); - } - if (this.pages[name].metadata.includeInNavbar) { - primaryPages.push(this.pages[name]); + if (this.pages[name].metadata.includeInNavbar) { + primaryPages.push(this.pages[name]); + } } }); @@ -88,11 +134,7 @@ export class PageDirectory { if (!page) { return undefined; } - - if (!page.html) { - return this.buildPage(name) - } - + return page; } @@ -147,6 +189,17 @@ export class PageDirectory { getPrimaryPages(): Page[] { return this.primaryPages; } + + private loadRaw(name: string): string { + name = this.convertNameToStandard(name); + let data: string; + try { + data = readFileSync(`${this.pagePath}/${this.convertStandardToFilePath(name)}`, 'utf-8'); + } catch { + return undefined; + } + return data; + } /** * Build a page. @@ -157,12 +210,13 @@ export class PageDirectory { private buildPage(name: string): Page { name = this.convertNameToStandard(name); let data: string; - try { - data = readFileSync(`${this.pagePath}/${this.convertStandardToFilePath(name)}`, 'utf-8'); - } catch { - return undefined; + if (this.pages[name]?.raw) { + data = this.pages[name]?.raw + } else { + data = this.loadRaw(name) } - const result = parse(this, data); + + const result = parser.parse(this, data); const title = result.metadata.displayTitle ?? name const content = `${result.metadata.notitle ? '' : `

${title}

`}${result.html}`; @@ -175,7 +229,10 @@ export class PageDirectory { includeInNavbar: result.metadata.primary ?? false, sortOrder: result.metadata.sortOrder ?? -1, showTitle: !result.metadata.notitle ?? true, - displayTitle: title + displayTitle: title, + dependencies: [], + dependents: [], + errors: [] } }; this.pages[name] = page; @@ -189,7 +246,11 @@ export class PageDirectory { * @param name non-standard name for a page */ private convertNameToStandard(name: string): string { - return name.replace(/[^a-z0-9:]/gi, '_').toLowerCase(); + name = name.replace(/[^a-z0-9:]/gi, '_').toLowerCase(); + if (!name.includes(':')) { + name = `main:${name}`; + } + return name; } /** @@ -198,7 +259,7 @@ export class PageDirectory { * @param name standard name for a page */ private convertStandardToFilePath(name: string): string { - const [first, second] = name.split(':'); + const [first, second] = name.replace('main:', '').split(':'); const [title, subpage] = ((second) ? second : first).split('.') const namespace = (second) ? first : undefined @@ -209,7 +270,7 @@ export class PageDirectory { export type Page = { html?: string; raw?: string; - standardName: string, + standardName: string; buildTime: number; metadata: PageMetadata; }; @@ -219,4 +280,31 @@ export type PageMetadata = { sortOrder?: number; showTitle?: boolean; includeInNavbar?: boolean; + dependencies: string[]; + dependents: string[]; + errors: PageError[]; }; + +export type PageError = { + identifier: string; + message: string; +} + +export class DependencyError extends Error { + pages: string[] + + constructor(message: string, pages: string[]) { + super(message); + this.pages = pages; + + Object.setPrototypeOf(this, DependencyError.prototype); + } +} + +export class RecursionError extends Error { + constructor(message: string) { + super(message); + + Object.setPrototypeOf(this, RecursionError.prototype); + } +} diff --git a/app/index.ts b/app/index.ts index 9865190..8514380 100644 --- a/app/index.ts +++ b/app/index.ts @@ -4,14 +4,13 @@ import dotenv from 'dotenv-defaults'; import * as page from './routes/page/router.js'; import * as special from './routes/special/router.js'; import { navbar } from './middlewares/index.js' +import { logger } from './logger.js' dotenv.config() const app = express(); const directory = new PageDirectory(process.env.PAGES_DIR); -directory.rebuild(); - app.set('view engine', 'ejs'); app.set('views', 'views'); @@ -33,11 +32,11 @@ app.use(navbar, (req, res) => { }); const server = app.listen(process.env.PORT, () => { - console.log(`App listening on port ${process.env.PORT}`); + logger.info(`App listening on port ${process.env.PORT}`); }); const exit = () => { - console.info('Stopping server...'); + logger.info('Stopping server...'); server.close(() => { process.exit(0); }) diff --git a/app/logger.ts b/app/logger.ts new file mode 100644 index 0000000..4ce0150 --- /dev/null +++ b/app/logger.ts @@ -0,0 +1,23 @@ +import winston from 'winston'; + +const enumerateErrorFormat = winston.format((info) => { + if (info instanceof Error) { + Object.assign(info, { message: info.stack }); + } + return info; + }); + +export const logger = winston.createLogger({ + level: process.env.LOGGING_LEVEL === 'development' ? 'debug' : 'info', + format: winston.format.combine( + enumerateErrorFormat(), + winston.format.colorize(), + winston.format.splat(), + winston.format.printf(({ level, message }) => `${level}: ${message}`) + ), + transports: [ + new winston.transports.Console({ + stderrLevels: ['error'], + }), + ], +}); diff --git a/app/routes/page/router.ts b/app/routes/page/router.ts index af5e844..dc819f8 100644 --- a/app/routes/page/router.ts +++ b/app/routes/page/router.ts @@ -26,11 +26,26 @@ router.get('/:page?', navbar, (req, res, next) => { return; } + let html: string; + let title: string; + + if (page.metadata.errors.length != 0) { + html = '
This page could not be built due to the following errors:
    ' + page.metadata.errors.forEach(e => { + html += `
  • ${e.identifier}: ${e.message}
  • ` + }); + html += '
Go home?
' + title = 'Page error' + } else { + html = page.html; + title = page.metadata.displayTitle; + } + res.render('page.ejs', { navbar: res.locals.navbarHtml, path: res.locals.path, - content: page.html, - title: page.metadata.displayTitle, + content: html, + title: title, buildTime: new Date(page.buildTime) }); }); diff --git a/app/routes/special/router.ts b/app/routes/special/router.ts index e88919c..26130c5 100644 --- a/app/routes/special/router.ts +++ b/app/routes/special/router.ts @@ -1,5 +1,6 @@ import express from 'express'; import { navbar, page } from '../../middlewares/index.js'; +import { logger } from './../../logger.js' export const router = express.Router({ mergeParams: true }); @@ -30,6 +31,7 @@ router.get('/special/purge/:page/confirm', (req, res, next) => { return; } + logger.info(`Purge for page ${page.standardName} requested by ${req.headers['x-forwarded-for'] || req.socket.remoteAddress }`) if (res.locals.directory.purge(res.locals.path)) { res.status(200).send(); } else { @@ -44,6 +46,7 @@ router.get('/special/rebuild', navbar, (req, res) => { }); router.get('/special/rebuild/confirm', (req, res) => { + logger.info(`Directory rebuild requested by ${req.headers['x-forwarded-for'] || req.socket.remoteAddress }`) if (res.locals.directory.rebuild()) { res.status(200).send(); } else { diff --git a/app/wikiparser.ts b/app/wikiparser.ts index 544b6e5..f52d646 100644 --- a/app/wikiparser.ts +++ b/app/wikiparser.ts @@ -38,6 +38,49 @@ const re = (regex, flag = 'mgi') => { const r = String.raw; const arg = r`\s*([^|}]+?)\s*`; +export function findDependencies(data: string): Set { + const pages = new Set(); + + let outText = data; + for (let l = 0, last = ''; l < parseInt(process.env.PARSER_MAX_RECURSION, 10); l++) { + if (last === outText) break; last = outText; + + outText = outText + // Remove non-template magic words + .replace(re(r`<(/?) \s* (?= script|link|meta|iframe|frameset|object|embed|applet|form|input|button|textarea )`), '<$1') + .replace(re(r`(?<= <[^>]+ ) (\bon(\w+))`), 'data-$2') + .replace(//g, '') + .replace(re(r`{{ \s* displayTitle: ([^}]+) }}`), '') + .replace(re(r`{{ \s* navbarSortOrder: ([^}]+) }}`), '') + .replace(re(r`{{ \s* ! \s* }}`), '|') + .replace(re(r`{{ \s* = \s* }}`), '=') + .replace(re(r`{{ \s* [Rr]eflist \s* }}`), '') + .replace(re(r`{{ \s* #? urlencode: ${arg} }}`), '') + .replace(re(r`{{ \s* #? urldecode: ${arg} }}`), '') + .replace(re(r`{{ \s* #? lc: ${arg} }}`), '') + .replace(re(r`{{ \s* #? uc: ${arg} }}`), '') + .replace(re(r`{{ \s* #? lcfirst: ${arg} }}`), '') + .replace(re(r`{{ \s* #? ucfirst: ${arg} }}`), '') + .replace(re(r`{{ \s* #? len: ${arg} }}`), '') + .replace(re(r`{{ \s* #? pos: ${arg} \|${arg} (?: \s*\|${arg} )? }}`), '') + .replace(re(r`{{ \s* #? sub: ${arg} \|${arg} (?:\|${arg})? }}`), '') + .replace(re(r`{{ \s* #? padleft: ${arg} \|${arg} \|${arg} }}`), '') + .replace(re(r`{{ \s* #? padright: ${arg} \|${arg} \|${arg} }}`), '') + .replace(re(r`{{ \s* #? replace: ${arg} \|${arg} \|${arg} }}`), '') + .replace(re(r`{{ \s* #? explode: ${arg} \|${arg} \|${arg} }}`), '') + .replace(re(r`{{ \s* (#\w+) \s* : \s* ( [^{}]+ ) \s* }} ( ?!} )`), '') + + // Templates: {{template}} + .replace(re(r`{{ \s* ([^#}|]+?) (\|[^}]+)? }} (?!})`), (_, title, params = '') => { + if (/{{/.test(params)) return _; + const page = title.includes(':') ? title : `Template:${title}` + pages.add(page); + return ''; + }) + } + return pages; +} + export function parse(directory, data): Result { const vars = {}; const metadata: any = {}; @@ -131,8 +174,8 @@ export function parse(directory, data): Result { // Retrieve template content let content = directory.get(page); - if (!content) { - return `Template:${title}`; + if (!content?.html) { + return `Template:${title}`; } // Remove non-template sections -- cgit v1.2.3-70-g09d2