From 2742c1600759773cfd73badd1b4550ca71c533a5 Mon Sep 17 00:00:00 2001 From: JoostK Date: Mon, 20 May 2024 18:19:49 +0200 Subject: [PATCH 1/2] perf: avoid excessive linear scanning for large functions This commit restores the token indices for a node after they have been refined to span the node, instead of before the refinement. This avoids the potential for excessive searches for token indices, which is implemented using a linear scan. It was attempted to convert the linear scans into binary searches (which an accompanying comment on `findTokenRange` indicates shouldn't be needed) and that also avoids the problem, but using the refined positions as implemented in this commit is even more effective, to the point where using a binary search is less efficient in my testing. This change can achieve a substantial improvement, where an ~2.6MB file used to be parsed and cloned in ~120s, now reduced to ~1.5s, almost two orders of magnitude faster. The perf test that tests the `backbone.js` file shows that performance improves from ~60ms to ~53ms, only a slight improvement. --- lib/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/parser.ts b/lib/parser.ts index 4e5f22f4..4498fdb9 100644 --- a/lib/parser.ts +++ b/lib/parser.ts @@ -190,9 +190,6 @@ TCp.copy = function (node) { const oldIndent = this.indent; let newIndent = oldIndent; - const oldStartTokenIndex = this.startTokenIndex; - const oldEndTokenIndex = this.endTokenIndex; - if (loc) { // When node is a comment, we set node.loc.indent to // node.loc.start.column so that, when/if we print the comment by @@ -221,6 +218,9 @@ TCp.copy = function (node) { this.findTokenRange(loc); } + const oldStartTokenIndex = this.startTokenIndex; + const oldEndTokenIndex = this.endTokenIndex; + const keys = Object.keys(node); const keyCount = keys.length; for (let i = 0; i < keyCount; ++i) { @@ -262,7 +262,7 @@ TCp.findTokenRange = function (loc) { // *before* loc.end, we need to fast-forward this.endTokenIndex first. while (this.endTokenIndex < loc.tokens.length) { const token = loc.tokens[this.endTokenIndex]; - if (util.comparePos(token.loc.end, loc.end) < 0) { + if (util.comparePos(token.loc.end, loc.end) <= 0) { ++this.endTokenIndex; } else break; } From 8f73d07a2b9d402ce9013ecef550068ed3f32186 Mon Sep 17 00:00:00 2001 From: JoostK Date: Mon, 20 May 2024 18:53:10 +0200 Subject: [PATCH 2/2] perf: micro optimizations for node cloning This commit makes various micro optimizations to improve node cloning performance. In particular, `Object.create` with property descriptors appears to be slower than `Object.defineProperty` after the object has been created (measured in Node 18 and 20). The other bigger factor was the early bailout in `TCp.copy` for non-object values, as such primitives will never hit any of the remaining logic. For a 2.6MB large file, this improves parsing and cloning times from ~1.5s to ~1.2s. The performance test using `backbone.js` sess about 10% improvement. --- lib/parser.ts | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/lib/parser.ts b/lib/parser.ts index 4498fdb9..8c90095b 100644 --- a/lib/parser.ts +++ b/lib/parser.ts @@ -154,15 +154,25 @@ const TreeCopier = function TreeCopier( const TCp: TreeCopierType = TreeCopier.prototype; +const originalProperty: PropertyDescriptor = { + configurable: false, + enumerable: false, + writable: true, +}; + TCp.copy = function (node) { + if (typeof node !== "object") { + return node; + } + if (this.seen.has(node)) { return this.seen.get(node); } if (isArray.check(node)) { - const copy: any = new Array(node.length); + const copy: any = node.slice(); this.seen.set(node, copy); - node.forEach(function (this: any, item: any, i: any) { + node.forEach(function (this: TreeCopierType, item: any, i: any) { copy[i] = this.copy(item); }, this); return copy; @@ -174,15 +184,10 @@ TCp.copy = function (node) { util.fixFaultyLocations(node, this.lines); - const copy: any = Object.create(Object.getPrototypeOf(node), { - original: { - // Provide a link from the copy to the original. - value: node, - configurable: false, - enumerable: false, - writable: true, - }, - }); + const copy: any = Object.create(Object.getPrototypeOf(node)); + // Provide a link from the copy to the original. + Object.defineProperty(copy, "original", originalProperty); + copy.original = node; this.seen.set(node, copy); @@ -190,6 +195,7 @@ TCp.copy = function (node) { const oldIndent = this.indent; let newIndent = oldIndent; + const type = node.type; if (loc) { // When node is a comment, we set node.loc.indent to // node.loc.start.column so that, when/if we print the comment by @@ -197,10 +203,10 @@ TCp.copy = function (node) { // the comment. This only really matters for multiline Block comments, // but it doesn't hurt for Line comments. if ( - node.type === "Block" || - node.type === "Line" || - node.type === "CommentBlock" || - node.type === "CommentLine" || + type === "Block" || + type === "Line" || + type === "CommentBlock" || + type === "CommentLine" || this.lines.isPrecededOnlyByWhitespace(loc.start) ) { newIndent = this.indent = loc.start.column; @@ -226,11 +232,11 @@ TCp.copy = function (node) { for (let i = 0; i < keyCount; ++i) { const key = keys[i]; if (key === "loc") { - copy[key] = node[key]; - } else if (key === "tokens" && node.type === "File") { + copy.loc = loc; + } else if (key === "tokens" && type === "File") { // Preserve file.tokens (uncopied) in case client code cares about // it, even though Recast ignores it when reprinting. - copy[key] = node[key]; + copy.tokens = node.tokens; } else { copy[key] = this.copy(node[key]); }