You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/09/18 13:41:44 UTC
[incubator-annotator] 02/03: More performance tweaking
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch faster-describeTextQuote
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 35d3ebe00083d53570a829638a6dd489f6e7ce5e
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Sep 18 14:45:54 2020 +0200
More performance tweaking
Calling substring() on the whole scope text may be a performance drain if
the interpreter copies the string on every call, so pass the full text plus
the relevant offsets instead of passing substrings.
---
packages/dom/src/text-quote/describe.ts | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index b599252..93cbe53 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -76,14 +76,18 @@ function calculateContextForDisambiguation(
// Count how many characters before & after them the false match and target have in common.
const sufficientPrefixLength = charactersNeededToBeUnique(
- scopeText.substring(0, targetStartIndex),
- scopeText.substring(0, matchStartIndex),
+ scopeText,
+ targetStartIndex,
+ matchStartIndex,
true,
+ prefix.length,
);
const sufficientSuffixLength = charactersNeededToBeUnique(
- scopeText.substring(targetEndIndex),
- scopeText.substring(matchEndIndex),
+ scopeText,
+ targetEndIndex,
+ matchEndIndex,
false,
+ suffix.length,
);
// Use either the prefix or suffix, whichever is shortest.
@@ -104,21 +108,16 @@ function calculateContextForDisambiguation(
}
function charactersNeededToBeUnique(
- target: string,
- impostor: string,
+ text: string,
+ target: number,
+ impostor: number,
reverse = false,
-) {
- // Count how many characters the two strings have in common.
- let overlap = 0;
- const charAt = (s: string, i: number) =>
- reverse ? s[s.length - 1 - i] : s[overlap];
- while (
- overlap < target.length &&
- charAt(target, overlap) === charAt(impostor, overlap)
- )
+ overlap = 0,
+): number {
+ const nextChar = (offset: number) => reverse ? text[offset - 1 - overlap] : text[offset + overlap];
+ while (nextChar(target) && nextChar(target) === nextChar(impostor))
overlap++;
- if (overlap === target.length) return Infinity;
- // (no substring of target can make it distinguishable from its impostor)
+ if (!nextChar(target)) return Infinity; // end/start of string reached.
else return overlap + 1;
}