You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/09/18 13:41:44 UTC
[incubator-annotator] 02/03: More performance tweaking

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch faster-describeTextQuote
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 35d3ebe00083d53570a829638a6dd489f6e7ce5e
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Sep 18 14:45:54 2020 +0200

    More performance tweaking
    
    I suppose calling substring() on the whole scope may be a performance
    drain if the interpreter copies the string every time.
---
 packages/dom/src/text-quote/describe.ts | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index b599252..93cbe53 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -76,14 +76,18 @@ function calculateContextForDisambiguation(
 
     // Count how many characters before & after them the false match and target have in common.
     const sufficientPrefixLength = charactersNeededToBeUnique(
-      scopeText.substring(0, targetStartIndex),
-      scopeText.substring(0, matchStartIndex),
+      scopeText,
+      targetStartIndex,
+      matchStartIndex,
       true,
+      prefix.length,
     );
     const sufficientSuffixLength = charactersNeededToBeUnique(
-      scopeText.substring(targetEndIndex),
-      scopeText.substring(matchEndIndex),
+      scopeText,
+      targetEndIndex,
+      matchEndIndex,
       false,
+      suffix.length,
     );
 
     // Use either the prefix or suffix, whichever is shortest.
@@ -104,21 +108,16 @@ function calculateContextForDisambiguation(
 }
 
 function charactersNeededToBeUnique(
-  target: string,
-  impostor: string,
+  text: string,
+  target: number,
+  impostor: number,
   reverse = false,
-) {
-  // Count how many characters the two strings have in common.
-  let overlap = 0;
-  const charAt = (s: string, i: number) =>
-    reverse ? s[s.length - 1 - i] : s[overlap];
-  while (
-    overlap < target.length &&
-    charAt(target, overlap) === charAt(impostor, overlap)
-  )
+  overlap = 0,
+): number {
+  const nextChar = (offset: number) => reverse ? text[offset - 1 - overlap] : text[offset + overlap];
+  while (nextChar(target) && nextChar(target) === nextChar(impostor))
     overlap++;
-  if (overlap === target.length) return Infinity;
-  // (no substring of target can make it distinguishable from its impostor)
+  if (!nextChar(target)) return Infinity; // end/start of string reached.
   else return overlap + 1;
 }