You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2022/11/23 21:39:21 UTC

[incubator-annotator] 01/01: Turn highlightText into a class TextHighlight

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch texthighlight-class
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 8f890ea405718ee094c2ef9c92c4d30c33ba8f67
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 2 15:31:29 2022 +0100

    Turn highlightText into a class TextHighlight
    
    Keeping the previous API available for now, can remove in a new release.
---
 packages/dom/src/highlight-text.ts                 | 164 ----------------
 packages/dom/src/index.ts                          |   2 +-
 packages/dom/src/text-highlight.ts                 | 214 +++++++++++++++++++++
 ...ghlight-text.test.ts => text-highlight.test.ts} |  76 +++++---
 web/index.js                                       |  14 +-
 5 files changed, 274 insertions(+), 196 deletions(-)

diff --git a/packages/dom/src/highlight-text.ts b/packages/dom/src/highlight-text.ts
deleted file mode 100644
index 3a2448d..0000000
--- a/packages/dom/src/highlight-text.ts
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * @license
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- * SPDX-FileCopyrightText: The Apache Software Foundation
- * SPDX-License-Identifier: Apache-2.0
- */
-
-import { ownerDocument } from './owner-document.js';
-import { toRange } from './to-range.js';
-
-/**
- * Wrap each text node in a given Node or Range with a `<mark>` or other
- * element.
- *
- * If a Range is given that starts and/or ends within a Text node, that node
- * will be split in order to only wrap the contained part in the mark element.
- *
- * The highlight can be removed again by calling the function that cleans up the
- * wrapper elements. Note that this might not perfectly restore the DOM to its
- * previous state: text nodes that were split are not merged again. One could
- * consider running `range.commonAncestorContainer.normalize()` afterwards to
- * join all adjacent text nodes.
- *
- * @param target - The Node/Range containing the text. If it is a Range, note
- * that as highlighting modifies the DOM, the Range may be unusable afterwards.
- * @param tagName - The element used to wrap text nodes. Defaults to `'mark'`.
- * @param attributes - An object defining any attributes to be set on the
- * wrapper elements, e.g. its `class`.
- * @returns A function that removes the created highlight.
- *
- * @public
- */
-export function highlightText(
-  target: Node | Range,
-  tagName = 'mark',
-  attributes: Record<string, string> = {},
-): () => void {
-  // First put all nodes in an array (splits start and end nodes if needed)
-  const nodes = textNodesInRange(toRange(target));
-
-  // Highlight each node
-  const highlightElements: HTMLElement[] = [];
-  for (const node of nodes) {
-    const highlightElement = wrapNodeInHighlight(node, tagName, attributes);
-    highlightElements.push(highlightElement);
-  }
-
-  // Return a function that cleans up the highlightElements.
-  function removeHighlights() {
-    // Remove each of the created highlightElements.
-    for (const highlightElement of highlightElements) {
-      removeHighlight(highlightElement);
-    }
-  }
-  return removeHighlights;
-}
-
-// Return an array of the text nodes in the range. Split the start and end nodes if required.
-function textNodesInRange(range: Range): Text[] {
-  // If the start or end node is a text node and only partly in the range, split it.
-  if (isTextNode(range.startContainer) && range.startOffset > 0) {
-    const endOffset = range.endOffset; // (this may get lost when the splitting the node)
-    const createdNode = range.startContainer.splitText(range.startOffset);
-    if (range.endContainer === range.startContainer) {
-      // If the end was in the same container, it will now be in the newly created node.
-      range.setEnd(createdNode, endOffset - range.startOffset);
-    }
-    range.setStart(createdNode, 0);
-  }
-  if (
-    isTextNode(range.endContainer) &&
-    range.endOffset < range.endContainer.length
-  ) {
-    range.endContainer.splitText(range.endOffset);
-  }
-
-  // Collect the text nodes.
-  const walker = ownerDocument(range).createTreeWalker(
-    range.commonAncestorContainer,
-    NodeFilter.SHOW_TEXT,
-    {
-      acceptNode: (node) =>
-        range.intersectsNode(node)
-          ? NodeFilter.FILTER_ACCEPT
-          : NodeFilter.FILTER_REJECT,
-    },
-  );
-  walker.currentNode = range.startContainer;
-
-  // // Optimise by skipping nodes that are explicitly outside the range.
-  // const NodeTypesWithCharacterOffset = [
-  //  Node.TEXT_NODE,
-  //  Node.PROCESSING_INSTRUCTION_NODE,
-  //  Node.COMMENT_NODE,
-  // ];
-  // if (!NodeTypesWithCharacterOffset.includes(range.startContainer.nodeType)) {
-  //   if (range.startOffset < range.startContainer.childNodes.length) {
-  //     walker.currentNode = range.startContainer.childNodes[range.startOffset];
-  //   } else {
-  //     walker.nextSibling(); // TODO verify this is correct.
-  //   }
-  // }
-
-  const nodes: Text[] = [];
-  if (isTextNode(walker.currentNode)) nodes.push(walker.currentNode);
-  while (walker.nextNode() && range.comparePoint(walker.currentNode, 0) !== 1)
-    nodes.push(walker.currentNode as Text);
-  return nodes;
-}
-
-// Replace [node] with <tagName ...attributes>[node]</tagName>
-function wrapNodeInHighlight(
-  node: ChildNode,
-  tagName: string,
-  attributes: Record<string, string>,
-): HTMLElement {
-  const document = node.ownerDocument as Document;
-  const highlightElement = document.createElement(tagName);
-  Object.keys(attributes).forEach((key) => {
-    highlightElement.setAttribute(key, attributes[key]);
-  });
-  const tempRange = document.createRange();
-  tempRange.selectNode(node);
-  tempRange.surroundContents(highlightElement);
-  return highlightElement;
-}
-
-// Remove a highlight element created with wrapNodeInHighlight.
-function removeHighlight(highlightElement: HTMLElement) {
-  // If it has somehow been removed already, there is nothing to be done.
-  if (!highlightElement.parentNode) return;
-  if (highlightElement.childNodes.length === 1) {
-    highlightElement.replaceWith(highlightElement.firstChild as Node);
-  } else {
-    // If the highlight somehow contains multiple nodes now, move them all.
-    while (highlightElement.firstChild) {
-      highlightElement.parentNode.insertBefore(
-        highlightElement.firstChild,
-        highlightElement,
-      );
-    }
-    highlightElement.remove();
-  }
-}
-
-function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE;
-}
diff --git a/packages/dom/src/index.ts b/packages/dom/src/index.ts
index 6969ea9..aa328ad 100644
--- a/packages/dom/src/index.ts
+++ b/packages/dom/src/index.ts
@@ -25,4 +25,4 @@ export * from './css.js';
 export * from './range/index.js';
 export * from './text-quote/index.js';
 export * from './text-position/index.js';
-export * from './highlight-text.js';
+export * from './text-highlight.js';
diff --git a/packages/dom/src/text-highlight.ts b/packages/dom/src/text-highlight.ts
new file mode 100644
index 0000000..dc90816
--- /dev/null
+++ b/packages/dom/src/text-highlight.ts
@@ -0,0 +1,214 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-FileCopyrightText: The Apache Software Foundation
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { ownerDocument } from './owner-document.js';
+import { toRange } from './to-range.js';
+
+/**
+ * Wrap each text node in a given Node or Range with a `<mark>` or other
+ * element.
+ *
+ * If a Range is given that starts and/or ends within a Text node, that node
+ * will be split in order to only wrap the contained part in the mark element.
+ *
+ * The highlight can be removed again by calling the function that cleans up the
+ * wrapper elements. Note that this might not perfectly restore the DOM to its
+ * previous state: text nodes that were split are not merged again. One could
+ * consider running `range.commonAncestorContainer.normalize()` afterwards to
+ * join all adjacent text nodes.
+ *
+ * @deprecated
+ * Use TextHighlight instead:
+ *
+ *     highlight = new TextHighlight(…);
+ *     highlight.remove();
+ *
+ * @param target - The Node/Range containing the text. If it is a Range, note
+ * that as highlighting modifies the DOM, the Range may be unusable afterwards.
+ * @param tagName - The element used to wrap text nodes. Defaults to `'mark'`.
+ * @param attributes - An object defining any attributes to be set on the
+ * wrapper elements, e.g. its `class`.
+ * @returns A function that removes the created highlight.
+ *
+ * @public
+ */
+export function highlightText(
+  target: Node | Range,
+  tagName = 'mark',
+  attributes: Record<string, string> = {},
+): () => void {
+  const highlight = new TextHighlight(target, tagName, attributes);
+  return () => highlight.remove();
+}
+
+export class TextHighlight {
+  public readonly highlightElements: HTMLElement[] = [];
+
+  /**
+   * Wrap each text node in a given Node or Range with a `<mark>` or other
+   * element.
+   *
+   * If a Range is given that starts and/or ends within a Text node, that node
+   * will be split in order to only wrap the contained part in the mark element.
+   *
+   * @param target - The Node/Range containing the text. If it is a Range, note
+   * that as highlighting modifies the DOM, the Range may be unusable
+   * afterwards.
+   * @param element - The element used to wrap text nodes. Pass either a string
+   * with its tag name, or an Element to be cloned. Defaults to `'mark'`.
+   * @param attributes - An object defining any attributes to be set on the
+   * wrapper elements, e.g. its `class`. Applied to every wrapper, also when
+   * `element` is an Element to be cloned.
+   * @returns A {@link TextHighlight} object, that can be used to access the
+   * wrapper elements or to remove the highlight again.
+   *
+   * @public
+   */
+  constructor(
+    target: Node | Range,
+    element: string | HTMLElement = "mark",
+    attributes: Record<string, string> = {},
+  ) {
+    // First put all nodes in an array (splits start and end nodes if needed)
+    const nodes = this.textNodesInRange(toRange(target));
+
+    // Highlight each node
+    for (const node of nodes) {
+      const highlightElement = this.wrapNodeInHighlight(node, element, attributes);
+      this.highlightElements.push(highlightElement);
+    }
+  }
+
+  /**
+   * Remove each of the created highlightElements.
+   *
+   * It takes the wrapping (`<mark>`) elements out of the DOM tree, thus moving
+   * their child node(s) one level up.
+   */
+  public remove() {
+    for (const highlightElement of this.highlightElements) {
+      this.removeHighlightElement(highlightElement);
+    }
+  }
+
+  /**
+   * Return an array of the text nodes in the range. Split the start and end nodes if required.
+   */
+  private textNodesInRange(range: Range): Text[] {
+    // If the start or end node is a text node and only partly in the range, split it.
+    if (isTextNode(range.startContainer) && range.startOffset > 0) {
+      const endOffset = range.endOffset; // (this may get lost when splitting the node)
+      const createdNode = range.startContainer.splitText(range.startOffset);
+      if (range.endContainer === range.startContainer) {
+        // If the end was in the same container, it will now be in the newly created node.
+        range.setEnd(createdNode, endOffset - range.startOffset);
+      }
+      range.setStart(createdNode, 0);
+    }
+    if (
+      isTextNode(range.endContainer) &&
+      range.endOffset < range.endContainer.length
+    ) {
+      range.endContainer.splitText(range.endOffset);
+    }
+
+    // Collect the text nodes.
+    const walker = ownerDocument(range).createTreeWalker(
+      range.commonAncestorContainer,
+      NodeFilter.SHOW_TEXT,
+      {
+        acceptNode: (node) =>
+          range.intersectsNode(node)
+            ? NodeFilter.FILTER_ACCEPT
+            : NodeFilter.FILTER_REJECT,
+      }
+    );
+    walker.currentNode = range.startContainer;
+
+    // // Optimise by skipping nodes that are explicitly outside the range.
+    // const NodeTypesWithCharacterOffset = [
+    //  Node.TEXT_NODE,
+    //  Node.PROCESSING_INSTRUCTION_NODE,
+    //  Node.COMMENT_NODE,
+    // ];
+    // if (!NodeTypesWithCharacterOffset.includes(range.startContainer.nodeType)) {
+    //   if (range.startOffset < range.startContainer.childNodes.length) {
+    //     walker.currentNode = range.startContainer.childNodes[range.startOffset];
+    //   } else {
+    //     walker.nextSibling(); // TODO verify this is correct.
+    //   }
+    // }
+
+    const nodes: Text[] = [];
+    if (isTextNode(walker.currentNode)) nodes.push(walker.currentNode);
+    while (walker.nextNode() && range.comparePoint(walker.currentNode, 0) !== 1)
+      nodes.push(walker.currentNode as Text);
+    return nodes;
+  }
+
+  /**
+   * Replace [node] with <tagName ...attributes>[node]</tagName>
+   */
+  private wrapNodeInHighlight(
+    node: ChildNode,
+    element: string | HTMLElement,
+    attributes: Record<string, string>
+  ): HTMLElement {
+    const document = node.ownerDocument as Document;
+    const highlightElement =
+      typeof element === "string"
+        ? document.createElement(element)
+        : (element.cloneNode() as typeof element);
+    Object.keys(attributes).forEach((key) => {
+      highlightElement.setAttribute(key, attributes[key]);
+    });
+    const tempRange = document.createRange();
+    tempRange.selectNode(node);
+    tempRange.surroundContents(highlightElement);
+    return highlightElement;
+  }
+
+  /**
+   * Remove a highlight element created with wrapNodeInHighlight.
+   */
+  private removeHighlightElement(highlightElement: HTMLElement) {
+    // If it has somehow been removed already, there is nothing to be done.
+    if (!highlightElement.parentNode) return;
+    if (highlightElement.childNodes.length === 1) {
+      highlightElement.replaceWith(highlightElement.firstChild as Node);
+    } else {
+      // If the highlight somehow contains multiple nodes now, move them all.
+      while (highlightElement.firstChild) {
+        highlightElement.parentNode.insertBefore(
+          highlightElement.firstChild,
+          highlightElement
+        );
+      }
+      highlightElement.remove();
+    }
+  }
+}
+
+function isTextNode(node: Node): node is Text {
+  return node.nodeType === Node.TEXT_NODE;
+}
diff --git a/packages/dom/test/highlight-text/highlight-text.test.ts b/packages/dom/test/highlight-text/text-highlight.test.ts
similarity index 74%
rename from packages/dom/test/highlight-text/highlight-text.test.ts
rename to packages/dom/test/highlight-text/text-highlight.test.ts
index 7ef91eb..065de4d 100644
--- a/packages/dom/test/highlight-text/highlight-text.test.ts
+++ b/packages/dom/test/highlight-text/text-highlight.test.ts
@@ -22,7 +22,7 @@
  */
 
 import { strict as assert } from 'assert';
-import { highlightText } from '../../src/highlight-text.js';
+import { TextHighlight } from '../../src/text-highlight.js';
 import type { RangeInfo } from '../utils.js';
 import { hydrateRange, evaluateXPath } from '../utils.js';
 
@@ -32,7 +32,7 @@ const testCases: {
   [name: string]: {
     inputHtml: string;
     range: RangeInfo;
-    tagName?: string;
+    element?: string;
     attributes?: Record<string, string>;
     expectedHtml: string;
   };
@@ -76,7 +76,7 @@ const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 20,
     },
-    tagName: 'span',
+    element: 'span',
     expectedHtml: '<b>lorem ipsum <span>dolor am</span>et yada yada</b>',
   },
   'custom attributes': {
@@ -102,30 +102,30 @@ const testCases: {
       endContainerXPath: '//b/text()[2]',
       endOffset: 7,
     },
-    tagName: 'mark2',
+    element: 'mark2',
     expectedHtml:
       '<b>lorem ipsum <mark>dolor <mark2>am</mark2></mark><mark2>et yada</mark2> yada</b>',
   },
 };
 
-describe('highlightText', () => {
+describe('TextHighlight', () => {
   for (const [
     name,
-    { inputHtml, range, tagName, attributes, expectedHtml },
+    { inputHtml, range, element, attributes, expectedHtml },
   ] of Object.entries(testCases)) {
     it(`works for case: ${name}`, () => {
       const doc = domParser.parseFromString(inputHtml, 'text/html');
 
-      // Invoke highlightText for the specified Range, and check the result.
-      const removeHighlights = highlightText(
+      // Highlight the specified Range, and check the result.
+      const highlight = new TextHighlight(
         hydrateRange(range, doc),
-        tagName,
+        element,
         attributes,
       );
       assert.equal(doc.body.innerHTML, expectedHtml);
 
       // Remove the highlight again and check that we end up exactly how we started.
-      removeHighlights();
+      highlight.remove();
       assert.equal(doc.body.innerHTML, inputHtml);
     });
   }
@@ -141,12 +141,12 @@ describe('highlightText', () => {
     range.setStart(evaluateXPath(doc, '//b/text()[1]'), 12); // before 'dolor am'
     range.setEnd(evaluateXPath(doc, '//b/text()[2]'), 20 - 15); // after 'dolor am'
 
-    const removeHighlights = highlightText(range);
+    const highlight = new TextHighlight(range);
     const expectedHtml =
       '<b>lorem ipsum <mark>dol</mark><mark>or am</mark>et yada yada</b>';
     assert.equal(doc.body.innerHTML, expectedHtml);
 
-    removeHighlights();
+    highlight.remove();
     assert.equal(doc.body.innerHTML, inputHtml);
   });
 
@@ -162,12 +162,12 @@ describe('highlightText', () => {
     range.setStart(evaluateXPath(doc, '//b/text()[1]'), 12); // before 'dolor am'
     range.setEnd(evaluateXPath(doc, '//b/text()[3]'), 20 - 15); // after 'dolor am'
 
-    const removeHighlights = highlightText(range);
+    const highlight = new TextHighlight(range);
     const expectedHtml =
       '<b>lorem ipsum <mark>dol</mark><mark></mark><mark>or am</mark>et yada yada</b>';
     assert.equal(doc.body.innerHTML, expectedHtml);
 
-    removeHighlights();
+    highlight.remove();
     assert.equal(doc.body.innerHTML, inputHtml);
   });
 
@@ -178,10 +178,10 @@ describe('highlightText', () => {
     const range = doc.createRange();
     range.selectNode(evaluateXPath(doc, '//img'));
 
-    const removeHighlights = highlightText(range);
+    const highlight = new TextHighlight(range);
     assert.equal(doc.body.innerHTML, inputHtml);
 
-    removeHighlights();
+    highlight.remove();
     assert.equal(doc.body.innerHTML, inputHtml);
   });
 
@@ -190,12 +190,12 @@ describe('highlightText', () => {
     const { range: range2, expectedHtml } = testCases['overlapping highlight'];
     const doc = domParser.parseFromString(inputHtml, 'text/html');
 
-    const removeHighlights1 = highlightText(hydrateRange(range, doc));
-    const removeHighlights2 = highlightText(hydrateRange(range2, doc), 'mark2');
+    const highlight1 = new TextHighlight(hydrateRange(range, doc));
+    const highlight2 = new TextHighlight(hydrateRange(range2, doc), 'mark2');
     assert.equal(doc.body.innerHTML, expectedHtml);
 
-    removeHighlights1();
-    removeHighlights2();
+    highlight1.remove();
+    highlight2.remove();
     assert.equal(doc.body.innerHTML, inputHtml);
   });
 
@@ -204,12 +204,40 @@ describe('highlightText', () => {
     const { range: range2, expectedHtml } = testCases['overlapping highlight'];
     const doc = domParser.parseFromString(inputHtml, 'text/html');
 
-    const removeHighlights1 = highlightText(hydrateRange(range, doc));
-    const removeHighlights2 = highlightText(hydrateRange(range2, doc), 'mark2');
+    const highlight1 = new TextHighlight(hydrateRange(range, doc));
+    const highlight2 = new TextHighlight(hydrateRange(range2, doc), 'mark2');
     assert.equal(doc.body.innerHTML, expectedHtml);
 
-    removeHighlights2();
-    removeHighlights1();
+    highlight2.remove();
+    highlight1.remove();
     assert.equal(doc.body.innerHTML, inputHtml);
   });
+
+  it('Clones a given wrapper element', () => {
+    const { inputHtml, range } = testCases['across elements'];
+    const doc = domParser.parseFromString(inputHtml, 'text/html');
+    const element = doc.createElement('span');
+    element.setAttribute('class', 'highlighted');
+
+    const highlight = new TextHighlight(hydrateRange(range, doc), element);
+    const expectedHtml =
+      '<b>lorem <i>ipsum</i> <span class="highlighted">dolor </span><u><span class="highlighted">am</span>et</u> yada yada</b>';
+    assert.equal(doc.body.innerHTML, expectedHtml);
+
+    highlight.remove();
+    assert.equal(doc.body.innerHTML, inputHtml);
+  });
+
+  it('exposes the wrapper elements', () => {
+    const { inputHtml, range } = testCases['across elements'];
+    const doc = domParser.parseFromString(inputHtml, 'text/html');
+
+    const highlight = new TextHighlight(hydrateRange(range, doc));
+
+    assert.equal(highlight.highlightElements.length, 2);
+    assert.equal(highlight.highlightElements[0].tagName, 'MARK');
+    assert.equal(highlight.highlightElements[0].textContent, 'dolor ');
+    assert.equal(highlight.highlightElements[1].tagName, 'MARK');
+    assert.equal(highlight.highlightElements[1].textContent, 'am');
+  });
 });
diff --git a/web/index.js b/web/index.js
index 7917798..39e3d08 100644
--- a/web/index.js
+++ b/web/index.js
@@ -29,7 +29,7 @@ import {
   describeTextQuote,
   createTextPositionSelectorMatcher,
   describeTextPosition,
-  highlightText,
+  TextHighlight,
 } from '@apache-annotator/dom';
 import { makeRefinable } from '@apache-annotator/selector';
 
@@ -87,13 +87,13 @@ const EXAMPLE_SELECTORS = [
 ];
 
 let moduleState = {
-  cleanupFunctions: [],
+  highlights: [],
 };
 
 function cleanup() {
-  let removeHighlight;
-  while ((removeHighlight = moduleState.cleanupFunctions.shift())) {
-    removeHighlight();
+  let highlight;
+  while ((highlight = moduleState.highlights.shift())) {
+    highlight.remove();
   }
   target.normalize();
   info.innerText = '';
@@ -124,8 +124,8 @@ async function anchor(selector) {
   }
 
   for (const range of ranges) {
-    const removeHighlight = highlightText(range);
-    moduleState.cleanupFunctions.push(removeHighlight);
+    const highlight = new TextHighlight(range);
+    moduleState.highlights.push(highlight);
   }
 
   info.innerText += JSON.stringify(selector, null, 2) + '\n\n';