You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2021/06/03 16:38:40 UTC

[incubator-annotator] branch css-selector updated (f375738 -> c8ef340)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch css-selector
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard f375738  Add describeCss & tests
     new c8ef340  Add describeCss & tests, document spec ambiguity

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (f375738)
            \
             N -- N -- N   refs/heads/css-selector (c8ef340)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 packages/dom/package.json              |  2 +-
 packages/dom/src/css.ts                |  4 +--
 packages/dom/test/css/describe.test.ts | 40 +++++++++++++++---------------
 yarn.lock                              | 45 +++++-----------------------------
 4 files changed, 29 insertions(+), 62 deletions(-)

[incubator-annotator] 01/01: Add describeCss & tests, document spec ambiguity

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch css-selector
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit c8ef340e34b10534507b4f28dfe210ca7b095243
Author: Gerben <ge...@treora.com>
AuthorDate: Tue May 25 00:59:21 2021 +0200

    Add describeCss & tests, document spec ambiguity
    
    I tried a few css selector generators, listed here:
    <https://github.com/fczbkk/css-selector-generator-benchmark>
    
    - css-selector-generator failed when a root (= scope) is passed; see
      issue <https://github.com/fczbkk/css-selector-generator/issues/65>.
    
    - using @medv/finder instead gave syntax errors due to ‘export’ token.
      (perhaps because we don’t transpile dependencies; worth considering?)
    
    - optimal-select seemed to work; whatever works is good enough for now.
    
    I made describeCss accept an Element, not a Range, for its scope and
    target, as Ranges make little sense for a CssSelector; I figured we may
    want to change this in the matcher too, and perhaps more widely.
---
 packages/dom/package.json              |  3 +-
 packages/dom/src/css.ts                | 31 +++++++++++++----
 packages/dom/test/css/describe.test.ts | 55 +++++++++++++++++++++++++++++
 packages/dom/test/css/match-cases.ts   | 63 ++++++++++++++++++++++++++++++++++
 packages/dom/test/css/match.test.ts    | 62 +++++++++++++++++++++++++++++++++
 yarn.lock                              |  5 +++
 6 files changed, 212 insertions(+), 7 deletions(-)

diff --git a/packages/dom/package.json b/packages/dom/package.json
index ff22835..0155fc7 100644
--- a/packages/dom/package.json
+++ b/packages/dom/package.json
@@ -14,7 +14,8 @@
   "exports": "./lib/index.js",
   "main": "./lib/index.js",
   "dependencies": {
-    "@babel/runtime-corejs3": "^7.13.10"
+    "@babel/runtime-corejs3": "^7.13.10",
+    "optimal-select": "^4.0.1"
   },
   "devDependencies": {
     "@apache-annotator/selector": "^0.1.0"
diff --git a/packages/dom/src/css.ts b/packages/dom/src/css.ts
index c8c0db5..1a62141 100644
--- a/packages/dom/src/css.ts
+++ b/packages/dom/src/css.ts
@@ -18,6 +18,7 @@
  * under the License.
  */
 
+import optimalSelect from 'optimal-select';
 import type { CssSelector, Matcher } from '@apache-annotator/selector';
 import { ownerDocument } from './owner-document';
 
@@ -32,18 +33,25 @@ import { ownerDocument } from './owner-document';
  * The function is curried, taking first the selector and then the scope.
  *
  * As there may be multiple matches for a given selector, the matcher will
- * return an (async) generator that produces each match in the order they are
- * found in the text.
+ * return an (async) iterable that produces each match in the order they are
+ * found in the document.
+ *
+ * Note that the Web Annotation specification does not mention whether an
+ * ‘ambiguous’ CssSelector should indeed match all elements that match the
+ * selector value, or perhaps only the first. This implementation returns all
+ * matches to give users the freedom to follow either interpretation. This is
+ * also in line with more clearly defined behaviour of the TextQuoteSelector:
+ *
+ * > “If […] the user agent discovers multiple matching text sequences, then the
+ * > selection SHOULD be treated as matching all of the matches.”
  *
  * Each matching element is returned as a {@link https://developer.mozilla.org/en-US/docs/Web/API/Range
  * | Range} surrounding that element. This in order to make its output reusable
  * as the scope for any subsequents selectors that {@link
  * Selector.refinedBy | refine} this CssSelector.
  *
- * @param selector - The {@link CssSelector} to be
- * anchored
- * @returns A {@link Matcher} function that applies
- * `selector` to a given {@link https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * @param selector - The {@link CssSelector} to be anchored
+ * @returns A {@link Matcher} function that applies `selector` to a given {@link https://developer.mozilla.org/en-US/docs/Web/API/Range
  * | Range}
  *
  * @public
@@ -66,3 +74,14 @@ export function createCssSelectorMatcher(
     }
   };
 }
+
+export async function describeCss(
+  element: HTMLElement,
+  scope?: HTMLElement,
+): Promise<CssSelector> {
+  const selector = optimalSelect(element, { root: scope ?? element.ownerDocument.body });
+  return {
+    type: 'CssSelector',
+    value: selector,
+  };
+}
diff --git a/packages/dom/test/css/describe.test.ts b/packages/dom/test/css/describe.test.ts
new file mode 100644
index 0000000..17d1ce9
--- /dev/null
+++ b/packages/dom/test/css/describe.test.ts
@@ -0,0 +1,55 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { assert } from 'chai';
+import { describeCss } from '../../src/css';
+import { testCases } from './match-cases';
+import { evaluateXPath } from '../utils';
+
+const domParser = new DOMParser();
+
+describe('describeCss', () => {
+  describe('inverts test cases of css matcher', () => {
+    for (const [name, { html, scopeXPath, expected }] of Object.entries(
+      testCases,
+    )) {
+      for (let i = 0; i < expected.length; i++) {
+        const elementXPath = expected[i];
+        it(`case: '${name}' (${i+1}/${expected.length})`, async () => {
+          const doc = domParser.parseFromString(html, 'text/html');
+          const element = evaluateXPath(doc, elementXPath) as HTMLElement;
+          const scopeElement = scopeXPath
+            ? evaluateXPath(doc, scopeXPath) as HTMLElement
+            : undefined;
+          const cssSelector = await describeCss(
+            element,
+            scopeElement,
+          );
+
+          // We do not require a specific value for the selector, just
+          // that it uniquely matches the same element again.
+          const matchingElements = (scopeElement ?? doc).querySelectorAll(cssSelector.value);
+          assert.equal(matchingElements.length, 1, 'Expected a selector with a single match');
+          assert.equal(matchingElements[0], element);
+        });
+      }
+    }
+  });
+});
diff --git a/packages/dom/test/css/match-cases.ts b/packages/dom/test/css/match-cases.ts
new file mode 100644
index 0000000..26fbe03
--- /dev/null
+++ b/packages/dom/test/css/match-cases.ts
@@ -0,0 +1,63 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import type { CssSelector } from '@apache-annotator/selector';
+
+export const testCases: {
+  [name: string]: {
+    html: string;
+    selector: CssSelector;
+    scopeXPath?: string;
+    expected: string[];
+  };
+} = {
+  'simple': {
+    html: '<b>lorem <i>ipsum</i> dolor <i>amet</i> yada <i>yada</i></b>',
+    selector: {
+      type: 'CssSelector',
+      value: 'i:nth-child(2)',
+    },
+    expected: ['//b/i[2]'],
+  },
+  'multiple matches': {
+    html: '<b>lorem <i>ipsum</i> dolor <i>amet</i> yada <i>yada</i></b>',
+    selector: {
+      type: 'CssSelector',
+      value: 'i',
+    },
+    expected: [
+      '//b/i[1]',
+      '//b/i[2]',
+      '//b/i[3]',
+    ],
+  },
+  'with scope': {
+    html: '<b>lorem <i>ipsum</i> dolor <u><i>amet</i> yada <i>yada</i></u></b>',
+    selector: {
+      type: 'CssSelector',
+      value: 'i',
+    },
+    scopeXPath: '//u',
+    expected: [
+      '//u/i[1]',
+      '//u/i[2]',
+    ],
+  },
+};
diff --git a/packages/dom/test/css/match.test.ts b/packages/dom/test/css/match.test.ts
new file mode 100644
index 0000000..9d4c18f
--- /dev/null
+++ b/packages/dom/test/css/match.test.ts
@@ -0,0 +1,62 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { assert } from 'chai';
+import type { CssSelector } from '@apache-annotator/selector';
+import { createCssSelectorMatcher } from '../../src/css';
+import { testCases } from './match-cases';
+import { evaluateXPath } from '../utils';
+
+const domParser = new DOMParser();
+
+describe('CreateCssSelectorMatcher', () => {
+  for (const [name, { html, selector, scopeXPath, expected }] of Object.entries(
+    testCases,
+  )) {
+    it(`works for case: '${name}'`, async () => {
+      const doc = domParser.parseFromString(html, 'text/html');
+
+      const scopeElement = scopeXPath ? evaluateXPath(doc, scopeXPath) : doc;
+      const scope = doc.createRange();
+      scope.selectNodeContents(scopeElement);
+
+      await testMatcher(doc, scope, selector, expected);
+    });
+  }
+});
+
+async function testMatcher(
+  doc: Document,
+  scope: Range,
+  selector: CssSelector,
+  expected: string[],
+) {
+  const matcher = createCssSelectorMatcher(selector);
+  const matches = [];
+  for await (const value of matcher(scope)) matches.push(value);
+  assert.equal(matches.length, expected.length, 'Unexpected number of matches');
+  matches.forEach((match, i) => {
+    const expectedElement = evaluateXPath(doc, expected[i]);
+    // The match should be a Range that exactly contains the expected element.
+    assert.equal(match.startContainer.childNodes[match.startOffset], expectedElement);
+    assert.equal(match.endContainer, match.startContainer);
+    assert.equal(match.endOffset, match.startOffset + 1);
+  });
+}
diff --git a/yarn.lock b/yarn.lock
index 5dbf5f7..50675a0 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -7463,6 +7463,11 @@ opn@^5.5.0:
   dependencies:
     is-wsl "^1.1.0"
 
+optimal-select@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/optimal-select/-/optimal-select-4.0.1.tgz#47de7da7a39bb0949fd9af54c6f03571548f04c9"
+  integrity sha1-R959p6ObsJSf2a9UxvA1cVSPBMk=
+
 optionator@^0.8.1:
   version "0.8.3"
   resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"