You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2020/08/28 16:43:26 UTC

[jena] branch master updated: JENA-1936: Value-equality indexing for binary datatypes

This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/master by this push:
     new 72a6ec6  JENA-1936: Value-equality indexing for binary datatypes
     new fc024ee  Merge pull request #785 from afs/byte-array
72a6ec6 is described below

commit 72a6ec6b26d6740f1096c521d22d33327899b084
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Sat Aug 22 19:19:17 2020 +0100

    JENA-1936: Value-equality indexing for binary datatypes
---
 .../apache/jena/graph/impl/LiteralLabelImpl.java   | 65 ++++++++++++++++++----
 .../java/org/apache/jena/graph/test/TestNode.java  | 38 ++++++++++---
 .../apache/jena/graph/test/TestTypedLiterals.java  | 36 ++++++++++++
 3 files changed, 122 insertions(+), 17 deletions(-)

diff --git a/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabelImpl.java b/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabelImpl.java
index bc5423c..cfbb262 100644
--- a/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabelImpl.java
+++ b/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabelImpl.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.graph.impl;
 
+import java.util.Arrays;
 import java.util.Locale ;
 import java.util.Objects ;
 
@@ -303,20 +304,64 @@ final /*public*/ class LiteralLabelImpl implements LiteralLabel {
 		return lexicalForm;
 	}
     
-    /** 
-     	Answer the value used to index this literal
-        TODO Consider pushing indexing decisions down to the datatype
-    */
+    /**
+     * Answer an object used to index this literal. This object must provide
+     * {@link Object#equals} and {@link Object#hashCode} based on values, not object
+     * instance identity.
+     */
     @Override
     public Object getIndexingValue() {
-        return
-            isXML() ? this
-            : !lang.equals( "" ) ? getLexicalForm() + "@" + lang.toLowerCase(Locale.ROOT)
-            : wellformed ? getValue()
-            : getLexicalForm() 
-            ;
+        if ( isXML() )
+            return this;
+        if ( !lang.equals("") )
+            return getLexicalForm() + "@" + lang.toLowerCase(Locale.ROOT);
+        if ( wellformed ) {
+            Object value = getValue();
+            // JENA-1936
+            // byte[] does not provide hashCode/equals based on the contents of the array.
+            if ( value instanceof byte[] )
+                return new ByteArray((byte[])value);
+            return value;
+        }
+        return getLexicalForm();
     }
 
+    /**
+     * {@code byte[]} wrapper that provides {@code hashCode} and {@code equals} based
+     * on the value of the array. This assumes the {@code byte[]} is not changed
+     * (which is the case for literals with binary value).
+     */
+    static class ByteArray {
+        private int hashCode = 0 ;
+        
+        private final byte[] bytes;
+        /*package*/ ByteArray(byte[] bytes) {
+            this.bytes = bytes;
+        }
+
+        @Override
+        public int hashCode() {
+            if ( hashCode == 0 ) {
+                final int prime = 31;
+                int result = 1;
+                hashCode = prime * result + Arrays.hashCode(bytes);
+            }
+            return hashCode;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if ( this == obj )
+                return true;
+            if ( obj == null )
+                return false;
+            if ( getClass() != obj.getClass() )
+                return false;
+            ByteArray other = (ByteArray)obj;
+            return Arrays.equals(bytes, other.bytes);
+        }
+    }
+    
 	/** 
      	Answer the language associated with this literal (the empty string if
         there's no language).
diff --git a/jena-core/src/test/java/org/apache/jena/graph/test/TestNode.java b/jena-core/src/test/java/org/apache/jena/graph/test/TestNode.java
index 1d2907f..53ea93a 100644
--- a/jena-core/src/test/java/org/apache/jena/graph/test/TestNode.java
+++ b/jena-core/src/test/java/org/apache/jena/graph/test/TestNode.java
@@ -22,6 +22,7 @@ package org.apache.jena.graph.test;
 import junit.framework.TestSuite ;
 
 import org.apache.jena.JenaRuntime ;
+import org.apache.jena.atlas.lib.Creator;
 import org.apache.jena.datatypes.RDFDatatype ;
 import org.apache.jena.datatypes.TypeMapper ;
 import org.apache.jena.datatypes.xsd.XSDDatatype ;
@@ -606,17 +607,40 @@ public class TestNode extends GraphTestBase
     }
 
     public void testGetIndexingValuePlainString()
-    { testIndexingValueLiteral( NodeCreateUtils.create( "'literally'" ) ); }
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "'literally'" ) ); }
 
     public void testGetIndexingValueLanguagedString()
-    { testIndexingValueLiteral( NodeCreateUtils.create( "'chat'fr" ) ); }
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "'chat'fr" ) ); }
 
     public void testGetIndexingValueXSDString()
-    { testIndexingValueLiteral( NodeCreateUtils.create( "'string'xsd:string" ) ); }
-
-    private void testIndexingValueLiteral( Node s )
-    { assertEquals( s.getLiteral().getIndexingValue(), s.getIndexingValue() ); }
-
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "'string'xsd:string" ) ); }
+
+    // JENA-1936
+    public void testGetIndexingValueHexBinary1()
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "''xsd:hexBinary" ) ); }
+
+    public void testGetIndexingValueHexBinary2()
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "'ABCD'xsd:hexBinary" ) ); }
+
+    public void testGetIndexingValueBase64Binary1()
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "''xsd:base64Binary" ) ); }
+
+    // "sure." encodes to "c3VyZS4=" 
+    public void testGetIndexingValueBase64Binary2()
+    { testIndexingValueLiteral( ()->NodeCreateUtils.create( "'c3VyZS4='xsd:base64Binary" ) ); }
+    
+    private void testIndexingValueLiteral( Creator<Node> creator) {
+        Node n1 = creator.create();
+        Node n2 = creator.create();
+        testIndexingValueLiteral(n1,n2);
+    }
+    
+    private void testIndexingValueLiteral(Node n1, Node n2) {
+        assertNotSame(n1, n2); // Test the test.
+        assertEquals(n1.getLiteral().getIndexingValue(), n2.getIndexingValue());
+        assertEquals(n1.getLiteral().getIndexingValue().hashCode(), n2.getIndexingValue().hashCode());
+    }
+    
     public void  testGetLiteralValuePlainString()
     {
         Node s = NodeCreateUtils.create( "'aString'" );
diff --git a/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java b/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java
index 91350c3..a81cc33 100644
--- a/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java
+++ b/jena-core/src/test/java/org/apache/jena/graph/test/TestTypedLiterals.java
@@ -954,6 +954,42 @@ public class TestTypedLiterals extends TestCase {
         Literal l = m.createTypedLiteral(data, XSDDatatype.XSDhexBinary);
         assertEquals("hexBinary encoding", "0FB7", l.getLexicalForm());
     }
+    
+    public void testBinaryIndexing1() {
+        Literal x1 = m.createTypedLiteral("", XSDDatatype.XSDbase64Binary);
+        Literal x2 = m.createTypedLiteral("", XSDDatatype.XSDbase64Binary);
+        assertEquals("base64Binary indexing hashCode", 
+                     x1.asNode().getIndexingValue().hashCode(),
+                     x2.asNode().getIndexingValue().hashCode());
+        assertEquals("base64Binary indexing", x1.asNode().getIndexingValue(), x2.asNode().getIndexingValue());  
+    }
+
+    public void testBinaryIndexing2() {
+        Literal x1 = m.createTypedLiteral("GpM7", XSDDatatype.XSDbase64Binary);
+        Literal x2 = m.createTypedLiteral("GpM7", XSDDatatype.XSDbase64Binary);
+        assertEquals("base64Binary indexing hashCode", 
+            x1.asNode().getIndexingValue().hashCode(),
+            x2.asNode().getIndexingValue().hashCode());
+        assertEquals("base64Binary indexing", x1.asNode().getIndexingValue(), x2.asNode().getIndexingValue());  
+    }
+    
+    public void testBinaryIndexing3() {
+        Literal x1 = m.createTypedLiteral("", XSDDatatype.XSDhexBinary);
+        Literal x2 = m.createTypedLiteral("", XSDDatatype.XSDhexBinary);
+        assertEquals("hexBinary indexing hashCode", 
+            x1.asNode().getIndexingValue().hashCode(),
+            x2.asNode().getIndexingValue().hashCode());
+        assertEquals("hexBinary indexing", x1.asNode().getIndexingValue(), x2.asNode().getIndexingValue());  
+    }
+
+    public void testBinaryIndexing4() {
+        Literal x1 = m.createTypedLiteral("AABB", XSDDatatype.XSDhexBinary);
+        Literal x2 = m.createTypedLiteral("AABB", XSDDatatype.XSDhexBinary);
+        assertEquals("hexBinary indexing hashCode", 
+            x1.asNode().getIndexingValue().hashCode(),
+            x2.asNode().getIndexingValue().hashCode());
+        assertEquals("hexBinary indexing", x1.asNode().getIndexingValue(), x2.asNode().getIndexingValue());  
+    }
 
     /** Test that XSD anyURI is not sameValueAs XSD string (Xerces returns a string as the value for both) */
     public void testXSDanyURI() {