You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ki...@apache.org on 2017/06/13 10:13:28 UTC

[1/6] jena git commit: JENA-1313: compare using a Collator when both literals are tagged with same language

Repository: jena
Updated Branches:
  refs/heads/master 69756463c -> 739a7187a


JENA-1313: compare using a Collator when both literals are tagged with same language


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fdcfc630
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fdcfc630
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fdcfc630

Branch: refs/heads/master
Commit: fdcfc6307d7d0f4cbd850adeeb48d3ca9300c266
Parents: ebf0062
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Thu Apr 13 00:44:42 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Thu Apr 13 00:44:42 2017 +1200

----------------------------------------------------------------------
 .../main/java/org/apache/jena/sparql/expr/NodeValue.java | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/fdcfc630/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
index c2b727e..de5f90e 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
@@ -27,8 +27,10 @@ import java.io.FileInputStream ;
 import java.io.InputStream ;
 import java.math.BigDecimal ;
 import java.math.BigInteger ;
+import java.text.Collator;
 import java.util.Calendar ;
 import java.util.Iterator ;
+import java.util.Locale;
 import java.util.Properties ;
 import java.util.ServiceLoader ;
 
@@ -784,10 +786,15 @@ public abstract class NodeValue extends ExprNode
                     return x ;
                 }
 
-                // same lang tag (case insensitive)
-                x = StrUtils.strCompare(node1.getLiteralLexicalForm(), node2.getLiteralLexicalForm()) ;
+                // same lang tag, handle collation
+                // TBD: cache locales? cache collators? pre define both/any? a simple in-memory lru-map-cache?
+                Locale desiredLocale = Locale.forLanguageTag(node1.getLiteralLanguage());
+                Collator collator = Collator.getInstance(desiredLocale);
+
+                x = collator.compare(node1.getLiteralLexicalForm(), node2.getLiteralLexicalForm());
                 if ( x != Expr.CMP_EQUAL )
                     return x ;
+
                 // Same lexical forms, same lang tag by value
                 // Try to split by syntactic lang tags.
                 x = StrUtils.strCompare(node1.getLiteralLanguage(), node2.getLiteralLanguage()) ;


[4/6] jena git commit: JENA-1313 new NodeValueSortKey, and SortKey value space, plus unit tests

Posted by ki...@apache.org.
JENA-1313 new NodeValueSortKey, and SortKey value space, plus unit tests


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/32e86847
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/32e86847
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/32e86847

Branch: refs/heads/master
Commit: 32e868476860011731e72cf672572117f817940e
Parents: 6e3be8d
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Wed May 3 16:48:41 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Wed May 3 16:48:41 2017 +1200

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/NodeValue.java  |  39 +++---
 .../sparql/expr/ValueSpaceClassification.java   |   2 +-
 .../sparql/expr/nodevalue/NodeFunctions.java    |  11 +-
 .../sparql/expr/nodevalue/NodeValueSortKey.java |  91 ++++++++++++++
 .../sparql/expr/nodevalue/NodeValueString.java  |  16 +--
 .../sparql/expr/nodevalue/NodeValueVisitor.java |   1 +
 .../sparql/function/library/FN_Collation.java   |   7 +-
 .../apache/jena/sparql/expr/TestNodeValue.java  | 124 +++++++++++++++++++
 .../expr/nodevalue/TestNodeFunctions.java       |  40 ++++++
 .../expr/nodevalue/TestNodeValueSortKey.java    |  74 +++++++++++
 .../function/library/TestFunctionCollation.java |  70 +++++++++++
 .../rewriters/NodeValueRewriter.java            |   5 +
 12 files changed, 445 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
index 47aef8d..7721910 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
@@ -248,9 +248,8 @@ public abstract class NodeValue extends ExprNode
     public static NodeValue makeString(String s)
     { return new NodeValueString(s) ; }
 
-    // instead of changing makeString, we can add another method like makeCollatedString
-    public static NodeValue makeString(String s, String collation)
-    { return new NodeValueString(s, collation) ; }
+    public static NodeValue makeSortKey(String s, String collation)
+    { return new NodeValueSortKey(s, collation) ; }
 
     public static NodeValue makeLangString(String s, String lang) 
     { return new NodeValueLang(s, lang) ; }
@@ -736,6 +735,7 @@ public abstract class NodeValue extends ExprNode
             case VSPACE_NODE :
             case VSPACE_NUM :
             case VSPACE_STRING :
+            case VSPACE_SORTKEY :
             case VSPACE_UNKNOWN :
                 // Drop through.
         }
@@ -756,18 +756,7 @@ public abstract class NodeValue extends ExprNode
             case VSPACE_NUM:        return XSDFuncOp.compareNumeric(nv1, nv2) ;
             case VSPACE_STRING:
             {
-                // Not sure if this would fit in XSDFuncOp, maybe passing a locale string or Collator object
-                // to compareString
-                int cmp = 0;
-                String c1 = nv1.getCollation();
-                String c2 = nv2.getCollation();
-                if (c1 != null && c2 != null && c1.equals(c2)) {
-                    Locale desiredLocale = Locale.forLanguageTag(c1);
-                    Collator collator = Collator.getInstance(desiredLocale);
-                    cmp = collator.compare(nv1.getString(), nv2.getString());
-                } else {
-                    cmp = XSDFuncOp.compareString(nv1, nv2) ;
-                }
+                int cmp = XSDFuncOp.compareString(nv1, nv2) ;
                 
                 // Split plain literals and xsd:strings for sorting purposes.
                 if ( ! sortOrderingCompare )
@@ -783,6 +772,22 @@ public abstract class NodeValue extends ExprNode
                     return Expr.CMP_GREATER ;
                 return Expr.CMP_EQUAL;  // Both plain or both xsd:string.
             }
+            case VSPACE_SORTKEY :
+            {
+                int cmp = 0;
+                String c1 = nv1.getCollation();
+                String c2 = nv2.getCollation();
+                if (c1 != null && c2 != null && c1.equals(c2)) {
+                    // locales are parsed. Here we could think about caching if necessary
+                    Locale desiredLocale = Locale.forLanguageTag(c1);
+                    // collators are already stored in a concurrent map by the JVM, with <locale, softref<collator>>
+                    Collator collator = Collator.getInstance(desiredLocale);
+                    cmp = collator.compare(nv1.getString(), nv2.getString());
+                } else {
+                    cmp = XSDFuncOp.compareString(nv1, nv2) ;
+                }
+                return cmp;
+            }
             case VSPACE_BOOLEAN:    return XSDFuncOp.compareBoolean(nv1, nv2) ;
             
             case VSPACE_LANG:
@@ -884,6 +889,7 @@ public abstract class NodeValue extends ExprNode
             return VSPACE_DATE ;
         
         if ( nv.isString())         return VSPACE_STRING ;
+        if ( nv.isSortKey())        return VSPACE_SORTKEY ;
         if ( nv.isBoolean())        return VSPACE_BOOLEAN ;
         
         if ( ! nv.isLiteral() )     return VSPACE_NODE ;
@@ -927,6 +933,7 @@ public abstract class NodeValue extends ExprNode
     public boolean isBoolean()      { return false ; } 
     public boolean isString()       { return false ; } 
     public boolean isLangString()   { return false ; }
+    public boolean isSortKey()      { return false ; }
 
     public boolean isNumber()       { return false ; }
     public boolean isInteger()      { return false ; }
@@ -971,7 +978,7 @@ public abstract class NodeValue extends ExprNode
     public boolean     getBoolean()     { raise(new ExprEvalTypeException("Not a boolean: "+this)) ; return false ; }
     public String      getString()      { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
     public String      getLang()        { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
-    public String      getCollation()   { raise(new ExprEvalTypeException("Not a collation: "+this)) ; return null ; }
+    public String      getCollation()   { raise(new ExprEvalTypeException("Not a sort key: "+this)) ; return null ; }
 
     public BigInteger  getInteger()     { raise(new ExprEvalTypeException("Not an integer: "+this)) ; return null ; }
     public BigDecimal  getDecimal()     { raise(new ExprEvalTypeException("Not a decimal: "+this)) ; return null ; }

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/ValueSpaceClassification.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/ValueSpaceClassification.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/ValueSpaceClassification.java
index 815a0ac..218cdd0 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/ValueSpaceClassification.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/ValueSpaceClassification.java
@@ -33,7 +33,7 @@ public enum ValueSpaceClassification {
     VSPACE_G_MONTH,    
     VSPACE_G_DAY,
     
-    VSPACE_STRING, VSPACE_LANG,
+    VSPACE_STRING, VSPACE_LANG, VSPACE_SORTKEY,
     VSPACE_BOOLEAN,
     VSPACE_UNKNOWN,
     VSPACE_DIFFERENT

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
index 1ad0c35..8aff09c 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
@@ -188,11 +188,6 @@ public class NodeFunctions {
         return NodeValue.makeString(str(nv.asNode())) ;
     }
 
-    // or instead or can create another utility method like strCollation(NodeValue, String)
-    public static NodeValue str(NodeValue nv, String collation) {
-        return NodeValue.makeString(str(nv.asNode()), collation) ;
-    }
-
     public static String str(Node node) {
         if ( node.isLiteral() )
             return node.getLiteral().getLexicalForm() ;
@@ -207,6 +202,12 @@ public class NodeFunctions {
         return "[undef]" ;
     }
 
+    // -------- sort key (collation)
+
+    public static NodeValue sortKey(NodeValue nv, String collation) {
+        return NodeValue.makeSortKey(str(nv.asNode()), collation) ;
+    }
+
     // -------- datatype
     public static NodeValue datatype(NodeValue nv) {
         return NodeValue.makeNode(datatype(nv.asNode())) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
new file mode 100644
index 0000000..60204c7
--- /dev/null
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.sparql.expr.nodevalue;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.sparql.expr.NodeValue;
+import org.apache.jena.sparql.util.FmtUtils;
+
+/**
+ * A {@link NodeValue} that pairs a string with a collation (a language tag). This allows query values
+ * to be sorted following the rules of a specific collation.
+ */
+public class NodeValueSortKey extends NodeValue {
+
+    /**
+     * Node value text.
+     */
+    private final String string;
+    /**
+     * Node value collation language tag (e.g. fi, pt-BR, en, en-CA, etc).
+     */
+    private final String collation;
+
+    public NodeValueSortKey(final String string, final String collation) {
+        this.string = string;
+        this.collation = collation;
+    }
+
+    public NodeValueSortKey(final String string, final String collation, Node n) {
+        super(n);
+        this.string = string;
+        this.collation = collation;
+    }
+
+    @Override
+    public boolean isSortKey() {
+        return Boolean.TRUE;
+    }
+
+    @Override
+    public String getString() {
+        return string;
+    }
+
+    @Override
+    public String asString() {
+        return string;
+    }
+
+    @Override
+    public String getCollation() {
+        return collation;
+    }
+
+    @Override
+    protected Node makeNode() {
+        return NodeFactory.createLiteral(string);
+    }
+
+    @Override
+    public void visit(NodeValueVisitor visitor) {
+        visitor.visit(this);
+    }
+
+    @Override
+    public String toString()
+    { 
+        if (getNode() != null) {
+            return FmtUtils.stringForNode(getNode()) ;
+        }
+        return "'"+getString()+"'";
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
index ac4377b..a921c43 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
@@ -29,14 +29,9 @@ public class NodeValueString extends NodeValue
     // A plain string, with no language tag, or an xsd:string.
     
     private String string ; 
-    // Here we are adding a new feature to a NodeValueString. Instead, we could try to create a new type
-    // that extends NodeValue. e.g. NodeValueCollatedString, moving this property and half constructors away
-    private final String collation;
-
-    public NodeValueString(String str)         { this(str, (String) null); }
-    public NodeValueString(String str, String collation)         { string = str ; this.collation = collation; }
-    public NodeValueString(String str, Node n) { this(str, n, (String) null); }
-    public NodeValueString(String str, Node n, String collation) { super(n) ; string = str ; this.collation = collation; }
+    
+    public NodeValueString(String str)         { string = str ; } 
+    public NodeValueString(String str, Node n) { super(n) ; string = str ; }
     
     @Override
     public boolean isString() { return true ; }
@@ -46,10 +41,7 @@ public class NodeValueString extends NodeValue
 
     @Override
     public String asString() { return string ; }
-
-    @Override
-    public String getCollation() { return collation ; }
-
+    
     @Override
     public String toString()
     { 

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueVisitor.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueVisitor.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueVisitor.java
index 992a5c3..5f5bbba 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueVisitor.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueVisitor.java
@@ -30,6 +30,7 @@ public interface NodeValueVisitor
     public void visit(NodeValueNode nv) ;
     public void visit(NodeValueLang nv) ;
     public void visit(NodeValueString nv) ;
+    public void visit(NodeValueSortKey nv) ;
     public void visit(NodeValueDT nv) ;
 //    public void visit(NodeValueTime nv) ;
 	public void visit(NodeValueDuration nodeValueDuration);

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java b/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
index 639ee98..4df2f47 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
@@ -19,11 +19,14 @@
 package org.apache.jena.sparql.function.library;
 
 import java.text.Collator;
+import java.text.RuleBasedCollator;
 import java.util.Locale;
 
+import org.apache.jena.sparql.expr.Expr;
 import org.apache.jena.sparql.expr.NodeValue;
 import org.apache.jena.sparql.expr.nodevalue.NodeFunctions;
 import org.apache.jena.sparql.expr.nodevalue.NodeValueLang;
+import org.apache.jena.sparql.expr.nodevalue.NodeValueSortKey;
 import org.apache.jena.sparql.function.FunctionBase2;
 
 /**
@@ -41,6 +44,8 @@ import org.apache.jena.sparql.function.FunctionBase2;
  * expr is a {@link NodeValueLang} (e.g. rendered from "Casa"@pt), the language tag will
  * be discarded, and only the literal string value (i.e. Casa) will be taken into account
  * for this function.</p>
+ *
+ * @see NodeValueSortKey
  */
 public class FN_Collation extends FunctionBase2 {
 
@@ -53,7 +58,7 @@ public class FN_Collation extends FunctionBase2 {
         // retrieve collation value
         String collation = NodeFunctions.str(v1.asNode());
         // return a NodeValue that contains the v2 literal string, plus the given collation
-        return NodeFunctions.str(v2, collation);
+        return NodeFunctions.sortKey(v2, collation);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestNodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestNodeValue.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestNodeValue.java
index a742278..a93fc04 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestNodeValue.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestNodeValue.java
@@ -20,7 +20,10 @@ package org.apache.jena.sparql.expr;
 
 import java.math.BigDecimal ;
 import java.util.Calendar ;
+import java.util.Comparator;
 import java.util.GregorianCalendar ;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.TimeZone ;
 
 import org.apache.jena.JenaRuntime ;
@@ -28,6 +31,14 @@ import org.apache.jena.atlas.junit.BaseTest ;
 import org.apache.jena.datatypes.xsd.XSDDatatype ;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.query.Query;
+import org.apache.jena.query.QueryExecution;
+import org.apache.jena.query.QueryExecutionFactory;
+import org.apache.jena.query.QueryFactory;
+import org.apache.jena.query.QuerySolution;
+import org.apache.jena.query.ResultSet;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
 import org.apache.jena.sparql.expr.nodevalue.XSDFuncOp ;
 import org.apache.jena.sparql.util.NodeFactoryExtra ;
 import org.junit.AfterClass ;
@@ -746,6 +757,119 @@ public class TestNodeValue extends BaseTest
         assertEquals("Print form mismatch", rightAnswer, actualStr);
     }
 
+    @Test
+    public void testNodeSortKey1() {
+        NodeValue nv = NodeValue.makeSortKey("Wagen", "de");
+        assertTrue("Not a sort key: " + nv, nv.isSortKey());
+        String actualStr = nv.asQuotedString();
+        String rightAnswer = "\"Wagen\"";
+        assertEquals("Print form mismatch", rightAnswer, actualStr);
+    }
+
+    @Test
+    public void testNodeSortKey2() {
+        final String[] unordered =
+                {"Broager", "Åkirkeby", "Børkop", "Ærøskøbing", "Brædstrup", "Wandsbek"};
+        final String[] ordered =
+                {"'Broager'", "'Brædstrup'", "'Børkop'", "'Wandsbek'", "'Ærøskøbing'", "'Åkirkeby'"};
+        // tests collation sort order for Danish
+        final String collation = "da";
+        List<NodeValue> nodeValues = new LinkedList<>();
+        for (String string : unordered) {
+            nodeValues.add(NodeValue.makeSortKey(string, collation));
+        }
+        nodeValues.sort(new Comparator<NodeValue>() {
+            @Override
+            public int compare(NodeValue o1, NodeValue o2) {
+                return NodeValue.compare(o1, o2);
+            }
+        });
+        List<String> result = new LinkedList<>();
+        for (NodeValue nv : nodeValues) {
+            String s = nv.toString();
+            result.add(s);
+        }
+        assertArrayEquals(ordered, result.toArray(new String[0]));
+    }
+
+    @Test
+    public void testNodeSortKey3() {
+        final String[] unordered = new String[]
+                {"Broager", "Åkirkeby", "Børkop", "Ærøskøbing", "Brædstrup", "Wandsbek"};
+        final String[] ordered = new String[]
+                {"'Ærøskøbing'", "'Åkirkeby'", "'Brædstrup'", "'Broager'", "'Børkop'", "'Wandsbek'"};
+        // tests collation sort order with Danish words, but New Zealand English collation rules
+        final String collation = "en-NZ";
+        List<NodeValue> nodeValues = new LinkedList<>();
+        for (String string : unordered) {
+            nodeValues.add(NodeValue.makeSortKey(string, collation));
+        }
+        nodeValues.sort(new Comparator<NodeValue>() {
+            @Override
+            public int compare(NodeValue o1, NodeValue o2) {
+                return NodeValue.compare(o1, o2);
+            }
+        });
+        List<String> result = new LinkedList<>();
+        for (NodeValue nv : nodeValues) {
+            String s = nv.toString();
+            result.add(s);
+        }
+        assertArrayEquals(ordered, result.toArray(new String[0]));
+    }
+
+    @Test
+    public void testNodeSortKey4() {
+        // Collation sort order for Finnish
+        final String collation = "fi";
+        String[] ordered = new String[]
+                {"tsahurin kieli", "tšekin kieli", "tulun kieli", "töyhtöhyyppä"};
+        // Query String
+        final String queryString = "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n" +
+                "PREFIX arq: <http://jena.apache.org/ARQ/function#>\n" +
+                "SELECT ?label WHERE {\n" +
+                "   VALUES ?label { \"tulun kieli\"@es \"tšekin kieli\" \"tsahurin kieli\"@en \"töyhtöhyyppä\"@fi }\n" +
+                "}\n" +
+                "ORDER BY arq:collation(\"" + collation + "\", ?label)";
+        Model model = ModelFactory.createDefaultModel();
+        Query query = QueryFactory.create(queryString);
+        List<String> result = new LinkedList<>();
+        try (QueryExecution qExec = QueryExecutionFactory.create(query, model)) {
+            ResultSet results = qExec.execSelect();
+            while (results.hasNext()) {
+                QuerySolution solution = results.nextSolution();
+                result.add(solution.getLiteral(solution.varNames().next()).getLexicalForm());
+            }
+        }
+        assertArrayEquals(ordered, result.toArray(new String[0]));
+    }
+
+    @Test
+    public void testNodeSortKey5() {
+        // Collation sort order for Belize English (en-BZ)
+        final String collation = "en-BZ";
+        String[] ordered = new String[]
+                {"töyhtöhyyppä", "tsahurin kieli", "tšekin kieli", "tulun kieli"};
+        // Query String
+        final String queryString = "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n" +
+                "PREFIX arq: <http://jena.apache.org/ARQ/function#>\n" +
+                "SELECT ?label WHERE {\n" +
+                "   VALUES ?label { \"tulun kieli\"@es \"tšekin kieli\" \"tsahurin kieli\"@en \"töyhtöhyyppä\"@fi }\n" +
+                "}\n" +
+                "ORDER BY arq:collation(\"" + collation + "\", ?label)";
+        Model model = ModelFactory.createDefaultModel();
+        Query query = QueryFactory.create(queryString);
+        List<String> result = new LinkedList<>();
+        try (QueryExecution qExec = QueryExecutionFactory.create(query, model)) {
+            ResultSet results = qExec.execSelect();
+            while (results.hasNext()) {
+                QuerySolution solution = results.nextSolution();
+                result.add(solution.getLiteral(solution.varNames().next()).getLexicalForm());
+            }
+        }
+        assertArrayEquals(ordered, result.toArray(new String[0]));
+    }
+
     // TODO testSameValueDecimal tests
     // TODO sameValueAs mixed tests
 

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeFunctions.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeFunctions.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeFunctions.java
new file mode 100644
index 0000000..b9de6d6
--- /dev/null
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeFunctions.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.sparql.expr.nodevalue;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.jena.sparql.expr.NodeValue;
+import org.junit.Test;
+
+/**
+ * Tests for {@link NodeFunctions}.
+ */
+public class TestNodeFunctions {
+
+    @Test
+    public void testSortKeyNodeValue() {
+        NodeValue noveValue = NodeValue.makeString("Casa");
+        NodeValue nv = NodeFunctions.sortKey(noveValue, "es");
+        assertTrue(nv instanceof NodeValueSortKey);
+        assertEquals("es", ((NodeValueSortKey) nv).getCollation());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
new file mode 100644
index 0000000..54d3dfe
--- /dev/null
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.sparql.expr.nodevalue;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.jena.graph.Node;
+import org.junit.Test;
+
+/**
+ * Tests for {@link NodeValueSortKey}.
+ */
+public class TestNodeValueSortKey {
+
+    @Test
+    public void testCreateNodeValueSortKey() {
+        NodeValueSortKey nv = new NodeValueSortKey("", null);
+        assertTrue(nv.isSortKey());
+    }
+
+    @Test
+    public void testCreateNodeValueSortKeyWithNode() {
+        Node n = Node.ANY;
+        NodeValueSortKey nv = new NodeValueSortKey("", null, n);
+        assertEquals(n, nv.getNode());
+    }
+
+    @Test
+    public void testGetCollation() {
+        NodeValueSortKey nv = new NodeValueSortKey("", null);
+        assertNull(nv.getCollation());
+        nv = new NodeValueSortKey("", "fi");
+        assertEquals("fi", nv.getCollation());
+    }
+
+    @Test
+    public void testGetString() {
+        NodeValueSortKey nv = new NodeValueSortKey("Casa", "pt-BR");
+        assertEquals("Casa", nv.asString());
+        assertEquals("Casa", nv.getString());
+    }
+
+    @Test
+    public void testMakeNode() {
+        NodeValueSortKey nv = new NodeValueSortKey("Casa", "pt-BR");
+        Node n = nv.makeNode();
+        assertTrue(n.isLiteral());
+        assertEquals("Casa", n.getLiteral().toString());
+    }
+
+    @Test
+    public void testToString() {
+        NodeValueSortKey nv = new NodeValueSortKey("Tutte", "it");
+        assertEquals("'Tutte'", nv.toString());
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-arq/src/test/java/org/apache/jena/sparql/function/library/TestFunctionCollation.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/function/library/TestFunctionCollation.java b/jena-arq/src/test/java/org/apache/jena/sparql/function/library/TestFunctionCollation.java
new file mode 100644
index 0000000..7d69cd2
--- /dev/null
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/function/library/TestFunctionCollation.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.sparql.function.library;
+
+import static org.junit.Assert.assertArrayEquals;
+
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.jena.sparql.expr.NodeValue;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@link FN_Collation}.
+ */
+public class TestFunctionCollation {
+
+    private FN_Collation function = null;
+
+    @Before
+    public void setUp() {
+        function = new FN_Collation();
+    }
+
+    @Test
+    public void testFunctionCollationExec() {
+        NodeValue collation = NodeValue.makeString("fi");
+        
+        final String[] unordered = new String[]
+                {"tšekin kieli", "tulun kieli", "töyhtöhyyppä", "tsahurin kieli", "tsahurin kieli", "tulun kieli"};
+        String[] ordered = new String[]
+                {"'tsahurin kieli'", "'tsahurin kieli'", "'tšekin kieli'",
+                        "'tulun kieli'", "'tulun kieli'", "'töyhtöhyyppä'"};
+        // tests collation sort order with Finnish words, using Finnish ("fi") collation rules
+        List<NodeValue> nodeValues = new LinkedList<>();
+        for (String string : unordered) {
+            nodeValues.add(function.exec(collation, NodeValue.makeString(string)));
+        }
+        nodeValues.sort(new Comparator<NodeValue>() {
+            @Override
+            public int compare(NodeValue o1, NodeValue o2) {
+                return NodeValue.compare(o1, o2);
+            }
+        });
+        List<String> result = new LinkedList<>();
+        for (NodeValue nv : nodeValues) {
+            String s = nv.toString();
+            result.add(s);
+        }
+        assertArrayEquals(ordered, result.toArray(new String[0]));
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/32e86847/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/rewriters/NodeValueRewriter.java
----------------------------------------------------------------------
diff --git a/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/rewriters/NodeValueRewriter.java b/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/rewriters/NodeValueRewriter.java
index a3f33cd..d11ddc8 100644
--- a/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/rewriters/NodeValueRewriter.java
+++ b/jena-extras/jena-querybuilder/src/main/java/org/apache/jena/arq/querybuilder/rewriters/NodeValueRewriter.java
@@ -74,6 +74,11 @@ class NodeValueRewriter extends AbstractRewriter<NodeValue> implements
 		push(new NodeValueString(nv.getString(), changeNode(nv.getNode())));
 	}
 
+    @Override
+    public void visit(NodeValueSortKey nv) {
+        push(new NodeValueString(nv.getString(), changeNode(nv.getNode())));
+    }
+
 	@Override
 	public void visit(NodeValueDT nv) {
 		push(new NodeValueDT(nv.getDateTime().toXMLFormat(),


[6/6] jena git commit: JENA-1313 Merge remote-tracking branch 'upstream/pr/237'

Posted by ki...@apache.org.
JENA-1313 Merge remote-tracking branch 'upstream/pr/237'

This closes #237


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/739a7187
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/739a7187
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/739a7187

Branch: refs/heads/master
Commit: 739a7187ab6b14b312e069221efb8824b9f85067
Parents: 6975646 3b12f83
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Tue Jun 13 22:12:15 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Tue Jun 13 22:12:15 2017 +1200

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/NodeValue.java  |  17 ++-
 .../sparql/expr/ValueSpaceClassification.java   |   2 +-
 .../sparql/expr/nodevalue/NodeFunctions.java    |   6 +
 .../sparql/expr/nodevalue/NodeValueSortKey.java | 118 ++++++++++++++++++
 .../sparql/expr/nodevalue/NodeValueVisitor.java |   1 +
 .../jena/sparql/function/StandardFunctions.java |   3 +
 .../sparql/function/library/FN_Collation.java   |  64 ++++++++++
 .../apache/jena/sparql/expr/TestNodeValue.java  | 124 +++++++++++++++++++
 .../expr/nodevalue/TestNodeFunctions.java       |  40 ++++++
 .../expr/nodevalue/TestNodeValueSortKey.java    |  86 +++++++++++++
 .../function/library/TestFunctionCollation.java |  70 +++++++++++
 .../rewriters/NodeValueRewriter.java            |   5 +
 12 files changed, 533 insertions(+), 3 deletions(-)
----------------------------------------------------------------------



[3/6] jena git commit: Collating data via custom function

Posted by ki...@apache.org.
Collating data via custom function


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/6e3be8d7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/6e3be8d7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/6e3be8d7

Branch: refs/heads/master
Commit: 6e3be8d728aca68010273080e804ef7fdaf2fd2a
Parents: 3e9a2de
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Sun Apr 30 01:54:40 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Sun Apr 30 13:25:00 2017 +1200

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/NodeValue.java  | 24 +++++++-
 .../sparql/expr/nodevalue/NodeFunctions.java    |  5 ++
 .../sparql/expr/nodevalue/NodeValueString.java  | 16 ++++--
 .../jena/sparql/function/StandardFunctions.java |  3 +
 .../sparql/function/library/FN_Collation.java   | 59 ++++++++++++++++++++
 5 files changed, 100 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/6e3be8d7/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
index c2b727e..47aef8d 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
@@ -27,8 +27,10 @@ import java.io.FileInputStream ;
 import java.io.InputStream ;
 import java.math.BigDecimal ;
 import java.math.BigInteger ;
+import java.text.Collator;
 import java.util.Calendar ;
 import java.util.Iterator ;
+import java.util.Locale;
 import java.util.Properties ;
 import java.util.ServiceLoader ;
 
@@ -243,9 +245,13 @@ public abstract class NodeValue extends ExprNode
     public static NodeValue makeDouble(double d)
     { return new NodeValueDouble(d) ; }
 
-    public static NodeValue makeString(String s) 
+    public static NodeValue makeString(String s)
     { return new NodeValueString(s) ; }
 
+    // instead of changing makeString, we can add another method like makeCollatedString
+    public static NodeValue makeString(String s, String collation)
+    { return new NodeValueString(s, collation) ; }
+
     public static NodeValue makeLangString(String s, String lang) 
     { return new NodeValueLang(s, lang) ; }
 
@@ -750,7 +756,18 @@ public abstract class NodeValue extends ExprNode
             case VSPACE_NUM:        return XSDFuncOp.compareNumeric(nv1, nv2) ;
             case VSPACE_STRING:
             {
-                int cmp = XSDFuncOp.compareString(nv1, nv2) ;
+                // Not sure if this would fit in XSDFuncOp, maybe passing a locale string or Collator object
+                // to compareString
+                int cmp = 0;
+                String c1 = nv1.getCollation();
+                String c2 = nv2.getCollation();
+                if (c1 != null && c2 != null && c1.equals(c2)) {
+                    Locale desiredLocale = Locale.forLanguageTag(c1);
+                    Collator collator = Collator.getInstance(desiredLocale);
+                    cmp = collator.compare(nv1.getString(), nv2.getString());
+                } else {
+                    cmp = XSDFuncOp.compareString(nv1, nv2) ;
+                }
                 
                 // Split plain literals and xsd:strings for sorting purposes.
                 if ( ! sortOrderingCompare )
@@ -954,7 +971,8 @@ public abstract class NodeValue extends ExprNode
     public boolean     getBoolean()     { raise(new ExprEvalTypeException("Not a boolean: "+this)) ; return false ; }
     public String      getString()      { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
     public String      getLang()        { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
-    
+    public String      getCollation()   { raise(new ExprEvalTypeException("Not a collation: "+this)) ; return null ; }
+
     public BigInteger  getInteger()     { raise(new ExprEvalTypeException("Not an integer: "+this)) ; return null ; }
     public BigDecimal  getDecimal()     { raise(new ExprEvalTypeException("Not a decimal: "+this)) ; return null ; }
     public float       getFloat()       { raise(new ExprEvalTypeException("Not a float: "+this)) ; return Float.NaN ; }

http://git-wip-us.apache.org/repos/asf/jena/blob/6e3be8d7/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
index 3aa8db3..1ad0c35 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeFunctions.java
@@ -188,6 +188,11 @@ public class NodeFunctions {
         return NodeValue.makeString(str(nv.asNode())) ;
     }
 
+    // or instead we can create another utility method like strCollation(NodeValue, String)
+    public static NodeValue str(NodeValue nv, String collation) {
+        return NodeValue.makeString(str(nv.asNode()), collation) ;
+    }
+
     public static String str(Node node) {
         if ( node.isLiteral() )
             return node.getLiteral().getLexicalForm() ;

http://git-wip-us.apache.org/repos/asf/jena/blob/6e3be8d7/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
index a921c43..ac4377b 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueString.java
@@ -29,9 +29,14 @@ public class NodeValueString extends NodeValue
     // A plain string, with no language tag, or an xsd:string.
     
     private String string ; 
-    
-    public NodeValueString(String str)         { string = str ; } 
-    public NodeValueString(String str, Node n) { super(n) ; string = str ; }
+    // Here we are adding a new feature to a NodeValueString. Instead, we could try to create a new type
+    // that extends NodeValue, e.g. NodeValueCollatedString, moving this property and half of the constructors away
+    private final String collation;
+
+    public NodeValueString(String str)         { this(str, (String) null); }
+    public NodeValueString(String str, String collation)         { string = str ; this.collation = collation; }
+    public NodeValueString(String str, Node n) { this(str, n, (String) null); }
+    public NodeValueString(String str, Node n, String collation) { super(n) ; string = str ; this.collation = collation; }
     
     @Override
     public boolean isString() { return true ; }
@@ -41,7 +46,10 @@ public class NodeValueString extends NodeValue
 
     @Override
     public String asString() { return string ; }
-    
+
+    @Override
+    public String getCollation() { return collation ; }
+
     @Override
     public String toString()
     { 

http://git-wip-us.apache.org/repos/asf/jena/blob/6e3be8d7/jena-arq/src/main/java/org/apache/jena/sparql/function/StandardFunctions.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/function/StandardFunctions.java b/jena-arq/src/main/java/org/apache/jena/sparql/function/StandardFunctions.java
index db38377..145b1b0 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/function/StandardFunctions.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/function/StandardFunctions.java
@@ -95,6 +95,9 @@ public class StandardFunctions
         addCastTemporal(registry, XSDDatatype.XSDgMonthDay) ;
         addCastTemporal(registry, XSDDatatype.XSDgDay) ;
 
+        // Using ARQ prefix http://jena.apache.org/ARQ/function#
+        add(registry, ARQConstants.ARQFunctionLibraryURI+"collation",        FN_Collation.class) ;
+
         //TODO op:numeric-greater-than etc.
         //TODO sparql:* for all the SPARQL builtins.
         

http://git-wip-us.apache.org/repos/asf/jena/blob/6e3be8d7/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java b/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
new file mode 100644
index 0000000..639ee98
--- /dev/null
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/function/library/FN_Collation.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.sparql.function.library;
+
+import java.text.Collator;
+import java.util.Locale;
+
+import org.apache.jena.sparql.expr.NodeValue;
+import org.apache.jena.sparql.expr.nodevalue.NodeFunctions;
+import org.apache.jena.sparql.expr.nodevalue.NodeValueLang;
+import org.apache.jena.sparql.function.FunctionBase2;
+
+/**
+ * Collation function. Takes two parameters. First is the collation, second the
+ * Node, that is an {@link Expr} (ExprVar, ExprFunctionN, NodeValue, etc).
+ *
+ * <p>Called with a prefix {@code p}, e.g. {@code ORDER BY p:collation("fi", ?label);}.
+ * The first argument (in this case, "fi") is then resolved to a {@link Locale}, that is
+ * used to build a {@link Collator}. If a locale does not match any known collator, then
+ * a rule based collator ({@link RuleBasedCollator}) is returned, but with no rules,
+ * returning values in natural order, not applying any specific collation order.</p>
+ *
+ * <p>The second argument, which is an {@link Expr}, will have its literal string value
+ * extracted (or will raise an error if it is not possible). This means that if the
+ * expr is a {@link NodeValueLang} (e.g. rendered from "Casa"@pt), the language tag will
+ * be discarded, and only the literal string value (i.e. Casa) will be taken into account
+ * for this function.</p>
+ */
+public class FN_Collation extends FunctionBase2 {
+
+    public FN_Collation() {
+        super();
+    }
+
+    @Override
+    public NodeValue exec(NodeValue v1, NodeValue v2) {
+        // retrieve collation value
+        String collation = NodeFunctions.str(v1.asNode());
+        // return a NodeValue that contains the v2 literal string, plus the given collation
+        return NodeFunctions.str(v2, collation);
+    }
+
+}


[2/6] jena git commit: Revert "JENA-1313: compare using a Collator when both literals are tagged with same language"

Posted by ki...@apache.org.
Revert "JENA-1313: compare using a Collator when both literals are tagged with same language"

This reverts commit fdcfc6307d7d0f4cbd850adeeb48d3ca9300c266.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/3e9a2de0
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/3e9a2de0
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/3e9a2de0

Branch: refs/heads/master
Commit: 3e9a2de0877fde2dac724585f1aa09fc868993d3
Parents: fdcfc63
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Sun Apr 30 13:06:06 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Sun Apr 30 13:06:06 2017 +1200

----------------------------------------------------------------------
 .../main/java/org/apache/jena/sparql/expr/NodeValue.java | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/3e9a2de0/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
index de5f90e..c2b727e 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
@@ -27,10 +27,8 @@ import java.io.FileInputStream ;
 import java.io.InputStream ;
 import java.math.BigDecimal ;
 import java.math.BigInteger ;
-import java.text.Collator;
 import java.util.Calendar ;
 import java.util.Iterator ;
-import java.util.Locale;
 import java.util.Properties ;
 import java.util.ServiceLoader ;
 
@@ -786,15 +784,10 @@ public abstract class NodeValue extends ExprNode
                     return x ;
                 }
 
-                // same lang tag, handle collation
-                // TBD: cache locales? cache collators? pre define both/any? a simple in-memory lru-map-cache?
-                Locale desiredLocale = Locale.forLanguageTag(node1.getLiteralLanguage());
-                Collator collator = Collator.getInstance(desiredLocale);
-
-                x = collator.compare(node1.getLiteralLexicalForm(), node2.getLiteralLexicalForm());
+                // same lang tag (case insensitive)
+                x = StrUtils.strCompare(node1.getLiteralLexicalForm(), node2.getLiteralLexicalForm()) ;
                 if ( x != Expr.CMP_EQUAL )
                     return x ;
-
                 // Same lexical forms, same lang tag by value
                 // Try to split by syntactic lang tags.
                 x = StrUtils.strCompare(node1.getLiteralLanguage(), node2.getLiteralLanguage()) ;


[5/6] jena git commit: Make NodeValueSortKey comparable, and add comments

Posted by ki...@apache.org.
Make NodeValueSortKey comparable, and add comments


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/3b12f834
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/3b12f834
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/3b12f834

Branch: refs/heads/master
Commit: 3b12f8348c2bbedea67080fa3aecc0fd8258a200
Parents: 32e8684
Author: Bruno P. Kinoshita <br...@yahoo.com.br>
Authored: Sat May 13 11:40:29 2017 +1200
Committer: Bruno P. Kinoshita <br...@yahoo.com.br>
Committed: Sat May 13 11:40:29 2017 +1200

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/NodeValue.java  | 18 ++----------
 .../sparql/expr/nodevalue/NodeValueSortKey.java | 31 ++++++++++++++++++--
 .../expr/nodevalue/TestNodeValueSortKey.java    | 12 ++++++++
 3 files changed, 44 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/3b12f834/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
index 7721910..e84a6b5 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java
@@ -27,10 +27,8 @@ import java.io.FileInputStream ;
 import java.io.InputStream ;
 import java.math.BigDecimal ;
 import java.math.BigInteger ;
-import java.text.Collator;
 import java.util.Calendar ;
 import java.util.Iterator ;
-import java.util.Locale;
 import java.util.Properties ;
 import java.util.ServiceLoader ;
 
@@ -774,19 +772,10 @@ public abstract class NodeValue extends ExprNode
             }
             case VSPACE_SORTKEY :
             {
-                int cmp = 0;
-                String c1 = nv1.getCollation();
-                String c2 = nv2.getCollation();
-                if (c1 != null && c2 != null && c1.equals(c2)) {
-                    // locales are parsed. Here we could think about caching if necessary
-                    Locale desiredLocale = Locale.forLanguageTag(c1);
-                    // collators are already stored in a concurrent map by the JVM, with <locale, softref<collator>>
-                    Collator collator = Collator.getInstance(desiredLocale);
-                    cmp = collator.compare(nv1.getString(), nv2.getString());
-                } else {
-                    cmp = XSDFuncOp.compareString(nv1, nv2) ;
+                if (!(nv1 instanceof NodeValueSortKey) || !(nv2 instanceof NodeValueSortKey)) {
+                    raise(new ExprNotComparableException("Can't compare (not node value sort keys) "+nv1+" and "+nv2)) ;
                 }
-                return cmp;
+                return ((NodeValueSortKey) nv1).compareTo((NodeValueSortKey) nv2);
             }
             case VSPACE_BOOLEAN:    return XSDFuncOp.compareBoolean(nv1, nv2) ;
             
@@ -978,7 +967,6 @@ public abstract class NodeValue extends ExprNode
     public boolean     getBoolean()     { raise(new ExprEvalTypeException("Not a boolean: "+this)) ; return false ; }
     public String      getString()      { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
     public String      getLang()        { raise(new ExprEvalTypeException("Not a string: "+this)) ; return null ; }
-    public String      getCollation()   { raise(new ExprEvalTypeException("Not a sort key: "+this)) ; return null ; }
 
     public BigInteger  getInteger()     { raise(new ExprEvalTypeException("Not an integer: "+this)) ; return null ; }
     public BigDecimal  getDecimal()     { raise(new ExprEvalTypeException("Not a decimal: "+this)) ; return null ; }

http://git-wip-us.apache.org/repos/asf/jena/blob/3b12f834/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
index 60204c7..e14adb9 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueSortKey.java
@@ -18,8 +18,12 @@
 
 package org.apache.jena.sparql.expr.nodevalue;
 
+import java.text.Collator;
+import java.util.Locale;
+
 import org.apache.jena.graph.Node;
 import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.graph.Node_Literal;
 import org.apache.jena.sparql.expr.NodeValue;
 import org.apache.jena.sparql.util.FmtUtils;
 
@@ -27,7 +31,7 @@ import org.apache.jena.sparql.util.FmtUtils;
  * A {@link NodeValue} that supports collation value for a string. This allows query values
  * to be sorted following rules for a specific collation.
  */
-public class NodeValueSortKey extends NodeValue {
+public final class NodeValueSortKey extends NodeValue implements Comparable<NodeValueSortKey> {
 
     /**
      * Node value text.
@@ -64,11 +68,15 @@ public class NodeValueSortKey extends NodeValue {
         return string;
     }
 
-    @Override
     public String getCollation() {
         return collation;
     }
 
+    /**
+     * The node created by a NodeValueSortKey is a {@link Node_Literal}. This is used to represent
+     * the node value internally for comparison, and should not be expected to work in other cases.
+     * Users are not expected to extend it, or use in other functions.
+     */
     @Override
     protected Node makeNode() {
         return NodeFactory.createLiteral(string);
@@ -88,4 +96,23 @@ public class NodeValueSortKey extends NodeValue {
         return "'"+getString()+"'";
     }
 
+    @Override
+    public int compareTo(NodeValueSortKey other) {
+        int cmp = 0;
+        if (other != null) {
+            String c1 = this.getCollation();
+            String c2 = other.getCollation();
+            if (c1 != null && c2 != null && c1.equals(c2)) {
+                // locales are parsed. Here we could think about caching if necessary
+                Locale desiredLocale = Locale.forLanguageTag(c1);
+                // collators are already stored in a concurrent map by the JVM, with <locale, softref<collator>>
+                Collator collator = Collator.getInstance(desiredLocale);
+                cmp = collator.compare(this.getString(), other.getString());
+            } else {
+                cmp = XSDFuncOp.compareString(this, other) ;
+            }
+        }
+        return cmp;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/3b12f834/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
index 54d3dfe..3478745 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/nodevalue/TestNodeValueSortKey.java
@@ -71,4 +71,16 @@ public class TestNodeValueSortKey {
         NodeValueSortKey nv = new NodeValueSortKey("Tutte", "it");
         assertEquals("'Tutte'", nv.toString());
     }
+
+    @Test
+    public void testCompareTo() {
+        final String languageTag = "pt";
+        NodeValueSortKey nv = new NodeValueSortKey("Bonito", languageTag);
+        assertEquals(0, nv.compareTo(null));
+        assertEquals(1, nv.compareTo(new NodeValueSortKey("Bonita", languageTag)));
+        assertEquals(-1, nv.compareTo(new NodeValueSortKey("Bonitos", languageTag)));
+        // comparing string, regardless of the collations
+        assertEquals(1, nv.compareTo(new NodeValueSortKey("Bonita", "es")));
+        assertEquals(0, nv.compareTo(new NodeValueSortKey("Bonito", "es")));
+    }
 }