You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2017/09/13 12:49:34 UTC

[1/2] incubator-hivemall git commit: Close #110: [HIVEMALL-142] Implement SingularizeUDF

Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 8639810d3 -> bedbd39ca


Close #110: [HIVEMALL-142] Implement SingularizeUDF


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/5e1d0d07
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/5e1d0d07
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/5e1d0d07

Branch: refs/heads/master
Commit: 5e1d0d0703d5d6d9e217c6e8a8345138b78e6843
Parents: 8639810
Author: Takuya Kitazawa <k....@gmail.com>
Authored: Wed Sep 13 21:35:56 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Wed Sep 13 21:35:56 2017 +0900

----------------------------------------------------------------------
 .../hivemall/tools/text/SingularizeUDF.java     | 173 +++++++++++++++++++
 .../java/hivemall/utils/lang/StringUtils.java   |  38 +++-
 .../hivemall/tools/text/SingularizeUDFTest.java |  71 ++++++++
 docs/gitbook/misc/generic_funcs.md              |   8 +
 resources/ddl/define-all-as-permanent.hive      |   3 +
 resources/ddl/define-all.hive                   |   3 +
 resources/ddl/define-all.spark                  |   3 +
 resources/ddl/define-udfs.td.hql                |   1 +
 8 files changed, 292 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/tools/text/SingularizeUDF.java b/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
new file mode 100644
index 0000000..390e3f2
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.text;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import hivemall.utils.lang.StringUtils;
+
+/** Hive UDF returning the singular form of a given English word; inspired by the implementations linked below.
+ * @link
+ *       https://github.com/sundrio/sundrio/blob/95c2b11f7b842bdaa04f61e8e338aea60fb38f70/codegen/src
+ *       /main/java/io/sundr/codegen/functions/Singularize.java
+ * @link https://github.com/clips/pattern/blob/
+ *       3eef00481a4555331cf9a099308910d977f6fc22/pattern/text/en/inflect.py#L445-L623
+ */
+@Description(name = "singularize",
+        value = "_FUNC_(string word) - Returns singular form of a given English word")
+@UDFType(deterministic = true, stateful = false)
+public final class SingularizeUDF extends UDF {
+
+    // Both arrays below are kept in ascending (i.e., alphabetical) order because they are looked up with Arrays.binarySearch.
+    // Prepositions used to detect compound words of the form "plural-preposition-something" (e.g., mothers-in-law).
+    private static final String[] prepositions = new String[] {"about", "above", "across", "after",
+            "among", "around", "at", "athwart", "before", "behind", "below", "beneath", "beside",
+            "besides", "between", "betwixt", "beyond", "but", "by", "during", "except", "for",
+            "from", "in", "into", "near", "of", "off", "on", "onto", "out", "over", "since",
+            "till", "to", "under", "until", "unto", "upon", "with"};
+    // Uninflected or uncountable words; singularize returns these as-is.
+    private static final String[] unchanged = new String[] {"advice", "bison", "bread", "bream",
+            "breeches", "britches", "butter", "carp", "chassis", "cheese", "christmas", "clippers",
+            "cod", "contretemps", "corps", "debris", "diabetes", "djinn", "eland", "electricity",
+            "elk", "equipment", "flounder", "fruit", "furniture", "gallows", "garbage", "georgia",
+            "graffiti", "gravel", "happiness", "headquarters", "herpes", "high-jinks", "homework",
+            "information", "innings", "jackanapes", "ketchup", "knowledge", "love", "luggage",
+            "mackerel", "mathematics", "mayonnaise", "measles", "meat", "mews", "mumps", "mustard",
+            "news", "news", "pincers", "pliers", "proceedings", "progress", "rabies", "research", // NOTE(review): "news" is listed twice; harmless for binarySearch but redundant
+            "rice", "salmon", "sand", "scissors", "series", "shears", "software", "species",
+            "swine", "swiss", "trout", "tuna", "understanding", "water", "whiting", "wildebeest"};
+
+    private static final Map<String, String> irregular = new HashMap<String, String>(); // irregular plural -> singular, matched on the exact word
+    static {
+        irregular.put("atlantes", "atlas");
+        irregular.put("atlases", "atlas");
+        irregular.put("axes", "axe");
+        irregular.put("beeves", "beef");
+        irregular.put("brethren", "brother");
+        irregular.put("children", "child");
+        irregular.put("corpora", "corpus");
+        irregular.put("corpuses", "corpus");
+        irregular.put("ephemerides", "ephemeris");
+        irregular.put("feet", "foot");
+        irregular.put("ganglia", "ganglion");
+        irregular.put("geese", "goose");
+        irregular.put("genera", "genus");
+        irregular.put("genii", "genie");
+        irregular.put("graffiti", "graffito"); // NOTE(review): never returned; "graffiti" is also in `unchanged`, which singularize checks first
+        irregular.put("helves", "helve");
+        irregular.put("kine", "cow");
+        irregular.put("leaves", "leaf");
+        irregular.put("loaves", "loaf");
+        irregular.put("men", "man");
+        irregular.put("mongooses", "mongoose");
+        irregular.put("monies", "money");
+        irregular.put("moves", "move");
+        irregular.put("mythoi", "mythos");
+        irregular.put("numena", "numen");
+        irregular.put("occipita", "occiput");
+        irregular.put("octopodes", "octopus");
+        irregular.put("opera", "opus");
+        irregular.put("opuses", "opus");
+        irregular.put("our", "my");
+        irregular.put("oxen", "ox");
+        irregular.put("penes", "penis");
+        irregular.put("penises", "penis");
+        irregular.put("people", "person");
+        irregular.put("sexes", "sex");
+        irregular.put("soliloquies", "soliloquy");
+        irregular.put("teeth", "tooth");
+        irregular.put("testes", "testis");
+        irregular.put("trilbys", "trilby");
+        irregular.put("turves", "turf");
+        irregular.put("zoa", "zoon");
+    }
+
+    private static final List<String> rules = Arrays.asList(
+        // Flat list of pairs: regexp1, replacement1, regexp2, replacement2, ... Rules are tried in order; the first regexp that matches wins.
+        "(quiz)zes$", "$1", "(matr)ices$", "$1ix", "(vert|ind)ices$", "$1ex", "^(ox)en", "$1",
+        "(alias|status)$", "$1", "(alias|status)es$", "$1", "(octop|vir)us$", "$1us",
+        "(octop|vir)i$", "$1us", "(cris|ax|test)es$", "$1is", "(cris|ax|test)is$", "$1is",
+        "(shoe)s$", "$1", "(o)es$", "$1", "(bus)es$", "$1", "([m|l])ice$", "$1ouse", // NOTE(review): [m|l] also matches a literal '|'; [ml] was probably intended
+        "(x|ch|ss|sh)es$", "$1", "(m)ovies$", "$1ovie", "(s)eries$", "$1eries",
+        "([^aeiouy]|qu)ies$", "$1y", "([lr])ves$", "$1f", "(tive)s$", "$1", "(hive)s$", "$1",
+        "([^f])ves$", "$1fe", "(^analy)sis$", "$1sis", "(^analy)ses$", "$1sis",
+        "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis", "([ti])a$",
+        "$1um", "(n)ews$", "$1ews", "(s|si|u)s$", "$1s", "s$", "");
+
+    @Nullable
+    public String evaluate(@Nullable String word) { // thin wrapper: delegates to singularize(word), which passes null and "" through unchanged
+        return singularize(word);
+    }
+
+    @Nullable
+    private String singularize(@Nullable String word) { // checks run in order: unchanged list, compound word, trailing apostrophe, irregular map, regexp rules
+        if (word == null) {
+            return null;
+        }
+
+        if (word.isEmpty()) {
+            return word;
+        }
+
+        if (Arrays.binarySearch(unchanged, word) >= 0) { // exact, case-sensitive lookup in the sorted array
+            return word;
+        }
+
+        if (word.contains("-")) { // compound words (e.g., mothers-in-law)
+            final List<String> chunks = new ArrayList<>();
+            chunks.addAll(Arrays.asList(word.split("-")));
+
+            if ((chunks.size() > 1) && (Arrays.binarySearch(prepositions, chunks.get(1)) >= 0)) { // second chunk is a preposition => only the head chunk is singularized
+                String head = chunks.remove(0);
+                return singularize(head) + "-" + StringUtils.join(chunks, "-");
+            }
+        }
+
+        if (word.endsWith("'")) { // dogs' => dog's
+            return singularize(word.substring(0, word.length() - 1)) + "'s";
+        }
+
+        if (irregular.containsKey(word)) { // exact-match lookup of known irregular plurals
+            return irregular.get(word);
+        }
+
+        for (int i = 0, n = rules.size(); i < n; i += 2) { // step by 2 over the (regexp, replacement) pairs
+            Pattern pattern = Pattern.compile(rules.get(i), Pattern.CASE_INSENSITIVE); // NOTE(review): every rule is recompiled on each call; the patterns could be compiled once
+            Matcher matcher = pattern.matcher(word);
+            if (matcher.find()) { // first matching rule wins
+                return matcher.replaceAll(rules.get(i + 1));
+            }
+        }
+
+        return word; // no rule matched: return the input unchanged
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/core/src/main/java/hivemall/utils/lang/StringUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/lang/StringUtils.java b/core/src/main/java/hivemall/utils/lang/StringUtils.java
index 5b66dd1..3652ebd 100644
--- a/core/src/main/java/hivemall/utils/lang/StringUtils.java
+++ b/core/src/main/java/hivemall/utils/lang/StringUtils.java
@@ -28,7 +28,8 @@ public final class StringUtils {
 
     private StringUtils() {}
 
-    public static byte[] getBytes(final String s) {
+    @Nonnull
+    public static byte[] getBytes(@Nonnull final String s) {
         final int len = s.length();
         final byte[] b = new byte[len * 2];
         for (int i = 0; i < len; i++) {
@@ -37,11 +38,13 @@ public final class StringUtils {
         return b;
     }
 
-    public static String toString(byte[] b) {
+    @Nonnull
+    public static String toString(@Nonnull final byte[] b) {
         return toString(b, 0, b.length);
     }
 
-    public static String toString(byte[] b, int off, int len) {
+    @Nonnull
+    public static String toString(@Nonnull final byte[] b, final int off, final int len) {
         final int clen = len >>> 1;
         final char[] c = new char[clen];
         for (int i = 0; i < clen; i++) {
@@ -53,11 +56,11 @@ public final class StringUtils {
 
     /**
      * Checks whether the String a valid Java number. this code is ported from jakarta commons lang.
-     * 
+     *
      * @link http://jakarta.apache.org/commons/lang/apidocs/org/apache/commons/lang
      *       /math/NumberUtils.html
      */
-    public static boolean isNumber(final String str) {
+    public static boolean isNumber(@Nullable final String str) {
         if (str == null || str.length() == 0) {
             return false;
         }
@@ -97,7 +100,7 @@ public final class StringUtils {
 
             } else if (chars[i] == '.') {
                 if (hasDecPoint || hasExp) {
-                    // two decimal points or dec in exponent   
+                    // two decimal points or dec in exponent
                     return false;
                 }
                 hasDecPoint = true;
@@ -170,6 +173,7 @@ public final class StringUtils {
         buf.setLength(0);
     }
 
+    @Nonnull
     public static String concat(@Nonnull final List<String> list, @Nonnull final String sep) {
         final StringBuilder buf = new StringBuilder(128);
         for (String s : list) {
@@ -182,11 +186,29 @@ public final class StringUtils {
         return buf.toString();
     }
 
-    public static String[] split(final String str, final char separatorChar) {
+    @Nonnull
+    public static String join(@Nonnull final List<String> list, @Nonnull final String sep) { // concatenates the elements of list, inserting sep between consecutive elements
+        final StringBuilder buf = new StringBuilder(128);
+        for (int i = 0, size = list.size(); i < size; i++) {
+            if (i > 0) { // append separator before each element, except for the head element
+                buf.append(sep);
+            }
+
+            final String s = list.get(i);
+            if (s != null) { // a null element adds no text, but its separator is still emitted
+                buf.append(s);
+            }
+        }
+        return buf.toString(); // an empty list yields ""
+    }
+
+    @Nullable
+    public static String[] split(@Nullable final String str, final char separatorChar) {
         return split(str, separatorChar, false);
     }
 
-    public static String[] split(final String str, final char separatorChar,
+    @Nullable
+    public static String[] split(@Nullable final String str, final char separatorChar,
             final boolean preserveAllTokens) {
         if (str == null) {
             return null;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/core/src/test/java/hivemall/tools/text/SingularizeUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/text/SingularizeUDFTest.java b/core/src/test/java/hivemall/tools/text/SingularizeUDFTest.java
new file mode 100644
index 0000000..6ea9cc3
--- /dev/null
+++ b/core/src/test/java/hivemall/tools/text/SingularizeUDFTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.text;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SingularizeUDFTest { // exercises SingularizeUDF#evaluate, one test per branch of singularize
+
+    private SingularizeUDF udf;
+
+    @Before
+    public void setUp() { // fresh UDF instance for each test
+        this.udf = new SingularizeUDF();
+    }
+
+    @Test
+    public void testNull() { // null in, null out
+        Assert.assertEquals(null, udf.evaluate(null));
+    }
+
+    @Test
+    public void testEmpty() { // the empty string is returned as-is
+        Assert.assertEquals("", udf.evaluate(""));
+    }
+
+    @Test
+    public void testUnchanged() { // "christmas" is in the unchanged list
+        Assert.assertEquals("christmas", udf.evaluate("christmas"));
+    }
+
+    @Test
+    public void testCompound() { // head chunk is singularized because the second chunk "in" is a preposition
+        Assert.assertEquals("mother-in-law", udf.evaluate("mothers-in-law"));
+    }
+
+    @Test
+    public void testTailSingleQuote() { // trailing apostrophe: "dogs" is singularized and "'s" appended
+        Assert.assertEquals("dog's", udf.evaluate("dogs'"));
+    }
+
+    @Test
+    public void testIrregular() { // looked up in the irregular map
+        Assert.assertEquals("child", udf.evaluate("children"));
+    }
+
+    @Test
+    public void testRule() { // regexp rules
+        Assert.assertEquals("apple", udf.evaluate("apples")); // falls through to the final "s$" rule
+        Assert.assertEquals("bus", udf.evaluate("buses")); // "(bus)es$" rule
+        Assert.assertEquals("candy", udf.evaluate("candies")); // "([^aeiouy]|qu)ies$" rule
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/docs/gitbook/misc/generic_funcs.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/misc/generic_funcs.md b/docs/gitbook/misc/generic_funcs.md
index 03e1ef3..9775439 100644
--- a/docs/gitbook/misc/generic_funcs.md
+++ b/docs/gitbook/misc/generic_funcs.md
@@ -239,6 +239,14 @@ The compression level must be in range [-1,9]
 
 - `is_stopword(string word)` - Returns whether English stopword or not
 
+- `singularize(string word)` - Returns singular form of a given English word
+
+    ```sql
+    select singularize(lower("Apples"));
+
+    > "apple"
+    ```
+
 - `tokenize(string englishText [, boolean toLowerCase])` - Returns words in array<string>
 
 - `tokenize_ja(String line [, const string mode = "normal", const list<string> stopWords, const list<string> stopTags])` - returns tokenized strings in array<string>. Refer [this article](../misc/tokenizer.html) for detail.

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/resources/ddl/define-all-as-permanent.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive
index b1c0075..100fe22 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -538,6 +538,9 @@ CREATE FUNCTION tokenize as 'hivemall.tools.text.TokenizeUDF' USING JAR '${hivem
 DROP FUNCTION IF EXISTS is_stopword;
 CREATE FUNCTION is_stopword as 'hivemall.tools.text.StopwordUDF' USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS singularize;
+CREATE FUNCTION singularize as 'hivemall.tools.text.SingularizeUDF' USING JAR '${hivemall_jar}';
+
 DROP FUNCTION IF EXISTS split_words;
 CREATE FUNCTION split_words as 'hivemall.tools.text.SplitWordsUDF' USING JAR '${hivemall_jar}';
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/resources/ddl/define-all.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index e1933b4..6fb34ca 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -530,6 +530,9 @@ create temporary function tokenize as 'hivemall.tools.text.TokenizeUDF';
 drop temporary function if exists is_stopword;
 create temporary function is_stopword as 'hivemall.tools.text.StopwordUDF';
 
+drop temporary function if exists singularize;
+create temporary function singularize as 'hivemall.tools.text.SingularizeUDF';
+
 drop temporary function if exists split_words;
 create temporary function split_words as 'hivemall.tools.text.SplitWordsUDF';
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/resources/ddl/define-all.spark
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark
index db29d85..d0a1084 100644
--- a/resources/ddl/define-all.spark
+++ b/resources/ddl/define-all.spark
@@ -514,6 +514,9 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION tokenize AS 'hivemall.tools.text.Token
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS is_stopword")
 sqlContext.sql("CREATE TEMPORARY FUNCTION is_stopword AS 'hivemall.tools.text.StopwordUDF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS singularize")
+sqlContext.sql("CREATE TEMPORARY FUNCTION singularize AS 'hivemall.tools.text.SingularizeUDF'")
+
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS split_words")
 sqlContext.sql("CREATE TEMPORARY FUNCTION split_words AS 'hivemall.tools.text.SplitWordsUDF'")
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5e1d0d07/resources/ddl/define-udfs.td.hql
----------------------------------------------------------------------
diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql
index 7c9bfc7..d90cb3c 100644
--- a/resources/ddl/define-udfs.td.hql
+++ b/resources/ddl/define-udfs.td.hql
@@ -178,6 +178,7 @@ create temporary function train_ffm as 'hivemall.fm.FieldAwareFactorizationMachi
 create temporary function ffm_predict as 'hivemall.fm.FFMPredictGenericUDAF';
 create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF';
 create temporary function to_ordered_list as 'hivemall.tools.list.UDAFToOrderedList';
+create temporary function singularize as 'hivemall.tools.text.SingularizeUDF';
 
 -- NLP features
 create temporary function tokenize_ja as 'hivemall.nlp.tokenizer.KuromojiUDF';


[2/2] incubator-hivemall git commit: Applied some refactoring on SingularizeUDF

Posted by my...@apache.org.
Applied some refactoring on SingularizeUDF


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/bedbd39c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/bedbd39c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/bedbd39c

Branch: refs/heads/master
Commit: bedbd39cac612afdccfe2c0fd2e436336cb2958e
Parents: 5e1d0d0
Author: Makoto Yui <my...@apache.org>
Authored: Wed Sep 13 21:46:59 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Wed Sep 13 21:46:59 2017 +0900

----------------------------------------------------------------------
 .../hivemall/tools/text/SingularizeUDF.java     | 24 ++++++++------------
 1 file changed, 10 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bedbd39c/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/tools/text/SingularizeUDF.java b/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
index 390e3f2..775c413 100644
--- a/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
+++ b/core/src/main/java/hivemall/tools/text/SingularizeUDF.java
@@ -18,12 +18,11 @@
  */
 package hivemall.tools.text;
 
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.udf.UDFType;
+import hivemall.utils.lang.StringUtils;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -32,15 +31,13 @@ import java.util.regex.Pattern;
 
 import javax.annotation.Nullable;
 
-import hivemall.utils.lang.StringUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 
-/**
- * @link
- *       https://github.com/sundrio/sundrio/blob/95c2b11f7b842bdaa04f61e8e338aea60fb38f70/codegen/src
- *       /main/java/io/sundr/codegen/functions/Singularize.java
- * @link https://github.com/clips/pattern/blob/
- *       3eef00481a4555331cf9a099308910d977f6fc22/pattern/text/en/inflect.py#L445-L623
- */
+// Inspired by
+//  https://github.com/sundrio/sundrio/blob/95c2b11f7b842bdaa04f61e8e338aea60fb38f70/codegen/src/main/java/io/sundr/codegen/functions/Singularize.java
+//  https://github.com/clips/pattern/blob/3eef00481a4555331cf9a099308910d977f6fc22/pattern/text/en/inflect.py#L445-L623
 @Description(name = "singularize",
         value = "_FUNC_(string word) - Returns singular form of a given English word")
 @UDFType(deterministic = true, stateful = false)
@@ -128,7 +125,7 @@ public final class SingularizeUDF extends UDF {
     }
 
     @Nullable
-    private String singularize(@Nullable String word) {
+    private static String singularize(@Nullable final String word) {
         if (word == null) {
             return null;
         }
@@ -143,8 +140,7 @@ public final class SingularizeUDF extends UDF {
 
         if (word.contains("-")) { // compound words (e.g., mothers-in-law)
             final List<String> chunks = new ArrayList<>();
-            chunks.addAll(Arrays.asList(word.split("-")));
-
+            Collections.addAll(chunks, word.split("-"));
             if ((chunks.size() > 1) && (Arrays.binarySearch(prepositions, chunks.get(1)) >= 0)) {
                 String head = chunks.remove(0);
                 return singularize(head) + "-" + StringUtils.join(chunks, "-");