You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by al...@apache.org on 2020/07/31 07:12:24 UTC

[zeppelin] branch master updated: [ZEPPELIN-4964] Cassandra: add formatter for decimal values

This is an automated email from the ASF dual-hosted git repository.

alexott pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e0bd50  [ZEPPELIN-4964] Cassandra: add formatter for decimal values
8e0bd50 is described below

commit 8e0bd50b72c05dc3bd998c5ff873d9fb853c9bbe
Author: Alex Ott <al...@gmail.com>
AuthorDate: Wed Jul 22 14:45:13 2020 +0200

    [ZEPPELIN-4964] Cassandra: add formatter for decimal values
    
    ### What is this PR for?
    
    Adds a new option for formatting `decimal` values.  By default they are formatted without any
    precision loss (the default precision is the special value -1, which means "show everything").
    
    ### What type of PR is it?
    Improvement
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4964
    
    ### How should this be tested?
    * https://travis-ci.org/github/alexott/zeppelin/builds/710727496
    * tested manually
    
    Author: Alex Ott <al...@gmail.com>
    
    Closes #3862 from alexott/ZEPPELIN-4964 and squashes the following commits:
    
    f9a873628 [Alex Ott] [ZEPPELIN-4964] Add formatter for decimal values
---
 .../zeppelin/cassandra/CassandraInterpreter.java   |  2 +
 .../src/main/resources/interpreter-setting.json    | 11 ++++-
 .../apache/zeppelin/cassandra/CqlFormatter.scala   | 50 ++++++++++++++++------
 .../zeppelin/cassandra/InterpreterLogic.scala      |  3 ++
 .../cassandra/CassandraInterpreterTest.java        | 10 +++--
 cassandra/src/test/resources/prepare_all.cql       |  2 +-
 .../zeppelin/cassandra/CqlFormatterTest.scala      |  9 +++-
 docs/interpreter/cassandra.md                      | 14 +++++-
 8 files changed, 77 insertions(+), 24 deletions(-)

diff --git a/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java b/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java
index 3a00448..e0e5167 100644
--- a/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java
+++ b/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java
@@ -118,6 +118,8 @@ public class CassandraInterpreter extends Interpreter {
           "cassandra.format.float_precision";
   public static final String CASSANDRA_FORMAT_DOUBLE_PRECISION =
           "cassandra.format.double_precision";
+  public static final String CASSANDRA_FORMAT_DECIMAL_PRECISION =
+          "cassandra.format.decimal_precision";
   public static final String CASSANDRA_FORMAT_TIMESTAMP =
           "cassandra.format.timestamp";
   public static final String CASSANDRA_FORMAT_TIME =
diff --git a/cassandra/src/main/resources/interpreter-setting.json b/cassandra/src/main/resources/interpreter-setting.json
index 5c32ad3..5870635 100644
--- a/cassandra/src/main/resources/interpreter-setting.json
+++ b/cassandra/src/main/resources/interpreter-setting.json
@@ -246,14 +246,21 @@
         "envName": null,
         "propertyName": "cassandra.format.float_precision",
         "defaultValue": "5",
-        "description": "Precision for formatting of float columns",
+        "description": "Precision for formatting of float values",
         "type": "number"
       },
       "cassandra.format.double_precision": {
         "envName": null,
         "propertyName": "cassandra.format.double_precision",
         "defaultValue": "12",
-        "description": "Precision for formatting of double columns",
+        "description": "Precision for formatting of double values",
+        "type": "number"
+      },
+      "cassandra.format.decimal_precision": {
+        "envName": null,
+        "propertyName": "cassandra.format.decimal_precision",
+        "defaultValue": "-1",
+        "description": "Precision for formatting of decimal values (by default, show everything)",
         "type": "number"
       }
     },
diff --git a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/CqlFormatter.scala b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/CqlFormatter.scala
index fe4d4ac..1715c88 100644
--- a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/CqlFormatter.scala
+++ b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/CqlFormatter.scala
@@ -37,8 +37,9 @@ import scala.collection.JavaConverters._
 object CqlFormatter {
   val DEFAULT_TIMEZONE = "UTC"
   val DEFAULT_FORMAT = "human"
-  val DEFAULT_FLOAT_PRECISION = 5
-  val DEFAULT_DOUBLE_PRECISION = 12
+  val DEFAULT_FLOAT_PRECISION: Int  = 5
+  val DEFAULT_DOUBLE_PRECISION: Int  = 12
+  val DEFAULT_DECIMAL_PRECISION: Int = -1
   val DEFAULT_TIMESTAMP_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"
   val DEFAULT_TIME_FORMAT = "HH:mm:ss.SSS"
   val DEFAULT_DATE_FORMAT = "yyyy-MM-dd"
@@ -46,10 +47,14 @@ object CqlFormatter {
 
   val allAvailableTimezones: Set[String] = TimeZone.getAvailableIDs.toSet
 
-  def getNumberFormatter(locale: Locale, precision: Int): DecimalFormat = {
-    val df = NumberFormat.getNumberInstance(locale).asInstanceOf[DecimalFormat]
-    df.applyPattern("#." + "#" * precision)
-    df
+  def getNumberFormatter(locale: Locale, precision: Int): Option[DecimalFormat] = {
+    if (precision == -1)
+      None
+    else {
+      val df = NumberFormat.getNumberInstance(locale).asInstanceOf[DecimalFormat]
+      df.applyPattern("#." + "#" * precision)
+      Some(df)
+    }
   }
 
   def getDateTimeFormatter(locale: Locale, timeZone: ZoneId, format: String): DateTimeFormatter = {
@@ -83,6 +88,7 @@ object CqlFormatter {
 class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
                    val floatPrecision: Int = CqlFormatter.DEFAULT_FLOAT_PRECISION,
                    val doublePrecision: Int = CqlFormatter.DEFAULT_DOUBLE_PRECISION,
+                   val decimalPrecision: Int = CqlFormatter.DEFAULT_DECIMAL_PRECISION,
                    val timestampFormat: String = CqlFormatter.DEFAULT_TIMESTAMP_FORMAT,
                    val timeFormat: String = CqlFormatter.DEFAULT_TIME_FORMAT,
                    val dateFormat: String = CqlFormatter.DEFAULT_DATE_FORMAT,
@@ -93,8 +99,9 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
   val locale: Locale = CqlFormatter.getLocale(localeStr)
   val timeZone: ZoneId = CqlFormatter.getTimezone(timeZoneId)
 
-  val floatFormatter: DecimalFormat = CqlFormatter.getNumberFormatter(locale, floatPrecision)
-  val doubleFormatter: DecimalFormat = CqlFormatter.getNumberFormatter(locale, doublePrecision)
+  val floatFormatter: Option[DecimalFormat] = CqlFormatter.getNumberFormatter(locale, floatPrecision)
+  val doubleFormatter: Option[DecimalFormat] = CqlFormatter.getNumberFormatter(locale, doublePrecision)
+  val decimalFormatter: Option[DecimalFormat] = CqlFormatter.getNumberFormatter(locale, decimalPrecision)
 
   val timestampFormatter: DateTimeFormatter = CqlFormatter.getDateTimeFormatter(
     locale, timeZone, timestampFormat)
@@ -112,6 +119,9 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
       properties.getProperty(
         CassandraInterpreter.CASSANDRA_FORMAT_DOUBLE_PRECISION,
         CqlFormatter.DEFAULT_DOUBLE_PRECISION.toString).toInt,
+      properties.getProperty(
+        CassandraInterpreter.CASSANDRA_FORMAT_DECIMAL_PRECISION,
+        CqlFormatter.DEFAULT_DECIMAL_PRECISION.toString).toInt,
       properties.getProperty(CassandraInterpreter.CASSANDRA_FORMAT_TIMESTAMP,
         CqlFormatter.DEFAULT_TIMESTAMP_FORMAT),
       properties.getProperty(CassandraInterpreter.CASSANDRA_FORMAT_TIME,
@@ -128,13 +138,14 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
   def copy(outputFormat: String = this.outputFormat,
            floatPrecision: Int = this.floatPrecision,
            doublePrecision: Int = this.doublePrecision,
+           decimalPrecision: Int = this.decimalPrecision,
            timestampFormat: String = this.timestampFormat,
            timeFormat: String = this.timeFormat,
            dateFormat: String = this.dateFormat,
            timeZoneId: String = this.timeZoneId,
            localeStr: String = this.localeStr) =
-    new CqlFormatter(outputFormat, floatPrecision, doublePrecision, timestampFormat,
-      timeFormat, dateFormat, timeZoneId, localeStr)
+    new CqlFormatter(outputFormat, floatPrecision, doublePrecision, decimalPrecision,
+      timestampFormat, timeFormat, dateFormat, timeZoneId, localeStr)
 
   def formatHuman(obj: Object): String = {
     if (obj == null) {
@@ -142,9 +153,20 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
     } else {
       obj match {
         case f: java.lang.Float =>
-          floatFormatter.format(f)
+          floatFormatter match {
+            case None => java.lang.Float.toString(f)
+            case Some(fmt) => fmt.format(f)
+          }
         case d: java.lang.Double =>
-          doubleFormatter.format(d)
+          doubleFormatter match {
+            case None => java.lang.Double.toString(d)
+            case Some(fmt) => fmt.format(d)
+          }
+        case dc: java.math.BigDecimal =>
+          decimalFormatter match {
+            case None => dc.toString
+            case Some(fmt) => fmt.format(dc)
+          }
         case m: java.util.Map[Object, Object] =>
           m.asScala.map{case(k,v) => formatHuman(k) + ": " + formatHuman(v)}.mkString("{", ", ", "}")
         case l: java.util.List[Object] =>
@@ -177,7 +199,7 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
     if (isCqlFormat) {
       codec.format(obj)
     } else {
-      formatHuman(obj)
+      formatHuman(obj, codec)
     }
   }
 
@@ -191,6 +213,6 @@ class CqlFormatter(val outputFormat: String = CqlFormatter.DEFAULT_FORMAT,
   }
 
   override def toString: String = s"CqlFormatter(format=$outputFormat, fp=$floatPrecision, dp=$doublePrecision, " +
-    s"tsFormat=$timestampFormat, tmFormat=$timeFormat, dtFormat=$dateFormat, " +
+    s", dcp=$decimalPrecision, tsFormat=$timestampFormat, tmFormat=$timeFormat, dtFormat=$dateFormat, " +
     s"timeozone=$timeZoneId, locale=$localeStr)"
 }
diff --git a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
index 3fba775..5529a71 100644
--- a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
+++ b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
@@ -267,6 +267,8 @@ class InterpreterLogic(val session: CqlSession, val properties: Properties)  {
           formatter.floatPrecision.toString).toInt
         val doublePrecision: Int = props.getOrDefault("doublePrecision",
           formatter.doublePrecision.toString).toInt
+        val decimalPrecision: Int = props.getOrDefault("decimalPrecision",
+          formatter.decimalPrecision.toString).toInt
         val timestampFormat = props.getOrDefault("timestampFormat", formatter.timestampFormat)
         val timeFormat = props.getOrDefault("timeFormat", formatter.timeFormat)
         val dateFormat = props.getOrDefault("dateFormat", formatter.dateFormat)
@@ -275,6 +277,7 @@ class InterpreterLogic(val session: CqlSession, val properties: Properties)  {
           outputFormat = outputFormat,
           floatPrecision = floatPrecision,
           doublePrecision = doublePrecision,
+          decimalPrecision = decimalPrecision,
           timestampFormat = timestampFormat,
           timeFormat = timeFormat,
           dateFormat = dateFormat,
diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
index e2fa350..5e4c994 100644
--- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
+++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
@@ -210,17 +210,19 @@ public class CassandraInterpreterTest { //extends AbstractCassandraUnit4CQLTestC
     props.put("locale", "de_DE");
     props.put("floatPrecision", "2");
     props.put("doublePrecision", "4");
+    props.put("decimalPrecision", "5");
     props.put("timeFormat", "hh:mma");
     props.put("timestampFormat", "MM/dd/yy HH:mm");
     props.put("dateFormat", "E, d MMM yy");
     props.put("timezone", "Etc/GMT+2");
     String query =
-            "select date, time, timestamp, double, float, tuple, udt from zeppelin.test_format;";
+            "select date,time,timestamp,dec,double,float,tuple,udt from zeppelin.test_format;";
     final InterpreterResult actual = interpreter.interpret(query, intrContext);
     props.remove("outputFormat");
     props.remove("locale");
     props.remove("floatPrecision");
     props.remove("doublePrecision");
+    props.remove("decimalPrecision");
     props.remove("timeFormat");
     props.remove("timestampFormat");
     props.remove("dateFormat");
@@ -229,9 +231,9 @@ public class CassandraInterpreterTest { //extends AbstractCassandraUnit4CQLTestC
     //Then
     assertThat(actual).isNotNull();
     assertThat(actual.code()).isEqualTo(Code.SUCCESS);
-    String expected = "date\ttime\ttimestamp\tdouble\tfloat\ttuple\tudt\n" +
-            "Di, 29 Jan 19\t04:05AM\t06/16/20 21:59\t10,0153\t20,03\t(1, text, 10)\t" +
-            "{id: 1, t: text, lst: [1, 2, 3]}\n";
+    String expected = "date\ttime\ttimestamp\tdec\tdouble\tfloat\ttuple\tudt\n" +
+            "Di, 29 Jan 19\t04:05AM\t06/16/20 21:59\t123562352352,12346\t10,0153\t20,03\t" +
+            "(1, text, 10)\t{id: 1, t: text, lst: [1, 2, 3]}\n";
     assertThat(actual.message().get(0).getData()).isEqualTo(expected);
   }
 
diff --git a/cassandra/src/test/resources/prepare_all.cql b/cassandra/src/test/resources/prepare_all.cql
index 3e2d25d..44e296e 100644
--- a/cassandra/src/test/resources/prepare_all.cql
+++ b/cassandra/src/test/resources/prepare_all.cql
@@ -96,7 +96,7 @@ insert into zeppelin.test_format(id, text, date, timestamp, time, uuid, tuuid, m
   values (1, 'text', '2019-01-29', '2020-06-16T23:59:59.123Z', '04:05:00.234', 123e4567-e89b-12d3-a456-426655440000,
   now(), {1:'m1', 2:'m2'}, {'m1':1, 'm2':2}, [1,2,3], [1,2,3], {1,2,3}, {'1','2','3'},
   {id: 1, t: 'text', lst: [1,2,3]}, [{id: 1, t: 'text', lst: [1,2,3]}, {id: 2, t: 'text2'}],
-  [{id: 1, t: 'text', lst: [1,2,3]}, {id: 2, t: 'text2'}], bigintAsBlob(1024), true, 123562352352.0,
+  [{id: 1, t: 'text', lst: [1,2,3]}, {id: 2, t: 'text2'}], bigintAsBlob(1024), true, 123562352352.123456789012345678,
   10.0153423453425634653463466346543, 20.0303443, 123562352352, 10000, 10,
   124325345634643900999999, 'ascii', (1, 'text', 10.0), 'varchar русский'); 
 
diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
index b855232..bf86227 100644
--- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
+++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
@@ -33,6 +33,7 @@ class CqlFormatterTest extends FlatSpec
 
   val longVal: java.lang.Long = java.lang.Long.valueOf(12345678901L)
   val floatVal: java.lang.Float = java.lang.Float.valueOf(123.456789f)
+  val decimalVal: java.math.BigDecimal = java.math.BigDecimal.valueOf(123.4567890123456789)
   val intVal: java.lang.Integer = Integer.valueOf(123456)
   val doubleVal: java.lang.Double = java.lang.Double.valueOf(123.4567890123456789)
   val dateVal: LocalDate = LocalDate.of(2020, 6, 16)
@@ -49,6 +50,7 @@ class CqlFormatterTest extends FlatSpec
     defaultFormatter.formatHuman(longVal) should be("12345678901")
     defaultFormatter.formatHuman(floatVal) should be("123.45679")
     defaultFormatter.formatHuman(doubleVal) should be("123.456789012346")
+    defaultFormatter.formatHuman(decimalVal) should be("123.45678901234568")
     defaultFormatter.formatHuman("just text") should be("just text")
     defaultFormatter.formatHuman(java.lang.Boolean.TRUE) should be("true")
     defaultFormatter.formatHuman(List(1,2,3).asJava) should be("[1, 2, 3]")
@@ -69,9 +71,11 @@ class CqlFormatterTest extends FlatSpec
   "CqlFormatter" should "format objects with copied settings" in {
     val copiedFormatter = new CqlFormatter()
       .copy(floatPrecision = 2, doublePrecision = 4, timeZoneId = "Etc/GMT+2",
-        timeFormat = "hh:mma", dateFormat = "E, d MMM yy", localeStr = "en_US")
+        timeFormat = "hh:mma", dateFormat = "E, d MMM yy", localeStr = "en_US",
+        decimalPrecision = 5)
     copiedFormatter.formatHuman(floatVal) should be("123.46")
     copiedFormatter.formatHuman(doubleVal) should be("123.4568")
+    copiedFormatter.formatHuman(decimalVal) should be("123.45679")
     copiedFormatter.formatHuman(timestampVal) should be("2020-06-16T21:59:59.123-02:00")
     copiedFormatter.formatHuman(timeVal) should be("11:59PM")
     copiedFormatter.formatHuman(dateVal) should be("Tue, 16 Jun 20")
@@ -81,6 +85,7 @@ class CqlFormatterTest extends FlatSpec
     val properties = new Properties()
     properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_FLOAT_PRECISION, "2")
     properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_DOUBLE_PRECISION, "4")
+    properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_DECIMAL_PRECISION, "5")
     properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_TIME, "hh:mma")
     properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_DATE, "E, d MMM yy")
     properties.setProperty(CassandraInterpreter.CASSANDRA_FORMAT_TIMEZONE, "Etc/GMT+2")
@@ -88,6 +93,7 @@ class CqlFormatterTest extends FlatSpec
     val copiedFormatter = new CqlFormatter(properties)
     copiedFormatter.formatHuman(floatVal) should be("123.46")
     copiedFormatter.formatHuman(doubleVal) should be("123.4568")
+    copiedFormatter.formatHuman(decimalVal) should be("123.45679")
     copiedFormatter.formatHuman(timestampVal) should be("2020-06-16T21:59:59.123-02:00")
     copiedFormatter.formatHuman(timeVal) should be("11:59PM")
     copiedFormatter.formatHuman(dateVal) should be("Tue, 16 Jun 20")
@@ -111,6 +117,7 @@ class CqlFormatterTest extends FlatSpec
     cqlFormatter.format(longVal, codecRegistry.codecFor(DataTypes.BIGINT)) should be("12345678901")
     cqlFormatter.format(floatVal, codecRegistry.codecFor(DataTypes.FLOAT)) should be("123.45679")
     cqlFormatter.format(doubleVal, codecRegistry.codecFor(DataTypes.DOUBLE)) should be("123.45678901234568")
+    cqlFormatter.format(decimalVal, codecRegistry.codecFor(DataTypes.DECIMAL)) should be("123.45678901234568")
     cqlFormatter.format("just text", codecRegistry.codecFor(DataTypes.TEXT)) should be("'just text'")
     cqlFormatter.format(java.lang.Boolean.TRUE, codecRegistry.codecFor(DataTypes.BOOLEAN)) should be("true")
     cqlFormatter.format(dateVal,
diff --git a/docs/interpreter/cassandra.md b/docs/interpreter/cassandra.md
index 2088f9f..0de7b51 100644
--- a/docs/interpreter/cassandra.md
+++ b/docs/interpreter/cassandra.md
@@ -444,12 +444,17 @@ Sometimes you want to be able to format output of your statement. Cassandra inte
    <tr>
      <td nowrap>Float precision</td>
      <td><strong>floatPrecision=<em>value</em></strong></td>
-     <td>Precision when formatting <tt>float</tt> values. Any positive integer value</td>
+     <td>Precision when formatting <tt>float</tt> values. Any positive integer value, or <tt>-1</tt> to show everything</td>
    </tr>
    <tr>
      <td nowrap>Double precision</td>
      <td><strong>doublePrecision=<em>value</em></strong></td>
-     <td>Precision when formatting <tt>double</tt> values. Any positive integer value</td>
+     <td>Precision when formatting <tt>double</tt> values. Any positive integer value, or <tt>-1</tt> to show everything</td>
+   </tr>
+   <tr>
+     <td nowrap>Decimal precision</td>
+     <td><strong>decimalPrecision=<em>value</em></strong></td>
+     <td>Precision when formatting <tt>decimal</tt> values. Any positive integer value, or <tt>-1</tt> to show everything</td>
    </tr>
    <tr>
      <td nowrap>Timestamp Format</td>
@@ -890,6 +895,11 @@ Below are the configuration parameters supported by interpreter and their defaul
      <td>Precision when formatting values of `double` type</td>
      <td>`12`</td>
    </tr>
+   <tr>
+     <td>`cassandra.format.decimal_precision`</td>
+     <td>Precision when formatting values of `decimal` type</td>
+     <td>`-1` (show everything)</td>
+   </tr>
  </table>
 
 Besides these parameters, it's also possible to set other driver parameters by adding them into interpreter configuration.  The configuration key should have full form with `datastax-java-driver` prefix, as [described in documentation](https://docs.datastax.com/en/developer/java-driver/latest/manual/core/configuration/).  For example, to specify 5 seconds request timeout, you can use `datastax-java-driver.basic.request.timeout` with value of `5 seconds`.  Full list of available configura [...]