You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@daffodil.apache.org by GitBox <gi...@apache.org> on 2018/01/17 15:42:22 UTC
[GitHub] mbeckerle commented on a change in pull request #16: Implemented packed binary formats

mbeckerle commented on a change in pull request #16: Implemented packed binary formats
URL: https://github.com/apache/incubator-daffodil/pull/16#discussion_r162088581
 
 

 ##########
 File path: daffodil-lib/src/main/scala/edu/illinois/ncsa/daffodil/util/DecimalUtils.scala
 ##########
 @@ -0,0 +1,342 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.illinois.ncsa.daffodil.util
+
+import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.BinaryNumberCheckPolicy
+
+import java.math.{ BigInteger => JBigInteger, BigDecimal => JBigDecimal }
+
+object DecimalUtils {
+
+  /**
+   * Builds the table of sign nibbles accepted for packed decimal data.
+   *
+   * Under the Strict policy each category holds the single code read from
+   * `signCodes` (whitespace removed), taken positionally: positive, negative,
+   * unsigned, zero sign. Under the Lax policy the fixed lists below are used
+   * instead. `signCodes` is parsed before the policy is examined, so a
+   * malformed string fails under either policy.
+   *
+   * @param signCodes hexadecimal sign code characters, optionally separated by
+   *                  whitespace; only the first four remaining characters are read
+   * @param policy selects between the codes in `signCodes` and the fixed lists
+   * @return map keyed by "positive", "negative", "unsigned" and "zero_sign"
+   * @throws NumberFormatException if one of the four characters is not a hexadecimal digit
+   * @throws ArrayIndexOutOfBoundsException if fewer than four characters remain
+   */
+  def signCodesToHex(signCodes: String, policy: BinaryNumberCheckPolicy): Map[String, List[Int]] = {
+    val codeChars = signCodes.replaceAll("\\s", "").toCharArray()
+
+    // Converts the hexadecimal character at the given position to a one-element list.
+    def codeAt(index: Int): List[Int] =
+      List(Integer.parseInt(String.valueOf(codeChars(index)), 16))
+
+    // Parsed eagerly and in positional order, regardless of the policy.
+    val strictPositive = codeAt(0)
+    val strictNegative = codeAt(1)
+    val strictUnsigned = codeAt(2)
+    val strictZeroSign = codeAt(3)
+
+    policy match {
+      case BinaryNumberCheckPolicy.Strict =>
+        Map(
+          "positive" -> strictPositive,
+          "negative" -> strictNegative,
+          "unsigned" -> strictUnsigned,
+          "zero_sign" -> strictZeroSign)
+      case BinaryNumberCheckPolicy.Lax =>
+        Map(
+          "positive" -> List(0xA, 0xC, 0xE, 0xF),
+          "negative" -> List(0xB, 0xD),
+          "unsigned" -> List(0xF),
+          "zero_sign" -> List(0xA, 0xC, 0xE, 0xF, 0x0))
+    }
+  }
+
+  /**
+   * Decodes packed decimal bytes into a signed integer.
+   *
+   * Every byte holds two nibbles. All nibbles except the low nibble of the
+   * final byte are decimal digits (0-9), most significant first; the low
+   * nibble of the final byte is the sign code. Only the "positive" and
+   * "negative" entries produced by `signCodesToHex` are consulted here.
+   *
+   * @param num packed decimal bytes; must contain at least one byte
+   * @param signCodes sign code string, passed through to `signCodesToHex`
+   * @param policy check policy, passed through to `signCodesToHex`
+   * @return the decoded value, negated when the sign nibble is a negative code
+   * @throws NumberFormatException if `num` is empty, the sign nibble is neither
+   *                               a positive nor a negative code, or a digit
+   *                               nibble is greater than 9
+   */
+  def packedToBigInteger(num: Array[Byte], signCodes: String, policy: BinaryNumberCheckPolicy): JBigInteger = {
+    val hexCodes: Map[String, List[Int]] = signCodesToHex(signCodes, policy)
+
+    // Without this guard an empty array would fail with NegativeArraySizeException
+    // when sizing the digit buffer; report it like any other malformed input.
+    if (num.isEmpty) {
+      throw new NumberFormatException("Packed decimal data is empty")
+    }
+
+    val lastIndex = num.length - 1
+
+    // Parse and validate the sign nibble (low nibble of the final byte)
+    val signNibble = num(lastIndex) & 0x0F
+    val negative = hexCodes("negative").contains(signNibble)
+    if (!negative && !hexCodes("positive").contains(signNibble)) {
+      throw new NumberFormatException("Invalid low nibble")
+    }
+
+    // Two digits per byte, minus the nibble used by the sign
+    val digits = new Array[Char](num.length * 2 - 1)
+
+    // Every byte before the last contributes both of its nibbles as digits
+    var byteIndex = 0
+    while (byteIndex < lastIndex) {
+      val unsignedByte = num(byteIndex) & 0xFF
+
+      val high = unsignedByte >>> 4
+      if (high > 0x09) {
+        throw new NumberFormatException("Invalid high nibble")
+      }
+      digits(2 * byteIndex) = ('0' + high).toChar
+
+      val low = unsignedByte & 0x0F
+      if (low > 0x09) {
+        throw new NumberFormatException("Invalid low nibble")
+      }
+      digits(2 * byteIndex + 1) = ('0' + low).toChar
+
+      byteIndex += 1
+    }
+
+    // The final byte contributes only its high nibble as the last digit
+    val lastDigit = (num(lastIndex) & 0xFF) >>> 4
+    if (lastDigit > 0x09) {
+      throw new NumberFormatException("Invalid high nibble")
+    }
+    digits(digits.length - 1) = ('0' + lastDigit).toChar
+
+    val magnitude = new JBigInteger(new String(digits))
+    if (negative) magnitude.negate() else magnitude
+  }
+
+  /**
+   * Decodes packed decimal bytes into a decimal value.
+   *
+   * The bytes are decoded by `packedToBigInteger` and the result is used as
+   * the unscaled value of a `JBigDecimal` with the given scale.
+   *
+   * @param num packed decimal bytes
+   * @param scale scale passed to the `JBigDecimal` constructor
+   * @param signCodes sign code string, passed through to `packedToBigInteger`
+   * @param policy check policy, passed through to `packedToBigInteger`
+   * @return the decoded integer combined with `scale`
+   */
+  def packedToBigDecimal(num: Array[Byte], scale: Int, signCodes: String, policy: BinaryNumberCheckPolicy): JBigDecimal = {
+    val unscaled = packedToBigInteger(num, signCodes, policy)
+    new JBigDecimal(unscaled, scale)
+  }
+
+  def packedFromBigInteger(num: String, nBits: Int, signCodes:String, policy: BinaryNumberCheckPolicy): Array[Byte] = {
+    val negative: Boolean = (num.charAt(0) == '-')
+    // Discard any sign symbols or decimal points from the string (decimal point will be maintained in binaryVirtualDecimalPoint)
 
 Review comment:
   Yes, it is symmetric. BinaryDecimalVirtualPoint is for what is called Fixed-point decimal. It provides a scale factor that converts an integer to a decimal when parsing, and a decimal back to an integer when unparsing. Data should round-trip. This is used all the time in financial data where floating point is never used, base 10 rounding rules are required, etc. 
   
   There's an obscure case, which I believe *is* used: when the binaryDecimalVirtualPoint is negative, it scales an integer (on parsing) into a bigger integer, and cuts it down to a smaller integer on unparsing (we have to think about rounding in this latter case, as the digits being lost aren't necessarily zeros). 
   
   This is used for currency units like Japanese Yen, where often "000" are removed, so that what is stored in the data is thousands of Yen, but logically the data is single Yen. (Yen are like pennies - so thousands of yen is like $10 units, roughly in concept.) 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services