You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/09/18 22:35:07 UTC

[02/41] hive git commit: HIVE-11544: Improve LazySimpleSerDe null data handling for Byte, Short, Integer, Float, Long and Double. (Gopal V, reviewed by Ashutosh Chauhan)

HIVE-11544: Improve LazySimpleSerDe null data handling for Byte, Short, Integer, Float, Long and Double. (Gopal V, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/98049182
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/98049182
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/98049182

Branch: refs/heads/llap
Commit: 980491821e6c33d595e8a0e3abcfbc6c207aa436
Parents: 0bc9677
Author: Gopal V <go...@apache.org>
Authored: Mon Sep 14 18:13:42 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Mon Sep 14 18:13:42 2015 -0700

----------------------------------------------------------------------
 .../benchmark/serde/LazySimpleSerDeBench.java   | 453 +++++++++++++++++++
 .../hadoop/hive/serde2/lazy/LazyByte.java       |   4 +
 .../hadoop/hive/serde2/lazy/LazyDouble.java     |   4 +
 .../hadoop/hive/serde2/lazy/LazyFloat.java      |   4 +
 .../hadoop/hive/serde2/lazy/LazyInteger.java    |   4 +
 .../hadoop/hive/serde2/lazy/LazyLong.java       |   4 +
 .../hadoop/hive/serde2/lazy/LazyShort.java      |   4 +
 .../hadoop/hive/serde2/lazy/LazyUtils.java      |  28 ++
 8 files changed, 505 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
new file mode 100644
index 0000000..a1b63d5
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
@@ -0,0 +1,453 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.serde;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyByte;
+import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
+import org.apache.hadoop.hive.serde2.lazy.LazyFloat;
+import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+@State(Scope.Benchmark)
+public class LazySimpleSerDeBench {
+  /**
+   * This test measures the performance for LazySimpleSerDe.
+   * <p/>
+   * This test uses JMH framework for benchmarking. You may execute this
+   * benchmark tool using JMH command line in different ways:
+   * <p/>
+   * To run using default settings, use: 
+   * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.serde.LazySimpleSerDeBench
+   * <p/>
+   */
+
+  /**
+   * Common base for the lazy-primitive benchmarks: holds the field
+   * offset/size tables and the shared byte buffer reference, and declares the
+   * JMH setup and benchmark entry points.
+   */
+  @State(Scope.Thread)
+  @Fork(1)
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  public abstract static class AbstractDeserializer {
+    /** Number of fields initialized per {@link #bench()} invocation. */
+    public static final int DEFAULT_ITER_TIME = 1000 * 1000;
+
+    /** Default number of slots in the offset and size tables. */
+    public static final int DEFAULT_DATA_SIZE = 4 * 1024;
+
+    /** Buffer wrapper handed to the lazy objects; filled by {@link #setup()}. */
+    protected final ByteArrayRef ref = new ByteArrayRef();
+    /** Length in bytes of each field. */
+    public int[] sizes = new int[DEFAULT_DATA_SIZE];
+    /** Start position of each field inside the buffer held by {@link #ref}. */
+    public int[] offsets = new int[DEFAULT_DATA_SIZE];
+
+    /** Populates {@link #ref}, {@link #offsets} and {@link #sizes}. */
+    @Setup
+    public abstract void setup();
+
+    /** Benchmark body; empty here, overridden by the concrete benchmarks. */
+    @Benchmark
+    @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    public void bench() {
+    }
+  }
+
+  public static abstract class RandomDataInitializer extends
+      AbstractDeserializer {
+
+    final int width;
+
+    public RandomDataInitializer(final int width) {
+      this.width = width;
+    }
+
+    @Override
+    public void setup() {
+      int len = 0;
+      Random r = new Random();
+      for (int i = 0; i < sizes.length; i++) {
+        sizes[i] = (int) (r.nextInt(width));
+        offsets[i] = len;
+        len += sizes[i];
+      }
+      byte[] data = new byte[len + 1];
+      r.nextBytes(data);
+      ref.setData(data);
+    }
+  }
+
+  /**
+   * Fills the buffer with well-formed base-10 integers: for every pair of
+   * slots a value {@code p} in {@code [0, max)} is written, followed by
+   * {@code -(p - 1)}.
+   */
+  public static abstract class GoodDataInitializer extends AbstractDeserializer {
+
+    /** Exclusive upper bound for the generated non-negative values. */
+    public final int max;
+
+    public GoodDataInitializer(final int max) {
+      this.max = max;
+    }
+
+    @Override
+    public void setup() {
+      sizes = new int[1024];
+      offsets = new int[sizes.length];
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      Random r = new Random();
+      for (int i = 0; i < sizes.length / 2; i++) {
+        int p = r.nextInt(max);
+        int n = -1 * (p - 1);
+        // Integer.toString is locale-independent (String.format("%d") is not);
+        // the output is plain ASCII digits with an optional leading '-'.
+        byte[] ps = Integer.toString(p).getBytes(
+            java.nio.charset.StandardCharsets.US_ASCII);
+        byte[] ns = Integer.toString(n).getBytes(
+            java.nio.charset.StandardCharsets.US_ASCII);
+        // Take each offset from the stream position right before the write so
+        // the table always matches the order the bytes are stored in.
+        offsets[2 * i] = bos.size();
+        sizes[2 * i] = ps.length;
+        bos.write(ps, 0, ps.length);
+        offsets[2 * i + 1] = bos.size();
+        sizes[2 * i + 1] = ns.length;
+        bos.write(ns, 0, ns.length);
+      }
+      ref.setData(bos.toByteArray());
+    }
+  }
+
+  /**
+   * Runs {@link LazyByte#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyByte extends RandomDataInitializer {
+
+    final LazyByte obj = new LazyByte(
+        LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR);
+
+    public RandomLazyByte() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyByte#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyByte extends RandomDataInitializer {
+
+    final LazyByte obj = new LazyByte(
+        LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR);
+
+    public WorstLazyByte() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyByte#init} over well-formed decimal text whose values fit
+   * in a byte.
+   */
+  public static class GoodLazyByte extends GoodDataInitializer {
+
+    final LazyByte obj = new LazyByte(
+        LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR);
+
+    public GoodLazyByte() {
+      // Bound the generated values by the byte range; with a larger bound
+      // almost every value would overflow a byte and fail to parse, so the
+      // benchmark would time the failure path instead of the success path.
+      super(Byte.MAX_VALUE);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyShort#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyShort extends RandomDataInitializer {
+
+    final LazyShort obj = new LazyShort(
+        LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR);
+
+    public RandomLazyShort() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyShort#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyShort extends RandomDataInitializer {
+
+    final LazyShort obj = new LazyShort(
+        LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR);
+
+    public WorstLazyShort() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyShort#init} over well-formed decimal text whose values
+   * fit in a short.
+   */
+  public static class GoodLazyShort extends GoodDataInitializer {
+
+    final LazyShort obj = new LazyShort(
+        LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR);
+
+    public GoodLazyShort() {
+      // Bound the generated values by the short range; with a larger bound
+      // almost every value would overflow a short and fail to parse, so the
+      // benchmark would time the failure path instead of the success path.
+      super(Short.MAX_VALUE);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyInteger#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyInteger extends RandomDataInitializer {
+
+    final LazyInteger obj = new LazyInteger(
+        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR);
+
+    public RandomLazyInteger() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyInteger#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyInteger extends RandomDataInitializer {
+
+    final LazyInteger obj = new LazyInteger(
+        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR);
+
+    public WorstLazyInteger() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyInteger#init} over well-formed decimal integer text
+   * generated with bound {@code Integer.MAX_VALUE}.
+   */
+  public static class GoodLazyInteger extends GoodDataInitializer {
+
+    public GoodLazyInteger() {
+      super(Integer.MAX_VALUE);
+    }
+
+    final LazyInteger obj = new LazyInteger(
+        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR);
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyFloat#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyFloat extends RandomDataInitializer {
+
+    final LazyFloat obj = new LazyFloat(
+        LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR);
+
+    public RandomLazyFloat() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyFloat#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyFloat extends RandomDataInitializer {
+
+    final LazyFloat obj = new LazyFloat(
+        LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR);
+
+    public WorstLazyFloat() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyFloat#init} over well-formed decimal integer text
+   * generated with bound {@code Integer.MAX_VALUE}.
+   */
+  public static class GoodLazyFloat extends GoodDataInitializer {
+
+    public GoodLazyFloat() {
+      super(Integer.MAX_VALUE);
+    }
+
+    final LazyFloat obj = new LazyFloat(
+        LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR);
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyLong#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyLong extends RandomDataInitializer {
+
+    final LazyLong obj = new LazyLong(
+        LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR);
+
+    public RandomLazyLong() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyLong#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyLong extends RandomDataInitializer {
+
+    final LazyLong obj = new LazyLong(
+        LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR);
+
+    public WorstLazyLong() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyLong#init} over well-formed decimal integer text
+   * generated with bound {@code Integer.MAX_VALUE}.
+   */
+  public static class GoodLazyLong extends GoodDataInitializer {
+
+    public GoodLazyLong() {
+      super(Integer.MAX_VALUE);
+    }
+
+    final LazyLong obj = new LazyLong(
+        LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR);
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyDouble#init} over random-byte fields generated with
+   * initializer width 2, i.e. fields that are 0 or 1 byte long.
+   */
+  public static class RandomLazyDouble extends RandomDataInitializer {
+
+    final LazyDouble obj = new LazyDouble(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR);
+
+    public RandomLazyDouble() {
+      super(2);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyDouble#init} over random-byte fields generated with
+   * initializer width 8, i.e. fields that are 0 to 7 bytes long.
+   */
+  public static class WorstLazyDouble extends RandomDataInitializer {
+
+    final LazyDouble obj = new LazyDouble(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR);
+
+    public WorstLazyDouble() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Runs {@link LazyDouble#init} over well-formed decimal integer text
+   * generated with bound {@code Integer.MAX_VALUE}.
+   */
+  public static class GoodLazyDouble extends GoodDataInitializer {
+
+    public GoodLazyDouble() {
+      super(Integer.MAX_VALUE);
+    }
+
+    final LazyDouble obj = new LazyDouble(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR);
+
+    @Override
+    public void bench() {
+      final int slots = sizes.length;
+      for (int iter = 0; iter < DEFAULT_ITER_TIME; iter++) {
+        // Cycle through the prepared fields.
+        final int slot = iter % slots;
+        obj.init(ref, offsets[slot], sizes[slot]);
+      }
+    }
+  }
+
+  /**
+   * Launches JMH for every benchmark whose name contains this class's simple
+   * name.
+   *
+   * @param args command-line arguments (not used)
+   * @throws RunnerException if the JMH runner fails
+   */
+  public static void main(String[] args) throws RunnerException {
+    final String pattern =
+        ".*" + LazySimpleSerDeBench.class.getSimpleName() + ".*";
+    final Options options = new OptionsBuilder().include(pattern).build();
+    final Runner runner = new Runner(options);
+    runner.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
index a3b8f76..1f9cead 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
@@ -48,6 +48,10 @@ public class LazyByte extends
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseByte(bytes.getData(), start, length, 10));
       isNull = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
index 05ca4e9..35c2141 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
@@ -46,6 +46,10 @@ public class LazyDouble extends
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
     String byteData = null;
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       byteData = Text.decode(bytes.getData(), start, length);
       data.set(Double.parseDouble(byteData));

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java
index 37676d1..6e132c7 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java
@@ -46,6 +46,10 @@ public class LazyFloat extends
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
     String byteData = null;
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       byteData = Text.decode(bytes.getData(), start, length);
       data.set(Float.parseFloat(byteData));

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
index ad82ebf..22742aa 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
@@ -51,6 +51,10 @@ public class LazyInteger extends
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseInt(bytes.getData(), start, length, 10));
       isNull = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
index a9779a0..c0d52b9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
@@ -51,6 +51,10 @@ public class LazyLong extends
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseLong(bytes.getData(), start, length, 10));
       isNull = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
index f04e131..b8b9488 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
@@ -48,6 +48,10 @@ public class LazyShort extends
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseShort(bytes.getData(), start, length));
       isNull = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/98049182/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 5c58f6b..a5e4be4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -81,6 +81,34 @@ public final class LazyUtils {
   }
 
   /**
+   * Cheap pre-check run before a full numeric parse. Returns false when the
+   * bytes definitely cannot be parsed into a base-10 Number (Long or a
+   * Double).
+   *
+   * If it returns true, the bytes might still be invalid, but not obviously.
+   *
+   * @param buf buffer holding the field
+   * @param offset index of the first byte of the field in {@code buf}
+   * @param len number of bytes in the field
+   * @return false if the field is certainly not a number, true if a real
+   *         parse is needed to decide
+   */
+  public static boolean isNumberMaybe(byte[] buf, int offset, int len) {
+    switch (len) {
+    case 0:
+      return false;
+    case 1:
+      // space usually
+      return Character.isDigit(buf[offset]);
+    case 2:
+      // \N or -1 (allow latter)
+      return Character.isDigit(buf[offset + 1])
+          || Character.isDigit(buf[offset + 0]);
+    case 4:
+      // null or NULL. The second byte is checked as well: the first byte
+      // alone would also reject "NaN" followed by one whitespace byte, which
+      // Double.parseDouble and Float.parseFloat accept after trimming.
+      if ((buf[offset] == 'N' || buf[offset] == 'n')
+          && (buf[offset + 1] == 'U' || buf[offset + 1] == 'u')) {
+        return false;
+      }
+      break;
+    default:
+      break;
+    }
+    // maybe valid - too expensive to check without a parse
+    return true;
+  }
+
+  /**
    * Returns -1 if the first byte sequence is lexicographically less than the
    * second; returns +1 if the second byte sequence is lexicographically less
    * than the first; otherwise return 0.