You are viewing a plain text version of this content. The canonical link for it is here.
Posted to log4j-dev@logging.apache.org by Gary Gregory <ga...@gmail.com> on 2015/10/06 01:31:43 UTC

Fwd: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact port of JDK 8 logic for handling invalid input, renamed methods

I am worried and almost -1 here: It is OK to copy from OpenJDK but I bet
it's not OK to copy from Oracle's JRE... but how can we properly benchmark
then?

Gary

---------- Forwarded message ----------
From: <rp...@apache.org>
Date: Mon, Oct 5, 2015 at 4:14 PM
Subject: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact
port of JDK 8 logic for handling invalid input, renamed methods
To: commits@logging.apache.org


Repository: logging-log4j2
Updated Branches:
  refs/heads/master 2612b61e6 -> 8b9d10095


LOG4J2-1151 updated: added exact port of JDK 8 logic for handling
invalid input, renamed methods

Project: http://git-wip-us.apache.org/repos/asf/logging-log4j2/repo
Commit:
http://git-wip-us.apache.org/repos/asf/logging-log4j2/commit/1360daa6
Tree: http://git-wip-us.apache.org/repos/asf/logging-log4j2/tree/1360daa6
Diff: http://git-wip-us.apache.org/repos/asf/logging-log4j2/diff/1360daa6

Branch: refs/heads/master
Commit: 1360daa694ed247b100d8d19336c92ecebd147ff
Parents: 9bc8144
Author: rpopma <rp...@apache.org>
Authored: Tue Oct 6 01:14:18 2015 +0200
Committer: rpopma <rp...@apache.org>
Committed: Tue Oct 6 01:14:18 2015 +0200

----------------------------------------------------------------------
 .../log4j/perf/jmh/StringEncodingBenchmark.java | 151 +++++++++++++++++--
 1 file changed, 137 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/1360daa6/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
----------------------------------------------------------------------
diff --git
a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
index 4159b2a..7e9459c 100644
---
a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
+++
b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
@@ -35,7 +35,7 @@ import org.openjdk.jmh.annotations.State;
  * Tests Log4j2 StringEncoding performance.
  */
 // ============================== HOW TO RUN THIS TEST: ====================================
-//(Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
+// (Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
 //
 // java -jar log4j-perf/target/benchmarks.jar ".*StringEncoding.*" -f 1 -wi 5 -i 10
 //
@@ -63,70 +63,70 @@ public class StringEncodingBenchmark {
     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytes() {
+    public byte[] defaultStringGetBytes() {
         return LOGMSG.getBytes();
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesString88591() throws Exception {
+    public byte[] iso8859_1StringGetBytesString() throws Exception {
         return LOGMSG.getBytes(STRING_ISO8859_1);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesCharSet88591() {
+    public byte[] iso8859_1StringGetBytesCharSet() {
         return LOGMSG.getBytes(CHARSET_ISO8859_1);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesStringUsAscii() throws Exception {
+    public byte[] usAsciiStringGetBytesString() throws Exception {
         return LOGMSG.getBytes(STRING_US_ASCII);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesCharSetUsAscii() {
+    public byte[] usAsciiStringGetBytesCharSet() {
         return LOGMSG.getBytes(CHARSET_US_ASCII);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesStringDefault() throws Exception {
+    public byte[] defaultStringGetBytesString() throws Exception {
         return LOGMSG.getBytes(DEFAULT_ENCODING);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesCharSetDefault() {
+    public byte[] defaultStringGetBytesCharSet() {
         return LOGMSG.getBytes(CHARSET_DEFAULT);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesStringShiftJIS() throws Exception {
+    public byte[] shiftJisStringGetBytesString() throws Exception {
         return LOGMSG.getBytes(STRING_SHIFT_JIS);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] stringGetBytesCharSetShiftJIS() {
+    public byte[] shiftJisStringGetBytesCharSet() {
         return LOGMSG.getBytes(CHARSET_SHIFT_JIS);
     }

     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] encoderShiftJIS() throws CharacterCodingException {
+    public byte[] shiftJisEncoder() throws CharacterCodingException {
         ByteBuffer buf = ENCODER_SHIFT_JIS.encode(CharBuffer.wrap(LOGMSG));
         return buf.array();
     }
@@ -134,7 +134,7 @@ public class StringEncodingBenchmark {
     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] encoderIso8859_1() throws CharacterCodingException {
+    public byte[] iso8859_1Encoder() throws CharacterCodingException {
         ByteBuffer buf = ENCODER_ISO8859_1.encode(CharBuffer.wrap(LOGMSG));
         return buf.array();
     }
@@ -142,12 +142,135 @@ public class StringEncodingBenchmark {
     @Benchmark
     @BenchmarkMode(Mode.SampleTime)
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
-    public byte[] customIso8859_1() throws CharacterCodingException {
+    public byte[] iso8859_1CustomCastToByte() throws
CharacterCodingException {
         final int length = LOGMSG.length();
         final byte[] result = new byte[length];
         for (int i = 0; i < length; i++) {
-            result[i] = (byte) LOGMSG.charAt(i);
+            final char c = LOGMSG.charAt(i);
+            result[i++] = (byte) c;
         }
         return result;
     }
+
+    @Benchmark
+    @BenchmarkMode(Mode.SampleTime)
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public byte[] iso8859_1CustomVerifyAndCast() throws
CharacterCodingException {
+        final int length = LOGMSG.length();
+        final byte[] result = new byte[length];
+        int j = 0;
+        for (int i = 0; i < length; i++) {
+            final char c = LOGMSG.charAt(i);
+            if (c <= 255) {
+                result[j++] = (byte) c;
+            } else {
+                i = nonIsoChar(LOGMSG, i);
+                result[j++] = (byte) '?';
+            }
+        }
+        return result;
+    }
+
+    private int nonIsoChar(String logmsg, int i) {
+        char c = logmsg.charAt(i++);
+        if ((Character.isHighSurrogate(c)) && (i < logmsg.length()) &&
(Character.isLowSurrogate(logmsg.charAt(i)))) {
+            i++;
+        }
+        return i;
+    }
+
+    @Benchmark
+    @BenchmarkMode(Mode.SampleTime)
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public byte[] iso8859_1CustomPortedJDK8() throws
CharacterCodingException {
+        final int length = LOGMSG.length();
+        final byte[] result = new byte[length];
+        encode(LOGMSG, 0, length, result);
+        return result;
+    }
+
+    private static int encodeISOArray(String charArray, int charIndex,
byte[] byteArray, int byteIndex, int length) {
+        int i = 0;
+        for (; i < length; i++) {
+            char c = charArray.charAt(charIndex++);
+            if (c > 255) {
+                break;
+            }
+            byteArray[(byteIndex++)] = ((byte) c);
+        }
+        return i;
+    }
+
+    private int encode(String charArray, int charOffset, int charLength,
byte[] byteArray) {
+        int offset = 0;
+        int length = Math.min(charLength, byteArray.length);
+        int charDoneIndex = charOffset + length;
+        while (charOffset < charDoneIndex) {
+            int m = encodeISOArray(charArray, charOffset, byteArray,
offset, length);
+            charOffset += m;
+            offset += m;
+            if (m != length) {
+                char c = charArray.charAt(charOffset++);
+                if ((Character.isHighSurrogate(c)) && (charOffset <
charDoneIndex)
+                        &&
(Character.isLowSurrogate(charArray.charAt(charOffset)))) {
+                    if (charLength > byteArray.length) {
+                        charDoneIndex++;
+                        charLength--;
+                    }
+                    charOffset++;
+                }
+                byteArray[(offset++)] = '?';
+                length = Math.min(charDoneIndex - charOffset,
byteArray.length - offset);
+            }
+        }
+        return offset;
+    }
+
+    @Benchmark
+    @BenchmarkMode(Mode.SampleTime)
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public byte[] iso8859_1CustomPortedJDK8CopyArray() throws
CharacterCodingException {
+        char[] charArray = LOGMSG.toCharArray();
+        final int length = charArray.length;
+        final byte[] result = new byte[length];
+        encode0(charArray, 0, length, result);
+        return result;
+    }
+
+    private static int encodeISOArray0(char[] charArray, int charIndex,
byte[] byteArray, int byteIndex, int length) {
+        int i = 0;
+        for (; i < length; i++) {
+            char c = charArray[(charIndex++)];
+            if (c > 255) {
+                break;
+            }
+            byteArray[(byteIndex++)] = ((byte) c);
+        }
+        return i;
+    }
+
+    private int encode0(char[] charArray, int charOffset, int charLength,
byte[] byteArray) {
+        int offset = 0;
+        int length = Math.min(charLength, byteArray.length);
+        int charDoneIndex = charOffset + length;
+        while (charOffset < charDoneIndex) {
+            int m = encodeISOArray0(charArray, charOffset, byteArray,
offset, length);
+            charOffset += m;
+            offset += m;
+            if (m != length) {
+                char c = charArray[(charOffset++)];
+                if ((Character.isHighSurrogate(c)) && (charOffset <
charDoneIndex)
+                        &&
(Character.isLowSurrogate(charArray[(charOffset)]))) {
+                    if (charLength > byteArray.length) {
+                        charDoneIndex++;
+                        charLength--;
+                    }
+                    charOffset++;
+                }
+                byteArray[(offset++)] = '?';
+                length = Math.min(charDoneIndex - charOffset,
byteArray.length - offset);
+            }
+        }
+        return offset;
+    }
 }




-- 
E-Mail: garydgregory@gmail.com | ggregory@apache.org
Java Persistence with Hibernate, Second Edition
<http://www.manning.com/bauer3/>
JUnit in Action, Second Edition <http://www.manning.com/tahchiev/>
Spring Batch in Action <http://www.manning.com/templier/>
Blog: http://garygregory.wordpress.com
Home: http://garygregory.com/
Tweet! http://twitter.com/GaryGregory

Re: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact port of JDK 8 logic for handling invalid input, renamed methods

Posted by Remko Popma <re...@gmail.com>.
However, I used a tool so it might be a pretty close match to the original
source code, not sure. Also, the method names are unchanged.

On Tue, Oct 6, 2015 at 1:44 AM, Remko Popma <re...@gmail.com> wrote:

> This is not a copy of Oracle's source code but my interpretation of the
> relevant byte codes.
>
> On Tue, Oct 6, 2015 at 1:31 AM, Gary Gregory <ga...@gmail.com>
> wrote:
>
>> I am worried and almost -1 here: It is OK to copy from OpenJDK but I bet
>> it's not OK to copy from Oracle's JRE... but how can we properly benchmark
>> then?
>>
>> Gary
>>
>> ---------- Forwarded message ----------
>> From: <rp...@apache.org>
>> Date: Mon, Oct 5, 2015 at 4:14 PM
>> Subject: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added
>> exact port of JDK 8 logic for handling invalid input, renamed methods
>> To: commits@logging.apache.org
>>
>>
>> Repository: logging-log4j2
>> Updated Branches:
>>   refs/heads/master 2612b61e6 -> 8b9d10095
>>
>>
>> LOG4J2-1151 updated: added exact port of JDK 8 logic for handling
>> invalid input, renamed methods
>>
>> Project: http://git-wip-us.apache.org/repos/asf/logging-log4j2/repo
>> Commit:
>> http://git-wip-us.apache.org/repos/asf/logging-log4j2/commit/1360daa6
>> Tree: http://git-wip-us.apache.org/repos/asf/logging-log4j2/tree/1360daa6
>> Diff: http://git-wip-us.apache.org/repos/asf/logging-log4j2/diff/1360daa6
>>
>> Branch: refs/heads/master
>> Commit: 1360daa694ed247b100d8d19336c92ecebd147ff
>> Parents: 9bc8144
>> Author: rpopma <rp...@apache.org>
>> Authored: Tue Oct 6 01:14:18 2015 +0200
>> Committer: rpopma <rp...@apache.org>
>> Committed: Tue Oct 6 01:14:18 2015 +0200
>>
>> ----------------------------------------------------------------------
>>  .../log4j/perf/jmh/StringEncodingBenchmark.java | 151 +++++++++++++++++--
>>  1 file changed, 137 insertions(+), 14 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>>
>> http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/1360daa6/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
>> ----------------------------------------------------------------------
>> diff --git
>> a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
>> b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
>> index 4159b2a..7e9459c 100644
>> ---
>> a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
>> +++
>> b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
>> @@ -35,7 +35,7 @@ import org.openjdk.jmh.annotations.State;
>>   * Tests Log4j2 StringEncoding performance.
>>   */
>>  // ============================== HOW TO RUN THIS TEST:
>> ====================================
>> -//(Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
>> +// (Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
>>  //
>>  // java -jar log4j-perf/target/benchmarks.jar ".*StringEncoding.*" -f 1
>> -wi 5 -i 10
>>  //
>> @@ -63,70 +63,70 @@ public class StringEncodingBenchmark {
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytes() {
>> +    public byte[] defaultStringGetBytes() {
>>          return LOGMSG.getBytes();
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesString88591() throws Exception {
>> +    public byte[] iso8859_1StringGetBytesString() throws Exception {
>>          return LOGMSG.getBytes(STRING_ISO8859_1);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesCharSet88591() {
>> +    public byte[] iso8859_1StringGetBytesCharSet() {
>>          return LOGMSG.getBytes(CHARSET_ISO8859_1);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesStringUsAscii() throws Exception {
>> +    public byte[] usAsciiStringGetBytesString() throws Exception {
>>          return LOGMSG.getBytes(STRING_US_ASCII);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesCharSetUsAscii() {
>> +    public byte[] usAsciiStringGetBytesCharSet() {
>>          return LOGMSG.getBytes(CHARSET_US_ASCII);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesStringDefault() throws Exception {
>> +    public byte[] defaultStringGetBytesString() throws Exception {
>>          return LOGMSG.getBytes(DEFAULT_ENCODING);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesCharSetDefault() {
>> +    public byte[] defaultStringGetBytesCharSet() {
>>          return LOGMSG.getBytes(CHARSET_DEFAULT);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesStringShiftJIS() throws Exception {
>> +    public byte[] shiftJisStringGetBytesString() throws Exception {
>>          return LOGMSG.getBytes(STRING_SHIFT_JIS);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] stringGetBytesCharSetShiftJIS() {
>> +    public byte[] shiftJisStringGetBytesCharSet() {
>>          return LOGMSG.getBytes(CHARSET_SHIFT_JIS);
>>      }
>>
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] encoderShiftJIS() throws CharacterCodingException {
>> +    public byte[] shiftJisEncoder() throws CharacterCodingException {
>>          ByteBuffer buf =
>> ENCODER_SHIFT_JIS.encode(CharBuffer.wrap(LOGMSG));
>>          return buf.array();
>>      }
>> @@ -134,7 +134,7 @@ public class StringEncodingBenchmark {
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] encoderIso8859_1() throws CharacterCodingException {
>> +    public byte[] iso8859_1Encoder() throws CharacterCodingException {
>>          ByteBuffer buf =
>> ENCODER_ISO8859_1.encode(CharBuffer.wrap(LOGMSG));
>>          return buf.array();
>>      }
>> @@ -142,12 +142,135 @@ public class StringEncodingBenchmark {
>>      @Benchmark
>>      @BenchmarkMode(Mode.SampleTime)
>>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> -    public byte[] customIso8859_1() throws CharacterCodingException {
>> +    public byte[] iso8859_1CustomCastToByte() throws
>> CharacterCodingException {
>>          final int length = LOGMSG.length();
>>          final byte[] result = new byte[length];
>>          for (int i = 0; i < length; i++) {
>> -            result[i] = (byte) LOGMSG.charAt(i);
>> +            final char c = LOGMSG.charAt(i);
>> +            result[i++] = (byte) c;
>>          }
>>          return result;
>>      }
>> +
>> +    @Benchmark
>> +    @BenchmarkMode(Mode.SampleTime)
>> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> +    public byte[] iso8859_1CustomVerifyAndCast() throws
>> CharacterCodingException {
>> +        final int length = LOGMSG.length();
>> +        final byte[] result = new byte[length];
>> +        int j = 0;
>> +        for (int i = 0; i < length; i++) {
>> +            final char c = LOGMSG.charAt(i);
>> +            if (c <= 255) {
>> +                result[j++] = (byte) c;
>> +            } else {
>> +                i = nonIsoChar(LOGMSG, i);
>> +                result[j++] = (byte) '?';
>> +            }
>> +        }
>> +        return result;
>> +    }
>> +
>> +    private int nonIsoChar(String logmsg, int i) {
>> +        char c = logmsg.charAt(i++);
>> +        if ((Character.isHighSurrogate(c)) && (i < logmsg.length()) &&
>> (Character.isLowSurrogate(logmsg.charAt(i)))) {
>> +            i++;
>> +        }
>> +        return i;
>> +    }
>> +
>> +    @Benchmark
>> +    @BenchmarkMode(Mode.SampleTime)
>> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> +    public byte[] iso8859_1CustomPortedJDK8() throws
>> CharacterCodingException {
>> +        final int length = LOGMSG.length();
>> +        final byte[] result = new byte[length];
>> +        encode(LOGMSG, 0, length, result);
>> +        return result;
>> +    }
>> +
>> +    private static int encodeISOArray(String charArray, int charIndex,
>> byte[] byteArray, int byteIndex, int length) {
>> +        int i = 0;
>> +        for (; i < length; i++) {
>> +            char c = charArray.charAt(charIndex++);
>> +            if (c > 255) {
>> +                break;
>> +            }
>> +            byteArray[(byteIndex++)] = ((byte) c);
>> +        }
>> +        return i;
>> +    }
>> +
>> +    private int encode(String charArray, int charOffset, int charLength,
>> byte[] byteArray) {
>> +        int offset = 0;
>> +        int length = Math.min(charLength, byteArray.length);
>> +        int charDoneIndex = charOffset + length;
>> +        while (charOffset < charDoneIndex) {
>> +            int m = encodeISOArray(charArray, charOffset, byteArray,
>> offset, length);
>> +            charOffset += m;
>> +            offset += m;
>> +            if (m != length) {
>> +                char c = charArray.charAt(charOffset++);
>> +                if ((Character.isHighSurrogate(c)) && (charOffset <
>> charDoneIndex)
>> +                        &&
>> (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
>> +                    if (charLength > byteArray.length) {
>> +                        charDoneIndex++;
>> +                        charLength--;
>> +                    }
>> +                    charOffset++;
>> +                }
>> +                byteArray[(offset++)] = '?';
>> +                length = Math.min(charDoneIndex - charOffset,
>> byteArray.length - offset);
>> +            }
>> +        }
>> +        return offset;
>> +    }
>> +
>> +    @Benchmark
>> +    @BenchmarkMode(Mode.SampleTime)
>> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
>> +    public byte[] iso8859_1CustomPortedJDK8CopyArray() throws
>> CharacterCodingException {
>> +        char[] charArray = LOGMSG.toCharArray();
>> +        final int length = charArray.length;
>> +        final byte[] result = new byte[length];
>> +        encode0(charArray, 0, length, result);
>> +        return result;
>> +    }
>> +
>> +    private static int encodeISOArray0(char[] charArray, int charIndex,
>> byte[] byteArray, int byteIndex, int length) {
>> +        int i = 0;
>> +        for (; i < length; i++) {
>> +            char c = charArray[(charIndex++)];
>> +            if (c > 255) {
>> +                break;
>> +            }
>> +            byteArray[(byteIndex++)] = ((byte) c);
>> +        }
>> +        return i;
>> +    }
>> +
>> +    private int encode0(char[] charArray, int charOffset, int
>> charLength, byte[] byteArray) {
>> +        int offset = 0;
>> +        int length = Math.min(charLength, byteArray.length);
>> +        int charDoneIndex = charOffset + length;
>> +        while (charOffset < charDoneIndex) {
>> +            int m = encodeISOArray0(charArray, charOffset, byteArray,
>> offset, length);
>> +            charOffset += m;
>> +            offset += m;
>> +            if (m != length) {
>> +                char c = charArray[(charOffset++)];
>> +                if ((Character.isHighSurrogate(c)) && (charOffset <
>> charDoneIndex)
>> +                        &&
>> (Character.isLowSurrogate(charArray[(charOffset)]))) {
>> +                    if (charLength > byteArray.length) {
>> +                        charDoneIndex++;
>> +                        charLength--;
>> +                    }
>> +                    charOffset++;
>> +                }
>> +                byteArray[(offset++)] = '?';
>> +                length = Math.min(charDoneIndex - charOffset,
>> byteArray.length - offset);
>> +            }
>> +        }
>> +        return offset;
>> +    }
>>  }
>>
>>
>>
>>
>> --
>> E-Mail: garydgregory@gmail.com | ggregory@apache.org
>> Java Persistence with Hibernate, Second Edition
>> <http://www.manning.com/bauer3/>
>> JUnit in Action, Second Edition <http://www.manning.com/tahchiev/>
>> Spring Batch in Action <http://www.manning.com/templier/>
>> Blog: http://garygregory.wordpress.com
>> Home: http://garygregory.com/
>> Tweet! http://twitter.com/GaryGregory
>>
>
>

Re: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact port of JDK 8 logic for handling invalid input, renamed methods

Posted by Remko Popma <re...@gmail.com>.
This is not a copy of Oracle's source code but my interpretation of the
relevant byte codes.

On Tue, Oct 6, 2015 at 1:31 AM, Gary Gregory <ga...@gmail.com> wrote:

> I am worried and almost -1 here: It is OK to copy from OpenJDK but I bet
> it's not OK to copy from Oracle's JRE... but how can we properly benchmark
> then?
>
> Gary
>
> ---------- Forwarded message ----------
> From: <rp...@apache.org>
> Date: Mon, Oct 5, 2015 at 4:14 PM
> Subject: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact
> port of JDK 8 logic for handling invalid input, renamed methods
> To: commits@logging.apache.org
>
>
> Repository: logging-log4j2
> Updated Branches:
>   refs/heads/master 2612b61e6 -> 8b9d10095
>
>
> LOG4J2-1151 updated: added exact port of JDK 8 logic for handling
> invalid input, renamed methods
>
> Project: http://git-wip-us.apache.org/repos/asf/logging-log4j2/repo
> Commit:
> http://git-wip-us.apache.org/repos/asf/logging-log4j2/commit/1360daa6
> Tree: http://git-wip-us.apache.org/repos/asf/logging-log4j2/tree/1360daa6
> Diff: http://git-wip-us.apache.org/repos/asf/logging-log4j2/diff/1360daa6
>
> Branch: refs/heads/master
> Commit: 1360daa694ed247b100d8d19336c92ecebd147ff
> Parents: 9bc8144
> Author: rpopma <rp...@apache.org>
> Authored: Tue Oct 6 01:14:18 2015 +0200
> Committer: rpopma <rp...@apache.org>
> Committed: Tue Oct 6 01:14:18 2015 +0200
>
> ----------------------------------------------------------------------
>  .../log4j/perf/jmh/StringEncodingBenchmark.java | 151 +++++++++++++++++--
>  1 file changed, 137 insertions(+), 14 deletions(-)
> ----------------------------------------------------------------------
>
>
>
> http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/1360daa6/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
> ----------------------------------------------------------------------
> diff --git
> a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
> b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
> index 4159b2a..7e9459c 100644
> ---
> a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
> +++
> b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java
> @@ -35,7 +35,7 @@ import org.openjdk.jmh.annotations.State;
>   * Tests Log4j2 StringEncoding performance.
>   */
>  // ============================== HOW TO RUN THIS TEST:
> ====================================
> -//(Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
> +// (Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am )
>  //
>  // java -jar log4j-perf/target/benchmarks.jar ".*StringEncoding.*" -f 1
> -wi 5 -i 10
>  //
> @@ -63,70 +63,70 @@ public class StringEncodingBenchmark {
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytes() {
> +    public byte[] defaultStringGetBytes() {
>          return LOGMSG.getBytes();
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesString88591() throws Exception {
> +    public byte[] iso8859_1StringGetBytesString() throws Exception {
>          return LOGMSG.getBytes(STRING_ISO8859_1);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesCharSet88591() {
> +    public byte[] iso8859_1StringGetBytesCharSet() {
>          return LOGMSG.getBytes(CHARSET_ISO8859_1);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesStringUsAscii() throws Exception {
> +    public byte[] usAsciiStringGetBytesString() throws Exception {
>          return LOGMSG.getBytes(STRING_US_ASCII);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesCharSetUsAscii() {
> +    public byte[] usAsciiStringGetBytesCharSet() {
>          return LOGMSG.getBytes(CHARSET_US_ASCII);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesStringDefault() throws Exception {
> +    public byte[] defaultStringGetBytesString() throws Exception {
>          return LOGMSG.getBytes(DEFAULT_ENCODING);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesCharSetDefault() {
> +    public byte[] defaultStringGetBytesCharSet() {
>          return LOGMSG.getBytes(CHARSET_DEFAULT);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesStringShiftJIS() throws Exception {
> +    public byte[] shiftJisStringGetBytesString() throws Exception {
>          return LOGMSG.getBytes(STRING_SHIFT_JIS);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] stringGetBytesCharSetShiftJIS() {
> +    public byte[] shiftJisStringGetBytesCharSet() {
>          return LOGMSG.getBytes(CHARSET_SHIFT_JIS);
>      }
>
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] encoderShiftJIS() throws CharacterCodingException {
> +    public byte[] shiftJisEncoder() throws CharacterCodingException {
>          ByteBuffer buf =
> ENCODER_SHIFT_JIS.encode(CharBuffer.wrap(LOGMSG));
>          return buf.array();
>      }
> @@ -134,7 +134,7 @@ public class StringEncodingBenchmark {
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] encoderIso8859_1() throws CharacterCodingException {
> +    public byte[] iso8859_1Encoder() throws CharacterCodingException {
>          ByteBuffer buf =
> ENCODER_ISO8859_1.encode(CharBuffer.wrap(LOGMSG));
>          return buf.array();
>      }
> @@ -142,12 +142,135 @@ public class StringEncodingBenchmark {
>      @Benchmark
>      @BenchmarkMode(Mode.SampleTime)
>      @OutputTimeUnit(TimeUnit.NANOSECONDS)
> -    public byte[] customIso8859_1() throws CharacterCodingException {
> +    public byte[] iso8859_1CustomCastToByte() throws
> CharacterCodingException {
>          final int length = LOGMSG.length();
>          final byte[] result = new byte[length];
>          for (int i = 0; i < length; i++) {
> -            result[i] = (byte) LOGMSG.charAt(i);
> +            final char c = LOGMSG.charAt(i);
> +            result[i++] = (byte) c;
>          }
>          return result;
>      }
> +
> +    @Benchmark
> +    @BenchmarkMode(Mode.SampleTime)
> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
> +    public byte[] iso8859_1CustomVerifyAndCast() throws
> CharacterCodingException {
> +        final int length = LOGMSG.length();
> +        final byte[] result = new byte[length];
> +        int j = 0;
> +        for (int i = 0; i < length; i++) {
> +            final char c = LOGMSG.charAt(i);
> +            if (c <= 255) {
> +                result[j++] = (byte) c;
> +            } else {
> +                i = nonIsoChar(LOGMSG, i);
> +                result[j++] = (byte) '?';
> +            }
> +        }
> +        return result;
> +    }
> +
> +    private int nonIsoChar(String logmsg, int i) {
> +        char c = logmsg.charAt(i++);
> +        if ((Character.isHighSurrogate(c)) && (i < logmsg.length()) && (Character.isLowSurrogate(logmsg.charAt(i)))) {
> +            i++;
> +        }
> +        return i;
> +    }
> +
> +    @Benchmark
> +    @BenchmarkMode(Mode.SampleTime)
> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
> +    public byte[] iso8859_1CustomPortedJDK8() throws CharacterCodingException {
> +        final int length = LOGMSG.length();
> +        final byte[] result = new byte[length];
> +        encode(LOGMSG, 0, length, result);
> +        return result;
> +    }
> +
> +    private static int encodeISOArray(String charArray, int charIndex, byte[] byteArray, int byteIndex, int length) {
> +        int i = 0;
> +        for (; i < length; i++) {
> +            char c = charArray.charAt(charIndex++);
> +            if (c > 255) {
> +                break;
> +            }
> +            byteArray[(byteIndex++)] = ((byte) c);
> +        }
> +        return i;
> +    }
> +
> +    private int encode(String charArray, int charOffset, int charLength, byte[] byteArray) {
> +        int offset = 0;
> +        int length = Math.min(charLength, byteArray.length);
> +        int charDoneIndex = charOffset + length;
> +        while (charOffset < charDoneIndex) {
> +            int m = encodeISOArray(charArray, charOffset, byteArray, offset, length);
> +            charOffset += m;
> +            offset += m;
> +            if (m != length) {
> +                char c = charArray.charAt(charOffset++);
> +                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
> +                        && (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
> +                    if (charLength > byteArray.length) {
> +                        charDoneIndex++;
> +                        charLength--;
> +                    }
> +                    charOffset++;
> +                }
> +                byteArray[(offset++)] = '?';
> +                length = Math.min(charDoneIndex - charOffset, byteArray.length - offset);
> +            }
> +        }
> +        return offset;
> +    }
> +
> +    @Benchmark
> +    @BenchmarkMode(Mode.SampleTime)
> +    @OutputTimeUnit(TimeUnit.NANOSECONDS)
> +    public byte[] iso8859_1CustomPortedJDK8CopyArray() throws CharacterCodingException {
> +        char[] charArray = LOGMSG.toCharArray();
> +        final int length = charArray.length;
> +        final byte[] result = new byte[length];
> +        encode0(charArray, 0, length, result);
> +        return result;
> +    }
> +
> +    private static int encodeISOArray0(char[] charArray, int charIndex, byte[] byteArray, int byteIndex, int length) {
> +        int i = 0;
> +        for (; i < length; i++) {
> +            char c = charArray[(charIndex++)];
> +            if (c > 255) {
> +                break;
> +            }
> +            byteArray[(byteIndex++)] = ((byte) c);
> +        }
> +        return i;
> +    }
> +
> +    private int encode0(char[] charArray, int charOffset, int charLength, byte[] byteArray) {
> +        int offset = 0;
> +        int length = Math.min(charLength, byteArray.length);
> +        int charDoneIndex = charOffset + length;
> +        while (charOffset < charDoneIndex) {
> +            int m = encodeISOArray0(charArray, charOffset, byteArray, offset, length);
> +            charOffset += m;
> +            offset += m;
> +            if (m != length) {
> +                char c = charArray[(charOffset++)];
> +                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
> +                        && (Character.isLowSurrogate(charArray[(charOffset)]))) {
> +                    if (charLength > byteArray.length) {
> +                        charDoneIndex++;
> +                        charLength--;
> +                    }
> +                    charOffset++;
> +                }
> +                byteArray[(offset++)] = '?';
> +                length = Math.min(charDoneIndex - charOffset, byteArray.length - offset);
> +            }
> +        }
> +        return offset;
> +    }
>  }
>
>
>
>
> --
> E-Mail: garydgregory@gmail.com | ggregory@apache.org
> Java Persistence with Hibernate, Second Edition <http://www.manning.com/bauer3/>
> JUnit in Action, Second Edition <http://www.manning.com/tahchiev/>
> Spring Batch in Action <http://www.manning.com/templier/>
> Blog: http://garygregory.wordpress.com
> Home: http://garygregory.com/
> Tweet! http://twitter.com/GaryGregory
>