You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dubbo.apache.org by wo...@apache.org on 2021/01/03 09:41:46 UTC

[dubbo-go-hessian2] branch fix-emoji created (now cc4f9a4)

This is an automated email from the ASF dual-hosted git repository.

wongoo pushed a change to branch fix-emoji
in repository https://gitbox.apache.org/repos/asf/dubbo-go-hessian2.git.


      at cc4f9a4  add benchmark for string decode

This branch includes the following new commits:

     new 33721e1  recover string decode algorithm to support emoji codec
     new f567f91  refactor string decode algorithm
     new cc4f9a4  add benchmark for string decode

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[dubbo-go-hessian2] 02/03: refactor string decode algorithm

Posted by wo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wongoo pushed a commit to branch fix-emoji
in repository https://gitbox.apache.org/repos/asf/dubbo-go-hessian2.git

commit f567f918dd928cbba6206b14b2b060db7b4a9c3c
Author: wangoo <wo...@apache.org>
AuthorDate: Sun Jan 3 17:02:58 2021 +0800

    refactor string decode algorithm
---
 string.go      | 247 ++++++++++++++++++++++++++++++++-------------------------
 string_test.go |  16 ++++
 2 files changed, 154 insertions(+), 109 deletions(-)

diff --git a/string.go b/string.go
index 8702b7d..e01e83e 100644
--- a/string.go
+++ b/string.go
@@ -88,59 +88,6 @@ func encodeUcs2Rune(b []byte, ch uint32) int {
 	return 3
 }
 
-func decodeUcs4Rune(r *bufio.Reader) (c rune, cLen, bLen int, err error) {
-	c1, n1, err1 := decodeUcs2Rune(r)
-	if err1 != nil {
-		return c1, 0, n1, err1
-	}
-
-	if c1 >= 0xD800 && c1 <= 0xDBFF {
-		c2, n2, err2 := decodeUcs2Rune(r)
-		if err2 != nil {
-			return c2, 0, n2, err2
-		}
-
-		c := (c1-0xD800)<<10 + (c2 - 0xDC00) + 0x10000
-		return c, 2, n1 + n2, nil
-	}
-
-	return c1, 1, n1, nil
-}
-
-func decodeUcs2Rune(r *bufio.Reader) (rune, int, error) {
-	ch, err := r.ReadByte()
-	if err != nil {
-		return utf8.RuneError, 1, err
-	}
-
-	if ch < 0x80 {
-		return rune(ch), 1, nil
-	}
-
-	if (ch & 0xe0) == 0xc0 {
-		ch1, err := r.ReadByte()
-		if err != nil {
-			return utf8.RuneError, 2, err
-		}
-		return rune(((uint32(ch) & 0x1f) << 6) + (uint32(ch1) & 0x3f)), 2, nil
-	}
-
-	if (ch & 0xf0) == 0xe0 {
-		ch1, err := r.ReadByte()
-		if err != nil {
-			return utf8.RuneError, 2, err
-		}
-		ch2, err := r.ReadByte()
-		if err != nil {
-			return utf8.RuneError, 3, err
-		}
-		c := ((uint32(ch) & 0x0f) << 12) + ((uint32(ch1) & 0x3f) << 6) + (uint32(ch2) & 0x3f)
-		return rune(c), 3, nil
-	}
-
-	return utf8.RuneError, 0, fmt.Errorf("bad utf-8 encoding at %x", ch)
-}
-
 // # UTF-8 encoded character string split into 64k chunks
 // ::= x52 b1 b0 <utf8-data> string  # non-final chunk
 // ::= 'S' b1 b0 <utf8-data>         # string of length 0-65535
@@ -255,11 +202,8 @@ func (d *Decoder) getStringLength(tag byte) (int, error) {
 
 func (d *Decoder) decString(flag int32) (string, error) {
 	var (
-		tag       byte
-		charTotal int
-		last      bool
-		s         string
-		r         rune
+		tag byte
+		s   string
 	)
 
 	if flag != TAG_READ {
@@ -308,75 +252,160 @@ func (d *Decoder) decString(flag int32) (string, error) {
 		(tag >= 0x30 && tag <= 0x33) ||
 		(tag == BC_STRING_CHUNK || tag == BC_STRING) {
 
-		if tag == BC_STRING_CHUNK {
-			last = false
-		} else {
-			last = true
+		if tag != BC_STRING_CHUNK {
+			data, err := d.readStringChunkData(tag)
+			if err != nil {
+				return "", err
+			}
+			return *(*string)(unsafe.Pointer(&data)), nil
 		}
 
-		l, err := d.getStringLength(tag)
-		if err != nil {
-			return s, perrors.WithStack(err)
-		}
-		charTotal = l
-		charCount := 0
+		var chunkDataSlice [][]byte
+		dataLength := 0
 
-		runeData := make([]rune, charTotal)
-		runeIndex := 0
+		for {
+			data, err := d.readStringChunkData(tag)
+			if err != nil {
+				return "", err
+			}
 
-		byteCount := 0
-		byteLen := 0
-		charLen := 0
+			chunkDataSlice = append(chunkDataSlice, data)
+			dataLength += len(data)
 
-		for {
-			if charCount == charTotal {
-				if last {
-					return string(runeData[:runeIndex]), nil
+			// last chunk
+			if tag != BC_STRING_CHUNK {
+				allData := make([]byte, dataLength)
+				index := 0
+				for _, b := range chunkDataSlice {
+					copy(allData[index:], b)
+					index += len(b)
 				}
+				return *(*string)(unsafe.Pointer(&allData)), nil
+			}
 
-				b, _ := d.ReadByte()
-				switch {
-				case (tag >= BC_STRING_DIRECT && tag <= STRING_DIRECT_MAX) ||
-					(tag >= 0x30 && tag <= 0x33) ||
-					(tag == BC_STRING_CHUNK || tag == BC_STRING):
+			// read next string chunk tag
+			tag, _ = d.ReadByte()
+			switch {
+			case (tag >= BC_STRING_DIRECT && tag <= STRING_DIRECT_MAX) ||
+				(tag >= 0x30 && tag <= 0x33) ||
+				(tag == BC_STRING_CHUNK || tag == BC_STRING):
 
-					if b == BC_STRING_CHUNK {
-						last = false
-					} else {
-						last = true
-					}
+			default:
+				return s, perrors.New("expect string tag")
+			}
+		}
 
-					l, err := d.getStringLength(b)
-					if err != nil {
-						return s, perrors.WithStack(err)
-					}
-					charTotal += l
-					bs := make([]rune, charTotal)
-					copy(bs, runeData)
-					runeData = bs
+	}
 
-				default:
-					return s, perrors.New("expect string tag")
-				}
+	return s, perrors.Errorf("unknown string tag %#x\n", tag)
+}
+
+// readStringChunkData read one string chunk data as a utf8 buffer
+func (d *Decoder) readStringChunkData(tag byte) ([]byte, error) {
+	charTotal, err := d.getStringLength(tag)
+	if err != nil {
+		return nil, perrors.WithStack(err)
+	}
+
+	data := make([]byte, charTotal*3)
+
+	start := 0
+	end := 0
+
+	charCount := 0
+	charRead := 0
+
+	for charCount < charTotal {
+		_, err = io.ReadFull(d.reader, data[end:end+charTotal-charCount])
+		if err != nil {
+			return nil, err
+		}
+
+		end += charTotal - charCount
+
+		start, end, charRead, err = decode2utf8(d.reader, data, start, end)
+		if err != nil {
+			return nil, err
+		}
+
+		charCount += charRead
+	}
+
+	return data[:end], nil
+}
+
+// decode2utf8 decode hessian2 buffer to utf8 buffer
+// parameters:
+// - r : the input buffer
+// - data: the buffer already read
+// - start: the decoding index
+// - end: the already read buffer index
+// response: updated start, updated end, read char count, error.
+func decode2utf8(r *bufio.Reader, data []byte, start, end int) (int, int, int, error) {
+	var err error
+
+	charCount := 0
+
+	for start < end {
+		ch := data[start]
+		if ch < 0x80 {
+			start++
+			charCount++
+			continue
+		}
+
+		if start+1 == end {
+			data[end], err = r.ReadByte()
+			if err != nil {
+				return start, end, 0, err
 			}
+			end++
+		}
 
-			r, charLen, byteLen, err = decodeUcs4Rune(d.reader)
+		if (ch & 0xe0) == 0xc0 {
+			start += 2
+			charCount++
+			continue
+		}
+
+		if start+2 == end {
+			data[end], err = r.ReadByte()
 			if err != nil {
-				if err == io.EOF {
-					break
-				}
-				return s, perrors.WithStack(err)
+				return start, end, 0, err
 			}
+			end++
+		}
 
-			runeData[runeIndex] = r
-			runeIndex++
+		if (ch & 0xf0) == 0xe0 {
+			c1 := ((uint32(ch) & 0x0f) << 12) + ((uint32(data[start+1]) & 0x3f) << 6) + (uint32(data[start+2]) & 0x3f)
 
-			charCount += charLen
-			byteCount += byteLen
+			if c1 >= 0xD800 && c1 <= 0xDBFF {
+				if start+6 >= end {
+					_, err = io.ReadFull(r, data[end:start+6])
+					if err != nil {
+						return start, end, 0, err
+					}
+					end = start + 6
+				}
+
+				c2 := ((uint32(data[start+3]) & 0x0f) << 12) + ((uint32(data[start+4]) & 0x3f) << 6) + (uint32(data[start+5]) & 0x3f)
+				c := (c1-0xD800)<<10 + (c2 - 0xDC00) + 0x10000
+
+				n := utf8.EncodeRune(data[start:], rune(c))
+				copy(data[start+n:], data[start+6:end])
+				start, end = start+n, end-6+n
+
+				charCount += 2
+				continue
+			}
+
+			start += 3
+			charCount++
+			continue
 		}
 
-		return string(runeData[:runeIndex]), nil
+		return start, end, 0, fmt.Errorf("bad utf-8 encoding at %x", ch)
 	}
 
-	return s, perrors.Errorf("unknown string tag %#x\n", tag)
+	return start, end, charCount, nil
 }
diff --git a/string_test.go b/string_test.go
index be3ac73..de945f2 100644
--- a/string_test.go
+++ b/string_test.go
@@ -19,6 +19,7 @@ package hessian
 
 import (
 	"fmt"
+	"strings"
 	"sync"
 	"testing"
 )
@@ -212,3 +213,18 @@ func TestStringComplex(t *testing.T) {
 	testDecodeFramework(t, "customReplyComplexString", s0)
 	testJavaDecode(t, "customArgComplexString", s0)
 }
+
+func BenchmarkDecodeString(b *testing.B) {
+	s := "❄️🚫🚫🚫🚫 多次自我介绍、任务、动态和"
+	s = strings.Repeat(s, 4096)
+
+	e := NewEncoder()
+	_ = e.Encode(s)
+	buf := e.buffer
+
+	d := NewDecoder(buf)
+	for i := 0; i < b.N; i++ {
+		d.Reset(buf)
+		_, _ = d.Decode()
+	}
+}


[dubbo-go-hessian2] 01/03: recover string decode algorithm to support emoji codec

Posted by wo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wongoo pushed a commit to branch fix-emoji
in repository https://gitbox.apache.org/repos/asf/dubbo-go-hessian2.git

commit 33721e112570ded26ccdf5761b441c9b1738c98a
Author: wangoo <wo...@apache.org>
AuthorDate: Sun Jan 3 11:22:34 2021 +0800

    recover string decode algorithm to support emoji codec
---
 string.go                                          | 263 +++------------------
 string_test.go                                     |   9 +
 .../src/main/java/test/TestCustomDecode.java       |   7 +-
 .../src/main/java/test/TestCustomReply.java        |   5 +
 test_hessian/src/main/java/test/TestString.java    |   7 +
 5 files changed, 56 insertions(+), 235 deletions(-)

diff --git a/string.go b/string.go
index 987a6ac..8702b7d 100644
--- a/string.go
+++ b/string.go
@@ -255,9 +255,11 @@ func (d *Decoder) getStringLength(tag byte) (int, error) {
 
 func (d *Decoder) decString(flag int32) (string, error) {
 	var (
-		tag  byte
-		last bool
-		s    string
+		tag       byte
+		charTotal int
+		last      bool
+		s         string
+		r         rune
 	)
 
 	if flag != TAG_READ {
@@ -312,18 +314,24 @@ func (d *Decoder) decString(flag int32) (string, error) {
 			last = true
 		}
 
-		chunkLen, err := d.getStringLength(tag)
+		l, err := d.getStringLength(tag)
 		if err != nil {
 			return s, perrors.WithStack(err)
 		}
-		bytesBuf := make([]byte, chunkLen<<2)
-		offset := 0
+		charTotal = l
+		charCount := 0
+
+		runeData := make([]rune, charTotal)
+		runeIndex := 0
+
+		byteCount := 0
+		byteLen := 0
+		charLen := 0
 
 		for {
-			if chunkLen <= 0 {
+			if charCount == charTotal {
 				if last {
-					b := bytesBuf[:offset]
-					return *(*string)(unsafe.Pointer(&b)), nil
+					return string(runeData[:runeIndex]), nil
 				}
 
 				b, _ := d.ReadByte()
@@ -338,190 +346,21 @@ func (d *Decoder) decString(flag int32) (string, error) {
 						last = true
 					}
 
-					chunkLen, err = d.getStringLength(b)
+					l, err := d.getStringLength(b)
 					if err != nil {
 						return s, perrors.WithStack(err)
 					}
-					remain, cap := len(bytesBuf)-offset, chunkLen<<2
-					if remain < cap {
-						grow := len(bytesBuf) + cap
-						bs := make([]byte, grow)
-						copy(bs, bytesBuf)
-						bytesBuf = bs
-					}
+					charTotal += l
+					bs := make([]rune, charTotal)
+					copy(bs, runeData)
+					runeData = bs
+
 				default:
 					return s, perrors.New("expect string tag")
 				}
 			}
 
-			if chunkLen > 0 {
-				nread, err := d.next(bytesBuf[offset : offset+chunkLen])
-				if err != nil {
-					if err == io.EOF {
-						break
-					}
-					return s, perrors.WithStack(err)
-				}
-
-				// quickly detect the actual number of bytes
-				prev, i := offset, offset
-				len := offset + nread
-				copied := false
-				for r, r1 := len-1, len-2; i < len; chunkLen-- {
-					ch := bytesBuf[offset]
-					if ch < 0x80 {
-						i++
-						offset++
-					} else if (ch & 0xe0) == 0xc0 {
-						i += 2
-						offset += 2
-					} else if (ch & 0xf0) == 0xe0 {
-						// handle the 3-byte right edge
-						// case:
-						// 1. Expect 3 bytes, but the current byte is on the right
-						// 2. Expect 3 bytes, but the current byte is second to last to the right
-						if i == r {
-							bytesBuf[i+1], err = d.reader.ReadByte()
-							if err != nil {
-								return s, perrors.WithStack(err)
-							}
-							bytesBuf[i+2], err = d.reader.ReadByte()
-							if err != nil {
-								return s, perrors.WithStack(err)
-							}
-							nread += 2
-							len += 2
-						} else if i == r1 {
-							bytesBuf[i+2], err = d.reader.ReadByte()
-							if err != nil {
-								return s, perrors.WithStack(err)
-							}
-							nread++
-							len++
-						}
-
-						// we detect emoji first
-						c1 := ((uint32(ch) & 0x0f) << 12) + ((uint32(bytesBuf[i+1]) & 0x3f) << 6) + (uint32(bytesBuf[i+2]) & 0x3f)
-						if c1 >= 0xD800 && c1 <= 0xDBFF {
-
-							var (
-								c2  rune
-								n2  int
-								err error
-								ch0 byte
-							)
-
-							// more cache byte available
-							if i+3 < len {
-								ch0 = bytesBuf[i+3]
-							} else {
-								ch0, err = d.reader.ReadByte()
-								if err != nil {
-									return s, perrors.WithStack(err)
-								}
-								// update accumulates read bytes,
-								// because it reads more than thunk bytes
-								nread++
-								len++
-							}
-
-							if ch0 < 0x80 {
-								c2, n2 = rune(ch0), 1
-							} else if (ch0 & 0xe0) == 0xc0 {
-								var ch1 byte
-								if i+4 < len {
-									ch1 = bytesBuf[i+4]
-								} else {
-									// out of the chunk byte data
-									bytesBuf[i+4], err = d.reader.ReadByte()
-									if err != nil {
-										return s, perrors.WithStack(err)
-									}
-									ch1 = bytesBuf[i+4]
-									nread++
-									len++
-								}
-								c2, n2 = rune(((uint32(ch0)&0x1f)<<6)+(uint32(ch1)&0x3f)), 2
-							} else if (ch0 & 0xf0) == 0xe0 {
-								var ch1, ch2 byte
-								if i+5 < len {
-									ch1 = bytesBuf[i+4]
-									ch2 = bytesBuf[i+5]
-								} else {
-									ch1, err = d.reader.ReadByte()
-									if err != nil {
-										return s, perrors.WithStack(err)
-									}
-									ch2, err = d.reader.ReadByte()
-									if err != nil {
-										return s, perrors.WithStack(err)
-									}
-									len += 2
-									nread += 2
-								}
-								c := ((uint32(ch0) & 0x0f) << 12) + ((uint32(ch1) & 0x3f) << 6) + (uint32(ch2) & 0x3f)
-								c2, n2 = rune(c), 3
-							}
-
-							c := rune(c1-0xD800)<<10 + (c2 - 0xDC00) + 0x10000
-							n3 := utf8.EncodeRune(bytesBuf[i:], c)
-							if copied = n3 > 0 && n3 < /** front three byte */ 3+n2; copied {
-								// We need to move the bytes,
-								// for example, less bytes after decoding
-								offset = i + n3
-								copy(bytesBuf[offset:], bytesBuf[i+3+n2:len])
-							}
-
-							i += n2
-							chunkLen--
-						}
-						i += 3
-
-						// fix read the next byte index
-						if copied {
-							copied = false
-							continue
-						}
-
-						offset += 3
-					} else {
-						return s, perrors.Errorf("bad utf-8 encoding")
-					}
-				}
-
-				if remain := offset - prev - nread; remain > 0 {
-					if remain == 1 {
-						ch, err := d.ReadByte()
-						if err != nil {
-							return s, perrors.WithStack(err)
-						}
-						bytesBuf[offset-1] = ch
-					} else {
-						var err error
-						if buffed := d.Buffered(); buffed < remain {
-							// trigger fill data if required
-							copy(bytesBuf[offset-remain:offset], d.peek(remain))
-							_, err = d.reader.Discard(remain)
-						} else {
-							// copy remaining bytes.
-							_, err = d.next(bytesBuf[offset-remain : offset])
-						}
-
-						if err != nil {
-							return s, perrors.WithStack(err)
-						}
-					}
-				}
-
-				// the expected length string has been processed.
-				if chunkLen <= 0 {
-					// we need to detect next chunk
-					continue
-				}
-			}
-
-			// decode byte
-			ch, err := d.ReadByte()
+			r, charLen, byteLen, err = decodeUcs4Rune(d.reader)
 			if err != nil {
 				if err == io.EOF {
 					break
@@ -529,58 +368,14 @@ func (d *Decoder) decString(flag int32) (string, error) {
 				return s, perrors.WithStack(err)
 			}
 
-			if ch < 0x80 {
-				bytesBuf[offset] = ch
-				offset++
-			} else if (ch & 0xe0) == 0xc0 {
-				ch1, err := d.ReadByte()
-				if err != nil {
-					return s, perrors.WithStack(err)
-				}
-				bytesBuf[offset] = ch
-				bytesBuf[offset+1] = ch1
-				offset += 2
-			} else if (ch & 0xf0) == 0xe0 {
-				var err error
-				if buffed := d.Buffered(); buffed < 2 {
-					// trigger fill data if required
-					copy(bytesBuf[offset+1:offset+3], d.peek(2))
-					_, err = d.reader.Discard(2)
-				} else {
-					_, err = d.next(bytesBuf[offset+1 : offset+3])
-				}
-				if err != nil {
-					return s, perrors.WithStack(err)
-				}
-
-				bytesBuf[offset] = ch
-
-				// we detect emoji first
-				c1 := ((uint32(ch) & 0x0f) << 12) + ((uint32(bytesBuf[offset+1]) & 0x3f) << 6) + (uint32(bytesBuf[offset+2]) & 0x3f)
-				if c1 >= 0xD800 && c1 <= 0xDBFF {
-					c2, n2, err := decodeUcs2Rune(d.reader)
-					if err != nil {
-						return s, perrors.WithStack(err)
-					}
-
-					c := rune(c1-0xD800)<<10 + (c2 - 0xDC00) + 0x10000
-					utf8.EncodeRune(bytesBuf[offset:], c)
-
-					// update next rune
-					offset += n2
-					chunkLen--
-				}
-
-				offset += 3
-			} else {
-				return s, perrors.Errorf("bad utf-8 encoding, offset=%d\n", offset)
-			}
+			runeData[runeIndex] = r
+			runeIndex++
 
-			chunkLen--
+			charCount += charLen
+			byteCount += byteLen
 		}
 
-		b := bytesBuf[:offset]
-		return *(*string)(unsafe.Pointer(&b)), nil
+		return string(runeData[:runeIndex]), nil
 	}
 
 	return s, perrors.Errorf("unknown string tag %#x\n", tag)
diff --git a/string_test.go b/string_test.go
index ae8d5c1..be3ac73 100644
--- a/string_test.go
+++ b/string_test.go
@@ -196,6 +196,15 @@ func TestStringEmoji(t *testing.T) {
 	testJavaDecode(t, "customArgString_emoji", s0)
 }
 
+func TestStringEmoji2(t *testing.T) {
+	// see: test_hessian/src/main/java/test/TestString.java
+	// see https://github.com/apache/dubbo-go-hessian2/issues/252
+	s0 := "❄️🚫🚫🚫🚫 多次自我介绍、任务、动态和"
+
+	testDecodeFramework(t, "customReplyStringEmoji2", s0)
+	testJavaDecode(t, "customArgString_emoji2", s0)
+}
+
 func TestStringComplex(t *testing.T) {
 	// see: test_hessian/src/main/java/test/TestString.java
 	s0 := "킐\u0088中国你好!\u0088\u0088\u0088\u0088\u0088\u0088"
diff --git a/test_hessian/src/main/java/test/TestCustomDecode.java b/test_hessian/src/main/java/test/TestCustomDecode.java
index 1c4c276..b97d691 100644
--- a/test_hessian/src/main/java/test/TestCustomDecode.java
+++ b/test_hessian/src/main/java/test/TestCustomDecode.java
@@ -200,6 +200,11 @@ public class TestCustomDecode {
         return TestString.getEmojiTestString().equals(o);
     }
 
+    public Object customArgString_emoji2() throws Exception {
+        String o = (String) input.readObject();
+        return TestString.getEmojiTestString2().equals(o);
+    }
+
     public Object customArgComplexString() throws Exception {
         String o = (String) input.readObject();
         return TestString.getComplexString().equals(o);
@@ -209,4 +214,4 @@ public class TestCustomDecode {
         HashSet o = (HashSet) input.readObject();
         return o.contains(0) && o.contains(1);
     }
-}
\ No newline at end of file
+}
diff --git a/test_hessian/src/main/java/test/TestCustomReply.java b/test_hessian/src/main/java/test/TestCustomReply.java
index 041a191..be212ac 100644
--- a/test_hessian/src/main/java/test/TestCustomReply.java
+++ b/test_hessian/src/main/java/test/TestCustomReply.java
@@ -430,6 +430,11 @@ public class TestCustomReply {
         output.flush();
     }
 
+    public void customReplyStringEmoji2() throws Exception {
+        output.writeObject(TestString.getEmojiTestString2());
+        output.flush();
+    }
+
     public void customReplyPerson183() throws Exception {
         Person183 p = new Person183();
         p.name = "pname";
diff --git a/test_hessian/src/main/java/test/TestString.java b/test_hessian/src/main/java/test/TestString.java
index c026796..8f18458 100644
--- a/test_hessian/src/main/java/test/TestString.java
+++ b/test_hessian/src/main/java/test/TestString.java
@@ -30,6 +30,13 @@ public class TestString {
         return s + ",max" + maxUnicode;
     }
 
+    /**
+     * see https://github.com/apache/dubbo-go-hessian2/issues/252
+     */
+    public static String getEmojiTestString2() {
+        return "❄️\uD83D\uDEAB\uD83D\uDEAB\uD83D\uDEAB\uD83D\uDEAB 多次自我介绍、任务、动态和";
+    }
+
     public static String getComplexString() {
         String s = "킐\u0088中国你好!\u0088\u0088\u0088\u0088\u0088\u0088";
         return s;


[dubbo-go-hessian2] 03/03: add benchmark for string decode

Posted by wo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wongoo pushed a commit to branch fix-emoji
in repository https://gitbox.apache.org/repos/asf/dubbo-go-hessian2.git

commit cc4f9a45ae36b635ed54e61f8e42c9873ca1a740
Author: wangoo <wo...@apache.org>
AuthorDate: Sun Jan 3 17:41:16 2021 +0800

    add benchmark for string decode
---
 string_test.go | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/string_test.go b/string_test.go
index de945f2..1c2666d 100644
--- a/string_test.go
+++ b/string_test.go
@@ -214,8 +214,19 @@ func TestStringComplex(t *testing.T) {
 	testJavaDecode(t, "customArgComplexString", s0)
 }
 
-func BenchmarkDecodeString(b *testing.B) {
-	s := "❄️🚫🚫🚫🚫 多次自我介绍、任务、动态和"
+func BenchmarkDecodeStringAscii(b *testing.B) {
+	runBenchmarkDecodeString(b, "hello world, hello hessian")
+}
+
+func BenchmarkDecodeStringUnicode(b *testing.B) {
+	runBenchmarkDecodeString(b, "你好, 世界, 你好, hessian")
+}
+
+func BenchmarkDecodeStringEmoji(b *testing.B) {
+	runBenchmarkDecodeString(b, "❄️🚫🚫🚫🚫 多次自我介绍、任务、动态和")
+}
+
+func runBenchmarkDecodeString(b *testing.B, s string) {
 	s = strings.Repeat(s, 4096)
 
 	e := NewEncoder()
@@ -225,6 +236,10 @@ func BenchmarkDecodeString(b *testing.B) {
 	d := NewDecoder(buf)
 	for i := 0; i < b.N; i++ {
 		d.Reset(buf)
-		_, _ = d.Decode()
+		_, err := d.Decode()
+		if err != nil {
+			b.Logf("err: %s", err)
+			b.FailNow()
+		}
 	}
 }