You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2014/06/11 05:52:27 UTC

[42/61] [abbrv] git commit: DRILL-630: Substr() function

DRILL-630: Substr() function


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/e4101807
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/e4101807
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/e4101807

Branch: refs/heads/master
Commit: e4101807728c06b62bb38e2871374215113cb2ef
Parents: 9b827b5
Author: Yash Sharma <ya...@snapdeal.com>
Authored: Thu Jun 5 07:54:24 2014 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Mon Jun 9 17:09:38 2014 -0700

----------------------------------------------------------------------
 .../exec/expr/fn/impl/StringFunctions.java      | 415 ++++++++++---------
 .../exec/physical/impl/TestStringFunctions.java |   4 +-
 .../resources/functions/string/testSubstr.json  |   7 +-
 3 files changed, 233 insertions(+), 193 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/e4101807/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 8d792fa..51a7dbb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -36,86 +36,86 @@ import org.apache.drill.exec.record.RecordBatch;
 
 public class StringFunctions{
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StringFunctions.class);
-  
+
   private StringFunctions(){}
-  
+
   /*
-   * String Function Implementation. 
+   * String Function Implementation.
    */
-  
+
   @FunctionTemplate(name = "like", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Like implements DrillSimpleFunc{
-    
+
     @Param VarCharHolder input;
     @Param(constant=true) VarCharHolder pattern;
     @Output BitHolder out;
     @Workspace java.util.regex.Pattern regPattern;
-    
+
     public void setup(RecordBatch incoming){
       regPattern = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike(pattern.toString()));
     }
-    
+
     public void eval(){
       out.value = regPattern.matcher(input.toString()).matches()? 1:0;
     }
   }
 
   @FunctionTemplate(names = {"similar", "similar to"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
-  public static class Similar implements DrillSimpleFunc{    
+  public static class Similar implements DrillSimpleFunc{
     @Param VarCharHolder input;
     @Param(constant=true) VarCharHolder pattern;
     @Output BitHolder out;
     @Workspace java.util.regex.Pattern regPattern;
 
     public void setup(RecordBatch incoming){
-      regPattern = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(pattern.toString()));      
+      regPattern = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(pattern.toString()));
     }
-    
+
     public void eval(){
       out.value = regPattern.matcher(input.toString()).matches()? 1:0;
     }
   }
-  
+
   /*
-   * Replace all substring that match the regular expression with replacement. 
+   * Replace all substring that match the regular expression with replacement.
    */
   @FunctionTemplate(name = "regexp_replace", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class RegexpReplace implements DrillSimpleFunc{
-    
+
     @Param VarCharHolder input;
     @Param(constant=true) VarCharHolder pattern;
     @Param VarCharHolder replacement;
     @Workspace ByteBuf buffer;
-    @Workspace java.util.regex.Pattern regPattern;    
+    @Workspace java.util.regex.Pattern regPattern;
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
-      buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);  
+      buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
       regPattern = java.util.regex.Pattern.compile(pattern.toString());
     }
-    
+
     public void eval(){
       out.buffer = buffer;
       out.start = 0;
-      
+
       byte [] bytea = regPattern.matcher(input.toString()).replaceAll(replacement.toString()).getBytes(java.nio.charset.Charset.forName("UTF-8"));
       out.buffer.setBytes(out.start, bytea);
       out.end = bytea.length;
     }
   }
-  
+
 
   @FunctionTemplate(names = {"char_length", "character_length", "length"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class CharLength implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder input;
     @Output BigIntHolder out;
 
     public void setup(RecordBatch incoming){}
-    
+
     public void eval(){
-      out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(input.buffer, input.start, input.end);          
-    } 
+      out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(input.buffer, input.start, input.end);
+    }
   }
 
   @FunctionTemplate(name = "lengthUtf8", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
@@ -133,109 +133,109 @@ public class StringFunctions{
 
   @FunctionTemplate(name = "octet_length", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class OctetLength implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder input;
     @Output BigIntHolder out;
 
     public void setup(RecordBatch incoming){}
-    
-    public void eval(){  
+
+    public void eval(){
       out.value = input.end - input.start;
     }
   }
 
   @FunctionTemplate(name = "bit_length", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class BitLength implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder input;
     @Output BigIntHolder out;
 
     public void setup(RecordBatch incoming){}
-    
-    public void eval(){  
+
+    public void eval(){
       out.value = (input.end - input.start) * 8;
     }
   }
-  
+
   /*
    * Location of specified substring.
-   * 
+   *
    * Difference from PostgreSQL :
-   *          exp \ System                PostgreSQL            Drill 
+   *          exp \ System                PostgreSQL            Drill
    *     position('', 'abc')                1                     0
    *     position('', '')                   1                     0
    */
   @FunctionTemplate(name = "position", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Position implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder substr;
     @Param  VarCharHolder str;
 
     @Output BigIntHolder out;
 
     public void setup(RecordBatch incoming){}
-    
+
     public void eval(){
       //Do string match.
-      int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer, str.start, str.end, 
+      int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer, str.start, str.end,
                                                                                           substr.buffer, substr.start, substr.end);
       if (pos < 0) {
         out.value = 0; //indicate not found a matched substr.
       } else {
-        //Count the # of characters. (one char could have 1-4 bytes)        
-        out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos) + 1;        
+        //Count the # of characters. (one char could have 1-4 bytes)
+        out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos) + 1;
       }
     }
-    
+
   }
-  
-  // same as function "position(substr, str) ", except the reverse order of argument. 
+
+  // same as function "position(substr, str) ", except the reverse order of argument.
   @FunctionTemplate(name = "strpos", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Strpos implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder str;
     @Param  VarCharHolder substr;
 
     @Output BigIntHolder out;
 
     public void setup(RecordBatch incoming){}
-    
-    public void eval(){  
+
+    public void eval(){
       //Do string match.
-      int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer, str.start, str.end, 
+      int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer, str.start, str.end,
                                                                                           substr.buffer, substr.start, substr.end);
       if (pos < 0) {
         out.value = 0; //indicate not found a matched substr.
       } else {
-        //Count the # of characters. (one char could have 1-4 bytes)        
-        out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos) + 1;        
+        //Count the # of characters. (one char could have 1-4 bytes)
+        out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos) + 1;
       }
-    }  
-    
+    }
+
   }
-  
+
   /*
    * Convert string to lower case.
    */
   @FunctionTemplate(name = "lower", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class LowerCase implements DrillSimpleFunc{
-    
+
     @Param VarCharHolder input;
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
+
     public void eval(){
       out.buffer = buffer;
       out.start = 0;
       out.end = input.end - input.start;
-      
+
       for (int id = input.start; id < input.end; id++) {
         byte  currentByte = input.buffer.getByte(id);
-        
+
         // 'A - Z' : 0x41 - 0x5A
         // 'a - z' : 0x61 - 0x7A
         if (currentByte >= 0x41 && currentByte <= 0x5A) {
@@ -251,23 +251,23 @@ public class StringFunctions{
    */
   @FunctionTemplate(name = "upper", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class UpperCase implements DrillSimpleFunc{
-    
+
     @Param VarCharHolder input;
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
+
     public void eval() {
       out.buffer = buffer;
       out.start = 0;
       out.end = input.end - input.start;
-      
+
       for (int id = input.start; id < input.end; id++) {
         byte  currentByte = input.buffer.getByte(id);
-        
+
         // 'A - Z' : 0x41 - 0x5A
         // 'a - z' : 0x61 - 0x7A
         if (currentByte >= 0x61 && currentByte <= 0x7A) {
@@ -278,50 +278,85 @@ public class StringFunctions{
     }
   }
 
-  // Follow Postgre.  
-  //  -- Valid "offset": [1, string_length], 
-  //  -- Valid "length": [1, up to string_length - offset + 1], if length > string_length - offset +1, get the substr up to the string_lengt.   
+
+  // Follows PostgreSQL semantics.
+  //  -- Valid "offset": [1, string_length],
+  //  -- Valid "length": [1, up to string_length - offset + 1], if length > string_length - offset + 1, get the substr up to the string_length.
   @FunctionTemplate(names = {"substring", "substr"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Substring implements DrillSimpleFunc{
 
     @Param VarCharHolder string;
     @Param BigIntHolder offset;
     @Param BigIntHolder length;
-    
+
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming) {
-      
+
     }
-    
+
     public void eval() {
       out.buffer = string.buffer;
-      // if length is NOT positive, or offset is NOT positive, or input string is empty, return empty string. 
+      // if length is NOT positive, or offset is NOT positive, or input string is empty, return empty string.
       if (length.value <= 0 || offset.value <=0 || string.end <= string.start) {
-        out.start = out.end = 0;        
-      } else {      
-        //Do 1st scan to counter # of character in string.           
+        out.start = out.end = 0;
+      } else {
+        //Do 1st scan to count the # of characters in string.
         int charCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(string.buffer, string.start, string.end);
-               
-        int fromCharIdx = (int) offset.value; //the start position of char  (inclusive)            
-        
-        if (fromCharIdx > charCount ) { // invalid length, return empty string. 
-          out.start = out.end = 0;          
+
+        int fromCharIdx = (int) offset.value; //the start position of char  (inclusive)
+
+        if (fromCharIdx > charCount ) { // invalid offset (past the end of the string), return empty string.
+          out.start = out.end = 0;
         } else {
           out.start = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, string.start, string.end, fromCharIdx-1);
-          
+
           // Bounded length by charCount - fromCharIdx + 1. substring("abc", 1, 5) --> "abc"
-          int charLen = Math.min((int)length.value, charCount - fromCharIdx + 1);  
-      
+          int charLen = Math.min((int)length.value, charCount - fromCharIdx + 1);
+
           out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, out.start, string.end, charLen);
         }
-      } 
+      }
+    }
+
+  }
+
+  @FunctionTemplate(names = {"substring", "substr"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class SubstringOffset implements DrillSimpleFunc{
+
+    @Param VarCharHolder string;
+    @Param BigIntHolder offset;
+
+    @Output VarCharHolder out;
+    @Workspace ByteBuf buffer;
+
+    public void setup(RecordBatch incoming) {
+    }
+
+    public void eval() {
+      out.buffer = string.buffer;
+      // if offset is NOT positive, or input string is empty, return empty string.
+      if (offset.value <=0 || string.end <= string.start) {
+        out.start = out.end = 0;
+      } else {
+        //Do 1st scan to count the # of characters in string.
+        int charCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(string.buffer, string.start, string.end);
+
+        int fromCharIdx = (int) offset.value; //the start position of char  (inclusive)
+
+        if (fromCharIdx > charCount ) { // invalid offset (past the end of the string), return empty string.
+          out.start = out.end = 0;
+        } else {
+          out.start = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, string.start, string.end, fromCharIdx-1);
+          out.end = string.end;
+        }
+      }
     }
-    
+
   }
 
-  // Return first length characters in the string. When length is negative, return all but last |length| characters. 
+  // Return first length characters in the string. When length is negative, return all but last |length| characters.
   // If length > total charcounts, return the whole string.
   // If length = 0, return empty
   // If length < 0, and |length| > total charcounts, return empty.
@@ -330,30 +365,30 @@ public class StringFunctions{
 
     @Param VarCharHolder string;
     @Param BigIntHolder length;
-    
+
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming){
     }
-    
+
     public void eval() {
       out.buffer = string.buffer;
       // if length is 0, or input string is empty, return empty string.
       if (length.value == 0 || string.end <= string.start) {
         out.start = out.end = 0;
-      } else {      
-        //Do 1st scan to counter # of character in string.                   
+      } else {
+        //Do 1st scan to count the # of characters in string.
         int charCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(string.buffer, string.start, string.end);
 
         int charLen = 0;
         if (length.value > 0) {
           charLen = Math.min((int)length.value, charCount);  //left('abc', 5) -> 'abc'
         } else if (length.value < 0) {
-          charLen = Math.max(0, charCount + (int)length.value) ; // left('abc', -5) ==> ''          
+          charLen = Math.max(0, charCount + (int)length.value) ; // left('abc', -5) ==> ''
         }
-        
-        out.start = string.start; //Starting from the left of input string. 
+
+        out.start = string.start; //Starting from the left of input string.
         out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, out.start, string.end, charLen);
       } // end of lenth.value != 0
     }
@@ -365,22 +400,22 @@ public class StringFunctions{
 
     @Param VarCharHolder string;
     @Param BigIntHolder length;
-    
+
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming){
     }
-    
+
     public void eval() {
       out.buffer = string.buffer;
       // invalid length.
       if (length.value == 0 || string.end <= string.start) {
         out.start = out.end = 0;
       } else {
-        //Do 1st scan to counter # of character in string.                   
+        //Do 1st scan to count the # of characters in string.
         int charCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(string.buffer, string.start, string.end);
-                  
+
         int fromCharIdx; //the start position of char (inclusive)
         int charLen; // the end position of char (inclusive)
         if (length.value > 0) {
@@ -390,15 +425,15 @@ public class StringFunctions{
           fromCharIdx = Math.abs((int) length.value) + 1;
           charLen = charCount - fromCharIdx +1;
         }
-      
+
         // invalid length :  right('abc', -5) -> ''
-        if (charLen <=0) { 
+        if (charLen <=0) {
           out.start = out.end = 0;
-        } else {          
+        } else {
           //Do 2nd scan of string. Get bytes corresponding chars in range.
-          out.start = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, string.start, string.end, fromCharIdx-1);        
-          out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, out.start, string.end, charLen);           
-        } 
+          out.start = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, string.start, string.end, fromCharIdx-1);
+          out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(string.buffer, out.start, string.end, charLen);
+        }
       }
     }
   }
@@ -406,26 +441,26 @@ public class StringFunctions{
 
   @FunctionTemplate(name = "initcap", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class InitCap implements DrillSimpleFunc{
-    
+
     @Param VarCharHolder input;
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
+
     public void eval() {
       out.buffer = buffer;
       out.start = 0;
       out.end = input.end - input.start;
-           
+
       // Assumes Alpha as [A-Za-z0-9]
-      // white space is treated as everything else.      
+      // white space is treated as everything else.
       boolean capNext = true;
       for (int id = input.start; id < input.end; id++) {
         byte  currentByte = input.buffer.getByte(id);
-        
+
         // 'A - Z' : 0x41 - 0x5A
         // 'a - z' : 0x61 - 0x7A
         // '0-9'   : 0x30 - 0x39
@@ -450,102 +485,102 @@ public class StringFunctions{
             capNext = true;
           }
         }
-        
+
         out.buffer.setByte(id - input.start, currentByte) ;
       } //end of for_loop
-      
+
     }
-    
+
   }
-  
+
   //Replace all occurrences in 'text' of substring 'from' with substring 'to'
   @FunctionTemplate(name = "replace", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Replace implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder text;
     @Param  VarCharHolder from;
     @Param  VarCharHolder to;
-    @Workspace ByteBuf buffer;     
+    @Workspace ByteBuf buffer;
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
+
     public void eval(){
       out.buffer = buffer;
       out.start = out.end = 0;
       int fromL = from.end - from.start;
       int textL = text.end - text.start;
-      
+
       if (fromL > 0 && fromL <= textL) {
         //If "from" is not empty and it's length is no longer than text's length
         //then, we may find a match, and do replace.
-        int i = text.start; 
+        int i = text.start;
         for (; i<=text.end - fromL; ) {
           int j = from.start;
           for (; j<from.end; j++) {
             if (text.buffer.getByte(i + j - from.start) != from.buffer.getByte(j))
               break;
           }
-        
+
           if (j == from.end ) {
-            //find a true match ("from" is not empty), copy entire "to" string to out buffer           
+            //find a true match ("from" is not empty), copy entire "to" string to out buffer
             for (int k = to.start ; k< to.end; k++) {
               out.buffer.setByte(out.end++, to.buffer.getByte(k));
             }
-          
+
             //advance i by the length of "from"
             i += from.end - from.start;
           } else {
-            //no match. copy byte i in text, advance i by 1. 
+            //no match. copy byte i in text, advance i by 1.
             out.buffer.setByte(out.end++, text.buffer.getByte(i++));
           }
         }
-        
+
         //Copy the tail part of text (length < fromL).
         for (; i< text.end; i++) {
           out.buffer.setByte(out.end++, text.buffer.getByte(i));
         }
       } else {
-        //If "from" is empty or its length is larger than text's length, 
-        //then, we just set "out" as "text". 
+        //If "from" is empty or its length is larger than text's length,
+        //then, we just set "out" as "text".
         out.buffer = text.buffer;
         out.start = text.start;
         out.end = text.end;
       }
-      
+
     } // end of eval()
-    
+
   }
 
   /*
-   * Fill up the string to length 'length' by prepending the characters 'fill' in the beginning of 'text'. 
+   * Fill up the string to length 'length' by prepending the characters 'fill' in the beginning of 'text'.
    * If the string is already longer than length, then it is truncated (on the right).
    */
   @FunctionTemplate(name = "lpad", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Lpad implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder text;
     @Param  BigIntHolder length;
     @Param  VarCharHolder fill;
-    @Workspace ByteBuf buffer;     
-    
+    @Workspace ByteBuf buffer;
+
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
-    public void eval() {      
+
+    public void eval() {
       byte currentByte = 0;
       int id = 0;
       //get the char length of text.
       int textCharCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(text.buffer, text.start, text.end);
-                        
+
       //get the char length of fill.
       int fillCharCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(fill.buffer, fill.start, fill.end);
-      
+
       if (length.value <= 0) {
         //case 1: target length is <=0, then return an empty string.
         out.buffer = buffer;
@@ -565,58 +600,58 @@ public class StringFunctions{
         int count = 0;
         out.buffer = buffer;
         out.start = out.end = 0;
-        
+
         while (count < length.value - textCharCount) {
           for (id = fill.start; id < fill.end; id++) {
             if (count == length.value - textCharCount)
               break;
-            
-            currentByte = fill.buffer.getByte(id);           
+
+            currentByte = fill.buffer.getByte(id);
             if (currentByte < 0x128  ||           // 1-byte char. First byte is 0xxxxxxx.
                 (currentByte & 0xE0) == 0xC0 ||   // 2-byte char. First byte is 110xxxxx
-                (currentByte & 0xF0) == 0xE0 ||   // 3-byte char. First byte is 1110xxxx 
+                (currentByte & 0xF0) == 0xE0 ||   // 3-byte char. First byte is 1110xxxx
                 (currentByte & 0xF8) == 0xF0) {   //4-byte char. First byte is 11110xxx
               count ++;  //Advance the counter, since we find one char.
             }
             out.buffer.setByte(out.end++, currentByte);
-          }                
+          }
         } // end of while
-        
+
         //copy "text" into "out"
-        for (id = text.start; id < text.end; id++) 
+        for (id = text.start; id < text.end; id++)
           out.buffer.setByte(out.end++, text.buffer.getByte(id));
       }
     } // end of eval
-    
+
   }
-  
+
   /**
    * Fill up the string to length "length" by appending the characters 'fill' at the end of 'text'
    * If the string is already longer than length then it is truncated.
    */
   @FunctionTemplate(name = "rpad", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Rpad implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder text;
     @Param  BigIntHolder length;
     @Param  VarCharHolder fill;
-    @Workspace ByteBuf buffer;     
-    
+    @Workspace ByteBuf buffer;
+
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
-    public void eval() {      
+
+    public void eval() {
       byte currentByte = 0;
       int id = 0;
       //get the char length of text.
       int textCharCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(text.buffer, text.start, text.end);
-                        
+
       //get the char length of fill.
       int fillCharCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(fill.buffer, fill.start, fill.end);
-      
+
       if (length.value <= 0) {
         //case 1: target length is <=0, then return an empty string.
         out.buffer = buffer;
@@ -631,36 +666,36 @@ public class StringFunctions{
         out.buffer = text.buffer;
         out.start = text.start;
         out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(text.buffer, text.start, text.end, (int)length.value);
-      } else if (length.value > textCharCount) {        
+      } else if (length.value > textCharCount) {
         //case 4: copy "text" into "out", then copy "fill" on the right.
         out.buffer = buffer;
         out.start = out.end = 0;
 
-        for (id = text.start; id < text.end; id++) 
+        for (id = text.start; id < text.end; id++)
           out.buffer.setByte(out.end++, text.buffer.getByte(id));
 
         //copy "fill" on right. Total # of char to copy : length.value - textCharCount
         int count = 0;
-        
+
         while (count < length.value - textCharCount) {
           for (id = fill.start; id < fill.end; id++) {
             if (count == length.value - textCharCount)
               break;
-            
-            currentByte = fill.buffer.getByte(id);           
+
+            currentByte = fill.buffer.getByte(id);
             if (currentByte < 0x128  ||           // 1-byte char. First byte is 0xxxxxxx.
                 (currentByte & 0xE0) == 0xC0 ||   // 2-byte char. First byte is 110xxxxx
-                (currentByte & 0xF0) == 0xE0 ||   // 3-byte char. First byte is 1110xxxx 
+                (currentByte & 0xF0) == 0xE0 ||   // 3-byte char. First byte is 1110xxxx
                 (currentByte & 0xF8) == 0xF0) {   //4-byte char. First byte is 11110xxx
               count ++;  //Advance the counter, since we find one char.
             }
             out.buffer.setByte(out.end++, currentByte);
-          }                
+          }
         } // end of while
-        
+
       }
     } // end of eval
-    
+
   }
 
   /**
@@ -668,18 +703,18 @@ public class StringFunctions{
    */
   @FunctionTemplate(name = "ltrim", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Ltrim implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder text;
     @Param  VarCharHolder from;
-    
+
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
     }
-    
-    public void eval() {      
+
+    public void eval() {
       out.buffer = text.buffer;
-      out.start = out.end = text.end; 
+      out.start = out.end = text.end;
 
       byte currentByte = 0;
       int id = 0;
@@ -687,26 +722,26 @@ public class StringFunctions{
       //Scan from left of "text", stop until find a char not in "from"
       for (id = text.start; id < text.end; ) {
         currentByte = text.buffer.getByte(id);
-        
+
         bytePerChar = 0;
-        
+
         if (currentByte < 0x128)                 // 1-byte char. First byte is 0xxxxxxx.
           bytePerChar = 1;
         else if ((currentByte & 0xE0) == 0xC0 )   // 2-byte char. First byte is 110xxxxx
           bytePerChar = 2;
-        else if ((currentByte & 0xF0) == 0xE0 )   // 3-byte char. First byte is 1110xxxx 
+        else if ((currentByte & 0xF0) == 0xE0 )   // 3-byte char. First byte is 1110xxxx
           bytePerChar = 3;
         else if ((currentByte & 0xF8) == 0xF0)    //4-byte char. First byte is 11110xxx
           bytePerChar = 4;
-        
+
         //Scan to check if "from" contains the character of "byterPerChar" bytes.
         int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(from.buffer, from.start, from.end,
                                                                                             text.buffer, id, id + bytePerChar);
         if (pos < 0) { // Found the 1st char not in "from", stop
-          out.start = id; 
+          out.start = id;
           break;
         }
-        id += bytePerChar; //Advance to next character.  
+        id += bytePerChar; //Advance to next character.
       }
     } // end of eval
 
@@ -717,18 +752,18 @@ public class StringFunctions{
    */
   @FunctionTemplate(name = "rtrim", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Rtrim implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder text;
     @Param  VarCharHolder from;
-    
+
     @Output VarCharHolder out;
 
     public void setup(RecordBatch incoming){
     }
-    
-    public void eval() {      
+
+    public void eval() {
       out.buffer = text.buffer;
-      out.start = out.end = text.start; 
+      out.start = out.end = text.start;
 
       byte currentByte = 0;
       int id = 0;
@@ -736,7 +771,7 @@ public class StringFunctions{
       //Scan from right of "text", stop until find a char not in "from"
       for (id = text.end-1; id>=  text.start; ) {
         currentByte = text.buffer.getByte(id);
-        
+
         bytePerChar = 0;
         //In UTF-8 encoding, the continuation byte for a multi-byte char is 10xxxxxx.
         //Continue back-off to prior byte if it's continuation byte
@@ -747,51 +782,51 @@ public class StringFunctions{
           bytePerChar = 1;
         else if ((currentByte & 0xE0) == 0xC0 )   // 2-byte char. First byte is 110xxxxx
           bytePerChar = 2;
-        else if ((currentByte & 0xF0) == 0xE0 )   // 3-byte char. First byte is 1110xxxx 
+        else if ((currentByte & 0xF0) == 0xE0 )   // 3-byte char. First byte is 1110xxxx
           bytePerChar = 3;
         else if ((currentByte & 0xF8) == 0xF0)    //4-byte char. First byte is 11110xxx
           bytePerChar = 4;
-        
-        //Scan to check if "from" contains the character of "byterPerChar" bytes. The lead byte starts at id.  
+
+        //Scan to check if "from" contains the character of "byterPerChar" bytes. The lead byte starts at id.
         int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(from.buffer, from.start, from.end,
                                                                                             text.buffer, id, id + bytePerChar);
         if (pos < 0) { // Found the 1st char not in "from", stop
-          out.end = id+ bytePerChar; 
+          out.end = id+ bytePerChar;
           break;
         }
- 
-        id --; // back-off to prior character.  
+
+        id --; // back-off to prior character.
       }
     } // end of eval
   }
 
   //Concatenate the text representations of the arguments. NULL arguments are ignored.
-  //TODO: NullHanding.INTERNAL for DrillSimpleFunc requires change in code generation. 
+  //TODO: NullHanding.INTERNAL for DrillSimpleFunc requires change in code generation.
   @FunctionTemplate(name = "concat", scope = FunctionScope.SIMPLE, nulls = NullHandling.INTERNAL)
   public static class Concat implements DrillSimpleFunc{
-    
+
     @Param  VarCharHolder left;
     @Param  VarCharHolder right;
     @Output VarCharHolder out;
-    @Workspace ByteBuf buffer;     
-    
-    
+    @Workspace ByteBuf buffer;
+
+
     public void setup(RecordBatch incoming){
       buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [8000]);
     }
-    
+
     public void eval(){
       out.buffer = buffer;
       out.start = out.end = 0;
-      
+
       int id = 0;
-      for (id = left.start; id < left.end; id++) 
+      for (id = left.start; id < left.end; id++)
         out.buffer.setByte(out.end++, left.buffer.getByte(id));
-      
+
       for (id = right.start; id < right.end; id++)
         out.buffer.setByte(out.end++, right.buffer.getByte(id));
-    } 
-    
+    }
+
   }
 
   @FunctionTemplate(name = "concat", scope = FunctionScope.SIMPLE, nulls = NullHandling.INTERNAL)

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/e4101807/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
index 51aa633..af741a5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestStringFunctions.java
@@ -93,7 +93,7 @@ public class TestStringFunctions extends ExecTest {
 
       for (int i = 0; i<res.length; i++) {
         assertEquals(String.format("column %s does not match", i), expectedResults[i],  res[i]);
-      }
+      }      
     }
 
     if(context.getFailureCause() != null){
@@ -190,7 +190,7 @@ public class TestStringFunctions extends ExecTest {
   @Test
   public void testSubstr(@Injectable final DrillbitContext bitContext,
                            @Injectable UserServer.UserClientConnection connection) throws Throwable{
-    Object [] expected = new Object[] {"abc", "bcd", "bcdef", "bcdef", "", "", "", "", "भारत", "वर्ष", "वर्ष"};
+    Object [] expected = new Object[] {"abc", "bcd", "bcdef", "bcdef", "", "", "", "", "भारत", "वर्ष", "वर्ष", "cdef", "", "", "", "ड्रिल"};
 
     runTest(bitContext, connection, expected, "functions/string/testSubstr.json");
   }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/e4101807/exec/java-exec/src/test/resources/functions/string/testSubstr.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/functions/string/testSubstr.json b/exec/java-exec/src/test/resources/functions/string/testSubstr.json
index 94467ae..02c7a2d 100644
--- a/exec/java-exec/src/test/resources/functions/string/testSubstr.json
+++ b/exec/java-exec/src/test/resources/functions/string/testSubstr.json
@@ -33,7 +33,12 @@
               { ref: "col8", expr: "substring('abcdef', 10, 2)"},
               { ref: "col9", expr: "substring('भारतवर्ष', 1, 4)"},
               { ref: "col10", expr: "substring('भारतवर्ष', 5, 4)"},
-              { ref: "col11", expr: "substring('भारतवर्ष', 5, 5)"}
+              { ref: "col11", expr: "substring('भारतवर्ष', 5, 5)"},
+              { ref: "col12", expr: "substring('abcdef', 3)"},
+              { ref: "col13", expr: "substring('abcdef', -2)"},
+              { ref: "col14", expr: "substring('abcdef', 0)"},
+              { ref: "col15", expr: "substring('abcdef', 10)"},
+              { ref: "col16", expr: "substring('अपाचे ड्रिल', 7)"}
             ]
         },
         {