You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "William Dee (JIRA)" <ji...@apache.org> on 2015/12/21 17:59:46 UTC
[jira] [Updated] (SPARK-12451) Regexp functions don't support
patterns containing '*/'
[ https://issues.apache.org/jira/browse/SPARK-12451?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
William Dee updated SPARK-12451:
--------------------------------
Description:
When using the regexp functions in Spark SQL, patterns containing '*/' cause runtime errors in the auto-generated code. This is because the code generator creates a multiline comment containing, among other things, the pattern.
Here is an excerpt from my stack trace to illustrate (helpfully, the stack trace includes all of the auto-generated code):
{code}
Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column 54: Unexpected token "," in primary
at org.codehaus.janino.Parser.compileException(Parser.java:3125)
at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512)
at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252)
at org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211)
at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190)
at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169)
at org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072)
at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046)
at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025)
at org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004)
at org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983)
at org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962)
at org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941)
at org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922)
at org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901)
at org.codehaus.janino.Parser.parseExpression(Parser.java:1886)
at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149)
at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085)
at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938)
at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620)
at org.codehaus.janino.Parser.parseClassBody(Parser.java:515)
at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481)
at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577)
at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387)
... line 232 ...
/* regexp_replace(input[46, StringType],^.*/,) */
/* input[46, StringType] */
boolean isNull31 = i.isNullAt(46);
UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46));
boolean isNull24 = true;
UTF8String primitive25 = null;
if (!isNull31) {
/* ^.*/ */
/* expression: ^.*/ */
Object obj35 = expressions[4].eval(i);
boolean isNull33 = obj35 == null;
UTF8String primitive34 = null;
if (!isNull33) {
primitive34 = (UTF8String) obj35;
}
...
{code}
Note the multiple multiline comments; these obviously break when the regex pattern contains the end-of-comment token '*/'.
was:
When using the regexp functions in Spark SQL, patterns containing '*/' create runtime errors in the auto generated code. This is due to the fact that the code generator creates a multiline comment containing, amongst other things, the pattern.
Here is an excerpt from my stacktrace to illustrate:
{code}
Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column 54: Unexpected token "," in primary
at org.codehaus.janino.Parser.compileException(Parser.java:3125)
at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512)
at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252)
at org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211)
at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190)
at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169)
at org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072)
at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046)
at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025)
at org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004)
at org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983)
at org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962)
at org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941)
at org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922)
at org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901)
at org.codehaus.janino.Parser.parseExpression(Parser.java:1886)
at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149)
at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085)
at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938)
at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620)
at org.codehaus.janino.Parser.parseClassBody(Parser.java:515)
at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481)
at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577)
at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387)
... line 232 ...
/* regexp_replace(input[46, StringType],^.*/,) */
/* input[46, StringType] */
boolean isNull31 = i.isNullAt(46);
UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46));
boolean isNull24 = true;
UTF8String primitive25 = null;
if (!isNull31) {
/* ^.*/ */
/* expression: ^.*/ */
Object obj35 = expressions[4].eval(i);
boolean isNull33 = obj35 == null;
UTF8String primitive34 = null;
if (!isNull33) {
primitive34 = (UTF8String) obj35;
}
...
{code}
Note the multiple multiline comments, these obviously break when the regex pattern contains the end-of-comment token '*/'
> Regexp functions don't support patterns containing '*/'
> -------------------------------------------------------
>
> Key: SPARK-12451
> URL: https://issues.apache.org/jira/browse/SPARK-12451
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.5.2
> Reporter: William Dee
>
> When using the regexp functions in Spark SQL, patterns containing '*/' create runtime errors in the auto generated code. This is due to the fact that the code generator creates a multiline comment containing, amongst other things, the pattern.
> Here is an excerpt from my stacktrace to illustrate: (Helpfully, the stack trace includes all of the auto-generated code)
> {code}
> Caused by: org.codehaus.commons.compiler.CompileException: Line 232, Column 54: Unexpected token "," in primary
> at org.codehaus.janino.Parser.compileException(Parser.java:3125)
> at org.codehaus.janino.Parser.parsePrimary(Parser.java:2512)
> at org.codehaus.janino.Parser.parseUnaryExpression(Parser.java:2252)
> at org.codehaus.janino.Parser.parseMultiplicativeExpression(Parser.java:2211)
> at org.codehaus.janino.Parser.parseAdditiveExpression(Parser.java:2190)
> at org.codehaus.janino.Parser.parseShiftExpression(Parser.java:2169)
> at org.codehaus.janino.Parser.parseRelationalExpression(Parser.java:2072)
> at org.codehaus.janino.Parser.parseEqualityExpression(Parser.java:2046)
> at org.codehaus.janino.Parser.parseAndExpression(Parser.java:2025)
> at org.codehaus.janino.Parser.parseExclusiveOrExpression(Parser.java:2004)
> at org.codehaus.janino.Parser.parseInclusiveOrExpression(Parser.java:1983)
> at org.codehaus.janino.Parser.parseConditionalAndExpression(Parser.java:1962)
> at org.codehaus.janino.Parser.parseConditionalOrExpression(Parser.java:1941)
> at org.codehaus.janino.Parser.parseConditionalExpression(Parser.java:1922)
> at org.codehaus.janino.Parser.parseAssignmentExpression(Parser.java:1901)
> at org.codehaus.janino.Parser.parseExpression(Parser.java:1886)
> at org.codehaus.janino.Parser.parseBlockStatement(Parser.java:1149)
> at org.codehaus.janino.Parser.parseBlockStatements(Parser.java:1085)
> at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:938)
> at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:620)
> at org.codehaus.janino.Parser.parseClassBody(Parser.java:515)
> at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:481)
> at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:577)
> at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229)
> at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:192)
> at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:84)
> at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:77)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387)
> ... line 232 ...
> /* regexp_replace(input[46, StringType],^.*/,) */
>
> /* input[46, StringType] */
>
> boolean isNull31 = i.isNullAt(46);
> UTF8String primitive32 = isNull31 ? null : (i.getUTF8String(46));
>
> boolean isNull24 = true;
> UTF8String primitive25 = null;
> if (!isNull31) {
> /* ^.*/ */
>
> /* expression: ^.*/ */
> Object obj35 = expressions[4].eval(i);
> boolean isNull33 = obj35 == null;
> UTF8String primitive34 = null;
> if (!isNull33) {
> primitive34 = (UTF8String) obj35;
> }
> ...
> {code}
> Note the multiple multiline comments, these obviously break when the regex pattern contains the end-of-comment token '*/'
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org