You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2018/02/20 07:18:19 UTC
[2/4] incubator-hivemall git commit: Close #131: [v0.5.0-rc3] Merge
v0.5.0 branch
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/pom.xml
----------------------------------------------------------------------
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..d018b8d
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,295 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>hivemall-spark</artifactId>
+ <packaging>pom</packaging>
+ <name>Hivemall on Apache Spark</name>
+
+ <modules>
+ <module>common</module>
+ <module>spark-2.0</module>
+ <module>spark-2.1</module>
+ <module>spark-2.2</module>
+ </modules>
+
+ <properties>
+ <main.basedir>${project.parent.basedir}</main.basedir>
+ <scala.version>2.11.8</scala.version>
+ <scala.binary.version>2.11</scala.binary.version>
+ <scalatest.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+ </properties>
+
+ <dependencyManagement>
+ <dependencies>
+ <!-- compile scope -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-xgboost</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.8</version>
+ <scope>compile</scope>
+ </dependency>
+
+ <!-- provided scope -->
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- test dependencies -->
+ <dependency>
+ <groupId>org.apache.hivemall</groupId>
+ <artifactId>hivemall-mixserv</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <version>2.2.4</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <build>
+ <directory>target</directory>
+ <outputDirectory>target/classes</outputDirectory>
+ <finalName>${project.artifactId}-${project.version}</finalName>
+ <testOutputDirectory>target/test-classes</testOutputDirectory>
+
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ <version>3.2.2</version>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <version>1.0</version>
+ <configuration>
+ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+ <junitxml>.</junitxml>
+ <filereports>SparkTestSuite.txt</filereports>
+ <argLine>${scalatest.jvm.opts}</argLine>
+ <stderr />
+ <environmentVariables>
+ <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+ <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+ <SPARK_TESTING>1</SPARK_TESTING>
+ <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
+ <PATH>${env.JAVA_HOME}/bin:${env.PATH}</PATH>
+ </environmentVariables>
+ <systemProperties>
+ <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+ <derby.system.durability>test</derby.system.durability>
+ <java.awt.headless>true</java.awt.headless>
+ <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
+ <spark.testing>1</spark.testing>
+ <spark.ui.enabled>false</spark.ui.enabled>
+ <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
+ <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
+ <!-- Needed by sql/hive tests. -->
+ <test.src.tables>__not_used__</test.src.tables>
+ </systemProperties>
+ <tagsToExclude>${test.exclude.tags}</tagsToExclude>
+ </configuration>
+ </plugin>
+ <!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+ <outputDirectory>${main.basedir}/target</outputDirectory>
+ <minimizeJar>false</minimizeJar>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <createSourcesJar>true</createSourcesJar>
+ <artifactSet>
+ <includes>
+ <include>org.apache.hivemall:hivemall-spark-common</include>
+ <!-- hivemall-core -->
+ <include>org.apache.hivemall:hivemall-core</include>
+ <include>io.netty:netty-all</include>
+ <include>com.github.haifengl:smile-core</include>
+ <include>com.github.haifengl:smile-math</include>
+ <include>com.github.haifengl:smile-data</include>
+ <include>org.tukaani:xz</include>
+ <include>org.apache.commons:commons-math3</include>
+ <include>org.roaringbitmap:RoaringBitmap</include>
+ <include>it.unimi.dsi:fastutil</include>
+ <include>com.clearspring.analytics:stream</include>
+ <!-- hivemall-nlp -->
+ <include>org.apache.hivemall:hivemall-nlp</include>
+ <include>org.apache.lucene:lucene-analyzers-kuromoji</include>
+ <include>org.apache.lucene:lucene-analyzers-smartcn</include>
+ <include>org.apache.lucene:lucene-analyzers-common</include>
+ <include>org.apache.lucene:lucene-core</include>
+ <!-- hivemall-xgboost -->
+ <include>org.apache.hivemall:hivemall-xgboost</include>
+ <include>io.github.myui:xgboost4j</include>
+ <include>com.esotericsoftware.kryo:kryo</include>
+ </includes>
+ </artifactSet>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <manifestEntries>
+ <Implementation-Title>${project.name}</Implementation-Title>
+ <Implementation-Version>${project.version}</Implementation-Version>
+ <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+ </manifestEntries>
+ </transformer>
+ </transformers>
+ <filters>
+ <filter>
+ <artifact>org.apache.lucene:*</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>com.esotericsoftware.kryo:kryo</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/LICENSE.txt</exclude>
+ <exclude>META-INF/NOTICE.txt</exclude>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>*.jar</exclude>
+ <exclude>tracker.py</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ <version>0.8.0</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ <configuration>
+ <verbose>false</verbose>
+ <failOnViolation>true</failOnViolation>
+ <includeTestSourceDirectory>true</includeTestSourceDirectory>
+ <failOnWarning>false</failOnWarning>
+ <sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
+ <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
+ <configLocation>spark/scalastyle-config.xml</configLocation>
+ <outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
+ <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
+ <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>scala-compile-first</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>add-source</goal>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>scala-test-compile</id>
+ <phase>process-test-resources</phase>
+ <goals>
+ <goal>testCompile</goal>
+ </goals>
+ </execution>
+ </executions>
+ <!-- For incremental compilation -->
+ <configuration>
+ <scalaVersion>${scala.version}</scalaVersion>
+ <recompileMode>incremental</recompileMode>
+ <useZincServer>true</useZincServer>
+ <args>
+ <arg>-unchecked</arg>
+ <arg>-deprecation</arg>
+          <!-- TODO: To enable this option, we need to fix many warnings -->
+ <!-- <arg>-feature</arg> -->
+ </args>
+ <jvmArgs>
+ <jvmArg>-Xms768m</jvmArg>
+ <jvmArg>-Xmx1024m</jvmArg>
+ <jvmArg>-XX:PermSize=128m</jvmArg>
+ <jvmArg>-XX:MaxPermSize=512m</jvmArg>
+ <jvmArg>-XX:ReservedCodeCacheSize=512m</jvmArg>
+ </jvmArgs>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/scalastyle-config.xml b/spark/scalastyle-config.xml
new file mode 100644
index 0000000..13d1c47
--- /dev/null
+++ b/spark/scalastyle-config.xml
@@ -0,0 +1,333 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+If you wish to turn off checking for a section of code, you can put a comment in the source
+before and after the section, with the following syntax:
+
+ // scalastyle:off
+ ... // stuff that breaks the styles
+ // scalastyle:on
+
+You can also disable only one rule, by specifying its rule id, as specified in:
+ http://www.scalastyle.org/rules-0.7.0.html
+
+ // scalastyle:off no.finalize
+ override def finalize(): Unit = ...
+ // scalastyle:on no.finalize
+
+This file is divided into 3 sections:
+ (1) rules that we enforce.
+ (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
+ (or we need to make the scalastyle rule more configurable).
+ (3) rules that we don't want to enforce.
+-->
+
+<scalastyle>
+ <name>Scalastyle standard configuration</name>
+
+ <!-- ================================================================================ -->
+ <!-- rules we enforce -->
+ <!-- ================================================================================ -->
+
+ <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
+ <parameters>
+ <parameter name="header"><![CDATA[/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */]]></parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+ <parameters>
+ <parameter name="maxLineLength"><![CDATA[100]]></parameter>
+ <parameter name="tabSize"><![CDATA[2]]></parameter>
+ <parameter name="ignoreImports">true</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
+ <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
+ <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
+ <parameters>
+ <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
+ <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
+
+ <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
+
+ <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+ </parameters>
+ </check>
+
+ <!-- ??? usually shouldn't be checked into the code base. -->
+ <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
+
+ <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
+ <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">^println$</parameter></parameters>
+ <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
+ // scalastyle:off println
+ println(...)
+ // scalastyle:on println]]></customMessage>
+ </check>
+
+ <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
+ <customMessage><![CDATA[
+ @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
+ ]]></customMessage>
+ </check>
+
+ <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
+ ShutdownHookManager.addShutdownHook instead.
+ If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
+ // scalastyle:off runtimeaddshutdownhook
+ Runtime.getRuntime.addShutdownHook(...)
+ // scalastyle:on runtimeaddshutdownhook
+ ]]></customMessage>
+ </check>
+
+ <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
+ java.util.concurrent.ConcurrentLinkedQueue instead.
+ If you must use mutable.SynchronizedBuffer, wrap the code block with
+ // scalastyle:off mutablesynchronizedbuffer
+ mutable.SynchronizedBuffer[...]
+ // scalastyle:on mutablesynchronizedbuffer
+ ]]></customMessage>
+ </check>
+
+ <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Class\.forName</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
+ If you must use Class.forName, wrap the code block with
+ // scalastyle:off classforname
+ Class.forName(...)
+ // scalastyle:on classforname
+ ]]></customMessage>
+ </check>
+
+ <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">Await\.result</parameter></parameters>
+ <customMessage><![CDATA[
+ Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
+ If you must use Await.result, wrap the code block with
+ // scalastyle:off awaitresult
+ Await.result(...)
+ // scalastyle:on awaitresult
+ ]]></customMessage>
+ </check>
+
+ <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
+ <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">JavaConversions</parameter></parameters>
+ <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
+ scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
+ </check>
+
+ <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
+ <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
+ of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
+ <parameters>
+ <parameter name="groups">java,scala,3rdParty,spark</parameter>
+ <parameter name="group.java">javax?\..*</parameter>
+ <parameter name="group.scala">scala\..*</parameter>
+ <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
+ <parameter name="group.spark">org\.apache\.spark\..*</parameter>
+ </parameters>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
+ <parameters>
+ <parameter name="tokens">COMMA</parameter>
+ </parameters>
+ </check>
+
+ <!-- SPARK-3854: Single Space between ')' and '{' -->
+ <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">\)\{</parameter></parameters>
+ <customMessage><![CDATA[
+ Single Space between ')' and `{`.
+ ]]></customMessage>
+ </check>
+
+ <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters>
+ <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
+ </check>
+
+ <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
+ <customMessage>Omit braces in case clauses.</customMessage>
+ </check>
+
+ <!-- SPARK-16877: Avoid Java annotations -->
+ <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+ <parameters><parameter name="regex">^Override$</parameter></parameters>
+ <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
+ </check>
+
+ <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
+
+ <!-- ================================================================================ -->
+ <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
+ <!-- ================================================================================ -->
+
+ <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
+ <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
+ <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
+ <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
+
+ <!-- This breaks symbolic method names so we don't turn it on. -->
+ <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
+ <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
+ <parameters>
+ <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
+ </parameters>
+ </check>
+
+ <!-- Should turn this on, but we have a few places that need to be fixed first -->
+ <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+
+ <!-- ================================================================================ -->
+ <!-- rules we don't want -->
+ <!-- ================================================================================ -->
+
+ <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
+ <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
+ </check>
+
+ <!-- We want the opposite of this: NewLineAtEofChecker -->
+ <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
+
+ <!-- This one complains about all kinds of random things. Disable. -->
+ <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
+
+ <!-- We use return quite a bit for control flows and guards -->
+ <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
+
+ <!-- We use null a lot in low level code and to interface with 3rd party code -->
+ <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
+    <parameters><parameter name="maxFileLength">800</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
+ <parameters><parameter name="maxTypes">30</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
+ <parameters><parameter name="maximum">10</parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here ... -->
+ <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
+ <parameters><parameter name="maxLength">50</parameter></parameters>
+ </check>
+
+ <!-- Not exactly feasible to enforce this right now. -->
+ <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
+ <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
+ <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
+ </check>
+
+ <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
+ <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
+ <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
+ </check>
+
+</scalastyle>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/pom.xml b/spark/spark-2.0/pom.xml
index e197586..54c817d 100644
--- a/spark/spark-2.0/pom.xml
+++ b/spark/spark-2.0/pom.xml
@@ -16,37 +16,36 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.0</artifactId>
<name>Hivemall on Spark 2.0</name>
<packaging>jar</packaging>
<properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.0.2</spark.version>
+ <spark.binary.version>2.0</spark.binary.version>
</properties>
<dependencies>
- <!-- hivemall dependencies -->
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -56,21 +55,12 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
-
- <!-- other provided dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -222,33 +124,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>${spark.test.jvm.opts}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index d3bf435..4a43afc 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 5e99fd8..399a557 100644
--- a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -293,7 +293,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/pom.xml b/spark/spark-2.1/pom.xml
index 3d07184..e10b4ab 100644
--- a/spark/spark-2.1/pom.xml
+++ b/spark/spark-2.1/pom.xml
@@ -16,23 +16,24 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.1</artifactId>
<name>Hivemall on Spark 2.1</name>
<packaging>jar</packaging>
<properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.1.1</spark.version>
+ <spark.binary.version>2.1</spark.binary.version>
</properties>
<dependencies>
@@ -40,13 +41,11 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -56,21 +55,12 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
-
- <!-- other provided dependencies -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,114 +96,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -222,33 +124,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>${spark.test.jvm.opts}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index eb4ec04..cecceca 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -35,7 +35,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 84ab0cd..8dad4c3 100644
--- a/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -295,7 +295,7 @@ final class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/pom.xml b/spark/spark-2.2/pom.xml
index 5366e1d..47aea92 100644
--- a/spark/spark-2.2/pom.xml
+++ b/spark/spark-2.2/pom.xml
@@ -16,40 +16,40 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
+ <artifactId>hivemall-spark</artifactId>
+ <version>0.5.1-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>hivemall-spark</artifactId>
+ <artifactId>hivemall-spark2.2</artifactId>
<name>Hivemall on Spark 2.2</name>
<packaging>jar</packaging>
<properties>
- <PermGen>64m</PermGen>
- <MaxPermGen>512m</MaxPermGen>
- <CodeCacheSize>512m</CodeCacheSize>
- <main.basedir>${project.parent.basedir}</main.basedir>
+ <main.basedir>${project.parent.parent.basedir}</main.basedir>
+ <spark.version>2.2.0</spark.version>
+ <spark.binary.version>2.2</spark.binary.version>
+ <hadoop.version>2.6.5</hadoop.version>
+ <scalatest.jvm.opts>-ea -Xms768m -Xmx2g -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</scalatest.jvm.opts>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
- <!-- hivemall dependencies -->
+ <!-- compile scope -->
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-xgboost</artifactId>
- <version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
@@ -59,22 +59,13 @@
<scope>compile</scope>
</dependency>
- <!-- third-party dependencies -->
+ <!-- provided scope -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- <scope>compile</scope>
+ <scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.8</version>
- <scope>compile</scope>
- </dependency>
-
- <!-- other provided dependencies -->
- <dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
@@ -109,117 +100,26 @@
<dependency>
<groupId>org.apache.hivemall</groupId>
<artifactId>hivemall-mixserv</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>xerial-core</artifactId>
- <version>3.2.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
- <version>2.2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms1024m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- <jvmArg>-XX:PermSize=${PermGen}</jvmArg>
- <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
- <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- <!-- hivemall-spark_xx-xx.jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- </configuration>
- </plugin>
<!-- hivemall-spark_xx-xx-with-dependencies.jar including minimum dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <id>jar-with-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
- <outputDirectory>${project.parent.build.directory}</outputDirectory>
- <minimizeJar>false</minimizeJar>
- <createDependencyReducedPom>false</createDependencyReducedPom>
- <artifactSet>
- <includes>
- <include>org.apache.hivemall:hivemall-core</include>
- <include>org.apache.hivemall:hivemall-xgboost</include>
- <include>org.apache.hivemall:hivemall-spark-common</include>
- <include>com.github.haifengl:smile-core</include>
- <include>com.github.haifengl:smile-math</include>
- <include>com.github.haifengl:smile-data</include>
- <include>ml.dmlc:xgboost4j</include>
- <include>com.esotericsoftware.kryo:kryo</include>
- </includes>
- </artifactSet>
- </configuration>
- </execution>
- </executions>
</plugin>
<!-- disable surefire because there is no java test -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
@@ -228,33 +128,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <configuration>
- <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
- <junitxml>.</junitxml>
- <filereports>SparkTestSuite.txt</filereports>
- <argLine>-ea -Xmx2g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
- <stderr />
- <environmentVariables>
- <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
- <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
- <SPARK_TESTING>1</SPARK_TESTING>
- <JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
- </environmentVariables>
- <systemProperties>
- <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
- <derby.system.durability>test</derby.system.durability>
- <java.awt.headless>true</java.awt.headless>
- <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
- <spark.testing>1</spark.testing>
- <spark.ui.enabled>false</spark.ui.enabled>
- <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
- <!-- Needed by sql/hive tests. -->
- <test.src.tables>__not_used__</test.src.tables>
- </systemProperties>
- <tagsToExclude>${test.exclude.tags}</tagsToExclude>
- </configuration>
<executions>
<execution>
<id>test</id>
@@ -264,6 +137,16 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <configuration>
+ <environmentVariables>
+ <JAVA_HOME>${env.JAVA8_HOME}</JAVA_HOME>
+ <PATH>${env.JAVA8_HOME}/bin:${env.PATH}</PATH>
+ </environmentVariables>
+ </configuration>
+ </plugin>
</plugins>
</build>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
index 00617b7..2982d9c 100644
--- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
@@ -127,7 +127,7 @@ final class HivemallGroupedDataset(groupBy: RelationalGroupedDataset) {
* @group ensemble
*/
def max_label(score: String, label: String): DataFrame = {
- checkType(score, DoubleType)
+ // checkType(score, DoubleType)
checkType(label, StringType)
val udaf = HiveUDAFFunction(
"max_label",
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ b/spark/spark-2.2/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+ def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext)
+ : DStream[Row] = {
+ ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+ f(sqlContext.createDataFrame(rdd)).rdd
+ }
+ }
+}
+
+object HivemallStreamingOps {
+
+ /**
+ * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+ */
+ implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint])
+ : HivemallStreamingOps = {
+ new HivemallStreamingOps(ds)
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
index 1e1c574..f16eae0 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HiveUdfSuite.scala
@@ -36,7 +36,7 @@ final class HiveUdfWithFeatureSuite extends HivemallFeatureQueryTest {
checkAnswer(
sql(s"SELECT DISTINCT hivemall_version()"),
- Row("0.5.0-incubating-SNAPSHOT")
+ Row("0.5.1-incubating-SNAPSHOT")
)
// sql("DROP TEMPORARY FUNCTION IF EXISTS hivemall_version")
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index f73cb75..f2b7b6e 100644
--- a/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ b/spark/spark-2.2/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -562,7 +562,7 @@ class HivemallOpsWithFeatureSuite extends HivemallFeatureQueryTest {
}
test("misc - hivemall_version") {
- checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.0-incubating-SNAPSHOT"))
+ checkAnswer(DummyInputData.select(hivemall_version()), Row("0.5.1-incubating-SNAPSHOT"))
}
test("misc - rowid") {
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/pom.xml b/spark/spark-common/pom.xml
deleted file mode 100644
index 50670d3..0000000
--- a/spark/spark-common/pom.xml
+++ /dev/null
@@ -1,146 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall</artifactId>
- <version>0.5.0-incubating-SNAPSHOT</version>
- <relativePath>../../pom.xml</relativePath>
- </parent>
-
- <artifactId>hivemall-spark-common</artifactId>
- <name>Hivemall on Spark Common</name>
- <packaging>jar</packaging>
-
- <properties>
- <main.basedir>${project.parent.basedir}</main.basedir>
- </properties>
-
- <dependencies>
- <!-- hivemall dependencies -->
- <dependency>
- <groupId>org.apache.hivemall</groupId>
- <artifactId>hivemall-core</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
-
- <!-- other provided dependencies -->
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <scope>provided</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
- <scope>provided</scope>
- </dependency>
- </dependencies>
-
- <build>
- <directory>target</directory>
- <outputDirectory>target/classes</outputDirectory>
- <finalName>${project.artifactId}-${project.version}</finalName>
- <testOutputDirectory>target/test-classes</testOutputDirectory>
- <plugins>
- <!-- For resolving spark binary incompatibility -->
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <version>3.0.0</version>
- <executions>
- <execution>
- <phase>initialize</phase>
- <goals>
- <goal>clean</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <!-- For incremental compilation -->
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <version>3.2.2</version>
- <executions>
- <execution>
- <id>scala-compile-first</id>
- <phase>process-resources</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- </execution>
- <execution>
- <id>scala-test-compile-first</id>
- <phase>process-test-resources</phase>
- <goals>
- <goal>testCompile</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <scalaVersion>${scala.version}</scalaVersion>
- <recompileMode>incremental</recompileMode>
- <useZincServer>true</useZincServer>
- <args>
- <arg>-unchecked</arg>
- <arg>-deprecation</arg>
- <!-- TODO: To enable this option, we need to fix many wornings -->
- <!-- <arg>-feature</arg> -->
- </args>
- <jvmArgs>
- <jvmArg>-Xms512m</jvmArg>
- <jvmArg>-Xmx1024m</jvmArg>
- </jvmArgs>
- </configuration>
- </plugin>
- </plugins>
- </build>
-</project>
-
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/scalastyle-config.xml b/spark/spark-common/scalastyle-config.xml
deleted file mode 100644
index 13d1c47..0000000
--- a/spark/spark-common/scalastyle-config.xml
+++ /dev/null
@@ -1,333 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-
-<!--
-If you wish to turn off checking for a section of code, you can put a comment in the source
-before and after the section, with the following syntax:
-
- // scalastyle:off
- ... // stuff that breaks the styles
- // scalastyle:on
-
-You can also disable only one rule, by specifying its rule id, as specified in:
- http://www.scalastyle.org/rules-0.7.0.html
-
- // scalastyle:off no.finalize
- override def finalize(): Unit = ...
- // scalastyle:on no.finalize
-
-This file is divided into 3 sections:
- (1) rules that we enforce.
- (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
- (or we need to make the scalastyle rule more configurable).
- (3) rules that we don't want to enforce.
--->
-
-<scalastyle>
- <name>Scalastyle standard configuration</name>
-
- <!-- ================================================================================ -->
- <!-- rules we enforce -->
- <!-- ================================================================================ -->
-
- <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
- <parameters>
- <parameter name="header"><![CDATA[/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */]]></parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
- <parameters>
- <parameter name="maxLineLength"><![CDATA[100]]></parameter>
- <parameter name="tabSize"><![CDATA[2]]></parameter>
- <parameter name="ignoreImports">true</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
- <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
- <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
- <parameters>
- <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
- <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
-
- <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
-
- <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
- </parameters>
- </check>
-
- <!-- ??? usually shouldn't be checked into the code base. -->
- <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
-
- <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
- <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">^println$</parameter></parameters>
- <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
- // scalastyle:off println
- println(...)
- // scalastyle:on println]]></customMessage>
- </check>
-
- <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
- <customMessage><![CDATA[
- @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
- ]]></customMessage>
- </check>
-
- <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
- ShutdownHookManager.addShutdownHook instead.
- If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
- // scalastyle:off runtimeaddshutdownhook
- Runtime.getRuntime.addShutdownHook(...)
- // scalastyle:on runtimeaddshutdownhook
- ]]></customMessage>
- </check>
-
- <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
- java.util.concurrent.ConcurrentLinkedQueue instead.
- If you must use mutable.SynchronizedBuffer, wrap the code block with
- // scalastyle:off mutablesynchronizedbuffer
- mutable.SynchronizedBuffer[...]
- // scalastyle:on mutablesynchronizedbuffer
- ]]></customMessage>
- </check>
-
- <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Class\.forName</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
- If you must use Class.forName, wrap the code block with
- // scalastyle:off classforname
- Class.forName(...)
- // scalastyle:on classforname
- ]]></customMessage>
- </check>
-
- <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">Await\.result</parameter></parameters>
- <customMessage><![CDATA[
- Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
- If you must use Await.result, wrap the code block with
- // scalastyle:off awaitresult
- Await.result(...)
- // scalastyle:on awaitresult
- ]]></customMessage>
- </check>
-
- <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
- <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">JavaConversions</parameter></parameters>
- <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
- scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
- </check>
-
- <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
- <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
- of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
- <parameters>
- <parameter name="groups">java,scala,3rdParty,spark</parameter>
- <parameter name="group.java">javax?\..*</parameter>
- <parameter name="group.scala">scala\..*</parameter>
- <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
- <parameter name="group.spark">org\.apache\.spark\..*</parameter>
- </parameters>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
- <parameters>
- <parameter name="tokens">COMMA</parameter>
- </parameters>
- </check>
-
- <!-- SPARK-3854: Single Space between ')' and '{' -->
- <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">\)\{</parameter></parameters>
- <customMessage><![CDATA[
- Single Space between ')' and `{`.
- ]]></customMessage>
- </check>
-
- <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters>
- <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
- </check>
-
- <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
- <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
- <customMessage>Omit braces in case clauses.</customMessage>
- </check>
-
- <!-- SPARK-16877: Avoid Java annotations -->
- <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
- <parameters><parameter name="regex">^Override$</parameter></parameters>
- <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage>
- </check>
-
- <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
-
- <!-- ================================================================================ -->
- <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
- <!-- ================================================================================ -->
-
- <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
- <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
- <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
- <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
-
- <!-- This breaks symbolic method names so we don't turn it on. -->
- <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
- <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
- <parameters>
- <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
- </parameters>
- </check>
-
- <!-- Should turn this on, but we have a few places that need to be fixed first -->
- <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
-
- <!-- ================================================================================ -->
- <!-- rules we don't want -->
- <!-- ================================================================================ -->
-
- <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
- <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
- </check>
-
- <!-- We want the opposite of this: NewLineAtEofChecker -->
- <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
-
- <!-- This one complains about all kinds of random things. Disable. -->
- <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
-
- <!-- We use return quite a bit for control flows and guards -->
- <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
-
- <!-- We use null a lot in low level code and to interface with 3rd party code -->
- <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
- <parameters><parameter name="maxFileLength">800></parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
- <parameters><parameter name="maxTypes">30</parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
- <parameters><parameter name="maximum">10</parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here ... -->
- <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
- <parameters><parameter name="maxLength">50</parameter></parameters>
- </check>
-
- <!-- Not exactly feasible to enforce this right now. -->
- <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
- <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
- <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
- </check>
-
- <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
- <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
- <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
- </check>
-
-</scalastyle>