You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/04 14:29:52 UTC
[50/53] [abbrv] [partial] mahout git commit: end of day 6-2-2018
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 0edb4bb..d27bbbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,11 +12,11 @@ output-asf-email-examples/
target/
examples/bin/tmp
output
-mr/build/
-mr/input/
-mr/output/
-mr/testdata/
-mr/temp
+community/mr/build/
+community/mr/input/
+community/mr/output/
+community/mr/testdata/
+community/mr/temp
temp
foo
math-tests/
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/flink-batch/pom.xml
----------------------------------------------------------------------
diff --git a/community/community-engines/flink-batch/pom.xml b/community/community-engines/flink-batch/pom.xml
index 8103b21..64083f7 100644
--- a/community/community-engines/flink-batch/pom.xml
+++ b/community/community-engines/flink-batch/pom.xml
@@ -27,7 +27,7 @@
<relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>mahout-flink-batch_2.10</artifactId>
+ <artifactId>mahout-flink-batch_${scala.compat.version}</artifactId>
<name>-- Mahout Flink Engine (Community)</name>
<description>
Mahout Bindings for Apache Flink
@@ -35,7 +35,6 @@
<properties>
<flink.version>1.1.4</flink.version>
- <scala.compat.version>2.10</scala.compat.version>
</properties>
<packaging>jar</packaging>
@@ -193,11 +192,11 @@
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>1.2.2</version>
- </dependency>
+ <!--<dependency>-->
+ <!--<groupId>org.bytedeco</groupId>-->
+ <!--<artifactId>javacpp</artifactId>-->
+ <!--<version>1.2.2</version>-->
+ <!--</dependency>-->
<!-- enforce current version of kryo as of 0.10.1-->
<dependency>
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/pom.xml
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/pom.xml b/community/community-engines/h2o/pom.xml
index a5c5e4f..89f560c 100644
--- a/community/community-engines/h2o/pom.xml
+++ b/community/community-engines/h2o/pom.xml
@@ -27,7 +27,7 @@
<relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>mahout-h2o_2.10</artifactId>
+ <artifactId>mahout-h2o_${scala.compat.version}</artifactId>
<name>-- Mahout H2O Engine (Community)</name>
<description>
H2O Backend for Mahout DSL
@@ -37,8 +37,6 @@
<properties>
<h2o.version>0.1.25</h2o.version>
- <scala.compat.version>2.10</scala.compat.version>
- <scala.version>2.10.5</scala.version>
</properties>
@@ -162,17 +160,14 @@
</build>
<dependencies>
- <dependency>
- <groupId>org.scala-lang</groupId>
- <artifactId>scala-library</artifactId>
- <version>${scala.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.mahout</groupId>
- <artifactId>mahout-math-scala_${scala.compat.version}</artifactId>
- <version>${project.version}</version>
- </dependency>
+
+ <!--<dependency>-->
+ <!--<groupId>org.apache.mahout</groupId>-->
+ <!--<!–<artifactId>mahout-math-scala_${scala.compat.version}</artifactId>–>-->
+ <!--<artifactId>a</artifactId>-->
+ <!--<version>${project.version}</version>-->
+ <!--</dependency>-->
<!-- for MatrixWritable and VectorWritable -->
<dependency>
@@ -219,6 +214,10 @@
<artifactId>scalatest_${scala.compat.version}</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </dependency>
</dependencies>
<profiles>
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.alphaIDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.alphaIDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.alphaIDrm.drm.crc
deleted file mode 100644
index a46b916..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.alphaIDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.isComplementaryDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.isComplementaryDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.isComplementaryDrm.drm.crc
deleted file mode 100644
index ed8dd37..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.isComplementaryDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.labelIndex.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.labelIndex.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.labelIndex.drm.crc
deleted file mode 100644
index 73c0476..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.labelIndex.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.perlabelThetaNormalizerDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.perlabelThetaNormalizerDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.perlabelThetaNormalizerDrm.drm.crc
deleted file mode 100644
index f4e564e..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.perlabelThetaNormalizerDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerFeatureDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerFeatureDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerFeatureDrm.drm.crc
deleted file mode 100644
index 9e5209b..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerFeatureDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelAndFeatureDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelAndFeatureDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelAndFeatureDrm.drm.crc
deleted file mode 100644
index bc53d76..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelAndFeatureDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelDrm.drm.crc
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelDrm.drm.crc b/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelDrm.drm.crc
deleted file mode 100644
index 514624c..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/.weightsPerLabelDrm.drm.crc and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/alphaIDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/alphaIDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/alphaIDrm.drm
deleted file mode 100644
index d821c41..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/alphaIDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/isComplementaryDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/isComplementaryDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/isComplementaryDrm.drm
deleted file mode 100644
index 0e46cf2..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/isComplementaryDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/labelIndex.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/labelIndex.drm b/community/community-engines/h2o/tmp/naiveBayesModel/labelIndex.drm
deleted file mode 100644
index ceca885..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/labelIndex.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/perlabelThetaNormalizerDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/perlabelThetaNormalizerDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/perlabelThetaNormalizerDrm.drm
deleted file mode 100644
index 38b4904..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/perlabelThetaNormalizerDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerFeatureDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerFeatureDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerFeatureDrm.drm
deleted file mode 100644
index 1c3beea..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerFeatureDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelAndFeatureDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelAndFeatureDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelAndFeatureDrm.drm
deleted file mode 100644
index a2a0e13..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelAndFeatureDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelDrm.drm
----------------------------------------------------------------------
diff --git a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelDrm.drm b/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelDrm.drm
deleted file mode 100644
index 15c894f..0000000
Binary files a/community/community-engines/h2o/tmp/naiveBayesModel/weightsPerLabelDrm.drm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/pom.xml
----------------------------------------------------------------------
diff --git a/community/mahout-mr/pom.xml b/community/mahout-mr/pom.xml
new file mode 100644
index 0000000..6db69a1
--- /dev/null
+++ b/community/mahout-mr/pom.xml
@@ -0,0 +1,309 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.mahout</groupId>
+ <artifactId>community</artifactId>
+ <version>0.13.1-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <!-- modules inherit parent's group id and version. -->
+ <artifactId>mahout-mr</artifactId>
+ <name>- Mahout Classic (Map-Reduce Based Machine Learning)</name>
+ <description>Scalable machine learning libraries</description>
+
+ <packaging>jar</packaging>
+
+ <properties>
+ <hadoop.version>2.4.1</hadoop.version>
+ <lucene.version>5.5.2</lucene.version>
+ </properties>
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ </resource>
+ <resource>
+ <directory>../src/conf</directory>
+ <includes>
+ <include>driver.classes.default.props</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <!-- copy the jar to ../ (documented as MAHOUT_HOME); NOTE(review): from community/mahout-mr, ../ is community/ - confirm target -->
+ <plugin>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <version>1.4</version>
+ <executions>
+ <execution>
+ <id>copy</id>
+ <phase>package</phase>
+ <configuration>
+ <tasks>
+ <copy file="target/mahout-mr-${project.version}.jar" tofile="../mahout-mr-${project.version}.jar" />
+ </tasks>
+ </configuration>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <!-- create test jar so other modules can reuse the core test utility classes. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ <!-- create core hadoop job jar -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>job</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/job.xml</descriptor>
+ </descriptors>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <artifactId>maven-source-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-remote-resources-plugin</artifactId>
+ <configuration>
+ <appendedResourcesDirectory>src/main/resources</appendedResourcesDirectory>
+ <resourceBundles>
+ <resourceBundle>org.apache:apache-jar-resource-bundle:1.4</resourceBundle>
+ </resourceBundles>
+ <supplementalModels>
+ <supplementalModel>supplemental-models.xml</supplementalModel>
+ </supplementalModels>
+ </configuration>
+ </plugin>
+ <!-- remove jars from top directory on clean -->
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <version>3.0.0</version>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>../</directory>
+ <includes>
+ <include>mahout-mr*.jar</include>
+ </includes>
+ <followSymlinks>false</followSymlinks>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+
+ <!-- our modules -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mahout-hdfs</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mahout-hdfs</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Third Party -->
+
+ <dependency>
+ <groupId>com.tdunning</groupId>
+ <artifactId>t-digest</artifactId>
+ <version>3.1</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>11.0.2</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>2.7.4</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>${slf4j.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jcl</artifactId>
+ <version>${slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>3.1</version>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.thoughtworks.xstream</groupId>
+ <artifactId>xstream</artifactId>
+ <version>1.4.4</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ <version>${lucene.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <version>${lucene.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.mahout.commons</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>2.0-mahout</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>3.2</version>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-all</artifactId>
+ <version>1.3</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.carrotsearch.randomizedtesting</groupId>
+ <artifactId>randomizedtesting-runner</artifactId>
+ <version>2.0.15</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.easymock</groupId>
+ <artifactId>easymock</artifactId>
+ <version>3.2</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.mrunit</groupId>
+ <artifactId>mrunit</artifactId>
+ <version>1.0.0</version>
+ <classifier>hadoop2</classifier>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-httpclient</groupId>
+ <artifactId>commons-httpclient</artifactId>
+ <version>3.0.1</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-commons-csv</artifactId>
+ <version>3.5.0</version>
+ </dependency>
+
+ </dependencies>
+
+</project>
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/assembly/job.xml
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/assembly/job.xml b/community/mahout-mr/src/main/assembly/job.xml
new file mode 100644
index 0000000..2bdb3ce
--- /dev/null
+++ b/community/mahout-mr/src/main/assembly/job.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly
+ xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0
+ http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <id>job</id>
+ <formats>
+ <format>jar</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <dependencySets>
+ <dependencySet>
+ <unpack>true</unpack>
+ <unpackOptions>
+ <!-- MAHOUT-1126 -->
+ <excludes>
+ <exclude>META-INF/LICENSE</exclude>
+ </excludes>
+ </unpackOptions>
+ <scope>runtime</scope>
+ <outputDirectory>/</outputDirectory>
+ <useTransitiveFiltering>true</useTransitiveFiltering>
+ <excludes>
+ <exclude>org.apache.hadoop:hadoop-core</exclude>
+ </excludes>
+ </dependencySet>
+ </dependencySets>
+ <fileSets>
+ <fileSet>
+ <directory>${basedir}/target/classes</directory>
+ <outputDirectory>/</outputDirectory>
+ <excludes>
+ <exclude>*.jar</exclude>
+ </excludes>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/target/classes</directory>
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>driver.classes.default.props</include>
+ </includes>
+ </fileSet>
+ </fileSets>
+</assembly>
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/Version.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/Version.java b/community/mahout-mr/src/main/java/org/apache/mahout/Version.java
new file mode 100644
index 0000000..5f3c879
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/Version.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Resources;
+
+import java.io.IOException;
+/** Static helpers that report version information from the package metadata or from a resource named "version". */
+public final class Version {
+ /** Not instantiable: every member of this class is static. */
+ private Version() {
+ }
+ /** Returns the Implementation-Version of this class's package; the JDK documents {@code null} when it is not known. */
+ public static String version() {
+ return Version.class.getPackage().getImplementationVersion();
+ }
+ /** Returns the full text of the resource named {@code version}, decoded as UTF-8. */
+ public static String versionFromResource() throws IOException {
+ return Resources.toString(Resources.getResource("version"), Charsets.UTF_8);
+ }
+ /** Prints {@link #version()} and {@link #versionFromResource()}, separated by one space, to standard output. */
+ public static void main(String[] args) throws IOException {
+ System.out.println(version() + ' ' + versionFromResource());
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
new file mode 100644
index 0000000..1ac5b72
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+/** {@link TasteException} subtype; by its name it reports an item that could not be found (throw sites are outside this file). */
+public final class NoSuchItemException extends TasteException {
+ /** Creates the exception without a detail message. */
+ public NoSuchItemException() { }
+ /** Creates the exception using the decimal string form of {@code itemID} as the detail message. */
+ public NoSuchItemException(long itemID) {
+ this(String.valueOf(itemID));
+ }
+ /** Creates the exception with the given detail message, passed unchanged to {@link TasteException}. */
+ public NoSuchItemException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
new file mode 100644
index 0000000..cbb60fa
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+/** {@link TasteException} subtype; by its name it reports a user that could not be found (throw sites are outside this file). */
+public final class NoSuchUserException extends TasteException {
+ /** Creates the exception without a detail message. */
+ public NoSuchUserException() { }
+ /** Creates the exception using the decimal string form of {@code userID} as the detail message. */
+ public NoSuchUserException(long userID) {
+ this(String.valueOf(userID));
+ }
+ /** Creates the exception with the given detail message, passed unchanged to {@link TasteException}. */
+ public NoSuchUserException(String message) {
+ super(message);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
new file mode 100644
index 0000000..9b26bee
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+import java.util.Collection;
+
+/**
+ * <p>
+ * Implementations of this interface have state that can be periodically refreshed. For example, an
+ * implementation instance might contain some pre-computed information that should be periodically refreshed.
+ * The {@link #refresh(Collection)} method triggers such a refresh.
+ * </p>
+ *
+ * <p>
+ * All Taste components implement this. In particular,
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}s do. Callers may want to call
+ * {@link #refresh(Collection)} periodically to re-compute information throughout the system and bring it up
+ * to date, though this operation may be expensive.
+ * </p>
+ */
+public interface Refreshable {
+
+ /**
+ * <p>
+ * Triggers "refresh" -- whatever that means -- of the implementation. The general contract is that any
+ * {@link Refreshable} should always leave itself in a consistent, operational state, and that the refresh
+ * atomically updates internal state from old to new.
+ * </p>
+ *
+ * @param alreadyRefreshed
+ * {@link org.apache.mahout.cf.taste.common.Refreshable}s that are known to have already been
+ * refreshed as a result of an initial call to a {@link #refresh(Collection)} method on some
+ * object. This ensures that objects in a refresh dependency graph aren't refreshed twice
+ * needlessly.
+ */
+ void refresh(Collection<Refreshable> alreadyRefreshed);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
new file mode 100644
index 0000000..1792eff
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+/**
+ * Checked exception (it extends {@link Exception}) thrown when an error occurs inside the Taste
+ * engine. {@link NoSuchItemException} and {@link NoSuchUserException} in this package extend it,
+ * so catching {@code TasteException} also catches both of them.
+ */
+public class TasteException extends Exception {
+ /** Creates the exception with neither a detail message nor a cause. */
+ public TasteException() { }
+ /** Creates the exception with the given detail message. */
+ public TasteException(String message) {
+ super(message);
+ }
+ /** Creates the exception wrapping {@code cause}; no separate message is supplied here. */
+ public TasteException(Throwable cause) {
+ super(cause);
+ }
+ /** Creates the exception with both a detail message and an underlying cause. */
+ public TasteException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
new file mode 100644
index 0000000..4e39617
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+/**
+ * <p>
+ * Gives symbolic names to the two choices "weighted" and "unweighted", so that API calls which take a
+ * weighting parameter are more readable at the call site.
+ * </p>
+ */
+public enum Weighting {
+
+ WEIGHTED,
+ UNWEIGHTED
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
new file mode 100644
index 0000000..875c65e
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * <p>
+ * Implementations of this interface are simple helper classes which create a {@link DataModel} to be
+ * used while evaluating a {@link org.apache.mahout.cf.taste.recommender.Recommender}.
+ * </p>
+ * @see RecommenderBuilder
+ * @see RecommenderEvaluator
+ */
+public interface DataModelBuilder {
+
+ /**
+ * <p>
+ * Builds a {@link DataModel} implementation to be used in an evaluation, given training data.
+ * </p>
+ *
+ * @param trainingData
+ * data to be used in the {@link DataModel}; presumably preferences keyed by user ID (confirm)
+ * @return {@link DataModel} based upon the given data
+ */
+ DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
new file mode 100644
index 0000000..9c442ff
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+/**
+ * <p>
+ * Implementations encapsulate information retrieval-related statistics about a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+ * </p>
+ *
+ * <p>
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.
+ * </p>
+ */
+public interface IRStatistics {
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Precision">Precision</a>.
+ *
+ * @return the precision statistic
+ */
+ double getPrecision();
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Recall">Recall</a>.
+ *
+ * @return the recall statistic
+ */
+ double getRecall();
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Fall-Out">Fall-Out</a>.
+ *
+ * @return the fall-out statistic
+ */
+ double getFallOut();
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.
+ *
+ * @return the F1 measure
+ */
+ double getF1Measure();
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.
+ *
+ * @param n presumably the weighting that generalizes F1 to F-n (F-beta) -- confirm against implementations
+ */
+ double getFNMeasure(double n);
+
+ /**
+ * See <a href="http://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG">
+ * Normalized Discounted Cumulative Gain</a>.
+ *
+ * @return the normalized discounted cumulative gain statistic
+ */
+ double getNormalizedDiscountedCumulativeGain();
+
+ /**
+ * @return the fraction of all users for whom recommendations could be produced
+ */
+ double getReach();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
new file mode 100644
index 0000000..1805092
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+/**
+ * <p>
+ * Implementations of this interface are simple helper classes which create a {@link Recommender} to be
+ * evaluated based on the given {@link DataModel}.
+ * </p>
+ */
+public interface RecommenderBuilder {
+
+ /**
+ * <p>
+ * Builds a {@link Recommender} implementation to be evaluated, using the given {@link DataModel}.
+ * </p>
+ *
+ * @param dataModel
+ * {@link DataModel} to build the {@link Recommender} on
+ * @return {@link Recommender} based upon the given {@link DataModel}
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ Recommender buildRecommender(DataModel dataModel) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
new file mode 100644
index 0000000..dcbbcf8
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>
+ * Implementations of this interface evaluate the quality of a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+ * </p>
+ */
+public interface RecommenderEvaluator {
+
+ /**
+ * <p>
+ * Evaluates the quality of a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+ * The range of values that may be returned depends on the implementation, but <em>lower</em> values must
+ * mean better recommendations, with 0 being the lowest / best possible evaluation, meaning a perfect match.
+ * This method does not accept a {@link org.apache.mahout.cf.taste.recommender.Recommender} directly, but
+ * rather a {@link RecommenderBuilder} which can build the
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test on top of a given {@link DataModel}.
+ * </p>
+ *
+ * <p>
+ * Implementations will take a certain percentage of the preferences supplied by the given {@link DataModel}
+ * as "training data". This is typically most of the data, like 90%. This data is used to produce
+ * recommendations, and the rest of the data is compared against estimated preference values to see how much
+ * the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s predicted preferences match the user's
+ * real preferences. Specifically, for each user, this percentage of the user's ratings is used to produce
+ * recommendations, and for each user, the estimates for the remaining preferences are compared against the
+ * user's real preferences.
+ * </p>
+ *
+ * <p>
+ * For large datasets, it may be desirable to only evaluate based on a small percentage of the data.
+ * {@code evaluationPercentage} controls how many of the {@link DataModel}'s users are used in
+ * evaluation.
+ * </p>
+ *
+ * <p>
+ * To be clear, {@code trainingPercentage} and {@code evaluationPercentage} are not related. They
+ * do not need to add up to 1.0, for example.
+ * </p>
+ *
+ * @param recommenderBuilder
+ * object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+ * @param dataModelBuilder
+ * {@link DataModelBuilder} to use, or if null, a default {@link DataModel}
+ * implementation will be used
+ * @param dataModel
+ * dataset to test on
+ * @param trainingPercentage
+ * percentage of each user's preferences to use to produce recommendations; the rest are compared
+ * to estimated preference values to evaluate
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender} performance
+ * @param evaluationPercentage
+ * percentage of users to use in evaluation
+ * @return a "score" representing how well the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s
+ * estimated preferences match real values; <em>lower</em> scores mean a better match and 0 is a
+ * perfect match
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ double evaluate(RecommenderBuilder recommenderBuilder,
+ DataModelBuilder dataModelBuilder,
+ DataModel dataModel,
+ double trainingPercentage,
+ double evaluationPercentage) throws TasteException;
+
+ /**
+ * @deprecated see {@link DataModel#getMaxPreference()}
+ */
+ @Deprecated
+ float getMaxPreference();
+
+ @Deprecated
+ void setMaxPreference(float maxPreference);
+
+ /**
+ * @deprecated see {@link DataModel#getMinPreference()}
+ */
+ @Deprecated
+ float getMinPreference();
+
+ @Deprecated
+ void setMinPreference(float minPreference);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
new file mode 100644
index 0000000..6e4e9c7
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+
+/**
+ * <p>
+ * Implementations collect information retrieval-related statistics on the performance of a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}, including precision, recall and f-measure.
+ * </p>
+ *
+ * <p>
+ * See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.
+ * </p>
+ */
+public interface RecommenderIRStatsEvaluator {
+
+ /**
+ * @param recommenderBuilder
+ * object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+ * @param dataModelBuilder
+ * {@link DataModelBuilder} to use, or if null, a default {@link DataModel} implementation will be used
+ * @param dataModel
+ * dataset to test on
+ * @param rescorer
+ * if any, to use when computing recommendations
+ * @param at
+ * as in, "precision at 5". The number of recommendations to consider when evaluating precision, etc.
+ * @param relevanceThreshold
+ * items whose preference value is at least this value are considered "relevant" for the purposes
+ * of computations
+ * @param evaluationPercentage
+ * presumably, as in {@link RecommenderEvaluator}, the percentage of users to use in evaluation (confirm)
+ * @return {@link IRStatistics} with resulting precision, recall, etc.
+ * @throws TasteException
+ * if an error occurs while accessing the {@link DataModel}
+ */
+ IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
+ DataModelBuilder dataModelBuilder,
+ DataModel dataModel,
+ IDRescorer rescorer,
+ int at,
+ double relevanceThreshold,
+ double evaluationPercentage) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
new file mode 100644
index 0000000..da318d5
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * Implementations of this interface determine the items that are considered relevant,
+ * and split data into a training and test subset, for purposes of precision/recall
+ * tests as implemented by implementations of {@link RecommenderIRStatsEvaluator}.
+ */
+public interface RelevantItemsDataSplitter {
+
+ /**
+ * During testing, relevant items are removed from a particular user's preferences,
+ * and a model is built using this user's other preferences and all other users.
+ * @param userID presumably the user whose relevant items are wanted (originally undocumented; confirm)
+ * @param at Maximum number of items to be removed
+ * @param relevanceThreshold Minimum strength of preference for an item to be considered relevant
+ * @param dataModel presumably the source of that user's preferences (originally undocumented; confirm)
+ * @return IDs of relevant items
+ */
+ FastIDSet getRelevantItemsIDs(long userID,
+ int at,
+ double relevanceThreshold,
+ DataModel dataModel) throws TasteException;
+
+ /**
+ * Adds a single user and all their preferences to the training model.
+ *
+ * @param userID ID of user whose preferences we are trying to predict
+ * @param relevantItemIDs IDs of items considered relevant to that user
+ * @param trainingUsers the database of training preferences to which we will append the ones for otherUserID
+ * @param otherUserID for whom we are adding preferences to the training model
+ * @param dataModel presumably the model that otherUserID's preferences are read from (confirm)
+ */
+ void processOtherUser(long userID,
+ FastIDSet relevantItemIDs,
+ FastByIDMap<PreferenceArray> trainingUsers,
+ long otherUserID,
+ DataModel dataModel) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
new file mode 100644
index 0000000..e70a675
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import com.google.common.primitives.Longs;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.math.Varint;
+
+/**
+ * A {@link WritableComparable} encapsulating two entity IDs, ordered by the first ID and then by the second.
+ */
+public final class EntityEntityWritable implements WritableComparable<EntityEntityWritable>, Cloneable {
+
+  private long aID;
+  private long bID;
+
+  /** Leaves both IDs at 0 until {@link #readFields(DataInput)} assigns them. */
+  public EntityEntityWritable() {
+  }
+
+  public EntityEntityWritable(long aID, long bID) {
+    this.aID = aID;
+    this.bID = bID;
+  }
+
+  long getAID() {
+    return aID;
+  }
+
+  long getBID() {
+    return bID;
+  }
+
+  /** Writes {@code aID} and then {@code bID}, each as a signed variable-length long. */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Varint.writeSignedVarLong(aID, out);
+    Varint.writeSignedVarLong(bID, out);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    aID = Varint.readSignedVarLong(in); // same field order as write()
+    bID = Varint.readSignedVarLong(in);
+  }
+
+  /** Compares by {@code aID}; ties are broken by {@code bID}. Uses {@link Long#compare(long, long)}. */
+  @Override
+  public int compareTo(EntityEntityWritable that) {
+    int byA = Long.compare(aID, that.getAID());
+    return byA == 0 ? Long.compare(bID, that.getBID()) : byA;
+  }
+
+  @Override
+  public int hashCode() {
+    return Longs.hashCode(aID) + 31 * Longs.hashCode(bID);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof EntityEntityWritable) {
+      EntityEntityWritable that = (EntityEntityWritable) o;
+      return aID == that.getAID() && bID == that.getBID();
+    }
+    return false;
+  }
+
+  @Override
+  public String toString() {
+    return aID + "\t" + bID;
+  }
+
+  @Override
+  public EntityEntityWritable clone() {
+    return new EntityEntityWritable(aID, bID);
+  }
+
+}
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
new file mode 100644
index 0000000..2aab63c
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VarLongWritable;
+
+/** A {@link org.apache.hadoop.io.Writable} encapsulating an item ID and a preference value. */
+public final class EntityPrefWritable extends VarLongWritable implements Cloneable {
+
+  private float prefValue;
+
+  public EntityPrefWritable() { }
+
+  public EntityPrefWritable(long itemID, float prefValue) {
+    super(itemID);
+    this.prefValue = prefValue;
+  }
+
+  public EntityPrefWritable(EntityPrefWritable other) {
+    this(other.get(), other.getPrefValue());
+  }
+
+  public long getID() {
+    return get();
+  }
+
+  public float getPrefValue() {
+    return prefValue;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    out.writeFloat(prefValue);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    prefValue = in.readFloat();
+  }
+
+  @Override
+  public int hashCode() {
+    return super.hashCode() ^ RandomUtils.hashFloat(prefValue);
+  }
+
+  /** Two instances are equal when both the ID and the preference value match. */
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof EntityPrefWritable)) {
+      return false;
+    }
+    // Float.compare, unlike ==, treats NaN as equal to itself (and 0.0f as different from -0.0f).
+    EntityPrefWritable other = (EntityPrefWritable) o;
+    return get() == other.get() && Float.compare(prefValue, other.getPrefValue()) == 0;
+  }
+
+  @Override
+  public String toString() {
+    return get() + "\t" + prefValue;
+  }
+
+  @Override
+  public EntityPrefWritable clone() {
+    return new EntityPrefWritable(get(), prefValue);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
new file mode 100644
index 0000000..3de272d
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.RandomUtils;
+
+/** Mutable variant of {@link RecommendedItem}. */
+public class MutableRecommendedItem implements RecommendedItem {
+
+  private long itemID;
+  private float value;
+
+  public MutableRecommendedItem() {}
+
+  public MutableRecommendedItem(long itemID, float value) {
+    this.itemID = itemID;
+    this.value = value;
+  }
+
+  @Override
+  public long getItemID() {
+    return itemID;
+  }
+
+  @Override
+  public float getValue() {
+    return value;
+  }
+
+  public void setItemID(long itemID) {
+    this.itemID = itemID;
+  }
+
+  public void set(long itemID, float value) {
+    this.itemID = itemID;
+    this.value = value;
+  }
+
+  /** Lowers the value to {@code maxValue} if it currently exceeds it; otherwise leaves it unchanged. */
+  public void capToMaxValue(float maxValue) {
+    if (value > maxValue) {
+      value = maxValue;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "MutableRecommendedItem[item:" + itemID + ", value:" + value + ']';
+  }
+
+  @Override
+  public int hashCode() {
+    return (int) itemID ^ RandomUtils.hashFloat(value);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof MutableRecommendedItem)) {
+      return false;
+    }
+    // Float.compare, unlike ==, treats NaN as equal to itself (and 0.0f as different from -0.0f).
+    RecommendedItem other = (RecommendedItem) o;
+    return itemID == other.getItemID() && Float.compare(value, other.getValue()) == 0;
+  }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
new file mode 100644
index 0000000..bc832aa
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.math.Varint;
+
+/**
+ * A {@link Writable} which encapsulates a list of {@link RecommendedItem}s. This is the mapper (and reducer)
+ * output, and represents items recommended to a user. The first item is the one whose estimated preference is
+ * highest.
+ */
+public final class RecommendedItemsWritable implements Writable {
+
+  private List<RecommendedItem> recommended;
+
+  public RecommendedItemsWritable() {
+    // do nothing
+  }
+
+  public RecommendedItemsWritable(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+
+  /** Returns the current list, which is {@code null} until one is supplied or read. */
+  public List<RecommendedItem> getRecommendedItems() {
+    return recommended;
+  }
+
+  public void set(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+
+  /** Writes the item count as an int, then each item's ID (signed varlong) and value (float); needs a list. */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(recommended.size());
+    for (RecommendedItem item : recommended) {
+      Varint.writeSignedVarLong(item.getItemID(), out);
+      out.writeFloat(item.getValue());
+    }
+  }
+
+  /** Replaces the list with {@link GenericRecommendedItem}s decoded in the format {@link #write} produces. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    int size = in.readInt();
+    recommended = new ArrayList<>(size);
+    for (int i = 0; i < size; i++) {
+      long itemID = Varint.readSignedVarLong(in);
+      float value = in.readFloat();
+      RecommendedItem recommendedItem = new GenericRecommendedItem(itemID, value);
+      recommended.add(recommendedItem);
+    }
+  }
+
+  /** Renders the items as {@code [id:value,id:value]}; a list that was never set renders as {@code []}. */
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder(200);
+    result.append('[');
+    if (recommended != null) {
+      String separator = "";
+      for (RecommendedItem item : recommended) {
+        result.append(separator).append(item.getItemID()).append(':').append(item.getValue());
+        separator = ",";
+      }
+    }
+    result.append(']');
+    return result.toString();
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
new file mode 100644
index 0000000..e3fab29
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import com.google.common.primitives.Longs;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+
+import java.util.regex.Pattern;
+
+/**
+ * Static helper methods for the Hadoop-related code in org.apache.mahout.cf.taste:
+ * tokenizing textual preference lines, mapping long IDs to int indexes and loading
+ * a persisted index-to-ID mapping.
+ */
+public final class TasteHadoopUtils {
+
+  /** Position of the user ID among the tokens of a {@code userID,itemID[,preference]} line. */
+  public static final int USER_ID_POS = 0;
+  /** Position of the item ID among the tokens of a {@code userID,itemID[,preference]} line. */
+  public static final int ITEM_ID_POS = 1;
+
+  /** Standard delimiter of textual preference data: a single tab or a single comma. */
+  private static final Pattern PREFERENCE_TOKEN_DELIMITER = Pattern.compile("[\t,]");
+
+  private TasteHadoopUtils() {}
+
+  /**
+   * Splits a preference data line into string tokens at every tab or comma.
+   *
+   * @param line one line of textual preference data
+   * @return the tokens of the line, in order
+   */
+  public static String[] splitPrefTokens(CharSequence line) {
+    return PREFERENCE_TOKEN_DELIMITER.split(line);
+  }
+
+  /**
+   * Maps a long to a non-negative int that is always below Integer.MAX_VALUE
+   * (the result lies in the range 0 to Integer.MAX_VALUE-2).
+   *
+   * @param id the long ID to map
+   * @return the int index derived from the hash of {@code id}
+   */
+  public static int idToIndex(long id) {
+    // '%' binds tighter than '&', so without the parentheses the mask was applied to the
+    // (possibly negative) remainder: a hash of -1 or -2147483647 then produced
+    // Integer.MAX_VALUE, outside the promised range. Clearing the sign bit first keeps the
+    // result in range and leaves the mapping unchanged for all but five hash values
+    // (-1, -2, -2147483646, -2147483647 and Integer.MIN_VALUE).
+    return (0x7FFFFFFF & Longs.hashCode(id)) % 0x7FFFFFFE;
+  }
+
+  /**
+   * Converts a textual ID into an int index.
+   *
+   * @param token the ID as text
+   * @param usesLongIDs if true the token is parsed as a long and mapped through
+   *   {@link #idToIndex(long)}; if false it is parsed directly as an int
+   * @return the int index for the token
+   * @throws NumberFormatException if the token is not a valid number of the selected kind
+   */
+  public static int readID(String token, boolean usesLongIDs) {
+    return usesLongIDs ? idToIndex(Long.parseLong(token)) : Integer.parseInt(token);
+  }
+
+  /**
+   * Reads a binary mapping file: every (VarIntWritable, VarLongWritable) record found in the
+   * part files under the given path is stored as an index-to-ID entry of the returned map.
+   *
+   * @param idIndexPathStr location of the mapping, as a path string
+   * @param conf configuration handed to the sequence file iteration
+   * @return map from int index to the long ID recorded for it
+   */
+  public static OpenIntLongHashMap readIDIndexMap(String idIndexPathStr, Configuration conf) {
+    OpenIntLongHashMap indexIDMap = new OpenIntLongHashMap();
+    Path itemIDIndexPath = new Path(idIndexPathStr);
+    // Explicit type arguments are required: the diamond cannot infer them in a for-each header.
+    for (Pair<VarIntWritable,VarLongWritable> record
+        : new SequenceFileDirIterable<VarIntWritable,VarLongWritable>(itemIDIndexPath,
+                                                                      PathType.LIST,
+                                                                      PathFilters.partFilter(),
+                                                                      null,
+                                                                      true,
+                                                                      conf)) {
+      indexIDMap.put(record.getFirst().get(), record.getSecond().get());
+    }
+    return indexIDMap;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
new file mode 100644
index 0000000..fdb552e
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
+import org.apache.mahout.math.VarLongWritable;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+/**
+ * Base mapper turning textual preference lines ({@code userID,itemID[,preference]}, separated
+ * by comma or tab) into ID-keyed records. Subclasses choose through the constructor flag
+ * whether the output is keyed by item or by user; the {@link #TRANSPOSE_USER_ITEM} job setting
+ * flips that choice.
+ */
+public abstract class ToEntityPrefsMapper extends
+    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable> {
+
+  // NOTE: Class + String concatenation yields "class <fully qualified name><suffix>". The keys
+  // are deliberately left byte-for-byte as they are, since jobs address them through these
+  // constants.
+  /** Boolean job setting: swap the roles of the user and item columns. */
+  public static final String TRANSPOSE_USER_ITEM = ToEntityPrefsMapper.class + "transposeUserItem";
+  /** Float job setting: amount added to every preference value read from the input. */
+  public static final String RATING_SHIFT = ToEntityPrefsMapper.class + "shiftRatings";
+
+  private boolean booleanData;
+  private boolean transpose;
+  private final boolean itemKey;
+  private float ratingShift;
+
+  /**
+   * @param itemKey true if the output is to be keyed by item ID, false for user ID
+   */
+  ToEntityPrefsMapper(boolean itemKey) {
+    this.itemKey = itemKey;
+  }
+
+  /**
+   * Reads the boolean-data flag, the transpose flag and the rating shift from the job
+   * configuration; they default to false, false and 0.0 respectively.
+   */
+  @Override
+  protected void setup(Context context) {
+    Configuration jobConf = context.getConfiguration();
+    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
+    transpose = jobConf.getBoolean(TRANSPOSE_USER_ITEM, false);
+    ratingShift = Float.parseFloat(jobConf.get(RATING_SHIFT, "0.0"));
+  }
+
+  /**
+   * Parses one preference line and emits a single record keyed by the first (or, when
+   * swapped, the second) ID. With boolean data the value is the other ID alone; otherwise it
+   * is an {@link EntityPrefWritable} carrying the other ID and the preference. The preference
+   * is the third token plus the rating shift, or 1.0 when the line has no third token (the
+   * shift is not applied in that case).
+   *
+   * @param key input key; not used
+   * @param value one line of preference data
+   * @param context sink for the emitted record
+   * @throws IOException if writing the record fails
+   * @throws InterruptedException if writing the record is interrupted
+   */
+  @Override
+  public void map(LongWritable key,
+                  Text value,
+                  Context context) throws IOException, InterruptedException {
+    // Tokenize with the shared helper so all jobs agree on the delimiter and column positions.
+    String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
+    long userID = Long.parseLong(tokens[TasteHadoopUtils.USER_ID_POS]);
+    long itemID = Long.parseLong(tokens[TasteHadoopUtils.ITEM_ID_POS]);
+    if (itemKey ^ transpose) {
+      // Exactly one of "key by item" and "transpose" is set: the second column becomes the
+      // key and the first column the value. When both or neither are set, nothing is swapped.
+      long temp = userID;
+      userID = itemID;
+      itemID = temp;
+    }
+    if (booleanData) {
+      context.write(new VarLongWritable(userID), new VarLongWritable(itemID));
+    } else {
+      float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) + ratingShift : 1.0f;
+      context.write(new VarLongWritable(userID), new EntityPrefWritable(itemID, prefValue));
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
new file mode 100644
index 0000000..f5f9574
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+/**
+ * <h1>Input</h1>
+ *
+ * <p>
+ * Intended for use with {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat};
+ * accepts {@link org.apache.hadoop.io.LongWritable}/{@link org.apache.hadoop.io.Text} pairs
+ * of which only the text line is used, the key is ignored.
+ * </p>
+ *
+ * <p>
+ * Each line is assumed to be of the form {@code userID,itemID,preference}, or {@code userID,itemID}.
+ * A tab is accepted in place of each comma.
+ * </p>
+ *
+ * <h1>Output</h1>
+ *
+ * <p>
+ * Outputs the user ID as a {@link org.apache.mahout.math.VarLongWritable} mapped to the item ID and preference as a
+ * {@link EntityPrefWritable}. When the job is configured for boolean data, the value is the item ID alone,
+ * as a {@link org.apache.mahout.math.VarLongWritable}. When {@link ToEntityPrefsMapper#TRANSPOSE_USER_ITEM}
+ * is set, the roles of user ID and item ID are swapped.
+ * </p>
+ */
+public final class ToItemPrefsMapper extends ToEntityPrefsMapper {
+
+ // Passes itemKey = false to the base mapper.
+ public ToItemPrefsMapper() {
+ super(false);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
new file mode 100644
index 0000000..8f563b0
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+/**
+ * Bounded priority queue of {@link MutableRecommendedItem}s ordered by value, with the
+ * lowest-valued element on top. The queue supplies sentinel elements through
+ * {@link #getSentinelObject()}; {@link #getTopItems()} drops them again when draining.
+ */
+public class TopItemsQueue extends PriorityQueue<MutableRecommendedItem> {
+
+  /** Item ID marking a sentinel; a real item carrying this ID would be filtered out as well. */
+  private static final long SENTINEL_ID = Long.MIN_VALUE;
+
+  /**
+   * Value carried by sentinels. It must rank below every real score, so negative infinity is
+   * used. Float.MIN_VALUE is the smallest positive float, which would rank a sentinel above
+   * every item scored zero or less.
+   */
+  private static final float SENTINEL_VALUE = Float.NEGATIVE_INFINITY;
+
+  private final int maxSize;
+
+  /**
+   * @param maxSize maximum number of items the queue retains
+   */
+  public TopItemsQueue(int maxSize) {
+    super(maxSize);
+    this.maxSize = maxSize;
+  }
+
+  /**
+   * Drains the queue and returns the real (non-sentinel) items, highest value first.
+   * The queue is empty afterwards.
+   *
+   * @return the retained items in descending order of value
+   */
+  public List<RecommendedItem> getTopItems() {
+    List<RecommendedItem> recommendedItems = new ArrayList<>(maxSize);
+    while (size() > 0) {
+      MutableRecommendedItem topItem = pop();
+      // filter out "sentinel" objects necessary for maintaining an efficient priority queue
+      if (topItem.getItemID() != SENTINEL_ID) {
+        recommendedItems.add(topItem);
+      }
+    }
+    // pop() hands out the lowest value first; reverse to get best-first order
+    Collections.reverse(recommendedItems);
+    return recommendedItems;
+  }
+
+  /** Orders items by value only; the item ID plays no part in the comparison. */
+  @Override
+  protected boolean lessThan(MutableRecommendedItem one, MutableRecommendedItem two) {
+    return one.getValue() < two.getValue();
+  }
+
+  /** Creates a placeholder item that every real item outranks. */
+  @Override
+  protected MutableRecommendedItem getSentinelObject() {
+    return new MutableRecommendedItem(SENTINEL_ID, SENTINEL_VALUE);
+  }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
new file mode 100644
index 0000000..4bb95ae
--- /dev/null
+++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.als.AlternatingLeastSquaresSolver;
+import org.apache.mahout.math.map.OpenIntObjectHashMap;
+
+/**
+ * Package-private helpers for the ALS jobs: loading rows of a feature matrix from sequence
+ * files and solving for one feature vector from explicit ratings.
+ */
+final class ALS {
+
+  private ALS() {}
+
+  /**
+   * Returns the vector of the first record found in the part files under {@code dir}.
+   *
+   * @param dir directory holding the sequence files
+   * @param conf configuration used to access the files
+   * @return the first row, or {@code null} if the files contain no record
+   * @throws IOException if the files cannot be read
+   */
+  static Vector readFirstRow(Path dir, Configuration conf) throws IOException {
+    // The iterator is Closeable; release it as soon as the first value has been taken
+    // instead of leaving it open.
+    try (SequenceFileDirValueIterator<VectorWritable> rows =
+             new SequenceFileDirValueIterator<>(dir, PathType.LIST, PathFilters.partFilter(), null, true, conf)) {
+      return rows.hasNext() ? rows.next().get() : null;
+    }
+  }
+
+  /**
+   * Loads a feature matrix, row by row, from all files registered in the distributed cache.
+   *
+   * @param numEntities expected number of rows, used as the initial capacity of the map when
+   *   positive; any other value selects the default capacity
+   * @param conf job configuration from which the cached files are resolved
+   * @return map from row index to row vector
+   * @throws IOException if a cached file cannot be read
+   * @throws IllegalStateException if no row at all was read
+   */
+  public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
+      Configuration conf) throws IOException {
+
+    IntWritable rowIndex = new IntWritable();
+    VectorWritable row = new VectorWritable();
+
+    // Explicit type arguments: the diamond cannot be used inside a conditional expression here.
+    OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
+        ? new OpenIntObjectHashMap<Vector>(numEntities) : new OpenIntObjectHashMap<Vector>();
+
+    Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
+    LocalFileSystem localFs = FileSystem.getLocal(conf);
+
+    for (Path cachedFile : cachedFiles) {
+      try (SequenceFile.Reader reader =
+               new SequenceFile.Reader(localFs.getConf(), SequenceFile.Reader.file(cachedFile))) {
+        while (reader.next(rowIndex, row)) {
+          featureMatrix.put(rowIndex.get(), row.get());
+        }
+      }
+    }
+
+    Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
+    return featureMatrix;
+  }
+
+  /**
+   * Loads a feature matrix, row by row, from the part files under {@code dir}.
+   *
+   * @param dir directory holding the sequence files
+   * @param conf configuration used to access the files
+   * @return map from row index to row vector; empty if the files contain no record
+   */
+  public static OpenIntObjectHashMap<Vector> readMatrixByRows(Path dir, Configuration conf) {
+    OpenIntObjectHashMap<Vector> matrix = new OpenIntObjectHashMap<>();
+    for (Pair<IntWritable,VectorWritable> pair
+        : new SequenceFileDirIterable<IntWritable,VectorWritable>(dir, PathType.LIST, PathFilters.partFilter(), conf)) {
+      int rowIndex = pair.getFirst().get();
+      Vector row = pair.getSecond().get();
+      matrix.put(rowIndex, row);
+    }
+    return matrix;
+  }
+
+  /**
+   * Solves for one feature vector from explicit ratings: the rows of {@code uOrM} addressed by
+   * the non-zero entries of the ratings vector are collected, in iteration order, and handed
+   * to the least-squares solver together with the ratings.
+   *
+   * @param ratingsWritable the ratings of one user or item
+   * @param uOrM the fixed feature matrix (rows looked up by the index of each rating)
+   * @param lambda regularization parameter passed to the solver
+   * @param numFeatures number of features passed to the solver
+   * @return the solved feature vector
+   */
+  public static Vector solveExplicit(VectorWritable ratingsWritable, OpenIntObjectHashMap<Vector> uOrM,
+      double lambda, int numFeatures) {
+    Vector ratings = ratingsWritable.get();
+
+    List<Vector> featureVectors = new ArrayList<>(ratings.getNumNondefaultElements());
+    for (Vector.Element e : ratings.nonZeroes()) {
+      int index = e.index();
+      featureVectors.add(uOrM.get(index));
+    }
+
+    return AlternatingLeastSquaresSolver.solve(featureVectors, ratings, lambda, numFeatures);
+  }
+}