You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@wayang.apache.org by al...@apache.org on 2023/11/17 07:26:24 UTC
(incubator-wayang) branch main updated: Add collaborative filtering to wayang-examples
This is an automated email from the ASF dual-hosted git repository.
aloalt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-wayang.git
The following commit(s) were added to refs/heads/main by this push:
new 27dad258 Add collaborative filtering to wayang-examples
new fdfb094e Merge pull request #376 from vatsalkshah/main
27dad258 is described below
commit 27dad2588f7d05a598421b85ad6fccc3776fb2e8
Author: Vatsal Shah <va...@somaiya.edu>
AuthorDate: Fri Nov 17 12:10:48 2023 +0530
Add collaborative filtering to wayang-examples
---
guides/wayang-examples.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/guides/wayang-examples.md b/guides/wayang-examples.md
index 2efd7b44..2717dde0 100644
--- a/guides/wayang-examples.md
+++ b/guides/wayang-examples.md
@@ -234,4 +234,114 @@ object kmeans {
println(finalCentroids)
}
}
+```
+
+## Collaborative Filtering
+
+This code demonstrates the implementation of a Collaborative Filtering algorithm used in Recommendation Systems using Wayang.
+
+```java
+import org.apache.wayang.api.*;
+import org.apache.wayang.basic.data.*;
+import org.apache.wayang.core.api.*;
+import org.apache.wayang.core.function.*;
+import org.apache.wayang.core.util.*;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.spark.Spark;
+import org.apache.commons.math3.linear.*;
+
+import java.util.*;
+
+public class CollaborativeFiltering {
+
+ public static void main(String[] args) {
+
+ // Create a Wayang context
+ WayangContext wayangContext = new WayangContext().with(Java.basicPlugin()).with(Spark.basicPlugin());
+ PlanBuilder planBuilder = new PlanBuilder(wayangContext);
+
+ // Load the data
+ List<Tuple3<String, String, Integer>> data = Arrays.asList(
+ new Tuple3<>("user1", "item1", 5),
+ new Tuple3<>("user1", "item2", 3),
+ new Tuple3<>("user2", "item1", 4),
+ new Tuple3<>("user2", "item3", 2),
+ new Tuple3<>("user3", "item2", 1),
+ new Tuple3<>("user3", "item3", 5)
+ );
+
+ // Define a function to normalize the ratings
+ TransformationDescriptor.SerializableFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Double>> normalizationFunction =
+ tuple -> new Tuple3<>(tuple.field0, tuple.field1, (double)tuple.field2 / 5);
+
+ // Define a function to calculate the cosine similarity between users
+ TransformationDescriptor.SerializableFunction<Tuple2<String, RealVector>, Tuple2<String, RealVector>> similarityFunction =
+ tuple -> {
+ // This is a placeholder. You would need to implement a real similarity calculation here.
+ // For example, you could calculate the cosine similarity like this:
+ double dotProduct = tuple.field1.dotProduct(otherUserVector);
+ double normProduct = tuple.field1.getNorm() * otherUserVector.getNorm();
+ double cosineSimilarity = dotProduct / normProduct;
+ return new Tuple2<>(tuple.field0, cosineSimilarity);
+ };
+
+ // Define a function to calculate the predicted rating for each user-item pair
+ TransformationDescriptor.SerializableFunction<Tuple3<String, String, Double>, Tuple3<String, String, Double>> predictionFunction =
+ tuple -> {
+ // This is a placeholder. You would need to implement a real prediction calculation here.
+ // For example, you could calculate the predicted rating based on the similarity matrix and the user's ratings like this:
+ double predictedRating = 0.0;
+ double similaritySum = 0.0;
+ for (String otherUser : similarityMatrix.keySet()) {
+ double similarity = similarityMatrix.get(otherUser);
+ double otherUserRating = userRatings.get(otherUser).get(tuple.field1);
+ predictedRating += similarity * otherUserRating;
+ similaritySum += Math.abs(similarity);
+ }
+ predictedRating /= similaritySum;
+ return new Tuple3<>(tuple.field0, tuple.field1, predictedRating);
+ };
+
+ // Define a function to handle cold start problems
+ TransformationDescriptor.SerializableFunction<Tuple3<String, String, Double>, Tuple3<String, String, Double>> coldStartFunction =
+ tuple -> {
+ if (tuple.field2 == null) {
+ // If the user has no ratings, recommend the most popular item
+ String mostPopularItem = itemPopularity.entrySet().stream()
+ .max(Map.Entry.comparingByValue())
+ .get()
+ .getKey();
+ return new Tuple3<>(tuple.field0, mostPopularItem, 5.0);
+ } else {
+ return tuple;
+ }
+ };
+
+ // Define a function to handle cold start problems
+ TransformationDescriptor.SerializableFunction<Tuple3<String, String, Double>, Tuple3<String, String, Double>> coldStartFunction =
+ tuple -> {
+ if (tuple.field2 == null) {
+ // If the user has no ratings, recommend the most popular item
+ return new Tuple3<>(tuple.field0, "item1", 1.0);
+ } else {
+ return tuple;
+ }
+ };
+
+ // Execute the plan
+ Collection<Tuple3<String, String, Double>> output = planBuilder
+ .loadCollection(data)
+ .map(normalizationFunction)
+ .map(similarityFunction)
+ .map(predictionFunction)
+ .map(recommendationFunction)
+ .map(coldStartFunction)
+ .collect();
+
+ // Print the recommendations
+ for (Tuple3<String, String, Double> recommendation : output) {
+ System.out.println("User: " + recommendation.field0 + ", Item: " + recommendation.field1 + ", Rating: " + recommendation.field2);
+ }
+ }
+}
```
\ No newline at end of file