You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by jw...@apache.org on 2013/09/21 22:36:59 UTC

git commit: CRUNCH-268: Use stable names for Crunch's internal Avro schemas for tuple types.

Updated Branches:
  refs/heads/master 14f0c16b5 -> 16d4d35f7


CRUNCH-268: Use stable names for Crunch's internal Avro schemas for tuple types.


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/16d4d35f
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/16d4d35f
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/16d4d35f

Branch: refs/heads/master
Commit: 16d4d35f73d42b97c779ba6fa8dd6be425f6faaa
Parents: 14f0c16
Author: Josh Wills <jw...@apache.org>
Authored: Fri Sep 20 15:12:01 2013 -0700
Committer: Josh Wills <jw...@apache.org>
Committed: Sat Sep 21 13:32:32 2013 -0700

----------------------------------------------------------------------
 .../java/org/apache/crunch/types/avro/Avros.java | 19 ++++++++++++++++---
 .../apache/crunch/types/avro/AvroTypeTest.java   |  7 +++++++
 2 files changed, 23 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/16d4d35f/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
index fc30eaf..a9e84a5 100644
--- a/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
@@ -23,11 +23,16 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
 
+import com.google.common.base.Charsets;
+import com.google.common.base.Joiner;
+import com.sun.org.apache.xml.internal.security.algorithms.MessageDigestAlgorithm;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
 import org.apache.avro.generic.GenericData;
@@ -35,6 +40,7 @@ import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.reflect.ReflectData;
 import org.apache.avro.specific.SpecificRecord;
 import org.apache.avro.util.Utf8;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.crunch.MapFn;
@@ -604,16 +610,23 @@ public class Avros {
         ptypes), new TupleDeepCopier(clazz, ptypes), ptypes);
   }
 
-  private static Schema createTupleSchema(PType<?>... ptypes) {
+  private static Schema createTupleSchema(PType<?>... ptypes) throws RuntimeException {
     // Guarantee each tuple schema has a globally unique name
-    String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x');
-    Schema schema = Schema.createRecord(tupleName, "", "crunch", false);
     List<Schema.Field> fields = Lists.newArrayList();
+    MessageDigest md;
+    try {
+     md = MessageDigest.getInstance("MD5");
+    } catch (NoSuchAlgorithmException e) {
+      throw new RuntimeException(e);
+    }
     for (int i = 0; i < ptypes.length; i++) {
       AvroType atype = (AvroType) ptypes[i];
       Schema fieldSchema = allowNulls(atype.getSchema());
       fields.add(new Schema.Field("v" + i, fieldSchema, "", null));
+      md.update(fieldSchema.toString().getBytes(Charsets.UTF_8));
     }
+    String schemaName = "tuple" + Base64.encodeBase64URLSafeString(md.digest()).replace('-', 'x');
+    Schema schema = Schema.createRecord(schemaName, "", "crunch", false);
     schema.setFields(fields);
     return schema;
   }

http://git-wip-us.apache.org/repos/asf/crunch/blob/16d4d35f/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
index a874c63..ca9a352 100644
--- a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
@@ -142,6 +142,13 @@ public class AvroTypeTest {
   }
 
   @Test
+  public void testStableTupleNames() {
+    // Building the same pair type twice must yield equal schemas (record name included).
+    AvroType<Pair<Long, Float>> first = Avros.pairs(Avros.longs(), Avros.floats());
+    assertEquals(first.getSchema(), Avros.pairs(Avros.longs(), Avros.floats()).getSchema());
+  }
+
+  @Test
   public void testGetDetachedValue_AlreadyMappedAvroType() {
     Integer value = 42;
     AvroType<Integer> intType = Avros.ints();