You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by gr...@apache.org on 2015/11/05 20:05:00 UTC

usergrid git commit: Added a workaround for buffers starting with garbage data instead of the application uuid. Now it filters out the beginning of the buffer.

Repository: usergrid
Updated Branches:
  refs/heads/USERGRID-1076 6d96a4c90 -> 48a8006d4


Added a workaround for buffers starting with garbage data instead of the application uuid. Now it filters out the beginning of the buffer.


Project: http://git-wip-us.apache.org/repos/asf/usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/usergrid/commit/48a8006d
Tree: http://git-wip-us.apache.org/repos/asf/usergrid/tree/48a8006d
Diff: http://git-wip-us.apache.org/repos/asf/usergrid/diff/48a8006d

Branch: refs/heads/USERGRID-1076
Commit: 48a8006d41c23abbdae23b20c851e76f7f53d3f2
Parents: 6d96a4c
Author: George Reyes <gr...@apache.org>
Authored: Thu Nov 5 11:04:57 2015 -0800
Committer: George Reyes <gr...@apache.org>
Committed: Thu Nov 5 11:04:57 2015 -0800

----------------------------------------------------------------------
 .../usergrid/tools/UniqueIndexCleanup.java      | 41 +++++++++++++++-----
 1 file changed, 31 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/usergrid/blob/48a8006d/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
----------------------------------------------------------------------
diff --git a/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java b/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
index 39ac762..f795a20 100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
@@ -18,6 +18,7 @@ package org.apache.usergrid.tools;
 
 
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.List;
 import java.util.UUID;
 
@@ -29,8 +30,10 @@ import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.io.Charsets;
+import org.apache.thrift.TBaseHelper;
 
 import org.apache.usergrid.persistence.cassandra.EntityManagerImpl;
+import org.apache.usergrid.utils.UUIDUtils;
 
 import me.prettyprint.cassandra.service.RangeSlicesIterator;
 import me.prettyprint.hector.api.Keyspace;
@@ -40,6 +43,7 @@ import me.prettyprint.hector.api.beans.Row;
 import me.prettyprint.hector.api.factory.HFactory;
 import me.prettyprint.hector.api.mutation.Mutator;
 import me.prettyprint.hector.api.query.RangeSlicesQuery;
+import sun.text.normalizer.UTF16;
 
 import static me.prettyprint.hector.api.factory.HFactory.createMutator;
 import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_UNIQUE;
@@ -64,7 +68,7 @@ public class UniqueIndexCleanup extends ToolBase {
     /**
      *
      */
-    private static final int PAGE_SIZE = 100;
+    private static final int PAGE_SIZE = 1;
 
 
     private static final Logger logger = LoggerFactory.getLogger( UniqueIndexCleanup.class );
@@ -100,7 +104,6 @@ public class UniqueIndexCleanup extends ToolBase {
         logger.info( "Starting entity unique index cleanup" );
 
 
-
         // go through each collection and audit the values
         Keyspace ko = cass.getUsergridApplicationKeyspace();
         Mutator<ByteBuffer> m = createMutator( ko, be );
@@ -118,11 +121,14 @@ public class UniqueIndexCleanup extends ToolBase {
         while ( rangeSlicesIterator.hasNext() ) {
             Row rangeSliceValue = rangeSlicesIterator.next();
 
-            String returnedRowKey =
-                    new String( ( ( ByteBuffer ) rangeSliceValue.getKey() ).array(), Charsets.UTF_8 ).trim();
+
+            ByteBuffer buf = ( TBaseHelper.rightSize(( ByteBuffer ) rangeSliceValue.getKey() ) );
+            //Cassandra client library returns ByteBuffers that are views on top of a larger byte[]. These larger ones return garbage data.
+            //Discovered thanks due to https://issues.apache.org/jira/browse/NUTCH-1591
+            String returnedRowKey = new String(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(), Charset.defaultCharset()).trim();
 
             String[] parsedRowKey = returnedRowKey.split( ":" );
-            UUID applicationId = UUID.fromString( parsedRowKey[0] );
+            UUID applicationId = UUID.fromString(uuidGarbageParser( parsedRowKey[0]) );
             String collectionName = parsedRowKey[1];
             String uniqueValueKey = parsedRowKey[2];
             String uniqueValue = parsedRowKey[3];
@@ -147,10 +153,10 @@ public class UniqueIndexCleanup extends ToolBase {
                             }
                         }
                         else if ( em.get( entityId ) == null ) {
-                            cleanup =true;
+                            cleanup = true;
                         }
 
-                        if(cleanup == true){
+                        if ( cleanup == true ) {
                             DeleteUniqueValue( m, applicationId, collectionName, uniqueValueKey, uniqueValue,
                                     entityId );
                             cleanup = false;
@@ -164,9 +170,24 @@ public class UniqueIndexCleanup extends ToolBase {
     }
 
 
-    private void DeleteUniqueValue( final Mutator<ByteBuffer> m, final UUID applicationId,
-                                    final String collectionName, final String uniqueValueKey, final String uniqueValue,
-                                    final UUID entityId ) throws Exception {
+    private String uuidGarbageParser( final String garbageString ) {
+        int index = 1;
+        String stringToBeTruncated = garbageString;
+        while( !UUIDUtils.isUUID( stringToBeTruncated ) ){
+            if( stringToBeTruncated.length()>36)
+                stringToBeTruncated = stringToBeTruncated.substring( index );
+            else {
+                System.out.println(garbageString+" is unparsable");
+                break;
+            }
+        }
+        return stringToBeTruncated;
+    }
+
+
+    private void DeleteUniqueValue( final Mutator<ByteBuffer> m, final UUID applicationId, final String collectionName,
+                                    final String uniqueValueKey, final String uniqueValue, final UUID entityId )
+            throws Exception {
         logger.warn( "Entity with id {} did not exist in app {}", entityId, applicationId );
         System.out.println( "Deleting column uuid: " + entityId.toString() );
         UUID timestampUuid = newTimeUUID();