You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by sn...@apache.org on 2015/11/17 23:05:09 UTC
[14/39] usergrid git commit: Added a workaround for buffers starting
with garbage data instead of the application uuid. Now it filters out the
beginning of the buffer.
Added a workaround for buffers starting with garbage data instead of the application uuid. Now it filters out the beginning of the buffer.
Project: http://git-wip-us.apache.org/repos/asf/usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/usergrid/commit/48a8006d
Tree: http://git-wip-us.apache.org/repos/asf/usergrid/tree/48a8006d
Diff: http://git-wip-us.apache.org/repos/asf/usergrid/diff/48a8006d
Branch: refs/heads/1.x
Commit: 48a8006d41c23abbdae23b20c851e76f7f53d3f2
Parents: 6d96a4c
Author: George Reyes <gr...@apache.org>
Authored: Thu Nov 5 11:04:57 2015 -0800
Committer: George Reyes <gr...@apache.org>
Committed: Thu Nov 5 11:04:57 2015 -0800
----------------------------------------------------------------------
.../usergrid/tools/UniqueIndexCleanup.java | 41 +++++++++++++++-----
1 file changed, 31 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/usergrid/blob/48a8006d/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
----------------------------------------------------------------------
diff --git a/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java b/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
index 39ac762..f795a20 100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
@@ -18,6 +18,7 @@ package org.apache.usergrid.tools;
import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
import java.util.List;
import java.util.UUID;
@@ -29,8 +30,10 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.io.Charsets;
+import org.apache.thrift.TBaseHelper;
import org.apache.usergrid.persistence.cassandra.EntityManagerImpl;
+import org.apache.usergrid.utils.UUIDUtils;
import me.prettyprint.cassandra.service.RangeSlicesIterator;
import me.prettyprint.hector.api.Keyspace;
@@ -40,6 +43,7 @@ import me.prettyprint.hector.api.beans.Row;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.mutation.Mutator;
import me.prettyprint.hector.api.query.RangeSlicesQuery;
+import sun.text.normalizer.UTF16;
import static me.prettyprint.hector.api.factory.HFactory.createMutator;
import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_UNIQUE;
@@ -64,7 +68,7 @@ public class UniqueIndexCleanup extends ToolBase {
/**
*
*/
- private static final int PAGE_SIZE = 100;
+ private static final int PAGE_SIZE = 1;
private static final Logger logger = LoggerFactory.getLogger( UniqueIndexCleanup.class );
@@ -100,7 +104,6 @@ public class UniqueIndexCleanup extends ToolBase {
logger.info( "Starting entity unique index cleanup" );
-
// go through each collection and audit the values
Keyspace ko = cass.getUsergridApplicationKeyspace();
Mutator<ByteBuffer> m = createMutator( ko, be );
@@ -118,11 +121,14 @@ public class UniqueIndexCleanup extends ToolBase {
while ( rangeSlicesIterator.hasNext() ) {
Row rangeSliceValue = rangeSlicesIterator.next();
- String returnedRowKey =
- new String( ( ( ByteBuffer ) rangeSliceValue.getKey() ).array(), Charsets.UTF_8 ).trim();
+
+ ByteBuffer buf = ( TBaseHelper.rightSize(( ByteBuffer ) rangeSliceValue.getKey() ) );
+ //The Cassandra client library returns ByteBuffers that are views on top of a larger byte[]. Decoding the whole backing array yields garbage data around the key.
+ //Discovered thanks to https://issues.apache.org/jira/browse/NUTCH-1591
+ String returnedRowKey = new String(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(), Charset.defaultCharset()).trim();
String[] parsedRowKey = returnedRowKey.split( ":" );
- UUID applicationId = UUID.fromString( parsedRowKey[0] );
+ UUID applicationId = UUID.fromString(uuidGarbageParser( parsedRowKey[0]) );
String collectionName = parsedRowKey[1];
String uniqueValueKey = parsedRowKey[2];
String uniqueValue = parsedRowKey[3];
@@ -147,10 +153,10 @@ public class UniqueIndexCleanup extends ToolBase {
}
}
else if ( em.get( entityId ) == null ) {
- cleanup =true;
+ cleanup = true;
}
- if(cleanup == true){
+ if ( cleanup == true ) {
DeleteUniqueValue( m, applicationId, collectionName, uniqueValueKey, uniqueValue,
entityId );
cleanup = false;
@@ -164,9 +170,24 @@ public class UniqueIndexCleanup extends ToolBase {
}
- private void DeleteUniqueValue( final Mutator<ByteBuffer> m, final UUID applicationId,
- final String collectionName, final String uniqueValueKey, final String uniqueValue,
- final UUID entityId ) throws Exception {
+ private String uuidGarbageParser( final String garbageString ) { // Trims leading characters until UUIDUtils.isUUID accepts the remainder, then returns it.
+ int index = 1; // substring start offset: exactly one leading character is dropped per pass
+ String stringToBeTruncated = garbageString;
+ while( !UUIDUtils.isUUID( stringToBeTruncated ) ){
+ if( stringToBeTruncated.length()>36) // more than 36 chars left (presumably the canonical UUID text length): keep trimming from the front
+ stringToBeTruncated = stringToBeTruncated.substring( index );
+ else {
+ System.out.println(garbageString+" is unparsable"); // 36 chars or fewer and still rejected: report the original input and give up
+ break;
+ }
+ }
+ return stringToBeTruncated; // NOTE(review): on the unparsable path this is not a UUID, and the caller feeds it straight to UUID.fromString
+ }
+
+
+ private void DeleteUniqueValue( final Mutator<ByteBuffer> m, final UUID applicationId, final String collectionName,
+ final String uniqueValueKey, final String uniqueValue, final UUID entityId )
+ throws Exception {
logger.warn( "Entity with id {} did not exist in app {}", entityId, applicationId );
System.out.println( "Deleting column uuid: " + entityId.toString() );
UUID timestampUuid = newTimeUUID();