You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by gr...@apache.org on 2015/12/08 22:51:01 UTC

usergrid git commit: Forgot the code, now it should be complete

Repository: usergrid
Updated Branches:
  refs/heads/unique_index_logging 626fe27d9 -> 4a3c764a5


Forgot the code, now it should be complete


Project: http://git-wip-us.apache.org/repos/asf/usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/usergrid/commit/4a3c764a
Tree: http://git-wip-us.apache.org/repos/asf/usergrid/tree/4a3c764a
Diff: http://git-wip-us.apache.org/repos/asf/usergrid/diff/4a3c764a

Branch: refs/heads/unique_index_logging
Commit: 4a3c764a5170fa61dfdbe9a25830d5d22dfc92ba
Parents: 626fe27
Author: George Reyes <gr...@apache.org>
Authored: Tue Dec 8 13:50:59 2015 -0800
Committer: George Reyes <gr...@apache.org>
Committed: Tue Dec 8 13:50:59 2015 -0800

----------------------------------------------------------------------
 .../usergrid/tools/ManagementUserAudit.java     | 418 ++++++++++++++++++-
 1 file changed, 416 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/usergrid/blob/4a3c764a/stack/tools/src/main/java/org/apache/usergrid/tools/ManagementUserAudit.java
----------------------------------------------------------------------
diff --git a/stack/tools/src/main/java/org/apache/usergrid/tools/ManagementUserAudit.java b/stack/tools/src/main/java/org/apache/usergrid/tools/ManagementUserAudit.java
index 3af5dfd..ee7f045 100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/ManagementUserAudit.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/ManagementUserAudit.java
@@ -1,7 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.usergrid.tools;
 
 
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.thrift.TimedOutException;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.thrift.TBaseHelper;
+
+import org.apache.usergrid.management.UserInfo;
+import org.apache.usergrid.persistence.Entity;
+import org.apache.usergrid.persistence.EntityRef;
+import org.apache.usergrid.persistence.cassandra.EntityManagerImpl;
+import org.apache.usergrid.utils.UUIDUtils;
+
+import me.prettyprint.cassandra.service.RangeSlicesIterator;
+import me.prettyprint.hector.api.Keyspace;
+import me.prettyprint.hector.api.beans.ColumnSlice;
+import me.prettyprint.hector.api.beans.HColumn;
+import me.prettyprint.hector.api.beans.Row;
+import me.prettyprint.hector.api.factory.HFactory;
+import me.prettyprint.hector.api.mutation.Mutator;
+import me.prettyprint.hector.api.query.RangeSlicesQuery;
+
+import static me.prettyprint.hector.api.factory.HFactory.createMutator;
+import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_UNIQUE;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.addDeleteToMutator;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.key;
+import static org.apache.usergrid.persistence.cassandra.CassandraService.MANAGEMENT_APPLICATION_ID;
+import static org.apache.usergrid.persistence.cassandra.Serializers.be;
+import static org.apache.usergrid.persistence.cassandra.Serializers.ue;
+import static org.apache.usergrid.utils.UUIDUtils.getTimestampInMicros;
+import static org.apache.usergrid.utils.UUIDUtils.newTimeUUID;
+
+
 /**
- * Created by ApigeeCorporation on 12/8/15.
+ * This utility audits all values in the ENTITY_UNIQUE column family. If it finds any duplicates of users then it
+ * deletes the non existing columns from the row. If there are no more columns in the row then it deletes the row. If
+ * there exists more than one existing column then the one with the most recent timestamp wins and the other is
+ * deleted.
+ *
+ * If you want the run the tool on their cluster the following is what you need to do nohup java
+ * -Dlog4j.configuration=file:log4j.properties -jar usergrid-tools-1.0.2.jar UserUniqueIndexCleanup -host
+ * <cassandra_host_here>  > log.txt
+ *
+ * if there is a specific value you want to run the tool on then you need the following
+ *
+ * nohup java -Dlog4j.configuration=file:log4j.properties -jar usergrid-tools-1.0.2.jar UserUniqueIndexCleanup -host
+ * <cassandra_host_here> -app <applicationUUID> -col <collection_name> -property <unique_property_key> -value
+ * <unique_property_value> > log.txt
+ *
+ * @author grey
  */
-public class ManagementUserAudit {}
+public class ManagementUserAudit extends ToolBase {
+
+    /**
+     *
+     */
+    private static final int PAGE_SIZE = 100;
+
+
+    private static final Logger logger = LoggerFactory.getLogger( ManagementUserAudit.class );
+
+    private static final String APPLICATION_ARG = "app";
+
+    private static final String COLLECTION_ARG = "col";
+
+    private static final String ENTITY_UNIQUE_PROPERTY_NAME = "property";
+
+    private static final String ENTITY_UNIQUE_PROPERTY_VALUE = "value";
+
+
+    @Override
+    @SuppressWarnings( "static-access" )
+    public Options createOptions() {
+
+
+        Options options = new Options();
+
+        Option hostOption =
+                OptionBuilder.withArgName( "host" ).hasArg().isRequired( true ).withDescription( "Cassandra host" )
+                             .create( "host" );
+
+        options.addOption( hostOption );
+
+
+        //        Option appOption = OptionBuilder.withArgName( APPLICATION_ARG ).hasArg().isRequired( false )
+        //                                        .withDescription( "application id" ).create( APPLICATION_ARG );
+        //
+        //
+        //        options.addOption( appOption );
+        //
+        //        Option collectionOption = OptionBuilder.withArgName( COLLECTION_ARG ).hasArg().isRequired( false )
+        //                                               .withDescription( "collection name" ).create( COLLECTION_ARG );
+        //
+        //        options.addOption( collectionOption );
+
+        //        Option entityUniquePropertyName =
+        //                OptionBuilder.withArgName( ENTITY_UNIQUE_PROPERTY_NAME ).hasArg().isRequired( false )
+        //                             .withDescription( "Entity Unique Property Name" ).create( ENTITY_UNIQUE_PROPERTY_NAME );
+        //        options.addOption( entityUniquePropertyName );
+
+        Option entityUniquePropertyValue =
+                OptionBuilder.withArgName( ENTITY_UNIQUE_PROPERTY_VALUE ).hasArg().isRequired( false )
+                             .withDescription( "Entity Unique Property Value" ).create( ENTITY_UNIQUE_PROPERTY_VALUE );
+        options.addOption( entityUniquePropertyValue );
+
+
+        return options;
+    }
+
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see
+     * org.apache.usergrid.tools.ToolBase#runTool(org.apache.commons.cli.CommandLine)
+     */
+    @Override
+    public void runTool( CommandLine line ) throws Exception {
+        startSpring();
+
+        logger.info( "Starting entity unique checker" );
+
+
+        // go through each collection and audit the values
+        Keyspace ko = cass.getUsergridApplicationKeyspace();
+        Mutator<ByteBuffer> m = createMutator( ko, be );
+
+        if ( line.hasOption( ENTITY_UNIQUE_PROPERTY_VALUE ) ) {
+            deleteInvalidValuesForUniqueProperty( m, line );
+        }
+        else {
+            //maybe put a byte buffer infront.
+            RangeSlicesQuery<ByteBuffer, ByteBuffer, ByteBuffer> rangeSlicesQuery =
+                    HFactory.createRangeSlicesQuery( ko, be, be, be ).setColumnFamily( ENTITY_UNIQUE.getColumnFamily() )
+                            //not sure if I trust the lower two settings as it might iterfere with paging or set
+                            // arbitrary limits and what I want to retrieve.
+                            //That needs to be verified.
+                            .setKeys( null, null ).setRange( null, null, false, PAGE_SIZE );
+
+
+            RangeSlicesIterator rangeSlicesIterator = new RangeSlicesIterator( rangeSlicesQuery, null, null );
+
+            while ( rangeSlicesIterator.hasNext() ) {
+                Row rangeSliceValue = rangeSlicesIterator.next();
+
+
+                ByteBuffer buf = ( TBaseHelper.rightSize( ( ByteBuffer ) rangeSliceValue.getKey() ) );
+                //Cassandra client library returns ByteBuffers that are views on top of a larger byte[]. These larger
+                // ones return garbage data.
+                //Discovered thanks due to https://issues.apache.org/jira/browse/NUTCH-1591
+                String returnedRowKey = new String( buf.array(), buf.arrayOffset() + buf.position(), buf.remaining(),
+                        Charset.defaultCharset() ).trim();
+
+
+                //defensive programming, don't have to have to parse the string if it doesn't contain users.
+                if (returnedRowKey.contains("email") && returnedRowKey.contains( "users" ) && returnedRowKey.contains( MANAGEMENT_APPLICATION_ID.toString() )) {
+
+                    String[] parsedRowKey = returnedRowKey.split( ":" );
+
+                    //if the rowkey contains more than 4 parts then it may have some garbage appended to the front.
+                    if ( parsedRowKey.length > 4 ) {
+                        parsedRowKey = garbageRowKeyParser( parsedRowKey );
+
+                        if ( parsedRowKey == null ) {
+                            logger.error( "{} is a invalid row key, and unparseable. Skipped...", returnedRowKey );
+                            continue;
+                        }
+                    }
+                    //if the rowkey contains less than four parts then it is completely invalid
+                    else if ( parsedRowKey.length < 4 ) {
+                        logger.error( "{} is a invalid row key, and unparseable. Skipped...", returnedRowKey );
+                        continue;
+                    }
+
+                    UUID applicationId = null;
+                    try {
+                        applicationId = UUID.fromString( uuidGarbageParser( parsedRowKey[0] ) );
+                    }
+                    catch ( Exception e ) {
+                        logger.error( "could not parse {} despite earlier parsing. Skipping...", parsedRowKey[0] );
+                        continue;
+                    }
+                    String collectionName = parsedRowKey[1];
+                    String uniqueValueKey = parsedRowKey[2];
+                    String uniqueValue = parsedRowKey[3];
+
+
+                    if ( collectionName.equals( "users" ) ) {
+
+                        ColumnSlice<ByteBuffer, ByteBuffer> columnSlice=rangeSliceValue.getColumnSlice();
+                        //if ( columnSlice.getColumns().size() != 0 ) {
+                        List<HColumn<ByteBuffer, ByteBuffer>> cols=columnSlice.getColumns();
+
+                        entityUUIDDelete( m, applicationId, collectionName, uniqueValueKey, uniqueValue, cols,
+                                returnedRowKey );
+                    }
+                }
+            }
+        }
+        logger.info( "Completed logging successfully" );
+    }
+
+
+    //Returns a functioning rowkey if it can otherwise returns null
+    public String[] garbageRowKeyParser( String[] parsedRowKey ) {
+        String[] modifiedRowKey = parsedRowKey.clone();
+        while ( modifiedRowKey != null ) {
+            if ( modifiedRowKey.length < 4 ) {
+                return null;
+            }
+
+            String recreatedRowKey = uuidStringVerifier( modifiedRowKey[0] );
+            if ( recreatedRowKey == null ) {
+                recreatedRowKey = "";
+                modifiedRowKey = getStrings( modifiedRowKey, recreatedRowKey );
+            }
+            else {
+                recreatedRowKey = recreatedRowKey.concat( ":" );
+                modifiedRowKey = getStrings( modifiedRowKey, recreatedRowKey );
+                break;
+            }
+        }
+        return modifiedRowKey;
+    }
+
+
+    private String[] getStrings( String[] modifiedRowKey, String recreatedRowKey ) {
+        for ( int i = 1; i < modifiedRowKey.length; i++ ) {
+
+            recreatedRowKey = recreatedRowKey.concat( modifiedRowKey[i] );
+            if ( i + 1 != modifiedRowKey.length ) {
+                recreatedRowKey = recreatedRowKey.concat( ":" );
+            }
+        }
+        modifiedRowKey = recreatedRowKey.split( ":" );
+        return modifiedRowKey;
+    }
+
+
+    //    private void deleteRow( final Mutator<ByteBuffer> m, final UUID applicationId, final String collectionName,
+    //                            final String uniqueValueKey, final String uniqueValue ) throws Exception {
+    //        logger.debug( "Found 0 uuid's associated with {} Deleting row.", uniqueValue );
+    //        UUID timestampUuid = newTimeUUID();
+    //        long timestamp = getTimestampInMicros( timestampUuid );
+    //
+    //        Keyspace ko = cass.getApplicationKeyspace( applicationId );
+    //        Mutator<ByteBuffer> mutator = createMutator( ko, be );
+    //
+    //        Object key = key( applicationId, collectionName, uniqueValueKey, uniqueValue );
+    //        addDeleteToMutator( mutator, ENTITY_UNIQUE, key, timestamp );
+    //        mutator.execute();
+    //        return;
+    //    }
+
+
+    private void entityUUIDDelete( final Mutator<ByteBuffer> m, final UUID applicationId, final String collectionName,
+                                   final String uniqueValueKey, final String uniqueValue,
+                                   final List<HColumn<ByteBuffer, ByteBuffer>> cols, String rowKey ) throws Exception {
+        Boolean cleanup = false;
+        EntityManagerImpl em = ( EntityManagerImpl ) emf.getEntityManager( applicationId );
+        int numberOfColumnsDeleted = 0;
+        //these columns all come from the same row key, which means they each belong to the same row key identifier
+        //thus mixing and matching them in the below if cases won't matter.
+        // Entity[] entities = new Entity[cols.size()];
+        int numberOfRetrys = 8;
+        int numberOfTimesRetrying = 0;
+
+        int index = 0;
+
+
+
+        UserInfo userInfo = managementService.getAdminUserByEmail( uniqueValue );
+        if(userInfo==null) {
+            if(cols!=null){
+                if(cols.size()>1){
+                    for(HColumn<ByteBuffer, ByteBuffer> col : cols) {
+                        logger.warn( "This uuid: {} is associated with this duplicated email {}", ue.fromByteBuffer( col.getName()),uniqueValue );
+                    }
+
+                }
+                if(cols.size()==1){
+                    logger.error( "Management user with uuid: {} and email: {} is broken",ue.fromByteBuffer( cols.get( 0 ).getName()), uniqueValue );
+                }
+                else{
+                    logger.error( "Management user with email: {} is broken and has no uuid's associated with it",uniqueValue );
+                }
+            }
+        }
+        else {
+            logger.info( "The following email works: {}",uniqueValue );
+        }
+
+
+        //        for ( int i = 0; i < numberOfRetrys; i++ ) {
+        //            try {
+        //                Map<String, EntityRef> results =
+        //                        em.getAlias( applicationId, collectionName, Collections.singletonList( uniqueValue ) );
+        //                if ( results.size() > 1 ) {
+        //                    logger.error("failed to clean up {} from application {}. Please clean manually.",uniqueValue,applicationId);
+        //                    break;
+        //                }
+        //                else {
+        //                    continue;
+        //                }
+        //            }
+        //            catch ( Exception toe ) {
+        //                logger.error( "timeout doing em getAlias repair. This is the {} number of repairs attempted", i );
+        //                toe.printStackTrace();
+        //                Thread.sleep( 1000 * i );
+        //            }
+        //        }
+    }
+
+
+    private Entity verifyModifiedTimestamp( final Entity unverifiedEntity ) {
+        Entity entity = unverifiedEntity;
+        if ( entity != null && entity.getModified() == null ) {
+            if ( entity.getCreated() != null ) {
+                logger.debug(
+                        "{} has no modified. Subsituting created timestamp for their modified timestamp.Manually "
+                                + "adding one for comparison purposes",
+                        entity.getUuid() );
+                entity.setModified( entity.getCreated() );
+                return entity;
+            }
+            else {
+                logger.error( "Found no created or modified timestamp. Please remake the following entity: {}."
+                        + " Setting both created and modified to 1", entity.getUuid().toString() );
+                entity.setCreated( 1L );
+                entity.setModified( 1L );
+                return entity;
+            }
+        }
+        return entity;
+    }
+
+
+    //really only deletes ones that aren't existant for a specific value
+    private void deleteInvalidValuesForUniqueProperty( Mutator<ByteBuffer> m, CommandLine line ) throws Exception {
+        UUID applicationId = MANAGEMENT_APPLICATION_ID;
+        String collectionName = "users"; //line.getOptionValue( COLLECTION_ARG );
+        String uniqueValueKey = "email"; //line.getOptionValue( ENTITY_UNIQUE_PROPERTY_NAME );
+        String uniqueValue = line.getOptionValue( ENTITY_UNIQUE_PROPERTY_VALUE );
+
+        //PLEASE ADD VERIFICATION.
+
+        Object key = key( applicationId, collectionName,"email", uniqueValue );
+
+
+        List<HColumn<ByteBuffer, ByteBuffer>> cols = cass.getColumns( cass.getApplicationKeyspace( applicationId), ENTITY_UNIQUE, key, null, null, 1000,
+                false );
+
+
+        if ( cols.size() == 0 ) {
+            logger.error( "This row key: {} has zero columns", key.toString() );
+        }
+
+        entityUUIDDelete( m, applicationId, collectionName, uniqueValueKey, uniqueValue, cols, key.toString() );
+    }
+
+
+    private String uuidGarbageParser( final String garbageString ) {
+        int index = 1;
+        String stringToBeTruncated = garbageString;
+        while ( !UUIDUtils.isUUID( stringToBeTruncated ) ) {
+            if ( stringToBeTruncated.length() > 36 ) {
+                stringToBeTruncated = stringToBeTruncated.substring( index );
+            }
+            else {
+                logger.error( "{} is unparsable", garbageString );
+                break;
+            }
+        }
+        return stringToBeTruncated;
+    }
+
+
+    private String uuidStringVerifier( final String garbageString ) {
+        int index = 1;
+        String stringToBeTruncated = garbageString;
+        while ( !UUIDUtils.isUUID( stringToBeTruncated ) ) {
+            if ( stringToBeTruncated.length() > 36 ) {
+                stringToBeTruncated = stringToBeTruncated.substring( index );
+            }
+            else {
+                return null;
+            }
+        }
+        return stringToBeTruncated;
+    }
+
+}