You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by mr...@apache.org on 2018/10/23 03:17:50 UTC
[12/16] usergrid git commit: improvements to CollectionIterator tool
for duplicate and orphaned connections
improvements to CollectionIterator tool for duplicate and orphaned connections
Project: http://git-wip-us.apache.org/repos/asf/usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/usergrid/commit/0632ceff
Tree: http://git-wip-us.apache.org/repos/asf/usergrid/tree/0632ceff
Diff: http://git-wip-us.apache.org/repos/asf/usergrid/diff/0632ceff
Branch: refs/heads/master
Commit: 0632ceff5b40b3a23f1561589cb911c3ca45cb6b
Parents: c98a5e9
Author: Mike Dunker <md...@google.com>
Authored: Mon Mar 12 12:19:56 2018 -0700
Committer: Keyur Karnik <ke...@gmail.com>
Committed: Tue Aug 28 16:41:44 2018 -0700
----------------------------------------------------------------------
.../persistence/index/utils/UUIDUtils.java | 7 +-
.../usergrid/tools/CollectionIterator.java | 80 ++++++++++++++------
.../org/apache/usergrid/tools/ToolBase.java | 26 +++++++
3 files changed, 89 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/usergrid/blob/0632ceff/stack/corepersistence/queryindex/src/main/java/org/apache/usergrid/persistence/index/utils/UUIDUtils.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/queryindex/src/main/java/org/apache/usergrid/persistence/index/utils/UUIDUtils.java b/stack/corepersistence/queryindex/src/main/java/org/apache/usergrid/persistence/index/utils/UUIDUtils.java
index bf8dd3c..1ba9f3c 100644
--- a/stack/corepersistence/queryindex/src/main/java/org/apache/usergrid/persistence/index/utils/UUIDUtils.java
+++ b/stack/corepersistence/queryindex/src/main/java/org/apache/usergrid/persistence/index/utils/UUIDUtils.java
@@ -303,8 +303,11 @@ public class UUIDUtils {
if ( uuid == null ) {
return 0;
}
- long t = uuid.timestamp();
- return ( t - KCLOCK_OFFSET ) / KCLOCK_MULTIPLIER_L;
+ return getUnixTimestampInMillisFromUUIDTimestamp(uuid.timestamp());
+ }
+
+ public static long getUnixTimestampInMillisFromUUIDTimestamp( long uuidTimestamp ) {
+ return ( uuidTimestamp - KCLOCK_OFFSET ) / KCLOCK_MULTIPLIER_L;
}
http://git-wip-us.apache.org/repos/asf/usergrid/blob/0632ceff/stack/tools/src/main/java/org/apache/usergrid/tools/CollectionIterator.java
----------------------------------------------------------------------
diff --git a/stack/tools/src/main/java/org/apache/usergrid/tools/CollectionIterator.java b/stack/tools/src/main/java/org/apache/usergrid/tools/CollectionIterator.java
index 26b5f5f..bfe5edf 100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/CollectionIterator.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/CollectionIterator.java
@@ -23,23 +23,16 @@ import java.util.*;
import com.google.common.base.Optional;
import com.netflix.astyanax.MutationBatch;
-import org.apache.usergrid.corepersistence.pipeline.read.ResultsPage;
-import org.apache.usergrid.corepersistence.results.EntityQueryExecutor;
-import org.apache.usergrid.corepersistence.results.IdQueryExecutor;
-import org.apache.usergrid.corepersistence.service.CollectionSearch;
import org.apache.usergrid.corepersistence.service.CollectionService;
import org.apache.usergrid.corepersistence.util.CpNamingUtils;
import org.apache.usergrid.persistence.*;
import org.apache.usergrid.persistence.core.scope.ApplicationScope;
import org.apache.usergrid.persistence.core.scope.ApplicationScopeImpl;
import org.apache.usergrid.persistence.graph.*;
-import org.apache.usergrid.persistence.graph.impl.SimpleEdge;
-import org.apache.usergrid.persistence.graph.impl.SimpleMarkedEdge;
import org.apache.usergrid.persistence.graph.impl.SimpleSearchByEdgeType;
import org.apache.usergrid.persistence.graph.serialization.EdgeSerialization;
import org.apache.usergrid.persistence.index.utils.UUIDUtils;
import org.apache.usergrid.persistence.model.entity.*;
-import org.apache.usergrid.persistence.model.entity.Entity;
import org.apache.usergrid.persistence.schema.CollectionInfo;
import org.apache.usergrid.utils.InflectionUtils;
import static org.apache.commons.lang.StringUtils.isBlank;
@@ -63,7 +56,9 @@ public class CollectionIterator extends ToolBase {
private static final String ENTITY_TYPE_ARG = "entityType";
- private static final String REMOVE_CONNECTIONS_ARG = "removeConnections";
+ private static final String REMOVE_DUPLICATE_CONNECTIONS_ARG = "removeDuplicateConnections";
+
+ private static final String REMOVE_ORPHAN_CONNECTIONS_ARG = "removeOrphanConnections";
private static final String LATEST_TIMESTAMP_ARG = "latestTimestamp";
@@ -95,11 +90,17 @@ public class CollectionIterator extends ToolBase {
options.addOption( collectionOption );
- Option removeConnectionsOption =
- OptionBuilder.withArgName(REMOVE_CONNECTIONS_ARG).hasArg().isRequired( false ).withDescription( "remove orphaned connections" )
- .create(REMOVE_CONNECTIONS_ARG);
+ Option removeOrphanConnectionsOption =
+ OptionBuilder.withArgName(REMOVE_ORPHAN_CONNECTIONS_ARG).hasArg().isRequired( false ).withDescription( "remove orphaned connections" )
+ .create(REMOVE_ORPHAN_CONNECTIONS_ARG);
+
+ options.addOption( removeOrphanConnectionsOption );
+
+ Option removeDuplicateConnectionsOption =
+ OptionBuilder.withArgName(REMOVE_DUPLICATE_CONNECTIONS_ARG).hasArg().isRequired( false ).withDescription( "remove duplicate connections" )
+ .create(REMOVE_DUPLICATE_CONNECTIONS_ARG);
- options.addOption( removeConnectionsOption );
+ options.addOption( removeDuplicateConnectionsOption );
Option earliestTimestampOption =
OptionBuilder.withArgName(EARLIEST_TIMESTAMP_ARG).hasArg().isRequired( false ).withDescription( "earliest timestamp to delete" )
@@ -137,7 +138,8 @@ public class CollectionIterator extends ToolBase {
String applicationOption = line.getOptionValue(APPLICATION_ARG);
String entityTypeOption = line.getOptionValue(ENTITY_TYPE_ARG);
- String removeConnectionsOption = line.getOptionValue(REMOVE_CONNECTIONS_ARG);
+ String removeOrphanConnectionsOption = line.getOptionValue(REMOVE_ORPHAN_CONNECTIONS_ARG);
+ String removeDuplicateConnectionsOption = line.getOptionValue(REMOVE_DUPLICATE_CONNECTIONS_ARG);
String earliestTimestampOption = line.getOptionValue(EARLIEST_TIMESTAMP_ARG);
String latestTimestampOption = line.getOptionValue(LATEST_TIMESTAMP_ARG);
String secondsInPastOption = line.getOptionValue(SECONDS_IN_PAST_ARG);
@@ -152,7 +154,8 @@ public class CollectionIterator extends ToolBase {
}
String entityType = entityTypeOption;
- final boolean removeOrphans = !isBlank(removeConnectionsOption) && removeConnectionsOption.toLowerCase().equals("yes");
+ final boolean removeOrphans = !isBlank(removeOrphanConnectionsOption) && removeOrphanConnectionsOption.toLowerCase().equals("yes");
+ final boolean removeDuplicates = !isBlank(removeDuplicateConnectionsOption) && removeDuplicateConnectionsOption.toLowerCase().equals("yes");
if (!isBlank(secondsInPastOption) && !isBlank(latestTimestampOption)) {
throw new RuntimeException("Can't specify both latest timestamp and seconds in past options.");
@@ -222,32 +225,63 @@ public class CollectionIterator extends ToolBase {
new SimpleSearchByEdgeType( applicationScopeId, simpleEdgeType, Long.MAX_VALUE, SearchByEdgeType.Order.DESCENDING,
Optional.absent(), false );
+ Set<UUID> uuidSet = new HashSet<>();
+
gm.loadEdgesFromSource(search).map(markedEdge -> {
UUID uuid = markedEdge.getTargetNode().getUuid();
+ long edgeTimestamp = markedEdge.getTimestamp();
+ String edgeType = markedEdge.getType();
+ boolean duplicate = uuidSet.contains(uuid);
+ if (!duplicate) {
+ uuidSet.add(uuid);
+ }
try {
EntityRef entityRef = new SimpleEntityRef(entityType, uuid);
org.apache.usergrid.persistence.Entity retrieved = em.get(entityRef);
long timestamp = 0;
+ DateFormat df = new SimpleDateFormat();
+ df.setTimeZone(TimeZone.getTimeZone("GMT"));
String dateString = "NOT TIME-BASED";
if (UUIDUtils.isTimeBased(uuid)){
timestamp = UUIDUtils.getTimestampInMillis(uuid);
Date uuidDate = new Date(timestamp);
- DateFormat df = new SimpleDateFormat();
- df.setTimeZone(TimeZone.getTimeZone("GMT"));
dateString = df.format(uuidDate) + " GMT";
}
+ Date uuidEdgeDate = new Date(UUIDUtils.getUnixTimestampInMillisFromUUIDTimestamp(edgeTimestamp));
+ String edgeDateString = df.format(uuidEdgeDate) + " GMT";
+
if ( retrieved != null ){
- logger.info("{} - {} - entity data found", uuid, dateString);
+ if (duplicate) {
+ if (removeDuplicates) {
+ logger.info("DUPLICATE ENTITY (REMOVING): uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString);
+ try {
+ MutationBatch batch = edgeSerialization.deleteEdge(applicationScope, markedEdge, UUIDUtils.newTimeUUID());
+ logger.info("BATCH: {}", batch);
+ batch.execute();
+ } catch (Exception e) {
+ logger.error("{} - exception while trying to remove orphaned connection, {}", uuid, e.getMessage());
+ }
+ } else {
+ logger.info("DUPLICATE ENTITY (WON'T REMOVE): uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString);
+ }
+
+ } else {
+ logger.info("ENTITY: uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString);
+ }
+
+
}else{
if (removeOrphans && timestamp >= earliestTimestamp && timestamp <= latestTimestamp) {
- logger.info("{} - {} - entity data NOT found, REMOVING", uuid, dateString);
+ logger.info("NOT FOUND (REMOVING): uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{} - isDeleted:{} isSourceNodeDeleted:{} isTargetNodeDeleted:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString, markedEdge.isDeleted(), markedEdge.isSourceNodeDelete(), markedEdge.isTargetNodeDeleted());
try {
- //em.removeItemFromCollection(headEntity, collectionName, entityRef );
- logger.info("entityRef.asId(): {}", entityRef.asId());
MutationBatch batch = edgeSerialization.deleteEdge(applicationScope, markedEdge, UUIDUtils.newTimeUUID());
logger.info("BATCH: {}", batch);
batch.execute();
@@ -255,9 +289,11 @@ public class CollectionIterator extends ToolBase {
logger.error("{} - exception while trying to remove orphaned connection, {}", uuid, e.getMessage());
}
} else if (removeOrphans) {
- logger.info("{} - {} ({}) - entity data NOT found, not removing because timestamp not in range", uuid, dateString, timestamp);
+ logger.info("NOT FOUND (TIMESTAMP OUT OF RANGE): uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{} - isDeleted:{} isSourceNodeDeleted:{} isTargetNodeDeleted:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString, markedEdge.isDeleted(), markedEdge.isSourceNodeDelete(), markedEdge.isTargetNodeDeleted());
} else {
- logger.info("{} - {} ({}) - entity data NOT found", uuid, dateString, timestamp);
+ logger.info("NOT FOUND: uuid:{} edgeTimestamp:{}({}) edgeType:{} timestamp:{} uuidDate:{} - isDeleted:{} isSourceNodeDeleted:{} isTargetNodeDeleted:{}",
+ uuid, edgeTimestamp, edgeDateString, edgeType, timestamp, dateString, markedEdge.isDeleted(), markedEdge.isSourceNodeDelete(), markedEdge.isTargetNodeDeleted());
}
}
} catch (Exception e) {
http://git-wip-us.apache.org/repos/asf/usergrid/blob/0632ceff/stack/tools/src/main/java/org/apache/usergrid/tools/ToolBase.java
----------------------------------------------------------------------
diff --git a/stack/tools/src/main/java/org/apache/usergrid/tools/ToolBase.java b/stack/tools/src/main/java/org/apache/usergrid/tools/ToolBase.java
index 62636ea..97a7d5a 100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/ToolBase.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/ToolBase.java
@@ -105,11 +105,29 @@ public abstract class ToolBase {
if ( line.hasOption( "host" ) ) {
System.setProperty( "cassandra.url", line.getOptionValue( "host" ) );
+ }
+
+ if ( line.hasOption( "eshost" ) ) {
System.setProperty( "elasticsearch.hosts", line.getOptionValue( "eshost" ) );
+ }
+
+ if ( line.hasOption( "escluster" ) ) {
System.setProperty( "elasticsearch.cluster_name", line.getOptionValue( "escluster" ) );
+ }
+
+ if ( line.hasOption( "ugcluster" ) ) {
System.setProperty( "usergrid.cluster_name", line.getOptionValue( "ugcluster" ) );
}
+ if ( line.hasOption( "appkeyspace" ) ) {
+ System.setProperty( "cassandra.keyspace.application", line.getOptionValue( "appkeyspace" ) );
+ }
+
+ if ( line.hasOption( "lockskeyspace" ) ) {
+ System.setProperty( "cassandra.lock.keyspace", line.getOptionValue( "lockskeyspace" ) );
+ }
+
+
try {
runTool( line );
}
@@ -153,6 +171,12 @@ public abstract class ToolBase {
Option remoteOption = OptionBuilder
.withDescription( "Use remote Cassandra instance" ).create( "remote" );
+ Option ugAppKeyspace = OptionBuilder.withArgName( "appkeyspace" ).hasArg()
+ .withDescription( "Usergrid Application keyspace" ).create( "appkeyspace" );
+
+ Option ugLocksKeyspace = OptionBuilder.withArgName( "lockskeyspace" ).hasArg()
+ .withDescription( "Usergrid Locks keyspace" ).create( "lockskeyspace" );
+
Option verbose = OptionBuilder
.withDescription( "Print on the console an echo of the content written to the file" )
.create( VERBOSE );
@@ -163,6 +187,8 @@ public abstract class ToolBase {
options.addOption( esClusterOption );
options.addOption( ugClusterOption );
options.addOption( remoteOption );
+ options.addOption( ugAppKeyspace );
+ options.addOption( ugLocksKeyspace );
options.addOption( verbose );
return options;