You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2015/11/05 20:23:06 UTC
svn commit: r1712850 - in /uima/sandbox/uima-ducc/trunk: src/main/admin/ src/main/assembly/ src/main/resources/ src/main/resources/private/ uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/ uima-ducc-database/src/main/java/org/...

Author: challngr
Date: Thu Nov  5 19:23:05 2015
New Revision: 1712850

URL: http://svn.apache.org/viewvc?rev=1712850&view=rev
Log:
UIMA-4755 Scripting and build updates.

Added:
    uima/sandbox/uima-ducc/trunk/src/main/admin/db_create   (with props)
    uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py
    uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml   (with props)
Removed:
    uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml.prep
    uima/sandbox/uima-ducc/trunk/src/main/resources/private/database.xml
Modified:
    uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc
    uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader
    uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py
    uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install
    uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py
    uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml
    uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh
    uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc Thu Nov  5 19:23:05 2015
@@ -55,7 +55,7 @@ class CheckDucc(DuccUtil):
         if ( self.db_bypass == True ):
             return True
 
-        ret = self.db_alive()
+        ret = self.db_alive(3)
         if ( ret ):
             print 'The database is running'
         else:

Added: uima/sandbox/uima-ducc/trunk/src/main/admin/db_create
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_create?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_create (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_create Thu Nov  5 19:23:05 2015
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+
+import os
+import sys
+import getopt
+
+import shutil
+import subprocess
+from  stat import *
+
+from ducc_util import DuccUtil
+
+
+from ducc_base import Properties
+from ducc_base import Property
+
+from ducc_base import find_ducc_home
+from ducc_base import find_localhost
+
+from ducc import Ducc
+
+import db_util as dbu
+
+# 
+# Create and initialize the DUCC database.  In a new installation this is handled by
+# ducc_post_install.  Older installations need to run db_create as one of the steps
+# of migration to the DB.
+#
+class DbCreate(DuccUtil):
+
+    def usage(self, msg):
+        """Print msg (when not None) and the help text, then exit with status 1."""
+        # msg is a single string, so it is printed as-is rather than joined
+        if ( msg != None ):
+            print msg
+
+        print 'DbCreate configures the database and installs the schema.'
+        print ''
+        print "Usage:"
+        print "   db_create [options]"
+        print "        If no options prompts are given for expected parameters."
+        print ""
+        print "Options:"
+        print "   [-n, --head-node] <ducc head node>"
+        print "        This is the name of the host that will run the DUCC management processes."
+        print ""
+        print "   [-d, --db-password] <root password for database>"
+        print "        This is the password DUCC uses to manage the database."
+        print ""
+        print "   [-h, -? --help]"
+        print "        Prints this message."
+        print ""
+        sys.exit(1)
+
+    def main(self, argv):
+        """Parse the command line in argv, merge the DUCC properties, and configure the database."""
+        self.database_pw = None
+
+        try:
+            opts, args = getopt.getopt(argv, 'd:n:h?', ['db-password=', 'head-node=', 'help'])
+        except getopt.GetoptError:
+            self.usage("Invalid arguments " + ' '.join(argv))
+
+        # NOTE(review): --head-node sets self.ducc_head, but db_node below comes from ducc.head -- confirm.
+        for ( o, a ) in opts:
+            if o in ('-n', '--head-node'):
+                self.ducc_head = a
+            elif o in ('-d', '--db-password'):
+                self.database_pw = a
+            elif o in ('-h', '-?', '--help'):
+                self.usage(None)
+
+        # start with merged properties
+        self.merge_properties()
+
+        # configure the database for local system and initialize the schema;
+        # a password of None makes configure_database prompt for one
+        db_node = self.ducc_properties.get("ducc.head")
+        dbu.configure_database(self.DUCC_HOME, db_node, self.jvm, self.database_pw)
+        return
+        # NOTE(review): unreachable because of the return above -- confirm whether it should run.
+        # write update site.ducc.properties with db installation
+        self.ducc_site_properties.write(self.site_properties_name)
+
+        # remerge to insure it's all correct and ready to go
+        self.merge_properties()
+
+if __name__ == "__main__":
+    # Script entry point: hand the command-line arguments (minus argv[0]) to DbCreate.
+    creator = DbCreate()
+    creator.main(sys.argv[1:])

Propchange: uima/sandbox/uima-ducc/trunk/src/main/admin/db_create
------------------------------------------------------------------------------
    svn:executable = *

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader Thu Nov  5 19:23:05 2015
@@ -32,7 +32,7 @@ import getopt
 #
 from ducc_util import DuccUtil
 
-class DbConvert(DuccUtil):
+class DbLoader(DuccUtil):
 
     def __init__(self):
         DuccUtil.__init__(self)
@@ -47,41 +47,44 @@ class DbConvert(DuccUtil):
         print 'db_loader -i in -o out [--no-archive]'
         print ''
         print 'Where:'
+        print '    -d'
+        print '         Specifies the database must be dropped and reinitialized if it already exists.'
+        print '         If not specified, the correct schema must already exist.  Object already in'
+        print '         the database are not overwritten.'
         print '    -i in'
         print '         Specifies the DUCC_HOME of the DUCC runtime to be moved into the database.'
-        print '    -o out'
-        print '         Specifies the DUCC_HOME of the DUCC runtime to contain the database.'
+        print '    -o dburl'
+        print '         Specifies the database URL to contact to store the items.'
         print '    --noarchive'
         print '         Suppresses archival of the input files.'
         print ''
         print 'Notes:'
-        print '    1.  The output location is not verfied as a DUCC runtime.  The database will be created'
-        print '        in that location nonetheless.  After it is created it may be manually moved to'
-        print '        the intended runtime.'
-        print '    2.  The "in" and "out" directories may be the same.'
-        print '    3.  Archival consists of renaming relevent input directories and files by appending'
+        print '    1.  The database must be started before running this script.  Because this is a bootstrap'
+        print '        script DUCC must be down.  The database may be started using start_ducc -c db'
+        print '    2.  Archival consists of renaming relevent input directories and files by appending'
         print '        ".archive" to their names.  It is fully non-destructive and may be reversed by'
-        print '         manually renaming them back to their original names.'
-        print '    4.  If a database already exists in the specified "out" location, it is first dropped.'
-        print '        The schema is built and the database is fully re-initialized.'
+        print '        manually renaming them back to their original names.'
 
         sys.exit(1);
 
     def main(self, argv):
 
+        drop    = False
         in_home = None
-        out_home = None
+        out_url = None
         archive = True
         try:
-            opts, args = getopt.getopt(argv, 'i:o:h?', ['no-archive'])
+            opts, args = getopt.getopt(argv, 'di:o:h?', ['no-archive'])
         except:
             self.usage("Invalid arguments", ' '.join(argv))
 
         for ( o, a ) in opts:
             if o in ('-i'):
                 in_home = a
+            elif o in ('-d'):
+                drop = True
             elif o in ('-o'):
-                out_home = a
+                out_url = a
             elif o in ('--no-archive'):
                 archive = False
             else:
@@ -90,27 +93,32 @@ class DbConvert(DuccUtil):
 
         if ( in_home == None ):
             self.usage("Missing input DUCC_HOME")
-        if ( out_home == None ):
-            self.usage("Missing output DUCC_HOME");
+        if ( out_url == None ):
+            self.usage("Missing database URL");
+
+
+        if ( not self.db_alive(3) ):
+            print "Database is not running or cannot be contacted."
+            return
+
+        DUCC_JVM_OPTS = '-DDUCC_HOME=' + self.DUCC_HOME
 
-        # DDUCC_HOME=/home/challngr/ducc_runtime_db -Xmx4G -Dstorage.useWAL=true -Dtx.useLog=false -Dstorage.wal.syncOnPageFlush=false
-        DUCC_JVM_OPTS = ''
-        DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDUCC_HOME=' + self.DUCC_HOME
-        DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -Xmx4G'
         if ( not archive ):
             DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDONT_ARCHIVE'
 
-        CMD = [self.java(), DUCC_JVM_OPTS, 'org.apache.uima.ducc.database.DbLoader', in_home, out_home]
+        if ( drop ):
+            DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDROP_DATABASE'
+
+        CMD = [self.java(), DUCC_JVM_OPTS, 'org.apache.uima.ducc.database.DbLoader', in_home, out_url]
         CMD = ' '.join(CMD)
 
-        os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + "/lib/orientdb/*"
         os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + "/resources"
-        print os.environ['CLASSPATH']
+        print 'CLASSPATH', os.environ['CLASSPATH']
+        print 'Executing', CMD
         os.system(CMD)
-
                 
 if __name__ == "__main__":
-    console = DbConvert()
+    console = DbLoader()
     console.main(sys.argv[1:])
     
 

Added: uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py Thu Nov  5 19:23:05 2015
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+import os
+
+# common routines for ducc_post_install and db_create
+def addToCp(cp, lib):
+    """Return the classpath string cp with lib appended after a ':' separator."""
+    return ':'.join([cp, lib])
+
+def execute(CMD):    # echo CMD, run it through the shell, and return the os.system() status
+    print CMD
+    return os.system(CMD)
+
+def stop_database(pidfile):
+    """Send SIGTERM to the process whose pid is recorded in pidfile."""
+    print "Stopping the database."
+    # the shell expands `cat pidfile` to the pid before kill runs
+    CMD = ' '.join(['kill', '-TERM', '`cat ' + pidfile + '`'])
+    execute(CMD)
+
+def manual_config(DUCC_HOME, DUCC_HEAD):
+    """Print instructions for editing cassandra.yaml by hand; used when the automatic edit fails."""
+    print ''
+    print 'To manually configure the database edit', DUCC_HOME + '/cassandra-server/conf/cassandra.yaml'
+    print 'to ensure every occurrence of DUCC_HEAD is replaced with', DUCC_HEAD, 'and every occurrence'
+    print 'of DUCC_HOME is replaced with', DUCC_HOME + '.\n'
+    print 'Note that one occurrence of DUCC_HEAD will be quoted: you must preserve these quotes, e.g. as "' + DUCC_HEAD + '".'
+
+def configure_database(DUCC_HOME, DUCC_HEAD, java, db_pw):
+    """
+    Configure cassandra.yaml for this installation, start the database, install the
+    DUCC schema with DbCreate, and stop the database again.
+
+    This is a bootstrap routine and doesn't try to use common code that may depend on
+    things being initialized correctly.
+
+    DUCC_HOME - root of the DUCC runtime; substituted for DUCC_HOME in cassandra.yaml
+    DUCC_HEAD - DUCC head node name; substituted for DUCC_HEAD in cassandra.yaml
+    java      - java executable used to run DbCreate
+    db_pw     - database password, 'bypass' to skip the database, or None to prompt
+    Returns True if the schema was installed or the database is bypassed, False otherwise.
+    """
+    if ( db_pw == None ):
+        db_pw = raw_input("Enter database password OR 'bypass' to bypass database support:")
+        if ( db_pw == '' ):
+            print "Must enter a DB password or 'bypass' to continue."
+            return False
+
+    if ( os.path.exists(DUCC_HOME + "/database/data") ):
+        print 'Database is already defined in', DUCC_HOME + '/database', '- not rebuilding.'
+        return False
+
+    if ( db_pw == 'bypass' ):
+        print 'Database support will be bypassed'
+        return True
+
+    config =  DUCC_HOME + '/cassandra-server/conf/cassandra.yaml'
+    esc_home = DUCC_HOME.replace("/", "\\/")    # for sed
+
+    # must configure the database node to be the same as the ducc head, and the database location
+    # to be DUCC_HOME.  Both substitutions are done in one sed pass so that the .bak file is the
+    # unmodified original.
+    ch_conf = "sed -i.bak -e 's/DUCC_HEAD/" + DUCC_HEAD + "/' -e 's/DUCC_HOME/" + esc_home + "/' " + config
+    if ( execute(ch_conf) != 0 ):
+        print 'Could not configure', config + '.  You may need to recover it from', config+'.bak.'
+        manual_config(DUCC_HOME, DUCC_HEAD)
+        return False
+
+    here = os.getcwd()
+    os.chdir(DUCC_HOME + "/cassandra-server")
+    pidfile = DUCC_HOME + '/state/cassandra.pid'
+    CMD = "bin/cassandra -p "+  pidfile + " > /dev/null 2>&1"
+    os.system(CMD)
+    print "Database is started.  Waiting for initialization"
+
+    # Now create the schema
+    CLASSPATH = ''
+    CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/cassandra/*')
+    CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/guava/*')
+    CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/apache-log4j/*')
+    CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/uima-ducc/*')
+    CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/apache-uima/apache-activemq/lib/*')
+    os.environ['CLASSPATH'] = CLASSPATH
+    print os.environ['CLASSPATH']
+
+    ret = True
+    CMD = [java, '-DDUCC_HOME=' + DUCC_HOME, 'org.apache.uima.ducc.database.DbCreate', DUCC_HEAD, 'ducc', db_pw]
+    CMD = ' '.join(CMD)
+    if ( execute(CMD) == 0 ):
+        print 'Database is initialized.'
+    else:
+        print 'Database started but the schema could not be defined. DB logs are in', DUCC_HOME + '/cassandra-server/logs.'
+        ret = False
+
+    stop_database(pidfile)
+    os.chdir(here)      # restore the caller's working directory
+    return ret

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py Thu Nov  5 19:23:05 2015
@@ -37,16 +37,11 @@ class Ducc(DuccUtil):
     def run_db(self):
         
         print '-------- starting the database'
-        if ( self.db_disabled ):
+        if ( self.db_bypass ):
             print 'Database is disabled; not starting it.'
             print 'OK'
             return 
 
-        if ( not os.path.exists(self.DUCC_HOME + "/database/data" )):
-            print 'Database is missing.  You must initialize the database with DbCreate.'
-            print 'NOTOK'
-            return
-
         # check for the pid to see if the DB is running.
         if ( self.db_process_alive() ) :
             print 'Database is already running.'

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install Thu Nov  5 19:23:05 2015
@@ -36,6 +36,7 @@ from ducc_base import find_localhost
 from ducc_base import which
 
 from ducc import Ducc
+import db_util as dbu
 
 class PostInstall():
 
@@ -99,23 +100,26 @@ class PostInstall():
         # Note this is a bootstrap routine and doesn't try to use common code that may depend on
         # things being initialized correctly.
 
-        return
+
         if ( os.path.exists(self.DUCC_HOME + "/database/data") ):
             print 'Database is already defined.  Not configuring'
             return
 
-        db_pw = raw_input("Enter database password OR 'disable' to disable database support:")
+        db_pw = raw_input("Enter database password OR 'bypass' to disable database support:")
         if ( db_pw == '' ):
             print "Must enter a DB password to continue."
             sys.exit(1);
 
-        if ( db_pw == 'disable' ):
+        if ( db_pw == 'bypass' ):
             print 'Database support will be disabled'
             self.update_property('ducc.database.host', '--disabled--', '# Database support is disabled')
             return;
         else:
             self.update_property('ducc.database.host', self.ducc_head, '# Database location')
             
+        dbu.configure_database(self.DUCC_HOME, self.ducc_head, self.path_to_java, db_pw)
+        return
+
         config =  self.DUCC_HOME + '/cassandra-server/conf/cassandra.yaml'
         esc_home = self.DUCC_HOME.replace("/", "\/");
         print "------ escaped home", esc_home

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py Thu Nov  5 19:23:05 2015
@@ -196,7 +196,7 @@ class DuccUtil(DuccBase):
         return False
 
     # contact the database and see how useful it seems to be
-    def db_alive(self):
+    def db_alive(self, retry=10):
         if ( self.db_bypass == True ):
             return True
 
@@ -207,7 +207,7 @@ class DuccUtil(DuccBase):
         # get our log4j config into the path to shut up noisy logging
         os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + '/resources'
         
-        CMD = [self.java(), 'org.apache.uima.ducc.database.DbAlive', dbnode, 'ducc', self.db_password]
+        CMD = [self.java(), 'org.apache.uima.ducc.database.DbAlive', dbnode, 'ducc', self.db_password, str(retry)]
 
         CMD = ' '.join(CMD)
         rc = os.system(CMD)

Modified: uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml Thu Nov  5 19:23:05 2015
@@ -300,15 +300,29 @@ under the License.
 
     <!--    CASSANDRA server -->
     <fileSet>
-      <directory>target/cassandra/apache-cassandra-2.1.11</directory>
+      <directory>target/cassandra/apache-cassandra-${cassandra.server.version}</directory>
       <outputDirectory>cassandra-server</outputDirectory>
       <excludes>
         <exclude>javadac/*</exclude>
+        <exclude>conf/cassandra-env.sh</exclude>
+        <exclude>conf/cassandra.yaml</exclude>
       </excludes>
       <fileMode>755</fileMode>
       <directoryMode>755</directoryMode>        
     </fileSet>
 
+    <fileSet>
+      <directory>src/main/resources</directory>
+      <!-- Two files that post_install or some hapless human needs to set up with local information -->
+      <outputDirectory>cassandra-server/conf</outputDirectory>
+      <includes>
+        <include>cassandra-env.sh</include>
+        <include>cassandra.yaml</include>
+      </includes>
+      <fileMode>755</fileMode>
+      <directoryMode>755</directoryMode>        
+    </fileSet>
+
     <!--    Unversioned CLI jar -->
     <fileSet>
       <directory>uima-ducc-cli/target</directory>
@@ -329,6 +343,8 @@ under the License.
       <excludes>
 		<exclude>private/*</exclude>
 		<exclude>private</exclude>
+		<exclude>cassandra-env.sh</exclude>
+		<exclude>cassandra.yaml</exclude>
 	  </excludes>
     </fileSet>
 

Modified: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh Thu Nov  5 19:23:05 2015
@@ -94,6 +94,8 @@ jvmver=`echo "$java_ver_output" | grep '
 JVM_VERSION=${jvmver%_*}
 JVM_PATCH_VERSION=${jvmver#*_}
 
+# DUCC: These checks don't work with the IBM Jvm.  Ducc requires a 'high-enough' level of JVM
+#       so those checks will suffice.
 #if [ "$JVM_VERSION" \< "1.7" ] ; then
 #    echo "Cassandra 2.0 and later require Java 7u25 or later."
 #    exit 1;

Added: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml Thu Nov  5 19:23:05 2015
@@ -0,0 +1,813 @@
+# Cassandra storage config YAML 
+
+#
+# This is prepared for configuration for DUCC via ducc_post_install
+#
+
+# NOTE:
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'DUCC'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# If you already have a cluster with 1 token per node, and wish to migrate to 
+# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
+num_tokens: 256
+
+# initial_token allows you to specify tokens manually.  While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters
+# that do not have vnodes enabled.
+# initial_token:
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+# May either be "true" or "false" to enable globally, or contain a list
+# of data centers to enable per-datacenter.
+# hinted_handoff_enabled: DC1,DC2
+hinted_handoff_enabled: true
+# this defines the maximum amount of time a dead host will have hints
+# generated.  After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+# Maximum throttle in KBs per second, per delivery thread.  This will be
+# reduced proportionally to the number of nodes in the cluster.  (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.credentials table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+authenticator: PasswordAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 1000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster.  You should leave this
+# alone for new clusters.  The partitioner can NOT be changed without
+# reloading all data, so when upgrading you should set this to the
+# same partitioner you were already using.
+#
+# Besides Murmur3Partitioner, partitioners included for backwards
+# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk.  Cassandra
+# will spread data evenly across them, subject to the granularity of
+# the configured compaction strategy.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+data_file_directories:
+    - DUCC_HOME/database/data
+
+# commit log.  when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+commitlog_directory: DUCC_HOME/database/commitlog
+
+# policy for data disk failures:
+# die: shut down gossip and client transports and kill the JVM for any fs errors or
+#      single-sstable errors, so the node can be replaced.
+# stop_paranoid: shut down gossip and client transports even for single-sstable errors,
+#                kill the JVM for errors during startup.
+# stop: shut down gossip and client transports, leaving the node effectively dead, but
+#       can still be inspected via JMX, kill the JVM for errors during startup.
+# best_effort: stop using the failed disk and respond to requests based on
+#              remaining available sstables.  This means you WILL see obsolete
+#              data at CL.ONE!
+# ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# policy for commit disk failures:
+# die: shut down gossip and Thrift and kill the JVM, so the node can be replaced.
+# stop: shut down gossip and Thrift, leaving the node effectively dead, but
+#       can still be inspected via JMX.
+# stop_commit: shutdown the commit log, letting writes collect but
+#              continuing to service reads, as in pre-2.0.5 Cassandra
+# ignore: ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Maximum size of the row cache in memory.
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should
+# save the row cache. Caches are saved to saved_caches_directory as specified
+# in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save
+# Disabled by default, meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# The off-heap memory allocator.  Affects storage engine metadata as
+# well as caches.  Experiments show that JEMalloc saves some memory
+# compared to the native GCC allocator (i.e., JEMalloc is more
+# fragmentation-resistant).
+# 
+# Supported values are: NativeAllocator, JEMallocAllocator
+#
+# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
+# modify cassandra-env.sh as directed in the file.
+#
+# Defaults to NativeAllocator
+# memory_allocator: NativeAllocator
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+saved_caches_directory: DUCC_HOME/database/saved_caches
+
+# commitlog_sync may be either "periodic" or "batch." 
+# 
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk.  It will wait
+# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
+# This window should be kept short because the writer threads will
+# be unable to do extra work while waiting.  (You may need to increase
+# concurrent_writes for the same reason.)
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 2
+#
+# the other option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds. 
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# The size of the individual commitlog file segments.  A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.  
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+commitlog_segment_size_in_mb: 32
+
+# Reuse commit log files when possible. The default is false, and this
+# feature will be removed entirely in future versions of Cassandra.
+#commitlog_segment_recycling: false
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points. 
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring.  You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
+          - seeds: "DUCC_HEAD"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# Total memory to use for sstable-reading buffers.  Defaults to
+# the smaller of 1/4 of heap or 512MB.
+# file_cache_size_in_mb: 512
+
+# Total permitted memory to use for memtables. Cassandra will stop 
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+# memtable_heap_space_in_mb: 2048
+# memtable_offheap_space_in_mb: 2048
+
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable.  Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#   heap_buffers:    on heap nio buffers
+#   offheap_buffers: off heap (direct) nio buffers
+#   offheap_objects: native memory, eliminating nio buffer heap overhead
+memtable_allocation_type: heap_buffers
+
+# Total space to use for commitlogs.  Since commitlog segments are
+# mmapped, and hence use up address space, the default size is 32
+# on 32-bit JVMs, and 8192 on 64-bit JVMs.
+#
+# If space gets above this value (it will round up to the next nearest
+# segment multiple), Cassandra will flush every dirty CF in the oldest
+# segment and remove it.  So a small total commitlog space will tend
+# to cause more flush activity on less-active columnfamilies.
+# commitlog_total_space_in_mb: 8192
+
+# This sets the amount of memtable flush writer threads.  These will
+# be blocked by disk io, and each one will hold a memtable in memory
+# while blocked. 
+#
+# memtable_flush_writers defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+# 
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#memtable_flush_writers: 8
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit.  However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled.  This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates.  Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for encrypted communication.  Unused unless enabled in
+# encryption_options
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+listen_address: DUCC_HEAD
+# listen_interface: eth0
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# Please note that the address on which the native transport is bound is the
+# same as the rpc_address. The port however is different and specified below.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+native_transport_port: 9042
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Whether to start the thrift rpc server.
+start_rpc: true
+
+# The address or interface to bind the Thrift RPC service and native transport
+# server to.
+#
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+#
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+rpc_address: DUCC_HEAD
+# rpc_interface: eth1
+# rpc_interface_prefer_ipv6: false
+
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Cassandra provides two out-of-the-box options for the RPC Server:
+#
+# sync  -> One thread per thrift connection. For a very large number of clients, memory
+#          will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
+#          per thread, and that will correspond to your use of virtual memory (but physical memory
+#          may be limited depending on use of stack space).
+#
+# hsha  -> Stands for "half synchronous, half asynchronous." All thrift clients are handled
+#          asynchronously using a small number of threads that does not vary with the amount
+#          of thrift clients (and thus scales well to many clients). The rpc requests are still
+#          synchronous (one thread per active request). If hsha is selected then it is essential
+#          that rpc_max_threads is changed from the default value of unlimited.
+#
+# The default is sync because on Windows hsha is about 30% slower.  On Linux,
+# sync/hsha performance is about the same, with hsha of course using less memory.
+#
+# Alternatively, you can provide your own RPC server by providing the fully-qualified class name
+# of an o.a.c.t.TServerFactory that can create an instance of it.
+rpc_server_type: sync
+
+# Uncomment rpc_min|max_thread to set request pool size limits.
+#
+# Regardless of your choice of RPC server (see above), the number of maximum requests in the
+# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
+# RPC server, it also dictates the number of clients that can be connected at all).
+#
+# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
+# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
+# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
+#
+# rpc_min_threads: 16
+# rpc_max_threads: 2048
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and: man tcp
+# internode_send_buff_size_in_bytes:
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
+thrift_framed_transport_size_in_mb: 15
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data.  Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true 
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition.  The competing goals are these:
+#   1) a smaller granularity means more index entries are generated
+#      and looking up rows within the partition by collation column
+#      is faster
+#   2) but, Cassandra will keep the collation index in memory for hot
+#      rows (as part of the key cache), so a larger granularity means
+#      you can cache more hot rows
+column_index_size_in_kb: 64
+
+
+# Log WARN on any batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair.  Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+# 
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
+compaction_throughput_mb_per_sec: 16
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads 
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# inter_dc_stream_throughput_outbound_megabits_per_sec:
+
+# How long the coordinator should wait for read operations to complete
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations
+request_timeout_in_ms: 10000
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts.  If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing 
+# already-timed-out requests.
+#
+# Warning: before enabling this property make sure ntp is installed
+# and the times are synchronized between the nodes.
+cross_node_timeout: false
+
+# Enable socket timeout for streaming operation.
+# When a timeout occurs during streaming, streaming is retried from the start
+# of the current file. This _can_ involve re-streaming an important amount of
+# data, so you should avoid setting the value too low.
+# Default value is 3600000, which means streams timeout after an hour.
+# streaming_socket_timeout_in_ms: 3600000
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch.  The snitch has two functions:
+# - it teaches Cassandra enough about your network topology to route
+#   requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Cassandra will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
+# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
+# ARE PLACED.
+#
+# Out of the box, Cassandra provides
+#  - SimpleSnitch:
+#    Treats Strategy order as proximity. This can improve cache
+#    locality when disabling read repair.  Only appropriate for
+#    single-datacenter deployments.
+#  - GossipingPropertyFileSnitch
+#    This should be your go-to snitch for production use.  The rack
+#    and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via
+#    gossip.  If cassandra-topology.properties exists, it is used as a
+#    fallback, allowing migration from the PropertyFileSnitch.
+#  - PropertyFileSnitch:
+#    Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+#  - Ec2Snitch:
+#    Appropriate for EC2 deployments in a single Region. Loads Region
+#    and Availability Zone information from the EC2 API. The Region is
+#    treated as the datacenter, and the Availability Zone as the rack.
+#    Only private IPs are used, so this will not work across multiple
+#    Regions.
+#  - Ec2MultiRegionSnitch:
+#    Uses public IPs as broadcast_address to allow cross-region
+#    connectivity.  (Thus, you should set seed addresses to the public
+#    IP as well.) You will need to open the storage_port or
+#    ssl_storage_port on the public IP firewall.  (For intra-Region
+#    traffic, Cassandra will switch to the private IP after
+#    establishing a connection.)
+#  - RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's IP
+#    address, respectively.  Unless this happens to match your
+#    deployment conventions, this is best used as an example of
+#    writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100 
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+# NoScheduler - Has no options
+# RoundRobin
+#  - throttle_limit -- The throttle_limit is the number of in-flight
+#                      requests per client.  Requests beyond 
+#                      that limit are queued up until
+#                      running requests can complete.
+#                      The value of 80 here is twice the number of
+#                      concurrent_reads + concurrent_writes.
+#  - default_weight -- default_weight is optional and allows for
+#                      overriding the default which is 1.
+#  - weights -- Weights are optional and will default to 1 or the
+#               overridden default_weight. The weight translates into how
+#               many requests are handled during each turn of the
+#               RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+#    throttle_limit: 80
+#    default_weight: 5
+#    weights:
+#      Keyspace1: 1
+#      Keyspace2: 5
+
+# request_scheduler_id -- An identifier based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# Enable or disable inter-node encryption
+# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
+# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
+# suite for authentication, key exchange and encryption of the actual data transfers.
+# Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode.
+# NOTE: No custom encryption options are enabled at the moment
+# The available internode options are : all, none, dc, rack
+#
+# If set to dc cassandra will encrypt the traffic between the DCs
+# If set to rack cassandra will encrypt the traffic between the racks
+#
+# The passwords used in these options must match the passwords used when generating
+# the keystore and truststore.  For instructions on generating these files, see:
+# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+#
+server_encryption_options:
+    internode_encryption: none
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    truststore: conf/.truststore
+    truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+    # require_client_auth: false
+
+# enable or disable client/server encryption.
+client_encryption_options:
+    enabled: false
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # require_client_auth: false
+    # Set truststore and truststore_password if require_client_auth is true
+    # truststore: conf/.truststore
+    # truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# can be:  all  - all traffic is compressed
+#          dc   - traffic between different datacenters is compressed
+#          none - nothing is compressed.
+internode_compression: all
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+# gc_warn_threshold_in_ms: 1000

Propchange: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml
------------------------------------------------------------------------------
    svn:executable = *

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java Thu Nov  5 19:23:05 2015
@@ -121,11 +121,25 @@ public interface IRmPersistence
             public boolean isPrivate() { return true;}
             public boolean isMeta() { return true;}
         },
+        Memory {
+            public String pname() { return "memory"; }
+            public Type type()  { return Type.Integer; }
+            public boolean isPrimaryKey() { return true;}
+        },
+        Nodepool {
+            public String pname() { return "nodepool"; }
+            public Type type()  { return Type.String; }
+            public boolean isPrimaryKey() { return true;}
+        },
         Name {
             public String pname() { return "name"; }
             public Type type()  { return Type.String; }
             public boolean isPrimaryKey() { return true;}
-
+        },
+        SharesLeft {
+            public String pname() { return "shares_left"; }
+            public Type type()  { return Type.Integer; }
+            public boolean isPrimaryKey() { return true;}
         },
         Responsive{
             public String pname() { return "responsive"; }
@@ -139,18 +153,10 @@ public interface IRmPersistence
             public String pname() { return "ip"; }
             public Type type()  { return Type.String; }
         },
-        Nodepool {
-            public String pname() { return "nodepool"; }
-            public Type type()  { return Type.String; }
-        },
         Quantum {
             public String pname() { return "quantum"; }
             public Type type()  { return Type.Integer; }
         },
-        Memory {
-            public String pname() { return "memory"; }
-            public Type type()  { return Type.Integer; }
-        },
         ShareOrder {
             public String pname() { return "share_order"; }
             public Type type()  { return Type.Integer; }
@@ -167,10 +173,6 @@ public interface IRmPersistence
             public String pname() { return "heartbeats"; }
             public Type type()  { return Type.Integer; }
         },
-        SharesLeft {
-            public String pname() { return "shares_left"; }
-            public Type type()  { return Type.Integer; }
-        },
         Assignments {
             public String pname() { return "assignments"; }
             public Type type()  { return Type.Integer; }

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java Thu Nov  5 19:23:05 2015
@@ -131,17 +131,24 @@ public class DbAlive
 
     static void usage()
     {
-        System.out.println("Usage: DbAlive database_url id pw");
+        System.out.println("Usage: DbAlive database_url id pw retry-count");
         System.exit(1);
     }
 
     public static void main(String[] args)
     {
-        if ( args.length != 3 ) {
+        if ( args.length != 4 ) {
             usage();
         }
 
-        int max = 10;                         // we'll wait up to 60 seconds: 20 x 3 seconds
+        int max = 0;
+        try {
+            max = Integer.parseInt(args[3]);                         // retry count is supplied by the caller as the 4th argument
+        } catch ( NumberFormatException e ) {
+            System.out.println("Retry count must be numeric.");
+            System.exit(1);
+        }
+
         DbAlive client = null;
         RC rc = RC.OK;
         try {

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java Thu Nov  5 19:23:05 2015
@@ -34,12 +34,14 @@ import com.datastax.driver.core.PlainTex
 import com.datastax.driver.core.Session;
 import com.datastax.driver.core.SimpleStatement;
 import com.datastax.driver.core.exceptions.AuthenticationException;
+import com.datastax.driver.core.exceptions.NoHostAvailableException;
 
 public class DbCreate
 {
     static final String DUCC_KEYSPACE = "ducc";
     static final String PASSWORD_KEY  = "db_password";
     static final String PASSWORD_FILE = "database.password";
+    static final int RETRY = 10;
 
     DuccLogger logger = null;
     String dburl;
@@ -47,10 +49,10 @@ public class DbCreate
     String adminpw = null;
 
     private Cluster cluster;
-    private Session session;
+    private Session session = null;
 
 
-    public DbCreate(String dburl, DuccLogger logger, String adminid, String adminpw)
+    DbCreate(String dburl, DuccLogger logger, String adminid, String adminpw)
     {
         this.dburl = dburl;
         this.logger = logger;
@@ -58,14 +60,21 @@ public class DbCreate
         this.adminpw = adminpw;
     }
 
-    public DbCreate(String dburl, String adminid, String adminpw)
+    DbCreate(String dburl, String adminid, String adminpw)
     {
         this.dburl = dburl;
         this.adminid = adminid;
         this.adminpw = adminpw;
     }
 
-    public void connect()
+    void close()
+    {
+        if ( cluster != null ) cluster.close();
+        session = null;
+        cluster = null;
+    }
+
+    boolean connect()
         throws Exception
     {
         String methodName = "connect";
@@ -75,47 +84,62 @@ public class DbCreate
             throw new IllegalArgumentException("DUCC_HOME must be set as a system property: -DDUCC_HOME=whatever");
         }
 
-        try {
-            // If we're here, we must first of all get rid of the cassandra su and set up our own
+        // If we're here, we must first of all get rid of the cassandra su and set up our own
 
-            AuthProvider auth = new PlainTextAuthProvider("cassandra", "cassandra");
-            cluster = Cluster.builder()
-                .withAuthProvider(auth)
-                .addContactPoint(dburl)
-                .build();
-
-            session = cluster.connect();
-            session.execute("CREATE USER IF NOT EXISTS " + adminid + " with password '" + adminpw + "' SUPERUSER");
-            cluster.close();
-            doLog(methodName, "Created user " + adminid);
-
-            Properties props = new Properties();
-            props.setProperty(PASSWORD_KEY, adminpw);
-            FileOutputStream fos = new FileOutputStream(dh + "/resources.private/" + PASSWORD_FILE);
-            props.store(fos, "Db private configuration");
-            fos.close();
-
-            auth = new PlainTextAuthProvider(adminid, adminpw);
-            cluster = Cluster.builder()
-                .withAuthProvider(auth)
-                .addContactPoint(dburl)
-                .build();
-            session = cluster.connect();
-   
-            String uglypw = UUID.randomUUID().toString();
-            session.execute("ALTER USER cassandra  with password '" + uglypw + "' NOSUPERUSER");
-            doLog(methodName, "Changed default super user's password and revoked its superuser authority.");
-            doLog(methodName, "From this point, this DB can only be accessed in super user mode by user 'ducc'");
+        AuthProvider auth = new PlainTextAuthProvider("cassandra", "cassandra");
+        for ( int i = 0; i < RETRY; i++ ) {
+            try {
+                cluster = Cluster.builder()
+                    .withAuthProvider(auth)
+                    .addContactPoint(dburl)
+                    .build();
+
+                session = cluster.connect();
+                session.execute("CREATE USER IF NOT EXISTS " + adminid + " with password '" + adminpw + "' SUPERUSER");
+                cluster.close();
+                doLog(methodName, "Created user " + adminid);                    
+
+                Properties props = new Properties();
+                props.setProperty(PASSWORD_KEY, adminpw);
+                FileOutputStream fos = new FileOutputStream(dh + "/resources.private/" + PASSWORD_FILE);
+                props.store(fos, "Db private configuration");
+                fos.close();
+                    
+                auth = new PlainTextAuthProvider(adminid, adminpw);
+                cluster = Cluster.builder()
+                    .withAuthProvider(auth)
+                    .addContactPoint(dburl)
+                    .build();
+                session = cluster.connect();
+                    
+                String uglypw = UUID.randomUUID().toString();
+                session.execute("ALTER USER cassandra  with password '" + uglypw + "' NOSUPERUSER");
+                doLog(methodName, "Changed default super user's password and revoked its superuser authority.");
+                doLog(methodName, "From this point, this DB can only be accessed in super user mode by user 'ducc'");
+                    
+                return true;
+            } catch ( NoHostAvailableException e ) {
+                doLog("Waiting for database to boot ...");
+                session = null;
+                cluster = null;
+            } catch ( AuthenticationException e ) {
+                doLog("Waiting for default authentication ...");
+                session = null;
+                cluster = null;
+            } catch ( Exception e ) {
+                doLog("Unknown problem contacting database.");
+                session = null;
+                cluster = null;
+                e.printStackTrace();
+                return false;
+            } 
+            Thread.sleep(3000);
+        }
             
-        } catch (AuthenticationException e ) {
-            // if we get here the default super user isn't working and we expect a valid id and password
-            AuthProvider auth = new PlainTextAuthProvider(adminid, adminpw);
-            cluster = Cluster.builder()
-                .withAuthProvider(auth)
-                .addContactPoint(dburl)
-                .build();
+        if ( cluster == null ) {
+            doLog(methodName, "Excessive retries.  Database may not be initialized.");
+            return false;
         }
-
         Metadata metadata = cluster.getMetadata();
         doLog(methodName, "Connected to cluster: %s\n", metadata.getClusterName());
         
@@ -123,18 +147,10 @@ public class DbCreate
             doLog(methodName, "Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
         } 
         session = cluster.connect();
+        return true;
     }
 
-    public void close() {
-        cluster.close();
-    }
-    
-    public Session getSession()
-    {
-        return this.session;
-    }
-
-    public void doLog(String methodName, Object ... msg)
+    void doLog(String methodName, Object ... msg)
     {        
         if ( logger == null ) {
 
@@ -155,20 +171,20 @@ public class DbCreate
 
     }
 
-    String mkTableCreate(String tableName, String[] fields)
-    {
-        int max = fields.length - 1;
-        int current = 0;
-        StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ");
-        buf.append(tableName);
-        buf.append(" (");
-        for (String s : fields) {
-            buf.append(s);
-            if ( current++ < max) buf.append(", ");
-        }
-        buf.append(") WITH CLUSTERING ORDER BY (ducc_dbid desc)");
-        return buf.toString();                   
-    }
+    // String mkTableCreate(String tableName, String[] fields)
+    // {
+    //     int max = fields.length - 1;
+    //     int current = 0;
+    //     StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ");
+    //     buf.append(tableName);
+    //     buf.append(" (");
+    //     for (String s : fields) {
+    //         buf.append(s);
+    //         if ( current++ < max) buf.append(", ");
+    //     }
+    //     buf.append(") WITH CLUSTERING ORDER BY (ducc_dbid desc)");
+    //     return buf.toString();                   
+    // }
 
     void createSchema()
     {
@@ -245,15 +261,17 @@ public class DbCreate
         DbCreate client = null;
         try {
             client = new DbCreate(args[0], args[1], args[2]);
-            client.connect();
-            client.createSchema();
+            if ( client.connect() ) {
+                client.createSchema();
+                client.close();
+            } else {
+                System.exit(1);
+            }
         } catch ( Throwable e ) {
             System.out.println("Errors creating database");
             e.printStackTrace();
             System.exit(1);
-        } finally {
-            if ( client != null ) client.close();
-        }
+        } 
 
         System.exit(0);
     }

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java Thu Nov  5 19:23:05 2015
@@ -97,11 +97,11 @@ public class DbLoader
     int servicelimit = Integer.MAX_VALUE;
     int registrylimit = Integer.MAX_VALUE;
 
-    boolean dojobs         = false;
-    boolean doreservations = false;
-    boolean doservices     = false;
+    boolean dojobs         = true;
+    boolean doreservations = true;
+    boolean doservices     = true;
     boolean doregistry     = true;
-    boolean docheckpoint   = false;
+    boolean docheckpoint   = true;
 
     long jobBytes = 0;
     long resBytes = 0;
@@ -110,13 +110,14 @@ public class DbLoader
 
     AtomicInteger skippedServices = new AtomicInteger(0);
 
-    public DbLoader(String from, String to)
+    public DbLoader(String from, String state_url)
         throws Exception
     {
     	//String methodName = "<ctr>";
+        this.state_url = state_url;
         DUCC_HOME = System.getProperty("DUCC_HOME");        
         if ( DUCC_HOME == null ) {
-            System.out.println("System proprety -DDUCC_HOME must be set.");
+            System.out.println("System property -DDUCC_HOME must be set.");
             System.exit(1);
         }
         
@@ -135,35 +136,7 @@ public class DbLoader
         serviceRegistry        = from + serviceRegistry;
         checkpointFile         = from + checkpointFile;
 
-        f = new File(to);
-        if ( ! f.isDirectory() ) {
-            System.out.println("'to' must be a directory");
-            System.exit(1);
-        }
-
-        String databasedir =  to + "/database/databases";
-
-        // We always use a non-networked version for loading
-        //state_url = "plocal:" + databasedir + "/DuccState";
-        state_url = "bluej538";
         System.setProperty("ducc.state.database.url", state_url);
-
-        if ( state_url.startsWith("plocal") ) {
-            f = new File(databasedir);
-            if ( !f.exists() ) {
-                try {            
-                    if ( ! f.mkdirs() ) {
-                        System.out.println("Cannot create database directory: " + databasedir);
-                        System.exit(1);
-                    }
-                    System.out.println("Created database directory " + databasedir);
-                } catch ( Exception e ) {
-                    System.out.println("Cannot create database directory: " + databasedir + ":" + e.toString());
-                    System.exit(1);
-                }
-            }
-        }
-
     }
 
     void closeStream(InputStream in)
@@ -580,15 +553,6 @@ public class DbLoader
         dbManager = new DbManager(state_url, logger);
         dbManager.init();
 
-//        DbCreate cr = new DbCreate(state_url, logger);
-//        if ( state_url.startsWith("plocal") ) {
-//            cr.createPlocalDatabase();
-//        } else {
-//            cr.createDatabase();
-//        }
-        
-
-
         if ( true ) {
             try {
 
@@ -662,11 +626,9 @@ public class DbLoader
             System.out.println("");
             System.out.println("Where:");
             System.out.println("   from");        
-            System.out.println("      is the DUCC_HOME you wish to convert");
+            System.out.println("      is the DUCC_HOME you wish to convert.");
             System.out.println("   to");
-            System.out.println("      is the DUCC_HOME contining the new database");
-            System.out.println("");
-            System.out.println("'from' and 'to' may be the same thing");
+            System.out.println("      is the datbase URL.");
             System.exit(1);
         }
 

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java Thu Nov  5 19:23:05 2015
@@ -91,7 +91,8 @@ public class RmStatePersistence
 
         StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ducc." + RmProperty.TABLE_NAME.pname() + " (");
         buf.append(DbUtil.mkSchema(RmProperty.values()));
-        buf.append(")");    
+        buf.append(") WITH CLUSTERING ORDER BY (memory desc)");
+
         ret.add(new SimpleStatement(buf.toString()));
         return ret;
     }

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Thu Nov  5 19:23:05 2015
@@ -1013,9 +1013,9 @@ class NodePool
         props.put(RmProperty.Name, nid.getName());
         props.put(RmProperty.Ip, nid.getIp());
         props.put(RmProperty.Nodepool, id);
-        props.put(RmProperty.Quantum, share_quantum);
+        props.put(RmProperty.Quantum, share_quantum / ( 1024*1024));
         
-        props.put(RmProperty.Memory       , m.getMemory());
+        props.put(RmProperty.Memory       , m.getMemory() / (1024*1024));
         props.put(RmProperty.ShareOrder  , m.getShareOrder());
         props.put(RmProperty.Blacklisted  , m.isBlacklisted());