You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2015/11/05 20:23:06 UTC
svn commit: r1712850 - in /uima/sandbox/uima-ducc/trunk: src/main/admin/
src/main/assembly/ src/main/resources/ src/main/resources/private/
uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/
uima-ducc-database/src/main/java/org/...
Author: challngr
Date: Thu Nov 5 19:23:05 2015
New Revision: 1712850
URL: http://svn.apache.org/viewvc?rev=1712850&view=rev
Log:
UIMA-4755 Scripting and build updates.
Added:
uima/sandbox/uima-ducc/trunk/src/main/admin/db_create (with props)
uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py
uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml (with props)
Removed:
uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml.prep
uima/sandbox/uima-ducc/trunk/src/main/resources/private/database.xml
Modified:
uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc
uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader
uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py
uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install
uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py
uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml
uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh
uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java
uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java
uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java
uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java
uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/check_ducc Thu Nov 5 19:23:05 2015
@@ -55,7 +55,7 @@ class CheckDucc(DuccUtil):
if ( self.db_bypass == True ):
return True
- ret = self.db_alive()
+ ret = self.db_alive(3)
if ( ret ):
print 'The database is running'
else:
Added: uima/sandbox/uima-ducc/trunk/src/main/admin/db_create
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_create?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_create (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_create Thu Nov 5 19:23:05 2015
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+
+import os
+import sys
+import getopt
+
+import shutil
+import subprocess
+from stat import *
+
+from ducc_util import DuccUtil
+
+
+from ducc_base import Properties
+from ducc_base import Property
+
+from ducc_base import find_ducc_home
+from ducc_base import find_localhost
+
+from ducc import Ducc
+
+import db_util as dbu
+
+#
+# Create and initialize the DUCC database. In a new installation this is handled by
+# ducc_post_install. Older installations need to run db_create as one of the steps
+# of migration to the DB.
+#
+class DbCreate(DuccUtil):
+
+ def usage(self, msg):
+
+ if ( msg != None ):
+ print ' '.join(msg)
+
+
+ print 'DbCreate configures the database and installs the schema.'
+ print ''
+ print "Usage:"
+ print " db_create [options]"
+ print " If no options prompts are given for expected parameters."
+ print ""
+ print "Options:"
+ print " [-n, --head-node] <ducc head node>"
+ print " This is the name of the host that will run the DUCC management processes."
+ print ""
+ print " [-, --db-password] <root password for database>"
+ print " This is the password DUCC uses to manage the database."
+ print ""
+ print " [-h, -? --help]"
+ print " Prints this message."
+ print ""
+ sys.exit(1)
+
+ def main(self, argv):
+
+ self.database_pw = None
+
+ try:
+ opts, args = getopt.getopt(argv, 'd:n:h?', ['db-password=', 'head-node=', 'help'])
+ except:
+ self.usage("Invalid arguments " + ' '.join(argv))
+
+
+ for ( o, a ) in opts:
+ if o in ('-n', '--head-node'):
+ self.ducc_head = a
+ if o in ('-d', '--db-password'):
+ self.database_pw = a
+ elif o in ('-h', '-?', '--help'):
+ self.usage(None)
+
+
+ # start with merged properties
+ self.merge_properties();
+
+ # configure the database for local system and initialize the schema
+ db_node = self.ducc_properties.get("ducc.head")
+ dbu.configure_database(self.DUCC_HOME, db_node, self.jvm, 'doodle')
+ return
+
+ # write update site.ducc.properties with db installation
+ self.ducc_site_properties.write(self.site_properties_name)
+
+ # remerge to insure it's all correct and ready to go
+ self.merge_properties();
+
+if __name__ == "__main__":
+
+ postinstall = DbCreate()
+ postinstall.main(sys.argv[1:])
Propchange: uima/sandbox/uima-ducc/trunk/src/main/admin/db_create
------------------------------------------------------------------------------
svn:executable = *
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_loader Thu Nov 5 19:23:05 2015
@@ -32,7 +32,7 @@ import getopt
#
from ducc_util import DuccUtil
-class DbConvert(DuccUtil):
+class DbLoader(DuccUtil):
def __init__(self):
DuccUtil.__init__(self)
@@ -47,41 +47,44 @@ class DbConvert(DuccUtil):
print 'db_loader -i in -o out [--no-archive]'
print ''
print 'Where:'
+ print ' -d'
+ print ' Specifies the database must be dropped and reinitialized if it already exists.'
+ print ' If not specified, the correct schema must already exist. Object already in'
+ print ' the database are not overwritten.'
print ' -i in'
print ' Specifies the DUCC_HOME of the DUCC runtime to be moved into the database.'
- print ' -o out'
- print ' Specifies the DUCC_HOME of the DUCC runtime to contain the database.'
+ print ' -o dburl'
+ print ' Specifies the database URL to contact to store the items.'
print ' --noarchive'
print ' Suppresses archival of the input files.'
print ''
print 'Notes:'
- print ' 1. The output location is not verfied as a DUCC runtime. The database will be created'
- print ' in that location nonetheless. After it is created it may be manually moved to'
- print ' the intended runtime.'
- print ' 2. The "in" and "out" directories may be the same.'
- print ' 3. Archival consists of renaming relevent input directories and files by appending'
+ print ' 1. The database must be started before running this script. Because this is a bootstrap'
+ print ' script DUCC must be down. The database may be started using start_ducc -c db'
+ print ' 2. Archival consists of renaming relevent input directories and files by appending'
print ' ".archive" to their names. It is fully non-destructive and may be reversed by'
- print ' manually renaming them back to their original names.'
- print ' 4. If a database already exists in the specified "out" location, it is first dropped.'
- print ' The schema is built and the database is fully re-initialized.'
+ print ' manually renaming them back to their original names.'
sys.exit(1);
def main(self, argv):
+ drop = False
in_home = None
- out_home = None
+ out_url = None
archive = True
try:
- opts, args = getopt.getopt(argv, 'i:o:h?', ['no-archive'])
+ opts, args = getopt.getopt(argv, 'di:o:h?', ['no-archive'])
except:
self.usage("Invalid arguments", ' '.join(argv))
for ( o, a ) in opts:
if o in ('-i'):
in_home = a
+ elif o in ('-d'):
+ drop = True
elif o in ('-o'):
- out_home = a
+ out_url = a
elif o in ('--no-archive'):
archive = False
else:
@@ -90,27 +93,32 @@ class DbConvert(DuccUtil):
if ( in_home == None ):
self.usage("Missing input DUCC_HOME")
- if ( out_home == None ):
- self.usage("Missing output DUCC_HOME");
+ if ( out_url == None ):
+ self.usage("Missing database URL");
+
+
+ if ( not self.db_alive(3) ):
+ print "Database is not running or cannot be contacted."
+ return
+
+ DUCC_JVM_OPTS = '-DDUCC_HOME=' + self.DUCC_HOME
- # DDUCC_HOME=/home/challngr/ducc_runtime_db -Xmx4G -Dstorage.useWAL=true -Dtx.useLog=false -Dstorage.wal.syncOnPageFlush=false
- DUCC_JVM_OPTS = ''
- DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDUCC_HOME=' + self.DUCC_HOME
- DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -Xmx4G'
if ( not archive ):
DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDONT_ARCHIVE'
- CMD = [self.java(), DUCC_JVM_OPTS, 'org.apache.uima.ducc.database.DbLoader', in_home, out_home]
+ if ( drop ):
+ DUCC_JVM_OPTS = DUCC_JVM_OPTS + ' -DDROP_DATABASE'
+
+ CMD = [self.java(), DUCC_JVM_OPTS, 'org.apache.uima.ducc.database.DbLoader', in_home, out_url]
CMD = ' '.join(CMD)
- os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + "/lib/orientdb/*"
os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + "/resources"
- print os.environ['CLASSPATH']
+ print 'CLASSPATH', os.environ['CLASSPATH']
+ print 'Executing', CMD
os.system(CMD)
-
if __name__ == "__main__":
- console = DbConvert()
+ console = DbLoader()
console.main(sys.argv[1:])
Added: uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/db_util.py Thu Nov 5 19:23:05 2015
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+import os
+
+# common routines for ducc_post_install and db_create
+def addToCp(cp, lib):
+ return cp + ':' + lib
+
+
+def execute(CMD):
+ print CMD
+ return os.system(CMD)
+
+def stop_database(pidfile):
+ print "Stopping the dtabase."
+
+ CMD = ['kill', '-TERM', '`cat ' + pidfile + '`']
+ CMD = ' '.join(CMD)
+ execute(CMD)
+
+def manual_config(DUCC_HOME, DUCC_HEAD):
+ print ''
+ print 'To manually configure the database edit', DUCC_HOME + '/cassandra-server/conf/casssandra.yaml'
+ print 'to Insure every occurance of DUCC_HEAD is replaced with', DUCC_HEAD, 'and every occurance'
+ print 'of DUCC_HOME is replaced with', DUCC_HOME + '.'
+ print ''
+ print 'Note that one occurance of DUCC_HEAD will be quoted: you must preserve these quotes, e.g. as "' + DUCC_HEAD + '".'
+
+def configure_database(DUCC_HOME, DUCC_HEAD, java, db_pw):
+ # for cassandra:
+ # in ducc_runtime/cassandra-server/conf we need to update cassandra.yaml to establish
+ # the data directories and db connection addresses
+
+ # Note this is a bootstrap routine and doesn't try to use common code that may depend on
+ # things being initialized correctly.
+
+
+ if ( db_pw == None ):
+ db_pw = raw_input("Enter database password OR 'bypass' to bypass database support:")
+ if ( db_pw == '' ):
+ print "Must enter a DB password or 'bypass' to continue."
+ return False
+
+ if ( os.path.exists(DUCC_HOME + "/database/data") ):
+ print 'Database is already defined in', DUCC_HOME + '/database', '- not rebilding.'
+ return False
+
+
+ if ( db_pw == 'bypass' ):
+ print 'Database support will be bypassed'
+ return True
+
+ config = DUCC_HOME + '/cassandra-server/conf/cassandra.yaml'
+ esc_home = DUCC_HOME.replace("/", "\/") # for sed
+
+ # must configure the database node to be the same as the ducc head, and the database location
+ # to be DUCC_HEAD
+ ch_head = "sed -i.bak s'/DUCC_HEAD/" + DUCC_HEAD + "'/ " + config
+ if ( execute(ch_head) != 0 ):
+ print 'Could not configure', config + '. You may need to recover it from', config+'.bak.'
+ manual_config(DUCC_HOME, DUCC_HEAD)
+ return False
+
+ ch_home = "sed -i.bak s'/DUCC_HOME/" + esc_home + "'/ " + config
+ if ( execute(ch_home) != 0):
+ print 'Could not configure', config + '. You may need to recover it from', config+'.bak.'
+ manual_config(DUCC_HOME, DUCC_HEAD)
+ return False
+
+ here = os.getcwd()
+ os.chdir(DUCC_HOME + "/cassandra-server")
+ pidfile = DUCC_HOME + '/state/cassandra.pid'
+ CMD = "bin/cassandra -p "+ pidfile + " > /dev/null 2>&1";
+ os.system(CMD);
+ print "Database is started. Waiting for initialization";
+
+ # Now start the db and create the schema
+ CLASSPATH = ''
+ CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/cassandra/*')
+ CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/guava/*')
+ CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/apache-log4j/*')
+ CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/lib/uima-ducc/*')
+ CLASSPATH = addToCp(CLASSPATH, DUCC_HOME + '/apache-uima/apache-activemq/lib/*')
+ os.environ['CLASSPATH'] = CLASSPATH
+ print os.environ['CLASSPATH']
+
+ ret = True
+ CMD = [java, '-DDUCC_HOME=' + DUCC_HOME, 'org.apache.uima.ducc.database.DbCreate', DUCC_HEAD, 'ducc', db_pw]
+ CMD = ' '.join(CMD)
+ if ( execute(CMD) == 0 ):
+ print 'Database is initialized.'
+ else:
+ print 'Database started but the schema could not be defined. DB logs are in', DUCC_HEAD + '/cassandra-server/logs.'
+ ret = False
+
+ stop_database(pidfile)
+ return ret
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc.py Thu Nov 5 19:23:05 2015
@@ -37,16 +37,11 @@ class Ducc(DuccUtil):
def run_db(self):
print '-------- starting the database'
- if ( self.db_disabled ):
+ if ( self.db_bypass ):
print 'Database is disabled; not starting it.'
print 'OK'
return
- if ( not os.path.exists(self.DUCC_HOME + "/database/data" )):
- print 'Database is missing. You must initialize the database with DbCreate.'
- print 'NOTOK'
- return
-
# check for the pid to see if the DB is running.
if ( self.db_process_alive() ) :
print 'Database is already running.'
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_post_install Thu Nov 5 19:23:05 2015
@@ -36,6 +36,7 @@ from ducc_base import find_localhost
from ducc_base import which
from ducc import Ducc
+import db_util as dbu
class PostInstall():
@@ -99,23 +100,26 @@ class PostInstall():
# Note this is a bootstrap routine and doesn't try to use common code that may depend on
# things being initialized correctly.
- return
+
if ( os.path.exists(self.DUCC_HOME + "/database/data") ):
print 'Database is already defined. Not configuring'
return
- db_pw = raw_input("Enter database password OR 'disable' to disable database support:")
+ db_pw = raw_input("Enter database password OR 'bypass' to disable database support:")
if ( db_pw == '' ):
print "Must enter a DB password to continue."
sys.exit(1);
- if ( db_pw == 'disable' ):
+ if ( db_pw == 'bypass' ):
print 'Database support will be disabled'
self.update_property('ducc.database.host', '--disabled--', '# Database support is disabled')
return;
else:
self.update_property('ducc.database.host', self.ducc_head, '# Database location')
+ dbu.configure_database(self.DUCC_HOME, self.ducc_head, self.path_to_java, db_pw)
+ return
+
config = self.DUCC_HOME + '/cassandra-server/conf/cassandra.yaml'
esc_home = self.DUCC_HOME.replace("/", "\/");
print "------ escaped home", esc_home
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/ducc_util.py Thu Nov 5 19:23:05 2015
@@ -196,7 +196,7 @@ class DuccUtil(DuccBase):
return False
# contact the database and see how useful it seems to be
- def db_alive(self):
+ def db_alive(self, retry=10):
if ( self.db_bypass == True ):
return True
@@ -207,7 +207,7 @@ class DuccUtil(DuccBase):
# get our log4j config into the path to shut up noisy logging
os.environ['CLASSPATH'] = os.environ['CLASSPATH'] + ':' + self.DUCC_HOME + '/resources'
- CMD = [self.java(), 'org.apache.uima.ducc.database.DbAlive', dbnode, 'ducc', self.db_password]
+ CMD = [self.java(), 'org.apache.uima.ducc.database.DbAlive', dbnode, 'ducc', self.db_password, str(retry)]
CMD = ' '.join(CMD)
rc = os.system(CMD)
Modified: uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/assembly/bin.xml Thu Nov 5 19:23:05 2015
@@ -300,15 +300,29 @@ under the License.
<!-- CASSANDRA server -->
<fileSet>
- <directory>target/cassandra/apache-cassandra-2.1.11</directory>
+ <directory>target/cassandra/apache-cassandra-${cassandra.server.version}</directory>
<outputDirectory>cassandra-server</outputDirectory>
<excludes>
<exclude>javadac/*</exclude>
+ <exclude>conf/cassandra-env.sh</exclude>
+ <exclude>conf/cassandra.yaml</exclude>
</excludes>
<fileMode>755</fileMode>
<directoryMode>755</directoryMode>
</fileSet>
+ <fileSet>
+ <directory>src/main/resources</directory>
+ <!-- Two files that post_install or some hapless human needs to set up with local information -->
+ <outputDirectory>cassandra-server/conf</outputDirectory>
+ <includes>
+ <include>cassandra-env.sh</include>
+ <include>cassandra.yaml</include>
+ </includes>
+ <fileMode>755</fileMode>
+ <directoryMode>755</directoryMode>
+ </fileSet>
+
<!-- Unversioned CLI jar -->
<fileSet>
<directory>uima-ducc-cli/target</directory>
@@ -329,6 +343,8 @@ under the License.
<excludes>
<exclude>private/*</exclude>
<exclude>private</exclude>
+ <exclude>cassandra-env.sh</exclude>
+ <exclude>cassandra.yaml</exclude>
</excludes>
</fileSet>
Modified: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra-env.sh Thu Nov 5 19:23:05 2015
@@ -94,6 +94,8 @@ jvmver=`echo "$java_ver_output" | grep '
JVM_VERSION=${jvmver%_*}
JVM_PATCH_VERSION=${jvmver#*_}
+# DUCC: These checks don't work with the IBM Jvm. Ducc requires a 'high-enough' level of JVM
+# so those checks will suffice.
#if [ "$JVM_VERSION" \< "1.7" ] ; then
# echo "Cassandra 2.0 and later require Java 7u25 or later."
# exit 1;
Added: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml?rev=1712850&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml Thu Nov 5 19:23:05 2015
@@ -0,0 +1,813 @@
+# Cassandra storage config YAML
+
+#
+# This is prepared for configuration for DUCC via ducc_post_install
+#
+
+# NOTE:
+# See http://wiki.apache.org/cassandra/StorageConfiguration for
+# full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'DUCC'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# If you already have a cluster with 1 token per node, and wish to migrate to
+# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
+num_tokens: 256
+
+# initial_token allows you to specify tokens manually. While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters
+# that do not have vnodes enabled.
+# initial_token:
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+# May either be "true" or "false" to enable globally, or contain a list
+# of data centers to enable per-datacenter.
+# hinted_handoff_enabled: DC1,DC2
+hinted_handoff_enabled: true
+# this defines the maximum amount of time a dead host will have hints
+# generated. After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+# Maximum throttle in KBs per second, per delivery thread. This will be
+# reduced proportionally to the number of nodes in the cluster. (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+# users. It keeps usernames and hashed passwords in system_auth.credentials table.
+# Please increase system_auth keyspace replication factor if you use this authenticator.
+authenticator: PasswordAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
+# increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 1000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster. You should leave this
+# alone for new clusters. The partitioner can NOT be changed without
+# reloading all data, so when upgrading you should set this to the
+# same partitioner you were already using.
+#
+# Besides Murmur3Partitioner, partitioners included for backwards
+# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk. Cassandra
+# will spread data evenly across them, subject to the granularity of
+# the configured compaction strategy.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+data_file_directories:
+ - DUCC_HOME/database/data
+
+# commit log. when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+commitlog_directory: DUCC_HOME/database/commitlog
+
+# policy for data disk failures:
+# die: shut down gossip and client transports and kill the JVM for any fs errors or
+# single-sstable errors, so the node can be replaced.
+# stop_paranoid: shut down gossip and client transports even for single-sstable errors,
+# kill the JVM for errors during startup.
+# stop: shut down gossip and client transports, leaving the node effectively dead, but
+# can still be inspected via JMX, kill the JVM for errors during startup.
+# best_effort: stop using the failed disk and respond to requests based on
+# remaining available sstables. This means you WILL see obsolete
+# data at CL.ONE!
+# ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# policy for commit disk failures:
+# die: shut down gossip and Thrift and kill the JVM, so the node can be replaced.
+# stop: shut down gossip and Thrift, leaving the node effectively dead, but
+# can still be inspected via JMX.
+# stop_commit: shutdown the commit log, letting writes collect but
+# continuing to service reads, as in pre-2.0.5 Cassandra
+# ignore: ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Maximum size of the row cache in memory.
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should
+# save the row cache. Caches are saved to saved_caches_directory as specified
+# in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save
+# Disabled by default, meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# The off-heap memory allocator. Affects storage engine metadata as
+# well as caches. Experiments show that JEMalloc uses less memory
+# than the native GCC allocator (i.e., JEMalloc is more
+# fragmentation-resistant).
+#
+# Supported values are: NativeAllocator, JEMallocAllocator
+#
+# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
+# modify cassandra-env.sh as directed in the file.
+#
+# Defaults to NativeAllocator
+# memory_allocator: NativeAllocator
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+saved_caches_directory: DUCC_HOME/database/saved_caches
+
+# commitlog_sync may be either "periodic" or "batch."
+#
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk. It will wait
+# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
+# This window should be kept short because the writer threads will
+# be unable to do extra work while waiting. (You may need to increase
+# concurrent_writes for the same reason.)
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 2
+#
+# the other option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# The size of the individual commitlog file segments. A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+commitlog_segment_size_in_mb: 32
+
+# Reuse commit log files when possible. The default is false, and this
+# feature will be removed entirely in future versions of Cassandra.
+#commitlog_segment_recycling: false
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+ # Addresses of hosts that are deemed contact points.
+ # Cassandra nodes use this list of hosts to find each other and learn
+ # the topology of the ring. You must change this if you are running
+ # multiple nodes!
+ - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+ parameters:
+ # seeds is actually a comma-delimited list of addresses.
+ # Ex: "<ip1>,<ip2>,<ip3>"
+ - seeds: "DUCC_HEAD"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# Total memory to use for sstable-reading buffers. Defaults to
+# the smaller of 1/4 of heap or 512MB.
+# file_cache_size_in_mb: 512
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+# memtable_heap_space_in_mb: 2048
+# memtable_offheap_space_in_mb: 2048
+
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+# heap_buffers: on heap nio buffers
+# offheap_buffers: off heap (direct) nio buffers
+# offheap_objects: native memory, eliminating nio buffer heap overhead
+memtable_allocation_type: heap_buffers
+
+# Total space to use for commitlogs. Since commitlog segments are
+# mmapped, and hence use up address space, the default size is 32
+# on 32-bit JVMs, and 8192 on 64-bit JVMs.
+#
+# If space gets above this value (it will round up to the next nearest
+# segment multiple), Cassandra will flush every dirty CF in the oldest
+# segment and remove it. So a small total commitlog space will tend
+# to cause more flush activity on less-active columnfamilies.
+# commitlog_total_space_in_mb: 8192
+
+# This sets the amount of memtable flush writer threads. These will
+# be blocked by disk io, and each one will hold a memtable in memory
+# while blocked.
+#
+# memtable_flush_writers defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+#
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#memtable_flush_writers: 8
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit. However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled. This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates. Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for encrypted communication. Unused unless enabled in
+# encryption_options
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+listen_address: DUCC_HEAD
+# listen_interface: eth0
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# Please note that the address on which the native transport is bound is the
+# same as the rpc_address. The port however is different and specified below.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+native_transport_port: 9042
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Whether to start the thrift rpc server.
+start_rpc: true
+
+# The address or interface to bind the Thrift RPC service and native transport
+# server to.
+#
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+#
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+rpc_address: DUCC_HEAD
+# rpc_interface: eth1
+# rpc_interface_prefer_ipv6: false
+
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Cassandra provides two out-of-the-box options for the RPC Server:
+#
+# sync -> One thread per thrift connection. For a very large number of clients, memory
+# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
+# per thread, and that will correspond to your use of virtual memory (but physical memory
+# may be limited depending on use of stack space).
+#
+# hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled
+# asynchronously using a small number of threads that does not vary with the amount
+# of thrift clients (and thus scales well to many clients). The rpc requests are still
+# synchronous (one thread per active request). If hsha is selected then it is essential
+# that rpc_max_threads is changed from the default value of unlimited.
+#
+# The default is sync because on Windows hsha is about 30% slower. On Linux,
+# sync/hsha performance is about the same, with hsha of course using less memory.
+#
+# Alternatively, can provide your own RPC server by providing the fully-qualified class name
+# of an o.a.c.t.TServerFactory that can create an instance of it.
+rpc_server_type: sync
+
+# Uncomment rpc_min|max_thread to set request pool size limits.
+#
+# Regardless of your choice of RPC server (see above), the number of maximum requests in the
+# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
+# RPC server, it also dictates the number of clients that can be connected at all).
+#
+# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
+# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
+# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
+#
+# rpc_min_threads: 16
+# rpc_max_threads: 2048
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and: man tcp
+# internode_send_buff_size_in_bytes:
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
+thrift_framed_transport_size_in_mb: 15
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data. Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction. Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you. Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway. These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition. The competing goals are these:
+# 1) a smaller granularity means more index entries are generated
+# and looking up rows within the partition by collation column
+# is faster
+# 2) but, Cassandra will keep the collation index in memory for hot
+# rows (as part of the key cache), so a larger granularity means
+# you can cache more hot rows
+column_index_size_in_kb: 64
+
+
+# Log WARN on any batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair. Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long-running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+#
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
+compaction_throughput_mb_per_sec: 16
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# inter_dc_stream_throughput_outbound_megabits_per_sec:
+
+# How long the coordinator should wait for read operations to complete
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations
+request_timeout_in_ms: 10000
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts. If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing
+# already-timed-out requests.
+#
+# Warning: before enabling this property make sure ntp is installed
+# and the times are synchronized between the nodes.
+cross_node_timeout: false
+
+# Enable socket timeout for streaming operation.
+# When a timeout occurs during streaming, streaming is retried from the start
+# of the current file. This _can_ involve re-streaming an important amount of
+# data, so you should avoid setting the value too low.
+# Default value is 3600000, which means streams timeout after an hour.
+# streaming_socket_timeout_in_ms: 3600000
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch. The snitch has two functions:
+# - it teaches Cassandra enough about your network topology to route
+# requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+# correlated failures. It does this by grouping machines into
+# "datacenters" and "racks." Cassandra will do its best not to have
+# more than one replica on the same "rack" (which may not actually
+# be a physical location)
+#
+# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
+# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
+# ARE PLACED.
+#
+# Out of the box, Cassandra provides
+# - SimpleSnitch:
+# Treats Strategy order as proximity. This can improve cache
+# locality when disabling read repair. Only appropriate for
+# single-datacenter deployments.
+# - GossipingPropertyFileSnitch
+# This should be your go-to snitch for production use. The rack
+# and datacenter for the local node are defined in
+# cassandra-rackdc.properties and propagated to other nodes via
+# gossip. If cassandra-topology.properties exists, it is used as a
+# fallback, allowing migration from the PropertyFileSnitch.
+# - PropertyFileSnitch:
+# Proximity is determined by rack and data center, which are
+# explicitly configured in cassandra-topology.properties.
+# - Ec2Snitch:
+# Appropriate for EC2 deployments in a single Region. Loads Region
+# and Availability Zone information from the EC2 API. The Region is
+# treated as the datacenter, and the Availability Zone as the rack.
+# Only private IPs are used, so this will not work across multiple
+# Regions.
+# - Ec2MultiRegionSnitch:
+# Uses public IPs as broadcast_address to allow cross-region
+# connectivity. (Thus, you should set seed addresses to the public
+# IP as well.) You will need to open the storage_port or
+# ssl_storage_port on the public IP firewall. (For intra-Region
+# traffic, Cassandra will switch to the private IP after
+# establishing a connection.)
+# - RackInferringSnitch:
+# Proximity is determined by rack and data center, which are
+# assumed to correspond to the 3rd and 2nd octet of each node's IP
+# address, respectively. Unless this happens to match your
+# deployment conventions, this is best used as an example of
+# writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it. This is
+# expressed as a double which represents a percentage. Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+# NoScheduler - Has no options
+# RoundRobin
+# - throttle_limit -- The throttle_limit is the number of in-flight
+# requests per client. Requests beyond
+# that limit are queued up until
+# running requests can complete.
+# The value of 80 here is twice the number of
+# concurrent_reads + concurrent_writes.
+# - default_weight -- default_weight is optional and allows for
+# overriding the default which is 1.
+# - weights -- Weights are optional and will default to 1 or the
+# overridden default_weight. The weight translates into how
+# many requests are handled during each turn of the
+# RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+# throttle_limit: 80
+# default_weight: 5
+# weights:
+# Keyspace1: 1
+# Keyspace2: 5
+
+# request_scheduler_id -- An identifier based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# Enable or disable inter-node encryption
+# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
+# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
+# suite for authentication, key exchange and encryption of the actual data transfers.
+# Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode.
+# NOTE: No custom encryption options are enabled at the moment
+# The available internode options are : all, none, dc, rack
+#
+# If set to dc cassandra will encrypt the traffic between the DCs
+# If set to rack cassandra will encrypt the traffic between the racks
+#
+# The passwords used in these options must match the passwords used when generating
+# the keystore and truststore. For instructions on generating these files, see:
+# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+#
+server_encryption_options:
+ internode_encryption: none
+ keystore: conf/.keystore
+ keystore_password: cassandra
+ truststore: conf/.truststore
+ truststore_password: cassandra
+ # More advanced defaults below:
+ # protocol: TLS
+ # algorithm: SunX509
+ # store_type: JKS
+ # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+ # require_client_auth: false
+
+# enable or disable client/server encryption.
+client_encryption_options:
+ enabled: false
+ keystore: conf/.keystore
+ keystore_password: cassandra
+ # require_client_auth: false
+ # Set truststore and truststore_password if require_client_auth is true
+ # truststore: conf/.truststore
+ # truststore_password: cassandra
+ # More advanced defaults below:
+ # protocol: TLS
+ # algorithm: SunX509
+ # store_type: JKS
+ # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# can be: all - all traffic is compressed
+# dc - traffic between different datacenters is compressed
+# none - nothing is compressed.
+internode_compression: all
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+# gc_warn_threshold_in_ms: 1000
Propchange: uima/sandbox/uima-ducc/trunk/src/main/resources/cassandra.yaml
------------------------------------------------------------------------------
svn:executable = *
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/rm/IRmPersistence.java Thu Nov 5 19:23:05 2015
@@ -121,11 +121,25 @@ public interface IRmPersistence
public boolean isPrivate() { return true;}
public boolean isMeta() { return true;}
},
+ Memory {
+ public String pname() { return "memory"; }
+ public Type type() { return Type.Integer; }
+ public boolean isPrimaryKey() { return true;}
+ },
+ Nodepool {
+ public String pname() { return "nodepool"; }
+ public Type type() { return Type.String; }
+ public boolean isPrimaryKey() { return true;}
+ },
Name {
public String pname() { return "name"; }
public Type type() { return Type.String; }
public boolean isPrimaryKey() { return true;}
-
+ },
+ SharesLeft {
+ public String pname() { return "shares_left"; }
+ public Type type() { return Type.Integer; }
+ public boolean isPrimaryKey() { return true;}
},
Responsive{
public String pname() { return "responsive"; }
@@ -139,18 +153,10 @@ public interface IRmPersistence
public String pname() { return "ip"; }
public Type type() { return Type.String; }
},
- Nodepool {
- public String pname() { return "nodepool"; }
- public Type type() { return Type.String; }
- },
Quantum {
public String pname() { return "quantum"; }
public Type type() { return Type.Integer; }
},
- Memory {
- public String pname() { return "memory"; }
- public Type type() { return Type.Integer; }
- },
ShareOrder {
public String pname() { return "share_order"; }
public Type type() { return Type.Integer; }
@@ -167,10 +173,6 @@ public interface IRmPersistence
public String pname() { return "heartbeats"; }
public Type type() { return Type.Integer; }
},
- SharesLeft {
- public String pname() { return "shares_left"; }
- public Type type() { return Type.Integer; }
- },
Assignments {
public String pname() { return "assignments"; }
public Type type() { return Type.Integer; }
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbAlive.java Thu Nov 5 19:23:05 2015
@@ -131,17 +131,24 @@ public class DbAlive
static void usage()
{
- System.out.println("Usage: DbAlive database_url id pw");
+ System.out.println("Usage: DbAlive database_url id pw retry-count");
System.exit(1);
}
public static void main(String[] args)
{
- if ( args.length != 3 ) {
+ if ( args.length != 4 ) {
usage();
}
- int max = 10; // we'll wait up to 60 seconds: 20 x 3 seconds
+ int max = 0;
+ try {
+ max = Integer.parseInt(args[3]); // we'll wait up to 60 seconds: 20 x 3 seconds
+ } catch ( NumberFormatException e ) {
+ System.out.println("Retry count must be numeric.");
+ System.exit(1);
+ }
+
DbAlive client = null;
RC rc = RC.OK;
try {
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbCreate.java Thu Nov 5 19:23:05 2015
@@ -34,12 +34,14 @@ import com.datastax.driver.core.PlainTex
import com.datastax.driver.core.Session;
import com.datastax.driver.core.SimpleStatement;
import com.datastax.driver.core.exceptions.AuthenticationException;
+import com.datastax.driver.core.exceptions.NoHostAvailableException;
public class DbCreate
{
static final String DUCC_KEYSPACE = "ducc";
static final String PASSWORD_KEY = "db_password";
static final String PASSWORD_FILE = "database.password";
+ static final int RETRY = 10;
DuccLogger logger = null;
String dburl;
@@ -47,10 +49,10 @@ public class DbCreate
String adminpw = null;
private Cluster cluster;
- private Session session;
+ private Session session = null;
- public DbCreate(String dburl, DuccLogger logger, String adminid, String adminpw)
+ DbCreate(String dburl, DuccLogger logger, String adminid, String adminpw)
{
this.dburl = dburl;
this.logger = logger;
@@ -58,14 +60,21 @@ public class DbCreate
this.adminpw = adminpw;
}
- public DbCreate(String dburl, String adminid, String adminpw)
+ DbCreate(String dburl, String adminid, String adminpw)
{
this.dburl = dburl;
this.adminid = adminid;
this.adminpw = adminpw;
}
- public void connect()
+ void close()
+ {
+ if ( cluster != null ) cluster.close();
+ session = null;
+ cluster = null;
+ }
+
+ boolean connect()
throws Exception
{
String methodName = "connect";
@@ -75,47 +84,62 @@ public class DbCreate
throw new IllegalArgumentException("DUCC_HOME must be set as a system property: -DDUCC_HOME=whatever");
}
- try {
- // If we're here, we must first of all get rid of the cassandra su and set up our own
+ // If we're here, we must first of all get rid of the cassandra su and set up our own
- AuthProvider auth = new PlainTextAuthProvider("cassandra", "cassandra");
- cluster = Cluster.builder()
- .withAuthProvider(auth)
- .addContactPoint(dburl)
- .build();
-
- session = cluster.connect();
- session.execute("CREATE USER IF NOT EXISTS " + adminid + " with password '" + adminpw + "' SUPERUSER");
- cluster.close();
- doLog(methodName, "Created user " + adminid);
-
- Properties props = new Properties();
- props.setProperty(PASSWORD_KEY, adminpw);
- FileOutputStream fos = new FileOutputStream(dh + "/resources.private/" + PASSWORD_FILE);
- props.store(fos, "Db private configuration");
- fos.close();
-
- auth = new PlainTextAuthProvider(adminid, adminpw);
- cluster = Cluster.builder()
- .withAuthProvider(auth)
- .addContactPoint(dburl)
- .build();
- session = cluster.connect();
-
- String uglypw = UUID.randomUUID().toString();
- session.execute("ALTER USER cassandra with password '" + uglypw + "' NOSUPERUSER");
- doLog(methodName, "Changed default super user's password and revoked its superuser authority.");
- doLog(methodName, "From this point, this DB can only be accessed in super user mode by user 'ducc'");
+ AuthProvider auth = new PlainTextAuthProvider("cassandra", "cassandra");
+ for ( int i = 0; i < RETRY; i++ ) {
+ try {
+ cluster = Cluster.builder()
+ .withAuthProvider(auth)
+ .addContactPoint(dburl)
+ .build();
+
+ session = cluster.connect();
+ session.execute("CREATE USER IF NOT EXISTS " + adminid + " with password '" + adminpw + "' SUPERUSER");
+ cluster.close();
+ doLog(methodName, "Created user " + adminid);
+
+ Properties props = new Properties();
+ props.setProperty(PASSWORD_KEY, adminpw);
+ FileOutputStream fos = new FileOutputStream(dh + "/resources.private/" + PASSWORD_FILE);
+ props.store(fos, "Db private configuration");
+ fos.close();
+
+ auth = new PlainTextAuthProvider(adminid, adminpw);
+ cluster = Cluster.builder()
+ .withAuthProvider(auth)
+ .addContactPoint(dburl)
+ .build();
+ session = cluster.connect();
+
+ String uglypw = UUID.randomUUID().toString();
+ session.execute("ALTER USER cassandra with password '" + uglypw + "' NOSUPERUSER");
+ doLog(methodName, "Changed default super user's password and revoked its superuser authority.");
+ doLog(methodName, "From this point, this DB can only be accessed in super user mode by user 'ducc'");
+
+ return true;
+ } catch ( NoHostAvailableException e ) {
+ doLog("Waiting for database to boot ...");
+ session = null;
+ cluster = null;
+ } catch ( AuthenticationException e ) {
+ doLog("Waiting for default authentication ...");
+ session = null;
+ cluster = null;
+ } catch ( Exception e ) {
+ doLog("Unknown problem contacting database.");
+ session = null;
+ cluster = null;
+ e.printStackTrace();
+ return false;
+ }
+ Thread.sleep(3000);
+ }
- } catch (AuthenticationException e ) {
- // if we get here the default super user isn't working and we expect a valid id and password
- AuthProvider auth = new PlainTextAuthProvider(adminid, adminpw);
- cluster = Cluster.builder()
- .withAuthProvider(auth)
- .addContactPoint(dburl)
- .build();
+ if ( cluster == null ) {
+ doLog(methodName, "Excessive retries. Database may not be initialized.");
+ return false;
}
-
Metadata metadata = cluster.getMetadata();
doLog(methodName, "Connected to cluster: %s\n", metadata.getClusterName());
@@ -123,18 +147,10 @@ public class DbCreate
doLog(methodName, "Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
}
session = cluster.connect();
+ return true;
}
- public void close() {
- cluster.close();
- }
-
- public Session getSession()
- {
- return this.session;
- }
-
- public void doLog(String methodName, Object ... msg)
+ void doLog(String methodName, Object ... msg)
{
if ( logger == null ) {
@@ -155,20 +171,20 @@ public class DbCreate
}
- String mkTableCreate(String tableName, String[] fields)
- {
- int max = fields.length - 1;
- int current = 0;
- StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ");
- buf.append(tableName);
- buf.append(" (");
- for (String s : fields) {
- buf.append(s);
- if ( current++ < max) buf.append(", ");
- }
- buf.append(") WITH CLUSTERING ORDER BY (ducc_dbid desc)");
- return buf.toString();
- }
+ // String mkTableCreate(String tableName, String[] fields)
+ // {
+ // int max = fields.length - 1;
+ // int current = 0;
+ // StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ");
+ // buf.append(tableName);
+ // buf.append(" (");
+ // for (String s : fields) {
+ // buf.append(s);
+ // if ( current++ < max) buf.append(", ");
+ // }
+ // buf.append(") WITH CLUSTERING ORDER BY (ducc_dbid desc)");
+ // return buf.toString();
+ // }
void createSchema()
{
@@ -245,15 +261,17 @@ public class DbCreate
DbCreate client = null;
try {
client = new DbCreate(args[0], args[1], args[2]);
- client.connect();
- client.createSchema();
+ if ( client.connect() ) {
+ client.createSchema();
+ client.close();
+ } else {
+ System.exit(1);
+ }
} catch ( Throwable e ) {
System.out.println("Errors creating database");
e.printStackTrace();
System.exit(1);
- } finally {
- if ( client != null ) client.close();
- }
+ }
System.exit(0);
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java Thu Nov 5 19:23:05 2015
@@ -97,11 +97,11 @@ public class DbLoader
int servicelimit = Integer.MAX_VALUE;
int registrylimit = Integer.MAX_VALUE;
- boolean dojobs = false;
- boolean doreservations = false;
- boolean doservices = false;
+ boolean dojobs = true;
+ boolean doreservations = true;
+ boolean doservices = true;
boolean doregistry = true;
- boolean docheckpoint = false;
+ boolean docheckpoint = true;
long jobBytes = 0;
long resBytes = 0;
@@ -110,13 +110,14 @@ public class DbLoader
AtomicInteger skippedServices = new AtomicInteger(0);
- public DbLoader(String from, String to)
+ public DbLoader(String from, String state_url)
throws Exception
{
//String methodName = "<ctr>";
+ this.state_url = state_url;
DUCC_HOME = System.getProperty("DUCC_HOME");
if ( DUCC_HOME == null ) {
- System.out.println("System proprety -DDUCC_HOME must be set.");
+ System.out.println("System property -DDUCC_HOME must be set.");
System.exit(1);
}
@@ -135,35 +136,7 @@ public class DbLoader
serviceRegistry = from + serviceRegistry;
checkpointFile = from + checkpointFile;
- f = new File(to);
- if ( ! f.isDirectory() ) {
- System.out.println("'to' must be a directory");
- System.exit(1);
- }
-
- String databasedir = to + "/database/databases";
-
- // We always use a non-networked version for loading
- //state_url = "plocal:" + databasedir + "/DuccState";
- state_url = "bluej538";
System.setProperty("ducc.state.database.url", state_url);
-
- if ( state_url.startsWith("plocal") ) {
- f = new File(databasedir);
- if ( !f.exists() ) {
- try {
- if ( ! f.mkdirs() ) {
- System.out.println("Cannot create database directory: " + databasedir);
- System.exit(1);
- }
- System.out.println("Created database directory " + databasedir);
- } catch ( Exception e ) {
- System.out.println("Cannot create database directory: " + databasedir + ":" + e.toString());
- System.exit(1);
- }
- }
- }
-
}
void closeStream(InputStream in)
@@ -580,15 +553,6 @@ public class DbLoader
dbManager = new DbManager(state_url, logger);
dbManager.init();
-// DbCreate cr = new DbCreate(state_url, logger);
-// if ( state_url.startsWith("plocal") ) {
-// cr.createPlocalDatabase();
-// } else {
-// cr.createDatabase();
-// }
-
-
-
if ( true ) {
try {
@@ -662,11 +626,9 @@ public class DbLoader
System.out.println("");
System.out.println("Where:");
System.out.println(" from");
- System.out.println(" is the DUCC_HOME you wish to convert");
+ System.out.println(" is the DUCC_HOME you wish to convert.");
System.out.println(" to");
- System.out.println(" is the DUCC_HOME contining the new database");
- System.out.println("");
- System.out.println("'from' and 'to' may be the same thing");
+ System.out.println(" is the datbase URL.");
System.exit(1);
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java Thu Nov 5 19:23:05 2015
@@ -91,7 +91,8 @@ public class RmStatePersistence
StringBuffer buf = new StringBuffer("CREATE TABLE IF NOT EXISTS ducc." + RmProperty.TABLE_NAME.pname() + " (");
buf.append(DbUtil.mkSchema(RmProperty.values()));
- buf.append(")");
+ buf.append(") WITH CLUSTERING ORDER BY (memory desc)");
+
ret.add(new SimpleStatement(buf.toString()));
return ret;
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1712850&r1=1712849&r2=1712850&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Thu Nov 5 19:23:05 2015
@@ -1013,9 +1013,9 @@ class NodePool
props.put(RmProperty.Name, nid.getName());
props.put(RmProperty.Ip, nid.getIp());
props.put(RmProperty.Nodepool, id);
- props.put(RmProperty.Quantum, share_quantum);
+ props.put(RmProperty.Quantum, share_quantum / ( 1024*1024));
- props.put(RmProperty.Memory , m.getMemory());
+ props.put(RmProperty.Memory , m.getMemory() / (1024*1024));
props.put(RmProperty.ShareOrder , m.getShareOrder());
props.put(RmProperty.Blacklisted , m.isBlacklisted());