You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spot.apache.org by na...@apache.org on 2018/03/19 19:28:30 UTC

[21/42] incubator-spot git commit: [SPOT-213][SPOT-216] [setup] updated scripts, documentation and spot.conf to support mutiple DB engines

[SPOT-213][SPOT-216] [setup] updated scripts, documentation and spot.conf to support mutiple DB engines


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/49f4934c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/49f4934c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/49f4934c

Branch: refs/heads/SPOT-181_ODM
Commit: 49f4934c47e32ccda80111025ececf9e53780f11
Parents: 3383c07
Author: natedogs911 <na...@gmail.com>
Authored: Thu Jan 18 12:32:09 2018 -0800
Committer: natedogs911 <na...@gmail.com>
Committed: Thu Jan 18 12:32:09 2018 -0800

----------------------------------------------------------------------
 spot-setup/README.md     |   7 +++
 spot-setup/hdfs_setup.sh | 120 +++++++++++++++++++++++++++++++++++++-----
 spot-setup/spot.conf     |  28 +++++++++-
 3 files changed, 139 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/49f4934c/spot-setup/README.md
----------------------------------------------------------------------
diff --git a/spot-setup/README.md b/spot-setup/README.md
index 1d486a6..c5d245a 100644
--- a/spot-setup/README.md
+++ b/spot-setup/README.md
@@ -21,6 +21,11 @@ To collaborate and run spot-setup, it is required the following prerequisites:
 
 The main script in the repository is **hdfs_setup.sh** which is responsible of loading environment variables, creating folders in Hadoop for the different use cases (flow, DNS or Proxy), create the Impala database, and finally execute Impala query scripts that creates Impala tables needed to access netflow, dns and proxy data.
 
+Options:
+--no-sudo     will execute commands as the existing user while setting `HADOOP_USER_NAME=hdfs`
+-c            specify a custom location for the spot.conf, defaults to /etc/spot.conf
+-d            specific which database client to use `-d beeline` NOTE: Impala supports kerberos
+
 ## Environment Variables
 
 **spot.conf** is the file storing the variables needed during the installation process including node assignment, User interface, Machine Learning and Ingest gateway nodes.
@@ -33,6 +38,8 @@ To read more about these variables, please review the [documentation](http://spo
 
 spot-setup contains a script per use case, as of today, there is a table creation script for each DNS, flow and Proxy data.
 
+the HQL scripts are seperated by the underlying database in the ./spot-setup/ folder.
+
 These HQL scripts are intended to be executed as a Impala statement and must comply HQL standards.
 
 We create tables using Parquet format to get a faster query performance. This format is an industry standard and you can find more information about it on:

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/49f4934c/spot-setup/hdfs_setup.sh
----------------------------------------------------------------------
diff --git a/spot-setup/hdfs_setup.sh b/spot-setup/hdfs_setup.sh
index df898c8..6e73a20 100755
--- a/spot-setup/hdfs_setup.sh
+++ b/spot-setup/hdfs_setup.sh
@@ -17,6 +17,27 @@
 # limitations under the License.
 #
 
+set -e
+
+# Print a message to stdout under an "hdfs_setup.sh:" banner line.
+# Arguments: $1 - message text; backslash escapes such as \n are expanded
+function log() {
+    # %b keeps the escape expansion callers rely on (e.g. a trailing "\n")
+    # without using the message itself as the printf format string, so a
+    # literal '%' in a path or setting is printed as-is instead of being
+    # parsed as a conversion specifier.
+    printf 'hdfs_setup.sh:\n %b\n' "$1"
+}
+
+function safe_mkdir() {
+        # Create an HDFS directory only when it does not exist yet, which
+        # keeps the script from exiting (set -e) on folders that already exist.
+        # Arguments: the hdfs command to run (for example `hdfs` or
+        #            `sudo -u hdfs hdfs`) followed by the directory; the
+        #            directory is always the last argument.
+        # Returns:   non-zero when fewer than two arguments are given or when
+        #            the mkdir itself fails.
+        if [[ $# -lt 2 ]]; then
+            log "safe_mkdir: expected an hdfs command and a directory"
+            return 1
+        fi
+        local dir="${!#}"
+        local -a hdfs_cmd
+        # Callers expand the command unquoted, so a multi-word command such as
+        # `sudo -u hdfs hdfs` arrives as several arguments. Join everything
+        # before the directory and split it again so that a single quoted
+        # string works as well.
+        read -r -a hdfs_cmd <<< "${*:1:$#-1}"
+        # Probe with the same command (and therefore the same user) that will
+        # create the directory; use the exit status directly rather than
+        # executing the command's output.
+        if "${hdfs_cmd[@]}" dfs -test -d "${dir}"; then
+            log "${dir} already exists"
+        else
+            log "running mkdir on ${dir}"
+            "${hdfs_cmd[@]}" dfs -mkdir "${dir}"
+        fi
+}
+
+SPOTCONF="/etc/spot.conf"
 DSOURCES=('flow' 'dns' 'proxy')
 DFOLDERS=('binary' 
 'stage'
@@ -33,37 +54,108 @@ DFOLDERS=('binary'
 'hive/oa/threat_dendro'
 )
 
+
+# input options
+# Consume the positional parameters front to back so that "$1" is always the
+# option being examined and "$2" is its value. Iterating over a snapshot of
+# "$@" while shifting lets the two drift apart as soon as an unrecognised
+# argument is present.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        "--no-sudo")
+            log "not using sudo"
+            no_sudo=true
+            shift
+            ;;
+        "-c")
+            # Fail with a message instead of letting a bare `shift` abort the
+            # script silently under `set -e` when the value is missing.
+            if [[ $# -lt 2 ]]; then
+                log "option -c requires the path to spot.conf"
+                exit 1
+            fi
+            SPOTCONF=$2
+            log "Spot Configuration file: ${SPOTCONF}"
+            shift 2
+            ;;
+        "-d")
+            if [[ $# -lt 2 ]]; then
+                log "option -d requires a database engine name"
+                exit 1
+            fi
+            db_override=$2
+            shift 2
+            ;;
+        *)
+            # Unrecognised arguments are ignored, as before.
+            shift
+            ;;
+    esac
+done
+
 # Sourcing spot configuration variables
-source /etc/spot.conf
+log "Sourcing ${SPOTCONF}\n"
+source $SPOTCONF
+
+# Choose how hdfs commands are run: through sudo as the hdfs user (default),
+# or directly as the current user when --no-sudo was given.
+if [[ ${no_sudo} == "true" ]]; then
+    hdfs_cmd="hdfs"
+
+    if [[ -n "${HADOOP_USER_NAME}" ]]; then
+        log "HADOOP_USER_NAME: ${HADOOP_USER_NAME}"
+    else
+        log "setting HADOOP_USER_NAME to hdfs"
+        HADOOP_USER_NAME=hdfs
+    fi
+    # A plain assignment only sets a shell variable; export it so the hdfs
+    # processes started by this script actually see it in their environment.
+    export HADOOP_USER_NAME
+else
+    hdfs_cmd="sudo -u hdfs hdfs"
+fi
+
+# Resolve the database engine. A -d override from the command line takes
+# precedence over DBENGINE from the sourced configuration.
+if [[ -n "${db_override}" ]]; then
+        DBENGINE=${db_override}
+fi
+# Lower-case the value so the case statement below matches however it was
+# typed; quoting keeps the shell from glob-expanding it, and stray whitespace
+# is dropped explicitly.
+DBENGINE=$(printf '%s' "${DBENGINE}" | tr '[:upper:]' '[:lower:]')
+DBENGINE=${DBENGINE//[[:space:]]/}
+# Log the normalised value, i.e. the one that is actually matched below.
+log "setting database engine to ${DBENGINE}"
+
+# Build three command prefixes for the selected engine:
+#   db_shell  - the base client invocation
+#   db_query  - db_shell plus the flag that takes an inline statement
+#   db_script - db_shell plus the huser/dbname variables and the flag that
+#               takes a script file
+# They are plain strings that are expanded unquoted where they are used, so
+# they rely on word splitting.
+case ${DBENGINE} in
+    impala)
+        db_shell="impala-shell -i ${IMPALA_DEM}"
+        # -k is appended only when the configuration sets KERBEROS to "true"
+        if [[ ${KERBEROS} == "true" ]]; then
+            db_shell="${db_shell} -k"
+        fi
+        db_query="${db_shell} -q"
+        db_script="${db_shell} --var=huser=${HUSER} --var=dbname=${DBNAME} -c -f"
+        ;;
+    hive)
+        db_shell="hive"
+        db_query="${db_shell} -e"
+        db_script="${db_shell} -hiveconf huser=${HUSER} -hiveconf dbname=${DBNAME} -f"
+        ;;
+    beeline)
+        db_shell="beeline -u jdbc:${JDBC_URL}"
+        db_query="${db_shell} -e"
+        db_script="${db_shell} --hivevar huser=${HUSER} --hivevar dbname=${DBNAME} -f"
+        ;;
+    *)
+        # Anything else (including an empty value) is a configuration error.
+        log "DBENGINE not compatible or not set in spot.conf: DBENGINE--> ${DBENGINE:-empty}"
+        exit 1
+        ;;
+esac
 
 # Creating HDFS user's folder
-sudo -u hdfs hdfs dfs -mkdir ${HUSER}
-sudo -u hdfs hdfs dfs -chown ${USER}:supergroup ${HUSER}
-sudo -u hdfs hdfs dfs -chmod 775 ${HUSER}
+safe_mkdir ${hdfs_cmd} ${HUSER}
+${hdfs_cmd} dfs -chown ${USER}:supergroup ${HUSER}
+${hdfs_cmd} dfs -chmod 775 ${HUSER}
 
 # Creating HDFS paths for each use case
 for d in "${DSOURCES[@]}" 
-do 
+do
 	echo "creating /$d"
-	hdfs dfs -mkdir ${HUSER}/$d 
+	safe_mkdir hdfs ${HUSER}/$d
 	for f in "${DFOLDERS[@]}" 
 	do 
 		echo "creating $d/$f"
-		hdfs dfs -mkdir ${HUSER}/$d/$f
+		safe_mkdir ${hdfs_cmd} ${HUSER}/$d/$f
 	done
 
 	# Modifying permission on HDFS folders to allow Impala to read/write
-	hdfs dfs -chmod -R 775 ${HUSER}/$d
-	sudo -u hdfs hdfs dfs -setfacl -R -m user:impala:rwx ${HUSER}/$d
-	sudo -u hdfs hdfs dfs -setfacl -R -m user:${USER}:rwx ${HUSER}/$d
+	# The sub-folders above are created through ${hdfs_cmd}, so the recursive
+	# chmod has to run through it too; a user who does not own them cannot
+	# change their mode.
+	${hdfs_cmd} dfs -chmod -R 775 ${HUSER}/$d
+	# Grant access to the database service account. Derive it from the resolved
+	# engine rather than from the raw -d override, which is empty when the engine
+	# comes from spot.conf (an empty name turns the entry into "user::rwx", the
+	# owner entry).
+	# NOTE(review): beeline is only a client; "hive" is assumed to be the account
+	# that needs access in that case - confirm for the target cluster.
+	case ${DBENGINE} in
+		beeline) db_user="hive" ;;
+		*)       db_user="${DBENGINE}" ;;
+	esac
+	${hdfs_cmd} dfs -setfacl -R -m user:${db_user}:rwx ${HUSER}/$d
+	${hdfs_cmd} dfs -setfacl -R -m user:${USER}:rwx ${HUSER}/$d
 done
 
+
 # Creating Spot Database
-impala-shell -i ${IMPALA_DEM} -q "CREATE DATABASE IF NOT EXISTS ${DBNAME};"
+# db_query holds the engine-specific command prefix that executes one inline
+# statement; it is expanded unquoted on purpose so it splits into its words.
+ ${db_query} "CREATE DATABASE IF NOT EXISTS ${DBNAME}";
+
 
-# Creating Impala tables
+# Creating tables
 for d in "${DSOURCES[@]}" 
-do 
-	impala-shell -i ${IMPALA_DEM} --var=huser=${HUSER} --var=dbname=${DBNAME} -c -f create_${d}_parquet.hql
+do
+	# One table-creation script per data source, looked up in a directory named
+	# after the selected engine, relative to the current working directory.
+	# NOTE(review): presumably a matching ./<engine>/ directory must exist for
+	# every supported DBENGINE value - confirm for beeline.
+	${db_script} "./${DBENGINE}/create_${d}_parquet.hql"
 done
-

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/49f4934c/spot-setup/spot.conf
----------------------------------------------------------------------
diff --git a/spot-setup/spot.conf b/spot-setup/spot.conf
index a0cba3d..aa08ea7 100755
--- a/spot-setup/spot.conf
+++ b/spot-setup/spot.conf
@@ -19,7 +19,6 @@
 UINODE='node03'
 MLNODE='node04'
 GWNODE='node16'
-DBNAME='spot'
 
 #hdfs - base user and data source config
 HUSER='/user/spot'
@@ -30,10 +29,35 @@ PROXY_PATH=${HUSER}/${DSOURCE}/hive/y=${YR}/m=${MH}/d=${DY}/
 FLOW_PATH=${HUSER}/${DSOURCE}/hive/y=${YR}/m=${MH}/d=${DY}/
 HPATH=${HUSER}/${DSOURCE}/scored_results/${FDATE}
 
-#impala config
+# Database
+DBNAME='spot'
+DBENGINE="" # hive,impala and beeline supported
+JDBC_URL="" # example hive2://node01:10000/default;principal=hive/node01@REALM.COM
+
+# impala config
 IMPALA_DEM=node04
 IMPALA_PORT=21050
 
+# Hive Server2
+HS2_HOST=''
+HS2_PORT=''
+
+#kerberos config
+KERBEROS='false'
+KINIT=/usr/bin/kinit
+PRINCIPAL='user'
+KEYTAB='/opt/security/user.keytab'
+SASL_MECH='GSSAPI'
+SECURITY_PROTO='sasl_plaintext'
+KAFKA_SERVICE_NAME=''
+
+#ssl config
+SSL='false'
+SSL_VERIFY='true'
+CA_LOCATION=''
+CERT=''
+KEY=''
+
 #local fs base user and data source config
 LUSER='/home/spot'
 LPATH=${LUSER}/ml/${DSOURCE}/${FDATE}