You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by is...@apache.org on 2013/11/21 12:14:51 UTC
svn commit: r1544115 - /mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext

Author: isabel
Date: Thu Nov 21 11:14:50 2013
New Revision: 1544115

URL: http://svn.apache.org/r1544115
Log:
MAHOUT-1245 - fix formatting

Modified:
    mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext

Modified: mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext
URL: http://svn.apache.org/viewvc/mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext?rev=1544115&r1=1544114&r2=1544115&view=diff
==============================================================================
--- mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext (original)
+++ mahout/site/mahout_cms/trunk/content/users/clustering/lda-commandline.mdtext Thu Nov 21 11:14:50 2013
@@ -1,4 +1,5 @@
 Title: lda-commandline
+
 <a name="lda-commandline-RunningLatentDirichletAllocation(algorithm)fromtheCommandLine"></a>
 # Running Latent Dirichlet Allocation (algorithm) from the Command Line
 [Since Mahout v0.6](https://issues.apache.org/jira/browse/MAHOUT-897)
@@ -50,48 +51,28 @@ to view all outputs.
 # Command line options from Mahout cvb version 0.8
 
     mahout cvb -h 
-      --input (-i) input					  Path to job input
-directory.	      
-      --output (-o) output					  The directory
-pathname for output.  
-      --maxIter (-x) maxIter				  The maximum
-number of iterations.		
-      --convergenceDelta (-cd) convergenceDelta		  The convergence
-delta value		    
-      --overwrite (-ow)					  If present,
-overwrite the output directory before running job    
-      --num_topics (-k) num_topics				  Number of topics
-to learn		 
+      --input (-i) input					  Path to job input directory.	      
+      --output (-o) output					  The directory pathname for output.  
+      --maxIter (-x) maxIter				  The maximum number of iterations.		
+      --convergenceDelta (-cd) convergenceDelta		  The convergence delta value		    
+      --overwrite (-ow)					  If present, overwrite the output directory before running job    
+      --num_topics (-k) num_topics				  Number of topics to learn		 
       --num_terms (-nt) num_terms				  Vocabulary size   
-      --doc_topic_smoothing (-a) doc_topic_smoothing	  Smoothing for
-document/topic distribution	     
-      --term_topic_smoothing (-e) term_topic_smoothing	  Smoothing for
-topic/term distribution 	 
-      --dictionary (-dict) dictionary			  Path to
-term-dictionary file(s) (glob expression supported) 
-      --doc_topic_output (-dt) doc_topic_output		  Output path for
-the training doc/topic distribution	     
-      --topic_model_temp_dir (-mt) topic_model_temp_dir	  Path to
-intermediate model path (useful for restarting)       
-      --iteration_block_size (-block) iteration_block_size	  Number of
-iterations per perplexity check  
+      --doc_topic_smoothing (-a) doc_topic_smoothing	  Smoothing for document/topic distribution	     
+      --term_topic_smoothing (-e) term_topic_smoothing	  Smoothing for topic/term distribution 	 
+      --dictionary (-dict) dictionary			  Path to term-dictionary file(s) (glob expression supported) 
+      --doc_topic_output (-dt) doc_topic_output		  Output path for the training doc/topic distribution	     
+      --topic_model_temp_dir (-mt) topic_model_temp_dir	  Path to intermediate model path (useful for restarting)       
+      --iteration_block_size (-block) iteration_block_size	  Number of iterations per perplexity check  
       --random_seed (-seed) random_seed			  Random seed	    
-      --test_set_fraction (-tf) test_set_fraction		  Fraction of data
-to hold out for testing  
-      --num_train_threads (-ntt) num_train_threads		  number of threads
-per mapper to train with  
-      --num_update_threads (-nut) num_update_threads	  number of threads
-per mapper to update the model with	       
-      --max_doc_topic_iters (-mipd) max_doc_topic_iters	  max number of
-iterations per doc for p(topic|doc) learning		  
-      --num_reduce_tasks num_reduce_tasks			  number of
-reducers to use during model estimation 	   
-      --backfill_perplexity 				  enable
-backfilling of missing perplexity values		
+      --test_set_fraction (-tf) test_set_fraction		  Fraction of data to hold out for testing  
+      --num_train_threads (-ntt) num_train_threads		  number of threads per mapper to train with  
+      --num_update_threads (-nut) num_update_threads	  number of threads per mapper to update the model with	       
+      --max_doc_topic_iters (-mipd) max_doc_topic_iters	  max number of iterations per doc for p(topic|doc) learning		  
+      --num_reduce_tasks num_reduce_tasks			  number of reducers to use during model estimation 	   
+      --backfill_perplexity 				  enable backfilling of missing perplexity values		
       --help (-h)						  Print out help    
-      --tempDir tempDir					  Intermediate
-output directory	     
-      --startPhase startPhase				  First phase to
-run    
+      --tempDir tempDir					  Intermediate output directory	     
+      --startPhase startPhase				  First phase to run    
       --endPhase endPhase					  Last phase to run