You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by bu...@apache.org on 2013/11/21 12:14:54 UTC

svn commit: r887496 - in /websites/staging/mahout/trunk/content: ./ users/clustering/lda-commandline.html

Author: buildbot
Date: Thu Nov 21 11:14:54 2013
New Revision: 887496

Log:
Staging update by buildbot for mahout

Modified:
    websites/staging/mahout/trunk/content/   (props changed)
    websites/staging/mahout/trunk/content/users/clustering/lda-commandline.html

Propchange: websites/staging/mahout/trunk/content/
------------------------------------------------------------------------------
--- cms:source-revision (original)
+++ cms:source-revision Thu Nov 21 11:14:54 2013
@@ -1 +1 @@
-1544114
+1544115

Modified: websites/staging/mahout/trunk/content/users/clustering/lda-commandline.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/clustering/lda-commandline.html (original)
+++ websites/staging/mahout/trunk/content/users/clustering/lda-commandline.html Thu Nov 21 11:14:54 2013
@@ -430,53 +430,31 @@ to view all outputs.</p>
 <p><a name="lda-commandline-CommandlineoptionsfromMahoutcvbversion0.8"></a></p>
 <h1 id="command-line-options-from-mahout-cvb-version-08">Command line options from Mahout cvb version 0.8</h1>
 <div class="codehilite"><pre><span class="n">mahout</span> <span class="n">cvb</span> <span class="o">-</span><span class="n">h</span> 
-  <span class="o">--</span><span class="n">input</span> <span class="p">(</span><span class="o">-</span><span class="nb">i</span><span class="p">)</span> <span class="n">input</span>                      <span class="n">Path</span> <span class="n">to</span> <span class="n">job</span> <span class="n">input</span>
+  <span class="o">--</span><span class="n">input</span> <span class="p">(</span><span class="o">-</span><span class="nb">i</span><span class="p">)</span> <span class="n">input</span>                      <span class="n">Path</span> <span class="n">to</span> <span class="n">job</span> <span class="n">input</span> <span class="n">directory</span><span class="p">.</span>        
+  <span class="o">--</span><span class="n">output</span> <span class="p">(</span><span class="o">-</span><span class="n">o</span><span class="p">)</span> <span class="n">output</span>                    <span class="n">The</span> <span class="n">directory</span> <span class="n">pathname</span> <span class="k">for</span> <span class="n">output</span><span class="p">.</span>  
+  <span class="o">--</span><span class="n">maxIter</span> <span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="p">)</span> <span class="n">maxIter</span>                  <span class="n">The</span> <span class="n">maximum</span> <span class="n">number</span> <span class="n">of</span> <span class="n">iterations</span><span class="p">.</span>     
+  <span class="o">--</span><span class="n">convergenceDelta</span> <span class="p">(</span><span class="o">-</span><span class="n">cd</span><span class="p">)</span> <span class="n">convergenceDelta</span>       <span class="n">The</span> <span class="n">convergence</span> <span class="n">delta</span> <span class="n">value</span>           
+  <span class="o">--</span><span class="n">overwrite</span> <span class="p">(</span><span class="o">-</span><span class="n">ow</span><span class="p">)</span>                   <span class="n">If</span> <span class="n">present</span><span class="p">,</span> <span class="n">overwrite</span> <span class="n">the</span> <span class="n">output</span> <span class="n">directory</span> <span class="n">before</span> <span class="n">running</span> <span class="n">job</span>    
+  <span class="o">--</span><span class="n">num_topics</span> <span class="p">(</span><span class="o">-</span><span class="n">k</span><span class="p">)</span> <span class="n">num_topics</span>                <span class="n">Number</span> <span class="n">of</span> <span class="n">topics</span> <span class="n">to</span> <span class="n">learn</span>      
+  <span class="o">--</span><span class="n">num_terms</span> <span class="p">(</span><span class="o">-</span><span class="n">nt</span><span class="p">)</span> <span class="n">num_terms</span>                 <span class="n">Vocabulary</span> <span class="nb">size</span>   
+  <span class="o">--</span><span class="n">doc_topic_smoothing</span> <span class="p">(</span><span class="o">-</span><span class="n">a</span><span class="p">)</span> <span class="n">doc_topic_smoothing</span>      <span class="n">Smoothing</span> <span class="k">for</span> <span class="n">document</span><span class="o">/</span><span class="n">topic</span> <span class="n">distribution</span>      
+  <span class="o">--</span><span class="n">term_topic_smoothing</span> <span class="p">(</span><span class="o">-</span><span class="n">e</span><span class="p">)</span> <span class="n">term_topic_smoothing</span>    <span class="n">Smoothing</span> <span class="k">for</span> <span class="n">topic</span><span class="o">/</span><span class="n">term</span> <span class="n">distribution</span>      
+  <span class="o">--</span><span class="n">dictionary</span> <span class="p">(</span><span class="o">-</span><span class="n">dict</span><span class="p">)</span> <span class="n">dictionary</span>             <span class="n">Path</span> <span class="n">to</span> <span class="n">term</span><span class="o">-</span><span class="n">dictionary</span> <span class="n">file</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="p">(</span><span class="n">glob</span> <span class="n">expression</span> <span class="n">supported</span><span class="p">)</span> 
+  <span class="o">--</span><span class="n">doc_topic_output</span> <span class="p">(</span><span class="o">-</span><span class="n">dt</span><span class="p">)</span> <span class="n">doc_topic_output</span>       <span class="n">Output</span> <span class="n">path</span> <span class="k">for</span> <span class="n">the</span> <span class="n">training</span> <span class="n">doc</span><span class="o">/</span><span class="n">topic</span> <span class="n">distribution</span>        
+  <span class="o">--</span><span class="n">topic_model_temp_dir</span> <span class="p">(</span><span class="o">-</span><span class="n">mt</span><span class="p">)</span> <span class="n">topic_model_temp_dir</span>   <span class="n">Path</span> <span class="n">to</span> <span class="n">intermediate</span> <span class="n">model</span> <span class="n">path</span> <span class="p">(</span><span class="n">useful</span> <span class="k">for</span> <span class="n">restarting</span><span class="p">)</span>       
+  <span class="o">--</span><span class="n">iteration_block_size</span> <span class="p">(</span><span class="o">-</span><span class="n">block</span><span class="p">)</span> <span class="n">iteration_block_size</span>    <span class="n">Number</span> <span class="n">of</span> <span class="n">iterations</span> <span class="n">per</span> <span class="n">perplexity</span> <span class="n">check</span>  
+  <span class="o">--</span><span class="n">random_seed</span> <span class="p">(</span><span class="o">-</span><span class="n">seed</span><span class="p">)</span> <span class="n">random_seed</span>           <span class="n">Random</span> <span class="n">seed</span>       
+  <span class="o">--</span><span class="n">test_set_fraction</span> <span class="p">(</span><span class="o">-</span><span class="n">tf</span><span class="p">)</span> <span class="n">test_set_fraction</span>         <span class="n">Fraction</span> <span class="n">of</span> <span class="n">data</span> <span class="n">to</span> <span class="n">hold</span> <span class="n">out</span> <span class="k">for</span> <span class="n">testing</span>  
+  <span class="o">--</span><span class="n">num_train_threads</span> <span class="p">(</span><span class="o">-</span><span class="n">ntt</span><span class="p">)</span> <span class="n">num_train_threads</span>        <span class="n">number</span> <span class="n">of</span> <span class="n">threads</span> <span class="n">per</span> <span class="n">mapper</span> <span class="n">to</span> <span class="n">train</span> <span class="n">with</span>  
+  <span class="o">--</span><span class="n">num_update_threads</span> <span class="p">(</span><span class="o">-</span><span class="n">nut</span><span class="p">)</span> <span class="n">num_update_threads</span>      <span class="n">number</span> <span class="n">of</span> <span class="n">threads</span> <span class="n">per</span> <span class="n">mapper</span> <span class="n">to</span> <span class="n">update</span> <span class="n">the</span> <span class="n">model</span> <span class="n">with</span>        
+  <span class="o">--</span><span class="n">max_doc_topic_iters</span> <span class="p">(</span><span class="o">-</span><span class="n">mipd</span><span class="p">)</span> <span class="n">max_doc_topic_iters</span>   <span class="n">max</span> <span class="n">number</span> <span class="n">of</span> <span class="n">iterations</span> <span class="n">per</span> <span class="n">doc</span> <span class="k">for</span> <span class="n">p</span><span class="p">(</span><span class="n">topic</span><span class="o">|</span><span class="n">doc</span><span class="p">)</span> <span class="n">learning</span>          
+  <span class="o">--</span><span class="n">num_reduce_tasks</span> <span class="n">num_reduce_tasks</span>             <span class="n">number</span> <span class="n">of</span> <span class="n">reducers</span> <span class="n">to</span> <span class="n">use</span> <span class="n">during</span> <span class="n">model</span> <span class="n">estimation</span>        
+  <span class="o">--</span><span class="n">backfill_perplexity</span>                   <span class="n">enable</span> <span class="n">backfilling</span> <span class="n">of</span> <span class="n">missing</span> <span class="n">perplexity</span> <span class="n">values</span>       
+  <span class="o">--</span><span class="n">help</span> <span class="p">(</span><span class="o">-</span><span class="n">h</span><span class="p">)</span>                         <span class="n">Print</span> <span class="n">out</span> <span class="n">help</span>    
+  <span class="o">--</span><span class="n">tempDir</span> <span class="n">tempDir</span>                   <span class="n">Intermediate</span> <span class="n">output</span> <span class="n">directory</span>      
+  <span class="o">--</span><span class="n">startPhase</span> <span class="n">startPhase</span>                 <span class="n">First</span> <span class="n">phase</span> <span class="n">to</span> <span class="n">run</span>    
+  <span class="o">--</span><span class="n">endPhase</span> <span class="n">endPhase</span>                     <span class="n">Last</span> <span class="n">phase</span> <span class="n">to</span> <span class="n">run</span>
 </pre></div>
-
-
-<p>directory.      <br />
-      --output (-o) output                    The directory
-pathname for output.<br />
-      --maxIter (-x) maxIter                  The maximum
-number of iterations.     <br />
-      --convergenceDelta (-cd) convergenceDelta       The convergence
-delta value       <br />
-      --overwrite (-ow)                   If present,
-overwrite the output directory before running job  <br />
-      --num_topics (-k) num_topics                Number of topics
-to learn       <br />
-      --num_terms (-nt) num_terms                 Vocabulary size <br />
-      --doc_topic_smoothing (-a) doc_topic_smoothing      Smoothing for
-document/topic distribution    <br />
-      --term_topic_smoothing (-e) term_topic_smoothing    Smoothing for
-topic/term distribution    <br />
-      --dictionary (-dict) dictionary             Path to
-term-dictionary file(s) (glob expression supported) 
-      --doc_topic_output (-dt) doc_topic_output       Output path for
-the training doc/topic distribution    <br />
-      --topic_model_temp_dir (-mt) topic_model_temp_dir   Path to
-intermediate model path (useful for restarting)     <br />
-      --iteration_block_size (-block) iteration_block_size    Number of
-iterations per perplexity check<br />
-      --random_seed (-seed) random_seed           Random seed     <br />
-      --test_set_fraction (-tf) test_set_fraction         Fraction of data
-to hold out for testing<br />
-      --num_train_threads (-ntt) num_train_threads        number of threads
-per mapper to train with<br />
-      --num_update_threads (-nut) num_update_threads      number of threads
-per mapper to update the model with      <br />
-      --max_doc_topic_iters (-mipd) max_doc_topic_iters   max number of
-iterations per doc for p(topic|doc) learning        <br />
-      --num_reduce_tasks num_reduce_tasks             number of
-reducers to use during model estimation      <br />
-      --backfill_perplexity                   enable
-backfilling of missing perplexity values      <br />
-      --help (-h)                         Print out help  <br />
-      --tempDir tempDir                   Intermediate
-output directory       <br />
-      --startPhase startPhase                 First phase to
-run  <br />
-      --endPhase endPhase                     Last phase to run</p>
    </div>
   </div>     
 </div>