You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "Alexey Zinoviev (Jira)" <ji...@apache.org> on 2019/10/08 12:58:00 UTC

[jira] [Updated] (IGNITE-12269) [ML] The method printTree is corrupted on the example

     [ https://issues.apache.org/jira/browse/IGNITE-12269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Alexey Zinoviev updated IGNITE-12269:
-------------------------------------
    Description: 
This example
{code:java}
package org.apache.ignite.examples.ml.tutorial;

import java.io.FileNotFoundException;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;
import org.apache.ignite.ml.composition.ModelsComposition;
import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory;
import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory;
import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer;
import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.apache.ignite.ml.preprocessing.Preprocessor;
import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer;
import org.apache.ignite.ml.preprocessing.encoding.EncoderType;
import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
import org.apache.ignite.ml.selection.scoring.metric.MetricName;
import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter;
import org.apache.ignite.ml.selection.split.TrainTestSplit;
import org.apache.ignite.ml.trainers.DatasetTrainer;
import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;

/**
 * Reproduction case: tutorial step 11 (boosting) with a changed encoded-feature index.
 * <p>
 * {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example due to different values
 * distribution in columns and rows.
 * <p>
 * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p>
 * <p>
 * After that it defines preprocessors that extract features from an upstream data and perform other desired changes
 * over the extracted data, including the scaling.</p>
 * <p>
 * Then, it trains a {@link ModelsComposition} on the processed data using
 * {@link GDBBinaryClassifierOnTreesTrainer} (boosting on trees).</p>
 * <p>
 * Finally, this example prints the trained model and uses {@link Evaluator} functionality to compute the accuracy
 * metric from predictions. Printing the model is the step that fails in this report: the stack trace shows
 * {@code ModelsComposition.toString()} ending in {@code DecisionTree.printTree} with an
 * {@link IllegalArgumentException}.</p>
 */
public class Step_11_Boosting {
    /**
     * Run example.
     *
     * @param args Command line arguments; not read by this example.
     */
    public static void main(String[] args) {
        System.out.println();
        System.out.println(">>> Tutorial step 11 (Boosting) example started.");

        // The Ignite node is started from the example XML config and closed by try-with-resources.
        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
            try {
                // Load the passengers data into a cache keyed by Integer with Vector rows.
                IgniteCache<Integer, Vector> dataCache = TitanicUtils.readPassengers(ignite);

                // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare".
                // Coordinate 1 of the source vector is used as the label.
                final Vectorizer<Integer, Vector, Integer, Double> vectorizer
                    = new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);

                // NOTE(review): presumably 0.75 is the training share of the train/test split - confirm.
                TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>()
                    .split(0.75);

                // String-encode features 1 and 6 of the vectorizer output.
                // NOTE(review): index 6 is the value the reporter changed for this repro; whether it points at
                // the intended column of the extracted vector is not verifiable from this snippet - confirm.
                Preprocessor<Integer, Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Vector>()
                    .withEncoderType(EncoderType.STRING_ENCODER)
                    .withEncodedFeature(1)
                    .withEncodedFeature(6) // <--- Changed index here.
                    .fit(ignite,
                        dataCache,
                        vectorizer
                    );

                // Each following preprocessor is fitted on top of the previous one, forming a chain:
                // encoder -> imputer -> min-max scaler -> normalizer.
                Preprocessor<Integer, Vector> imputingPreprocessor = new ImputerTrainer<Integer, Vector>()
                    .fit(ignite,
                        dataCache,
                        strEncoderPreprocessor
                    );

                Preprocessor<Integer, Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Vector>()
                    .fit(
                        ignite,
                        dataCache,
                        imputingPreprocessor
                    );

                Preprocessor<Integer, Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Vector>()
                    .withP(1)
                    .fit(
                        ignite,
                        dataCache,
                        minMaxScalerPreprocessor
                    );

                // Create classification trainer.
                // NOTE(review): the meaning of the positional arguments (0.01, 1000, 10, 0.) is not visible in
                // this snippet - confirm against the GDBBinaryClassifierOnTreesTrainer constructor Javadoc.
                DatasetTrainer<ModelsComposition, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.)
                    .withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01));

                // Train the model composition on the entries selected by the train filter of the split.
                ModelsComposition mdl = trainer.fit(
                    ignite,
                    dataCache,
                    split.getTrainFilter(),
                    normalizationPreprocessor
                );

                // String concatenation calls mdl.toString(); per the reported stack trace this is where the
                // IllegalArgumentException is raised (ModelsComposition.toString -> ... -> DecisionTree.printTree).
                System.out.println("\n>>> Trained model: " + mdl);

                // Evaluate accuracy on the entries selected by the test filter, using the same preprocessor chain.
                double accuracy = Evaluator.evaluate(
                    dataCache,
                    split.getTestFilter(),
                    mdl,
                    normalizationPreprocessor,
                    MetricName.ACCURACY
                );

                System.out.println("\n>>> Accuracy " + accuracy);
                System.out.println("\n>>> Test Error " + (1 - accuracy));

                System.out.println(">>> Tutorial step 11 (Boosting) example completed.");
            }
            // NOTE(review): presumably thrown by TitanicUtils.readPassengers when the data file is missing - confirm.
            catch (FileNotFoundException e) {
                e.printStackTrace();
            }
        }
        finally {
            // Flush standard output regardless of how the example finished.
            System.out.flush();
        }
    }
}

{code}
fails with the following exception:
{code:java}
Exception in thread "main" java.lang.IllegalArgumentException
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
    at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
    at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
    at java.lang.String.valueOf(String.java:2994)
    at java.lang.StringBuilder.append(StringBuilder.java:131)
    at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
    at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
    at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
    at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
    at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
    at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
    at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
    at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
    at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
    at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
    at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
    at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
    at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
    at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
    at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
    at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
    at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
    at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
    at java.lang.String.valueOf(String.java:2994)
    at java.lang.StringBuilder.append(StringBuilder.java:131)
    at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
{code}
 

 

> [ML] The method printTree is corrupted on the example
> -----------------------------------------------------
>
>                 Key: IGNITE-12269
>                 URL: https://issues.apache.org/jira/browse/IGNITE-12269
>             Project: Ignite
>          Issue Type: Bug
>          Components: ml
>    Affects Versions: 2.8
>            Reporter: Alexey Zinoviev
>            Assignee: Alexey Zinoviev
>            Priority: Major
>              Labels: await
>             Fix For: 2.8
>
>
> This example
> {code:java}
> package org.apache.ignite.examples.ml.tutorial;
> import java.io.FileNotFoundException;
> import org.apache.ignite.Ignite;
> import org.apache.ignite.IgniteCache;
> import org.apache.ignite.Ignition;
> import org.apache.ignite.ml.composition.ModelsComposition;
> import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory;
> import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory;
> import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer;
> import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer;
> import org.apache.ignite.ml.math.primitives.vector.Vector;
> import org.apache.ignite.ml.preprocessing.Preprocessor;
> import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer;
> import org.apache.ignite.ml.preprocessing.encoding.EncoderType;
> import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
> import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
> import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
> import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
> import org.apache.ignite.ml.selection.scoring.metric.MetricName;
> import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter;
> import org.apache.ignite.ml.selection.split.TrainTestSplit;
> import org.apache.ignite.ml.trainers.DatasetTrainer;
> import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
> /**
>  * {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example due to different values
>  * distribution in columns and rows.
>  * <p>
>  * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p>
>  * <p>
>  * After that it defines preprocessors that extract features from an upstream data and perform other desired changes
>  * over the extracted data, including the scaling.</p>
>  * <p>
>  * Then, it trains the model based on the processed data using decision tree classification.</p>
>  * <p>
>  * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p>
>  */
> public class Step_11_Boosting {
>     /**
>      * Run example.
>      */
>     public static void main(String[] args) {
>         System.out.println();
>         System.out.println(">>> Tutorial step 11 (Boosting) example started.");
>         try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
>             try {
>                 IgniteCache<Integer, Vector> dataCache = TitanicUtils.readPassengers(ignite);
>                 // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare".
>                 final Vectorizer<Integer, Vector, Integer, Double> vectorizer
>                     = new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);
>                 TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>()
>                     .split(0.75);
>                 Preprocessor<Integer, Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Vector>()
>                     .withEncoderType(EncoderType.STRING_ENCODER)
>                     .withEncodedFeature(1)
>                     .withEncodedFeature(6) // <--- Changed index here.
>                     .fit(ignite,
>                         dataCache,
>                         vectorizer
>                     );
>                 Preprocessor<Integer, Vector> imputingPreprocessor = new ImputerTrainer<Integer, Vector>()
>                     .fit(ignite,
>                         dataCache,
>                         strEncoderPreprocessor
>                     );
>                 Preprocessor<Integer, Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Vector>()
>                     .fit(
>                         ignite,
>                         dataCache,
>                         imputingPreprocessor
>                     );
>                 Preprocessor<Integer, Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Vector>()
>                     .withP(1)
>                     .fit(
>                         ignite,
>                         dataCache,
>                         minMaxScalerPreprocessor
>                     );
>                 // Create classification trainer.
>                 DatasetTrainer<ModelsComposition, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.)
>                     .withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01));
>                 // Train decision tree model.
>                 ModelsComposition mdl = trainer.fit(
>                     ignite,
>                     dataCache,
>                     split.getTrainFilter(),
>                     normalizationPreprocessor
>                 );
>                 System.out.println("\n>>> Trained model: " + mdl);
>                 double accuracy = Evaluator.evaluate(
>                     dataCache,
>                     split.getTestFilter(),
>                     mdl,
>                     normalizationPreprocessor,
>                     MetricName.ACCURACY
>                 );
>                 System.out.println("\n>>> Accuracy " + accuracy);
>                 System.out.println("\n>>> Test Error " + (1 - accuracy));
>                 System.out.println(">>> Tutorial step 11 (Boosting) example completed.");
>             }
>             catch (FileNotFoundException e) {
>                 e.printStackTrace();
>             }
>         }
>         finally {
>             System.out.flush();
>         }
>     }
> }
> {code}
> fails with the following exception:
> {code:java}
> Exception in thread "main" java.lang.IllegalArgumentException
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>     at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
>     at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
>     at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
>     at java.lang.String.valueOf(String.java:2994)
>     at java.lang.StringBuilder.append(StringBuilder.java:131)
>     at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
>     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
>     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
>     at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
>     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>     at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
>     at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
>     at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
>     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
>     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
>     at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
>     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>     at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
>     at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
>     at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
>     at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
>     at java.lang.String.valueOf(String.java:2994)
>     at java.lang.StringBuilder.append(StringBuilder.java:131)
>     at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
> {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)