You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "Alexey Zinoviev (Jira)" <ji...@apache.org> on 2019/10/08 12:58:00 UTC
[jira] [Updated] (IGNITE-12269) [ML] The method printTree is
corrupted on the example
[ https://issues.apache.org/jira/browse/IGNITE-12269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alexey Zinoviev updated IGNITE-12269:
-------------------------------------
Description:
This example
{code:java}
package org.apache.ignite.examples.ml.tutorial;
import java.io.FileNotFoundException;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;
import org.apache.ignite.ml.composition.ModelsComposition;
import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory;
import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory;
import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer;
import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.apache.ignite.ml.preprocessing.Preprocessor;
import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer;
import org.apache.ignite.ml.preprocessing.encoding.EncoderType;
import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
import org.apache.ignite.ml.selection.scoring.metric.MetricName;
import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter;
import org.apache.ignite.ml.selection.split.TrainTestSplit;
import org.apache.ignite.ml.trainers.DatasetTrainer;
import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
/**
* Tutorial step 11: boosting.
* <p>
* {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example because the value
* distributions differ across columns and rows.</p>
* <p>
* Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p>
* <p>
* After that it defines preprocessors that extract features from an upstream data and perform other desired changes
* over the extracted data, including the scaling.</p>
* <p>
* Then, it trains a {@link ModelsComposition} on the processed data using
* {@link GDBBinaryClassifierOnTreesTrainer}.</p>
* <p>
* Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p>
*/
public class Step_11_Boosting {
/**
* Run example.
*
* @param args Command line arguments (not used by this example).
*/
public static void main(String[] args) {
System.out.println();
System.out.println(">>> Tutorial step 11 (Boosting) example started.");
// The Ignite instance is opened in try-with-resources, so it is closed when the block exits.
try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
try {
// Obtain the cache with the passengers data; the declared FileNotFoundException is handled below.
IgniteCache<Integer, Vector> dataCache = TitanicUtils.readPassengers(ignite);
// Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare".
// NOTE(review): labeled(1) presumably marks column 1 of the raw row as the label - confirm.
final Vectorizer<Integer, Vector, Integer, Double> vectorizer
= new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);
// Split with ratio 0.75; the train and test filters used below both come from this split.
TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>()
.split(0.75);
// Preprocessing chain: every preprocessor below is fitted on top of the previous one.
// Step 1: string encoding of features 1 and 6, fitted on top of the vectorizer.
Preprocessor<Integer, Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Vector>()
.withEncoderType(EncoderType.STRING_ENCODER)
.withEncodedFeature(1)
.withEncodedFeature(6) // <--- Changed index here.
.fit(ignite,
dataCache,
vectorizer
);
// Step 2: imputing, fitted on top of the string encoder.
Preprocessor<Integer, Vector> imputingPreprocessor = new ImputerTrainer<Integer, Vector>()
.fit(ignite,
dataCache,
strEncoderPreprocessor
);
// Step 3: min-max scaling, fitted on top of the imputer.
Preprocessor<Integer, Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Vector>()
.fit(
ignite,
dataCache,
imputingPreprocessor
);
// Step 4: normalization configured with p = 1, fitted on top of the min-max scaler.
Preprocessor<Integer, Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Vector>()
.withP(1)
.fit(
ignite,
dataCache,
minMaxScalerPreprocessor
);
// Create classification trainer.
// NOTE(review): the constructor arguments (0.01, 1000, 10, 0.) are presumably gradient step size,
// iteration count, max tree depth and min impurity decrease - confirm against the trainer's Javadoc.
DatasetTrainer<ModelsComposition, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.)
.withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01));
// Train the model composition on the entries accepted by the train filter of the split.
ModelsComposition mdl = trainer.fit(
ignite,
dataCache,
split.getTrainFilter(),
normalizationPreprocessor
);
// The string concatenation below invokes mdl.toString(). According to the stack trace in this report,
// that call chain (ModelsComposition.toString -> ModelTrace -> DecisionTreeConditionalNode.toString ->
// DecisionTree.printTree) is where the reported IllegalArgumentException is thrown.
System.out.println("\n>>> Trained model: " + mdl);
// Evaluate accuracy on the entries accepted by the test filter, using the same final preprocessor.
double accuracy = Evaluator.evaluate(
dataCache,
split.getTestFilter(),
mdl,
normalizationPreprocessor,
MetricName.ACCURACY
);
System.out.println("\n>>> Accuracy " + accuracy);
System.out.println("\n>>> Test Error " + (1 - accuracy));
System.out.println(">>> Tutorial step 11 (Boosting) example completed.");
}
// A missing data file is only reported to stderr; nothing is rethrown.
catch (FileNotFoundException e) {
e.printStackTrace();
}
}
finally {
// Flush standard output regardless of how the example ended.
System.out.flush();
}
}
}
{code}
fails with the following exception:
{code:java}
Exception in thread "main" java.lang.IllegalArgumentException
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
 at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
 at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
 at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
 at java.lang.String.valueOf(String.java:2994)
 at java.lang.StringBuilder.append(StringBuilder.java:131)
 at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
 at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
 at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
 at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
 at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
 at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
 at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
 at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
 at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
 at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
 at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
 at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
 at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
 at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
 at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
 at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
 at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
 at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
 at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
 at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
 at java.lang.String.valueOf(String.java:2994)
 at java.lang.StringBuilder.append(StringBuilder.java:131)
 at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
{code}
> [ML] The method printTree is corrupted on the example
> -----------------------------------------------------
>
> Key: IGNITE-12269
> URL: https://issues.apache.org/jira/browse/IGNITE-12269
> Project: Ignite
> Issue Type: Bug
> Components: ml
> Affects Versions: 2.8
> Reporter: Alexey Zinoviev
> Assignee: Alexey Zinoviev
> Priority: Major
> Labels: await
> Fix For: 2.8
>
>
> This example
> {code:java}
> package org.apache.ignite.examples.ml.tutorial;
> import java.io.FileNotFoundException;
> import org.apache.ignite.Ignite;
> import org.apache.ignite.IgniteCache;
> import org.apache.ignite.Ignition;
> import org.apache.ignite.ml.composition.ModelsComposition;
> import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory;
> import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory;
> import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer;
> import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer;
> import org.apache.ignite.ml.math.primitives.vector.Vector;
> import org.apache.ignite.ml.preprocessing.Preprocessor;
> import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer;
> import org.apache.ignite.ml.preprocessing.encoding.EncoderType;
> import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
> import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
> import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
> import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
> import org.apache.ignite.ml.selection.scoring.metric.MetricName;
> import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter;
> import org.apache.ignite.ml.selection.split.TrainTestSplit;
> import org.apache.ignite.ml.trainers.DatasetTrainer;
> import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
> /**
> * {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example due to different values
> * distribution in columns and rows.
> * <p>
> * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p>
> * <p>
> * After that it defines preprocessors that extract features from an upstream data and perform other desired changes
> * over the extracted data, including the scaling.</p>
> * <p>
> * Then, it trains the model based on the processed data using decision tree classification.</p>
> * <p>
> * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p>
> */
> public class Step_11_Boosting {
> /**
> * Run example.
> */
> public static void main(String[] args) {
> System.out.println();
> System.out.println(">>> Tutorial step 11 (Boosting) example started.");
> try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
> try {
> IgniteCache<Integer, Vector> dataCache = TitanicUtils.readPassengers(ignite);
> // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare".
> final Vectorizer<Integer, Vector, Integer, Double> vectorizer
> = new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);
> TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>()
> .split(0.75);
> Preprocessor<Integer, Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Vector>()
> .withEncoderType(EncoderType.STRING_ENCODER)
> .withEncodedFeature(1)
> .withEncodedFeature(6) // <--- Changed index here.
> .fit(ignite,
> dataCache,
> vectorizer
> );
> Preprocessor<Integer, Vector> imputingPreprocessor = new ImputerTrainer<Integer, Vector>()
> .fit(ignite,
> dataCache,
> strEncoderPreprocessor
> );
> Preprocessor<Integer, Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Vector>()
> .fit(
> ignite,
> dataCache,
> imputingPreprocessor
> );
> Preprocessor<Integer, Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Vector>()
> .withP(1)
> .fit(
> ignite,
> dataCache,
> minMaxScalerPreprocessor
> );
> // Create classification trainer.
> DatasetTrainer<ModelsComposition, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.)
> .withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01));
> // Train decision tree model.
> ModelsComposition mdl = trainer.fit(
> ignite,
> dataCache,
> split.getTrainFilter(),
> normalizationPreprocessor
> );
> System.out.println("\n>>> Trained model: " + mdl);
> double accuracy = Evaluator.evaluate(
> dataCache,
> split.getTestFilter(),
> mdl,
> normalizationPreprocessor,
> MetricName.ACCURACY
> );
> System.out.println("\n>>> Accuracy " + accuracy);
> System.out.println("\n>>> Test Error " + (1 - accuracy));
> System.out.println(">>> Tutorial step 11 (Boosting) example completed.");
> }
> catch (FileNotFoundException e) {
> e.printStackTrace();
> }
> }
> finally {
> System.out.flush();
> }
> }
> }
> {code}
> fails with the following exception:
> {code:java}
> Exception in thread "main" java.lang.IllegalArgumentException
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
>  at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
>  at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
>  at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
>  at java.lang.String.valueOf(String.java:2994)
>  at java.lang.StringBuilder.append(StringBuilder.java:131)
>  at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
>  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>  at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
>  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
>  at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
>  at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
>  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>  at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
>  at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
>  at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
>  at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>  at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
>  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
>  at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
>  at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
>  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>  at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
>  at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
>  at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
>  at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
>  at java.lang.String.valueOf(String.java:2994)
>  at java.lang.StringBuilder.append(StringBuilder.java:131)
>  at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
> {code}
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)