You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2013/07/19 18:35:10 UTC
svn commit: r1504935 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java
Author: clin
Date: Fri Jul 19 16:35:10 2013
New Revision: 1504935
URL: http://svn.apache.org/r1504935
Log:
Add a Yates' correction parameter to Chi2 feature selection, so that users can turn Yates' correction on or off when the Chi2 value is calculated.
If Yates' correction is on (boolean yates = true), small differences between the observed value and the expected value (<0.5) will be ignored, so more features will be trimmed.
Otherwise, small differences will be kept, which gives users the freedom to keep all features.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java?rev=1504935&r1=1504934&r2=1504935&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/selection/Chi2FeatureSelection.java Fri Jul 19 16:35:10 2013
@@ -38,9 +38,12 @@ public class Chi2FeatureSelection<OUTCOM
protected Table<String, OUTCOME_T, Integer> featValueClassCount;
- public Chi2Scorer() {
+ private boolean yates = false;
+
+ public Chi2Scorer(boolean yate) {
this.classCounts = HashMultiset.<OUTCOME_T> create();
this.featValueClassCount = HashBasedTable.<String, OUTCOME_T, Integer> create();
+ this.yates = yate;
}
public void update(String featureName, OUTCOME_T outcome, int occurrences) {
@@ -88,13 +91,12 @@ public class Chi2FeatureSelection<OUTCOM
return chi2val;
}
- boolean yates = true;
for (int lbl = 0; lbl < numOfClass; lbl++) {
// for positive part of feature:
double expected = (outcomeCounts[lbl] / (double) n) * (posiFeatCount);
if (expected > 0) {
double diff = Math.abs(posiOutcomeCounts[lbl] - expected);
- if (yates) { // apply Yate's correction
+ if (this.yates ) { // apply Yate's correction
diff -= 0.5;
}
if (diff > 0)
@@ -106,7 +108,7 @@ public class Chi2FeatureSelection<OUTCOM
double observ = outcomeCounts[lbl] - posiOutcomeCounts[lbl];
if (expected > 0) {
double diff = Math.abs(observ - expected);
- if (yates) { // apply Yate's correction
+ if (this.yates) { // apply Yate's correction
diff -= 0.5;
}
if (diff > 0)
@@ -121,6 +123,8 @@ public class Chi2FeatureSelection<OUTCOM
private double chi2Threshold;
private Chi2Scorer<OUTCOME_T> chi2Function;
+
+ private boolean yates = false;
public Chi2FeatureSelection(String name) {
this(name, 0.0);
@@ -131,6 +135,17 @@ public class Chi2FeatureSelection<OUTCOM
this.chi2Threshold = threshold;
}
+ /**
+ * Constructor that can let use control the yate's correction
+ * @param name
+ * @param threshold
+ * @param yates : true for using yate's correction, false for turn off yate's correction
+ */
+ public Chi2FeatureSelection(String name, double threshold, boolean yates) {
+ super(name);
+ this.chi2Threshold = threshold;
+ this.yates = yates;
+ }
@Override
public boolean apply(Feature feature) {
return this.selectedFeatureNames.contains(this.getFeatureName(feature));
@@ -139,7 +154,7 @@ public class Chi2FeatureSelection<OUTCOM
@Override
public void train(Iterable<Instance<OUTCOME_T>> instances) {
// aggregate statistics for all features
- this.chi2Function = new Chi2Scorer<OUTCOME_T>();
+ this.chi2Function = new Chi2Scorer<OUTCOME_T>(this.yates);
for (Instance<OUTCOME_T> instance : instances) {
OUTCOME_T outcome = instance.getOutcome();
for (Feature feature : instance.getFeatures()) {