You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by jo...@apache.org on 2009/11/13 22:21:37 UTC

svn commit: r835994 - in /poi/trunk/src: documentation/content/xdocs/ java/org/apache/poi/hssf/record/formula/functions/ java/org/apache/poi/ss/formula/ testcases/org/apache/poi/hssf/usermodel/

Author: josh
Date: Fri Nov 13 21:21:23 2009
New Revision: 835994

URL: http://svn.apache.org/viewvc?rev=835994&view=rev
Log:
Bugzilla 48195 - short-circuit evaluation of IF() and CHOOSE()

Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java
    poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/If.java
    poi/trunk/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java
    poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java
    poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Fri Nov 13 21:21:23 2009
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.6-beta1" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="add">48195 - short-circuit evaluation of IF() and CHOOSE()</action>
            <action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
            <action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
            <action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>

Modified: poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java Fri Nov 13 21:21:23 2009
@@ -23,7 +23,6 @@
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
 
 /**
- *
  * @author Josh Micich
  */
 public final class Choose implements Function {
@@ -34,8 +33,7 @@
 		}
 
 		try {
-			ValueEval ev = OperandResolver.getSingleValue(args[0], srcRowIndex, srcColumnIndex);
-			int ix = OperandResolver.coerceValueToInt(ev);
+			int ix = evaluateFirstArg(args[0], srcRowIndex, srcColumnIndex);
 			if (ix < 1 || ix >= args.length) {
 				return ErrorEval.VALUE_INVALID;
 			}
@@ -44,4 +42,10 @@
 			return e.getErrorEval();
 		}
 	}
+
+	public static int evaluateFirstArg(ValueEval arg0, int srcRowIndex, int srcColumnIndex)
+			throws EvaluationException {
+		ValueEval ev = OperandResolver.getSingleValue(arg0, srcRowIndex, srcColumnIndex);
+		return OperandResolver.coerceValueToInt(ev);
+	}
 }

Modified: poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/If.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/If.java?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/If.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/record/formula/functions/If.java Fri Nov 13 21:21:23 2009
@@ -53,7 +53,7 @@
 		return falseResult;
 	}
 
-	private static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, short srcCellCol)
+	public static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, int srcCellCol)
 			throws EvaluationException {
 		ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol);
 		Boolean b = OperandResolver.coerceValueToBoolean(ve, false);

Modified: poi/trunk/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java (original)
+++ poi/trunk/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java Fri Nov 13 21:21:23 2009
@@ -52,6 +52,7 @@
 import org.apache.poi.hssf.record.formula.eval.BlankEval;
 import org.apache.poi.hssf.record.formula.eval.BoolEval;
 import org.apache.poi.hssf.record.formula.eval.ErrorEval;
+import org.apache.poi.hssf.record.formula.eval.EvaluationException;
 import org.apache.poi.hssf.record.formula.eval.MissingArgEval;
 import org.apache.poi.hssf.record.formula.eval.NameEval;
 import org.apache.poi.hssf.record.formula.eval.NameXEval;
@@ -60,7 +61,9 @@
 import org.apache.poi.hssf.record.formula.eval.RefEval;
 import org.apache.poi.hssf.record.formula.eval.StringEval;
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.hssf.record.formula.functions.Choose;
 import org.apache.poi.hssf.record.formula.functions.FreeRefFunction;
+import org.apache.poi.hssf.record.formula.functions.If;
 import org.apache.poi.hssf.record.formula.udf.UDFFinder;
 import org.apache.poi.hssf.util.CellReference;
 import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException;
@@ -228,7 +231,7 @@
 	private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
 				int rowIndex, int columnIndex, EvaluationTracker tracker) {
 
-		// avoid tracking dependencies for cells that have constant definition
+		// avoid tracking dependencies to cells that have constant definition
 		boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
 					: !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
 		if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
@@ -344,6 +347,66 @@
 					byte nArgs = 1;  // tAttrSum always has 1 parameter
 					ptg = new FuncVarPtg("SUM", nArgs);
 				}
+				if (attrPtg.isOptimizedChoose()) {
+					ValueEval arg0 = stack.pop();
+					int[] jumpTable = attrPtg.getJumpTable();
+					int dist;
+					int nChoices = jumpTable.length;
+					try {
+						int switchIndex = Choose.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+						if (switchIndex<1 || switchIndex > nChoices) {
+							stack.push(ErrorEval.VALUE_INVALID);
+							dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+						} else {
+							dist = jumpTable[switchIndex-1];
+						}
+					} catch (EvaluationException e) {
+						stack.push(e.getErrorEval());
+						dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+					}
+					// Encoded dist for tAttrChoose includes size of jump table, but
+					// countTokensToBeSkipped() does not (it counts whole tokens).
+					dist -= nChoices*2+2; // subtract jump table size
+					i+= countTokensToBeSkipped(ptgs, i, dist);
+					continue;
+				}
+				if (attrPtg.isOptimizedIf()) {
+					ValueEval arg0 = stack.pop();
+					boolean evaluatedPredicate;
+					try {
+						evaluatedPredicate = If.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+					} catch (EvaluationException e) {
+						stack.push(e.getErrorEval());
+						int dist = attrPtg.getData();
+						i+= countTokensToBeSkipped(ptgs, i, dist);
+						attrPtg = (AttrPtg) ptgs[i];
+						dist = attrPtg.getData()+1;
+						i+= countTokensToBeSkipped(ptgs, i, dist);
+						continue;
+					}
+					if (evaluatedPredicate) {
+						// nothing to skip - true param folows
+					} else {
+						int dist = attrPtg.getData();
+						i+= countTokensToBeSkipped(ptgs, i, dist);
+						Ptg nextPtg = ptgs[i+1];
+						if (ptgs[i] instanceof AttrPtg && nextPtg instanceof FuncVarPtg) {
+							// this is an if statement without a false param (as opposed to MissingArgPtg as the false param)
+							i++;
+							stack.push(BoolEval.FALSE);
+						}
+					}
+					continue;
+				}
+				if (attrPtg.isSkip()) {
+					int dist = attrPtg.getData()+1;
+					i+= countTokensToBeSkipped(ptgs, i, dist);
+					if (stack.peek() == MissingArgEval.instance) {
+						stack.pop();
+						stack.push(BlankEval.INSTANCE);
+					}
+					continue;
+				}
 			}
 			if (ptg instanceof ControlPtg) {
 				// skip Parentheses, Attr, etc
@@ -403,6 +466,27 @@
 	}
 
 	/**
+	 * Calculates the number of tokens that the evaluator should skip upon reaching a tAttrSkip.
+	 *
+	 * @return the number of tokens (starting from <tt>startIndex+1</tt>) that need to be skipped
+	 * to achieve the specified <tt>distInBytes</tt> skip distance.
+	 */
+	private static int countTokensToBeSkipped(Ptg[] ptgs, int startIndex, int distInBytes) {
+		int remBytes = distInBytes;
+		int index = startIndex;
+		while (remBytes != 0) {
+			index++;
+			remBytes -= ptgs[index].getSize();
+			if (remBytes < 0) {
+				throw new RuntimeException("Bad skip distance (wrong token size calculation).");
+			}
+			if (index >= ptgs.length) {
+				throw new RuntimeException("Skip distance too far (ran out of formula tokens).");
+			}
+		}
+		return index-startIndex;
+	}
+	/**
 	 * Dereferences a single value from any AreaEval or RefEval evaluation result.
 	 * If the supplied evaluationResult is just a plain value, it is returned as-is.
 	 * @return a <tt>NumberEval</tt>, <tt>StringEval</tt>, <tt>BoolEval</tt>,

Modified: poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java Fri Nov 13 21:21:23 2009
@@ -31,6 +31,7 @@
 import org.apache.poi.hssf.record.formula.AreaPtg;
 import org.apache.poi.hssf.record.formula.FuncVarPtg;
 import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.ErrorEval;
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
 import org.apache.poi.ss.formula.EvaluationCell;
 import org.apache.poi.ss.formula.EvaluationListener;
@@ -319,6 +320,10 @@
 	 * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
 	 */
 	public void testSlowEvaluate45376() {
+		/*
+		 * Note - to observe behaviour without caching, disable the call to
+		 * updateValue() from FormulaCellCacheEntry.updateFormulaResult().
+		 */
 
 		// Firstly set up a sequence of formula cells where each depends on the  previous multiple
 		// times.  Without caching, each subsequent cell take about 4 times longer to evaluate.
@@ -330,30 +335,39 @@
 			char prevCol = (char) ('A' + i-1);
 			String prevCell = prevCol + "1";
 			// this formula is inspired by the offending formula of the attachment for bug 45376
+			// IF(DATE(YEAR(A1),MONTH(A1)+1,1)<=$D$3,DATE(YEAR(A1),MONTH(A1)+1,1),NA()) etc
 			String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," +
 					"DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())";
 			cell.setCellFormula(formula);
-
 		}
 		Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0);
 		row.createCell(0).setCellValue(cal);
 
-		// Choose cell A9, so that the failing test case doesn't take too long to execute.
+		// Choose cell A9 instead of A10, so that the failing test case doesn't take too long to execute.
 		HSSFCell cell = row.getCell(8);
 		EvalListener evalListener = new EvalListener();
 		WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
-		evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
+		ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
 		int evalCount = evalListener.getCountCacheMisses();
 		if (evalCount > 10) {
 			// Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes
 			// much time (~3 sec on Core 2 Duo 2.2GHz)
+			// short-circuit-if optimisation cuts this down to 255 evaluations which is still too high
 			System.err.println("Cell A9 took " + evalCount + " intermediate evaluations");
 			throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values");
 		}
-		// With caching, the evaluationCount is 8 which is a big improvement
-		// Note - these expected values may change if the WorkbookEvaluator is
-		// ever optimised to short circuit 'if' functions.
+		// With caching, the evaluationCount is 8 which is exactly the
+		// number of formula cells that needed to be evaluated.
 		assertEquals(8, evalCount);
-		assertEquals(24, evalListener.getCountCacheHits());
+
+		// The cache hits would be 24 if fully evaluating all arguments of the
+		// "IF()" functions (Each of the 8 formulas has 4 refs to formula cells
+		// which result in 1 cache miss and 3 cache hits). However with the
+		// short-circuit-if optimisation, 2 of the cell refs get skipped
+		// reducing this metric 8.
+		assertEquals(8, evalListener.getCountCacheHits());
+
+		// confirm the evaluation result too
+		assertEquals(ErrorEval.NA, ve);
 	}
 }

Modified: poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java?rev=835994&r1=835993&r2=835994&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java Fri Nov 13 21:21:23 2009
@@ -22,6 +22,13 @@
 
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.record.NameRecord;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.NumberEval;
+import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.ss.formula.EvaluationCell;
+import org.apache.poi.ss.formula.EvaluationListener;
+import org.apache.poi.ss.formula.WorkbookEvaluator;
+import org.apache.poi.ss.formula.WorkbookEvaluatorTestHelper;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellValue;
 /**
@@ -167,4 +174,46 @@
 		assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType());
 		assertEquals(5.33, value.getNumberValue(), 0.0);
 	}
+	private static final class EvalCountListener extends EvaluationListener {
+		private int _evalCount;
+		public EvalCountListener() {
+			_evalCount = 0;
+		}
+		public void onStartEvaluate(EvaluationCell cell, ICacheEntry entry, Ptg[] ptgs) {
+			_evalCount++;
+		}
+		public int getEvalCount() {
+			return _evalCount;
+		}
+	}
+
+	/**
+	 * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
+	 */
+	public void testShortCircuitIfEvaluation() {
+
+		// Set up a simple IF() formula that has measurable evaluation cost for its operands.
+		HSSFWorkbook wb = new HSSFWorkbook();
+		HSSFSheet sheet = wb.createSheet("Sheet1");
+		HSSFRow row = sheet.createRow(0);
+		HSSFCell cellA1 = row.createCell(0);
+		cellA1.setCellFormula("if(B1,C1,D1+E1+F1)");
+		// populate cells B1..F1 with simple formulas instead of plain values so we can use
+		// EvaluationListener to check which parts of the first formula get evaluated
+		for (int i=1; i<6; i++) {
+			// formulas are just literal constants "1".."5"
+			row.createCell(i).setCellFormula(String.valueOf(i));
+		}
+
+		EvalCountListener evalListener = new EvalCountListener();
+		WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
+		ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cellA1));
+		int evalCount = evalListener.getEvalCount();
+		if (evalCount == 6) {
+			// Without short-circuit-if evaluation, evaluating cell 'A1' takes 3 extra evaluations (for D1,E1,F1)
+			throw new AssertionFailedError("Identifed bug 48195 - Formula evaluator should short-circuit IF() calculations.");
+		}
+		assertEquals(3, evalCount);
+		assertEquals(2.0, ((NumberEval)ve).getNumberValue(), 0D);
+	}
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org