You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2014/10/12 17:39:32 UTC

svn commit: r1631191 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/gdal/GDALParser.java test/java/org/apache/tika/parser/gdal/TestGDALParser.java

Author: mattmann
Date: Sun Oct 12 15:39:31 2014
New Revision: 1631191

URL: http://svn.apache.org/r1631191
Log:
- TIKA-605: deal with heading boundaries; add associated unit tests that expose the bug and guard against regression

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1631191&r1=1631190&r2=1631191&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java Sun Oct 12 15:39:31 2014
@@ -297,10 +297,11 @@ public class GDALParser extends Abstract
 	private void extractMetFromOutput(String output, Metadata met) {
 		Scanner scanner = new Scanner(output);
 		String currentKey = null;
+		String[] headings = {"Subdatasets", "Corner Coordinates"};
 		StringBuilder metVal = new StringBuilder();
 		while (scanner.hasNextLine()) {
 			String line = scanner.nextLine();
-			if (line.contains("=")) {
+			if (line.contains("=") || hasHeadings(line, headings)) {
 				if (currentKey != null) {
 					// time to flush this key and met val
 					met.add(currentKey, metVal.toString());
@@ -320,6 +321,18 @@ public class GDALParser extends Abstract
 
 		}
 	}
+	
+	/** Tells whether {@code line} contains any of {@code headings}; a null or empty array never matches. */
+	private boolean hasHeadings(String line, String[] headings){
+		if (headings != null) {
+			for (String candidate : headings) {
+				if (line.contains(candidate)) {
+					return true;
+				}
+			}
+		}
+		return false;
+	}
 
 	private void applyPatternsToOutput(String output, Metadata metadata,
 			Map<Pattern, String> metadataPatterns) {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java?rev=1631191&r1=1631190&r2=1631191&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java Sun Oct 12 15:39:31 2014
@@ -30,6 +30,7 @@ import org.apache.tika.sax.BodyContentHa
 //Junit imports
 import org.junit.Test;
 import static org.junit.Assert.fail;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assume.assumeTrue;
@@ -81,7 +82,6 @@ public class TestGDALParser extends Tika
 			assertEquals(expectedLowerRight, met.get("Lower Right"));
 			assertNotNull(met.get("Upper Right"));
 			assertEquals(expectedLowerLeft, met.get("Lower Left"));
-
 		} catch (Exception e) {
 			e.printStackTrace();
 			fail(e.getMessage());
@@ -91,11 +91,14 @@ public class TestGDALParser extends Tika
 	@Test
 	public void testParseMetadata() {
 		assumeTrue(canRun());
-		String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)";
-		String expectedModelNameEnglish = "NCAR CCSM";
-		String expectedProgramId = "Source file unknown Version unknown Date unknown";
-		String expectedProjectId = "IPCC Fourth Assessment";
-		String expectedRealization = "1";
+		final String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)";
+		final String expectedModelNameEnglish = "NCAR CCSM";
+		final String expectedProgramId = "Source file unknown Version unknown Date unknown";
+		final String expectedProjectId = "IPCC Fourth Assessment";
+		final String expectedRealization = "1";
+		final String expectedTitle = "model output prepared for IPCC AR4";
+		final String expectedSub8Name = "\":ua";
+		final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit floating-point)";
 
 		GDALParser parser = new GDALParser();
 		InputStream stream = TestGDALParser.class
@@ -118,6 +121,12 @@ public class TestGDALParser extends Tika
 			assertEquals(expectedProjectId, met.get("NC_GLOBAL#project_id"));
 			assertNotNull(met.get("NC_GLOBAL#realization"));
 			assertEquals(expectedRealization, met.get("NC_GLOBAL#realization"));
+			assertNotNull(met.get("NC_GLOBAL#title"));
+			assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
+			assertNotNull(met.get("SUBDATASET_8_NAME"));
+			assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
+			assertNotNull(met.get("SUBDATASET_8_DESC"));
+			assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
 		} catch (Exception e) {
 			e.printStackTrace();
 			fail(e.getMessage());