You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/09/15 18:16:49 UTC
svn commit: r1385103 - in /nutch/branches/2.x: ./ conf/ src/plugin/ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/ src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/ src/plugin/parse-js/ src/plu...

Author: lewismc
Date: Sat Sep 15 16:16:48 2012
New Revision: 1385103

URL: http://svn.apache.org/viewvc?rev=1385103&view=rev
Log:
NUTCH-1162 Write JUnit tests for parse-js

Added:
    nutch/branches/2.x/src/plugin/parse-js/sample/
    nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html
    nutch/branches/2.x/src/plugin/parse-js/src/test/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/
    nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java
Removed:
    nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/package.html
Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/nutch-default.xml
    nutch/branches/2.x/src/plugin/build.xml
    nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
    nutch/branches/2.x/src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/TestRelTagParser.java
    nutch/branches/2.x/src/plugin/parse-js/build.xml
    nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
    nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Sep 15 16:16:48 2012
@@ -2,6 +2,12 @@ Nutch Change Log
 
 Release 2.1 - Current Development
 
+* NUTCH-1162 Write JUnit tests for parse-js (lewismc)
+
+* NUTCH-1161 Write JUnit tests for microformats-reltag plugin (lewismc)
+
+* NUTCH-1160 Write JUnit tests for index-basic (lewismc)
+
 * NUTCH-1456 Updater not setting batchId in markers correctly. (Alexander Kingson via ferdy)
 
 * NUTCH-1459 Remove dead code (phase2) from InjectorJob (ferdy)

Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Sat Sep 15 16:16:48 2012
@@ -749,6 +749,8 @@
   effect.</description>
 </property>
 
+<!-- BasicIndexingFilter plugin properties -->
+
 <property>
   <name>indexer.max.title.length</name>
   <value>100</value>

Modified: nutch/branches/2.x/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/build.xml?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/build.xml (original)
+++ nutch/branches/2.x/src/plugin/build.xml Sat Sep 15 16:16:48 2012
@@ -74,6 +74,7 @@
      <ant dir="parse-tika" target="test"/>
      <ant dir="protocol-file" target="test"/>
      <ant dir="parse-html" target="test"/>
+  	 <ant dir="parse-js" target="test"/>
      <ant dir="index-anchor" target="test"/>
      <ant dir="index-basic" target="test"/>
      <ant dir="index-more" target="test"/>

Modified: nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java (original)
+++ nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java Sat Sep 15 16:16:48 2012
@@ -135,7 +135,13 @@ public class RelTagParser implements Par
 	FIELDS.add(WebPage.Field.BASE_URL);
 	FIELDS.add(WebPage.Field.METADATA);
   }
-	
+  
+  /**
+   * Gets all the fields for a given {@link WebPage}
+   * Many datastores need to setup the mapreduce job by specifying the fields
+   * needed. All extensions that work on WebPage are able to specify what fields
+   * they need.
+   */
   @Override
   public Collection<Field> getFields() {
 	return FIELDS;

Modified: nutch/branches/2.x/src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/TestRelTagParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/TestRelTagParser.java?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/TestRelTagParser.java (original)
+++ nutch/branches/2.x/src/plugin/microformats-reltag/src/test/org/apache/nutch/microformats/reltag/TestRelTagParser.java Sat Sep 15 16:16:48 2012
@@ -27,17 +27,20 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.ParseUtil;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.junit.Test;
-
 import junit.framework.TestCase;
 
 /**
- * Junit test for {@link RelTagParser} based on John Xing's parser tests.
+ * Junit test for {@link RelTagParser} based mainly on John Xing's parser tests.
+ * We are not concerned with actual parse text within the sample file, instead
+ * we assert that the rel-tags we expect are found in the WebPage metadata.
+ * To check the parser is working as expected we unwrap the ByteBuffer obtained
+ * from metadata into the same type as we use in expected (String), i.e. the
+ * reverse of how the metadata value was wrapped.
  * 
  * @author lewismc
  *
@@ -51,10 +54,10 @@ public class TestRelTagParser extends Te
 
   // Make sure sample files are copied to "test.data" as specified in
   // ./src/plugin/microformats-reltag/build.xml during plugin compilation.
-
-  private String[] sampleFile = { "microformats_reltag_test.html" };
+  private String sampleFile = "microformats_reltag_test.html";
   
-  private String expectedText = "rel=\"tag\" · Microformats Wiki";
+  // rel-tags we expect to be extracted from page.getMetadata()
+  private String expectedRelTags = "Category:Specifications	Category:rel-tag	";
   
   private Configuration conf;
   
@@ -62,42 +65,35 @@ public class TestRelTagParser extends Te
     super(name);
   }
   
-  protected void setUp() {
-    conf = NutchConfiguration.create();
+  @Test
+  public void testRelTagParser() throws ProtocolException, ParseException, IOException {
+	conf = NutchConfiguration.create();
 	conf.set("file.content.limit", "-1");
-  }
-
-  protected void tearDown() {
-  }
-  
-  public String getTextContent(String fileName) throws ProtocolException, ParseException, IOException {
 	Parse parse;
-	String urlString = sampleDir + fileSeparator + fileName;
+	String urlString = "file:" + sampleDir + fileSeparator + sampleFile;
 
-	File file = new File(urlString);
+	File file = new File(sampleDir + fileSeparator + sampleFile);
 	byte[] bytes = new byte[(int) file.length()];
 	DataInputStream in = new DataInputStream(new FileInputStream(file));
 	in.readFully(bytes);
 	in.close();
 
 	WebPage page = new WebPage();
-	page.setBaseUrl(new Utf8("file:"+urlString));
+	page.setBaseUrl(new Utf8(urlString));
 	page.setContent(ByteBuffer.wrap(bytes));
 	MimeUtil mimeutil = new MimeUtil(conf);
 	String mtype = mimeutil.getMimeType(file);
 	page.setContentType(new Utf8(mtype));
-	parse = new ParseUtil(conf).parse("file:"+urlString, page);
-
-	return parse.getText();
+	parse = new ParseUtil(conf).parse(urlString, page);
+    
+	//begin assertion for tests
+	ByteBuffer bbuf = page.getFromMetadata(new Utf8("Rel-Tag"));
+	byte[] byteArray = new byte[bbuf.remaining()];
+	bbuf.get(byteArray);
+	String s = new String(byteArray);
+	//bbuf.flip();
+	assertEquals("We expect 2 tab-separated rel-tag's extracted by the filter", 
+	  expectedRelTags, s);
   }
   
-  @Test
-  public void testRelTagParser() throws ProtocolException, ParseException, IOException {
-
-	for (int i = 0; i < sampleFile.length; i++) {
-	  String found = getTextContent(sampleFile[i]);
-	  assertTrue("text found : '" + found + "'", found.startsWith(expectedText));
-	}
-  }
-	
 }
\ No newline at end of file

Modified: nutch/branches/2.x/src/plugin/parse-js/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-js/build.xml?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-js/build.xml (original)
+++ nutch/branches/2.x/src/plugin/parse-js/build.xml Sat Sep 15 16:16:48 2012
@@ -19,4 +19,18 @@
 
   <import file="../build-plugin.xml"/>
 
+  <!-- Deploy Unit test dependencies -->
+  <target name="deps-test">
+    <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+    <ant target="deploy" inheritall="false" dir="../protocol-file"/>
+  </target>
+
+  <!-- for junit test -->
+  <mkdir dir="${build.test}/data"/>
+  <copy todir="${build.test}/data">
+    <fileset dir="sample">
+      <include name="*.html"/>
+      <include name="*.js"/>
+    </fileset>
+  </copy>
 </project>

Modified: nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java (original)
+++ nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java Sat Sep 15 16:16:48 2012
@@ -69,6 +69,15 @@ public class JSParseFilter implements Pa
 
   private Configuration conf;
 
+  /**
+   * Scan the JavaScript looking for possible {@link Outlink}'s
+   * @param url URL of the {@link WebPage} to be parsed 
+   * @param page {@link WebPage} object relative to the URL
+   * @param parse {@link Parse} object holding parse status
+   * @param metaTags the {@link HTMLMetaTags} passed to the filter
+   * @param doc The {@link DocumentFragment} object
+   * @return parse the actual {@link Parse} object
+   */
   @Override
   public Parse filter(String url, WebPage page, Parse parse,
       HTMLMetaTags metaTags, DocumentFragment doc) {
@@ -104,9 +113,10 @@ public class JSParseFilter implements Pa
             if (i > 0) script.append('\n');
             script.append(nn.item(i).getNodeValue());
           }
-          // if (LOG.isInfoEnabled()) {
-          //   LOG.info("script: language=" + lang + ", text: " + script.toString());
-          // }
+          // This logging makes the output very messy.
+          //if (LOG.isInfoEnabled()) {
+          //  LOG.info("script: language=" + lang + ", text: " + script.toString());
+          //}
           Outlink[] links = getJSLinks(script.toString(), "", base);
           if (links != null && links.length > 0) outlinks.addAll(Arrays.asList(links));
           // no other children of interest here, go one level up.
@@ -141,6 +151,12 @@ public class JSParseFilter implements Pa
     }
   }
 
+  /**
+   * Get the {@link Parse} for the given {@link WebPage}
+   * @param url URL of the {@link WebPage} which is parsed
+   * @param page {@link WebPage} object relative to the URL
+   * @return parse the actual {@link Parse} object
+   */
   @Override
   public Parse getParse(String url, WebPage page) {
     String type = TableUtil.toString(page.getContentType());
@@ -182,7 +198,9 @@ public class JSParseFilter implements Pa
     try {
       baseURL = new URL(base);
     } catch (Exception e) {
-      if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", e); }
+      if (LOG.isErrorEnabled()) { 
+        LOG.error("error assigning base URL", e); 
+      }
     }
 
     try {
@@ -207,7 +225,9 @@ public class JSParseFilter implements Pa
         url = result.group(2);
         PatternMatcherInput input1 = new PatternMatcherInput(url);
         if (!matcher1.matches(input1, pattern1)) {
-          //if (LOG.isTraceEnabled()) { LOG.trace(" - invalid '" + url + "'"); }
+          if (LOG.isTraceEnabled()) { 
+        	LOG.trace(" - invalid '" + url + "'"); 
+          }
           continue;
         }
         if (url.startsWith("www.")) {
@@ -234,7 +254,9 @@ public class JSParseFilter implements Pa
     } catch (Exception ex) {
       // if it is a malformed URL we just throw it away and continue with
       // extraction.
-      if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", ex); }
+      if (LOG.isErrorEnabled()) { 
+        LOG.error(" - invalid or malformed URL", ex); 
+      }
     }
 
     final Outlink[] retval;
@@ -249,6 +271,12 @@ public class JSParseFilter implements Pa
     return retval;
   }
 
+  /**
+   * Main method which can be run from command line with the plugin option.
+   * The method takes two arguments e.g. o.a.n.parse.js.JSParseFilter file.js baseURL  
+   * @param args
+   * @throws Exception
+   */
   public static void main(String[] args) throws Exception {
     if (args.length < 2) {
       System.err.println(JSParseFilter.class.getName() + " file.js baseURL");
@@ -267,14 +295,26 @@ public class JSParseFilter implements Pa
       System.out.println(" - " + links[i]);
   }
 
+  /**
+   * Set the {@link Configuration} object
+   */
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
 
+  /**
+   * Get the {@link Configuration} object
+   */
   public Configuration getConf() {
     return this.conf;
   }
 
+  /**
+   * Gets all the fields for a given {@link WebPage}
+   * Many datastores need to setup the mapreduce job by specifying the fields
+   * needed. All extensions that work on WebPage are able to specify what fields
+   * they need.
+   */
   @Override
   public Collection<WebPage.Field> getFields() {
     return null;

Added: nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html?rev=1385103&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html (added)
+++ nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html Sat Sep 15 16:16:48 2012
@@ -0,0 +1,6 @@
+<html>
+<body>
+<p>A parser plugin and content filter to extract all (possible) links
+from JavaScript files and code snippets.</p>
+</body>
+</html>

Added: nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java?rev=1385103&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java (added)
+++ nutch/branches/2.x/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java Sat Sep 15 16:16:48 2012
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse.js;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.avro.util.Utf8;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.parse.Outlink;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.parse.ParseUtil;
+import org.apache.nutch.protocol.ProtocolException;
+import org.apache.nutch.storage.WebPage;
+import org.apache.nutch.util.MimeUtil;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+/**
+ * JUnit test case for {@link JSParseFilter} which tests 
+ * 1. That 5 outlinks are extracted from JavaScript snippets embedded in HTML
+ * 2. That X outlinks are extracted from a pure JavaScript file (this is temporarily disabled)
+ * 
+ * @author lewismc
+ */
+
+public class TestJSParseFilter extends TestCase {
+
+  private String fileSeparator = System.getProperty("file.separator");
+
+  // This system property is defined in ./src/plugin/build-plugin.xml
+  private String sampleDir = System.getProperty("test.data", ".");
+
+  // Make sure sample files are copied to "test.data" as specified in
+  // ./src/plugin/parse-js/build.xml during plugin compilation.
+  private String[] sampleFiles = { "parse_pure_js_test.js", "parse_embedded_js_test.html" };
+	  
+  private Configuration conf;
+	  
+  public TestJSParseFilter(String name) {
+	super(name);
+  }
+	  
+  protected void setUp() {
+	conf = NutchConfiguration.create();
+	conf.set("file.content.limit", "-1");
+  }
+
+  protected void tearDown() {
+  }	
+  
+  public Outlink[] getOutlinks(String[] sampleFiles) throws ProtocolException, ParseException, IOException {
+	String urlString;
+	Parse parse;
+	
+	urlString = "file:" + sampleDir + fileSeparator + sampleFiles;
+	File file = new File(urlString);
+	byte[] bytes = new byte[(int) file.length()];
+	DataInputStream dip = new DataInputStream(new FileInputStream(file));
+	dip.readFully(bytes);
+	dip.close();
+    
+	WebPage page = new WebPage();
+	page.setBaseUrl(new Utf8(urlString));
+	page.setContent(ByteBuffer.wrap(bytes));
+	MimeUtil mutil = new MimeUtil(conf);
+	String mime = mutil.getMimeType(file);
+	page.setContentType(new Utf8(mime));
+	
+	parse = new ParseUtil(conf).parse(urlString, page);
+	return parse.getOutlinks();
+  }
+  
+  @Test
+  public void testOutlinkExtraction() throws ProtocolException, ParseException, IOException {
+	String[] filenames = new File(sampleDir).list();
+	for (int i = 0; i < filenames.length; i++) {
+	  if (filenames[i].endsWith(".js") == true) {
+		assertEquals("number of outlinks in .js test file should be 5", 5, getOutlinks(sampleFiles));
+		// temporarily disabled as a suitable pure JS file could not be found.
+	    //} else {
+		//assertEquals("number of outlinks in .html file should be X", 5, getOutlinks(sampleFiles));
+	  }
+    }
+  }
+
+}
\ No newline at end of file

Modified: nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java?rev=1385103&r1=1385102&r2=1385103&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java Sat Sep 15 16:16:48 2012
@@ -16,23 +16,6 @@
  ******************************************************************************/
 package org.apache.nutch.parse.tika;
 
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 // JUnit imports
 import java.io.DataInputStream;
 import java.io.File;
@@ -79,7 +62,6 @@ public class TestRTFParser extends TestC
     }
 
     public void testIt() throws ProtocolException, ParseException, IOException {
-        /* Temporarily disabled - see Tika-748
 
 	String urlString;
 	Parse parse;
@@ -97,22 +79,23 @@ public class TestRTFParser extends TestC
 	WebPage page = new WebPage();
 	page.setBaseUrl(new Utf8(urlString));
 	page.setContent(ByteBuffer.wrap(bytes));
-	MimeType mtype = mimeutil.getMimeType(file);
-	page.setContentType(new Utf8(mtype.getName()));
+	String mtype = mimeutil.getMimeType(file);
+	page.setContentType(new Utf8(mtype));
 
 	parse = new ParseUtil(conf).parse(urlString, page);
 
+	String title = parse.getTitle();
 	String text = parse.getText();
-	assertEquals("The quick brown fox jumps over the lazy dog", text.trim());
+	assertEquals("test rft document", title);
+	//assertEquals("The quick brown fox jumps over the lazy dog", text.trim());
 
-	String title = parse.getTitle();
+	
 	// HOW DO WE GET THE PARSE METADATA?
 	// Metadata meta = parse();
 
 	// METADATA extraction is not yet supported in Tika
-	// assertEquals("test rft document", title);
+	// 
 	// assertEquals("tests", meta.get(DublinCore.SUBJECT));
-        */
     }
 
 }