You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/11/07 00:03:43 UTC

incubator-datafu git commit: DATAFU-102 SentenceDetect fails to load input bin file

Repository: incubator-datafu
Updated Branches:
  refs/heads/master a7687d34e -> 282a42f64


DATAFU-102 SentenceDetect fails to load input bin file

I've tested this on a Hadoop cluster and can confirm it works. I've also tested the other OpenNLP wrappers to make sure they work in this environment as well.


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/282a42f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/282a42f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/282a42f6

Branch: refs/heads/master
Commit: 282a42f643aa5e2a1635e07d41bf615c8dbbdf6b
Parents: a7687d3
Author: Matthew Hayes <ma...@gmail.com>
Authored: Fri Nov 6 14:57:44 2015 -0800
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Fri Nov 6 15:03:26 2015 -0800

----------------------------------------------------------------------
 datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java   | 6 +++---
 .../src/main/java/datafu/pig/text/opennlp/SentenceDetect.java  | 6 +++---
 .../src/main/java/datafu/pig/text/opennlp/TokenizeME.java      | 2 +-
 .../main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java  | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
index fb17c63..e29d6d7 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
@@ -42,17 +42,17 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
  *
  * -- input:
  * -- (Appetizers during happy hour range from low to high.)
- * input = LOAD 'input' AS (text:chararray);
+ * infoo = LOAD 'input' AS (text:chararray);
  * --
  * -- ({(Appetizers),(during),(happy),(hour),(range),(from),(low),(to),(high),(.)})
- * tokenized = FOREACH input GENERATE TokenizeME(text) AS tokens;
+ * tokenized = FOREACH infoo GENERATE TokenizeME(text) AS tokens;
  * --
  * -- output:
  * -- Tuple schema is: (word, tag, confidence)
  * -- ({(Appetizers,NNP,0.3619277937390988),(during,IN,0.7945543860326094),(happy,JJ,0.9888504792754391),
  * -- (hour,NN,0.9427455123502427),(range,NN,0.7335527963654751),(from,IN,0.9911576465589752),(low,JJ,0.9652034031895174),
  * -- (to,IN,0.7005347487371849),(high,JJ,0.8227771746247106),(.,.,0.9900983495480891)})
- * output = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
+ * outfoo = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
  * }
  * </pre>
  */

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
index 50537fd..4bdcc32 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
@@ -35,7 +35,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
  * Example:
  * <pre>
  * {@code
- * define SentenceDetect datafu.pig.text.SentenceDetect('data/en-sent.bin');
+ * define SentenceDetect datafu.pig.text.opennlp.SentenceDetect('data/en-sent.bin');
  *
  * -- input:
  * -- ("I believe the Masons have infiltrated the Apache PMC. I believe laser beams control cat brains.")
@@ -43,7 +43,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 
  * -- output:
  * -- ({(I believe the Masons have infiltrated the Apache PMC.)(I believe laser beams control cat brains.)})
- * outfoo = FOREACH input GENERATE SentenceDetect(text) as sentences;
+ * outfoo = FOREACH infoo GENERATE SentenceDetect(text) as sentences;
  * }
  * </pre>
  */
@@ -80,7 +80,7 @@ public class SentenceDetect extends EvalFunc<DataBag>
         DataBag outBag = bf.newDefaultBag();
         if(sdetector == null) {
             String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
-            InputStream is = new FileInputStream(modelPath);
+            InputStream is = new FileInputStream(loadFile);
             InputStream buffer = new BufferedInputStream(is);
             SentenceModel model = new SentenceModel(buffer);
             this.sdetector = new SentenceDetectorME(model);

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
index f1f4257..d017f32 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
@@ -44,7 +44,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 
  * -- output:
  * -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeME(text) as tokens;
+ * outfoo = FOREACH infoo GENERATE TokenizeME(text) as tokens;
  * }
  * </pre>
  */

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
index 8efafb0..a211277 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
@@ -43,7 +43,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 
  * -- output:
  * -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeWhitespace(text) as tokens;
+ * outfoo = FOREACH infoo GENERATE TokenizeWhitespace(text) as tokens;
  * }
  * </pre>
  */