You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/11/08 03:18:15 UTC
[2/3] incubator-datafu git commit: DATAFU-102 SentenceDetect fails to load input bin file
DATAFU-102 SentenceDetect fails to load input bin file
I've tested this on a Hadoop cluster and can confirm it works. I've also tested the other OpenNLP wrappers to make sure they work as well in this environment.
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/282a42f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/282a42f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/282a42f6
Branch: refs/heads/1.3.0
Commit: 282a42f643aa5e2a1635e07d41bf615c8dbbdf6b
Parents: a7687d3
Author: Matthew Hayes <ma...@gmail.com>
Authored: Fri Nov 6 14:57:44 2015 -0800
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Fri Nov 6 15:03:26 2015 -0800
----------------------------------------------------------------------
datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java | 6 +++---
.../src/main/java/datafu/pig/text/opennlp/SentenceDetect.java | 6 +++---
.../src/main/java/datafu/pig/text/opennlp/TokenizeME.java | 2 +-
.../main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java | 2 +-
4 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
index fb17c63..e29d6d7 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java
@@ -42,17 +42,17 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
*
* -- input:
* -- (Appetizers during happy hour range from low to high.)
- * input = LOAD 'input' AS (text:chararray);
+ * infoo = LOAD 'input' AS (text:chararray);
* --
* -- ({(Appetizers),(during),(happy),(hour),(range),(from),(low),(to),(high),(.)})
- * tokenized = FOREACH input GENERATE TokenizeME(text) AS tokens;
+ * tokenized = FOREACH infoo GENERATE TokenizeME(text) AS tokens;
* --
* -- output:
* -- Tuple schema is: (word, tag, confidence)
* -- ({(Appetizers,NNP,0.3619277937390988),(during,IN,0.7945543860326094),(happy,JJ,0.9888504792754391),
* -- (hour,NN,0.9427455123502427),(range,NN,0.7335527963654751),(from,IN,0.9911576465589752),(low,JJ,0.9652034031895174),
* -- (to,IN,0.7005347487371849),(high,JJ,0.8227771746247106),(.,.,0.9900983495480891)})
- * output = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
+ * outfoo = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
* }
* </pre>
*/
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
index 50537fd..4bdcc32 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/SentenceDetect.java
@@ -35,7 +35,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
* Example:
* <pre>
* {@code
- * define SentenceDetect datafu.pig.text.SentenceDetect('data/en-sent.bin');
+ * define SentenceDetect datafu.pig.text.opennlp.SentenceDetect('data/en-sent.bin');
*
* -- input:
* -- ("I believe the Masons have infiltrated the Apache PMC. I believe laser beams control cat brains.")
@@ -43,7 +43,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
* -- output:
* -- ({(I believe the Masons have infiltrated the Apache PMC.)(I believe laser beams control cat brains.)})
- * outfoo = FOREACH input GENERATE SentenceDetect(text) as sentences;
+ * outfoo = FOREACH infoo GENERATE SentenceDetect(text) as sentences;
* }
* </pre>
*/
@@ -80,7 +80,7 @@ public class SentenceDetect extends EvalFunc<DataBag>
DataBag outBag = bf.newDefaultBag();
if(sdetector == null) {
String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
- InputStream is = new FileInputStream(modelPath);
+ InputStream is = new FileInputStream(loadFile);
InputStream buffer = new BufferedInputStream(is);
SentenceModel model = new SentenceModel(buffer);
this.sdetector = new SentenceDetectorME(model);
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
index f1f4257..d017f32 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeME.java
@@ -44,7 +44,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
* -- output:
* -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeME(text) as tokens;
+ * outfoo = FOREACH infoo GENERATE TokenizeME(text) as tokens;
* }
* </pre>
*/
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/282a42f6/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
index 8efafb0..a211277 100644
--- a/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
+++ b/datafu-pig/src/main/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
@@ -43,7 +43,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
* -- output:
* -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeWhitespace(text) as tokens;
+ * outfoo = FOREACH infoo GENERATE TokenizeWhitespace(text) as tokens;
* }
* </pre>
*/