You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@mahout.apache.org by JAGANADH G <ja...@gmail.com> on 2012/10/11 12:03:50 UTC
Mahout 0.7 API Naive Bayes
Hi
I just created a sample class that uses NaiveBayes. Can somebody tell me whether
I am on the right track or not?
Here is my code
/**
 * Minimal example: load a trained Naive Bayes model from disk and print the
 * result of classifying one hard-coded sample document.
 */
public class NaiveBayesClassifierExample {

    /** Model location used when no path is supplied on the command line. */
    private static final String DEFAULT_MODEL_PATH =
        "/home/u179995/Downloads/mahout-distribution-0.7/playg/movie_model";

    /**
     * Materializes the model stored at {@code strModelPath}, wraps it in a
     * {@link StandardNaiveBayesClassifier} and prints the vector returned by
     * {@code classifyFull(v)} to standard output.
     *
     * @param strModelPath path of the trained model
     * @param v            feature vector of the document to classify
     * @throws IOException if the model cannot be read
     */
    public static void loadClassifier(String strModelPath, Vector v)
            throws IOException {
        Configuration conf = new Configuration();
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(strModelPath), conf);
        AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);
        Vector st = classifier.classifyFull(v);
        System.out.println(st.toString());
    }

    /**
     * Tokenizes the sample review with a {@link StandardAnalyzer}, adds every
     * token with weight 1.0 to a sparse vector and returns the normalized vector.
     *
     * <p>NOTE(review): the tokens are placed with a {@link StaticWordValueEncoder}.
     * Confirm that the model was trained on vectors built the same way; if the
     * training vectors were built from a dictionary file, the feature indices
     * produced here will presumably not line up with the model's.
     *
     * @return the normalized feature vector of the sample text
     * @throws IOException if tokenization fails
     */
    public static Vector createVect() throws IOException {
        FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        // The trailing space after "movie" keeps it from fusing with "because"
        // when the two literals are concatenated.
        StringReader in = new StringReader(
            "The movie sherk was very cool and attractive one. We like the movie "
                + "because of the theme and directon. All the actores were excellent");
        Vector v1 = new RandomAccessSparseVector(100000);

        TokenStream ts = analyzer.tokenStream("body", in);
        try {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            // Follow the TokenStream consumer workflow: reset, consume, end, close.
            ts.reset();
            while (ts.incrementToken()) {
                encoder.addToVector(termAtt.toString(), 1.0, v1);
            }
            ts.end();
        } finally {
            ts.close();
        }

        // normalize() returns a new vector instead of changing v1 in place, so
        // its result has to be the value that is returned.
        return v1.normalize();
    }

    /**
     * Classifies the sample document.
     *
     * @param args optional: {@code args[0]} is the model path; when absent the
     *             built-in default path is used
     * @throws IOException if the model cannot be read or tokenization fails
     */
    public static void main(String[] args) throws IOException {
        Vector v = createVect();
        String mp = args.length > 0 ? args[0] : DEFAULT_MODEL_PATH;
        loadClassifier(mp, v);
    }
}
--
**********************************
JAGANADH G
http://jaganadhg.in
*ILUGCBE*
http://ilugcbe.org.in
Re: Mahout 0.7 API Naive Bayes
Posted by Thomas Quenolle <tq...@multiposting.fr>.
Hi, I also struggled with this. Here is some code from my classifier; I hope you
will find it helpful.
The key point is to use the dictionary that was created in the process of training
your model. I modified that dictionary so it also carries the total number of docs.
private void loadTermDictionary(InputStream is) throws IOException {
123 /**
124 * Read and load a dictionary file.
125 * Retrieving:
126 * Number of documents used for tfidf computation.
127 * Retrieving for each word:
128 * the feature's index used in the vectors.
129 * it's document frequency (the number of docs it appears in).
130 * Inspired by VectorHelper.class
131 */
132
133 FileLineIterator it = new FileLineIterator(is);
134
135 int numEntries = Integer.parseInt(it.next());
136 dictMap = new HashMap<String, Integer>();
137 docFreqMap = new HashMap<Integer, Integer>();
138
139 while (it.hasNext()) {
140 String line = it.next();
141 if (line.startsWith("#")) {
142 if (line.startsWith("#numDocs")) {
143 this.numDocs =
Integer.parseInt(SPACE.split(line)[1]);
144 }
145 continue;
146 }
147 String[] tokens = TAB_PATTERN.split(line);
148 // tokens[0] is the word
149 // tokens[1] is the doc freq
150 // tokens[2] is the feature index
151 if (tokens.length < 3) {
152 continue;
153 }
154 int index = Integer.parseInt(tokens[2]);
155 int docfreq = Integer.parseInt(tokens[1]);
156 // Saving mapping word -> feature index
157 if (!dictMap.containsKey(tokens[0]))
158 dictMap.put(tokens[0], new Integer(index));
159 // Saving mapping feature index -> doc freq
160 if (!docFreqMap.containsKey(tokens[0]))
161 docFreqMap.put(new Integer(index), new
Integer(docfreq));
162 }
163 }
230 private String classify(String[] ts) {
231 /**
232 * Return the guessed category's label.
233 * Term Frequency computation.
234 * TFIDF weight computation.
235 * Classification based on a model.
236 * The best score is returned.
237 */
238
239 Map<Integer, Integer> termFreqs = new HashMap<Integer,
Integer>();
240 for (int k = 0; k<ts.length; k++) {
241 String val = ts[k];
242 Integer index = dictMap.get(val);
243 if (index != null) {
244 if (termFreqs.containsKey(index)) {
245 termFreqs.put(index, termFreqs.get(index) + new
Integer(1));
246 } else {
247 termFreqs.put(index, new Integer(1));
248 }
249 }
250 }
251 Vector vec = new RandomAccessSparseVector((int)
termFreqs.size());
252 for (Integer idx: termFreqs.keySet()) {
253 double termWeight =
weight.calculate((int)termFreqs.get(idx), (int) docFreqMap.get(idx), 0,
numDocs);
254 vec.setQuick((int) idx, termWeight);
255 }
256 Vector scores = classifier.classifyFull(vec.normalize());
257 int bestIdx = Integer.MIN_VALUE;
258 double bestScore = Long.MIN_VALUE;
259 for (Iterator<Vector.Element> score = scores.iterator();
score.hasNext();) {
260 Vector.Element element = score.next();
261 if (element.get() > bestScore) {
262 bestScore = element.get();
263 bestIdx = element.index();
264 }
265 }
266 if (debug)
267 System.out.println("Classified as: " +
labelMap.get(bestIdx));
268
269 return labelMap.get(bestIdx);
270 }
2012/10/17 Sarath P R <sa...@gmail.com>
> Hi Jaggu,
>
> I am also working with 0.7 . I too tried input vector to the classifier .
> Vector nbResult = nbClassifier.classifyFull(getVector());
>
> But don't know how to get the correct label. I got the following piece of
> code from
>
> /mahout-distribution-0.7/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
> for analyzing result.
>
> private static void analyzeResults(Map<Integer, String> labelMap,
> SequenceFileDirIterable<Text,
> VectorWritable> dirIterable,
> ResultAnalyzer analyzer) {
> for (Pair<Text, VectorWritable> pair : dirIterable) {
> int bestIdx = Integer.MIN_VALUE;
> double bestScore = Long.MIN_VALUE;
> for (Vector.Element element : pair.getSecond().get()) {
> if (element.get() > bestScore) {
> bestScore = element.get();
> bestIdx = element.index();
> }
> }
> if (bestIdx != Integer.MIN_VALUE) {
> ClassifierResult classifierResult = new
> ClassifierResult(labelMap.get(bestIdx), bestScore);
> analyzer.addInstance(pair.getFirst().toString(), classifierResult);
> }
> }
> }
>
> But couldn't get the correct label.
>
>
>
> On Thu, Oct 11, 2012 at 3:33 PM, JAGANADH G <ja...@gmail.com> wrote:
>
> > Hi
> >
> > I just created a sample use class of NaiveBayes . Can somebody say
> wheather
> > I am in the right track or not
> >
> > Here is my code
> >
> > public class NaiveBayesClassifierExample {
> >
> > public static void loadClassifier(String strModelPath, Vector v)
> > throws IOException {
> > Configuration conf = new Configuration();
> >
> > NaiveBayesModel model = NaiveBayesModel.materialize(new Path(
> > strModelPath), conf);
> > AbstractNaiveBayesClassifier classifier = new
> StandardNaiveBayesClassifier(
> > model);
> >
> > Vector st = classifier.classifyFull(v);
> > System.out.println(st.toString());
> > }
> >
> > public static Vector createVect() throws IOException {
> > FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
> > Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
> > StringReader in = new StringReader(
> > "The movie sherk was very cool and attractive one. We like the movie"
> > + "because of the theme and directon. All the actores were excellent");
> >
> > TokenStream ts = analyzer.tokenStream("body", in);
> >
> > CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
> > Vector v1 = new RandomAccessSparseVector(100000);
> >
> > while (ts.incrementToken()) {
> > char[] termBuffer = termAtt.buffer();
> > int termLen = termAtt.length();
> > String w = new String(termBuffer, 0, termLen);
> > encoder.addToVector(w, 1.0, v1);
> > }
> > v1.normalize();
> > return v1;
> > }
> >
> > public static void main(String[] args) throws IOException {
> > Vector v = createVect();
> > String mp =
> > "/home/u179995/Downloads/mahout-distribution-0.7/playg/movie_model";
> > loadClassifier(mp, v);
> > }
> > }
> >
> > --
> > **********************************
> > JAGANADH G
> > http://jaganadhg.in
> > *ILUGCBE*
> > http://ilugcbe.org.in
> >
>
>
>
> --
> Thank You
> Sarath P R | cell +91 99 95 02 4287 | http://sprism.blogspot.com
>
--
*Thomas Quenolle* | Chef de Projet
Direct : +33 6 80 84 26 58
www.multiposting.fr
Standard : +33 1 42 72 57 84 | Fax : +33 1 73 76 93 23
Rejoignez-nous sur Facebook <http://www.facebook.com/multiposting>
Suivez-nous sur Twitter <http://twitter.com/multiposting>
N’imprimez cet email qu’en cas de nécessité / Please do not print this
email unless necessary
Re: Mahout 0.7 API Naive Bayes
Posted by Sarath P R <sa...@gmail.com>.
Hi Jaggu,
I am also working with 0.7. I too tried passing an input vector to the classifier:
Vector nbResult = nbClassifier.classifyFull(getVector());
But I don't know how to get the correct label. I took the following piece of
code from
/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
for analyzing result.
/**
 * Walks every (key, vector) pair of {@code dirIterable}, finds the element
 * with the largest value in the vector and reports the label mapped to that
 * element's index, together with the value, to {@code analyzer}.
 * Pairs for which no element beats the initial score are skipped.
 *
 * @param labelMap    mapping from element index to label
 * @param dirIterable the (key, vector) pairs to analyze
 * @param analyzer    receives one instance per pair that has a winning element
 */
private static void analyzeResults(Map<Integer, String> labelMap,
                                   SequenceFileDirIterable<Text, VectorWritable> dirIterable,
                                   ResultAnalyzer analyzer) {
    for (Pair<Text, VectorWritable> entry : dirIterable) {
        int winner = Integer.MIN_VALUE;
        double top = Long.MIN_VALUE;
        for (Vector.Element candidate : entry.getSecond().get()) {
            double value = candidate.get();
            if (value > top) {
                top = value;
                winner = candidate.index();
            }
        }
        // The sentinel is still in place: nothing to report for this pair.
        if (winner == Integer.MIN_VALUE) {
            continue;
        }
        ClassifierResult outcome = new ClassifierResult(labelMap.get(winner), top);
        analyzer.addInstance(entry.getFirst().toString(), outcome);
    }
}
But I still couldn't get the correct label.
On Thu, Oct 11, 2012 at 3:33 PM, JAGANADH G <ja...@gmail.com> wrote:
> Hi
>
> I just created a sample use class of NaiveBayes . Can somebody say wheather
> I am in the right track or not
>
> Here is my code
>
> public class NaiveBayesClassifierExample {
>
> public static void loadClassifier(String strModelPath, Vector v)
> throws IOException {
> Configuration conf = new Configuration();
>
> NaiveBayesModel model = NaiveBayesModel.materialize(new Path(
> strModelPath), conf);
> AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(
> model);
>
> Vector st = classifier.classifyFull(v);
> System.out.println(st.toString());
> }
>
> public static Vector createVect() throws IOException {
> FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
> Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
> StringReader in = new StringReader(
> "The movie sherk was very cool and attractive one. We like the movie"
> + "because of the theme and directon. All the actores were excellent");
>
> TokenStream ts = analyzer.tokenStream("body", in);
>
> CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
> Vector v1 = new RandomAccessSparseVector(100000);
>
> while (ts.incrementToken()) {
> char[] termBuffer = termAtt.buffer();
> int termLen = termAtt.length();
> String w = new String(termBuffer, 0, termLen);
> encoder.addToVector(w, 1.0, v1);
> }
> v1.normalize();
> return v1;
> }
>
> public static void main(String[] args) throws IOException {
> Vector v = createVect();
> String mp =
> "/home/u179995/Downloads/mahout-distribution-0.7/playg/movie_model";
> loadClassifier(mp, v);
> }
> }
>
> --
> **********************************
> JAGANADH G
> http://jaganadhg.in
> *ILUGCBE*
> http://ilugcbe.org.in
>
--
Thank You
Sarath P R | cell +91 99 95 02 4287 | http://sprism.blogspot.com