You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by df...@apache.org on 2006/02/14 10:28:55 UTC

svn commit: r377676 - /jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java

Author: dflorey
Date: Tue Feb 14 01:28:53 2006
New Revision: 377676

URL: http://svn.apache.org/viewcvs?rev=377676&view=rev
Log:
Fixed a bug to enable concurrent indexing of multiple MS PowerPoint documents.

Modified:
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=377676&r1=377675&r2=377676&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java Tue Feb 14 01:28:53 2006
@@ -39,84 +39,67 @@
 /**
  * Content extractor for Microsoft Powerpoint documents.
  */
-public class MSPowerPointExtractor extends AbstractContentExtractor implements POIFSReaderListener{
+public class MSPowerPointExtractor extends AbstractContentExtractor {
 
-    static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
-    static final String CONTENT_TYPE_POWERPOINT_2 = "application/vnd.ms-powerpoint";
-    static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = CONTENT_TYPE_POWERPOINT_1+","+CONTENT_TYPE_POWERPOINT_2;
-	
-    private ByteArrayOutputStream writer = new ByteArrayOutputStream();
-
-    public MSPowerPointExtractor(String uri, String contentType, String namespace) {
-        super(uri, contentType, namespace);
-    }
-
-    public Reader extract(InputStream content)  throws ExtractorException {
-        try {
-            POIFSReader reader = new POIFSReader();
-            reader.registerListener(this);
-            reader.read(content);
-
-            return new InputStreamReader(new ByteArrayInputStream(writer.toByteArray()));
-        }
-        catch(Exception e) {
-                throw new ExtractorException(e.getMessage());
-        }
-    }
-
-    public void processPOIFSReaderEvent(POIFSReaderEvent event)
-    {
-        try{
-            if(!event.getName().equalsIgnoreCase("PowerPoint Document"))
-                return;
-
-            DocumentInputStream input = event.getStream();
-
-            byte[] buffer = new byte[input.available()];
-            input.read(buffer, 0, input.available());
-
-            for(int i=0; i<buffer.length-20; i++)
-            {
-                long type = LittleEndian.getUShort(buffer,i+2);
-                long size = LittleEndian.getUInt(buffer,i+4);
-
-                if(type==4008)
-                {
-                    writer.write(buffer, i + 4 + 1, (int) size +3);
-                    i = i + 4 + 1 + (int) size - 1;
-
-                }
-            }
-        }
-        catch (Exception e)
-        {
-
-        }
-    }
-
-    public static void main(String[] args) throws Exception
-    {
-        FileInputStream in = new FileInputStream(args[0]);
-
-        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null);
-
-        Reader reader = ex.extract(in);
-
-        int c;
-        do
-        {
-            c = reader.read();
-
-            System.out.print((char)c);
-        }
-        while( c != -1 );
-    }
-    
-	/* (non-Javadoc)
+	static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
+
+	static final String CONTENT_TYPE_POWERPOINT_2 = "application/vnd.ms-powerpoint";
+
+	static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = CONTENT_TYPE_POWERPOINT_1
+			+ "," + CONTENT_TYPE_POWERPOINT_2;
+
+	public MSPowerPointExtractor(String uri, String contentType,
+			String namespace) {
+		super(uri, contentType, namespace);
+	}
+
+	public Reader extract(InputStream content) throws ExtractorException {
+		try {
+			final ByteArrayOutputStream writer = new ByteArrayOutputStream();
+
+			POIFSReader reader = new POIFSReader();
+			reader.registerListener(new POIFSReaderListener() {
+				public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+					try {
+						if (!event.getName().equalsIgnoreCase(
+								"PowerPoint Document"))
+							return;
+
+						DocumentInputStream input = event.getStream();
+
+						byte[] buffer = new byte[input.available()];
+						input.read(buffer, 0, input.available());
+
+						for (int i = 0; i < buffer.length - 20; i++) {
+							long type = LittleEndian.getUShort(buffer, i + 2);
+							long size = LittleEndian.getUInt(buffer, i + 4);
+
+							if (type == 4008) {
+								writer.write(buffer, i + 4 + 1, (int) size + 3);
+								i = i + 4 + 1 + (int) size - 1;
+							}
+						}
+					} catch (Exception e) {
+
+					}
+				}
+			});
+			reader.read(content);
+
+			return new InputStreamReader(new ByteArrayInputStream(writer
+					.toByteArray()));
+		} catch (Exception e) {
+			throw new ExtractorException(e.getMessage());
+		}
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
 	 * @see org.apache.slide.extractor.Extractor#getContentType()
 	 */
 	public String getContentType() {
-		if(super.getContentType()==null){
+		if (super.getContentType() == null) {
 			return CONTENT_TYPE_POWERPOINT_ALL_CSV;
 		}
 		return super.getContentType();



---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org