You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by df...@apache.org on 2006/02/14 10:28:55 UTC
svn commit: r377676 - /jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
Author: dflorey
Date: Tue Feb 14 01:28:53 2006
New Revision: 377676
URL: http://svn.apache.org/viewcvs?rev=377676&view=rev
Log:
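Fixed a bug that prevented concurrent indexing of multiple MS PowerPoint documents: the output buffer is now local to each extract() call instead of an instance field, and the POIFS listener is created per call.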
Modified:
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=377676&r1=377675&r2=377676&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java Tue Feb 14 01:28:53 2006
@@ -39,84 +39,67 @@
/**
* Content extractor for Microsoft Powerpoint documents.
*/
-public class MSPowerPointExtractor extends AbstractContentExtractor implements POIFSReaderListener{
+public class MSPowerPointExtractor extends AbstractContentExtractor {
- static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
- static final String CONTENT_TYPE_POWERPOINT_2 = "application/vnd.ms-powerpoint";
- static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = CONTENT_TYPE_POWERPOINT_1+","+CONTENT_TYPE_POWERPOINT_2;
-
- private ByteArrayOutputStream writer = new ByteArrayOutputStream();
-
- public MSPowerPointExtractor(String uri, String contentType, String namespace) {
- super(uri, contentType, namespace);
- }
-
- public Reader extract(InputStream content) throws ExtractorException {
- try {
- POIFSReader reader = new POIFSReader();
- reader.registerListener(this);
- reader.read(content);
-
- return new InputStreamReader(new ByteArrayInputStream(writer.toByteArray()));
- }
- catch(Exception e) {
- throw new ExtractorException(e.getMessage());
- }
- }
-
- public void processPOIFSReaderEvent(POIFSReaderEvent event)
- {
- try{
- if(!event.getName().equalsIgnoreCase("PowerPoint Document"))
- return;
-
- DocumentInputStream input = event.getStream();
-
- byte[] buffer = new byte[input.available()];
- input.read(buffer, 0, input.available());
-
- for(int i=0; i<buffer.length-20; i++)
- {
- long type = LittleEndian.getUShort(buffer,i+2);
- long size = LittleEndian.getUInt(buffer,i+4);
-
- if(type==4008)
- {
- writer.write(buffer, i + 4 + 1, (int) size +3);
- i = i + 4 + 1 + (int) size - 1;
-
- }
- }
- }
- catch (Exception e)
- {
-
- }
- }
-
- public static void main(String[] args) throws Exception
- {
- FileInputStream in = new FileInputStream(args[0]);
-
- MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null);
-
- Reader reader = ex.extract(in);
-
- int c;
- do
- {
- c = reader.read();
-
- System.out.print((char)c);
- }
- while( c != -1 );
- }
-
- /* (non-Javadoc)
+ static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
+
+ static final String CONTENT_TYPE_POWERPOINT_2 = "application/vnd.ms-powerpoint";
+
+ static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = CONTENT_TYPE_POWERPOINT_1
+ + "," + CONTENT_TYPE_POWERPOINT_2;
+
+ public MSPowerPointExtractor(String uri, String contentType,
+ String namespace) {
+ super(uri, contentType, namespace);
+ }
+
+ public Reader extract(InputStream content) throws ExtractorException {
+ try {
+ final ByteArrayOutputStream writer = new ByteArrayOutputStream();
+
+ POIFSReader reader = new POIFSReader();
+ reader.registerListener(new POIFSReaderListener() {
+ public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+ try {
+ if (!event.getName().equalsIgnoreCase(
+ "PowerPoint Document"))
+ return;
+
+ DocumentInputStream input = event.getStream();
+
+ byte[] buffer = new byte[input.available()];
+ input.read(buffer, 0, input.available());
+
+ for (int i = 0; i < buffer.length - 20; i++) {
+ long type = LittleEndian.getUShort(buffer, i + 2);
+ long size = LittleEndian.getUInt(buffer, i + 4);
+
+ if (type == 4008) {
+ writer.write(buffer, i + 4 + 1, (int) size + 3);
+ i = i + 4 + 1 + (int) size - 1;
+ }
+ }
+ } catch (Exception e) {
+
+ }
+ }
+ });
+ reader.read(content);
+
+ return new InputStreamReader(new ByteArrayInputStream(writer
+ .toByteArray()));
+ } catch (Exception e) {
+ throw new ExtractorException(e.getMessage());
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
* @see org.apache.slide.extractor.Extractor#getContentType()
*/
public String getContentType() {
- if(super.getContentType()==null){
+ if (super.getContentType() == null) {
return CONTENT_TYPE_POWERPOINT_ALL_CSV;
}
return super.getContentType();
---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org