You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by fa...@apache.org on 2019/12/23 09:18:38 UTC

svn commit: r1871921 - in /poi/trunk/src/scratchpad/src/org/apache/poi: hpbf/HPBFDocument.java hpbf/dev/HPBFDumper.java hpbf/dev/PLCDumper.java hpbf/extractor/PublisherTextExtractor.java hslf/extractor/QuickButCruddyTextExtractor.java

Author: fanningpj
Date: Mon Dec 23 09:18:38 2019
New Revision: 1871921

URL: http://svn.apache.org/viewvc?rev=1871921&view=rev
Log:
convert some tabs to spaces

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java Mon Dec 23 09:18:38 2019
@@ -34,49 +34,49 @@ import org.apache.poi.poifs.filesystem.P
  *  file format.
  */
 public final class HPBFDocument extends POIReadOnlyDocument {
-	private MainContents mainContents;
-	private QuillContents quillContents;
-	private EscherStm escherStm;
-	private EscherDelayStm escherDelayStm;
-
-	/**
-	 * Opens a new publisher document
-	 */
-	public HPBFDocument(POIFSFileSystem fs) throws IOException {
-	   this(fs.getRoot());
-	}
-
-	public HPBFDocument(InputStream inp) throws IOException {
-	   this(new POIFSFileSystem(inp));
-	}
-
-	/**
-	 * Opens an embedded publisher document,
-	 *  at the given directory.
-	 */
-	public HPBFDocument(DirectoryNode dir) throws IOException {
-	   super(dir);
-
-	   // Go looking for our interesting child
-	   //  streams
-	   mainContents = new MainContents(dir);
-	   quillContents = new QuillContents(dir);
-
-	   // Now the Escher bits
-	   escherStm = new EscherStm(dir);
-	   escherDelayStm = new EscherDelayStm(dir);
-	}
-
-	public MainContents getMainContents() {
-		return mainContents;
-	}
-	public QuillContents getQuillContents() {
-		return quillContents;
-	}
-	public EscherStm getEscherStm() {
-		return escherStm;
-	}
-	public EscherDelayStm getEscherDelayStm() {
-		return escherDelayStm;
-	}
+    private MainContents mainContents;
+    private QuillContents quillContents;
+    private EscherStm escherStm;
+    private EscherDelayStm escherDelayStm;
+
+    /**
+     * Opens a new publisher document
+     */
+    public HPBFDocument(POIFSFileSystem fs) throws IOException {
+       this(fs.getRoot());
+    }
+
+    public HPBFDocument(InputStream inp) throws IOException {
+       this(new POIFSFileSystem(inp));
+    }
+
+    /**
+     * Opens an embedded publisher document,
+     *  at the given directory.
+     */
+    public HPBFDocument(DirectoryNode dir) throws IOException {
+       super(dir);
+
+       // Go looking for our interesting child
+       //  streams
+       mainContents = new MainContents(dir);
+       quillContents = new QuillContents(dir);
+
+       // Now the Escher bits
+       escherStm = new EscherStm(dir);
+       escherDelayStm = new EscherDelayStm(dir);
+    }
+
+    public MainContents getMainContents() {
+        return mainContents;
+    }
+    public QuillContents getQuillContents() {
+        return quillContents;
+    }
+    public EscherStm getEscherStm() {
+        return escherStm;
+    }
+    public EscherDelayStm getEscherDelayStm() {
+        return escherDelayStm;
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java Mon Dec 23 09:18:38 2019
@@ -36,319 +36,319 @@ import org.apache.poi.util.StringUtil;
  *  constructed.
  */
 public final class HPBFDumper {
-	private POIFSFileSystem fs;
-	public HPBFDumper(POIFSFileSystem fs) {
-		this.fs = fs;
-	}
-	
-	@SuppressWarnings("resource")
+    private POIFSFileSystem fs;
+    public HPBFDumper(POIFSFileSystem fs) {
+        this.fs = fs;
+    }
+
+    @SuppressWarnings("resource")
     public HPBFDumper(InputStream inp) throws IOException {
-		this(new POIFSFileSystem(inp));
-	}
+        this(new POIFSFileSystem(inp));
+    }
 
-	private static byte[] getData(DirectoryNode dir, String name) throws IOException {
-		// Grab the document stream
-		InputStream is = dir.createDocumentInputStream(name);
-		byte[] d = IOUtils.toByteArray(is);
-		is.close();
-
-		// All done
-		return d;
-	}
-
-	/**
-	 * Dumps out the given number of bytes as hex,
-	 *  two chars
-	 */
-	private String dumpBytes(byte[] data, int offset, int len) {
-		StringBuilder ret = new StringBuilder();
-		for(int i=0; i<len; i++) {
-			int j = i + offset;
-			int b = data[j];
-			if(b < 0) { b += 256; }
-
-			String bs = Integer.toHexString(b);
-			if(bs.length() == 1)
-				ret.append('0');
-			ret.append(bs);
-			ret.append(' ');
-		}
-		return ret.toString();
-	}
+    private static byte[] getData(DirectoryNode dir, String name) throws IOException {
+        // Grab the document stream
+        InputStream is = dir.createDocumentInputStream(name);
+        byte[] d = IOUtils.toByteArray(is);
+        is.close();
+
+        // All done
+        return d;
+    }
+
+    /**
+     * Dumps out the given number of bytes as hex,
+     *  two chars
+     */
+    private String dumpBytes(byte[] data, int offset, int len) {
+        StringBuilder ret = new StringBuilder();
+        for(int i=0; i<len; i++) {
+            int j = i + offset;
+            int b = data[j];
+            if(b < 0) { b += 256; }
+
+            String bs = Integer.toHexString(b);
+            if(bs.length() == 1)
+                ret.append('0');
+            ret.append(bs);
+            ret.append(' ');
+        }
+        return ret.toString();
+    }
 
-	@SuppressWarnings("resource")
+    @SuppressWarnings("resource")
     public static void main(String[] args) throws Exception {
-		if(args.length < 1) {
-			System.err.println("Use:");
-			System.err.println("  HPBFDumper <filename>");
-			System.exit(1);
-		}
-		HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
-
-		System.out.println("Dumping " + args[0]);
-		dump.dumpContents();
-		dump.dumpEnvelope();
-		dump.dumpEscher();
-		dump.dump001CompObj(dump.fs.getRoot());
-		dump.dumpQuill();
-
-		// Still to go:
-		//  (0x03)Internal
-		//  Objects
-	}
-
-	/**
-	 * Dump out the escher parts of the file.
-	 * Escher -> EscherStm and EscherDelayStm
-	 */
-	public void dumpEscher() throws IOException {
-		DirectoryNode escherDir = (DirectoryNode)
-			fs.getRoot().getEntry("Escher");
-
-		dumpEscherStm(escherDir);
-		dumpEscherDelayStm(escherDir);
-	}
-	private void dumpEscherStream(byte[] data) {
-		DefaultEscherRecordFactory erf =
-			new DefaultEscherRecordFactory();
-
-		// Dump
-		int left = data.length;
-		while(left > 0) {
-			EscherRecord er = erf.createRecord(data, 0);
-			er.fillFields(data, 0, erf);
-			left -= er.getRecordSize();
-
-			System.out.println(er);
-		}
-	}
-	protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
-		byte[] data = getData(escherDir, "EscherStm");
-		System.out.println();
-		System.out.println("EscherStm - " + data.length + " bytes long:");
-		if(data.length > 0)
-			dumpEscherStream(data);
-	}
-	protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
-		byte[] data = getData(escherDir, "EscherDelayStm");
-		System.out.println();
-		System.out.println("EscherDelayStm - " + data.length + " bytes long:");
-		if(data.length > 0)
-			dumpEscherStream(data);
-	}
-
-	public void dumpEnvelope() throws IOException {
-		byte[] data = getData(fs.getRoot(), "Envelope");
-
-		System.out.println();
-		System.out.println("Envelope - " + data.length + " bytes long:");
-	}
-
-	public void dumpContents() throws IOException {
-		byte[] data = getData(fs.getRoot(), "Contents");
-
-		System.out.println();
-		System.out.println("Contents - " + data.length + " bytes long:");
-
-		// 8 bytes, always seems to be
-		// E8 AC 2C 00 E8 03 05 01
-		// E8 AC 2C 00 E8 03 05 01
-
-		// 4 bytes - size of contents
-		// 13/15 00 00 01
-
-		// ....
-
-	    // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
-
-	    // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
-
-		// 01 18 30 00 03 20 00 00
-		// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-		// 00 00 00 88 1E 00 00 00
-
-		// 01 18 31 00 03 20 00 00
-		// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-		// 00 00 00 88 1E 00 00 00
-
-		// 01 18 32 00 03 20 00 00
-		// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-		// 00 00 00 88 1E 00 00 00
-	}
-
-	public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
-		byte[] data = getData(dir, "CONTENTS");
-
-		System.out.println();
-		System.out.println("CONTENTS - " + data.length + " bytes long:");
-
-		// Between the start and 0x200 we have
-		//  CHNKINK(space) + 24 bytes
-		//  0x1800
-		//  TEXT + 6 bytes
-		//  TEXT + 8 bytes
-		//  0x1800
-		//  STSH + 6 bytes
-		//  STSH + 8 bytes
-		//  0x1800
-		//  STSH + 6 bytes
-		//  STSH + 8 bytes
-		// but towards 0x200 the pattern may
-		//  break down a little bit
-
-		// After the second of a given type,
-		//  it seems to be 4 bytes giving the start,
-		//  then 4 bytes giving the length, then
-		//  18 00
-		System.out.println(
-				new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
-				dumpBytes(data, 8, 0x20-8)
-		);
-
-		int pos = 0x20;
-		boolean sixNotEight = true;
-		while(pos < 0x200) {
-			if(sixNotEight) {
-				System.out.println(
-						dumpBytes(data, pos, 2)
-				);
-				pos += 2;
-			}
-			String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
-			int blen = 8;
-			if(sixNotEight)
-				blen = 6;
-			System.out.println(
-					text + " " + dumpBytes(data, pos+4, blen)
-			);
-
-			pos += 4 + blen;
-			sixNotEight = ! sixNotEight;
-		}
-
-		// Text from 0x200 onwards until we get
-		//  to \r(00)\n(00)(00)(00)
-		int textStop = -1;
-		for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
-			if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
-				textStop = i;
-			}
-		}
-		if(textStop > 0) {
-			int len = (textStop - 0x200) / 2;
-			System.out.println();
-			System.out.println(
-					StringUtil.getFromUnicodeLE(data, 0x200, len)
-			);
-		}
-
-		// The font list comes slightly later
-
-		// The hyperlinks may come before the fonts,
-		//  or slightly in front
-	}
-	public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
-		byte[] data = getData(dir, "CONTENTS");
-
-		System.out.println();
-		System.out.println("CONTENTS - " + data.length + " bytes long:");
-
-		String[] startType = new String[20];
-		String[] endType = new String[20];
-		int[] optA = new int[20];
-		int[] optB = new int[20];
-		int[] optC = new int[20];
-		int[] from = new int[20];
-		int[] len = new int[20];
-
-		for(int i=0; i<20; i++) {
-			int offset = 0x20 + i*24;
-			if(data[offset] == 0x18 && data[offset+1] == 0x00) {
-				// Has data
-				startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
-				optA[i] = LittleEndian.getUShort(data, offset+6);
-				optB[i] = LittleEndian.getUShort(data, offset+8);
-				optC[i] = LittleEndian.getUShort(data, offset+10);
-				endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
-				from[i] = (int)LittleEndian.getUInt(data, offset+16);
-				len[i] = (int)LittleEndian.getUInt(data, offset+20);
-			} else {
-				// Doesn't have data
-			}
-		}
-
-		String text = StringUtil.getFromUnicodeLE(
-				data, from[0], len[0]/2
-		);
-
-		// Dump
-		for(int i=0; i<20; i++) {
-			String num = Integer.toString(i);
-			if(i < 10) {
-				num = "0" + i;
-			}
-			System.out.print(num + " ");
-
-			if(startType[i] == null) {
-				System.out.println("(not present)");
-			} else {
-				System.out.println(
-						"\t" +
-						startType[i] + " " +
-						optA[i] + " " +
-						optB[i] + " " +
-						optC[i]
-				);
-				System.out.println(
-						"\t" +
-						endType[i] + " " +
-						"from: " +
-						Integer.toHexString(from[i]) +
-						" (" + from[i] + ")" +
-						", len: " +
-						Integer.toHexString(len[i]) +
-						" (" + len[i] + ")"
-				);
-			}
-		}
-
-		// Text
-		System.out.println();
-		System.out.println("TEXT:");
-		System.out.println(text);
-		System.out.println();
-
-		// All the others
-		for(int i=0; i<20; i++) {
-			if(startType[i] == null) {
-				continue;
-			}
-			int start = from[i];
-
-			System.out.println(
-					startType[i] + " -> " + endType[i] +
-					" @ " + Integer.toHexString(start) +
-					" (" + start + ")"
-			);
-			System.out.println("\t" + dumpBytes(data, start, 4));
-			System.out.println("\t" + dumpBytes(data, start+4, 4));
-			System.out.println("\t" + dumpBytes(data, start+8, 4));
-			System.out.println("\t(etc)");
-		}
-	}
-
-	protected void dump001CompObj(DirectoryNode dir) {
-		// TODO
-	}
-
-	public void dumpQuill() throws IOException {
-		DirectoryNode quillDir = (DirectoryNode)
-			fs.getRoot().getEntry("Quill");
-		DirectoryNode quillSubDir = (DirectoryNode)
-			quillDir.getEntry("QuillSub");
-
-		dump001CompObj(quillSubDir);
-		dumpCONTENTSraw(quillSubDir);
-		dumpCONTENTSguessed(quillSubDir);
-	}
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  HPBFDumper <filename>");
+            System.exit(1);
+        }
+        HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
+
+        System.out.println("Dumping " + args[0]);
+        dump.dumpContents();
+        dump.dumpEnvelope();
+        dump.dumpEscher();
+        dump.dump001CompObj(dump.fs.getRoot());
+        dump.dumpQuill();
+
+        // Still to go:
+        //  (0x03)Internal
+        //  Objects
+    }
+
+    /**
+     * Dump out the escher parts of the file.
+     * Escher -> EscherStm and EscherDelayStm
+     */
+    public void dumpEscher() throws IOException {
+        DirectoryNode escherDir = (DirectoryNode)
+            fs.getRoot().getEntry("Escher");
+
+        dumpEscherStm(escherDir);
+        dumpEscherDelayStm(escherDir);
+    }
+    private void dumpEscherStream(byte[] data) {
+        DefaultEscherRecordFactory erf =
+            new DefaultEscherRecordFactory();
+
+        // Dump
+        int left = data.length;
+        while(left > 0) {
+            EscherRecord er = erf.createRecord(data, 0);
+            er.fillFields(data, 0, erf);
+            left -= er.getRecordSize();
+
+            System.out.println(er);
+        }
+    }
+    protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
+        byte[] data = getData(escherDir, "EscherStm");
+        System.out.println();
+        System.out.println("EscherStm - " + data.length + " bytes long:");
+        if(data.length > 0)
+            dumpEscherStream(data);
+    }
+    protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
+        byte[] data = getData(escherDir, "EscherDelayStm");
+        System.out.println();
+        System.out.println("EscherDelayStm - " + data.length + " bytes long:");
+        if(data.length > 0)
+            dumpEscherStream(data);
+    }
+
+    public void dumpEnvelope() throws IOException {
+        byte[] data = getData(fs.getRoot(), "Envelope");
+
+        System.out.println();
+        System.out.println("Envelope - " + data.length + " bytes long:");
+    }
+
+    public void dumpContents() throws IOException {
+        byte[] data = getData(fs.getRoot(), "Contents");
+
+        System.out.println();
+        System.out.println("Contents - " + data.length + " bytes long:");
+
+        // 8 bytes, always seems to be
+        // E8 AC 2C 00 E8 03 05 01
+        // E8 AC 2C 00 E8 03 05 01
+
+        // 4 bytes - size of contents
+        // 13/15 00 00 01
+
+        // ....
+
+        // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
+
+        // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
+
+        // 01 18 30 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+
+        // 01 18 31 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+
+        // 01 18 32 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+    }
+
+    public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
+        byte[] data = getData(dir, "CONTENTS");
+
+        System.out.println();
+        System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+        // Between the start and 0x200 we have
+        //  CHNKINK(space) + 24 bytes
+        //  0x1800
+        //  TEXT + 6 bytes
+        //  TEXT + 8 bytes
+        //  0x1800
+        //  STSH + 6 bytes
+        //  STSH + 8 bytes
+        //  0x1800
+        //  STSH + 6 bytes
+        //  STSH + 8 bytes
+        // but towards 0x200 the pattern may
+        //  break down a little bit
+
+        // After the second of a given type,
+        //  it seems to be 4 bytes giving the start,
+        //  then 4 bytes giving the length, then
+        //  18 00
+        System.out.println(
+                new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
+                dumpBytes(data, 8, 0x20-8)
+        );
+
+        int pos = 0x20;
+        boolean sixNotEight = true;
+        while(pos < 0x200) {
+            if(sixNotEight) {
+                System.out.println(
+                        dumpBytes(data, pos, 2)
+                );
+                pos += 2;
+            }
+            String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
+            int blen = 8;
+            if(sixNotEight)
+                blen = 6;
+            System.out.println(
+                    text + " " + dumpBytes(data, pos+4, blen)
+            );
+
+            pos += 4 + blen;
+            sixNotEight = ! sixNotEight;
+        }
+
+        // Text from 0x200 onwards until we get
+        //  to \r(00)\n(00)(00)(00)
+        int textStop = -1;
+        for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
+            if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
+                textStop = i;
+            }
+        }
+        if(textStop > 0) {
+            int len = (textStop - 0x200) / 2;
+            System.out.println();
+            System.out.println(
+                    StringUtil.getFromUnicodeLE(data, 0x200, len)
+            );
+        }
+
+        // The font list comes slightly later
+
+        // The hyperlinks may come before the fonts,
+        //  or slightly in front
+    }
+    public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+        byte[] data = getData(dir, "CONTENTS");
+
+        System.out.println();
+        System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+        String[] startType = new String[20];
+        String[] endType = new String[20];
+        int[] optA = new int[20];
+        int[] optB = new int[20];
+        int[] optC = new int[20];
+        int[] from = new int[20];
+        int[] len = new int[20];
+
+        for(int i=0; i<20; i++) {
+            int offset = 0x20 + i*24;
+            if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+                // Has data
+                startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
+                optA[i] = LittleEndian.getUShort(data, offset+6);
+                optB[i] = LittleEndian.getUShort(data, offset+8);
+                optC[i] = LittleEndian.getUShort(data, offset+10);
+                endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
+                from[i] = (int)LittleEndian.getUInt(data, offset+16);
+                len[i] = (int)LittleEndian.getUInt(data, offset+20);
+            } else {
+                // Doesn't have data
+            }
+        }
+
+        String text = StringUtil.getFromUnicodeLE(
+                data, from[0], len[0]/2
+        );
+
+        // Dump
+        for(int i=0; i<20; i++) {
+            String num = Integer.toString(i);
+            if(i < 10) {
+                num = "0" + i;
+            }
+            System.out.print(num + " ");
+
+            if(startType[i] == null) {
+                System.out.println("(not present)");
+            } else {
+                System.out.println(
+                        "\t" +
+                        startType[i] + " " +
+                        optA[i] + " " +
+                        optB[i] + " " +
+                        optC[i]
+                );
+                System.out.println(
+                        "\t" +
+                        endType[i] + " " +
+                        "from: " +
+                        Integer.toHexString(from[i]) +
+                        " (" + from[i] + ")" +
+                        ", len: " +
+                        Integer.toHexString(len[i]) +
+                        " (" + len[i] + ")"
+                );
+            }
+        }
+
+        // Text
+        System.out.println();
+        System.out.println("TEXT:");
+        System.out.println(text);
+        System.out.println();
+
+        // All the others
+        for(int i=0; i<20; i++) {
+            if(startType[i] == null) {
+                continue;
+            }
+            int start = from[i];
+
+            System.out.println(
+                    startType[i] + " -> " + endType[i] +
+                    " @ " + Integer.toHexString(start) +
+                    " (" + start + ")"
+            );
+            System.out.println("\t" + dumpBytes(data, start, 4));
+            System.out.println("\t" + dumpBytes(data, start+4, 4));
+            System.out.println("\t" + dumpBytes(data, start+8, 4));
+            System.out.println("\t(etc)");
+        }
+    }
+
+    protected void dump001CompObj(DirectoryNode dir) {
+        // TODO
+    }
+
+    public void dumpQuill() throws IOException {
+        DirectoryNode quillDir = (DirectoryNode)
+            fs.getRoot().getEntry("Quill");
+        DirectoryNode quillSubDir = (DirectoryNode)
+            quillDir.getEntry("QuillSub");
+
+        dump001CompObj(quillSubDir);
+        dumpCONTENTSraw(quillSubDir);
+        dumpCONTENTSguessed(quillSubDir);
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java Mon Dec 23 09:18:38 2019
@@ -33,53 +33,53 @@ import org.apache.poi.util.HexDump;
  *  what the format of them is.
  */
 public final class PLCDumper {
-	private HPBFDocument doc;
-	private QuillContents qc;
+    private HPBFDocument doc;
+    private QuillContents qc;
 
-	public PLCDumper(HPBFDocument hpbfDoc) {
-		doc = hpbfDoc;
-		qc = doc.getQuillContents();
-	}
-	public PLCDumper(POIFSFileSystem fs) throws IOException {
-		this(new HPBFDocument(fs));
-	}
-	public PLCDumper(InputStream inp) throws IOException {
-		this(new POIFSFileSystem(inp));
-	}
-
-	public static void main(String[] args) throws Exception {
-		if(args.length < 1) {
-			System.err.println("Use:");
-			System.err.println("  PLCDumper <filename>");
-			System.exit(1);
-		}
-
-		try (FileInputStream fis = new FileInputStream(args[0])) {
-			PLCDumper dump = new PLCDumper(fis);
-
-			System.out.println("Dumping " + args[0]);
-			dump.dumpPLC();
-		}
-	}
-
-	private void dumpPLC() {
-		QCBit[] bits = qc.getBits();
-
-		for(int i=0; i<bits.length; i++) {
-			if(bits[i] == null) continue;
-			if(bits[i].getBitType().equals("PLC ")) {
-				dumpBit(bits[i], i);
-			}
-		}
-	}
-
-	private void dumpBit(QCBit bit, int index) {
-		System.out.println();
-		System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
-		System.out.println("  Is a " + bit.getThingType() + ", number is " + bit.getOptA());
-		System.out.println("  Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
-		System.out.println("  Runs for  " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
+    public PLCDumper(HPBFDocument hpbfDoc) {
+        doc = hpbfDoc;
+        qc = doc.getQuillContents();
+    }
+    public PLCDumper(POIFSFileSystem fs) throws IOException {
+        this(new HPBFDocument(fs));
+    }
+    public PLCDumper(InputStream inp) throws IOException {
+        this(new POIFSFileSystem(inp));
+    }
+
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  PLCDumper <filename>");
+            System.exit(1);
+        }
+
+        try (FileInputStream fis = new FileInputStream(args[0])) {
+            PLCDumper dump = new PLCDumper(fis);
+
+            System.out.println("Dumping " + args[0]);
+            dump.dumpPLC();
+        }
+    }
+
+    private void dumpPLC() {
+        QCBit[] bits = qc.getBits();
+
+        for(int i=0; i<bits.length; i++) {
+            if(bits[i] == null) continue;
+            if(bits[i].getBitType().equals("PLC ")) {
+                dumpBit(bits[i], i);
+            }
+        }
+    }
+
+    private void dumpBit(QCBit bit, int index) {
+        System.out.println();
+        System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
+        System.out.println("  Is a " + bit.getThingType() + ", number is " + bit.getOptA());
+        System.out.println("  Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
+        System.out.println("  Runs for  " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
 
-		System.out.println(HexDump.dump(bit.getData(), 0, 0));
-	}
+        System.out.println(HexDump.dump(bit.getData(), 0, 0));
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Mon Dec 23 09:18:38 2019
@@ -50,65 +50,65 @@ public final class PublisherTextExtracto
       this(new POIFSFileSystem(is));
    }
 
-	/**
-	 * Should a call to getText() return hyperlinks inline
-	 *  with the text?
-	 * Default is no
-	 */
-	public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
-		this.hyperlinksByDefault = hyperlinksByDefault;
-	}
-
-
-	public String getText() {
-		StringBuilder text = new StringBuilder();
-
-		// Get the text from the Quill Contents
-		QCBit[] bits = doc.getQuillContents().getBits();
-		for (QCBit bit1 : bits) {
-			if (bit1 != null && bit1 instanceof QCTextBit) {
-				QCTextBit t = (QCTextBit) bit1;
-				text.append(t.getText().replace('\r', '\n'));
-			}
-		}
-
-		// If requested, add in the hyperlinks
-		// Ideally, we'd do these inline, but the hyperlink
-		//  positions are relative to the text area the
-		//  hyperlink is in, and we have yet to figure out
-		//  how to tie that together.
-		if(hyperlinksByDefault) {
-			for (QCBit bit : bits) {
-				if (bit != null && bit instanceof Type12) {
-					Type12 hyperlinks = (Type12) bit;
-					for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
-						text.append("<");
-						text.append(hyperlinks.getHyperlink(j));
-						text.append(">\n");
-					}
-				}
-			}
-		}
-
-		// Get more text
-		// TODO
-
-		return text.toString();
-	}
-
-
-	public static void main(String[] args) throws Exception {
-		if(args.length == 0) {
-			System.err.println("Use:");
-			System.err.println("  PublisherTextExtractor <file.pub>");
-		}
-
-		for (String arg : args) {
-			try (FileInputStream fis = new FileInputStream(arg)) {
-				PublisherTextExtractor te = new PublisherTextExtractor(fis);
-				System.out.println(te.getText());
-				te.close();
-			}
-		}
-	}
+    /**
+     * Should a call to getText() return hyperlinks inline
+     *  with the text?
+     * Default is no
+     */
+    public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
+        this.hyperlinksByDefault = hyperlinksByDefault;
+    }
+
+
+    public String getText() {
+        StringBuilder text = new StringBuilder();
+
+        // Get the text from the Quill Contents
+        QCBit[] bits = doc.getQuillContents().getBits();
+        for (QCBit bit1 : bits) {
+            if (bit1 != null && bit1 instanceof QCTextBit) {
+                QCTextBit t = (QCTextBit) bit1;
+                text.append(t.getText().replace('\r', '\n'));
+            }
+        }
+
+        // If requested, add in the hyperlinks
+        // Ideally, we'd do these inline, but the hyperlink
+        //  positions are relative to the text area the
+        //  hyperlink is in, and we have yet to figure out
+        //  how to tie that together.
+        if(hyperlinksByDefault) {
+            for (QCBit bit : bits) {
+                if (bit != null && bit instanceof Type12) {
+                    Type12 hyperlinks = (Type12) bit;
+                    for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
+                        text.append("<");
+                        text.append(hyperlinks.getHyperlink(j));
+                        text.append(">\n");
+                    }
+                }
+            }
+        }
+
+        // Get more text
+        // TODO
+
+        return text.toString();
+    }
+
+
+    public static void main(String[] args) throws Exception {
+        if(args.length == 0) {
+            System.err.println("Use:");
+            System.err.println("  PublisherTextExtractor <file.pub>");
+        }
+
+        for (String arg : args) {
+            try (FileInputStream fis = new FileInputStream(arg)) {
+                PublisherTextExtractor te = new PublisherTextExtractor(fis);
+                System.out.println(te.getText());
+                te.close();
+            }
+        }
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java Mon Dec 23 09:18:38 2019
@@ -53,158 +53,158 @@ import org.apache.poi.util.LittleEndian;
  *  lucene indexers) that would ever want to use this!
  */
 public final class QuickButCruddyTextExtractor {
-	private POIFSFileSystem fs;
-	private InputStream is;
-	private byte[] pptContents;
-
-	/**
-	 * Really basic text extractor, that will also return lots of crud text.
-	 * Takes a single argument, the file to extract from
-	 */
-	public static void main(String[] args) throws IOException
-	{
-		if(args.length < 1) {
-			System.err.println("Useage:");
-			System.err.println("\tQuickButCruddyTextExtractor <file>");
-			System.exit(1);
-		}
-
-		String file = args[0];
-
-		QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
-		System.out.println(ppe.getTextAsString());
-		ppe.close();
-	}
-
-	/**
-	 * Creates an extractor from a given file name
-	 * @param fileName
-	 */
-	@SuppressWarnings("resource")
+    private POIFSFileSystem fs;
+    private InputStream is;
+    private byte[] pptContents;
+
+    /**
+     * Really basic text extractor, that will also return lots of crud text.
+     * Takes a single argument, the file to extract from
+     */
+    public static void main(String[] args) throws IOException
+    {
+        if(args.length < 1) {
+            System.err.println("Useage:");
+            System.err.println("\tQuickButCruddyTextExtractor <file>");
+            System.exit(1);
+        }
+
+        String file = args[0];
+
+        QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
+        System.out.println(ppe.getTextAsString());
+        ppe.close();
+    }
+
+    /**
+     * Creates an extractor from a given file name
+     * @param fileName
+     */
+    @SuppressWarnings("resource")
     public QuickButCruddyTextExtractor(String fileName) throws IOException {
-		this(new POIFSFileSystem(new File(fileName)));
-	}
+        this(new POIFSFileSystem(new File(fileName)));
+    }
 
-	/**
-	 * Creates an extractor from a given input stream
-	 * @param iStream
-	 */
+    /**
+     * Creates an extractor from a given input stream
+     * @param iStream
+     */
     @SuppressWarnings("resource")
-	public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
-		this(new POIFSFileSystem(iStream));
-		is = iStream;
-	}
-
-	/**
-	 * Creates an extractor from a POIFS Filesystem
-	 * @param poifs
-	 */
-	public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
-		fs = poifs;
-
-		// Find the PowerPoint bit, and get out the bytes
-		InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
-		pptContents = IOUtils.toByteArray(pptIs);
-		pptIs.close();
-	}
-
-
-	/**
-	 * Shuts down the underlying streams
-	 */
-	public void close() throws IOException {
-		if(is != null) { is.close(); }
-		fs = null;
-	}
-
-	/**
-	 * Fetches the ALL the text of the powerpoint file, as a single string
-	 */
-	public String getTextAsString() {
-		StringBuilder ret = new StringBuilder();
-		List<String> textV = getTextAsVector();
-		for(String text : textV) {
-			ret.append(text);
-			if(! text.endsWith("\n")) {
-				ret.append('\n');
-			}
-		}
-		return ret.toString();
-	}
-
-	/**
-	 * Fetches the ALL the text of the powerpoint file, in a List of
-	 *  strings, one per text record
-	 */
-	public List<String> getTextAsVector() {
-	    List<String> textV = new ArrayList<>();
+    public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
+        this(new POIFSFileSystem(iStream));
+        is = iStream;
+    }
+
+    /**
+     * Creates an extractor from a POIFS Filesystem
+     * @param poifs
+     */
+    public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
+        fs = poifs;
+
+        // Find the PowerPoint bit, and get out the bytes
+        InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
+        pptContents = IOUtils.toByteArray(pptIs);
+        pptIs.close();
+    }
+
+
+    /**
+     * Shuts down the underlying streams
+     */
+    public void close() throws IOException {
+        if(is != null) { is.close(); }
+        fs = null;
+    }
+
+    /**
+     * Fetches the ALL the text of the powerpoint file, as a single string
+     */
+    public String getTextAsString() {
+        StringBuilder ret = new StringBuilder();
+        List<String> textV = getTextAsVector();
+        for(String text : textV) {
+            ret.append(text);
+            if(! text.endsWith("\n")) {
+                ret.append('\n');
+            }
+        }
+        return ret.toString();
+    }
+
+    /**
+     * Fetches the ALL the text of the powerpoint file, in a List of
+     *  strings, one per text record
+     */
+    public List<String> getTextAsVector() {
+        List<String> textV = new ArrayList<>();
 
-		// Set to the start of the file
-		int walkPos = 0;
+        // Set to the start of the file
+        int walkPos = 0;
 
-		// Start walking the file, looking for the records
-		while(walkPos != -1) {
+        // Start walking the file, looking for the records
+        while(walkPos != -1) {
             walkPos = findTextRecords(walkPos,textV);
-		}
+        }
 
-		// Return what we find
-		return textV;
-	}
-
-	/**
-	 * For the given position, look if the record is a text record, and wind
-	 *  on after.
-	 * If it is a text record, grabs out the text. Whatever happens, returns
-	 *  the position of the next record, or -1 if no more.
-	 */
-	public int findTextRecords(int startPos, List<String> textV) {
-		// Grab the length, and the first option byte
-		// Note that the length doesn't include the 8 byte atom header
-		int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
-		byte opt = pptContents[startPos];
-
-		// If it's a container, step into it and return
-		// (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
-		int container = opt & 0x0f;
-		if(container == 0x0f) {
-			return (startPos+8);
-		}
-
-		// Otherwise, check the type to see if it's text
-		int type = LittleEndian.getUShort(pptContents,startPos+2);
-
-		// TextBytesAtom
-		if(type == RecordTypes.TextBytesAtom.typeID) {
-			TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
-			String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
-			textV.add(text);
-		}
-		// TextCharsAtom
-		if(type == RecordTypes.TextCharsAtom.typeID) {
-			TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+        // Return what we find
+        return textV;
+    }
+
+    /**
+     * For the given position, look if the record is a text record, and wind
+     *  on after.
+     * If it is a text record, grabs out the text. Whatever happens, returns
+     *  the position of the next record, or -1 if no more.
+     */
+    public int findTextRecords(int startPos, List<String> textV) {
+        // Grab the length, and the first option byte
+        // Note that the length doesn't include the 8 byte atom header
+        int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
+        byte opt = pptContents[startPos];
+
+        // If it's a container, step into it and return
+        // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
+        int container = opt & 0x0f;
+        if(container == 0x0f) {
+            return (startPos+8);
+        }
+
+        // Otherwise, check the type to see if it's text
+        int type = LittleEndian.getUShort(pptContents,startPos+2);
+
+        // TextBytesAtom
+        if(type == RecordTypes.TextBytesAtom.typeID) {
+            TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+            String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
+            textV.add(text);
+        }
+        // TextCharsAtom
+        if(type == RecordTypes.TextCharsAtom.typeID) {
+            TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
             String text = HSLFTextParagraph.toExternalString(tca.getText(), -1);
             textV.add(text);
-		}
+        }
 
-		// CString (doesn't go via a TextRun)
-		if(type == RecordTypes.CString.typeID) {
-			CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
-			String text = cs.getText();
-
-			// Ignore the ones we know to be rubbish
-			if(text.equals("___PPT10")) {
-			} else if(text.equals("Default Design")) {
-			} else {
-				textV.add(text);
-			}
-		}
-
-
-		// Wind on by the atom length, and check we're not at the end
-		int newPos = (startPos + 8 + len);
-		if(newPos > (pptContents.length - 8)) {
-			newPos = -1;
-		}
-		return newPos;
-	}
+        // CString (doesn't go via a TextRun)
+        if(type == RecordTypes.CString.typeID) {
+            CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
+            String text = cs.getText();
+
+            // Ignore the ones we know to be rubbish
+            if(text.equals("___PPT10")) {
+            } else if(text.equals("Default Design")) {
+            } else {
+                textV.add(text);
+            }
+        }
+
+
+        // Wind on by the atom length, and check we're not at the end
+        int newPos = (startPos + 8 + len);
+        if(newPos > (pptContents.length - 8)) {
+            newPos = -1;
+        }
+        return newPos;
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org