You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@poi.apache.org by Fermin Da Costa Gomez <da...@dcgconsultancy.nl> on 2010/01/21 21:35:45 UTC

Modified XSL2CSV example

Hi,

Attached y'll find a modified version of the XLS2CSV example written by Nick
Burch.
I required a version that was able to take the different sheet into
consideration instead of dumping all data in 1 file.

This version can take a sheetname as a input or a null.
In the latter case the whole sheet will be exported on a file per sheet
basis.

I call the class from a Utility class using the following two lines
...
            XLS2CSV xls2csv=new
XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
            xls2csv.process(sheetName);
...

I hope it can save someone a few cycles .. ;-)

Cheers,

Fermin DCG


-- 
“The reasonable man adapts himself to the world; the unreasonable one
persists in trying to adapt the world to himself. Therefore all progress
depends on the unreasonable man.”
- George Bernard Shaw (1856 - 1950)

Re: Modified XSL2CSV example

Posted by MSB <ma...@tiscali.co.uk>.
Thank you for this, I am confident that others will find it useful.

Yours

Mark B


f.dcg wrote:
> 
> Hi,
> 
> Attached y'll find a modified version of the XLS2CSV example written by
> Nick
> Burch.
> I required a version that was able to take the different sheet into
> consideration instead of dumping all data in 1 file.
> 
> This version can take a sheetname as a input or a null.
> In the latter case the whole sheet will be exported on a file per sheet
> basis.
> 
> I call the class from a Utility class using the following two lines
> ...
>             XLS2CSV xls2csv=new
> XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
>             xls2csv.process(sheetName);
> ...
> 
> I hope it can save someone a few cycles .. ;-)
> 
> Cheers,
> 
> Fermin DCG
> 
> 
> -- 
> “The reasonable man adapts himself to the world; the unreasonable one
> persists in trying to adapt the world to himself. Therefore all progress
> depends on the unreasonable man.”
> - George Bernard Shaw (1856 - 1950)
> 
> package file;
> 
> /* ====================================================================
>  Licensed to the Apache Software Foundation (ASF) under one or more
>  contributor license agreements.  See the NOTICE file distributed with
>  this work for additional information regarding copyright ownership.
>  The ASF licenses this file to You under the Apache License, Version 2.0
>  (the "License"); you may not use this file except in compliance with
>  the License.  You may obtain a copy of the License at
> 
>  http://www.apache.org/licenses/LICENSE-2.0
> 
>  Unless required by applicable law or agreed to in writing, software
>  distributed under the License is distributed on an "AS IS" BASIS,
>  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>  See the License for the specific language governing permissions and
>  limitations under the License.
>  ==================================================================== */
> 
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.FileNotFoundException;
> import java.io.IOException;
> import java.io.PrintStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.Map;
> 
> import org.apache.commons.io.FilenameUtils;
> import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
> import org.apache.poi.hssf.eventusermodel.HSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFRequest;
> import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
> import
> org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
> import org.apache.poi.hssf.model.HSSFFormulaParser;
> import org.apache.poi.hssf.record.BOFRecord;
> import org.apache.poi.hssf.record.BlankRecord;
> import org.apache.poi.hssf.record.BoolErrRecord;
> import org.apache.poi.hssf.record.BoundSheetRecord;
> import org.apache.poi.hssf.record.FormulaRecord;
> import org.apache.poi.hssf.record.LabelRecord;
> import org.apache.poi.hssf.record.LabelSSTRecord;
> import org.apache.poi.hssf.record.NoteRecord;
> import org.apache.poi.hssf.record.NumberRecord;
> import org.apache.poi.hssf.record.RKRecord;
> import org.apache.poi.hssf.record.Record;
> import org.apache.poi.hssf.record.SSTRecord;
> import org.apache.poi.hssf.record.StringRecord;
> import org.apache.poi.hssf.usermodel.HSSFWorkbook;
> import org.apache.poi.poifs.filesystem.POIFSFileSystem;
> 
> 
> /**
>  * A XLS -> CSV processor, that uses the MissingRecordAware EventModel
> code to ensure it outputs all columns and rows.
>  * 
>  * @author Nick Burch
>  */
> public class XLS2CSV implements HSSFListener {
> 	private int minColumns;
> 	private POIFSFileSystem fs;
> 	private PrintStream output;
> 
> 	private int lastRowNumber;
> 	private int lastColumnNumber;
> 
> 	/** Should we output the formula, or the value it has? */
> 	private boolean outputFormulaValues=true;
> 
> 	/** For parsing Formulas */
> 	private SheetRecordCollectingListener workbookBuildingListener;
> 	private HSSFWorkbook stubWorkbook;
> 
> 	// Records we pick up as we process
> 	private SSTRecord sstRecord;
> 	private FormatTrackingHSSFListener formatListener;
> 
> 	/** So we known which sheet we're on */
> 	private int sheetIndex=0;
> 	private String sheetName=null;
> 	private BoundSheetRecord[] orderedBSRs;
> 	private ArrayList boundSheetRecords=new ArrayList();
> 
> 	// For handling formulas with string results
> 	private int nextRow;
> 	private int nextColumn;
> 	private boolean outputNextStringRecord;
> 
> 	/**
> 	 * Creates a new XLS -> CSV converter
> 	 * 
> 	 * @param fs
> 	 *          The POIFSFileSystem to process
> 	 * @param output
> 	 *          The PrintStream to output the CSV to
> 	 * @param minColumns
> 	 *          The minimum number of columns to output, or -1 for no minimum
> 	 */
> 	public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
> 		this.fs=fs;
> 		this.output=output;
> 		this.minColumns=minColumns;
> 	}
> 
> 	private String csvFilePrefix=null;
> 	/**
> 	 * Creates a new XLS -> CSV converter
> 	 * 
> 	 * @param filename The file to process
> 	 * @param minColumns The minimum number of columns to output, or -1 for
> no minimum
> 	 * @throws IOException
> 	 * @throws FileNotFoundException
> 	 */
> 	public XLS2CSV(String filename, int minColumns) throws IOException,
> FileNotFoundException {
> 		//this(new POIFSFileSystem(new FileInputStream(filename)), new
> PrintStream(new File(filename+".csv")), minColumns);
> 		this(new POIFSFileSystem(new FileInputStream(filename)), null,
> minColumns);
> 		File f = new File(filename);
> 		String fname = f.getCanonicalPath();
> 		
> 		String fileToSort= FilenameUtils.getBaseName(fname);
> 		String pathToFile= "/"+ FilenameUtils.getPath(fname);
> 		csvFilePrefix = pathToFile+ fileToSort+ "-";
> 	}
> 
> 	/**
> 	 * Initiates the processing of the XLS file to CSV
> 	 */
> 	public void process() throws IOException {
> 		// If a mismatch occurs skip the whole thing
> 		MissingRecordAwareHSSFListener listener=new
> MissingRecordAwareHSSFListener(this);
> 		formatListener=new FormatTrackingHSSFListener(listener);
> 
> 		HSSFEventFactory factory=new HSSFEventFactory();
> 		HSSFRequest request=new HSSFRequest();
> 
> 		if (outputFormulaValues) {
> 			request.addListenerForAllRecords(formatListener);
> 		} else {
> 			workbookBuildingListener=new
> SheetRecordCollectingListener(formatListener);
> 			
> 			request.addListenerForAllRecords(workbookBuildingListener);
> 		}
> 
> 		factory.processWorkbookEvents(request, fs);
> 
> 		for (PrintStream output : outputStreams.values()) {
> 	    output.flush();
> 	    output.close();
>     }
> 	}
> 
> 	/**
> 	 * Initiates the processing of an XLS tab-sheet to a single CSV file
> 	 */
> 	public void process(String sheetName) throws IOException {
> 		this.sheetName=sheetName;
> 		process();
> 	}
> 
> 	private Map<Integer, String> sheetIndices= new HashMap<Integer,
> String>(3);
> 	private Map<String, PrintStream> outputStreams=new HashMap<String,
> PrintStream>(3);
> 	
> 	/**
> 	 * Main HSSFListener method, processes events, and outputs the CSV as the
> file is processed.
> 	 */
> 	public void processRecord(Record record) {
> 		int thisRow=-1;
> 		int thisColumn=-1;
> 		String thisStr=null;
> 
> 		switch (record.getSid()) {
>       case BOFRecord.sid:
>         BOFRecord br=(BOFRecord) record;
>         if (br.getType()==BOFRecord.TYPE_WORKSHEET) {
>     	    // Create sub workbook if required
>     	    if (workbookBuildingListener!=null&&stubWorkbook==null) {
>     		    stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook();
>     	    }
> 
>     	    // Output the worksheet name
>     	    // Works by ordering the BSRs by the location of their
> BOFRecords, and then knowing that we process BOFRecords in byte offset
> order
> 
>     	    sheetIndex++;
>     	    System.out.println("Encountered new sheet reference for "+
> sheetIndices.get(sheetIndex));
>     	    // Set the proper output stream for the sheet
>     	    if (this.sheetName==null || "".equals(sheetName)) {
>     	    	this.output = outputStreams.get(sheetIndices.get(sheetIndex));
>     	    } else if (sheetIndices.get(sheetIndex).equals(sheetName)) { //
> Test for proper sheetname 
>     	    	this.output = outputStreams.get(sheetName);
>     	    }
>     	    
>     	    if (orderedBSRs==null) {
>     		   
> orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords);
>     	    }
>         }
>         break;
> 
>       case BoundSheetRecord.sid:
> 				BoundSheetRecord bsr=(BoundSheetRecord) record;
> 				Integer sIdx = sheetIndices.size()+ 1;
> 				sheetIndices.put( sIdx, bsr.getSheetname());
> 				
> 				String exportFilename = csvFilePrefix+ bsr.getSheetname()+ ".csv";
> 				// Export to a csv file output-stream
> 				PrintStream output=null;
>   	    // Set the proper output stream for the sheet
>   	    if (this.sheetName==null || "".equals(sheetName)) {
>           try {
>   	        output=new PrintStream(new File(exportFilename));
>           } catch (FileNotFoundException e) {
>   	        e.printStackTrace();
>           }
>   				outputStreams.put(bsr.getSheetname(), output);
>   	    } else if (bsr.getSheetname().equals(sheetName)) { // Test for
> proper sheetname 
>           try {
>   	        output=new PrintStream(new File(exportFilename));
>           } catch (FileNotFoundException e) {
>   	        e.printStackTrace();
>           }
>   				outputStreams.put(bsr.getSheetname(), output);
>   	    }
> 
> 				System.out.println("Detected sheet named: "+bsr.getSheetname());
> 
> 				if (bsr.getSheetname().equals(this.sheetName)) { // Only process one
> specified sheet
>       		boundSheetRecords.add(record);
>       	}
> 
>       	break;
> 
>       case SSTRecord.sid:
>         sstRecord=(SSTRecord) record;
>         break;
> 
>       case BlankRecord.sid:
>         BlankRecord brec=(BlankRecord) record;
> 
>         thisRow=brec.getRow();
>         thisColumn=brec.getColumn();
>         thisStr="";
>         break;
>       case BoolErrRecord.sid:
>         BoolErrRecord berec=(BoolErrRecord) record;
> 
>         thisRow=berec.getRow();
>         thisColumn=berec.getColumn();
>         thisStr="";
>         break;
> 
>       case FormulaRecord.sid:
>         FormulaRecord frec=(FormulaRecord) record;
> 
>         thisRow=frec.getRow();
>         thisColumn=frec.getColumn();
> 
>         if (outputFormulaValues) {
>     	    if (Double.isNaN(frec.getValue())) {
>     		    // Formula result is a string
>     		    // This is stored in the next record
>     		    outputNextStringRecord=true;
>     		    nextRow=frec.getRow();
>     		    nextColumn=frec.getColumn();
>     	    } else {
>     		    thisStr=formatListener.formatNumberDateCell(frec);
>     	    }
>         } else {
>     	    thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,
> frec.getParsedExpression())+'"';
>         }
>         break;
>       case StringRecord.sid:
>         if (outputNextStringRecord) {
>     	    // String for formula
>     	    StringRecord srec=(StringRecord) record;
>     	    thisStr=srec.getString();
>     	    thisRow=nextRow;
>     	    thisColumn=nextColumn;
>     	    outputNextStringRecord=false;
>         }
>         break;
> 
>       case LabelRecord.sid:
>         LabelRecord lrec=(LabelRecord) record;
> 
>         thisRow=lrec.getRow();
>         thisColumn=lrec.getColumn();
>         thisStr='"'+lrec.getValue()+'"';
>         break;
>       case LabelSSTRecord.sid:
>         LabelSSTRecord lsrec=(LabelSSTRecord) record;
> 
>         thisRow=lsrec.getRow();
>         thisColumn=lsrec.getColumn();
>         if (sstRecord==null) {
>     	    thisStr='"'+"(No SST Record, can't identify string)"+'"';
>         } else {
>     	   
> thisStr='"'+sstRecord.getString(lsrec.getSSTIndex()).toString()+'"';
>         }
>         break;
>       case NoteRecord.sid:
>         NoteRecord nrec=(NoteRecord) record;
> 
>         thisRow=nrec.getRow();
>         thisColumn=nrec.getColumn();
>         // TODO: Find object to match nrec.getShapeId()
>         thisStr='"'+"(TODO)"+'"';
>         break;
>       case NumberRecord.sid:
>         NumberRecord numrec=(NumberRecord) record;
> 
>         thisRow=numrec.getRow();
>         thisColumn=numrec.getColumn();
> 
>         // Format
>         thisStr=formatListener.formatNumberDateCell(numrec);
>         break;
>       case RKRecord.sid:
>         RKRecord rkrec=(RKRecord) record;
> 
>         thisRow=rkrec.getRow();
>         thisColumn=rkrec.getColumn();
>         thisStr='"'+"(TODO)"+'"';
>         break;
>       default:
>         break;
>     }
> 		
> 
>     // Handle new row, but only if it belongs to the sheet to be processed
>     if (thisRow!=-1&&thisRow!=lastRowNumber) {
> 	    lastColumnNumber=-1;
>     }
> 
>     if (output!=null) { // Make sure there is an outputStream
> 	    // Handle missing column
> 	    if (record instanceof MissingCellDummyRecord) {
> 		    MissingCellDummyRecord mc=(MissingCellDummyRecord) record;
> 		    thisRow=mc.getRow();
> 		    thisColumn=mc.getColumn();
> 		    thisStr="";
> 	    }
> 	    // If we got something to print out, do so
> 	    if (thisStr!=null) {
> 		    if (thisColumn>0) {
> 			    output.print(',');
> 			    //System.out.print(',');
> 		    }
> 		    output.print(thisStr);
> 		    //System.out.print(thisStr);
> 	    }
> 	    // Update column and row count
> 	    if (thisRow>-1)
> 		    lastRowNumber=thisRow;
> 	    if (thisColumn>-1)
> 		    lastColumnNumber=thisColumn;
> 	    // Handle end of row
> 	    if (record instanceof LastCellOfRowDummyRecord) {
> 		    // Print out any missing commas if needed
> 		    if (minColumns>0) {
> 			    // Columns are 0 based
> 			    if (lastColumnNumber==-1) {
> 				    lastColumnNumber=0;
> 			    }
> 			    for (int i=lastColumnNumber; i< (minColumns); i++) {
> 				    output.print(',');
> 				    //System.out.print(',');
> 			    }
> 		    }
> 
> 		    // We're onto a new row
> 		    lastColumnNumber=-1;
> 
> 		    // End the row
> 		    output.println();
> 		    //System.out.println();
> 	    }
>     }
>     
> 	}
> 
> 	public static void main(String[] args) throws Exception {
> 		if (args.length<1) {
> 			System.err.println("Use:");
> 			System.err.println("  XLS2CSVmra <xls file> [min columns]");
> 			System.exit(1);
> 		}
> 
> 		int minColumns=-1;
> 		if (args.length>=2) {
> 			minColumns=Integer.parseInt(args[1]);
> 		}
> 
> 		XLS2CSV xls2csv=new XLS2CSV(args[0], minColumns);
> 		xls2csv.process();
> 	}
> }
> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
> For additional commands, e-mail: user-help@poi.apache.org
> 

-- 
View this message in context: http://old.nabble.com/Modified-XSL2CSV-example-tp27264115p27269649.html
Sent from the POI - User mailing list archive at Nabble.com.


---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
For additional commands, e-mail: user-help@poi.apache.org