You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@poi.apache.org by Fermin Da Costa Gomez <da...@dcgconsultancy.nl> on 2010/01/21 21:35:45 UTC
Modified XSL2CSV example
Hi,
Attached y'll find a modified version of the XLS2CSV example written by Nick
Burch.
I required a version that was able to take the different sheet into
consideration instead of dumping all data in 1 file.
This version can take a sheetname as a input or a null.
In the latter case the whole sheet will be exported on a file per sheet
basis.
I call the class from a Utility class using the following two lines
...
XLS2CSV xls2csv=new
XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
xls2csv.process(sheetName);
...
I hope it can save someone a few cycles .. ;-)
Cheers,
Fermin DCG
--
“The reasonable man adapts himself to the world; the unreasonable one
persists in trying to adapt the world to himself. Therefore all progress
depends on the unreasonable man.”
- George Bernard Shaw (1856 - 1950)
Re: Modified XSL2CSV example
Posted by MSB <ma...@tiscali.co.uk>.
Thank you for this, I am confident that others will find it useful.
Yours
Mark B
f.dcg wrote:
>
> Hi,
>
> Attached y'll find a modified version of the XLS2CSV example written by
> Nick
> Burch.
> I required a version that was able to take the different sheet into
> consideration instead of dumping all data in 1 file.
>
> This version can take a sheetname as a input or a null.
> In the latter case the whole sheet will be exported on a file per sheet
> basis.
>
> I call the class from a Utility class using the following two lines
> ...
> XLS2CSV xls2csv=new
> XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
> xls2csv.process(sheetName);
> ...
>
> I hope it can save someone a few cycles .. ;-)
>
> Cheers,
>
> Fermin DCG
>
>
> --
> “The reasonable man adapts himself to the world; the unreasonable one
> persists in trying to adapt the world to himself. Therefore all progress
> depends on the unreasonable man.”
> - George Bernard Shaw (1856 - 1950)
>
> package file;
>
> /* ====================================================================
> Licensed to the Apache Software Foundation (ASF) under one or more
> contributor license agreements. See the NOTICE file distributed with
> this work for additional information regarding copyright ownership.
> The ASF licenses this file to You under the Apache License, Version 2.0
> (the "License"); you may not use this file except in compliance with
> the License. You may obtain a copy of the License at
>
> http://www.apache.org/licenses/LICENSE-2.0
>
> Unless required by applicable law or agreed to in writing, software
> distributed under the License is distributed on an "AS IS" BASIS,
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> See the License for the specific language governing permissions and
> limitations under the License.
> ==================================================================== */
>
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.FileNotFoundException;
> import java.io.IOException;
> import java.io.PrintStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.Map;
>
> import org.apache.commons.io.FilenameUtils;
> import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
> import org.apache.poi.hssf.eventusermodel.HSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFRequest;
> import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
> import
> org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
> import org.apache.poi.hssf.model.HSSFFormulaParser;
> import org.apache.poi.hssf.record.BOFRecord;
> import org.apache.poi.hssf.record.BlankRecord;
> import org.apache.poi.hssf.record.BoolErrRecord;
> import org.apache.poi.hssf.record.BoundSheetRecord;
> import org.apache.poi.hssf.record.FormulaRecord;
> import org.apache.poi.hssf.record.LabelRecord;
> import org.apache.poi.hssf.record.LabelSSTRecord;
> import org.apache.poi.hssf.record.NoteRecord;
> import org.apache.poi.hssf.record.NumberRecord;
> import org.apache.poi.hssf.record.RKRecord;
> import org.apache.poi.hssf.record.Record;
> import org.apache.poi.hssf.record.SSTRecord;
> import org.apache.poi.hssf.record.StringRecord;
> import org.apache.poi.hssf.usermodel.HSSFWorkbook;
> import org.apache.poi.poifs.filesystem.POIFSFileSystem;
>
>
> /**
> * A XLS -> CSV processor, that uses the MissingRecordAware EventModel
> code to ensure it outputs all columns and rows.
> *
> * @author Nick Burch
> */
> public class XLS2CSV implements HSSFListener {
> private int minColumns;
> private POIFSFileSystem fs;
> private PrintStream output;
>
> private int lastRowNumber;
> private int lastColumnNumber;
>
> /** Should we output the formula, or the value it has? */
> private boolean outputFormulaValues=true;
>
> /** For parsing Formulas */
> private SheetRecordCollectingListener workbookBuildingListener;
> private HSSFWorkbook stubWorkbook;
>
> // Records we pick up as we process
> private SSTRecord sstRecord;
> private FormatTrackingHSSFListener formatListener;
>
> /** So we known which sheet we're on */
> private int sheetIndex=0;
> private String sheetName=null;
> private BoundSheetRecord[] orderedBSRs;
> private ArrayList boundSheetRecords=new ArrayList();
>
> // For handling formulas with string results
> private int nextRow;
> private int nextColumn;
> private boolean outputNextStringRecord;
>
> /**
> * Creates a new XLS -> CSV converter
> *
> * @param fs
> * The POIFSFileSystem to process
> * @param output
> * The PrintStream to output the CSV to
> * @param minColumns
> * The minimum number of columns to output, or -1 for no minimum
> */
> public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
> this.fs=fs;
> this.output=output;
> this.minColumns=minColumns;
> }
>
> private String csvFilePrefix=null;
> /**
> * Creates a new XLS -> CSV converter
> *
> * @param filename The file to process
> * @param minColumns The minimum number of columns to output, or -1 for
> no minimum
> * @throws IOException
> * @throws FileNotFoundException
> */
> public XLS2CSV(String filename, int minColumns) throws IOException,
> FileNotFoundException {
> //this(new POIFSFileSystem(new FileInputStream(filename)), new
> PrintStream(new File(filename+".csv")), minColumns);
> this(new POIFSFileSystem(new FileInputStream(filename)), null,
> minColumns);
> File f = new File(filename);
> String fname = f.getCanonicalPath();
>
> String fileToSort= FilenameUtils.getBaseName(fname);
> String pathToFile= "/"+ FilenameUtils.getPath(fname);
> csvFilePrefix = pathToFile+ fileToSort+ "-";
> }
>
> /**
> * Initiates the processing of the XLS file to CSV
> */
> public void process() throws IOException {
> // If a mismatch occurs skip the whole thing
> MissingRecordAwareHSSFListener listener=new
> MissingRecordAwareHSSFListener(this);
> formatListener=new FormatTrackingHSSFListener(listener);
>
> HSSFEventFactory factory=new HSSFEventFactory();
> HSSFRequest request=new HSSFRequest();
>
> if (outputFormulaValues) {
> request.addListenerForAllRecords(formatListener);
> } else {
> workbookBuildingListener=new
> SheetRecordCollectingListener(formatListener);
>
> request.addListenerForAllRecords(workbookBuildingListener);
> }
>
> factory.processWorkbookEvents(request, fs);
>
> for (PrintStream output : outputStreams.values()) {
> output.flush();
> output.close();
> }
> }
>
> /**
> * Initiates the processing of an XLS tab-sheet to a single CSV file
> */
> public void process(String sheetName) throws IOException {
> this.sheetName=sheetName;
> process();
> }
>
> private Map<Integer, String> sheetIndices= new HashMap<Integer,
> String>(3);
> private Map<String, PrintStream> outputStreams=new HashMap<String,
> PrintStream>(3);
>
> /**
> * Main HSSFListener method, processes events, and outputs the CSV as the
> file is processed.
> */
> public void processRecord(Record record) {
> int thisRow=-1;
> int thisColumn=-1;
> String thisStr=null;
>
> switch (record.getSid()) {
> case BOFRecord.sid:
> BOFRecord br=(BOFRecord) record;
> if (br.getType()==BOFRecord.TYPE_WORKSHEET) {
> // Create sub workbook if required
> if (workbookBuildingListener!=null&&stubWorkbook==null) {
> stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook();
> }
>
> // Output the worksheet name
> // Works by ordering the BSRs by the location of their
> BOFRecords, and then knowing that we process BOFRecords in byte offset
> order
>
> sheetIndex++;
> System.out.println("Encountered new sheet reference for "+
> sheetIndices.get(sheetIndex));
> // Set the proper output stream for the sheet
> if (this.sheetName==null || "".equals(sheetName)) {
> this.output = outputStreams.get(sheetIndices.get(sheetIndex));
> } else if (sheetIndices.get(sheetIndex).equals(sheetName)) { //
> Test for proper sheetname
> this.output = outputStreams.get(sheetName);
> }
>
> if (orderedBSRs==null) {
>
> orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords);
> }
> }
> break;
>
> case BoundSheetRecord.sid:
> BoundSheetRecord bsr=(BoundSheetRecord) record;
> Integer sIdx = sheetIndices.size()+ 1;
> sheetIndices.put( sIdx, bsr.getSheetname());
>
> String exportFilename = csvFilePrefix+ bsr.getSheetname()+ ".csv";
> // Export to a csv file output-stream
> PrintStream output=null;
> // Set the proper output stream for the sheet
> if (this.sheetName==null || "".equals(sheetName)) {
> try {
> output=new PrintStream(new File(exportFilename));
> } catch (FileNotFoundException e) {
> e.printStackTrace();
> }
> outputStreams.put(bsr.getSheetname(), output);
> } else if (bsr.getSheetname().equals(sheetName)) { // Test for
> proper sheetname
> try {
> output=new PrintStream(new File(exportFilename));
> } catch (FileNotFoundException e) {
> e.printStackTrace();
> }
> outputStreams.put(bsr.getSheetname(), output);
> }
>
> System.out.println("Detected sheet named: "+bsr.getSheetname());
>
> if (bsr.getSheetname().equals(this.sheetName)) { // Only process one
> specified sheet
> boundSheetRecords.add(record);
> }
>
> break;
>
> case SSTRecord.sid:
> sstRecord=(SSTRecord) record;
> break;
>
> case BlankRecord.sid:
> BlankRecord brec=(BlankRecord) record;
>
> thisRow=brec.getRow();
> thisColumn=brec.getColumn();
> thisStr="";
> break;
> case BoolErrRecord.sid:
> BoolErrRecord berec=(BoolErrRecord) record;
>
> thisRow=berec.getRow();
> thisColumn=berec.getColumn();
> thisStr="";
> break;
>
> case FormulaRecord.sid:
> FormulaRecord frec=(FormulaRecord) record;
>
> thisRow=frec.getRow();
> thisColumn=frec.getColumn();
>
> if (outputFormulaValues) {
> if (Double.isNaN(frec.getValue())) {
> // Formula result is a string
> // This is stored in the next record
> outputNextStringRecord=true;
> nextRow=frec.getRow();
> nextColumn=frec.getColumn();
> } else {
> thisStr=formatListener.formatNumberDateCell(frec);
> }
> } else {
> thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,
> frec.getParsedExpression())+'"';
> }
> break;
> case StringRecord.sid:
> if (outputNextStringRecord) {
> // String for formula
> StringRecord srec=(StringRecord) record;
> thisStr=srec.getString();
> thisRow=nextRow;
> thisColumn=nextColumn;
> outputNextStringRecord=false;
> }
> break;
>
> case LabelRecord.sid:
> LabelRecord lrec=(LabelRecord) record;
>
> thisRow=lrec.getRow();
> thisColumn=lrec.getColumn();
> thisStr='"'+lrec.getValue()+'"';
> break;
> case LabelSSTRecord.sid:
> LabelSSTRecord lsrec=(LabelSSTRecord) record;
>
> thisRow=lsrec.getRow();
> thisColumn=lsrec.getColumn();
> if (sstRecord==null) {
> thisStr='"'+"(No SST Record, can't identify string)"+'"';
> } else {
>
> thisStr='"'+sstRecord.getString(lsrec.getSSTIndex()).toString()+'"';
> }
> break;
> case NoteRecord.sid:
> NoteRecord nrec=(NoteRecord) record;
>
> thisRow=nrec.getRow();
> thisColumn=nrec.getColumn();
> // TODO: Find object to match nrec.getShapeId()
> thisStr='"'+"(TODO)"+'"';
> break;
> case NumberRecord.sid:
> NumberRecord numrec=(NumberRecord) record;
>
> thisRow=numrec.getRow();
> thisColumn=numrec.getColumn();
>
> // Format
> thisStr=formatListener.formatNumberDateCell(numrec);
> break;
> case RKRecord.sid:
> RKRecord rkrec=(RKRecord) record;
>
> thisRow=rkrec.getRow();
> thisColumn=rkrec.getColumn();
> thisStr='"'+"(TODO)"+'"';
> break;
> default:
> break;
> }
>
>
> // Handle new row, but only if it belongs to the sheet to be processed
> if (thisRow!=-1&&thisRow!=lastRowNumber) {
> lastColumnNumber=-1;
> }
>
> if (output!=null) { // Make sure there is an outputStream
> // Handle missing column
> if (record instanceof MissingCellDummyRecord) {
> MissingCellDummyRecord mc=(MissingCellDummyRecord) record;
> thisRow=mc.getRow();
> thisColumn=mc.getColumn();
> thisStr="";
> }
> // If we got something to print out, do so
> if (thisStr!=null) {
> if (thisColumn>0) {
> output.print(',');
> //System.out.print(',');
> }
> output.print(thisStr);
> //System.out.print(thisStr);
> }
> // Update column and row count
> if (thisRow>-1)
> lastRowNumber=thisRow;
> if (thisColumn>-1)
> lastColumnNumber=thisColumn;
> // Handle end of row
> if (record instanceof LastCellOfRowDummyRecord) {
> // Print out any missing commas if needed
> if (minColumns>0) {
> // Columns are 0 based
> if (lastColumnNumber==-1) {
> lastColumnNumber=0;
> }
> for (int i=lastColumnNumber; i< (minColumns); i++) {
> output.print(',');
> //System.out.print(',');
> }
> }
>
> // We're onto a new row
> lastColumnNumber=-1;
>
> // End the row
> output.println();
> //System.out.println();
> }
> }
>
> }
>
> public static void main(String[] args) throws Exception {
> if (args.length<1) {
> System.err.println("Use:");
> System.err.println(" XLS2CSVmra <xls file> [min columns]");
> System.exit(1);
> }
>
> int minColumns=-1;
> if (args.length>=2) {
> minColumns=Integer.parseInt(args[1]);
> }
>
> XLS2CSV xls2csv=new XLS2CSV(args[0], minColumns);
> xls2csv.process();
> }
> }
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
> For additional commands, e-mail: user-help@poi.apache.org
>
--
View this message in context: http://old.nabble.com/Modified-XSL2CSV-example-tp27264115p27269649.html
Sent from the POI - User mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
For additional commands, e-mail: user-help@poi.apache.org