POI事件模式指北(三)- 读取Excel实战

  1. 云栖社区>
  2. 博客>
  3. 正文

POI事件模式指北(三)- 读取Excel实战

不科学计算 2019-02-24 19:59:03 浏览840
展开阅读全文

POI事件模式指北(三)- 读取Excel实战

1、简介

这篇文章将介绍一个完整的利用POI事件模式读取Excel的实例,目的是贴近实际使用中的情况,并且考虑了细节的问题而不仅仅满足于实现简单功能。

1.1、业务背景

系统中使用了POI的用户模式读取Excel文件,某一次用户上传了一个超过1M的文件,导致系统内存很快耗光,最后出现了OOM异常,最后分析决定使用事件模式进行改造以提高性能。

1.2、版本和依赖

下面列出来的是实例代码实际使用的情况,您可以选择其他版本,绝大多数情况下可以正常运行。

  • JDK6
  • poi-3.9.jar
  • poi-ooxml-3.9.jar
  • dom4j-1.6.1.jar
  • xmlbeans-2.3.0.jar

以上是实现读取Excel所需的最小jar包;如果你还需要或者想要学习POI的其他部分,请自行导入其他包;强烈建议导入POI全部jar包。

2、示例代码

下面的导入针对比较典型的导入场景,主要有以下假定条件:

  • 导入的文件列数是确定的
  • 需要导入的内容在第一个sheet中
  • 文件中所有数据都是有意义的且不包含无效数据

上面的假设代表一种典型场景,如果实际场景有所不同,只需要对下面代码进行简单改造即可。

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

public class ExcelEventParser {

    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    class XLSReader implements HSSFListener {
        
        //POIFS文件流
        private POIFSFileSystem fs;
        //当前行数
        private int lastRowNumber;
        //当前列数
        private int lastColumnNumber;
        //是否计算公式的值
        private boolean outputFormulaValues = true;

        private SheetRecordCollectingListener workbookBuildingListener;

        private HSSFWorkbook stubWorkbook;
        //共享字符集
        private SSTRecord sstRecord;

        private FormatTrackingHSSFListener formatListener;

        private int sheetIndex = -1;

        private List<Record> boundSheetRecords = new ArrayList<Record>();
        //公式单元格结果所在行
        private int nextRow;
        //公式单元格结果所在列
        private int nextColumn;
        //公式单元格是否存在计算值
        private boolean outputNextStringRecord;
        //当前行数
        private int curRow = 0;
        //当前最大列数
        private int maxRef = 0;
        
        //空单元格使用的填充字符
        private String NULL = "-";

        private List<String> row = new ArrayList<String>();

        private List<List<String>> rows = new ArrayList<List<String>>();

        /**
         * 读取Excel2003文件
         * @param filePath 文件路径
         * @return
         * @throws IOException
         */
        public List<List<String>> process(String filePath) throws IOException {
            //使用POIFS读取文件流
            this.fs = new POIFSFileSystem(new FileInputStream(filePath));
            MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
            formatListener = new FormatTrackingHSSFListener(listener);
            HSSFEventFactory factory = new HSSFEventFactory();
            HSSFRequest request = new HSSFRequest();

            if (outputFormulaValues) {
                //添加全部监听器
                request.addListenerForAllRecords(formatListener);
            } else {
                workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
                request.addListenerForAllRecords(workbookBuildingListener);
            }
            //处理文件流
            factory.processWorkbookEvents(request, fs);
            return rows;
        }

        /**
         * 依次处理每个监听的record
         * @param record
         * @return
         * @throws IOException
         */
        public void processRecord(Record record) {
            //只处理第一个sheet,其他全部忽略
            if (sheetIndex > 0) {
                return;
            }
            if (maxRef > MAX_COLUMN) {
                return;
            }
            int thisRow = -1;
            int thisColumn = -1;
            String thisStr = null;
            String value = null;
            //根据record的sid匹配类型分别进行处理
            //可以在这里添加业务逻辑
            switch (record.getSid()) {
            // 记录了sheetName
            case BoundSheetRecord.sid:
                boundSheetRecords.add(record);
                break;
            // Workbook、Sheet的开始
            case BOFRecord.sid:
                BOFRecord br = (BOFRecord) record;
                if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                    if (workbookBuildingListener != null && stubWorkbook == null) {
                        stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                    }
                    sheetIndex++;
                }
                break;
            //共享字符集
            case SSTRecord.sid:
                sstRecord = (SSTRecord) record;
                break;
            // 存在单元格样式的空单元格
            case BlankRecord.sid:
                BlankRecord brec = (BlankRecord) record;
                thisRow = brec.getRow();
                thisColumn = brec.getColumn();
                thisStr = NULL;
                row.add(thisColumn, thisStr);
                break;
            // 布尔或错误单元格
            case BoolErrRecord.sid:
                BoolErrRecord berec = (BoolErrRecord) record;
                thisRow = berec.getRow();
                thisColumn = berec.getColumn();
                thisStr = berec.getBooleanValue() + "";
                row.add(thisColumn, thisStr);
                break;
            // 公式单元格
            case FormulaRecord.sid:
                //调用方法计算单元格的值
                FormulaRecord frec = (FormulaRecord) record;
                thisRow = frec.getRow();
                thisColumn = frec.getColumn();
                if (outputFormulaValues) {
                    if (Double.isNaN(frec.getValue())) {
                        outputNextStringRecord = true;
                        nextRow = frec.getRow();
                        nextColumn = frec.getColumn();
                    } else {
                        thisStr = formatListener.formatNumberDateCell(frec);
                    }
                } else {
                    thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
                }
                row.add(thisColumn, thisStr);
                break;
            // 公式的计算结果单元格
            case StringRecord.sid:
                if (outputNextStringRecord) {
                    StringRecord srec = (StringRecord) record;
                    thisStr = srec.getString();
                    thisRow = nextRow;
                    thisColumn = nextColumn;
                    outputNextStringRecord = false;
                }
                break;
            // 文本单元格
            case LabelRecord.sid:
                LabelRecord lrec = (LabelRecord) record;
                curRow = thisRow = lrec.getRow();
                thisColumn = lrec.getColumn();
                value = lrec.getValue().trim();
                value = value.equals("") ? NULL : value;
                this.row.add(thisColumn, value);
                break;
            // 共用的文本单元格
            case LabelSSTRecord.sid:
                LabelSSTRecord lsrec = (LabelSSTRecord) record;
                curRow = thisRow = lsrec.getRow();
                thisColumn = lsrec.getColumn();
                if (sstRecord == null) {
                    row.add(thisColumn, NULL);
                } else {
                    value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                    value = value.equals("") ? NULL : value;
                    row.add(thisColumn, value);
                }
                break;
            // 数值单元格:数字单元格和日期单元格
            case NumberRecord.sid:
                NumberRecord numrec = (NumberRecord) record;
                curRow = thisRow = numrec.getRow();
                thisColumn = numrec.getColumn();
                //处理时间类型的数字
                value = formatListener.formatNumberDateCell(numrec).trim();
                value = value.equals("") ? NULL : value;
                row.add(thisColumn, value);
                break;

            default:
                break;
            }

            if (thisRow != -1 && thisRow != lastRowNumber) {
                lastColumnNumber = -1;
            }

            if (record instanceof MissingCellDummyRecord) {
                MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
                curRow = thisRow = mc.getRow();
                thisColumn = mc.getColumn();
                row.add(thisColumn, NULL);
            }

            if (thisRow > -1)
                lastRowNumber = thisRow;
            if (thisColumn > -1)
                lastColumnNumber = thisColumn;
            //进入行结束处理
            if (record instanceof LastCellOfRowDummyRecord) {

                if (maxRef > row.size()) {
                    int len = maxRef - row.size();
                    for (int i = 0; i < len; i++) {
                        lastColumnNumber++;
                        row.add(lastColumnNumber, NULL);
                    }
                }
                if (curRow == 0) {
                    maxRef = row.size();
                }
                rows.add(row);
                lastColumnNumber = -1;
                row = new ArrayList<String>();
            }
        }
    }

    class XLSXReader extends DefaultHandler {

        //样式数据
        private StylesTable stylesTable;
        //共享字符集
        private ReadOnlySharedStringsTable sharedStringsTable;
        //是否存在内联字符
        private boolean vIsOpen;
        //记录数据类型
        private xssfDataType nextDataType;
        
        private short formatIndex;
        private String formatString;
        //格式化类
        private final DataFormatter formatter;
        //当前所在列
        private int thisColumn = -1;
        //当前单元格值
        private StringBuffer value;
        //储存一列的数据
        private List<String> record = new ArrayList<String>();
        //储存全部数据
        private List<List<String>> rows = new ArrayList<List<String>>();
        //判断当前行有没有空单元格
        private boolean isCellNull = false;
        //前一个单元格和当前单元格
        private String preRef = null, ref = null;
        //避免同一个单元格重复计数
        private String Newcell = null;
        //计数当前行中的空格
        private int rowNull = 0;
        //当前单元格对应的序号
        private int curCol = 0;
        //当前行对应的序号
        private int curRow = 0;
        //空单元格使用的填充字符
        private String NULL = "-";

        /**
         * XLSXReader构造函数
         * @param styles 单元格样式数据
         * @param strings 共享字符集
         */
        public XLSXReader(StylesTable styles, ReadOnlySharedStringsTable strings) {
            this.stylesTable = styles;
            this.sharedStringsTable = strings;
            this.value = new StringBuffer();
            this.nextDataType = xssfDataType.NUMBER;
            this.formatter = new DataFormatter();
            record = new ArrayList<String>();
            rows.clear();
        }

        /**
         * 每当遇到开始标签时进入
         * @param uri 命名空间路径或空
         * @param localName 本地名称或空
         * @param name 标签名称
         * @param attributes 标签内容
         * @return
         * @throws SAXException
         */
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            if ("inlineStr".equals(name) || "v".equals(name)) {
                vIsOpen = true;
                value.setLength(0);
            //每当遇到<c>表示新的单元格    
            } else if ("c".equals(name)) {
                //记录上一个单元格和当前单元格用于填充空格
                if (preRef == null) {
                    preRef = attributes.getValue("r");
                } else {
                    preRef = ref;
                }
                ref = attributes.getValue("r");
                
                String r = ref;
                int firstDigit = -1;
                for (int c = 0; c < r.length(); ++c) {
                    if (Character.isDigit(r.charAt(c))) {
                        firstDigit = c;
                        break;
                    }
                }
                //获取当前列数
                thisColumn = nameToColumn(r.substring(0, firstDigit));
                //判断是否超过最大列数
                if (thisColumn > MAX_COLUMN) {
                    return;
                }
                //将单元格类型翻译成设定名称
                this.nextDataType = xssfDataType.NUMBER;
                this.formatIndex = -1;
                this.formatString = null;
                String cellTpye = attributes.getValue("t");
                String cellStyleStr = attributes.getValue("s");
                if ("b".equals(cellTpye))
                    nextDataType = xssfDataType.BOOL;
                else if ("e".equals(cellTpye))
                    nextDataType = xssfDataType.ERROR;
                else if ("inlineStr".equals(cellTpye))
                    nextDataType = xssfDataType.INLINESTR;
                else if ("s".equals(cellTpye))
                    nextDataType = xssfDataType.SSTINDEX;
                else if ("str".equals(cellTpye))
                    nextDataType = xssfDataType.FORMULA;
                else if (cellStyleStr != null) {
                    int styleIndex = Integer.parseInt(cellStyleStr);
                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();

                    if (this.formatString == null) {
                        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                    }
                }
            }
        }

        /**
         * 每当遇到结束标签时进入
         * @param uri 命名空间路径或空
         * @param localName 本地名称或空
         * @param name 标签名称
         * @return
         * @throws SAXException
         */
        public void endElement(String uri, String localName, String name) throws SAXException {
            //当遇到</v>标签时,表面存在一个需要处理的值
            if ("v".equals(name)) {
                if (thisColumn > MAX_COLUMN) {
                    return;
                }
                String thisStr = NULL;
                //根据当前单元格和上一个单元格判断需要补充的空单元格
                if (null != ref && !ref.equals(Newcell)) {
                    if (!ref.equals(preRef)) {
                        int num = Integer.parseInt(ref.replaceAll("[A-Z]", ""));
                        int num_1 = Integer.parseInt(preRef.replaceAll("[A-Z]", ""));
                        int len = 0;
                        //取出两个单元格所在行数,如果行数不同则进行换行,从A1开始填充
                        if (num > num_1) {
                            len = countNullCell(ref, "A1")+1;
                        }else{
                            len = countNullCell(ref, preRef);
                        }
                        //填充空单元格
                        for (int i = 0; i < len; i++) {
                            record.add(curCol, thisStr);
                            curCol++;
                            Newcell = ref;
                            rowNull++;
                        }
                    }
                }
                //选择合适的方式处理单元格
                switch (nextDataType) {

                case BOOL:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "否" : "是";
                    break;

                case ERROR:
                    thisStr = value.toString();
                    break;

                case FORMULA:
                    thisStr = value.toString();
                    break;

                case INLINESTR:
                    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                    thisStr = rtsi.toString();
                    break;

                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        //查询字符索引对应的字符值
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        System.out.println("未找到对应的SST索引 '" + sstIndex + "': " + ex.toString());
                    }
                    break;

                case NUMBER:
                    String n = value.toString();
                    //判断该数字是不是时间
                    if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) {
                        Double d = Double.parseDouble(n);
                        Date date = HSSFDateUtil.getJavaDate(d);
                        thisStr = formatDateToString(date);
                    } else if (this.formatString != null)
                        thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex,
                                this.formatString);
                    else
                        thisStr = n;
                    break;

                //其他特殊类型可以自己扩展
                default:
                    thisStr = "(尚未支持的类型: " + nextDataType + ")";
                    break;
                }
                if (thisStr == null || thisStr.equals(NULL)) {
                    isCellNull = true;
                }
                //这里可以根据实际业务进行改造,以单元格为单位处理
                record.add(curCol, thisStr);
                curCol++;
            } else if ("row".equals(name)) {
                if (isCellNull == false && record.size() != rowNull) {
                    if (MAX_COLUMN >= record.size()) {
                        int len = MAX_COLUMN - record.size();
                        for (int i = 0; i < len; i++) {
                            record.add(curCol, NULL);
                            curCol++;
                        }
                    }
                    //这里可以根据实际业务进行改造,以行为单位处理
                    rows.add(curRow, record);
                    isCellNull = false;
                    //这里必须新建对象,不能为了节约清空之前的对象
                    record = new ArrayList<String>();
                    curRow++;
                    curCol = 0;
                    rowNull = 0;
                }
            }
        }


        /**
         * 计算两个单元格中间的空单元格数量
         * <p>Excel2007文件最大行数为1048576,最大列数为16284,列名为XFD</P>
         * @param ref 当前单元格名称
         * @param preRef 之前一个单元格名称
         * @return
         */
        private int countNullCell(String ref, String preRef) {
            String xfd = ref.replaceAll("\\d+", "");
            String xfd_1 = preRef.replaceAll("\\d+", "");

            xfd = fillChar(xfd, 3, '@', true);
            xfd_1 = fillChar(xfd_1, 3, '@', true);

            char[] letter = xfd.toCharArray();
            char[] letter_1 = xfd_1.toCharArray();
            int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
            return res - 1;
        }
        
        /**
         * 用特定字符填充字符串到指定长度
         * @param str 原字符串
         * @param len 目标串的长度
         * @param let 填充使用的字符
         * @param isPre 是否在前面填充字符
         * @return
         */
        private String fillChar(String str, int len, char let, boolean isPre) {
            int len_1 = str.length();
            if (len_1 < len) {
                if (isPre) {
                    for (int i = 0; i < (len - len_1); i++) {
                        str = let + str;
                    }
                } else {
                    for (int i = 0; i < (len - len_1); i++) {
                        str = str + let;
                    }
                }
            }
            return str;
        }

        /**
         * 每遇到一个标签调用,用于记录标签类型
         * @param ch 文件流字符数组
         * @param start 开始位置
         * @param length 读取长度
         * @return
         * @throws SAXException
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (vIsOpen)
                value.append(ch, start, length);
        }

        /**
         * 根据单元格名称计算列数
         * @param name 单元格名称(如:A1)
         * @return
         */
        private int nameToColumn(String name) {
            int column = -1;
            for (int i = 0; i < name.length(); ++i) {
                int c = name.charAt(i);
                column = (column + 1) * 26 + c - 'A';
            }
            return column;
        }

        /**
         * 将日期类型转变为String格式
         * @param date 日期
         * @return
         */
        private String formatDateToString(Date date) {
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            return sdf.format(date);
        }
    }

    //事先定义的最大列数
    private int MAX_COLUMN = 7;

    private static String XLS = ".xls";
    private static String XLSX = ".xlsx";

    public ExcelEventParser() {
    }

    /**
     * 构造XLSXReader读取Excel内容
     * @param styles 样式信息
     * @param strings 共享字符集
     * @param sheetInputStream sheet输入流
     * @return
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    private List<List<String>> XLSXProcessSheet(StylesTable styles, ReadOnlySharedStringsTable strings,
            InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
        //将输入流包装成XML源
        InputSource sheetSource = new InputSource(sheetInputStream);
        //构造基于标签回调的XMLReader
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        //构造XLSXReader
        XLSXReader handler = new XLSXReader(styles, strings);
        //添加管理者
        sheetParser.setContentHandler(handler);
        //读入XML源
        sheetParser.parse(sheetSource);
        return handler.rows;
    }

    /**
     * 读取Excel2007文件
     * @param filePath 文件路径
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    private List<List<String>> XLSXprocess(String filePath)
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        //将文件以压缩包的形式读入
        OPCPackage opcPackage = OPCPackage.open(filePath, PackageAccess.READ);
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcPackage);
        XSSFReader xssfReader = new XSSFReader(opcPackage);
        List<List<String>> sheet = null;
        //读入样式信息
        StylesTable styles = xssfReader.getStylesTable();
        //构造sheet迭代器
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        //这里只读入了第一个sheet,如果需要读取多个可以遍历这个迭代器
        if (iter.hasNext()) {
            InputStream stream = iter.next();
            //构造XLSXReader读取内容
            sheet = XLSXProcessSheet(styles, strings, stream);
            //关闭流
            stream.close();
        }
        opcPackage.close();
        return sheet;
    }

    /**
     * 读取Excel2003文件
     * @param filePath 文件路径
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    private List<List<String>> XLSprocess(String filePath)
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        XLSReader xls = new XLSReader();
        List<List<String>> sheet = xls.process(filePath);
        return sheet;
    }

    /**
     * 读取Excel文件内容并返回包含文件内容的List
     * @param filePath 文件路径
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    public static List<List<String>> readerExcel(String filePath)
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        // 将文件数据储存起来
        List<List<String>> sheet = null;
        // 获取文件后缀名,判断文件类型
        int index = filePath.lastIndexOf(".");
        String excelVer = filePath.substring(index, filePath.length());
        ExcelEventParser csvReader = new ExcelEventParser();
        if (XLS.equals(excelVer)) {
            sheet = csvReader.XLSprocess(filePath);
        } else if (XLSX.equals(excelVer)) {
            sheet = csvReader.XLSXprocess(filePath);
        }
        return sheet;
    }

    public static void main(String[] args) {
        try {
            // 要导入的文件地址(例:D:/新建 Microsoft Excel 工作表 .xlsx)
            String filePath = "D:/新建 Microsoft Excel 工作表 .xls";
            List<List<String>> sheet = readerExcel(filePath);
            for (List<String> row : sheet) {
                for (String cell : row) {
                    System.out.print(cell + ",");
                }
                System.out.println();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

代码中按照惯例,将文件内容全部读出储存起来了,其实并不必要;使用时可以结合实际情况修改。

2.1、读取行数

读取Excel时常常会需要在读取之前先判断行数,虽然用户模式直接提供了获取行数的方法,但显然这里是不能使用的;下面读取行数的方法就是简单简化而来,主要考虑资源占用和速度。

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;


public class MaxRowHandler {

    public static final String Excel_03 = ".xls";
    
    public static final String Excel_07 = ".xlsx";
    
    class XLSXMaxRowHandler extends DefaultHandler {
        
        //最大行数
        private int maxRow = 0;
        
        private int cellNum = 0;
        
        public void endElement(String uri, String localName, String name)
                throws SAXException {
                
            if ("row".equals(name)) {
                if (cellNum > 0) {
                    maxRow++;
                }
                cellNum = 0;
            }
        }
    }
    
    class XLSMaxRowHandler implements HSSFListener {
        
        private POIFSFileSystem fs;
        //最大行数
        private int maxRow = 0;
        
        public int process(String filePath) throws IOException {
            
            this.fs = new POIFSFileSystem(new FileInputStream(filePath));
            MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
            HSSFEventFactory factory = new HSSFEventFactory();
            HSSFRequest request = new HSSFRequest();
            //只监听RowRecord一种
            request.addListener(listener, RowRecord.sid);
            factory.processWorkbookEvents(request, fs);
            return maxRow;
        }
        
        /**
         * 处理Excel2003文件并计算行数
         * @param fileName 文件路径
         * @return
         * @throws IOException
         */
        public void processRecord(Record record) {
            if (record.getSid() == RowRecord.sid) {
                maxRow++;
            }
        }
    }
    
    public MaxRowHandler() {
    }
    
    /**
     * 读取Excel2003文件最大行数
     * @param filePath 文件路径
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    public int maxXLSprocess(String filePath) throws IOException, OpenXML4JException,
    ParserConfigurationException, SAXException {
        XLSMaxRowHandler xls = new XLSMaxRowHandler();
        int num = xls.process(filePath);
        return num;
    }
    
    /**
     * 读取Excel2007文件最大行数
     * @param filePath 文件路径
     * @return
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    private int maxXLSXprocess(String filePath) throws IOException, OpenXML4JException,
    ParserConfigurationException, SAXException {
        //将文件以压缩包的形式读入
        OPCPackage opcPackage = OPCPackage.open(filePath, PackageAccess.READ);
        XSSFReader xssfReader = new XSSFReader(opcPackage);
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        InputStream stream = iter.next();
        InputSource sheetSource = new InputSource(stream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        XLSXMaxRowHandler handler = new XLSXMaxRowHandler();
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        int num = handler.maxRow;
        stream.close();
        opcPackage.close();
        return num;
    }
    
    /**
     * 获取最大行数不再区分sheet和空行等
     * @param filePath 文件路径
     * @return
     */
    public static int getMaxRow(String filePath) {
        int index = filePath.lastIndexOf(".");
        String excelVer = filePath.substring(index, filePath.length());
        int num = 0;
        try {
            MaxRowHandler csvReader = new MaxRowHandler();
            if (".xls".equals(excelVer)) {
                num = csvReader.maxXLSprocess(filePath);
            } else if (".xlsx".equals(excelVer)) {
                num = csvReader.maxXLSXprocess(filePath);
            }
            return num;
        }catch(Exception e) {
            e.printStackTrace();
        }
        return 0;
    }
    
    public static void main(String[] args) {
        try {
            // 要导入的文件地址(例:D:/新建 Microsoft Excel 工作表 .xlsx)
            String filePath = "D:/新建 Microsoft Excel 工作表 .xlsx";
            int max = getMaxRow(filePath);
            System.out.println(max);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

3、后记

至此POI事件模式读取Excel文件就介绍完了,但提高读取性能还远不止这些;POI的SXSSF(流用户模式),阿里巴巴开源的easyexcel工具也是各有特点,非常值得一试。

本文代码较多,受限于作者水平,难免有缺漏之处。如果有疑问可以在评论区交流,如果对大家有所帮助就再好不过了。

网友评论

登录后评论
0/500
评论
不科学计算
+ 关注