问题描述
当想要使用poi读取一个稍微大一点的excel时,如果项目设置的最大堆内存较小,可能报错内存溢出;
例如代码:
FileInputStream fileInputStream = new FileInputStream("C:\\Users\\admin\\Desktop\\迭代文件\\迭代24\\尾程对账\\2022-04-03 08-15 Auto FedExInv 886388772.xlsx");
HSSFWorkbook xssfWorkbook = new XSSFWorkbook(fileInputStream);
System.out.println(xssfWorkbook.toString());
文件只有5M,但是占用的内存却又一千多M;
解决方法
1.使用easyexcel
导入依赖
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>
使用方式
写一个类继承ReadListener
package com.alibaba.excel.read.listener;
import java.util.Map;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.Listener;
import com.alibaba.excel.metadata.CellExtra;
import com.alibaba.excel.metadata.data.ReadCellData;
/**
* Interface to listen for read results
*
* @author Jiaju Zhuang
*/
public interface ReadListener<T> extends Listener {
/**
* 当任何一个 Listener 报告错误时,所有侦听器都会收到此方法。如果此处抛出异常,则整个读取将终止。
*
* @param exception
* @param context
* @throws Exception
*/
default void onException(Exception exception, AnalysisContext context) throws Exception {
throw exception;
}
/**
* When analysis one head row trigger invoke function.
*
* @param headMap
* @param context
*/
default void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {}
/**
* When analysis one row trigger invoke function.
*
* @param data one row value. Is is same as {@link AnalysisContext#readRowHolder()}
* @param context analysis context
*/
void invoke(T data, AnalysisContext context);
/**
* The current method is called when extra information is returned
*
* @param extra extra information
* @param context analysis context
*/
default void extra(CellExtra extra, AnalysisContext context) {}
/**
* if have something to do after all analysis
*
* @param context
*/
void doAfterAllAnalysed(AnalysisContext context);
/**
* Verify that there is another piece of data.You can stop the read by returning false
*
* @param context
* @return
*/
default boolean hasNext(AnalysisContext context) {
return true;
}
}
自己实现MyEasyExcelLister
package com.example.kafkademo.excel;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.read.listener.ReadListener;
/**
* @description:
* @author: xiongfanmeng
* @time: 2022/6/16 10:15
*/
public class MyEasyExcelLister implements ReadListener {
@Override
public void invoke(Object data, AnalysisContext context) {
System.out.println("当前行号:" + context.readRowHolder().getRowIndex());
System.out.println(data.toString());
}
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
}
}
读取
package com.example.kafkademo.excel;
import com.alibaba.excel.EasyExcel;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
/**
* @description:
* @author: xiongfanmeng
* @time: 2022/6/16 10:15
*/
public class EasyExcelTest {
public static void main(String[] args) throws FileNotFoundException {
FileInputStream fileInputStream = new FileInputStream("C:\\Users\\admin\\Desktop\\迭代文件\\迭代24\\尾程对账\\2022-04-03 08-15 Auto FedExInv 886388772.xlsx");
MyEasyExcelLister myEasyExcelLister = new MyEasyExcelLister();
EasyExcel.read(fileInputStream,myEasyExcelLister).useDefaultListener(false).sheet().doRead();
}
}
2.使用xlsx-streamer
导入依赖
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
<version>2.1.0</version>
</dependency>
使用方式
package com.example.kafkademo.excel;
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.FileInputStream;
import java.io.IOException;
/**
* @description:
* @author: xiongfanmeng
* @time: 2022/5/26 10:44
*/
public class ImportExcelOutOfMemory {
public static void main(String[] args) throws IOException {
FileInputStream fileInputStream = new FileInputStream("C:\\Users\\admin\\Desktop\\迭代文件\\迭代24\\尾程对账\\2022-04-03 08-15 Auto FedExInv 886388772.xlsx");
Workbook wk = StreamingReader.builder()
.rowCacheSize(100) //缓存到内存中的行数,默认是10
.bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024
.open(fileInputStream); //打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件
Sheet sheet = wk.getSheetAt(0);
//遍历所有的行
for (Row row : sheet) {
System.out.println("开始遍历第" + row.getRowNum() + "行数据:");
//遍历所有的列
for (Cell cell : row) {
System.out.print(cell.getStringCellValue() + " ");
}
System.out.println(" ");
}
fileInputStream.close();
}
}