最近做开发时遇到一个需求,导出百万级别的数据到excel文件中。但是用传统的poi方式,查寻数据库然后poi工具写入excel一直内存溢出的错误,无奈中找到两种解决方案一种是写出多个excel文件打包成zip给用户,这种速度还是不快,要求多线程分页操作数据库,比较麻烦。另外一种是利用官方提供的一种基于XML的方案。此文介绍这种方案的代码实现,笔者在本地抽成了一个工具类,如下。
其实对于一个Excel文件来说,最核心的是它的数据。Excel文件中的数据和样式文件是分开存储的,它们都对应于它自己体系中的一个XML文件。有兴趣的朋友可以把Excel文件的后缀名改成“.zip”,然后用压缩文件把它解压缩,可以看到它里面的结构是由一堆的XML文件组成的。如果我们把解压缩后的文件再压缩成一个压缩文件,并把它的后缀名改为Excel文件对应的后缀名“.xlsx”或“.xls”,然后再用Excel程序把它打开。这个时候你会发现它也是可以打开的。笔者本文所要讲述的基于大量的数据生成Excel的方案就是基于这种XML文件的方案,它依赖于一个现有的Excel文件(这个Excel文件可以在运行时生成好),然后把我们的数据生成对应的XML表示,再把我们的XML替换原来的XML文件,再进行打包后就变成了一个Excel文件了。基于这种方式,笔者做了一个测试,生成了一个拥有3500万行,5列的Excel文件,该文件大小为1GB,耗时412秒。这种效率比起我们应用传统的API来说是指数倍的。
细节的实现详情,请读者自己参考以下示例代码,该示例代码是笔者从Apache官方下载的,原地址是https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/usermodel/examples/BigGridDemo.java。需要注意的是生成的XML中需要应用到的样式需要事先生成,需要应用函数、合并单元格等逻辑的时候,可以先拿一个Excel文件应用对应的函数、合并逻辑,再把它解压缩后查看里面的XML文件的展现形式,然后自己拼接的时候也拼接成对应的形式,这样自己生成的Excel文件也会有对应的效果。
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFDataFormat;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.util.StringUtils;
import java.io.*;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
/**
* @author wangwei (yuhui@shanshu.ai)
* @date 2018/07/20
*/
public class ExcelExportUtil {
private ExcelExportUtil() {}
private static final StringXML_ENCODING ="UTF-8";
/**
*
* @param sheetName EXCEL中的表名
* @param absolutePath 实际服务器路径
* @param titles 表头
* @param data 表数据
* @param 数据泛型
* @throws Exception
*/
public static void exportExcel(String sheetName, String path,String absolutePath, String[] titles, List data)
throws Exception {
// Step 1. Create a template file. Setup sheets and workbook-level objects such as
// cell styles, number formats, etc.
XSSFWorkbook wb =new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet(sheetName);
Map styles =createStyles(wb);
//name of the zip entry holding sheet data, e.g. /xl/worksheets/sheet1.xml
String sheetRef = sheet.getPackagePart().getPartName().getName();
//save the template
FileOutputStream os =new FileOutputStream(path);
wb.write(os);
os.close();
//Step 2. Generate XML file.
File tmp = File.createTempFile("sheet", ".xml");
Writer fw =new OutputStreamWriter(new FileOutputStream(tmp), XML_ENCODING);
writeDate(fw, styles, titles, data);
fw.close();
//Step 3. Substitute the template entry with the generated data
FileOutputStream out =new FileOutputStream(absolutePath);
//用心拼接生成的XML文件,替换原来模板Excel文件中对应的XML文件,再压缩打包为一个Excel文件。
substitute(new File(path), tmp, sheetRef.substring(1), out);
out.close();
wb.close();
}
/**
* 支持的Cell样式
*
* @param wb
*
* @return
*/
private static MapcreateStyles(XSSFWorkbook wb) {
Map styles =new HashMap<>();
XSSFDataFormat fmt = wb.createDataFormat();
XSSFCellStyle style1 = wb.createCellStyle();
style1.setAlignment(HorizontalAlignment.RIGHT);
style1.setDataFormat(fmt.getFormat("0.0%"));
styles.put("percent", style1);
XSSFCellStyle style2 = wb.createCellStyle();
style2.setAlignment(HorizontalAlignment.CENTER);
style2.setDataFormat(fmt.getFormat("0.0X"));
styles.put("coeff", style2);
XSSFCellStyle style3 = wb.createCellStyle();
style3.setAlignment(HorizontalAlignment.RIGHT);
style3.setDataFormat(fmt.getFormat("$#,##0.00"));
styles.put("currency", style3);
XSSFCellStyle style4 = wb.createCellStyle();
style4.setAlignment(HorizontalAlignment.RIGHT);
style4.setDataFormat(fmt.getFormat("mmm dd"));
styles.put("date", style4);
XSSFCellStyle style5 = wb.createCellStyle();
XSSFFont headerFont = wb.createFont();
headerFont.setBold(true);
style5.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex());
style5.setFillPattern(FillPatternType.SOLID_FOREGROUND);
style5.setFont(headerFont);
styles.put("header", style5);
return styles;
}
/**
* 写出Excel Title
*
* @param sw
* @param styles
* @param titles
*/
private static void writeTitle(SpreadsheetWriter sw, Map styles, String[] titles) {
//insert header row
try {
if (titles !=null && titles.length >0) {
sw.insertRow(0);
int styleIndex = styles.get("header").getIndex();
int index =0;
for (String title : titles) {
sw.createCell(index, title, styleIndex);
index++;
}
sw.endRow();
}
}catch (IOException e) {
e.printStackTrace();
}
}
/**
* 写出Excel data
*
* @param out
* @param styles
* @param titles
* @param data
* @param
*
* @throws Exception
*/
private static void writeDate(Writer out, Map styles, String[] titles, List data)
throws Exception {
SpreadsheetWriter sw =new SpreadsheetWriter(out);
sw.beginSheet();
int length =0;
if (titles !=null) {
writeTitle(sw, styles, titles);
length = titles.length;
}
//write data rows
int rownum =1;
if (data !=null && data.size() >0) {
for (T obj : data) {
String[] items = obj.toString().split(";");
for (int i =0; i < length; i++) {
if (i == length) {
break;
}
sw.insertRow(rownum);
String str = items[i];
if (!StringUtils.isEmpty(str)) {
sw.createCell(i, items[i]);
}else {
sw.createCell(i, "");
}
sw.endRow();
}
rownum++;
}
}
sw.endSheet();
}
/**
* @param zipfile the template file
* @param tmpfile the XML file with the sheet data
* @param entry the name of the sheet entry to substitute, e.g. xl/worksheets/sheet1.xml
* @param out the stream to write the result to
*/
private static void substitute(File zipfile, File tmpfile, String entry, OutputStream out)throws IOException {
ZipFile zip = ZipHelper.openZipFile(zipfile);
try {
ZipOutputStream zos =new ZipOutputStream(out);
Enumeration en = zip.entries();
while (en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if (!ze.getName().equals(entry)) {
zos.putNextEntry(new ZipEntry(ze.getName()));
InputStream is = zip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
zos.putNextEntry(new ZipEntry(entry));
InputStream is =new FileInputStream(tmpfile);
copyStream(is, zos);
is.close();
zos.close();
}finally {
zip.close();
}
}
private static void copyStream(InputStream in, OutputStream out)throws IOException {
byte[] chunk =new byte[1024];
int count;
while ((count = in.read(chunk)) >=0) {
out.write(chunk, 0, count);
}
}
/**
* Writes spreadsheet data in a Writer.
* (YK: in future it may evolve in a full-featured API for streaming data in Excel)
*/
public static class SpreadsheetWriter {
private final Writer_out;
private int _rownum;
public SpreadsheetWriter(Writer out) {
_out = out;
}
public void beginSheet()throws IOException {
_out.write(""
+"
+".org/spreadsheetml/2006/main\">");
_out.write("\n");
}
public void endSheet()throws IOException {
_out.write("");
_out.write("");
}
/**
* Insert a new row
*
* @param rownum 0-based row number
*/
public void insertRow(int rownum)throws IOException {
_out.write("\n");
this._rownum = rownum;
}
/**
* Insert row end marker
*/
public void endRow()throws IOException {
_out.write("\n");
}
public void createCell(int columnIndex, String value, int styleIndex)throws IOException {
String ref =new CellReference(_rownum, columnIndex).formatAsString();
_out.write("
if (styleIndex != -1) {
_out.write(" s=\"" + styleIndex +"\"");
}
_out.write(">");
_out.write("" + value +"");
_out.write("");
}
public void createCell(int columnIndex, String value)throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, double value, int styleIndex)throws IOException {
String ref =new CellReference(_rownum, columnIndex).formatAsString();
_out.write("
if (styleIndex != -1) {
_out.write(" s=\"" + styleIndex +"\"");
}
_out.write(">");
_out.write("" + value +"");
_out.write("");
}
public void createCell(int columnIndex, double value)throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, Calendar value, int styleIndex)throws IOException {
createCell(columnIndex, DateUtil.getExcelDate(value, false), styleIndex);
}
}
}