本文将通过实例来介绍apache poi 读取excel的原理,包括各种数据类型的处理,本文提供的代码非常通用,即使不规则的excel文件,也可以读取。
package poi.excel;
import java.io.File;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
* ClassName:ExcelFileParser <br/>
* Function: TODO ADD FUNCTION. <br/>
* Reason: TODO ADD REASON. <br/>
* Date: 2015年12月29日 上午9:26:21 <br/>
*
* @author chiwei
* @version
* @since JDK 1.6
* @see
*/
public class ExcelFileParser {
public static Workbook getWb(String path) {
try {
return WorkbookFactory.create(new File(path));
} catch (Exception e) {
throw new RuntimeException("读取EXCEL文件出错", e);
}
}
public static Sheet getSheet(Workbook wb, int sheetIndex) {
if (wb == null) {
throw new RuntimeException("工作簿对象为空");
}
int sheetSize = wb.getNumberOfSheets();
if (sheetIndex < 0 || sheetIndex > sheetSize - 1) {
throw new RuntimeException("工作表获取错误");
}
return wb.getSheetAt(sheetIndex);
}
public static List<List<String>> getExcelRows(Sheet sheet, int startLine, int endLine) {
List<List<String>> list = new ArrayList<List<String>>();
// 如果开始行号和结束行号都是-1的话,则全表读取
if (startLine == -1)
startLine = 0;
if (endLine == -1) {
endLine = sheet.getLastRowNum() + 1;
} else {
endLine += 1;
}
for (int i = startLine; i < endLine; i++) {
Row row = sheet.getRow(i);
if (row == null) {
System.out.println("该行为空,直接跳过");
continue;
}
int rowSize = row.getLastCellNum();
List<String> rowList = new ArrayList<String>();
for (int j = 0; j < rowSize; j++) {
Cell cell = row.getCell(j);
String temp = "";
if (cell == null) {
System.out.println("该列为空,赋值双引号");
temp = "NULL";
} else {
int cellType = cell.getCellType();
switch (cellType) {
case Cell.CELL_TYPE_STRING:
temp = cell.getStringCellValue().trim();
temp = StringUtils.isEmpty(temp) ? "NULL" : temp;
break;
case Cell.CELL_TYPE_BOOLEAN:
temp = String.valueOf(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_FORMULA:
temp = String.valueOf(cell.getCellFormula().trim());
break;
case Cell.CELL_TYPE_NUMERIC:
if (HSSFDateUtil.isCellDateFormatted(cell)) {
temp = DateUtil.parseToString(cell.getDateCellValue(),
DateUtil.FORMAT_DATE);
} else {
temp = new DecimalFormat("#.######").format(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_BLANK:
temp = "NULL";
break;
case Cell.CELL_TYPE_ERROR:
temp = "ERROR";
break;
default:
temp = cell.toString().trim();
break;
}
}
rowList.add(temp);
}
list.add(rowList);
}
return list;
}
public static void main(String a[]) {
String path = "D:\\test.xlsx";
Workbook wb = getWb(path);
List<List<String>> list = getExcelRows(getSheet(wb, 0), -1, -1);
List<List<String>> listList = new ArrayList<>();
List<List<CheckItemIndexInfos>> checkItemIndexInfos = new ArrayList<>();
for (List<String> row : list) {
/ / 过滤掉无用数据
row = row.stream().filter(s -> !s.equals("NULL")).collect(Collectors.toList());
if (row.size() > 0) {
listList.add(row);
}
}
for (int i = 0; i < listList .size(); i++) {
List<String> row = listList .get(i);
for (int j = 0; j < row.size(); j++) {
System.out.print(row.get(j) + "\t");
}
System.out.println();
}
}
}
maven 依赖如下:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.11</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.11</version>
</dependency>
代码流程:
首先根据文件路径去创建一个工作簿对象Workbook
然后通过工作簿对象获取一个工作表对象Sheet
最后对工作表的row进行遍历
注意获取每行的列数,我用的row.getLastCellNum();而不是row.getPhysicalNumberOfCells()
这个方法就可以处理不规则的excel单元格内容了,即某一行3列,某一行4列,5列之类的
然后对行循环内存的单元列也要进行空判断,防止异常
代码提供的方法非常通用,只需传进去一个文件路径,和几个必要的参数即可,有其它需求的话,在此代码上二次开发非常简单
原文链接:https://blog.csdn.net/simonchi/article/details/50433219