1. 需求:从网页中读取一个表格的信息(dom方式)
<b>网页源码</b>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"></meta>
<!-- The meta element above is closed explicitly so that it stays well-formed when the page is read by an XML parser. -->
<title>Document</title>
<style type="text/css">
table{
width: 400px;
height: 100px;
text-align: center;
border: 1px solid #ffffff;
}
table td,table th{
padding: 0px;
margin: 0px;
border: 1px solid;
}
</style>
</head>
<body>
<!-- Sample data table: the header row names five columns (name, age, sex, tel, address); every tr inside tbody holds one student record in that column order. -->
<table>
<caption>学生信息表</caption>
<thead>
<tr>
<th>name</th>
<th>age</th>
<th>sex</th>
<th>tel</th>
<th>address</th>
</tr>
</thead>
<tbody>
<tr>
<td>丁昌江</td>
<td>22</td>
<td>男</td>
<td>13247842137</td>
<td>贵州</td>
</tr>
<tr>
<td>杨燕语</td>
<td>20</td>
<td>女</td>
<td>13323234523</td>
<td>贵州</td>
</tr>
</tbody>
</table>
</body>
</html>
<b>java代码</b>
Contact类
/**
 * Mutable data holder for one contact record: name, age, sex, phone number and address.
 *
 * <p>A contact can be built in one step with the five-argument constructor, or created
 * empty and filled in through the setters. Every property has a matching getter so the
 * values can be read without relying on {@link #toString()}.
 */
public class Contact {
    // The fields keep their original package-private visibility so that any existing
    // same-package code reading them directly continues to compile.
    String name;
    int age;
    String sex;
    String tel;
    String address;

    /** Creates an empty contact; reference fields are {@code null} and the age is 0. */
    public Contact() {
    }

    /**
     * Creates a fully populated contact.
     *
     * @param name    contact name
     * @param age     age in years
     * @param sex     sex, stored as given
     * @param tel     phone number, stored as text
     * @param address address
     */
    public Contact(String name, int age, String sex, String tel, String address) {
        this.name = name;
        this.age = age;
        this.sex = sex;
        this.tel = tel;
        this.address = address;
    }

    /** @return the contact name, or {@code null} if it was never set */
    public String getName() {
        return name;
    }

    /** @return the age, or 0 if it was never set */
    public int getAge() {
        return age;
    }

    /** @return the sex, or {@code null} if it was never set */
    public String getSex() {
        return sex;
    }

    /** @return the phone number, or {@code null} if it was never set */
    public String getTel() {
        return tel;
    }

    /** @return the address, or {@code null} if it was never set */
    public String getAddress() {
        return address;
    }

    /** @param name the new contact name */
    public void setName(String name) {
        this.name = name;
    }

    /** @param age the new age in years */
    public void setAge(int age) {
        this.age = age;
    }

    /** @param sex the new sex value */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @param tel the new phone number */
    public void setTel(String tel) {
        this.tel = tel;
    }

    /** @param address the new address */
    public void setAddress(String address) {
        this.address = address;
    }

    /**
     * Renders the contact on a single line that starts with a line break, so that a list
     * of contacts prints one record per line.
     *
     * @return the labelled field values
     */
    @Override
    public String toString() {
        return "\n姓名:" + this.name + " 年龄:" + this.age + " 性别:" + this.sex + " 电话:" + this.tel + " 地址:" + this.address;
    }
}
测试类
/**
 * Demo: loads a page containing a student table with dom4j and converts every
 * {@code <tr>} found under {@code <tbody>} into a {@link Contact}.
 *
 * <p>The page is read with an XML parser, so it has to be well-formed XML.
 */
public class demo01 {
    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "./src/contact.html";

    /** Cells expected per row, in order: name, age, sex, tel, address. */
    private static final int COLUMN_COUNT = 5;

    /**
     * Parses the table and prints the resulting contact list.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws DocumentException        if the file cannot be read or is not well-formed
     * @throws IllegalArgumentException if a row has fewer than five {@code <td>} cells
     * @throws NumberFormatException    if an age cell does not contain an integer
     */
    public static void main(String[] args) throws DocumentException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // Build the DOM tree for the whole document.
        SAXReader saxReader = new SAXReader();
        Document doc = saxReader.read(new File(path));

        // Each row of the table body describes one person.
        List<Contact> contactList = new ArrayList<Contact>();
        // Iterating as Object and casting avoids an unchecked assignment and compiles
        // whether selectNodes is declared to return a raw List or a List<Node>.
        for (Object rowNode : doc.selectNodes("//tbody//tr")) {
            contactList.add(toContact((Element) rowNode));
        }
        System.out.println(contactList);
    }

    /** Maps the first five td cells of one table row to a Contact. */
    private static Contact toContact(Element row) {
        List<?> cells = row.elements("td");
        if (cells.size() < COLUMN_COUNT) {
            // Fail with a description of the bad row instead of a bare NullPointerException.
            throw new IllegalArgumentException(
                    "Expected " + COLUMN_COUNT + " <td> cells in a row but found " + cells.size());
        }
        String name = cellText(cells, 0);
        // Surrounding whitespace (for example from pretty-printed markup) must not break the number.
        int age = Integer.parseInt(cellText(cells, 1).trim());
        String sex = cellText(cells, 2);
        String tel = cellText(cells, 3);
        String address = cellText(cells, 4);
        return new Contact(name, age, sex, tel, address);
    }

    /** Returns the text of the cell at the given zero-based position. */
    private static String cellText(List<?> cells, int index) {
        return ((Element) cells.get(index)).getText();
    }
}
<b>总结:</b>
这是事先知道td个数,Contact类属性的情况下写的,明显不智能有没有;
期待神级的html转对象工具(反射???)
2.需求:读取一个xml文件且原样输出(SAX方式)
测试类
/**
 * Demo: runs a SAX parse over {@code ./src/contact.xml} with a {@code MyDefaultHandler2}
 * and prints the text that the handler collected.
 */
public class demo03 {
    /**
     * @param args unused
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws SAXException                 if the document cannot be parsed
     * @throws IOException                  if the file cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        // Obtain a SAX parser from the default factory.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();

        // The handler receives the parse events and builds up the output text.
        MyDefaultHandler2 echoHandler = new MyDefaultHandler2();
        File xmlFile = new File("./src/contact.xml");
        saxParser.parse(xmlFile, echoHandler);

        String rebuilt = echoHandler.getContent();
        System.out.println(rebuilt);
    }
}
MyDefaultHandler2
/**
 * SAX handler that rebuilds the markup of the document being parsed.
 *
 * <p>Start tags (with their attributes), character data and end tags are appended to an
 * internal buffer in the order the parser reports them; {@link #getContent()} returns
 * the buffer. Only these three kinds of events are handled, so anything the parser
 * reports through other callbacks does not appear in the output.
 *
 * <p>The parser hands over text and attribute values with entity references already
 * decoded, so markup characters are escaped again on output. Without this, a document
 * containing {@code &amp;} or {@code &lt;} would be echoed as text that is no longer
 * well-formed XML.
 */
public class MyDefaultHandler2 extends DefaultHandler {
    /** Accumulates the rebuilt markup for every event received so far. */
    private final StringBuilder sb = new StringBuilder();

    /**
     * @return the markup collected so far
     */
    public String getContent() {
        return sb.toString();
    }

    /**
     * Appends a start tag.
     *
     * @param qName      qualified name of the element being opened
     * @param attributes attributes of the element; may be {@code null}
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        sb.append('<').append(qName);
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                // Produces e.g. id="01" preceded by a separating space.
                sb.append(' ').append(attributes.getQName(i)).append("=\"");
                appendEscaped(attributes.getValue(i), true);
                sb.append('"');
            }
        }
        sb.append('>');
    }

    /**
     * Appends a chunk of character data, escaping markup characters.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character of the chunk
     * @param length number of characters in the chunk
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        appendEscaped(new String(ch, start, length), false);
    }

    /**
     * Appends an end tag.
     *
     * @param qName qualified name of the element being closed
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        sb.append("</").append(qName).append('>');
    }

    /** Appends text with &amp;, &lt; and &gt; escaped, plus the double quote inside attribute values. */
    private void appendEscaped(String text, boolean inAttribute) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '&') {
                sb.append("&amp;");
            } else if (c == '<') {
                sb.append("&lt;");
            } else if (c == '>') {
                sb.append("&gt;");
            } else if (c == '"' && inAttribute) {
                // The value is written between double quotes, so a literal quote would end it early.
                sb.append("&quot;");
            } else {
                sb.append(c);
            }
        }
    }
}
输出效果
3.需求:从xml中读取对象信息(SAX方式)
测试类
/**
 * Demo: runs a SAX parse over {@code ./src/contact.xml} with a {@code MyDefaultHandler3}
 * and prints the objects that the handler collected.
 */
public class demo04 {
    /**
     * @param args unused
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws SAXException                 if the document cannot be parsed
     * @throws IOException                  if the file cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        // Obtain a SAX parser from the default factory.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();

        // The handler turns the parse events into objects.
        MyDefaultHandler3 objectHandler = new MyDefaultHandler3();
        File xmlFile = new File("./src/contact.xml");
        saxParser.parse(xmlFile, objectHandler);

        System.out.println(objectHandler.getAllObject());
    }
}
MyDefaultHandler3
/**
 * SAX handler that turns {@code <contact>} elements into {@link Contact} objects.
 *
 * <p>A new contact is started at every {@code <contact>} start tag, filled from its
 * {@code name}, {@code age}, {@code sex}, {@code tel} and {@code address} child
 * elements, and stored when the matching end tag is reached.
 *
 * <p>Element text is buffered and applied at the element's end tag. A SAX parser is
 * allowed to deliver one run of text in several {@code characters} calls, so using each
 * chunk directly could keep only the last fragment of a value or parse a partial number.
 */
public class MyDefaultHandler3 extends DefaultHandler {
    /** Qualified name of the element whose text is being collected; empty between elements. */
    String tmp = "";
    /** Contact currently being filled, or {@code null} outside a contact element. */
    private Contact contact;
    /** Text collected for the element named by {@link #tmp}. */
    private final StringBuilder text = new StringBuilder();
    /** Contacts completed so far, in document order. */
    List<Contact> contactList = new ArrayList<Contact>();

    /** Remembers the element that was just opened and starts a new contact on {@code <contact>}. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        tmp = qName;
        text.setLength(0);
        if ("contact".equals(qName)) {
            contact = new Contact();
        }
    }

    /** Buffers text that belongs to the currently open element of a contact. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // tmp is empty between elements, which keeps inter-element line breaks and
        // indentation out of the field values.
        if (contact != null && !tmp.isEmpty()) {
            text.append(ch, start, length);
        }
    }

    /** Applies the buffered text to the contact, or stores the contact on {@code </contact>}. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if ("contact".equals(qName)) {
            if (contact != null) {
                contactList.add(contact);
                // Fields met outside a contact element must not alter a contact already stored.
                contact = null;
            }
        } else if (contact != null && qName.equals(tmp) && text.length() > 0) {
            applyField(qName, text.toString());
        }
        tmp = "";
        text.setLength(0);
    }

    /**
     * @return the list of contacts collected so far (the handler's own list, not a copy)
     */
    public List<Contact> getAllObject() {
        return contactList;
    }

    /** Stores a value in the current contact; element names that are not contact fields are ignored. */
    private void applyField(String field, String value) {
        switch (field) {
            case "name":
                contact.setName(value);
                break;
            case "age":
                // Tolerate whitespace around the number; a non-numeric value still
                // raises NumberFormatException.
                contact.setAge(Integer.parseInt(value.trim()));
                break;
            case "sex":
                contact.setSex(value);
                break;
            case "tel":
                contact.setTel(value);
                break;
            case "address":
                contact.setAddress(value);
                break;
            default:
                break;
        }
    }
}
contact类(同上)
效果: