https://www.shiyanlou.com/contests/lou5/console
创建数据库和数据表
-- Schema for the crawler output. IF NOT EXISTS makes the script safe to re-run.
create database if not exists shiyanlou_data;
use shiyanlou_data;

-- One row per crawled course block.
create table if not exists course_data (
    id int not null auto_increment,
    cname varchar(255) default null,            -- course name
    cdesc varchar(255) default null,            -- course description
    ctype varchar(255) default null,            -- course type label
    nlong enum('true','false') default null,    -- 'true' when the crawler found the name longer than 16 bytes
    primary key (id)
) engine=InnoDB default charset=utf8mb4;
PHP 代码
<?php
/**
 * Micro PHP crawler.
 *
 * Loads an HTML page, extracts one record per `course-body` block (name,
 * description, type and a "name is long" flag) and inserts the records into
 * the `course_data` table. The method layout follows the exercise skeleton;
 * the parse* / titleIsLong stubs are kept so the public interface is unchanged.
 */
class Crawler
{
    /** @var string Raw HTML set by loadFile(); empty until a file has been loaded. */
    private $content = '';

    /** @var array[] Parsed course records, appended to by parseContent(). */
    private $data = [];

    /** @var mysqli Connection opened by the constructor and shared by all instances. */
    static private $mysql;

    /**
     * Opens the MySQL connection and selects the connection charset.
     * Terminates the script when the connection cannot be established.
     */
    function __construct()
    {
        echo "开始爬取内容...<br>";
        self::$mysql = new mysqli('localhost', 'root', '', 'shiyanlou_data', 3306);
        // NOTE(review): since PHP 8.1 mysqli throws mysqli_sql_exception by default,
        // so this check only fires when error reporting is switched off - confirm
        // against the PHP version this runs on.
        if (self::$mysql->connect_errno) {
            die('连接错误' . self::$mysql->connect_error);
        }
        // The table is declared with charset utf8mb4; the connection must match,
        // otherwise 4-byte characters are mangled or rejected on insert.
        self::$mysql->set_charset('utf8mb4');
    }

    /**
     * Reads the page to be parsed into memory.
     *
     * @param string $file_path Path (or stream URL) handed to file_get_contents().
     * @return void
     */
    public function loadFile($file_path)
    {
        echo "正在加载文件...<br>";
        $html = file_get_contents($file_path);
        if ($html === false) {
            // Keep $content a string so parseContent() simply finds no courses.
            echo "文件加载失败: " . htmlspecialchars($file_path) . "<br>";
            $html = '';
        }
        $this->content = $html;
    }

    public function parseCourseBody()
    {
        //TODO
    }

    /**
     * Splits the loaded HTML into course blocks, extracts the fields of each
     * block, appends them to $this->data and prints the collected records.
     *
     * @return void
     */
    public function parseContent()
    {
        echo "开始解析内容...<br>";
        // The blocks stay in a local variable: overwriting $this->content with
        // the match array would make a second call operate on an array.
        $found = preg_match_all(
            '#<div class="course-body">[\s\S]*?</div>\s*?</div>#',
            (string) $this->content,
            $matches
        );
        $blocks = ($found && isset($matches[0])) ? $matches[0] : [];

        foreach ($blocks as $item) {
            $cname = $this->firstCapture('#<div class="course-name">(.*?)</div>#', $item);
            // strlen() counts bytes, not characters, so a multi-byte name
            // reaches the 16 limit sooner than its visible length suggests.
            $nameLength = strlen($cname);
            $this->data[] = [
                'cname'       => $cname,
                'cdesc'       => $this->firstCapture('#<div class="course-desc">(.*?)</div>#', $item),
                'ctype'       => $this->firstCapture('#<span class=".*?pull-right">(.*?)</span>#', $item),
                'nlong'       => $nameLength > 16 ? 'true' : 'false',
                'name_length' => $nameLength,
            ];
        }

        echo "<pre>";
        print_r($this->data);
        echo "</pre>";
        echo "解析内容结束! <br>";
    }

    /**
     * Inserts every parsed record into `course_data` and echoes the outcome
     * of each insert.
     *
     * @return void
     */
    public function saveData()
    {
        echo "存入数据库...";
        if (empty($this->data)) {
            return;
        }

        // Scraped text is untrusted and may contain quotes, so the values are
        // bound as parameters instead of being interpolated into the SQL text.
        $stmt = self::$mysql->prepare(
            'insert into course_data (ctype,cname,nlong,cdesc) values (?,?,?,?)'
        );
        if (!$stmt) {
            echo '添加失败' . self::$mysql->error;
            return;
        }

        // bind_param() binds by reference: bind once, reassign inside the loop.
        $ctype = $cname = $nlong = $cdesc = '';
        $stmt->bind_param('ssss', $ctype, $cname, $nlong, $cdesc);

        foreach ($this->data as $one) {
            $ctype = $one['ctype'];
            $cname = $one['cname'];
            $nlong = $one['nlong'];
            $cdesc = $one['cdesc'];
            if ($stmt->execute()) {
                echo '添加成功';
            } else {
                echo '添加失败' . $stmt->error;
            }
        }
        $stmt->close();
    }

    public function parseTitle()
    {
        echo "解析课程标题...<br>";
        //TODO
    }

    public function parseDesc()
    {
        echo "解析课程简介...<br>";
        //TODO
    }

    public function parseType()
    {
        echo "解析课程类型...<br>";
        //TODO
    }

    public function titleIsLong()
    {
        echo "判断课程名是否超长...<br>";
        //TODO
    }

    /**
     * Returns the first capture group of $pattern in $subject, or '' when the
     * pattern does not match.
     *
     * @param string $pattern PCRE pattern with at least one capture group.
     * @param string $subject Text to search.
     * @return string
     */
    private function firstCapture($pattern, $subject)
    {
        return (preg_match($pattern, $subject, $m) && isset($m[1])) ? $m[1] : '';
    }
}
// Entry point: declare the response encoding before the crawler prints
// anything, then run the load -> parse -> save pipeline on the saved page.
header("Content-Type: text/html;charset=utf-8");

$crawler = new Crawler();
$crawler->loadFile('shiyanlou.html');
$crawler->parseContent();
$crawler->saveData();