This is my first attempt at a big-data analysis, and it did not go entirely smoothly. Still, it produced usable results, and once you have the data you can run your own analysis on it as well.
Cities with comparatively high salaries, based on Baidu job-posting data.
Years of developer experience, analyzed against market demand.
Market demand for developer talent.
Development-experience requirements across the major cities.
Data crawling: source code
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;

import javax.sql.DataSource;

import org.apache.commons.dbutils.QueryRunner;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

public class GetDate {

    public static void main(String[] args) throws JSONException {
        // Cities to crawl (the major cities first, then the other large cities)
        String srr[] = {"北京", "上海", "广州", "天津", "武汉", "沈阳", "哈尔滨", "西安", "南京", "成都", "重庆", "深圳", "杭州", "青岛", "苏州", "太原", "郑州", "济南", "长春", "合肥",
                "长沙", "南昌", "无锡", "昆明", "宁波", "福州", "石家庄", "南宁", "徐州", "烟台", "唐山", "柳州", "常州", "鞍山", "厦门", "抚顺", "吉林市", "洛阳", "大同", "包头",
                "大庆", "淄博", "乌鲁木齐", "佛山", "呼和浩特", "齐齐哈尔", "泉州", "西宁", "兰州", "贵阳", "温州"};
        // Job keywords to crawl
        String brr[] = {"java", "python", "C++", ".NET", "WEB前端", "UI设计师", "Android", "IOS", "PHP", "C", "C#", "R", "Swift", "GO", "大数据"};
        // Example of a fixed-keyword URL (e.g. java or python):
        // String urlX = "http://zhaopin.baidu.com/api/quanzhiasync?query=java&sort_type=1&detailmode=close&rn=20&pn=";
        // Base URL of Baidu's full-time job search API; city and query are appended per request
        String urlX = "http://zhaopin.baidu.com/api/quanzhiasync?sort_type=1&detailmode=close&rn=20&pn=";
        for (int d = 0; d < brr.length; d++) {
            String query = brr[d];
            System.err.println(query);
            for (int c = 0; c < srr.length; c++) {
                String city = srr[c];
                // Page offset; each page returns 20 records. The city parameter must be
                // URL-encoded, e.g. city=%E6%9D%AD%E5%B7%9E for 杭州
                for (int j = 0; j <= 740; j += 20) {
                    try {
                        String url = urlX + j + "&city=" + URLEncoder.encode(city, "UTF-8")
                                + "&query=" + URLEncoder.encode(query, "UTF-8");
                        String json = loadJSON(url);
                        json = jsonJX(json);
                        // Convert the JSON string into a JSON array of postings
                        JSONArray array = new JSONArray(json);
                        for (int i = 0; i < array.length(); i++) {
                            // Each element is one job posting
                            JSONObject ob = (JSONObject) array.get(i);
                            insert(ob.toString());
                        }
                    } catch (Exception e) {
                        System.err.println(".................error................");
                    }
                }
            }
        }
    }
    // Save one job posting to the database
    public static void insert(String json) {
        try {
            JSONObject jsonObject = new JSONObject(json);
            String jobfirstclass = jsonObject.getString("jobfirstclass");
            String joblink = jsonObject.getString("joblink");
            String experience = jsonObject.getString("experience");
            String education = jsonObject.getString("education");
            String employertype = jsonObject.getString("employertype");
            String ori_city = jsonObject.getString("ori_city");
            String salary = jsonObject.getString("salary");
            String title = jsonObject.getString("title");
            String sql = "insert into Baidu (jobfirstclass,joblink,experience,education,employertype,ori_city,salary,title) VALUES(?,?,?,?,?,?,?,?)";
            Object[] obj = {jobfirstclass, joblink, experience, education, employertype, ori_city, salary, title};
            DataSource dataSource = DBUtils.getDataSource();
            QueryRunner qr = new QueryRunner(dataSource);
            qr.execute(sql, obj);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // Parse Baidu's response: the posting list lives under data -> main -> data -> disp_data
    public static String jsonJX(String json) throws JSONException {
        JSONObject jsonObject = new JSONObject(json);
        String main = new JSONObject(jsonObject.getString("data")).getString("main");
        String data = new JSONObject(main).getString("data");
        String baiduData = new JSONObject(data).getString("disp_data");
        System.out.println(baiduData);
        return baiduData;
    }
    // Fetch the raw JSON response from the given URL
    public static String loadJSON(String url) {
        StringBuilder json = new StringBuilder();
        try {
            URL oracle = new URL(url);
            URLConnection yc = oracle.openConnection();
            // Read as UTF-8 so the Chinese fields are not garbled
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream(), "UTF-8"));
            String inputLine = null;
            while ((inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
            in.close();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return json.toString();
    }
}
Required JAR packages (judging from the classes used in the code: org.json, Apache Commons DbUtils, c3p0, and the MySQL JDBC driver):
Utility class
import java.beans.PropertyVetoException;
import java.sql.Connection;
import java.sql.SQLException;

import javax.sql.DataSource;

import com.mchange.v2.c3p0.ComboPooledDataSource;

public class DBUtils {

    // The c3p0 connection-pool object
    private static ComboPooledDataSource dataSource = new ComboPooledDataSource();

    static {
        // Configure the four basic connection parameters
        try {
            dataSource.setDriverClass("com.mysql.jdbc.Driver");
        } catch (PropertyVetoException e) {
            e.printStackTrace();
        }
        dataSource.setJdbcUrl("jdbc:mysql://localhost:3306/stone?useUnicode=true&characterEncoding=UTF-8");
        dataSource.setUser("root");
        dataSource.setPassword("admin");
        // Pool configuration
        // How many connections to add when the pool grows
        dataSource.setAcquireIncrement(5);
        // Initial number of connections
        dataSource.setInitialPoolSize(20);
        // Minimum number of connections
        dataSource.setMinPoolSize(2);
        // Maximum number of connections
        dataSource.setMaxPoolSize(50);
    }

    /**
     * Get a database connection.
     *
     * @return a pooled Connection
     * @throws SQLException
     */
    public static Connection getConnection() throws SQLException {
        return dataSource.getConnection();
    }

    /**
     * Get the c3p0 connection pool.
     *
     * @return the shared DataSource
     */
    public static DataSource getDataSource() {
        return dataSource;
    }
}
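The post does not show the DDL for the Baidu table or how the charts were produced, so here is a minimal sketch under the assumption that the table simply stores the eight text fields used by the insert statement above. It creates the table if it is missing and then runs one example aggregation (number of postings per city) with DbUtils; the table and column names come from the insert statement, everything else is illustrative.

import java.util.List;
import java.util.Map;

import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.MapListHandler;

public class BaiduAnalysis {
    public static void main(String[] args) throws Exception {
        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

        // Assumed schema: every field is stored as plain text, matching the insert above
        qr.update("CREATE TABLE IF NOT EXISTS Baidu ("
                + "jobfirstclass VARCHAR(64), joblink VARCHAR(512), experience VARCHAR(64),"
                + "education VARCHAR(64), employertype VARCHAR(64), ori_city VARCHAR(64),"
                + "salary VARCHAR(64), title VARCHAR(256))");

        // Example analysis: how many postings were collected per city
        List<Map<String, Object>> rows = qr.query(
                "SELECT ori_city, COUNT(*) AS cnt FROM Baidu GROUP BY ori_city ORDER BY cnt DESC",
                new MapListHandler());

        for (Map<String, Object> row : rows) {
            System.out.println(row.get("ori_city") + "\t" + row.get("cnt"));
        }
    }
}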
And here is what the results look like:
Some data cannot be crawled, presumably because of Baidu's anti-crawling measures. (I therefore recommend splitting the main method above into parts and running them separately.)
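One common way to soften rate limiting (not something from the original post, just a standard precaution) is to pause between requests and send a browser-like User-Agent header. A variant of loadJSON along those lines might look like this; the header value and the one-second delay are illustrative.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

public class PoliteLoader {
    // Variant of loadJSON that sets a User-Agent and waits between calls
    public static String loadJSON(String url) {
        StringBuilder json = new StringBuilder();
        try {
            URLConnection yc = new URL(url).openConnection();
            // Some sites reject requests without a browser-like User-Agent (value is illustrative)
            yc.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)");
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(yc.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    json.append(line);
                }
            }
            // Crude throttling: wait a bit before the next request
            Thread.sleep(1000);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return json.toString();
    }
}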
The source code is for learning purposes only and must not be used commercially. If you repost it, please keep the original link and credit the author.