1.添加Maven依赖
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-server</artifactId>
<version>3.0.1</version>
</dependency>
2.下载chromedriver
http://npm.taobao.org/mirrors/chromedriver/
注意:Chrome 和 chromedriver 的版本必须相互匹配,否则可能会出现浏览器会话无法正常创建等问题。
3.小栗子
/** Name of the system property set in main to the chromedriver path. */
private static final String webDriver = "webdriver.chrome.driver";
/** Local path of the chromedriver executable; adjust it to the machine running the demo. */
private static final String webDriverPath = "C:\\MineProjects\\JavaDemo\\chromedriver_win32\\chromedriver.exe";
/** Page opened first in order to log in. */
private static final String targetPath = "https://mp.weixin.qq.com";
/** URL requested when searching for an official account by name. */
private static String searchPath = "https://mp.weixin.qq.com/cgi-bin/searchbiz";
/** URL requested when listing the articles of an official account. */
private static String appmsgPath = "https://mp.weixin.qq.com/cgi-bin/appmsg";
/**
 * Generator for the "random" query parameter. It is deliberately unseeded: a fixed seed
 * would produce the very same sequence of values on every run of the program.
 */
private static final Random random = new Random();
/** Shared Gson instance used to deserialize the JSON responses. */
private static final Gson gson = new Gson();
/** Name of the official account to crawl (must be the exact name). */
private static final String sourceName = "渔愉鱼";
/** Login account, read from the "reptile" resource bundle. */
private static final String username;
/** Login password, read from the "reptile" resource bundle. */
private static final String password;

static {
    // Credentials are loaded from the "reptile" resource bundle instead of being hard-coded here.
    ResourceBundle rb = ResourceBundle.getBundle("reptile");
    username = rb.getString("reptile.username");
    password = rb.getString("reptile.password");
}
/**
 * Demo entry point: starts Chrome, logs in, looks up the configured official account and
 * prints the first page (begin 0, count 5) of its articles.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.setProperty(webDriver, webDriverPath);
    WebDriver driver = null;
    try {
        driver = new ChromeDriver();
        weixinLogin(driver);
        String token = getToken(driver);
        Platform platform = getPlatform(driver, token);
        if (platform == null) {
            throw new Exception("不存在" + sourceName + "公众号");
        }
        InfoResult infoResult = getInfoResult(driver, token, platform.getFakeId(), 0, 5);
        System.out.println(infoResult);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (driver != null) {
            // quit() ends the whole WebDriver session (all windows plus the driver process);
            // close() would only close the current window and can leave chromedriver running.
            driver.quit();
        }
    }
}
/**
 * Fetches one page of article data for an official account.
 *
 * <p>The request is issued through the browser session, and the JSON payload is read from
 * the {@code <pre>} element of the resulting page.
 *
 * @param driver browser session used to issue the request
 * @param token  value sent as the "token" query parameter
 * @param fakeId value sent as the "fakeid" query parameter
 * @param begin  value sent as the "begin" query parameter (presumably the paging offset; confirm)
 * @param count  value sent as the "count" query parameter (presumably the page size; confirm)
 * @return the response text deserialized into an {@code InfoResult}
 * @throws Exception if the page contains no {@code <pre>} element
 */
private static InfoResult getInfoResult(WebDriver driver, String token, String fakeId,
        Integer begin, Integer count) throws Exception {
    Map<String, String> queryInfoParams = new HashMap<>();
    queryInfoParams.put("token", token);
    queryInfoParams.put("lang", "zh_CN");
    queryInfoParams.put("f", "json");
    queryInfoParams.put("ajax", "1");
    queryInfoParams.put("random", random.nextDouble() + "");
    queryInfoParams.put("action", "list_ex");
    queryInfoParams.put("query", "");
    queryInfoParams.put("type", "9");
    queryInfoParams.put("fakeid", fakeId);
    queryInfoParams.put("begin", begin + "");
    queryInfoParams.put("count", count + "");

    // Build the request URL in a local variable so the shared base URL stays untouched;
    // otherwise a second call (e.g. for the next page) would start from an already
    // parameterized URL.
    String requestUrl = HttpUtils.setParams(appmsgPath, queryInfoParams);
    driver.get(requestUrl);

    Document infoDocument = Jsoup.parse(driver.getPageSource());
    Elements infoList = infoDocument.select("pre");
    // select() yields an empty Elements rather than null when nothing matches,
    // so emptiness is the condition that actually detects a missing payload.
    if (infoList == null || infoList.isEmpty()) {
        throw new Exception("获取公众号文章错误");
    }
    return gson.fromJson(infoList.text(), InfoResult.class);
}
/**
 * Looks up the official account whose nickname equals {@code sourceName}.
 *
 * <p>The search request is issued through the browser session, and the JSON payload is read
 * from the {@code <pre>} element of the resulting page.
 *
 * @param driver browser session used to issue the request
 * @param token  value sent as the "token" query parameter
 * @return the matching account (the last one if several entries match), or {@code null}
 *         when the response carries no list or no entry has the exact nickname
 * @throws Exception if the page contains no {@code <pre>} element
 */
private static Platform getPlatform(WebDriver driver, String token) throws Exception {
    Map<String, String> searchNameParams = new HashMap<>();
    searchNameParams.put("action", "search_biz");
    searchNameParams.put("token", token);
    searchNameParams.put("lang", "zh_CN");
    searchNameParams.put("f", "json");
    searchNameParams.put("ajax", "1");
    searchNameParams.put("random", random.nextDouble() + "");
    searchNameParams.put("query", sourceName);
    searchNameParams.put("begin", "0");
    searchNameParams.put("count", "5");

    // Build the request URL in a local variable so the shared base URL stays untouched
    // and repeated calls do not start from an already parameterized URL.
    String requestUrl = HttpUtils.setParams(searchPath, searchNameParams);
    driver.get(requestUrl);

    Document preDocument = Jsoup.parse(driver.getPageSource());
    Elements preList = preDocument.select("pre");
    // select() yields an empty Elements rather than null when nothing matches.
    if (preList == null || preList.isEmpty()) {
        throw new Exception("获取公众号错误");
    }

    PlatformResult result = gson.fromJson(preList.text(), PlatformResult.class);
    // A response without a usable list is treated as "account not found" instead of
    // failing with a NullPointerException; the caller already handles a null result.
    if (result == null || result.getList() == null) {
        return null;
    }

    Platform platform = null;
    for (int index = 0; index < result.getList().size(); index++) {
        Platform item = result.getList().get(index);
        if (sourceName.equals(item.getNickname())) {
            platform = item;
        }
    }
    return platform;
}
/**
 * Extracts the value of the {@code token} parameter from the browser's current URL.
 *
 * @param driver browser session whose current URL is inspected
 * @return the text following {@code token=} up to the next {@code &} or {@code #}
 *         (or the end of the URL)
 * @throws Exception if the current URL is blank, contains no {@code token=}, or the
 *                   extracted token is blank
 */
private static String getToken(WebDriver driver) throws Exception {
    String current = driver.getCurrentUrl();
    if (StringUtils.isBlank(current)) {
        throw new Exception("获取token链接有误");
    }

    // Locate the parameter explicitly: indexing into split(...) would fail with an
    // ArrayIndexOutOfBoundsException whenever the URL carries no token.
    String marker = "token=";
    int markerIndex = current.indexOf(marker);
    if (markerIndex < 0) {
        throw new Exception("获取token链接有误");
    }

    String token = current.substring(markerIndex + marker.length());
    // Cut the value at the next parameter separator or fragment, if there is one,
    // so trailing URL parts never become part of the token.
    int end = token.length();
    int ampersand = token.indexOf('&');
    if (ampersand >= 0 && ampersand < end) {
        end = ampersand;
    }
    int fragment = token.indexOf('#');
    if (fragment >= 0 && fragment < end) {
        end = fragment;
    }
    token = token.substring(0, end);

    if (StringUtils.isBlank(token)) {
        throw new Exception("token错误");
    }
    return token;
}
/**
 * Login step: opens the login page, fills in the account and password, clicks the
 * checkbox and the login button, then pauses so the QR code can be scanned by phone.
 *
 * @param driver browser session to log in with
 * @throws Exception if an element cannot be found or the wait is interrupted
 */
private static void weixinLogin(WebDriver driver) throws Exception {
    driver.get(targetPath);

    fillLoginField(driver, "account", username);
    fillLoginField(driver, "password", password);

    // NOTE(review): "icon_checkbox" is presumably the "remember me" box; confirm against the page.
    driver.findElement(By.className("icon_checkbox")).click();
    driver.findElement(By.className("btn_login")).click();

    System.out.println("请用手机微信扫码二维码登录公众号");
    // Fixed pause that gives the user time to scan the QR code.
    final long scanWaitMillis = 15000L;
    Thread.sleep(scanWaitMillis);
}

/** Clears the input with the given name attribute and types the value into it. */
private static void fillLoginField(WebDriver driver, String fieldName, String value) {
    WebElement input = driver.findElement(By.name(fieldName));
    input.clear();
    input.sendKeys(value);
}
4.总结
以前没有用过 Selenium,这两天研究了一下,还是挺好玩的,抢票之类的功能应该也可以用它来做。这里贴的只是部分源码,完整代码可以到码云上查看我写的 ReptileDemo。