1、安装
go get -u github.com/gocolly/colly/...
2、示例
// File to download; the local file name is whatever follows the last "/" in the URL.
url := "https://ime.sogoucdn.com/d5f5361bcfe5336d05137716ea7947ed/5ffbd544/dl/index/1609387427/sogou_pinyin_100a.exe"
fileName := url[strings.LastIndex(url, "/")+1:]

// Build the collector.
c := colly.NewCollector(
	// Set the User-Agent header sent with requests.
	colly.UserAgent("Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36"),
	// Limit on the response body size in bytes; 0 means no limit.
	// Pitfall: when downloading a file, leaving this unset caps the download at 10 MB.
	colly.MaxBodySize(0),
)

// Request callback: log the URL being requested.
c.OnRequest(func(req *colly.Request) {
	fmt.Println("Visiting", req.URL)
})

// Error callback: report the failure.
c.OnError(func(_ *colly.Response, reqErr error) {
	fmt.Println("Something went wrong:", reqErr)
})

// Response callback: save the response under fileName.
c.OnResponse(func(resp *colly.Response) {
	if saveErr := resp.Save(fileName); saveErr != nil {
		fmt.Println("err", saveErr)
	}
})

// For each anchor that has an href, visit the linked URL.
c.OnHTML("a[href]", func(anchor *colly.HTMLElement) {
	link := anchor.Attr("href")
	anchor.Request.Visit(link)
})

// Print the text of the first cell of each table row.
c.OnHTML("tr td:nth-of-type(1)", func(cell *colly.HTMLElement) {
	fmt.Println("First column of a table row:", cell.Text)
})

// Start the collector at the download URL.
err := c.Visit(url)
if err != nil {
	fmt.Println("err visit", err)
}