前几天看了一个抓取斗鱼弹幕的帖子，觉得挺有意思的。本来也想学着做一下，结果发现没那么好弄，于是先做了一个抓取英雄联盟页面主播图片的实验，而且只抓取一个页面。
require 'net/http'
require 'open-uri'
# Fetches the resource at +url+ with an HTTP GET request.
#
# @param url [String] absolute URL of the resource to fetch
# @return [String] the response body as returned by Net::HTTP.get
# @raise [URI::InvalidURIError] if +url+ cannot be parsed as a URI
def query_url(url)
  target = URI.parse(url)
  Net::HTTP.get(target)
end
# Downloads the resource at +url+ and stores it as a binary file in +dir+.
#
# @param url [String] location of the resource to download
# @param dir [String] destination directory, with or without a trailing
#   path separator
# @param filename [String, nil] name of the file to create; when nil or
#   empty, the last segment of the URL's path is used instead
# @return [nil]
# @raise [ArgumentError] if no filename is given and none can be derived
#   from +url+
def save_url(url, dir, filename)
  if filename.nil? || filename.empty?
    # Derive the name from the URL's path instead of a fixed character
    # offset, so URLs of any length produce a usable file name.
    filename = URI.parse(url).path.to_s.split('/').last
    raise ArgumentError, "cannot derive a file name from #{url}" if filename.nil? || filename.empty?
  end

  # File.join inserts the separator only when +dir+ does not already end
  # with one.
  target = File.join(dir, filename)

  # URI.open (from open-uri) is needed because Kernel#open no longer
  # handles URLs on Ruby 3.0 and later.
  URI.open(url) do |remote|
    File.open(target, 'wb') { |local| IO.copy_stream(remote, local) }
  end
  nil
end
# Script body: fetches the League of Legends directory page and saves every
# streamer image whose URL matches the pattern below next to this script.
# Only this single page is processed.
start_url = 'http://www.douyutv.com/directory/game/LOL'

# %r{} keeps the URL's slashes unescaped; dots are escaped so they match a
# literal "." rather than any character.
image_pattern = %r{http://rpic\.douyucdn\.cn/z1603/13/\w{2}/\w{5,10}_\w{12}\.jpg}

# Joining with an empty component yields a trailing separator, so the value
# also works when used as a plain string prefix for the file name.
output_dir = File.join(File.dirname(__FILE__), '')

puts "开始下载#{start_url}"

# uniq avoids downloading the same image more than once when its URL appears
# several times in the page.
query_url(start_url).scan(image_pattern).uniq.each do |image_url|
  save_url(image_url, output_dir, nil)
end