import datetime
import random
import multiprocessing
from multiprocessing.dummy import Pool   # thread pool behind the Pool API

import requests
from tld import get_tld

t1 = datetime.datetime.now()
pool = Pool(processes=10)                # ten worker threads
def conn_url(url):
    """Return the URL if it answers with HTTP 200, otherwise None."""
    url = url.strip('\r\n')
    try:
        # Randomized timeout so one slow host cannot stall the scan for long
        html_url = requests.get(url, timeout=random.randint(3, 5))
        if html_url.status_code == 200:
            return url
    except Exception:
        pass
#----------------------------------------------------------------------
def domain_url(url):
    """Print the registered domain of a reachable URL."""
    url_domain = get_tld(url)
    print url_domain

url_list = []
with open('kehu_ok.txt') as s:
    for i in s:
        url = i.strip('\r\n')
        url_ok = conn_url(url)
        if url_ok:                       # conn_url returns None for unreachable URLs
            url_list.append(url_ok)

pool.map(domain_url, url_list)
print 'Starting ' + multiprocessing.current_process().name
pool.close()
pool.join()
print 'Multiprocess Scanning Completed in', datetime.datetime.now() - t1
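
As written, the slow part (the HTTP check in conn_url) runs serially inside the read loop, and only the cheap get_tld lookup goes through the pool. A minimal sketch of the inverse split, assuming the same kehu_ok.txt input (check_pool is my own name, not part of the original script), maps conn_url across the pool so the network I/O itself runs on ten threads:

check_pool = Pool(processes=10)          # fresh pool; the one above is already closed
urls = []
with open('kehu_ok.txt') as s:
    for line in s:
        urls.append(line.strip('\r\n'))
reachable = [u for u in check_pool.map(conn_url, urls) if u]   # None means unreachable
check_pool.close()
check_pool.join()
for u in reachable:
    domain_url(u)
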
Testing website connectivity 2
import random

import requests
import tld
def link_code(url):
    """Return the URL if it answers with HTTP 200, otherwise None."""
    try:
        html_url = requests.get(url, timeout=random.randint(3, 8))
        url_code = html_url.status_code
        if url_code == 200:
            print url_code
            return url
    except Exception:
        pass
#link_code('http://www.baidu.com')
#----------------------------------------------------------------------
def input_url(url_file):
    """Write every URL from url_file that answers with HTTP 200 to target_200_url.txt."""
    w1 = open('target_200_url.txt', 'w+')
    with open(url_file) as f:
        for i in f:
            i = i.strip('\r\n')
            print i
            try:
                url_200 = link_code(i)
                if url_200:              # only record URLs that actually returned 200
                    w1.write(url_200 + '\n')
                    print i + ' yes!'
            except Exception as e:
                print i + ' ' + str(e)
    w1.close()
#input_url('formal_url.txt')
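
Since the filter only inspects the status code, downloading each response body is wasted transfer. A lighter variant, assuming the target servers answer HEAD requests correctly (link_code_head is a hypothetical name, not part of the original script), fetches headers only:

def link_code_head(url):
    """Hypothetical HEAD-based variant of link_code; transfers headers, not bodies."""
    try:
        resp = requests.head(url, timeout=random.randint(3, 8), allow_redirects=True)
        if resp.status_code == 200:
            return url
    except Exception:
        pass
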
a = []   # filled by ceshi_url below with normalized http://<domain> URLs
#----------------------------------------------------------------------
def ceshi_url(url_file):
    """Normalize each URL in url_file to http://<registered domain> and collect it in a."""
    with open(url_file) as w:
        for i in w:
            i = i.strip('\r\n')
            try:
                url = tld.get_tld(i)
                a.append('http://' + url)
            except Exception as e:
                print str(e)
#ceshi_url('formal_url.txt')
#g = open('url.txt', 'w+')
#a = list(set(a))
#print len(a)
#for i in a:
#    print i
#    g.writelines(i)
#    g.writelines('\n')
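
Putting the pieces together: a short driver, assuming formal_url.txt as the input file named in the commented-out calls above, collects the domains, de-duplicates them, and writes one per line to url.txt (essentially the commented draft above, completed):

ceshi_url('formal_url.txt')
unique = sorted(set(a))          # drop duplicate domains
g = open('url.txt', 'w+')
for u in unique:
    print u
    g.write(u + '\n')
g.close()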