# 需要获取的信息有:
# 歌名、歌手、时长、链接、图片、歌词、源链接、来源
# song, singer, time, song_url, image, lyrics, real_Link, source
# 上代码喽!!!
"""run_163 开始"""
def run_163(self, url='https://music.163.com/#/discover/toplist?id=3778678'):
    """Entry point: scrape a music.163.com ranking (toplist) page.

    Args:
        url: Address of the ranking page to scrape. Defaults to the toplist
            with id 3778678, which used to be hard-coded in the body.

    The actual work is delegated to ``getMusic_163List``.
    """
    self.getMusic_163List(url)
"""仅获取排行榜 获取163歌单"""
def getMusic_163List(self, url):
    """Open a ranking page in Chrome and hand its song rows to ``getMusic``.

    Only ranking (toplist) pages are handled here.

    Args:
        url: Address of the music.163.com ranking page to load.
    """
    # Presumably content-setting value 2 blocks image loading to speed up
    # page loads -- confirm against the Chrome preference documentation.
    prefs = {'profile.default_content_setting_values': {'images': 2}}
    driver = self.getChromeDriver(prefs)
    try:
        driver.get(url)
        # The table rows are looked up inside the 'contentFrame' iframe.
        driver.switch_to.frame('contentFrame')
        # NOTE(review): find_elements_by_xpath is a legacy Selenium helper;
        # newer Selenium releases expect find_elements(By.XPATH, ...).
        # Confirm the Selenium version this project pins before upgrading.
        tr_list = driver.find_elements_by_xpath('//tbody/tr')
        self.getMusic(tr_list, driver)
    finally:
        # Always shut the browser down, even when scraping fails part-way,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()
""" 获取Chromedriver """
def getChromeDriver(self, prefs):
    """Create a Chrome WebDriver configured with the given preferences.

    Args:
        prefs: Mapping passed to Chrome as the experimental ``'prefs'``
            option.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    chrome_options = webdriver.ChromeOptions()
    # No headless argument is added here, so Chrome still opens a window.
    chrome_options.add_experimental_option('prefs', prefs)
    # A hyphenated name (user-agent) is not a valid Python identifier, so
    # the argument string is held in user_agent instead.
    user_agent = "user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1"
    chrome_options.add_argument(user_agent)
    # NOTE: path must point at the chromedriver executable on this machine.
    path = 'E:/soft/Chrome/Application/chromedriver'
    driver = webdriver.Chrome(options=chrome_options, executable_path=path)
    return driver
"""获取163 music"""
def getMusic(self, tr_list, driver):
    """Collect and print the details of every song in a ranking table.

    For each row, the song page link, the playable link, the title, the
    'time' text and the singer are read from the row, and the lyrics are
    requested from the music.163.com lyric API. Each song page is then
    opened to read its cover image, and the collected fields are printed.

    Args:
        tr_list: Row elements of the ranking table (the ``//tbody/tr``
            matches located by ``getMusic_163List``).
        driver: WebDriver used to open each song page.
    """
    item = []
    for tr in tr_list:
        music = {}
        link = tr.find_element_by_xpath('.//span[@class="txt"]/a')
        song_url = str(link.get_attribute('href'))
        # The song id is the text after '=' in the song page link.
        song_id = song_url.rsplit('=')[1]
        music['real_Link'] = song_url
        music['song_url'] = 'http://music.163.com/song/media/outer/url?id=' + song_id
        title = tr.find_element_by_xpath('.//span[@class="txt"]/a/b').get_attribute('title')
        # Strip the non-breaking spaces out of the title.
        music['song'] = str(title).replace('\xa0', '')
        music['time'] = tr.find_element_by_xpath('.//span[@class="u-dur "]').text
        music['singer'] = tr.find_element_by_xpath('.//div[@class="text"]').get_attribute('title')

        url_ = "http://music.163.com/api/song/lyric?id=" + song_id + "&lv=1&kv=1&tv=-1"
        req_ = requests.get(url_, headers=getHeader())
        detail = json.loads(req_.text)
        try:
            # Newlines are replaced with '#' so the lyrics stay on one line.
            music['lyrics'] = detail['lrc']['lyric'].replace('\n', '#')
        except (KeyError, TypeError, AttributeError):
            # The response has no usable 'lrc' -> 'lyric' text; fall back
            # to the placeholder instead of swallowing every exception.
            music['lyrics'] = '暂无歌词'
        item.append(music)

    # Cover images are fetched in a second pass: opening a song page moves
    # the driver away from the ranking page, so presumably every row has
    # to be read before the first navigation.
    for music in item:
        print('------', music['real_Link'])
        driver.get(music['real_Link'])
        driver.switch_to.frame('contentFrame')
        music['image'] = driver.find_element_by_xpath('.//img[@class="j-img"]').get_attribute('src')
        if music['song_url']:
            print(music['song'], music['singer'], music['time'], music['song_url'], music['image'], music['lyrics'], music['real_Link'], '网易')