import re
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import os
import requests
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Home page of the site whose play list is scraped.
url = 'http://www.qeecc.com/'
################################
# Create the download directory: an "MP3" folder next to this script.
PYPATH = os.path.abspath(__file__)
PYDIR = os.path.dirname(PYPATH)
MP3DIR = os.path.join(PYDIR, 'MP3')
# exist_ok=True replaces the separate exists() check, so there is no window
# between "check" and "create" in which another process can create the folder.
os.makedirs(MP3DIR, exist_ok=True)
# Characters that cannot appear in a file name on Windows, plus control
# whitespace; "/" also covers the POSIX path separator.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')
# Upper bound on the number of songs scraped per run.
MAX_SONGS = 10
# Seconds to wait on each HTTP download before giving up instead of hanging.
REQUEST_TIMEOUT = 30


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced.

    Each unsafe character becomes "_"; surrounding whitespace is stripped.
    Falls back to "untitled" when nothing usable is left.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', name).strip()
    return cleaned or 'untitled'


# Song title -> audio URL for every song scraped in this run.
MP3_HTML_DICT = {}
NUM = 1

# Create the browser object / start the browser.
driver = webdriver.Chrome()
try:
    # Open the home page and remember its window so we can always come back.
    driver.get(url)
    home_handle = driver.current_window_handle

    # Find the play-list entries.
    lst = driver.find_elements(By.XPATH, '//div[@class="ilingkuplay_list"]//li')

    for i in lst[:MAX_SONGS]:
        # Look the anchor up once; it provides both the title and the click target.
        HTML = i.find_element(By.XPATH, './/div[@class="name"]/a')
        NAME = HTML.text

        # Jump to the song page and switch to the most recently listed window.
        HTML.click()
        driver.switch_to.window(driver.window_handles[-1])

        # Wait until the audio element is present in the page.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="jp_audio_0"]'))
        )
        # Extra 2 second delay after the element appears
        # (presumably so the player has time to fill in its src -- confirm).
        time.sleep(2)
        print(driver.current_url)

        # Get the MP3 download address.
        LINK = driver.find_element(By.XPATH, '//*[@id="jp_audio_0"]').get_attribute('src')
        print(NAME, LINK)
        MP3_HTML_DICT[NAME] = LINK

        # Get the lyrics link.
        LRC = driver.find_element(By.XPATH, '//div[@class="dance_wl"]/a[2]').get_attribute('href')
        print('歌词:', LRC)

        # The title is used as a file name, so strip characters the file
        # system rejects; the dictionary above keeps the original title.
        FILE_STEM = os.path.join(MP3DIR, _safe_filename(NAME))

        # Download the music. Fetch first, then open the file, so a failed
        # request does not leave an empty .mp3 behind.
        MP3_DATA = requests.get(LINK, timeout=REQUEST_TIMEOUT).content
        with open(FILE_STEM + '.mp3', 'wb') as f:
            f.write(MP3_DATA)

        # Download the lyrics exactly once and write the text of the SAME
        # response whose encoding was overridden, so the conversion is
        # actually applied (it used to be set on one response and discarded).
        LRC_TEXT = requests.get(LRC, timeout=REQUEST_TIMEOUT)
        # Encoding conversion.
        LRC_TEXT.encoding = "unicode_escape"
        # Explicit UTF-8 so non-ASCII lyrics never hit a locale-dependent
        # UnicodeEncodeError.
        with open(FILE_STEM + '.lrc', 'w', encoding='utf-8') as f:
            f.write(LRC_TEXT.text)

        print('-----------------------第', NUM, '首采集完成-----------------------')

        # Close the song tab (only if it really is a separate window) so tabs
        # do not pile up, then switch back to the home page.
        if driver.current_window_handle != home_handle:
            driver.close()
        driver.switch_to.window(home_handle)
        NUM += 1

    # Print the dictionary.
    print(MP3_HTML_DICT)
finally:
    # Close the browser even when scraping or a download raised.
    driver.quit()