selenium爬取音乐网

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import os
import requests
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Landing page of the music site to scrape.
url = 'http://www.qeecc.com/'

################################
# Create the download directory: an "MP3" folder next to this script.
PYPATH = os.path.abspath(__file__)
PYDIR = os.path.dirname(PYPATH)
MP3DIR = os.path.join(PYDIR, 'MP3')
# exist_ok=True makes this one race-free call instead of check-then-create,
# so a directory that appears between the check and the mkdir cannot crash us.
os.makedirs(MP3DIR, exist_ok=True)

# Launch the browser. Everything after this point runs inside try/finally so
# the Chrome process is always shut down, even if a lookup or download fails.
driver = webdriver.Chrome()
# Maps each song title (as shown on the page) to its audio URL.
MP3_HTML_DICT = {}
try:
    # Open the landing page and remember its tab so we can always return to it.
    driver.get(url)
    HOME = driver.current_window_handle
    # Each <li> inside the play-list container is treated as one song entry.
    lst = driver.find_elements(By.XPATH, '//div[@class="ilingkuplay_list"]//li')

    # Only the first 10 entries are collected.
    for NUM, i in enumerate(lst[:10], start=1):
        # A single lookup yields both the title text and the link to click.
        HTML = i.find_element(By.XPATH, './/div[@class="name"]/a')
        NAME = HTML.text
        # Titles can contain characters that are illegal in file names
        # (e.g. "/", ":", "?"); replace them so open() cannot fail on the path.
        SAFE_NAME = re.sub(r'[\\/:*?"<>|]', '_', NAME).strip() or 'song_%d' % NUM

        # Open the song page and switch to its tab. The tab is picked as
        # "any handle that is not the home tab" rather than by list position.
        HTML.click()
        SONG_TABS = [h for h in driver.window_handles if h != HOME]
        driver.switch_to.window(SONG_TABS[-1] if SONG_TABS else HOME)
        # Wait until the audio element exists in the DOM.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="jp_audio_0"]')))
        # Extra 2 s pause kept from the original script; presumably it gives
        # the page time to fill in the audio "src" -- TODO confirm.
        time.sleep(2)
        print(driver.current_url)

        # Audio download address.
        LINK = driver.find_element(By.XPATH, '//*[@id="jp_audio_0"]').get_attribute('src')
        print(NAME, LINK)
        MP3_HTML_DICT[NAME] = LINK

        # Lyrics link.
        LRC = driver.find_element(By.XPATH, '//div[@class="dance_wl"]/a[2]').get_attribute('href')
        print('歌词:', LRC)

        # Download the audio. The request is made (and checked) before the
        # file is opened so a failed request does not leave an empty file,
        # and the timeout keeps a stalled server from hanging the run.
        MP3_RESPONSE = requests.get(LINK, timeout=30)
        MP3_RESPONSE.raise_for_status()
        with open(os.path.join(MP3DIR, SAFE_NAME + '.mp3'), 'wb') as f:
            f.write(MP3_RESPONSE.content)

        # Download the lyrics. The encoding override must be applied to the
        # same response whose .text is written; previously a second request
        # was issued and the override was silently ignored.
        LRC_TEXT = requests.get(LRC, timeout=30)
        LRC_TEXT.raise_for_status()
        # NOTE(review): assumes the server returns \uXXXX-escaped text, as the
        # original encoding override implies -- confirm against a real response.
        LRC_TEXT.encoding = "unicode_escape"
        # Explicit UTF-8 so writing does not depend on the platform default.
        with open(os.path.join(MP3DIR, SAFE_NAME + '.lrc'), 'w', encoding='utf-8') as f:
            f.write(LRC_TEXT.text)

        print('-----------------------第', NUM, '首采集完成-----------------------')
        # Close the song tab (if one was opened) so tabs do not accumulate,
        # then go back to the home tab for the next entry.
        if driver.current_window_handle != HOME:
            driver.close()
        driver.switch_to.window(HOME)

    # Print the collected title -> audio URL mapping.
    print(MP3_HTML_DICT)
finally:
    # Always shut the browser down.
    driver.quit()