前言:之前寫過一篇關于爬取數據的文章,但是我們想在網頁上進行操作,總不可能只在命令行進行爬取吧,那么就要用到Django來操作,那么我們開始吧!!!(只是用于教學,若有商用,概不承擔責任!)
?
?
一:爬蟲端
既然前面已經講過如何爬取了,那么我們就直接看代碼:
# Crawl one NetEase Cloud Music chart page and save every listed song as an MP3.
# data['ids'] is the chart id sent by the front end (covered later in the article).
url = 'https://music.163.com/discover/toplist?id=%s' % data['ids']
head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'
}
down_url = 'https://music.163.com/song/media/outer/url?id='

# Placeholder taken from the original write path: replace it with the absolute
# path of the project's static directory. The original checked/created one path
# but wrote into another; a single variable keeps both in sync.
static_dir = r'絕對路徑static'
# Create the output directory once, before downloading. The original created it
# inside the loop and skipped the write in that same iteration, so the first
# song was silently lost whenever the directory did not exist yet.
os.makedirs(static_dir, exist_ok=True)

respone = requests.get(url, headers=head)
html = etree.HTML(respone.text)
# Every anchor whose href contains "song?" is treated as a link to a song page.
song_links = html.xpath('//a[contains(@href,"song?")]')

i = 0  # number of files written so far
for link in song_links:
    href = link.xpath('./@href')[0]
    music_id = href.split('=')[1]
    # Ids containing "$" are skipped (presumably un-rendered template
    # placeholders rather than real song ids - TODO confirm).
    if "$" in music_id:
        continue
    names = link.xpath('./text()')
    if not names:
        # Anchor without visible text: there is no name to build a file name from.
        continue
    music_name = names[0]
    music_url = down_url + music_id
    song = requests.get(url=music_url, headers=head)
    try:
        with open(os.path.join(static_dir, '%s.mp3' % music_name), "wb") as f:
            print("正在下載第 %i 首歌曲 歌名為: 《%s》 ..." % (i + 1, music_name))
            f.write(song.content)
        # Count the song only after it has actually been written.
        i += 1
    except OSError as e:
        # e.g. the song name contains characters that are invalid in a file name
        print('OSError', e)
結果:
二:數據庫以及node.js
1.數據庫
數據庫采用MySql,用于存儲我們歌曲的名稱以及ID:
2:node.js
采用node.js來進行我們數據庫的內容的顯示,不可能我們在頁面輸入數據吧,畢竟那么多我們也不可能記住!
- 封裝的MySql模塊
/**
 * Creates a single MySQL connection and exports it for other modules.
 */
const mysql = require("mysql");

// Development environment settings
const devSettings = {
  host: 'localhost',
  user: 'root',
  password: '',
  port: '3306',
  database: 'growup'
};

const connection = mysql.createConnection(devSettings);

module.exports = connection
- node模塊(music模塊)
// Express router that exposes the contents of `music_table`.
const express = require('express')
const router = express.Router()
// MySQL connection exported by ./setmysql
const connet = require('./setmysql')

/**
 * GET /getmusic
 * Sends `{ code: 200, result: rows }` with every row of `music_table`,
 * or `{ code: 400, result: <error message> }` when the query fails.
 */
router.get('/getmusic', (req, res) => {
  const sql = 'SELECT * FROM music_table'
  connet.query(sql, (err, rows) => {
    if (!err) {
      res.send({ code: 200, result: rows })
      return
    }
    // Query failed: log it and report the message to the caller.
    console.log('[SELECT ERROR] - ', err.message);
    res.send({ code: 400, result: err.message })
  });
})

module.exports = router
- 主模塊(main)
/**
 * Application entry point: configures CORS and body parsing, mounts the
 * music router and starts the HTTP server on port 3333.
 */
const express = require('express')
const bodyParser = require('body-parser')
const cors = require('cors');

const app = express()

// Allow cross-origin requests
app.use(cors());
// parse application/x-www-form-urlencoded
app.use(bodyParser.urlencoded({
  extended: false
}));
// parse application/json
app.use(bodyParser.json());

// Extra cross-origin response headers applied to every request
app.all('*', function (req, res, next) {
  res.header("Access-Control-Allow-Origin", "*");
  res.header("Access-Control-Allow-Headers", "X-Requested-With");
  res.header("Access-Control-Allow-Methods", "PUT,POST,GET,DELETE,OPTIONS");
  res.header("X-Powered-By", ' 3.2.1')
  // No Content-Type is forced here. The original set it to
  // "application/x-www-form-urlencoded" for every response, and because
  // res.send(obj)/res.json only pick "application/json" when no Content-Type
  // is already present, every JSON body went out mislabelled.
  next();
})

// Mount the route modules
const music = require('./music')
app.use(music)

// Start the server and log once it is listening
app.listen(3333, '0.0.0.0', function () {
  console.log('Server start on http://0.0.0.0:3333');
})
三:前端
首先我們看一下頁面的顯示效果:
邏輯梳理
- 點擊`任務下發`開始進行數據庫查詢,返回查詢結果;選中想要爬取的榜單,那么它對應的value會被拿到,也就是數據庫里面的id;
- 選中后點擊`執行任務`,會把數據提交到后臺進行爬取,爬取完之后將結果返回;
- 進行過濾并進行顯示數據
我們使用vue.js來進行前端頁面編寫,代碼如下:
<!--
  Crawler page markup: a header with the song count and a button that opens the
  chart-selection dialog, a two-column list (song names on the left, audio
  players on the right), and the dialog used to start a crawl.
-->
<template>
<div class="music">
<!-- Header: musiclist[0] is the first element of the /seemusic/ response (the total file count) -->
<div class="music_head">
<span style="float: left;margin-left: 20px;color:black">網(wǎng)易云爬取(目前歌曲:<b>{{musiclist[0]}}</b>首)</span>
<span style="margin-left: 10px;color: red">注:因為可能存在重復(fù)數(shù)據(jù),所以總數(shù)據(jù)可能不會變化</span>
<el-button type="primary" size="small" style="float: right;margin-top: 10px;margin-right: 20px;" @click="gotask">任務(wù)下發(fā)</el-button>
</div>
<!-- Content area -->
<div class="music_bottom">
<!-- Left column: song names. slice(33) drops the 33-character URL prefix that get() prepends to every file name -->
<div class="bottom_left">
<ul>
<li v-for="(item,index) in musictitle[2]" :key="index">{{index+1}}、{{item.slice(33)}}</li>
</ul>
</div>
<!-- Right column: one audio player per song URL -->
<div class="bottom_right">
<ul>
<li v-for="(item1,index1) in musiclist[2]" :key="index1">
<audio style="margin-top: 30px;" :src="item1" controls="controls"></audio>
</li>
</ul>
</div>
</div>
<!-- Dialog for choosing which chart to crawl; the selected option's muscid is bound to `value` -->
<el-dialog :append-to-body="true" title="進行爬取" :visible.sync="dialogVisible" width="30%">
<table cellpadding="50px" cellspacing="30px" style="width:100%;text-align: center;border:1px solid black">
<tr>
<td>選擇榜名:</td>
<td>
<el-select v-model="value" clearable placeholder="請選擇">
<el-option v-for="item in musicname" :key="item.muscid" :label="item.type" :value="item.muscid">
</el-option>
</el-select>
</td>
</tr>
</table>
<span slot="footer" class="dialog-footer">
<el-button type="primary" @click="tasksend">執(zhí)行任務(wù)</el-button>
</span>
</el-dialog>
</div>
</template>
<script>
/**
 * Crawler page component.
 *
 * Loads the list of downloaded songs on creation, lets the user pick a chart
 * from the Node service and asks the Django service to crawl it.
 */
export default {
  name: 'music',
  data () {
    return {
      dialogVisible: false,
      // Chart list returned by /getmusic
      musicname: [],
      // Currently selected chart id
      value: '',
      // User tasks
      task: [],
      // Pagination state
      currentPage: 1,
      size: 10,
      // Status text
      status: '進行中...',
      // /seemusic/ response with the file names turned into URLs
      musiclist: [],
      title: '',
      // Same response object as musiclist (see get())
      musictitle: [],
      // Number of songs reported by the last crawl
      count: ''
    }
  },
  created () {
    this.get()
  },
  components: {},
  mounted () {},
  methods: {
    // Shows the generic "service error" toast shared by every request below.
    notifyServiceError () {
      this.$message({
        type: 'error',
        message: '服務(wù)異常,請稍后重試'
      })
    },
    // Opens the chart dialog and loads the chart list from the Node service.
    gotask () {
      this.dialogVisible = true
      this.$get('http://192.168.1.107:3333/getmusic').then(res => {
        console.log(res)
        if (res.code === 200) {
          this.musicname = res.result
        } else {
          this.notifyServiceError()
        }
      }).catch(() => {
        this.notifyServiceError()
      })
    },
    // Loads the downloaded-song list from the Django service.
    get () {
      this.$get('http://192.168.1.107:8000/seemusic/').then(res => {
        this.musictitle = res
        // res[2] holds file names; turn each into a static-file URL in place.
        // musictitle and musiclist deliberately share this object: the template
        // strips the 33-character prefix again to display the bare name.
        for (let i = 0; i < res[2].length; i++) {
          res[2][i] = 'http://192.168.1.107:8000/static/' + res[2][i]
        }
        console.log(res)
        this.musiclist = res
        console.log(this.musiclist.length)
      }).catch(() => {
        this.notifyServiceError()
      })
    },
    // Sends the selected chart id to the Django service to be crawled.
    tasksend () {
      const loading = this.$loading({
        lock: true,
        text: '數(shù)據(jù)量可能比較大,正在加載...',
        spinner: 'el-icon-loading',
        background: 'rgba(0, 0, 0, 0.7)'
      })
      this.dialogVisible = false
      const payload = {
        ids: Number(this.value)
      }
      this.$post('http://192.168.1.107:8000/music/', payload).then(res => {
        console.log(res)
        loading.close()
        // The service answers 'ok<count>'; anything else (including a
        // non-string body) is treated as a failure instead of throwing.
        if (typeof res === 'string' && res.indexOf('ok') > -1) {
          this.count = res.slice(2)
          this.get()
          this.$message({
            type: 'success',
            message: `爬取${this.count}首`
          })
        } else {
          this.notifyServiceError()
        }
      }).catch(() => {
        // Without this handler a failed request left the full-screen loading
        // mask up forever.
        loading.close()
        this.notifyServiceError()
      })
    },
    // Row action handler; currently it only logs its arguments.
    handleSee (index, row) {
      console.log(index, row)
    },
    // Stores the page number selected in the pager.
    handleCurrentChange (val) {
      this.currentPage = val
    },
    // Formats a timestamp as 'YYYY-MM-DD hh:mm:ss' with zero-padded fields.
    timestampToTime (timestamp) {
      const date = new Date(timestamp)
      const pad = n => n.toString().padStart(2, '0')
      const day = date.getFullYear() + '-' + pad(date.getMonth() + 1) + '-' + pad(date.getDate())
      const time = pad(date.getHours()) + ':' + pad(date.getMinutes()) + ':' + pad(date.getSeconds())
      return day + ' ' + time
    }
  }
}
</script>
<style scoped>
/* Page container: fixed 900x650 box centred horizontally */
.music {
width: 900px;
height: 650px;
/* background-color: aquamarine; */
margin: 0 auto;
box-shadow: -5px 0 5px #d7eafc, 5px 0 5px #d7eafc, 0 5px 5px #a7ccf0;
}
/* Header bar */
.music_head {
width: 100%;
height: 50px;
/* background-color: pink; */
line-height: 50px;
}
/* Content area: flex row that scrolls when the lists overflow */
.music_bottom {
width: 100%;
height: 600px;
border-top: 1px solid black;
/* background-color: antiquewhite; */
display: flex;
overflow: auto;
}
/* Left column (song names) */
.bottom_left {
width: 550px;
height: 100%;
/* background-color: aqua; */
}
/* Single-line rows; long names are cut off with an ellipsis */
.bottom_left ul li {
height: 100px;
line-height: 100px;
text-align: left;
text-overflow: ellipsis;
white-space: nowrap;
overflow: hidden;
color: black;
}
/* Right column (audio players) */
.bottom_right {
width: 350px;
height: 100%;
/* background-color: pink; */
}
/* Same 100px row height as the left column */
.bottom_right ul li {
height: 100px;
line-height: 100px;
text-align: center;
}
/* Pagination */
.ym {
width: 100%;
height: 34px;
/* background-color: skyblue; */
text-align: center;
}
/* Dialog style overrides; /deep/ lets these scoped rules reach the el-dialog elements */
/deep/ .el-dialog .el-dialog__header {
background-color: #26639c;
}
/deep/ .el-dialog .el-dialog__header .el-dialog__title {
color: #ffffff;
font-size: 20px;
padding: 15px 20px 10px;
}
/deep/ .el-dialog .el-dialog__header .el-dialog__headerbtn {
top: 18px;
border-radius: 50%;
width: 22px;
height: 21px;
background-color: #ffffff;
}
</style>
四:Django模塊
我們數據的爬取以及結果返回運用python來解決,通過Django來進行接口的編寫:
- 爬取模塊
def music(request):
    """Crawl the NetEase Cloud Music chart requested by the front end.

    The request body must be JSON containing an ``ids`` key with the chart id.
    Every song linked from the chart page is downloaded into the static
    directory as ``<song name>.mp3``.

    Returns:
        HttpResponse whose body is ``'ok'`` followed by the number of files
        written, e.g. ``'ok25'``.
    """
    data = json.loads(request.body.decode(encoding="utf8"))
    print(data['ids'])
    # Kept from the original; presumably a small throttle before hitting the site.
    time.sleep(1)

    url = 'https://music.163.com/discover/toplist?id=%s' % data['ids']
    head = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
    }
    down_url = 'https://music.163.com/song/media/outer/url?id='

    # Placeholder path from the article: replace with the real static directory.
    static_dir = r'路徑static'
    # Create the directory once, before downloading. The original created it
    # inside the loop and skipped the write in that same iteration, so the
    # first song was lost whenever the directory did not exist yet.
    os.makedirs(static_dir, exist_ok=True)

    respone = requests.get(url, headers=head)
    html = etree.HTML(respone.text)
    # Every anchor whose href contains "song?" is treated as a song link.
    song_links = html.xpath('//a[contains(@href,"song?")]')

    i = 0  # number of files written
    for link in song_links:
        href = link.xpath('./@href')[0]
        music_id = href.split('=')[1]
        # Ids containing "$" are skipped (presumably un-rendered template
        # placeholders rather than real song ids - TODO confirm).
        if "$" in music_id:
            continue
        names = link.xpath('./text()')
        if not names:
            # Anchor without visible text: no name to build a file name from.
            continue
        music_name = names[0]
        # Named `song` so the local does not shadow this view function.
        song = requests.get(url=down_url + music_id, headers=head)
        try:
            with open(os.path.join(static_dir, '%s.mp3' % music_name), "wb") as f:
                print("正在下載第 %i 首歌曲 歌名為: 《%s》 ..." % (i + 1, music_name))
                f.write(song.content)
            # Count the song only after it has actually been written.
            i += 1
        except OSError as e:
            # e.g. the song name contains characters invalid in a file name
            print('OSError', e)
    return HttpResponse('ok{0}'.format(i))
- 結果返回處理模塊
也就是我們存儲在文件夾之后進行文件夾內容的映射,靜態文件的訪問:
def seemusic(request):
    """List the downloaded songs found in the static directory.

    Returns:
        JsonResponse wrapping ``[count, usable, names]`` where ``count`` is the
        number of entries in the directory, ``names`` the file names larger
        than the size threshold and ``usable`` the length of ``names``.
        When the directory does not exist yet the result is ``[0, 0, []]``.
    """
    # Placeholder path from the article: replace with the real static directory.
    static_dir = r'路徑static'
    # Per the original author's note, files at or below this size are empty
    # downloads and are filtered out.
    min_size = 94386

    result = []
    count = 0
    # Before the first crawl the directory may not exist; report an empty
    # list instead of letting os.listdir raise and the view answer 500.
    if os.path.isdir(static_dir):
        for name in os.listdir(static_dir):
            print(name)
            count += 1
            if os.path.getsize(os.path.join(static_dir, name)) > min_size:
                result.append(name)

    data = [count, len(result), result]
    print(data)
    # safe=False is required because the payload is a list, not a dict.
    return JsonResponse(data, safe=False)
五:結果預覽
人生從來沒有真正的絕境。無論遭受多少艱辛,無論經歷多少苦難,只要一個人的心中還懷著一粒信念的種子,那么總有一天,他就能走出困境,讓生命重新開花結果。
?
本文摘自 :https://blog.51cto.com/u