-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtesttmp.py
More file actions
53 lines (50 loc) · 1.8 KB
/
testtmp.py
File metadata and controls
53 lines (50 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
import sys
import threading
from multiprocessing.dummy import Pool as ThreadPool

import pandas as pd
import requests
from lxml import etree
# Python 2 only: reloading ``sys`` brings back ``setdefaultencoding`` (removed
# from the module at interpreter start-up) so implicit str/unicode conversions
# use UTF-8 instead of ASCII.  Python 3 has no ``reload`` builtin and no
# ``setdefaultencoding``, so the hack is skipped there instead of crashing
# with a NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Column accumulators filled by spider(): one entry per scraped table row,
# so index i across the six lists is meant to describe the same row.
Name = []
Money = []
Project = []
Date = []
Style = []
User_type = []

# URLs of the listing pages to crawl.
page = []
# Guards the shared column lists.  One table row becomes six separate appends;
# without the lock, rows parsed by different worker threads can interleave and
# leave the columns misaligned.
_RESULTS_LOCK = threading.Lock()

# Number of <td> cells a data row must have: name, money, project, date,
# style and user type, in that order.
_EXPECTED_CELLS = 6


def _cell_text(cell):
    """Return the text of *cell*'s first child element, minus tabs and spaces.

    Falls back to the cell itself when it has no child element.
    """
    node = cell[0] if len(cell) else cell
    return node.xpath('string(.)').replace('\t', '').replace(' ', '')


def spider(url, timeout=30):
    """Fetch one listing page and append its table rows to the module lists.

    Each ``<tr>`` of the first ``table.table-striped.table-bordered`` element
    that has at least six ``<td>`` cells contributes one entry to each of
    ``Name``, ``Money``, ``Project``, ``Date``, ``Style`` and ``User_type``.
    Rows with fewer cells (for example a ``<th>`` header row) are skipped.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang a worker thread forever.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # The parser is handed UTF-8 bytes, as before.
    selector = etree.HTML(response.text.encode("utf-8"))
    tables = []
    if selector is not None:
        tables = selector.xpath(
            '//table[@class="table table-striped table-bordered"]')
    if not tables:
        # Nothing to parse on this page; report it instead of raising an
        # opaque IndexError that would abort the whole crawl.
        sys.stderr.write('no donation table found at %s\n' % url)
        return

    rows = []
    for row in tables[0].xpath('tr'):
        cells = row.xpath('td')
        if len(cells) < _EXPECTED_CELLS:
            continue

        # The amount is read from the cell itself and only its first
        # whitespace-separated token is kept.  It stays text (no encode) so
        # every column has the same string type.
        amount = cells[1].xpath('string(.)').replace('\t', '').split()
        rows.append((
            _cell_text(cells[0]),
            amount[0] if amount else '',
            _cell_text(cells[2]),
            _cell_text(cells[3]),
            _cell_text(cells[4]),
            _cell_text(cells[5]),
        ))

    # Publish the whole page at once so the six columns stay in step.
    with _RESULTS_LOCK:
        for name, money, project, date, style, user_type in rows:
            Name.append(name)
            Money.append(money)
            Project.append(project)
            Date.append(date)
            Style.append(style)
            User_type.append(user_type)
# Queue the listing pages to crawl: pageNo 1 through 100, ten records each.
for i in range(1, 101):
    newpage = 'http://alumni.xjtu.edu.cn/donation/namelist?pageNo=' + str(i) + '&pageSize=10&billnum=&donateUserName=&orderWay=&donationid=0'
    page.append(newpage)

# Original author's note (translated): two worker threads were chosen for a
# dual-core machine; adjust the number to suit your own computer.  The note
# also says the pool can be removed at the cost of a slower run -- doing so
# means replacing pool.map with a plain loop over `page`, not just deleting
# the line below.
pool = ThreadPool(2)
try:
    pool.map(spider, page)
finally:
    # Release the worker threads even when a page fails to download or parse.
    pool.close()
    pool.join()

# Assemble the scraped columns into a table and write it out as a
# GBK-encoded CSV file.
data = {'Name': Name, 'Money': Money, 'Project': Project, 'Date': Date, 'Style': Style, 'User_type': User_type}
data = pd.DataFrame(data)
data.to_csv('donation.csv', encoding='gbk')