-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvacancyes.py
More file actions
108 lines (85 loc) · 3.11 KB
/
vacancyes.py
File metadata and controls
108 lines (85 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import csv
import re
from urllib.parse import urljoin

import fake_useragent
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
"""Scrape Python-developer vacancies from rabota.ru and joblab.ru.

Collects the company name, vacancy title, short description and a link
for every vacancy on the first results page of both sites, then writes
the combined table to ``vacancyes.csv`` (semicolon-delimited).
"""

# Search URLs: rabota.ru query is the URL-encoded "Программист python".
URL_RABOTA = 'https://www.rabota.ru/vacancy/?query=%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%81%D1%82%20python&page=1'
URL_JOBLAB = 'https://joblab.ru/search.php?r=vac&srprofecy=python&kw_w2=1&srzpmin=&srregion=50&srcity=&srcategory=&submit=1'
OUTPUT_FILE = 'vacancyes.csv'
CSV_HEADER = ['Company', 'Title', 'Overflow', 'Link']


def _fetch_soup(url, headers):
    """GET *url* and return the parsed BeautifulSoup tree.

    Raises requests.HTTPError on a non-2xx response instead of silently
    parsing an error page, and times out rather than hanging forever.
    """
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _clean_texts(elements):
    """Return the whitespace-normalised text of each tag in *elements*."""
    return [el.get_text().replace('\n', '').strip() for el in elements]


def scrape_vacancies():
    """Scrape both sites and return a list of [company, title, description, link] rows."""
    # A random User-Agent makes both sites less likely to reject the request.
    # (The original script only sent it with the joblab request.)
    headers = {'User-Agent': UserAgent().random}
    soup_rabota = _fetch_soup(URL_RABOTA, headers)
    soup_joblab = _fetch_soup(URL_JOBLAB, headers)

    # rabota.ru: the title <a> tags also carry the vacancy links.
    rabota_title_links = soup_rabota.find_all(
        'a', class_='vacancy-preview-card__title_border')
    companies = _clean_texts(
        soup_rabota.find_all('span', class_='vacancy-preview-card__company-name'))
    titles = _clean_texts(rabota_title_links)
    descriptions = _clean_texts(
        soup_rabota.find_all('div', class_='vacancy-preview-card__short-description'))
    # urljoin avoids the doubled slash that plain '+' concatenation
    # produced when href already starts with '/'.
    links = [urljoin('https://www.rabota.ru/', a['href'])
             for a in rabota_title_links]

    # joblab.ru: each <p class="prof"> holds the title and the link <a>.
    joblab_profs = soup_joblab.find_all('p', class_='prof')
    companies += _clean_texts(soup_joblab.find_all('p', class_='org'))
    titles += _clean_texts(joblab_profs)
    descriptions += _clean_texts(soup_joblab.find_all('p', class_='descr2'))
    links += [urljoin('https://joblab.ru/', p.a['href']) for p in joblab_profs]

    # zip truncates to the shortest column, so a page with a missing field
    # cannot raise IndexError the way positional indexing could.
    return [list(row) for row in zip(companies, titles, descriptions, links)]


def write_csv(rows):
    """Write the header and *rows* to OUTPUT_FILE in a single pass."""
    # newline='' is required by the csv module to avoid blank lines on
    # Windows; utf-8 keeps the Cyrillic text intact regardless of the
    # platform's default encoding.
    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


if __name__ == '__main__':
    write_csv(scrape_vacancies())