82 lines
3.1 KiB
Python
82 lines
3.1 KiB
Python
import csv
|
|
import re
|
|
import time
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from tqdm import tqdm
|
|
|
|
# Product-card scraper: reads page URLs from INPUT_PATH, extracts a fixed set
# of fields from each page and writes one CSV row per scraped page to
# OUTPUT_PATH.

INPUT_PATH = 'lynx_pn_input.txt'
OUTPUT_PATH = 'oen.csv'
CSV_HEADER = ['url', 'ID', 'Название', 'Описание', 'Фото', 'OeN', 'Аналоги']

# Placeholder written to the CSV when a field cannot be extracted.
MISSING = 'None'
# Image URL that is treated as "no photo" when it is the only image found.
NO_IMAGE_URL = 'https://lynxauto.info/image/trumb/400x300/no_image.jpg'

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the run
ERROR_BACKOFF = 60    # seconds to pause after a failed URL

_NEWLINE_RUN = re.compile(r'\n+')


def clean_cell_text(text):
    """Normalise the raw text of one table cell.

    Surrounding whitespace is stripped, every tab and space character is
    removed, and each run of newlines is replaced by ``' | '``.
    """
    compact = text.strip().replace('\t', '').replace(' ', '')
    return _NEWLINE_RUN.sub(' | ', compact)


def _div_text(soup, css_class):
    """Return the stripped text of the first ``div`` with *css_class*.

    Returns ``MISSING`` when the div is absent or its text is empty.
    """
    node = soup.find('div', class_=css_class)
    # find() returns None when nothing matches, so check before get_text().
    text = node.get_text(strip=True) if node is not None else ''
    return text or MISSING


def _properties(soup):
    """Return the ``pcard-props`` rows as ``'title value'`` joined by ``' | '``.

    Rows lacking either a ``td.title`` or a ``td.value`` cell are skipped.
    Returns ``MISSING`` when the container div is absent.
    """
    box = soup.find('div', id='pcard-props')
    if box is None:
        return MISSING
    pairs = []
    for row in box.find_all('tr'):
        title = row.find('td', class_='title')
        value = row.find('td', class_='value')
        if title is None or value is None:
            # Header/spacer rows must not abort the whole page.
            continue
        pairs.append(
            f'{title.get_text(strip=True)} {value.get_text(strip=True)}')
    return ' | '.join(pairs)


def _images(soup):
    """Return the ``src`` of every image in ``pcard-view-images``.

    The sources are joined by ``' | '``. Returns ``MISSING`` when the
    container div is absent or when the joined result is exactly the
    placeholder ``NO_IMAGE_URL``.
    """
    gallery = soup.find('div', id='pcard-view-images')
    if gallery is None:
        return MISSING
    joined = ' | '.join(
        img['src'] for img in gallery.find_all('img') if img.has_attr('src'))
    return MISSING if joined == NO_IMAGE_URL else joined


def _table_rows(soup, div_id):
    """Flatten the table inside ``div#<div_id>`` into a single string.

    The first ``tr`` is skipped. Cells (``td``/``th``) of a row are cleaned
    with :func:`clean_cell_text` and joined by ``' | '``; rows are joined by
    ``' <> '``. Returns ``MISSING`` when the container div is absent.
    """
    box = soup.find('div', id=div_id)
    if box is None:
        return MISSING
    lines = [
        ' | '.join(clean_cell_text(cell.get_text())
                   for cell in row.find_all(['td', 'th']))
        for row in box.find_all('tr')[1:]
    ]
    return ' <> '.join(lines)


def scrape_product(url):
    """Download *url* and return its CSV row as a list of seven strings.

    The row order matches ``CSV_HEADER``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response status.
    """
    # Send a GET request to the page.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Error pages are failures, not product cards with every field missing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return [
        url,
        _div_text(soup, 'pcard-model'),
        _div_text(soup, 'pcard-name'),
        _properties(soup),
        _images(soup),
        _table_rows(soup, 'pcard-oeno'),
        _table_rows(soup, 'pcard-analog'),
    ]


def main():
    """Scrape every URL listed in ``INPUT_PATH`` into ``OUTPUT_PATH``.

    URLs are whitespace-separated in the input file. A URL that fails is
    reported on stdout and skipped after a pause of ``ERROR_BACKOFF`` seconds;
    it is not retried.
    """
    with open(INPUT_PATH, 'r') as source:
        urls = source.read().split()

    # errors='replace' keeps rows containing characters that windows-1251
    # cannot represent instead of losing the row to UnicodeEncodeError.
    with open(OUTPUT_PATH, 'w', newline='', encoding='windows-1251',
              errors='replace') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(CSV_HEADER)
        for url in tqdm(urls, desc="Processing", unit="iteration"):
            try:
                row = scrape_product(url)
            except Exception as exc:
                # Per-URL boundary: deliberately broad so that one bad page
                # cannot abort a long run.
                print(f'{url}: {exc}')
                time.sleep(ERROR_BACKOFF)
                continue
            writer.writerow(row)


if __name__ == '__main__':
    main()
|