Загрузить файлы в «/»

This commit is contained in:
2024-08-01 03:33:38 +00:00
parent 7bfd735f1f
commit cd3e5f38f7
2 changed files with 129 additions and 0 deletions

81
pars_oem.py Normal file
View File

@@ -0,0 +1,81 @@
import csv
import re
import time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Load the whitespace-separated list of product page URLs to scrape.
# The context manager guarantees the handle is closed even if read() raises.
with open('lynx_pn_input.txt', 'r') as f:
    urls = f.read().split()
# Placeholder written to the CSV when a page section is absent.
_MISSING = 'None'
# URL of the site's "no image" stub; a product showing only this has no photo.
_NO_IMAGE_URL = 'https://lynxauto.info/image/trumb/400x300/no_image.jpg'
# Runs of newlines inside a table cell are collapsed into a field separator.
_NEWLINES = re.compile(r'\n+')


def _div_text(soup, css_class: str) -> str:
    """Return the stripped text of the first <div> with *css_class*.

    Falls back to the 'None' placeholder when the element is absent or empty,
    instead of raising AttributeError on a missing element.
    """
    node = soup.find("div", class_=css_class)
    text = node.get_text(strip=True) if node is not None else ''
    return text or _MISSING


def _properties(soup) -> str:
    """Return the "title value" pairs of the properties table joined by ' | '."""
    container = soup.find("div", id="pcard-props")
    if container is None:
        return _MISSING
    pairs = []
    for row in container.find_all("tr"):
        title = row.find("td", class_="title")
        value = row.find("td", class_="value")
        # Rows without both cells carry no property; skip them rather than
        # letting one odd row discard the whole product.
        if title is None or value is None:
            continue
        pairs.append(
            f'{title.get_text(strip=True)} {value.get_text(strip=True)}')
    return ' | '.join(pairs)


def _images(soup) -> str:
    """Return the product image URLs joined by ' | ', or 'None' if no photo."""
    wrapper = soup.find("div", id="pcard-view-images")
    if wrapper is None:
        return _MISSING
    joined = ' | '.join(
        img['src'] for img in wrapper.find_all('img') if img.get('src'))
    return _MISSING if joined == _NO_IMAGE_URL else joined


def _table_rows(soup, div_id: str) -> str:
    """Flatten the table inside <div id=div_id> into a single string.

    The first row is skipped, cells are joined by ' | ' and rows by ' <> '.
    Returns the 'None' placeholder when the container is absent.
    """
    container = soup.find('div', id=div_id)
    if container is None:
        return _MISSING
    rows = []
    for row in container.find_all('tr')[1:]:
        cells = [
            _NEWLINES.sub(
                ' | ',
                cell.get_text().strip().replace('\t', '').replace(' ', ''))
            for cell in row.find_all(['td', 'th'])
        ]
        rows.append(' | '.join(cells))
    return ' <> '.join(rows)


# errors='replace' keeps a row whose text contains characters that
# windows-1251 cannot represent, instead of losing the row to an encode error.
with open('oen.csv', 'w', newline='', encoding='windows-1251',
          errors='replace') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ['url', 'ID', 'Название', 'Описание', 'Фото', 'OeN', 'Аналоги'])
    for url in tqdm(urls, desc="Processing", unit="iteration"):
        try:
            # Send a GET request to the page. Without a timeout a stalled
            # connection would block the whole run indefinitely.
            response = requests.get(url, timeout=30)
            # Treat HTTP errors (rate limiting, 404, 5xx) as failures so the
            # back-off below still triggers instead of writing an empty row.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            writer.writerow([
                url,
                _div_text(soup, "pcard-model"),
                _div_text(soup, "pcard-name"),
                _properties(soup),
                _images(soup),
                _table_rows(soup, 'pcard-oeno'),
                _table_rows(soup, 'pcard-analog'),
            ])
        except Exception as e:
            # Best-effort scrape: report the failure and carry on with the
            # next URL rather than aborting the whole run.
            print(e)
            # NOTE(review): the pause is assumed to belong to the error path
            # (back off before the next request) — confirm against the
            # original file's indentation.
            time.sleep(60)