"""Scrape product cards listed in ``lynx_pn_input.txt`` into ``oen.csv``.

For every URL in the input file the page is downloaded and one CSV row is
written with: the URL, the model id, the product name, the property table,
the image URLs, the OE-number table and the analog table.  A field whose
page element is missing is written as the literal string ``'None'``.
"""
import csv
import re
import time

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Image URL that is treated as "no photo" when it is the only image found.
NO_IMAGE_URL = 'https://lynxauto.info/image/trumb/400x300/no_image.jpg'

# Seconds to wait for the server before giving up on a single request, so a
# stalled connection cannot block the whole run.
REQUEST_TIMEOUT = 30

# Seconds to pause after a URL failed before moving on to the next one.
ERROR_PAUSE_SECONDS = 60

# Runs of newlines inside a table cell are collapsed into the cell separator.
_NEWLINES = re.compile(r'\n+')


def _div_text_or_none(soup, css_class):
    """Return the stripped text of the first ``<div class=css_class>``.

    Returns ``'None'`` when the element is absent or its text is empty.
    """
    element = soup.find("div", class_=css_class)
    # Check the element before calling get_text(): find() returns None for a
    # missing element, which previously raised and dropped the whole row.
    text = element.get_text(strip=True) if element is not None else ''
    return text or 'None'


def _properties(soup):
    """Return the ``pcard-props`` rows as ``'title value | title value'``.

    Returns ``'None'`` when the property block is absent.  Rows lacking
    either the ``title`` or the ``value`` cell are skipped.
    """
    block = soup.find("div", id="pcard-props")
    if block is None:
        return 'None'
    pairs = []
    for row in block.find_all("tr"):
        title = row.find("td", class_="title")
        value = row.find("td", class_="value")
        if title is None or value is None:
            # e.g. a header or spacer row; skip it rather than lose the page.
            continue
        pairs.append(
            f'{title.get_text(strip=True)} {value.get_text(strip=True)}')
    return ' | '.join(pairs)


def _images(soup):
    """Return the image URLs of ``pcard-view-images`` joined by ``' | '``.

    Returns ``'None'`` when the block is absent or when the result is
    exactly the placeholder ``NO_IMAGE_URL``.
    """
    wrapper = soup.find("div", id="pcard-view-images")
    if wrapper is None:
        return 'None'
    # src=True keeps only <img> tags that actually carry a src attribute.
    joined = ' | '.join(
        img['src'] for img in wrapper.find_all('img', src=True))
    return 'None' if joined == NO_IMAGE_URL else joined


def _table(soup, div_id):
    """Flatten the table inside ``<div id=div_id>`` into a single string.

    The first ``<tr>`` is skipped.  Cells of a row are joined by ``' | '``
    and rows by ``' <> '``.  Returns ``'None'`` when the div is absent.
    """
    container = soup.find('div', id=div_id)
    if container is None:
        return 'None'
    rows = []
    for row in container.find_all('tr')[1:]:
        cells = [
            _NEWLINES.sub(
                ' | ',
                cell.get_text().strip().replace('\t', '').replace(' ', ''))
            for cell in row.find_all(['td', 'th'])
        ]
        rows.append(' | '.join(cells))
    return ' <> '.join(rows)


def _scrape(url):
    """Download *url* and return its CSV row as a list of strings.

    Raises ``requests.RequestException`` on network failures, timeouts and
    HTTP error statuses.
    """
    # Send a GET request to the page.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Surface HTTP errors instead of parsing an error page into a row of
    # 'None' values.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return [
        url,
        _div_text_or_none(soup, "pcard-model"),
        _div_text_or_none(soup, "pcard-name"),
        _properties(soup),
        _images(soup),
        _table(soup, 'pcard-oeno'),
        _table(soup, 'pcard-analog'),
    ]


with open('lynx_pn_input.txt', 'r') as url_file:
    urls = url_file.read().split()

# errors='replace' keeps a row whose text has characters outside
# windows-1251 (written as '?') instead of failing the write and losing it.
with open('oen.csv', 'w', newline='', encoding='windows-1251',
          errors='replace') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ['url', 'ID', 'Название', 'Описание', 'Фото', 'OeN', 'Аналоги'])
    for url in tqdm(urls, desc="Processing", unit="iteration"):
        try:
            writer.writerow(_scrape(url))
        except Exception as e:
            # Per-URL boundary: one bad page must not abort the whole run.
            print(e)
            # NOTE(review): the original layout was lost; the pause is
            # assumed to belong to the error path as a back-off. Confirm.
            time.sleep(ERROR_PAUSE_SECONDS)