Загрузить файлы в «/»

This commit is contained in:
2024-08-01 03:33:38 +00:00
parent 7bfd735f1f
commit cd3e5f38f7
2 changed files with 129 additions and 0 deletions

81
pars_oem.py Normal file
View File

@@ -0,0 +1,81 @@
import csv
import re
import time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Scrape the product pages listed in lynx_pn_input.txt and write one CSV row
# per page to oen.csv (columns: url, ID, name, description, photos, OE numbers,
# analogues).  A missing page element is recorded as the literal string 'None'.

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the whole run indefinitely.
_REQUEST_TIMEOUT = 30
# Seconds to pause after a URL fails before moving on to the next one.
_ERROR_PAUSE = 60
# Image URL that is recorded as 'None' when it is the only image on the page.
_NO_IMAGE_URL = 'https://lynxauto.info/image/trumb/400x300/no_image.jpg'


def _div_text(soup, css_class):
    """Return the stripped text of the first <div> with *css_class*.

    Returns 'None' when the element is absent or its text is empty, instead
    of raising AttributeError on a missing element.
    """
    element = soup.find("div", class_=css_class)
    text = element.get_text(strip=True) if element is not None else ''
    return text or 'None'


def _props_text(soup):
    """Return 'title value' pairs from div#pcard-props joined with ' | '.

    Returns 'None' when the container is absent.  Rows lacking either the
    "title" or the "value" cell are skipped rather than aborting the page.
    """
    container = soup.find("div", id="pcard-props")
    if container is None:
        return 'None'
    pairs = []
    for row in container.find_all("tr"):
        title = row.find("td", class_="title")
        value = row.find("td", class_="value")
        if title is None or value is None:
            continue
        pairs.append(
            f'{title.get_text(strip=True)} {value.get_text(strip=True)}')
    return ' | '.join(pairs)


def _images_text(soup):
    """Return the image URLs from div#pcard-view-images joined with ' | '.

    Returns 'None' when the container is absent or when the joined result is
    exactly the no-image URL.  <img> tags without a src attribute are skipped.
    """
    wrapper = soup.find("div", id="pcard-view-images")
    if wrapper is None:
        return 'None'
    joined = ' | '.join(
        img['src'] for img in wrapper.find_all('img') if img.get('src'))
    return 'None' if joined == _NO_IMAGE_URL else joined


def _clean_cell(text):
    """Strip *text*, drop tabs and spaces, and turn newline runs into ' | '."""
    compact = text.strip().replace('\t', '').replace(' ', '')
    return re.sub(r'\n+', ' | ', compact)


def _table_text(soup, div_id):
    """Flatten the table rows inside div#*div_id* into a single string.

    The first <tr> is skipped; the cells of each remaining row are joined
    with ' | ' and the rows are joined with ' <> '.  Returns 'None' when the
    container is absent.
    """
    container = soup.find('div', id=div_id)
    if container is None:
        return 'None'
    lines = []
    for row in container.find_all('tr')[1:]:
        cells = [_clean_cell(cell.get_text())
                 for cell in row.find_all(['td', 'th'])]
        lines.append(' | '.join(cells))
    return ' <> '.join(lines)


with open('lynx_pn_input.txt', 'r') as f:
    # Whitespace-separated list of page URLs.
    urls = f.read().split()

# errors='replace' keeps a row whose text contains a character that
# windows-1251 cannot represent (written as '?') instead of losing the row
# to a UnicodeEncodeError.
with open('oen.csv', 'w', newline='', encoding='windows-1251',
          errors='replace') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ['url', 'ID', 'Название', 'Описание', 'Фото', 'OeN', 'Аналоги'])
    for url in tqdm(urls, desc="Processing", unit="iteration"):
        try:
            # Send a GET request for the page; HTTP error statuses are
            # raised so they take the same skip-and-pause path below.
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            writer.writerow([
                url,
                _div_text(soup, "pcard-model"),
                _div_text(soup, "pcard-name"),
                _props_text(soup),
                _images_text(soup),
                _table_text(soup, 'pcard-oeno'),
                _table_text(soup, 'pcard-analog'),
            ])
        except Exception as e:
            # Best-effort run: report the failure, pause, and continue with
            # the next URL; the failed URL gets no row in the output.
            print(e)
            time.sleep(_ERROR_PAUSE)

48
ДОПАРСИНГ.py Normal file
View File

@@ -0,0 +1,48 @@
import csv
from tqdm import tqdm
def chort_text(parts):
    """Group part numbers by manufacturer and render them as one string.

    Args:
        parts: list of strings, each of the form
            ``'manufacturer | number'`` or ``'manufacturer | number | info'``
            (surrounding whitespace is ignored).  A list whose first element
            is ``'None'`` marks an absent value.

    Returns:
        ``'None'`` when ``parts[0] == 'None'``.  Otherwise one chunk per
        manufacturer, in first-seen order, each formatted as
        ``'manufacturer : n1 | n2 <> '`` or, when the first entry for that
        manufacturer carried info, ``'manufacturer : n1 | n2 : info <> '``.
        Every chunk (including the last) ends with ``' <> '``.  An empty
        list, or a list with no usable entries, yields ``''``.

    Entries without a ``|`` separator (including empty strings, which appear
    when the source field was empty) carry no part number and are skipped
    instead of raising IndexError.  Only the info of the first entry seen for
    a manufacturer is kept.

    >>> chort_text(['A | 1 ', ' A | 2 ', ' B | 3 | note'])
    'A : 1 | 2 <> B : 3 : note <> '
    """
    if not parts:
        return ''
    if parts[0] == 'None':
        return 'None'

    # manufacturer -> (info of the first entry, list of part numbers)
    grouped = {}
    for part in parts:
        subparts = [piece.strip() for piece in part.split('|')]
        if len(subparts) < 2:
            # No part number present; nothing to group.
            continue
        manufacturer, number = subparts[0], subparts[1]
        info = subparts[2] if len(subparts) > 2 else ''
        if manufacturer in grouped:
            grouped[manufacturer][1].append(number)
        else:
            grouped[manufacturer] = (info, [number])

    chunks = []
    for manufacturer, (info, numbers) in grouped.items():
        joined = ' | '.join(numbers)
        if info:
            chunks.append(f'{manufacturer} : {joined} : {info} <> ')
        else:
            chunks.append(f'{manufacturer} : {joined} <> ')
    return ''.join(chunks)
# Post-process oen.csv into oem2.csv: columns 5 and 6 of every data row are
# regrouped by manufacturer with chort_text().  The header row of oen.csv is
# skipped and no header is written to oem2.csv.

# oen.csv is written as windows-1251, so it must be read with the same
# encoding rather than the platform default.
with open('oen.csv', newline='', encoding='windows-1251') as csvfile:
    reader = csv.reader(csvfile)
    csv_data = list(reader)

with open('oem2.csv', 'w', newline='', encoding='windows-1251') as csvfile:
    writer = csv.writer(csvfile)
    for row in tqdm(csv_data[1:], desc="Processing", unit="iteration"):
        # Rows with fewer than 7 columns have nothing to regroup; they are
        # written through unchanged instead of raising IndexError.
        if len(row) > 6:
            row[5] = chort_text(row[5].split('<>'))
            row[6] = chort_text(row[6].split('<>'))
        writer.writerow(row)