Загрузить файлы в «/»
This commit is contained in:
81
pars_oem.py
Normal file
81
pars_oem.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import csv
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from tqdm import tqdm
|
||||
|
||||
# Value written to a CSV cell when the page has no data for that column.
MISSING = 'None'
# Image URL that is treated as "no photo" and replaced by the placeholder.
NO_IMAGE_URL = 'https://lynxauto.info/image/trumb/400x300/no_image.jpg'
# Seconds to wait for a response before giving up on a URL, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30
# Seconds to pause after a failed URL before moving on to the next one.
ERROR_PAUSE = 60


def _text_or_missing(node):
    """Return the stripped text of *node*, or ``MISSING`` if absent or empty."""
    if node is None:
        return MISSING
    return node.get_text(strip=True) or MISSING


def _properties(soup):
    """Return the "title value" pairs of the ``pcard-props`` table joined by ' | '."""
    container = soup.find("div", id="pcard-props")
    if container is None:
        return MISSING
    pairs = []
    for row in container.find_all("tr"):
        title = row.find("td", class_="title")
        value = row.find("td", class_="value")
        # Rows lacking either cell are skipped instead of aborting the page.
        if title is None or value is None:
            continue
        pairs.append(
            f'{title.get_text(strip=True)} {value.get_text(strip=True)}')
    return ' | '.join(pairs)


def _images(soup):
    """Return the image URLs of the ``pcard-view-images`` block joined by ' | '."""
    wrapper = soup.find("div", id="pcard-view-images")
    if wrapper is None:
        return MISSING
    # <img> tags without a src attribute are ignored rather than raising.
    joined = ' | '.join(
        img['src'] for img in wrapper.find_all('img') if img.get('src'))
    return MISSING if joined == NO_IMAGE_URL else joined


def _cross_table(soup, container_id):
    """Serialise the table inside ``div#container_id``.

    The first ``<tr>`` is skipped. Cells of a row are joined with ' | ' and
    rows with ' <> '. Inside a cell, tabs and spaces are removed and runs of
    newlines become ' | '. Returns ``MISSING`` when the container is absent
    or yields no rows, so the column never holds an empty string.
    """
    container = soup.find('div', id=container_id)
    if container is None:
        return MISSING
    rows = []
    for row in container.find_all('tr')[1:]:
        cells = [
            re.sub(r'\n+', ' | ',
                   cell.get_text().strip().replace('\t', '').replace(' ', ''))
            for cell in row.find_all(['td', 'th'])
        ]
        rows.append(' | '.join(cells))
    return ' <> '.join(rows) or MISSING


# Input: whitespace-separated list of product page URLs.
with open('lynx_pn_input.txt', 'r') as f:
    urls = f.read().split()

# errors='replace' keeps a row even if the page contains characters that
# windows-1251 cannot represent (they are written as '?').
with open('oen.csv', 'w', newline='', encoding='windows-1251',
          errors='replace') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ['url', 'ID', 'Название', 'Описание', 'Фото', 'OeN', 'Аналоги'])
    for url in tqdm(urls, desc="Processing", unit="iteration"):
        try:
            # Fetch the product page; HTTP error statuses are routed to the
            # except branch instead of being parsed as if they were a page.
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")

            data = [
                url,
                _text_or_missing(soup.find("div", class_="pcard-model")),
                _text_or_missing(soup.find("div", class_="pcard-name")),
                _properties(soup),
                _images(soup),
                _cross_table(soup, 'pcard-oeno'),
                _cross_table(soup, 'pcard-analog'),
            ]
            writer.writerow(data)
        except Exception as e:
            # Best-effort per URL: report the problem and carry on with the
            # next one so a single bad page does not end the run.
            print(e)
            # NOTE(review): the pause is assumed to belong to the error path
            # (the original indentation was lost) - confirm.
            time.sleep(ERROR_PAUSE)
|
||||
48
ДОПАРСИНГ.py
Normal file
48
ДОПАРСИНГ.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import csv
|
||||
from tqdm import tqdm
|
||||
|
||||
def chort_text(parts):
    """Group cross-reference entries by manufacturer into one string.

    Args:
        parts: list of strings, each shaped like ``"maker | number"`` or
            ``"maker | number | info"``. Fields are stripped of surrounding
            whitespace. Items with fewer than two ``|``-separated fields
            (for example an empty string) are skipped.

    Returns:
        ``'None'`` when *parts* is empty or its first item is ``'None'``.
        Otherwise one segment per manufacturer, in first-seen order:
        ``"maker : n1 | n2 <> "`` or, when the first entry for that
        manufacturer carried an info field, ``"maker : n1 | n2 : info <> "``.
        Every segment, including the last, ends with ``' <> '``.
    """
    if not parts or parts[0] == 'None':
        return 'None'

    # manufacturer -> [info, number, number, ...]; the info slot is taken
    # from the first entry seen for that manufacturer only.
    grouped = {}
    for part in parts:
        fields = [field.strip() for field in part.split('|')]
        if len(fields) < 2:
            # No part number present: nothing to record for this item.
            continue
        manufacturer, number = fields[0], fields[1]
        info = fields[2] if len(fields) > 2 else ''
        if manufacturer in grouped:
            grouped[manufacturer].append(number)
        else:
            grouped[manufacturer] = [info, number]

    segments = []
    for manufacturer, (info, *numbers) in grouped.items():
        joined = " | ".join(numbers)
        if info == '':
            segments.append(f'{manufacturer} : {joined} <> ')
        else:
            segments.append(f'{manufacturer} : {joined} : {info} <> ')
    return ''.join(segments)
|
||||
|
||||
# Read the scraper output. It was written as windows-1251, so it must be
# decoded the same way regardless of the platform's default encoding.
with open('oen.csv', newline='', encoding='windows-1251') as csvfile:
    csv_data = list(csv.reader(csvfile))

with open('oem2.csv', 'w', newline='', encoding='windows-1251') as csvfile:
    writer = csv.writer(csvfile)

    # csv_data[0] is the header row; it is skipped and not copied to the
    # output. NOTE(review): confirm oem2.csv is meant to have no header.
    for row in tqdm(csv_data[1:], desc="Processing", unit="iteration"):
        # Columns 5 and 6 hold '<>'-separated cross-reference lists that are
        # regrouped by manufacturer. Rows too short to have a column are
        # written through unchanged instead of raising IndexError.
        for column in (5, 6):
            if column < len(row):
                row[column] = chort_text(row[column].split('<>'))
        writer.writerow(row)
|
||||
Reference in New Issue
Block a user