import csv
import os
import re

from bs4 import BeautifulSoup
from tqdm import tqdm

# Root directory with the saved HTML pages
directory = r'D:\Работа\DATA\lynx'

# Recursively collect all .html files under the directory
html_files = [os.path.join(root, file)
              for root, dirs, files in os.walk(directory)
              for file in files if file.endswith(".html")]

# Write the parsed data to output.csv (windows-1251 so the Cyrillic text opens correctly in Excel)
with open('output.csv', 'w', newline='', encoding='windows-1251') as csvfile:
    writer = csv.writer(csvfile)
    # Header columns: manufacturer, model, engine, power kW (hp), year, part type, No., Link, Description, Information
    writer.writerow(['Производитель', 'Модель', 'Двигатель', 'Мощность кВт (л. с.)',
                     'Год выпуска', 'Тип детали', 'No.', 'Link', 'Description', 'Information'])

    for path in tqdm(html_files, desc="Processing", unit="file"):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                soup = BeautifulSoup(f.read(), 'html.parser')

            # Vehicle metadata from the page header
            vendor = soup.find('span', id='select2-vendor-container').text.strip()
            car = soup.find('span', id='select2-car-container').text.strip()
            # The modification field is formatted as "engine | power | year"
            modification, power_engine, year = soup.find(
                'span', id='select2-modification-container').text.strip().split(' | ')
            category_type = soup.find('div', class_='header-info-block-category').find('span').text.strip()

            # Parts table: skip the header row
            table = soup.find('table', class_='list_showtable')
            for row in table.find_all('tr')[1:]:
                # Cell 3 (the Information column): drop tabs and spaces, join line breaks with " | ";
                # all other cells are reduced to plain stripped text
                row_data = [re.sub(r'\n+', ' | ',
                                   cell.get_text().strip().replace('\t', '').replace(' ', ''))
                            if count == 3 else cell.get_text(strip=True)
                            for count, cell in enumerate(row.find_all(['td', 'th']))]
                # The Link column comes from the row's data-link attribute, not from a cell
                row_data[1] = row.get('data-link', '')
                writer.writerow([vendor, car, modification, power_engine, year, category_type] + row_data)
        except Exception as e:
            # Skip pages that do not match the expected layout, but report them instead of failing silently
            tqdm.write(f"Skipped {path}: {e}")