Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions lab2_var22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import argparse
import re
import requests
Comment thread
NikitaTikhonov321 marked this conversation as resolved.
import csv
from pathlib import Path

from bs4 import BeautifulSoup


def parse_arguments() -> argparse.Namespace:
"""
Разбор и возврат аргументов командной строки.
"""

parser = argparse.ArgumentParser()

parser.add_argument(
"-c", "--csv", dest="csv_file", type=str, default="out.csv", help="Название CSV файла"
)
parser.add_argument(
"-d", "--dir", dest="directory", type=str, default="downloaded_mp3", help="Директория с файлами mp3"
)

return parser.parse_args()


def parse_html_content(
    url: str = "https://mixkit.co/free-stock-music/pop/",
    timeout: float = 30.0,
) -> BeautifulSoup | None:
    """Download a page and parse it with BeautifulSoup.

    Args:
        url: Page to fetch; defaults to the Mixkit pop-music listing.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block forever on an unresponsive host.

    Returns:
        The parsed document when the response status is OK, otherwise
        ``None``.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/58.0.3029.110 Safari/537.36"
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        return None
    return BeautifulSoup(response.text, "html.parser")


def write_file_paths_to_csv(file_paths: list[Path], filename: Path, out: Path) -> None:
    """Write a CSV listing of files: name, path relative to the CWD, absolute path.

    Args:
        file_paths: Files to list, one CSV row each.
        filename: CSV file to create; an existing file is overwritten.
        out: Not used by this function; kept so existing callers keep
            working.
    """
    import os  # local import: only needed for the relative-path computation

    cwd = Path.cwd()
    with filename.open(mode="w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "relative_path", "absolute_path"])
        for file_path in file_paths:
            absolute = file_path.resolve()
            try:
                # Unlike Path.relative_to, relpath also handles files that
                # live outside the working directory (via ".." segments).
                relative = os.path.relpath(absolute, cwd)
            except ValueError:
                # E.g. a different drive on Windows: no relative path exists.
                relative = str(absolute)
            writer.writerow([file_path.name, relative, str(absolute)])


def extract_mp3_urls_from_html(soup: "BeautifulSoup") -> list[str]:
    """Collect MP3 URLs from the JSON-LD ``<script>`` tags of a document.

    Args:
        soup: Parsed HTML document.

    Returns:
        URL strings ending in ``.mp3`` found as ``"url": "..."`` values,
        in document order (duplicates are kept).
    """
    pattern = r'"url"\s*:\s*"([^"]+\.mp3)"'
    mp3_urls: list[str] = []

    for script in soup.find_all("script", type="application/ld+json"):
        text = script.string
        if text is None:
            # .string is None for an empty tag or one with several children;
            # passing that to re.findall would raise TypeError.
            continue
        mp3_urls.extend(re.findall(pattern, text))

    return mp3_urls


class AudioFileIterator:
    """Iterate over audio file paths taken from a directory or a CSV file.

    A directory source yields its ``*.mp3`` files in sorted order. A
    ``.csv`` source yields the non-empty values of the ``absolute_path``
    column. Any other source yields nothing.
    """

    def __init__(self, src: Path) -> None:
        """Collect the file paths from ``src``.

        Args:
            src: Directory containing mp3 files, or a CSV file with an
                ``absolute_path`` column.
        """
        self.src = src
        self.file_paths: list[Path] = []
        self.index = 0

        if self.src.is_dir():
            self.file_paths = sorted(
                f for f in self.src.glob("*.mp3") if f.is_file()
            )
        elif self.src.suffix.lower() == ".csv":
            self.load_from_csv(src)

    def __iter__(self) -> "AudioFileIterator":
        """Restart the iteration and return the iterator itself."""
        self.index = 0
        return self

    def __next__(self) -> Path:
        """Return the next file path.

        Raises:
            StopIteration: When every path has been returned.
        """
        if self.index >= len(self.file_paths):
            raise StopIteration
        current = self.file_paths[self.index]
        self.index += 1
        return current

    def load_from_csv(self, filename: Path) -> None:
        """Append the paths listed in the ``absolute_path`` column of a CSV.

        Rows with a missing or empty ``absolute_path`` are skipped.

        Args:
            filename: CSV file with a header row.
        """
        with filename.open("r", encoding="utf-8") as file:
            reader = csv.DictReader(file)
            for row in reader:
                absolute = row.get("absolute_path")
                if absolute:
                    # Store Path objects so both sources yield the same type.
                    self.file_paths.append(Path(absolute))



def download_mp3_files(directory: Path | str = "downloaded_mp3") -> list[Path]:
    """Download the MP3 files found on the target page and save them locally.

    Args:
        directory: Folder to save the files into; created (with parents)
            when missing. Defaults to ``"downloaded_mp3"``.

    Returns:
        Paths of the files that were saved. Links whose download returned
        a non-OK HTTP status are skipped and not included.

    Raises:
        RuntimeError: When the listing page could not be loaded.
        requests.RequestException: On connection errors or timeout.
    """
    soup = parse_html_content()
    if soup is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Failed to load the page with the track list")

    target_dir = Path(directory)
    target_dir.mkdir(parents=True, exist_ok=True)

    files: list[Path] = []
    for link in extract_mp3_urls_from_html(soup):
        response = requests.get(link, timeout=30)
        if not response.ok:
            # Do not store an HTTP error page under an .mp3 name.
            continue
        mp3_file = target_dir / Path(link).name
        mp3_file.write_bytes(response.content)
        files.append(mp3_file)
    return files


def main() -> None:
    """Script entry point: build the CSV listing if needed, then print it.

    When the CSV file named by ``--csv`` is missing or empty, the MP3
    files are downloaded into the ``--dir`` directory and listed in the
    CSV. The paths read back from the CSV are then printed one per line.
    Any exception is reported on stdout instead of propagating.
    """
    try:
        args = parse_arguments()

        source = Path(args.csv_file)

        # Download the files when the CSV file is missing or empty.
        if not source.exists() or source.stat().st_size == 0:
            print("CSV файл не найден или пуст. Выполняется загрузка MP3 файлов...")
            downloaded_files = download_mp3_files(args.directory)
            write_file_paths_to_csv(downloaded_files, source, Path(args.directory))

        for item in AudioFileIterator(source):
            print(item)

    except Exception as ex:
        print("Ошибка: ", ex)


if __name__ == "__main__":
    main()

37 changes: 37 additions & 0 deletions out.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
filename,relative_path,absolute_path
250.mp3,downloaded_mp3\250.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\250.mp3
288.mp3,downloaded_mp3\288.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\288.mp3
5.mp3,downloaded_mp3\5.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\5.mp3
970.mp3,downloaded_mp3\970.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\970.mp3
1000.mp3,downloaded_mp3\1000.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1000.mp3
200.mp3,downloaded_mp3\200.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\200.mp3
821.mp3,downloaded_mp3\821.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\821.mp3
801.mp3,downloaded_mp3\801.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\801.mp3
837.mp3,downloaded_mp3\837.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\837.mp3
1131.mp3,downloaded_mp3\1131.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1131.mp3
349.mp3,downloaded_mp3\349.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\349.mp3
854.mp3,downloaded_mp3\854.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\854.mp3
935.mp3,downloaded_mp3\935.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\935.mp3
1147.mp3,downloaded_mp3\1147.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1147.mp3
228.mp3,downloaded_mp3\228.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\228.mp3
1164.mp3,downloaded_mp3\1164.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1164.mp3
1166.mp3,downloaded_mp3\1166.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1166.mp3
831.mp3,downloaded_mp3\831.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\831.mp3
350.mp3,downloaded_mp3\350.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\350.mp3
851.mp3,downloaded_mp3\851.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\851.mp3
1124.mp3,downloaded_mp3\1124.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1124.mp3
927.mp3,downloaded_mp3\927.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\927.mp3
14.mp3,downloaded_mp3\14.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\14.mp3
158.mp3,downloaded_mp3\158.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\158.mp3
1138.mp3,downloaded_mp3\1138.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1138.mp3
553.mp3,downloaded_mp3\553.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\553.mp3
432.mp3,downloaded_mp3\432.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\432.mp3
218.mp3,downloaded_mp3\218.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\218.mp3
487.mp3,downloaded_mp3\487.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\487.mp3
1030.mp3,downloaded_mp3\1030.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1030.mp3
632.mp3,downloaded_mp3\632.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\632.mp3
827.mp3,downloaded_mp3\827.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\827.mp3
227.mp3,downloaded_mp3\227.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\227.mp3
1012.mp3,downloaded_mp3\1012.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\1012.mp3
559.mp3,downloaded_mp3\559.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\559.mp3
905.mp3,downloaded_mp3\905.mp3,C:\Users\Никита\Desktop\lab2\downloaded_mp3\905.mp3
Loading
Loading