diff options
-rwxr-xr-x | src/subscleaner/subscleaner.py | 100 | ||||
-rw-r--r-- | tests/test_subscleaner.py | 121 |
2 files changed, 112 insertions, 109 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py index 504d43f..f79e10d 100755 --- a/src/subscleaner/subscleaner.py +++ b/src/subscleaner/subscleaner.py @@ -26,7 +26,7 @@ import time import chardet import pysrt -ADS = [ +AD_PATTERNS = [ re.compile(r"\bnordvpn\b", re.IGNORECASE), re.compile(r"\ba Card Shark AMERICASCARDROOM\b", re.IGNORECASE), re.compile(r"\bOpenSubtitles\b", re.IGNORECASE), @@ -68,127 +68,127 @@ ADS = [ ] -def ads_in_line(line: str) -> bool: +def contains_ad(subtitle_line: str) -> bool: """ - Check if the given line contains an ad. + Check if the given subtitle line contains an ad. Args: - line (str): The line of text to be checked. + subtitle_line (str): The subtitle line to be checked. Returns: - bool: True if the line contains an ad, False otherwise. + bool: True if the subtitle line contains an ad, False otherwise. """ - return any(ad.search(line) for ad in ADS) + return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS) -def is_already_processed(filename: str) -> bool: +def is_processed_before(subtitle_file: str) -> bool: """ - Check if the file has already been processed. + Check if the subtitle file has already been processed. Args: - filename (str): The path to the subtitle file. + subtitle_file (str): The path to the subtitle file. Returns: - bool: True if the file has already been processed, False otherwise. + bool: True if the subtitle file has already been processed, False otherwise. """ - created = os.path.getctime(filename) - already_processed = time.mktime( + file_creation_time = os.path.getctime(subtitle_file) + processed_timestamp = time.mktime( time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"), ) - return created < already_processed + return file_creation_time < processed_timestamp -def detect_encoding(filename: str) -> str: +def get_encoding(subtitle_file: str) -> str: """ Detect the encoding of the subtitle file. Args: - filename (str): The path to the subtitle file. + subtitle_file (str): The path to the subtitle file. Returns: - str: The detected encoding of the file. + str: The detected encoding of the subtitle file. """ - with open(filename, "rb") as f: - return chardet.detect(f.read())["encoding"] + with open(subtitle_file, "rb") as file: + return chardet.detect(file.read())["encoding"] -def remove_ads(subs: pysrt.SubRipFile) -> bool: +def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool: """ - Remove ads from the subtitle file. + Remove ad lines from the subtitle data. Args: - subs (pysrt.SubRipFile): The subtitle file object. + subtitle_data (pysrt.SubRipFile): The subtitle data object. Returns: - bool: True if the file was modified, False otherwise. + bool: True if the subtitle data was modified, False otherwise. """ modified = False - for i, line in enumerate(subs): - if ads_in_line(line.text): - print(f"Removing: {line}\n") - del subs[i] + for index, subtitle in enumerate(subtitle_data): + if contains_ad(subtitle.text): + print(f"Removing: {subtitle}\n") + del subtitle_data[index] modified = True return modified -def process_file(filename: str) -> bool: +def process_subtitle_file(subtitle_file: str) -> bool: """ - Process a subtitle file to remove ads. + Process a subtitle file to remove ad lines. Args: - filename (str): The path to the subtitle file. + subtitle_file (str): The path to the subtitle file. Returns: - bool: True if the file was modified, False otherwise. + bool: True if the subtitle file was modified, False otherwise. """ try: - if is_already_processed(filename): - print(f"Already processed {filename}") + if is_processed_before(subtitle_file): + print(f"Already processed {subtitle_file}") return False - print(f"Analyzing: {filename}") + print(f"Analyzing: {subtitle_file}") - encoding = detect_encoding(filename) - subs = pysrt.open(filename, encoding=encoding) + encoding = get_encoding(subtitle_file) + subtitle_data = pysrt.open(subtitle_file, encoding=encoding) - if remove_ads(subs): - print(f"Saving {filename}") - subs.save(filename) + if remove_ad_lines(subtitle_data): + print(f"Saving {subtitle_file}") + subtitle_data.save(subtitle_file) return True return False except Exception as e: - print(f"Error processing {filename}: {e}") + print(f"Error processing {subtitle_file}: {e}") return False -def process_files(filenames: list[str]) -> list[str]: +def process_subtitle_files(subtitle_files: list[str]) -> list[str]: """ - Process multiple subtitle files to remove ads. + Process multiple subtitle files to remove ad lines. Args: - filenames (list[str]): A list of subtitle file paths. + subtitle_files (list[str]): A list of subtitle file paths. Returns: - list[str]: A list of modified file paths. + list[str]: A list of modified subtitle file paths. """ modified_files = [] - for filename in filenames: - if process_file(filename): - modified_files.append(filename) + for subtitle_file in subtitle_files: + if process_subtitle_file(subtitle_file): + modified_files.append(subtitle_file) return modified_files def main(): """ - Process subtitle files to remove ads. + Process subtitle files to remove ad lines. - Read filenames from standard input, process each file to remove ads, + Read subtitle file paths from standard input, process each file to remove ad lines, and print the result. Keep track of the modified files and print a summary at the end. """ - filenames = [filename.strip() for filename in sys.stdin] + subtitle_files = [file_path.strip() for file_path in sys.stdin] print("Starting script") - modified_files = process_files(filenames) + modified_files = process_subtitle_files(subtitle_files) if modified_files: print(f"Modified {len(modified_files)} files") print("Done") diff --git a/tests/test_subscleaner.py b/tests/test_subscleaner.py index 6172ff8..54cb521 100644 --- a/tests/test_subscleaner.py +++ b/tests/test_subscleaner.py @@ -7,13 +7,13 @@ import pysrt import pytest from src.subscleaner.subscleaner import ( - ads_in_line, - detect_encoding, - is_already_processed, + contains_ad, + get_encoding, + is_processed_before, main, - process_file, - process_files, - remove_ads, + process_subtitle_file, + process_subtitle_files, + remove_ad_lines, ) @@ -42,118 +42,118 @@ def create_sample_srt_file(tmpdir, content): @pytest.mark.parametrize( - "line, expected", + "subtitle_line, expected_result", [ ("This is a normal line", False), ("This line contains OpenSubtitles", True), ("Subtitles by XYZ", True), ], ) -def test_ads_in_line(line, expected): +def test_contains_ad(subtitle_line, expected_result): """ - Test the ads_in_line function with different input lines and expected results. + Test the contains_ad function with different subtitle lines and expected results. Args: - line (str): The input line to be tested. - expected (bool): The expected result (True if the line contains an ad, False otherwise). + subtitle_line (str): The subtitle line to be tested. + expected_result (bool): The expected result (True if the line contains an ad, False otherwise). """ - assert ads_in_line(line) is expected + assert contains_ad(subtitle_line) is expected_result -def test_is_already_processed(tmpdir): +def test_is_processed_before(tmpdir): """ - Test the is_already_processed function. + Test the is_processed_before function. Args: tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. """ - file_path = create_sample_srt_file(tmpdir, "") + subtitle_file = create_sample_srt_file(tmpdir, "") with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=0): - assert is_already_processed(file_path) is True + assert is_processed_before(subtitle_file) is True with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=9999999999): - assert is_already_processed(file_path) is False + assert is_processed_before(subtitle_file) is False -def test_detect_encoding(tmpdir, sample_srt_content): +def test_get_encoding(tmpdir, sample_srt_content): """ - Test the detect_encoding function. + Test the get_encoding function. Args: tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. sample_srt_content (str): The sample SRT content. """ - file_path = create_sample_srt_file(tmpdir, sample_srt_content) - assert detect_encoding(file_path) == "ascii" + subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) + assert get_encoding(subtitle_file) == "ascii" -def test_remove_ads(sample_srt_content): +def test_remove_ad_lines(sample_srt_content): """ - Test the remove_ads function. + Test the remove_ad_lines function. Args: sample_srt_content (str): The sample SRT content. """ - subs = pysrt.from_string(sample_srt_content) - subs_expected_ammount = 2 - assert remove_ads(subs) is True - assert len(subs) == subs_expected_ammount + subtitle_data = pysrt.from_string(sample_srt_content) + expected_subtitle_count = 2 + assert remove_ad_lines(subtitle_data) is True + assert len(subtitle_data) == expected_subtitle_count - subs = pysrt.from_string("1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.") - assert remove_ads(subs) is False - assert len(subs) == 1 + subtitle_data = pysrt.from_string("1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.") + assert remove_ad_lines(subtitle_data) is False + assert len(subtitle_data) == 1 -def test_process_file_no_modification(tmpdir, sample_srt_content): +def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content): """ - Test the process_file function when the file does not require modification. + Test the process_subtitle_file function when the file does not require modification. Args: tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. sample_srt_content (str): The sample SRT content. """ - file_path = create_sample_srt_file(tmpdir, sample_srt_content) - with patch("src.subscleaner.subscleaner.is_already_processed", return_value=True): - assert process_file(file_path) is False + subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) + with patch("src.subscleaner.subscleaner.is_processed_before", return_value=True): + assert process_subtitle_file(subtitle_file) is False -def test_process_file_with_modification(tmpdir, sample_srt_content): +def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content): """ - Test the process_file function when the file requires modification. + Test the process_subtitle_file function when the file requires modification. Args: tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. sample_srt_content (str): The sample SRT content. """ - file_path = create_sample_srt_file(tmpdir, sample_srt_content) - with patch("src.subscleaner.subscleaner.is_already_processed", return_value=False): - assert process_file(file_path) is True + subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) + with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False): + assert process_subtitle_file(subtitle_file) is True -def test_process_file_error(tmpdir): +def test_process_subtitle_file_error(tmpdir): """ - Test the process_file function when an error occurs (e.g., file not found). + Test the process_subtitle_file function when an error occurs (e.g., file not found). Args: tmpdir (pytest.fixture): A temporary directory. """ - file_path = tmpdir.join("nonexistent.srt") - assert process_file(str(file_path)) is False + subtitle_file = tmpdir.join("nonexistent.srt") + assert process_subtitle_file(str(subtitle_file)) is False -def test_process_files(tmpdir, sample_srt_content): +def test_process_subtitle_files(tmpdir, sample_srt_content): """ - Test the process_files function. + Test the process_subtitle_files function. Args: tmpdir (pytest.fixture): A temporary directory for creating the sample SRT files. sample_srt_content (str): The sample SRT content. """ - file_path1 = create_sample_srt_file(tmpdir, sample_srt_content) - file_path2 = create_sample_srt_file(tmpdir, "1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.") - with patch("src.subscleaner.subscleaner.process_file", side_effect=[True, False]): - modified_files = process_files([file_path1, file_path2]) - assert modified_files == [file_path1] + subtitle_file1 = create_sample_srt_file(tmpdir, sample_srt_content) + subtitle_file2 = create_sample_srt_file(tmpdir, "1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.") + with patch("src.subscleaner.subscleaner.process_subtitle_file", side_effect=[True, False]): + modified_subtitle_files = process_subtitle_files([subtitle_file1, subtitle_file2]) + assert modified_subtitle_files == [subtitle_file1] def test_main_no_modification(tmpdir, sample_srt_content): @@ -164,13 +164,13 @@ def test_main_no_modification(tmpdir, sample_srt_content): tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. sample_srt_content (str): The sample SRT content. """ - file_path = create_sample_srt_file(tmpdir, sample_srt_content) + subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) with ( - patch("sys.stdin", StringIO(file_path)), - patch("src.subscleaner.subscleaner.process_files", return_value=[]) as mock_process_files, + patch("sys.stdin", StringIO(subtitle_file)), + patch("src.subscleaner.subscleaner.process_subtitle_files", return_value=[]) as mock_process_subtitle_files, ): main() - mock_process_files.assert_called_once_with([file_path]) + mock_process_subtitle_files.assert_called_once_with([subtitle_file]) def test_main_with_modification(tmpdir, sample_srt_content): @@ -181,10 +181,13 @@ def test_main_with_modification(tmpdir, sample_srt_content): tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. sample_srt_content (str): The sample SRT content. """ - file_path = create_sample_srt_file(tmpdir, sample_srt_content) + subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) with ( - patch("sys.stdin", StringIO(file_path)), - patch("src.subscleaner.subscleaner.process_files", return_value=[file_path]) as mock_process_files, + patch("sys.stdin", StringIO(subtitle_file)), + patch( + "src.subscleaner.subscleaner.process_subtitle_files", + return_value=[subtitle_file], + ) as mock_process_subtitle_files, ): main() - mock_process_files.assert_called_once_with([file_path]) + mock_process_subtitle_files.assert_called_once_with([subtitle_file]) |