summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x  src/subscleaner/subscleaner.py  100
-rw-r--r--  tests/test_subscleaner.py  121
2 files changed, 112 insertions, 109 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index 504d43f..f79e10d 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -26,7 +26,7 @@ import time
import chardet
import pysrt
-ADS = [
+AD_PATTERNS = [
re.compile(r"\bnordvpn\b", re.IGNORECASE),
re.compile(r"\ba Card Shark AMERICASCARDROOM\b", re.IGNORECASE),
re.compile(r"\bOpenSubtitles\b", re.IGNORECASE),
@@ -68,127 +68,127 @@ ADS = [
]
-def ads_in_line(line: str) -> bool:
+def contains_ad(subtitle_line: str) -> bool:
"""
- Check if the given line contains an ad.
+ Check if the given subtitle line contains an ad.
Args:
- line (str): The line of text to be checked.
+ subtitle_line (str): The subtitle line to be checked.
Returns:
- bool: True if the line contains an ad, False otherwise.
+ bool: True if the subtitle line contains an ad, False otherwise.
"""
- return any(ad.search(line) for ad in ADS)
+ return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
-def is_already_processed(filename: str) -> bool:
+def is_processed_before(subtitle_file: str) -> bool:
"""
- Check if the file has already been processed.
+ Check if the subtitle file has already been processed.
Args:
- filename (str): The path to the subtitle file.
+ subtitle_file (str): The path to the subtitle file.
Returns:
- bool: True if the file has already been processed, False otherwise.
+ bool: True if the subtitle file has already been processed, False otherwise.
"""
- created = os.path.getctime(filename)
- already_processed = time.mktime(
+ file_creation_time = os.path.getctime(subtitle_file)
+ processed_timestamp = time.mktime(
time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
)
- return created < already_processed
+ return file_creation_time < processed_timestamp
-def detect_encoding(filename: str) -> str:
+def get_encoding(subtitle_file: str) -> str:
"""
Detect the encoding of the subtitle file.
Args:
- filename (str): The path to the subtitle file.
+ subtitle_file (str): The path to the subtitle file.
Returns:
- str: The detected encoding of the file.
+ str: The detected encoding of the subtitle file.
"""
- with open(filename, "rb") as f:
- return chardet.detect(f.read())["encoding"]
+ with open(subtitle_file, "rb") as file:
+ return chardet.detect(file.read())["encoding"]
-def remove_ads(subs: pysrt.SubRipFile) -> bool:
+def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool:
"""
- Remove ads from the subtitle file.
+ Remove ad lines from the subtitle data.
Args:
- subs (pysrt.SubRipFile): The subtitle file object.
+ subtitle_data (pysrt.SubRipFile): The subtitle data object.
Returns:
- bool: True if the file was modified, False otherwise.
+ bool: True if the subtitle data was modified, False otherwise.
"""
modified = False
- for i, line in enumerate(subs):
- if ads_in_line(line.text):
- print(f"Removing: {line}\n")
- del subs[i]
+ for index, subtitle in enumerate(subtitle_data):
+ if contains_ad(subtitle.text):
+ print(f"Removing: {subtitle}\n")
+ del subtitle_data[index]
modified = True
return modified
-def process_file(filename: str) -> bool:
+def process_subtitle_file(subtitle_file: str) -> bool:
"""
- Process a subtitle file to remove ads.
+ Process a subtitle file to remove ad lines.
Args:
- filename (str): The path to the subtitle file.
+ subtitle_file (str): The path to the subtitle file.
Returns:
- bool: True if the file was modified, False otherwise.
+ bool: True if the subtitle file was modified, False otherwise.
"""
try:
- if is_already_processed(filename):
- print(f"Already processed {filename}")
+ if is_processed_before(subtitle_file):
+ print(f"Already processed {subtitle_file}")
return False
- print(f"Analyzing: {filename}")
+ print(f"Analyzing: {subtitle_file}")
- encoding = detect_encoding(filename)
- subs = pysrt.open(filename, encoding=encoding)
+ encoding = get_encoding(subtitle_file)
+ subtitle_data = pysrt.open(subtitle_file, encoding=encoding)
- if remove_ads(subs):
- print(f"Saving {filename}")
- subs.save(filename)
+ if remove_ad_lines(subtitle_data):
+ print(f"Saving {subtitle_file}")
+ subtitle_data.save(subtitle_file)
return True
return False
except Exception as e:
- print(f"Error processing {filename}: {e}")
+ print(f"Error processing {subtitle_file}: {e}")
return False
-def process_files(filenames: list[str]) -> list[str]:
+def process_subtitle_files(subtitle_files: list[str]) -> list[str]:
"""
- Process multiple subtitle files to remove ads.
+ Process multiple subtitle files to remove ad lines.
Args:
- filenames (list[str]): A list of subtitle file paths.
+ subtitle_files (list[str]): A list of subtitle file paths.
Returns:
- list[str]: A list of modified file paths.
+ list[str]: A list of modified subtitle file paths.
"""
modified_files = []
- for filename in filenames:
- if process_file(filename):
- modified_files.append(filename)
+ for subtitle_file in subtitle_files:
+ if process_subtitle_file(subtitle_file):
+ modified_files.append(subtitle_file)
return modified_files
def main():
"""
- Process subtitle files to remove ads.
+ Process subtitle files to remove ad lines.
- Read filenames from standard input, process each file to remove ads,
+ Read subtitle file paths from standard input, process each file to remove ad lines,
and print the result. Keep track of the modified files and print
a summary at the end.
"""
- filenames = [filename.strip() for filename in sys.stdin]
+ subtitle_files = [file_path.strip() for file_path in sys.stdin]
print("Starting script")
- modified_files = process_files(filenames)
+ modified_files = process_subtitle_files(subtitle_files)
if modified_files:
print(f"Modified {len(modified_files)} files")
print("Done")
diff --git a/tests/test_subscleaner.py b/tests/test_subscleaner.py
index 6172ff8..54cb521 100644
--- a/tests/test_subscleaner.py
+++ b/tests/test_subscleaner.py
@@ -7,13 +7,13 @@ import pysrt
import pytest
from src.subscleaner.subscleaner import (
- ads_in_line,
- detect_encoding,
- is_already_processed,
+ contains_ad,
+ get_encoding,
+ is_processed_before,
main,
- process_file,
- process_files,
- remove_ads,
+ process_subtitle_file,
+ process_subtitle_files,
+ remove_ad_lines,
)
@@ -42,118 +42,118 @@ def create_sample_srt_file(tmpdir, content):
@pytest.mark.parametrize(
- "line, expected",
+ "subtitle_line, expected_result",
[
("This is a normal line", False),
("This line contains OpenSubtitles", True),
("Subtitles by XYZ", True),
],
)
-def test_ads_in_line(line, expected):
+def test_contains_ad(subtitle_line, expected_result):
"""
- Test the ads_in_line function with different input lines and expected results.
+ Test the contains_ad function with different subtitle lines and expected results.
Args:
- line (str): The input line to be tested.
- expected (bool): The expected result (True if the line contains an ad, False otherwise).
+ subtitle_line (str): The subtitle line to be tested.
+ expected_result (bool): The expected result (True if the line contains an ad, False otherwise).
"""
- assert ads_in_line(line) is expected
+ assert contains_ad(subtitle_line) is expected_result
-def test_is_already_processed(tmpdir):
+def test_is_processed_before(tmpdir):
"""
- Test the is_already_processed function.
+ Test the is_processed_before function.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
"""
- file_path = create_sample_srt_file(tmpdir, "")
+ subtitle_file = create_sample_srt_file(tmpdir, "")
with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=0):
- assert is_already_processed(file_path) is True
+ assert is_processed_before(subtitle_file) is True
with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=9999999999):
- assert is_already_processed(file_path) is False
+ assert is_processed_before(subtitle_file) is False
-def test_detect_encoding(tmpdir, sample_srt_content):
+def test_get_encoding(tmpdir, sample_srt_content):
"""
- Test the detect_encoding function.
+ Test the get_encoding function.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
sample_srt_content (str): The sample SRT content.
"""
- file_path = create_sample_srt_file(tmpdir, sample_srt_content)
- assert detect_encoding(file_path) == "ascii"
+ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
+ assert get_encoding(subtitle_file) == "ascii"
-def test_remove_ads(sample_srt_content):
+def test_remove_ad_lines(sample_srt_content):
"""
- Test the remove_ads function.
+ Test the remove_ad_lines function.
Args:
sample_srt_content (str): The sample SRT content.
"""
- subs = pysrt.from_string(sample_srt_content)
- subs_expected_ammount = 2
- assert remove_ads(subs) is True
- assert len(subs) == subs_expected_ammount
+ subtitle_data = pysrt.from_string(sample_srt_content)
+ expected_subtitle_count = 2
+ assert remove_ad_lines(subtitle_data) is True
+ assert len(subtitle_data) == expected_subtitle_count
- subs = pysrt.from_string("1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.")
- assert remove_ads(subs) is False
- assert len(subs) == 1
+ subtitle_data = pysrt.from_string("1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.")
+ assert remove_ad_lines(subtitle_data) is False
+ assert len(subtitle_data) == 1
-def test_process_file_no_modification(tmpdir, sample_srt_content):
+def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content):
"""
- Test the process_file function when the file does not require modification.
+ Test the process_subtitle_file function when the file does not require modification.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
sample_srt_content (str): The sample SRT content.
"""
- file_path = create_sample_srt_file(tmpdir, sample_srt_content)
- with patch("src.subscleaner.subscleaner.is_already_processed", return_value=True):
- assert process_file(file_path) is False
+ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
+ with patch("src.subscleaner.subscleaner.is_processed_before", return_value=True):
+ assert process_subtitle_file(subtitle_file) is False
-def test_process_file_with_modification(tmpdir, sample_srt_content):
+def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content):
"""
- Test the process_file function when the file requires modification.
+ Test the process_subtitle_file function when the file requires modification.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
sample_srt_content (str): The sample SRT content.
"""
- file_path = create_sample_srt_file(tmpdir, sample_srt_content)
- with patch("src.subscleaner.subscleaner.is_already_processed", return_value=False):
- assert process_file(file_path) is True
+ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
+ with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
+ assert process_subtitle_file(subtitle_file) is True
-def test_process_file_error(tmpdir):
+def test_process_subtitle_file_error(tmpdir):
"""
- Test the process_file function when an error occurs (e.g., file not found).
+ Test the process_subtitle_file function when an error occurs (e.g., file not found).
Args:
tmpdir (pytest.fixture): A temporary directory.
"""
- file_path = tmpdir.join("nonexistent.srt")
- assert process_file(str(file_path)) is False
+ subtitle_file = tmpdir.join("nonexistent.srt")
+ assert process_subtitle_file(str(subtitle_file)) is False
-def test_process_files(tmpdir, sample_srt_content):
+def test_process_subtitle_files(tmpdir, sample_srt_content):
"""
- Test the process_files function.
+ Test the process_subtitle_files function.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT files.
sample_srt_content (str): The sample SRT content.
"""
- file_path1 = create_sample_srt_file(tmpdir, sample_srt_content)
- file_path2 = create_sample_srt_file(tmpdir, "1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.")
- with patch("src.subscleaner.subscleaner.process_file", side_effect=[True, False]):
- modified_files = process_files([file_path1, file_path2])
- assert modified_files == [file_path1]
+ subtitle_file1 = create_sample_srt_file(tmpdir, sample_srt_content)
+ subtitle_file2 = create_sample_srt_file(tmpdir, "1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.")
+ with patch("src.subscleaner.subscleaner.process_subtitle_file", side_effect=[True, False]):
+ modified_subtitle_files = process_subtitle_files([subtitle_file1, subtitle_file2])
+ assert modified_subtitle_files == [subtitle_file1]
def test_main_no_modification(tmpdir, sample_srt_content):
@@ -164,13 +164,13 @@ def test_main_no_modification(tmpdir, sample_srt_content):
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
sample_srt_content (str): The sample SRT content.
"""
- file_path = create_sample_srt_file(tmpdir, sample_srt_content)
+ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
with (
- patch("sys.stdin", StringIO(file_path)),
- patch("src.subscleaner.subscleaner.process_files", return_value=[]) as mock_process_files,
+ patch("sys.stdin", StringIO(subtitle_file)),
+ patch("src.subscleaner.subscleaner.process_subtitle_files", return_value=[]) as mock_process_subtitle_files,
):
main()
- mock_process_files.assert_called_once_with([file_path])
+ mock_process_subtitle_files.assert_called_once_with([subtitle_file])
def test_main_with_modification(tmpdir, sample_srt_content):
@@ -181,10 +181,13 @@ def test_main_with_modification(tmpdir, sample_srt_content):
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
sample_srt_content (str): The sample SRT content.
"""
- file_path = create_sample_srt_file(tmpdir, sample_srt_content)
+ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
with (
- patch("sys.stdin", StringIO(file_path)),
- patch("src.subscleaner.subscleaner.process_files", return_value=[file_path]) as mock_process_files,
+ patch("sys.stdin", StringIO(subtitle_file)),
+ patch(
+ "src.subscleaner.subscleaner.process_subtitle_files",
+ return_value=[subtitle_file],
+ ) as mock_process_subtitle_files,
):
main()
- mock_process_files.assert_called_once_with([file_path])
+ mock_process_subtitle_files.assert_called_once_with([subtitle_file])