summary | refs | log | tree | commit | diff
path: root/src/subscleaner/subscleaner.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/subscleaner/subscleaner.py')
-rwxr-xr-x src/subscleaner/subscleaner.py 98
1 files changed, 74 insertions, 24 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index 6e92fd5..0896d6c 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -1,4 +1,3 @@
-#!/usr/bin/python3
"""Main Subscleaner module."""
"""
@@ -84,7 +83,57 @@ def ads_in_line(line: str) -> bool:
return any(re.match(ad, line, re.DOTALL) for ad in ADS)
-def process_file(filename):
+def is_already_processed(filename: str) -> bool:
+    """
+    Tell whether a subtitle file counts as already processed.
+
+    A file is treated as processed when os.path.getctime reports a time
+    earlier than 2021-05-13 00:00:00 (interpreted as local time).
+
+    Args:
+        filename (str): The path to the subtitle file.
+
+    Returns:
+        bool: True if the file's ctime precedes the cutoff, False otherwise.
+    """
+    cutoff = time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S")
+    return os.path.getctime(filename) < time.mktime(cutoff)
+
+
+def detect_encoding(filename: str) -> str:
+    """Guess the character encoding of the subtitle file.
+
+    Args:
+        filename (str): The path to the subtitle file.
+
+    Returns:
+        str: The "encoding" value from chardet's detection of the raw bytes.
+    """
+    with open(filename, "rb") as subtitle_file:
+        raw_bytes = subtitle_file.read()
+    return chardet.detect(raw_bytes)["encoding"]
+
+
+def remove_ads(subs: pysrt.SubRipFile) -> bool:
+    """
+    Remove ad entries from the subtitle file in place, even back-to-back ones.
+
+    Args:
+        subs (pysrt.SubRipFile): The subtitle file object.
+
+    Returns:
+        bool: True if the file was modified, False otherwise.
+    """
+    ad_indexes = [i for i, line in enumerate(subs) if ads_in_line(line.text)]
+    for i in ad_indexes:
+        print(f"Removing: {subs[i]}\n")
+    # Delete back to front so the indexes still to be deleted stay valid.
+    for i in reversed(ad_indexes):
+        del subs[i]
+    return bool(ad_indexes)
+
+
+def process_file(filename: str) -> bool:
"""
Process a subtitle file to remove ads.
@@ -95,28 +144,16 @@ def process_file(filename):
bool: True if the file was modified, False otherwise.
"""
try:
- created = os.path.getctime(filename)
- already_processed = time.mktime(
- time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
- )
- if created < already_processed:
+ if is_already_processed(filename):
print(f"Already processed {filename}")
return False
print(f"Analyzing: {filename}")
- with open(filename, "rb") as f:
- encoding = chardet.detect(f.read())["encoding"]
-
+ encoding = detect_encoding(filename)
subs = pysrt.open(filename, encoding=encoding)
- modified = False
- for i, line in enumerate(subs):
- if ads_in_line(line.text):
- print(f"Removing: {line}\n")
- del subs[i]
- modified = True
-
- if modified:
+
+ if remove_ads(subs):
print(f"Saving {filename}")
subs.save(filename)
return True
@@ -126,6 +163,23 @@ def process_file(filename):
return False
+def process_files(filenames: list[str]) -> list[str]:
+    """
+    Run process_file over each subtitle path and collect the ones it changed.
+
+    Files are handled one at a time, in the order given, and a path is kept
+    in the result only when process_file returns a truthy value for it.
+
+    Args:
+        filenames (list[str]): Paths of the subtitle files to process.
+
+    Returns:
+        list[str]: The paths that were modified, in input order.
+    """
+    changed = [path for path in filenames if process_file(path)]
+    return changed
+
+
def main():
"""
Process subtitle files to remove ads.
@@ -134,13 +188,9 @@ def main():
and print the result. Keep track of the modified files and print
a summary at the end.
"""
- modified_files = []
+ filenames = [filename.strip() for filename in sys.stdin]
print("Starting script")
- for filename in sys.stdin:
- filename = filename.strip()
- if process_file(filename):
- modified_files.append(filename)
-
+ modified_files = process_files(filenames)
if modified_files:
print(f"Modified {len(modified_files)} files")
print("Done")