summary | refs | log | tree | commit | diff
path: root/src/subscleaner/subscleaner.py
diff options
context:
space:
mode:
author Roger Gonzalez <roger@rogs.me> 2024-03-03 17:52:17 -0300
committer Roger Gonzalez <roger@rogs.me> 2024-03-03 17:52:17 -0300
commit 5387c348a6a8401bad2f717f4d464f1549d5b405 (patch)
tree 31ab8c8b6f78b02efdb5b14d6c8e3bca18e87580 /src/subscleaner/subscleaner.py
parent 92a05e38b4dd53de30c39a8af6cd32f7c55972b2 (diff)
Added subscleaner script
Diffstat (limited to 'src/subscleaner/subscleaner.py')
-rwxr-xr-x src/subscleaner/subscleaner.py 150
1 files changed, 150 insertions, 0 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
new file mode 100755
index 0000000..6e92fd5
--- /dev/null
+++ b/src/subscleaner/subscleaner.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python3
+"""Main Subscleaner module."""
+
+"""
+Subscleaner.
+Copyright (C) 2023 Roger Gonzalez
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
+import os
+import re
+import sys
+import time
+
+import chardet
+import pysrt
+
+# Regular expressions that identify advertisement / credit lines in subtitles.
+#
+# ads_in_line() tries each pattern with re.match (anchored at the start of the
+# text) and re.DOTALL, which is why most patterns start and end with ".*".
+# Matching is case-sensitive, so case variants are listed explicitly.
+ADS = [
+    r".*nordvpn.*",
+    r".*a Card Shark AMERICASCARDROOM.*",
+    r".*OpenSubtitles.*",
+    r".*Advertise your product or brand here.*",
+    r".*Apóyanos y conviértete en miembro VIP Para.*",
+    r".*Addic7ed.*",
+    r".*argenteam.*",
+    r".*AllSubs.*",
+    r"Created and Encoded by.*",
+    r".*corrected.*by.*",
+    r".*Entre a AmericasCardroom. com Hoy.*",
+    r".*Everyone is intimidated by a shark. Become.*",
+    r".*Juegue Poker en Línea por Dinero Real.*",
+    r".*Open Subtitles.*",
+    r".*MKV Player.*",
+    r".*MKV player.*",
+    r".*Resync.*for.*",
+    r".*Resync.*improved.*",
+    # NOTE(review): "d?" makes the "d" optional, so this matches "RippedBy" /
+    # "RippeBy" but not "Ripped By" -- confirm whether a separator was intended.
+    r".*Ripped?By.*",
+    r'.*Sigue "Community" en.*',
+    r".*Subtitles.*by.*",
+    # Accept both the accented and the unaccented spelling.
+    r".*Subt[ií]tulos.*por.*",
+    r".*Support us and become VIP member.*",
+    r".*Subs.*Team.*",
+    r".*subscene.*",
+    r".*Subtitulado por.*",
+    r".*subtitulamos.*",
+    r".*Synchronized.*by.*",
+    r".*Sincronizado y corregido por.*",
+    r".*subdivx.*",
+    r".*Sync.*Corrected.*",
+    r".*Sync.*corrections.*by.*",
+    # These two must be separate list items: without the comma between them
+    # Python concatenates the adjacent literals into a single pattern.
+    r".*sync and corrections by.*",
+    r".*Sync.*by.*",
+    # Accept both the accented and the unaccented spelling.
+    r".*Una.*traducci[oó]n.*de.*",
+    r".*tvsubtitles.*",
+    "Tacho8",
+    r".*www. com.*",
+    r".*www. es.*",
+]
+
+
+def ads_in_line(line: str) -> bool:
+    """
+    Tell whether a piece of subtitle text matches a known ad pattern.
+
+    Every pattern in ADS is tried with ``re.match``, so a pattern has to
+    match from the start of the text; ``re.DOTALL`` lets ``.`` also match
+    newlines, so multi-line text is covered.
+
+    Args:
+        line (str): The text to be checked.
+
+    Returns:
+        bool: True if at least one pattern matches, False otherwise.
+    """
+    for pattern in ADS:
+        if re.match(pattern, line, re.DOTALL) is not None:
+            return True
+    return False
+
+
+def process_file(filename):
+    """
+    Process a subtitle file to remove ads.
+
+    Files whose ``os.path.getctime`` timestamp is earlier than
+    2021-05-13 00:00:00 (local time) are reported as already processed and
+    left untouched. Otherwise the encoding is detected with ``chardet``, the
+    subtitles are parsed with ``pysrt``, every entry whose text matches an ad
+    pattern is removed, and the file is rewritten in place only if at least
+    one entry was removed.
+
+    Args:
+        filename (str): The path to the subtitle file.
+
+    Returns:
+        bool: True if the file was modified, False otherwise (also when the
+        file is skipped or an error occurs while processing it).
+    """
+    try:
+        created = os.path.getctime(filename)
+        # NOTE(review): the fixed cutoff presumably marks an earlier cleanup
+        # run -- confirm before changing it.
+        already_processed = time.mktime(
+            time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
+        )
+        if created < already_processed:
+            print(f"Already processed {filename}")
+            return False
+
+        print(f"Analyzing: {filename}")
+
+        # Read the raw bytes once so chardet can guess the text encoding.
+        with open(filename, "rb") as f:
+            encoding = chardet.detect(f.read())["encoding"]
+
+        subs = pysrt.open(filename, encoding=encoding)
+
+        # Locate the ads first and delete afterwards. Deleting from `subs`
+        # while enumerating it shifts the remaining entries left, so the
+        # entry right after every removed one would never be checked and
+        # consecutive ad entries would survive.
+        ad_indexes = []
+        for i, line in enumerate(subs):
+            if ads_in_line(line.text):
+                print(f"Removing: {line}\n")
+                ad_indexes.append(i)
+
+        if not ad_indexes:
+            return False
+
+        # Delete from the end so the indexes collected above stay valid.
+        for i in reversed(ad_indexes):
+            del subs[i]
+
+        print(f"Saving {filename}")
+        subs.save(filename)
+        return True
+    except Exception as e:
+        # Best effort per file: report the problem and let the caller carry
+        # on with the next file instead of aborting the whole run.
+        print(f"Error processing {filename}: {e}")
+        return False
+
+
+def main():
+    """
+    Clean every subtitle file named on standard input.
+
+    Each input line is stripped of surrounding whitespace and handed to
+    process_file(). The names of the files that were modified are
+    collected, and their count is printed at the end if there are any.
+    """
+    print("Starting script")
+
+    # Lazy generator: each path is processed as soon as its line is read.
+    paths = (raw.strip() for raw in sys.stdin)
+    modified_files = [path for path in paths if process_file(path)]
+
+    if modified_files:
+        print(f"Modified {len(modified_files)} files")
+    print("Done")
+
+
+# Run the cleaner only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()