Added subscleaner script
This commit is contained in:
parent
92a05e38b4
commit
5387c348a6
0
src/subscleaner/__init__.py
Normal file
0
src/subscleaner/__init__.py
Normal file
150
src/subscleaner/subscleaner.py
Executable file
150
src/subscleaner/subscleaner.py
Executable file
@ -0,0 +1,150 @@
|
||||
#!/usr/bin/python3
|
||||
"""Main Subscleaner module."""
|
||||
|
||||
"""
|
||||
Subscleaner.
|
||||
Copyright (C) 2023 Roger Gonzalez
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
import chardet
|
||||
import pysrt
|
||||
|
||||
ADS = [
|
||||
r".*nordvpn.*",
|
||||
r".*a Card Shark AMERICASCARDROOM.*",
|
||||
r".*OpenSubtitles.*",
|
||||
r".*Advertise your product or brand here.*",
|
||||
r".*Apóyanos y conviértete en miembro VIP Para.*",
|
||||
r".*Addic7ed.*",
|
||||
r".*argenteam.*",
|
||||
r".*AllSubs.*",
|
||||
r"Created and Encoded by.*",
|
||||
r".*corrected.*by.*",
|
||||
r".*Entre a AmericasCardroom. com Hoy.*",
|
||||
r".*Everyone is intimidated by a shark. Become.*",
|
||||
r".*Juegue Poker en Línea por Dinero Real.*",
|
||||
r".*OpenSubtitles.*",
|
||||
r".*Open Subtitles.*",
|
||||
r".*MKV Player.*",
|
||||
r".*MKV player.*",
|
||||
r".*Resync.*for.*",
|
||||
r".*Resync.*improved.*",
|
||||
r".*Ripped?By.*",
|
||||
r'.*Sigue "Community" en.*',
|
||||
r".*Subtitles.*by.*",
|
||||
r".*Subt?tulos.*por.*",
|
||||
r".*Support us and become VIP member.*",
|
||||
r".*Subs.*Team.*",
|
||||
r".*subscene.*",
|
||||
r".*Subtitulado por.*",
|
||||
r".*subtitulamos.*",
|
||||
r".*Synchronized.*by.*",
|
||||
r".*Sincronizado y corregido por.*",
|
||||
r".*subdivx.*",
|
||||
r".*Sync.*Corrected.*",
|
||||
r".*Sync.*corrections.*by.*",
|
||||
r".*sync and corrections by.*" r".*Sync.*by.*",
|
||||
r".*Una.*traducci?n.*de.*",
|
||||
r".*tvsubtitles.*",
|
||||
r".*Una.*traducci?n.*de.*",
|
||||
"Tacho8",
|
||||
r".*www. com.*",
|
||||
r".*www. es.*",
|
||||
]
|
||||
|
||||
|
||||
def ads_in_line(line: str) -> bool:
|
||||
"""
|
||||
Check if the given line contains an ad.
|
||||
|
||||
Args:
|
||||
line (str): The line of text to be checked.
|
||||
|
||||
Returns:
|
||||
bool: True if the line contains an ad, False otherwise.
|
||||
"""
|
||||
return any(re.match(ad, line, re.DOTALL) for ad in ADS)
|
||||
|
||||
|
||||
def process_file(filename):
|
||||
"""
|
||||
Process a subtitle file to remove ads.
|
||||
|
||||
Args:
|
||||
filename (str): The path to the subtitle file.
|
||||
|
||||
Returns:
|
||||
bool: True if the file was modified, False otherwise.
|
||||
"""
|
||||
try:
|
||||
created = os.path.getctime(filename)
|
||||
already_processed = time.mktime(
|
||||
time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
|
||||
)
|
||||
if created < already_processed:
|
||||
print(f"Already processed {filename}")
|
||||
return False
|
||||
|
||||
print(f"Analyzing: {filename}")
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
encoding = chardet.detect(f.read())["encoding"]
|
||||
|
||||
subs = pysrt.open(filename, encoding=encoding)
|
||||
modified = False
|
||||
for i, line in enumerate(subs):
|
||||
if ads_in_line(line.text):
|
||||
print(f"Removing: {line}\n")
|
||||
del subs[i]
|
||||
modified = True
|
||||
|
||||
if modified:
|
||||
print(f"Saving {filename}")
|
||||
subs.save(filename)
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error processing {filename}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Process subtitle files to remove ads.
|
||||
|
||||
Read filenames from standard input, process each file to remove ads,
|
||||
and print the result. Keep track of the modified files and print
|
||||
a summary at the end.
|
||||
"""
|
||||
modified_files = []
|
||||
print("Starting script")
|
||||
for filename in sys.stdin:
|
||||
filename = filename.strip()
|
||||
if process_file(filename):
|
||||
modified_files.append(filename)
|
||||
|
||||
if modified_files:
|
||||
print(f"Modified {len(modified_files)} files")
|
||||
print("Done")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user