Compare commits
15 Commits
Author | SHA1 | Date | |
---|---|---|---|
254e6b9c26 | |||
5dbfbcf888 | |||
0a51edb883 | |||
fb14e7f5f8 | |||
7a463713c2 | |||
cda431aa85 | |||
f54223db41 | |||
1c7fba5f5b | |||
0217b0b5a1 | |||
73fdc2e44f | |||
51b0628694 | |||
e8265ca061 | |||
a1e46ff198 | |||
6885a0c491 | |||
518146097e |
@ -9,7 +9,9 @@ RUN apk add --no-cache curl \
|
||||
&& mv supercronic-linux-amd64 /usr/local/bin/supercronic \
|
||||
&& apk del curl
|
||||
|
||||
RUN mkdir -p /data
|
||||
|
||||
RUN pip install --no-cache-dir subscleaner
|
||||
|
||||
CMD echo "${CRON:-0 0 * * *} find /files -name \"*.srt\" | $(which subscleaner)" > /crontab && \
|
||||
CMD echo "${CRON:-0 0 * * *} find /files -name \"*.srt\" | $(which subscleaner) --db-location /data/subscleaner.db" > /crontab && \
|
||||
/usr/local/bin/supercronic /crontab
|
||||
|
54
README.md
54
README.md
@ -90,6 +90,18 @@ docker run -e CRON="0 0 * * *" -v /your/media/location:/files -v /etc/localtime:
|
||||
|
||||
The Docker container will run the Subscleaner script according to the specified cron schedule and process the subtitle files in the mounted media directory.
|
||||
|
||||
#### Database Persistence in Docker
|
||||
|
||||
By default, the database lives inside the container's own filesystem and is lost when the container is removed. To keep it when the container is recreated (for example, after pulling a new image), mount a volume at `/data`:
|
||||
|
||||
``` sh
|
||||
docker run -e CRON="0 0 * * *" \
|
||||
-v /your/media/location:/files \
|
||||
-v /path/for/database:/data \
|
||||
-v /etc/localtime:/etc/localtime:ro \
|
||||
rogsme/subscleaner
|
||||
```
|
||||
|
||||
#### If you are using YAMS
|
||||
|
||||
YAMS is a highly opinionated media server. You can learn more about it here: https://yams.media/
|
||||
@ -103,9 +115,12 @@ Add this container to your `docker-compose.custom.yaml`:
|
||||
- CRON=0 0 * * *
|
||||
volumes:
|
||||
- ${MEDIA_DIRECTORY}:/files
|
||||
- ./subscleaner-data:/data
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
```
|
||||
|
||||
This ensures that the database is preserved when the container is recreated, preventing unnecessary reprocessing of subtitle files.
|
||||
|
||||
To get more information on how to add your own containers in YAMS: https://yams.media/advanced/add-your-own-containers/
|
||||
|
||||
## Contributing
|
||||
@ -121,3 +136,42 @@ Subscleaner is licensed under the GNU General Public License v3.0 or later. See
|
||||
This repository is a rewrite of this Github repository: https://github.com/FraMecca/subscleaner.
|
||||
|
||||
Thanks to [FraMecca](https://github.com/FraMecca/) on GitHub!
|
||||
|
||||
## Database and Caching
|
||||
|
||||
Subscleaner now uses a SQLite database to track processed files, which significantly improves performance by avoiding redundant processing of unchanged subtitle files.
|
||||
|
||||
### How it works
|
||||
|
||||
1. When Subscleaner processes a subtitle file, it generates an MD5 hash of the file content.
|
||||
2. This hash is stored in a SQLite database along with the file path.
|
||||
3. On subsequent runs, Subscleaner checks if the file has already been processed by comparing the current hash with the stored hash.
|
||||
4. If the file hasn't changed, it's skipped, saving processing time.
|
||||
|
||||
### Database Location
|
||||
|
||||
The SQLite database is stored in the following locations, depending on your operating system:
|
||||
|
||||
- **Linux**: `~/.local/share/subscleaner/subscleaner.db`
|
||||
- **macOS**: `~/Library/Application Support/subscleaner/subscleaner.db`
|
||||
- **Windows**: `C:\Users\<username>\AppData\Local\subscleaner\subscleaner\subscleaner.db`
|
||||
|
||||
### Command Line Options
|
||||
|
||||
Several command line options are available:
|
||||
|
||||
- `--db-location`: Specify a custom location for the database file
|
||||
- `--force`: Process all files regardless of whether they've been processed before
|
||||
- `--reset-db`: Reset the database (remove all stored file hashes)
|
||||
- `--list-patterns`: List all advertisement patterns being used
|
||||
- `--version`: Show version information and exit
|
||||
- `-v`, `--verbose`: Increase output verbosity (show analyzing/skipping messages)
|
||||
|
||||
Example usage:
|
||||
```sh
|
||||
find /your/media/location -name "*.srt" | subscleaner --force
|
||||
find /your/media/location -name "*.srt" | subscleaner --db-location /path/to/custom/database.db
|
||||
find /your/media/location -name "*.srt" | subscleaner --verbose
|
||||
```
|
||||
|
||||
This feature makes Subscleaner more efficient, especially when running regularly via cron jobs or other scheduled tasks, as it will only process new or modified subtitle files.
|
||||
|
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "subscleaner"
|
||||
version = "1.3.0"
|
||||
version = "2.1.1"
|
||||
description = "Remove advertisements from subtitle files"
|
||||
authors = [
|
||||
{name = "Roger Gonzalez", email = "roger@rogs.me"}
|
||||
@ -17,6 +17,7 @@ classifiers = [
|
||||
dependencies = [
|
||||
"pysrt>=1.1.2",
|
||||
"chardet>=5.2.0",
|
||||
"appdirs>=1.4.4",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
@ -0,0 +1,3 @@
|
||||
"""Subscleaner package."""
|
||||
|
||||
__version__ = "2.1.1"
|
@ -18,14 +18,16 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import hashlib
|
||||
import pathlib
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
|
||||
import chardet
|
||||
import pysrt
|
||||
from appdirs import user_data_dir
|
||||
|
||||
AD_PATTERNS = [
|
||||
re.compile(r"\bnordvpn\b", re.IGNORECASE),
|
||||
@ -98,6 +100,120 @@ AD_PATTERNS = [
|
||||
]
|
||||
|
||||
|
||||
def get_db_path(db_location=None):
    """
    Get the path to the SQLite database.

    Args:
        db_location (str, optional): Custom path for the database file. A leading
            ``~`` is expanded to the current user's home directory.

    Returns:
        pathlib.Path: The path to the database file. The directory that will
            contain it is created if it does not exist yet.
    """
    if db_location:
        # Expand "~" ourselves: the shell does not do it when the value is quoted
        # or comes from a crontab line, which would otherwise create a literal
        # "~" directory relative to the working directory.
        db_path = pathlib.Path(db_location).expanduser()
        db_path.parent.mkdir(parents=True, exist_ok=True)
        return db_path

    # No explicit location: fall back to the per-user application data directory.
    app_data_dir = pathlib.Path(user_data_dir("subscleaner", "subscleaner"))
    app_data_dir.mkdir(parents=True, exist_ok=True)
    return app_data_dir / "subscleaner.db"
|
||||
|
||||
|
||||
def init_db(db_path):
    """
    Initialize the database if it doesn't exist.

    Creates the ``processed_files`` table when it is missing; calling this on an
    already initialized database is a no-op.

    Args:
        db_path (pathlib.Path): The path to the database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS processed_files (
                file_path TEXT PRIMARY KEY,
                file_hash TEXT NOT NULL,
                processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
    finally:
        # Always release the connection, even if the statement fails.
        conn.close()
|
||||
|
||||
|
||||
def get_file_hash(file_path):
    """
    Generate an MD5 hash of the file content.

    The digest is only used as a change-detection fingerprint, not for security.

    Args:
        file_path (pathlib.Path): The path to the file.

    Returns:
        str: The MD5 hex digest of the file content, or None if the file could
            not be read.
    """
    # usedforsecurity=False keeps MD5 usable on FIPS-restricted Python builds,
    # where the plain constructor raises.
    hash_md5 = hashlib.md5(usedforsecurity=False)
    try:
        with open(file_path, "rb") as f:
            # Read in chunks so large files are never loaded whole into memory.
            for chunk in iter(lambda: f.read(65536), b""):
                hash_md5.update(chunk)
    except (OSError, ValueError) as e:
        print(f"Error generating hash for {file_path}: {e}")
        return None
    return hash_md5.hexdigest()
|
||||
|
||||
|
||||
def is_file_processed(db_path, file_path, file_hash):
    """
    Check if the file has been processed before.

    Args:
        db_path (pathlib.Path): The path to the database file.
        file_path (str): The path to the file.
        file_hash (str): The MD5 hash of the file content.

    Returns:
        bool: True if the file is recorded with the same hash, False if it is
            not recorded or its recorded hash differs.
    """
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute(
            "SELECT file_hash FROM processed_files WHERE file_path = ?",
            (str(file_path),),
        ).fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    if row is None:
        return False

    # A matching hash means the file is unchanged since it was last processed.
    return row[0] == file_hash
|
||||
|
||||
|
||||
def mark_file_processed(db_path, file_path, file_hash):
    """
    Mark the file as processed in the database.

    An existing row for the same path is replaced, so the stored hash always
    reflects the most recent run.

    Args:
        db_path (pathlib.Path): The path to the database file.
        file_path (str): The path to the file.
        file_hash (str): The MD5 hash of the file content.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "INSERT OR REPLACE INTO processed_files (file_path, file_hash) VALUES (?, ?)",
            (str(file_path), file_hash),
        )
        conn.commit()
    finally:
        # Always release the connection, even if the insert is rejected.
        conn.close()
|
||||
|
||||
|
||||
def contains_ad(subtitle_line: str) -> bool:
|
||||
"""
|
||||
Check if the given subtitle line contains an ad.
|
||||
@ -111,27 +227,6 @@ def contains_ad(subtitle_line: str) -> bool:
|
||||
return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
|
||||
|
||||
|
||||
def is_processed_before(subtitle_file: pathlib.Path) -> bool:
|
||||
"""
|
||||
Check if the subtitle file has already been processed.
|
||||
|
||||
Args:
|
||||
subtitle_file (pathlib.Path): The path to the subtitle file.
|
||||
|
||||
Returns:
|
||||
bool: True if the subtitle file has already been processed, False otherwise.
|
||||
"""
|
||||
try:
|
||||
file_creation_time = os.path.getctime(subtitle_file)
|
||||
processed_timestamp = time.mktime(
|
||||
time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
|
||||
)
|
||||
return file_creation_time < processed_timestamp
|
||||
except Exception as e:
|
||||
print(f"Error checking if file was processed before: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_encoding(subtitle_file: pathlib.Path) -> str:
|
||||
"""
|
||||
Detect the encoding of the subtitle file.
|
||||
@ -175,77 +270,210 @@ def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool:
|
||||
return modified
|
||||
|
||||
|
||||
def process_subtitle_file(subtitle_file_path: str) -> bool:
|
||||
def is_already_processed(subtitle_file, db_path, file_hash, force=False, verbose=False):
    """
    Tell whether a subtitle file can be skipped because it was handled earlier.

    The database is consulted for an entry with the same path and hash, unless
    ``force`` is set, in which case the lookup is bypassed entirely.

    Args:
        subtitle_file (pathlib.Path): The path to the subtitle file.
        db_path (pathlib.Path): The path to the database file.
        file_hash (str): The MD5 hash of the file content.
        force (bool): If True, ignore previous processing status.
        verbose (bool): If True, print a message when a hash match is found.

    Returns:
        bool: True if the file has already been processed, False otherwise.
    """
    # With force set the database is never queried.
    hash_matches = False if force else bool(is_file_processed(db_path, str(subtitle_file), file_hash))

    if hash_matches and verbose:
        print(f"Already processed {subtitle_file} (hash match)")

    return hash_matches
|
||||
|
||||
|
||||
def process_subtitle_file(subtitle_file_path: str, db_path, force=False, verbose=False) -> bool:
|
||||
"""
|
||||
Process a subtitle file to remove ad lines.
|
||||
|
||||
Args:
|
||||
subtitle_file_path (str): The path to the subtitle file.
|
||||
db_path (pathlib.Path): The path to the database file.
|
||||
force (bool): If True, process the file even if it has been processed before.
|
||||
verbose (bool): If True, print detailed processing information.
|
||||
|
||||
Returns:
|
||||
bool: True if the subtitle file was modified, False otherwise.
|
||||
"""
|
||||
try:
|
||||
subtitle_file = pathlib.Path(subtitle_file_path)
|
||||
if verbose:
|
||||
print(f"Analyzing: {subtitle_file}")
|
||||
|
||||
print(f"Analyzing: {subtitle_file}")
|
||||
|
||||
# Early validation checks
|
||||
if not subtitle_file.exists():
|
||||
print(f"File not found: {subtitle_file}")
|
||||
return False
|
||||
|
||||
if is_processed_before(subtitle_file):
|
||||
print(f"Already processed {subtitle_file}")
|
||||
# Get file hash and check if already processed
|
||||
file_hash = get_file_hash(subtitle_file)
|
||||
if file_hash is None or is_already_processed(subtitle_file, db_path, file_hash, force):
|
||||
return False
|
||||
|
||||
# Process the subtitle file
|
||||
modified = False
|
||||
encoding = get_encoding(subtitle_file)
|
||||
|
||||
# Try to open the subtitle file
|
||||
subtitle_data = None
|
||||
try:
|
||||
subtitle_data = pysrt.open(subtitle_file, encoding=encoding)
|
||||
except UnicodeDecodeError:
|
||||
print(f"Failed to open with detected encoding {encoding}, trying utf-8")
|
||||
subtitle_data = pysrt.open(subtitle_file, encoding="utf-8")
|
||||
try:
|
||||
subtitle_data = pysrt.open(subtitle_file, encoding="utf-8")
|
||||
except Exception as e:
|
||||
print(f"Error opening subtitle file with pysrt: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error opening subtitle file with pysrt: {e}")
|
||||
return False
|
||||
|
||||
if remove_ad_lines(subtitle_data):
|
||||
# Remove ad lines and save if modified
|
||||
if subtitle_data and remove_ad_lines(subtitle_data):
|
||||
print(f"Saving {subtitle_file}")
|
||||
subtitle_data.save(subtitle_file)
|
||||
return True
|
||||
return False
|
||||
# Update the hash after modification
|
||||
new_hash = get_file_hash(subtitle_file)
|
||||
mark_file_processed(db_path, str(subtitle_file), new_hash)
|
||||
modified = True
|
||||
else:
|
||||
# Mark as processed even if no changes were made
|
||||
mark_file_processed(db_path, str(subtitle_file), file_hash)
|
||||
|
||||
return modified
|
||||
except Exception as e:
|
||||
print(f"Error processing {subtitle_file_path}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def process_subtitle_files(subtitle_files: list[str]) -> list[str]:
|
||||
def process_subtitle_files(subtitle_files: list[str], db_path, force=False, verbose=False) -> list[str]:
|
||||
"""
|
||||
Process multiple subtitle files to remove ad lines.
|
||||
|
||||
Args:
|
||||
subtitle_files (list[str]): A list of subtitle file paths.
|
||||
db_path (pathlib.Path): The path to the database file.
|
||||
force (bool): If True, process files even if they have been processed before.
|
||||
verbose (bool): If True, print detailed processing information.
|
||||
|
||||
Returns:
|
||||
list[str]: A list of modified subtitle file paths.
|
||||
"""
|
||||
modified_files = []
|
||||
for subtitle_file in subtitle_files:
|
||||
if process_subtitle_file(subtitle_file):
|
||||
if process_subtitle_file(subtitle_file, db_path, force, verbose):
|
||||
modified_files.append(subtitle_file)
|
||||
return modified_files
|
||||
|
||||
|
||||
def _parse_args(argv=None):
    """
    Parse command line arguments.

    Args:
        argv (list[str], optional): Argument list to parse. When None, argparse
            reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: The parsed options (``db_location``, ``force``,
            ``version``, ``reset_db``, ``list_patterns``, ``verbose``).
    """
    parser = argparse.ArgumentParser(description="Remove advertisements from subtitle files.")
    parser.add_argument(
        "--db-location",
        type=str,
        help="Specify a custom location for the database file",
    )
    parser.add_argument("--force", action="store_true", help="Process files even if they have been processed before")
    parser.add_argument("--version", action="store_true", help="Show version information and exit")
    parser.add_argument("--reset-db", action="store_true", help="Reset the database (remove all stored file hashes)")
    parser.add_argument("--list-patterns", action="store_true", help="List all advertisement patterns being used")
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Increase output verbosity (show analyzing/skipping messages)",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def _print_version():
    """
    Print the application version.

    The version is taken from the ``subscleaner`` package when it can be
    imported, otherwise from the installed distribution metadata. If neither
    is available, "unknown" is printed instead of raising.
    """
    try:
        from subscleaner import __version__ as version
    except ImportError:
        import importlib.metadata

        try:
            version = importlib.metadata.version("subscleaner")
        except importlib.metadata.PackageNotFoundError:
            # Running from a source tree without an installed distribution.
            version = "unknown"

    print(f"Subscleaner version {version}")
|
||||
|
||||
|
||||
def _reset_database(db_path):
    """
    Reset the database by deleting the database file.

    Prints the outcome instead of raising: success, no database present, or the
    operating-system error that prevented deletion.

    Args:
        db_path (pathlib.Path): The path to the database file.
    """
    # Try the deletion directly rather than checking existence first, so a file
    # that disappears between check and delete is still reported correctly.
    try:
        db_path.unlink()
    except FileNotFoundError:
        print(f"No database found at {db_path}")
    except OSError as e:
        print(f"Error resetting database: {e}")
    else:
        print(f"Database reset successfully: {db_path}")
|
||||
|
||||
|
||||
def _list_patterns():
    """Print a numbered listing of every advertisement regex in AD_PATTERNS."""
    header = "Advertisement patterns being used:"
    # Numbering starts at 1 to match what a user expects from a printed list.
    numbered = [f"{index}. {regex.pattern}" for index, regex in enumerate(AD_PATTERNS, start=1)]
    print("\n".join([header, *numbered]))
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Process subtitle files to remove ad lines.
|
||||
Run the main entry point for the Subscleaner script.
|
||||
|
||||
Read subtitle file paths from standard input, process each file to remove ad lines,
|
||||
and print the result. Keep track of the modified files and print
|
||||
a summary at the end.
|
||||
Parse arguments, handle special commands like version or reset-db,
|
||||
and processes subtitle files provided via stdin.
|
||||
"""
|
||||
args = _parse_args()
|
||||
|
||||
# Handle version request
|
||||
if args.version:
|
||||
_print_version()
|
||||
return
|
||||
|
||||
# Get database path
|
||||
db_path = get_db_path(args.db_location)
|
||||
|
||||
# Handle reset database request
|
||||
if args.reset_db:
|
||||
_reset_database(db_path)
|
||||
return
|
||||
|
||||
# Handle list patterns request
|
||||
if args.list_patterns:
|
||||
_list_patterns()
|
||||
return
|
||||
|
||||
# Initialize database if not resetting
|
||||
init_db(db_path)
|
||||
|
||||
# Process subtitle files
|
||||
subtitle_files = [file_path.strip() for file_path in sys.stdin]
|
||||
print("Starting script")
|
||||
modified_files = process_subtitle_files(subtitle_files)
|
||||
if not subtitle_files:
|
||||
print("No subtitle files provided. Pipe filenames to subscleaner or use --help for more information.")
|
||||
return
|
||||
|
||||
if args.verbose:
|
||||
print("Starting script")
|
||||
modified_files = process_subtitle_files(subtitle_files, db_path, args.force, args.verbose)
|
||||
if modified_files:
|
||||
print(f"Modified {len(modified_files)} files")
|
||||
print("Done")
|
||||
|
@ -11,7 +11,6 @@ import pytest
|
||||
from src.subscleaner.subscleaner import (
|
||||
contains_ad,
|
||||
get_encoding,
|
||||
is_processed_before,
|
||||
main,
|
||||
process_subtitle_file,
|
||||
process_subtitle_files,
|
||||
@ -36,6 +35,12 @@ Another sample subtitle.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
def mock_db_path(tmp_path):
    """
    Return a database path that is unique to the requesting test.

    The path lives inside pytest's per-test temporary directory, so tests never
    share a database file and nothing depends on a writable ``/tmp``.
    """
    return tmp_path / "test_subscleaner.db"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def special_chars_temp_dir(tmpdir):
|
||||
"""Create a temporary directory with special character filenames."""
|
||||
@ -93,23 +98,6 @@ def test_contains_ad(subtitle_line, expected_result):
|
||||
assert contains_ad(subtitle_line) is expected_result
|
||||
|
||||
|
||||
def test_is_processed_before(tmpdir):
|
||||
"""
|
||||
Test the is_processed_before function.
|
||||
|
||||
Args:
|
||||
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
|
||||
"""
|
||||
subtitle_file = create_sample_srt_file(tmpdir, "")
|
||||
subtitle_path = Path(subtitle_file)
|
||||
|
||||
with patch("os.path.getctime", return_value=0):
|
||||
assert is_processed_before(subtitle_path) is True
|
||||
|
||||
with patch("os.path.getctime", return_value=9999999999):
|
||||
assert is_processed_before(subtitle_path) is False
|
||||
|
||||
|
||||
def test_get_encoding(tmpdir, sample_srt_content):
|
||||
"""
|
||||
Test the get_encoding function.
|
||||
@ -140,56 +128,71 @@ def test_remove_ad_lines(sample_srt_content):
|
||||
assert len(subtitle_data) == 1
|
||||
|
||||
|
||||
def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content):
|
||||
def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test the process_subtitle_file function when the file does not require modification.
|
||||
|
||||
Args:
|
||||
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
|
||||
sample_srt_content (str): The sample SRT content.
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||
with patch("src.subscleaner.subscleaner.is_processed_before", return_value=True):
|
||||
assert process_subtitle_file(subtitle_file) is False
|
||||
with (
|
||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=True),
|
||||
):
|
||||
assert process_subtitle_file(subtitle_file, mock_db_path) is False
|
||||
|
||||
|
||||
def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content):
|
||||
def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test the process_subtitle_file function when the file requires modification.
|
||||
|
||||
Args:
|
||||
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
|
||||
sample_srt_content (str): The sample SRT content.
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||
with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
|
||||
assert process_subtitle_file(subtitle_file) is True
|
||||
with (
|
||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||
):
|
||||
assert process_subtitle_file(subtitle_file, mock_db_path) is True
|
||||
|
||||
|
||||
def test_process_subtitle_file_error(tmpdir):
|
||||
def test_process_subtitle_file_error(tmpdir, mock_db_path):
|
||||
"""
|
||||
Test the process_subtitle_file function when an error occurs (e.g., file not found).
|
||||
|
||||
Args:
|
||||
tmpdir (pytest.fixture): A temporary directory.
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
subtitle_file = tmpdir.join("nonexistent.srt")
|
||||
assert process_subtitle_file(str(subtitle_file)) is False
|
||||
assert process_subtitle_file(str(subtitle_file), mock_db_path) is False
|
||||
|
||||
|
||||
def test_process_subtitle_files(tmpdir, sample_srt_content):
|
||||
def test_process_subtitle_files(tmpdir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test the process_subtitle_files function.
|
||||
|
||||
Args:
|
||||
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT files.
|
||||
sample_srt_content (str): The sample SRT content.
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
subtitle_file1 = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||
subtitle_file2 = create_sample_srt_file(tmpdir, "1\n00:00:01,000 --> 00:00:03,000\nThis is a sample subtitle.")
|
||||
with patch("src.subscleaner.subscleaner.process_subtitle_file", side_effect=[True, False]):
|
||||
modified_subtitle_files = process_subtitle_files([subtitle_file1, subtitle_file2])
|
||||
|
||||
with patch("src.subscleaner.subscleaner.process_subtitle_file", side_effect=[True, False]) as mock_process:
|
||||
modified_subtitle_files = process_subtitle_files([subtitle_file1, subtitle_file2], mock_db_path)
|
||||
assert modified_subtitle_files == [subtitle_file1]
|
||||
assert mock_process.call_count == 2 # noqa PLR2004
|
||||
# Check that db_path was passed to process_subtitle_file
|
||||
mock_process.assert_any_call(subtitle_file1, mock_db_path, False, False)
|
||||
mock_process.assert_any_call(subtitle_file2, mock_db_path, False, False)
|
||||
|
||||
|
||||
def test_main_no_modification(tmpdir, sample_srt_content):
|
||||
@ -201,12 +204,16 @@ def test_main_no_modification(tmpdir, sample_srt_content):
|
||||
sample_srt_content (str): The sample SRT content.
|
||||
"""
|
||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||
|
||||
with (
|
||||
patch("sys.stdin", StringIO(subtitle_file)),
|
||||
patch("sys.argv", ["subscleaner"]),
|
||||
patch("src.subscleaner.subscleaner.get_db_path", return_value=Path("/tmp/test_db.db")),
|
||||
patch("src.subscleaner.subscleaner.init_db"),
|
||||
patch("src.subscleaner.subscleaner.process_subtitle_files", return_value=[]) as mock_process_subtitle_files,
|
||||
):
|
||||
main()
|
||||
mock_process_subtitle_files.assert_called_once_with([subtitle_file])
|
||||
mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False, False)
|
||||
|
||||
|
||||
def test_main_with_modification(tmpdir, sample_srt_content):
|
||||
@ -218,37 +225,41 @@ def test_main_with_modification(tmpdir, sample_srt_content):
|
||||
sample_srt_content (str): The sample SRT content.
|
||||
"""
|
||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||
|
||||
with (
|
||||
patch("sys.stdin", StringIO(subtitle_file)),
|
||||
patch("sys.argv", ["subscleaner"]),
|
||||
patch("src.subscleaner.subscleaner.get_db_path", return_value=Path("/tmp/test_db.db")),
|
||||
patch("src.subscleaner.subscleaner.init_db"),
|
||||
patch(
|
||||
"src.subscleaner.subscleaner.process_subtitle_files",
|
||||
return_value=[subtitle_file],
|
||||
) as mock_process_subtitle_files,
|
||||
):
|
||||
main()
|
||||
mock_process_subtitle_files.assert_called_once_with([subtitle_file])
|
||||
mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False, False)
|
||||
|
||||
|
||||
def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_content):
|
||||
def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test processing subtitle files with special characters in their names.
|
||||
|
||||
Args:
|
||||
special_chars_temp_dir: Temporary directory for special character files
|
||||
sample_srt_content: Sample SRT content
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
||||
|
||||
with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
|
||||
modified_files = process_subtitle_files(special_files)
|
||||
with (
|
||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||
):
|
||||
modified_files = process_subtitle_files(special_files, mock_db_path)
|
||||
|
||||
assert len(modified_files) == len(special_files), "Not all files with special characters were processed"
|
||||
|
||||
for file_path in special_files:
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
assert "OpenSubtitles" not in content, f"Ad not removed from {file_path}"
|
||||
|
||||
|
||||
def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_content):
|
||||
"""
|
||||
@ -273,62 +284,51 @@ def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_cont
|
||||
pytest.fail(f"get_encoding raised {e} with non-existent file")
|
||||
|
||||
|
||||
def test_is_processed_before_with_special_chars(special_chars_temp_dir):
|
||||
"""
|
||||
Test is_processed_before function with special character filenames.
|
||||
|
||||
Args:
|
||||
special_chars_temp_dir: Temporary directory for special character files
|
||||
"""
|
||||
file_path = special_chars_temp_dir / "check_processed_ümlaut.srt"
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write("Test content")
|
||||
|
||||
with patch("os.path.getctime", return_value=0):
|
||||
assert is_processed_before(file_path) is True
|
||||
|
||||
with patch("os.path.getctime", return_value=9999999999):
|
||||
assert is_processed_before(file_path) is False
|
||||
|
||||
non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
|
||||
assert is_processed_before(non_existent_file) is False
|
||||
|
||||
|
||||
def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content):
|
||||
def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test process_subtitle_file function with special character filenames.
|
||||
|
||||
Args:
|
||||
special_chars_temp_dir: Temporary directory for special character files
|
||||
sample_srt_content: Sample SRT content
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
file_path = special_chars_temp_dir / "process_this_ümlaut,file.srt"
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(sample_srt_content)
|
||||
|
||||
with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
|
||||
assert process_subtitle_file(str(file_path)) is True
|
||||
with (
|
||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||
):
|
||||
assert process_subtitle_file(str(file_path), mock_db_path) is True
|
||||
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
assert "OpenSubtitles" not in content
|
||||
|
||||
non_existent_file = str(special_chars_temp_dir / "non_existent_ümlaut,file.srt")
|
||||
assert process_subtitle_file(non_existent_file) is False
|
||||
assert process_subtitle_file(non_existent_file, mock_db_path) is False
|
||||
|
||||
|
||||
def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_content):
|
||||
def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
|
||||
"""
|
||||
Test that files with special characters can be saved correctly after modification.
|
||||
|
||||
Args:
|
||||
special_chars_temp_dir: Temporary directory for special character files
|
||||
sample_srt_content: Sample SRT content
|
||||
mock_db_path (Path): A mock database path.
|
||||
"""
|
||||
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
||||
|
||||
with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
|
||||
modified_files = process_subtitle_files(special_files)
|
||||
with (
|
||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||
):
|
||||
modified_files = process_subtitle_files(special_files, mock_db_path)
|
||||
|
||||
for file_path in modified_files:
|
||||
assert os.path.exists(file_path), f"File {file_path} does not exist after saving"
|
||||
@ -357,10 +357,13 @@ def test_main_with_special_chars(special_chars_temp_dir, sample_srt_content):
|
||||
|
||||
with (
|
||||
patch("sys.stdin", StringIO(stdin_content)),
|
||||
patch("sys.argv", ["subscleaner"]),
|
||||
patch("src.subscleaner.subscleaner.get_db_path", return_value=Path("/tmp/test_db.db")),
|
||||
patch("src.subscleaner.subscleaner.init_db"),
|
||||
patch(
|
||||
"src.subscleaner.subscleaner.process_subtitle_files",
|
||||
return_value=[str(file_path)],
|
||||
) as mock_process_subtitle_files,
|
||||
):
|
||||
main()
|
||||
mock_process_subtitle_files.assert_called_once_with([str(file_path)])
|
||||
mock_process_subtitle_files.assert_called_once_with([str(file_path)], Path("/tmp/test_db.db"), False, False)
|
||||
|
13
uv.lock
generated
13
uv.lock
generated
@ -2,6 +2,15 @@ version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.9"
|
||||
|
||||
[[package]]
|
||||
name = "appdirs"
|
||||
version = "1.4.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "attrs"
|
||||
version = "25.3.0"
|
||||
@ -447,9 +456,10 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "subscleaner"
|
||||
version = "1.2.0"
|
||||
version = "2.1.1"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "appdirs" },
|
||||
{ name = "chardet" },
|
||||
{ name = "pysrt" },
|
||||
]
|
||||
@ -465,6 +475,7 @@ dev = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "appdirs", specifier = ">=1.4.4" },
|
||||
{ name = "chardet", specifier = ">=5.2.0" },
|
||||
{ name = "pysrt", specifier = ">=1.1.2" },
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user