From e7c4dc16f842ab047a3f7e1bf4bb2490cfed06ff Mon Sep 17 00:00:00 2001
From: Roger Gonzalez <roger@rogs.me>
Date: Wed, 19 Mar 2025 15:49:38 -0300
Subject: [PATCH 1/3] Add support for special characters in filenames

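- Convert subtitle file paths to pathlib.Path so filenames with special characters are handled.
- Add error handling and a UTF-8 fallback when checking, detecting the encoding of, and opening subtitle files.
- Fix remove_ad_lines so entries are no longer deleted while the subtitle list is being iterated.
- Update tests to ensure correct behavior with special characters in filenames.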
---
 src/subscleaner/subscleaner.py |  64 ++++++++----
 tests/test_subscleaner.py      | 183 ++++++++++++++++++++++++++++++++-
 2 files changed, 224 insertions(+), 23 deletions(-)

diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index a6fc36b..e12a29b 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -22,6 +22,7 @@ import os
 import re
 import sys
 import time
+import pathlib
 
 import chardet
 import pysrt
@@ -110,35 +111,43 @@ def contains_ad(subtitle_line: str) -> bool:
     return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
 
 
-def is_processed_before(subtitle_file: str) -> bool:
+def is_processed_before(subtitle_file: pathlib.Path) -> bool:
     """
     Check if the subtitle file has already been processed.
 
     Args:
-        subtitle_file (str): The path to the subtitle file.
+        subtitle_file (pathlib.Path): The path to the subtitle file.
 
     Returns:
         bool: True if the subtitle file has already been processed, False otherwise.
     """
-    file_creation_time = os.path.getctime(subtitle_file)
-    processed_timestamp = time.mktime(
-        time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
-    )
-    return file_creation_time < processed_timestamp
+    try:
+        file_creation_time = os.path.getctime(subtitle_file)
+        processed_timestamp = time.mktime(
+            time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
+        )
+        return file_creation_time < processed_timestamp
+    except Exception as e:
+        print(f"Error checking if file was processed before: {e}")
+        return False
 
 
-def get_encoding(subtitle_file: str) -> str:
+def get_encoding(subtitle_file: pathlib.Path) -> str:
     """
     Detect the encoding of the subtitle file.
 
     Args:
-        subtitle_file (str): The path to the subtitle file.
+        subtitle_file (pathlib.Path): The path to the subtitle file.
 
     Returns:
         str: The detected encoding of the subtitle file.
     """
-    with open(subtitle_file, "rb") as file:
-        return chardet.detect(file.read())["encoding"]
+    try:
+        with open(subtitle_file, "rb") as file:
+            return chardet.detect(file.read())["encoding"] or "utf-8"
+    except Exception as e:
+        print(f"Error detecting encoding: {e}")
+        return "utf-8"
 
 
 def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool:
@@ -152,33 +161,52 @@ def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool:
         bool: True if the subtitle data was modified, False otherwise.
     """
     modified = False
+    indices_to_remove = []
+
     for index, subtitle in enumerate(subtitle_data):
         if contains_ad(subtitle.text):
             print(f"Removing: {subtitle}\n")
-            del subtitle_data[index]
+            indices_to_remove.append(index)
             modified = True
+
+    for index in sorted(indices_to_remove, reverse=True):
+        del subtitle_data[index]
+
     return modified
 
 
-def process_subtitle_file(subtitle_file: str) -> bool:
+def process_subtitle_file(subtitle_file_path: str) -> bool:
     """
     Process a subtitle file to remove ad lines.
 
     Args:
-        subtitle_file (str): The path to the subtitle file.
+        subtitle_file_path (str): The path to the subtitle file.
 
     Returns:
         bool: True if the subtitle file was modified, False otherwise.
     """
     try:
+        subtitle_file = pathlib.Path(subtitle_file_path)
+
+        print(f"Analyzing: {subtitle_file}")
+
+        if not subtitle_file.exists():
+            print(f"File not found: {subtitle_file}")
+            return False
+
         if is_processed_before(subtitle_file):
             print(f"Already processed {subtitle_file}")
             return False
 
-        print(f"Analyzing: {subtitle_file}")
-
         encoding = get_encoding(subtitle_file)
-        subtitle_data = pysrt.open(subtitle_file, encoding=encoding)
+        try:
+            subtitle_data = pysrt.open(subtitle_file, encoding=encoding)
+        except UnicodeDecodeError:
+            print(f"Failed to open with detected encoding {encoding}, trying utf-8")
+            subtitle_data = pysrt.open(subtitle_file, encoding="utf-8")
+        except Exception as e:
+            print(f"Error opening subtitle file with pysrt: {e}")
+            return False
 
         if remove_ad_lines(subtitle_data):
             print(f"Saving {subtitle_file}")
@@ -186,7 +214,7 @@ def process_subtitle_file(subtitle_file: str) -> bool:
             return True
         return False
     except Exception as e:
-        print(f"Error processing {subtitle_file}: {e}")
+        print(f"Error processing {subtitle_file_path}: {e}")
         return False
 
 
diff --git a/tests/test_subscleaner.py b/tests/test_subscleaner.py
index 52eec01..7147f8e 100644
--- a/tests/test_subscleaner.py
+++ b/tests/test_subscleaner.py
@@ -1,6 +1,9 @@
 """Unit tests for the subscleaner module."""
 
+import os
+import sys
 from io import StringIO
+from pathlib import Path
 from unittest.mock import patch
 
 import pysrt
@@ -34,6 +37,14 @@ Another sample subtitle.
 """
 
 
+@pytest.fixture
+def special_chars_temp_dir(tmpdir):
+    """Create a temporary directory with special character filenames."""
+    special_chars_dir = Path(tmpdir) / "special_chars"
+    special_chars_dir.mkdir(exist_ok=True)
+    return special_chars_dir
+
+
 def create_sample_srt_file(tmpdir, content):
     """Create a sample SRT file with the given content."""
     file_path = tmpdir.join("sample.srt")
@@ -41,6 +52,28 @@ def create_sample_srt_file(tmpdir, content):
     return str(file_path)
 
 
+def create_special_char_files(dir_path, content):
+    """Create sample SRT files with special characters in their names."""
+    special_filenames = [
+        "file,with,commas.srt",
+        "file with spaces.srt",
+        "file_with_ümlaut.srt",
+        "file_with_ß_char.srt",
+        "file_with_áccent.srt",
+        "file_with_$ymbol.srt",
+        "file_with_パーセント.srt",  # Japanese characters
+    ]
+
+    created_files = []
+    for filename in special_filenames:
+        file_path = dir_path / filename
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write(content)
+        created_files.append(str(file_path))
+
+    return created_files
+
+
 @pytest.mark.parametrize(
     "subtitle_line, expected_result",
     [
@@ -69,11 +102,13 @@ def test_is_processed_before(tmpdir):
         tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
     """
     subtitle_file = create_sample_srt_file(tmpdir, "")
-    with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=0):
-        assert is_processed_before(subtitle_file) is True
+    subtitle_path = Path(subtitle_file)
 
-    with patch("src.subscleaner.subscleaner.os.path.getctime", return_value=9999999999):
-        assert is_processed_before(subtitle_file) is False
+    with patch("os.path.getctime", return_value=0):
+        assert is_processed_before(subtitle_path) is True
+
+    with patch("os.path.getctime", return_value=9999999999):
+        assert is_processed_before(subtitle_path) is False
 
 
 def test_get_encoding(tmpdir, sample_srt_content):
@@ -85,7 +120,8 @@ def test_get_encoding(tmpdir, sample_srt_content):
         sample_srt_content (str): The sample SRT content.
     """
     subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
-    assert get_encoding(subtitle_file) == "ascii"
+    encoding = get_encoding(Path(subtitle_file))
+    assert encoding in ("ascii", "utf-8"), f"Expected ascii or utf-8, got {encoding}"
 
 
 def test_remove_ad_lines(sample_srt_content):
@@ -192,3 +228,140 @@ def test_main_with_modification(tmpdir, sample_srt_content):
     ):
         main()
         mock_process_subtitle_files.assert_called_once_with([subtitle_file])
+
+
+def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_content):
+    """
+    Test processing subtitle files with special characters in their names.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+        sample_srt_content: Sample SRT content
+    """
+    special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
+
+    with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
+        modified_files = process_subtitle_files(special_files)
+
+    assert len(modified_files) == len(special_files), "Not all files with special characters were processed"
+
+    for file_path in special_files:
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        assert "OpenSubtitles" not in content, f"Ad not removed from {file_path}"
+
+
+def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_content):
+    """
+    Test encoding detection for files with special characters in their names.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+        sample_srt_content: Sample SRT content
+    """
+    file_path = special_chars_temp_dir / "test_ümlaut_ß_áccent.srt"
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(sample_srt_content)
+
+    encoding = get_encoding(file_path)
+    assert encoding is not None, "Encoding detection failed for file with special characters"
+
+    non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
+    try:
+        encoding = get_encoding(non_existent_file)
+        assert encoding == "utf-8", "Fallback encoding is not utf-8"
+    except Exception as e:
+        pytest.fail(f"get_encoding raised {e} with non-existent file")
+
+
+def test_is_processed_before_with_special_chars(special_chars_temp_dir):
+    """
+    Test is_processed_before function with special character filenames.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+    """
+    file_path = special_chars_temp_dir / "check_processed_ümlaut.srt"
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write("Test content")
+
+    with patch("os.path.getctime", return_value=0):
+        assert is_processed_before(file_path) is True
+
+    with patch("os.path.getctime", return_value=9999999999):
+        assert is_processed_before(file_path) is False
+
+    non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
+    assert is_processed_before(non_existent_file) is False
+
+
+def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content):
+    """
+    Test process_subtitle_file function with special character filenames.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+        sample_srt_content: Sample SRT content
+    """
+    file_path = special_chars_temp_dir / "process_this_ümlaut,file.srt"
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(sample_srt_content)
+
+    with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
+        assert process_subtitle_file(str(file_path)) is True
+
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read()
+    assert "OpenSubtitles" not in content
+
+    non_existent_file = str(special_chars_temp_dir / "non_existent_ümlaut,file.srt")
+    assert process_subtitle_file(non_existent_file) is False
+
+
+def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_content):
+    """
+    Test that files with special characters can be saved correctly after modification.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+        sample_srt_content: Sample SRT content
+    """
+    special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
+
+    with patch("src.subscleaner.subscleaner.is_processed_before", return_value=False):
+        modified_files = process_subtitle_files(special_files)
+
+    for file_path in modified_files:
+        assert os.path.exists(file_path), f"File {file_path} does not exist after saving"
+
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            assert "OpenSubtitles" not in content, f"Content was not properly saved in {file_path}"
+        except Exception as e:
+            pytest.fail(f"Failed to reopen file {file_path} after saving: {e}")
+
+
+def test_main_with_special_chars(special_chars_temp_dir, sample_srt_content):
+    """
+    Test the main function with filenames containing special characters.
+
+    Args:
+        special_chars_temp_dir: Temporary directory for special character files
+        sample_srt_content: Sample SRT content
+    """
+    file_path = special_chars_temp_dir / "main_test_ümlaut,file.srt"
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(sample_srt_content)
+
+    stdin_content = str(file_path)
+
+    with (
+        patch("sys.stdin", StringIO(stdin_content)),
+        patch(
+            "src.subscleaner.subscleaner.process_subtitle_files",
+            return_value=[str(file_path)],
+        ) as mock_process_subtitle_files,
+    ):
+        main()
+        mock_process_subtitle_files.assert_called_once_with([str(file_path)])

From 5f58289e8f70059b15f05f8c8ed4dae4cc611d65 Mon Sep 17 00:00:00 2001
From: Roger Gonzalez <roger@rogs.me>
Date: Wed, 19 Mar 2025 15:50:54 -0300
Subject: [PATCH 2/3] Update version to 1.2.0 in pyproject.toml

Update the version number in pyproject.toml from 1.1.5 to 1.2.0.
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d57b64e..890d84a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "subscleaner"
-version = "1.1.5"
+version = "1.2.0"
 description = "Remove advertisements from subtitle files"
 authors = ["Roger Gonzalez <roger@rogs.me>"]
 license = "GPL-3.0-or-later"

From 9d1b6c2342018ff72d558d9e5447b47e69792ab7 Mon Sep 17 00:00:00 2001
From: Roger Gonzalez <roger@rogs.me>
Date: Wed, 19 Mar 2025 15:52:43 -0300
Subject: [PATCH 3/3] Fix linter issues (import order, unused import)

---
 src/subscleaner/subscleaner.py | 2 +-
 tests/test_subscleaner.py      | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index e12a29b..967ab8a 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -19,10 +19,10 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 
 import os
+import pathlib
 import re
 import sys
 import time
-import pathlib
 
 import chardet
 import pysrt
diff --git a/tests/test_subscleaner.py b/tests/test_subscleaner.py
index 7147f8e..e9e9a7e 100644
--- a/tests/test_subscleaner.py
+++ b/tests/test_subscleaner.py
@@ -1,7 +1,6 @@
 """Unit tests for the subscleaner module."""
 
 import os
-import sys
 from io import StringIO
 from pathlib import Path
 from unittest.mock import patch