[Scummvm-git-logs] scummvm master -> 1e82f0760d77bd0bbc0d5dc6d7978f26c7fa2a00
aquadran
aquadran at gmail.com
Fri Nov 12 15:31:34 UTC 2021
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
1e82f0760d DEVTOOLS: allow correcting filenames when punycoding
Commit: 1e82f0760d77bd0bbc0d5dc6d7978f26c7fa2a00
https://github.com/scummvm/scummvm/commit/1e82f0760d77bd0bbc0d5dc6d7978f26c7fa2a00
Author: Misty De Meo (mistydemeo at gmail.com)
Date: 2021-11-12T16:31:31+01:00
Commit Message:
DEVTOOLS: allow correcting filenames when punycoding
Changed paths:
devtools/dumper-companion.py
diff --git a/devtools/dumper-companion.py b/devtools/dumper-companion.py
index b9a7c4c7e0..08cc59ea53 100755
--- a/devtools/dumper-companion.py
+++ b/devtools/dumper-companion.py
@@ -22,6 +22,7 @@ from binascii import crc_hqx
from pathlib import Path
from struct import pack, unpack
from typing import Any, ByteString, List, Tuple
+import unicodedata
import machfs
@@ -305,11 +306,14 @@ def extract_volume(args: argparse.Namespace) -> int:
return 0
-def punyencode_paths(paths: List[Path], verbose: bool = False) -> int:
+def punyencode_paths(paths: List[Path], verbose: bool = False, source_encoding: str = None) -> int:
"""Rename filepaths to their punyencoded names"""
count = 0
for path in paths:
- new_name = punyencode(path.name)
+ if source_encoding is not None:
+ new_name = punyencode(demojibake_hfs_bytestring(bytes(path.name, "utf8"), source_encoding))
+ else:
+ new_name = punyencode(path.name)
if path.stem != new_name:
count += 1
new_path = path.parent / new_name
@@ -319,13 +323,45 @@ def punyencode_paths(paths: List[Path], verbose: bool = False) -> int:
return count
+def demojibake_hfs_bytestring(s: ByteString, encoding: str):
+ """
+ Takes misinterpreted bytestrings from macOS and transforms
+ them into the correct interpretation.
+ When not able to figure out the correct encoding for legacy
+ non-Unicode HFS filesystems, which is most of the time, macOS
+ interprets filenames as though they're MacRoman. Once mounted,
+ the files are presented via all of the macOS filesystem APIs
+ as though they're UTF-8.
+ This is great for Western European languages, but falls over for
+ other languages. For example, Japanese filenames will be rendered
+ as gibberish (mojibake). This can be fixed by normalizing the
+ filenames' UTF-8 encoding, transforming it back to "MacRoman",
+ then correctly reinterpreting via the correct encoding.
+ """
+ return decode_bytestring(
+ # macOS renders paths as NFD, but to correctly translate
+ # this back to the original MacRoman, we first have to
+ # renormalize it to NFC.
+ unicodedata.normalize('NFC', s.decode('utf8')).encode('macroman'),
+ encoding
+ )
+
+
+def decode_bytestring(s: ByteString, encoding: str):
+ """Wrapper for decode() that can dispatch to decode_macjapanese"""
+ if encoding == "mac_japanese":
+ return decode_macjapanese(s)
+ else:
+ return s.decode(encoding)
+
+
def punyencode_arg(args: argparse.Namespace) -> int:
"""wrapper function"""
punyencode_dir(args.directory, verbose=True)
return 0
-def punyencode_dir(directory: Path, verbose: bool = False) -> int:
+def punyencode_dir(directory: Path, verbose: bool = False, source_encoding: str = None) -> int:
"""
Recursively punyencode all directory and filenames
@@ -333,6 +369,8 @@ def punyencode_dir(directory: Path, verbose: bool = False) -> int:
"""
files: List[Path] = []
dirs: List[Path] = []
+ if source_encoding is not None:
+ directory = Path(demojibake_hfs_bytestring(directory, source_encoding))
path_glob = directory.glob("**/*")
for item in path_glob:
if item.is_file():
@@ -342,8 +380,8 @@ def punyencode_dir(directory: Path, verbose: bool = False) -> int:
dirs.reverse() # start renaming with the one at the bottom
- count = punyencode_paths(files, verbose=verbose)
- count += punyencode_paths(dirs, verbose=verbose)
+ count = punyencode_paths(files, verbose=verbose, source_encoding=source_encoding)
+ count += punyencode_paths(dirs, verbose=verbose, source_encoding=source_encoding)
return count
@@ -410,7 +448,7 @@ def collect_forks(args: argparse.Namespace) -> int:
(info.st_mtime, info.st_mtime),
)
if punify:
- count_renames = punyencode_dir(directory, verbose=True)
+ count_renames = punyencode_dir(directory, verbose=True, source_encoding=args.source_encoding)
print(f"Macbinary {count_resources}, Renamed {count_renames} files")
return 0
@@ -472,6 +510,12 @@ def generate_parser() -> argparse.ArgumentParser:
action="store_true",
help="encode pathnames into punycode",
)
+ parser_macbinary.add_argument(
+ "--source-encoding",
+ metavar="source_encoding",
+ type=str,
+ help="encoding used for filenames in this path",
+ )
parser_macbinary.add_argument(
"dir", metavar="directory", type=Path, help="input directory"
)
More information about the Scummvm-git-logs
mailing list