[Scummvm-git-logs] scummvm master -> dd848d8ad8a21bae4212687d0ca927f5a06e703c
rvanlaar
roland at rolandvanlaar.nl
Mon Aug 23 16:16:23 UTC 2021
This automated email contains information about 4 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
9ddc4a88a3 DEVTOOLS: COMPANION: add decode string option
c957c45d7d DEVTOOLS: COMPANION: include suffix in dir mode
6cfd387798 JANITORAL: DEVTOOLS: fix formatting companion
dd848d8ad8 DEVTOOLS: COMPANION: remove old perl version
Commit: 9ddc4a88a3db049393d12c1e24b04a221f10d6cf
https://github.com/scummvm/scummvm/commit/9ddc4a88a3db049393d12c1e24b04a221f10d6cf
Author: Roland van Laar (roland at rolandvanlaar.nl)
Date: 2021-08-23T18:16:07+02:00
Commit Message:
DEVTOOLS: COMPANION: add decode string option
Will automatically decode punyencoded strings in str mode.
Changed paths:
devtools/dumper-companion.py
diff --git a/devtools/dumper-companion.py b/devtools/dumper-companion.py
index bfc5e15d40..be286d8de9 100755
--- a/devtools/dumper-companion.py
+++ b/devtools/dumper-companion.py
@@ -181,6 +181,26 @@ def escape_string(s: str) -> str:
return new_name
+def unescape_string(s: str) -> str:
+ """unescape strings"""
+
+ orig_name = ""
+ s_iter = iter(s)
+ hi = next(s_iter, None)
+ while hi is not None:
+ if hi == "\x81":
+ low = next(s_iter, None)
+ assert low is not None, "Error decoding string"
+ if low == "\x79":
+ orig_name += "\x81"
+ else:
+ orig_name += chr(ord(low) - 0x80)
+ else:
+ orig_name += hi
+ hi = next(s_iter, None)
+ return orig_name
+
+
def needs_punyencoding(orig: str) -> bool:
"""
A filename needs to be punyencoded when it:
@@ -206,18 +226,31 @@ def punyencode(orig: str) -> str:
encoded = s.encode("punycode").decode("ascii")
# punyencoding adds an '-' at the end when there are no special chars
# don't use it for comparing
- compare = encoded[:-1]
+ compare = encoded
+ if encoded.endswith("-"):
+ compare = encoded[:-1]
if orig != compare or compare[-1] in " .":
return "xn--" + encoded
return orig
+def decode_string(orig: str) -> str:
+ """
+ Decode punyencoded strings
+ """
+ st = orig[4:].encode("ascii").decode("punycode")
+ return unescape_string(st)
+
+
def encode_string(args: argparse.Namespace) -> int:
if args.string:
var = args.string
if args.stdin:
var = input()
- print(punyencode(var))
+ if var.startswith("xn--"):
+ print(decode_string(var))
+ else:
+ print(punyencode(var))
return 0
@@ -407,7 +440,7 @@ def generate_parser() -> argparse.ArgumentParser:
parser_dir.add_argument("directory", metavar="directory ", type=Path, help="Path")
parser_dir.set_defaults(func=punyencode_arg)
- parser_str = subparsers.add_parser("str", help="Punyencode strings or standard in")
+ parser_str = subparsers.add_parser("str", help="Convert strings or standard in to or from punycode")
parser_str.add_argument(
"--stdin", action="store_true", help="Convert stdin to punycode"
)
@@ -415,7 +448,7 @@ def generate_parser() -> argparse.ArgumentParser:
"string",
metavar="STRING",
type=str,
- help="Convert string to punycode",
+ help="Convert string to or from punycode",
nargs="?",
)
parser_str.set_defaults(func=encode_string)
@@ -465,10 +498,15 @@ def test_decode_mac_japanese():
def test_encode_string(capsys):
- call_test_parser(["str", "Icon\r"])
- captured = capsys.readouterr()
- assert captured.out == "xn--Icon-ja6e\n"
+ checks = [["Icon\r", "xn--Icon-ja6e"]]
+ for input, output in checks:
+ call_test_parser(["str", input])
+ captured = capsys.readouterr()
+ assert captured.out == output + "\n"
+ call_test_parser(["str", output])
+ captured = capsys.readouterr()
+ assert captured.out == input + "\n"
def test_encode_stdin(capsys, monkeypatch):
monkeypatch.setattr("sys.stdin", io.StringIO("Icon\r"))
@@ -484,8 +522,9 @@ def test_decode_name():
["ends with space ", "xn--ends with space -"],
["バッドデイ(Power PC)", "xn--(Power PC)-jx4ilmwb1a7h"],
]
- for input, expected in checks:
- assert punyencode(input) == expected
+ for input, output in checks:
+ assert punyencode(input) == output
+ assert decode_string(output) == input
def test_needs_punyencoding():
@@ -502,5 +541,6 @@ def test_needs_punyencoding():
def test_escape_string():
checks = [["\r", "\x81\x8d"], ["\x81", "\x81\x79"]]
- for input, expected in checks:
- assert escape_string(input) == expected
+ for input, output in checks:
+ assert escape_string(input) == output
+ assert unescape_string(output) == input
Commit: c957c45d7d6b464de3f2647c104f264a4cca057c
https://github.com/scummvm/scummvm/commit/c957c45d7d6b464de3f2647c104f264a4cca057c
Author: Roland van Laar (roland at rolandvanlaar.nl)
Date: 2021-08-23T18:16:07+02:00
Commit Message:
DEVTOOLS: COMPANION: include suffix in dir mode
The file stem, i.e. the part before the suffix, was used to punyencode
it. Now it takes the whole file, including suffix for encoding.
Changed paths:
devtools/dumper-companion.py
diff --git a/devtools/dumper-companion.py b/devtools/dumper-companion.py
index be286d8de9..7b6f57430f 100755
--- a/devtools/dumper-companion.py
+++ b/devtools/dumper-companion.py
@@ -301,7 +301,7 @@ def punyencode_paths(paths: List[Path], verbose: bool = False) -> int:
"""Rename filepaths to their punyencoded names"""
count = 0
for path in paths:
- new_name = punyencode(path.stem)
+ new_name = punyencode(path.name)
if path.stem != new_name:
count += 1
new_path = path.parent / new_name
Commit: 6cfd387798b0dab44ddbd1d05289719fc4fa7371
https://github.com/scummvm/scummvm/commit/6cfd387798b0dab44ddbd1d05289719fc4fa7371
Author: Roland van Laar (roland at rolandvanlaar.nl)
Date: 2021-08-23T18:16:07+02:00
Commit Message:
JANITORAL: DEVTOOLS: fix formatting companion
Changed paths:
devtools/dumper-companion.py
diff --git a/devtools/dumper-companion.py b/devtools/dumper-companion.py
index 7b6f57430f..862990850a 100755
--- a/devtools/dumper-companion.py
+++ b/devtools/dumper-companion.py
@@ -326,7 +326,7 @@ def punyencode_dir(directory: Path, verbose: bool = False) -> int:
files: List[Path] = []
dirs: List[Path] = []
path_glob = directory.glob("**/*")
- next(path_glob) # Don't punyencode the root directory
+ next(path_glob) # Don't punyencode the root directory
for item in path_glob:
if item.is_file():
files.append(item)
Commit: dd848d8ad8a21bae4212687d0ca927f5a06e703c
https://github.com/scummvm/scummvm/commit/dd848d8ad8a21bae4212687d0ca927f5a06e703c
Author: Roland van Laar (roland at rolandvanlaar.nl)
Date: 2021-08-23T18:16:07+02:00
Commit Message:
DEVTOOLS: COMPANION: remove old perl version
The python dumper-companion had the same and more features as the perl
one.
Changed paths:
R devtools/dumper-companion.pl
diff --git a/devtools/dumper-companion.pl b/devtools/dumper-companion.pl
deleted file mode 100755
index 67de9f373b..0000000000
--- a/devtools/dumper-companion.pl
+++ /dev/null
@@ -1,453 +0,0 @@
-#!/usr/bin/perl
-#
-# Dumping Mac files into MacBinary format
-# Extractins HFS+ disk volumes
-# Encoding/decoding into punycode
-
-use strict;
-use utf8;
-use Carp;
-
-use Getopt::Std;
-use Encode;
-use File::Find;
-
-use integer;
-
-use constant BASE => 36;
-use constant TMIN => 1;
-use constant TMAX => 26;
-use constant SKEW => 38;
-use constant DAMP => 700;
-use constant INITIAL_BIAS => 72;
-use constant INITIAL_N => 128;
-
-use constant UNICODE_MIN => 0;
-use constant UNICODE_MAX => 0x10FFFF;
-
-my $Delimiter = chr 0x2D;
-my $BasicRE = "\x00-\x7f";
-my $PunyRE = "A-Za-z0-9";
-my $outPath = "./";
-my $verbose = 0;
-
-sub VERSION_MESSAGE() {
- print "$0 version 1.0\n"
-}
-
-sub HELP_MESSAGE();
-sub processIso($);
-sub processMacbinary();
-sub decode_punycode;
-sub encode_punycode;
-sub encode_punycodefilename;
-sub system1($);
-
-getopts('hmf:c:edsS:o:v');
-
-if ($::opt_h) {
- HELP_MESSAGE();
- exit 0;
-}
-
-if ($::opt_v) {
- $verbose = 1;
-}
-
-if ($::opt_s) {
- local $/;
- my $input = <>;
-
- print encode_punycodefilename($input) . "\n";
- exit 0;
-}
-
-if ($::opt_S) {
- print encode_punycodefilename($::opt_S) . "\n";
- exit 0;
-}
-
-if ($::opt_o) {
- $outPath = $::opt_o;
-
- if ($outPath !~ m'/$') {
- $outPath .= "/";
- }
-
- if (not -d $outPath) {
- die "Directory $outPath does not exits";
- }
-}
-
-if ($::opt_m) {
- processMacbinary;
- exit 0;
-}
-
-if ($::opt_f) {
- processIso($::opt_f);
-}
-
-exit 0;
-
-sub processIso($) {
- my $isofile = shift;
-
- print "Mounting ISO...";
- flush STDOUT;
- print "\n" if $verbose;
-
- my $redirect = $verbose ? "" : ">/dev/null 2>&1";
-
- system1("hmount \"$isofile\" $redirect") == 0 or die "Can't execute hmount";
-
- print "done\n" unless $verbose;
-
- print "C: hls -1alRU\n" if $verbose;
-
- open(my $ls, "-|", "hls -1alRU");
-
- my @lines;
-
- while (<$ls>) {
- print "LS: $_" if $verbose;
-
- push @lines, $_;
- }
-
- close $ls;
-
-
- my $dir = "";
- my $mdir = "";
-
- my $numfiles = 0;
- my $numdirs = 0;
- my $numrens = 0;
- my $numres = 0;
- my $prevlen = 0;
-
- for $_ (@lines) {
- print "LINE: $_" if $verbose;
-
- chomp;
-
- if (/^:/) {
- $mdir = $_;
- s/^://;
- $dir = $_;
-
- if ($::opt_c) {
- $dir = encode_utf8(decode($::opt_c, $dir));
- }
-
- if ($::opt_e) {
- my $dir1 = $dir;
- my $changed = 0;
-
- $dir = join '/', map { my $a = encode_punycodefilename $_; $changed = 1 if $a ne $_; $a } split /:/, $dir;
-
- if ($changed) {
- $dir1 =~ s/([\x00-\x1f])/@{[sprintf "\\x%02x", ord($1)]}/g;
- print ((" " x $prevlen) . "\r") unless $verbose;
- print "Renamed dir \"$dir1\" -> \"$dir\"\n";
- $numrens++;
- }
- }
-
- $dir .= '/' if $dir !~ m'/$';
-
- mkdir "$outPath$dir";
- print "mkdir \"$outPath$dir\"\n" if $verbose;
- $numdirs++;
- } elsif (/^[fF]/) {
- if (/[fF]i?\s+.{4}\/.{4}\s+([0-9]+)\s+([0-9]+)\s+\w+\s+\d+\s+\d+:?\d*\s+(.*)/) {
- my $res = $1;
- my $data = $2;
- my $fname = $3;
-
- my $decfname = $fname;
-
- if ($::opt_c) {
- $decfname = encode_utf8(decode($::opt_c, $fname));
- }
-
- if ($::opt_e) {
- my $decfname1 = $decfname;
- $decfname = encode_punycodefilename $decfname;
- if ($decfname1 ne $decfname) {
- $decfname1 =~ s/([\x00-\x1f])/@{[sprintf "\\x%02x", ord($1)]}/g;
- print ((" " x $prevlen) . "\r") unless $verbose;
- print "Renamed file \"$decfname1\" -> \"$decfname\"" . ($res != 0 ? ", macbinary\n" : "\n");
- $numrens++;
- } else {
- if ($res != 0) {
- print ((" " x $prevlen) . "\r") unless $verbose;
- print "Resource \"$decfname\"\n" if $verbose;
- }
- }
- }
-
- print ((" " x $prevlen) . "\r") unless $verbose;
- print "$dir$decfname\r" unless $verbose;
- $prevlen = length "$dir$decfname";
- flush STDOUT;
-
- $fname =~ s/([*+\[\]\|<>&`\^\{\}\\])/\\$1/g; # Files cound have special symbols in them, escape, so shell is happy
-
- if ($res != 0) {
- system1("hcopy -m -- \"$mdir$fname\" \"$outPath$dir$decfname\"") == 0 or die "Can't execute hcopy";
- $numres++;
- } else {
- system1("hcopy -r -- \"$mdir$fname\" \"$outPath$dir$decfname\"") == 0 or die "Can't execute hcopy";
- }
- $numfiles++;
- } else {
- die "Bad format:\n$_\n";
- }
- }
- }
- print ((" " x $prevlen) . "\r") unless $verbose;
- print "Extracted $numdirs dirs and $numfiles files, Macbinary $numres files, made $numrens renames\n";
-
- print "Unmounting ISO...";
- flush STDOUT;
- print "\n" if $verbose;
-
- system1("humount $redirect") == 0 or die "Can't execute humount";
- print "done\n" unless $verbose;
-}
-
-sub processMacbinary() {
- my $countres = 0;
- my $countren = 0;
-
- find( sub {
- my $fname = $_;
- my $fname1 = $fname;
-
- if (open F, "$fname/..namedfork/rsrc") {
- print "Resource in $fname\n";
- close F;
-
- $countres++;
-
- system1("macbinary encode \"$fname\"");
- system1("touch -r \"$fname\" \"$fname.bin\"");
-
- if ($::opt_e) {
- $fname1 = encode_punycodefilename $fname;
- }
-
- if ($fname1 ne $fname) {
- print "Renamed \"$fname\" -> \"$fname1\"\n" unless $verbose;
- $countren++;
- }
-
- system1("mv \"$fname.bin\" \"$fname1\"");
- } else {
- if ($::opt_e) {
- $fname1 = encode_punycodefilename $fname;
-
- if ($fname1 ne $fname) {
- system1("mv \"$fname\" \"$fname1\"");
-
- $fname =~ s/([\x00-\x1f])/@{[sprintf "\\x%02x", ord($1)]}/g;
- print "Renamed \"$fname\" -> \"$fname1\"\n" unless $verbose;
- $countren++;
- }
- }
- }
-
- }, ".");
- print "Macbinary $countres files, Renamed $countren files\n";
-}
-
-sub HELP_MESSAGE() {
- print <<EOF;
-Usage: $0 [OPTIONS]...
-
-Dumping Mac files into MacBinary format
-
-There are 3 operation modes. Direct MacBinary encoding (Mac-only) and dumping ISO
-contents with hfsutils.
-
-Mode 1:
- $0 -m [-e]
- Operate in MacBinary encoding mode. Recursively encode all resource forks in the current directory.
- It works only in-place.
- -e encode filenames into punycode
-
-Mode 2:
- $0 [-c <encoding>] [-e] [-o directory] -f <file.iso>
- Operate in disk dumping mode
- Optionally specify encoding (MacRoman, MacJapanese)
- If -e is specified, then encode filenames into punycode
- If -o is specified, outputs the file to the specified directory
-
-Mode 3:
- $0 -s
- Read whole standard input and encode it with punycode
- $0 -S <string>
- Encodes specified string with punycode
-
-Miscellaneous:
- -h, --help display this help and exit
-EOF
-}
-
-sub system1($) {
- my $cmd = shift;
-
- print "C: $cmd\n" if $verbose;
-
- return system $cmd;
-}
-
-######### Punycode implementation.
-## Borrowed from Net::IDN::Punycode::PP CPAN module version 2.500
-##
-## Copyright 2002-2004 Tatsuhiko Miyagawa miyagawa at bulknews.net
-##
-## Copyright 2007-2018 Claus FE<auml>rber CFAERBER at cpan.org
-
-sub _adapt {
- my($delta, $numpoints, $firsttime) = @_;
- $delta = int($firsttime ? $delta / DAMP : $delta / 2);
- $delta += int($delta / $numpoints);
- my $k = 0;
- while ($delta > int(((BASE - TMIN) * TMAX) / 2)) {
- $delta /= BASE - TMIN;
- $k += BASE;
- }
- return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
-}
-
-sub decode_punycode {
- no warnings 'utf8';
-
- my $input = shift;
-
- my $n = INITIAL_N;
- my $i = 0;
- my $bias = INITIAL_BIAS;
- my @output;
-
- return undef unless defined $input;
- return '' unless length $input;
- return $input unless $input =~ m/^xn--/;
-
- $input =~ s/^xn--//;
-
- if ($input =~ s/(.*)$Delimiter//os) {
- my $base_chars = $1;
- croak("non-base character in input for decode_punycode")
- if $base_chars =~ m/[^$BasicRE]/os;
- push @output, split //, $base_chars;
- }
- my $code = $input;
-
- croak('invalid digit in input for decode_punycode') if $code =~ m/[^$PunyRE]/os;
-
- utf8::downgrade($input); ## handling failure of downgrade is more expensive than
- ## doing the above regexp w/ utf8 semantics
-
- while (length $code) {
- my $oldi = $i;
- my $w = 1;
- LOOP:
- for (my $k = BASE; 1; $k += BASE) {
- my $cp = substr($code, 0, 1, '');
- croak("incomplete encoded code point in decode_punycode") if !defined $cp;
- my $digit = ord $cp;
-
- ## NB: this depends on the PunyRE catching invalid digit characters
- ## before they turn up here
- ##
- $digit = $digit < 0x40 ? $digit + (26-0x30) : ($digit & 0x1f) -1;
-
- $i += $digit * $w;
- my $t = $k - $bias;
- $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;
-
- last LOOP if $digit < $t;
- $w *= (BASE - $t);
- }
- $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
- $n += $i / (@output + 1);
- $i = $i % (@output + 1);
- croak('invalid code point') if $n < UNICODE_MIN or $n > UNICODE_MAX;
- splice(@output, $i, 0, chr($n));
- $i++;
- }
- return join '', @output;
-}
-
-sub encode_punycode {
- no warnings 'utf8';
-
- my $input = shift;
- utf8::decode($input);
- my $input_length = length $input;
-
- my $output = $input; $output =~ s/[^$BasicRE]+//ogs;
-
- my $h = my $bb = length $output;
- $output .= $Delimiter if $bb > 0;
- utf8::downgrade($output); ## no unnecessary use of utf8 semantics
-
- my @input = map ord, split //, $input;
- my @chars = sort { $a<=> $b } grep { $_ >= INITIAL_N } @input;
-
- my $n = INITIAL_N;
- my $delta = 0;
- my $bias = INITIAL_BIAS;
-
- foreach my $m (@chars) {
- next if $m < $n;
- $delta += ($m - $n) * ($h + 1);
- $n = $m;
- for (my $i = 0; $i < $input_length; $i++) {
- my $c = $input[$i];
- $delta++ if $c < $n;
- if ($c == $n) {
- my $q = $delta;
- LOOP:
- for (my $k = BASE; 1; $k += BASE) {
- my $t = $k - $bias;
- $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;
-
- last LOOP if $q < $t;
-
- my $o = $t + (($q - $t) % (BASE - $t));
- $output .= chr $o + ($o < 26 ? 0x61 : 0x30-26);
-
- $q = int(($q - $t) / (BASE - $t));
- }
- croak("input exceeds punycode limit") if $q > BASE;
- $output .= chr $q + ($q < 26 ? 0x61 : 0x30-26);
-
- $bias = _adapt($delta, $h + 1, $h == $bb);
- $delta = 0;
- $h++;
- }
- }
- $delta++;
- $n++;
- }
- return 'xn--' . $output;
-}
-
-sub encode_punycodefilename {
- my $decfname = shift;
-
- $decfname =~ s/\x81/\x81\x79/g;
- # escape non-printables, "/"*[]:+|"
- $decfname =~ s/([\x00-\x1f\/":\*\[\]\+\|\\?%<>,;=])/\x81@{[chr(ord($1) + 0x80)]}/g;
-
- if ($decfname =~ /[\x80-\xff]/) {
- $decfname = encode_punycode $decfname;
- }
-
- return $decfname;
-}
More information about the Scummvm-git-logs
mailing list