[Scummvm-git-logs] scummvm master -> 3cb57e2078daa86a9b7fcfd76ad151ffd6890af5
bluegr
bluegr at gmail.com
Sat Aug 24 17:13:10 CEST 2019
This automated email contains information about 37 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
965f894224 CONFIGURE: Add option to compile with iconv.
dfbbd228e6 COMMON: Add encoding conversion using iconv
a43526cb3e OSYSTEM: Add convertEncoding virtual method
d6d6ac1823 SDL: Add SDL implementation of convertEncoding
bfdff38d01 COMMON: Use backend specific encoding conversion
807c55e800 WIN32: Add a way to convert codepage name to cp ID
0db194eed4 WIN32: Add include guard to codepage.h
8c284c0917 WIN32: Add Win32 implementation of convertEncoding
888f3e4d30 COMMON: Add transMan mapping encoding conversion.
61cf628bfb COMMON: Add cyrilic transliteration to Encoding.
73fa9d921f COMMON: Add documentation to Common::Encoding
5043dec13c COMMON: Add propper Encoding setters
e01f0af5b0 WIN32: Check calloc return value in covertEncoding
9fa09eeefe COMMON: Check for return values in Encoding
24d35df476 COMMON: Refactor convertIconv
613613568c COMMON: Rename methods in Common::Encoding
93c6b2fafc WIN32: Implement conversion to and from UTF-32
3513972e92 TEST: Add tests for Common::Encoding
a0564bc564 COMMON: Resolve endianity in Encoding
3e4b5c7d3b WIN32: Resolve endianity in convertEncoding()
5fcd14fbea TEST: Remove tests for ascii transliteration
41d3a70c58 SDL: Fix convertEncoding for multibyte encodings.
4edf35e414 WIN32: Fix conversion of multibyte encodings.
96270d4bf2 JANITORIAL: Remove debuging code.
b20922942c COMMON: Add missing documentation.
1346dcc3ef TESTBED: Move encoding conversion tests to testbed
05df774905 COMMON: Add OSystem::convertEncoding documentation
f2715d77ed TESTBED: Comment correction
4de634ee76 CONFIGURE: Move check for iconv down
6dba0bbfd4 SDL: Remove check for SDL2 in convertEncoding()
f8ac40af7c COMMON: Encoding refactoring
04b28b208d WIN32: Fix infinite loop when converting from utf32
e0f2a3460a WIN32: Move getCodePageId to codepage.h
0c74a7f27e COMMON: Fix typos
fcbf59f5ba COMMON: Convert endianity when needed in Encoding.
6e72cd2c08 WIN32: Handle endianity in convertEncoding
3cb57e2078 WIN32: Delete obsolete comment in convertEncoding.
Commit: 965f89422474e8eab4c01b620f9e44146a147dcf
https://github.com/scummvm/scummvm/commit/965f89422474e8eab4c01b620f9e44146a147dcf
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
CONFIGURE: Add option to compile with iconv.
Changed paths:
configure
diff --git a/configure b/configure
index 90adb94..419c93d 100755
--- a/configure
+++ b/configure
@@ -165,6 +165,7 @@ _taskbar=auto
_updates=no
_libunity=auto
_dialogs=auto
+_iconv=auto
# Default option behavior yes/no
_debug_build=auto
_release_build=auto
@@ -1139,6 +1140,9 @@ Optional Libraries:
--with-libcurl-prefix=DIR prefix where libcurl is installed (optional)
--disable-libcurl disable libcurl networking library [autodetect]
+ --with-iconv-prefix=DIR prefix where libiconv is installed (optional)
+ --disable-iconv disable libiconv encoding conversion library [autodetect]
+
Some influential environment variables:
AR archiver command
AS assembler command
@@ -1270,6 +1274,8 @@ for ac_option in $@; do
--disable-eventrecorder) _eventrec=no ;;
--enable-text-console) _text_console=yes ;;
--disable-text-console) _text_console=no ;;
+ --enable-iconv) _iconv=yes ;;
+ --disable-iconv) _iconv=no ;;
--with-fluidsynth-prefix=*)
arg=`echo $ac_option | cut -d '=' -f 2`
FLUIDSYNTH_CFLAGS="-I$arg/include"
@@ -1369,6 +1375,11 @@ for ac_option in $@; do
SDL_NET_CFLAGS="-I$arg/include"
SDL_NET_LIBS="-L$arg/lib"
;;
+ --with-iconv-prefix=*)
+ arg=`echo $ac_option | cut -d '=' -f 2`
+ ICONV_CFLAGS="-I$arg/include"
+ ICONV_LIBS="-L$arg/lib"
+ ;;
--backend=*)
_backend=`echo $ac_option | cut -d '=' -f 2`
;;
@@ -4207,6 +4218,25 @@ define_in_config_if_yes "$_vorbis" 'USE_VORBIS'
echo "$_vorbis"
#
+# Check for iconv
+#
+echocheck "Iconv"
+if test "$_iconv" = auto ; then
+ _iconv=no
+ cat > $TMPC << EOF
+#include <iconv.h>
+int main(void) { iconv_t conv = iconv_open("UTF-8//IGNORE", "CP850"); return 0; }
+EOF
+ cc_check $ICONV_CFLAGS $ICONV_LIBS -liconv && _iconv=yes
+fi
+if test "$_iconv" = yes ; then
+ append_var LIBS "$ICONV_LIBS -liconv"
+ append_var INCLUDES "$ICONV_CFLAGS"
+fi
+define_in_config_if_yes "$_iconv" 'USE_ICONV'
+echo "$_iconv"
+
+#
# Check for Tremor
#
echocheck "Tremor"
Commit: dfbbd228e621679ce25caafafd82cea09dabecfb
https://github.com/scummvm/scummvm/commit/dfbbd228e621679ce25caafafd82cea09dabecfb
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add encoding conversion using iconv
Changed paths:
A common/encoding.cpp
A common/encoding.h
common/module.mk
diff --git a/common/encoding.cpp b/common/encoding.cpp
new file mode 100644
index 0000000..43dce04
--- /dev/null
+++ b/common/encoding.cpp
@@ -0,0 +1,154 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "common/encoding.h"
+#include "common/debug.h"
+#include "common/textconsole.h"
+#include <cerrno>
+
+namespace Common {
+
+Encoding::Encoding(const String &to, const String &from)
+ : _to(to)
+ , _from(from) {
+#ifdef USE_ICONV
+ String toTranslit = to + "//TRANSLIT";
+ _iconvHandle = iconv_open(toTranslit.c_str(), from.c_str());
+#endif // USE_ICONV
+}
+
+Encoding::~Encoding() {
+#ifdef USE_ICONV
+ if (_iconvHandle != (iconv_t) -1)
+ iconv_close(_iconvHandle);
+#endif // USE_ICONV
+}
+
+char *Encoding::convert(const char *string, size_t size) {
+#ifndef USE_ICONV
+ _iconvHandle = 0;
+#endif
+ return doConversion(_iconvHandle, _to, _from, string, size);
+}
+
+char *Encoding::convert(const String &to, const String &from, const char *string, size_t size) {
+#ifdef USE_ICONV
+ String toTranslit = to + "//TRANSLIT";
+ iconv_t iconvHandle = iconv_open(toTranslit.c_str(), from.c_str());
+#else
+ iconv_t iconvHandle = 0;
+#endif // USE_ICONV
+
+ char *result = doConversion(iconvHandle, to, from, string, size);
+
+#ifdef USE_ICONV
+ if (iconvHandle != (iconv_t) -1)
+ iconv_close(iconvHandle);
+#endif // USE_ICONV
+ return result;
+}
+
+char *Encoding::doConversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+ char *result = nullptr;
+#ifdef USE_ICONV
+ if (iconvHandle != (iconv_t) -1)
+ result = convertIconv(iconvHandle, string, length);
+ else
+ debug("Could not convert from %s to %s using iconv", from.c_str(), to.c_str());
+ if (result == nullptr)
+ debug("Error while converting with iconv");
+#else
+ debug("Iconv is not available");
+#endif // USE_ICONV
+
+ return result;
+}
+
+char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t length) {
+#ifdef USE_ICONV
+ debug("Trying iconv...");
+
+ size_t inSize = length;
+ size_t outSize = inSize;
+ size_t stringSize = inSize > 4 ? inSize : outSize;
+
+
+#ifdef ICONV_USES_CONST
+ const char *src = string;
+#else
+ char *src = new char[length];
+ char *originalSrc = src;
+ memcpy(src, string, length);
+#endif // ICONV_USES_CONST
+
+ char *buffer = (char *) malloc(sizeof(char) * stringSize);
+ if (!buffer) {
+ warning ("Cannot allocate memory for converting string");
+ return nullptr;
+ }
+ memset(buffer, 0, stringSize);
+ char *dst = buffer;
+ bool error = false;
+
+ while (inSize > 0) {
+ if (iconv(iconvHandle, &src, &inSize, &dst, &outSize) == ((size_t)-1)) {
+ // from SDLs implementation of SDL_iconv_string (slightly altered)
+ if (errno == E2BIG) {
+ char *oldString = buffer;
+ stringSize *= 2;
+ buffer = (char *) realloc(buffer, stringSize);
+ if (!buffer) {
+ warning ("Cannot allocate memory for converting string");
+ error = true;
+ break;
+ }
+ dst = buffer + (dst - oldString);
+ outSize = stringSize - (dst - buffer);
+ memset(dst, 0, stringSize / 2);
+ } else {
+ error = true;
+ debug("iconv failed");
+ break;
+ }
+ }
+ }
+ // Add a zero character to the end. Hopefuly UTF32 uses the most bytes from
+ // all possible encodings, so add 4 zero bytes.
+ buffer = (char *) realloc(buffer, stringSize + 4);
+ memset(buffer + stringSize, 0, 4);
+
+#ifndef ICONV_USES_CONST
+ delete[] originalSrc;
+#endif // ICONV_USES_CONST
+
+ if (error)
+ return nullptr;
+ debug("Size: %d", stringSize);
+
+ return buffer;
+#else
+ debug("Iconv isn't available");
+ return nullptr;
+#endif //USE_ICONV
+}
+
+}
diff --git a/common/encoding.h b/common/encoding.h
new file mode 100644
index 0000000..3fbd482
--- /dev/null
+++ b/common/encoding.h
@@ -0,0 +1,67 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef COMMON_ENCODING_H
+#define COMMON_ENCODING_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif // HAVE_CONFIG_H
+
+#ifdef USE_ICONV
+#include <iconv.h>
+#else
+typedef void* iconv_t;
+#endif // USE_ICONV
+
+#include "common/scummsys.h"
+#include "common/str.h"
+
+namespace Common {
+
+class Encoding {
+ public:
+ Encoding(const String &to, const String &from);
+ ~Encoding();
+
+ char *convert(const char *string, size_t length);
+ static char *convert(const String &to, const String &from, const char *string, size_t length);
+
+ String getFrom() {return _from;};
+ void setFrom(const String &from) {_from = from;};
+
+ String getTo() {return _to;};
+ void setTo(const String &to) {_to = to;};
+
+ private:
+ String _to;
+ String _from;
+
+ static char *doConversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+
+ iconv_t _iconvHandle;
+ static char *convertIconv(iconv_t iconvHandle, const char *string, size_t length);
+
+};
+
+}
+
+#endif // COMMON_ENCODING_H
diff --git a/common/module.mk b/common/module.mk
index 5ab2367..46e32d7 100644
--- a/common/module.mk
+++ b/common/module.mk
@@ -50,6 +50,7 @@ MODULE_OBJS += \
dct.o \
fft.o \
rdft.o \
+ encoding.o \
sinetables.o
ifdef ENABLE_EVENTRECORDER
Commit: a43526cb3ea60481ca8013c7453b13022630f177
https://github.com/scummvm/scummvm/commit/a43526cb3ea60481ca8013c7453b13022630f177
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
OSYSTEM: Add convertEncoding virtual method
Changed paths:
common/system.h
diff --git a/common/system.h b/common/system.h
index c66b5f6..92fdfc5 100644
--- a/common/system.h
+++ b/common/system.h
@@ -60,6 +60,7 @@ class HardwareInputSet;
class Keymap;
class KeymapperDefaultBindings;
#endif
+class Encoding;
}
class AudioCDManager;
@@ -107,6 +108,7 @@ enum Type {
* control audio CD playback, and sound output.
*/
class OSystem : Common::NonCopyable {
+ friend class Common::Encoding;
protected:
OSystem();
virtual ~OSystem();
@@ -1490,6 +1492,9 @@ public:
virtual bool isConnectionLimited();
//@}
+
+ protected:
+ virtual char *convertEncoding(const char *to, const char *from, const char *string, size_t length) { return nullptr; }
};
Commit: d6d6ac1823309b5a9ec05740442a8d7066fb8717
https://github.com/scummvm/scummvm/commit/d6d6ac1823309b5a9ec05740442a8d7066fb8717
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
SDL: Add SDL implementation of convertEncoding
Changed paths:
backends/platform/sdl/sdl.cpp
backends/platform/sdl/sdl.h
diff --git a/backends/platform/sdl/sdl.cpp b/backends/platform/sdl/sdl.cpp
index e4726dc..105a232 100644
--- a/backends/platform/sdl/sdl.cpp
+++ b/backends/platform/sdl/sdl.cpp
@@ -768,3 +768,13 @@ int SDL_SetColorKey_replacement(SDL_Surface *surface, Uint32 flag, Uint32 key) {
}
#endif
+char *OSystem_SDL::convertEncoding(const char *to, const char *from, const char *string, size_t length) {
+#if SDL_VERSION_ATLEAST(2, 0, 0)
+ debug("Trying SDL...");
+ return SDL_iconv_string(to, from, string, length + 1);
+#else
+ debug("SDL isn't available");
+ return nullptr;
+#endif // SDL_VERSION_ATLEAST(2, 0, 0)
+}
+
diff --git a/backends/platform/sdl/sdl.h b/backends/platform/sdl/sdl.h
index 521e67a..ccbaedd 100644
--- a/backends/platform/sdl/sdl.h
+++ b/backends/platform/sdl/sdl.h
@@ -157,6 +157,8 @@ protected:
virtual bool setGraphicsMode(int mode);
virtual int getGraphicsMode() const;
#endif
+protected:
+ virtual char *convertEncoding(const char *to, const char *from, const char *string, size_t length);
};
#endif
Commit: bfdff38d01aae131cefb6d48e23d44b6ae2fa234
https://github.com/scummvm/scummvm/commit/bfdff38d01aae131cefb6d48e23d44b6ae2fa234
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Use backend specific encoding conversion
Changed paths:
common/encoding.cpp
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 43dce04..b2a704c 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -23,6 +23,7 @@
#include "common/encoding.h"
#include "common/debug.h"
#include "common/textconsole.h"
+#include "common/system.h"
#include <cerrno>
namespace Common {
@@ -79,6 +80,11 @@ char *Encoding::doConversion(iconv_t iconvHandle, const String &to, const String
#else
debug("Iconv is not available");
#endif // USE_ICONV
+ if (result == nullptr)
+ result = g_system->convertEncoding(to.c_str(), from.c_str(), string, length);
+
+ if (result == nullptr)
+ debug("Could not convert from %s to %s using backend specific conversion", from.c_str(), to.c_str());
return result;
}
Commit: 807c55e800bb69f606eb444237edf97d8056a80a
https://github.com/scummvm/scummvm/commit/807c55e800bb69f606eb444237edf97d8056a80a
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Add a way to convert codepage name to cp ID
Changed paths:
A backends/platform/sdl/win32/codepage.cpp
A backends/platform/sdl/win32/codepage.h
backends/platform/sdl/module.mk
diff --git a/backends/platform/sdl/module.mk b/backends/platform/sdl/module.mk
index 62ef94f..5d34177 100644
--- a/backends/platform/sdl/module.mk
+++ b/backends/platform/sdl/module.mk
@@ -23,7 +23,8 @@ MODULE_OBJS += \
win32/win32-main.o \
win32/win32-window.o \
win32/win32_wrapper.o \
- win32/win32.o
+ win32/win32.o \
+ win32/codepage.o
endif
ifdef AMIGAOS
diff --git a/backends/platform/sdl/win32/codepage.cpp b/backends/platform/sdl/win32/codepage.cpp
new file mode 100644
index 0000000..2bb3501
--- /dev/null
+++ b/backends/platform/sdl/win32/codepage.cpp
@@ -0,0 +1,39 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifdef WIN32
+#include "backends/platform/sdl/win32/codepage.h"
+namespace Win32 {
+
+int getCodePageId(Common::String codePageName) {
+ const CodePageDescription *cp = g_cpDescriptions;
+ for (; cp->name; cp++) {
+ if (codePageName.equalsIgnoreCase(cp->name))
+ return cp->id;
+ }
+ return -1;
+}
+
+}
+
+#endif
+
diff --git a/backends/platform/sdl/win32/codepage.h b/backends/platform/sdl/win32/codepage.h
new file mode 100644
index 0000000..98ef3ce
--- /dev/null
+++ b/backends/platform/sdl/win32/codepage.h
@@ -0,0 +1,193 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "common/scummsys.h"
+#include "common/str.h"
+namespace Win32 {
+struct CodePageDescription {
+ const char *name;
+ int id;
+};
+
+//For more info look at https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
+//If there is a different name for a codepage, just add another row.
+const CodePageDescription g_cpDescriptions[] = {
+ {"ibm037", 37}, //IBM EBCDIC US-Canada
+ {"ibm437", 437}, //OEM United States
+ {"ibm500", 500}, //IBM EBCDIC International
+ {"asmo-708", 708}, //Arabic (ASMO 708)
+ {"arabic1", 709}, //Arabic (ASMO-449+, BCON V4)
+ {"arabic2", 710}, //Arabic - Transparent Arabic
+ {"dos-720", 720}, //Arabic (Transparent ASMO); Arabic (DOS)
+ {"ibm737", 737}, //OEM Greek (formerly 437G); Greek (DOS)
+ {"ibm775", 775}, //OEM Baltic; Baltic (DOS)
+ {"ibm850", 850}, //OEM Multilingual Latin 1; Western European (DOS)
+ {"cp850", 850}, //OEM Multilingual Latin 1; Western European (DOS)
+ {"ascii", 850}, //We have multiple choices for codepage for ascii, this is one of many that works
+ {"ibm852", 852}, //OEM Latin 2; Central European (DOS)
+ {"ibm855", 855}, //OEM Cyrillic (primarily Russian)
+ {"ibm857", 857}, //OEM Turkish; Turkish (DOS)
+ {"ibm00858", 858}, //OEM Multilingual Latin 1 + Euro symbol
+ {"ibm858", 858}, //OEM Multilingual Latin 1 + Euro symbol
+ {"ibm860", 860}, //OEM Portuguese; Portuguese (DOS)
+ {"ibm861", 861}, //OEM Icelandic; Icelandic (DOS)
+ {"dos-862", 862}, //OEM Hebrew; Hebrew (DOS)
+ {"ibm863", 863}, //OEM French Canadian; French Canadian (DOS)
+ {"ibm864", 864}, //OEM Arabic; Arabic (864)
+ {"ibm865", 865}, //OEM Nordic; Nordic (DOS)
+ {"cp866", 866}, //OEM Russian; Cyrillic (DOS)
+ {"ibm869", 869}, //OEM Modern Greek; Greek, Modern (DOS)
+ {"ibm870", 870}, //IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
+ {"windows-874", 874}, //ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
+ {"cp875", 875}, //IBM EBCDIC Greek Modern
+ {"shift_jis", 932}, //ANSI/OEM Japanese; Japanese (Shift-JIS)
+ {"gb2312", 936}, //ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
+ {"ks_c_5601-1987", 949}, //ANSI/OEM Korean (Unified Hangul Code)
+ {"big5", 950}, //ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
+ {"ibm1026", 1026}, //IBM EBCDIC Turkish (Latin 5)
+ {"ibm01047", 1047}, //IBM EBCDIC Latin 1/Open System
+ {"ibm01140", 1140}, //IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
+ {"ibm01141", 1141}, //IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
+ {"ibm01142", 1142}, //IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
+ {"ibm01143", 1143}, //IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
+ {"ibm01144", 1144}, //IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
+ {"ibm01145", 1145}, //IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
+ {"ibm01146", 1146}, //IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
+ {"ibm01147", 1147}, //IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
+ {"ibm01148", 1148}, //IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
+ {"ibm01149", 1149}, //IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
+ {"utf-16", 1200}, //Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
+ {"unicodefffe", 1201}, //Unicode UTF-16, big endian byte order; available only to managed applications
+ {"windows-1250", 1250}, //ANSI Central European; Central European (Windows)
+ {"windows-1251", 1251}, //ANSI Cyrillic; Cyrillic (Windows)
+ {"windows-1252", 1252}, //ANSI Latin 1; Western European (Windows)
+ {"windows-1253", 1253}, //ANSI Greek; Greek (Windows)
+ {"windows-1254", 1254}, //ANSI Turkish; Turkish (Windows)
+ {"windows-1255", 1255}, //ANSI Hebrew; Hebrew (Windows)
+ {"windows-1256", 1256}, //ANSI Arabic; Arabic (Windows)
+ {"windows-1257", 1257}, //ANSI Baltic; Baltic (Windows)
+ {"windows-1258", 1258}, //ANSI/OEM Vietnamese; Vietnamese (Windows)
+ {"johab", 1361}, //Korean (Johab)
+ {"macintosh", 10000}, //MAC Roman; Western European (Mac)
+ {"x-mac-japanese", 10001}, //Japanese (Mac)
+ {"x-mac-chinesetrad", 10002}, //MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
+ {"x-mac-korean", 10003}, //Korean (Mac)
+ {"x-mac-arabic", 10004}, //Arabic (Mac)
+ {"x-mac-hebrew", 10005}, //Hebrew (Mac)
+ {"x-mac-greek", 10006}, //Greek (Mac)
+ {"x-mac-cyrillic", 10007}, //Cyrillic (Mac)
+ {"x-mac-chinesesimp", 10008}, //MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
+ {"x-mac-romanian", 10010}, //Romanian (Mac)
+ {"x-mac-ukrainian", 10017}, //Ukrainian (Mac)
+ {"x-mac-thai", 10021}, //Thai (Mac)
+ {"x-mac-ce", 10029}, //MAC Latin 2; Central European (Mac)
+ {"x-mac-icelandic", 10079}, //Icelandic (Mac)
+ {"x-mac-turkish", 10081}, //Turkish (Mac)
+ {"x-mac-croatian", 10082}, //Croatian (Mac)
+ {"utf-32", 12000}, //Unicode UTF-32, little endian byte order; available only to managed applications
+ {"utf-32be", 12001}, //Unicode UTF-32, big endian byte order; available only to managed applications
+ {"x-chinese_cns", 20000}, //CNS Taiwan; Chinese Traditional (CNS)
+ {"x-cp20001", 20001}, //TCA Taiwan
+ {"x_chinese-eten", 20002}, //Eten Taiwan; Chinese Traditional (Eten)
+ {"x-cp20003", 20003}, //IBM5550 Taiwan
+ {"x-cp20004", 20004}, //TeleText Taiwan
+ {"x-cp20005", 20005}, //Wang Taiwan
+ {"x-ia5", 20105}, //IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
+ {"x-ia5-german", 20106}, //IA5 German (7-bit)
+ {"x-ia5-swedish", 20107}, //IA5 Swedish (7-bit)
+ {"x-ia5-norwegian", 20108}, //IA5 Norwegian (7-bit)
+ {"us-ascii", 20127}, //US-ASCII (7-bit)
+ {"x-cp20261", 20261}, //T.61
+ {"x-cp20269", 20269}, //ISO 6937 Non-Spacing Accent
+ {"ibm273", 20273}, //IBM EBCDIC Germany
+ {"ibm277", 20277}, //IBM EBCDIC Denmark-Norway
+ {"ibm278", 20278}, //IBM EBCDIC Finland-Sweden
+ {"ibm280", 20280}, //IBM EBCDIC Italy
+ {"ibm284", 20284}, //IBM EBCDIC Latin America-Spain
+ {"ibm285", 20285}, //IBM EBCDIC United Kingdom
+ {"ibm290", 20290}, //IBM EBCDIC Japanese Katakana Extended
+ {"ibm297", 20297}, //IBM EBCDIC France
+ {"ibm420", 20420}, //IBM EBCDIC Arabic
+ {"ibm423", 20423}, //IBM EBCDIC Greek
+ {"ibm424", 20424}, //IBM EBCDIC Hebrew
+ {"x-ebcdic-koreanextended", 20833}, //IBM EBCDIC Korean Extended
+ {"ibm-thai", 20838}, //IBM EBCDIC Thai
+ {"koi8-r", 20866}, //Russian (KOI8-R); Cyrillic (KOI8-R)
+ {"ibm871", 20871}, //IBM EBCDIC Icelandic
+ {"ibm880", 20880}, //IBM EBCDIC Cyrillic Russian
+ {"ibm905", 20905}, //IBM EBCDIC Turkish
+ {"ibm00924", 20924}, //IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
+ {"euc-jp", 20932}, //Japanese (JIS 0208-1990 and 0212-1990)
+ {"x-cp20936", 20936}, //Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
+ {"x-cp20949", 20949}, //Korean Wansung
+ {"cp1025", 21025}, //IBM EBCDIC Cyrillic Serbian-Bulgarian
+ {"deprecated", 21027}, //(deprecated)
+ {"koi8-u", 21866}, //Ukrainian (KOI8-U); Cyrillic (KOI8-U)
+ {"iso-8859-1", 28591}, //ISO 8859-1 Latin 1; Western European (ISO)
+ {"iso-8859-2", 28592}, //ISO 8859-2 Central European; Central European (ISO)
+ {"iso-8859-3", 28593}, //ISO 8859-3 Latin 3
+ {"iso-8859-4", 28594}, //ISO 8859-4 Baltic
+ {"iso-8859-5", 28595}, //ISO 8859-5 Cyrillic
+ {"iso-8859-6", 28596}, //ISO 8859-6 Arabic
+ {"kIso-8859-7", 28597}, //ISO 8859-7 Greek
+ {"iso-8859-8", 28598}, //ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
+ {"iso-8859-9", 28599}, //ISO 8859-9 Turkish
+ {"iso-8859-13", 28603}, //ISO 8859-13 Estonian
+ {"iso-8859-15", 28605}, //ISO 8859-15 Latin 9
+ {"x-europa", 29001}, //Europa 3
+ {"iso-8859-8-i", 38598}, //ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
+ {"iso-2022-jp", 50220}, //ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
+ {"csiso2022jp", 50221}, //ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
+ {"iso-2022-jp", 50222}, //ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
+ {"iso-2022-kr", 50225}, //ISO 2022 Korean
+ {"x-cp50227", 50227}, //ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
+ {"iso2022", 50229}, //Traditional Chinese
+ {"ebcdic1", 50930}, //Japanese (Katakana) Extended
+ {"ebcdic2", 50931}, //US-Canada and Japanese
+ {"ebcdic3", 50933}, //Korean Extended and Korean
+ {"ebcdic4", 50935}, //Simplified Chinese Extended and Simplified Chinese
+ {"ebcdic5", 50936}, //Simplified Chinese
+ {"ebcdic6", 50937}, //US-Canada and Traditional Chinese
+ {"ebcdic7", 50939}, //Japanese (Latin) Extended and Japanese
+ {"euc-jp", 51932}, //EUC Japanese
+ {"euc-cn", 51936}, //EUC Simplified Chinese; Chinese Simplified (EUC)
+ {"euc-kr", 51949}, //EUC Korean
+ {"euc", 51950}, //Traditional Chinese
+ {"hz-gb-2312", 52936}, //HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
+ {"gb18030", 54936}, //Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
+ {"x-iscii-de", 57002}, //ISCII Devanagari
+ {"x-iscii-be", 57003}, //ISCII Bangla
+ {"x-iscii-ta", 57004}, //ISCII Tamil
+ {"x-iscii-te", 57005}, //ISCII Telugu
+ {"x-iscii-as", 57006}, //ISCII Assamese
+ {"x-iscii-or", 57007}, //ISCII Odia
+ {"x-iscii-ka", 57008}, //ISCII Kannada
+ {"x-iscii-ma", 57009}, //ISCII Malayalam
+ {"x-iscii-gu", 57010}, //ISCII Gujarati
+ {"x-iscii-pa", 57011}, //ISCII Punjabi
+ {"utf-7", 65000}, //Unicode (UTF-7)
+ {"utf-8", 65001}, //Unicode (UTF-8)
+ {nullptr, 0} //End
+};
+
+int getCodePageId(Common::String name);
+}
Commit: 0db194eed47bfd30a0ac830d7aa0375969caaae5
https://github.com/scummvm/scummvm/commit/0db194eed47bfd30a0ac830d7aa0375969caaae5
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Add include guard to codepage.h
Changed paths:
backends/platform/sdl/win32/codepage.h
diff --git a/backends/platform/sdl/win32/codepage.h b/backends/platform/sdl/win32/codepage.h
index 98ef3ce..db1f7dd 100644
--- a/backends/platform/sdl/win32/codepage.h
+++ b/backends/platform/sdl/win32/codepage.h
@@ -20,6 +20,9 @@
*
*/
+#ifndef WIN32_CODEPAGE_H
+#define WIN32_CODEPAGE_H
+
#include "common/scummsys.h"
#include "common/str.h"
namespace Win32 {
@@ -191,3 +194,5 @@ const CodePageDescription g_cpDescriptions[] = {
int getCodePageId(Common::String name);
}
+
+#endif // WIN32_CODEPAGE_H
Commit: 8c284c0917adb3ac3e724c0c5bfc0c86d61a9c36
https://github.com/scummvm/scummvm/commit/8c284c0917adb3ac3e724c0c5bfc0c86d61a9c36
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Add Win32 implementation of convertEncoding
Changed paths:
backends/platform/sdl/win32/win32.cpp
backends/platform/sdl/win32/win32.h
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index cb65fa8..82b21ae 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -43,6 +43,7 @@
#include "backends/platform/sdl/win32/win32.h"
#include "backends/platform/sdl/win32/win32-window.h"
#include "backends/platform/sdl/win32/win32_wrapper.h"
+#include "backends/platform/sdl/win32/codepage.h"
#include "backends/saves/windows/windows-saves.h"
#include "backends/fs/windows/windows-fs-factory.h"
#include "backends/taskbar/win32/win32-taskbar.h"
@@ -384,4 +385,29 @@ AudioCDManager *OSystem_Win32::createAudioCDManager() {
return createWin32AudioCDManager();
}
+char *OSystem_Win32::convertEncoding(const char* to, const char *from, const char *string, size_t length) {
+ char *result = OSystem_SDL::convertEncoding(to, from, string, length);
+ if (result != nullptr)
+ return result;
+ if (Common::String(from).equalsIgnoreCase("utf-32"))
+ return nullptr;
+
+ WCHAR *tmpStr;
+ if (Common::String(from).equalsIgnoreCase("utf-16")) {
+ // Allocate space for string and 2 ending zeros
+ tmpStr = (WCHAR *) calloc(sizeof(char), length + 2);
+ memcpy(tmpStr, string, length);
+ } else {
+ tmpStr = Win32::ansiToUnicode(string, Win32::getCodePageId(from));
+ }
+
+ if (Common::String(to).equalsIgnoreCase("utf-16"))
+ return (char *) tmpStr;
+ else {
+ char *result = Win32::unicodeToAnsi(tmpStr, Win32::getCodePageId(to));
+ free(tmpStr);
+ return result;
+ }
+}
+
#endif
diff --git a/backends/platform/sdl/win32/win32.h b/backends/platform/sdl/win32/win32.h
index 050137f..2a496f5 100644
--- a/backends/platform/sdl/win32/win32.h
+++ b/backends/platform/sdl/win32/win32.h
@@ -54,6 +54,8 @@ protected:
virtual AudioCDManager *createAudioCDManager();
HWND getHwnd() { return ((SdlWindow_Win32*)_window)->getHwnd(); }
+
+ virtual char *convertEncoding(const char *to, const char *from, const char *string, size_t length);
};
#endif
Commit: 888f3e4d3052e3dc42290e020388fb1c24999fb3
https://github.com/scummvm/scummvm/commit/888f3e4d3052e3dc42290e020388fb1c24999fb3
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add transMan mapping encoding conversion.
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index b2a704c..1c2ef1d 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -24,6 +24,7 @@
#include "common/debug.h"
#include "common/textconsole.h"
#include "common/system.h"
+#include "common/translation.h"
#include <cerrno>
namespace Common {
@@ -83,8 +84,10 @@ char *Encoding::doConversion(iconv_t iconvHandle, const String &to, const String
if (result == nullptr)
result = g_system->convertEncoding(to.c_str(), from.c_str(), string, length);
- if (result == nullptr)
+ if (result == nullptr) {
debug("Could not convert from %s to %s using backend specific conversion", from.c_str(), to.c_str());
+ result = convertTransManMapping(to.c_str(), from.c_str(), string, length);
+ }
return result;
}
@@ -157,4 +160,61 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
#endif //USE_ICONV
}
+// Fallback conversion built on the charset mapping table of the translation
+// manager (TransMan). Two directions are supported:
+//  - from the current TransMan charset: every input byte is mapped to a
+//    UTF-32 code point and the result is handed to convert() to reach 'to';
+//  - from UTF-32 to the current TransMan charset: every code point is looked
+//    up in the mapping table (reverse mapping). Code points without a
+//    counterpart in the charset are replaced by '?'.
+// Any other combination, or a build without USE_TRANSLATION, yields nullptr.
+// 'length' is the input length in bytes. The result has to be freed.
+char *Encoding::convertTransManMapping(const char *to, const char *from, const char *string, size_t length) {
+#ifdef USE_TRANSLATION
+	debug("Trying TransMan...");
+	String currentCharset = TransMan.getCurrentCharset();
+	if (currentCharset.equalsIgnoreCase(from)) {
+		// We can use the transMan mapping directly.
+		// Measure the input once instead of on every loop iteration.
+		const size_t charCount = strlen(string);
+		uint32 *partialResult = (uint32 *) calloc(charCount + 1, sizeof(uint32));
+		if (!partialResult) {
+			warning("Couldn't allocate memory for encoding conversion");
+			return nullptr;
+		}
+		const uint32 *mapping = TransMan.getCharsetMapping();
+		for (size_t i = 0; i < charCount; i++) {
+			// Go through unsigned char so bytes >= 0x80 are not sign-extended
+			const unsigned char byte = (unsigned char) string[i];
+			if (mapping == 0)
+				partialResult[i] = byte;
+			else
+				partialResult[i] = mapping[byte] & 0x7FFFFFFF;
+		}
+#ifdef SCUMM_BIG_ENDIAN
+		char *finalResult = convert(to, "UTF-32BE", (char *) partialResult, charCount * 4);
+#else
+		char *finalResult = convert(to, "UTF-32LE", (char *) partialResult, charCount * 4);
+#endif // SCUMM_BIG_ENDIAN
+		free(partialResult);
+		return finalResult;
+	} else if (currentCharset.equalsIgnoreCase(to) && String(from).equalsIgnoreCase("utf-32")) {
+		// We can do reverse mapping
+		const uint32 *mapping = TransMan.getCharsetMapping();
+		const uint32 *src = (const uint32 *) string;
+		// 'length' counts bytes, the input holds length / 4 code points
+		const size_t charCount = length / 4;
+		char *result = (char *) calloc(charCount + 1, sizeof(char));
+		if (!result) {
+			warning("Couldn't allocate memory for encoding conversion");
+			return nullptr;
+		}
+		for (size_t i = 0; i < charCount; i++) {
+			if (mapping == 0) {
+				// Without a table the charset bytes equal the code points
+				// (mirrors the forward direction above)
+				result[i] = (src[i] < 256) ? (char) src[i] : '?';
+				continue;
+			}
+			// Placeholder in case the code point is not part of the charset;
+			// leaving a zero here would cut the output string short
+			result[i] = '?';
+			for (int j = 0; j < 256; j++) {
+				if ((mapping[j] & 0x7FFFFFFF) == src[i]) {
+					result[i] = j;
+					break;
+				}
+			}
+		}
+		return result;
+	}
+	return nullptr;
+#else
+	debug("TransMan isn't available");
+	return nullptr;
+#endif // USE_TRANSLATION
+}
+
}
diff --git a/common/encoding.h b/common/encoding.h
index 3fbd482..64d9c04 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -60,6 +60,7 @@ class Encoding {
iconv_t _iconvHandle;
static char *convertIconv(iconv_t iconvHandle, const char *string, size_t length);
+ static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
};
}
Commit: 61cf628bfbe3031ff1cfa5f549e90f442cd1c5de
https://github.com/scummvm/scummvm/commit/61cf628bfbe3031ff1cfa5f549e90f442cd1c5de
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add cyrilic transliteration to Encoding.
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 1c2ef1d..e0446c0 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -32,44 +32,90 @@ namespace Common {
Encoding::Encoding(const String &to, const String &from)
: _to(to)
, _from(from) {
+ _iconvHandle = initIconv(to, from);
+}
+
+// Releases the iconv handle held by this converter. deinitIconv() skips the
+// (iconv_t) -1 handle and does nothing when built without USE_ICONV.
+Encoding::~Encoding() {
+ deinitIconv(_iconvHandle);
+}
+
+iconv_t Encoding::initIconv(const String &to, const String &from) {
#ifdef USE_ICONV
String toTranslit = to + "//TRANSLIT";
- _iconvHandle = iconv_open(toTranslit.c_str(), from.c_str());
+ return iconv_open(toTranslit.c_str(), from.c_str());
+#else
+ return 0;
#endif // USE_ICONV
}
-Encoding::~Encoding() {
+void Encoding::deinitIconv(iconv_t iconvHandle) {
#ifdef USE_ICONV
- if (_iconvHandle != (iconv_t) -1)
- iconv_close(_iconvHandle);
+ if (iconvHandle != (iconv_t) -1)
+ iconv_close(iconvHandle);
#endif // USE_ICONV
}
char *Encoding::convert(const char *string, size_t size) {
-#ifndef USE_ICONV
- _iconvHandle = 0;
-#endif
- return doConversion(_iconvHandle, _to, _from, string, size);
+ return conversion(_iconvHandle, _to, _from, string, size);
}
char *Encoding::convert(const String &to, const String &from, const char *string, size_t size) {
-#ifdef USE_ICONV
- String toTranslit = to + "//TRANSLIT";
- iconv_t iconvHandle = iconv_open(toTranslit.c_str(), from.c_str());
-#else
- iconv_t iconvHandle = 0;
-#endif // USE_ICONV
+ iconv_t iconvHandle = initIconv(to, from);
- char *result = doConversion(iconvHandle, to, from, string, size);
+ char *result = conversion(iconvHandle, to, from, string, size);
-#ifdef USE_ICONV
- if (iconvHandle != (iconv_t) -1)
- iconv_close(iconvHandle);
-#endif // USE_ICONV
+ deinitIconv(iconvHandle);
return result;
}
-char *Encoding::doConversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+// Transliterates Cyrillic characters when the target encoding cannot hold
+// them (i.e. it is not a UTF encoding) and then delegates the real work to
+// conversion2().
+//  - iso-8859-5 input is transliterated byte by byte and continues as ASCII.
+//  - UTF input is brought to UTF-32 first (unless it already is UTF-32),
+//    transliterated there and continues as UTF-32.
+// 'iconvHandle' is used as is when the source encoding was not replaced;
+// otherwise a temporary handle is opened for the new source encoding.
+// Returns the converted string (must be freed) or nullptr on failure.
+char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+	char *newString = nullptr;
+	String newFrom = from;
+	size_t newLength = length;
+	const bool targetIsUtf = to.hasPrefixIgnoreCase("utf");
+
+	if (from.equalsIgnoreCase("iso-8859-5") && !targetIsUtf) {
+		// There might be some Cyrillic characters, which need to be transliterated.
+		newString = transliterateCyrilic(string);
+		if (!newString)
+			return nullptr;
+		newFrom = "ASCII";
+	}
+	if (from.hasPrefixIgnoreCase("utf") && !targetIsUtf) {
+		// There might be some Cyrillic characters, which need to be transliterated.
+		char *tmpString = nullptr;
+		if (!from.hasPrefixIgnoreCase("utf-32")) {
+			iconv_t tmpHandle = initIconv("UTF-32", from);
+			tmpString = conversion2(tmpHandle, "UTF-32", from, string, length);
+			deinitIconv(tmpHandle);
+			// Without the intermediate string there is nothing to scan below
+			if (!tmpString)
+				return nullptr;
+			// find out the length in bytes of the tmpString
+			size_t count = 0;
+			while (((const uint32 *) tmpString)[count])
+				count++;
+			newLength = count * 4;
+			newFrom = "UTF-32";
+		}
+		const uint32 *utf32 = tmpString ? (const uint32 *) tmpString : (const uint32 *) string;
+		newString = (char *) transliterateUTF32(utf32, newLength);
+		free(tmpString);
+		// Do not fall back to the untransliterated input: newFrom may no
+		// longer describe it
+		if (!newString)
+			return nullptr;
+	}
+
+	const bool sourceReplaced = (newFrom != from);
+	iconv_t newHandle = iconvHandle;
+	if (sourceReplaced)
+		newHandle = initIconv(to, newFrom);
+
+	char *result = conversion2(newHandle, to, newFrom, newString ? newString : string, newLength);
+	free(newString);
+
+	if (sourceReplaced)
+		deinitIconv(newHandle);
+	return result;
+}
+
+char *Encoding::conversion2(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
char *result = nullptr;
#ifdef USE_ICONV
if (iconvHandle != (iconv_t) -1)
@@ -217,4 +263,43 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
#endif // USE_TRANSLATION
}
+// Latin replacements for the upper part of ISO-8859-5: entry 0 belongs to
+// byte 0xA0 (160), the last entry to byte 0xFF. One row covers 16 bytes.
+// The table is only ever read, hence const.
+static const char g_cyrilicTransliterationTable[] = {
+	' ', 'E', 'D', 'G', 'E', 'Z', 'I', 'I', 'J', 'L', 'N', 'C', 'K', '-', 'U', 'D',
+	'A', 'B', 'V', 'G', 'D', 'E', 'Z', 'Z', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+	'R', 'S', 'T', 'U', 'F', 'H', 'C', 'C', 'S', 'S', '\"', 'Y', '\'', 'E', 'U', 'A',
+	'a', 'b', 'v', 'g', 'd', 'e', 'z', 'z', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+	'r', 's', 't', 'u', 'f', 'h', 'c', 'c', 's', 's', '\"', 'y', '\'', 'e', 'u', 'a',
+	'N', 'e', 'd', 'g', 'e', 'z', 'i', 'i', 'j', 'l', 'n', 'c', 'k', '?', 'u', 'd',
+};
+
+// Returns a newly allocated copy of the zero-terminated iso-8859-5 'string'
+// in which every byte >= 160 is replaced by its Latin counterpart from
+// g_cyrilicTransliterationTable. Smaller bytes are copied unchanged.
+// The result has to be freed; nullptr is returned when out of memory.
+char *Encoding::transliterateCyrilic(const char *string) {
+	// Measure once (including the terminating zero) instead of calling
+	// strlen() on every loop iteration
+	const size_t size = strlen(string) + 1;
+	char *result = (char *) malloc(size);
+	if (!result) {
+		warning("Could not allocate memory for encoding conversion");
+		return nullptr;
+	}
+	for (size_t i = 0; i < size; i++) {
+		const unsigned char byte = (unsigned char) string[i];
+		if (byte >= 160)
+			result[i] = g_cyrilicTransliterationTable[byte - 160];
+		else
+			result[i] = string[i];
+	}
+	return result;
+}
+
+// Returns a newly allocated, zero-terminated copy of the UTF-32 'string' in
+// which the code points U+0410..U+044F are replaced by their Latin
+// counterparts from g_cyrilicTransliterationTable. All other code points are
+// copied unchanged.
+// 'length' is the input length in bytes, not counting any terminator.
+// The result has to be freed; nullptr is returned when out of memory.
+uint32 *Encoding::transliterateUTF32(const uint32 *string, size_t length) {
+	const size_t count = length / 4;
+	uint32 *result = (uint32 *) malloc((count + 1) * sizeof(uint32));
+	if (!result) {
+		warning("Could not allocate memory for encoding conversion");
+		return nullptr;
+	}
+	for (size_t i = 0; i < count; i++) {
+		// The table starts at iso-8859-5 byte 0xA0 and U+0410 corresponds to
+		// byte 0xB0, so the table index is the code point minus 0x400.
+		// U+0450 is left out: its slot (byte 0xF0) holds the numero sign.
+		if (string[i] >= 0x410 && string[i] <= 0x44F)
+			result[i] = g_cyrilicTransliterationTable[string[i] - 0x400];
+		else
+			result[i] = string[i];
+	}
+	// Terminate explicitly instead of reading one element past the input
+	result[count] = 0;
+	return result;
+}
+
}
diff --git a/common/encoding.h b/common/encoding.h
index 64d9c04..bccfb36 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -55,12 +55,20 @@ class Encoding {
String _to;
String _from;
- static char *doConversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+ static char *conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+
+ static char *conversion2(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
iconv_t _iconvHandle;
static char *convertIconv(iconv_t iconvHandle, const char *string, size_t length);
static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
+
+ static char *transliterateCyrilic(const char *string);
+ static uint32 *transliterateUTF32(const uint32 *string, size_t length);
+
+ static iconv_t initIconv(const String &to, const String &from);
+ static void deinitIconv(iconv_t iconvHandle);
};
}
Commit: 73fa9d921f52045a478f6a79741615987860ca1e
https://github.com/scummvm/scummvm/commit/73fa9d921f52045a478f6a79741615987860ca1e
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add documentation to Common::Encoding
Changed paths:
common/encoding.h
diff --git a/common/encoding.h b/common/encoding.h
index bccfb36..b55c485 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -37,37 +37,190 @@ typedef void* iconv_t;
namespace Common {
+/**
+ * A class that allows conversion between different text encodings.
+ * The encodings available depend on the current backend and on whether
+ * ScummVM is compiled with or without iconv.
+ */
class Encoding {
public:
+ /**
+ * Constructs everything needed for the conversion between 2 encodings
+ * and saves the values for future use.
+ *
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ */
Encoding(const String &to, const String &from);
~Encoding();
+ /**
+ * Converts string between encodings. The resulting string is ended by
+ * a character with value 0 (C-like ending for 1 byte per character
+ * encodings, 2 zero bytes for UTF-16, 4 zero bytes for UTF-32)
+ *
+ * The result has to be freed after use.
+ *
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
char *convert(const char *string, size_t length);
+
+ /**
+ * Static version of the method above.
+ * Converts string between encodings. The resulting string is ended by
+ * a character with value 0 (C-like ending for 1 byte per character
+ * encodings, 2 zero bytes for UTF-16, 4 zero bytes for UTF-32)
+ *
+ * The result has to be freed after use.
+ *
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
static char *convert(const String &to, const String &from, const char *string, size_t length);
+ /**
+ * @return The encoding, which is currently being converted from
+ */
String getFrom() {return _from;};
+
+ /**
+ * @param from The encoding, to convert from
+ */
void setFrom(const String &from) {_from = from;};
+ /**
+ * @return The encoding, which is currently being converted to
+ */
String getTo() {return _to;};
+
+ /**
+ * @param to The encoding, to convert to
+ */
void setTo(const String &to) {_to = to;};
private:
+ /** The encoding, which is currently being converted to */
String _to;
+
+ /** The encoding, which is currently being converted from */
String _from;
+ /**
+ * iconv handle currently used for conversions (a null pointer
+ * if ScummVM isn't compiled with iconv)
+ */
+ iconv_t _iconvHandle;
+
+ /**
+ * Takes care of transliteration and calls conversion2 for the encoding
+ * conversion
+ *
+ * The result has to be freed after use.
+ *
+ * @param iconvHandle Handle to use for the conversion
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
static char *conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+ /**
+ * Calls as many conversion functions as possible or until the conversion
+ * succeeds. It first tries to use iconv, then it tries to use platform
+ * specific functions and after that it tries to use TransMan mapping.
+ *
+ * The result has to be freed after use.
+ *
+ * @param iconvHandle Handle to use for the conversion
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
static char *conversion2(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
- iconv_t _iconvHandle;
+ /**
+ * Tries to convert the string using iconv.
+ *
+ * The result has to be freed after use.
+ *
+ * @param iconvHandle Handle to use for the conversion
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
static char *convertIconv(iconv_t iconvHandle, const char *string, size_t length);
+ /**
+ * Tries to use the TransMan to convert the string. It can convert only
+ * between UTF-32 and the current GUI charset. It also tries to convert
+ * from the current GUI charset to UTF-32 and then it calls convert() again.
+ *
+ * The result has to be freed after use.
+ *
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ * @param string String that should be converted.
+ * @param length Length of the string to convert in bytes.
+ *
+ * @return Converted string (must be freed) or nullptr if the conversion failed
+ */
static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
+ /**
+ * Transliterates a Cyrillic string in iso-8859-5 encoding and returns
+ * its ASCII (Latin) form.
+ *
+ * The result has to be freed after use.
+ *
+ * @param string String that should be converted
+ *
+ * @return Transliterated string in ASCII (must be freed) or nullptr on fail.
+ */
static char *transliterateCyrilic(const char *string);
+
+ /**
+ * Transliterates Cyrillic characters in a UTF-32 string.
+ *
+ * The result has to be freed after use.
+ *
+ * @param string String that should be converted
+ * @param length Length of the string in bytes
+ *
+ * @return Transliterated string in UTF-32 (must be freed) or nullptr on fail.
+ */
static uint32 *transliterateUTF32(const uint32 *string, size_t length);
+ /**
+ * Inits the iconv handle
+ *
+ * The result has to be released with deinitIconv() after use.
+ *
+ * @param to Name of the encoding the strings will be converted to
+ * @param from Name of the encoding the strings will be converted from
+ *
+ * @return Opened iconv handle or 0 if ScummVM is compiled without iconv
+ */
static iconv_t initIconv(const String &to, const String &from);
+
+ /**
+ * Deinits the iconv handle
+ *
+ * @param iconvHandle Handle that should be deinited
+ */
static void deinitIconv(iconv_t iconvHandle);
};
Commit: 5043dec13c4019a858c397b0f2db44a75c2d0adc
https://github.com/scummvm/scummvm/commit/5043dec13c4019a858c397b0f2db44a75c2d0adc
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add propper Encoding setters
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index e0446c0..f9dd193 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -55,6 +55,18 @@ void Encoding::deinitIconv(iconv_t iconvHandle) {
#endif // USE_ICONV
}
+// Changes the source encoding. The handle opened for the previous encoding
+// pair is released and a new one is opened for the updated (_to, _from) pair.
+void Encoding::setFrom(const String &from) {
+	iconv_t staleHandle = _iconvHandle;
+	_from = from;
+	deinitIconv(staleHandle);
+	_iconvHandle = initIconv(_to, _from);
+}
+
+// Changes the target encoding. The handle opened for the previous encoding
+// pair is released and a new one is opened for the updated (_to, _from) pair.
+void Encoding::setTo(const String &to) {
+	iconv_t staleHandle = _iconvHandle;
+	_to = to;
+	deinitIconv(staleHandle);
+	_iconvHandle = initIconv(_to, _from);
+}
+
char *Encoding::convert(const char *string, size_t size) {
return conversion(_iconvHandle, _to, _from, string, size);
}
diff --git a/common/encoding.h b/common/encoding.h
index b55c485..67c5ac6 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -93,7 +93,7 @@ class Encoding {
/**
* @param from The encoding, to convert from
*/
- void setFrom(const String &from) {_from = from;};
+ void setFrom(const String &from);
/**
* @return The encoding, which is currently being converted to
@@ -103,7 +103,7 @@ class Encoding {
/**
* @param to The encoding, to convert to
*/
- void setTo(const String &to) {_to = to;};
+ void setTo(const String &to);
private:
/** The encoding, which is currently being converted to */
Commit: e01f0af5b04a9e1a72929c8e021f9e7939a33311
https://github.com/scummvm/scummvm/commit/e01f0af5b04a9e1a72929c8e021f9e7939a33311
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Check calloc return value in covertEncoding
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index 82b21ae..f2447b2 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -396,6 +396,10 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
if (Common::String(from).equalsIgnoreCase("utf-16")) {
// Allocate space for string and 2 ending zeros
tmpStr = (WCHAR *) calloc(sizeof(char), length + 2);
+ if (!tmpStr) {
+ warning("Could not allocate memory for string conversion");
+ return nullptr;
+ }
memcpy(tmpStr, string, length);
} else {
tmpStr = Win32::ansiToUnicode(string, Win32::getCodePageId(from));
Commit: 9fa09eeefedcb0b9dbf1c2883a26f37e76315151
https://github.com/scummvm/scummvm/commit/9fa09eeefedcb0b9dbf1c2883a26f37e76315151
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Check for return values in Encoding
Changed paths:
common/encoding.cpp
diff --git a/common/encoding.cpp b/common/encoding.cpp
index f9dd193..30d8dab 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -81,25 +81,40 @@ char *Encoding::convert(const String &to, const String &from, const char *string
}
char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+ if (from.equalsIgnoreCase(to)) {
+ // don't convert, just copy the string and return it
+ char *result = (char *) calloc(sizeof(char), length + 4);
+ if (!result) {
+ warning("Could not allocate memory for string conversion");
+ return nullptr;
+ }
+ memcpy(result, string, length);
+ return result;
+ }
char *newString = nullptr;
String newFrom = from;
size_t newLength = length;
- if (String(from).equalsIgnoreCase("iso-8859-5") &&
- !String(to).hasPrefixIgnoreCase("utf")) {
+ if (from.equalsIgnoreCase("iso-8859-5") &&
+ !to.hasPrefixIgnoreCase("utf")) {
// There might be some cyrilic characters, which need to be transliterated.
newString = transliterateCyrilic(string);
+ if (!newString)
+ return nullptr;
newFrom = "ASCII";
}
- if (String(from).hasPrefixIgnoreCase("utf") &&
- !String(to).hasPrefixIgnoreCase("utf")) {
+ if (from.hasPrefixIgnoreCase("utf") &&
+ !to.hasPrefixIgnoreCase("utf") &&
+ !to.equalsIgnoreCase("iso-8859-5")) {
// There might be some cyrilic characters, which need to be transliterated.
char *tmpString;
- if (String(from).hasPrefixIgnoreCase("utf-32"))
+ if (from.hasPrefixIgnoreCase("utf-32"))
tmpString = nullptr;
else {
iconv_t tmpHandle = initIconv("UTF-32", from);
tmpString = conversion2(tmpHandle, "UTF-32", from, string, length);
deinitIconv(tmpHandle);
+ if (!tmpString)
+ return nullptr;
// find out the length in bytes of the tmpString
int i;
for (i = 0; ((const uint32 *)tmpString)[i]; i++) {}
@@ -111,6 +126,8 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
free(tmpString);
} else
newString = (char *) transliterateUTF32((const uint32 *) string, newLength);
+ if (!newString)
+ return nullptr;
}
iconv_t newHandle = iconvHandle;
if (newFrom != from)
Commit: 24d35df4760678f7592a42c2b78453bdfd6e0050
https://github.com/scummvm/scummvm/commit/24d35df4760678f7592a42c2b78453bdfd6e0050
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Refactor convertIconv
Changed paths:
common/encoding.cpp
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 30d8dab..00870e7 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -173,7 +173,7 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
size_t inSize = length;
size_t outSize = inSize;
- size_t stringSize = inSize > 4 ? inSize : outSize;
+ size_t stringSize = inSize > 4 ? inSize : 4;
#ifdef ICONV_USES_CONST
@@ -184,12 +184,11 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
memcpy(src, string, length);
#endif // ICONV_USES_CONST
- char *buffer = (char *) malloc(sizeof(char) * stringSize);
+ char *buffer = (char *) calloc(sizeof(char), stringSize);
if (!buffer) {
warning ("Cannot allocate memory for converting string");
return nullptr;
}
- memset(buffer, 0, stringSize);
char *dst = buffer;
bool error = false;
@@ -215,6 +214,7 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
}
}
}
+ iconv(iconvHandle, NULL, NULL, &dst, &outSize);
// Add a zero character to the end. Hopefuly UTF32 uses the most bytes from
// all possible encodings, so add 4 zero bytes.
buffer = (char *) realloc(buffer, stringSize + 4);
@@ -224,8 +224,11 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
delete[] originalSrc;
#endif // ICONV_USES_CONST
- if (error)
+ if (error) {
+ if (buffer)
+ free(buffer);
return nullptr;
+ }
debug("Size: %d", stringSize);
return buffer;
Commit: 613613568cbeee923a23400716c743be01d3906e
https://github.com/scummvm/scummvm/commit/613613568cbeee923a23400716c743be01d3906e
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Rename methods in Common::Encoding
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 00870e7..d121e13 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -68,19 +68,19 @@ void Encoding::setTo(const String &to) {
}
char *Encoding::convert(const char *string, size_t size) {
- return conversion(_iconvHandle, _to, _from, string, size);
+ return convertWithTransliteration(_iconvHandle, _to, _from, string, size);
}
char *Encoding::convert(const String &to, const String &from, const char *string, size_t size) {
iconv_t iconvHandle = initIconv(to, from);
- char *result = conversion(iconvHandle, to, from, string, size);
+ char *result = convertWithTransliteration(iconvHandle, to, from, string, size);
deinitIconv(iconvHandle);
return result;
}
-char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+char *Encoding::convertWithTransliteration(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
if (from.equalsIgnoreCase(to)) {
// don't convert, just copy the string and return it
char *result = (char *) calloc(sizeof(char), length + 4);
@@ -111,7 +111,7 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
tmpString = nullptr;
else {
iconv_t tmpHandle = initIconv("UTF-32", from);
- tmpString = conversion2(tmpHandle, "UTF-32", from, string, length);
+ tmpString = conversion(tmpHandle, "UTF-32", from, string, length);
deinitIconv(tmpHandle);
if (!tmpString)
return nullptr;
@@ -134,17 +134,17 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
newHandle = initIconv(to, newFrom);
char *result;
if (newString != nullptr) {
- result = conversion2(newHandle, to, newFrom, newString, newLength);
+ result = conversion(newHandle, to, newFrom, newString, newLength);
free(newString);
} else
- result = conversion2(newHandle, to, newFrom, string, newLength);
+ result = conversion(newHandle, to, newFrom, string, newLength);
if (newFrom != from)
deinitIconv(newHandle);
return result;
}
-char *Encoding::conversion2(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
+char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length) {
char *result = nullptr;
#ifdef USE_ICONV
if (iconvHandle != (iconv_t) -1)
diff --git a/common/encoding.h b/common/encoding.h
index 67c5ac6..014000d 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -119,8 +119,7 @@ class Encoding {
iconv_t _iconvHandle;
/**
- * Takes care of transliteration and calls conversion2 for the encoding
- * conversion
+ * Takes care of transliteration and calls conversion
*
* The result has to be freed after use.
*
@@ -132,7 +131,7 @@ class Encoding {
*
* @return Converted string (must be freed) or nullptr if the conversion failed
*/
- static char *conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+ static char *convertWithTransliteration(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
/**
* Calls as many conversion functions as possible or until the conversion
@@ -149,7 +148,7 @@ class Encoding {
*
* @return Converted string (must be freed) or nullptr if the conversion failed
*/
- static char *conversion2(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
+ static char *conversion(iconv_t iconvHandle, const String &to, const String &from, const char *string, size_t length);
/**
* Tries to convert the string using iconv.
Commit: 93c6b2fafc69d14436cfcc537989aa44430f040a
https://github.com/scummvm/scummvm/commit/93c6b2fafc69d14436cfcc537989aa44430f040a
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Implement conversion to and from UTF-32
UTF-32 is used in transliteration in Common::Encoding, so it is
pretty important encoding and Windows should be the only thing,
that cannot convert it.
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index f2447b2..e1f7964 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -51,6 +51,8 @@
#include "backends/dialogs/win32/win32-dialogs.h"
#include "common/memstream.h"
+#include "common/ustr.h"
+#include "common/encoding.h"
#define DEFAULT_CONFIG_FILE "scummvm.ini"
@@ -389,8 +391,22 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
char *result = OSystem_SDL::convertEncoding(to, from, string, length);
if (result != nullptr)
return result;
- if (Common::String(from).equalsIgnoreCase("utf-32"))
- return nullptr;
+
+ // UTF-32 is really important for us, because it is used for the
+ // transliteration in Common::Encoding and Win32 cannot convert it
+ if (Common::String(from).hasPrefixIgnoreCase("utf-32")) {
+ Common::U32String UTF32Str((const uint32 *)string, length / 4);
+ Common::String UTF8Str = Common::convertUtf32ToUtf8(UTF32Str);
+ return Common::Encoding::convert(to, "utf-8", UTF8Str.c_str(), UTF8Str.size());
+ }
+ if (Common::String(to).hasPrefixIgnoreCase("utf-32")) {
+ char *UTF8Str = Common::Encoding::convert("utf-8", from, string, length);
+ Common::U32String UTF32Str = Common::convertUtf8ToUtf32(UTF8Str);
+ free(UTF8Str);
+ result = (char *) malloc((UTF32Str.size() + 1) * 4);
+ memcpy(result, UTF32Str.c_str(), (UTF32Str.size() + 1) * 4);
+ return result;
+ }
WCHAR *tmpStr;
if (Common::String(from).equalsIgnoreCase("utf-16")) {
@@ -408,7 +424,7 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
if (Common::String(to).equalsIgnoreCase("utf-16"))
return (char *) tmpStr;
else {
- char *result = Win32::unicodeToAnsi(tmpStr, Win32::getCodePageId(to));
+ result = Win32::unicodeToAnsi(tmpStr, Win32::getCodePageId(to));
free(tmpStr);
return result;
}
Commit: 3513972e92a4a4653c30de34eb5be9488bf2ff6a
https://github.com/scummvm/scummvm/commit/3513972e92a4a4653c30de34eb5be9488bf2ff6a
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
TEST: Add tests for Common::Encoding
Changed paths:
A test/common/encoding.h
diff --git a/test/common/encoding.h b/test/common/encoding.h
new file mode 100644
index 0000000..ff445a3
--- /dev/null
+++ b/test/common/encoding.h
@@ -0,0 +1,361 @@
+#include <cxxtest/TestSuite.h>
+
+#include "common/encoding.h"
+#include "backends/platform/sdl/posix/posix.h"
+
+#ifdef USE_ICONV
+class EncodingTestSuite : public CxxTest::TestSuite {
+ public:
+ void test_conversion_unicode_machine_endian() {
+ // |dolar| cent | euro |
+ unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
+#ifdef SCUMM_BIG_ENDIAN
+ unsigned char *utf16 = utf16be;
+ unsigned char *utf32 = utf32be;
+#else
+ unsigned char *utf16 = utf16le;
+ unsigned char *utf32 = utf32le;
+#endif
+
+ // UTF16 to UTF8
+ Common::Encoding converter("UTF-8", "UTF-16");
+
+ char *result = converter.convert((char *) utf16, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-16", (char *) utf16, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF8
+ converter.setFrom("UTF-32");
+
+ result = converter.convert((char *) utf32, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-32", (char *) utf32, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF16
+ converter.setTo("UTF-16");
+
+ result = converter.convert((char *) utf32, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16", "UTF-32", (char *) utf32, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16, 8);
+ free(result);
+
+ // UTF8 to UTF16
+ converter.setFrom("UTF-8");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16, 8);
+ free(result);
+
+ // UTF8 to UTF32
+ converter.setTo("UTF-32");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32, 16);
+ free(result);
+
+ // UTF16 to UTF32
+ converter.setFrom("UTF-16");
+
+ result = converter.convert((char *) utf16, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32", "UTF-16", (char *) utf16, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32, 16);
+ free(result);
+ }
+
+ void test_conversion_unicode_big_endian() {
+ // |dolar| cent | euro |
+ unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
+
+ // UTF16 to UTF8
+ Common::Encoding converter("UTF-8", "UTF-16BE");
+
+ char *result = converter.convert((char *) utf16be, 6);
+
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-16BE", (char *) utf16be, 6);
+
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF8
+ converter.setFrom("UTF-32BE");
+
+ result = converter.convert((char *) utf32be, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-32BE", (char *) utf32be, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF16
+ converter.setTo("UTF-16BE");
+
+ result = converter.convert((char *) utf32be, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16be, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16BE", "UTF-32BE", (char *) utf32be, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16be, 8);
+ free(result);
+
+ // UTF8 to UTF16
+ converter.setFrom("UTF-8");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16be, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16BE", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16be, 8);
+ free(result);
+
+ // UTF8 to UTF32
+ converter.setTo("UTF-32BE");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32be, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32BE", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32be, 16);
+ free(result);
+
+ // UTF16 to UTF32
+ converter.setFrom("UTF-16BE");
+
+ result = converter.convert((char *) utf16be, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32be, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32BE", "UTF-16BE", (char *) utf16be, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32be, 16);
+ free(result);
+ }
+
+ void test_conversion_unicode_little_endian() {
+ // |dolar| cent | euro |
+ unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
+
+ // UTF16 to UTF8
+ Common::Encoding converter("UTF-8", "UTF-16LE");
+
+ char *result = converter.convert((char *) utf16le, 6);
+
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-16LE", (char *) utf16le, 6);
+
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF8
+ converter.setFrom("UTF-32LE");
+
+ result = converter.convert((char *) utf32le, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-32LE", (char *) utf32le, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 7);
+ free(result);
+
+ // UTF32 to UTF16
+ converter.setTo("UTF-16LE");
+
+ result = converter.convert((char *) utf32le, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16le, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16LE", "UTF-32LE", (char *) utf32le, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16le, 8);
+ free(result);
+
+ // UTF8 to UTF16
+ converter.setFrom("UTF-8");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16le, 8);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16LE", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf16le, 8);
+ free(result);
+
+ // UTF8 to UTF32
+ converter.setTo("UTF-32LE");
+
+ result = converter.convert((char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32le, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32LE", "UTF-8", (char *) utf8, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32le, 16);
+ free(result);
+
+ // UTF16 to UTF32
+ converter.setFrom("UTF-16LE");
+
+ result = converter.convert((char *) utf16le, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32le, 16);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32LE", "UTF-16LE", (char *) utf16le, 6);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf32le, 16);
+ free(result);
+ }
+
+ void test_cyrilic_transliteration() {
+ unsigned char utf8[] = {/* Z */0xD0, 0x97, /* d */ 0xD0, 0xB4, /* r */ 0xD1, 0x80, /* a */ 0xD0, 0xB0, /* v */ 0xD0, 0xB2, /* s */ 0xD1, 0x81, /* t */ 0xD1, 0x82, /* v */ 0xD0, 0xB2, /* u */ 0xD1, 0x83, /* j */ 0xD0, 0xB9, /* t */ 0xD1, 0x82, /* e */ 0xD0, 0xB5, 0};
+ unsigned char iso_8859_5[] = {0xB7, 0xD4, 0xE0, 0xD0, 0xD2, 0xE1, 0xE2, 0xD2, 0xE3, 0xD9, 0xE2, 0xD5, 0};
+ unsigned char ascii[] = "Zdravstvujte";
+
+ Common::Encoding converter("ASCII", "UTF-8");
+ char *result = converter.convert((char *)utf8, 24);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, ascii, 13);
+ free(result);
+
+ converter.setFrom("iso-8859-5");
+ result = converter.convert((char *)iso_8859_5, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, ascii, 13);
+ free(result);
+
+ converter.setTo("UTF-8");
+ result = converter.convert((char *)iso_8859_5, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8, 25);
+ free(result);
+
+ converter.setTo("iso-8859-5");
+ converter.setFrom("UTF-8");
+ result = converter.convert((char *)utf8, 24);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, iso_8859_5, 13);
+ free(result);
+
+ // this should stay the same
+ converter.setFrom("ASCII");
+ result = converter.convert((char *)ascii, 12);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, ascii, 13);
+ free(result);
+ }
+
+ void test_other_conversions() {
+ unsigned char cp850[] = {0x99, 0xE0, 0xEA, 0x41, 0x64, 0};
+ unsigned char utf8_1[] = {0xC3, 0x96, 0xC3, 0x93, 0xC3, 0x9B, 0x41, 0x64, 0};
+
+ unsigned char iso_8859_2[] = {0xA9, 0xE1, 0x6C, 0x65, 0xE8, 0x65, 0x6B, 0};
+ unsigned char utf8_2[] = {0xC5, 0xA0, 0xC3, 0xA1, 0x6C, 0x65, 0xC4, 0x8D, 0x65, 0x6B, 0};
+
+ char *result = Common::Encoding::convert("UTF-8", "CP850", (char *)cp850, 5);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8_1, 9);
+ free(result);
+
+ result = Common::Encoding::convert("CP850", "UTF-8", (char *)utf8_1, 8);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, cp850, 6);
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "iso-8859-2", (char *)iso_8859_2, 7);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, utf8_2, 11);
+ free(result);
+
+ result = Common::Encoding::convert("iso-8859-2", "UTF-8", (char *)utf8_2, 11);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, iso_8859_2, 8);
+ free(result);
+
+ result = Common::Encoding::convert("ASCII", "UTF-8", (char *)utf8_2, 11);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, "Salecek", 8);
+ free(result);
+
+ result = Common::Encoding::convert("ASCII", "iso-8859-2", (char *)iso_8859_2, 7);
+ TS_ASSERT(result != NULL);
+ TS_ASSERT_SAME_DATA(result, "Salecek", 8);
+ free(result);
+ }
+};
+#endif
Commit: a0564bc564559abf87165331a42d22b89c998dce
https://github.com/scummvm/scummvm/commit/a0564bc564559abf87165331a42d22b89c998dce
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Resolve endianity in Encoding
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index d121e13..66d2381 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -39,10 +39,22 @@ Encoding::~Encoding() {
deinitIconv(_iconvHandle);
}
+String Encoding::addUtfEndianness(const String &str) {
+ if (str.equalsIgnoreCase("utf-16") || str.equalsIgnoreCase("utf-32")) {
+#ifdef SCUMM_BIG_ENDIAN
+ return str + "BE";
+#else
+ return str + "LE";
+#endif
+ } else
+ return String(str);
+}
+
iconv_t Encoding::initIconv(const String &to, const String &from) {
#ifdef USE_ICONV
- String toTranslit = to + "//TRANSLIT";
- return iconv_open(toTranslit.c_str(), from.c_str());
+ String toTranslit = addUtfEndianness(to) + "//TRANSLIT";
+ return iconv_open(toTranslit.c_str(),
+ addUtfEndianness(from).c_str());
#else
return 0;
#endif // USE_ICONV
@@ -157,11 +169,12 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
debug("Iconv is not available");
#endif // USE_ICONV
if (result == nullptr)
- result = g_system->convertEncoding(to.c_str(), from.c_str(), string, length);
+ result = g_system->convertEncoding(addUtfEndianness(to).c_str(),
+ addUtfEndianness(from).c_str(), string, length);
if (result == nullptr) {
debug("Could not convert from %s to %s using backend specific conversion", from.c_str(), to.c_str());
- result = convertTransManMapping(to.c_str(), from.c_str(), string, length);
+ result = convertTransManMapping(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
}
return result;
@@ -262,14 +275,18 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
partialResult[i] = mapping[(unsigned char) string[i]] & 0x7FFFFFFF;
}
}
-#ifdef SCUMM_BIG_ENDIAN
- char *finalResult = convert(to, "UTF-32BE", (char *) partialResult, strlen(string) * 4);
-#else
- char *finalResult = convert(to, "UTF-32LE", (char *) partialResult, strlen(string) * 4);
-#endif // SCUMM_BIG_ENDIAN
+ char *finalResult = convert(to, "UTF-32", (char *) partialResult, strlen(string) * 4);
free(partialResult);
return finalResult;
- } else if (currentCharset.equalsIgnoreCase(to) && String(from).equalsIgnoreCase("utf-32")) {
+ } else if (currentCharset.equalsIgnoreCase(to) && String(from).hasPrefixIgnoreCase("utf-32")) {
+ // We accept only the machine endianness
+#ifdef SCUMM_BIG_ENDIAN
+ if (String(from).hasSuffixIgnoreCase("LE"))
+ return nullptr;
+#else
+ if (String(from).hasSuffixIgnoreCase("BE"))
+ return nullptr;
+#endif
// We can do reverse mapping
const uint32 *mapping = TransMan.getCharsetMapping();
const uint32 *src = (const uint32 *) string;
diff --git a/common/encoding.h b/common/encoding.h
index 014000d..309e0f8 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -221,6 +221,8 @@ class Encoding {
* @param iconvHandle Handle that should be deinited
*/
static void deinitIconv(iconv_t iconvHandle);
+
+ static String addUtfEndianness(const String &str);
};
}
Commit: 3e4b5c7d3b75c5ff59e68c29296b57ac2b7f607d
https://github.com/scummvm/scummvm/commit/3e4b5c7d3b75c5ff59e68c29296b57ac2b7f607d
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Resolve endianity in convertEncoding()
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index e1f7964..d3e8c4d 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -392,6 +392,16 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
if (result != nullptr)
return result;
+ // We accept only the machine endianness
+#ifdef SCUMM_BIG_ENDIAN
+ if (Common::String(from).hasSuffixIgnoreCase("le") ||
+ Common::String(to).hasSuffixIgnoreCase("le"))
+ return nullptr;
+#else
+ if (Common::String(from).hasSuffixIgnoreCase("be") ||
+ Common::String(to).hasSuffixIgnoreCase("be"))
+ return nullptr;
+#endif
// UTF-32 is really important for us, because it is used for the
// transliteration in Common::Encoding and Win32 cannot convert it
if (Common::String(from).hasPrefixIgnoreCase("utf-32")) {
Commit: 5fcd14fbea7b77b9bd91b5324fd377478910f873
https://github.com/scummvm/scummvm/commit/5fcd14fbea7b77b9bd91b5324fd377478910f873
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
TEST: Remove tests for ascii transliteration
This can be handled differently by each conversion method.
The "Šáleček" could be transliterated as "Salecek" or as
"S'alecek" or maybe even differently.
Changed paths:
test/common/encoding.h
diff --git a/test/common/encoding.h b/test/common/encoding.h
index ff445a3..9b848fa 100644
--- a/test/common/encoding.h
+++ b/test/common/encoding.h
@@ -346,16 +346,6 @@ class EncodingTestSuite : public CxxTest::TestSuite {
TS_ASSERT(result != NULL);
TS_ASSERT_SAME_DATA(result, iso_8859_2, 8);
free(result);
-
- result = Common::Encoding::convert("ASCII", "UTF-8", (char *)utf8_2, 11);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, "Salecek", 8);
- free(result);
-
- result = Common::Encoding::convert("ASCII", "iso-8859-2", (char *)iso_8859_2, 7);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, "Salecek", 8);
- free(result);
}
};
#endif
Commit: 41d3a70c5848267916d01c26b99b4b2991ae1d0e
https://github.com/scummvm/scummvm/commit/41d3a70c5848267916d01c26b99b4b2991ae1d0e
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
SDL: Fix convertEncoding for multibyte encodings.
Changed paths:
backends/platform/sdl/sdl.cpp
diff --git a/backends/platform/sdl/sdl.cpp b/backends/platform/sdl/sdl.cpp
index 105a232..0139bac 100644
--- a/backends/platform/sdl/sdl.cpp
+++ b/backends/platform/sdl/sdl.cpp
@@ -771,7 +771,12 @@ int SDL_SetColorKey_replacement(SDL_Surface *surface, Uint32 flag, Uint32 key) {
char *OSystem_SDL::convertEncoding(const char *to, const char *from, const char *string, size_t length) {
#if SDL_VERSION_ATLEAST(2, 0, 0)
debug("Trying SDL...");
- return SDL_iconv_string(to, from, string, length + 1);
+ int zeroBytes = 1;
+ if (Common::String(from).hasPrefixIgnoreCase("utf-16"))
+ zeroBytes = 2;
+ if (Common::String(from).hasPrefixIgnoreCase("utf-32"))
+ zeroBytes = 4;
+ return SDL_iconv_string(to, from, string, length + zeroBytes);
#else
debug("SDL isn't available");
return nullptr;
Commit: 4edf35e414aa43fcce8c5a25aff6b1f1ce92244f
https://github.com/scummvm/scummvm/commit/4edf35e414aa43fcce8c5a25aff6b1f1ce92244f
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Fix conversion of multibyte encodings.
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index d3e8c4d..aa83a37 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -419,7 +419,7 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
}
WCHAR *tmpStr;
- if (Common::String(from).equalsIgnoreCase("utf-16")) {
+ if (Common::String(from).hasPrefixIgnoreCase("utf-16")) {
// Allocate space for string and 2 ending zeros
tmpStr = (WCHAR *) calloc(sizeof(char), length + 2);
if (!tmpStr) {
@@ -431,7 +431,7 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
tmpStr = Win32::ansiToUnicode(string, Win32::getCodePageId(from));
}
- if (Common::String(to).equalsIgnoreCase("utf-16"))
+ if (Common::String(to).hasPrefixIgnoreCase("utf-16"))
return (char *) tmpStr;
else {
result = Win32::unicodeToAnsi(tmpStr, Win32::getCodePageId(to));
Commit: 96270d4bf23aa77bcda5ec98b808dcd26931e0b1
https://github.com/scummvm/scummvm/commit/96270d4bf23aa77bcda5ec98b808dcd26931e0b1
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
JANITORIAL: Remove debuging code.
Changed paths:
backends/platform/sdl/sdl.cpp
common/encoding.cpp
diff --git a/backends/platform/sdl/sdl.cpp b/backends/platform/sdl/sdl.cpp
index 0139bac..800df58 100644
--- a/backends/platform/sdl/sdl.cpp
+++ b/backends/platform/sdl/sdl.cpp
@@ -770,7 +770,6 @@ int SDL_SetColorKey_replacement(SDL_Surface *surface, Uint32 flag, Uint32 key) {
char *OSystem_SDL::convertEncoding(const char *to, const char *from, const char *string, size_t length) {
#if SDL_VERSION_ATLEAST(2, 0, 0)
- debug("Trying SDL...");
int zeroBytes = 1;
if (Common::String(from).hasPrefixIgnoreCase("utf-16"))
zeroBytes = 2;
@@ -778,7 +777,6 @@ char *OSystem_SDL::convertEncoding(const char *to, const char *from, const char
zeroBytes = 4;
return SDL_iconv_string(to, from, string, length + zeroBytes);
#else
- debug("SDL isn't available");
return nullptr;
#endif // SDL_VERSION_ATLEAST(2, 0, 0)
}
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 66d2381..fa30853 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -21,7 +21,6 @@
*/
#include "common/encoding.h"
-#include "common/debug.h"
#include "common/textconsole.h"
#include "common/system.h"
#include "common/translation.h"
@@ -161,19 +160,12 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
#ifdef USE_ICONV
if (iconvHandle != (iconv_t) -1)
result = convertIconv(iconvHandle, string, length);
- else
- debug("Could not convert from %s to %s using iconv", from.c_str(), to.c_str());
- if (result == nullptr)
- debug("Error while converting with iconv");
-#else
- debug("Iconv is not available");
#endif // USE_ICONV
if (result == nullptr)
result = g_system->convertEncoding(addUtfEndianness(to).c_str(),
addUtfEndianness(from).c_str(), string, length);
if (result == nullptr) {
- debug("Could not convert from %s to %s using backend specific conversion", from.c_str(), to.c_str());
result = convertTransManMapping(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
}
@@ -182,7 +174,6 @@ char *Encoding::conversion(iconv_t iconvHandle, const String &to, const String &
char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t length) {
#ifdef USE_ICONV
- debug("Trying iconv...");
size_t inSize = length;
size_t outSize = inSize;
@@ -222,7 +213,6 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
memset(dst, 0, stringSize / 2);
} else {
error = true;
- debug("iconv failed");
break;
}
}
@@ -242,11 +232,8 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
free(buffer);
return nullptr;
}
- debug("Size: %d", stringSize);
-
return buffer;
#else
- debug("Iconv isn't available");
return nullptr;
#endif //USE_ICONV
}
@@ -256,7 +243,6 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
// TransMan encoding to UTF-32 and then it calls convert() again with that.
char *Encoding::convertTransManMapping(const char *to, const char *from, const char *string, size_t length) {
#ifdef USE_TRANSLATION
- debug("Trying TransMan...");
String currentCharset = TransMan.getCurrentCharset();
if (currentCharset.equalsIgnoreCase(from)) {
// We can use the transMan mapping directly
@@ -307,7 +293,6 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
} else
return nullptr;
#else
- debug("TransMan isn't available");
return nullptr;
#endif // USE_TRANSLATION
}
Commit: b20922942c6a567324eb40dfb2367fe1192e173e
https://github.com/scummvm/scummvm/commit/b20922942c6a567324eb40dfb2367fe1192e173e
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add missing documentation.
Changed paths:
common/encoding.h
diff --git a/common/encoding.h b/common/encoding.h
index 309e0f8..70ba2bf 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -222,6 +222,16 @@ class Encoding {
*/
static void deinitIconv(iconv_t iconvHandle);
+ /**
+ * If the string is "utf-16" or "utf-32", this adds either BE for big endian
+ * or LE for little endian to the end of the string. Otherwise this does
+ * nothing.
+ *
+ * @param str String to append the endianness to
+ *
+ * @return The same string with appended endianness if it is needed, or
+ * the same string.
+ */
static String addUtfEndianness(const String &str);
};
Commit: 1346dcc3ef64e1a69a60800e7b8bef4d669846a6
https://github.com/scummvm/scummvm/commit/1346dcc3ef64e1a69a60800e7b8bef4d669846a6
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
TESTBED: Move encoding conversion tests to testbed
This way it is possible to test the backend conversions too.
Changed paths:
A engines/testbed/encoding.cpp
A engines/testbed/encoding.h
R test/common/encoding.h
engines/testbed/module.mk
engines/testbed/testbed.cpp
diff --git a/engines/testbed/encoding.cpp b/engines/testbed/encoding.cpp
new file mode 100644
index 0000000..8fe2c6b
--- /dev/null
+++ b/engines/testbed/encoding.cpp
@@ -0,0 +1,746 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "common/encoding.h"
+#include "common/system.h"
+#include "engines/testbed/encoding.h"
+
+namespace Testbed {
+
+TestExitStatus Encodingtests::testConversionUnicodeMachineEndian() {
+ Testsuite::displayMessage("Encoding conversion tests.\nTo test iconv conversion, compile with --enable-iconv.\nTo test backend conversion, compile with --disable-iconv.\nSome tests may fail without iconv, because some backends support only a handful of conversions.");
+ Common::String info = "Unicode conversion test. Multiple conversions between UTF-8, UTF-16 and UTF-32 in the default machinge endian will be performed.";
+
+ Common::Point pt(0, 100);
+ Testsuite::writeOnScreen("Testing unicode conversion in machine native endianness.", pt);
+
+ if (Testsuite::handleInteractiveInput(info, "OK", "Skip", kOptionRight)) {
+ Testsuite::logPrintf("Info! Skipping test : testConversionUnicodeMachineEndian\n");
+ return kTestSkipped;
+ }
+ // |dolar| cent | euro |
+ unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
+ //| dolar | cent | euro |
+ unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
+ //| dolar | cent | euro
+ unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
+#ifdef SCUMM_BIG_ENDIAN
+ unsigned char *utf16 = utf16be;
+ unsigned char *utf32 = utf32be;
+#else
+ unsigned char *utf16 = utf16le;
+ unsigned char *utf32 = utf32le;
+#endif
+
+ // UTF16 to UTF8
+ Common::Encoding converter("UTF-8", "UTF-16");
+
+ char *result = converter.convert((char *) utf16, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-16 to UTF-8 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf8, 7)) {
+ Testsuite::logPrintf("UTF-16 to UTF-8 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-16", (char *) utf16, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-16 to UTF-8 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf8, 7)) {
+ Testsuite::logPrintf("UTF-16 to UTF-8 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ // UTF32 to UTF8
+ converter.setFrom("UTF-32");
+
+ result = converter.convert((char *) utf32, 12);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-32 to UTF-8 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf8, 7)) {
+ Testsuite::logPrintf("UTF-32 to UTF-8 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-8", "UTF-32", (char *) utf32, 12);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-32 to UTF-8 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf8, 7)) {
+ Testsuite::logPrintf("UTF-32 to UTF-8 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ // UTF32 to UTF16
+ converter.setTo("UTF-16");
+
+ result = converter.convert((char *) utf32, 12);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-32 to UTF-16 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf16, 8)) {
+ Testsuite::logPrintf("UTF-32 to UTF-16 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16", "UTF-32", (char *) utf32, 12);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-32 to UTF-16 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf16, 8)) {
+ Testsuite::logPrintf("UTF-32 to UTF-16 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ // UTF8 to UTF16
+ converter.setFrom("UTF-8");
+
+ result = converter.convert((char *) utf8, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-8 to UTF-16 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf16, 8)) {
+ Testsuite::logPrintf("UTF-8 to UTF-16 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-16", "UTF-8", (char *) utf8, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-8 to UTF-16 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf16, 8)) {
+ Testsuite::logPrintf("UTF-8 to UTF-16 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ // UTF8 to UTF32
+ converter.setTo("UTF-32");
+
+ result = converter.convert((char *) utf8, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-8 to UTF-32 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf32, 16)) {
+ Testsuite::logPrintf("UTF-8 to UTF-32 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32", "UTF-8", (char *) utf8, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-8 to UTF-32 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf32, 16)) {
+ Testsuite::logPrintf("UTF-8 to UTF-32 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ // UTF16 to UTF32
+ converter.setFrom("UTF-16");
+
+ result = converter.convert((char *) utf16, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-16 to UTF-32 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf32, 16)) {
+ Testsuite::logPrintf("UTF-16 to UTF-32 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ result = Common::Encoding::convert("UTF-32", "UTF-16", (char *) utf16, 6);
+ if (result == NULL) {
+ Testsuite::logPrintf("UTF-16 to UTF-32 conversion isn't available");
+ return kTestFailed;
+ }
+ if (memcmp(result, utf32, 16)) {
+ Testsuite::logPrintf("UTF-16 to UTF-32 conversion differs from the expected result.");
+ free(result);
+ return kTestFailed;
+ }
+ free(result);
+
+ return kTestPassed;
+}
+
+/**
+ * Validates the outcome of one encoding conversion and releases the buffer.
+ *
+ * Logs "<what> conversion isn't available" when the converter returned NULL
+ * and "<what> conversion differs from the expected result." when the first
+ * size bytes of the output differ from expected.
+ *
+ * NOTE(review): relies on Testsuite::logPrintf accepting printf-style
+ * arguments - confirm against testsuite.h.
+ *
+ * @param result   Buffer returned by a Common::Encoding conversion; freed here when non-NULL.
+ * @param expected Bytes the converted string has to start with.
+ * @param size     Number of bytes to compare; every caller includes the terminator.
+ * @param what     "X to Y" name of the conversion, used in the log messages.
+ *
+ * @return true if the conversion was available and produced the expected bytes.
+ */
+static bool checkConversion(char *result, const unsigned char *expected, size_t size, const char *what) {
+	if (result == NULL) {
+		Testsuite::logPrintf("%s conversion isn't available", what);
+		return false;
+	}
+
+	const bool matches = memcmp(result, expected, size) == 0;
+	if (!matches)
+		Testsuite::logPrintf("%s conversion differs from the expected result.", what);
+
+	free(result);
+	return matches;
+}
+
+/**
+ * Runs every conversion between UTF-8, UTF-16 and UTF-32 for one byte order.
+ *
+ * Each direction is checked twice: once through a reusable Common::Encoding
+ * object and once through the static Common::Encoding::convert().
+ *
+ * @param utf16Name Name of the UTF-16 variant to test, e.g. "UTF-16BE".
+ * @param utf32Name Name of the UTF-32 variant to test, e.g. "UTF-32BE".
+ * @param utf16     Dollar, cent and euro signs in that UTF-16 variant plus a 2 byte terminator.
+ * @param utf32     Dollar, cent and euro signs in that UTF-32 variant plus a 4 byte terminator.
+ *
+ * @return kTestPassed if all conversions match, kTestFailed at the first one that doesn't.
+ */
+static TestExitStatus runUnicodeConversions(const char *utf16Name, const char *utf32Name, unsigned char *utf16, unsigned char *utf32) {
+	//                     |dollar| cent     | euro            |
+	unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
+
+	// UTF16 to UTF8
+	Common::Encoding converter("UTF-8", utf16Name);
+	if (!checkConversion(converter.convert((char *) utf16, 6), utf8, 7, "UTF-16 to UTF-8"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert("UTF-8", utf16Name, (char *) utf16, 6), utf8, 7, "UTF-16 to UTF-8"))
+		return kTestFailed;
+
+	// UTF32 to UTF8
+	converter.setFrom(utf32Name);
+	if (!checkConversion(converter.convert((char *) utf32, 12), utf8, 7, "UTF-32 to UTF-8"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert("UTF-8", utf32Name, (char *) utf32, 12), utf8, 7, "UTF-32 to UTF-8"))
+		return kTestFailed;
+
+	// UTF32 to UTF16
+	converter.setTo(utf16Name);
+	if (!checkConversion(converter.convert((char *) utf32, 12), utf16, 8, "UTF-32 to UTF-16"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert(utf16Name, utf32Name, (char *) utf32, 12), utf16, 8, "UTF-32 to UTF-16"))
+		return kTestFailed;
+
+	// UTF8 to UTF16
+	converter.setFrom("UTF-8");
+	if (!checkConversion(converter.convert((char *) utf8, 6), utf16, 8, "UTF-8 to UTF-16"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert(utf16Name, "UTF-8", (char *) utf8, 6), utf16, 8, "UTF-8 to UTF-16"))
+		return kTestFailed;
+
+	// UTF8 to UTF32
+	converter.setTo(utf32Name);
+	if (!checkConversion(converter.convert((char *) utf8, 6), utf32, 16, "UTF-8 to UTF-32"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert(utf32Name, "UTF-8", (char *) utf8, 6), utf32, 16, "UTF-8 to UTF-32"))
+		return kTestFailed;
+
+	// UTF16 to UTF32
+	converter.setFrom(utf16Name);
+	if (!checkConversion(converter.convert((char *) utf16, 6), utf32, 16, "UTF-16 to UTF-32"))
+		return kTestFailed;
+	if (!checkConversion(Common::Encoding::convert(utf32Name, utf16Name, (char *) utf16, 6), utf32, 16, "UTF-16 to UTF-32"))
+		return kTestFailed;
+
+	return kTestPassed;
+}
+
+/**
+ * Interactive test of the conversions between UTF-8, UTF-16BE and UTF-32BE.
+ *
+ * @return kTestSkipped if the user skips the test, otherwise the result of
+ *         the conversion checks.
+ */
+TestExitStatus Encodingtests::testConversionUnicodeBigEndian() {
+	Common::String info = "Unicode conversion test. Multiple conversions between UTF-8, UTF-16 and UTF-32 in big endian will be performed.";
+
+	Common::Point pt(0, 100);
+	Testsuite::writeOnScreen("Testing unicode conversion in big endian.", pt);
+
+	if (Testsuite::handleInteractiveInput(info, "OK", "Skip", kOptionRight)) {
+		Testsuite::logPrintf("Info! Skipping test : testConversionUnicodeBigEndian\n");
+		return kTestSkipped;
+	}
+
+	//                        | dollar | cent   | euro      | terminator
+	unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
+	//                        | dollar       | cent         | euro            | terminator
+	unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
+
+	return runUnicodeConversions("UTF-16BE", "UTF-32BE", utf16be, utf32be);
+}
+
+/**
+ * Interactive test of the conversions between UTF-8, UTF-16LE and UTF-32LE.
+ *
+ * @return kTestSkipped if the user skips the test, otherwise the result of
+ *         the conversion checks.
+ */
+TestExitStatus Encodingtests::testConversionUnicodeLittleEndian() {
+	Common::String info = "Unicode conversion test. Multiple conversions between UTF-8, UTF-16 and UTF-32 in little endian will be performed.";
+
+	Common::Point pt(0, 100);
+	Testsuite::writeOnScreen("Testing unicode conversion in little endianness.", pt);
+
+	if (Testsuite::handleInteractiveInput(info, "OK", "Skip", kOptionRight)) {
+		Testsuite::logPrintf("Info! Skipping test : testConversionUnicodeLittleEndian\n");
+		return kTestSkipped;
+	}
+
+	//                        | dollar | cent   | euro      | terminator
+	unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
+	//                        | dollar       | cent         | euro            | terminator
+	unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
+
+	return runUnicodeConversions("UTF-16LE", "UTF-32LE", utf16le, utf32le);
+}
+
+/**
+ * Interactive test converting the same Cyrillic word between UTF-8,
+ * iso-8859-5 and its ASCII transliteration "Zdravstvujte".
+ *
+ * @return kTestSkipped if the user skips the test, kTestFailed at the first
+ *         conversion that is unavailable or wrong, kTestPassed otherwise.
+ */
+TestExitStatus Encodingtests::testCyrillicTransliteration() {
+	Common::String info = "Cyrillic transliteration test. Multiple conversions between unicode, iso-8859-5 and ASCII will be performed.";
+
+	Common::Point pt(0, 100);
+	Testsuite::writeOnScreen("Testing Cyrillic transliteration", pt);
+
+	if (Testsuite::handleInteractiveInput(info, "OK", "Skip", kOptionRight)) {
+		Testsuite::logPrintf("Info! Skipping test : testCyrillicTransliteration\n");
+		return kTestSkipped;
+	}
+
+	unsigned char utf8[] = {/* Z */0xD0, 0x97, /* d */ 0xD0, 0xB4, /* r */ 0xD1, 0x80, /* a */ 0xD0, 0xB0, /* v */ 0xD0, 0xB2, /* s */ 0xD1, 0x81, /* t */ 0xD1, 0x82, /* v */ 0xD0, 0xB2, /* u */ 0xD1, 0x83, /* j */ 0xD0, 0xB9, /* t */ 0xD1, 0x82, /* e */ 0xD0, 0xB5, 0};
+	unsigned char iso_8859_5[] = {0xB7, 0xD4, 0xE0, 0xD0, 0xD2, 0xE1, 0xE2, 0xD2, 0xE3, 0xD9, 0xE2, 0xD5, 0};
+	unsigned char ascii[] = "Zdravstvujte";
+
+	Common::Encoding converter("ASCII", "UTF-8");
+	if (!checkConversion(converter.convert((char *)utf8, 24), ascii, 13, "UTF-8 to ASCII"))
+		return kTestFailed;
+
+	converter.setFrom("iso-8859-5");
+	if (!checkConversion(converter.convert((char *)iso_8859_5, 12), ascii, 13, "iso-8859-5 to ASCII"))
+		return kTestFailed;
+
+	converter.setTo("UTF-8");
+	if (!checkConversion(converter.convert((char *)iso_8859_5, 12), utf8, 25, "iso-8859-5 to UTF-8"))
+		return kTestFailed;
+
+	converter.setTo("iso-8859-5");
+	converter.setFrom("UTF-8");
+	if (!checkConversion(converter.convert((char *)utf8, 24), iso_8859_5, 13, "UTF-8 to iso-8859-5"))
+		return kTestFailed;
+
+	// this should stay the same
+	converter.setFrom("ASCII");
+	if (!checkConversion(converter.convert((char *)ascii, 12), ascii, 13, "ASCII to iso-8859-5"))
+		return kTestFailed;
+
+	return kTestPassed;
+}
+
+/**
+ * Interactive test of conversions between UTF-8 and two single byte
+ * encodings (CP850 and iso-8859-2), in both directions.
+ *
+ * @return kTestSkipped if the user skips the test, kTestFailed at the first
+ *         conversion that is unavailable or wrong, kTestPassed otherwise.
+ */
+TestExitStatus Encodingtests::testOtherConversions() {
+	Common::String info = "Other conversions test. Some regular encoding conversions will be performed.";
+
+	Common::Point pt(0, 100);
+	Testsuite::writeOnScreen("Testing other encoding conversions", pt);
+
+	if (Testsuite::handleInteractiveInput(info, "OK", "Skip", kOptionRight)) {
+		Testsuite::logPrintf("Info! Skipping test : testOtherConversions\n");
+		return kTestSkipped;
+	}
+
+	unsigned char cp850[] = {0x99, 0xE0, 0xEA, 0x41, 0x64, 0};
+	unsigned char utf8_1[] = {0xC3, 0x96, 0xC3, 0x93, 0xC3, 0x9B, 0x41, 0x64, 0};
+
+	unsigned char iso_8859_2[] = {0xA9, 0xE1, 0x6C, 0x65, 0xE8, 0x65, 0x6B, 0};
+	unsigned char utf8_2[] = {0xC5, 0xA0, 0xC3, 0xA1, 0x6C, 0x65, 0xC4, 0x8D, 0x65, 0x6B, 0};
+
+	if (!checkConversion(Common::Encoding::convert("UTF-8", "CP850", (char *)cp850, 5), utf8_1, 9, "CP850 to UTF-8"))
+		return kTestFailed;
+
+	if (!checkConversion(Common::Encoding::convert("CP850", "UTF-8", (char *)utf8_1, 8), cp850, 6, "UTF-8 to CP850"))
+		return kTestFailed;
+
+	if (!checkConversion(Common::Encoding::convert("UTF-8", "iso-8859-2", (char *)iso_8859_2, 7), utf8_2, 11, "iso-8859-2 to UTF-8"))
+		return kTestFailed;
+
+	// utf8_2 holds 10 payload bytes; like every other call, the input length
+	// excludes the terminator (it used to be passed as 11).
+	if (!checkConversion(Common::Encoding::convert("iso-8859-2", "UTF-8", (char *)utf8_2, 10), iso_8859_2, 8, "UTF-8 to iso-8859-2"))
+		return kTestFailed;
+
+	return kTestPassed;
+}
+
+EncodingTestSuite::EncodingTestSuite() {
+	// Name / entry point pairs, listed in the order they get registered.
+	static const struct {
+		const char *name;
+		TestExitStatus (*run)();
+	} tests[] = {
+		{"testConversionUnicodeMachineEndian", &Encodingtests::testConversionUnicodeMachineEndian},
+		{"testConversionUnicodeBigEndian", &Encodingtests::testConversionUnicodeBigEndian},
+		{"testConversionUnicodeLittleEndian", &Encodingtests::testConversionUnicodeLittleEndian},
+		{"testCyrillicTransliteration", &Encodingtests::testCyrillicTransliteration},
+		{"testOtherConversions", &Encodingtests::testOtherConversions}
+	};
+
+	// Each test is added with the same third argument (true) as before.
+	for (unsigned int idx = 0; idx < sizeof(tests) / sizeof(tests[0]); ++idx)
+		addTest(tests[idx].name, tests[idx].run, true);
+}
+
+} // End of namespace Testbed
diff --git a/engines/testbed/encoding.h b/engines/testbed/encoding.h
new file mode 100644
index 0000000..ab01d7a
--- /dev/null
+++ b/engines/testbed/encoding.h
@@ -0,0 +1,71 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TESTBED_ENCODING_H
+#define TESTBED_ENCODING_H
+
+#include "testbed/testsuite.h"
+#include "common/encoding.h"
+
+namespace Testbed {
+
+namespace Encodingtests {
+
+// Helper functions for Speech tests
+
+// will contain function declarations for Encoding tests
+// add more here
+
+// Test entry points registered by EncodingTestSuite. As implemented in
+// encoding.cpp, each one returns kTestPassed when every conversion it checks
+// matches the expected bytes and kTestFailed as soon as one conversion is
+// unavailable or produces different output.
+//
+// - testConversionUnicodeMachineEndian: UTF-8, "UTF-16" and "UTF-32" (names
+//   without a byte order suffix).
+// - testConversionUnicodeBigEndian: UTF-8, UTF-16BE and UTF-32BE.
+// - testConversionUnicodeLittleEndian: UTF-8, UTF-16LE and UTF-32LE.
+// - testCyrillicTransliteration: UTF-8, iso-8859-5 and ASCII.
+// - testOtherConversions: UTF-8 with CP850 and with iso-8859-2.
+TestExitStatus testConversionUnicodeMachineEndian();
+TestExitStatus testConversionUnicodeBigEndian();
+TestExitStatus testConversionUnicodeLittleEndian();
+TestExitStatus testCyrillicTransliteration();
+TestExitStatus testOtherConversions();
+
+} // End of namespace Speechtests
+
+// Testsuite that groups the encoding conversion tests declared above.
+class EncodingTestSuite : public Testsuite {
+public:
+ /**
+ * The constructor for the EncodingTestSuite
+ * For every test to be executed one must:
+ * 1) Create a function that would invoke the test
+ * 2) Add that test to list by executing addTest()
+ *
+ * @see addTest()
+ */
+ EncodingTestSuite();
+ ~EncodingTestSuite() {}
+ // Returns the fixed name of this suite, "Encoding".
+ const char *getName() const {
+ return "Encoding";
+ }
+
+ // Returns the fixed description of this suite, "Encoding conversion".
+ const char *getDescription() const {
+ return "Encoding conversion";
+ }
+
+};
+
+
+} // End of namespace Testbed
+
+#endif // TESTBED_ENCODING_H
diff --git a/engines/testbed/module.mk b/engines/testbed/module.mk
index 5838751..d8ff0e7 100644
--- a/engines/testbed/module.mk
+++ b/engines/testbed/module.mk
@@ -11,6 +11,7 @@ MODULE_OBJS := \
misc.o \
savegame.o \
sound.o \
+ encoding.o \
testbed.o \
testsuite.o
diff --git a/engines/testbed/testbed.cpp b/engines/testbed/testbed.cpp
index f071ab3..1b0c2dd 100644
--- a/engines/testbed/testbed.cpp
+++ b/engines/testbed/testbed.cpp
@@ -38,6 +38,7 @@
#include "testbed/misc.h"
#include "testbed/savegame.h"
#include "testbed/sound.h"
+#include "testbed/encoding.h"
#include "testbed/testbed.h"
#ifdef USE_CLOUD
#include "testbed/cloud.h"
@@ -150,6 +151,8 @@ TestbedEngine::TestbedEngine(OSystem *syst)
ts = new WebserverTestSuite();
_testsuiteList.push_back(ts);
#endif
+ ts = new EncodingTestSuite();
+ _testsuiteList.push_back(ts);
}
TestbedEngine::~TestbedEngine() {
diff --git a/test/common/encoding.h b/test/common/encoding.h
deleted file mode 100644
index 9b848fa..0000000
--- a/test/common/encoding.h
+++ /dev/null
@@ -1,351 +0,0 @@
-#include <cxxtest/TestSuite.h>
-
-#include "common/encoding.h"
-#include "backends/platform/sdl/posix/posix.h"
-
-#ifdef USE_ICONV
-class EncodingTestSuite : public CxxTest::TestSuite {
- public:
- void test_conversion_unicode_machine_endian() {
- // |dolar| cent | euro |
- unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
- //| dolar | cent | euro |
- unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
- //| dolar | cent | euro |
- unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
- //| dolar | cent | euro
- unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
- //| dolar | cent | euro
- unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
-#ifdef SCUMM_BIG_ENDIAN
- unsigned char *utf16 = utf16be;
- unsigned char *utf32 = utf32be;
-#else
- unsigned char *utf16 = utf16le;
- unsigned char *utf32 = utf32le;
-#endif
-
- // UTF16 to UTF8
- Common::Encoding converter("UTF-8", "UTF-16");
-
- char *result = converter.convert((char *) utf16, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-16", (char *) utf16, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF8
- converter.setFrom("UTF-32");
-
- result = converter.convert((char *) utf32, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-32", (char *) utf32, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF16
- converter.setTo("UTF-16");
-
- result = converter.convert((char *) utf32, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16", "UTF-32", (char *) utf32, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16, 8);
- free(result);
-
- // UTF8 to UTF16
- converter.setFrom("UTF-8");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16, 8);
- free(result);
-
- // UTF8 to UTF32
- converter.setTo("UTF-32");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32, 16);
- free(result);
-
- // UTF16 to UTF32
- converter.setFrom("UTF-16");
-
- result = converter.convert((char *) utf16, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32", "UTF-16", (char *) utf16, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32, 16);
- free(result);
- }
-
- void test_conversion_unicode_big_endian() {
- // |dolar| cent | euro |
- unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
- //| dolar | cent | euro |
- unsigned char utf16be[] = {0, 0x24, 0, 0xA2, 0x20, 0xAC, 0, 0};
- //| dolar | cent | euro
- unsigned char utf32be[] = {0, 0, 0, 0x24, 0, 0, 0, 0xA2, 0, 0, 0x20, 0xAC, 0, 0, 0, 0};
-
- // UTF16 to UTF8
- Common::Encoding converter("UTF-8", "UTF-16BE");
-
- char *result = converter.convert((char *) utf16be, 6);
-
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-16BE", (char *) utf16be, 6);
-
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF8
- converter.setFrom("UTF-32BE");
-
- result = converter.convert((char *) utf32be, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-32BE", (char *) utf32be, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF16
- converter.setTo("UTF-16BE");
-
- result = converter.convert((char *) utf32be, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16be, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16BE", "UTF-32BE", (char *) utf32be, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16be, 8);
- free(result);
-
- // UTF8 to UTF16
- converter.setFrom("UTF-8");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16be, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16BE", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16be, 8);
- free(result);
-
- // UTF8 to UTF32
- converter.setTo("UTF-32BE");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32be, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32BE", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32be, 16);
- free(result);
-
- // UTF16 to UTF32
- converter.setFrom("UTF-16BE");
-
- result = converter.convert((char *) utf16be, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32be, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32BE", "UTF-16BE", (char *) utf16be, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32be, 16);
- free(result);
- }
-
- void test_conversion_unicode_little_endian() {
- // |dolar| cent | euro |
- unsigned char utf8[] = {0x24, 0xC2, 0xA2, 0xE2, 0x82, 0xAC, 0};
- //| dolar | cent | euro |
- unsigned char utf16le[] = {0x24, 0, 0xA2, 0, 0xAC, 0x20, 0, 0};
- //| dolar | cent | euro
- unsigned char utf32le[] = {0x24, 0, 0, 0, 0xA2, 0, 0, 0, 0xAC, 0x20, 0, 0, 0, 0, 0, 0};
-
- // UTF16 to UTF8
- Common::Encoding converter("UTF-8", "UTF-16LE");
-
- char *result = converter.convert((char *) utf16le, 6);
-
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-16LE", (char *) utf16le, 6);
-
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF8
- converter.setFrom("UTF-32LE");
-
- result = converter.convert((char *) utf32le, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "UTF-32LE", (char *) utf32le, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 7);
- free(result);
-
- // UTF32 to UTF16
- converter.setTo("UTF-16LE");
-
- result = converter.convert((char *) utf32le, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16le, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16LE", "UTF-32LE", (char *) utf32le, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16le, 8);
- free(result);
-
- // UTF8 to UTF16
- converter.setFrom("UTF-8");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16le, 8);
- free(result);
-
- result = Common::Encoding::convert("UTF-16LE", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf16le, 8);
- free(result);
-
- // UTF8 to UTF32
- converter.setTo("UTF-32LE");
-
- result = converter.convert((char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32le, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32LE", "UTF-8", (char *) utf8, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32le, 16);
- free(result);
-
- // UTF16 to UTF32
- converter.setFrom("UTF-16LE");
-
- result = converter.convert((char *) utf16le, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32le, 16);
- free(result);
-
- result = Common::Encoding::convert("UTF-32LE", "UTF-16LE", (char *) utf16le, 6);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf32le, 16);
- free(result);
- }
-
- void test_cyrilic_transliteration() {
- unsigned char utf8[] = {/* Z */0xD0, 0x97, /* d */ 0xD0, 0xB4, /* r */ 0xD1, 0x80, /* a */ 0xD0, 0xB0, /* v */ 0xD0, 0xB2, /* s */ 0xD1, 0x81, /* t */ 0xD1, 0x82, /* v */ 0xD0, 0xB2, /* u */ 0xD1, 0x83, /* j */ 0xD0, 0xB9, /* t */ 0xD1, 0x82, /* e */ 0xD0, 0xB5, 0};
- unsigned char iso_8859_5[] = {0xB7, 0xD4, 0xE0, 0xD0, 0xD2, 0xE1, 0xE2, 0xD2, 0xE3, 0xD9, 0xE2, 0xD5, 0};
- unsigned char ascii[] = "Zdravstvujte";
-
- Common::Encoding converter("ASCII", "UTF-8");
- char *result = converter.convert((char *)utf8, 24);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, ascii, 13);
- free(result);
-
- converter.setFrom("iso-8859-5");
- result = converter.convert((char *)iso_8859_5, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, ascii, 13);
- free(result);
-
- converter.setTo("UTF-8");
- result = converter.convert((char *)iso_8859_5, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8, 25);
- free(result);
-
- converter.setTo("iso-8859-5");
- converter.setFrom("UTF-8");
- result = converter.convert((char *)utf8, 24);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, iso_8859_5, 13);
- free(result);
-
- // this should stay the same
- converter.setFrom("ASCII");
- result = converter.convert((char *)ascii, 12);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, ascii, 13);
- free(result);
- }
-
- void test_other_conversions() {
- unsigned char cp850[] = {0x99, 0xE0, 0xEA, 0x41, 0x64, 0};
- unsigned char utf8_1[] = {0xC3, 0x96, 0xC3, 0x93, 0xC3, 0x9B, 0x41, 0x64, 0};
-
- unsigned char iso_8859_2[] = {0xA9, 0xE1, 0x6C, 0x65, 0xE8, 0x65, 0x6B, 0};
- unsigned char utf8_2[] = {0xC5, 0xA0, 0xC3, 0xA1, 0x6C, 0x65, 0xC4, 0x8D, 0x65, 0x6B, 0};
-
- char *result = Common::Encoding::convert("UTF-8", "CP850", (char *)cp850, 5);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8_1, 9);
- free(result);
-
- result = Common::Encoding::convert("CP850", "UTF-8", (char *)utf8_1, 8);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, cp850, 6);
- free(result);
-
- result = Common::Encoding::convert("UTF-8", "iso-8859-2", (char *)iso_8859_2, 7);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, utf8_2, 11);
- free(result);
-
- result = Common::Encoding::convert("iso-8859-2", "UTF-8", (char *)utf8_2, 11);
- TS_ASSERT(result != NULL);
- TS_ASSERT_SAME_DATA(result, iso_8859_2, 8);
- free(result);
- }
-};
-#endif
Commit: 05df774905d36203ef357963b74f3bf083b40aa3
https://github.com/scummvm/scummvm/commit/05df774905d36203ef357963b74f3bf083b40aa3
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Add OSystem::convertEncoding documentation
Changed paths:
common/system.h
diff --git a/common/system.h b/common/system.h
index 92fdfc5..77bdcd0 100644
--- a/common/system.h
+++ b/common/system.h
@@ -1493,7 +1493,21 @@ public:
//@}
- protected:
+protected:
+
+ /**
+ * Hook that allows derived classes to implement encoding conversion using a
+ * platform-specific API. The default implementation below performs no
+ * conversion and returns nullptr.
+ * This method shouldn't be called directly. Use Common::Encoding instead.
+ *
+ * @param to Encoding to convert the string to
+ * @param from Encoding to convert the string from
+ * @param string The string that should be converted
+ * @param length Size of the string in bytes
+ *
+ * @return Converted string, which must be freed, or nullptr if the conversion
+ * isn't possible.
+ */
 virtual char *convertEncoding(const char *to, const char *from, const char *string, size_t length) { return nullptr; }
};
Commit: f2715d77ed379317c9068c1244c9517dd16cc99b
https://github.com/scummvm/scummvm/commit/f2715d77ed379317c9068c1244c9517dd16cc99b
Author: vyzigold (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
TESTBED: Comment correction
Co-Authored-By: Cameron Cawley <ccawley2011 at gmail.com>
Changed paths:
engines/testbed/encoding.h
diff --git a/engines/testbed/encoding.h b/engines/testbed/encoding.h
index ab01d7a..2c0f2e1 100644
--- a/engines/testbed/encoding.h
+++ b/engines/testbed/encoding.h
@@ -30,7 +30,7 @@ namespace Testbed {
namespace Encodingtests {
-// Helper functions for Speech tests
+// Helper functions for Encoding tests
// will contain function declarations for Encoding tests
// add more here
@@ -41,7 +41,7 @@ TestExitStatus testConversionUnicodeLittleEndian();
TestExitStatus testCyrillicTransliteration();
TestExitStatus testOtherConversions();
-} // End of namespace Speechtests
+} // End of namespace Encodingtests
class EncodingTestSuite : public Testsuite {
public:
Commit: 4de634ee7646410eb8f330f6974f9ee8eb594b1f
https://github.com/scummvm/scummvm/commit/4de634ee7646410eb8f330f6974f9ee8eb594b1f
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
CONFIGURE: Move check for iconv down
Changed paths:
configure
diff --git a/configure b/configure
index 419c93d..b458004 100755
--- a/configure
+++ b/configure
@@ -4218,25 +4218,6 @@ define_in_config_if_yes "$_vorbis" 'USE_VORBIS'
echo "$_vorbis"
#
-# Check for iconv
-#
-echocheck "Iconv"
-if test "$_iconv" = auto ; then
- _iconv=no
- cat > $TMPC << EOF
-#include <iconv.h>
-int main(void) { iconv_t conv = iconv_open("UTF-8//IGNORE", "CP850"); return 0; }
-EOF
- cc_check $ICONV_CFLAGS $ICONV_LIBS -liconv && _iconv=yes
-fi
-if test "$_iconv" = yes ; then
- append_var LIBS "$ICONV_LIBS -liconv"
- append_var INCLUDES "$ICONV_CFLAGS"
-fi
-define_in_config_if_yes "$_iconv" 'USE_ICONV'
-echo "$_iconv"
-
-#
# Check for Tremor
#
echocheck "Tremor"
@@ -5244,6 +5225,25 @@ if test "$_pandocext" = "default"; then
_pandocext=".$_pandocformat"
fi
fi
+#
+# Check for iconv
+#
+echocheck "Iconv"
+if test "$_iconv" = auto ; then
+ _iconv=no
+ cat > $TMPC << EOF
+#include <iconv.h>
+int main(void) { iconv_t conv = iconv_open("UTF-8//IGNORE", "CP850"); return 0; }
+EOF
+ cc_check $ICONV_CFLAGS $ICONV_LIBS -liconv && _iconv=yes
+fi
+if test "$_iconv" = yes ; then
+ append_var LIBS "$ICONV_LIBS -liconv"
+ append_var INCLUDES "$ICONV_CFLAGS"
+fi
+define_in_config_if_yes "$_iconv" 'USE_ICONV'
+echo "$_iconv"
+
#
# Enable vkeybd / keymapper / event recorder
Commit: 6dba0bbfd421121056fba0d348794ead2928c662
https://github.com/scummvm/scummvm/commit/6dba0bbfd421121056fba0d348794ead2928c662
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
SDL: Remove check for SDL2 in convertEncoding()
SDL_iconv_string() is available even with SDL1
Changed paths:
backends/platform/sdl/sdl.cpp
diff --git a/backends/platform/sdl/sdl.cpp b/backends/platform/sdl/sdl.cpp
index 800df58..b9cccbf 100644
--- a/backends/platform/sdl/sdl.cpp
+++ b/backends/platform/sdl/sdl.cpp
@@ -769,15 +769,11 @@ int SDL_SetColorKey_replacement(SDL_Surface *surface, Uint32 flag, Uint32 key) {
#endif
char *OSystem_SDL::convertEncoding(const char *to, const char *from, const char *string, size_t length) {
-#if SDL_VERSION_ATLEAST(2, 0, 0)
int zeroBytes = 1;
if (Common::String(from).hasPrefixIgnoreCase("utf-16"))
zeroBytes = 2;
if (Common::String(from).hasPrefixIgnoreCase("utf-32"))
zeroBytes = 4;
return SDL_iconv_string(to, from, string, length + zeroBytes);
-#else
- return nullptr;
-#endif // SDL_VERSION_ATLEAST(2, 0, 0)
}
Commit: f8ac40af7c2b687d8dd3ce2f5909ab3f78f4a4bd
https://github.com/scummvm/scummvm/commit/f8ac40af7c2b687d8dd3ce2f5909ab3f78f4a4bd
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Encoding refactoring
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index fa30853..089f039 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -24,7 +24,7 @@
#include "common/textconsole.h"
#include "common/system.h"
#include "common/translation.h"
-#include <cerrno>
+#include <errno.h>
namespace Common {
@@ -246,22 +246,22 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
String currentCharset = TransMan.getCurrentCharset();
if (currentCharset.equalsIgnoreCase(from)) {
// We can use the transMan mapping directly
- uint32 *partialResult = (uint32 *) calloc(sizeof(uint32), (strlen(string) + 1));
+ uint32 *partialResult = (uint32 *) calloc(sizeof(uint32), (length + 1));
if (!partialResult) {
warning("Couldn't allocate memory for encoding conversion");
return nullptr;
}
const uint32 *mapping = TransMan.getCharsetMapping();
if (mapping == 0) {
- for(unsigned i = 0; i < strlen(string); i++) {
+ for(unsigned i = 0; i < length; i++) {
partialResult[i] = string[i];
}
} else {
- for(unsigned i = 0; i < strlen(string); i++) {
+ for(unsigned i = 0; i < length; i++) {
partialResult[i] = mapping[(unsigned char) string[i]] & 0x7FFFFFFF;
}
}
- char *finalResult = convert(to, "UTF-32", (char *) partialResult, strlen(string) * 4);
+ char *finalResult = convert(to, "UTF-32", (char *) partialResult, length * 4);
free(partialResult);
return finalResult;
} else if (currentCharset.equalsIgnoreCase(to) && String(from).hasPrefixIgnoreCase("utf-32")) {
diff --git a/common/encoding.h b/common/encoding.h
index 70ba2bf..7eb4251 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -22,6 +22,7 @@
#ifndef COMMON_ENCODING_H
#define COMMON_ENCODING_H
+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif // HAVE_CONFIG_H
Commit: 04b28b208de7973347699c8a8e2cd355f7dd77f6
https://github.com/scummvm/scummvm/commit/04b28b208de7973347699c8a8e2cd355f7dd77f6
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Fix infinite loop when converting from utf32
Because of how Cyrillic transliteration and UTF-32 are handled on
Windows, it was unfortunately possible to get into an infinite
loop of conversions. The string would get converted to UTF-32
when transliterating, but because the Windows backend conversion
cannot convert from UTF-32, it would use Common::Ustr to convert
it to UTF-8, which would again get converted to UTF-32 when
transliterating, and so on.
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index aa83a37..8562892 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -406,8 +406,8 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
// transliteration in Common::Encoding and Win32 cannot convert it
if (Common::String(from).hasPrefixIgnoreCase("utf-32")) {
Common::U32String UTF32Str((const uint32 *)string, length / 4);
- Common::String UTF8Str = Common::convertUtf32ToUtf8(UTF32Str);
- return Common::Encoding::convert(to, "utf-8", UTF8Str.c_str(), UTF8Str.size());
+ string = Common::convertUtf32ToUtf8(UTF32Str).c_str();
+ from = "utf-8";
}
if (Common::String(to).hasPrefixIgnoreCase("utf-32")) {
char *UTF8Str = Common::Encoding::convert("utf-8", from, string, length);
Commit: e0f2a3460a1ab551b918c72789c2dac88b614a7c
https://github.com/scummvm/scummvm/commit/e0f2a3460a1ab551b918c72789c2dac88b614a7c
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Move getCodePageId to codepage.h
Changed paths:
R backends/platform/sdl/win32/codepage.cpp
backends/platform/sdl/module.mk
backends/platform/sdl/win32/codepage.h
diff --git a/backends/platform/sdl/module.mk b/backends/platform/sdl/module.mk
index 5d34177..62ef94f 100644
--- a/backends/platform/sdl/module.mk
+++ b/backends/platform/sdl/module.mk
@@ -23,8 +23,7 @@ MODULE_OBJS += \
win32/win32-main.o \
win32/win32-window.o \
win32/win32_wrapper.o \
- win32/win32.o \
- win32/codepage.o
+ win32/win32.o
endif
ifdef AMIGAOS
diff --git a/backends/platform/sdl/win32/codepage.cpp b/backends/platform/sdl/win32/codepage.cpp
deleted file mode 100644
index 2bb3501..0000000
--- a/backends/platform/sdl/win32/codepage.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/* ScummVM - Graphic Adventure Engine
- *
- * ScummVM is the legal property of its developers, whose names
- * are too numerous to list here. Please refer to the COPYRIGHT
- * file distributed with this source distribution.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- */
-
-#ifdef WIN32
-#include "backends/platform/sdl/win32/codepage.h"
-namespace Win32 {
-
-int getCodePageId(Common::String codePageName) {
- const CodePageDescription *cp = g_cpDescriptions;
- for (; cp->name; cp++) {
- if (codePageName.equalsIgnoreCase(cp->name))
- return cp->id;
- }
- return -1;
-}
-
-}
-
-#endif
-
diff --git a/backends/platform/sdl/win32/codepage.h b/backends/platform/sdl/win32/codepage.h
index db1f7dd..b645400 100644
--- a/backends/platform/sdl/win32/codepage.h
+++ b/backends/platform/sdl/win32/codepage.h
@@ -192,7 +192,14 @@ const CodePageDescription g_cpDescriptions[] = {
{nullptr, 0} //End
};
-int getCodePageId(Common::String name);
+int getCodePageId(Common::String codePageName) {
+ const CodePageDescription *cp = g_cpDescriptions;
+ for (; cp->name; cp++) {
+ if (codePageName.equalsIgnoreCase(cp->name))
+ return cp->id;
+ }
+ return -1;
+}
}
#endif // WIN32_CODEPAGE_H
Commit: 0c74a7f27eaad70a31a6dae814954973c5a1da85
https://github.com/scummvm/scummvm/commit/0c74a7f27eaad70a31a6dae814954973c5a1da85
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Fix typos
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 089f039..719887d 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -107,8 +107,8 @@ char *Encoding::convertWithTransliteration(iconv_t iconvHandle, const String &to
size_t newLength = length;
if (from.equalsIgnoreCase("iso-8859-5") &&
!to.hasPrefixIgnoreCase("utf")) {
- // There might be some cyrilic characters, which need to be transliterated.
- newString = transliterateCyrilic(string);
+ // There might be some cyrillic characters, which need to be transliterated.
+ newString = transliterateCyrillic(string);
if (!newString)
return nullptr;
newFrom = "ASCII";
@@ -116,7 +116,7 @@ char *Encoding::convertWithTransliteration(iconv_t iconvHandle, const String &to
if (from.hasPrefixIgnoreCase("utf") &&
!to.hasPrefixIgnoreCase("utf") &&
!to.equalsIgnoreCase("iso-8859-5")) {
- // There might be some cyrilic characters, which need to be transliterated.
+ // There might be some cyrillic characters, which need to be transliterated.
char *tmpString;
if (from.hasPrefixIgnoreCase("utf-32"))
tmpString = nullptr;
@@ -198,7 +198,7 @@ char *Encoding::convertIconv(iconv_t iconvHandle, const char *string, size_t len
while (inSize > 0) {
if (iconv(iconvHandle, &src, &inSize, &dst, &outSize) == ((size_t)-1)) {
- // from SDLs implementation of SDL_iconv_string (slightly altered)
+ // from SDL's implementation of SDL_iconv_string (slightly altered)
if (errno == E2BIG) {
char *oldString = buffer;
stringSize *= 2;
@@ -297,7 +297,7 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
#endif // USE_TRANSLATION
}
-static char g_cyrilicTransliterationTable[] = {
+static char g_cyrillicTransliterationTable[] = {
' ', 'E', 'D', 'G', 'E', 'Z', 'I', 'I', 'J', 'L', 'N', 'C', 'K', '-', 'U', 'D',
'A', 'B', 'V', 'G', 'D', 'E', 'Z', 'Z', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'R', 'S', 'T', 'U', 'F', 'H', 'C', 'C', 'S', 'S', '\"', 'Y', '\'', 'E', 'U', 'A',
@@ -306,7 +306,7 @@ static char g_cyrilicTransliterationTable[] = {
'N', 'e', 'd', 'g', 'e', 'z', 'i', 'i', 'j', 'l', 'n', 'c', 'k', '?', 'u', 'd',
};
-char *Encoding::transliterateCyrilic(const char *string) {
+char *Encoding::transliterateCyrillic(const char *string) {
char *result = (char *) malloc(strlen(string) + 1);
if (!result) {
warning("Could not allocate memory for encoding conversion");
@@ -314,7 +314,7 @@ char *Encoding::transliterateCyrilic(const char *string) {
}
for(unsigned i = 0; i <= strlen(string); i++) {
if ((unsigned char) string[i] >= 160)
- result[i] = g_cyrilicTransliterationTable[(unsigned char) string[i] - 160];
+ result[i] = g_cyrillicTransliterationTable[(unsigned char) string[i] - 160];
else
result[i] = string[i];
}
@@ -329,7 +329,7 @@ uint32 *Encoding::transliterateUTF32(const uint32 *string, size_t length) {
}
for(unsigned i = 0; i <= length / 4; i++) {
if (string[i] >= 0x410 && string[i] <= 0x450)
- result[i] = g_cyrilicTransliterationTable[string[i] - 160 - 864];
+ result[i] = g_cyrillicTransliterationTable[string[i] - 160 - 864];
else
result[i] = string[i];
}
diff --git a/common/encoding.h b/common/encoding.h
index 7eb4251..6018eaf 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -181,7 +181,7 @@ class Encoding {
static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
/**
- * Transliterates cyrilic string in iso-8859-5 encoding and returns
+ * Transliterates cyrillic string in iso-8859-5 encoding and returns
* it's ASCII (latin) form.
*
* The result has to be freed after use.
@@ -190,10 +190,10 @@ class Encoding {
*
* @return Transliterated string in ASCII (must be freed) or nullptr on fail.
*/
- static char *transliterateCyrilic(const char *string);
+ static char *transliterateCyrillic(const char *string);
/**
- * Transliterates cyrilic in UTF-32 string.
+ * Transliterates cyrillic in UTF-32 string.
*
* The result has to be freed after use.
*
Commit: fcbf59f5ba37c1cbfdfa0ccacfc6b602033b9d86
https://github.com/scummvm/scummvm/commit/fcbf59f5ba37c1cbfdfa0ccacfc6b602033b9d86
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
COMMON: Convert endianity when needed in Encoding.
Changed paths:
common/encoding.cpp
common/encoding.h
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 719887d..aeae520 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -24,6 +24,7 @@
#include "common/textconsole.h"
#include "common/system.h"
#include "common/translation.h"
+#include "common/endian.h"
#include <errno.h>
namespace Common {
@@ -38,6 +39,29 @@ Encoding::~Encoding() {
deinitIconv(_iconvHandle);
}
+char *Encoding::switchEndian(const char *string, int length, int bitCount) {
+ assert(bitCount % 8 == 0);
+ assert(length % (bitCount / 8) == 0);
+ char *newString = (char *) malloc(length);
+ if (!newString) {
+ warning("Could not allocate memory for string conversion");
+ return nullptr;
+ }
+ if (bitCount == 16) {
+ int characterCount = length / 2;
+ for(int i = 0; i < characterCount ; i++)
+ ((uint16 *) newString)[i] = SWAP_BYTES_16(((const uint16 *) string)[i]);
+ return newString;
+ } else if (bitCount == 32) {
+ int characterCount = length / 4;
+ for(int i = 0; i < characterCount ; i++)
+ ((uint32 *) newString)[i] = SWAP_BYTES_32(((const uint32 *) string)[i]);
+ return newString;
+ } else {
+ return nullptr;
+ }
+}
+
String Encoding::addUtfEndianness(const String &str) {
if (str.equalsIgnoreCase("utf-16") || str.equalsIgnoreCase("utf-32")) {
#ifdef SCUMM_BIG_ENDIAN
@@ -102,6 +126,21 @@ char *Encoding::convertWithTransliteration(iconv_t iconvHandle, const String &to
memcpy(result, string, length);
return result;
}
+ if ((addUtfEndianness(to).equalsIgnoreCase("utf-16be") &&
+ addUtfEndianness(from).equalsIgnoreCase("utf-16le")) ||
+ (addUtfEndianness(to).equalsIgnoreCase("utf-16le") &&
+ addUtfEndianness(from).equalsIgnoreCase("utf-16be")) ||
+ (addUtfEndianness(to).equalsIgnoreCase("utf-32be") &&
+ addUtfEndianness(from).equalsIgnoreCase("utf-32le")) ||
+ (addUtfEndianness(to).equalsIgnoreCase("utf-32le") &&
+ addUtfEndianness(from).equalsIgnoreCase("utf-32be")))
+ {
+ // The encoding is the same, we just need to switch the endianness
+ if (to.hasPrefixIgnoreCase("utf-16"))
+ return switchEndian(string, length, 16);
+ else
+ return switchEndian(string, length, 32);
+ }
char *newString = nullptr;
String newFrom = from;
size_t newLength = length;
@@ -265,20 +304,34 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
free(partialResult);
return finalResult;
} else if (currentCharset.equalsIgnoreCase(to) && String(from).hasPrefixIgnoreCase("utf-32")) {
- // We accept only the machine endianness
+ bool swapEndian = false;
+ char *newString = nullptr;
+
#ifdef SCUMM_BIG_ENDIAN
if (String(from).hasSuffixIgnoreCase("LE"))
- return nullptr;
+ swapEndian = true;
#else
if (String(from).hasSuffixIgnoreCase("BE"))
- return nullptr;
+ swapEndian = true;
#endif
+ if (swapEndian) {
+ if (String(from).hasPrefixIgnoreCase("utf-16"))
+ newString = switchEndian(string, length, 16);
+ if (String(from).hasPrefixIgnoreCase("utf-32"))
+ newString = switchEndian(string, length, 32);
+ if (newString != nullptr)
+ string = newString;
+ else
+ return nullptr;
+ }
// We can do reverse mapping
const uint32 *mapping = TransMan.getCharsetMapping();
const uint32 *src = (const uint32 *) string;
char *result = (char *) calloc(sizeof(char), (length + 4));
if (!result) {
warning("Couldn't allocate memory for encoding conversion");
+ if (newString != nullptr)
+ free(newString);
return nullptr;
}
for (unsigned i = 0; i < length; i++) {
@@ -289,6 +342,8 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
}
}
}
+ if (newString != nullptr)
+ free(newString);
return result;
} else
return nullptr;
diff --git a/common/encoding.h b/common/encoding.h
index 6018eaf..c8f864d 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -35,6 +35,11 @@ typedef void* iconv_t;
#include "common/scummsys.h"
#include "common/str.h"
+#include "common/system.h"
+
+#ifdef WIN32
+#include "backends/platform/sdl/win32/win32.h"
+#endif
namespace Common {
@@ -44,6 +49,9 @@ namespace Common {
* ScummVM is compiled with or without iconv.
*/
class Encoding {
+#ifdef WIN32
+ friend char *OSystem_Win32::convertEncoding(const char*, const char *, const char *, size_t);
+#endif
public:
/**
* Constructs everything needed for the conversion between 2 encodings
@@ -234,6 +242,17 @@ class Encoding {
* the same string.
*/
static String addUtfEndianness(const String &str);
+
+ /**
+ * Switches the endianity of a string.
+ *
+ * @param string Array containing the characters of a string.
+ * @param length Length of the string in bytes
+ * @param bitCount Number of bits used for each character.
+ *
+ * @return Array of characters with the opposite endianity
+ */
+ static char *switchEndian(const char *string, int length, int bitCount);
};
}
Commit: 6e72cd2c08d90fa193f494f38b878b9b6b3cb7dd
https://github.com/scummvm/scummvm/commit/6e72cd2c08d90fa193f494f38b878b9b6b3cb7dd
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Handle endianity in convertEncoding
Changed paths:
backends/platform/sdl/win32/win32.cpp
backends/platform/sdl/win32/win32.h
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index 8562892..ff3f92b 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -388,19 +388,43 @@ AudioCDManager *OSystem_Win32::createAudioCDManager() {
}
char *OSystem_Win32::convertEncoding(const char* to, const char *from, const char *string, size_t length) {
+ char *newString = nullptr;
char *result = OSystem_SDL::convertEncoding(to, from, string, length);
if (result != nullptr)
return result;
// We accept only the machine endianness
+ bool swapFromEndian = false;
#ifdef SCUMM_BIG_ENDIAN
- if (Common::String(from).hasSuffixIgnoreCase("le") ||
- Common::String(to).hasSuffixIgnoreCase("le"))
- return nullptr;
+ if (Common::String(from).hasSuffixIgnoreCase("le"))
+ swapFromEndian = true;
#else
- if (Common::String(from).hasSuffixIgnoreCase("be") ||
- Common::String(to).hasSuffixIgnoreCase("be"))
- return nullptr;
+ if (Common::String(from).hasSuffixIgnoreCase("be"))
+ swapFromEndian = true;
+#endif
+ if (swapFromEndian) {
+ if (Common::String(from).hasPrefixIgnoreCase("utf-16")) {
+ newString = Common::Encoding::switchEndian(string, length, 16);
+ from = "utf-16";
+ }
+ else if (Common::String(from).hasPrefixIgnoreCase("utf-32")) {
+ newString = Common::Encoding::switchEndian(string, length, 32);
+ from = "utf-32";
+ }
+ else
+ return nullptr;
+ if (newString != nullptr)
+ string = newString;
+ else
+ return nullptr;
+ }
+ bool swapToEndian = false;
+#ifdef SCUMM_BIG_ENDIAN
+ if (Common::String(to).hasSuffixIgnoreCase("le"))
+ swapToEndian = true;
+#else
+ if (Common::String(to).hasSuffixIgnoreCase("be"))
+ swapToEndian = true;
#endif
// UTF-32 is really important for us, because it is used for the
// transliteration in Common::Encoding and Win32 cannot convert it
@@ -413,16 +437,30 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
char *UTF8Str = Common::Encoding::convert("utf-8", from, string, length);
Common::U32String UTF32Str = Common::convertUtf8ToUtf32(UTF8Str);
free(UTF8Str);
- result = (char *) malloc((UTF32Str.size() + 1) * 4);
- memcpy(result, UTF32Str.c_str(), (UTF32Str.size() + 1) * 4);
+ if (swapToEndian) {
+ result = Common::Encoding::switchEndian((const char *) UTF32Str.c_str(),
+ (UTF32Str.size() + 1) * 4,
+ 32);
+ } else {
+ result = (char *) malloc((UTF32Str.size() + 1) * 4);
+ memcpy(result, UTF32Str.c_str(), (UTF32Str.size() + 1) * 4);
+ }
+ if (newString != nullptr)
+ free(newString);
return result;
}
+ // Add ending zeros
+ char *wString = (char *) calloc(sizeof(char), length + 2);
+ memcpy(wString, string, length);
+
WCHAR *tmpStr;
if (Common::String(from).hasPrefixIgnoreCase("utf-16")) {
// Allocate space for string and 2 ending zeros
tmpStr = (WCHAR *) calloc(sizeof(char), length + 2);
if (!tmpStr) {
+ if (newString != nullptr)
+ free(newString);
warning("Could not allocate memory for string conversion");
return nullptr;
}
@@ -431,9 +469,19 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
tmpStr = Win32::ansiToUnicode(string, Win32::getCodePageId(from));
}
- if (Common::String(to).hasPrefixIgnoreCase("utf-16"))
+ free(wString);
+
+ if (newString != nullptr)
+ free(newString);
+
+ if (Common::String(to).hasPrefixIgnoreCase("utf-16")) {
+ if (swapToEndian) {
+ result = Common::Encoding::switchEndian((char *)tmpStr, wcslen(tmpStr) * 2 + 2, 16);
+ free(tmpStr);
+ return result;
+ }
return (char *) tmpStr;
- else {
+ } else {
result = Win32::unicodeToAnsi(tmpStr, Win32::getCodePageId(to));
free(tmpStr);
return result;
diff --git a/backends/platform/sdl/win32/win32.h b/backends/platform/sdl/win32/win32.h
index 2a496f5..1c24285 100644
--- a/backends/platform/sdl/win32/win32.h
+++ b/backends/platform/sdl/win32/win32.h
@@ -27,6 +27,7 @@
#include "backends/platform/sdl/win32/win32-window.h"
class OSystem_Win32 : public OSystem_SDL {
+ friend class Common::Encoding;
public:
virtual void init();
virtual void initBackend();
Commit: 3cb57e2078daa86a9b7fcfd76ad151ffd6890af5
https://github.com/scummvm/scummvm/commit/3cb57e2078daa86a9b7fcfd76ad151ffd6890af5
Author: Jaromir Wysoglad (jaromirwysoglad at gmail.com)
Date: 2019-08-24T18:12:45+03:00
Commit Message:
WIN32: Delete obsolete comment in convertEncoding.
Changed paths:
backends/platform/sdl/win32/win32.cpp
diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index ff3f92b..16b7a1f 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -393,7 +393,6 @@ char *OSystem_Win32::convertEncoding(const char* to, const char *from, const cha
if (result != nullptr)
return result;
- // We accept only the machine endianness
bool swapFromEndian = false;
#ifdef SCUMM_BIG_ENDIAN
if (Common::String(from).hasSuffixIgnoreCase("le"))
More information about the Scummvm-git-logs
mailing list