[Scummvm-git-logs] scummvm master -> 4d7a2439b13d6806374ed29425c342a1d379ee9b

Fri Nov 29 01:14:28 UTC 2024

This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
4d7a2439b1 COMMON: Reduce duplication in the StringTokenizer classes


Commit: 4d7a2439b13d6806374ed29425c342a1d379ee9b
    https://github.com/scummvm/scummvm/commit/4d7a2439b13d6806374ed29425c342a1d379ee9b
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2024-11-29T03:14:24+02:00

Commit Message:
COMMON: Reduce duplication in the StringTokenizer classes

Changed paths:
    common/tokenizer.cpp
    common/tokenizer.h

diff --git a/common/tokenizer.cpp b/common/tokenizer.cpp
index 79d79854e4f..a1c1cc2b02c 100644
--- a/common/tokenizer.cpp
+++ b/common/tokenizer.cpp
@@ -23,85 +23,20 @@
 
 namespace Common {
 
-StringTokenizer::StringTokenizer(const String &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
+template<class T>
+BaseStringTokenizer<T>::BaseStringTokenizer(const T &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
 	reset();
 }
 
-void StringTokenizer::reset() {
-	_tokenBegin = _tokenEnd = 0;
-}
-
-bool StringTokenizer::empty() const {
-	// Search for the next token's start (i.e. the next non-delimiter character)
-	for (uint i = _tokenEnd; i < _str.size(); i++) {
-		if (!_delimiters.contains(_str[i]))
-			return false; // Found a token so the tokenizer is not empty
-	}
-	// Didn't find any more tokens so the tokenizer is empty
-	return true;
-}
-
-String StringTokenizer::nextToken() {
-	// Seek to next token's start (i.e. jump over the delimiters before next token)
-	for (_tokenBegin = _tokenEnd; _tokenBegin < _str.size() && _delimiters.contains(_str[_tokenBegin]); _tokenBegin++)
-		;
-	// Seek to the token's end (i.e. jump over the non-delimiters)
-	for (_tokenEnd = _tokenBegin; _tokenEnd < _str.size() && !_delimiters.contains(_str[_tokenEnd]); _tokenEnd++)
-		;
-	// Return the found token
-	return String(_str.c_str() + _tokenBegin, _tokenEnd - _tokenBegin);
-}
-
-StringArray StringTokenizer::split() {
-	StringArray res;
-
-	while (!empty())
-		res.push_back(nextToken());
-
-	return res;
-}
-
-String StringTokenizer::delimitersAtTokenBegin() const {
-	// First token appears at beginning of the string, or no tokens have been extracted yet
-	if (_tokenBegin == 0)
-		return String();
-
-	// Iterate backwards until we hit either the previous token, or the beginning of the input string
-	int delimitersBegin;
-	for (delimitersBegin = _tokenBegin - 1; delimitersBegin >= 0 && _delimiters.contains(_str[delimitersBegin]); delimitersBegin--)
-		;
-
-	++delimitersBegin;
-	
-	// Return the delimiters
-	return String(_str.c_str() + delimitersBegin, _tokenBegin - delimitersBegin);
-}
-
-String StringTokenizer::delimitersAtTokenEnd() const {
-	// Last token appears at end of the string, or no tokens have been extracted yet
-	if (_tokenEnd == 0 || _tokenEnd == _str.size())
-		return String();
-
-	// Iterate forwards until we hit either the next token, or the end of the input string
-	uint delimitersEnd;
-	for (delimitersEnd = _tokenEnd; delimitersEnd < _str.size() && _delimiters.contains(_str[delimitersEnd]); delimitersEnd++)
-		;
-	
-	// Return the delimiters
-	return String(_str.c_str() + _tokenEnd, delimitersEnd - _tokenEnd);
-}
-
-U32StringTokenizer::U32StringTokenizer(const U32String &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
-	reset();
-}
-
-void U32StringTokenizer::reset() {
+template<class T>
+void BaseStringTokenizer<T>::reset() {
 	_tokenBegin = _tokenEnd = _str.begin();
 }
 
-bool U32StringTokenizer::empty() const {
+template<class T>
+bool BaseStringTokenizer<T>::empty() const {
 	// Search for the next token's start (i.e. the next non-delimiter character)
-	for (U32String::const_iterator itr = _tokenEnd; itr != _str.end(); itr++) {
+	for (typename T::const_iterator itr = _tokenEnd; itr != _str.end(); itr++) {
 		if (!_delimiters.contains(*itr)) {
 			return false; // Found a token so the tokenizer is not empty
 		}
@@ -111,7 +46,8 @@ bool U32StringTokenizer::empty() const {
 	return true;
 }
 
-U32String U32StringTokenizer::nextToken() {
+template<class T>
+T BaseStringTokenizer<T>::nextToken() {
 	// Skip delimiters when present at the beginning, to point to the next token
 	// For example, the below loop will set _tokenBegin & _tokenEnd to 'H' for the string -> "!!--=Hello World"
 	// And subsequently, skip all delimiters in the beginning of the next word.
@@ -123,20 +59,21 @@ U32String U32StringTokenizer::nextToken() {
 	// Loop and advance _tokenEnd until we find a delimiter at the end of a word/string
 	while (_tokenBegin != _str.end() && _tokenEnd != _str.end()) {
 		if (_delimiters.contains(*_tokenEnd)) {
-			return U32String(_tokenBegin, _tokenEnd);
+			return T(_tokenBegin, _tokenEnd);
 		}
 		_tokenEnd++;
 	}
 
 	// Returning the last word if _tokenBegin iterator isn't at the end.
 	if (_tokenBegin != _str.end())
-		return U32String(_tokenBegin, _tokenEnd);
+		return T(_tokenBegin, _tokenEnd);
 	else
-		return U32String();
+		return T();
 }
 
-U32StringArray U32StringTokenizer::split() {
-	U32StringArray res;
+template<class T>
+Array<T> BaseStringTokenizer<T>::split() {
+	Array<T> res;
 
 	while (!empty())
 		res.push_back(nextToken());
@@ -144,35 +81,39 @@ U32StringArray U32StringTokenizer::split() {
 	return res;
 }
 
-U32String U32StringTokenizer::delimitersAtTokenBegin() const {
+template<class T>
+T BaseStringTokenizer<T>::delimitersAtTokenBegin() const {
 	// First token appears at beginning of the string, or no tokens have been extracted yet
 	if (_tokenBegin == _str.begin())
-		return U32String();
+		return T();
 
 	// Iterate backwards until we hit either the previous token, or the beginning of the input string
-	U32String::const_iterator delimitersBegin;
+	typename T::const_iterator delimitersBegin;
 	for (delimitersBegin = _tokenBegin - 1; delimitersBegin >= _str.begin() && _delimiters.contains(*delimitersBegin); delimitersBegin--)
 		;
 
 	++delimitersBegin;
 	
 	// Return the delimiters
-	return U32String(delimitersBegin, _tokenBegin - delimitersBegin);
+	return T(delimitersBegin, _tokenBegin - delimitersBegin);
 }
 
-U32String U32StringTokenizer::delimitersAtTokenEnd() const {
+template<class T>
+T BaseStringTokenizer<T>::delimitersAtTokenEnd() const {
 	// Last token appears at end of the string, or no tokens have been extracted yet
 	if (_tokenEnd == _str.begin() || _tokenEnd == _str.end())
-		return String();
+		return T();
 
 	// Iterate forwards until we hit either the next token, or the end of the input string
-	U32String::const_iterator delimitersEnd;
+	typename T::const_iterator delimitersEnd;
 	for (delimitersEnd = _tokenEnd; delimitersEnd < _str.end() && _delimiters.contains(*delimitersEnd); delimitersEnd++)
 		;
 	
 	// Return the delimiters
-	return U32String(_tokenEnd, delimitersEnd - _tokenEnd);
+	return T(_tokenEnd, delimitersEnd - _tokenEnd);
 }
 
+template class BaseStringTokenizer<String>;
+template class BaseStringTokenizer<U32String>;
 
 } // End of namespace Common
diff --git a/common/tokenizer.h b/common/tokenizer.h
index b5d8fb1f24a..b398e1d8082 100644
--- a/common/tokenizer.h
+++ b/common/tokenizer.h
@@ -42,61 +42,34 @@ namespace Common {
  * Example of use:
  * StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
  */
-class StringTokenizer {
+template<class T>
+class BaseStringTokenizer {
 public:
 	/**
-	 * Creates a StringTokenizer.
+	 * Creates a BaseStringTokenizer.
 	 * @param str The string to be tokenized.
 	 * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
 	 * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
 	 */
-	StringTokenizer(const String &str, const String &delimiters = " \t\r\n\f\v");
-	void reset();       ///< Resets the tokenizer to its initial state
-	bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
-	String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
-	StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
-
-	String delimitersAtTokenBegin() const; ///< Returns a String with all delimiters between the current and previous token
-	String delimitersAtTokenEnd() const;   ///< Returns a String with all delimiters between the current and next token
-
-private:
-	const String _str;        ///< The string to be tokenized
-	const String _delimiters; ///< String containing all the delimiter characters
-	uint         _tokenBegin; ///< Latest found token's begin (Valid after a call to nextToken(), zero otherwise)
-	uint         _tokenEnd;   ///< Latest found token's end (Valid after a call to nextToken(), zero otherwise)
-};
-
-/**
- * A simple non-optimized unicode-string tokenizer.
- *
- * Example of use:
- * U32StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
- * Using non-ascii chars will also work, and is recommended to use this over StringTokenizer if string contains unicode chars.
- */
-class U32StringTokenizer {
-public:
-	/**
-	 * Creates a UnicodeStringTokenizer.
-	 * @param str The unicode string to be tokenized.
-	 * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
-	 * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
-	 */
-	U32StringTokenizer(const U32String &str, const String &delimiters = " \t\r\n\f\v");
+	BaseStringTokenizer(const T &str, const String &delimiters = " \t\r\n\f\v");
 	void reset();       ///< Resets the tokenizer to its initial state, i.e. points both token iterators to the beginning
 	bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
-	U32String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
-	U32StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
+	T nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
+	Array<T> split(); ///< Returns an Array with all tokens. Beware of the memory usage
 
-	U32String delimitersAtTokenBegin() const; ///< Returns a U32String with all delimiters between the current and previous token
-	U32String delimitersAtTokenEnd() const;   ///< Returns a U32String with all delimiters between the current and next token
+	T delimitersAtTokenBegin() const; ///< Returns a String with all delimiters between the current and previous token
+	T delimitersAtTokenEnd() const;   ///< Returns a String with all delimiters between the current and next token
 
 private:
-	const U32String _str;        ///< The unicode string to be tokenized
-	const String    _delimiters; ///< String containing all the delimiter characters
-	U32String::const_iterator            _tokenBegin; ///< Latest found token's begin iterator (Valid after a call to nextToken())
-	U32String::const_iterator            _tokenEnd;   ///< Latest found token's end iterator (Valid after a call to nextToken())
+	const T           _str;        ///< The unicode string to be tokenized
+	const String      _delimiters; ///< String containing all the delimiter characters
+	typename T::const_iterator _tokenBegin; ///< Latest found token's begin iterator (Valid after a call to nextToken())
+	typename T::const_iterator _tokenEnd;   ///< Latest found token's end iterator (Valid after a call to nextToken())
 };
 
+typedef BaseStringTokenizer<String> StringTokenizer;
+typedef BaseStringTokenizer<U32String> U32StringTokenizer;
+
 /** @} */
 
 } // End of namespace Common