[Scummvm-git-logs] scummvm master -> 4d7a2439b13d6806374ed29425c342a1d379ee9b
bluegr
noreply at scummvm.org
Fri Nov 29 01:14:28 UTC 2024
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
4d7a2439b1 COMMON: Reduce duplication in the StringTokenizer classes
Commit: 4d7a2439b13d6806374ed29425c342a1d379ee9b
https://github.com/scummvm/scummvm/commit/4d7a2439b13d6806374ed29425c342a1d379ee9b
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2024-11-29T03:14:24+02:00
Commit Message:
COMMON: Reduce duplication in the StringTokenizer classes
Changed paths:
common/tokenizer.cpp
common/tokenizer.h
diff --git a/common/tokenizer.cpp b/common/tokenizer.cpp
index 79d79854e4f..a1c1cc2b02c 100644
--- a/common/tokenizer.cpp
+++ b/common/tokenizer.cpp
@@ -23,85 +23,20 @@
namespace Common {
-StringTokenizer::StringTokenizer(const String &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
+template<class T>
+BaseStringTokenizer<T>::BaseStringTokenizer(const T &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
reset();
}
-void StringTokenizer::reset() {
- _tokenBegin = _tokenEnd = 0;
-}
-
-bool StringTokenizer::empty() const {
- // Search for the next token's start (i.e. the next non-delimiter character)
- for (uint i = _tokenEnd; i < _str.size(); i++) {
- if (!_delimiters.contains(_str[i]))
- return false; // Found a token so the tokenizer is not empty
- }
- // Didn't find any more tokens so the tokenizer is empty
- return true;
-}
-
-String StringTokenizer::nextToken() {
- // Seek to next token's start (i.e. jump over the delimiters before next token)
- for (_tokenBegin = _tokenEnd; _tokenBegin < _str.size() && _delimiters.contains(_str[_tokenBegin]); _tokenBegin++)
- ;
- // Seek to the token's end (i.e. jump over the non-delimiters)
- for (_tokenEnd = _tokenBegin; _tokenEnd < _str.size() && !_delimiters.contains(_str[_tokenEnd]); _tokenEnd++)
- ;
- // Return the found token
- return String(_str.c_str() + _tokenBegin, _tokenEnd - _tokenBegin);
-}
-
-StringArray StringTokenizer::split() {
- StringArray res;
-
- while (!empty())
- res.push_back(nextToken());
-
- return res;
-}
-
-String StringTokenizer::delimitersAtTokenBegin() const {
- // First token appears at beginning of the string, or no tokens have been extracted yet
- if (_tokenBegin == 0)
- return String();
-
- // Iterate backwards until we hit either the previous token, or the beginning of the input string
- int delimitersBegin;
- for (delimitersBegin = _tokenBegin - 1; delimitersBegin >= 0 && _delimiters.contains(_str[delimitersBegin]); delimitersBegin--)
- ;
-
- ++delimitersBegin;
-
- // Return the delimiters
- return String(_str.c_str() + delimitersBegin, _tokenBegin - delimitersBegin);
-}
-
-String StringTokenizer::delimitersAtTokenEnd() const {
- // Last token appears at end of the string, or no tokens have been extracted yet
- if (_tokenEnd == 0 || _tokenEnd == _str.size())
- return String();
-
- // Iterate forwards until we hit either the next token, or the end of the input string
- uint delimitersEnd;
- for (delimitersEnd = _tokenEnd; delimitersEnd < _str.size() && _delimiters.contains(_str[delimitersEnd]); delimitersEnd++)
- ;
-
- // Return the delimiters
- return String(_str.c_str() + _tokenEnd, delimitersEnd - _tokenEnd);
-}
-
-U32StringTokenizer::U32StringTokenizer(const U32String &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
- reset();
-}
-
-void U32StringTokenizer::reset() {
+template<class T>
+void BaseStringTokenizer<T>::reset() {
_tokenBegin = _tokenEnd = _str.begin();
}
-bool U32StringTokenizer::empty() const {
+template<class T>
+bool BaseStringTokenizer<T>::empty() const {
// Search for the next token's start (i.e. the next non-delimiter character)
- for (U32String::const_iterator itr = _tokenEnd; itr != _str.end(); itr++) {
+ for (typename T::const_iterator itr = _tokenEnd; itr != _str.end(); itr++) {
if (!_delimiters.contains(*itr)) {
return false; // Found a token so the tokenizer is not empty
}
@@ -111,7 +46,8 @@ bool U32StringTokenizer::empty() const {
return true;
}
-U32String U32StringTokenizer::nextToken() {
+template<class T>
+T BaseStringTokenizer<T>::nextToken() {
// Skip delimiters when present at the beginning, to point to the next token
// For example, the below loop will set _tokenBegin & _tokenEnd to 'H' for the string -> "!!--=Hello World"
// And subsequently, skip all delimiters in the beginning of the next word.
@@ -123,20 +59,21 @@ U32String U32StringTokenizer::nextToken() {
// Loop and advance _tokenEnd until we find a delimiter at the end of a word/string
while (_tokenBegin != _str.end() && _tokenEnd != _str.end()) {
if (_delimiters.contains(*_tokenEnd)) {
- return U32String(_tokenBegin, _tokenEnd);
+ return T(_tokenBegin, _tokenEnd);
}
_tokenEnd++;
}
// Returning the last word if _tokenBegin iterator isn't at the end.
if (_tokenBegin != _str.end())
- return U32String(_tokenBegin, _tokenEnd);
+ return T(_tokenBegin, _tokenEnd);
else
- return U32String();
+ return T();
}
-U32StringArray U32StringTokenizer::split() {
- U32StringArray res;
+template<class T>
+Array<T> BaseStringTokenizer<T>::split() {
+ Array<T> res;
while (!empty())
res.push_back(nextToken());
@@ -144,35 +81,39 @@ U32StringArray U32StringTokenizer::split() {
return res;
}
-U32String U32StringTokenizer::delimitersAtTokenBegin() const {
+template<class T>
+T BaseStringTokenizer<T>::delimitersAtTokenBegin() const {
// First token appears at beginning of the string, or no tokens have been extracted yet
if (_tokenBegin == _str.begin())
- return U32String();
+ return T();
// Iterate backwards until we hit either the previous token, or the beginning of the input string
- U32String::const_iterator delimitersBegin;
+ typename T::const_iterator delimitersBegin;
for (delimitersBegin = _tokenBegin - 1; delimitersBegin >= _str.begin() && _delimiters.contains(*delimitersBegin); delimitersBegin--)
;
++delimitersBegin;
// Return the delimiters
- return U32String(delimitersBegin, _tokenBegin - delimitersBegin);
+ return T(delimitersBegin, _tokenBegin - delimitersBegin);
}
-U32String U32StringTokenizer::delimitersAtTokenEnd() const {
+template<class T>
+T BaseStringTokenizer<T>::delimitersAtTokenEnd() const {
// Last token appears at end of the string, or no tokens have been extracted yet
if (_tokenEnd == _str.begin() || _tokenEnd == _str.end())
- return String();
+ return T();
// Iterate forwards until we hit either the next token, or the end of the input string
- U32String::const_iterator delimitersEnd;
+ typename T::const_iterator delimitersEnd;
for (delimitersEnd = _tokenEnd; delimitersEnd < _str.end() && _delimiters.contains(*delimitersEnd); delimitersEnd++)
;
// Return the delimiters
- return U32String(_tokenEnd, delimitersEnd - _tokenEnd);
+ return T(_tokenEnd, delimitersEnd - _tokenEnd);
}
+template class BaseStringTokenizer<String>;
+template class BaseStringTokenizer<U32String>;
} // End of namespace Common
diff --git a/common/tokenizer.h b/common/tokenizer.h
index b5d8fb1f24a..b398e1d8082 100644
--- a/common/tokenizer.h
+++ b/common/tokenizer.h
@@ -42,61 +42,34 @@ namespace Common {
* Example of use:
* StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
*/
-class StringTokenizer {
+template<class T>
+class BaseStringTokenizer {
public:
/**
- * Creates a StringTokenizer.
+ * Creates a BaseStringTokenizer.
* @param str The string to be tokenized.
* @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
* @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
*/
- StringTokenizer(const String &str, const String &delimiters = " \t\r\n\f\v");
- void reset(); ///< Resets the tokenizer to its initial state
- bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
- String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
- StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
-
- String delimitersAtTokenBegin() const; ///< Returns a String with all delimiters between the current and previous token
- String delimitersAtTokenEnd() const; ///< Returns a String with all delimiters between the current and next token
-
-private:
- const String _str; ///< The string to be tokenized
- const String _delimiters; ///< String containing all the delimiter characters
- uint _tokenBegin; ///< Latest found token's begin (Valid after a call to nextToken(), zero otherwise)
- uint _tokenEnd; ///< Latest found token's end (Valid after a call to nextToken(), zero otherwise)
-};
-
-/**
- * A simple non-optimized unicode-string tokenizer.
- *
- * Example of use:
- * U32StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
- * Using non-ascii chars will also work, and is recommended to use this over StringTokenizer if string contains unicode chars.
- */
-class U32StringTokenizer {
-public:
- /**
- * Creates a UnicodeStringTokenizer.
- * @param str The unicode string to be tokenized.
- * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
- * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
- */
- U32StringTokenizer(const U32String &str, const String &delimiters = " \t\r\n\f\v");
+ BaseStringTokenizer(const T &str, const String &delimiters = " \t\r\n\f\v");
void reset(); ///< Resets the tokenizer to its initial state, i.e points boten token iterators to the beginning
bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
- U32String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
- U32StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
+ T nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
+ Array<T> split(); ///< Returns an Array with all tokens. Beware of the memory usage
- U32String delimitersAtTokenBegin() const; ///< Returns a U32String with all delimiters between the current and previous token
- U32String delimitersAtTokenEnd() const; ///< Returns a U32String with all delimiters between the current and next token
+ T delimitersAtTokenBegin() const; ///< Returns a String with all delimiters between the current and previous token
+ T delimitersAtTokenEnd() const; ///< Returns a String with all delimiters between the current and next token
private:
- const U32String _str; ///< The unicode string to be tokenized
- const String _delimiters; ///< String containing all the delimiter characters
- U32String::const_iterator _tokenBegin; ///< Latest found token's begin iterator (Valid after a call to nextToken())
- U32String::const_iterator _tokenEnd; ///< Latest found token's end iterator (Valid after a call to nextToken())
+ const T _str; ///< The unicode string to be tokenized
+ const String _delimiters; ///< String containing all the delimiter characters
+ typename T::const_iterator _tokenBegin; ///< Latest found token's begin iterator (Valid after a call to nextToken())
+ typename T::const_iterator _tokenEnd; ///< Latest found token's end iterator (Valid after a call to nextToken())
};
+typedef BaseStringTokenizer<String> StringTokenizer;
+typedef BaseStringTokenizer<U32String> U32StringTokenizer;
+
/** @} */
} // End of namespace Common
More information about the Scummvm-git-logs
mailing list