Eclipse SUMO - Simulation of Urban MObility
StringTokenizer.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2019 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v2.0
6 // which accompanies this distribution, and is available at
7 // http://www.eclipse.org/legal/epl-v20.html
8 // SPDX-License-Identifier: EPL-2.0
9 /****************************************************************************/
16 // A java-style StringTokenizer for c++ (stl)
17 /****************************************************************************/
18 
19 
20 // ===========================================================================
21 // included modules
22 // ===========================================================================
23 #include <config.h>
24 
25 #include <string>
26 #include <vector>
27 #include <iostream> // !!! debug only
28 
29 #include "UtilExceptions.h"
30 #include "StringTokenizer.h"
31 
32 
33 // ===========================================================================
34 // variable definitions
35 // ===========================================================================
36 const int StringTokenizer::NEWLINE = -256;
37 const int StringTokenizer::WHITECHARS = -257;
38 const int StringTokenizer::SPACE = 32;
39 const int StringTokenizer::TAB = 9;
40 
41 
42 // ===========================================================================
43 // method definitions
44 // ===========================================================================
45 
47  myPos(0) {
48 }
49 
50 
51 StringTokenizer::StringTokenizer(std::string tosplit) :
52  myTosplit(tosplit), myPos(0) {
53  prepareWhitechar(tosplit);
54 }
55 
56 
57 StringTokenizer::StringTokenizer(std::string tosplit, std::string token, bool splitAtAllChars) :
58  myTosplit(tosplit), myPos(0) {
59  prepare(tosplit, token, splitAtAllChars);
60 }
61 
62 
63 StringTokenizer::StringTokenizer(std::string tosplit, int special) :
64  myTosplit(tosplit), myPos(0) {
65  switch (special) {
66  case NEWLINE:
67  prepare(tosplit, "\r\n", true);
68  break;
69  case TAB:
70  prepare(tosplit, "\t", true);
71  break;
72  case WHITECHARS:
73  prepareWhitechar(tosplit);
74  break;
75  default:
76  char* buf = new char[2];
77  buf[0] = (char) special;
78  buf[1] = 0;
79  prepare(tosplit, buf, false);
80  delete[] buf;
81  break;
82  }
83 }
84 
85 
87 
88 
90  myPos = 0;
91 }
92 
93 
95  return myPos != (int)myStarts.size();
96 }
97 
98 
99 std::string StringTokenizer::next() {
100  if (myPos >= (int)myStarts.size()) {
101  throw OutOfBoundsException();
102  }
103  if (myLengths[myPos] == 0) {
104  myPos++;
105  return "";
106  }
107  int start = myStarts[myPos];
108  int length = myLengths[myPos++];
109  return myTosplit.substr(start, length);
110 }
111 
112 
113 std::string StringTokenizer::front() {
114  if (myStarts.size() == 0) {
115  throw OutOfBoundsException();
116  }
117  if (myLengths[0] == 0) {
118  return "";
119  }
120  return myTosplit.substr(myStarts[0], myLengths[0]);
121 }
122 
123 
124 std::string StringTokenizer::get(int pos) const {
125  if (pos >= (int)myStarts.size()) {
126  throw OutOfBoundsException();
127  }
128  if (myLengths[pos] == 0) {
129  return "";
130  }
131  int start = myStarts[pos];
132  int length = myLengths[pos];
133  return myTosplit.substr(start, length);
134 }
135 
136 
138  return (int)myStarts.size();
139 }
140 
141 
142 void StringTokenizer::prepare(const std::string& tosplit, const std::string& token, bool splitAtAllChars) {
143  int beg = 0;
144  int len = (int)token.length();
145  if (splitAtAllChars) {
146  len = 1;
147  }
148  while (beg < (int)tosplit.length()) {
149  std::string::size_type end;
150  if (splitAtAllChars) {
151  end = tosplit.find_first_of(token, beg);
152  } else {
153  end = tosplit.find(token, beg);
154  }
155  if (end == std::string::npos) {
156  end = tosplit.length();
157  }
158  myStarts.push_back(beg);
159  myLengths.push_back((int)end - beg);
160  beg = (int)end + len;
161  if (beg == (int)tosplit.length()) {
162  myStarts.push_back(beg - 1);
163  myLengths.push_back(0);
164  }
165  }
166 }
167 
168 
169 void StringTokenizer::prepareWhitechar(const std::string& tosplit) {
170  std::string::size_type len = tosplit.length();
171  std::string::size_type beg = 0;
172  while (beg < len && tosplit[beg] <= SPACE) {
173  beg++;
174  }
175  while (beg != std::string::npos && beg < len) {
176  std::string::size_type end = beg;
177  while (end < len && tosplit[end] > SPACE) {
178  end++;
179  }
180  myStarts.push_back((int)beg);
181  myLengths.push_back((int)end - (int)beg);
182  beg = end;
183  while (beg < len && tosplit[beg] <= SPACE) {
184  beg++;
185  }
186  }
187 }
188 
189 
190 std::vector<std::string>
192  std::vector<std::string> ret;
193  ret.reserve(size());
194  while (hasNext()) {
195  ret.push_back(next());
196  }
197  reinit();
198  return ret;
199 }
200 
201 /****************************************************************************/
StringTokenizer::hasNext
bool hasNext()
returns the information whether further substrings exist
Definition: StringTokenizer.cpp:94
StringTokenizer::next
std::string next()
returns the next substring when it exists; otherwise an OutOfBoundsException is thrown
Definition: StringTokenizer.cpp:99
StringTokenizer::prepare
void prepare(const std::string &tosplit, const std::string &token, bool splitAtAllChars)
splits the first string at all occurrences of the second. If the third parameter is true split at all ...
Definition: StringTokenizer.cpp:142
StringTokenizer::~StringTokenizer
~StringTokenizer()
destructor
Definition: StringTokenizer.cpp:86
StringTokenizer::WHITECHARS
static const int WHITECHARS
identifier for splitting the given string at all whitespace characters
Definition: StringTokenizer.h:67
StringTokenizer::myTosplit
std::string myTosplit
the string to split
Definition: StringTokenizer.h:139
StringTokenizer::StringTokenizer
StringTokenizer()
default constructor
Definition: StringTokenizer.cpp:46
StringTokenizer::get
std::string get(int pos) const
returns the item at the given position
Definition: StringTokenizer.cpp:124
StringTokenizer::NEWLINE
static const int NEWLINE
identifier for splitting the given string at all newline characters
Definition: StringTokenizer.h:64
StringTokenizer::myLengths
SizeVector myLengths
the list of substring lengths
Definition: StringTokenizer.h:148
OutOfBoundsException
Definition: UtilExceptions.h:134
UtilExceptions.h
StringTokenizer::size
int size() const
returns the number of existing substrings
Definition: StringTokenizer.cpp:137
StringTokenizer::SPACE
static const int SPACE
the ascii index of the highest whitespace character
Definition: StringTokenizer.h:70
StringTokenizer::TAB
static const int TAB
the ascii index of the tab character
Definition: StringTokenizer.h:73
StringTokenizer::myStarts
SizeVector myStarts
the list of substring starts
Definition: StringTokenizer.h:145
StringTokenizer::front
std::string front()
returns the first substring without moving the iterator
Definition: StringTokenizer.cpp:113
StringTokenizer::myPos
int myPos
the current position in the list of substrings
Definition: StringTokenizer.h:142
StringTokenizer::getVector
std::vector< std::string > getVector()
return vector of strings
Definition: StringTokenizer.cpp:191
config.h
StringTokenizer.h
StringTokenizer::prepareWhitechar
void prepareWhitechar(const std::string &tosplit)
splits the first string at all occurrences of whitechars
Definition: StringTokenizer.cpp:169
StringTokenizer::reinit
void reinit()
reinitialises the internal iterator
Definition: StringTokenizer.cpp:89