~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to mystrings/m_string.h

Committer: Brian Aker
Date: 2008-10-07 15:15:28 UTC
mfrom: (481.1.24 codestyle)
Revision ID: brian@tangent.org-20081007151528-xc1w8m90lqb37j4r

Merge from Monty.

files added:
drizzled/innodb_plugin_extras.h

files modified:
configure.ac

drizzled/Makefile.am

drizzled/drizzled.cc

drizzled/global.h

drizzled/plugin.h

drizzled/sql_class.cc

drizzled/sql_state.cc

drizzled/sql_state.h

mystrings/m_string.h

mysys/my_pthread.h

storage/innobase/handler/ha_innodb.cc

Show diffs side-by-side

added added

removed removed

mystrings/m_string.h

#include <stdbool.h>

#include <assert.h>

#include <limits.h>

#include <ctype.h>

/* This is needed for the definitions of memcpy... on solaris */

#if defined(HAVE_MEMORY_H) && !defined(__cplusplus)

172

173

#define USTRING_WITH_LEN(X) ((unsigned char*) X), ((size_t) (sizeof(X) - 1))

173

174

#define C_STRING_WITH_LEN(X) ((char *) (X)), ((size_t) (sizeof(X) - 1))

174

175

/* SPACE_INT is a word that contains only spaces */

176

#if SIZEOF_INT == 4

177

#define SPACE_INT 0x20202020

178

#elif SIZEOF_INT == 8

179

#define SPACE_INT 0x2020202020202020

180

#else

181

#error define the appropriate constant for a word full of spaces

182

#endif

183

184

176

/**

185

177

Skip trailing space.

186

178

187

On most systems reading memory in larger chunks (ideally equal to the size of

188

the chunks that the machine physically reads from memory) causes fewer memory

189

access loops and hence increased performance.

190

This is why the 'int' type is used : it's closest to that (according to how

191

it's defined in C).

192

So when we determine the amount of whitespace at the end of a string we do

193

the following :

194

1. We divide the string into 3 zones :

195

a) from the start of the string (__start) to the first multiple

196

of sizeof(int) (__start_words)

197

b) from the end of the string (__end) to the last multiple of sizeof(int)

198

(__end_words)

199

c) a zone that is aligned to sizeof(int) and can be safely accessed

200

through an int *

201

2. We start comparing backwards from (b) char-by-char. If all we find is

202

space then we continue

203

3. If there are elements in zone (c) we compare them as unsigned ints to an

204

int mask (SPACE_INT) consisting of all spaces

205

4. Finally we compare the remaining part (a) of the string char by char.

206

This covers for the last non-space unsigned int from 3. (if any)

207

208

This algorithm works well for relatively larger strings, but it will slow

209

the things down for smaller strings (because of the additional calculations

210

and checks compared to the naive method). Thus the barrier of length 20

211

is added.

212

213

179

@param ptr pointer to the input string

214

180

@param len the length of the string

215

181

@return pointer to the position just past the last non-space character

216

182

217

183

218

static inline const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len)

184

static inline const unsigned char *

185

skip_trailing_space(const unsigned char *ptr,size_t len)

219

186

{

220

187

const unsigned char *end= ptr + len;

221

188

222

if (len > 20)

223

{

224

const unsigned char *end_words= (const unsigned char *)(intptr_t)

225

(((uint64_t)(intptr_t)end) / SIZEOF_INT * SIZEOF_INT);

226

const unsigned char *start_words= (const unsigned char *)(intptr_t)

227

((((uint64_t)(intptr_t)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);

228

229

assert(((uint64_t)(intptr_t)ptr) >= SIZEOF_INT);

230

if (end_words > ptr)

231

{

232

while (end > end_words && end[-1] == 0x20)

233

end--;

234

if (end[-1] == 0x20 && start_words < end_words)

235

while (end > start_words && ((const unsigned *)end)[-1] == SPACE_INT)

236

end -= SIZEOF_INT;

237

}

238

}

239

while (end > ptr && end[-1] == 0x20)

240

end--;

241

return (end);

189

while (end > ptr && isspace(*--end))

190

continue;

191

return end+1;

242

192

}

243

193

244

194

#endif

Older »