~azzar1/unity/add-show-desktop-key

« back to all changes in this revision

Viewing changes to lib/common/util.py

Committer: wagrant
Date: 2008-08-20 00:04:12 UTC
Revision ID: svn-v3-trunk0:2b9c9e99-6f39-0410-b283-7f802c844ae2:trunk:1033

python-console: Move incomplete_utf8_sequence() into common.util, where it
probably belongs.

files modified:
lib/common/util.py

scripts/python-console

Show diffs side-by-side

added added

removed removed

lib/common/util.py

214

# produced by json.js module when it encodes date objects.

215

return datetime.datetime.strptime(str, "%Y-%m-%dT%H:%M:%SZ")

216

217

def incomplete_utf8_sequence(byteseq):
    r"""
    str -> int
    Given a UTF-8-encoded byte sequence, returns the number of bytes at
    the end of the sequence which comprise an incomplete UTF-8 character
    sequence.

    byteseq may be a byte string whose items are 1-character strings
    (Python 2 str) or a sequence whose items are already integers
    (bytes on Python 3, bytearray).

    If the sequence is empty or ends with a complete character OR INVALID
    sequence, returns 0.
    Otherwise, returns 1-3 indicating the number of bytes in the final
    incomplete (but valid) character sequence.

    Does not check any bytes before the final sequence for correctness.
    Only the last 4 bytes are ever examined, so the cost does not depend
    on the length of byteseq.

    >>> incomplete_utf8_sequence("")
    0
    >>> incomplete_utf8_sequence("xy")
    0
    >>> incomplete_utf8_sequence("xy\xc3\xbc")
    0
    >>> incomplete_utf8_sequence("\xc3")
    1
    >>> incomplete_utf8_sequence("\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xe0\xa0")
    2
    >>> incomplete_utf8_sequence("xy\xf4")
    1
    >>> incomplete_utf8_sequence("xy\xf4\x8f")
    2
    >>> incomplete_utf8_sequence("xy\xf4\x8f\xa0")
    3
    """
    count = 0
    expect = None
    # A UTF-8 character is at most 4 bytes long, so a lead byte for an
    # incomplete trailing sequence must be among the last 4 bytes. Slicing
    # first avoids copying/reversing the whole buffer.
    for b in reversed(byteseq[-4:]):
        # Items are 1-character strings for str input, but already
        # integers for bytes (Python 3) and bytearray.
        if not isinstance(b, int):
            b = ord(b)
        count += 1
        if b & 0x80 == 0x0:
            # 0xxxxxxx (single-byte character)
            expect = 1
            break
        elif b & 0xc0 == 0x80:
            # 10xxxxxx (subsequent byte) -- keep scanning backwards for
            # the byte that starts this sequence.
            pass
        elif b & 0xe0 == 0xc0:
            # 110xxxxx (start of 2-byte sequence)
            expect = 2
            break
        elif b & 0xf0 == 0xe0:
            # 1110xxxx (start of 3-byte sequence)
            expect = 3
            break
        elif b & 0xf8 == 0xf0:
            # 11110xxx (start of 4-byte sequence)
            expect = 4
            break
        else:
            # Invalid byte
            return 0

    if expect is None:
        # We never saw a "first byte": the input was empty, or the last
        # (up to 4) bytes were all "subsequent bytes". Invalid.
        return 0

    # We now know expect and count
    if count >= expect:
        # Complete, or we saw an invalid sequence
        return 0
    # Incomplete
    return count

Older »