~azzar1/unity/add-show-desktop-key

« back to all changes in this revision

Viewing changes to scripts/python-console

Committer: wagrant
Date: 2008-08-20 00:04:12 UTC
Revision ID: svn-v3-trunk0:2b9c9e99-6f39-0410-b283-7f802c844ae2:trunk:1033

python-console: Move incomplete_utf8_sequence() into common.util, where it
probably belongs.

files modified:
lib/common/util.py

scripts/python-console

Show diffs side-by-side

added added

removed removed

scripts/python-console

'''Trim an incomplete UTF-8 character from the end of a string.

Returns (trimmed_string, count_of_trimmed_bytes).

'''

tokill = incomplete_utf8_sequence(stuff)

tokill = common.util.incomplete_utf8_sequence(stuff)

if tokill == 0:

return (stuff, tokill)

else:

367

list = list + traceback.format_exception_only(etype, value)

368

return ''.join(list)

369

370

def incomplete_utf8_sequence(byteseq):
    """Count the trailing bytes that form an incomplete UTF-8 character.

    Given a UTF-8-encoded byte sequence, returns the number of bytes at
    the end of the sequence which comprise an incomplete UTF-8 character
    sequence.

    byteseq may be a Python 2 str (iteration yields 1-character strings) or
    a bytes/bytearray object (iteration yields ints); both are accepted.

    If the sequence is empty or ends with a complete character OR INVALID
    sequence, returns 0.
    Otherwise, returns 1-3 indicating the number of bytes in the final
    incomplete (but valid) character sequence.

    Does not check any bytes before the final sequence for correctness.

    >>> incomplete_utf8_sequence("")
    0
    >>> incomplete_utf8_sequence("xy")
    0
    >>> incomplete_utf8_sequence("xy\xc3\xbc")
    0
    >>> incomplete_utf8_sequence("\xc3")
    1
    >>> incomplete_utf8_sequence("\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xe0\xa0")
    2
    >>> incomplete_utf8_sequence("xy\xf4")
    1
    >>> incomplete_utf8_sequence("xy\xf4\x8f")
    2
    >>> incomplete_utf8_sequence("xy\xf4\x8f\xa0")
    3
    """
    count = 0
    expect = None
    # Walk backwards from the end until we hit the byte that starts the
    # final character; that byte tells us how long the character should be.
    for b in byteseq[::-1]:
        # bytes/bytearray items are already ints; str items need ord().
        if not isinstance(b, int):
            b = ord(b)
        count += 1
        if b & 0x80 == 0x0:
            # 0xxxxxxx (single-byte character)
            expect = 1
            break
        elif b & 0xc0 == 0x80:
            # 10xxxxxx (subsequent byte)
            pass
        elif b & 0xe0 == 0xc0:
            # 110xxxxx (start of 2-byte sequence)
            expect = 2
            break
        elif b & 0xf0 == 0xe0:
            # 1110xxxx (start of 3-byte sequence)
            expect = 3
            break
        elif b & 0xf8 == 0xf0:
            # 11110xxx (start of 4-byte sequence)
            expect = 4
            break
        else:
            # Invalid byte
            return 0

        # Only reached for "subsequent bytes" (every start byte breaks out
        # of the loop above).
        if count >= 4:
            # Seen too many "subsequent bytes", invalid
            return 0

    if expect is None:
        # We never saw a "first byte", invalid
        return 0

    # We now know expect and count
    if count >= expect:
        # Complete, or we saw an invalid sequence
        return 0

    # Incomplete: count bytes of an expect-byte character are present
    return count

448

370

449

371

# Takes an object and returns a flattened version suitable for JSON

450

372

def flatten(object):

Older »