~azzar1/unity/add-show-desktop-key

« back to all changes in this revision

Viewing changes to scripts/python-console

  • Committer: mattgiuca
  • Date: 2008-07-15 03:27:06 UTC
  • Revision ID: svn-v3-trunk0:2b9c9e99-6f39-0410-b283-7f802c844ae2:trunk:871
scripts/python-console: Added function incomplete_utf8_sequence, for use with
correctly splitting large prints on good UTF-8 boundaries.

Show diffs side-by-side

added added

removed removed

Lines of Context:
219
219
    list = list + traceback.format_exception_only(etype, value)
220
220
    return ''.join(list)
221
221
 
 
222
def incomplete_utf8_sequence(byteseq):
    """
    str -> int
    Given a UTF-8-encoded byte sequence (str), returns the number of bytes at
    the end of the string which comprise an incomplete UTF-8 character
    sequence.

    If the string is empty or ends with a complete character OR INVALID
    sequence, returns 0.
    Otherwise, returns 1-3 indicating the number of bytes in the final
    incomplete (but valid) character sequence.

    Does not check any bytes before the final sequence for correctness.
    Lead bytes are classified by their high-bit pattern only, so lead bytes
    that can never appear in well-formed UTF-8 (0xc0, 0xc1, 0xf5-0xf7) are
    still treated as the start of a 2- or 4-byte sequence.

    byteseq may also be a sequence whose items are already integers (such as
    a bytearray); such items are used as byte values directly.

    >>> incomplete_utf8_sequence("")
    0
    >>> incomplete_utf8_sequence("xy")
    0
    >>> incomplete_utf8_sequence("xy\xc3\xbc")
    0
    >>> incomplete_utf8_sequence("\xc3")
    1
    >>> incomplete_utf8_sequence("\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xbc\xc3")
    1
    >>> incomplete_utf8_sequence("xy\xe0\xa0")
    2
    >>> incomplete_utf8_sequence("xy\xf4")
    1
    >>> incomplete_utf8_sequence("xy\xf4\x8f")
    2
    >>> incomplete_utf8_sequence("xy\xf4\x8f\xa0")
    3
    """
    count = 0
    expect = None
    # A UTF-8 character is at most 4 bytes long, so only the last 4 bytes can
    # influence the answer. Slicing the tail first avoids reversing (and
    # therefore copying) the whole buffer, which matters for large outputs.
    for b in reversed(byteseq[-4:]):
        # Items are 1-character strings for str input, but already integers
        # for bytearray-like input; only convert when needed.
        if not isinstance(b, int):
            b = ord(b)
        count += 1
        if b & 0x80 == 0x0:
            # 0xxxxxxx (single-byte character)
            expect = 1
            break
        elif b & 0xc0 == 0x80:
            # 10xxxxxx (subsequent byte); keep scanning backwards for the
            # byte that started this sequence
            pass
        elif b & 0xe0 == 0xc0:
            # 110xxxxx (start of 2-byte sequence)
            expect = 2
            break
        elif b & 0xf0 == 0xe0:
            # 1110xxxx (start of 3-byte sequence)
            expect = 3
            break
        elif b & 0xf8 == 0xf0:
            # 11110xxx (start of 4-byte sequence)
            expect = 4
            break
        else:
            # Invalid byte
            return 0

    if expect is None:
        # We never saw a "first byte" within the last 4 bytes (the input was
        # empty, or ended in nothing but "subsequent bytes"), invalid
        return 0

    # We now know expect and count
    if count >= expect:
        # Complete, or we saw an invalid sequence
        return 0
    # Incomplete: count bytes of an expect-byte sequence are present
    return count
 
300
 
222
301
if __name__ == "__main__":
223
302
    port = int(sys.argv[1])
224
303
    magic = sys.argv[2]