219
219
list = list + traceback.format_exception_only(etype, value)
220
220
return ''.join(list)
222
def incomplete_utf8_sequence(byteseq):
    r"""Return the length of an incomplete UTF-8 sequence ending *byteseq*.

    Given a UTF-8-encoded byte sequence (``bytes``, ``bytearray`` or a
    Python 2 ``str``), returns the number of bytes at the end of the
    sequence which comprise an incomplete UTF-8 character sequence.

    If the sequence is empty or ends with a complete character OR AN
    INVALID sequence, returns 0.
    Otherwise, returns 1-3 indicating the number of bytes in the final
    incomplete (but structurally valid) character sequence.

    Does not check any bytes before the final sequence for correctness,
    and only inspects the bit patterns of lead/continuation bytes (it does
    not reject overlong encodings or out-of-range code points).

    >>> incomplete_utf8_sequence(b"")
    0
    >>> incomplete_utf8_sequence(b"xy")
    0
    >>> incomplete_utf8_sequence(b"xy\xc3\xbc")
    0
    >>> incomplete_utf8_sequence(b"\xc3")
    1
    >>> incomplete_utf8_sequence(b"\xbc\xc3")
    1
    >>> incomplete_utf8_sequence(b"xy\xbc\xc3")
    1
    >>> incomplete_utf8_sequence(b"xy\xe0\xa0")
    2
    >>> incomplete_utf8_sequence(b"xy\xf4")
    1
    >>> incomplete_utf8_sequence(b"xy\xf4\x8f")
    2
    >>> incomplete_utf8_sequence(b"xy\xf4\x8f\xa0")
    3
    """
    # A UTF-8 character is at most 4 bytes long, so only the last 4 bytes
    # can influence the answer.  bytearray() yields ints on both Python 2
    # (from str) and Python 3 (from bytes), so the bit masks below work.
    tail = bytearray(byteseq[-4:])

    count = 0      # bytes of the final sequence seen so far (from the end)
    expect = None  # total sequence length announced by its lead byte

    for b in reversed(tail):
        if b & 0x80 == 0:
            # 0xxxxxxx (single-byte character)
            break
        elif b & 0xc0 == 0x80:
            # 10xxxxxx (subsequent byte)
            count += 1
            if count > 3:
                # Seen too many "subsequent bytes", invalid
                return 0
        elif b & 0xe0 == 0xc0:
            # 110xxxxx (start of 2-byte sequence)
            expect = 2
            count += 1
            break
        elif b & 0xf0 == 0xe0:
            # 1110xxxx (start of 3-byte sequence)
            expect = 3
            count += 1
            break
        elif b & 0xf8 == 0xf0:
            # 11110xxx (start of 4-byte sequence)
            expect = 4
            count += 1
            break
        else:
            # 11111xxx is never a legal UTF-8 byte, invalid
            return 0

    if expect is None:
        # We never saw a "first byte", invalid (or empty / plain ASCII)
        return 0

    # We now know expect and count
    if count >= expect:
        # Complete, or we saw an invalid sequence
        return 0

    return count
222
301
if __name__ == "__main__":
223
302
port = int(sys.argv[1])
224
303
magic = sys.argv[2]