""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

encode = codecs.utf_16_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string.

    The input is handed to ``codecs.utf_16_decode`` with the ``final`` flag
    set to True, and that function's result tuple is returned unchanged.
    """
    return codecs.utf_16_decode(input, errors, True)


def _native_encoder():
    """Return the UTF-16 encoder function for this platform's byte order."""
    if sys.byteorder == 'little':
        return codecs.utf_16_le_encode
    return codecs.utf_16_be_encode


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with ``codecs.utf_16_encode``; every later
    chunk is encoded with the fixed-endianness encoder matching
    ``sys.byteorder``.  ``self.encoder`` is None until the first chunk has
    been encoded.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]
        # Encode first so that a failure leaves the encoder in its initial
        # state; only then switch to the fixed-endianness encoder.
        result = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = _native_encoder()
        return result

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return 2 if self.encoder is None else 0

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any truthy *state* means "endianness not determined yet"; a falsy
        one selects the native-order encoder.
        """
        self.encoder = None if state else _native_encoder()


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that requires a leading BOM.

    ``self.decoder`` is None until a BOM has been seen; afterwards it is the
    little- or big-endian decode function selected by that BOM.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, detecting the byte order on first use.

        Returns an ``(output, consumed)`` tuple.  Raises UnicodeError when
        at least two bytes were consumed without a BOM being reported.
        """
        if self.decoder is not None:
            return self.decoder(input, errors, final)
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, final)
        if byteorder == -1:
            self.decoder = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decoder = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, byte_order_flag)``."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_16_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        native_is_big = sys.byteorder == "big"
        if flag == 0:
            # natural order for this platform
            self.decoder = (codecs.utf_16_be_decode
                            if native_is_big
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            # unnatural (byte-swapped) order
            self.decoder = (codecs.utf_16_le_decode
                            if native_is_big
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer using the same two-phase scheme as IncrementalEncoder.

    The first ``encode`` call goes through ``codecs.utf_16_encode``; later
    calls use the fixed-endianness encoder matching ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        self.encoder = None

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)
        # Encode first so that a failure leaves the writer in its initial
        # state; only then switch to the fixed-endianness encoder.
        result = codecs.utf_16_encode(input, errors)
        self.encoder = _native_encoder()
        return result


class StreamReader(codecs.StreamReader):
    """Stream reader that detects the byte order from a leading BOM.

    Once a BOM has been seen, ``decode`` is replaced on the instance by the
    matching fixed-endianness decode function; ``reset`` removes that
    instance attribute so the detecting method is used again.
    """

    def reset(self):
        """Reset the reader and re-enable BOM detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # No BOM has been seen yet, so there is nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, detecting the byte order.

        Returns an ``(output, consumed)`` tuple.  Raises UnicodeError when
        at least two bytes were consumed without a BOM being reported.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)


### encodings module API

def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing this codec."""
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )