""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

encode = codecs.utf_16_encode


def _native_encoder():
    """Return the BOM-less UTF-16 encoder matching this platform's byte order."""
    if sys.byteorder == 'little':
        return codecs.utf_16_le_encode
    return codecs.utf_16_be_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string.

    Returns the ``(text, bytes_consumed)`` tuple produced by
    ``codecs.utf_16_decode``; the input is treated as final.
    """
    return codecs.utf_16_decode(input, errors, True)


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with ``codecs.utf_16_encode`` (which emits
    the BOM); every later chunk goes through the BOM-less encoder for the
    platform's byte order.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing written yet": the next chunk must carry the BOM.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is None:
            result = codecs.utf_16_encode(input, self.errors)[0]
            # Switch to the BOM-less encoder for everything that follows.
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Forget that a BOM was written; the next chunk starts a new stream."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        if state:
            self.encoder = None
        else:
            self.encoder = _native_encoder()


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that requires a leading BOM.

    The byte order is taken from the BOM; once it is known, decoding is
    delegated to the matching little- or big-endian decoder.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the BOM has been seen.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, returning ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed and no
        BOM was found.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        # Use the errors argument on both paths; the base class's decode()
        # passes self.errors here.
        return self.decoder(input, errors, final)

    def reset(self):
        """Drop buffered bytes and the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, byte_order_flag)``."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_16_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            self.decoder = (codecs.utf_16_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_le_decode)
        elif state == 1:
            self.decoder = (codecs.utf_16_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits the BOM with the first encoded chunk only."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means the BOM has not been written yet.
        self.encoder = None

    def reset(self):
        """Reset the writer so the next chunk carries a BOM again."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input*; return the tuple produced by the underlying encoder."""
        if self.encoder is None:
            result = codecs.utf_16_encode(input, errors)
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, errors)


class StreamReader(codecs.StreamReader):
    """Stream reader that detects the byte order from the leading BOM."""

    def reset(self):
        """Reset the reader and remove any byte-order-specific decoder."""
        codecs.StreamReader.reset(self)
        # decode() may have been replaced by an instance attribute; deleting
        # it makes the class-level method (with BOM detection) visible again.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* while the byte order is still unknown.

        Once a BOM is seen, ``self.decode`` is rebound on the instance to
        the matching endian-specific decoder. Raises UnicodeError when at
        least two bytes were consumed and no BOM was found.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)


### encodings module API

def getregentry():
    """Return the CodecInfo record describing the 'utf-16' codec."""
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )