2020import re
2121import struct
2222import warnings
23+ import uuid
2324
2425from bson .binary import Binary
2526from bson .code import Code
4142except ImportError :
4243 _use_c = False
4344
44- try :
45- import uuid
46- _use_uuid = True
47- except ImportError :
48- _use_uuid = False
49-
5045
5146# This sort of sucks, but seems to be as good as it gets...
5247RE_TYPE = type (re .compile ("" ))
@@ -65,23 +60,26 @@ def _get_int(data, as_class=None, tz_aware=False, unsigned=False):
6560def _get_c_string (data , length = None ):
6661 if length is None :
6762 try :
68- length = data .index ("\x00 " )
63+ length = data .index (b "\x00 " )
6964 except ValueError :
7065 raise InvalidBSON ()
7166
7267 return (data [:length ].decode ("utf-8" ), data [length + 1 :])
7368
7469
7570def _make_c_string (string , check_null = False ):
76- if check_null and " \x00 " in string :
77- raise InvalidDocument ( "BSON keys / regex patterns must not "
78- "contain a NULL character" )
79- if isinstance ( string , unicode ):
80- return string .encode (" utf-8" ) + "\x00 "
71+ if isinstance ( string , str ) :
72+ if check_null and " \x00 " in string :
73+ raise InvalidDocument ( "BSON keys / regex patterns must not "
74+ "contain a NULL character" )
75+ return string .encode (' utf-8' ) + b "\x00 "
8176 else :
77+ if check_null and b"\x00 " in string :
78+ raise InvalidDocument ("BSON keys / regex patterns must not "
79+ "contain a NULL character" )
8280 try :
8381 string .decode ("utf-8" )
84- return string + "\x00 "
82+ return string + b "\x00 "
8583 except :
8684 raise InvalidStringData ("strings in documents must be valid "
8785 "UTF-8: %r" % string )
@@ -118,15 +116,17 @@ def _get_array(data, as_class, tz_aware):
118116
def _get_binary(data, as_class, tz_aware):
    """Decode a BSON binary element from the front of `data`.

    `data` starts with an int32 payload length (read with ``_get_int``),
    followed by a one-byte subtype and the payload itself. `as_class`
    and `tz_aware` are accepted for signature compatibility with the
    other element decoders and are not used here.

    Returns a ``(value, remainder)`` tuple where `value` is:

      - raw ``bytes`` for subtype 0
      - a :class:`uuid.UUID` for subtype 3
      - a :class:`Binary` carrying the subtype otherwise

    Raises :class:`InvalidBSON` if a subtype 2 payload's inner length
    does not equal the outer length minus 4.
    """
    (length, data) = _get_int(data)
    # Indexing bytes yields an int on Python 3, so no ord() is needed.
    subtype = data[0]
    data = data[1:]
    if subtype == 0:
        return (data[:length], data[length:])
    if subtype == 2:
        # Subtype 2 nests a second int32 length inside the payload; it
        # must account for everything except its own four bytes.
        (length2, data) = _get_int(data)
        if length2 != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = length2
    if subtype == 3:
        # bytes_le reads the first three UUID fields as little-endian.
        # NOTE(review): this must mirror whatever byte order the encoder
        # writes for subtype 3 -- confirm that payloads written with
        # UUID.bytes by other versions or drivers are not expected here.
        return (uuid.UUID(bytes_le=data[:length]), data[length:])
    return (Binary(data[:length], subtype), data[length:])
131131
132132
@@ -135,11 +135,11 @@ def _get_oid(data, as_class, tz_aware):
135135
136136
137137def _get_boolean (data , as_class , tz_aware ):
138- return (data [0 ] == " \x01 " , data [1 :])
138+ return (data [0 ] == 1 , data [1 :])
139139
140140
141141def _get_date (data , as_class , tz_aware ):
142- seconds = float ( struct .unpack ("<q" , data [:8 ])[0 ]) / 1000.0
142+ seconds = struct .unpack ("<q" , data [:8 ])[0 ] / 1000
143143 if tz_aware :
144144 return (datetime .datetime .fromtimestamp (seconds , utc ), data [8 :])
145145 return (datetime .datetime .utcfromtimestamp (seconds ), data [8 :])
@@ -192,26 +192,26 @@ def _get_long(data, as_class, tz_aware):
192192
193193
# Dispatch table from BSON element type byte to the function that decodes
# that element's value. Keys are ints because indexing a bytes object
# yields an int on Python 3; one-character str keys would never match.
_element_getter = {
    0x01: _get_number,
    0x02: _get_string,
    0x03: _get_object,
    0x04: _get_array,
    0x05: _get_binary,
    0x06: _get_null,  # undefined
    0x07: _get_oid,
    0x08: _get_boolean,
    0x09: _get_date,
    0x0A: _get_null,
    0x0B: _get_regex,
    0x0C: _get_ref,
    0x0D: _get_string,  # code
    0x0E: _get_string,  # symbol
    0x0F: _get_code_w_scope,
    0x10: _get_int,  # number_int
    0x11: _get_timestamp,
    0x12: _get_long,
    # MinKey / MaxKey consume no bytes, so the first argument is handed
    # back unchanged as the remainder.
    0xFF: lambda x, y, z: (MinKey(), x),
    0x7F: lambda x, y, z: (MaxKey(), x)}
215215
216216
217217def _element_to_dict (data , as_class , tz_aware ):
@@ -233,7 +233,7 @@ def _bson_to_dict(data, as_class, tz_aware):
233233 obj_size = struct .unpack ("<i" , data [:4 ])[0 ]
234234 if len (data ) < obj_size :
235235 raise InvalidBSON ("objsize too large" )
236- if data [obj_size - 1 ] != " \x00 " :
236+ if data [obj_size - 1 ] != 0 :
237237 raise InvalidBSON ("bad eoo" )
238238 elements = data [4 :obj_size - 1 ]
239239 return (_elements_to_dict (elements , as_class , tz_aware ), data [obj_size :])
@@ -242,7 +242,7 @@ def _bson_to_dict(data, as_class, tz_aware):
242242
243243
244244def _element_to_bson (key , value , check_keys ):
245- if not isinstance (key , basestring ):
245+ if not isinstance (key , str ):
246246 raise InvalidDocument ("documents must have only string keys, "
247247 "key was %r" % key )
248248
@@ -254,67 +254,66 @@ def _element_to_bson(key, value, check_keys):
254254
255255 name = _make_c_string (key , True )
256256 if isinstance (value , float ):
257- return "\x01 " + name + struct .pack ("<d" , value )
257+ return b "\x01 " + name + struct .pack ("<d" , value )
258258
259259 # Use Binary w/ subtype 3 for UUID instances
260260 try :
261261 import uuid
262262
263263 if isinstance (value , uuid .UUID ):
264- value = Binary (value .bytes , subtype = 3 )
264+ value = Binary (value .bytes_le , subtype = 3 )
265265 except ImportError :
266266 pass
267267
268268 if isinstance (value , Binary ):
269269 subtype = value .subtype
270270 if subtype == 2 :
271271 value = struct .pack ("<i" , len (value )) + value
272- return "\x05 %s%s%s%s" % ( name , struct .pack ("<i" , len (value )),
273- chr ( subtype ), value )
272+ return ( b "\x05 " + name + struct .pack ("<i" , len (value )) +
273+ bytes ([ subtype ]) + value )
274274 if isinstance (value , Code ):
275275 cstring = _make_c_string (value )
276276 scope = _dict_to_bson (value .scope , False , False )
277277 full_length = struct .pack ("<i" , 8 + len (cstring ) + len (scope ))
278278 length = struct .pack ("<i" , len (cstring ))
279- return "\x0F " + name + full_length + length + cstring + scope
279+ return b"\x0F " + name + full_length + length + cstring + scope
280+ if isinstance (value , bytes ):
281+ length = struct .pack ("<i" , len (value ))
282+ return b"\x05 " + name + length + b'\x00 ' + value
280283 if isinstance (value , str ):
281284 cstring = _make_c_string (value )
282285 length = struct .pack ("<i" , len (cstring ))
283- return "\x02 " + name + length + cstring
284- if isinstance (value , unicode ):
285- cstring = _make_c_string (value )
286- length = struct .pack ("<i" , len (cstring ))
287- return "\x02 " + name + length + cstring
288- if isinstance (value , dict ):
289- return "\x03 " + name + _dict_to_bson (value , check_keys , False )
286+ return b"\x02 " + name + length + cstring
287+ if isinstance (value , (dict , SON )):
288+ return b"\x03 " + name + _dict_to_bson (value , check_keys , False )
290289 if isinstance (value , (list , tuple )):
291- as_dict = SON (zip ([str (i ) for i in range (len (value ))], value ))
292- return "\x04 " + name + _dict_to_bson (as_dict , check_keys , False )
290+ as_dict = SON (list ( zip ([str (i ) for i in range (len (value ))], value ) ))
291+ return b "\x04 " + name + _dict_to_bson (as_dict , check_keys , False )
293292 if isinstance (value , ObjectId ):
294- return "\x07 " + name + value .binary
293+ return b "\x07 " + name + value .binary
295294 if value is True :
296- return "\x08 " + name + "\x01 "
295+ return b "\x08 " + name + b "\x01 "
297296 if value is False :
298- return "\x08 " + name + "\x00 "
299- if isinstance (value , ( int , long ) ):
297+ return b "\x08 " + name + b "\x00 "
298+ if isinstance (value , int ):
300299 # TODO this is a really ugly way to check for this...
301- if value > 2 ** 64 / 2 - 1 or value < - 2 ** 64 / 2 :
300+ if value > 2 ** 63 - 1 or value < - 2 ** 63 :
302301 raise OverflowError ("BSON can only handle up to 8-byte ints" )
303- if value > 2 ** 32 / 2 - 1 or value < - 2 ** 32 / 2 :
304- return "\x12 " + name + struct .pack ("<q" , value )
305- return "\x10 " + name + struct .pack ("<i" , value )
302+ if value > 2 ** 31 - 1 or value < - 2 ** 31 :
303+ return b "\x12 " + name + struct .pack ("<q" , value )
304+ return b "\x10 " + name + struct .pack ("<i" , value )
306305 if isinstance (value , datetime .datetime ):
307306 if value .utcoffset () is not None :
308307 value = value - value .utcoffset ()
309308 millis = int (calendar .timegm (value .timetuple ()) * 1000 +
310309 value .microsecond / 1000 )
311- return "\x09 " + name + struct .pack ("<q" , millis )
310+ return b "\x09 " + name + struct .pack ("<q" , millis )
312311 if isinstance (value , Timestamp ):
313312 time = struct .pack ("<I" , value .time )
314313 inc = struct .pack ("<I" , value .inc )
315- return "\x11 " + name + inc + time
314+ return b "\x11 " + name + inc + time
316315 if value is None :
317- return "\x0A " + name
316+ return b "\x0A " + name
318317 if isinstance (value , RE_TYPE ):
319318 pattern = value .pattern
320319 flags = ""
@@ -330,25 +329,25 @@ def _element_to_bson(key, value, check_keys):
330329 flags += "u"
331330 if value .flags & re .VERBOSE :
332331 flags += "x"
333- return "\x0B " + name + _make_c_string (pattern , True ) + \
332+ return b "\x0B " + name + _make_c_string (pattern , True ) + \
334333 _make_c_string (flags )
335334 if isinstance (value , DBRef ):
336335 return _element_to_bson (key , value .as_doc (), False )
337336 if isinstance (value , MinKey ):
338- return "\xFF " + name
337+ return b "\xFF " + name
339338 if isinstance (value , MaxKey ):
340- return "\x7F " + name
339+ return b "\x7F " + name
341340
342341 raise InvalidDocument ("cannot convert value of type %s to bson" %
343342 type (value ))
344343
345344
346345def _dict_to_bson (dict , check_keys , top_level = True ):
347346 try :
348- elements = ""
347+ elements = b ""
349348 if top_level and "_id" in dict :
350349 elements += _element_to_bson ("_id" , dict ["_id" ], False )
351- for (key , value ) in dict .iteritems ():
350+ for (key , value ) in dict .items ():
352351 if not top_level or key != "_id" :
353352 elements += _element_to_bson (key , value , check_keys )
354353 except AttributeError :
@@ -358,7 +357,7 @@ def _dict_to_bson(dict, check_keys, top_level=True):
358357 if length > 4 * 1024 * 1024 :
359358 raise InvalidDocument ("document too large - BSON documents are"
360359 "limited to 4 MB" )
361- return struct .pack ("<i" , length ) + elements + "\x00 "
360+ return struct .pack ("<i" , length ) + elements + b "\x00 "
362361if _use_c :
363362 _dict_to_bson = _cbson ._dict_to_bson
364363
@@ -410,21 +409,21 @@ def is_valid(bson):
410409 :Parameters:
411410 - `bson`: the data to be validated
412411 """
413- if not isinstance (bson , str ):
414- raise TypeError ("BSON data must be an instance of a subclass of str " )
412+ if not isinstance (bson , bytes ):
413+ raise TypeError ("BSON data must be an instance of a subclass of bytes " )
415414
416415 # 4 MB limit
417416 if len (bson ) > 4 * 1024 * 1024 :
418417 raise InvalidBSON ("BSON documents are limited to 4MB" )
419418
420419 try :
421420 (_ , remainder ) = _bson_to_dict (bson , dict , True )
422- return remainder == ""
421+ return remainder == b ""
423422 except :
424423 return False
425424
426425
427- class BSON (str ):
426+ class BSON (bytes ):
428427 """BSON (Binary JSON) data.
429428 """
430429
@@ -447,7 +446,7 @@ def encode(cls, document, check_keys=False):
447446
448447 Raises :class:`TypeError` if `document` is not a mapping type,
449448 or contains keys that are not instances of
450- :class:`basestring `. Raises
449+ :class:`str `. Raises
451450 :class:`~bson.errors.InvalidDocument` if `document` cannot be
452451 converted to :class:`BSON`.
453452
0 commit comments