9393_CODEC_OPTIONS_TYPE_ERROR = TypeError (
9494 "codec_options must be an instance of bson.codec_options.CodecOptions" )
9595
96- def _get_int (data , position , as_class = None ,
96+
97+ def _raise_unknown_type (element_type , element_name ):
98+ """Unknown type helper."""
99+ raise InvalidBSON ("Detected unknown BSON type %r for fieldname %r. Are "
100+ "you using the latest driver version?" % (
101+ element_type , element_name ))
102+
103+
104+ def _get_int (data , position , name , as_class = None ,
97105 tz_aware = False , uuid_subtype = OLD_UUID_SUBTYPE ,
98106 compile_re = True , unsigned = False ):
99107 format = unsigned and "I" or "i"
@@ -137,13 +145,15 @@ def _make_c_string(string, check_null=False):
137145 "UTF-8: %r" % string )
138146
139147
140- def _get_number (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
148+ def _get_number (
149+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
141150 num = struct .unpack ("<d" , data [position :position + 8 ])[0 ]
142151 position += 8
143152 return num , position
144153
145154
146- def _get_string (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
155+ def _get_string (
156+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
147157 length = struct .unpack ("<i" , data [position :position + 4 ])[0 ]
148158 if length <= 0 or (len (data ) - position - 4 ) < length :
149159 raise InvalidBSON ("invalid string length" )
@@ -153,7 +163,8 @@ def _get_string(data, position, as_class, tz_aware, uuid_subtype, compile_re):
153163 return _get_c_string (data , position , length - 1 )
154164
155165
156- def _get_object (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
166+ def _get_object (
167+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
157168 obj_size = struct .unpack ("<i" , data [position :position + 4 ])[0 ]
158169 if data [position + obj_size - 1 :position + obj_size ] != ZERO :
159170 raise InvalidBSON ("bad eoo" )
@@ -168,26 +179,43 @@ def _get_object(data, position, as_class, tz_aware, uuid_subtype, compile_re):
168179 return object , position
169180
170181
171- def _get_array (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
172- obj , position = _get_object (data , position ,
173- as_class , tz_aware , uuid_subtype , compile_re )
182+ def _get_array (
183+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
184+ size = struct .unpack ("<i" , data [position :position + 4 ])[0 ]
185+ end = position + size - 1
186+ if data [end :end + 1 ] != ZERO :
187+ raise InvalidBSON ("bad eoo" )
188+
189+ position += 4
190+ end -= 1
174191 result = []
175- i = 0
176- while True :
192+
193+ # Avoid doing global and attribute lookups in the loop.
194+ append = result .append
195+ index = data .index
196+ getter = _element_getter
197+
198+ while position < end :
199+ element_type = data [position :position + 1 ]
200+ # Just skip the keys.
201+ position = index (ZERO , position ) + 1
177202 try :
178- result .append (obj [str (i )])
179- i += 1
203+ value , position = getter [element_type ](
204+ data , position , name ,
205+ as_class , tz_aware , uuid_subtype , compile_re )
180206 except KeyError :
181- break
182- return result , position
207+ _raise_unknown_type (element_type , name )
208+ append (value )
209+ return result , position + 1
183210
184211
185- def _get_binary (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
186- length , position = _get_int (data , position )
212+ def _get_binary (
213+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
214+ length , position = _get_int (data , position , name )
187215 subtype = ord (data [position :position + 1 ])
188216 position += 1
189217 if subtype == 2 :
190- length2 , position = _get_int (data , position )
218+ length2 , position = _get_int (data , position , name )
191219 if length2 != length - 4 :
192220 raise InvalidBSON ("invalid binary (st 2) - lengths don't match!" )
193221 length = length2
@@ -213,20 +241,22 @@ def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
213241 return value , position
214242
215243
216- def _get_oid (data , position , as_class = None ,
244+ def _get_oid (data , position , name , as_class = None ,
217245 tz_aware = False , uuid_subtype = OLD_UUID_SUBTYPE , compile_re = True ):
218246 value = ObjectId (data [position :position + 12 ])
219247 position += 12
220248 return value , position
221249
222250
223- def _get_boolean (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
251+ def _get_boolean (
252+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
224253 value = data [position :position + 1 ] == ONE
225254 position += 1
226255 return value , position
227256
228257
229- def _get_date (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
258+ def _get_date (
259+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
230260 millis = struct .unpack ("<q" , data [position :position + 8 ])[0 ]
231261 diff = millis % 1000
232262 seconds = (millis - diff ) / 1000
@@ -238,27 +268,30 @@ def _get_date(data, position, as_class, tz_aware, uuid_subtype, compile_re):
238268 return dt .replace (microsecond = diff * 1000 ), position
239269
240270
241- def _get_code (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
242- code , position = _get_string (data , position ,
271+ def _get_code (
272+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
273+ code , position = _get_string (data , position , name ,
243274 as_class , tz_aware , uuid_subtype , compile_re )
244275 return Code (code ), position
245276
246277
247278def _get_code_w_scope (
248- data , position , as_class , tz_aware , uuid_subtype , compile_re ):
249- _ , position = _get_int (data , position )
250- code , position = _get_string (data , position ,
279+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
280+ _ , position = _get_int (data , position , name )
281+ code , position = _get_string (data , position , name ,
251282 as_class , tz_aware , uuid_subtype , compile_re )
252- scope , position = _get_object (data , position ,
283+ scope , position = _get_object (data , position , name ,
253284 as_class , tz_aware , uuid_subtype , compile_re )
254285 return Code (code , scope ), position
255286
256287
257- def _get_null (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
288+ def _get_null (
289+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
258290 return None , position
259291
260292
261- def _get_regex (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
293+ def _get_regex (
294+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
262295 pattern , position = _get_c_string (data , position )
263296 bson_flags , position = _get_c_string (data , position )
264297 bson_re = Regex (pattern , bson_flags )
@@ -268,21 +301,23 @@ def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
268301 return bson_re , position
269302
270303
271- def _get_ref (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
272- collection , position = _get_string (data , position , as_class , tz_aware ,
273- uuid_subtype , compile_re )
274- oid , position = _get_oid (data , position )
304+ def _get_ref (
305+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
306+ collection , position = _get_string (
307+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re )
308+ oid , position = _get_oid (data , position , name )
275309 return DBRef (collection , oid ), position
276310
277311
278312def _get_timestamp (
279- data , position , as_class , tz_aware , uuid_subtype , compile_re ):
280- inc , position = _get_int (data , position , unsigned = True )
281- timestamp , position = _get_int (data , position , unsigned = True )
313+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
314+ inc , position = _get_int (data , position , name , unsigned = True )
315+ timestamp , position = _get_int (data , position , name , unsigned = True )
282316 return Timestamp (timestamp , inc ), position
283317
284318
285- def _get_long (data , position , as_class , tz_aware , uuid_subtype , compile_re ):
319+ def _get_long (
320+ data , position , name , as_class , tz_aware , uuid_subtype , compile_re ):
286321 # Have to cast to long; on 32-bit unpack may return an int.
287322 # 2to3 will change long to int. That's fine since long doesn't
288323 # exist in python3.
@@ -310,17 +345,21 @@ def _get_long(data, position, as_class, tz_aware, uuid_subtype, compile_re):
310345 BSONINT : _get_int , # number_int
311346 BSONTIM : _get_timestamp ,
312347 BSONLON : _get_long , # Same as _get_int after 2to3 runs.
313- BSONMIN : lambda u , v , w , x , y , z : (MinKey (), v ),
314- BSONMAX : lambda u , v , w , x , y , z : (MaxKey (), v )}
348+ BSONMIN : lambda t , u , v , w , x , y , z : (MinKey (), u ),
349+ BSONMAX : lambda t , u , v , w , x , y , z : (MaxKey (), u )}
315350
316351
317352def _element_to_dict (
318353 data , position , as_class , tz_aware , uuid_subtype , compile_re ):
319354 element_type = data [position :position + 1 ]
320355 position += 1
321356 element_name , position = _get_c_string (data , position )
322- value , position = _element_getter [element_type ](
323- data , position , as_class , tz_aware , uuid_subtype , compile_re )
357+ try :
358+ func = _element_getter [element_type ]
359+ except KeyError :
360+ _raise_unknown_type (element_type , element_name )
361+ value , position = func (data , position , element_name ,
362+ as_class , tz_aware , uuid_subtype , compile_re )
324363
325364 return element_name , value , position
326365
0 commit comments