Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
schema: support encoding=None connections
Several different problems are fixed here, but all have the same root.
When a connection encoding is None (it is default on Python 2 and may be
set explicitly on Python 3), all mp_str values are decoded into bytes,
not Unicode strings (note that bytes is an alias for str in Python 2). But
the database schema parsing code assumes that _vspace / _vindex
values are Unicode strings.

The resolved problems are the following:

1. The default encoding in the bytes#decode() method is 'ascii', however
   names in tarantool can contain characters outside the ASCII table. Set
   'utf-8' for decoding names.
2. Convert all binary values into Unicode strings before parse or store
   them. This allows further correct accesses to the local schema
   representation.
3. Convert binary parameters like space, index or field name into
   Unicode strings when the schema is accessed, so as not to trigger
   redundant schema refetching.

Those problems are briefly mentioned in [1].

Tested manually with Python 2 and Python 3: my testing tarantool
instance has a space with name '©' and after the changes I'm able to
connect to it when the connection encoding is set to None. Also I
verified that schema is not fetched each time when I do
<connection>.select('©') in Python 2 (where such string literal is str /
bytes, not Unicode string).

Relevant test cases are added in next commits.

[1]: #105 (comment)
  • Loading branch information
Totktonada committed Aug 28, 2020
commit 3a915f14204f72f4b9aa4c34d9d93d88a488ebaa
84 changes: 73 additions & 11 deletions tarantool/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,83 @@
integer_types,
)
from tarantool.error import (
Error,
SchemaError,
DatabaseError
)
import tarantool.const as const


class RecursionError(Error):
    """Report the situation when max recursion depth is reached.

    This is an internal error for the <to_unicode_recursive> caller
    and it should be re-raised properly by the caller.
    """
    # NOTE(review): on Python 3.5+ this name shadows the builtin
    # RecursionError inside this module -- confirm that is intended.


def to_unicode(s):
    """Return s decoded as UTF-8 when it is a bytes object.

    Any value that is not an instance of bytes is returned unchanged.
    """
    return s.decode(encoding='utf-8') if isinstance(s, bytes) else s


def to_unicode_recursive(x, max_depth):
    """Apply to_unicode() throughout a nested structure.

    Dictionaries (both keys and values), lists and tuples are
    traversed recursively; every other value is passed to
    to_unicode(). A dictionary is returned as a new dict, a list as
    a new list and a tuple as a new tuple.

    x: value to convert

    max_depth: number of nesting levels allowed; 1 accepts a scalar,
    2 accepts a list of scalars, etc.

    Raises RecursionError when max_depth is exhausted before the
    traversal reaches the innermost values.
    """
    if max_depth <= 0:
        raise RecursionError('Max recursion depth is reached')

    # Every nested element gets one level less than its container.
    depth_left = max_depth - 1

    if isinstance(x, dict):
        return {
            to_unicode_recursive(k, depth_left):
                to_unicode_recursive(v, depth_left)
            for k, v in x.items()
        }

    if isinstance(x, (list, tuple)):
        converted = [to_unicode_recursive(item, depth_left) for item in x]
        # Keep the sequence kind of the input.
        return tuple(converted) if isinstance(x, tuple) else converted

    return to_unicode(x)


class SchemaIndex(object):
def __init__(self, index_row, space):
self.iid = index_row[1]
self.name = index_row[2]
if isinstance(self.name, bytes):
self.name = self.name.decode()
self.name = to_unicode(index_row[2])
self.index = index_row[3]
self.unique = index_row[4]
self.parts = []
if isinstance(index_row[5], (list, tuple)):
for val in index_row[5]:
try:
parts_raw = to_unicode_recursive(index_row[5], 3)
except RecursionError as e:
errmsg = 'Unexpected index parts structure: ' + str(e)
raise SchemaError(errmsg)
if isinstance(parts_raw, (list, tuple)):
for val in parts_raw:
if isinstance(val, dict):
self.parts.append((val['field'], val['type']))
else:
self.parts.append((val[0], val[1]))
else:
for i in range(index_row[5]):
for i in range(parts_raw):
self.parts.append((
index_row[5 + 1 + i * 2],
index_row[5 + 2 + i * 2]
to_unicode(index_row[5 + 1 + i * 2]),
to_unicode(index_row[5 + 2 + i * 2])
))
self.space = space
self.space.indexes[self.iid] = self
Expand All @@ -52,16 +103,19 @@ class SchemaSpace(object):
def __init__(self, space_row, schema):
self.sid = space_row[0]
self.arity = space_row[1]
self.name = space_row[2]
if isinstance(self.name, bytes):
self.name = self.name.decode()
self.name = to_unicode(space_row[2])
self.indexes = {}
self.schema = schema
self.schema[self.sid] = self
if self.name:
self.schema[self.name] = self
self.format = dict()
for part_id, part in enumerate(space_row[6]):
try:
format_raw = to_unicode_recursive(space_row[6], 3)
except RecursionError as e:
errmsg = 'Unexpected space format structure: ' + str(e)
raise SchemaError(errmsg)
for part_id, part in enumerate(format_raw):
part['id'] = part_id
self.format[part['name']] = part
self.format[part_id ] = part
Expand All @@ -78,6 +132,8 @@ def __init__(self, con):
self.con = con

def get_space(self, space):
space = to_unicode(space)

try:
return self.schema[space]
except KeyError:
Expand Down Expand Up @@ -135,6 +191,9 @@ def fetch_space_all(self):
SchemaSpace(row, self.schema)

def get_index(self, space, index):
space = to_unicode(space)
index = to_unicode(index)

_space = self.get_space(space)
try:
return _space.indexes[index]
Expand Down Expand Up @@ -203,6 +262,9 @@ def fetch_index_from(self, space, index):
return index_row

def get_field(self, space, field):
space = to_unicode(space)
field = to_unicode(field)

_space = self.get_space(space)
try:
return _space.format[field]
Expand Down