Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
gh-94808: Cover PyUnicode_Count in CAPI
  • Loading branch information
sobolevn committed Sep 19, 2022
commit 22aee9a9bd99ac609fcbd038a06d3d48196a9a5d
38 changes: 38 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')

# Test PyUnicode_Count()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_count(self):
    # Imported locally: the skipIf guard above allows _testcapi to be None,
    # in which case this body never runs.
    from _testcapi import unicode_count

    def check(expected, *call_args):
        # Each case reads as "expected count, then the arguments passed".
        self.assertEqual(unicode_count(*call_args), expected)

    def check_type_error(*call_args):
        with self.assertRaises(TypeError):
            unicode_count(*call_args)

    text = 'abcabd'
    size = len(text)

    # whole-string range
    check(2, text, 'a', 0, size)
    check(2, text, 'ab', 0, size)
    check(1, text, 'abc', 0, size)
    check(0, text, 'а', 0, size)  # cyrillic "a"

    # start < end
    check(1, text, 'a', 3, size)
    check(0, text, 'a', 4, size)
    check(2, text, 'a', 0, sys.maxsize)

    # start >= end
    check(0, text, 'abc', 0, 0)
    check(0, text, 'a', 3, 2)
    check(0, text, 'a', sys.maxsize, 5)

    # negative
    check(2, text, 'ab', -size, -1)
    check(1, text, 'a', -size, -3)

    # wrong args: too few arguments, then a non-str in either str position
    check_type_error('a', 'a')
    check_type_error('a', 'a', 1)
    check_type_error(1, 'a', 0, 1)
    check_type_error('a', 1, 0, 1)

    # empty string
    check(4, 'abc', '', 0, 3)
    check(3, 'abc', '', 1, 3)
    check(1, '', '', 0, 1)
    check(0, '', 'a', 0, 1)

    # different unicode kinds
    samples = ("\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1")
    for sample in samples:
        for char in sample:
            # Every character occurs once in its own sample ...
            check(1, sample, char, 0, len(sample))
            # ... and never in the ASCII-only text.
            check(0, text, char, 0, size)

# Test PyUnicode_FindChar()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand Down
21 changes: 21 additions & 0 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, utf8_len);
}

/* Test helper exposing PyUnicode_Count() to Python code.
 *
 * Parses four positional arguments with the format "UUnn" (two str
 * objects followed by two Py_ssize_t values) and forwards them unchanged
 * to PyUnicode_Count().
 *
 * Returns the count as a Python int.  Returns NULL when argument parsing
 * fails or when PyUnicode_Count() returns -1.
 */
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *haystack;
    PyObject *needle;
    Py_ssize_t start, end;
    Py_ssize_t count;

    if (!PyArg_ParseTuple(args, "UUnn:unicode_count",
                          &haystack, &needle, &start, &end)) {
        return NULL;
    }

    count = PyUnicode_Count(haystack, needle, start, end);
    if (count == -1) {
        /* -1 is treated as the failure value; hand it on as NULL. */
        return NULL;
    }
    return PyLong_FromSsize_t(count);
}

static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
Expand Down Expand Up @@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_count", unicode_count, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{NULL},
Expand Down