add pandera hypothesis

khuyentran1401 · khuyentran1401 · commit ff10cd1e6400 · 2022-09-16T10:27:11.000-05:00
diff --git a/data_science_tools/pandera_hypothesis/test1.py b/data_science_tools/pandera_hypothesis/test1.py
@@ -0,0 +1,19 @@
+import pandas as pd
+from pandas.testing import assert_frame_equal
+
+
+def processing_fn(df):
+    """Return a new frame holding df's columns plus ``val3 = val1 / val2``."""
+    return df.assign(val3=df["val1"] / df["val2"])
+
+
+def test_processing_fn():
+    """Check processing_fn against hand-computed ratios for one fixed frame."""
+    # Input fixture
+    frame = pd.DataFrame({"val1": [1, 1, -1, -2, 2], "val2": [1, 2, -2, -1, 2]})
+    # Run the function under test
+    actual = processing_fn(frame)
+    # Expected frame: the input columns plus the precomputed val3 column
+    wanted = frame.assign(val3=[1, 0.5, 0.5, 2, 1])
+    # Compare values only; dtype mismatches are ignored
+    assert_frame_equal(actual, wanted, check_dtype=False)
diff --git a/data_science_tools/pandera_hypothesis/test2.py b/data_science_tools/pandera_hypothesis/test2.py
@@ -0,0 +1,29 @@
+import pandas as pd
+import pytest
+from pandas.testing import assert_frame_equal
+
+
+def processing_fn(df):
+    """Return a new frame holding df's columns plus ``val3 = val1 / val2``."""
+    return df.assign(val3=df["val1"] / df["val2"])
+
+
+val1 = [[1, 1, -1, -2, 2], [1, 1, -1, -2, 2]]  # numerators, one inner list per test case
+val2 = [[1, 2, -2, -1, 2], [1, 1, 1, 1, 1]]  # denominators, paired with val1 by position
+val3 = [[1, 0.5, 0.5, 2, 1], [1, 1, -1, -2, 2]]  # expected val1 / val2 for each case
+
+
+@pytest.mark.parametrize("val1,val2,val3", list(zip(val1, val2, val3)))
+def test_processing_fn(val1, val2, val3):
+    """Check processing_fn against the expected val3 of each parametrized case."""
+    # Assemble the input frame from this case's columns
+    frame = pd.DataFrame({"val1": val1, "val2": val2})
+
+    # Run the function under test
+    actual = processing_fn(frame)
+
+    # Expected frame: the input columns plus the supplied val3
+    wanted = frame.assign(val3=val3)
+
+    # Compare values only; dtype mismatches are ignored
+    assert_frame_equal(actual, wanted, check_dtype=False)
diff --git a/data_science_tools/pandera_hypothesis/test3.py b/data_science_tools/pandera_hypothesis/test3.py
@@ -0,0 +1,21 @@
+import pandas as pd
+import pandera as pa
+
+out_schema = pa.DataFrameSchema(  # schema applied to processing_fn's return value
+    {
+        "val1": pa.Column(int, pa.Check.in_range(-2, 3)),  # int column, range-checked
+        "val2": pa.Column(int, pa.Check.in_range(-2, 3)),  # int column, range-checked
+        "val3": pa.Column(float, pa.Check.in_range(-2, 3)),  # derived ratio, float
+    }
+)
+
+
+@pa.check_output(out_schema)
+def processing_fn(df):
+    """Add ``val3 = val1 / val2``; the result is checked against ``out_schema``."""
+    return df.assign(val3=df["val1"] / df["val2"])
+
+
+if __name__ == "__main__":  # runs only when the file is executed as a script
+    df = pd.DataFrame({"val1": [1, 1, -1, -2, 2], "val2": [1, 1, -1, -2, 2]})
+    processing_fn(df)  # val1 == val2 in every row; result is discarded after the decorator's check
diff --git a/data_science_tools/pandera_hypothesis/test4.py b/data_science_tools/pandera_hypothesis/test4.py
@@ -0,0 +1,26 @@
+import hypothesis
+import pandera as pa
+
+schema = pa.DataFrameSchema(  # input schema; also the source of generated test frames
+    {
+        "val1": pa.Column(int, pa.Check.in_range(-2, 3)),
+        "val2": pa.Column(int, pa.Check.in_range(-2, 3)),  # NOTE(review): range spans 0, the divisor in processing_fn - confirm intended
+    }
+)
+
+out_schema = schema.add_columns(  # input columns plus the derived val3
+    {
+        "val3": pa.Column(float, pa.Check.in_range(-2, 3)),
+    },
+)
+
+
+@pa.check_output(out_schema)
+def processing_fn(df):
+    """Add ``val3 = val1 / val2``; the result is checked against ``out_schema``."""
+    return df.assign(val3=df["val1"] / df["val2"])
+
+
+@hypothesis.given(schema.strategy(size=5))  # frames generated from `schema`; presumably 5 rows each
+def test_processing_fn(dataframe):
+    processing_fn(dataframe)  # no explicit assert: the out_schema check on processing_fn is what can fail