apache · viirya · Oct 31, 2017 · Oct 31, 2017 · HyukjinKwon · Oct 31, 2017
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
@@ -2185,6 +2185,13 @@ def udf(f=None, returnType=StringType()):
         duplicate invocations may be eliminated or the function may even be invoked more times than
         it is present in the query.
 
+    .. note:: User-defined functions do not support conditional execution when used with SQL
+        conditional expressions such as `when` or `if`. The functions are still applied to all
+        rows, regardless of whether the conditions are met, so the output is correct as long as
+        the functions can run on every row without failure. If the functions can fail at runtime
+        on rows that do not satisfy the conditions, the suggested workaround is to incorporate
+        the condition logic into the functions themselves.
+
     :param f: python function if used as a standalone function
     :param returnType: a :class:`pyspark.sql.types.DataType` object
 
@@ -2278,6 +2285,13 @@ def pandas_udf(f=None, returnType=StringType()):
        .. seealso:: :meth:`pyspark.sql.GroupedData.apply`
 
     .. note:: The user-defined function must be deterministic.
+
+    .. note:: User-defined functions do not support conditional execution when used with SQL
+        conditional expressions such as `when` or `if`. The functions are still applied to all
+        rows, regardless of whether the conditions are met, so the output is correct as long as
+        the functions can run on every row without failure. If the functions can fail at runtime
+        on rows that do not satisfy the conditions, the suggested workaround is to incorporate
+        the condition logic into the functions themselves.
     """
     return _create_udf(f, returnType=returnType, pythonUdfType=PythonUdfType.PANDAS_UDF)