Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
deleted old implementation of Rand
  • Loading branch information
brkyvz committed Apr 30, 2015
commit c5909eb779a29e533fe28c668990c6e880e1da2a
46 changes: 24 additions & 22 deletions python/pyspark/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,6 @@ def _(col):
return _


def _create_zero_arg_function(name, doc=""):
""" Create a zero arg function by name"""
def _(val=None):
sc = SparkContext._active_spark_context
if val:
jc = getattr(sc._jvm.functions, name)(val)
else:
jc = getattr(sc._jvm.functions, name)()
return Column(jc)
_.__name__ = name
_.__doc__ = doc
return _


_functions = {
'lit': 'Creates a :class:`Column` of literal value.',
'col': 'Returns a :class:`Column` based on the given column name.',
Expand All @@ -81,21 +67,37 @@ def _(val=None):
'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.',
}

# Random-column functions, keyed by function name; each value is the docstring
# attached to the function generated for that name.
_randfunctions = {
'rand': 'Generate a random column with i.i.d. samples from U[0.0, 1.0].',
'randn': 'Generate a column with i.i.d. samples from the standard normal distribution.'
}

# Publish one module-level function per entry of _functions.
for _name, _doc in _functions.items():
globals()[_name] = _create_function(_name, _doc)
# Same for the _randfunctions entries, built with the optional-argument factory.
for _name, _doc in _randfunctions.items():
globals()[_name] = _create_zero_arg_function(_name, _doc)
# Remove the loop variables so they do not linger as module attributes.
del _name, _doc
# Add every generated name to __all__ and keep the list sorted.
__all__ += _functions.keys()
__all__ += _randfunctions.keys()
__all__.sort()


def rand(seed=None):
    """
    Generate a random column with i.i.d. samples from U[0.0, 1.0].

    :param seed: optional seed forwarded to the JVM ``rand`` function. When
        ``None`` (the default) the no-argument JVM overload is called.
    :return: a :class:`Column` wrapping the JVM result.
    """
    sc = SparkContext._active_spark_context
    # Test against None rather than truthiness so that an explicit seed of 0
    # is passed through instead of being treated as "no seed".
    if seed is not None:
        jc = sc._jvm.functions.rand(seed)
    else:
        jc = sc._jvm.functions.rand()
    return Column(jc)


def randn(seed=None):
    """
    Generate a column with i.i.d. samples from the standard normal distribution.

    :param seed: optional seed forwarded to the JVM ``randn`` function. When
        ``None`` (the default) the no-argument JVM overload is called.
    :return: a :class:`Column` wrapping the JVM result.
    """
    sc = SparkContext._active_spark_context
    # Test against None rather than truthiness so that an explicit seed of 0
    # is passed through instead of being treated as "no seed".
    if seed is not None:
        jc = sc._jvm.functions.randn(seed)
    else:
        jc = sc._jvm.functions.randn()
    return Column(jc)


def approxCountDistinct(col, rsd=None):
"""Returns a new :class:`Column` for approximate distinct count of ``col``.

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions.randfuncs
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.{DoubleType, DataType}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.random.XORShiftRandom

/**
Expand All @@ -29,7 +28,7 @@ import org.apache.spark.util.random.XORShiftRandom
*
* Since this expression is stateful, it cannot be a case object.
*/
private[sql] abstract class RDG(seed: Long) extends LeafExpression with Serializable {
abstract class RDG(seed: Long) extends LeafExpression with Serializable {
self: Product =>

/**
Expand All @@ -52,11 +51,11 @@ private[sql] abstract class RDG(seed: Long) extends LeafExpression with Serializ
}

/**
 * Generate a random column with i.i.d. uniformly distributed values in [0, 1).
 *
 * @param seed seed handed to the [[RDG]] base class
 */
private[sql] case class Rand(seed: Long) extends RDG(seed) {
case class Rand(seed: Long) extends RDG(seed) {
// Each generated value is the next double from the supplied generator.
override def generateNumber(random: XORShiftRandom): Double = random.nextDouble()
}

/**
 * Generate a random column with i.i.d. values drawn from a gaussian distribution.
 *
 * @param seed seed handed to the [[RDG]] base class
 */
private[sql] case class Randn(seed: Long) extends RDG(seed) {
case class Randn(seed: Long) extends RDG(seed) {
// Each generated value is the next gaussian sample from the supplied generator.
override def generateNumber(random: XORShiftRandom): Double = random.nextGaussian()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.randfuncs.{Randn, Rand}
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils

Expand Down