making some additional small detail changes
Transurgeon committed Aug 26, 2022
commit aadf3d0ae291e3dbf636e7e01b322def3198bdea
70 changes: 40 additions & 30 deletions python/pyspark/sql/dataframe.py
@@ -745,7 +745,7 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool =
Examples
--------
>>> df = spark.createDataFrame([(14, "Tom"), (23, "Alice"),
...     (16, "Bob")], ["age", "name"])
>>> df.show()
+---+-----+
|age| name|
@@ -766,7 +766,7 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool =
+---+-----+
only showing top 2 rows

-Show DataFrame where the maximum number of characters is 3.
+Show :class:`DataFrame` where the maximum number of characters is 3.

>>> df.show(truncate=3)
+---+----+
@@ -777,7 +777,7 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool =
| 16| Bob|
+---+----+

-Show DataFrame vertically.
+Show :class:`DataFrame` vertically.

>>> df.show(vertical=True)
-RECORD 0-----
@@ -1028,7 +1028,7 @@ def count(self) -> int:
Examples
--------
>>> df = spark.createDataFrame([(14, "Tom"), (23, "Alice"),
...     (16, "Bob")], ["age", "name"])
>>> df.show()
+---+-----+
|age| name|
@@ -1806,15 +1806,18 @@ def dtypes(self) -> List[Tuple[str, str]]:

Examples
--------
->>> df = spark.createDataFrame([(14, "Tom"),
-...     (23, "Alice")], ["age", "name"])
+>>> df = spark.createDataFrame(
+...     [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
>>> df.show()
+---+-----+
|age| name|
+---+-----+
| 14| Tom|
| 23|Alice|
| 16| Bob|
+---+-----+

+Return the name of each column along with its data type

>>> df.dtypes
[('age', 'bigint'), ('name', 'string')]
"""
@@ -3451,8 +3454,6 @@ def fillna(

Examples
--------
-Fill all null values with 50 when the data type of the column is an integer

>>> df = spark.createDataFrame([(10, 80, "Alice"), (5, None, "Bob"),
... (None, None, "Tom"), (None, None, None)], ["age", "height", "name"])
>>> df.show()
@@ -3464,6 +3465,9 @@ def fillna(
|null| null| Tom|
|null| null| null|
+----+------+-----+

+Fill all null values with 50 when the data type of the column is an integer

>>> df.na.fill(50).show()
+---+------+-----+
|age|height| name|
@@ -3474,8 +3478,6 @@ def fillna(
| 50| 50| null|
+---+------+-----+

-Fill all null values with ``False`` when the data type of the column is a boolean

>>> df = spark.createDataFrame([(10, "Alice", None), (5, "Bob", None),
... (None, "Mallory", True)], ["age", "name", "spy"])
>>> df.show()
@@ -3486,6 +3488,9 @@ def fillna(
| 5| Bob|null|
|null|Mallory|true|
+----+-------+----+

+Fill all null values with ``False`` when the data type of the column is a boolean

>>> df.na.fill(False).show()
+----+-------+-----+
| age| name| spy|
@@ -3495,8 +3500,6 @@ def fillna(
|null|Mallory| true|
+----+-------+-----+

-Fill all null values in the 'age' column to 50 and "unknown" in the 'name' column

>>> df = spark.createDataFrame([(10, 80, "Alice"), (5, None, "Bob"),
... (None, None, "Tom"), (None, None, None)], ["age", "height", "name"])
>>> df.show()
@@ -3508,6 +3511,9 @@ def fillna(
|null| null| Tom|
|null| null| null|
+----+------+-----+

+Fill all null values in the 'age' column with 50 and "unknown" in the 'name' column

>>> df.na.fill({'age': 50, 'name': 'unknown'}).show()
+---+------+-------+
|age|height| name|
@@ -3631,6 +3637,9 @@ def replace( # type: ignore[misc]
|null| null| Tom|
|null| null| null|
+----+------+-----+

+Replace all instances of the value 10 with the value 20

>>> df.na.replace(10, 20).show()
+----+------+-----+
| age|height| name|
@@ -4210,22 +4219,22 @@ def drop(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc]
| 23|Alice|
| 16| Bob|
+---+-----+
->>> df.drop('age').collect()
-[Row(name='Tom'), Row(name='Alice'), Row(name='Bob')]
-
->>> df = spark.createDataFrame([(14, "Tom"), (23, "Alice"),
-...     (16, "Bob")], ["age", "name"])
->>> df.show()
-+---+-----+
-|age| name|
-+---+-----+
-| 14| Tom|
-| 23|Alice|
-| 16| Bob|
-+---+-----+
->>> df.drop(df.age).collect()
-[Row(name='Tom'), Row(name='Alice'), Row(name='Bob')]

+>>> df.drop('age').show()
++-----+
+| name|
++-----+
+|  Tom|
+|Alice|
+|  Bob|
++-----+
+>>> df.drop(df.age).show()
++-----+
+| name|
++-----+
+|  Tom|
+|Alice|
+|  Bob|
++-----+
>>> df.join(df2, df.name == df2.name, 'inner').drop(df.name).collect()
Contributor Author

I am not sure what these 3 inner joins do exactly. I don't see an instantiation of df2 anywhere.

What should I do with these 3 examples?

Member

I think it's showing a common example that joins on a key and then drops the join key.

[Row(age=5, height=85, name='Bob')]
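
For context, this example only runs if a df2 with a matching 'name' column is defined earlier in the doctest setup. A minimal sketch, assuming the same df as in the surrounding drop() examples and a hypothetical df2 (not taken from this diff):

>>> df2 = spark.createDataFrame([(80, "Tom"), (85, "Bob")], ["height", "name"])
>>> df.join(df2, df.name == df2.name, 'inner').drop(df.name).sort("name").collect()
[Row(age=16, height=85, name='Bob'), Row(age=14, height=80, name='Tom')]

Note that drop(df.name) removes only df's copy of the join key, so df2's 'name' column survives in the result.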

@@ -4256,7 +4265,8 @@ def toDF(self, *cols: "ColumnOrName") -> "DataFrame":
Parameters
----------
cols : str
-new column names. The length of the list needs to be the same as the number of columns in the initial :class:`DataFrame`
+new column names. The length of the list needs to be the same as the number
+of columns in the initial :class:`DataFrame`

Returns
-------
Expand All @@ -4266,7 +4276,7 @@ def toDF(self, *cols: "ColumnOrName") -> "DataFrame":
Examples
--------
>>> df = spark.createDataFrame([(14, "Tom"), (23, "Alice"),
...     (16, "Bob")], ["age", "name"])
>>> df.show()
+---+-----+
|age| name|
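
The rest of the toDF example is truncated in this view; presumably the elided lines demonstrate the renaming call itself. A minimal sketch of that usage, with 'f1' and 'f2' as illustrative column names (assumed, not from the diff):

>>> df.toDF('f1', 'f2').show()
+--+-----+
|f1|   f2|
+--+-----+
|14|  Tom|
|23|Alice|
|16|  Bob|
+--+-----+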