@@ -5501,7 +5501,22 @@ def corr(self, method='pearson', min_periods=1):
55015501
55025502 def cov (self , min_periods = None ):
55035503 """
5504- Compute pairwise covariance of columns, excluding NA/null values
5504+ Compute pairwise covariance of columns, excluding NA/null values.
5505+
5506+ Compute the pairwise covariance among the series of a DataFrame.
5507+ The returned data frame is the `covariance matrix
5508+ <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
5509+ of the DataFrame.
5510+
5511+ Both NA and null values are automatically excluded from the
5512+ calculation. (See the note below about bias from missing values.)
5513+ A threshold can be set for the minimum number of
5514+ observations for each value created. Column pairs with fewer
5515+ observations than this threshold will be returned as ``NaN``.
5516+
5517+ This method is generally used for the analysis of time series data to
5518+ understand the relationship between different measures
5519+ across time.
55055520
55065521 Parameters
55075522 ----------
@@ -5511,12 +5526,71 @@ def cov(self, min_periods=None):
55115526
55125527 Returns
55135528 -------
5514- y : DataFrame
5529+ DataFrame
5530+ The covariance matrix of the series of the DataFrame.
5531+
5532+ See Also
5533+ --------
5534+ pandas.Series.cov : compute covariance with another Series
5535+ pandas.core.window.EWM.cov : exponentially weighted sample covariance
5536+ pandas.core.window.Expanding.cov : expanding sample covariance
5537+ pandas.core.window.Rolling.cov : rolling sample covariance
55155538
55165539 Notes
55175540 -----
5518- `y` contains the covariance matrix of the DataFrame's time series.
5519- The covariance is normalized by N-1 (unbiased estimator).
5541+ Returns the covariance matrix of the DataFrame's time series.
5542+ The covariance is normalized by N-1.
5543+
5544+ For DataFrames that have Series that are missing data (assuming that
5545+ data is `missing at random
5546+ <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
5547+ the returned covariance matrix will be an unbiased estimate
5548+ of the variance and covariance between the member Series.
5549+
5550+ However, for many applications this estimate may not be acceptable
5551+ because the estimated covariance matrix is not guaranteed to be positive
5552+ semi-definite. This could lead to estimated correlations having
5553+ absolute values which are greater than one, and/or a non-invertible
5554+ covariance matrix. See `Estimation of covariance matrices
5555+ <https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices>`__
5556+ for more details.
5557+
5558+ Examples
5559+ --------
5560+ >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
5561+ ... columns=['dogs', 'cats'])
5562+ >>> df.cov()
5563+ dogs cats
5564+ dogs 0.666667 -1.000000
5565+ cats -1.000000 1.666667
5566+
5567+ >>> np.random.seed(42)
5568+ >>> df = pd.DataFrame(np.random.randn(1000, 5),
5569+ ... columns=['a', 'b', 'c', 'd', 'e'])
5570+ >>> df.cov()
5571+ a b c d e
5572+ a 0.998438 -0.020161 0.059277 -0.008943 0.014144
5573+ b -0.020161 1.059352 -0.008543 -0.024738 0.009826
5574+ c 0.059277 -0.008543 1.010670 -0.001486 -0.000271
5575+ d -0.008943 -0.024738 -0.001486 0.921297 -0.013692
5576+ e 0.014144 0.009826 -0.000271 -0.013692 0.977795
5577+
5578+ **Minimum number of periods**
5579+
5580+ This method also supports an optional ``min_periods`` keyword
5581+ that specifies the required minimum number of non-NA observations for
5582+ each column pair in order to have a valid result:
5583+
5584+ >>> np.random.seed(42)
5585+ >>> df = pd.DataFrame(np.random.randn(20, 3),
5586+ ... columns=['a', 'b', 'c'])
5587+ >>> df.loc[df.index[:5], 'a'] = np.nan
5588+ >>> df.loc[df.index[5:10], 'b'] = np.nan
5589+ >>> df.cov(min_periods=12)
5590+ a b c
5591+ a 0.316741 NaN -0.150812
5592+ b NaN 1.248003 0.191417
5593+ c -0.150812 0.191417 0.895202
55205594 """
55215595 numeric_df = self ._get_numeric_data ()
55225596 cols = numeric_df .columns
0 commit comments