|
| 1 | +import pandas as pd |
| 2 | +import matplotlib.pyplot as plt |
| 3 | + |
| 4 | +drinks = pd.read_csv('http://bit.ly/drinksbycountry')  # Load the drinks-by-country CSV from a remote URL into a DataFrame. |
| 5 | +print(drinks.head(), "\n"*3) # Show the first rows; the "\n"*3 adds blank lines separating this preview from later output. |
| 6 | + |
| 7 | +# country beer_servings ... total_litres_of_pure_alcohol continent |
| 8 | +# 0 Afghanistan 0 ... 0.0 Asia |
| 9 | +# 1 Albania 89 ... 4.9 Europe |
| 10 | +# 2 Algeria 25 ... 0.7 Africa |
| 11 | +# 3 Andorra 245 ... 12.4 Europe |
| 12 | +# 4 Angola 217 ... 5.9 Africa |
| 13 | +# |
| 14 | +# [5 rows x 6 columns] |
| 15 | + |
| 16 | +########################################################### |
| 17 | +# let's calculate the mean beer servings of the entire dataset |
| 18 | + |
| 19 | +# print(drinks['beer_servings'].mean()) |
| 20 | +# or you can use dot (attribute) access: |
| 21 | +# print(drinks.beer_servings.mean()) -- but prefer bracket notation, because: |
| 22 | +# 1) With brackets you can select multiple columns (e.g., df[['col1', 'col2']]) or add |
| 23 | +# a new column (df['newcol'] = ...), which can't be done with dot access. |
| 24 | +# 2) Dot access only reads a single column with a simple name; bracket notation can do much more. |
| 25 | +# 3) You can only use df.col if the column name is a valid Python identifier, so be careful. |
| 26 | + |
| 27 | +# 106.16062176165804 |
| 28 | + |
| 29 | +########################################################### |
| 30 | +# let's calculate the mean beer servings for African countries only |
| 31 | +# print(drinks[drinks["continent"] == "Africa"]["beer_servings"].mean()) |
| 32 | +# 61.471698113207545 |
| 33 | + |
| 34 | +########################################################### |
| 35 | +# let's calculate the mean beer servings of each continent |
| 36 | +# print(drinks.groupby("continent")["beer_servings"].mean()) |
| 37 | +# continent |
| 38 | +# Africa 61.471698 |
| 39 | +# Asia 37.045455 |
| 40 | +# Europe 193.777778 |
| 41 | +# North America 145.434783 |
| 42 | +# Oceania 89.687500 |
| 43 | +# South America 175.083333 |
| 44 | +# Name: beer_servings, dtype: float64 |
| 45 | + |
| 46 | +# print(drinks.groupby("continent")[["beer_servings"]].mean()) # or use double brackets to get a DataFrame instead of a Series |
| 47 | +# continent |
| 48 | +# Africa 61.471698 |
| 49 | +# Asia 37.045455 |
| 50 | +# Europe 193.777778 |
| 51 | +# North America 145.434783 |
| 52 | +# Oceania 89.687500 |
| 53 | +# South America 175.083333 |
| 54 | +########################################################### |
| 55 | +# using an aggregation function with groupby |
| 56 | +# print(drinks.groupby('continent')['beer_servings'].min()) |
| 57 | +# continent |
| 58 | +# Africa 0 |
| 59 | +# Asia 0 |
| 60 | +# Europe 0 |
| 61 | +# North America 1 |
| 62 | +# Oceania 0 |
| 63 | +# South America 93 |
| 64 | +# Name: beer_servings, dtype: int64 |
| 65 | + |
| 66 | +# we can apply multiple aggregation functions at once |
| 67 | +# print(drinks.groupby('continent')['beer_servings'].agg(['count','sum','max', 'min'])) |
| 68 | +# count sum max min |
| 69 | +# continent |
| 70 | +# Africa 53 3258 376 0 |
| 71 | +# Asia 44 1630 247 0 |
| 72 | +# Europe 45 8720 361 0 |
| 73 | +# North America 23 3345 285 1 |
| 74 | +# Oceania 16 1435 306 0 |
| 75 | +# South America 12 2101 333 93 |
| 76 | + |
| 77 | +########################################################### |
| 78 | +print(drinks.groupby('continent').mean()) |
| 79 | +# beer_servings ... total_litres_of_pure_alcohol |
| 80 | +# continent ... |
| 81 | +# Africa 61.471698 ... 3.007547 |
| 82 | +# Asia 37.045455 ... 2.170455 |
| 83 | +# Europe 193.777778 ... 8.617778 |
| 84 | +# North America 145.434783 ... 5.995652 |
| 85 | +# Oceania 89.687500 ... 3.381250 |
| 86 | +# South America 175.083333 ... 6.308333 |
| 87 | +# |
| 88 | +# [6 rows x 4 columns] |
| 89 | + |
| 90 | +########################################################### |
| 91 | +# let's plot the per-continent means as a bar chart |
| 92 | +drinks.groupby('continent').mean().plot(kind= 'bar') |
| 93 | +plt.show() |
| 94 | +########################################################### |
| 95 | + |
| 96 | + |
| 97 | + |
| 98 | + |
| 99 | + |
| 100 | + |
| 101 | + |
| 102 | + |
| 103 | + |
0 commit comments