Skip to content

Commit ae53601

Browse files
committed
Text, time methods file added.
1 parent b1cacef commit ae53601

File tree

1 file changed

+206
-0
lines changed

1 file changed

+206
-0
lines changed

Pandas_Text_Time_Methods.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
import numpy as np
2+
import pandas as pd
3+
import seaborn as sns
4+
5+
# Load the exercise workbook into a DataFrame (the path is specific to the author's machine).
df = pd.read_excel(io='/Users/oscar/Desktop/My_Python_Libraries/text_exercise.XLSX')
6+
# print(df)
7+
# id staff department job salary age
8+
# 0 M0001 Tom BLUE HR manager "$150,000" 52
9+
# 1 M0002 JOHN BLACK IT manager "$180,000" 48
10+
# 2 E0001 Micheal Brown IT data scientist "$150,000" 35
11+
# 3 E0002 jason walker HR recruiter 130000dolar 38
12+
# 4 E0003 Alex Green IT backend developer "$110,000" -
13+
# 5 E0004 OSCAR SMİTH IT frontend developer "$120,000" 32
14+
# 6 E0005 Adrian STAR IT data scientist "$135,000" 40
15+
# 7 E0006 Albert simon IT data scientist 125000dolar 35
16+
17+
# print(df.info())
18+
# <class 'pandas.core.frame.DataFrame'>
19+
# RangeIndex: 8 entries, 0 to 7
20+
# Data columns (total 6 columns):
21+
# # Column Non-Null Count Dtype
22+
# --- ------ -------------- -----
23+
# 0 id 8 non-null object
24+
# 1 staff 8 non-null object
25+
# 2 department 8 non-null object
26+
# 3 job 8 non-null object
27+
# 4 salary 8 non-null object
28+
# 5 age 8 non-null object
29+
# dtypes: object(6)
30+
# memory usage: 512.0+ bytes
31+
# None
32+
33+
#------------------------------------------------------------------------------------------------
34+
# some explanations :
35+
# lower() => Converts a string into lower case
36+
# upper() => Converts a string into upper case
37+
# capitalize() => Converts the first character to upper case and the rest to lower case
38+
# title() => Converts the first character of each word to upper case
39+
# swapcase() => Swaps the case lower/upper
40+
41+
# this is python built-in lower:
42+
# print('steve'.lower())
43+
# steve
44+
45+
# this is for series in pandas :
46+
# print(df['staff'].str.lower())
47+
# 0 tom blue
48+
# 1 john black
49+
# 2 micheal brown
50+
# 3 jason walker
51+
# 4 alex green
52+
# 5 oscar smi̇th
53+
# 6 adrian star
54+
# 7 albert simon
55+
# Name: staff, dtype: object
56+
57+
# You can also use str.upper(), str.title(), str.capitalize() and str.swapcase() the same way.
58+
59+
#------------------------------------------------------------------------------------------------
60+
# isalpha() => Returns True if all characters in the string are in the alphabet
61+
# isnumeric() => Returns True if all characters in the string are numeric
62+
# isalnum() => Returns True if all characters in the string are alphanumeric
63+
# endswith() => Returns true if the string ends with the specified value
64+
# startswith() => Returns true if the string starts with the specified value
65+
# contains() => Returns True for each element that contains the given substring (or regex pattern), else False.
66+
67+
# print(df)
68+
# id staff department job salary age
69+
# 0 M0001 Tom BLUE HR manager "$150,000" 52
70+
# 1 M0002 JOHN BLACK IT manager "$180,000" 48
71+
# 2 E0001 Micheal Brown IT data scientist "$150,000" 35
72+
# 3 E0002 jason walker HR recruiter 130000dolar 38
73+
# 4 E0003 Alex Green IT backend developer "$110,000" -
74+
# 5 E0004 OSCAR SMİTH IT frontend developer "$120,000" 32
75+
# 6 E0005 Adrian STAR IT data scientist "$135,000" 40
76+
# 7 E0006 Albert simon IT data scientist 125000dolar 35
77+
78+
# print(df['job'].str.isalpha())
79+
# 0 True
80+
# 1 True
81+
# 2 False
82+
# 3 True
83+
# 4 False
84+
# 5 False
85+
# 6 False
86+
# 7 False
87+
# Name: job, dtype: bool
88+
89+
# print(df['age'].str.isnumeric())
90+
# 0 NaN
91+
# 1 NaN
92+
# 2 NaN
93+
# 3 NaN
94+
# 4 False
95+
# 5 NaN
96+
# 6 NaN
97+
# 7 NaN
98+
# Name: age, dtype: object
99+
#
100+
# It returns NaN for the non-string values, but we need a boolean for every row.
101+
102+
# The column has dtype object but mostly holds non-string values (numbers); the .str methods return NaN for every non-string element :
103+
# let's convert them to str first.
104+
# astype lets us convert a Series to the given type; in the example below, to str.
105+
# print(df['age'].astype(str).str.isnumeric())
106+
# 0 True
107+
# 1 True
108+
# 2 True
109+
# 3 True
110+
# 4 False
111+
# 5 True
112+
# 6 True
113+
# 7 True
114+
# Name: age, dtype: bool
115+
116+
# print(df['job'])
117+
# 0 manager
118+
# 1 manager
119+
# 2 data scientist
120+
# 3 recruiter
121+
# 4 backend developer
122+
# 5 frontend developer
123+
# 6 data scientist
124+
# 7 data scientist
125+
# Name: job, dtype: object
126+
127+
# print(df['job'].str.startswith('data'))
128+
# 0 False
129+
# 1 False
130+
# 2 True
131+
# 3 False
132+
# 4 False
133+
# 5 False
134+
# 6 True
135+
# 7 True
136+
# Name: job, dtype: bool
137+
138+
# You can also use str.endswith("per") and str.contains("data") in the same way.
139+
140+
141+
# print(df['salary'])
142+
# 0 "$150,000"
143+
# 1 "$180,000"
144+
# 2 "$150,000"
145+
# 3 130000dolar
146+
# 4 "$110,000"
147+
# 5 "$120,000"
148+
# 6 "$135,000"
149+
# 7 125000dolar
150+
# Name: salary, dtype: object
151+
152+
# print(df['salary'].str.isalnum())
153+
# 0 False
154+
# 1 False
155+
# 2 False
156+
# 3 True
157+
# 4 False
158+
# 5 False
159+
# 6 False
160+
# 7 True
161+
# Name: salary, dtype: bool
162+
163+
# If there is any punctuation or symbol in the values, the result will be misleading.
164+
# First of all we need to clean them.
165+
166+
# With using regex :
167+
# It marks as True the rows whose value contains at least one lowercase letter from a to z.
168+
# print(df['salary'].str.contains(r'[a-z]'))
169+
# 0 False
170+
# 1 False
171+
# 2 False
172+
# 3 True
173+
# 4 False
174+
# 5 False
175+
# 6 False
176+
# 7 True
177+
# Name: salary, dtype: bool
178+
179+
#------------------------------------------------------------------------------------------------
180+
# We can use these string methods, which return booleans, to build a condition and select the matching rows.
181+
# It returns all rows whose 'job' column contains 'data' :
182+
# print(df[ df['job'].str.contains('data') ])
183+
# id staff department job salary age
184+
# 2 E0001 Micheal Brown IT data scientist "$150,000" 35
185+
# 6 E0005 Adrian STAR IT data scientist "$135,000" 40
186+
# 7 E0006 Albert simon IT data scientist 125000dolar 35
187+
188+
189+
190+
191+
192+
193+
194+
195+
196+
197+
198+
199+
200+
201+
202+
203+
204+
205+
206+

0 commit comments

Comments
 (0)