Pandas Exploration

[1]:

import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

[2]:

# Load the iris dataset bundle; the next cell reads its `data`,
# `feature_names` and `target` attributes to build a DataFrame.
iris = load_iris()

[3]:

# Tidy the feature names: strip the spaces and the "(cm)" unit suffix so each
# one becomes a compact column label.
featureColumns = [
    name.replace(" ", "").replace("(cm)", "")
    for name in iris.feature_names
]

# Measurements become the columns; the class labels are attached as `target`.
df = pd.DataFrame(data=iris.data, columns=featureColumns).assign(target=iris.target)

df.head()

[3]:

	sepallength	sepalwidth	petallength	petalwidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

[4]:

## Single filter: keep rows whose sepal length is below 5, show the first few
df.loc[df['sepallength'].lt(5)].head()

[4]:

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

[5]:

## Applying 2 filters: sepal length below 5 AND target class in {0, 1}
## (element-wise `&` combines the two boolean masks)

df.loc[df['sepallength'].lt(5) & df['target'].isin([0, 1])].head()

[5]:

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

Transforming Data

[6]:

# Seeded, local random generator so the synthetic readings below are identical
# on every run (the global NumPy random state is left untouched).
WEATHER_SEED = 42
rng = np.random.default_rng(WEATHER_SEED)

# 11 synthetic observations: two noisy numeric readings plus a location code.
# NOTE: this rebinds `df`, replacing the iris frame built earlier.
df = pd.DataFrame({
    'temperature': 23 + 10 * rng.standard_normal(11),
    'thunderstorm': 150 + 10 * rng.standard_normal(11),
    'location': list('XXYYXXYYXXY'),
})

df.head()

[6]:

	temperature	thunderstorm	location
0	37.178703	151.250130	X
1	16.338412	148.930679	X
2	16.614139	151.664537	Y
3	35.818557	154.044738	Y
4	21.598561	143.369174	X

[7]:

# Per-column replacement table: short location codes -> full place names.
replaceValues = {'location': {"X": "MISSISSIPPI", "Y": "MANALI"}}

# With regex=True the keys act as regular-expression patterns rather than
# exact values (see the pandas DataFrame.replace docs); for these
# single-letter codes that yields the full names shown below.
df = df.replace(to_replace=replaceValues, regex=True)
df.head()

[7]:

	temperature	thunderstorm	location
0	37.178703	151.250130	MISSISSIPPI
1	16.338412	148.930679	MISSISSIPPI
2	16.614139	151.664537	MANALI
3	35.818557	154.044738	MANALI
4	21.598561	143.369174	MISSISSIPPI

[8]:

# Boolean mask of rows whose location contains the substring "ISSI",
# used to select just those rows.
df[df['location'].str.contains("ISSI")]

[8]:

	temperature	thunderstorm	location
0	37.178703	151.250130	MISSISSIPPI
1	16.338412	148.930679	MISSISSIPPI
4	21.598561	143.369174	MISSISSIPPI
5	29.470110	141.694351	MISSISSIPPI
8	35.765885	144.513669	MISSISSIPPI
9	27.894740	156.470016	MISSISSIPPI

[9]:

# Average of every remaining column, computed separately for each location.
df.groupby(by='location').agg('mean')

[9]:

	temperature	thunderstorm
location
MANALI	29.238917	155.537351
MISSISSIPPI	28.041068	147.704670

[10]:

# Re-imported so this cell can also run on its own; both modules are already
# imported in the notebook's first cell.
import pandas as pd
import numpy as np

# --- Group A: five students with hand-entered measurements -------------------
serIndex = ['s1', 's2', 's3', 's4', 's5']

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=serIndex)
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=serIndex)

# Build the frame in one step; the Gender list is aligned positionally with
# the s1..s5 index carried by the two Series.
df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
    'Gender': ['M', 'F', 'M', 'M', 'F'],
})

# Extra student 's6'. Built from a plain list: wrapping the mixed values in
# np.array() would coerce the two numbers to strings.
s = pd.Series(
    [165.4, 82.7, 'F'],
    index=['Student_height', 'Student_weight', 'Gender'],
    name='s6',
)

# DataFrame.append was removed in pandas 2.0, so add the row by concatenating
# a one-row frame. Casting it to df_A's dtypes keeps the numeric columns
# numeric instead of degrading them to object.
df_AA = pd.concat([df_A, s.to_frame().T.astype(df_A.dtypes.to_dict())])

# --- Group B: five students with simulated measurements ----------------------
# The seed is reset to the same value before each draw, so both calls consume
# the same underlying random stream; heights and weights of group B therefore
# move together (consistent with the recorded output of this cell).
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=170.0, scale=25, size=5))

np.random.seed(100)
weights_B = pd.Series(np.random.normal(loc=75.0, scale=12.0, size=5))

df_B = pd.DataFrame({
    'Student_height': heights_B,
    'Student_weight': weights_B,
    'Gender': ['F', 'M', 'F', 'F', 'M'],
})
df_B.index = ['s7', 's8', 's9', 's10', 's11']

# Stack both groups into a single table (displayed as the cell output).
pd.concat([df_AA, df_B])

[10]:

	Student_height	Student_weight	Gender
s1	176.2	85.1	M
s2	158.4	90.2	F
s3	167.6	76.8	M
s4	156.2	80.4	M
s5	161.4	78.9	F
s6	165.4	82.7	F
s7	126.256	54.0028	F
s8	178.567	79.1122	M
s9	198.826	88.8364	F
s10	163.689	71.9708	F
s11	194.533	86.7758	M

[13]:

# Four scores labelled 'a'..'d'.
s = pd.Series({'a': 89.2, 'b': 76.4, 'c': 98.2, 'd': 75.9})

# `in` on a Series tests the index labels, not the stored values.
'b' in s

[13]:

True

[ ]:

	sepallength	sepalwidth	petallength	petalwidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

Pandas Exploration

Contents

Pandas Exploration#

Transforming Data#

	sepallength	sepalwidth	petallength	petalwidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2

	sepallength	sepalwidth	petallength	petalwidth
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
6	4.6	3.4	1.4	0.3
8	4.4	2.9	1.4	0.2