In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes =True)
%matplotlib inline
import statsmodels.api as sm
from scipy import stats
from scipy.stats import ttest_1samp, ttest_ind,mannwhitneyu,levene,shapiro,wilcoxon
In [2]:
SLADT=pd.read_excel("F:/2019 GB Python/SLA1t.xlsx")
In [3]:
SLADT
Out[3]:
SLA
0 95.004430
1 93.565637
2 94.737492
3 95.789231
4 95.530447
5 93.233199
6 94.903952
7 94.202423
8 95.675634
9 92.749504
10 93.692614
11 96.329568
12 95.112963
13 94.333942
In [4]:
# 95% - Standard SLA
In [5]:
stats.ttest_1samp(SLADT.SLA,95)
Out[5]:
Ttest_1sampResult(statistic=-1.3065269974711893, pvalue=0.21401598351235218)
In [7]:
# P value > 0.05 ---> Accept the Null Hypo
In [8]:
stats.normaltest(SLADT.SLA).pvalue
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1394: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=14
  "anyway, n=%i" % int(n))
Out[8]:
0.7384860279627459

2 T Test

In [9]:
TDT=pd.read_excel("F:/2019 GB Python/2tandpairedt.xlsx")
In [10]:
TDT
Out[10]:
Pre Post Associate Beforecoaching Aftercoaching
0 30.105925 30.007614 A 3.999752 3.908891
1 30.107895 29.991830 B 4.006046 3.894249
2 30.119569 29.994598 C 3.999894 3.892256
3 30.096333 30.023697 D 3.974033 3.893144
4 30.092991 30.003774 E 4.005389 3.904257
5 30.092980 30.001910 F 4.008832 3.898088
6 30.107753 30.001502 G 4.013410 3.881486
7 30.103797 30.006164 H 3.994697 3.879631
8 30.101589 30.007950 I 4.013145 3.906453
9 30.084902 30.019612 J 4.011839 3.905207
10 30.077321 30.006371 K 3.987651 3.878025
11 30.103668 30.011245 L 4.005439 3.885159
12 30.077804 29.982616 M 4.000336 3.905813
13 30.080428 30.005372 N 3.993623 3.905487
14 30.109188 30.004687 O 3.987985 3.889207
In [11]:
TWOTDT=TDT[['Pre', 'Post']]
In [12]:
TWOTDT
Out[12]:
Pre Post
0 30.105925 30.007614
1 30.107895 29.991830
2 30.119569 29.994598
3 30.096333 30.023697
4 30.092991 30.003774
5 30.092980 30.001910
6 30.107753 30.001502
7 30.103797 30.006164
8 30.101589 30.007950
9 30.084902 30.019612
10 30.077321 30.006371
11 30.103668 30.011245
12 30.077804 29.982616
13 30.080428 30.005372
14 30.109188 30.004687
In [13]:
sns.boxplot(x='variable', y='value', data = pd.melt(TWOTDT), width =0.3)
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0xa0b3ed0>
In [14]:
t_statistic, p_value = ttest_ind(TWOTDT.Pre, TWOTDT.Post)
print(t_statistic, p_value )
22.085107571721483 2.963586474101535e-19
In [15]:
stats.normaltest(TWOTDT.Pre).pvalue
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1394: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
  "anyway, n=%i" % int(n))
Out[15]:
0.6531705824168154
In [16]:
stats.normaltest(TWOTDT.Post).pvalue
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1394: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
  "anyway, n=%i" % int(n))
Out[16]:
0.5100222307903698
In [17]:
levene(TWOTDT.Pre,TWOTDT.Post)
Out[17]:
LeveneResult(statistic=1.5738976177624129, pvalue=0.22001358733907092)

Paired t Test

In [18]:
PAIRTDT= TDT[['Beforecoaching', 'Aftercoaching']]
PAIRTDT
Out[18]:
Beforecoaching Aftercoaching
0 3.999752 3.908891
1 4.006046 3.894249
2 3.999894 3.892256
3 3.974033 3.893144
4 4.005389 3.904257
5 4.008832 3.898088
6 4.013410 3.881486
7 3.994697 3.879631
8 4.013145 3.906453
9 4.011839 3.905207
10 3.987651 3.878025
11 4.005439 3.885159
12 4.000336 3.905813
13 3.993623 3.905487
14 3.987985 3.889207
In [19]:
sns.boxplot(x="variable", y="value", data=pd.melt(PAIRTDT),width=0.3)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x14e7470>
In [20]:
stats.normaltest(TDT.Beforecoaching).pvalue
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1394: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
  "anyway, n=%i" % int(n))
Out[20]:
0.21332517297557374
In [21]:
stats.normaltest(TDT.Aftercoaching).pvalue
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1394: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
  "anyway, n=%i" % int(n))
Out[21]:
0.19734137796686999
In [22]:
stats.ttest_rel(TDT.Beforecoaching, TDT.Aftercoaching)
Out[22]:
Ttest_relResult(statistic=31.04193826919751, pvalue=2.6039212090443113e-14)

ANOVA

In [24]:
AN=pd.read_excel("F:/2019 GB Python/ANOVA.xlsx")
In [25]:
AN
Out[25]:
Day Noon Night
0 9.236348 9.090199 8.901892
1 9.252986 8.950993 9.100092
2 9.248021 8.895417 8.841298
3 9.406213 8.844150 8.991732
4 9.252373 9.026661 8.802963
5 9.475237 8.944478 8.917330
6 9.309529 9.104782 8.828534
7 9.338340 8.947740 8.811452
8 9.364601 8.934479 8.987952
9 9.304998 8.992930 8.961595
10 9.236231 9.106185 8.775197
11 9.140339 8.907438 8.798524
In [30]:
ANS=AN.stack().rename_axis(('Series', 'Timing')).reset_index(name='Val')
ANS
Out[30]:
Series Timing Val
0 0 Day 9.236348
1 0 Noon 9.090199
2 0 Night 8.901892
3 1 Day 9.252986
4 1 Noon 8.950993
5 1 Night 9.100092
6 2 Day 9.248021
7 2 Noon 8.895417
8 2 Night 8.841298
9 3 Day 9.406213
10 3 Noon 8.844150
11 3 Night 8.991732
12 4 Day 9.252373
13 4 Noon 9.026661
14 4 Night 8.802963
15 5 Day 9.475237
16 5 Noon 8.944478
17 5 Night 8.917330
18 6 Day 9.309529
19 6 Noon 9.104782
20 6 Night 8.828534
21 7 Day 9.338340
22 7 Noon 8.947740
23 7 Night 8.811452
24 8 Day 9.364601
25 8 Noon 8.934479
26 8 Night 8.987952
27 9 Day 9.304998
28 9 Noon 8.992930
29 9 Night 8.961595
30 10 Day 9.236231
31 10 Noon 9.106185
32 10 Night 8.775197
33 11 Day 9.140339
34 11 Noon 8.907438
35 11 Night 8.798524
In [31]:
sns.boxplot(x=ANS.Timing, y=ANS.Val, width=0.3)
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0xa1d9ff0>
In [32]:
stats.normaltest(ANS.Val).pvalue
Out[32]:
0.09589191834455094
In [34]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
In [35]:
y=ANS.Val
X=ANS.Timing
formula='y ~ X'
model = ols(formula, ANS).fit()
aov_table = anova_lm(model)
print(aov_table)
            df    sum_sq   mean_sq          F        PR(>F)
X          2.0  1.087088  0.543544  63.627012  4.743867e-12
Residual  33.0  0.281908  0.008543        NaN           NaN
In [ ]: