#from datascience import *
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')


# Load the class data; each row holds a Section number and a Midterm score.
scores = pd.read_csv('../data/scores_by_section.csv')
scores


# Count of non-null Midterm values in each section.
scores1 = scores.groupby('Section').count()
scores1


# Average Midterm score in each section.
scores.groupby('Section').mean()


# Observed test statistic: the mean Midterm score of Section 3.
# Computed from the data instead of being typed in as a rounded constant
# (13.6667), so it stays correct if the CSV changes.
observed_average = scores.loc[scores['Section'] == 3, 'Midterm'].mean()


# Draw 27 rows at random, without replacement, and average their Midterm scores.
random_sample = scores.sample(n=27, replace=False)
random_sample.Midterm.mean()

16.62962962962963


# Average of the same sample's Midterm column, computed with NumPy this time.
np.average(random_sample['Midterm'])

16.62962962962963


# Simulate one value of the test statistic
# under the hypothesis that the section is like a random sample from the class.
# With no arguments this uses the 'scores' data set and a sample of 27 rows.

def random_sample_midterm_avg(sample_size=27, data=None, column='Midterm'):
    """Return the average of one column over a random sample of rows.

    Parameters
    ----------
    sample_size : int, default 27
        Number of rows to draw, without replacement.
    data : pandas.DataFrame, optional
        Table to sample from. When omitted, the module-level ``scores``
        table is used.
    column : str, default 'Midterm'
        Name of the column whose values are averaged.

    Returns
    -------
    float
        ``np.average`` of ``column`` over the sampled rows.
    """
    if data is None:
        # Looked up at call time so the function always sees the current table.
        data = scores
    random_sample = data.sample(sample_size, replace=False)
    return np.average(random_sample[column])


# Simulate 50,000 copies of the test statistic - may take a moment or two.
# The array is built in a single step: np.append returns a new copy of the
# whole array on every call, so appending inside the loop does quadratic work.

sample_averages = np.array([random_sample_midterm_avg() for _ in range(50000)])

sample_averages[:10]

array([15.55555556, 15.44444444, 15.18518519, 15.18518519, 15.33333333,
       13.14814815, 15.59259259, 16.77777778, 14.51851852, 14.96296296])


# Plot the simulated distribution of the statistic as a histogram and mark
# the actual observed statistic with a red dot on the horizontal axis.

averages_tbl = pd.DataFrame(sample_averages, columns=['Random Sample Average'])
averages_tbl.hist(bins=20, alpha=0.5, ec='white', zorder=5)
# Clipping is switched off so the dot sitting on the axis is drawn in full.
plt.scatter(
    observed_average, 0, color='red', s=40, zorder=10
).set_clip_on(False)
plt.show()


averages_tbl


# What proportion of the simulated averages were less than or equal to the
# observed average?
# The mean of a boolean array is the fraction of True entries, so the
# denominator always equals the number of simulations actually run.

np.mean(sample_averages <= observed_average)

0.05676


# Sort the simulated averages in ascending order. sort_values reorders the
# rows, but every row keeps its original index label.
averages_tbl = averages_tbl.sort_values(by=['Random Sample Average'])
averages_tbl


# NOTE: .loc selects by index label, not by position. The index has not been
# reset after sorting, so this returns the row whose original label is 2500,
# not the value 2,500 places into the sorted order.
five_percent_point = averages_tbl['Random Sample Average'].loc[2500]
five_percent_point

17.22222222222222


# NOTE: .loc[2500] selects the row with index label 2500 (sorting does not
# renumber the index). On a DataFrame it returns that row as a Series rather
# than a scalar.
five_percent_point = averages_tbl.sort_values(['Random Sample Average']).loc[2500]
five_percent_point

Random Sample Average    17.222222
Name: 2500, dtype: float64


# Histogram of the simulated averages with a gold vertical line drawn at
# five_percent_point.
averages_tbl.hist(bins=20, alpha=0.5)

plt.vlines(
    x=five_percent_point, ymin=0, ymax=8000,
    colors='gold', linestyles='solid', lw=2,
)

plt.title('Area to the left of the gold line: contains the 5% cut-off');


# Sort, then renumber the rows 0, 1, 2, ... so that each index label matches
# the row's position in the sorted order. drop=True discards the old labels
# instead of keeping them as an extra 'index' column.
averages_tbl1 = (
    averages_tbl
    .sort_values(by='Random Sample Average')
    .reset_index(drop=True)
)
averages_tbl1


# With a fresh 0-based index, label 2500 is also position 2500 in sorted order.
five_percent_point1 = averages_tbl1.loc[2500, 'Random Sample Average']
five_percent_point1

13.592592592592593


# Histogram of the simulated averages with a gold vertical line at
# five_percent_point1 and a red dot at the observed average.
averages_tbl.hist(bins=20, alpha=0.5)

plt.vlines(
    x=five_percent_point1, ymin=0, ymax=8000,
    colors='gold', linestyles='solid', lw=2,
)

# Clipping is switched off so the dot sitting on the axis is drawn in full.
plt.scatter(
    observed_average, 0, color='red', s=40, zorder=10
).set_clip_on(False)

plt.title('Area to the left of the gold line: contains the 5% cut-off');

	Section	Midterm
0	1	22
1	2	12
2	2	23
3	2	14
4	1	20
...	...	...
354	5	24
355	2	16
356	2	17
357	12	16
358	10	14

	Midterm
Section
1	32
2	32
3	27
4	30
5	33
6	32
7	24
8	29
9	30
10	34
11	26
12	30

	Midterm
Section
1	15.593750
2	15.125000
3	13.666667
4	14.766667
5	17.454545
6	15.031250
7	16.625000
8	16.310345
9	14.566667
10	15.235294
11	15.807692
12	15.733333

	Random Sample Average
0	15.555556
1	15.444444
2	15.185185
3	15.185185
4	15.333333
...	...
49995	16.407407
49996	17.222222
49997	14.851852
49998	16.000000
49999	16.925926

	Random Sample Average
28119	10.629630
27278	10.814815
3750	10.851852
26693	10.962963
47400	10.962963
...	...
11862	19.333333
14309	19.444444
514	19.592593
26762	19.703704
17336	19.740741

Lecture Decisions and Uncertainty¶

Mean group scores¶

Group average contested¶

27 students in group 3¶

Randomly select 27 scores from the whole class¶

The P-Value as an area¶

Consistency¶

Wait! What??¶

This doesn't look right!¶

When using .loc on a sorted table, we must remember to reset the index first!¶

	Section	Midterm
0	1	22
1	2	12
2	2	23
3	2	14
4	1	20
...	...	...
354	5	24
355	2	16
356	2	17
357	12	16
358	10	14

	Section	Midterm
0	1	22
1	2	12
2	2	23
3	2	14
4	1	20
...	...	...
354	5	24
355	2	16
356	2	17
357	12	16
358	10	14

	Section	Midterm
0	1	22
1	2	12
2	2	23
3	2	14
4	1	20
...	...	...
354	5	24
355	2	16
356	2	17
357	12	16
358	10	14