import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')


3 > 1

True


type(3 > 1)

bool


True

True


true

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [5], in <cell line: 1>()
----> 1 true

NameError: name 'true' is not defined


3 = 3

  Input In [6]
    3 = 3
    ^
SyntaxError: cannot assign to literal here. Maybe you meant '==' instead of '='?


3 == 3.0

True


10 != 2

True


x = 14
y = 3


x > 15

False


12 < x

True


x < 20

True


12 < x < 20

True


10 < x-y < 13

True


x > 13 and y < 3.14159

True


pets = np.array(['cat', 'cat', 'dog', 'cat', 'dog', 'rabbit'])


pets == 'cat'

array([ True,  True, False,  True, False, False])


1 + 1 + 0 + 1 + 0 + 0

3


#sum(make_array(True, True, False, True, False, False))

np.sum(np.array([True, True, False, True, False, False]))

3


sum(pets == 'dog')

2


np.count_nonzero(pets == 'dog')

2


x = np.arange(20, 31)


x > 28

array([False, False, False, False, False, False, False, False, False,
        True,  True])


# Work in progress
def one_round(my_roll, your_roll):
    """Score one round from my point of view (work-in-progress version).

    Returns 1 when my_roll is greater than your_roll. Losses and ties are
    not handled yet: in those cases the function falls off the end and
    implicitly returns None.
    """
    i_won = my_roll > your_roll
    if i_won:
        return 1


one_round(4, 3)

1


one_round(2, 6)


# Final correct version
def one_round(my_roll, your_roll):
    """Return my winnings for a single round.

    Returns:
        1 if my_roll is greater than your_roll,
        -1 if your_roll is greater than my_roll,
        0 if the two rolls are equal.
        If none of the three comparisons holds (e.g. a NaN roll) the
        function falls through and implicitly returns None.
    """
    # Guard clauses: each comparison is only evaluated if the previous failed.
    if my_roll > your_roll:
        return 1
    if your_roll > my_roll:
        return -1
    if your_roll == my_roll:
        return 0


one_round(1, 1)

0


one_round(6, 5)

1


one_round(7, -1)

1


mornings = np.array(['wake up', 'sleep in'])
mornings

array(['wake up', 'sleep in'], dtype='<U8')


np.random.choice(mornings)

'wake up'


np.random.choice(mornings)

'sleep in'


np.random.choice(mornings)

'wake up'


np.random.choice(mornings, 7)

array(['sleep in', 'wake up', 'sleep in', 'wake up', 'wake up', 'wake up',
       'wake up'], dtype='<U8')


sum(np.random.choice(mornings, 7) == 'wake up')

5


sum(np.random.choice(mornings, 7) == 'sleep in')

5


morning_week = np.random.choice(mornings, 7)
morning_week

array(['wake up', 'sleep in', 'sleep in', 'sleep in', 'sleep in',
       'sleep in', 'sleep in'], dtype='<U8')


sum(morning_week == 'wake up')

1


sum(morning_week == 'sleep in')

6


die_faces = np.arange(1, 7)
die_faces

array([1, 2, 3, 4, 5, 6])


np.random.choice(die_faces)

3


def simulate_one_round():
    """Simulate one round: draw a random face for each player and score it.

    Each roll is an independent np.random.choice draw from the module-level
    die_faces array (my roll is drawn first); the pair is scored by one_round.
    """
    mine, yours = np.random.choice(die_faces), np.random.choice(die_faces)
    return one_round(mine, yours)


simulate_one_round()

-1


first = np.arange(4)
second = np.arange(10, 17)


np.append(first, 6)

array([0, 1, 2, 3, 6])


plus = np.append(first, 6)
plus

array([0, 1, 2, 3, 6])


first

array([0, 1, 2, 3])


plus

array([0, 1, 2, 3, 6])


np.append(first, second)

array([ 0,  1,  2,  3, 10, 11, 12, 13, 14, 15, 16])


first

array([0, 1, 2, 3])


second

array([10, 11, 12, 13, 14, 15, 16])


results = np.array([])

type(results)


results = np.append(results, simulate_one_round())
results


for pet in np.array(['cat', 'dog', 'rabbit']):
    print('I love my ' + pet)

I love my cat
I love my dog
I love my rabbit


# The same work as a for loop over the array, written out by hand:
# one assignment of `pet` followed by one print for each element.

# First element
pet = np.array(['cat', 'dog', 'rabbit']).item(0)
print('I love my ' + pet)

# Second element
pet = np.array(['cat', 'dog', 'rabbit']).item(1)
print('I love my ' + pet)

# Third element
pet = np.array(['cat', 'dog', 'rabbit']).item(2)
print('I love my ' + pet)

I love my cat
I love my dog
I love my rabbit


# Start from an empty array that will collect one outcome per round.
# The collected outcomes are displayed as floats (e.g. -1., 1.).
game_outcomes = np.array([])

# Play 5 rounds.
for i in np.arange(5):
    # np.append returns a new array rather than modifying its argument,
    # so the name has to be reassigned on every iteration.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

array([-1.,  1.,  1., -1.,  1.])


# Reset the collection array before the larger simulation.
game_outcomes = np.array([])

# Same loop as for 5 rounds, now repeated 10,000 times.
for i in np.arange(10000):
    # Reassign because np.append returns a new array.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

array([ 1.,  1.,  1., ...,  1.,  1., -1.])


len(game_outcomes)

10000


results = pd.DataFrame({'My winnings':game_outcomes})


results


# Group the simulated games by their 'My winnings' value, count the rows in
# each group, and draw those counts as a horizontal bar chart.
chart = results.groupby(by='My winnings')\
                [['My winnings']]\
                .count()\
                    .plot\
                        .barh();


# Bonus question: This simulation is relatively simple.
# Can you find a way to run it without using a `for` loop?


coin = np.array(['heads', 'tails'])


sum(np.random.choice(coin, 100) == 'heads')

47


# Simulate one outcome

def num_heads():
    """Simulate 100 coin tosses and return how many came up 'heads'.

    The tosses are drawn with np.random.choice from the module-level
    `coin` array.
    """
    tosses = np.random.choice(coin, 100)
    came_up_heads = tosses == 'heads'
    # Summing booleans counts the True entries.
    return sum(came_up_heads)


# Decide how many times you want to repeat the experiment

repetitions = 10000


# Simulate that many outcomes

# Empty array that will hold one head-count per repetition of the experiment.
outcomes = np.array([])

# Run the 100-toss experiment `repetitions` times.
for i in np.arange(repetitions):
    # np.append returns a new array, so reassign the name each time.
    outcomes = np.append(outcomes, num_heads())
    
outcomes

array([46., 52., 44., ..., 47., 52., 50.])


len(outcomes)

10000


heads = pd.DataFrame({'Heads':outcomes})
heads.hist(bins = np.arange(29.5, 70.6), ec='white');


trip = pd.read_csv('../data/trip.csv')

trip.head()


commute = trip[trip['Duration'] < 1800]

commute.hist('Duration', ec='yellow');


commute.hist('Duration', bins=60, ec='red');


# Percent of people who have a ride duration between 250 and 500 seconds
(500-250) * 0.15

37.5


# NB: the choice of 'Duration' here is arbitrary; count() just needs some column to count.

starts = commute.groupby(by='Start Station')[['Duration']].count()#.sort_values(by='Start Station', ascending=False)

starts.sort_values(by='Duration', ascending=False)


# what is the problem here?

pd.pivot_table(commute, index=['Start Station'], columns=['End Station']).fillna(0)

/var/folders/bm/l_yhcr911wv7_tf_ywk2h2mh0000gn/T/ipykernel_12782/2694654531.py:1: FutureWarning: pivot_table dropped a column because it failed to aggregate. This behavior is deprecated and will raise in a future version of pandas. Select only the columns that can be aggregated.
  pd.pivot_table(commute, index=['Start Station'], columns=['End Station']).fillna(0)


duration = trip[['Start Station', 'End Station', 'Duration']]

duration


ages = np.array([16, 22, 18, 15, 19, 15, 16, 21])
age = pd.DataFrame({'Age':ages})

age


age[age['Age']>=18]


voter = ages >= 18


voter

array([False,  True,  True, False,  True, False, False,  True])


voter1 = age >= 18
voter1


voter

array([False,  True,  True, False,  True, False, False,  True])


def is_voter(j):
    """Return whether the age `j` is at least 18.

    Uses the `>=` operator directly, so it also accepts a numpy array or a
    DataFrame of ages, in which case the comparison is applied element-wise.
    """
    voting_age = 18
    return j >= voting_age


type(is_voter)

function


is_voter(22)

True


is_voter(3)

False


is_voter(age)


ages >= 18

array([False,  True,  True, False,  True, False, False,  True])


voter

array([False,  True,  True, False,  True, False, False,  True])


def my_voter_function(x):
    """Return True where `x` is 18 or more, False otherwise.

    The result is whatever `x >= 18` evaluates to, so array-like inputs
    give an element-wise boolean result.
    """
    minimum_age = 18
    meets_minimum = x >= minimum_age
    return meets_minimum


age.where(age['Age'] >= 18)

	My winnings
0	1.0
1	1.0
2	1.0
3	-1.0
4	1.0
...	...
9995	1.0
9996	1.0
9997	1.0
9998	1.0
9999	-1.0

	Trip ID	Duration	Start Date	Start Station	Start Terminal	End Date	End Station	End Terminal	Bike #	Subscriber Type	Zip Code
0	913460	765	8/31/2015 23:26	Harry Bridges Plaza (Ferry Building)	50	8/31/2015 23:39	San Francisco Caltrain (Townsend at 4th)	70	288	Subscriber	2139
1	913459	1036	8/31/2015 23:11	San Antonio Shopping Center	31	8/31/2015 23:28	Mountain View City Hall	27	35	Subscriber	95032
2	913455	307	8/31/2015 23:13	Post at Kearny	47	8/31/2015 23:18	2nd at South Park	64	468	Subscriber	94107
3	913454	409	8/31/2015 23:10	San Jose City Hall	10	8/31/2015 23:17	San Salvador at 1st	8	68	Subscriber	95113
4	913453	789	8/31/2015 23:09	Embarcadero at Folsom	51	8/31/2015 23:22	Embarcadero at Sansome	60	487	Customer	9069

	Duration
Start Station
San Francisco Caltrain (Townsend at 4th)	25858
San Francisco Caltrain 2 (330 Townsend)	21523
Harry Bridges Plaza (Ferry Building)	15543
Temporary Transbay Terminal (Howard at Beale)	14298
2nd at Townsend	13674
...	...
Mezes Park	189
Redwood City Medical Center	139
San Mateo County Center	108
Redwood City Public Library	101
Franklin at Maple	62

	Bike #										...	Trip ID
End Station	2nd at Folsom	2nd at South Park	2nd at Townsend	5th at Howard	Adobe on Almaden	Arena Green / SAP Center	Beale at Market	Broadway St at Battery St	California Ave Caltrain Station	Castro Street and El Camino Real	...	South Van Ness at Market	Spear at Folsom	St James Park	Stanford in Redwood City	Steuart at Market	Temporary Transbay Terminal (Howard at Beale)	Townsend at 7th	University and Emerson	Washington at Kearny	Yerba Buena Center of the Arts (3rd @ Howard)
Start Station
2nd at Folsom	437.148148	441.155932	445.883295	436.752212	0.000000	0.000000	446.007874	437.223881	0.000000	0.0	...	731516.456522	641003.357798	0.0	0.0	681677.226562	700727.048309	711661.028818	0.000000	621464.521127	658447.493976
2nd at South Park	440.700000	415.981707	434.788079	431.531073	0.000000	0.000000	462.189873	421.089888	0.000000	0.0	...	649108.048780	672376.186603	0.0	0.0	687462.008929	620272.384439	677145.012945	0.000000	692072.852113	641217.822222
2nd at Townsend	444.411552	433.366197	460.389189	431.087838	0.000000	0.000000	430.218579	436.491039	0.000000	0.0	...	624034.820000	675171.864865	0.0	0.0	689369.168491	647041.948560	722415.416268	0.000000	645436.111111	641750.689655
5th at Howard	450.878505	441.883333	412.934783	463.493976	0.000000	0.000000	456.067797	427.647059	0.000000	0.0	...	659459.941176	668855.610000	0.0	0.0	695081.692722	681125.388592	658704.217252	0.000000	751567.319149	647610.811111
Adobe on Almaden	0.000000	0.000000	0.000000	0.000000	210.727273	190.857143	0.000000	0.000000	0.000000	0.0	...	0.000000	0.000000	725137.3	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
Temporary Transbay Terminal (Howard at Beale)	446.573840	442.955711	442.817602	433.681333	0.000000	0.000000	435.742515	435.077540	0.000000	0.0	...	667164.854701	650493.939394	0.0	0.0	752327.857843	653980.553191	697919.355152	0.000000	667566.544444	672149.610422
Townsend at 7th	444.014620	434.839161	448.628297	437.110000	0.000000	0.000000	465.514286	441.240000	0.000000	0.0	...	685422.292350	702776.401786	0.0	0.0	641267.253623	679435.512295	675926.075758	0.000000	696148.068966	719238.209150
University and Emerson	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	292.175439	0.0	...	0.000000	0.000000	0.0	0.0	0.000000	0.000000	0.000000	618905.774194	0.000000	0.000000
Washington at Kearny	390.235294	440.460317	413.403509	425.651163	0.000000	0.000000	444.718750	447.164557	0.000000	0.0	...	680496.680000	653986.791667	0.0	0.0	646498.354839	709506.234694	674198.301887	0.000000	693241.600000	644410.527778
Yerba Buena Center of the Arts (3rd @ Howard)	428.548387	430.301435	444.855422	434.737828	0.000000	0.000000	408.711111	422.212766	0.000000	0.0	...	686250.573913	653510.140845	0.0	0.0	662276.247525	637054.646018	702914.881226	0.000000	690123.318182	641848.219178

	Start Station	End Station	Duration
0	Harry Bridges Plaza (Ferry Building)	San Francisco Caltrain (Townsend at 4th)	765
1	San Antonio Shopping Center	Mountain View City Hall	1036
2	Post at Kearny	2nd at South Park	307
3	San Jose City Hall	San Salvador at 1st	409
4	Embarcadero at Folsom	Embarcadero at Sansome	789
...	...	...	...
354147	Powell Street BART	Townsend at 7th	619
354148	Harry Bridges Plaza (Ferry Building)	San Francisco Caltrain (Townsend at 4th)	6712
354149	South Van Ness at Market	5th at Howard	538
354150	South Van Ness at Market	5th at Howard	568
354151	South Van Ness at Market	5th at Howard	569

Lecture - Conditionals and Iteration¶

Comparison¶

Comparisons with arrays¶

Simulation

Conditional Statements - If¶

What about 10,000 rolls?¶

Random Selection¶

Appending Arrays¶

Repeated Betting¶

`For` Statements¶

Another example: simulating heads in 100 coin tosses¶

np.random.choice(array, number of times)¶

Optional: Advanced `where`¶

	Age
0	16
1	22
2	18
3	15
4	19
5	15
6	16
7	21

	Age
1	22
2	18
4	19
7	21

	Age
0	NaN
1	22.0
2	18.0
3	NaN
4	19.0
5	NaN
6	NaN
7	21.0

Lecture - Conditionals and Iteration¶

Comparison¶

Comparisons with arrays¶

Simulation

Conditional Statements - If¶

What about 10,000 rolls?¶

Random Selection¶

Appending Arrays¶

Repeated Betting¶

For Statements¶

Another example: simulating heads in 100 coin tosses¶

np.random.choice(array, number of times)¶

Optional: Bay Area Bike Share¶

Optional: Advanced where¶

`For` Statements¶

Optional: Advanced `where`¶