mv ~/Downloads/python-novice-gapminder-data.zip .
unzip python-novice-gapminder-data.zip

import pandas as pd

data_oceania = pd.read_csv('data/gapminder_gdp_oceania.csv')
data_oceania

type(data_oceania)

pandas.core.frame.DataFrame

data_oceania_country = pd.read_csv('data/gapminder_gdp_oceania.csv',
                                   index_col='country')
data_oceania_country

data_oceania_country.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, Australia to New Zealand
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   gdpPercap_1952  2 non-null      float64
 1   gdpPercap_1957  2 non-null      float64
 2   gdpPercap_1962  2 non-null      float64
 3   gdpPercap_1967  2 non-null      float64
 4   gdpPercap_1972  2 non-null      float64
 5   gdpPercap_1977  2 non-null      float64
 6   gdpPercap_1982  2 non-null      float64
 7   gdpPercap_1987  2 non-null      float64
 8   gdpPercap_1992  2 non-null      float64
 9   gdpPercap_1997  2 non-null      float64
 10  gdpPercap_2002  2 non-null      float64
 11  gdpPercap_2007  2 non-null      float64
dtypes: float64(12)
memory usage: 208.0+ bytes

data_oceania_country.columns

Index(['gdpPercap_1952', 'gdpPercap_1957', 'gdpPercap_1962', 'gdpPercap_1967',
       'gdpPercap_1972', 'gdpPercap_1977', 'gdpPercap_1982', 'gdpPercap_1987',
       'gdpPercap_1992', 'gdpPercap_1997', 'gdpPercap_2002', 'gdpPercap_2007'],
      dtype='object')

data_oceania_country.T

data_oceania_country.T.describe()

import pandas as pd
data = pd.read_csv('data/gapminder_gdp_europe.csv', index_col='country')
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, Albania to United Kingdom
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   gdpPercap_1952  30 non-null     float64
 1   gdpPercap_1957  30 non-null     float64
 2   gdpPercap_1962  30 non-null     float64
 3   gdpPercap_1967  30 non-null     float64
 4   gdpPercap_1972  30 non-null     float64
 5   gdpPercap_1977  30 non-null     float64
 6   gdpPercap_1982  30 non-null     float64
 7   gdpPercap_1987  30 non-null     float64
 8   gdpPercap_1992  30 non-null     float64
 9   gdpPercap_1997  30 non-null     float64
 10  gdpPercap_2002  30 non-null     float64
 11  gdpPercap_2007  30 non-null     float64
dtypes: float64(12)
memory usage: 3.0+ KB

print(data.iloc[0, 0])

1601.056136

data.head()

print(data.loc["Albania", "gdpPercap_1952"])

1601.056136

print(data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972'])

             gdpPercap_1962  gdpPercap_1967  gdpPercap_1972
country                                                    
Italy           8243.582340    10022.401310    12269.273780
Montenegro      4649.593785     5907.850937     7778.414017
Netherlands    12790.849560    15363.251360    18794.745670
Norway         13450.401510    16361.876470    18965.055510
Poland          5338.752143     6557.152776     8006.506993

print(data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972'].max())

gdpPercap_1962    13450.40151
gdpPercap_1967    16361.87647
gdpPercap_1972    18965.05551
dtype: float64

print(data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972'].min())

gdpPercap_1962    4649.593785
gdpPercap_1967    5907.850937
gdpPercap_1972    7778.414017
dtype: float64

# Use a subset of data to keep output readable.
subset = data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972']
print('Subset of data:')
subset

Subset of data:

# Which values were greater than 10000 ?
print('\nWhere are values large?')
subset > 10000

Where are values large?

mask = subset > 10000
subset[mask]

subset[subset > 10000].describe()

mask_higher = data > data.mean()
wealth_score = mask_higher.aggregate('sum', axis=1) / len(data.columns)
print(wealth_score)

country
Albania                   0.000000
Austria                   1.000000
Belgium                   1.000000
Bosnia and Herzegovina    0.000000
Bulgaria                  0.000000
Croatia                   0.000000
Czech Republic            0.500000
Denmark                   1.000000
Finland                   1.000000
France                    1.000000
Germany                   1.000000
Greece                    0.333333
Hungary                   0.000000
Iceland                   1.000000
Ireland                   0.333333
Italy                     0.500000
Montenegro                0.000000
Netherlands               1.000000
Norway                    1.000000
Poland                    0.000000
Portugal                  0.000000
Romania                   0.000000
Serbia                    0.000000
Slovak Republic           0.000000
Slovenia                  0.333333
Spain                     0.333333
Sweden                    1.000000
Switzerland               1.000000
Turkey                    0.000000
United Kingdom            1.000000
dtype: float64

data.groupby(wealth_score).sum()

first = pd.read_csv('data/gapminder_all.csv', index_col='country')
second = first[first['continent'] == 'Americas']
third = second.drop('Puerto Rico')
fourth = third.drop('continent', axis = 1)
fourth.to_csv('result.csv')

data = pd.read_csv('data/gapminder_gdp_europe.csv', index_col='country')
print(data.idxmin())
print(data.idxmax())

	country	gdpPercap_1952	gdpPercap_1957	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972	gdpPercap_1977	gdpPercap_1982	gdpPercap_1987	gdpPercap_1992	gdpPercap_1997	gdpPercap_2002	gdpPercap_2007
0	Australia	10039.59564	10949.64959	12217.22686	14526.12465	16788.62948	18334.19751	19477.00928	21888.88903	23424.76683	26997.93657	30687.75473	34435.36744
1	New Zealand	10556.57566	12247.39532	13175.67800	14463.91893	16046.03728	16233.71770	17632.41040	19007.19129	18363.32494	21050.41377	23189.80135	25185.00911

	gdpPercap_1952	gdpPercap_1957	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972	gdpPercap_1977	gdpPercap_1982	gdpPercap_1987	gdpPercap_1992	gdpPercap_1997	gdpPercap_2002	gdpPercap_2007
country
Australia	10039.59564	10949.64959	12217.22686	14526.12465	16788.62948	18334.19751	19477.00928	21888.88903	23424.76683	26997.93657	30687.75473	34435.36744
New Zealand	10556.57566	12247.39532	13175.67800	14463.91893	16046.03728	16233.71770	17632.41040	19007.19129	18363.32494	21050.41377	23189.80135	25185.00911

country	Australia	New Zealand
gdpPercap_1952	10039.59564	10556.57566
gdpPercap_1957	10949.64959	12247.39532
gdpPercap_1962	12217.22686	13175.67800
gdpPercap_1967	14526.12465	14463.91893
gdpPercap_1972	16788.62948	16046.03728
gdpPercap_1977	18334.19751	16233.71770
gdpPercap_1982	19477.00928	17632.41040
gdpPercap_1987	21888.88903	19007.19129
gdpPercap_1992	23424.76683	18363.32494
gdpPercap_1997	26997.93657	21050.41377
gdpPercap_2002	30687.75473	23189.80135
gdpPercap_2007	34435.36744	25185.00911

country	Australia	New Zealand
count	12.000000	12.000000
mean	19980.595634	17262.622813
std	7815.405220	4409.009167
min	10039.595640	10556.575660
25%	13948.900203	14141.858697
50%	18905.603395	16933.064050
75%	24318.059265	19517.996910
max	34435.367440	25185.009110

	gdpPercap_1952	gdpPercap_1957	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972	gdpPercap_1977	gdpPercap_1982	gdpPercap_1987	gdpPercap_1992	gdpPercap_1997	gdpPercap_2002	gdpPercap_2007
country
Albania	1601.056136	1942.284244	2312.888958	2760.196931	3313.422188	3533.003910	3630.880722	3738.932735	2497.437901	3193.054604	4604.211737	5937.029526
Austria	6137.076492	8842.598030	10750.721110	12834.602400	16661.625600	19749.422300	21597.083620	23687.826070	27042.018680	29095.920660	32417.607690	36126.492700
Belgium	8343.105127	9714.960623	10991.206760	13149.041190	16672.143560	19117.974480	20979.845890	22525.563080	25575.570690	27561.196630	30485.883750	33692.605080
Bosnia and Herzegovina	973.533195	1353.989176	1709.683679	2172.352423	2860.169750	3528.481305	4126.613157	4314.114757	2546.781445	4766.355904	6018.975239	7446.298803
Bulgaria	2444.286648	3008.670727	4254.337839	5577.002800	6597.494398	7612.240438	8224.191647	8239.854824	6302.623438	5970.388760	7696.777725	10680.792820

Tabular data¶

Objectives¶

Data to use¶

Pandas for tabular data¶

Pandas methods for exploration¶

Exercise¶

Pandas Dataframes¶

Objectives¶

Note about Pandas DataFrames/Series¶

Selecting values¶

Select multiple columns or rows using `DataFrame.loc` and a named slice.¶

Result of slicing can be used in further operations.¶

Select values or NaN using a Boolean mask.¶

Group By: split-apply-combine¶

Exercises¶

Takeaway¶

Continue¶

	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972
country
Italy	8243.582340	10022.401310	12269.273780
Montenegro	4649.593785	5907.850937	7778.414017
Netherlands	12790.849560	15363.251360	18794.745670
Norway	13450.401510	16361.876470	18965.055510
Poland	5338.752143	6557.152776	8006.506993

	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972
country
Italy	False	True	True
Montenegro	False	False	False
Netherlands	True	True	True
Norway	True	True	True
Poland	False	False	False

	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972
country
Italy	NaN	10022.40131	12269.27378
Montenegro	NaN	NaN	NaN
Netherlands	12790.84956	15363.25136	18794.74567
Norway	13450.40151	16361.87647	18965.05551
Poland	NaN	NaN	NaN

	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972
count	2.000000	3.000000	3.000000
mean	13120.625535	13915.843047	16676.358320
std	466.373656	3408.589070	3817.597015
min	12790.849560	10022.401310	12269.273780
25%	12955.737548	12692.826335	15532.009725
50%	13120.625535	15363.251360	18794.745670
75%	13285.513522	15862.563915	18879.900590
max	13450.401510	16361.876470	18965.055510

	gdpPercap_1952	gdpPercap_1957	gdpPercap_1962	gdpPercap_1967	gdpPercap_1972	gdpPercap_1977	gdpPercap_1982	gdpPercap_1987	gdpPercap_1992	gdpPercap_1997	gdpPercap_2002	gdpPercap_2007
0.000000	36916.854200	46110.918793	56850.065437	71324.848786	88569.346898	104459.358438	113553.768507	119649.599409	92380.047256	103772.937598	118590.929863	149577.357928
0.333333	16790.046878	20942.456800	25744.935321	33567.667670	45277.839976	53860.456750	59679.634020	64436.912960	67918.093220	80876.051580	102086.795210	122803.729520
0.500000	11807.544405	14505.000150	18380.449470	21421.846200	25377.727380	29056.145370	31914.712050	35517.678220	36310.666080	40723.538700	45564.308390	51403.028210
1.000000	104317.277560	127332.008735	149989.154201	178000.350040	215162.343140	241143.412730	263388.781960	296825.131210	315238.235970	346930.926170	385109.939210	427850.333420

Tabular data¶

Objectives¶

Data to use¶

Pandas for tabular data¶

Pandas methods for exploration¶

Exercise¶

Pandas Dataframes¶

Objectives¶

Note about Pandas DataFrames/Series¶

Selecting values¶

Select multiple columns or rows using DataFrame.loc and a named slice.¶

Result of slicing can be used in further operations.¶

Select values or NaN using a Boolean mask.¶

Group By: split-apply-combine¶

Exercises¶

Takeaway¶

Continue¶

Select multiple columns or rows using `DataFrame.loc` and a named slice.¶