Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
2 views

03.python.08.plot.examples

The document provides a comprehensive guide on visualizing data using Python libraries such as pandas and matplotlib, focusing on life expectancy and health expenditure. It includes various plotting techniques, including line plots, bar plots, box plots, and heatmaps, while allowing user interaction for selecting countries and data types. Additionally, it covers password data analysis with visualizations for password categories and average online breaking times.

Uploaded by

dznz1999
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

03.python.08.plot.examples

The document provides a comprehensive guide on visualizing data using Python libraries such as pandas and matplotlib, focusing on life expectancy and health expenditure. It includes various plotting techniques, including line plots, bar plots, box plots, and heatmaps, while allowing user interaction for selecting countries and data types. Additionally, it covers password data analysis with visualizations for password categories and average online breaking times.

Uploaded by

dznz1999
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

Visualization

Python
pandas and matplotlib
line plot
multiple line plots
barplot
boxplot
heatmap

Life Expectancy and Health Expenditure

# CSV file read by the life-expectancy examples.
FDATA = "./files/matplot.life-expectancy-vs-health-expenditure.csv"

# Menu option -> [menu label, dataframe column, y-axis label].
OPTCHART = {
    1: ['Life Expectancy', 'LifeExpectancy', 'Number of years'],
    2: ['Health Expenditure', 'HealthExpenditure', 'Dollars/Capita Year'],
}

# Line colour per plottable column (keys match the column names in OPTCHART).
COLORS = {"LifeExpectancy": "#16acd8", "HealthExpenditure": "#4e16d8"}

# Column renames applied to the dataframe after loading.
COLRENAME = {'Entity': 'Country'}

def menu():
    """Build the prompt text that lists the available chart options.

    Returns:
        A string with one "<key>: <label>" line per OPTCHART entry,
        followed by "Your choice: " (no trailing newline).
    """
    option_lines = [f"{key}: {val[0]}\n" for key, val in OPTCHART.items()]
    return "".join(option_lines) + "Your choice: "
# Print the option list once.
print(menu())

# Read the CSV and apply the column renames in a single chain.
df = pd.read_csv(FDATA).rename(columns=COLRENAME)

# Distinct country names; used to validate the user's input.
clist = df["Country"].unique()

# Ask for a country until the answer matches one present in the data.
country = input("Country: ")
while country not in clist:
    country = input("Country: ")

# Ask which series to plot. A non-numeric answer re-prompts instead of
# aborting the script with ValueError.
opt = None
while opt not in OPTCHART:
    try:
        opt = int(input(menu()))
    except ValueError:
        opt = None
colname = OPTCHART[opt][1]

# Column -> colour mapping restricted to the selected series.
selcol = {colname: COLORS[colname]}

# Rows of the chosen country that have a value for the chosen series,
# reduced to the two columns the charts need.
is_country = df["Country"] == country
has_value = df[colname].notna()
dfsel = df.loc[is_country & has_value, ['Year', colname]]

Q1: plot one indicator for a selected country

pandas plot

# Single series drawn through DataFrame.plot.line, without a legend.
ax = dfsel.plot.line(x='Year', legend=False, color=selcol)
ax.set(
    xlabel='Year',
    ylabel=OPTCHART[opt][2],
    title=f"pandas.plot.line: {OPTCHART[opt][0]}\n{country.title()}",
)
# plt.show()

pandas plot (draws a line chart by default)

# Same chart through the generic DataFrame.plot entry point.
ax = dfsel.plot(x='Year', legend=False, color=selcol)
ax.set(
    xlabel='Year',
    ylabel=OPTCHART[opt][2],
    title=f"pandas.plot: {OPTCHART[opt][0]}\n{country.title()}",
)
# plt.show()

matplotlib

# Same chart built directly with matplotlib.
fig, ax = plt.subplots()
# The label given here is the text the legend shows for this line.
plt.plot(dfsel['Year'], dfsel[colname], label=OPTCHART[opt][0])
plt.xlabel('Year')
plt.ylabel(OPTCHART[opt][2])
plt.title("matplotlib: " + OPTCHART[opt][0] + "\n" + country.title())
plt.legend()  # show the legend with the label set above
# plt.show()

Q2 plot both data

## Two different Y axes

### separate charts
# NOTE(review): rows are filtered on the series chosen in the menu even
# though both series are plotted below — confirm that is intended.
country_rows = df["Country"] == country
dfcountry = df[country_rows & df[colname].notna()]

# Two stacked axes: life expectancy on top, health expenditure below.
fig, axes = plt.subplots(2, 1, figsize=(8, 10))
for axis, column in zip(axes, ("LifeExpectancy", "HealthExpenditure")):
    axis.plot(dfcountry["Year"], dfcountry[column])
plt.show()

# One figure with two y axes sharing the same x axis.
fig, ax1 = plt.subplots(figsize=(8, 8))
ax2 = ax1.twinx()

# OPTCHART key 1 goes on the left axis, key 2 on the right axis.
for axis, key in ((ax1, 1), (ax2, 2)):
    column = OPTCHART[key][1]
    axis.plot(dfcountry["Year"], dfcountry[column], color=COLORS[column])
    axis.set_ylabel(OPTCHART[key][2])
ax1.set_xlabel("Year")
plt.show()

Q3 plot several selected countries

Create a table with one column per country and plot it as a single chart

# TODO: ask the user for a set of countries
countries = ['Italy', 'France']

# Long -> wide: one row per year, one column per country. The whole
# expression is parenthesised so the selection and the pivot stay one
# statement.
dftab = (
    df.loc[
        df.Country.isin(countries) & ~df[colname].isna(),
        ['Country', 'Year', colname],
    ]
    .pivot(index='Year', columns='Country', values=colname)
)

ax = dftab.plot(kind='line')
plt.ylabel(OPTCHART[opt][2])
#plt.title(OPTCHART[opt][0] + "\n(countries: " + ", ".join(countries).strip() + ")")
plt.title("pandas.plot - single chart: " + OPTCHART[opt][0])
plt.show()

one chart per country

# One subplot per country, drawn from the wide table `dftab`.
# With subplots=True pandas returns an array of Axes, one per column.
ax = dftab.plot(
    kind='line',
    subplots=True,
    title="pandas.plot - multiple chart: " + OPTCHART[opt][0],
)
# plt.ylabel / plt.title act only on the current (last) subplot, so the
# y label is set on every Axes explicitly.
for subplot_ax in ax:
    subplot_ax.set_ylabel(OPTCHART[opt][2])
plt.show()

matplotlib

# single chart
fig, axes = plt.subplots()
# A dedicated loop name keeps the user-selected `country` intact.
for name in countries:
    plt.plot(dftab.index, dftab[name], label=name)  # one line per country
plt.xlabel('Year')
plt.ylabel(OPTCHART[opt][2])
plt.title("matplotlib - single chart: " + OPTCHART[opt][0])
plt.legend(title='Country')
plt.show()

one chart per country

# Line colours for the per-country charts. Kept under its own name so the
# COLORS dict (column -> colour) used by the earlier charts is not replaced.
PALETTE = ['#00202e', '#003f5c', '#2c4875', '#8a508f', '#bc5090', '#ff6361',
           '#ff8531', '#ffa600']

# Size the grid from the columns actually present in the pivot table.
ncountries = len(dftab.columns)
# squeeze=False always returns a 2-D array, so indexing also works when
# there is a single country; the only column is then taken as a 1-D array.
fig, axes = plt.subplots(ncountries, 1, figsize=(8, 6 * ncountries),
                         squeeze=False)
axes = axes[:, 0]
for i, name in enumerate(dftab.columns):
    # Axes.plot has no `legend` keyword; a legend is simply not requested.
    # The palette wraps around when there are more countries than colours.
    axes[i].plot(dftab.index, dftab[name], color=PALETTE[i % len(PALETTE)])
    axes[i].set_title(f'{name} {OPTCHART[opt][0]} Over Years')
    axes[i].set_xlabel('Year')
    axes[i].set_ylabel(OPTCHART[opt][2])
plt.tight_layout()
plt.show()

Q4 boxplot

selcountries = ['France', 'Germany', 'Italy']

# Rows after 2000 for the selected countries, limited to the columns of
# interest. Selection and projection are one statement. The per-year mean
# that used to precede this line was computed and discarded, so it is gone.
dfcs = df.loc[
    (df['Year'] > 2000) & (df['Country'].isin(selcountries)),
    ['Country', 'Year', 'LifeExpectancy', 'HealthExpenditure'],
]

#dfcs.groupby('Country')[['LifeExpectancy']].boxplot()
ax = dfcs.groupby('Country')[['LifeExpectancy']].boxplot(subplots=False)
# groupby orders the groups by sorted key, so the tick labels are derived
# from the data in that same order instead of relying on how
# `selcountries` happens to be written.
ax.set_xticklabels(sorted(dfcs['Country'].unique()))

Passwords

# Input files for the password examples.
FDATA = "./files/passwords.txt.csv"
FCAT = "./files/passwords.cat.csv"

# Multiplier that converts a value expressed in the given unit to hours
# (a month counts as 720 hours, a year as 8760 hours).
TIMECONV = dict(
    seconds=1/3600,
    minutes=1/60,
    hours=1,
    days=24,
    weeks=168,
    months=720,
    years=8760,
)

# Read the password list and the category lookup table.
dfp, dfc = pd.read_csv(FDATA), pd.read_csv(FCAT)

dfp.head()
dfc.head()

# Express every time in hours, then truncate to whole hours.
# NOTE(review): astype(int) raises if a time_unit is missing from
# TIMECONV (the product is NaN) — confirm the data only uses known units.
hours_per_unit = dfp['time_unit'].map(TIMECONV)
dfp['online_hours'] = (dfp['value'] * hours_per_unit).astype(int)

# Per category id: number of passwords and mean online_hours, built as a
# single chain and flattened back to ordinary columns.
dfg = (
    dfp[['catid', 'online_hours']]
    .groupby('catid')
    .agg({'catid': 'size', 'online_hours': 'mean'})
    .rename(columns={'catid': 'count'})
    .reset_index()
)

# Attach the category table; the right join keeps every row of dfc.
dfres = dfg.merge(dfc, how='right', left_on='catid', right_on='id')

Q1 pandas number of passwords per category, showing the name

# Bar chart of the pre-computed counts, one bar per category name.
bars = dfres.plot.bar(x='category', y='count')
bars.set(ylabel='number of passwords', title='Plot on grouped - flat data')
plt.show()

pandas: let it compute the counts

# Passwords joined with their category names (every category row is kept).
dfpc = dfp.merge(dfc, how='right', left_on='catid', right_on='id')

# Count on the Series, not on a one-column DataFrame: the latter yields a
# MultiIndex whose tick labels render as 1-tuples such as "(name,)".
# NOTE(review): with the right join, a category without passwords still
# contributes one row and is counted as 1 — confirm this is acceptable.
dfpc['category'].value_counts().plot(
    kind='bar', xlabel='Category', ylabel='Count', rot=90
)
plt.title('Plot value count')
plt.show()

matplotlib

# Bar chart of online_hours per category, drawn directly with matplotlib.
fig, ax = plt.subplots()
bar_style = dict(width=1, edgecolor="white", linewidth=0.7)
ax.bar(dfres['category'], dfres['online_hours'], **bar_style)

# Leave one unit of margin on each side of the bars.
ncat = dfres['category'].nunique()
ax.set_xlim(left=-1, right=ncat)

plt.xticks(rotation=90)
ax.set_title('Matplot on grouped')
plt.show()

Q2 show average times

# Box plot of online_hours per category, restricted to rows below
# 10000 hours.
below_cap = dfpc.loc[dfpc['online_hours'] < 10000]
below_cap.boxplot(
    column=['online_hours'],
    by='category',
    grid=False,
    color='black',
    rot=90,
)

histogram

# Histogram of the `strength` column.
dfpc[['strength']].plot.hist()

You might also like