Song Jiaming | 09 Jun 2020
Disclaimer: This is just a notes from coursera course Data Visualization with Python
%matplotlib inline # allows changes to the figure plotted inline
import matplotlib.pyplot as plt
plt.plot(5,5,'0') # plot a dot at coordinate (5,5)
plt.show()
# Alternative plotting using pandas:
df.plot[kind='inline'], # note the x-axis is the index of the dataframe
df.loc[index_name, [column_names]].plot(kind='line') # select the row of index_name and corresponding columns of column_names
plt.title('Title')
plt.ylabel('Y-axis label')
plt.xlabel('X-axis label')
plt.show()
df.sort_values(['Total'], ascending = False, axis=1, inplace=True)
# remember to make your x_axis the index, different line/area in the plot: column, the height/area of the line: values
df.plot(kind='area')
plt.title('Title')
plt.ylabel('Y-axis label')
plt.xlabel('X-axis label')
plt.show
df.plot(kind='hist')
# OR
count, bin_edges = np.histograme[df['target_col']] # set the x axis to each bin of values in df['target_col']
df['target_col'].plot(kind='hist', xticks = bin_edges)
# barchat
df.plot(kind='bar') # vertical barchart
df.plot(kind='barh') # horizontal barchart
df.plot(kind='pie')
df.plot(kind='box')
df.plot(kind='box', x = 'independent_var_col', y ='dependent_var_col')
pywaffle
def create_waffle_chart(categories, values, height, width, colormap, value_sign=''):
# compute the proportion of each category with respect to the total
total_values = sum(values)
category_proportions = [(float(value) / total_values) for value in values]
# compute the total number of tiles
total_num_tiles = width * height # total number of tiles
print ('Total number of tiles is', total_num_tiles)
# compute the number of tiles for each catagory
tiles_per_category = [round(proportion * total_num_tiles) for proportion in category_proportions]
# print out number of tiles per category
for i, tiles in enumerate(tiles_per_category):
print (df_dsn.index.values[i] + ': ' + str(tiles))
# initialize the waffle chart as an empty matrix
waffle_chart = np.zeros((height, width))
# define indices to loop through waffle chart
category_index = 0
tile_index = 0
# populate the waffle chart
for col in range(width):
for row in range(height):
tile_index += 1
# if the number of tiles populated for the current category
# is equal to its corresponding allocated tiles...
if tile_index > sum(tiles_per_category[0:category_index]):
# ...proceed to the next category
category_index += 1
# set the class value to an integer, which increases with class
waffle_chart[row, col] = category_index
# instantiate a new figure object
fig = plt.figure()
# use matshow to display the waffle chart
colormap = plt.cm.coolwarm
plt.matshow(waffle_chart, cmap=colormap)
plt.colorbar()
# get the axis
ax = plt.gca()
# set minor ticks
ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
ax.set_yticks(np.arange(-.5, (height), 1), minor=True)
# add dridlines based on minor ticks
ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
plt.xticks([])
plt.yticks([])
# compute cumulative sum of individual categories to match color schemes between chart and legend
values_cumsum = np.cumsum(values)
total_values = values_cumsum[len(values_cumsum) - 1]
# create legend
legend_handles = []
for i, category in enumerate(categories):
if value_sign == '%':
label_str = category + ' (' + str(values[i]) + value_sign + ')'
else:
label_str = category + ' (' + value_sign + str(values[i]) + ')'
color_val = colormap(float(values_cumsum[i])/total_values)
legend_handles.append(mpatches.Patch(color=color_val, label=label_str))
# add legend to chart
plt.legend(
handles=legend_handles,
loc='lower center',
ncol=len(categories),
bbox_to_anchor=(0., -0.2, 0.95, .1)
)
width = 40 # width of chart
height = 10 # height of chart
categories = df_dsn.index.values # categories
values = df_dsn['Total'] # correponding values of categories
colormap = plt.cm.coolwarm # color map class
# create waffle
create_waffle_chart(categories, values, height, width, colormap)
wordcloud
# import package and its set of stopwords
from wordcloud import WordCloud, STOPWORDS
# let's use the stopwords that we imported from `word_cloud`. We use the function *set* to remove any redundant stopwords.
stopwords = set(STOPWORDS) # Stopwords are list of words which are stop words, which has no meanings
# the word 'said' has no meaning
stopwords.add('said')
# Create a word cloud object and generate a word cloud
# instantiate a word cloud object
word_count = WordCloud(
background_color='white',
max_words=2000, #use only 2k words first
stopwords=stopwords
)
# generate the word cloud
word_count.generate(a_txt_file_with_words)
# Visualize:
fig = plt.figure()
fig.set_figwidth(14) # set width
fig.set_figheight(18) # set height
# display the cloud
plt.imshow(word_count, interpolation='bilinear')
plt.axis('off')
plt.show()
Open the image to a mask
a_mask = np.array(Image.open('image.png'))
Create wordcloud with the mask
word_count = WordCloud(
background_color='white',
max_words=2000,
stopwords=stopwords,
mask=a_mask
)
word_count.generate(a_txt_file_with_words)
fig = plt.figure()
fig.set_figwidth(14) # set width
fig.set_figheight(18) # set height
plt.imshow(word_count, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis('off')
plt.show()
import seaborn as sns
ax = sns.regplot(x='year', y='total', data=df, color='red', marker='+')
import folium
world_map = folium.Map()
# Map for Canada
canada_map = folium.Map(
location = [56.130, -106.35],
zoom_start = 4,
tiles = 'Stamen Toner' # different map styles
)
# Map for Canada
canada_map = folium.Map(
location = [56.130, -106.35],
zoom_start = 4
)
# create a feature group
ontario = folium.map.FeatureGroup()
ontario.add_child(
folium.features.CircleMarker(
[56.130, -85.32],
radius = 5,
color = 'red',
fill_color = 'red'
)
)
canada_map.add_child(ontario)
# label the marker
folium.Marker([56.130, -85.32], popup='Ontario').add_to(canada_map)
{"type":"FeatureCollection","features":[
{
"type":"Feature",
"properties":{"name":"Afghanistan"},
"geometry":{"type":"Polygon","coordinates":[[[61.210817,35.650072],[61.210817,35.650072]]]},
"id":"AFG"
},
{
"type":"Feature",
"properties":{"name":"Afghanistan"},
"geometry" :{"type":"Polygon","coordinates":[[[],[]]],"id":"ALB" }
}]
}
# 1 Create a plain world map
world_map = folium.Map(
zoom_start = 2
tiles = 'Mapbox Bright' # different map styles
)
# 2 load the geojson file
world_geo = r'world_countries.json'
world_map.choropleth(
geo_path = world_geo,
data = df_canada,
columns = ['Country', 'Total'],
key_on = 'feature.properties.name',
fill_color = 'YlOrRd',
legend_name = 'Immigration to Canada'
)