Song Jiaming | 09 Jun 2020
    
Disclaimer: These are just notes from the Coursera course "Data Visualization with Python".
%matplotlib inline # allows changes to the figure plotted inline
import matplotlib.pyplot as pltplt.plot(5,5,'0') # plot a dot at coordinate (5,5) 
plt.show()
# Alternative plotting using pandas:

# --- Line plot ---
df.plot(kind='line')  # note the x-axis is the index of the dataframe
# select the row of index_name and corresponding columns of column_names
df.loc[index_name, [column_names]].plot(kind='line')
plt.title('Title')
plt.ylabel('Y-axis label')
plt.xlabel('X-axis label')
plt.show()

# --- Area plot ---
# NOTE(review): with axis=1 pandas sorts the *columns* by the values found in
# the row labelled 'Total'; if 'Total' is a column, this needs axis=0 — confirm.
df.sort_values(['Total'], ascending=False, axis=1, inplace=True)
# remember to make your x_axis the index, different line/area in the plot: column, the height/area of the line: values
df.plot(kind='area')
plt.title('Title')
plt.ylabel('Y-axis label')
plt.xlabel('X-axis label')
plt.show()

# --- Histogram ---
df.plot(kind='hist')
# OR
count, bin_edges = np.histogram(df['target_col'])  # bin the values of df['target_col']
# set the x-axis ticks to the bin edges computed above
df['target_col'].plot(kind='hist', xticks=bin_edges)

# --- Bar chart ---
df.plot(kind='bar')  # vertical bar chart
df.plot(kind='barh')  # horizontal bar chart

# --- Pie chart ---
df.plot(kind='pie')

# --- Box plot ---
df.plot(kind='box')

# --- Scatter plot ---
# x / y name the independent and dependent columns, which is the scatter-plot
# calling convention (the notes previously said kind='box' here).
df.plot(kind='scatter', x='independent_var_col', y='dependent_var_col')


# --- Waffle chart (cf. the pywaffle package) ---
def create_waffle_chart(categories, values, height, width, colormap, value_sign=''):
    """Draw a waffle chart of ``values`` on a ``height`` x ``width`` tile grid.

    Each category receives a number of tiles proportional to its share of
    ``sum(values)``; tiles are filled column by column. The tile counts are
    printed, the grid is drawn with ``plt.matshow`` and a legend with one
    entry per category is added below the chart.

    Parameters
    ----------
    categories : sequence
        Category labels, in the same order as ``values``.
    values : sequence of numbers
        One value per category (a list, NumPy array or pandas Series).
    height : int
        Number of tile rows.
    width : int
        Number of tile columns.
    colormap : matplotlib colormap
        Colormap used for both the tiles and the legend patches.
    value_sign : str, optional
        Unit shown next to each value in the legend. ``'%'`` is appended
        after the value; any other sign is placed in front of it.

    Returns
    -------
    None
        The chart is drawn on the current matplotlib figure.
    """
    # Local import so the function does not rely on a module-level alias.
    import matplotlib.patches as mpatches

    # Work on a plain list so that values[i] is always positional, even when a
    # label-indexed pandas Series is passed in.
    value_list = list(values)

    # compute the proportion of each category with respect to the total
    total_values = sum(value_list)
    category_proportions = [float(value) / total_values for value in value_list]

    # compute the total number of tiles
    total_num_tiles = width * height
    print('Total number of tiles is', total_num_tiles)

    # compute the number of tiles for each category
    tiles_per_category = [
        round(proportion * total_num_tiles) for proportion in category_proportions
    ]

    # print out number of tiles per category (labels come from `categories`,
    # not from any global dataframe)
    for category, tiles in zip(categories, tiles_per_category):
        print(str(category) + ': ' + str(tiles))

    # tile_thresholds[k] = number of tiles allocated to the first k categories
    tile_thresholds = [0]
    for tiles in tiles_per_category:
        tile_thresholds.append(tile_thresholds[-1] + tiles)
    num_categories = len(tiles_per_category)

    # initialize the waffle chart as an empty matrix
    waffle_chart = np.zeros((height, width))

    # populate the waffle chart column by column
    category_index = 0
    tile_index = 0
    for col in range(width):
        for row in range(height):
            tile_index += 1
            # Advance once the current category has used up its tiles.
            # `while` skips categories that were allocated zero tiles, and the
            # bound keeps tiles left over by rounding in the last category
            # instead of inventing categories that do not exist.
            while (category_index < num_categories
                   and tile_index > tile_thresholds[category_index]):
                category_index += 1

            # set the class value to an integer, which increases with class
            waffle_chart[row, col] = category_index

    # use matshow to display the waffle chart; matshow opens its own figure,
    # so no separate plt.figure() call is needed
    plt.matshow(waffle_chart, cmap=colormap)
    plt.colorbar()

    # get the axis
    ax = plt.gca()

    # set minor ticks on the tile boundaries
    ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
    ax.set_yticks(np.arange(-.5, (height), 1), minor=True)

    # add gridlines based on minor ticks
    ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
    plt.xticks([])
    plt.yticks([])

    # Cumulative share of each category, used to pick its legend colour.
    # NOTE(review): the tiles are coloured by category index while the legend
    # is coloured by cumulative share, so the two may not match exactly — confirm.
    values_cumsum = np.cumsum(value_list)

    # create legend
    legend_handles = []
    for i, category in enumerate(categories):
        if value_sign == '%':
            label_str = str(category) + ' (' + str(value_list[i]) + value_sign + ')'
        else:
            label_str = str(category) + ' (' + value_sign + str(value_list[i]) + ')'

        color_val = colormap(float(values_cumsum[i]) / total_values)
        legend_handles.append(mpatches.Patch(color=color_val, label=label_str))

    # add legend to chart
    plt.legend(
        handles=legend_handles,
        loc='lower center',
        ncol=len(categories),
        bbox_to_anchor=(0., -0.2, 0.95, .1)
    )


width = 40  # width of chart
height = 10  # height of chart
categories = df_dsn.index.values  # categories
values = df_dsn['Total']  # corresponding values of categories
colormap = plt.cm.coolwarm  # color map class
# create waffle
create_waffle_chart(categories, values, height, width, colormap)

# --- Word cloud (wordcloud package) ---
# import package and its set of stopwords
from wordcloud import WordCloud, STOPWORDS

# Use the stopwords that ship with `wordcloud`; `set` removes any duplicates.
# Stop words are words that are dropped before the cloud is built.
stopwords = set(STOPWORDS)
# treat the word 'said' as a stop word as well
stopwords.add('said')

# Create a word cloud object and generate a word cloud
# instantiate a word cloud object
word_count = WordCloud(
    background_color='white',
    max_words=2000,  # use only 2k words first
    stopwords=stopwords
)
# generate the word cloud
word_count.generate(a_txt_file_with_words)

# Visualize:
fig = plt.figure()
fig.set_figwidth(14)  # set width
fig.set_figheight(18)  # set height
# display the cloud
plt.imshow(word_count, interpolation='bilinear')
plt.axis('off')
plt.show()

# Open the image to use as a mask
# NOTE(review): `Image` is presumably PIL's `Image` module
# (`from PIL import Image`); the import is not shown in these notes — confirm.
a_mask = np.array(Image.open('image.png'))

# Create wordcloud with the mask
word_count = WordCloud(
    background_color='white',
    max_words=2000,
    stopwords=stopwords,
    mask=a_mask
)
word_count.generate(a_txt_file_with_words)

fig = plt.figure()
fig.set_figwidth(14)  # set width
fig.set_figheight(18)  # set height
plt.imshow(word_count, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis('off')
plt.show()

# --- Regression plot (seaborn) ---
import seaborn as sns
# scatter plot of total vs. year with a fitted regression line
ax = sns.regplot(x='year', y='total', data=df, color='red', marker='+')

# --- Maps (folium) ---
import folium

# default world map
world_map = folium.Map()

# Map for Canada
canada_map = folium.Map(
    location=[56.130, -106.35],
    zoom_start=4,
    tiles='Stamen Toner'  # different map styles
)

# Map for Canada (default tiles), with a marker
canada_map = folium.Map(
    location=[56.130, -106.35],
    zoom_start=4
)
# create a feature group
ontario = folium.map.FeatureGroup()
# add a red circle marker to the feature group
ontario.add_child(
    folium.features.CircleMarker(
        [56.130, -85.32],
        radius=5,
        color='red',
        fill_color='red'
    )
)
# attach the feature group to the map
canada_map.add_child(ontario)
# label the marker
folium.Marker([56.130, -85.32], popup='Ontario').add_to(canada_map)

# --- Choropleth map ---
# Illustrative structure of a GeoJSON file: a FeatureCollection whose features
# each carry "properties" (with the name that `key_on` matches against),
# a "geometry" and an "id". Coordinates are abbreviated here.
{"type": "FeatureCollection", "features": [
    {
        "type": "Feature",
        "properties": {"name": "Afghanistan"},
        "geometry": {"type": "Polygon", "coordinates": [[[61.210817, 35.650072], [61.210817, 35.650072]]]},
        "id": "AFG"
    },
    {
        "type": "Feature",
        "properties": {"name": "Albania"},
        "geometry": {"type": "Polygon", "coordinates": [[[], []]]},
        "id": "ALB"
    }]
}

# 1 Create a plain world map
world_map = folium.Map(
    zoom_start=2,
    tiles='Mapbox Bright'  # different map styles
)
# 2 load the geojson file
world_geo = r'world_countries.json'
# 3 colour each country by its 'Total' value.
# columns = [key column, value column]; key_on names the GeoJSON field that
# the key column is matched against.
# NOTE(review): newer folium releases expose this as
# folium.Choropleth(...).add_to(world_map) — confirm against the installed version.
world_map.choropleth(
    geo_data=world_geo,
    data=df_canada,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    legend_name='Immigration to Canada'
)