import csv, os
import numpy as np
import pandas as pd
# print pd.__version__
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
data import
cb_data = pd.read_csv('data/b_styles_expanded.csv')
print cb_data.dtypes
cb_data.tail()
Unnamed: 0 int64
Unnamed: 0.1 int64
beer_style object
cheese object
Finish Length object
Phenols object
Alcohol object
Hops object
dessert object
Malt object
other_styles object
glass object
descrip_long object
Hop object
Body object
dish object
Carbonation (Visual) object
category object
Esters object
Attenuation object
temp object
Yeast object
Color object
srm object
Water object
abv object
Carbonation object
Clarity object
ibu object
Fermentation By-Products object
...
fruity float64
hoppy float64
malty float64
nutty float64
sour float64
spicy float64
abv_max float64
abv_min float64
ibu_max float64
ibu_min float64
srm_max float64
srm_min float64
temp_max float64
temp_min float64
temp_avg float64
temp_range float64
srm_avg float64
srm_range float64
abv_avg float64
abv_range float64
ibu_avg float64
ibu_range float64
Flute float64
Goblet float64
Nonic Pint float64
Snifter float64
Thistle float64
Tulip float64
Varies float64
Vase float64
Length: 66, dtype: object
Unnamed: 0 | Unnamed: 0.1 | beer_style | cheese | Finish Length | Phenols | Alcohol | Hops | dessert | Malt | ... | ibu_avg | ibu_range | Flute | Goblet | Nonic Pint | Snifter | Thistle | Tulip | Varies | Vase | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
72 | 72 | 72 | Session Beer | Varies | Varies | Can be present. | Not Detectable to Mild | Varies | Varies | Varies | ... | 20.0 | 20.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN |
73 | 73 | 73 | Smoke Beer | Parmesan | Varies | Can be present. | Varies | Varies | Gingerbread Cookies | Varies | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 |
74 | 74 | 74 | Smoke Porter | Red Dragon Cheddar | Medium to Long | Not common to style | Varies | Kent Goldings, Willamette | S'mores | Crystal, Chocolate, Black Patent | ... | 30.0 | 20.0 | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
75 | 75 | 75 | Specialty Beer | Varies | Varies | Can be present. | Varies | Varies | Varies | Varies | ... | 50.5 | 99.0 | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
76 | 76 | 76 | Vienna-Style Lager | Mild Cheeses | Short to Medium | Not common to style. | Mild | German Noble | Almond Biscotti | Vienna | ... | 25.0 | 6.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 66 columns
# cb_data.to_csv('data/cb_data.csv', sep=';')
style_category = cb_data[['beer_style', 'category', 'other_styles']]
style_category.tail()
beer_style | category | other_styles | |
---|---|---|---|
72 | Session Beer | Specialty Beers | [English-Style Bitter, Irish-Style Dry Stout, ... |
73 | Smoke Beer | Specialty Beers | [English-Style Old Ale, American Brett, Herb a... |
74 | Smoke Porter | Porters | [English-Style Old Ale, American Brett, Herb a... |
75 | Specialty Beer | Specialty Beers | [Belgian-Style Fruit Lambic, Herb and Spice Be... |
76 | Vienna-Style Lager | Dark Lagers | [English-Style Bitter, German-Style Bock, Belg... |
style_category.other_styles[0][1:-1].split(',')
['English-Style Pale Ale/ESB', ' English-Style Mild', ' American Amber Lager']
# style_category.other_styles.str.slice(1,-1)
# Turn each record's bracketed "[a, b, c]" string into a real list of style names.
style_cat_dict = cb_data[['beer_style', 'category', 'other_styles']].to_dict('records')
print(style_cat_dict[:3])
print('')
for style in style_cat_dict:
    # Strip the surrounding brackets; an empty remainder means no related styles.
    inner = style['other_styles'][1:-1]
    style['other_styles'] = inner.split(', ') if inner else []
print(style_cat_dict[:3])
print('')
print(style_cat_dict[61]['other_styles'])
[{'category': 'Pale Ales', 'beer_style': 'American Amber Ale', 'other_styles': '[English-Style Pale Ale/ESB, English-Style Mild, American Amber Lager]'}, {'category': 'Dark Lagers', 'beer_style': 'American Amber Lager', 'other_styles': '[German-Style Marzen/Oktoberfest, Vienna-Style Lager, English-Style Mild]'}, {'category': 'Strong Ales', 'beer_style': 'American Barley Wine', 'other_styles': '[Imperial India Pale Ale, German-Style Doppelbock, Scotch Ale/Wee Heavy]'}]
[{'category': 'Pale Ales', 'beer_style': 'American Amber Ale', 'other_styles': ['English-Style Pale Ale/ESB', 'English-Style Mild', 'American Amber Lager']}, {'category': 'Dark Lagers', 'beer_style': 'American Amber Lager', 'other_styles': ['German-Style Marzen/Oktoberfest', 'Vienna-Style Lager', 'English-Style Mild']}, {'category': 'Strong Ales', 'beer_style': 'American Barley Wine', 'other_styles': ['Imperial India Pale Ale', 'German-Style Doppelbock', 'Scotch Ale/Wee Heavy']}]
[]
beer_list = cb_data.beer_style.tolist()
beer_list[-5:]
['Session Beer',
'Smoke Beer',
'Smoke Porter',
'Specialty Beer',
'Vienna-Style Lager']
# possible links across all styles: unordered pairs of distinct styles, n*(n-1)/2
# (counting direction as well would double this to n*(n-1))
n_styles = len(beer_list)
# floor division keeps the count an integer regardless of Python version
n_styles * (n_styles - 1) // 2
2926
# Flatten the records into directed (style, related_style) pairs; styles with
# an empty other_styles list simply contribute nothing.
cb_DiLinks = [(style['beer_style'], other)
              for style in style_cat_dict
              for other in style['other_styles']]
print(len(cb_DiLinks))
cb_DiLinks[:10]
187
[('American Amber Ale', 'English-Style Pale Ale/ESB'),
('American Amber Ale', 'English-Style Mild'),
('American Amber Ale', 'American Amber Lager'),
('American Amber Lager', 'German-Style Marzen/Oktoberfest'),
('American Amber Lager', 'Vienna-Style Lager'),
('American Amber Lager', 'English-Style Mild'),
('American Barley Wine', 'Imperial India Pale Ale'),
('American Barley Wine', 'German-Style Doppelbock'),
('American Barley Wine', 'Scotch Ale/Wee Heavy'),
('American Black Ale', 'Robust Porter')]
import networkx as nx
# colors from our friends at http://colorbrewer2.org
COLORS = ['#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462',
'#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f']
G_toy = nx.DiGraph()
G_toy.add_nodes_from(['Alice', 'Bob', 'Chuck', 'Dick', 'Edgar', 'Fred'])
G_toy.nodes()
['Dick', 'Alice', 'Edgar', 'Fred', 'Chuck', 'Bob']
G_toy.add_edges_from([('Alice', 'Bob'), ('Alice', 'Chuck'), ('Bob', 'Alice'), ('Bob', 'Chuck'),
('Chuck', 'Dick'), ('Alice', 'Dick'), ('Bob', 'Edgar'), ('Edgar', 'Bob'),
('Dick', 'Fred'), ('Fred' , 'Dick')
])
G_toy.edges()
[('Dick', 'Fred'),
('Alice', 'Bob'),
('Alice', 'Dick'),
('Alice', 'Chuck'),
('Edgar', 'Bob'),
('Fred', 'Dick'),
('Chuck', 'Dick'),
('Bob', 'Alice'),
('Bob', 'Edgar'),
('Bob', 'Chuck')]
nx.draw_circular(G_toy,
node_color=COLORS[0],
node_size=2000,
with_labels=True)
plt.axis('equal')
(-1.5, 1.5, -1.5, 1.5)
Directed Graph
G = nx.DiGraph()
G.add_nodes_from(beer_list)
print len(G.nodes())
G.nodes()[:3]
77
['German-Style Bock', 'German-Style Dunkelweizen', 'Belgian-Style Tripel']
G.add_edges_from(cb_DiLinks)
print len(G.edges())
G.edges()[:3]
186
[('German-Style Bock', 'Belgian-Style Dubbel'),
('German-Style Bock', 'American Amber Lager'),
('German-Style Dunkelweizen', 'English-Style Brown Ale')]
# why is the edge count lower than list len?
# must be a dupe: the graph holds a repeated (source, target) pair only once
from collections import Counter

# Tally every link in a single pass rather than calling list.count() per element.
set(link for link, seen in Counter(cb_DiLinks).items() if seen > 1)
{('Rye Beer', 'Herb and Spice Beer')}
PageRank
nx.pagerank(G, max_iter=1000)
{'American Amber Ale': 0.014582164068922921,
'American Amber Lager': 0.025391424521712974,
'American Barley Wine': 0.0020602941879801764,
'American Black Ale': 0.03524226870144604,
'American Brett': 0.02595062159582658,
'American Brown Ale': 0.018426950422189954,
'American Cream Ale': 0.0058283900725772265,
'American Imperial Porter': 0.003514312388015356,
'American Imperial Red Ale': 0.0029880065204832495,
'American Imperial Stout': 0.0020602941879801764,
'American India Pale Ale/IPA': 0.023727626131963907,
'American Pale Ale': 0.010514226095834273,
'American Sour': 0.05576906759108761,
'American Stout': 0.0020602941879801764,
'American Wheat': 0.02709132371840384,
'American-Style Wheat Wine Ale': 0.0028094467632845087,
'Baltic-Style Porter': 0.003274237915072287,
'Barrel-Aged Beer': 0.05209433396378796,
'Belgian-Style Blonde Ale': 0.0028563107818306375,
'Belgian-Style Dubbel': 0.059656549110350024,
'Belgian-Style Flanders': 0.0020602941879801764,
'Belgian-Style Fruit Lambic': 0.002644044296828293,
'Belgian-Style Golden Strong Ale': 0.009654698646410383,
'Belgian-Style Lambic/Gueuze': 0.0165306709842972,
'Belgian-Style Pale Ale': 0.021346693322413936,
'Belgian-Style Quadrupel': 0.006043600643162409,
'Belgian-Style Saison': 0.002644044296828293,
'Belgian-Style Tripel': 0.03326817397479022,
'Belgian-Style Wit': 0.03302477253969235,
'Berliner-Style Weisse': 0.004614446345485116,
'Blonde Ale': 0.006009608789874498,
'Bohemian-Style Pilsener': 0.011416945753781337,
'British-Style Barley Wine Ale': 0.0020602941879801764,
'California Common': 0.0020602941879801764,
'Chocolate Beer': 0.005131724883279979,
'Coffee Beer': 0.005265562714559706,
'English-Style Bitter': 0.011211835358211469,
'English-Style Brown Ale': 0.018996373601948575,
'English-Style Brown Porter': 0.03332469398697684,
'English-Style India Pale Ale/IPA': 0.012999718806943639,
'English-Style Mild': 0.018768276979798208,
'English-Style Oatmeal Stout': 0.0020602941879801764,
'English-Style Old Ale': 0.0095770010214148,
'English-Style Pale Ale/ESB': 0.009850525239380759,
'English-Style Sweet Stout (Milk Stout)': 0.015491522106287861,
'European-Style Export': 0.0020602941879801764,
'French-Style Biere de Garde': 0.0020602941879801764,
'Fruit and Field Beer': 0.05107155015442901,
'German-Style Bock': 0.016411283674223277,
'German-Style Brown/Altbier': 0.0020602941879801764,
'German-Style Doppelbock': 0.01492484922048327,
'German-Style Dunkel': 0.03378873176318905,
'German-Style Dunkelweizen': 0.0028563107818306375,
'German-Style Hefeweizen': 0.016976287891728965,
'German-Style Helles': 0.003807596085600591,
'German-Style Kolsch': 0.008609285690684169,
'German-Style Maibock': 0.01148588047023225,
'German-Style Marzen/Oktoberfest': 0.01293765353918299,
'German-Style Pilsener': 0.025135139197141255,
'German-Style Schwarzbier': 0.00431511378636796,
'German-Style Weizenbock': 0.002644044296828293,
'Gluten Free': 0.0020602941879801764,
'Herb and Spice Beer': 0.007018221684265235,
'Honey Beer': 0.006166855994626241,
'Imperial India Pale Ale': 0.012069630579080367,
'Irish-Style Dry Stout': 0.007957892512966488,
'Irish-Style Red': 0.0020602941879801764,
'Pumpkin Beer': 0.0020602941879801764,
'Robust Porter': 0.014723679453298067,
'Rye Beer': 0.0020602941879801764,
'Scotch Ale/Wee Heavy': 0.008993250912566685,
'Scottish-Style Ale': 0.013249163088915621,
'Session Beer': 0.005986237697278011,
'Smoke Beer': 0.0020602941879801764,
'Smoke Porter': 0.0020602941879801764,
'Specialty Beer': 0.0020602941879801764,
'Vienna-Style Lager': 0.026314145674263253}
import operator

# Rank styles by PageRank score, lowest first, then show the five highest.
x = nx.pagerank(G, max_iter=1000)
sorted_x = list(x.items())
sorted_x.sort(key=operator.itemgetter(1))
sorted_x[-5:]
[('American Black Ale', 0.03524226870144604),
('Fruit and Field Beer', 0.05107155015442901),
('Barrel-Aged Beer', 0.05209433396378796),
('American Sour', 0.05576906759108761),
('Belgian-Style Dubbel', 0.059656549110350024)]
# Collect the hub style, the styles it links to, and the styles those link to,
# with duplicates removed.
hub = 'Belgian-Style Dubbel'
reachable = set([hub])
for n in G.neighbors(hub):
    reachable.add(n)
    reachable.update(G.neighbors(n))
nodelist = list(reachable)
nodelist
['Barrel-Aged Beer',
'Belgian-Style Dubbel',
'Belgian-Style Lambic/Gueuze',
'American Brown Ale',
'German-Style Dunkel',
'Fruit and Field Beer',
'American Sour',
'English-Style Brown Porter']
We have isolated the nodes of interest, but now we need their edges too.
nx.draw_circular(G,
nodelist=nodelist,
node_color=COLORS[0],
node_size=2000,
with_labels=True)
plt.axis('equal')
(-1.5, 1.5, -1.5, 1.5)
# Draw only the selected nodes, positioned on the full graph's circular layout.
# The layout is computed once so nodes and labels share the same coordinates.
pos = nx.circular_layout(G)
nx.draw_networkx_nodes(G, pos=pos, nodelist=nodelist,
                       node_color=COLORS[0],
                       node_size=2000)
# draw_networkx_nodes has no with_labels option (that belongs to nx.draw /
# nx.draw_networkx), so the labels for the selected nodes are drawn explicitly.
nx.draw_networkx_labels(G, pos=pos, labels=dict(zip(nodelist, nodelist)))
plt.axis('equal')
(-1.5, 1.5, -1.5, 1.5)
G.predecessors('Belgian-Style Dubbel')
['German-Style Bock',
'English-Style India Pale Ale/IPA',
'English-Style Brown Ale',
'Irish-Style Red',
'German-Style Dunkel',
'German-Style Schwarzbier',
'German-Style Brown/Altbier',
'English-Style Brown Porter',
'American Imperial Porter',
'Vienna-Style Lager',
'Irish-Style Dry Stout',
'Scottish-Style Ale']
for link in cb_DiLinks:
if link[0] == 'Belgian-Style Dubbel':
print link
('Belgian-Style Dubbel', 'English-Style Brown Porter')
('Belgian-Style Dubbel', 'German-Style Dunkel')
('Belgian-Style Dubbel', 'Fruit and Field Beer')
features and sorting
# def parent_cnt(G, style):
# '''
# Count of networkx predecessors.
# Parameters
# ----------
# G : networkx graph
# style : beer style node
# Returns
# -------
# int
# '''
# return len(G.predecessors(style))
# parent_cnt(G, 'Belgian-Style Dubbel')
###
def grandpappy_cnt(G, style, unique=False):
    '''
    Count a node's grandparents, i.e. the predecessors of its predecessors.

    Parameters
    ----------
    G : networkx graph
        Directed graph; only its ``predecessors(node)`` method is used.
    style : beer style node
        Node whose grandparents are counted.
    unique : bool, default False
        If True, each distinct grandparent is counted once.  If False, a
        grandparent reached through several parents is counted once per
        parent it is reached through.

    Returns
    -------
    int
        Number of grandparents; 0 when the node has no parents.
    '''
    # Iterating (rather than calling len() on the result) works whether
    # predecessors() returns a list or an iterator.
    grandparents = [grandparent
                    for parent in G.predecessors(style)
                    for grandparent in G.predecessors(parent)]
    if unique:
        return len(set(grandparents))
    return len(grandparents)
grandpappy_cnt(G, 'Belgian-Style Dubbel', unique=True)
27
len(G.neighbors('Belgian-Style Dubbel'))
3
[(pred) for pred in G.predecessors('American Black Ale')]
['Robust Porter',
'Imperial India Pale Ale',
'American India Pale Ale/IPA',
'English-Style Sweet Stout (Milk Stout)']
# Map each style name to its PageRank score.
pr_dict = dict(sorted_x)
# Mean PageRank over the distinct grandparents of 'American Sour':
# one predecessor list per parent, merged into a set to drop repeats.
l = [G.predecessors(pred) for pred in G.predecessors('American Sour')]
distinct_grandparents = set()
for sublist in l:
    distinct_grandparents.update(sublist)
np.mean([pr_dict[gp] for gp in distinct_grandparents])
0.022757348312791126
# Build one feature record per beer style from the link graph G, then load the
# records into a DataFrame ordered by PageRank (highest first).
feat_list = []
for k in G.nodes():  # nodes() rather than the networkx-1.x-only nodes_iter()
    parent_nodes = list(G.predecessors(k))
    parents = len(parent_nodes)
    children = len(list(G.successors(k)))

    # Distinct styles two steps upstream of k; its size is the same number
    # grandpappy_cnt(G, k, unique=True) returns.
    grandparent_nodes = set()
    for pred in parent_nodes:
        grandparent_nodes.update(G.predecessors(pred))

    # A style with no parents has nothing to average and no parent links to
    # take a share of.  Assign NaN explicitly instead of letting numpy emit
    # RuntimeWarnings for an empty mean and a 0/0 division.
    if parent_nodes:
        parent_avg = np.mean([pr_dict[pred] for pred in parent_nodes])
        # Every parent links at least to k, so this total is never zero here.
        total_parent_links = sum(len(list(G.successors(pred)))
                                 for pred in parent_nodes)
        share_of_parent_links = float(parents) / total_parent_links
    else:
        parent_avg = np.nan
        share_of_parent_links = np.nan

    if grandparent_nodes:
        grandparent_avg = np.mean([pr_dict[gp] for gp in grandparent_nodes])
    else:
        grandparent_avg = np.nan

    ratio = float(parents) / children if children > 0 else np.nan

    feat_list.append({
        'style': k,
        'PageRank': pr_dict[k],
        'children': children,
        'parents': parents,
        'parent_PR_avg': parent_avg,
        'share_of_parent_links': share_of_parent_links,
        'grandparents': len(grandparent_nodes),
        'grandparent_PR_avg': grandparent_avg,
        # NOTE: stored in the record but not selected into df_feat below.
        'ratio': ratio,
    })

df_feat = pd.DataFrame(
    feat_list,
    columns=['style', 'PageRank', 'children',
             'parents', 'parent_PR_avg', 'share_of_parent_links',
             'grandparents', 'grandparent_PR_avg'],
).sort_values('PageRank', ascending=False)
df_feat.head(10)
C:\Users\rstancut\AppData\Local\Continuum\Anaconda2\lib\site-packages\ipykernel\__main__.py:8: RuntimeWarning: invalid value encountered in double_scalars
style | PageRank | children | parents | parent_PR_avg | share_of_parent_links | grandparents | grandparent_PR_avg | |
---|---|---|---|---|---|---|---|---|
39 | Belgian-Style Dubbel | 0.059657 | 3 | 12 | 0.014583 | 0.375000 | 27 | 0.014047 |
71 | American Sour | 0.055769 | 3 | 6 | 0.027033 | 0.375000 | 17 | 0.022757 |
59 | Barrel-Aged Beer | 0.052094 | 2 | 5 | 0.033770 | 0.357143 | 22 | 0.018776 |
25 | Fruit and Field Beer | 0.051072 | 3 | 7 | 0.022776 | 0.350000 | 24 | 0.014908 |
5 | American Black Ale | 0.035242 | 3 | 4 | 0.016503 | 0.571429 | 12 | 0.011367 |
8 | German-Style Dunkel | 0.033789 | 2 | 3 | 0.037326 | 0.333333 | 16 | 0.016987 |
19 | English-Style Brown Porter | 0.033325 | 3 | 2 | 0.046723 | 0.400000 | 13 | 0.018050 |
2 | Belgian-Style Tripel | 0.033268 | 3 | 5 | 0.016726 | 0.384615 | 15 | 0.013445 |
63 | Belgian-Style Wit | 0.033025 | 3 | 5 | 0.018597 | 0.416667 | 18 | 0.013387 |
38 | American Wheat | 0.027091 | 2 | 5 | 0.016703 | 0.357143 | 10 | 0.022229 |
df_feat.sort_values('parents', ascending=False).head(5)
style | PageRank | children | parents | parent_PR_avg | share_of_parent_links | grandparents | grandparent_PR_avg | |
---|---|---|---|---|---|---|---|---|
39 | Belgian-Style Dubbel | 0.059657 | 3 | 12 | 0.014583 | 0.375000 | 27 | 0.014047 |
29 | Vienna-Style Lager | 0.026314 | 3 | 8 | 0.010020 | 0.380952 | 15 | 0.009974 |
25 | Fruit and Field Beer | 0.051072 | 3 | 7 | 0.022776 | 0.350000 | 24 | 0.014908 |
71 | American Sour | 0.055769 | 3 | 6 | 0.027033 | 0.375000 | 17 | 0.022757 |
15 | Herb and Spice Beer | 0.007018 | 2 | 6 | 0.002745 | 0.352941 | 2 | 0.004831 |
df_feat.sort_values('children', ascending=False).head(5)
style | PageRank | children | parents | parent_PR_avg | share_of_parent_links | grandparents | grandparent_PR_avg | |
---|---|---|---|---|---|---|---|---|
39 | Belgian-Style Dubbel | 0.059657 | 3 | 12 | 0.014583 | 0.375000 | 27 | 0.014047 |
17 | Smoke Porter | 0.002060 | 3 | 0 | NaN | NaN | 0 | NaN |
52 | American Imperial Porter | 0.003514 | 3 | 1 | 0.005132 | 0.333333 | 3 | 0.003613 |
20 | Scottish-Style Ale | 0.013249 | 3 | 3 | 0.013163 | 0.333333 | 9 | 0.011831 |
7 | English-Style India Pale Ale/IPA | 0.013000 | 3 | 3 | 0.012871 | 0.333333 | 5 | 0.017641 |
# df_feat.sort_values('ratio', ascending=False).head(5)
# df_feat.sort_values('ratio').head(5)
df_feat['style'].tolist()[:5]
['Belgian-Style Dubbel',
'American Sour',
'Barrel-Aged Beer',
'Fruit and Field Beer',
'American Black Ale']
for style in df_feat['style'].tolist()[:5]:
print grandpappy_cnt(G, style, unique=True)
27
17
22
24
12
sub graphing
# Links whose target is Belgian-Style Dubbel, i.e. its incoming (parent) edges.
cb_BSD_parents = [link for link in cb_DiLinks
                  if link[1] == 'Belgian-Style Dubbel']
cb_BSD_parents
[('American Imperial Porter', 'Belgian-Style Dubbel'),
('English-Style Brown Ale', 'Belgian-Style Dubbel'),
('English-Style Brown Porter', 'Belgian-Style Dubbel'),
('English-Style India Pale Ale/IPA', 'Belgian-Style Dubbel'),
('German-Style Bock', 'Belgian-Style Dubbel'),
('German-Style Brown/Altbier', 'Belgian-Style Dubbel'),
('German-Style Dunkel', 'Belgian-Style Dubbel'),
('German-Style Schwarzbier', 'Belgian-Style Dubbel'),
('Irish-Style Dry Stout', 'Belgian-Style Dubbel'),
('Irish-Style Red', 'Belgian-Style Dubbel'),
('Scottish-Style Ale', 'Belgian-Style Dubbel'),
('Vienna-Style Lager', 'Belgian-Style Dubbel')]
cb_BSD_nodes = ['Belgian-Style Dubbel']
cb_BSD_nodes.extend([x[0] for x in cb_BSD_parents])
cb_BSD_nodes
['Belgian-Style Dubbel',
'American Imperial Porter',
'English-Style Brown Ale',
'English-Style Brown Porter',
'English-Style India Pale Ale/IPA',
'German-Style Bock',
'German-Style Brown/Altbier',
'German-Style Dunkel',
'German-Style Schwarzbier',
'Irish-Style Dry Stout',
'Irish-Style Red',
'Scottish-Style Ale',
'Vienna-Style Lager']
G_bsd = nx.DiGraph()
G_bsd.add_nodes_from(cb_BSD_nodes)
G_bsd.add_edges_from(cb_BSD_parents)
nx.draw_circular(G_bsd,
node_color=COLORS[0],
node_size=2000,
with_labels=True)
plt.axis('equal')
(-1.5, 1.5, -1.5, 1.5)
nx.draw_spring(G_bsd,
node_color=COLORS[0],
node_size=2000,
with_labels=True)
plt.figure(figsize=(12,12))
nx.draw_spring(G_bsd,
node_color=range(len(cb_BSD_nodes)),
node_size=2000,
cmap=plt.cm.BrBG,
with_labels=True)
grandparents
cb_BSD_parents
[('American Imperial Porter', 'Belgian-Style Dubbel'),
('English-Style Brown Ale', 'Belgian-Style Dubbel'),
('English-Style Brown Porter', 'Belgian-Style Dubbel'),
('English-Style India Pale Ale/IPA', 'Belgian-Style Dubbel'),
('German-Style Bock', 'Belgian-Style Dubbel'),
('German-Style Brown/Altbier', 'Belgian-Style Dubbel'),
('German-Style Dunkel', 'Belgian-Style Dubbel'),
('German-Style Schwarzbier', 'Belgian-Style Dubbel'),
('Irish-Style Dry Stout', 'Belgian-Style Dubbel'),
('Irish-Style Red', 'Belgian-Style Dubbel'),
('Scottish-Style Ale', 'Belgian-Style Dubbel'),
('Vienna-Style Lager', 'Belgian-Style Dubbel')]
G.predecessors('American Imperial Porter')
['Chocolate Beer']
# For each parent link of Belgian-Style Dubbel (in order), gather every link
# that points at that parent: these are the grandparent -> parent edges.
cb_BSD_grandparents = [link
                       for parent in cb_BSD_parents
                       for link in cb_DiLinks
                       if link[1] == parent[0]]
cb_BSD_grandparents
[('Chocolate Beer', 'American Imperial Porter'),
('Belgian-Style Pale Ale', 'English-Style Brown Ale'),
('German-Style Dunkelweizen', 'English-Style Brown Ale'),
('German-Style Marzen/Oktoberfest', 'English-Style Brown Ale'),
('Irish-Style Dry Stout', 'English-Style Brown Ale'),
('Scottish-Style Ale', 'English-Style Brown Ale'),
('Belgian-Style Dubbel', 'English-Style Brown Porter'),
('German-Style Dunkel', 'English-Style Brown Porter'),
('Bohemian-Style Pilsener', 'English-Style India Pale Ale/IPA'),
('California Common', 'English-Style India Pale Ale/IPA'),
('German-Style Pilsener', 'English-Style India Pale Ale/IPA'),
('Belgian-Style Golden Strong Ale', 'German-Style Bock'),
('English-Style Pale Ale/ESB', 'German-Style Bock'),
('Vienna-Style Lager', 'German-Style Bock'),
('Belgian-Style Dubbel', 'German-Style Dunkel'),
('English-Style Brown Ale', 'German-Style Dunkel'),
('English-Style Brown Porter', 'German-Style Dunkel'),
('Irish-Style Dry Stout', 'German-Style Schwarzbier'),
('American Stout', 'Irish-Style Dry Stout'),
('Coffee Beer', 'Irish-Style Dry Stout'),
('German-Style Schwarzbier', 'Irish-Style Dry Stout'),
('Session Beer', 'Irish-Style Dry Stout'),
('American Brown Ale', 'Scottish-Style Ale'),
('English-Style Bitter', 'Scottish-Style Ale'),
('English-Style Pale Ale/ESB', 'Scottish-Style Ale'),
('American Amber Lager', 'Vienna-Style Lager'),
('American Brown Ale', 'Vienna-Style Lager'),
('American Cream Ale', 'Vienna-Style Lager'),
('American Imperial Red Ale', 'Vienna-Style Lager'),
('Belgian-Style Flanders', 'Vienna-Style Lager'),
('Belgian-Style Pale Ale', 'Vienna-Style Lager'),
('California Common', 'Vienna-Style Lager'),
('French-Style Biere de Garde', 'Vienna-Style Lager')]
# Unique node names for the two-level graph: the existing Dubbel/parent nodes
# plus the source style of every grandparent link.
grandparent_sources = [x[0] for x in cb_BSD_grandparents]
cb_BSD_nodes2 = list(set(cb_BSD_nodes + grandparent_sources))
cb_BSD_nodes2
['German-Style Bock',
'German-Style Dunkelweizen',
'Scottish-Style Ale',
'English-Style India Pale Ale/IPA',
'German-Style Dunkel',
'Bohemian-Style Pilsener',
'California Common',
'Session Beer',
'Chocolate Beer',
'American Amber Lager',
'Irish-Style Dry Stout',
'German-Style Pilsener',
'English-Style Bitter',
'English-Style Brown Porter',
'French-Style Biere de Garde',
'Belgian-Style Pale Ale',
'Vienna-Style Lager',
'German-Style Marzen/Oktoberfest',
'Belgian-Style Dubbel',
'English-Style Brown Ale',
'American Cream Ale',
'American Brown Ale',
'German-Style Schwarzbier',
'American Imperial Porter',
'Coffee Beer',
'Belgian-Style Flanders',
'Belgian-Style Golden Strong Ale',
'Irish-Style Red',
'German-Style Brown/Altbier',
'English-Style Pale Ale/ESB',
'American Stout',
'American Imperial Red Ale']
G_bsd2 = nx.DiGraph()
G_bsd2.add_nodes_from(cb_BSD_nodes2)
G_bsd2.add_edges_from(cb_BSD_parents)
G_bsd2.add_edges_from(cb_BSD_grandparents)
plt.figure(figsize=(16,16))
nx.draw_spring(G_bsd2,
node_color=range(len(cb_BSD_nodes2)),
node_size=2000,
cmap=plt.cm.BrBG,
with_labels=True)
plt.figure(figsize=(16,16))
nx.draw_circular(G_bsd2,
node_color=range(len(cb_BSD_nodes2)),
node_size=2000,
cmap=plt.cm.BrBG,
with_labels=True)
plt.figure(figsize=(16,16))
nx.draw_random(G_bsd2,
node_color=range(len(cb_BSD_nodes2)),
node_size=2000,
cmap=plt.cm.BrBG,
with_labels=True)
plt.figure(figsize=(16,16))
nx.draw_spectral(G_bsd2,
node_color=range(len(cb_BSD_nodes2)),
node_size=2000,
cmap=plt.cm.BrBG,
with_labels=True)
Let’s try one more time, this time with per-node color and PageRank-based size supplied from a lookup (dict).
# Per-node lookup table (node, PageRank, srm_avg, hex) read from the
# 'node_size_color' sheet.  The sheet name is passed positionally because the
# keyword was renamed from `sheetname` to `sheet_name` in later pandas
# releases; the positional form is accepted by both.
BSD_grandpappies = pd.read_excel('cb_data.xlsx', 'node_size_color')
BSD_grandpappies
node | PageRank | srm_avg | hex | |
---|---|---|---|---|
0 | Session Beer | 0.005986 | 2.50 | #F8F753 |
1 | German-Style Pilsener | 0.025135 | 3.50 | #F6F513 |
2 | American Cream Ale | 0.005828 | 3.50 | #F6F513 |
3 | Bohemian-Style Pilsener | 0.011417 | 5.00 | #ECE61A |
4 | English-Style Bitter | 0.011212 | 8.50 | #BF923B |
5 | Belgian-Style Pale Ale | 0.021347 | 9.00 | #BF923B |
6 | German-Style Marzen/Oktoberfest | 0.012938 | 9.50 | #BF923B |
7 | English-Style India Pale Ale/IPA | 0.013000 | 10.00 | #BF813A |
8 | American Amber Lager | 0.025391 | 10.00 | #BF813A |
9 | English-Style Pale Ale/ESB | 0.009851 | 10.75 | #BF813A |
10 | California Common | 0.002060 | 11.50 | #BF813A |
11 | French-Style Biere de Garde | 0.002060 | 11.50 | #BF813A |
12 | Scottish-Style Ale | 0.013249 | 12.50 | #BF813A |
13 | American Imperial Red Ale | 0.002988 | 13.00 | #BC6733 |
14 | Vienna-Style Lager | 0.026314 | 14.00 | #BC6733 |
15 | Irish-Style Red | 0.002060 | 14.50 | #BC6733 |
16 | German-Style Brown/Altbier | 0.002060 | 15.00 | #BC6733 |
17 | German-Style Dunkelweizen | 0.002856 | 17.50 | #8D4C32 |
18 | German-Style Dunkel | 0.033789 | 17.50 | #8D4C32 |
19 | English-Style Brown Ale | 0.018996 | 18.50 | #8D4C32 |
20 | Belgian-Style Flanders | 0.002060 | 18.50 | #8D4C32 |
21 | American Brown Ale | 0.018427 | 20.50 | #5D341A |
22 | Belgian-Style Golden Strong Ale | 0.009655 | 22.00 | #5D341A |
23 | German-Style Bock | 0.016411 | 25.00 | #261716 |
24 | Belgian-Style Dubbel | 0.059657 | 26.00 | #261716 |
25 | Coffee Beer | 0.005266 | 27.00 | #261716 |
26 | English-Style Brown Porter | 0.033325 | 27.50 | #261716 |
27 | German-Style Schwarzbier | 0.004315 | 27.50 | #261716 |
28 | Chocolate Beer | 0.005132 | 32.50 | #0F0B0A |
29 | Irish-Style Dry Stout | 0.007958 | 37.50 | #080707 |
30 | American Imperial Porter | 0.003514 | 39.50 | #080707 |
31 | American Stout | 0.002060 | 39.50 | #080707 |
# Scale the PageRank column by a constant factor to obtain node sizes.
ns = BSD_grandpappies.PageRank * 100000
plt.figure(figsize=(12,12))
# nodelist, node_color and node_size are all taken from the same table, in
# row order, so the i-th node gets the i-th colour and the i-th size.
nx.draw_spring(G_bsd2,
nodelist=BSD_grandpappies.node.tolist(),
node_color = BSD_grandpappies.hex.tolist(),
node_size=ns.tolist(),
font_color = 'g',
with_labels=True)