import numpy as np
import pandas as pd
import csv
import codecs
from bs4 import BeautifulSoup
import re
import os
import socket
import urllib
import time
import requests
gtrend1 = pd.read_csv('timelines_brownTruckJoin/multiTimeline (1).csv', skiprows=2, index_col=0)
gtrend1.columns = [col.split(': ')[0].lower().replace(' ', '_') for col in gtrend1.columns]
gtrend1.head()
millersburg_brewing bells_brewery rubicon_brewing_company_pub the_bruery brown_truck_brewery
Day
2016-07-01 12 12 0 37 0
2016-07-02 0 29 0 14 14
2016-07-03 0 15 0 25 0
2016-07-04 0 14 0 33 0
2016-07-05 0 13 0 38 0
gtrend2 = pd.read_csv('timelines_brownTruckJoin/multiTimeline (2).csv', skiprows=2, index_col=0)
gtrend2.columns = [col.split(': ')[0].lower().replace(' ', '_') for col in gtrend2.columns]
gtrend2.head()
brown_truck_brewery crank_arm_brewing el_segundo_brewing iowa_brewing figueroa_mountain_brewing
Day
2016-07-01 0 0 31 31 0
2016-07-02 36 0 0 0 36
2016-07-03 0 0 74 37 0
2016-07-04 0 0 35 71 0
2016-07-05 0 0 0 0 0
def import_gtrend(file_path):
    gtrend = pd.read_csv(file_path, skiprows=2, index_col=0)
    gtrend.columns = [col.split(': ')[0].lower().replace(' ', '_') for col in gtrend.columns]
    return gtrend

import_gtrend('timelines_brownTruckJoin/multiTimeline (3).csv').head()
brown_truck_brewery bns_brewing_&_distilling vintage_brewing smog_city_brewing lynnwood_brewing_concern
Day
2016-07-01 0 0 0 0 30
2016-07-02 34 0 34 0 0
2016-07-03 0 0 0 0 35
2016-07-04 34 0 0 34 0
2016-07-05 0 0 60 0 0
pd.concat([gtrend1.brown_truck_brewery, gtrend2.brown_truck_brewery, 
           np.isfinite(gtrend1.brown_truck_brewery/gtrend2.brown_truck_brewery)
          ], axis=1).head()
brown_truck_brewery brown_truck_brewery brown_truck_brewery
Day
2016-07-01 0 0 False
2016-07-02 14 36 True
2016-07-03 0 0 False
2016-07-04 0 0 False
2016-07-05 0 0 False
np.mean((gtrend1.brown_truck_brewery/gtrend2.brown_truck_brewery)[np.isfinite(gtrend1.brown_truck_brewery/gtrend2.brown_truck_brewery)])
0.4035917946776314
def gtrend_norm(df_1, df_2, col='brown_truck_brewery'):
    norm_mult = np.mean((df_1.brown_truck_brewery/df_2.brown_truck_brewery)[np.isfinite(df_1.brown_truck_brewery/df_2.brown_truck_brewery)])
    return norm_mult

gtrend_norm(gtrend1, gtrend2)
0.4035917946776314
def df_norm(df_2, norm_mult):
    return df_2.iloc[:,1:] * norm_mult

df_norm(gtrend2, 0.4036).head()
crank_arm_brewing el_segundo_brewing iowa_brewing figueroa_mountain_brewing
Day
2016-07-01 0.0 12.5116 12.5116 0.0000
2016-07-02 0.0 0.0000 0.0000 14.5296
2016-07-03 0.0 29.8664 14.9332 0.0000
2016-07-04 0.0 14.1260 28.6556 0.0000
2016-07-05 0.0 0.0000 0.0000 0.0000
gtrend1.join(gtrend2.iloc[:,1:] * gtrend_norm(gtrend1, gtrend2)).head()
millersburg_brewing bells_brewery rubicon_brewing_company_pub the_bruery brown_truck_brewery crank_arm_brewing el_segundo_brewing iowa_brewing figueroa_mountain_brewing
Day
2016-07-01 12 12 0 37 0 0.0 12.511346 12.511346 0.000000
2016-07-02 0 29 0 14 14 0.0 0.000000 0.000000 14.529305
2016-07-03 0 15 0 25 0 0.0 29.865793 14.932896 0.000000
2016-07-04 0 14 0 33 0 0.0 14.125713 28.655017 0.000000
2016-07-05 0 13 0 38 0 0.0 0.000000 0.000000 0.000000
os.listdir('timelines_brownTruckJoin/')
['multiTimeline (1).csv',
 'multiTimeline (10).csv',
 'multiTimeline (11).csv',
 'multiTimeline (12).csv',
 'multiTimeline (13).csv',
 'multiTimeline (14).csv',
 'multiTimeline (15).csv',
 'multiTimeline (16).csv',
 'multiTimeline (17).csv',
 'multiTimeline (18).csv',
 'multiTimeline (19).csv',
 'multiTimeline (2).csv',
 'multiTimeline (20).csv',
 'multiTimeline (21).csv',
 'multiTimeline (22).csv',
 'multiTimeline (23).csv',
 'multiTimeline (24).csv',
 'multiTimeline (25).csv',
 'multiTimeline (26).csv',
 'multiTimeline (27).csv',
 'multiTimeline (28).csv',
 'multiTimeline (29).csv',
 'multiTimeline (3).csv',
 'multiTimeline (30).csv',
 'multiTimeline (31).csv',
 'multiTimeline (32).csv',
 'multiTimeline (33).csv',
 'multiTimeline (34).csv',
 'multiTimeline (35).csv',
 'multiTimeline (36).csv',
 'multiTimeline (37).csv',
 'multiTimeline (38).csv',
 'multiTimeline (39).csv',
 'multiTimeline (4).csv',
 'multiTimeline (40).csv',
 'multiTimeline (41).csv',
 'multiTimeline (42).csv',
 'multiTimeline (43).csv',
 'multiTimeline (44).csv',
 'multiTimeline (45).csv',
 'multiTimeline (46).csv',
 'multiTimeline (47).csv',
 'multiTimeline (48).csv',
 'multiTimeline (49).csv',
 'multiTimeline (5).csv',
 'multiTimeline (50).csv',
 'multiTimeline (51).csv',
 'multiTimeline (52).csv',
 'multiTimeline (53).csv',
 'multiTimeline (54).csv',
 'multiTimeline (55).csv',
 'multiTimeline (56).csv',
 'multiTimeline (57).csv',
 'multiTimeline (58).csv',
 'multiTimeline (59).csv',
 'multiTimeline (6).csv',
 'multiTimeline (60).csv',
 'multiTimeline (61).csv',
 'multiTimeline (62).csv',
 'multiTimeline (63).csv',
 'multiTimeline (7).csv',
 'multiTimeline (8).csv',
 'multiTimeline (9).csv']

let’s put it together

  • load files in directory
  • hold first file as ‘node’
  • cycle through other files
  • match on common term/brewery
  • normalize the cycled file
  • join to ‘node’
  • suffix in case of dupes; not all bad, will allow for comparisons (dupe columns should match/be close on values)
  • repeat for remaining files
  • return wide df
dir_trends = 'timelines_brownTruckJoin/'
node_file = os.listdir(dir_trends)[0]

def gtrend_join(file_dir = dir_trends, node_file = node_file):
    
    node_file = import_gtrend(file_dir + node_file)
    
    for i in os.listdir(file_dir)[1:]:
        
        temp_file = import_gtrend(file_dir + i)
        node_file = node_file.join(df_norm(temp_file, gtrend_norm(node_file, temp_file)),
                                   lsuffix = '', rsuffix = '_y')
#         node_file = pd.merge(node_file, df_norm(temp_file, gtrend_norm(node_file, temp_file)), 
#                              left_index=True, right_index=True, suffixes=('', '_y'))
    
    return node_file


combined_trends = gtrend_join()
print combined_trends.shape
combined_trends.head()
(123, 252)
millersburg_brewing bells_brewery rubicon_brewing_company_pub the_bruery brown_truck_brewery great_notion_brewing max_lagers_wood 10_barrel_brewing boise_brewing societe_brewing ... coors_brewing craft_brew_alliance_y carver_brewing aeronaut_brewing pelican_brewing ska_brewing bootstrap_brewing central_coast_brewing maplewood_brewery karl_strauss_brewing_y
Day
2016-07-01 12 12 0 37 0 20.007564 0.0 9.733410 9.733410 9.842289 ... 0.000000 9.26641 9.774888 14.662332 9.774888 24.881533 0.0 0.000000 0.000000 9.839457
2016-07-02 0 29 0 14 14 0.000000 0.0 30.281719 11.355645 11.482671 ... 0.000000 0.00000 0.000000 0.000000 11.107827 11.552140 0.0 0.000000 0.000000 11.181201
2016-07-03 0 15 0 25 0 11.896390 0.0 35.689169 11.896390 0.000000 ... 0.000000 0.00000 0.000000 0.000000 11.552140 11.552140 0.0 11.628449 0.000000 11.628449
2016-07-04 0 14 0 33 0 0.000000 0.0 22.711289 11.355645 0.000000 ... 0.000000 0.00000 0.000000 22.659967 11.107827 11.107827 0.0 0.000000 11.181201 11.181201
2016-07-05 0 13 0 38 0 10.274155 0.0 25.415014 10.274155 10.252385 ... 18.918921 0.00000 0.000000 0.000000 10.219201 15.106645 0.0 0.000000 0.000000 0.000000

5 rows × 252 columns

from collections import Counter

cnt = Counter()

for i in os.listdir(dir_trends):
    temp_file = import_gtrend(dir_trends + i)
    for col in temp_file.columns:
        cnt[col] += 1
        
[brw for brw in cnt if cnt[brw] > 1]
['craft_brew_alliance',
 'figueroa_mountain_brewing',
 'ram/big_horn_brewery',
 'lynnwood_brewing_concern',
 'echo_brewing',
 'bjs_restaurant_&_brewery',
 'devils_backbone_brewing',
 'brown_truck_brewery',
 'karl_strauss_brewing']
none_dupes = [brw for brw in combined_trends.columns.tolist() if '_y' not in brw]
combined_trends = combined_trends[none_dupes]
print combined_trends.shape
combined_trends.head()
(123, 242)
millersburg_brewing bells_brewery rubicon_brewing_company_pub the_bruery brown_truck_brewery great_notion_brewing max_lagers_wood 10_barrel_brewing boise_brewing societe_brewing ... riip_beer knee_deep_brewing coors_brewing carver_brewing aeronaut_brewing pelican_brewing ska_brewing bootstrap_brewing central_coast_brewing maplewood_brewery
Day
2016-07-01 12 12 0 37 0 20.007564 0.0 9.733410 9.733410 9.842289 ... 0.0 13.899615 0.000000 9.774888 14.662332 9.774888 24.881533 0.0 0.000000 0.000000
2016-07-02 0 29 0 14 14 0.000000 0.0 30.281719 11.355645 11.482671 ... 0.0 21.621624 0.000000 0.000000 0.000000 11.107827 11.552140 0.0 0.000000 0.000000
2016-07-03 0 15 0 25 0 11.896390 0.0 35.689169 11.896390 0.000000 ... 0.0 0.000000 0.000000 0.000000 0.000000 11.552140 11.552140 0.0 11.628449 0.000000
2016-07-04 0 14 0 33 0 0.000000 0.0 22.711289 11.355645 0.000000 ... 0.0 0.000000 0.000000 0.000000 22.659967 11.107827 11.107827 0.0 0.000000 11.181201
2016-07-05 0 13 0 38 0 10.274155 0.0 25.415014 10.274155 10.252385 ... 0.0 0.000000 18.918921 0.000000 0.000000 10.219201 15.106645 0.0 0.000000 0.000000

5 rows × 242 columns

# combined_trends.to_csv('timelines_combined/combined_trends.csv', sep=';')

look at trend diff for date split

  • pre.period <- as.Date(c(“2016-07-01”, “2016-10-05”))
  • post.period <- as.Date(c(“2016-10-09”, “2016-10-31”))
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
pre_gabf16 = combined_trends.index <= '2016-10-05'
post_gabf16 = combined_trends.index >= '2016-10-09'
pre_post_avgs = combined_trends[pre_gabf16].mean().to_frame(name='pre_gabf16').join(combined_trends[post_gabf16].mean().to_frame(name='post_gabf16'))
pre_post_avgs.head()
pre_gabf16 post_gabf16
millersburg_brewing 2.525773 2.086957
bells_brewery 18.896907 15.521739
rubicon_brewing_company_pub 0.000000 0.000000
the_bruery 28.855670 33.478261
brown_truck_brewery 3.278351 9.478261
pre_post_avgs['dif_gabf16_norm'] = pre_post_avgs.post_gabf16/(pre_post_avgs.pre_gabf16 + 0.000001)
pre_post_avgs.head()
pre_gabf16 post_gabf16 dif_gabf16_norm
millersburg_brewing 2.525773 2.086957 0.826264
bells_brewery 18.896907 15.521739 0.821390
rubicon_brewing_company_pub 0.000000 0.000000 0.000000
the_bruery 28.855670 33.478261 1.160197
brown_truck_brewery 3.278351 9.478261 2.891167
pre_post_avgs.dif_gabf16_norm.describe()
count    242.000000
mean       0.893139
std        0.837155
min        0.000000
25%        0.432029
50%        0.843814
75%        1.115964
max        5.504798
Name: dif_gabf16_norm, dtype: float64
plt.figure(figsize=(12, 6))
plt.hist(pre_post_avgs.dif_gabf16_norm)
plt.title('Search Engagement Ratio - Post GABF Medal (2016)')
plt.ylabel('Brewery Count')
<matplotlib.text.Text at 0xb8c8048>

plt.boxplot(pre_post_avgs.dif_gabf16_norm)
{'boxes': [<matplotlib.lines.Line2D at 0xbb8cd30>],
 'caps': [<matplotlib.lines.Line2D at 0xbba09b0>,
  <matplotlib.lines.Line2D at 0xbba0f28>],
 'fliers': [<matplotlib.lines.Line2D at 0xbbeea58>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0xbbee4e0>],
 'whiskers': [<matplotlib.lines.Line2D at 0xbb8ce10>,
  <matplotlib.lines.Line2D at 0xbba0438>]}

time series of non-zero averages

# 1/6 of breweries have NO Google search data
np.sum(combined_trends.mean(axis=0) == 0.0)
40
mean_idx_trends = combined_trends.divide(combined_trends.mean(axis=0))
mean_idx_trends.head()
millersburg_brewing bells_brewery rubicon_brewing_company_pub the_bruery brown_truck_brewery great_notion_brewing max_lagers_wood 10_barrel_brewing boise_brewing societe_brewing ... riip_beer knee_deep_brewing coors_brewing carver_brewing aeronaut_brewing pelican_brewing ska_brewing bootstrap_brewing central_coast_brewing maplewood_brewery
Day
2016-07-01 4.641509 0.656876 NaN 1.239042 0.000000 2.913572 NaN 0.447997 1.112004 1.101082 ... 0.0 1.673469 0.000000 4.609881 1.706179 1.210197 2.085377 0.0 0.000000 0.0000
2016-07-02 0.000000 1.587450 NaN 0.468827 3.015762 0.000000 NaN 1.393768 1.297338 1.284595 ... 0.0 2.603175 0.000000 0.000000 0.000000 1.375224 0.968211 0.0 0.000000 0.0000
2016-07-03 0.000000 0.821095 NaN 0.837190 0.000000 1.732394 NaN 1.642655 1.359116 0.000000 ... 0.0 0.000000 0.000000 0.000000 0.000000 1.430233 0.968211 0.0 6.580247 0.0000
2016-07-04 0.000000 0.766355 NaN 1.105091 0.000000 0.000000 NaN 1.045326 1.297338 0.000000 ... 0.0 0.000000 0.000000 0.000000 2.636822 1.375224 0.930972 0.0 0.000000 7.6875
2016-07-05 0.000000 0.711615 NaN 1.272529 0.000000 1.496159 NaN 1.169769 1.173782 1.146960 ... 0.0 0.000000 1.806655 0.000000 0.000000 1.265206 1.266122 0.0 0.000000 0.0000

5 rows × 242 columns


mean_idx_trends.plot(legend=False, figsize=(16, 8))
plt.title('Brewery Search Hits (indexed)')
plt.ylabel('relative to own avg')
<matplotlib.text.Text at 0xbd35748>

mean_idx_trends[pre_gabf16].max().sort_values(ascending=False)[:10]
two_kilts_brewing              123.000000
12degree_brewing                61.500000
ghostfish_brewing_company       60.701299
high_heel_brewing               49.785714
zwanzigz_brewing                27.954545
auburn_alehouse                 25.408451
black_tooth_brewing             25.408451
logsdon_farmhouse_ales          23.697248
grimm_brothers_brewhouse        23.455814
taps_fish_house_and_brewery     22.932203
dtype: float64
mean_idx_trends[post_gabf16].max().sort_values(ascending=False)[:10]
12degree_brewing               61.500000
zwanzigz_brewing               31.448864
logsdon_farmhouse_ales         22.004587
taps_fish_house_and_brewery    21.889831
pizza_port_san_clemente        20.856522
auburn_alehouse                19.056338
morgan_territory_brewing       17.571429
solid_rock_brewing             17.253394
gibbs_hundred_brewing          16.400000
ardent_craft_ales              15.216495
dtype: float64

IQR

(mean_idx_trends[pre_gabf16].sum(axis=1)).describe()
count     97.000000
mean     201.333917
std       48.543656
min      127.849023
25%      160.680484
50%      191.979998
75%      231.980401
max      350.348231
dtype: float64
(mean_idx_trends[post_gabf16].sum(axis=1)).describe()
count     23.000000
mean     196.012536
std       39.039190
min      116.992323
25%      160.378756
50%      201.091106
75%      231.406028
max      252.729727
dtype: float64

import brewery type, state, and medal count

gabf16_type = pd.read_excel('comp_breweries/gabf16_type.xlsx', index_col=0)

print gabf16_type.shape
gabf16_type.tail()
(303, 16)
medal beer_name brewery city state category year medal_cnt brewery_name2 brewery_type name_brewassoc name_Gtrends FAM PARENT DUPE RETAIN
302 Silver Mélange A Trois Nebraska Brewing Co. - Papillion Papillion NE Wood- and Barrel-Aged Strong Beer 2016 2 nebraska brew Brewpub Nebraska Brewing Co nebraska_brewing False False False True
303 Bronze 15th Anniversary Ale Island Brewing Co. Carpinteria CA Wood- and Barrel-Aged Strong Beer 2016 1 island brew Micro Island Brewing Co island_brewing False False False True
304 Gold Barrel-Aged Darkness Surly Brewing Co. Brooklyn Center MN Wood- and Barrel-Aged Strong Stout 2016 1 surly brew Regional Surly Brewing Company surly_brewing False False False True
305 Silver The Event Horizon Olde Hickory Brewery Hickory NC Wood- and Barrel-Aged Strong Stout 2016 2 olde hickory brew Micro Olde Hickory Brewery - Production olde_hickory_brewery False False False True
306 Bronze Little Nonsense Verboten Brewing Loveland CO Wood- and Barrel-Aged Strong Stout 2016 1 verboten brew Micro Verboten Brewing verboten_brewing False False False True
gabf16_new = gabf16_type[gabf16_type.RETAIN == True]

print gabf16_new.shape
gabf16_new.tail()
(286, 16)
medal beer_name brewery city state category year medal_cnt brewery_name2 brewery_type name_brewassoc name_Gtrends FAM PARENT DUPE RETAIN
302 Silver Mélange A Trois Nebraska Brewing Co. - Papillion Papillion NE Wood- and Barrel-Aged Strong Beer 2016 2 nebraska brew Brewpub Nebraska Brewing Co nebraska_brewing False False False True
303 Bronze 15th Anniversary Ale Island Brewing Co. Carpinteria CA Wood- and Barrel-Aged Strong Beer 2016 1 island brew Micro Island Brewing Co island_brewing False False False True
304 Gold Barrel-Aged Darkness Surly Brewing Co. Brooklyn Center MN Wood- and Barrel-Aged Strong Stout 2016 1 surly brew Regional Surly Brewing Company surly_brewing False False False True
305 Silver The Event Horizon Olde Hickory Brewery Hickory NC Wood- and Barrel-Aged Strong Stout 2016 2 olde hickory brew Micro Olde Hickory Brewery - Production olde_hickory_brewery False False False True
306 Bronze Little Nonsense Verboten Brewing Loveland CO Wood- and Barrel-Aged Strong Stout 2016 1 verboten brew Micro Verboten Brewing verboten_brewing False False False True
gabf16_new[['name_Gtrends', 'state', 'brewery_type', 'medal_cnt']].head()
name_Gtrends state brewery_type medal_cnt
0 millersburg_brewing OH Brewpub 1
1 bells_brewery MI Regional 1
2 rubicon_brewing_company_pub CA n/a 1
3 the_bruery CA Micro 2
4 brown_truck_brewery NC Micro 3

combine craft brewery data

gabf16_eda = pd.merge(gabf16_new[['brewery', 'name_Gtrends', 'state', 'brewery_type', 'medal_cnt']], pre_post_avgs, 
                      left_on='name_Gtrends', right_index=True).drop_duplicates()

print gabf16_eda.shape
gabf16_eda.head()
(249, 8)
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm
0 Millersburg Brewing millersburg_brewing OH Brewpub 1 2.525773 2.086957 0.826264
1 Bell's Brewery, Inc bells_brewery MI Regional 1 18.896907 15.521739 0.821390
2 Rubicon Brewing Company Pub rubicon_brewing_company_pub CA n/a 1 0.000000 0.000000 0.000000
3 The Bruery the_bruery CA Micro 2 28.855670 33.478261 1.160197
4 Brown Truck Brewery brown_truck_brewery NC Micro 3 3.278351 9.478261 2.891167
gabf16_eda['multiple_wins'] = 0
gabf16_eda.loc[gabf16_eda.medal_cnt>1, 'multiple_wins'] = 1

gabf16_eda.multiple_wins.value_counts()
0    220
1     29
Name: multiple_wins, dtype: int64
gabf16_eda.pivot_table(index='brewery_type', columns='multiple_wins', values='brewery', aggfunc='count')
multiple_wins 0 1
brewery_type
Brewpub 59.0 10.0
Contract 2.0 NaN
Large 6.0 2.0
Micro 127.0 13.0
Proprietor 1.0 NaN
Regional 24.0 4.0
n/a 1.0 NaN
gabf16_eda.pivot_table(index='brewery_type', columns='multiple_wins', values='dif_gabf16_norm', aggfunc='mean')
multiple_wins 0 1
brewery_type
Brewpub 0.730805 0.978195
Contract 0.519592 NaN
Large 0.912032 1.145409
Micro 0.940714 1.083965
Proprietor 2.194376 NaN
Regional 0.774173 1.130548
n/a 0.000000 NaN

brewpub, micro, & regional (single and multi wins)

gabf16_eda[(gabf16_eda.multiple_wins==0) & (gabf16_eda.brewery_type=='Brewpub')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
86 12Degree Brewing 12degree_brewing CO Brewpub 1 0.102369 0.431731 4.217350 0
301 Taps Fish House and Brewery - Corona taps_fish_house_and_brewery CA Brewpub 1 0.412732 0.859309 2.081998 0
122 Pizza Port San Clemente pizza_port_san_clemente CA Brewpub 1 0.472705 0.926952 1.960948 0
gabf16_eda[(gabf16_eda.multiple_wins==0) & (gabf16_eda.brewery_type=='Micro')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
202 Hardywood Park Craft Brewery hardywood_park_craft_brewery VA Micro 1 0.785911 4.326285 5.504798 0
101 Ardent Craft Ales ardent_craft_ales VA Micro 1 0.456800 2.288627 5.010123 0
80 Logsdon Farmhouse Ales logsdon_farmhouse_ales OR Micro 1 0.337216 1.345986 3.991448 0
gabf16_eda[(gabf16_eda.multiple_wins==0) & (gabf16_eda.brewery_type=='Regional')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
81 Two Roads Brewing Co. two_roads_brewing CT Regional 1 9.442060 12.075778 1.278935 0
304 Surly Brewing Co. surly_brewing MN Regional 1 15.677050 17.159652 1.094571 0
79 Sun King Brewing Co. sun_king_brewing IN Regional 1 6.952078 7.491808 1.077636 0
gabf16_eda[(gabf16_eda.multiple_wins==1) & (gabf16_eda.brewery_type=='Brewpub')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
109 ZwanzigZ Brewing zwanzigz_brewing IN Brewpub 2 0.117996 0.559839 4.744525 1
15 Uberbrew uberbrew MT Brewpub 4 1.152701 1.283407 1.113390 1
76 Nebraska Brewing Co. - Papillion nebraska_brewing NE Brewpub 2 12.617107 12.913848 1.023519 1
gabf16_eda[(gabf16_eda.multiple_wins==1) & (gabf16_eda.brewery_type=='Micro')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
71 2SP Brewing Co. 2sp_brewing PA Micro 2 1.755364 6.395837 3.643594 1
4 Brown Truck Brewery brown_truck_brewery NC Micro 3 3.278351 9.478261 2.891167 1
275 Neshaminy Creek Brewing Co. neshaminy_creek_brewing PA Micro 2 3.785876 4.727902 1.248826 1
gabf16_eda[(gabf16_eda.multiple_wins==1) & (gabf16_eda.brewery_type=='Regional')].nlargest(3,'dif_gabf16_norm')
brewery name_Gtrends state brewery_type medal_cnt pre_gabf16 post_gabf16 dif_gabf16_norm multiple_wins
26 Georgetown Brewing Co. georgetown_brewing WA Regional 2 2.236994 4.012087 1.793516 1
14 Fat Head's Brewery fat_heads_brewery OH Regional 2 3.218342 3.636321 1.129874 1
41 Karl Strauss Brewing Co. - San Diego karl_strauss_brewing CA Regional 3 6.549709 5.653187 0.863120 1

select a few from above

  • definitely want a CA, CO, OR, & WA… so:
    • CA: taps_fish_house_and_brewery (brewpub, 1)
    • CO: 12degree_brewing (brewpub, 1)
    • OR: logsdon_farmhouse_ales (mirco, 1)
    • WA: georgetown_brewing (regional, 2)
  • some compliments:
    • uberbrew (MT; brewpub, 4 wins!)
    • hardywood_park_craft_brewery (VA; micro, 1; largest post GABF uptick!)
    • brown_truck_brewery (NC; micro, 3; the joining brewery, why not?)

find comparable (type & state) non-GABF16 winners

brw_assoc = pd.read_csv('comp_breweries/brw_assoc_usa_gabf16.csv', sep=';', index_col=0)

print brw_assoc.shape
brw_assoc.head()
(7429, 12)
name brewery_type address city state z_code ownership telephone url brewery_name2 name_short gabf16_win
0 Bradley Farm / RB Brew, LLC Micro 317 Springtown Rd New Paltz NY 12561-3020 NaN (845) 255-8769 www.raybradleyfarm.com bradley farm / rb brew Bradley Farm 0
1 Kona Brewing Co Regional 74-5612 Pawai Pl Kailua Kona HI 96740- Greater than 25% ownership by Anheuser-Busch I... (808) 334-1133 www.konabrewingco.com kona brew Kona Brewing 1
2 (405) Brewing Co Micro 1716 Topeka St Norman OK 73069-8224 NaN (405) 816-0490 www.405brewing.com (405) brew (405) Brewing 0
3 (512) Brewing Co Micro 407 Radam Ln Ste F200 Austin TX 78745-1197 NaN (512) 921-1545 www.512brewing.com (512) brew (512) Brewing 0
4 10 Barrel Brewing Co Large 62970 18th St Bend OR 97701-9847 Greater than 25% ownership by Anheuser-Busch I... (541) 585-1007 www.10barrel.com 10 barrel brew 10 Barrel 1
brewery_combos = [('CA', 'Brewpub'), ('CO', 'Brewpub'), ('OR', 'Micro'), ('WA', 'Regional'),
                  ('MT', 'Brewpub'), ('VA', 'Micro'), ('NC', 'Micro')
                  ]
np.random.seed(8)

for combo in brewery_combos:
    candidates = brw_assoc[(brw_assoc.state==combo[0]) & 
                           (brw_assoc.brewery_type==combo[1]) & 
                           (brw_assoc.gabf16_win==0)
                          ]['name']
    print combo
    print np.random.choice(candidates, 4, replace=False), '\n'
('CA', 'Brewpub')
['Downtown Joes Brewery and Restaurant' 'Taplands Brewery'
 'Woods Bar & Brewery' 'Miner\xe2\x80\x99s Alley Brewing Company'] 

('CO', 'Brewpub')
['Oskar Blues Brewery - Lyons' 'Whistle Pig Brewing Company'
 'Brix Taphouse and Brewery' 'Moonlight Pizza'] 

('OR', 'Micro')
['Mazama Brewing Co' 'Siuslaw Brewing'
 "Krauski's Brewskis / The Hoppy Brewer" 'Red Ox Brewing'] 

('WA', 'Regional')
['Fremont Brewing Co' 'Redhook Brewery' 'Mac and Jacks Brewery Inc'
 'Iron Horse Brewery'] 

('MT', 'Brewpub')
['Bridger Brewing Company' 'Cabinet Mountain Brewing Co'
 'The Front Brewing Company' 'Backslope Brewing '] 

('VA', 'Micro')
['Lickinghole Creek Craft Brewery' 'Barrel Oak Farm Taphouse'
 'Sunken City Brewing Co' 'New District Brewing Company'] 

('NC', 'Micro')
['Good Hops Brewing LLC' 'Preyer Brewing Company'
 'Fortnight Brewing Company' 'Burial Beer Co Forestry Camp'] 

some last minute eda by segment

single_medals = gabf16_new[gabf16_new.medal_cnt==1][['brewery', 'name_Gtrends', 'state', 'brewery_type', 'medal', 'category']]
single_medals.head()
brewery name_Gtrends state brewery_type medal category
0 Millersburg Brewing millersburg_brewing OH Brewpub Gold Aged Beer
1 Bell's Brewery, Inc bells_brewery MI Regional Silver Aged Beer
2 Rubicon Brewing Company Pub rubicon_brewing_company_pub CA n/a Bronze Aged Beer
5 Crank Arm Brewing Co. crank_arm_brewing NC Micro Bronze American-Belgo-Style Ale
6 El Segundo Brewing Co. el_segundo_brewing CA Micro Gold American-Style Amber Lager or Dark Lager
gold = [s for s in single_medals[single_medals.medal=='Gold']['name_Gtrends'].tolist() if s != u'n/a']
mean_idx_trends[gold].plot(legend=False, figsize=(16, 8))
plt.title('Gold Medal Brewery Search Hits (indexed)')
plt.ylabel('relative to own avg')
<matplotlib.text.Text at 0xf84a748>

silver = [s for s in single_medals[single_medals.medal=='Silver']['name_Gtrends'].tolist() if s != u'n/a']
mean_idx_trends[silver].plot(legend=False, figsize=(16, 8))
plt.title('Silver Medal Brewery Search Hits (indexed)')
plt.ylabel('relative to own avg')
<matplotlib.text.Text at 0xfbf3208>

bronze = [s for s in single_medals[single_medals.medal=='Bronze']['name_Gtrends'].tolist() if s != u'n/a']
mean_idx_trends[bronze].plot(legend=False, figsize=(16, 8))
plt.title('Bronze Medal Brewery Search Hits (indexed)')
plt.ylabel('relative to own avg')
<matplotlib.text.Text at 0x10f0ecc0>

IQR by medal (single medal winners)

mean_idx_trends[gold].sum(axis=1)[pre_gabf16].describe(), mean_idx_trends[gold].sum(axis=1)[post_gabf16].describe()
(count     97.000000
 mean      57.907986
 std       17.754784
 min       32.077572
 25%       45.378124
 50%       52.536740
 75%       66.628534
 max      112.251168
 dtype: float64, count    23.000000
 mean     57.466237
 std      18.771940
 min      24.325076
 25%      45.764013
 50%      58.324900
 75%      72.582052
 max      98.950253
 dtype: float64)
mean_idx_trends[silver].sum(axis=1)[pre_gabf16].describe(), mean_idx_trends[silver].sum(axis=1)[post_gabf16].describe()
(count     97.000000
 mean      60.439844
 std       22.552839
 min       28.589938
 25%       45.640806
 50%       56.970620
 75%       70.268552
 max      195.875898
 dtype: float64, count    23.000000
 mean     57.492863
 std      14.953135
 min      30.036341
 25%      47.732232
 50%      61.927325
 75%      69.956782
 max      80.223873
 dtype: float64)
mean_idx_trends[bronze].sum(axis=1)[pre_gabf16].describe(), mean_idx_trends[bronze].sum(axis=1)[post_gabf16].describe()
(count     97.000000
 mean      62.787726
 std       22.994400
 min       27.376774
 25%       46.681124
 50%       57.716062
 75%       70.865554
 max      145.357013
 dtype: float64, count    23.000000
 mean     60.411436
 std      17.978898
 min      29.892762
 25%      47.813260
 50%      60.287267
 75%      68.125172
 max      97.718445
 dtype: float64)
single_medals_idx = pd.DataFrame({'gold': mean_idx_trends[gold].sum(axis=1),
                                  'silver': mean_idx_trends[silver].sum(axis=1),
                                  'bronze': mean_idx_trends[bronze].sum(axis=1),
                                  'gabf': 'GABF'
                                 })

single_medals_idx.loc[pre_gabf16, 'gabf']  = 'pre'
single_medals_idx.loc[post_gabf16, 'gabf']  = 'post'
single_medals_idx = single_medals_idx[['gold', 'silver', 'bronze', 'gabf']]

single_medals_idx.tail()
gold silver bronze gabf
Day
2016-10-27 36.031479 51.258344 60.137878 post
2016-10-28 52.193937 79.114118 60.287267 post
2016-10-29 83.409933 64.636172 67.456476 post
2016-10-30 46.589409 63.336685 33.481906 post
2016-10-31 46.590061 59.287918 54.354502 post
import seaborn as sns
sns.set_style("whitegrid")
df = single_medals_idx[single_medals_idx.gabf!='GABF'].set_index('gabf', append=True).stack().to_frame().reset_index()\
.rename(columns={'level_2': 'medal', 0: 'value'}) 

df.head()
Day gabf medal value
0 2016-07-01 pre gold 87.037179
1 2016-07-01 pre silver 88.758016
2 2016-07-01 pre bronze 65.273149
3 2016-07-02 pre gold 103.857065
4 2016-07-02 pre silver 59.777300
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
sns.boxplot(x='medal', y='value', hue='gabf', data=df, palette="Set3")

sns.despine(trim=True)

misc

pre_post_avgs.sort_values('dif_gabf16_norm', ascending=False)
pre_gabf16 post_gabf16 dif_gabf16_norm
hardywood_park_craft_brewery 0.785911 4.326285 5.504798
ardent_craft_ales 0.456800 2.288627 5.010123
zwanzigz_brewing 0.117996 0.559839 4.744525
12degree_brewing 0.102369 0.431731 4.217350
logsdon_farmhouse_ales 0.337216 1.345986 3.991448
2sp_brewing 1.755364 6.395837 3.643594
solid_rock_brewing 0.443389 1.376959 3.105527
brown_truck_brewery 3.278351 9.478261 2.891167
gibbs_hundred_brewing 0.398042 1.091153 2.741297
hi-wire_brewing 0.676520 1.830562 2.705844
14er_brewing 1.291659 3.212589 2.487178
blackberry_farm_brewery 1.422047 3.246674 2.283097
high_water_brewing 3.014830 6.615672 2.194376
taps_fish_house_and_brewery 0.412732 0.859309 2.081998
second_chance_beer 1.447585 2.986046 2.062776
riip_beer 1.229949 2.450898 1.992682
pizza_port_san_clemente 0.472705 0.926952 1.960948
verboten_brewing 1.848623 3.439574 1.860613
slo_brew 2.113737 3.836893 1.815217
spencer_devon_brewing 1.122478 2.014437 1.794633
georgetown_brewing 2.236994 4.012087 1.793516
la_cumbre_brewing 2.591634 4.635861 1.788779
daredevil_brewing 1.829192 3.206119 1.752751
propolis_brewing 1.460230 2.514240 1.721810
alvarado_street_brewery 5.433718 9.260751 1.704312
pollyanna_brewing 1.975679 3.269762 1.655006
4_noses_brewing 0.783885 1.272879 1.623806
morgan_territory_brewing 0.494651 0.784530 1.586024
echo_brewing 4.451411 6.996020 1.571641
revolver_brewing 9.335360 14.266856 1.528260
... ... ... ...
altitude_chophouse_and_brewery 0.000000 0.000000 0.000000
missoula_brewing 0.000000 0.000000 0.000000
two_kilts_brewing 0.111016 0.000000 0.000000
high_heel_brewing 0.501533 0.000000 0.000000
ram/big_horn_brewery 0.000000 0.000000 0.000000
the_packinghouse_brewing 0.000000 0.000000 0.000000
coopersmiths_pub_&_brewing 0.000000 0.000000 0.000000
bns_brewing_&_distilling 0.000000 0.000000 0.000000
no_clue_craft_brewery 0.000000 0.000000 0.000000
emmetts_tavern_&_brewing 0.000000 0.000000 0.000000
black_tooth_brewing 1.152725 0.000000 0.000000
iron_springs_pub_&_brewery 0.000000 0.000000 0.000000
colorado_boy_pub 0.000000 0.000000 0.000000
brewers_alley_restaurant_&_brewery 0.000000 0.000000 0.000000
redwood_curtain_brewing 0.000000 0.000000 0.000000
horse_thief_hollow_brewery 0.569610 0.000000 0.000000
ghostfish_brewing_company 0.097172 0.000000 0.000000
fiction_beer 0.000000 0.000000 0.000000
philipsburg_brewing 0.000000 0.000000 0.000000
three_creeks_production 0.000000 0.000000 0.000000
alesong_brewing_&_blending 0.000000 0.000000 0.000000
hop_dogma_brewing 0.000000 0.000000 0.000000
swamp_rabbit_brewery_&_taproom 0.000000 0.000000 0.000000
glenwood_canyon_brewing 0.000000 0.000000 0.000000
kootenai_river_brewing 0.000000 0.000000 0.000000
greenview_brewing 0.000000 0.000000 0.000000
big_dogs_brewing 0.000000 0.000000 0.000000
rivers_edge_brewing 0.000000 0.000000 0.000000
maize_valley_craft_brewery 0.000000 0.000000 0.000000
grimm_brothers_brewhouse 0.518925 0.000000 0.000000

242 rows × 3 columns