import csv, os, json
from datetime import datetime
from collections import Counter, defaultdict

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
%matplotlib inline

print pd.__version__
0.18.0

Pull tweets [ggg1]


Ingest tweets [ggg2]

tweets_data = []

# one JSON tweet per line; skip any malformed or truncated lines
with open('data/kov_ward_tweets.txt', 'r') as f:
    for line in f:
        try:
            tweet = json.loads(line)
            tweets_data.append(tweet)
        except ValueError:
            continue

print len(tweets_data)
74902

Convert to a DataFrame

tweets_df = pd.DataFrame(tweets_data)

tweets_df.created_at = pd.to_datetime(tweets_df.created_at)
tweets_df.sort_values('created_at', inplace=True)

tweets_df = tweets_df[tweets_df.created_at.notnull()]
tweets_df.drop_duplicates(subset='id_str', inplace=True)
tweets_df.reset_index(drop=True, inplace=True)

print tweets_df.dtypes
print tweets_df.shape

tweets_df.tail()
contributors                         object
coordinates                          object
created_at                   datetime64[ns]
entities                             object
extended_entities                    object
favorite_count                        int64
favorited                              bool
geo                                  object
id                                    int64
id_str                               object
in_reply_to_screen_name              object
in_reply_to_status_id               float64
in_reply_to_status_id_str            object
in_reply_to_user_id                 float64
in_reply_to_user_id_str              object
is_quote_status                        bool
lang                                 object
metadata                             object
place                                object
possibly_sensitive                   object
quoted_status                        object
quoted_status_id                    float64
quoted_status_id_str                 object
retweet_count                         int64
retweeted                              bool
retweeted_status                     object
source                               object
text                                 object
truncated                              bool
user                                 object
dtype: object
(74702, 30)
[tweets_df.tail() output truncated: the last five tweets, posted 2016-11-20 23:58:49 through 23:59:43 UTC; 5 rows × 30 columns]

  • plot tweet counts on a time series [ggg2]
  • identify rounds & rest periods [ggg2]
  • export rest-period tweets with regex-matched scores [ggg2]
print tweets_df.created_at.min()
print tweets_df.created_at.max()
2016-11-16 00:41:50
2016-11-20 23:59:43

Create new user_* DataFrame fields

user_fields = ['created_at', 'description', 'followers_count', 'friends_count', 'geo_enabled', 
               'location', 'name', 'screen_name', 'statuses_count', 'time_zone', 'utc_offset']

user_fields_df = ['user_'+i for i in user_fields]
print user_fields_df
['user_created_at', 'user_description', 'user_followers_count', 'user_friends_count', 'user_geo_enabled', 'user_location', 'user_name', 'user_screen_name', 'user_statuses_count', 'user_time_zone', 'user_utc_offset']
tweets_df = pd.concat([tweets_df,pd.DataFrame(columns=user_fields_df)])
tweets_df.columns
Index([             u'contributors',               u'coordinates',
                      u'created_at',                  u'entities',
               u'extended_entities',            u'favorite_count',
                       u'favorited',                       u'geo',
                              u'id',                    u'id_str',
         u'in_reply_to_screen_name',     u'in_reply_to_status_id',
       u'in_reply_to_status_id_str',       u'in_reply_to_user_id',
         u'in_reply_to_user_id_str',           u'is_quote_status',
                            u'lang',                  u'metadata',
                           u'place',        u'possibly_sensitive',
                   u'quoted_status',          u'quoted_status_id',
            u'quoted_status_id_str',               u'rest_period',
                   u'retweet_count',                 u'retweeted',
                u'retweeted_status',                    u'source',
                            u'text',                 u'truncated',
                            u'user',           u'user_created_at',
                u'user_description',      u'user_followers_count',
              u'user_friends_count',          u'user_geo_enabled',
                   u'user_location',                 u'user_name',
                u'user_screen_name',       u'user_statuses_count',
                  u'user_time_zone',           u'user_utc_offset'],
      dtype='object')
def user_info(user_dict, field):
    """Pull a single field out of the nested user dict; None if missing."""
    try:
        return user_dict[field]
    except (KeyError, TypeError):
        return None


for f, col in zip(user_fields, user_fields_df):
    tweets_df[col] = tweets_df.user.apply(user_info, field=f)
tweets_df.user_created_at = pd.to_datetime(tweets_df.user_created_at)
print tweets_df.dtypes

tweets_df[user_fields_df].head()
contributors                         object
coordinates                          object
created_at                   datetime64[ns]
entities                             object
extended_entities                    object
favorite_count                      float64
favorited                            object
geo                                  object
id                                  float64
id_str                               object
in_reply_to_screen_name              object
in_reply_to_status_id               float64
in_reply_to_status_id_str            object
in_reply_to_user_id                 float64
in_reply_to_user_id_str              object
is_quote_status                      object
lang                                 object
metadata                             object
place                                object
possibly_sensitive                   object
quoted_status                        object
quoted_status_id                    float64
quoted_status_id_str                 object
rest_period                         float64
retweet_count                       float64
retweeted                            object
retweeted_status                     object
source                               object
text                                 object
truncated                            object
user                                 object
user_created_at              datetime64[ns]
user_description                     object
user_followers_count                  int64
user_friends_count                    int64
user_geo_enabled                       bool
user_location                        object
user_name                            object
user_screen_name                     object
user_statuses_count                   int64
user_time_zone                       object
user_utc_offset                     float64
dtype: object
[tweets_df[user_fields_df].head() output truncated: the first five rows of the new user_* columns (user_created_at, user_description, user_followers_count, user_friends_count, user_geo_enabled, user_location, user_name, user_screen_name, user_statuses_count, user_time_zone, user_utc_offset)]
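An equivalent way to expand the nested user dicts in one pass (a sketch, not the method used above; it assumes every row's user value is a dict, as it is here):

# build a frame straight from the embedded user dicts, then keep and prefix the chosen fields
user_flat = pd.DataFrame(list(tweets_df['user']))
user_flat = user_flat[user_fields].add_prefix('user_')
user_flat.head()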

Create user UTC offset in hours

  • localizing tweet times is for another time, out of scope for now (a small sketch follows the value_counts output below)
tweets_df['user_utc_offset_hr'] = tweets_df.user_utc_offset / 3600.0
tweets_df['user_utc_offset_hr'].value_counts()
-5.00     11602
-8.00     11190
-6.00      6058
 0.00      6021
-7.00      2269
-4.00      2147
 1.00      2067
-3.00       872
-10.00      668
-9.00       560
 2.00       441
 3.00       416
 8.00       327
 11.00      236
 7.00       180
-2.00       165
-11.00      151
 9.00       140
 6.00       123
 4.00       120
 10.00      111
 5.50       108
 5.00       106
 13.00       61
 10.50       55
 5.75        12
 3.50         6
 4.50         5
-3.50         4
 9.50         2
 6.50         1
-1.00         1
Name: user_utc_offset_hr, dtype: int64
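Purely for reference, since localizing is flagged as out of scope: a minimal sketch of shifting each tweet's UTC timestamp by the poster's profile offset (rows without an offset come out as NaT).

# approximate local posting time from the profile UTC offset
local_created_at = tweets_df['created_at'] + pd.to_timedelta(tweets_df['user_utc_offset_hr'], unit='h')
local_created_at.dt.hour.value_counts().sort_index()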

Time index and per-minute counts

dates_allTweets = []

# timestamps straight from the raw JSON (before cleaning)
for tweet in tweets_data:
    try:
        dates_allTweets.append(tweet['created_at'])
    except KeyError:
        pass

len(dates_allTweets)
74902
# switch to the cleaned, de-duplicated timestamps from the DataFrame
dates_allTweets = tweets_df['created_at'].tolist()
# a list of 1s to count the tweets
ones = [1]*len(dates_allTweets)
# the index of the series
idx = pd.DatetimeIndex(dates_allTweets)
# the actual series (a series of 1s for the moment)
tweets_KovWard = pd.Series(ones, index=idx)
 
# Resampling / bucketing
per_minute = tweets_KovWard.resample('1Min').sum().fillna(0)
per_minute[(per_minute.index > '2016-11-20 04:30:00') & 
           (per_minute.index < '2016-11-20 06:00:00')].plot(figsize=(12,6), label='Tweets')
[plot: tweets per minute, 2016-11-20 04:30 to 06:00 UTC]
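For comparison, the same per-minute counts can come straight from the DataFrame without the helper series of 1s (a sketch using the same cleaned timestamps):

# count tweets per minute directly off the created_at index
per_minute_alt = tweets_df.set_index('created_at')['id_str'].resample('1Min').count()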

print per_minute[(per_minute.index > '2016-11-20 04:44:00') & 
           (per_minute.index < '2016-11-20 05:34:00')]
2016-11-20 04:45:00     296.0
2016-11-20 04:46:00     392.0
2016-11-20 04:47:00     195.0
2016-11-20 04:48:00     219.0
2016-11-20 04:49:00     607.0
2016-11-20 04:50:00     825.0
2016-11-20 04:51:00     472.0
2016-11-20 04:52:00     273.0
2016-11-20 04:53:00     358.0
2016-11-20 04:54:00     548.0
2016-11-20 04:55:00     283.0
2016-11-20 04:56:00     168.0
2016-11-20 04:57:00     224.0
2016-11-20 04:58:00     428.0
2016-11-20 04:59:00     237.0
2016-11-20 05:00:00     188.0
2016-11-20 05:01:00     241.0
2016-11-20 05:02:00     463.0
2016-11-20 05:03:00     247.0
2016-11-20 05:04:00     154.0
2016-11-20 05:05:00     280.0
2016-11-20 05:06:00     421.0
2016-11-20 05:07:00     257.0
2016-11-20 05:08:00     189.0
2016-11-20 05:09:00     277.0
2016-11-20 05:10:00     481.0
2016-11-20 05:11:00     223.0
2016-11-20 05:12:00     167.0
2016-11-20 05:13:00     246.0
2016-11-20 05:14:00     446.0
2016-11-20 05:15:00     240.0
2016-11-20 05:16:00     165.0
2016-11-20 05:17:00     368.0
2016-11-20 05:18:00     741.0
2016-11-20 05:19:00     443.0
2016-11-20 05:20:00     212.0
2016-11-20 05:21:00     333.0
2016-11-20 05:22:00     704.0
2016-11-20 05:23:00     379.0
2016-11-20 05:24:00     232.0
2016-11-20 05:25:00     350.0
2016-11-20 05:26:00     836.0
2016-11-20 05:27:00     363.0
2016-11-20 05:28:00     255.0
2016-11-20 05:29:00     448.0
2016-11-20 05:30:00    1183.0
2016-11-20 05:31:00    1003.0
2016-11-20 05:32:00    1375.0
2016-11-20 05:33:00    2277.0
Freq: T, dtype: float64

Taking a closer look at 30-second buckets to pin down the rest-period windows

per_30sec = tweets_KovWard.resample('30S').sum().fillna(0)

per_30sec[(per_30sec.index > '2016-11-20 04:45:00') & 
          (per_30sec.index < '2016-11-20 05:00:00')]
2016-11-20 04:45:30    203.0
2016-11-20 04:46:00    242.0
2016-11-20 04:46:30    150.0
2016-11-20 04:47:00    122.0
2016-11-20 04:47:30     73.0
2016-11-20 04:48:00     85.0
2016-11-20 04:48:30    134.0
2016-11-20 04:49:00    206.0
2016-11-20 04:49:30    401.0
2016-11-20 04:50:00    478.0
2016-11-20 04:50:30    347.0
2016-11-20 04:51:00    264.0
2016-11-20 04:51:30    208.0
2016-11-20 04:52:00    148.0
2016-11-20 04:52:30    125.0
2016-11-20 04:53:00    144.0
2016-11-20 04:53:30    214.0
2016-11-20 04:54:00    317.0
2016-11-20 04:54:30    231.0
2016-11-20 04:55:00    156.0
2016-11-20 04:55:30    127.0
2016-11-20 04:56:00     98.0
2016-11-20 04:56:30     70.0
2016-11-20 04:57:00     80.0
2016-11-20 04:57:30    144.0
2016-11-20 04:58:00    250.0
2016-11-20 04:58:30    178.0
2016-11-20 04:59:00    134.0
2016-11-20 04:59:30    103.0
Freq: 30S, dtype: float64
# round 1:      2016-11-20 04:42:30 - 04:45:30
# round 1 rest: 2016-11-20 04:45:30 - 04:46:30
# round 2 rest: 2016-11-20 04:49:30 - 04:50:30
tweets_df[(tweets_df['created_at'] >= '2016-11-20 04:49:30') & \
              (tweets_df['created_at'] < '2016-11-20 04:50:30')]['text'].count()
879
from datetime import timedelta

fight_start_text = '2016-11-20 04:42:30'
fight_start_time = datetime.strptime(fight_start_text, '%Y-%m-%d %H:%M:%S')

rnd = timedelta(minutes = 3)
rst = timedelta(minutes = 1)

rnd_start_stop = []
rst_start_stop = []

for i in range(12):
    print 'round %d start: %s' % (i+1, (i*(rnd+rst)+fight_start_time).strftime('%Y-%m-%d %H:%M:%S'))
    
    rnd_start = i*(rnd+rst)+fight_start_time
    rnd_stop = rnd_start + rnd
    rst_start = rnd_start + rnd
    rst_stop = rst_start + rst
    
    rnd_start_stop.append((rnd_start.strftime('%Y-%m-%d %H:%M:%S'), rnd_stop.strftime('%Y-%m-%d %H:%M:%S')))
    rst_start_stop.append((rst_start.strftime('%Y-%m-%d %H:%M:%S'), rst_stop.strftime('%Y-%m-%d %H:%M:%S')))

print ''
print rnd_start_stop
print rst_start_stop
round 1 start: 2016-11-20 04:42:30
round 2 start: 2016-11-20 04:46:30
round 3 start: 2016-11-20 04:50:30
round 4 start: 2016-11-20 04:54:30
round 5 start: 2016-11-20 04:58:30
round 6 start: 2016-11-20 05:02:30
round 7 start: 2016-11-20 05:06:30
round 8 start: 2016-11-20 05:10:30
round 9 start: 2016-11-20 05:14:30
round 10 start: 2016-11-20 05:18:30
round 11 start: 2016-11-20 05:22:30
round 12 start: 2016-11-20 05:26:30

[('2016-11-20 04:42:30', '2016-11-20 04:45:30'), ('2016-11-20 04:46:30', '2016-11-20 04:49:30'), ('2016-11-20 04:50:30', '2016-11-20 04:53:30'), ('2016-11-20 04:54:30', '2016-11-20 04:57:30'), ('2016-11-20 04:58:30', '2016-11-20 05:01:30'), ('2016-11-20 05:02:30', '2016-11-20 05:05:30'), ('2016-11-20 05:06:30', '2016-11-20 05:09:30'), ('2016-11-20 05:10:30', '2016-11-20 05:13:30'), ('2016-11-20 05:14:30', '2016-11-20 05:17:30'), ('2016-11-20 05:18:30', '2016-11-20 05:21:30'), ('2016-11-20 05:22:30', '2016-11-20 05:25:30'), ('2016-11-20 05:26:30', '2016-11-20 05:29:30')]
[('2016-11-20 04:45:30', '2016-11-20 04:46:30'), ('2016-11-20 04:49:30', '2016-11-20 04:50:30'), ('2016-11-20 04:53:30', '2016-11-20 04:54:30'), ('2016-11-20 04:57:30', '2016-11-20 04:58:30'), ('2016-11-20 05:01:30', '2016-11-20 05:02:30'), ('2016-11-20 05:05:30', '2016-11-20 05:06:30'), ('2016-11-20 05:09:30', '2016-11-20 05:10:30'), ('2016-11-20 05:13:30', '2016-11-20 05:14:30'), ('2016-11-20 05:17:30', '2016-11-20 05:18:30'), ('2016-11-20 05:21:30', '2016-11-20 05:22:30'), ('2016-11-20 05:25:30', '2016-11-20 05:26:30'), ('2016-11-20 05:29:30', '2016-11-20 05:30:30')]
tweets_df['rest_period'] = 0

rst_prd = 1

for i in rst_start_stop:
    tweets_df.loc[(tweets_df['created_at'] >= i[0]) & (tweets_df['created_at'] < i[1]), 'rest_period'] = rst_prd
    rst_prd += 1
    
tweets_df.rest_period.value_counts()
0     67814
2       879
12      861
9       726
11      706
10      632
3       531
1       445
7       440
6       430
5       429
8       415
4       394
Name: rest_period, dtype: int64
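The loop above is plenty fast for 12 windows; a vectorized alternative with searchsorted is sketched here only as a cross-check of the labels (it assumes the rest windows are non-overlapping and chronological, which they are by construction):

# index of the last rest-period start at or before each tweet
rest_starts = pd.DatetimeIndex([s for s, e in rst_start_stop])
rest_stops = pd.DatetimeIndex([e for s, e in rst_start_stop])
ts = tweets_df['created_at'].values
pos = rest_starts.values.searchsorted(ts, side='right') - 1
# a tweet is in a rest period only if that window has not yet ended
in_rest = (pos >= 0) & (ts < rest_stops.values[pos.clip(0)])
alt_rest_period = np.where(in_rest, pos + 1, 0)
print (alt_rest_period == tweets_df['rest_period'].values).all()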
tweets_df[tweets_df.rest_period == 1]['text']
11121    Kovalev won round one. Good jab. Ward not very...
11122    RT @RBRBoxing: Retweet if you think Kovalev wi...
11123                   UH OH WARD GOT CAUGHT #KovalevWard
11124    RT @amirkingkhan: Let's go @andreward. #Kovale...
11125    #kovalevward @ T-Mobile Arena https://t.co/9vI...
11126    Ward staggering after a jab. Going to change m...
11127    RT @PromoDelPueblo: Vaya combate que nos esper...
11128    RT @TheDKano: Here we go for boxing's pound fo...
11129    RT @amirkingkhan: Let's go @andreward. #Kovale...
11130    RT @AndreasHale: Kovalev’s jab is dominant in ...
11131    Kovalev's armpit is about to be the real winne...
11132    !!!!!!!!!!!!!! Kov starting out strong. Ward n...
11133    RT @SugarRayLeonard: Who will be winning this ...
11134    RT @danrafaelespn: Rd 1 for Kovalev, no questi...
11135    RT @amirkingkhan: Let's go @andreward. #Kovale...
11136    RT @RBRBoxing: Good start, Ward runs into a ha...
11137                 Round 1 #KovalevWard  : 10-9 Kovalev
11138                  Kovalev takes R1 10-9. #KovalevWard
11139    RT @Jeskeliin22: Se viene que peleón, Kovalev ...
11140    Finally people are seeing why I was shocked by...
11141    RT @Main_Events: This is the moment we have al...
11142    Rd. 1: Tough round to score. Largely a feel-ou...
11143    RT @rosieperezbklyn: Oh! Ward stumbled! @HBObo...
11144    Le jab de Kovalev fait le travail. 10-9 Kovale...
11145    RT @danrafaelespn: Rd 1 for Kovalev, no questi...
11146                    Fucking Russians!!!! #KovalevWard
11147    Ward was winning 1st round with jab to body, b...
11148                        #KovalevWard round 1 kovalev.
11149             That jab rocking Ward. Wow! #KovalevWard
11150                      Round 1 to kovalev #KovalevWard
                               ...                        
11536    RT @boxingcorner247: Here we go #KovalevWard ���...
11537                  1st round Kovalev 10-9 #KovalevWard
11538    R1 Ward showing more fight than flight, but ea...
11539    1st round i think goes to Kovalev he landed th...
11540    Ward just needs to be super clever in the figh...
11541    #KovalevWard jordanaharkness @ T-Mobile Arena ...
11542    Kovalev showing his power early with the jab. ...
11543    RT @LennoxLewis: Let's get ready to ruuuuumble...
11544                 ウォードさん、コバレフさんのジャブに手こずる。 #KovalevWard
11545    Round 1: Ward appears stunned by Kovalev's pow...
11546    RT @chaka_210: Es para kovalev este round!!! #...
11547    RT @rosieperezbklyn: Good 1st round! Gave it t...
11548    RT @Patrick_Wyman: 10-9 Kovalev in the first. ...
11549             Kovalev wins round 1 for me #KovalevWard
11550    WHOAH CUT THE TENSION WITH A KNIFEEEE. That ja...
11551    Wow. Ward buzzed by a jab already. \n\n#Kovale...
11552    Kovalev 1-0. Ward felt the power and was a lit...
11553    #letsgetreadytorumble #kovalevward https://t.c...
11554              Felt that power already 😏 #KovalevWard
11555    A cautious start, not surprising. First round ...
11556                                1-0 Ward #KovalevWard
11557    RT @sand_trevino: This round goes to Kovalev. ...
11558    Ward's face is already showing the results of ...
11559                           10-9 Kovalev. #KovalevWard
11560    Kovalev got Ward looking like Sullinger..\n#Ko...
11561    Slow first round. If kovalev keeps leaning on ...
11562    Good start for Sergey Kovalev.\nGood start for...
11563    RT @ChavaESPN: Round 1. Kovalev se ha llevado ...
11564    Round 1de estudio pero se lo doy a Kovalev #Ko...
11565    Both guys understandably cagey in round one bu...
Name: text, dtype: object

Calculate tweet-implied probability per round

  • any round 2 surprises?
tweets_df[(tweets_df['text'].str.contains(r'\d+\-\d+|round|rd', case=False)) & (tweets_df.rest_period!=0)]\
            [['rest_period', 'user_screen_name', 'text']].to_excel('data/scorecard_tweets.xlsx', encoding='utf8')
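The per-round probabilities fed into the simulation below (kov_rd_prob) were produced by scoring that exported spreadsheet outside the notebook. A minimal sketch of how they could be rebuilt from such a file, assuming a hypothetical hand-scored copy with a 0/1 kov_won_round column:

# hypothetical scored copy of the export above
scored = pd.read_excel('data/scorecard_tweets_scored.xlsx')
# share of scorecard tweets in each rest period that gave the round to Kovalev
kov_rd_prob_est = scored.groupby('rest_period')['kov_won_round'].mean()
print kov_rd_prob_est.round(3).values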

ax = per_minute[(per_minute.index > '2016-11-20 04:30:00') & 
           (per_minute.index < '2016-11-20 06:00:00')].plot(figsize=(12,6), label='Tweets')

ax.axvspan(rnd_start_stop[0][0], rnd_start_stop[0][1], alpha=0.7, color='lightblue', label="Rounds 1-12")
ax.axvspan(rst_start_stop[0][0], rst_start_stop[0][1], color='white', label="Rest Period")

for i in range(1,12):
    ax.axvspan(rnd_start_stop[i][0], rnd_start_stop[i][1], alpha=0.7, color='lightblue')
    ax.axvspan(rst_start_stop[i][0], rst_start_stop[i][1], color='white')

plt.title('Fight "Hour" Tweets Kovalev/Ward')
plt.xlabel("Time (UTC)")
plt.ylabel("Tweet Count")
plt.legend(loc='upper right')

ax.text('2016-11-20 05:55:00', 20, 'endlesspint.com',
         fontsize=16, color='gray',
         ha='right', va='bottom', alpha=0.3)

ax.grid(False, which='both')

### save file locally with high resolution
plt.savefig('img/fight_hour_tweets.PNG', dpi=1200)


Plot histogram of rounds won over 1,000 simulated bouts [nfl/wk8]

  • what % of simulations agree with the judges?
kov_rd_prob = np.array([0.962, 1.000, 0.627, 0.727, 0.293, 0.862, 0.082, 0.075, 0.028, 0.689, 0.082, 0.167])
np.random.seed(506)    # "SOG" ~ 506

bouts = 1000
samp_bouts = np.random.random((bouts, len(kov_rd_prob)))
print kov_rd_prob
print samp_bouts[:5]
[ 0.962  1.     0.627  0.727  0.293  0.862  0.082  0.075  0.028  0.689
  0.082  0.167]
[[ 0.847888    0.0062996   0.11547329  0.06227527  0.70417491  0.45142321
   0.02663976  0.68392883  0.8048215   0.01091125  0.99085692  0.98667746]
 [ 0.51372109  0.05352931  0.55695119  0.56343986  0.16066945  0.24419693
   0.00722941  0.2815361   0.48279483  0.50589732  0.53835301  0.28329453]
 [ 0.27038547  0.49515731  0.86365123  0.46734458  0.73658639  0.62529576
   0.44125324  0.47066594  0.27224368  0.37605197  0.34024841  0.95194275]
 [ 0.81634129  0.58905575  0.97129374  0.46887292  0.83701569  0.17920073
   0.0192533   0.1664162   0.58023702  0.93020315  0.6370348   0.42960779]
 [ 0.14706038  0.5908011   0.49457534  0.04813603  0.10570926  0.05762396
   0.86765057  0.63971255  0.70860045  0.95180043  0.43166628  0.31270852]]
kov_rd_wins = samp_bouts < kov_rd_prob
kov_rd_wins[:5]
array([[ True,  True,  True,  True, False,  True,  True, False, False,
         True, False, False],
       [ True,  True,  True,  True,  True,  True,  True, False, False,
         True, False, False],
       [ True,  True, False,  True, False,  True, False, False, False,
         True, False, False],
       [ True,  True, False,  True, False,  True,  True, False, False,
        False, False, False],
       [ True,  True,  True,  True,  True,  True, False, False, False,
        False, False, False]], dtype=bool)
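Comparing uniform draws against kov_rd_prob is just a per-round Bernoulli trial, so an equivalent draw (not bit-identical, since it uses a fresh random stream) would be:

# same simulation expressed as a direct Bernoulli draw per round
kov_rd_wins_alt = np.random.binomial(1, kov_rd_prob, size=(bouts, len(kov_rd_prob))).astype(bool)
print np.sum(kov_rd_wins_alt, axis=1)[:5]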
kov_scores = np.sum(kov_rd_wins, axis=1)
print kov_scores[:25]
[7 8 5 5 6 4 6 5 7 5 5 3 4 7 8 3 4 6 5 5 3 6 5 4 9]
plt.hist(kov_scores)
(array([   4.,   38.,  129.,  303.,    0.,  323.,  159.,   38.,    4.,    2.]),
 array([  2. ,   2.8,   3.6,   4.4,   5.2,   6. ,   6.8,   7.6,   8.4,
          9.2,  10. ]),
 <a list of 10 Patch objects>)

df = pd.DataFrame(kov_scores)
ax = df.plot(kind='density', figsize=(12,6), legend=False)

# plt.legend(False)
plt.title('Kovalev Rounds Won (1,000 bouts simulated)')
plt.xlim((0,12))
plt.xlabel('Rounds')

ax.text(11.8, 0.01, 'endlesspint.com',
         fontsize=16, color='gray',
         ha='right', va='bottom', alpha=0.3)

ax.grid(False, which='both')

### save file locally with high resolution
plt.savefig('img/kov_rnds_won_density.PNG', dpi=1200)

print np.median(kov_scores)
print np.mean(kov_scores)
6.0
5.564
print np.mean(kov_scores <= 5.0)

print np.mean(kov_scores == 6.0)
print np.mean(kov_scores <= 6.0)
0.474
0.323
0.797
len(kov_scores)
1000
df.describe()
0
count 1000.000000
mean 5.564000
std 1.189672
min 2.000000
25% 5.000000
50% 6.000000
75% 6.000000
max 10.000000
df.plot(kind='box')
[box plot of simulated Kovalev rounds won]