1+ from pandas .core .frame import DataFrame
2+ import pandas as pd
3+ from pandas import json_normalize
4+ import numpy as np
5+ import matplotlib .pyplot as plt
6+ import time
7+ from datetime import datetime , timedelta
8+ import matplotlib .dates as mdates
9+ from matplotlib .dates import DayLocator , HourLocator , DateFormatter , drange
10+ from graphql_query import get_PR_data
11+
12+
def createDateColumn(dataframe):
    """Add a 'Date Merged' column holding each row's merge date.

    Each value of the 'node.mergedAt' column is expected to be a timestamp
    string of the form ``%Y-%m-%dT%H:%M:%SZ``; it is parsed and reduced to a
    ``datetime.date``. Rows with no merge timestamp (``None``, NaN or NaT)
    receive the string ``"None"`` instead, which callers can filter on.

    Args:
        dataframe: frame containing a 'node.mergedAt' column.

    Returns:
        The same frame object, modified in place with the new column.

    Raises:
        ValueError: if a non-missing timestamp does not match the format.
    """
    format_str = r"%Y-%m-%dT%H:%M:%SZ"

    # pd.isna covers None as well as NaN/NaT, which pandas may substitute for
    # missing values; a plain `!= None` test would pass NaN on to strptime.
    dataframe['Date Merged'] = [
        "None" if pd.isna(merged_at)
        else datetime.strptime(merged_at, format_str).date()
        for merged_at in dataframe['node.mergedAt']
    ]

    return dataframe
28+
29+
def numPRMerged_graph(df):
    """Draw, save and display a bar chart of the number of PRs merged per date.

    Reads the 'dates' column (bar positions) and the 'counts' column (bar
    heights) of ``df``, writes the figure to 'PRmergeRates.png' at 400 dpi and
    then shows it.

    Args:
        df: frame with 'dates' and 'counts' columns. The dates are presumably
            ``datetime.date`` values (as produced by createDateColumn) --
            ``mdates.drange`` needs date-like bounds; confirm against caller.

    Raises:
        ValueError: if 'dates' is empty (``min``/``max`` of an empty sequence).
    """
    datelist = df['dates']
    counts = df['counts']

    # Sample the covered date range at a fixed spacing; the samples are only
    # used to derive the bar width, not as bar positions.
    oldest = min(datelist)
    youngest = max(datelist)
    timegap = 12  # spacing of the samples, in weeks
    dates = mdates.drange(oldest, youngest, timedelta(weeks=timegap))

    # NOTE(review): drange already returns matplotlib date numbers; this extra
    # conversion is kept from the original because its effect on float input
    # may depend on the matplotlib version -- confirm before removing.
    dates = mdates.date2num(dates)

    # When the data spans less than one `timegap`, drange yields at most one
    # sample, np.diff is empty and .min() would raise. Fall back to
    # matplotlib's default bar width in that case.
    gaps = np.diff(dates)
    width = gaps.min() if gaps.size else 0.8

    fig, ax = plt.subplots()

    # Bars are placed at the actual merge dates.
    ax.bar(datelist, counts.tolist(), align='center', width=width, ec='blue')

    # Tell matplotlib to interpret the x-axis values as dates.
    ax.xaxis_date()

    # Make space for and rotate the x-axis tick labels.
    fig.autofmt_xdate()

    plt.ylabel('Counts')
    plt.xlabel('Dates')
    plt.title('Number of PRs merged over time')
    plt.savefig('PRmergeRates.png', dpi=400)
    plt.show()
66+
def computeMergetime(created_at, merged_at):
    """Return the whole number of days between creation and merge.

    Both arguments are timestamp strings in ``%Y-%m-%dT%H:%M:%SZ`` form. The
    elapsed time is converted to days and truncated toward zero, so anything
    shorter than 24 hours yields 0.
    """
    timestamp_layout = r"%Y-%m-%dT%H:%M:%SZ"
    seconds_per_day = 86400

    opened, closed = (
        datetime.strptime(stamp, timestamp_layout)
        for stamp in (created_at, merged_at)
    )
    elapsed_days = (closed - opened).total_seconds() / seconds_per_day
    return int(elapsed_days)
75+
def addlabels(x, y):
    """Write each value of ``y`` as a centered text label on the current plot.

    The i-th value is drawn at horizontal position ``i`` and at a height equal
    to the value itself, i.e. at the top of the i-th bar when bars sit at
    positions 0..n-1.

    Args:
        x: sequence that determines how many labels are drawn.
        y: values to display, paired with ``x`` by position.
    """
    # Pair by position rather than via y[i]: on a pandas Series, y[i] is a
    # label lookup and fails or mislabels when the index is not 0..n-1.
    for position, (_, value) in enumerate(zip(x, y)):
        plt.text(position, value, value, ha='center')
79+
def avgMergetime_graph(df):
    """Draw, save and display a bar chart of the average merge time.

    Reads the 'Merged_YM' column (used as tick labels) and the 'mergetime'
    column (bar heights) of ``df``, writes the figure to 'AvgMergeTimes.png'
    at 400 dpi and then shows it.
    """
    period_labels = df['Merged_YM']
    avg_days = df['mergetime']

    fig, ax = plt.subplots()

    # Bars sit at integer slots 0..n-1; the period values only label the ticks.
    slots = np.arange(len(period_labels))
    plt.bar(slots, avg_days)
    plt.xticks(slots, period_labels)

    # Axis captions do not depend on the tick formatting below.
    plt.title("Avg Merge Times")
    plt.xlabel("Dates")
    plt.ylabel("Merge Time in Days")

    # Rotate the tick labels and leave room for them.
    fig.autofmt_xdate()
    ax.xaxis_date()

    # Print each bar's value above it.
    addlabels(period_labels, avg_days)

    plt.savefig('AvgMergeTimes.png', dpi=400)
    plt.show()
98+
def avgMergetime(df):
    """Compute the average merge time per month and plot it.

    Steps:
      1. Add a 'mergetime' column: whole days between 'node.createdAt' and
         'node.mergedAt' for merged rows, the string "None" for rows without
         a merge timestamp.
      2. Add a 'Merged_YM' column (monthly period of 'node.mergedAt') and
         average 'mergetime' per period over the merged rows only.
      3. Pass the per-period averages, truncated to int, to
         avgMergetime_graph.

    Args:
        df: frame with 'node.createdAt' and 'node.mergedAt' timestamp-string
            columns. It is modified in place (two columns are added).
    """
    # 1. Merge time for each PR. pd.isna covers None as well as NaN/NaT, so a
    #    missing timestamp never reaches the string parser.
    df['mergetime'] = [
        "None" if pd.isna(merged_at) else computeMergetime(created_at, merged_at)
        for created_at, merged_at in zip(df['node.createdAt'], df['node.mergedAt'])
    ]
    print(df)  # debug dump of the full frame, kept from the original

    # 2. Average merge time for each month.
    df['Merged_YM'] = pd.to_datetime(df['node.mergedAt']).dt.to_period('M')

    # Average over an explicitly numeric copy: the "None" placeholders become
    # NaN and are dropped, so the mean never runs on a mixed object column.
    merged_only = pd.DataFrame({
        'Merged_YM': df['Merged_YM'],
        'mergetime': pd.to_numeric(df['mergetime'], errors='coerce'),
    }).dropna(subset=['Merged_YM', 'mergetime'])

    mean_df = merged_only.groupby('Merged_YM')['mergetime'].mean().reset_index()
    mean_df['mergetime'] = mean_df['mergetime'].astype(int)  # float -> whole days

    # 3. Bar graph of the monthly averages.
    avgMergetime_graph(mean_df)
127+
def process_data(dataframe):
    """Turn the queried PR data into the two charts produced by this script.

    Adds the 'Date Merged' column, tallies how many rows share each merge
    date, and hands the tally (without the "None" placeholder row) to
    numPRMerged_graph. The full frame is then passed to avgMergetime.
    """
    dataframe = createDateColumn(dataframe)

    # Frequency of each merge date, as a two-column frame.
    merge_tally = pd.DataFrame(dataframe['Date Merged'].value_counts()).reset_index()
    merge_tally.columns = ['dates', 'counts']

    # Rows that were never merged were tagged with the string "None".
    was_merged = merge_tally["dates"] != "None"

    # 1. Number of PRs merged over time.
    numPRMerged_graph(merge_tally.loc[was_merged])
    # 2. Average PR merge time.
    avgMergetime(dataframe)
    # Notes carried over from the original:
    #   - average time it takes to merge PRs
    #   - pie chart of author is merger vs author is not the merger
148+
149+
def main():
    """Fetch the PR data through the GraphQL helper and process it.

    Calls get_PR_data with a cursor of None and passes the result to
    process_data.
    """
    # NOTE(review): a None cursor presumably means "start from the first
    # page" -- confirm against graphql_query.get_PR_data.
    pr_cursor = None
    res_data = get_PR_data(pr_cursor)
    process_data(res_data)


# Run the pipeline only when executed as a script, so importing this module
# (e.g. to reuse the helpers) does not trigger the query and the plots.
if __name__ == "__main__":
    main()
0 commit comments