1+ from pandas .core .frame import DataFrame
2+ import pandas as pd
3+ from pandas import json_normalize
4+ import numpy as np
5+ import matplotlib .pyplot as plt
6+ import time
7+ from datetime import datetime , timedelta
8+ import matplotlib .dates as mdates
9+ from matplotlib .dates import DayLocator , HourLocator , DateFormatter , drange
10+ from graphql_query import get_PR_data
11+
12+
def createDateColumn(dataframe):
    """Add a 'Date Merged' column derived from 'node.mergedAt'.

    Each 'node.mergedAt' timestamp string (format ``%Y-%m-%dT%H:%M:%SZ``) is
    parsed and stored as a ``datetime.date``.  Rows with no merge timestamp
    (``None`` or NaN) get the string ``"None"`` instead.

    The column is added to *dataframe* in place, and the same object is
    returned.

    Raises:
        ValueError: if a non-missing timestamp does not match the format.
    """
    format_str = r"%Y-%m-%dT%H:%M:%SZ"

    # pd.isna covers both None and NaN; a plain `!= None` test lets NaN
    # through to strptime, which then raises TypeError.
    dataframe['Date Merged'] = [
        "None" if pd.isna(stamp) else datetime.strptime(stamp, format_str).date()
        for stamp in dataframe['node.mergedAt']
    ]

    return dataframe
28+
29+
def numPRMerged_graph(df):
    """Plot the number of PRs merged per date as a bar chart.

    Reads the 'dates' and 'counts' columns of *df*, draws one bar per row,
    saves the figure to 'PRmergeRates.png' (dpi=400) and then shows it.
    """
    datelist = df['dates']
    counts = df['counts']

    # Sample the span between the oldest and youngest date in steps of
    # `timegap` weeks; the bar width is taken from the spacing of these points.
    oldest = min(datelist)
    youngest = max(datelist)
    timegap = 12
    dates = mdates.drange(oldest, youngest, timedelta(weeks=timegap))

    # NOTE(review): drange is documented as already returning matplotlib
    # date numbers, so this second conversion looks redundant. It is kept so
    # the rendered output does not change -- confirm whether it is needed.
    dates = mdates.date2num(dates)

    # With fewer than two sample points (e.g. every merge falls inside a
    # single `timegap` step) np.diff is empty and .min() would raise
    # ValueError, so fall back to matplotlib's default bar width.
    gaps = np.diff(dates)
    width = gaps.min() if gaps.size else 0.8

    # Set up the axes and figure
    fig, ax = plt.subplots()

    # Bars are positioned at the merge dates themselves rather than at
    # x-values of [0, 1, 2, ...].
    ax.bar(datelist, counts.tolist(), align='center', width=width, ec='blue')

    # Tell matplotlib to interpret the x-axis values as dates
    ax.xaxis_date()

    # Make space for and rotate the x-axis tick labels
    fig.autofmt_xdate()

    plt.ylabel('Counts')
    plt.xlabel('Dates')
    plt.title('Number of PRs merged over time')
    plt.savefig('PRmergeRates.png', dpi=400)
    plt.show()
66+
def computeMergetime(created_at, merged_at):
    """Return the whole minutes elapsed between creation and merge.

    Both arguments are timestamp strings in ``%Y-%m-%dT%H:%M:%SZ`` form.
    The difference (merged minus created) is converted to minutes and
    truncated toward zero by ``int``.
    """
    iso_fmt = r"%Y-%m-%dT%H:%M:%SZ"

    # Parse in the order (created, merged).
    opened, closed = (
        datetime.strptime(stamp, iso_fmt) for stamp in (created_at, merged_at)
    )

    elapsed = closed - opened
    return int(elapsed.total_seconds() / 60)
75+
def avgMergetime_graph(df):
    """Draw, save and show a bar chart of the values in df['mergetime'].

    Bars sit at consecutive integer positions and are labelled with the
    matching df['Merged_YM'] entries.  The figure is written to
    'AvgMergeTimes.png' at 400 dpi and then displayed.
    """
    labels = df['Merged_YM']
    heights = df['mergetime']
    fig, ax = plt.subplots()

    # One evenly spaced slot per row; the tick labels carry the real values.
    slots = np.arange(len(labels))
    ax.bar(slots, heights)
    plt.xticks(slots, labels)

    # Make space for and rotate the x-axis tick labels
    fig.autofmt_xdate()
    ax.xaxis_date()

    ax.set_xlabel("Dates")
    ax.set_ylabel("Merge Time in Minutes")
    ax.set_title("Avg Merge Times")
    plt.savefig('AvgMergeTimes.png', dpi=400)
    plt.show()
93+
def avgMergetime(df):
    """Compute the average merge time per month and plot it.

    Adds two columns to *df* in place:

    * 'mergetime' -- minutes between 'node.createdAt' and 'node.mergedAt'
      (via computeMergetime), or the string "None" for rows with no merge
      timestamp;
    * 'Merged_YM' -- the monthly period of 'node.mergedAt'.

    The merge times of merged rows are then averaged per 'Merged_YM',
    truncated to int, and passed to avgMergetime_graph.
    """
    # 1. Merge time for each PR. pd.isna catches NaN as well as None, so a
    #    missing timestamp never reaches strptime.
    df['mergetime'] = [
        "None" if pd.isna(merged_at) else computeMergetime(created_at, merged_at)
        for created_at, merged_at in zip(df['node.createdAt'], df['node.mergedAt'])
    ]

    # 2. Average merge time for each month. Only merged rows are used and
    #    their values are made numeric first, so the "None" placeholders
    #    never take part in the mean.
    df['Merged_YM'] = pd.to_datetime(df['node.mergedAt']).dt.to_period('M')
    is_merged = df['node.mergedAt'].notna()
    minutes = pd.to_numeric(df.loc[is_merged, 'mergetime'])
    group_mean = minutes.groupby(df.loc[is_merged, 'Merged_YM']).mean()
    mean_df = group_mean.reset_index()

    mean_df['mergetime'] = mean_df['mergetime'].astype(int)  # float -> int

    # 3. Bar graph of the monthly averages
    avgMergetime_graph(mean_df)
122+
def process_data(dataframe):
    """Turn the query result into the two PR graphs.

    Called from main() with the dataframe gathered from the query.  A
    'Date Merged' column is added, the merges per date are counted, and the
    resulting tables are handed to the graphing functions.
    """
    # Date-typed merge column (rows without a merge carry "None").
    dataframe = createDateColumn(dataframe)

    # Frequency of each merge date, as a two-column table.
    tally = dataframe['Date Merged'].value_counts().to_frame().reset_index()
    tally.columns = ['dates', 'counts']

    # Leave out the bucket that collects the unmerged rows.
    merged_only = tally[tally['dates'] != "None"]

    # 1. Number of PRs merged over time
    numPRMerged_graph(merged_only)
    # 2. Average PR merge time
    avgMergetime(dataframe)
    # Notes left by the original author:
    #   - average time it takes to merge PRs
    #   - pie chart of author is merger vs author is not the merger
143+
144+
def main():
    """Fetch the PR data with get_PR_data and pass it to process_data."""
    # No cursor is supplied to the GraphQL query helper (None is passed).
    process_data(get_PR_data(None))
150+
151+ main ()
0 commit comments